Module src.tests.test_data_world
Expand source code
import pathlib
from typing import Tuple
from src.utils import get_paths
from src.data.world import get_meta_data, get_input_data, get_full_data, world_features
import pandas as pd
INPUT_FILE = "SOCA_GLOBAL2_20200310.csv"
META_FILE = "METADATA_20200310.csv"
PATHS = get_paths()
def test_meta_data():
    """Check that the processed metadata CSV is present on disk.

    The path is built from ``PATHS.data_processed`` and ``META_FILE``; the
    test asserts that it exists and that it is a file.
    """
    path = PATHS.data_processed.joinpath(META_FILE)

    # the path has to point at something ...
    assert path.exists(), (
        f"File '{path.name}' doesn't exist. Check name or directory."
    )

    # ... and that something has to be a file
    assert path.is_file(), (
        f"File '{path.name}' isn't a file. Check name or directory."
    )
def test_get_meta_data():
    """Check the table returned by ``get_meta_data``.

    Asserts that the result is a ``pandas.DataFrame``, that it has 25413 rows,
    and that its columns equal the expected metadata names, in order.
    """
    # load the metadata table
    meta_df = get_meta_data()
    assert isinstance(meta_df, pd.DataFrame), (
        f"Expected a pandas DataFrame, got {type(meta_df).__name__}."
    )

    # check number of samples
    n_samples = 25413
    error_msg = f"Incorrect number of samples: {meta_df.shape[0]} =/= {n_samples}"
    assert meta_df.shape[0] == n_samples, error_msg

    # check meta feature names (list equality: names and order must both match)
    meta_features = ["wmo", "n_cycle", "N", "lon", "lat", "juld", "date"]
    columns = meta_df.columns.tolist()
    error_msg = (
        "Unexpected features in meta data: "
        f"got {columns}, expected {meta_features}."
    )
    assert columns == meta_features, error_msg
def test_input_data():
    """Check that the processed input-data CSV is present on disk.

    The path is built from ``PATHS.data_processed`` and ``INPUT_FILE``; the
    test asserts that it exists and that it is a file.
    """
    path = PATHS.data_processed.joinpath(INPUT_FILE)

    # the path has to point at something ...
    assert path.exists(), (
        f"File '{path.name}' doesn't exist. Check name or directory."
    )

    # ... and that something has to be a file
    assert path.is_file(), (
        f"File '{path.name}' isn't a file. Check name or directory."
    )
def test_get_input_data():
    """Check the table returned by ``get_input_data``.

    Asserts that the result is a ``pandas.DataFrame``, that it has 25413 rows,
    and that its columns equal the expected identifier, input and output
    names, in that order.
    """
    # load the input table
    input_df = get_input_data()
    assert isinstance(input_df, pd.DataFrame), (
        f"Expected a pandas DataFrame, got {type(input_df).__name__}."
    )

    # check number of samples
    n_samples = 25413
    error_msg = f"Incorrect number of samples: {input_df.shape[0]} =/= {n_samples}"
    assert input_df.shape[0] == n_samples, error_msg

    # check data feature names
    input_meta_features = ["N", "wmo", "n_cycle"]
    input_features = [
        "sla",
        "PAR",
        "RHO_WN_412",
        "RHO_WN_443",
        "RHO_WN_490",
        "RHO_WN_555",
        "RHO_WN_670",
        "doy_sin",
        "doy_cos",
        "x_cart",
        "y_cart",
        "z_cart",
        "PC1",
        "PC2",
        "PC3",
        "PC4",
        "PC5",
        "PC6",
        "PC7",
        "PC1.1",
        "PC2.1",
        "PC3.1",
        "PC1.2",
        "PC2.2",
        "PC3.2",
        "PC4.1",
    ]
    # "bbp", "bbp.1", ..., "bbp.18" (19 output columns)
    output_features = ["bbp"] + [f"bbp.{i}" for i in range(1, 19)]
    features = input_meta_features + input_features + output_features

    # list equality: names and order must both match; the message lists the
    # names that differ so a failure can be diagnosed without a debugger
    columns = input_df.columns.tolist()
    missing = [name for name in features if name not in columns]
    unexpected = [name for name in columns if name not in features]
    error_msg = (
        "Unexpected features in input data "
        f"(missing: {missing}, unexpected: {unexpected}; order must match too)."
    )
    assert columns == features, error_msg
def test_full_data():
    """Check the index and columns of the table returned by ``get_full_data``.

    The index level names must equal ``world_features.meta`` and the columns
    must equal ``world_features.input + world_features.output``, in order.
    """
    full_df = get_full_data()

    # checks - check indices match metadata
    index_names = full_df.index.names
    error_msg = (
        "Unexpected index names in full data: "
        f"got {list(index_names)}, expected {world_features.meta}."
    )
    assert index_names == world_features.meta, error_msg

    # checks - check column names match feature names
    features = world_features.input + world_features.output
    columns = full_df.columns.tolist()
    error_msg = (
        "Unexpected features in full data: "
        f"got {columns}, expected {features}."
    )
    assert columns == features, error_msg
Functions
def test_full_data()
-
Expand source code
def test_full_data():
    """Check the index and columns of the table returned by ``get_full_data``.

    The index level names must equal ``world_features.meta`` and the columns
    must equal ``world_features.input + world_features.output``, in order.
    """
    full_df = get_full_data()

    # checks - check indices match metadata
    index_names = full_df.index.names
    error_msg = (
        "Unexpected index names in full data: "
        f"got {list(index_names)}, expected {world_features.meta}."
    )
    assert index_names == world_features.meta, error_msg

    # checks - check column names match feature names
    features = world_features.input + world_features.output
    columns = full_df.columns.tolist()
    error_msg = (
        "Unexpected features in full data: "
        f"got {columns}, expected {features}."
    )
    assert columns == features, error_msg
def test_get_input_data()
-
Expand source code
def test_get_input_data():
    """Check the table returned by ``get_input_data``.

    Asserts that the result is a ``pandas.DataFrame``, that it has 25413 rows,
    and that its columns equal the expected identifier, input and output
    names, in that order.
    """
    # load the input table
    input_df = get_input_data()
    assert isinstance(input_df, pd.DataFrame), (
        f"Expected a pandas DataFrame, got {type(input_df).__name__}."
    )

    # check number of samples
    n_samples = 25413
    error_msg = f"Incorrect number of samples: {input_df.shape[0]} =/= {n_samples}"
    assert input_df.shape[0] == n_samples, error_msg

    # check data feature names
    input_meta_features = ["N", "wmo", "n_cycle"]
    input_features = [
        "sla",
        "PAR",
        "RHO_WN_412",
        "RHO_WN_443",
        "RHO_WN_490",
        "RHO_WN_555",
        "RHO_WN_670",
        "doy_sin",
        "doy_cos",
        "x_cart",
        "y_cart",
        "z_cart",
        "PC1",
        "PC2",
        "PC3",
        "PC4",
        "PC5",
        "PC6",
        "PC7",
        "PC1.1",
        "PC2.1",
        "PC3.1",
        "PC1.2",
        "PC2.2",
        "PC3.2",
        "PC4.1",
    ]
    # "bbp", "bbp.1", ..., "bbp.18" (19 output columns)
    output_features = ["bbp"] + [f"bbp.{i}" for i in range(1, 19)]
    features = input_meta_features + input_features + output_features

    # list equality: names and order must both match; the message lists the
    # names that differ so a failure can be diagnosed without a debugger
    columns = input_df.columns.tolist()
    missing = [name for name in features if name not in columns]
    unexpected = [name for name in columns if name not in features]
    error_msg = (
        "Unexpected features in input data "
        f"(missing: {missing}, unexpected: {unexpected}; order must match too)."
    )
    assert columns == features, error_msg
def test_get_meta_data()
-
Expand source code
def test_get_meta_data():
    """Check the table returned by ``get_meta_data``.

    Asserts that the result is a ``pandas.DataFrame``, that it has 25413 rows,
    and that its columns equal the expected metadata names, in order.
    """
    # load the metadata table
    meta_df = get_meta_data()
    assert isinstance(meta_df, pd.DataFrame), (
        f"Expected a pandas DataFrame, got {type(meta_df).__name__}."
    )

    # check number of samples
    n_samples = 25413
    error_msg = f"Incorrect number of samples: {meta_df.shape[0]} =/= {n_samples}"
    assert meta_df.shape[0] == n_samples, error_msg

    # check meta feature names (list equality: names and order must both match)
    meta_features = ["wmo", "n_cycle", "N", "lon", "lat", "juld", "date"]
    columns = meta_df.columns.tolist()
    error_msg = (
        "Unexpected features in meta data: "
        f"got {columns}, expected {meta_features}."
    )
    assert columns == meta_features, error_msg
def test_input_data()
-
Expand source code
def test_input_data():
    """Check that the processed input-data CSV is present on disk.

    The path is built from ``PATHS.data_processed`` and ``INPUT_FILE``; the
    test asserts that it exists and that it is a file.
    """
    path = PATHS.data_processed.joinpath(INPUT_FILE)

    # the path has to point at something ...
    assert path.exists(), (
        f"File '{path.name}' doesn't exist. Check name or directory."
    )

    # ... and that something has to be a file
    assert path.is_file(), (
        f"File '{path.name}' isn't a file. Check name or directory."
    )
def test_meta_data()
-
Expand source code
def test_meta_data():
    """Check that the processed metadata CSV is present on disk.

    The path is built from ``PATHS.data_processed`` and ``META_FILE``; the
    test asserts that it exists and that it is a file.
    """
    path = PATHS.data_processed.joinpath(META_FILE)

    # the path has to point at something ...
    assert path.exists(), (
        f"File '{path.name}' doesn't exist. Check name or directory."
    )

    # ... and that something has to be a file
    assert path.is_file(), (
        f"File '{path.name}' isn't a file. Check name or directory."
    )