Source code for PyFHD.pyfhd_tools.test_utils

from PyFHD.io.pyfhd_io import recarray_to_dict
import numpy as np
from scipy.io import readsav
from pathlib import Path
import numpy.testing as npt
from colorama import Fore
from colorama import Style
from PyFHD.io.pyfhd_io import save
from numpy.typing import NDArray



[docs]
def _load_data_file(path: Path):
    """Read one data file: ``.sav`` through scipy, anything else through ``np.load``."""
    if path.suffix == ".sav":
        return readsav(path, python_dict=True)
    # NOTE(review): allow_pickle=True unpickles arbitrary objects, which can
    # execute code. Only point this at test fixtures from a trusted source.
    return np.load(path, allow_pickle=True)


def get_data(data_dir: Path, data_filename: str, *args: str) -> list:
    """
    Read one or more data files (``.sav`` or NumPy files) from a data directory.

    Files ending in ``.sav`` are read with ``scipy.io.readsav`` as a python
    dictionary, every other file is read with ``np.load`` (pickles allowed).
    Each file path is built as ``Path(data_dir, filename)``.

    Parameters
    ----------
    data_dir : Path
        The directory containing the files, e.g. the directory given by pytest-datadir
    data_filename : str
        The name of the first file to read
    *args : str
        Optional additional filenames, read in the order given

    Returns
    -------
    return_list : list
        If only ``data_filename`` is given, the loaded contents of that file
        are returned directly (not wrapped in a list). If extra filenames are
        given, a list with the contents of every file, in argument order.
    """
    loaded = [_load_data_file(Path(data_dir, name)) for name in (data_filename, *args)]
    # A single file is returned bare so callers can use it without unpacking
    return loaded if args else loaded[0]




[docs]
def get_data_items(data_dir: Path, data_with_item_path: Path, *args: list[str]) -> list:
    """
    Load files through ``get_data`` and pull out the value stored under the
    first key of the dictionary each file wraps.

    Every loaded object has ``.item()`` called on it and the result is
    treated as a dictionary, so each file is expected to hold a single
    dictionary-like item.

    Parameters
    ----------
    data_dir : Path
        Path to the data directory
    data_with_item_path : Path
        Path (relative to ``data_dir``) of the first file to extract from
    *args : Paths
        Optional further files to extract from, processed in the order given

    Returns
    -------
    return_list: list
        The extracted value on its own when only one file is given,
        otherwise a list of the extracted values in argument order
    """
    extracted = []
    for filename in (data_with_item_path, *args):
        # Unwrap the stored object, then take whatever sits under its first key
        contents = get_data(data_dir, filename).item()
        first_key = list(contents.keys())[0]
        extracted.append(contents.get(first_key))
    if args:
        return extracted
    return extracted[0]




[docs]
def get_data_sav(data_dir: Path, sav_file: Path, *args: list[Path]) -> list:
    """
    Load files through ``get_data`` and return the value stored under the
    first key of each loaded dictionary.

    Parameters
    ----------
    data_dir : Path
        Path to the data directory
    sav_file : Path
        Path to the sav file, which is loaded as a python dictionary
    args: list[Path]
        Optional further filenames, processed in the order given

    Returns
    -------
    return_list: list
        The first value of the single file when no extra files are given,
        otherwise a list holding the first value of every file in order
    """

    def _first_value(filename):
        # Only the entry under the first key of the loaded dictionary is kept
        contents = get_data(data_dir, filename)
        return contents[list(contents.keys())[0]]

    primary = _first_value(sav_file)
    if not args:
        return primary
    return [primary, *map(_first_value, args)]




[docs]
def get_savs(data_dir: Path, sav_file: Path, *args: list[Path]) -> dict | list[dict]:
    """
    Takes in the path for many sav files and reads them without
    reading their keys. Assumes the sav files here have more than one key.
    If you use one sav_path only then the function acts as a wrapper for scipy's readsav.

    Parameters
    ----------
    data_dir : Path
        Path to the data directory
    sav_file : Path
        Path to the sav file, which will load a python dictionary
    args: Paths
        If given, is expected to be more filenames

    Returns
    -------
    data: dict | list[dict]
        Either a dict of one sav file or the dicts of multiple sav files
    """
    data = readsav(Path(data_dir, sav_file), python_dict=True)
    if len(args) > 0:
        data = [data]
        for file in args:
            new_data = readsav(Path(data_dir, file), python_dict=True)
            data.append(new_data)
    return data




[docs]
def try_assert_all_close(
    actual: NDArray, target: NDArray, name: str, tolerance=1e-8
) -> None:
    """
    Compare two arrays with ``numpy.testing.assert_allclose`` and print a
    coloured pass/fail line instead of letting the ``AssertionError`` escape.

    This keeps a script or notebook running when a comparison fails, which is
    handy when precision differences are expected and the mismatch report is
    all that is wanted.

    Parameters
    ----------
    actual : NDArray
        The array that was calculated
    target : NDArray
        The array the calculation is expected to produce
    name : str
        The name of the variable being tested, used in the printed message
    tolerance : float, optional
        Passed to ``assert_allclose`` as ``atol``, by default 1e-8
    """
    try:
        npt.assert_allclose(actual, target, atol=tolerance)
    except AssertionError as mismatch:
        # Red headline followed by numpy's own mismatch report
        pieces = (
            Fore.RED,
            Style.BRIGHT,
            "Test Failed for {}:".format(name),
            Style.RESET_ALL,
            "{}".format(mismatch),
            Style.RESET_ALL,
        )
    else:
        pieces = (
            Fore.GREEN,
            Style.BRIGHT,
            "Test Passed for {}".format(name),
            Style.RESET_ALL,
        )
    print("".join(pieces))




[docs]
def convert_to_h5(test_path: Path, save_path: Path, *args: list[Path]) -> None:
    """
    For every file specified as an arg, read the file from the test_path into a python dictionary.
    If it's a dict or recarray that contaisn recarrays, convert all the recarrays using recarray_to_dict.
    The files can be .npy or .sav files. The python dict will then be written into a HDF5 file for testing
    purposes.

    This function was made to convert many of the .npy and .sav files into something that can be read and written more
    easily by other packages other than numpyt or scipy.

    Parameters
    ----------
    test_path : Path
        The path to a directory with all the files inside it
    save_path : Path
        The path to the file for saving the HDF5
    *args : list[Path]
        A list of file names to be read in, can be .npy or .sav files
    """
    to_save = {}
    # Process the file differently depending on whether its IDL or numpy files
    for file in args:
        if file.endswith(".sav"):
            var = readsav(Path(test_path, file), python_dict=True)
            # Convert to nested dictionaries
            var = recarray_to_dict(var)
        elif file.endswith(".npy"):
            var = np.load(Path(test_path, file), allow_pickle=True).item()
        for key in var:
            to_save[key] = var[key]
    save(save_path, to_save, "to_save")




[docs]
def sav_file_vis_arr_swap_axes(sav_file_vis_arr: NDArray) -> NDArray:
    """Stack per-polarisation arrays read from an IDL .sav file into a single
    array, transposing each one on the way.

    Arrays such as `vis_arr` and `vis_model_arr` saved from IDL and loaded via
    scipy.io.readsav come out in a layout unsuitable for PyFHD. Each entry
    along the first axis is transposed and copied into a new array whose
    dtype and trailing dimensions are taken from the first entry.

    Parameters
    ----------
    sav_file_vis_arr : NDArray
        Array as read in by scipy.io.readsav, if `n_pol = 2` should have `shape=(2,)`

    Returns
    -------
    NDArray
        A new array with `shape=(n_pol, n_freq, n_baselines)`
    """
    n_pol = sav_file_vis_arr.shape[0]
    first_pol = sav_file_vis_arr[0]
    # Each entry is indexed (baseline, freq); the output wants (freq, baseline)
    n_baselines = first_pol.shape[0]
    n_freq = first_pol.shape[1]

    swapped = np.empty((n_pol, n_freq, n_baselines), dtype=first_pol.dtype)
    for pol_index, pol_vis in enumerate(sav_file_vis_arr):
        swapped[pol_index] = pol_vis.T

    return swapped




[docs]
def print_types(dictionary: dict, dict_name: str, indent_level: int = 1) -> None:
    """
    Print the type of every value in a dictionary, recursing into nested
    dictionaries.

    For NumPy arrays the dtype and shape are printed, followed by the type of
    the first element (and that element's dtype if it is itself an array).
    Arrays with no first element (0-d arrays, or arrays whose first axis is
    empty) only get the dtype/shape line.

    Useful when generating tests, or inside a notebook, to see what a loaded
    dictionary actually contains.

    Parameters
    ----------
    dictionary : dict
        The dictionary to print the types of
    dict_name : str
        The name of the dict, used as the prefix of each printed line
    indent_level : int
        Sets the indent levels for printing as it's a recursive function, by default 1
    """
    indent = indent_level * 2 * " "
    for key, value in dictionary.items():
        # Exact type checks are deliberate: ndarray/dict subclasses (for
        # example np.recarray) take the generic branch at the bottom.
        value_type = type(value)
        if value_type is np.ndarray:
            summary = f"{dict_name}[{key}] : {value.dtype} {value.shape}"
            if value.ndim == 0 or value.shape[0] == 0:
                # value[0] would raise IndexError here, so stop at the summary
                print(summary)
                continue
            first = value[0]
            print(f"{summary}\n{indent}Inside Type: {type(first)}")
            if type(first) is np.ndarray:
                print(f"{indent}NumPy Array Dtype: {first.dtype}")
        # Recursively call the function on another sub dict
        elif value_type is dict:
            print(f"{dict_name}[{key}]  : {value_type}")
            print_types(value, dict_name=f"  {key}", indent_level=indent_level + 2)
        # Only matches bare object() instances; print the value as well
        elif value_type is object:
            print(f"{dict_name}[{key}]  : {value_type}")
            print(value)
        # Otherwise just print the type
        else:
            print(f"{dict_name}[{key}]  : {value_type}")