Coverage for PyFHD/io/pyfhd_io.py: 74%
203 statements
« prev ^ index » next coverage.py v7.9.1, created at 2025-07-01 10:58 +0800
« prev ^ index » next coverage.py v7.9.1, created at 2025-07-01 10:58 +0800
1import os
2import numpy as np
3import h5py
4from logging import Logger
5from pathlib import Path
6from typing import Any
7from numpy.typing import NDArray, DTypeLike
8from scipy.io import readsav
11def dtype_picker(dtype: DTypeLike) -> type:
12 """
13 Picks the double precision type for the given dtype for saving the hdf5 file to ensure everything
14 is saved without losing information.
16 Parameters
17 ----------
18 dtype : type
19 The numpy dtype of an array
21 Returns
22 -------
23 type
24 The corresponding double precision type
25 """
26 if np.issubdtype(dtype, np.integer):
27 return np.int64
28 elif np.issubdtype(dtype, np.floating):
29 return np.float64
30 elif np.issubdtype(dtype, np.complexfloating):
31 return np.complex128
32 else:
33 # Should never get here, this should throw an error
34 return None
37@np.vectorize
38def _is_complex(value: Any) -> bool:
39 """
40 Finds if a value is complex, this works regardless of the array type
41 unlike np.iscomplex or np.iscomplexobj which can't handle object arrays.
42 This being vectorized also allows us to check this for any complex type,
43 whether it be the python complex type or a numpy complex type
45 Parameters
46 ----------
47 value : Any
48 The value to check in a NumPy array
50 Returns
51 -------
52 bool
53 True if value is a complex, False otherwise
54 """
55 return np.iscomplexobj(value)
58@np.vectorize
59def _is_string(value: Any) -> bool:
60 """
61 Finds if a value is a string or not, works regardless of the array type.
62 There is no string check available for object arrays
64 Parameters
65 ----------
66 value : Any
67 A value to check
69 Returns
70 -------
71 bool
72 True if value is a str, False otherwise
73 """
74 return isinstance(value, str)
77@np.vectorize
78def _is_none(value: Any) -> bool:
79 """
80 Checks for a none object and is vectorized to work across any numpy array
81 even if it's an object array.
83 Parameters
84 ----------
85 value : Any
86 A value to be checked if None
88 Returns
89 -------
90 bool
91 True if value is None, otherwise False
92 """
93 return value is None
96@np.vectorize
97def _decode_byte_arr(value: NDArray[np.byte]) -> str:
98 """
99 Decodes a byte string into a string
101 Parameters
102 ----------
103 value : NDArray[np.byte]
104 Value to decode
106 Returns
107 -------
108 str
109 The decoded value
110 """
111 return value.decode()
def format_array(array: NDArray[Any]) -> NDArray[Any]:
    """
    Find any `None` values in an object array and replaces them with empty
    strings if we're dealing with a string array, or `NaNs` if we're
    dealing with a Number array. If complex, the NaN will be `nan + nanj`.
    If a string array is found, convert the string array to a bytes array,
    in all other cases leave the array alone as it should be ready to save
    into a HDF5 file.

    Strings are converted with NumPy's (ASCII only) cast when possible. If the
    text contains non-ASCII characters it is encoded as UTF-8 instead of raising
    a `UnicodeEncodeError`.

    Parameters
    ----------
    array : NDArray[Any]
        The array to find None in and if so convert from object array. If it is
        a non-empty object array, the `None` entries are replaced IN PLACE before
        the converted array is returned.

    Returns
    -------
    array: NDArray[Any]
        Array without None objects and in the correct dtype
    """
    # Empty arrays and non-object arrays can't hide None values, so only string
    # arrays need any conversion here
    if array.size == 0 or array.dtype != object:
        if np.issubdtype(array.dtype, np.str_):
            try:
                return array.astype(np.bytes_)
            except UnicodeEncodeError:
                # The NumPy str -> bytes cast only handles ASCII
                return np.char.encode(array, "utf-8")
        return array

    if np.any(_is_string(array)):
        # Elementwise comparison on purpose (`is None` would test the array object).
        # This avoids the np.where deprecation warning and replaces the None
        # values in place, no copies of the array are made
        array[array == None] = ""  # noqa: E711
        try:
            return array.astype(bytes)
        except UnicodeEncodeError:
            # Same ASCII limitation as above, go through str then encode as UTF-8
            return np.char.encode(array.astype(str), "utf-8")

    try:
        # Replace any Nones with NaN's in place, no copies made
        array[array == None] = np.nan  # noqa: E711
        if np.any(_is_complex(array)):
            # Set the type to complex128 to be sure its double precision complex
            array = array.astype(np.complex128)
            # The NaNs inserted above became `nan + 0j`, make them `nan + nanj`
            array[np.isnan(array.real)] = complex(np.nan, np.nan)
        else:
            # Ensure it's a double precision float array
            array = array.astype(np.float64)
    except TypeError:
        # The contents couldn't be turned into numbers, leave the array alone
        # as we intend on saving it raw
        pass
    return array
165def save_dataset(
166 h5py_obj: h5py.File | h5py.Group,
167 key: str,
168 value: Any,
169 to_chunk: dict[str, dict],
170 variable_lengths: [str, DTypeLike],
171 logger: Logger | None,
172) -> bool:
173 """
174 A general function for saving a dataset inside a HDF5 File or Group. It's used exclusively for saving
175 a dictionary into a HDF5 file, hence why we take a `key` and `value` pair. The `to_chunk` parameter is
176 explained in the `save` function, please look there for explanation. In the case of finding a None object
177 an Empty Dataset is saved and the is_none is returned as True, so the attribute associated with the key
178 can also be set to True to indicate to PyFHD later that the value is meant to be None when reading in the
179 dataset again.
181 Parameters
182 ----------
183 h5py_obj : h5py.File | h5py.Group
184 A h5py object that has access to the `create_dataset` and `create_group` methods
185 key : str
186 The key from the dictionary we're saving
187 value : Any
188 The value from the dictionary
189 to_chunk : dict[str, dict]
190 A dictionary where each key-value pair represents a key in the to_save dictionary, and the value is a dictionary
191 which should contain two key-value pairs, `shape` which should be the `shape` of the array and `chunk` which tells
192 hdf5 how to chunk the dataset when it's being read/written. If you're not sure how to `chunk` the dataset, set `chunk`
193 to True which enables h5py to guess the chunk size for you. By default {}
194 variable_lengths : dict[str, DTypeLike]
195 A dictionary where each key-value pair represents a key in the to_save dictionary, and the value is a dtype. This is
196 for special cases where you must save an array of variable length arrays. H5Py does support variable length arrays, but
197 you must use a special type, using the `h5py.vlen_dtype()` you can create a dtype which accepts object arrays of variable
198 lengths. For example if you wish to have variable integer array called `ija`, you would use `h5py.vlen_dtype(np.int64)`,
199 and save use it in the variable_lengths dictionary like so, `{'ija': h5py.vlen_dtype(np.int64)}`, which will set the dtype appropriately
200 during a `create_dataset` call. By default {}
201 logger : Logger | None
202 PyFHD's Logger
204 Returns
205 -------
206 is_none : bool
207 True if the value is None, False otherwise
209 See Also
210 --------
211 PyFHD.io.pyfhd_io.save : Save a HDF5 file
212 PyFHD.io.pyfhd_io.dict_to_group : Converts a dictionary to a h5py Group Object
213 """
214 is_none = False
215 # Match the type
216 match value:
217 case dict():
218 group = h5py_obj.create_group(key)
219 # dict_to_group will be recursively called if there is another dict
220 # in this dict
221 dict_to_group(group, value, to_chunk, variable_lengths, logger)
222 case np.ndarray():
223 if key not in variable_lengths: 223 ↛ 230line 223 didn't jump to line 230 because the condition on line 223 was always true
224 # Find and replace all None objects
225 value = format_array(value)
226 value_dtype = dtype_picker(value.dtype)
227 else:
228 # Since we're dealing with variable length arrays, we need to use a special dtype
229 # and process each array individually
230 for i, arr in enumerate(value):
231 value[i] = format_array(arr)
232 value_dtype = variable_lengths[key]
233 # If we want it to be chunked do that, always compress it
234 if key in to_chunk: 234 ↛ 235line 234 didn't jump to line 235 because the condition on line 234 was never true
235 h5py_obj.create_dataset(
236 key,
237 shape=to_chunk[key]["shape"],
238 data=value,
239 dtype=value_dtype,
240 chunks=to_chunk[key]["chunk"],
241 compression="gzip",
242 )
243 else:
244 h5py_obj.create_dataset(
245 key,
246 shape=value.shape,
247 data=value,
248 dtype=value_dtype,
249 compression="gzip",
250 )
251 case list():
252 # Was easier to convert to a NumPy array to get vectorization
253 # Given that H5Py converts it into a NumPy array anyway, we can
254 # at least control the conversion (if we need to)
255 if key in variable_lengths: 255 ↛ 256line 255 didn't jump to line 256 because the condition on line 255 was never true
256 value = np.array(value, dtype=object)
257 for i, arr in enumerate(value):
258 value[i] = format_array(arr)
259 data_dtype = variable_lengths[key]
260 else:
261 try:
262 value = np.array(value)
263 value = format_array(value)
264 data_dtype = dtype_picker(value.dtype)
265 except ValueError as e:
266 if "inhomogeneous" in str(e):
267 logger.warning(
268 f"Failed to save {key} as an array as the list couldn't turn into a NumPy array, trying to save as a variable length array. Please add {key} to the variable_lengths dictionary in the save function in future."
269 )
270 value = np.array(value, dtype=object)
271 for i, arr in enumerate(value):
272 value[i] = format_array(arr)
273 data_dtype = h5py.vlen_dtype(dtype_picker(value[0].dtype))
274 else:
275 logger.info(
276 f"You received an error not related to the array being inhomogeneous, Here's the error: {e}"
277 )
278 h5py_obj.create_dataset(
279 key, data=value, dtype=data_dtype, compression="gzip"
280 )
281 case Path(): 281 ↛ 283line 281 didn't jump to line 283 because the pattern on line 281 never matched
282 # If we find a Path object, convert it to a string
283 value = str(value)
284 h5py_obj.create_dataset(key, data=value)
285 case None:
286 is_none = True
287 # In the case we get something that is none, create empty dataset
288 h5py_obj.create_dataset(key, dtype="b")
289 case _:
290 try:
291 # Store the value in a single size dataset, used for ints, floats, strings etc
292 h5py_obj.create_dataset(key, data=value)
293 except ValueError:
294 if logger is not None:
295 logger.error(
296 f"Failed to save {key}, the type of key was {type(value)}"
297 )
298 return is_none
def dict_to_group(
    group: h5py.Group,
    to_convert: dict,
    to_chunk: dict[str, dict],
    variable_lengths: dict[str, DTypeLike],
    logger: Logger | None,
) -> None:
    """
    Writes every entry of a (nested) dictionary into an already created HDF5 group.
    `save_dataset` uses this whenever it meets a dictionary inside the dictionary being
    saved, every entry becomes its own dataset (or sub group) inside `group`.

    For each key, the group attribute of the same name is set to the value returned by
    `save_dataset`, i.e. True when the entry was `None`.

    Parameters
    ----------
    group : h5py.Group
        The created group to save the dictionary in
    to_convert : dict
        The dictionary to save into the group
    to_chunk : dict[str, dict]
        The chunking dictionary, see `save` for more information
    variable_lengths : dict[str, DTypeLike]
        The variable length dictionary, see `save` for more information
    logger : Logger | None
        PyFHD's Logger

    See Also
    --------
    PyFHD.io.pyfhd_io.save : Save a HDF5 file
    """
    for name, entry in to_convert.items():
        stored_as_none = save_dataset(
            group, name, entry, to_chunk, variable_lengths, logger
        )
        # The attribute doubles as the "this dataset is really None" flag
        group.attrs[name] = stored_as_none
336def save(
337 file_name: Path,
338 to_save: NDArray[Any] | dict,
339 dataset_name: str,
340 logger: Logger | None = None,
341 to_chunk: dict[str, dict] = {},
342 variable_lengths: dict[str, DTypeLike] = {},
343) -> None:
344 """
345 Saves a numpy array or dictionary into a hdf5 file using h5py, with compression applied to all arrays/datasets.
346 An array will be saved as a single dataset, while a dictionary will be saved where each key will be a dataset
347 unless the key points a dictionary in which case a group will be created and `dict_to_group` called to turn each
348 key in that sub dict into a dataset (or another group if it's another sub dictionary). This function should be
349 kept as general as possible, if something needs formatting for saving, format it before calling this function.
350 If you are converting a sav file to hdf5 with this function, use `recarray_to_dict` which converts the sav output
351 from readsav into a proper python dictionary (rather than recarrays or weird array shapes, objects arrays etc.)
353 Parameters
354 ----------
355 file_name : Path
356 The file to save as hdf5 should be /path/to/file_name.h5 (or .hdf5)
357 to_save : NDArray[Any] | dict
358 The dictionary or numpy array to save into the hdf5 file
359 dataset_name : str
360 Used in the case that the to_save variable is an array, this name will
361 be used as the key for the dataset in the hdf5 file.
362 logger : Logger, optional
363 PyFHD's Logger, by default None (in case you don't want to use the logger for testing)
364 to_chunk : dict[str, dict], optional
365 A dictionary where each key-value pair represents a key in the to_save dictionary, and the value is a dictionary
366 which should contain two key-value pairs, `shape` which should be the `shape` of the array and `chunk` which tells
367 hdf5 how to chunk the dataset when it's being read/written. If you're not sure how to `chunk` the dataset, set `chunk`
368 to True which enables h5py to guess the chunk size for you. By default {}
369 variable_lengths : dict[str, DTypeLike], optional
370 A dictionary where each key-value pair represents a key in the to_save dictionary, and the value is a dtype. This is
371 for special cases where you must save an array of variable length arrays. H5Py does support variable length arrays, but
372 you must use a special type, using the `h5py.vlen_dtype()` you can create a dtype which accepts object arrays of variable
373 lengths. For example if you wish to have variable integer array called `ija`, you would use `h5py.vlen_dtype(np.int64)`,
374 and save use it in the variable_lengths dictionary like so, `{'ija': h5py.vlen_dtype(np.int64)}`, which will set the dtype appropriately
375 during a `create_dataset` call. By default {}
378 See Also
379 --------
380 PyFHD.io.pyfhd_io.load : Load a HDF5 file
381 PyFHD.io.pyfhd_io.dict_to_group : Converts a dictionary to a h5py Group Object
382 PyFHD.io.pyfhd_io.recarray_to_dict : Turns any record arrays into dicts, also formats object arrays into the correct dtype array
383 PyFHD.io.pyfhd_io.save_dataset : Saves a single dataset based off a dictionary key-value pair
384 PyFHD.io.pyfhd_io.format_array : Finds any None is an array and replaces them appropriately
385 """
386 # Create a custom vectorized function to check for complex numbers
387 # is_complex_vectorized = np.vectorize(is_complex)
388 with h5py.File(file_name, "w") as h5_file:
389 match to_save:
390 case np.ndarray():
391 if logger: 391 ↛ 392line 391 didn't jump to line 392 because the condition on line 391 was never true
392 logger.info(f"Writing the {dataset_name} array to {file_name}")
393 h5_file.attrs[dataset_name] = save_dataset(
394 h5_file, dataset_name, to_save, to_chunk, variable_lengths, logger
395 )
396 case dict(): 396 ↛ 407line 396 didn't jump to line 407 because the pattern on line 396 always matched
397 if logger:
398 logger.info(
399 f"Writing the {dataset_name} dict to {file_name}, each key will be a dataset, if the key contains a dict then it will be a group."
400 )
401 for key in to_save:
402 # We're using the attributes as a mask, where if True then we know
403 # the dataset is representing a None object.
404 h5_file.attrs[key] = save_dataset(
405 h5_file, key, to_save[key], to_chunk, variable_lengths, logger
406 )
407 case _:
408 h5_file.attrs[dataset_name] = save_dataset(
409 h5_file, dataset_name, to_save, to_chunk, variable_lengths, logger
410 )
411 if logger:
412 logger.warning(
413 "Not a dict or numpy array, PyFHD won't write other types at this time, refer to PyFHD.io.pyfhd_io.save to see what is supported"
414 )
def load_dataset(
    h5py_obj: h5py.File | h5py.Group, key: str, dataset: h5py.Dataset
) -> Any:
    """
    Loads a single dataset from a HDF5 File or Group, the key here is the dataset name from the
    file or group and is only used to check the attributes of said file or group. If the attribute
    associated with the key is True, then we assume the value saved is an empty dataset and we should
    return None. If this is False (or the attribute doesn't exist, e.g. the file wasn't written by
    `save`), load the value and check if this value should be a single value. There are special checks
    for byte arrays, if there is byte arrays, PyFHD assumes these are meant to be strings.

    Parameters
    ----------
    h5py_obj : h5py.File | h5py.Group
        A HDF5 file or group
    key : str
        The dataset name
    dataset : h5py.Dataset
        The dataset we are loading

    Returns
    -------
    Any
        The value stored in the HDF5 Dataset, `None` if the dataset represents a None value

    See Also
    --------
    PyFHD.io.pyfhd_io.load : Load a HDF5 file
    """
    # If the corresponding attribute is True the dataset stands in for None.
    # A missing attribute is treated as False rather than raising a KeyError.
    # A dataset without a shape (h5py reports `None` for Empty datasets) can't be
    # sliced and is how `save_dataset` stores None, so treat it the same way.
    if h5py_obj.attrs.get(key, False) or dataset.shape is None:
        return None

    # Scalars are read with an empty tuple index, everything else in full
    value = dataset[()] if dataset.shape == () else dataset[:]

    if isinstance(value, np.ndarray) and value.dtype.kind == "S":
        # Byte arrays are assumed to be strings
        return _decode_byte_arr(value)
    if isinstance(value, bytes):
        return value.decode()
    return value
def group_to_dict(group: h5py.Group) -> dict:
    """
    Builds a dictionary out of a HDF5 group. Datasets in the group are read with
    `load_dataset`, groups inside the group are converted recursively into sub
    dictionaries. Members that are neither a dataset nor a group are left out.

    Parameters
    ----------
    group : h5py.Group
        A h5py group to turn into a dictionary

    Returns
    -------
    return_dict: dict
        The group turned into a dictionary
    """
    converted = {}
    for name in group:
        member = group[name]
        if isinstance(member, h5py.Dataset):
            converted[name] = load_dataset(group, name, member)
        elif isinstance(member, h5py.Group):
            converted[name] = group_to_dict(member)
    return converted
486def load(
487 file_name: Path, logger: Logger | None = None, lazy_load: bool = False
488) -> dict[str, object] | NDArray[Any] | h5py.File:
489 """
490 Loads a HDF5 file into PyFHD, it reads the HDF5 into an array if the
491 HDF5 file contains a single dataset, while a HDF5 which contains multiple
492 datasets will load them into a dictionary. Any groups will be convered to
493 sub dictionaries using `group_to_dict`
495 Parameters
496 ----------
497 file_name : Path
498 The /path/to/the/hdf5.h5
499 logger : Logger
500 PyFHD's Logger
501 lazy_load : bool, optional
502 Set to true if you wish to lazy load the file, currently the only file that will be
503 supported to do this in PyFHD will be the beam/psf file, but support for other files can
504 be done easily enough, by default False
507 Returns
508 -------
509 return_dict | array | h5_file: dict[str, object] | NDArray[Any] | h5py.File
510 Returns a dict in the case the HDF5 file contains multple datasets,
511 An array if the HDF5 contains one dataset or h5py File object if the
512 file is lazy loaded to conserve memory.
514 See Also
515 --------
516 PyFHD.io.pyfhd_io.save : Save a HDF5 file
517 PyFHD.io.pyfhd_io.group_to_dict : Converts a h5py Group object to a dictionary
518 """
519 h5_file = h5py.File(file_name, "r")
520 if lazy_load:
521 return h5_file
522 try:
523 if len(h5_file.keys()) == 1:
524 # Assume that it contains only one numpy array, in which case read the array
525 key = list(h5_file.keys())[0]
526 if logger: 526 ↛ 527line 526 didn't jump to line 527 because the condition on line 526 was never true
527 logger.info(f"Loading {key} from {file_name} into an array")
528 array = load_dataset(h5_file, key, h5_file[key])
529 return array
530 else:
531 return_dict = {}
532 if logger:
533 logger.info(f"Loading {file_name} into a dictionary")
534 for key in h5_file:
535 match h5_file[key]:
536 case h5py.Dataset():
537 return_dict[key] = load_dataset(h5_file, key, h5_file[key])
538 case h5py.Group(): 538 ↛ 534line 538 didn't jump to line 534 because the pattern on line 538 always matched
539 return_dict[key] = group_to_dict(h5_file[key])
540 return return_dict
541 finally:
542 if not lazy_load:
543 h5_file.close()
546def recarray_to_dict(data: np.recarray | dict) -> dict:
547 """
548 Turns a record array into a dict, but does it as a deep convert. This was needed due to scipy's readsav
549 returning an inception like experience of record arrays. This would mean to access values from something
550 like the obs structure for a test, the code had to be obs[0]['baseline_info'][0]['tile_a'], which was became
551 untenable as the full python translation won't require these leaving us two codebases for IDL compatible and
552 Python compatible. Instead, this function turns all record arrays into dictionaries, which are easier to understand
553 and are faster.
555 This was made specifically to work with the readsav function, to get compatibility with general recarrays remove the
556 zero index, as readsav for some reason adds a single dimension to all recarrays.
558 This was updated later to also take a dictionary which may contain record arrays too.
560 This was also updated later to turn object arrays into multidimensional arrays if they can be one. In the
561 case the object array couldn't be turned into a multidimensional array it was turned into a list
563 Parameters
564 ----------
565 data : np.recarray or dict
566 A record array or dictionary maybe containing nested record arrays
568 Returns
569 -------
570 data: dict
571 A potentially nested dictionaries of dictionaries
572 """
573 # Convert the original record array into a dictionary
574 if type(data) == np.recarray:
575 data = {name.lower(): data[name] for name in data.dtype.names}
576 # For every key, if it's a record array, recursively call the function
577 for key in data:
578 # Every now and then you do get object arrays that contain only one element or arrays that contain only one element
579 # These are not useful so I will extract the element out
580 if type(data[key]) == np.ndarray and data[key].size == 1:
581 data[key] = data[key][0]
582 # Sometimes the recarray is in a standard numpy object array and other times its not for some reason...
583 if type(data[key]) == np.recarray:
584 data[key] = recarray_to_dict(data[key])
585 elif type(data[key]) == np.ndarray and type(data[key][0]) == np.recarray: 585 ↛ 586line 585 didn't jump to line 586 because the condition on line 585 was never true
586 data[key] = recarray_to_dict(data[key][0])
587 # We found a single array with only None
588 elif type(data[key]) == np.ndarray and isinstance(data[key][0], type(None)):
589 # Get all the None values and turn them into NaNs
590 none_values = np.where(data[key] == None)
591 if np.size(none_values) > 0: 591 ↛ 595line 591 didn't jump to line 595 because the condition on line 591 was always true
592 data[key][none_values] = np.nan
593 # If all of the values were None, then set the array dtype to float64
594 # (as we don't know what dtype it actually was), probably only relevant for testing
595 if np.size(none_values) == np.size(data[key]): 595 ↛ 577line 595 didn't jump to line 577 because the condition on line 595 was always true
596 data[key] = data[key].astype(np.float64)
597 # Assume we found a string array since it's bytes, convert to a string list
598 elif type(data[key]) == np.ndarray and isinstance(data[key].flat[0], bytes):
599 data[key] = [x.decode().strip() for x in data[key]]
600 # Found only bytes, assume it's a string, convert the string
601 elif isinstance(data[key], bytes):
602 data[key] = data[key].decode()
603 # You can also get object arrays which themselves contain numpy arrays, it's best to turn these
604 # into multidimensional arrays. If they can't turn into multidimensional arrays due to them
605 # being different types or not of the same size then it will convert the numpy object array
606 # into a list of objects instead.
607 elif (
608 type(data[key]) == np.ndarray
609 and data[key].dtype == object
610 and type(data[key][0]) == np.ndarray
611 ):
612 try:
613 # Get all the None values and turn them into NaNs
614 none_values = np.nonzero(_is_none(data[key]))
615 if np.size(none_values) > 0:
616 data[key][none_values] = np.nan
617 # If all of the values were None, then set the array dtype to float64
618 # (as we don't know what dtype it actually was), probably only relevant for testing
619 if (np.size(none_values) // len(data[key].shape)) == np.size(data[key]): 619 ↛ 620line 619 didn't jump to line 620 because the condition on line 619 was never true
620 data[key] = data[key].astype(np.float64)
621 # If it's not an object array, numpy will stack the axes, which isn't desired here
622 # as we want to maintain the multidimensional nature of the data. So we'll create an
623 # array of the desired size using the shape of the first element.
624 elif data[key][0].dtype != object:
625 new_array = np.empty(
626 [data[key].size, *data[key][0].shape], dtype=data[key][0].dtype
627 )
628 for idx in range(new_array.shape[0]):
629 new_array[idx] = data[key][idx]
630 data[key] = new_array
631 else:
632 # For an object array you can flatten it, and stack all inner arrays together until it's not an object array
633 # Crucially this assumes the array not as an object array can fit in memory! If you're doing the beam_ptr
634 # conversion take this into consideration
635 while data[key].dtype == object:
636 data[key] = np.vstack(data[key].flatten()).reshape(
637 list(data[key].shape) + list(data[key].flat[0].shape)
638 )
639 except ValueError:
640 data[key] = list(x for x in data[key])
641 return data
def convert_sav_to_dict(sav_path: str, logger: Logger, tmp_dir="temp_pyfhd"):
    """
    Given a path to an IDL style .sav file, load into a python dictionary
    using scipy.io.readsav.

    If the file was saved with the IDL /compress option, the readsav function
    has to save a decompressed version of the file. By default this uses
    the tempfile module to find a location, but this usually finds a bad
    location with little storage when called on a super cluster. So explicitly
    make our own temp dir `temp_pyfhd` where the code is being called. It is
    assumed many files are to be converted, so `temp_pyfhd` should be deleted
    after all calls.

    Mostly used just for testing, if you're not a developer you can safely ignore this function

    Parameters
    ----------
    sav_path : str
        Filepath for an IDL .sav file
    logger : Logger
        The logger to output any error messages to
    tmp_dir : str
        Dir to place temporary files, creates the directory if doesn't exist.
        Default: "temp_pyfhd".

    Returns
    --------
    sav_dict : dict
        Dictionary containing whatever was in the .sav file

    Raises
    ------
    SystemExit
        If `sav_path` isn't an existing file. The error is logged, the logger's
        handlers are closed and the process exits with status 1.
    """
    if not os.path.isfile(sav_path):
        logger.error(f"{sav_path} doesn't exist, please check your input path")
        for handler in logger.handlers:
            handler.close()
        # Raised directly rather than through the `exit()` helper, which is only
        # injected by the `site` module and would report success (status 0)
        raise SystemExit(1)

    # Ensure the tmp dir exists, create if not
    os.makedirs(tmp_dir, exist_ok=True)

    # Strip off any leading path to leave just the file name
    # (basename also copes with path-like objects, not just strings)
    temp_name = os.path.join(tmp_dir, os.path.basename(sav_path))

    # Load into a dictionary, decompressed and saving a temporary file if need be
    return readsav(sav_path, python_dict=True, uncompressed_file_name=temp_name)