Coverage for PyFHD/io/pyfhd

1import os

2import numpy as np

3import h5py

4from logging import Logger

5from pathlib import Path

6from typing import Any

7from numpy.typing import NDArray, DTypeLike

8from scipy.io import readsav

def dtype_picker(dtype: DTypeLike) -> type:
    """
    Select the 64-bit-per-component NumPy type matching the kind of `dtype`, so
    that data written to a HDF5 file is stored in double precision.

    Parameters
    ----------
    dtype : DTypeLike
        The NumPy dtype of the array about to be saved

    Returns
    -------
    type
        `np.int64` for any integer dtype (signed or unsigned), `np.float64` for any
        floating dtype and `np.complex128` for any complex dtype. `None` is
        returned for every other dtype (e.g. bytes or bool).
    """
    # Checked in order: integer, floating, complex
    widest_by_kind = (
        (np.integer, np.int64),
        (np.floating, np.float64),
        (np.complexfloating, np.complex128),
    )
    for abstract_kind, widest in widest_by_kind:
        if np.issubdtype(dtype, abstract_kind):
            return widest
    # No numeric kind matched, there is no double precision equivalent to pick
    return None

def _is_complex(value: Any) -> bool:
    """
    Finds if a value is complex, this works regardless of the array type
    unlike calling np.iscomplex or np.iscomplexobj on a whole object array.
    Being vectorized, the check is applied to every element, so it works for
    the python complex type as well as the numpy complex types.

    Parameters
    ----------
    value : Any
        The value to check in a NumPy array

    Returns
    -------
    bool
        True if value is a complex, False otherwise
    """
    return np.iscomplexobj(value)


# Vectorize with an explicit output type: without `otypes` np.vectorize has to call
# the function on the first element to discover the output dtype, which raises a
# ValueError on size 0 inputs.
_is_complex = np.vectorize(_is_complex, otypes=[bool])

def _is_string(value: Any) -> bool:
    """
    Finds if a value is a string or not, works regardless of the array type.
    It is vectorized so it can be applied element by element to object arrays.

    Parameters
    ----------
    value : Any
        A value to check

    Returns
    -------
    bool
        True if value is a str, False otherwise
    """
    return isinstance(value, str)


# Vectorize with an explicit output type: without `otypes` np.vectorize has to call
# the function on the first element to discover the output dtype, which raises a
# ValueError on size 0 inputs.
_is_string = np.vectorize(_is_string, otypes=[bool])

def _is_none(value: Any) -> bool:
    """
    Checks for a None object and is vectorized to work across any numpy array
    even if it's an object array.

    Parameters
    ----------
    value : Any
        A value to be checked if None

    Returns
    -------
    bool
        True if value is None, otherwise False
    """
    return value is None


# Vectorize with an explicit output type: without `otypes` np.vectorize has to call
# the function on the first element to discover the output dtype, which raises a
# ValueError on size 0 inputs.
_is_none = np.vectorize(_is_none, otypes=[bool])

def _decode_byte_arr(value: NDArray[np.byte]) -> str:
    """
    Decodes a byte string into a string. It is vectorized, so passing an array
    of byte strings returns an array of decoded strings.

    Parameters
    ----------
    value : NDArray[np.byte]
        Value to decode

    Returns
    -------
    str
        The decoded value
    """
    return value.decode()


# Vectorize with an explicit output type: without `otypes` np.vectorize has to call
# the function on the first element to discover the output dtype, which raises a
# ValueError on size 0 inputs (e.g. an empty bytes dataset read from a file).
_decode_byte_arr = np.vectorize(_decode_byte_arr, otypes=[str])

112

113

def format_array(array: NDArray[Any]) -> NDArray[Any]:
    """
    Prepare an array for saving into a HDF5 file. Object arrays have their `None`
    entries replaced in place, with empty strings when the array holds strings or
    with `NaN` when it holds numbers (`nan + nanj` for complex data), and are then
    converted to a bytes, complex128 or float64 array. Non-object string arrays
    are converted to bytes arrays, every other non-object array (and any empty
    array) is returned untouched.

    Parameters
    ----------
    array : NDArray[Any]
        The array to format, `None` values in an object array are overwritten in place

    Returns
    -------
    array: NDArray[Any]
        Array without None objects and in a dtype ready to be saved
    """
    # Only object arrays can hold None, and the vectorized helpers used below
    # error out on empty arrays, so both cases are settled up front
    if array.size == 0 or array.dtype != object:
        return array.astype(np.bytes_) if np.issubdtype(array.dtype, np.str_) else array

    if np.any(_is_string(array)):
        # Boolean mask assignment overwrites the Nones in place without np.where
        array[array == None] = ""
        return array.astype(bytes)

    try:
        # From here on the array is known to be a non-empty object array,
        # Nones become NaNs in place before casting
        array[array == None] = np.nan
        if np.any(_is_complex(array)):
            # Double precision complex, then make the NaN entries complex NaNs
            array = array.astype(np.complex128)
            array[np.isnan(array.real)] = np.nan * 0j
        else:
            array = array.astype(np.float64)
    except TypeError:
        # Whatever cannot be cast (the original code mentions structured/record
        # arrays such as astropy's FITS_rec) is deliberately left as is
        pass
    return array

163

164

165def save_dataset(

166 h5py_obj: h5py.File | h5py.Group,

167 key: str,

168 value: Any,

169 to_chunk: dict[str, dict],

170 variable_lengths: [str, DTypeLike],

171 logger: Logger | None,

172) -> bool:

173 """

174 A general function for saving a dataset inside a HDF5 File or Group. It's used exclusively for saving

175 a dictionary into a HDF5 file, hence why we take a `key` and `value` pair. The `to_chunk` parameter is

176 explained in the `save` function, please look there for explanation. In the case of finding a None object

177 an Empty Dataset is saved and the is_none is returned as True, so the attribute associated with the key

178 can also be set to True to indicate to PyFHD later that the value is meant to be None when reading in the

179 dataset again.

180

181 Parameters

182 ----------

183 h5py_obj : h5py.File | h5py.Group

184 A h5py object that has access to the `create_dataset` and `create_group` methods

185 key : str

186 The key from the dictionary we're saving

187 value : Any

188 The value from the dictionary

189 to_chunk : dict[str, dict]

190 A dictionary where each key-value pair represents a key in the to_save dictionary, and the value is a dictionary

191 which should contain two key-value pairs, `shape` which should be the `shape` of the array and `chunk` which tells

192 hdf5 how to chunk the dataset when it's being read/written. If you're not sure how to `chunk` the dataset, set `chunk`

193 to True which enables h5py to guess the chunk size for you. By default {}

194 variable_lengths : dict[str, DTypeLike]

195 A dictionary where each key-value pair represents a key in the to_save dictionary, and the value is a dtype. This is

196 for special cases where you must save an array of variable length arrays. H5Py does support variable length arrays, but

197 you must use a special type, using the `h5py.vlen_dtype()` you can create a dtype which accepts object arrays of variable

198 lengths. For example if you wish to have variable integer array called `ija`, you would use `h5py.vlen_dtype(np.int64)`,

199 and save use it in the variable_lengths dictionary like so, `{'ija': h5py.vlen_dtype(np.int64)}`, which will set the dtype appropriately

200 during a `create_dataset` call. By default {}

201 logger : Logger | None

202 PyFHD's Logger

203

204 Returns

205 -------

206 is_none : bool

207 True if the value is None, False otherwise

208

209 See Also

210 --------

211 PyFHD.io.pyfhd_io.save : Save a HDF5 file

212 PyFHD.io.pyfhd_io.dict_to_group : Converts a dictionary to a h5py Group Object

213 """

214 is_none = False

215 # Match the type

216 match value:

217 case dict():

218 group = h5py_obj.create_group(key)

219 # dict_to_group will be recursively called if there is another dict

220 # in this dict

221 dict_to_group(group, value, to_chunk, variable_lengths, logger)

222 case np.ndarray():

223 if key not in variable_lengths: 223 ↛ 230line 223 didn't jump to line 230 because the condition on line 223 was always true

224 # Find and replace all None objects

225 value = format_array(value)

226 value_dtype = dtype_picker(value.dtype)

227 else:

228 # Since we're dealing with variable length arrays, we need to use a special dtype

229 # and process each array individually

230 for i, arr in enumerate(value):

231 value[i] = format_array(arr)

232 value_dtype = variable_lengths[key]

233 # If we want it to be chunked do that, always compress it

234 if key in to_chunk: 234 ↛ 235line 234 didn't jump to line 235 because the condition on line 234 was never true

235 h5py_obj.create_dataset(

236 key,

237 shape=to_chunk[key]["shape"],

238 data=value,

239 dtype=value_dtype,

240 chunks=to_chunk[key]["chunk"],

241 compression="gzip",

242 )

243 else:

244 h5py_obj.create_dataset(

245 key,

246 shape=value.shape,

247 data=value,

248 dtype=value_dtype,

249 compression="gzip",

250 )

251 case list():

252 # Was easier to convert to a NumPy array to get vectorization

253 # Given that H5Py converts it into a NumPy array anyway, we can

254 # at least control the conversion (if we need to)

255 if key in variable_lengths: 255 ↛ 256line 255 didn't jump to line 256 because the condition on line 255 was never true

256 value = np.array(value, dtype=object)

257 for i, arr in enumerate(value):

258 value[i] = format_array(arr)

259 data_dtype = variable_lengths[key]

260 else:

261 try:

262 value = np.array(value)

263 value = format_array(value)

264 data_dtype = dtype_picker(value.dtype)

265 except ValueError as e:

266 if "inhomogeneous" in str(e):

267 logger.warning(

268 f"Failed to save {key} as an array as the list couldn't turn into a NumPy array, trying to save as a variable length array. Please add {key} to the variable_lengths dictionary in the save function in future."

269 )

270 value = np.array(value, dtype=object)

271 for i, arr in enumerate(value):

272 value[i] = format_array(arr)

273 data_dtype = h5py.vlen_dtype(dtype_picker(value[0].dtype))

274 else:

275 logger.info(

276 f"You received an error not related to the array being inhomogeneous, Here's the error: {e}"

277 )

278 h5py_obj.create_dataset(

279 key, data=value, dtype=data_dtype, compression="gzip"

280 )

281 case Path(): 281 ↛ 283line 281 didn't jump to line 283 because the pattern on line 281 never matched

282 # If we find a Path object, convert it to a string

283 value = str(value)

284 h5py_obj.create_dataset(key, data=value)

285 case None:

286 is_none = True

287 # In the case we get something that is none, create empty dataset

288 h5py_obj.create_dataset(key, dtype="b")

289 case _:

290 try:

291 # Store the value in a single size dataset, used for ints, floats, strings etc

292 h5py_obj.create_dataset(key, data=value)

293 except ValueError:

294 if logger is not None:

295 logger.error(

296 f"Failed to save {key}, the type of key was {type(value)}"

297 )

298 return is_none

299

300

def dict_to_group(
    group: h5py.Group,
    to_convert: dict,
    to_chunk: dict[str, dict],
    variable_lengths: dict[str, DTypeLike],
    logger: Logger | None,
) -> None:
    """
    Writes every entry of a dictionary into a HDF5 group. This is used when a dictionary is found
    inside a dictionary that is being saved, each entry is handed to `save_dataset` (which creates a
    dataset, or a further sub group for a nested dictionary) and the group attribute of the same name
    records whether the saved value was None.

    Parameters
    ----------
    group : h5py.Group
        The created group to save the dictionary in
    to_convert : dict
        The dictionary to save into the group
    to_chunk : dict[str, dict]
        The chunking dictionary, see `save` for more information
    variable_lengths : dict[str, DTypeLike]
        The variable length dictionary, see `save` for more information
    logger : Logger | None
        PyFHD's Logger

    See Also
    --------
    PyFHD.io.pyfhd_io.save : Save a HDF5 file
    PyFHD.io.pyfhd_io.save_dataset : Saves a single dataset based off a dictionary key-value pair
    """
    for name, item in to_convert.items():
        # The attribute acts as a mask, True means the dataset stands for a None
        was_none = save_dataset(group, name, item, to_chunk, variable_lengths, logger)
        group.attrs[name] = was_none

334

335

336def save(

337 file_name: Path,

338 to_save: NDArray[Any] | dict,

339 dataset_name: str,

340 logger: Logger | None = None,

341 to_chunk: dict[str, dict] = {},

342 variable_lengths: dict[str, DTypeLike] = {},

343) -> None:

344 """

345 Saves a numpy array or dictionary into a hdf5 file using h5py, with compression applied to all arrays/datasets.

346 An array will be saved as a single dataset, while a dictionary will be saved where each key will be a dataset

347 unless the key points a dictionary in which case a group will be created and `dict_to_group` called to turn each

348 key in that sub dict into a dataset (or another group if it's another sub dictionary). This function should be

349 kept as general as possible, if something needs formatting for saving, format it before calling this function.

350 If you are converting a sav file to hdf5 with this function, use `recarray_to_dict` which converts the sav output

351 from readsav into a proper python dictionary (rather than recarrays or weird array shapes, objects arrays etc.)

352

353 Parameters

354 ----------

355 file_name : Path

356 The file to save as hdf5 should be /path/to/file_name.h5 (or .hdf5)

357 to_save : NDArray[Any] | dict

358 The dictionary or numpy array to save into the hdf5 file

359 dataset_name : str

360 Used in the case that the to_save variable is an array, this name will

361 be used as the key for the dataset in the hdf5 file.

362 logger : Logger, optional

363 PyFHD's Logger, by default None (in case you don't want to use the logger for testing)

364 to_chunk : dict[str, dict], optional

365 A dictionary where each key-value pair represents a key in the to_save dictionary, and the value is a dictionary

366 which should contain two key-value pairs, `shape` which should be the `shape` of the array and `chunk` which tells

367 hdf5 how to chunk the dataset when it's being read/written. If you're not sure how to `chunk` the dataset, set `chunk`

368 to True which enables h5py to guess the chunk size for you. By default {}

369 variable_lengths : dict[str, DTypeLike], optional

370 A dictionary where each key-value pair represents a key in the to_save dictionary, and the value is a dtype. This is

371 for special cases where you must save an array of variable length arrays. H5Py does support variable length arrays, but

372 you must use a special type, using the `h5py.vlen_dtype()` you can create a dtype which accepts object arrays of variable

373 lengths. For example if you wish to have variable integer array called `ija`, you would use `h5py.vlen_dtype(np.int64)`,

374 and save use it in the variable_lengths dictionary like so, `{'ija': h5py.vlen_dtype(np.int64)}`, which will set the dtype appropriately

375 during a `create_dataset` call. By default {}

376

377

378 See Also

379 --------

380 PyFHD.io.pyfhd_io.load : Load a HDF5 file

381 PyFHD.io.pyfhd_io.dict_to_group : Converts a dictionary to a h5py Group Object

382 PyFHD.io.pyfhd_io.recarray_to_dict : Turns any record arrays into dicts, also formats object arrays into the correct dtype array

383 PyFHD.io.pyfhd_io.save_dataset : Saves a single dataset based off a dictionary key-value pair

384 PyFHD.io.pyfhd_io.format_array : Finds any None is an array and replaces them appropriately

385 """

386 # Create a custom vectorized function to check for complex numbers

387 # is_complex_vectorized = np.vectorize(is_complex)

388 with h5py.File(file_name, "w") as h5_file:

389 match to_save:

390 case np.ndarray():

391 if logger: 391 ↛ 392line 391 didn't jump to line 392 because the condition on line 391 was never true

392 logger.info(f"Writing the {dataset_name} array to {file_name}")

393 h5_file.attrs[dataset_name] = save_dataset(

394 h5_file, dataset_name, to_save, to_chunk, variable_lengths, logger

395 )

396 case dict(): 396 ↛ 407line 396 didn't jump to line 407 because the pattern on line 396 always matched

397 if logger:

398 logger.info(

399 f"Writing the {dataset_name} dict to {file_name}, each key will be a dataset, if the key contains a dict then it will be a group."

400 )

401 for key in to_save:

402 # We're using the attributes as a mask, where if True then we know

403 # the dataset is representing a None object.

404 h5_file.attrs[key] = save_dataset(

405 h5_file, key, to_save[key], to_chunk, variable_lengths, logger

406 )

407 case _:

408 h5_file.attrs[dataset_name] = save_dataset(

409 h5_file, dataset_name, to_save, to_chunk, variable_lengths, logger

410 )

411 if logger:

412 logger.warning(

413 "Not a dict or numpy array, PyFHD won't write other types at this time, refer to PyFHD.io.pyfhd_io.save to see what is supported"

414 )

415

416

def load_dataset(
    h5py_obj: h5py.File | h5py.Group, key: str, dataset: h5py.Dataset
) -> Any:
    """
    Loads a single dataset from a HDF5 File or Group, the key here is the dataset name from the
    file or group and is only used to check the attributes of said file or group. If the attribute
    associated with the key is True, then we assume the value saved is an empty dataset and we
    return None. If it is False, or there is no such attribute (the file was not written by
    `save`), the value is loaded, as a single value when the dataset has no dimensions. Byte
    arrays and byte strings are assumed to be strings and are decoded.

    Parameters
    ----------
    h5py_obj : h5py.File | h5py.Group
        A HDF5 file or group
    key : str
        The dataset name
    dataset : h5py.Dataset
        The dataset we are loading

    Returns
    -------
    Any
        The value stored in the HDF5 Dataset

    See Also
    --------
    PyFHD.io.pyfhd_io.load : Load a HDF5 file
    """
    # If the corresponding attribute is True the dataset is an empty placeholder
    # for None. A missing attribute is treated as False rather than raising a
    # KeyError, so datasets without the mask can still be read.
    if h5py_obj.attrs.get(key, False):
        return None
    # Dimensionless datasets hold a single value and cannot be sliced with [:]
    if dataset.shape == ():
        value = dataset[()]
    else:
        value = dataset[:]
    if isinstance(value, np.ndarray) and value.dtype.kind == "S":
        value = _decode_byte_arr(value)
    if isinstance(value, bytes):
        value = value.decode()
    return value

459

460

def group_to_dict(group: h5py.Group) -> dict:
    """
    Builds a dictionary out of a HDF5 group when loading a hdf5 file. Datasets are
    read with `load_dataset`, sub groups are converted recursively into nested
    dictionaries, any other kind of member is skipped.

    Parameters
    ----------
    group : h5py.Group
        A h5py group to turn into a dictionary

    Returns
    -------
    converted: dict
        The group turned into a dictionary
    """
    converted = {}
    for name in group:
        member = group[name]
        # Dataset is tested first, same order as the members are distinguished on saving
        if isinstance(member, h5py.Dataset):
            converted[name] = load_dataset(group, name, member)
        elif isinstance(member, h5py.Group):
            converted[name] = group_to_dict(member)
    return converted

484

485

def load(
    file_name: Path, logger: Logger | None = None, lazy_load: bool = False
) -> dict[str, object] | NDArray[Any] | h5py.File:
    """
    Loads a HDF5 file into PyFHD, it reads the HDF5 into an array if the
    HDF5 file contains a single dataset, while a HDF5 which contains multiple
    datasets will load them into a dictionary. Any groups will be converted to
    sub dictionaries using `group_to_dict`, this includes a file whose only
    member is a group.

    Parameters
    ----------
    file_name : Path
        The /path/to/the/hdf5.h5
    logger : Logger, optional
        PyFHD's Logger, by default None
    lazy_load : bool, optional
        Set to true if you wish to lazy load the file, in which case the open
        h5py File object is returned and it is up to the caller to close it,
        by default False

    Returns
    -------
    return_dict | array | h5_file: dict[str, object] | NDArray[Any] | h5py.File
        Returns a dict in the case the HDF5 file contains multiple datasets (or groups),
        the stored value if the HDF5 contains one dataset, or the h5py File object if the
        file is lazy loaded to conserve memory.

    See Also
    --------
    PyFHD.io.pyfhd_io.save : Save a HDF5 file
    PyFHD.io.pyfhd_io.group_to_dict : Converts a h5py Group object to a dictionary
    """
    h5_file = h5py.File(file_name, "r")
    if lazy_load:
        # The caller takes ownership of the open file
        return h5_file
    # The context manager closes the file whatever happens while reading
    with h5_file:
        keys = list(h5_file.keys())
        # Only a lone dataset is returned as a bare value, a lone group has no
        # `shape` to read and has to go through the dictionary path instead
        if len(keys) == 1 and isinstance(h5_file[keys[0]], h5py.Dataset):
            key = keys[0]
            if logger:
                logger.info(f"Loading {key} from {file_name} into an array")
            return load_dataset(h5_file, key, h5_file[key])
        if logger:
            logger.info(f"Loading {file_name} into a dictionary")
        # A h5py File is itself a group, so the same conversion applies
        return group_to_dict(h5_file)

544

545

def recarray_to_dict(data: np.recarray | dict) -> dict:
    """
    Turns a record array into a dict, but does it as a deep convert. This was needed due to scipy's readsav
    returning an inception like experience of record arrays. This would mean to access values from something
    like the obs structure for a test, the code had to be obs[0]['baseline_info'][0]['tile_a'], which became
    untenable as the full python translation won't require these, leaving us two codebases for IDL compatible and
    Python compatible. Instead, this function turns all record arrays into dictionaries, which are easier to understand
    and are faster.

    This was made specifically to work with the readsav function, to get compatibility with general recarrays remove the
    zero index, as readsav for some reason adds a single dimension to all recarrays.

    This was updated later to also take a dictionary which may contain record arrays too.

    This was also updated later to turn object arrays into multidimensional arrays if they can be one. In the
    case the object array couldn't be turned into a multidimensional array it was turned into a list.

    Note that when a dictionary is passed in, it is modified in place and the same dictionary is returned.
    The record array and array checks compare the exact type (`type(x) == ...`), so subclasses of
    `np.recarray` or `np.ndarray` are not converted.

    Parameters
    ----------
    data : np.recarray or dict
        A record array or dictionary maybe containing nested record arrays

    Returns
    -------
    data: dict
        A potentially nested dictionaries of dictionaries, the keys taken from a record array are lower-cased
    """
    # Convert the original record array into a dictionary
    if type(data) == np.recarray:
        data = {name.lower(): data[name] for name in data.dtype.names}
    # For every key, if it's a record array, recursively call the function
    # (only values of existing keys are reassigned, so iterating while updating is safe)
    for key in data:
        # Every now and then you do get object arrays that contain only one element or arrays that contain only one element
        # These are not useful so I will extract the element out
        # NOTE(review): indexing with [0] assumes the size 1 array has at least one dimension — TODO confirm
        if type(data[key]) == np.ndarray and data[key].size == 1:
            data[key] = data[key][0]
        # Sometimes the recarray is in a standard numpy object array and other times its not for some reason...
        if type(data[key]) == np.recarray:
            data[key] = recarray_to_dict(data[key])
        # Only the first record array of the array is converted
        # NOTE(review): the [0] lookups in this chain assume a non-empty array — TODO confirm
        elif type(data[key]) == np.ndarray and type(data[key][0]) == np.recarray:
            data[key] = recarray_to_dict(data[key][0])
        # We found an array whose first element is None
        elif type(data[key]) == np.ndarray and isinstance(data[key][0], type(None)):
            # Get all the None values and turn them into NaNs
            none_values = np.where(data[key] == None)
            if np.size(none_values) > 0:
                data[key][none_values] = np.nan
            # If all of the values were None, then set the array dtype to float64
            # (as we don't know what dtype it actually was), probably only relevant for testing
            if np.size(none_values) == np.size(data[key]):
                data[key] = data[key].astype(np.float64)
        # Assume we found a string array since it's bytes, convert to a string list
        # (each entry is decoded and stripped of surrounding whitespace)
        elif type(data[key]) == np.ndarray and isinstance(data[key].flat[0], bytes):
            data[key] = [x.decode().strip() for x in data[key]]
        # Found only bytes, assume it's a string, convert the string
        elif isinstance(data[key], bytes):
            data[key] = data[key].decode()
        # You can also get object arrays which themselves contain numpy arrays, it's best to turn these
        # into multidimensional arrays. If they can't turn into multidimensional arrays due to them
        # being different types or not of the same size then it will convert the numpy object array
        # into a list of objects instead.
        elif (
            type(data[key]) == np.ndarray
            and data[key].dtype == object
            and type(data[key][0]) == np.ndarray
        ):
            try:
                # Get all the None values and turn them into NaNs
                none_values = np.nonzero(_is_none(data[key]))
                if np.size(none_values) > 0:
                    data[key][none_values] = np.nan
                # If all of the values were None, then set the array dtype to float64
                # (as we don't know what dtype it actually was), probably only relevant for testing
                # np.nonzero returns one index array per dimension, hence the division by the number of dimensions
                if (np.size(none_values) // len(data[key].shape)) == np.size(data[key]):
                    data[key] = data[key].astype(np.float64)
                # If it's not an object array, numpy will stack the axes, which isn't desired here
                # as we want to maintain the multidimensional nature of the data. So we'll create an
                # array of the desired size using the shape of the first element.
                elif data[key][0].dtype != object:
                    new_array = np.empty(
                        [data[key].size, *data[key][0].shape], dtype=data[key][0].dtype
                    )
                    for idx in range(new_array.shape[0]):
                        new_array[idx] = data[key][idx]
                    data[key] = new_array
                else:
                    # For an object array you can flatten it, and stack all inner arrays together until it's not an object array
                    # Crucially this assumes the array not as an object array can fit in memory! If you're doing the beam_ptr
                    # conversion take this into consideration
                    while data[key].dtype == object:
                        data[key] = np.vstack(data[key].flatten()).reshape(
                            list(data[key].shape) + list(data[key].flat[0].shape)
                        )
            except ValueError:
                # The inner arrays could not be combined, fall back to a plain list of the elements
                data[key] = list(x for x in data[key])
    return data

642

643

def convert_sav_to_dict(sav_path: str, logger: Logger, tmp_dir="temp_pyfhd"):
    """
    Given a path to an IDL style .sav file, load into a python dictionary
    using scipy.io.readsav.

    If the file was saved with the IDL /compress option, the readsav function
    has to save a decompressed version of the file. By default this uses
    the tempfile module to find a location, but this usually finds a bad
    location with little storage when called on a super cluster. So explicitly
    make our own temp dir `temp_pyfhd` where the code is being called. It is
    assumed many files are to be converted, so `temp_pyfhd` should be deleted
    after all calls.

    Mostly used just for testing, if you're not a developer you can safely ignore this function

    Parameters
    ----------
    sav_path : str
        Filepath for an IDL .sav file, any path-like object is accepted
    logger : Logger
        The logger to output any error messages to
    tmp_dir : str
        Dir to place temporary files, creates the directory if doesn't exist.
        Default: "temp_pyfhd".

    Returns
    --------
    sav_dict : dict
        Dictionary containing whatever was in the .sav file

    Raises
    ------
    SystemExit
        If `sav_path` is not an existing file, after the error has been logged
        and the handlers of the logger have been closed
    """
    if not os.path.isfile(sav_path):
        logger.error(f"{sav_path} doesn't exist, please check your input path")
        for handler in logger.handlers:
            handler.close()
        # Same effect as the `exit()` helper, without depending on the site module
        # NOTE(review): this exits with the default (success) status, as before —
        # confirm whether a non-zero status is wanted here
        raise SystemExit()

    # Ensure the tmp dir exists, create if not
    os.makedirs(tmp_dir, exist_ok=True)

    # Strip off any leading path to leave just the file name, os.path copes with
    # path-like objects and with the path separator of the platform
    temp_name = os.path.join(tmp_dir, os.path.basename(sav_path))

    # Load into a dictionary, decompressing and saving a temporary file if need be
    return readsav(
        os.fspath(sav_path), python_dict=True, uncompressed_file_name=temp_name
    )

Coverage for PyFHD/io/pyfhd_io.py: 74%

203 statements