Coverage for PyFHD/io/pyfhd_io.py: 74%

203 statements  

« prev     ^ index     » next       coverage.py v7.9.1, created at 2025-07-01 10:58 +0800

1import os 

2import numpy as np 

3import h5py 

4from logging import Logger 

5from pathlib import Path 

6from typing import Any 

7from numpy.typing import NDArray, DTypeLike 

8from scipy.io import readsav 

9 

10 

11def dtype_picker(dtype: DTypeLike) -> type: 

12 """ 

13 Picks the double precision type for the given dtype for saving the hdf5 file to ensure everything 

14 is saved without losing information. 

15 

16 Parameters 

17 ---------- 

18 dtype : type 

19 The numpy dtype of an array 

20 

21 Returns 

22 ------- 

23 type 

24 The corresponding double precision type 

25 """ 

26 if np.issubdtype(dtype, np.integer): 

27 return np.int64 

28 elif np.issubdtype(dtype, np.floating): 

29 return np.float64 

30 elif np.issubdtype(dtype, np.complexfloating): 

31 return np.complex128 

32 else: 

33 # Should never get here, this should throw an error 

34 return None 

35 

36 

37@np.vectorize 

38def _is_complex(value: Any) -> bool: 

39 """ 

40 Finds if a value is complex, this works regardless of the array type 

41 unlike np.iscomplex or np.iscomplexobj which can't handle object arrays. 

42 This being vectorized also allows us to check this for any complex type, 

43 whether it be the python complex type or a numpy complex type 

44 

45 Parameters 

46 ---------- 

47 value : Any 

48 The value to check in a NumPy array 

49 

50 Returns 

51 ------- 

52 bool 

53 True if value is a complex, False otherwise 

54 """ 

55 return np.iscomplexobj(value) 

56 

57 

58@np.vectorize 

59def _is_string(value: Any) -> bool: 

60 """ 

61 Finds if a value is a string or not, works regardless of the array type. 

62 There is no string check available for object arrays 

63 

64 Parameters 

65 ---------- 

66 value : Any 

67 A value to check 

68 

69 Returns 

70 ------- 

71 bool 

72 True if value is a str, False otherwise 

73 """ 

74 return isinstance(value, str) 

75 

76 

77@np.vectorize 

78def _is_none(value: Any) -> bool: 

79 """ 

80 Checks for a none object and is vectorized to work across any numpy array 

81 even if it's an object array. 

82 

83 Parameters 

84 ---------- 

85 value : Any 

86 A value to be checked if None 

87 

88 Returns 

89 ------- 

90 bool 

91 True if value is None, otherwise False 

92 """ 

93 return value is None 

94 

95 

96@np.vectorize 

97def _decode_byte_arr(value: NDArray[np.byte]) -> str: 

98 """ 

99 Decodes a byte string into a string 

100 

101 Parameters 

102 ---------- 

103 value : NDArray[np.byte] 

104 Value to decode 

105 

106 Returns 

107 ------- 

108 str 

109 The decoded value 

110 """ 

111 return value.decode() 

112 

113 

def format_array(array: NDArray[Any]) -> NDArray[Any]:
    """
    Prepares an array for saving into a HDF5 file.

    Arrays that are empty or do not have the object dtype are returned untouched,
    except for string arrays which are converted into bytes arrays. Object arrays
    have their `None` values replaced in place: with empty strings when the array
    contains strings (the result is then converted to a bytes array), or with
    `NaN` otherwise (the result is then converted to `complex128` if any value is
    complex, in which case the NaNs become `nan + nanj`, or to `float64` if not).

    Parameters
    ----------
    array : NDArray[Any]
        The array to find None in and if so convert from object array

    Returns
    -------
    array: NDArray[Any]
        Array without None objects and in the correct dtype
    """
    holds_objects = array.dtype == object
    # The vectorized helpers raise on empty arrays, so empty arrays exit here too
    if array.size == 0 or not holds_objects:
        if np.issubdtype(array.dtype, np.str_):
            return array.astype(np.bytes_)
        return array

    if np.any(_is_string(array)):
        # Boolean mask assignment swaps the None values for empty strings in place
        array[array == None] = ""
        return array.astype(bytes)

    try:
        # Boolean mask assignment swaps the None values for NaNs in place
        array[array == None] = np.nan
        if np.any(_is_complex(array)):
            # complex128 guarantees double precision complex values
            array = array.astype(np.complex128)
            # Promote the real NaNs to fully complex NaNs
            array[np.isnan(array.real)] = np.nan * 0j
        else:
            array = array.astype(np.float64)
    except TypeError:
        # Structured/record arrays (like astropy's FITS_rec) end up here,
        # they are intentionally left alone so they get saved raw
        pass
    return array

163 

164 

165def save_dataset( 

166 h5py_obj: h5py.File | h5py.Group, 

167 key: str, 

168 value: Any, 

169 to_chunk: dict[str, dict], 

170 variable_lengths: [str, DTypeLike], 

171 logger: Logger | None, 

172) -> bool: 

173 """ 

174 A general function for saving a dataset inside a HDF5 File or Group. It's used exclusively for saving 

175 a dictionary into a HDF5 file, hence why we take a `key` and `value` pair. The `to_chunk` parameter is 

176 explained in the `save` function, please look there for explanation. In the case of finding a None object 

177 an Empty Dataset is saved and the is_none is returned as True, so the attribute associated with the key 

178 can also be set to True to indicate to PyFHD later that the value is meant to be None when reading in the 

179 dataset again. 

180 

181 Parameters 

182 ---------- 

183 h5py_obj : h5py.File | h5py.Group 

184 A h5py object that has access to the `create_dataset` and `create_group` methods 

185 key : str 

186 The key from the dictionary we're saving 

187 value : Any 

188 The value from the dictionary 

189 to_chunk : dict[str, dict] 

190 A dictionary where each key-value pair represents a key in the to_save dictionary, and the value is a dictionary 

191 which should contain two key-value pairs, `shape` which should be the `shape` of the array and `chunk` which tells 

192 hdf5 how to chunk the dataset when it's being read/written. If you're not sure how to `chunk` the dataset, set `chunk` 

193 to True which enables h5py to guess the chunk size for you. By default {} 

194 variable_lengths : dict[str, DTypeLike] 

195 A dictionary where each key-value pair represents a key in the to_save dictionary, and the value is a dtype. This is 

196 for special cases where you must save an array of variable length arrays. H5Py does support variable length arrays, but 

197 you must use a special type, using the `h5py.vlen_dtype()` you can create a dtype which accepts object arrays of variable 

198 lengths. For example if you wish to have variable integer array called `ija`, you would use `h5py.vlen_dtype(np.int64)`, 

199 and save use it in the variable_lengths dictionary like so, `{'ija': h5py.vlen_dtype(np.int64)}`, which will set the dtype appropriately 

200 during a `create_dataset` call. By default {} 

201 logger : Logger | None 

202 PyFHD's Logger 

203 

204 Returns 

205 ------- 

206 is_none : bool 

207 True if the value is None, False otherwise 

208 

209 See Also 

210 -------- 

211 PyFHD.io.pyfhd_io.save : Save a HDF5 file 

212 PyFHD.io.pyfhd_io.dict_to_group : Converts a dictionary to a h5py Group Object 

213 """ 

214 is_none = False 

215 # Match the type 

216 match value: 

217 case dict(): 

218 group = h5py_obj.create_group(key) 

219 # dict_to_group will be recursively called if there is another dict 

220 # in this dict 

221 dict_to_group(group, value, to_chunk, variable_lengths, logger) 

222 case np.ndarray(): 

223 if key not in variable_lengths: 223 ↛ 230line 223 didn't jump to line 230 because the condition on line 223 was always true

224 # Find and replace all None objects 

225 value = format_array(value) 

226 value_dtype = dtype_picker(value.dtype) 

227 else: 

228 # Since we're dealing with variable length arrays, we need to use a special dtype 

229 # and process each array individually 

230 for i, arr in enumerate(value): 

231 value[i] = format_array(arr) 

232 value_dtype = variable_lengths[key] 

233 # If we want it to be chunked do that, always compress it 

234 if key in to_chunk: 234 ↛ 235line 234 didn't jump to line 235 because the condition on line 234 was never true

235 h5py_obj.create_dataset( 

236 key, 

237 shape=to_chunk[key]["shape"], 

238 data=value, 

239 dtype=value_dtype, 

240 chunks=to_chunk[key]["chunk"], 

241 compression="gzip", 

242 ) 

243 else: 

244 h5py_obj.create_dataset( 

245 key, 

246 shape=value.shape, 

247 data=value, 

248 dtype=value_dtype, 

249 compression="gzip", 

250 ) 

251 case list(): 

252 # Was easier to convert to a NumPy array to get vectorization 

253 # Given that H5Py converts it into a NumPy array anyway, we can 

254 # at least control the conversion (if we need to) 

255 if key in variable_lengths: 255 ↛ 256line 255 didn't jump to line 256 because the condition on line 255 was never true

256 value = np.array(value, dtype=object) 

257 for i, arr in enumerate(value): 

258 value[i] = format_array(arr) 

259 data_dtype = variable_lengths[key] 

260 else: 

261 try: 

262 value = np.array(value) 

263 value = format_array(value) 

264 data_dtype = dtype_picker(value.dtype) 

265 except ValueError as e: 

266 if "inhomogeneous" in str(e): 

267 logger.warning( 

268 f"Failed to save {key} as an array as the list couldn't turn into a NumPy array, trying to save as a variable length array. Please add {key} to the variable_lengths dictionary in the save function in future." 

269 ) 

270 value = np.array(value, dtype=object) 

271 for i, arr in enumerate(value): 

272 value[i] = format_array(arr) 

273 data_dtype = h5py.vlen_dtype(dtype_picker(value[0].dtype)) 

274 else: 

275 logger.info( 

276 f"You received an error not related to the array being inhomogeneous, Here's the error: {e}" 

277 ) 

278 h5py_obj.create_dataset( 

279 key, data=value, dtype=data_dtype, compression="gzip" 

280 ) 

281 case Path(): 281 ↛ 283line 281 didn't jump to line 283 because the pattern on line 281 never matched

282 # If we find a Path object, convert it to a string 

283 value = str(value) 

284 h5py_obj.create_dataset(key, data=value) 

285 case None: 

286 is_none = True 

287 # In the case we get something that is none, create empty dataset 

288 h5py_obj.create_dataset(key, dtype="b") 

289 case _: 

290 try: 

291 # Store the value in a single size dataset, used for ints, floats, strings etc 

292 h5py_obj.create_dataset(key, data=value) 

293 except ValueError: 

294 if logger is not None: 

295 logger.error( 

296 f"Failed to save {key}, the type of key was {type(value)}" 

297 ) 

298 return is_none 

299 

300 

def dict_to_group(
    group: h5py.Group,
    to_convert: dict,
    to_chunk: dict[str, dict],
    variable_lengths: dict[str, DTypeLike],
    logger: Logger | None,
) -> None:
    """
    Writes every entry of a dictionary into a HDF5 group. This is used when a dictionary
    is found inside a dictionary that is being saved into a HDF5 file. Each entry is saved
    through `save_dataset`, so nested dictionaries turn into sub groups and everything else
    turns into individual datasets. The boolean returned by `save_dataset` is stored as an
    attribute of the group under the same key, marking whether the entry was None.

    Parameters
    ----------
    group : h5py.Group
        The created group to save the dictionary in
    to_convert : dict
        The dictionary to save into the group
    to_chunk : dict[str, dict]
        The chunking dictionary, see `save` for more information
    variable_lengths : dict[str, DTypeLike]
        The variable length dictionary, see `save` for more information
    logger : Logger | None
        PyFHD's Logger

    See Also
    --------
    PyFHD.io.pyfhd_io.save : Save a HDF5 file
    """
    for name, entry in to_convert.items():
        was_none = save_dataset(group, name, entry, to_chunk, variable_lengths, logger)
        group.attrs[name] = was_none

334 

335 

def save(
    file_name: Path,
    to_save: NDArray[Any] | dict,
    dataset_name: str,
    logger: Logger | None = None,
    to_chunk: dict[str, dict] = {},
    variable_lengths: dict[str, DTypeLike] = {},
) -> None:
    """
    Saves a numpy array or dictionary into a hdf5 file using h5py, with compression applied to all arrays/datasets.
    An array is saved as a single dataset named `dataset_name`. For a dictionary each key becomes a dataset, unless
    the key points to a dictionary, in which case a group is created and `dict_to_group` turns each key of that sub
    dict into a dataset (or another group if it's another sub dictionary). For every saved key an attribute of the
    same name is written to the file, it is True when the saved value was None. This function should be kept as
    general as possible, if something needs formatting for saving, format it before calling this function.
    If you are converting a sav file to hdf5 with this function, use `recarray_to_dict` which converts the sav output
    from readsav into a proper python dictionary (rather than recarrays or weird array shapes, objects arrays etc.)

    Parameters
    ----------
    file_name : Path
        The file to save as hdf5 should be /path/to/file_name.h5 (or .hdf5)
    to_save : NDArray[Any] | dict
        The dictionary or numpy array to save into the hdf5 file
    dataset_name : str
        Used in the case that the to_save variable is not a dictionary, this name will
        be used as the key for the dataset in the hdf5 file.
    logger : Logger, optional
        PyFHD's Logger, by default None (in case you don't want to use the logger for testing)
    to_chunk : dict[str, dict], optional
        A dictionary where each key-value pair represents a key in the to_save dictionary, and the value is a dictionary
        which should contain two key-value pairs, `shape` which should be the `shape` of the array and `chunk` which tells
        hdf5 how to chunk the dataset when it's being read/written. If you're not sure how to `chunk` the dataset, set `chunk`
        to True which enables h5py to guess the chunk size for you. By default {}
    variable_lengths : dict[str, DTypeLike], optional
        A dictionary where each key-value pair represents a key in the to_save dictionary, and the value is a dtype. This is
        for special cases where you must save an array of variable length arrays. H5Py does support variable length arrays, but
        you must use a special type, using the `h5py.vlen_dtype()` you can create a dtype which accepts object arrays of variable
        lengths. For example if you wish to have variable integer array called `ija`, you would use `h5py.vlen_dtype(np.int64)`,
        and use it in the variable_lengths dictionary like so, `{'ija': h5py.vlen_dtype(np.int64)}`, which will set the dtype
        appropriately during a `create_dataset` call. By default {}

    See Also
    --------
    PyFHD.io.pyfhd_io.load : Load a HDF5 file
    PyFHD.io.pyfhd_io.dict_to_group : Converts a dictionary to a h5py Group Object
    PyFHD.io.pyfhd_io.recarray_to_dict : Turns any record arrays into dicts, also formats object arrays into the correct dtype array
    PyFHD.io.pyfhd_io.save_dataset : Saves a single dataset based off a dictionary key-value pair
    PyFHD.io.pyfhd_io.format_array : Finds any None is an array and replaces them appropriately
    """
    with h5py.File(file_name, "w") as h5_file:
        if isinstance(to_save, np.ndarray):
            if logger:
                logger.info(f"Writing the {dataset_name} array to {file_name}")
            h5_file.attrs[dataset_name] = save_dataset(
                h5_file, dataset_name, to_save, to_chunk, variable_lengths, logger
            )
        elif isinstance(to_save, dict):
            if logger:
                logger.info(
                    f"Writing the {dataset_name} dict to {file_name}, each key will be a dataset, if the key contains a dict then it will be a group."
                )
            for name, entry in to_save.items():
                # The attributes act as a mask, a True attribute tells the
                # loader that the dataset stands in for a None object.
                h5_file.attrs[name] = save_dataset(
                    h5_file, name, entry, to_chunk, variable_lengths, logger
                )
        else:
            # Anything else is still handed to save_dataset under dataset_name
            # before the warning is logged
            h5_file.attrs[dataset_name] = save_dataset(
                h5_file, dataset_name, to_save, to_chunk, variable_lengths, logger
            )
            if logger:
                logger.warning(
                    "Not a dict or numpy array, PyFHD won't write other types at this time, refer to PyFHD.io.pyfhd_io.save to see what is supported"
                )

415 

416 

def load_dataset(
    h5py_obj: h5py.File | h5py.Group, key: str, dataset: h5py.Dataset
) -> Any:
    """
    Loads a single dataset from a HDF5 File or Group, the key here is the dataset name from the
    file or group and is only used to check the attributes of said file or group. If the attribute
    associated with the key is True, then we assume the value saved is an empty dataset and we should
    return None. If the attribute is False or doesn't exist (for example when the file wasn't written
    by `save`), load the value and check if this value should be a single value. There are special
    checks for byte arrays, if there are byte arrays, PyFHD assumes these are meant to be strings.

    Parameters
    ----------
    h5py_obj : h5py.File | h5py.Group
        A HDF5 file or group
    key : str
        The dataset name
    dataset : h5py.Dataset
        The dataset we are loading

    Returns
    -------
    Any
        The value stored in the HDF5 Dataset, or None if the attribute for the key marks it as None

    See Also
    --------
    PyFHD.io.pyfhd_io.load : Load a HDF5 file
    """
    # If the corresponding attribute is True the dataset is an empty placeholder
    # for a None object. A missing attribute is treated as "not None" so that
    # files without the attribute mask can still be read.
    if h5py_obj.attrs.get(key, False):
        return None
    # Scalar datasets have an empty shape and must be read with an empty tuple
    if dataset.shape == ():
        value = dataset[()]
    else:
        value = dataset[:]
    if isinstance(value, np.ndarray) and value.dtype.kind == "S":
        if value.size > 0:
            value = _decode_byte_arr(value)
        else:
            # There is nothing to decode in an empty array, and vectorized
            # functions may refuse size 0 inputs, so only switch the dtype
            value = value.astype(np.str_)
    if isinstance(value, bytes):
        value = value.decode()
    return value

459 

460 

def group_to_dict(group: h5py.Group) -> dict:
    """
    Converts a h5py group into a dictionary while loading a hdf5 file. Datasets
    inside the group are read with `load_dataset`, groups inside the group are
    converted recursively into sub dictionaries. Members which are neither a
    dataset nor a group are skipped.

    Parameters
    ----------
    group : h5py.Group
        A h5py group to turn into a dictionary

    Returns
    -------
    return_dict: dict
        The group turned into a dictionary
    """
    converted = {}
    for name in group:
        member = group[name]
        if isinstance(member, h5py.Dataset):
            converted[name] = load_dataset(group, name, member)
        elif isinstance(member, h5py.Group):
            converted[name] = group_to_dict(member)
    return converted

484 

485 

def load(
    file_name: Path, logger: Logger | None = None, lazy_load: bool = False
) -> dict[str, object] | NDArray[Any] | h5py.File:
    """
    Loads a HDF5 file into PyFHD, it reads the HDF5 into an array if the
    HDF5 file contains a single dataset, while a HDF5 which contains multiple
    datasets (or a single group) will load them into a dictionary. Any groups
    will be converted to sub dictionaries using `group_to_dict`

    Parameters
    ----------
    file_name : Path
        The /path/to/the/hdf5.h5
    logger : Logger, optional
        PyFHD's Logger, by default None
    lazy_load : bool, optional
        Set to true if you wish to lazy load the file, currently the only file that will be
        supported to do this in PyFHD will be the beam/psf file, but support for other files can
        be done easily enough, by default False. When lazy loading, the open file is returned
        and it is up to the caller to close it.

    Returns
    -------
    return_dict | array | h5_file: dict[str, object] | NDArray[Any] | h5py.File
        Returns a dict in the case the HDF5 file contains multiple datasets,
        the stored value (usually an array) if the HDF5 contains one dataset, or the
        h5py File object if the file is lazy loaded to conserve memory.

    See Also
    --------
    PyFHD.io.pyfhd_io.save : Save a HDF5 file
    PyFHD.io.pyfhd_io.group_to_dict : Converts a h5py Group object to a dictionary
    """
    h5_file = h5py.File(file_name, "r")
    if lazy_load:
        # The caller reads from the open file, so it must stay open
        return h5_file
    # The context manager closes the file even if reading raises
    with h5_file:
        keys = list(h5_file.keys())
        # Only a lone *dataset* is returned as a bare value. A lone group has
        # no shape to read, so it takes the dictionary path below.
        if len(keys) == 1 and isinstance(h5_file[keys[0]], h5py.Dataset):
            key = keys[0]
            if logger:
                logger.info(f"Loading {key} from {file_name} into an array")
            return load_dataset(h5_file, key, h5_file[key])
        if logger:
            logger.info(f"Loading {file_name} into a dictionary")
        # A h5py File is itself a Group, so the group conversion covers the
        # top level of the file as well
        return group_to_dict(h5_file)

544 

545 

546def recarray_to_dict(data: np.recarray | dict) -> dict: 

547 """ 

548 Turns a record array into a dict, but does it as a deep convert. This was needed due to scipy's readsav 

549 returning an inception like experience of record arrays. This would mean to access values from something 

550 like the obs structure for a test, the code had to be obs[0]['baseline_info'][0]['tile_a'], which was became 

551 untenable as the full python translation won't require these leaving us two codebases for IDL compatible and 

552 Python compatible. Instead, this function turns all record arrays into dictionaries, which are easier to understand 

553 and are faster. 

554 

555 This was made specifically to work with the readsav function, to get compatibility with general recarrays remove the 

556 zero index, as readsav for some reason adds a single dimension to all recarrays. 

557 

558 This was updated later to also take a dictionary which may contain record arrays too. 

559 

560 This was also updated later to turn object arrays into multidimensional arrays if they can be one. In the 

561 case the object array couldn't be turned into a multidimensional array it was turned into a list 

562 

563 Parameters 

564 ---------- 

565 data : np.recarray or dict 

566 A record array or dictionary maybe containing nested record arrays 

567 

568 Returns 

569 ------- 

570 data: dict 

571 A potentially nested dictionaries of dictionaries 

572 """ 

573 # Convert the original record array into a dictionary 

574 if type(data) == np.recarray: 

575 data = {name.lower(): data[name] for name in data.dtype.names} 

576 # For every key, if it's a record array, recursively call the function 

577 for key in data: 

578 # Every now and then you do get object arrays that contain only one element or arrays that contain only one element 

579 # These are not useful so I will extract the element out 

580 if type(data[key]) == np.ndarray and data[key].size == 1: 

581 data[key] = data[key][0] 

582 # Sometimes the recarray is in a standard numpy object array and other times its not for some reason... 

583 if type(data[key]) == np.recarray: 

584 data[key] = recarray_to_dict(data[key]) 

585 elif type(data[key]) == np.ndarray and type(data[key][0]) == np.recarray: 585 ↛ 586line 585 didn't jump to line 586 because the condition on line 585 was never true

586 data[key] = recarray_to_dict(data[key][0]) 

587 # We found a single array with only None 

588 elif type(data[key]) == np.ndarray and isinstance(data[key][0], type(None)): 

589 # Get all the None values and turn them into NaNs 

590 none_values = np.where(data[key] == None) 

591 if np.size(none_values) > 0: 591 ↛ 595line 591 didn't jump to line 595 because the condition on line 591 was always true

592 data[key][none_values] = np.nan 

593 # If all of the values were None, then set the array dtype to float64 

594 # (as we don't know what dtype it actually was), probably only relevant for testing 

595 if np.size(none_values) == np.size(data[key]): 595 ↛ 577line 595 didn't jump to line 577 because the condition on line 595 was always true

596 data[key] = data[key].astype(np.float64) 

597 # Assume we found a string array since it's bytes, convert to a string list 

598 elif type(data[key]) == np.ndarray and isinstance(data[key].flat[0], bytes): 

599 data[key] = [x.decode().strip() for x in data[key]] 

600 # Found only bytes, assume it's a string, convert the string 

601 elif isinstance(data[key], bytes): 

602 data[key] = data[key].decode() 

603 # You can also get object arrays which themselves contain numpy arrays, it's best to turn these 

604 # into multidimensional arrays. If they can't turn into multidimensional arrays due to them 

605 # being different types or not of the same size then it will convert the numpy object array 

606 # into a list of objects instead. 

607 elif ( 

608 type(data[key]) == np.ndarray 

609 and data[key].dtype == object 

610 and type(data[key][0]) == np.ndarray 

611 ): 

612 try: 

613 # Get all the None values and turn them into NaNs 

614 none_values = np.nonzero(_is_none(data[key])) 

615 if np.size(none_values) > 0: 

616 data[key][none_values] = np.nan 

617 # If all of the values were None, then set the array dtype to float64 

618 # (as we don't know what dtype it actually was), probably only relevant for testing 

619 if (np.size(none_values) // len(data[key].shape)) == np.size(data[key]): 619 ↛ 620line 619 didn't jump to line 620 because the condition on line 619 was never true

620 data[key] = data[key].astype(np.float64) 

621 # If it's not an object array, numpy will stack the axes, which isn't desired here 

622 # as we want to maintain the multidimensional nature of the data. So we'll create an 

623 # array of the desired size using the shape of the first element. 

624 elif data[key][0].dtype != object: 

625 new_array = np.empty( 

626 [data[key].size, *data[key][0].shape], dtype=data[key][0].dtype 

627 ) 

628 for idx in range(new_array.shape[0]): 

629 new_array[idx] = data[key][idx] 

630 data[key] = new_array 

631 else: 

632 # For an object array you can flatten it, and stack all inner arrays together until it's not an object array 

633 # Crucially this assumes the array not as an object array can fit in memory! If you're doing the beam_ptr 

634 # conversion take this into consideration 

635 while data[key].dtype == object: 

636 data[key] = np.vstack(data[key].flatten()).reshape( 

637 list(data[key].shape) + list(data[key].flat[0].shape) 

638 ) 

639 except ValueError: 

640 data[key] = list(x for x in data[key]) 

641 return data 

642 

643 

644def convert_sav_to_dict(sav_path: str, logger: Logger, tmp_dir="temp_pyfhd"): 

645 """ 

646 Given a path to an IDL style .sav file, load into a python dictionary 

647 using scipy.io.readsav. 

648 

649 If the file was saved with the IDL /compress option, the readsav function 

650 has to save a decompressed version of the file. By default this uses 

651 the tempfile module to find a location, but this usually finds a bad 

652 location with little storage when called on a super cluster. So explicitly 

653 make our own temp dir `tmp_pyfhd` where the code is being called. It is 

654 assumed many files are to be converted, so `tmp_pyfhd` should be deleted 

655 after all calls. 

656 

657 Mostly used just for testing, if you;re not a developer you can safely ignore this function 

658 

659 Parameters 

660 ---------- 

661 sav_path : str 

662 Filepath for an IDL .sav file 

663 logger : Logger 

664 The logger to output any error messages to 

665 tmp_dir : str 

666 Dir to place temporary files, creates the directory if doesn't exist. 

667 Default: "tmp_pyfhd". 

668 

669 Returns 

670 -------- 

671 sav_dict : dict 

672 Dictionary containing whatever was in the .sav file 

673 

674 """ 

675 

676 if os.path.isfile(sav_path): 676 ↛ 692line 676 didn't jump to line 692 because the condition on line 676 was always true

677 # logger.info(f"{sav_path} found, converting now.") 

678 

679 # Ensure the tmp dir exists, create if not 

680 os.makedirs(tmp_dir, exist_ok=True) 

681 

682 # Strip off any leading path to leave just the file name 

683 temp_name = f"{tmp_dir}/{sav_path.split('/')[-1]}" 

684 

685 # Load into a dictionary, decompressed and saving a temporary file if need 

686 # be 

687 sav_dict = readsav(sav_path, python_dict=True, uncompressed_file_name=temp_name) 

688 

689 return sav_dict 

690 else: 

691 # sys.exit(f"{sav_path} does not exist. Cannot grid so exiting") 

692 logger.error(f"{sav_path} doesn't exist, please check your input path") 

693 

694 for handler in logger.handlers: 

695 handler.close() 

696 exit()