Coverage for PyFHD/pyfhd_tools/pyfhd_utils.py: 78%

475 statements  

« prev     ^ index     » next       coverage.py v7.9.1, created at 2025-07-01 10:58 +0800

1import subprocess 

2from copy import deepcopy 

3from logging import Logger 

4from math import factorial, pi 

5from sys import exit 

6import h5py 

7import numpy as np 

8from astropy import units as u 

9from astropy.coordinates import SkyCoord 

10from numba import njit 

11from numpy.typing import ArrayLike, NDArray 

12from scipy.ndimage import label, median_filter 

13 

14 

15@njit 

16def get_bins( 

17 min: int | float, max: int | float, bin_size: int 

18) -> NDArray[np.float64 | np.int64]: 

19 """ 

20 Calculates the bins for the histogram and reverse indices based on a 

21 minimum and maximum value, plus a given bin size. It mirrors IDL's way of 

22 calculating the bin edges. In IDL bins seem to be right hand side of the bin open 

23 like IDL, even at the end. However, in comparison to NumPy, the last bin is always 

24 the maximum value. This does utilize numba and parallelization to make it faster. 

25 

26 Parameters 

27 ---------- 

28 min : int | float 

29 The minimum chosen for the histogram 

30 max : int | float 

31 The maximum chosen for the histogram 

32 bin_size : int 

33 The bin size chosen for the histogram. This histogram always uses bins 

34 of equal widths. 

35 

36 Returns 

37 ------- 

38 bins: NDArray[np.float64 | np.int64] 

39 A NumPy array of all the bins ranging from min (bin[0]) to the max (bin[-1]). The step 

40 is bin_size. 

41 

42 See Also 

43 -------- 

44 histogram: Calculates the bins, histogram and reverse indices 

45 get_hist: Calculates histogram only 

46 get_ri: Calculates the reverse indices only 

47 """ 

48 

49 return np.arange(min, max + bin_size, bin_size) 

50 

51 

52@njit 

53def get_hist( 

54 data: NDArray[np.floating | np.integer | np.complexfloating], 

55 bins: NDArray[np.float64 | np.int64], 

56 min: int | float, 

57 max: int | float, 

58) -> NDArray[np.int64]: 

59 """ 

60 Calculates the histogram based on the given bins and data, taking into account 

61 the minimum and maximum 

62 

63 Parameters 

64 ---------- 

65 data : NDArray[np.floating | np.integer | np.complexfloating] 

66 A NumPy array that is of one dtype float, int, or complex. Cannot be object 

67 bins : NDArray[np.float64 | np.int64] 

68 A NumPy array of bins for the histogram 

69 min : int | float 

70 The minimum set for the data 

71 max : int | float 

72 The maximum set for the data 

73 

74 Returns 

75 ------- 

76 hist: NDArray[np.int64] 

77 A histogram of the data using the bins found in the bins array. 

78 

79 See Also 

80 -------- 

81 histogram: Calculates the bins, histogram and reverse indices. 

82 get_bins: Calculates the bins only 

83 get_ri: Calculates the reverse indices only 

84 """ 

85 

86 bin_l = bins.size 

87 # Setup the histogram 

88 hist = np.zeros(bin_l, dtype=np.int64) 

89 # Setup the things required for the indexing 

90 n = bin_l - 1 

91 bin_min = bins[0] 

92 bin_max = bins[-1] 

93 # In the case that the bin min and bin max is the same, 

94 # then finding the index doesn't matter so set the divider 

95 # to 1 (as there is only 1 bin!) 

96 if bin_min == bin_max: 

97 bin_divider = 1 

98 else: 

99 bin_divider = bin_max - bin_min 

100 # Now loop through the data 

101 for idx in range(data.size): 

102 # Check if its inside the range we set 

103 if data[idx] < min or data[idx] > max: 

104 continue 

105 # Calculate the index for indices and histogram 

106 bin_i = int(n * (data[idx] - bin_min) / bin_divider) 

107 # Add to the histogram 

108 hist[bin_i] += 1 

109 return hist 

110 

111 

112@njit 

113def get_ri( 

114 data: NDArray[np.floating | np.integer | np.complexfloating], 

115 bins: NDArray[np.float64 | np.int64], 

116 hist: NDArray[np.int64], 

117 min: int | float, 

118 max: int | float, 

119) -> NDArray[np.int64]: 

120 """ 

121 Calculates the reverse indices of a data and histogram. 

122 The function replicates IDL's REVERSE_INDICES keyword within 

123 their HISTOGRAM function. The reverse indices array which is returned 

124 by this function can be hard to understand at first, I will explain it here 

125 and also link JD Smith's famous article on IDL's HISTOGRAM. 

126 

127 The reverse indices array is two vectors concatenated together. The first vector contains 

128 indexes for the second vector, this vector should be the size of bins + 1. 

129 The second vector contains indexes from the data itself, and should be the size of data. 

130 The justification for having such an array is to quickly make adjustments to certain bins 

131 without having to search the array multiple times, thus avoiding multiple O(data.size) loops. 

132 

133 The first vector indexes contain the starting positions of each bin in the second vector. For example, 

134 between the indexes given by first_vector[0] and first_vector[1] of the second vector should be all the 

135 indexes of bins[0] from inside the data. So if I wanted to make adjustments to the entire first bin, 

136 and only the first bin I can use the reverse indices array, ri to do this. Let's say I wanted to flag 

137 all values of bins[0] with -1 for some reason to make them invalid in other calculations with the data, 

138 then I could do this: 

139 

140 `data[ri[ri[0] : ri[1]]] = -1` 

141 

142 Or more generally 

143 

144 `data[ri[ri[i] : ri[i + 1]]] = -1` 

145 

146 Where i is 0 <= i <= bins.size. 

147 

148 If you wish to gain a better understanding of how this get_ri function works, and the associated 

149 histogram function I have created here, please use the link given in the Notes section. This 

150 link will take you JD Smith's article on IDL's HISTOGRAM, it is an article which explains the 

151 IDL HISTOGRAM function better than IDL's own documentation. If you must gain a deeper understanding, 

152 read it once, gasp and get your shocks and many cries of why out of your system, then read it again. 

153 And keep reading it till you understand, as per the editor's note on the article: 

154 

155 "...If you read it enough, the secrets of the command will be revealed to you. Stranger things have happened" 

156 

157 Parameters 

158 ---------- 

159 data : NDArray[np.floating | np.integer | np.complexfloating] 

160 A NumPy array of the data 

161 bins : NDArray[np.float64 | np.int64] 

162 A NumPy array containing the bins for the histogram 

163 hist : NDArray[np.int64] 

164 A NumPy array containing the histogram 

165 min : int | float 

166 The minimum for the dataset 

167 max : int | float 

168 The maximum for the dataset 

169 

170 Returns 

171 ------- 

172 ri : NDArray[np.int64] 

173 An array containing the reverse indices, which is two vectors, the first vector containing indexes for the second vector. 

174 The second vector contains indexes for the data. 

175 

176 See Also 

177 --------- 

178 histogram: Calculates the bins, histogram and reverse indices. 

179 get_bins: Calculates the bins only 

180 get_hist: Calculates histogram only 

181 

182 Notes 

183 ------ 

184 'HISTOGRAM: The Breathless Horror and Disgust' : http://www.idlcoyote.com/tips/histogram_tutorial.html 

185 """ 

186 

187 bin_l = bins.size 

188 # Setup the first vector 

189 data_idx = bin_l + 1 

190 first_v = [bin_l + 1] 

191 # Add the bins 

192 for bin in hist: 

193 data_idx += bin 

194 first_v.append(data_idx) 

195 # Setup the reverse indices 

196 ri = np.zeros(bin_l + 1 + data.size, dtype=np.int64) 

197 ri[0 : bin_l + 1] = first_v 

198 # Create a tracking array to keep track of where we are in the data indexing 

199 tracker = np.array(first_v) 

200 # Setup the things required for the indexing 

201 n = bin_l - 1 

202 bin_min = bins[0] 

203 bin_max = bins[-1] 

204 if bin_min == bin_max: 

205 bin_divider = 1 

206 else: 

207 bin_divider = bin_max - bin_min 

208 # Setup counter to remove elements in case of min or max being set 

209 counter = 0 

210 for idx in range(data.size): 

211 # Check if its inside the range we set 

212 if data[idx] < min or data[idx] > max: 

213 counter += 1 

214 continue 

215 # Calculate the index for indices and histogram 

216 bin_i = int(n * (data[idx] - bin_min) / bin_divider) 

217 # Record the current index and update the tracking array 

218 ri[tracker[bin_i]] = idx 

219 tracker[bin_i] += 1 

220 ri = ri[: ri.size - counter] 

221 return ri 

222 

223 

224def histogram( 

225 data: NDArray[np.floating | np.integer | np.complexfloating], 

226 bin_size: int = 1, 

227 num_bins: int | None = None, 

228 min: int | float | None = None, 

229 max: int | float | None = None, 

230) -> tuple[NDArray[np.int64], NDArray[np.float64 | np.int64], NDArray[np.int64]]: 

231 """ 

232 The histogram function combines the use of the get_bins, get_hist and get_ri 

233 functions into one function. For the descriptions and docs of those functions 

234 look in See Also. This function will return the histogram, bin/bin_edges and 

235 the reverse indices. 

236 

237 Parameters 

238 ---------- 

239 data : NDArray[np.floating | np.integer | np.complexfloating] 

240 A NumPy array containing the data we want a histogram of 

241 bin_size : int, optional 

242 Sets the bin size for this histogram, by default 1 

243 num_bins : int | None, optional 

244 Set the number of bins this does override bin_size completely, by default None 

245 min : int | float | None, optional 

246 Set a minimum for the dataset, by default None 

247 max : int | float | None, optional 

248 Set a maximum for the dataset, by default None 

249 

250 Returns 

251 ------- 

252 hist : NDArray[np.int64] 

253 The histogram of the data 

254 bins : NDArray[np.float64 | np.int64] 

255 The bins of the histogram 

256 ri : NDArray[np.int64] 

257 The reverse indices array for the histogram and data 

258 

259 See Also 

260 -------- 

261 get_bins: Calculates the bins only 

262 get_hist: Calculates histogram only 

263 get_ri: Calculates the reverse indices only 

264 """ 

265 

266 # If the minimum has not been set, set it 

267 if min is None: 

268 min = np.min(data) 

269 # If the maximum has not been set, set it 

270 # Check if the max argument was used, set to True, if we set max here by data turn it off. 

271 if max is None: 

272 max = np.max(data) 

273 # If the number of bins has been set use that 

274 if num_bins is not None: 

275 bin_size = (max - min) / num_bins 

276 # Need to add checks if max is below min or max below min 

277 if min > max: 277 ↛ 278line 277 didn't jump to line 278 because the condition on line 277 was never true

278 max = min 

279 if max < min: 279 ↛ 280line 279 didn't jump to line 280 because the condition on line 279 was never true

280 min = max 

281 # IDL uses the bin_size as equal throughout min to max 

282 bins = get_bins(min, max, bin_size) 

283 # However, if we set a max, we must adjust the last bin to max according to IDL specifications 

284 # And we only do this in the case max was by an argument 

285 if (bins[-1] > max) or num_bins is not None: 

286 bins = bins[:-1] 

287 # Flatten the data 

288 data_flat = data.flatten() 

289 # Get the histogram 

290 hist = get_hist(data_flat, bins, min, max) 

291 # Get the reverse indices 

292 ri = get_ri(data_flat, bins, hist, min, max) 

293 # Return 

294 return hist, bins, ri 

295 

296 

297def l_m_n( 

298 obs: dict, 

299 psf: dict | h5py.File, 

300 obsdec: float | None = None, 

301 obsra: float | None = None, 

302 declination_arr: NDArray[np.floating] | None = None, 

303 right_ascension_arr: NDArray[np.floating] | None = None, 

304) -> tuple[NDArray[np.float64], NDArray[np.float64], NDArray[np.float64]]: 

305 """ 

306 Calculate the directional cosines l,m,n given an RA, Dec, and phase centre for 

307 each pixel in the input arrays given, or from the hyperresolved RA/Dec array 

308 created during beam creation. 

309 

310 Parameters 

311 ---------- 

312 obs: dict 

313 Observation metadata dictionary. 

314 psf: dict | h5py.File 

315 Beam dictionary. 

316 obsdec: float | None, optional 

317 The Dec phase centre for the observation, by default None and set to 

318 the phase centre saved in the observation metadata dicitonary 

319 (obs['obsdec']) 

320 obsra: float | None, optional 

321 The RA phase centre for the observation, by default None and set to 

322 the phase centre saved in the observation metadata dicitonary 

323 (obs['obsra']) 

324 declination_arr: NDArray[np.floating] | None, optional 

325 An array of declinations, by default None and set to the hyperresolved Dec 

326 array in psf['image_info']['dec_arr'] 

327 right_ascension_arr: NDArray[np.floating] | None, optional 

328 An array of right ascensions, by default None and set to the hyperresolved RA 

329 array in psf['image_info']['ra_arr'] 

330 

331 Returns 

332 ------- 

333 l_mode : NDArray[np.float64] 

334 Directional cosine l array, or the cosine of the vector that contributes 

335 to the x-axis 

336 m_mode : NDArray[np.float64] 

337 Directional cosine m array, or the cosine of the vector that contributes 

338 to the y-axis 

339 n_tracked : NDArray[np.float64] 

340 Directional cosine n array, or the cosine of the vector that contributes 

341 to the z-axis, dependent on the phase centre. 

342 """ 

343 # If the variables passed through are None them 

344 if obsdec is None: 

345 obsdec = obs["obsdec"] 

346 if obsra is None: 

347 obsra = obs["obsra"] 

348 if declination_arr is None: 

349 declination_arr = psf["image_info"]["dec_arr"] 

350 if right_ascension_arr is None: 

351 right_ascension_arr = psf["image_info"]["ra_arr"] 

352 

353 # Convert all the degrees given into radians 

354 obsdec = np.radians(obsdec) 

355 obsra = np.radians(obsra) 

356 declination_arr = np.radians(declination_arr) 

357 right_ascension_arr = np.radians(right_ascension_arr) 

358 

359 # Calculate l mode, m mode and the phase-tracked n mode of pixel centers 

360 cdec0 = np.cos(obsdec) 

361 sdec0 = np.sin(obsdec) 

362 cdec = np.cos(declination_arr) 

363 sdec = np.sin(declination_arr) 

364 cdra = np.cos(right_ascension_arr - obsra) 

365 sdra = np.sin(right_ascension_arr - obsra) 

366 l_mode = sdra * cdec 

367 m_mode = cdec0 * sdec - cdec * sdec0 * cdra 

368 # n=1 at phase center, so reference from there for phase tracking 

369 n_tracked = (sdec * sdec0 + cdec * cdec0 * cdra) - 1 

370 

371 # find any NaN values 

372 nan_vals = np.where(np.isnan(n_tracked)) 

373 # If any found, replace them with 0's 

374 if np.size(nan_vals) > 0: 374 ↛ 380line 374 didn't jump to line 380 because the condition on line 374 was always true

375 n_tracked[nan_vals] = 0 

376 l_mode[nan_vals] = 0 

377 m_mode[nan_vals] = 0 

378 

379 # Return the modes 

380 return l_mode, m_mode, n_tracked 

381 

382 

383def rebin_columns( 

384 a: NDArray[np.integer | np.floating | np.complexfloating], 

385 ax: int, 

386 shape: tuple, 

387 col_sizer: int, 

388) -> NDArray[np.integer | np.floating | np.complexfloating]: 

389 """ 

390 Performs expansion on the columns of a 1D or 2D array using interpolation to fill in the values that are created 

391 by expanding in the space between existing values. This function assumes the rows have already been expanded 

392 to the required number. 

393 

394 Parameters 

395 ---------- 

396 a : NDArray[np.integer | np.floating | np.complexfloating] 

397 An array to be expanded 

398 ax : int 

399 The axis we're expanding, almost always set to 1 

400 shape : tuple 

401 The shape of the original array 

402 col_sizer : int 

403 The number of columns we're adding in 

404 

405 Returns 

406 ------- 

407 rebinned : NDArray[np.integer | np.floating | np.complexfloating] 

408 An interpolated array of a containing shape[1] * col_sizer columns 

409 

410 See Also 

411 -------- 

412 PyFHD.pyfhd_tools.pyfhd_utils.rebin_rows : Expand the number of rows through interpolation 

413 PyFHD.pyfhd_tools.pyfhd_utils.rebin : Expand or Contract an array based on a given shape 

414 """ 

415 # tile the range of col_sizer 

416 tiles = np.tile(np.arange(col_sizer), (shape[0], shape[1] // col_sizer - 1)) 

417 # Get the differences between the columns 

418 differences = np.diff(a, axis=ax) / col_sizer 

419 # Multiply this by the tiles 

420 inferences_non_pad = np.repeat(differences, col_sizer, axis=ax) * tiles 

421 # Pad the zeros for the last two rows, and remove the extra zeros to make inferences same shape as desired shape 

422 inferences = np.pad(inferences_non_pad, (0, col_sizer))[:-col_sizer] 

423 if np.issubdtype(a.dtype, np.integer): 

424 inferences = np.floor(inferences).astype("int") 

425 # Now get our final array by adding the repeat of our rows rebinned to the inferences 

426 rebinned = inferences + np.repeat(a, col_sizer, axis=ax) 

427 return rebinned 

428 

429 

430def rebin_rows( 

431 a: NDArray[np.integer | np.floating | np.complexfloating], 

432 ax: int, 

433 shape: tuple, 

434 old_shape: tuple, 

435 row_sizer: int, 

436) -> NDArray[np.integer | np.floating | np.complexfloating]: 

437 """ 

438 Performs expansion on the rows of array `a` to the number of rows in shape[0] using interpolation to fill between any 

439 new values added when adding new rows between existing values. 

440 

441 Parameters 

442 ---------- 

443 a : NDArray[np.integer | np.floating | np.complexfloating] 

444 The array to be rebinned 

445 ax : int 

446 The axis we're expanding, almost always set to 0 

447 shape : tuple 

448 The new shape we're expanding to 

449 old_shape : tuple 

450 The shape of the array a 

451 row_sizer : int 

452 The number of rows we're expanding by 

453 

454 Returns 

455 ------- 

456 row_rebinned : NDArray[np.integer | np.floating | np.complexfloating] 

457 The interpolated array with row_sizer extra columns between existing rows 

458 

459 See Also 

460 -------- 

461 PyFHD.pyfhd_tools.pyfhd_utils.rebin_columns : Expand the number of columns through interpolation 

462 PyFHD.pyfhd_tools.pyfhd_utils.rebin : Expand or Contract an array based on a given shape 

463 """ 

464 # Tile the range of row_sizer 

465 tiles = np.tile( 

466 np.array_split(np.arange(row_sizer), row_sizer), 

467 ((shape[0] - row_sizer) // row_sizer, old_shape[1]), 

468 ) 

469 # Get the differences between values 

470 differences = np.diff(a, axis=ax) / row_sizer 

471 # Multiply differences array by tiles to get desired bins 

472 inferences_non_pad = np.repeat(differences, row_sizer, axis=ax) * tiles 

473 # Pad the inferences to get the same shape as above 

474 inferences = np.pad(inferences_non_pad, (0, row_sizer))[:, :-row_sizer] 

475 if np.issubdtype(a.dtype, np.integer): 

476 inferences = np.floor(inferences).astype("int") 

477 # Add this to the original array that has been repeated to match the size of inference 

478 row_rebinned = inferences + np.repeat(a, row_sizer, axis=ax) 

479 return row_rebinned 

480 

481 

482def rebin( 

483 a: NDArray[np.integer | np.floating | np.complexfloating], 

484 shape: ArrayLike, 

485 sample: bool = False, 

486) -> NDArray[np.integer | np.floating | np.complexfloating]: 

487 """ 

488 Resizes a 2D array by averaging or repeating elements, new dimensions must be integral factors of original dimensions. 

489 

490 In the case of expanding an existing array, rebin will interpolate between the original values with a linear function. 

491 In the case of compressing an existing array, rebin will average 

492 

493 Parameters 

494 ---------- 

495 a : NDArray[np.integer | np.floating | np.complexfloating] 

496 Input array. 

497 new_shape : ArrayLike 

498 Shape of the output array in (rows, columns) 

499 Must be a factor or multiple of a.shape 

500 sample: bool, optional 

501 Use this to get samples using rebin, rather than interpolation, by default False. 

502 

503 Returns 

504 ------- 

505 rebinned : NDArray[np.integer | np.floating | np.complexfloating] 

506 If the new shape is smaller of the input array, the data are averaged, 

507 if the new shape is bigger array elements are repeated and interpolated 

508 

509 Examples 

510 -------- 

511 >>> test = np.array([0,10,20,30]) 

512 >>> rebin(test, (1,8)) # Expand Columns 

513 array([ 0, 5, 10, 15, 20, 25, 30, 30]) 

514 >>> rebin(test, (2,8)) # Expand Rows and Columns 

515 array([[ 0, 5, 10, 15, 20, 25, 30, 30], 

516 [ 0, 5, 10, 15, 20, 25, 30, 30]]) 

517 >>> data = np.array([[ -5, 4, 2, -8, 1], 

518 [ 3, 0, 5, -5, 1], 

519 [ 6, -7, 4, -4, -8], 

520 [ -1, -5, -14, 2, 1]]) 

521 >>> rebin(data, (8,10)) # 2D Array example 

522 array([[ -5, -1, 4, 3, 2, -3, -8, -4, 1, 1], 

523 [ -1, 0, 2, 2, 3, -2, -7, -3, 1, 1], 

524 [ 3, 1, 0, 2, 5, 0, -5, -2, 1, 1], 

525 [ 4, 0, -4, 0, 4, -1, -5, -5, -4, -4], 

526 [ 6, -1, -7, -2, 4, 0, -4, -6, -8, -8], 

527 [ 2, -2, -6, -6, -5, -3, -1, -3, -4, -4], 

528 [ -1, -3, -5, -10, -14, -6, 2, 1, 1, 1], 

529 [ -1, -3, -5, -10, -14, -6, 2, 1, 1, 1]]) 

530 >>> rebin(data, (2,5)) # Compression 

531 array([[-1, 2, 3, -6, 1], 

532 [ 2, -6, -5, -1, -3]]) 

533 >>> to_compress = np.array([[3, 9, 7, 0, 1, 5], 

534 [7, 7, 1, 9, 7, 3], 

535 [9, 2, 2, 3, 1, 1], 

536 [0, 3, 5, 0, 4, 3], 

537 [5, 7, 7, 1, 9, 1], 

538 [7, 2, 1, 1, 3, 0]]) 

539 >>> rebin(to_compress, (3,3)) # Compression 

540 array([[6, 4, 4], 

541 [3, 2, 2], 

542 [5, 2, 3]]) 

543 

544 References 

545 ---------- 

546 [1] https://stackoverflow.com/a/8090605 

547 

548 See Also 

549 -------- 

550 PyFHD.pyfhd_tools.pyfhd_utils.rebin_rows : Expand the number of rows through interpolation 

551 PyFHD.pyfhd_tools.pyfhd_utils.rebin_columns : Expand the number of columns through interpolation 

552 """ 

553 old_shape = a.shape 

554 # Prevent more processing than needed if we want the same shape 

555 if old_shape == shape: 

556 return a 

557 if len(old_shape) == 1: 

558 old_shape = (1, old_shape[0]) 

559 # Sample the original array using rebin 

560 if sample: 

561 # If its a 1D array then... 

562 if old_shape[0] == 1: 

563 if shape[1] > old_shape[1]: 

564 rebinned = np.repeat(a, shape[1] // old_shape[1], axis=0) 

565 else: 

566 rebinned = a[:: old_shape[1] // shape[1]] 

567 # Assume its a 2D array 

568 else: 

569 # Do the Rows first 

570 if shape[0] >= old_shape[0]: 

571 # Expand Rows 

572 rebinned = np.repeat(a, shape[0] // old_shape[0], axis=0) 

573 else: 

574 # Compress Rows 

575 rebinned = a[:: old_shape[0] // shape[0], :] 

576 # Then do the columns 

577 if shape[1] >= old_shape[1]: 

578 # Expand Columns 

579 rebinned = np.repeat(rebinned, shape[1] // old_shape[1], axis=1) 

580 else: 

581 # Compress columns 

582 rebinned = rebinned[:, :: old_shape[1] // shape[1]] 

583 # Return the rebinned without adjusting dtype as none of the functions above change it 

584 return rebinned 

585 

586 # If we are downsizing 

587 elif shape[0] < old_shape[0] or shape[1] < old_shape[1]: 

588 if (max(old_shape[0], shape[0]) % min(old_shape[0], shape[0]) != 0) or ( 588 ↛ 591line 588 didn't jump to line 591 because the condition on line 588 was never true

589 max(old_shape[1], shape[1]) % min(old_shape[1], shape[1]) != 0 

590 ): 

591 raise ValueError("Your new shape should be a factor of the original shape") 

592 # If we are increasing the rows or columns and reducing the other, increase them now and change the old shape 

593 if shape[0] > old_shape[0]: 

594 a = np.tile(a, (shape[0], 1)) 

595 old_shape = a.shape 

596 elif shape[1] > old_shape[1]: 

597 a = np.tile(a, (1, shape[1])) 

598 old_shape = a.shape 

599 # Create the shape we need (rows, rows that can fit in old_shape, cols, cols that can fit into old_shape) 

600 sh = shape[0], old_shape[0] // shape[0], shape[1], old_shape[1] // shape[1] 

601 # Create the 4D array 

602 rebinned = np.reshape(a, sh) 

603 # Get the average of the columns first 

604 rebinned = rebinned.mean(-1) 

605 # To ensure that we get the result same as IDL 

606 # it seems to fix the values after every calculation if integer 

607 if np.issubdtype(a.dtype, np.integer): 

608 rebinned = np.fix(rebinned).astype("int") 

609 # Now get it for the rows 

610 rebinned = rebinned.mean(1) 

611 # If we are expecting 1D array ensure it gets returned as a 1D array 

612 if shape[0] == 1: 

613 rebinned = rebinned[0] 

614 # To ensure that we get the result same as IDL 

615 # it seems to fix the values after every calculation if integer 

616 if np.issubdtype(a.dtype, np.integer): 

617 rebinned = np.fix(rebinned).astype("int") 

618 

619 # Otherwise we are expanding 

620 else: 

621 if shape[0] % old_shape[0] != 0 or shape[1] % old_shape[1] != 0: 621 ↛ 622line 621 didn't jump to line 622 because the condition on line 621 was never true

622 raise ValueError( 

623 "Your new shape should be a multiple of the original shape" 

624 ) 

625 # Get the size changes of the row and column separately 

626 row_sizer = shape[0] // old_shape[0] 

627 col_sizer = shape[1] // old_shape[1] 

628 ax = 0 

629 # If 1D array then do along the columns 

630 if old_shape[0] == 1: 

631 rebinned = rebin_columns(a, ax, shape, col_sizer) 

632 if shape[0] == 1: 

633 rebinned = rebinned[0] 

634 # Else its a 2D array 

635 else: 

636 row_rebinned = rebin_rows(a, ax, shape, old_shape, row_sizer) 

637 # If it matches the new shape, then return it 

638 if row_rebinned.shape == shape: 

639 return row_rebinned 

640 else: 

641 ax = 1 

642 rebinned = rebin_columns(row_rebinned, ax, shape, col_sizer) 

643 return rebinned 

644 

645 

646def weight_invert( 

647 weights: ( 

648 NDArray[np.integer | np.floating | np.complexfloating] | int | float | np.number 

649 ), 

650 threshold: float | None = None, 

651 use_abs: bool = False, 

652) -> NDArray[np.integer | np.floating | np.complexfloating] | int | float | np.number: 

653 """ 

654 The weights invert function cleans the weights given by removing 

655 the values that are 0, NaN or Inf ready for additional calculation. 

656 If a threshold is set, then we check the values that match the threshold 

657 instead of checking for zeros. 

658 

659 Parameters 

660 ---------- 

661 weights: NDArray[np.integer | np.floating | np.complexfloating] | int | float | np.number 

662 An array of values of some dtype 

663 threshold: float | None, optional 

664 A real number set as the threshold for the array. 

665 By default its set to None, in this case function checks 

666 for zeros, by default None 

667 use_abs: bool, optional 

668 If True, take the absolute value (sometimes useful for complex numbers) 

669 By default this is False, so will leave as a complex number and invert, by default False 

670 

671 Returns 

672 ------- 

673 result: NDArray[np.integer | np.floating | np.complexfloating] | int | float | np.number 

674 The weights array that has had NaNs and Infinities removed, and zeros OR 

675 values that don't meet the threshold. 

676 """ 

677 # IDL is able to treat one number as an array (because every number is aprrently an array of size 1?) 

678 # As such we need to check if it's a number less than or equal to 0 and make a zeros array of size 1 

679 if np.isscalar(weights): 

680 result = np.zeros(1, dtype=type(weights)) 

681 weights = np.array([weights], dtype=type(weights)) 

682 else: 

683 result = np.zeros_like(weights, dtype=weights.dtype) 

684 weights_use = weights 

685 if use_abs or np.iscomplexobj(weights_use): 

686 """ 

687 Python and IDL use the where function on complex numbers differently. 

688 On Python, if you apply a real threshold, it applies to only the real numbers, 

689 and if you apply an imaginary threshold it applies to only imaginary numbers. 

690 For example Python: 

691 test = np.array([1j, 2 + 2j, 3j]) 

692 np.where(test >= 2) == array([1]) 

693 np.where(test >= 2j) == array([1,2]) 

694 Meanwhile in IDL: 

695 test = COMPLEX([0,2,0],[1,2,3]) ;COMPLEX(REAL, IMAGINARY) 

696 where(test ge 2) == [1, 2] 

697 where(test ge COMPLEX(0,2)) == [1, 2] 

698 

699 IDL on the otherhand, uses the ABS function on COMPLEX numbers before using WHERE. 

700 Hence the behaviour we're seeing above. This is why we also check for a complexobj 

701 in the if statement 

702 """ 

703 weights_use = np.abs(weights) 

704 

705 # If threshold has been set then... 

706 if threshold is not None: 

707 # Get the indexes which meet the threshold 

708 # As indicated before IDL applies abs before using where to complex numbers 

709 i_use = np.where(weights_use >= threshold) 

710 else: 

711 # Otherwise get where they are not zero 

712 i_use = np.nonzero(weights_use) 

713 

714 if np.size(i_use) > 0: 

715 result[i_use] = 1 / weights[i_use] 

716 

717 # Replace all NaNs with Zeros 

718 if np.size(np.where(np.isnan(result))) != 0: 

719 result[np.where(np.isnan(result))] = 0 

720 # Replace all Infinities with Zeros 

721 if np.size(np.where(np.isinf(result))) != 0: 721 ↛ 722line 721 didn't jump to line 722 because the condition on line 721 was never true

722 result[np.where(np.isinf(result))] = 0 

723 

724 # If the result is an array containing 1 result, then return the result, not an array 

725 if np.size(result) == 1: 

726 return result[0] 

727 return result 

728 

729 

730def array_match( 

731 array_1: NDArray[np.integer | np.floating | np.complexfloating], 

732 value_match: NDArray[np.integer | np.floating | np.complexfloating], 

733 array_2: NDArray[np.integer | np.floating | np.complexfloating] | None = None, 

734) -> NDArray[np.int64]: 

735 """ 

736 Find the indices of the input array which match the array of values provided. 

737 If a second input array is provided, then find the indices where the values 

738 provided match in both input arrays. This matching can only be done between 

739 integer values. 

740 

741 Parameters 

742 ---------- 

743 array_1: NDArray[np.integer | np.floating | np.complexfloating] 

744 The input array to match the values in 

745 value_match: NDArray[np.integer | np.floating | np.complexfloating] 

746 The values to find in the input array 

747 array_2: NDArray[np.integer | np.floating | np.complexfloating] | None, optional 

748 A second array to match the values in via a logical AND, by default is None 

749 

750 Returns 

751 ------- 

752 match_indices: NDArray[np.int64] 

753 The indices of the input array(s) which match the values provided 

754 

755 Raises 

756 ------ 

757 ValueError 

758 Gets raised if value_match is None 

759 """ 

760 

761 if value_match is None: 761 ↛ 762line 761 didn't jump to line 762 because the condition on line 761 was never true

762 raise ValueError("Value Match Should be a value not None") 

763 # If array_2 has been supplied, compare which mins and maxes to use based on two arrays 

764 if array_2 is not None and np.size(array_2) > 0: 

765 min_use = np.min([np.min(array_1), np.min(array_2)]) 

766 max_use = np.max([np.max(array_1), np.max(array_2)]) 

767 # Also compute the histogram for array_2 

768 hist2, _, ri2 = histogram(array_2, min=min_use, max=max_use) 

769 else: 

770 # If the second array wasn't supplied 

771 min_use = np.min(array_1) 

772 max_use = np.max(array_1) 

773 # Supply a second hist 

774 hist2 = np.arange(max_use - min_use + 1) 

775 # Get the histogram for the first 

776 hist1, _, ri1 = histogram(array_1, min=min_use, max=max_use) 

777 # Arrays should be the same size, does addition 

778 hist_combined = hist1 + hist2 

779 bins = np.where(hist_combined > 0) 

780 

781 # Select the values to be used 

782 hist_v1, bins_v1, _ = histogram(bins + min_use) 

783 omin = bins_v1[0] 

784 omax = bins_v1[-1] 

785 hist_v2, _, _ = histogram(value_match, min=omin, max=omax) 

786 vals = np.nonzero(np.bitwise_and(hist_v1, hist_v2))[0] + omin - min_use 

787 n_match = vals.size 

788 

789 if n_match == 0: 

790 return -1 

791 

792 ind_arr = np.zeros_like(array_1) 

793 for vi in range(n_match): 

794 i = vals[vi] 

795 if hist1[i] > 0: 795 ↛ 797line 795 didn't jump to line 797 because the condition on line 795 was always true

796 ind_arr[ri1[ri1[i] : ri1[i + 1]]] = 1 

797 if hist2[i] > 0: 

798 ind_arr[ri2[ri2[i] : ri2[i + 1]]] = 1 

799 

800 match_indices = np.nonzero(ind_arr)[0] 

801 # Return our matching indices 

802 return match_indices 

803 

804 

805def meshgrid( 

806 dimension: int, elements: int, axis: int | None = None, return_integer: bool = False 

807) -> NDArray[np.int64 | np.float64]: 

808 """ 

809 Generates a 2D array of X or Y values. Could be replaced by a another function 

810 

811 Parameters 

812 ---------- 

813 dimension: int 

814 Sets the column size of the array to return 

815 elements: int 

816 Sets the row size of the array to return 

817 axis = int | None, optional 

818 Set axis = 1 for X values, set axis = 2 for Y values, by default is None 

819 return integer: bool, optional 

820 dtype is implied by dimension and/or elements 

821 by default. If True, sets return array to int, by default is False 

822 

823 Returns 

824 ------- 

825 result: NDArray[np.int64 | np.float64] 

826 A numpy array of shape (elements, dimension) that is 

827 a modified np.arange(elements * dimension). 

828 """ 

829 if axis is None: 

830 axis = elements 

831 elements = dimension 

832 # If elements is set as 2 

833 if axis == 2: 

834 # Replicate LINDGEN by doing arange and resizing, result is floored 

835 result = np.reshape( 

836 np.floor(np.arange(elements * dimension) / dimension), (elements, dimension) 

837 ) 

838 else: 

839 # Replicate LINDGEN by doing arange and resizing 

840 result = np.reshape( 

841 np.arange(elements * dimension) % dimension, (elements, dimension) 

842 ) 

843 # If we need to return a float, then set the result and return 

844 if return_integer: 

845 return result.astype("int") 

846 # Otherwise return as is 

847 else: 

848 return result 

849 

850 

851def deriv_coefficients(n: int, divide_factorial: bool = False) -> NDArray[np.float64]: 

852 """ 

853 Computes an array of coefficients resulting in taking the 

854 n-th derivative of a function of the form x^a (a must not 

855 be a positive integer less than n) 

856 

857 Parameters 

858 ---------- 

859 n: int 

860 Decides the length of coefficients 

861 divide_factorial: bool, optional 

862 Determine if we need to divide by the factorial, by default is False 

863 

864 Returns 

865 ------- 

866 coeff: NDArray[np.float64] 

867 An array of coefficients 

868 """ 

869 if n <= 0: 

870 return 0 

871 # Set up the array 

872 coeff = np.zeros(n) 

873 # Set the first coefficient to 1 

874 coeff[0] = 1 

875 # For every coefficient 

876 for m in range(1, n): 

877 # Had to do m + 1 for the range as IDL coeff[1:1] == coeff[1], but Python coeff[1:1] == array([]) 

878 coeff[1 : m + 1] += -m * coeff[0:m] 

879 # If we are to divide by the factorial do that to each coefficient 

880 if divide_factorial: 

881 for m in range(n): 

882 coeff[m] /= factorial(m + 1) 

883 

884 # Return coefficients 

885 return coeff 

886 

887 

888def idl_argunique( 

889 arr: NDArray[np.integer | np.floating | np.complexfloating], 

890) -> NDArray[np.int64]: 

891 """ 

892 In IDL the UNIQ function returns the indexes of the unique values within 

893 an array (that is assumed to be sorted beforehand). In NumPy they use the first 

894 unique index when using return index, where as in IDL they use the last index. 

895 

896 Parameters 

897 ---------- 

898 arr : NDArray[np.integer | np.floating | np.complexfloating] 

899 A sorted numpy array of any type. 

900 

901 Returns 

902 ------- 

903 NDArray[np.int64] 

904 An array containing indexes of the last occurence of the unique value in arr 

905 

906 Examples 

907 -------- 

908 >>> test = np.array([10, 30, 5, 100, 200, 75, 200, 100, 30]) 

909 >>> idl_argunique(test) 

910 array([0, 1, 3, 4, 6, 8]) 

911 """ 

912 return np.searchsorted(arr, np.unique(arr), side="right") - 1 

913 

914 

915def angle_difference( 

916 ra1: float, 

917 dec1: float, 

918 ra2: float, 

919 dec2: float, 

920 degree: bool = False, 

921 nearest: bool = False, 

922) -> float: 

923 """ 

924 Calculates the angle difference between two given celestial coordinates. 

925 

926 Parameters 

927 ---------- 

928 ra1 : float 

929 Right Ascension of a coordinate 1 in radians or degrees 

930 dec1 : float 

931 Declination of a coordinate 1 in radians or degrees 

932 ra2 : float 

933 Right Ascension of a coordinate 2 in radians or degrees 

934 dec2 : float 

935 Declination of a coordinate 2 in radians or degrees 

936 degree : bool, optional 

937 If True, then we treate the coordinates given as degrees, by default False 

938 nearest : bool, optional 

939 Calculate implied angle instead, by default False 

940 

941 Returns 

942 ------- 

943 relative_angle : float 

944 The angle difference in degrees 

945 """ 

946 

947 if degree: 947 ↛ 950line 947 didn't jump to line 950 because the condition on line 947 was always true

948 unit = u.deg 

949 else: 

950 unit = u.rad 

951 coord1 = SkyCoord(ra=ra1 * unit, dec=dec1 * unit) 

952 coord2 = SkyCoord(ra=ra2 * unit, dec=dec2 * unit) 

953 # Use built in astropy separtion instead of calculating it 

954 relative_angle = coord1.separation(coord2).value 

955 if nearest: 

956 return max(relative_angle, 2 * pi - relative_angle) 

957 else: 

958 return relative_angle 

959 

960 

961def parallactic_angle(latitude: float, hour_angle: float, dec: float) -> float: 

962 """ 

963 Calculates the parallactic angle given latitude (usually a declination), hour_angle and another declination 

964 

965 Parameters 

966 ---------- 

967 latitude : float 

968 An angle in degrees, usually a declination in this package 

969 hour_angle : float 

970 The hour angle in degrees 

971 dec : float 

972 A declination in degrees 

973 

974 Returns 

975 ------- 

976 parallactic_angle : float 

977 The angle between the great circle through a celestial object and the zenith, and the hour circle of the object 

978 """ 

979 

980 y_term = np.sin(np.radians(hour_angle)) 

981 x_term = np.cos(np.radians(dec)) * np.tan(np.radians(latitude)) - np.sin( 

982 np.radians(dec) 

983 ) * np.cos(np.radians(hour_angle)) 

984 return np.degrees(np.arctan(y_term / x_term)) 

985 

986 

987def simple_deproject_w_term( 

988 obs: dict, 

989 params: dict, 

990 vis_arr: NDArray[np.complex128], 

991 direction: float, 

992 logger: Logger, 

993) -> NDArray[np.complex128]: 

994 """ 

995 Applies a w-term deprojection to the visibility array 

996 

997 Parameters 

998 ---------- 

999 obs : dict 

1000 The observation data structure 

1001 params : dict 

1002 The data from the UVFITS file 

1003 vis_arr : NDArray[np.complex128] 

1004 The visibility array 

1005 direction : float 

1006 The direction we apply to the phase 

1007 

1008 Returns 

1009 ------- 

1010 vis_arr : NDArray[np.complex128] 

1011 The visibility array with the deprojection applied 

1012 """ 

1013 

1014 icomp = 1j 

1015 zcen = np.outer(obs["baseline_info"]["freq"], params["ww"]) 

1016 sign = 1 if direction > 0 else -1 

1017 phase = np.exp(direction * icomp * zcen) 

1018 for pol_i in range(obs["n_pol"]): 

1019 vis_arr[pol_i, :, :] *= phase 

1020 

1021 sign_str = " +1" if sign > 0 else " -1" 

1022 logger.info(f"Applying simple w-term deprojection:{sign_str}") 

1023 

1024 return vis_arr 

1025 

1026 

1027def resistant_mean( 

1028 array: NDArray[np.integer | np.floating | np.complexfloating], 

1029 deviations: int, 

1030 mad_scale: float = 0.67449999999999999, 

1031 sigma_coeff: NDArray[np.float64] = np.array( 

1032 [ 

1033 0.020142000000000000, 

1034 -0.23583999999999999, 

1035 0.90722999999999998, 

1036 -0.15404999999999999, 

1037 ] 

1038 ), 

1039) -> int | float | complex | np.number: 

1040 """ 

1041 The resistant_mean function translate the IDLAstro function resistant_mean from IDL to Python using NumPy. 

1042 The values mad_scale and sigma_coeff are also retrieved from the same IDLAstro function when running in Double 

1043 Precision Mode. 

1044 

1045 The resistant_mean gets the mean of an array which has had a median absolute deviation threshold applied to the 

1046 absolute deviations of the array to exclude outliers. 

1047 

1048 If resistant_mean needs to be optimized, it can be vectorized easily enough 

1049 

1050 Parameters 

1051 ---------- 

1052 array : NDArray[np.integer | np.floating | np.complexfloating] 

1053 A 1 dimensional array of values, multidimensional arrays should be flattened before use 

1054 deviations : int 

1055 The number of median absolute deviations from the median we want use to exclude outliers 

1056 mad_scale : float, optional 

1057 The scale factor for the median absolute deviation, by default 0.67449999999999999 

1058 sigma_coeff : NDArray[np.float64], optional 

1059 The coefficients applied to the polynomial equation to the standard deviation of the points excluded by the outliers for additional exclusion, by default np.array([0.020142000000000000, -0.23583999999999999 , 0.90722999999999998 , -0.15404999999999999]) 

1060 

1061 Returns 

1062 ------- 

1063 resistant_mean : Number 

1064 The mean of the array with outliers excluded using median absolute deviation 

1065 

1066 References 

1067 ---------- 

1068 .. IDLAstro, RESISTANT_Mean, https://idlastro.gsfc.nasa.gov/ftp/pro/robust/resistant_mean.pro 

1069 """ 

1070 # Calculate median of the real part of the array 

1071 median = np.median(array.real) 

1072 # Get the absolute deviation (residuals) 

1073 abs_dev = np.abs(array - median) 

1074 # Calculate Median Absolute Deviation 

1075 # I could have used scipy's median_abs_deviation to get this, but by doing this manually I can guarantee the same behaviour as IDL 

1076 mad = np.median(abs_dev) / mad_scale 

1077 # Use MAD and the number of deviations 

1078 mad_threshold = deviations * mad 

1079 # Subset the array by the deviations and residuals 

1080 no_outliers = array[np.where(abs_dev <= mad_threshold)] 

1081 # If the deviations is less than 4.5, change the sigma (standard deviation of the subarray) by using a polyval with set sigma coefficient 

1082 # This compensates Sigma for truncation 

1083 # Calculate the standard deviation of the rela and imag separately 

1084 sigma = np.std(no_outliers.real) + np.std(no_outliers.imag) * 1j 

1085 # Set the deviationsX to 1.0 if it's less than 1 

1086 deviationsX = max(deviations, 1.0) 

1087 if deviationsX <= 4.5: 1087 ↛ 1089line 1087 didn't jump to line 1089 because the condition on line 1087 was always true

1088 sigma = sigma / np.polyval(sigma_coeff, deviationsX) 

1089 sigma_threshold = sigma * deviations 

1090 # Use the sigma threshold to again remove outliers from the array 

1091 # Also take the absolute value of sigma_threshold to get the same behaviour as LE in IDL 

1092 subarray = array[np.where(abs_dev <= np.abs(sigma_threshold))] 

1093 # Get the mean of the subset array which contains no outliers 

1094 return np.mean(subarray) 

1095 

1096 

1097def run_command(cmd: str, dry_run=False): 

1098 """ 

1099 Runs the command string `cmd` using `subprocess.run`. Returns any text output to stdout 

1100 

1101 Parameters 

1102 ---------- 

1103 cmd : str 

1104 The command to run on the command line 

1105 dry_run : bool 

1106 If True, don't actually run the command. Defaults to False (so defaults to running the command) 

1107 """ 

1108 

1109 if dry_run: 

1110 stdout = "This was a dry run, not launching IDL code\n" 

1111 else: 

1112 stdout = subprocess.run(cmd.split(), stdout=subprocess.PIPE, text=True).stdout 

1113 

1114 return stdout 

1115 

1116 

1117def vis_weights_update( 

1118 vis_weights: NDArray[np.float64], obs: dict, psf: dict | h5py.File, params: dict 

1119) -> tuple[NDArray[np.float64], dict]: 

1120 """ 

1121 Update the visibility weights array to match any updates to the observation 

1122 metadata dictionary, including flagged times, frequencies, tiles, and 

1123 min/max baseline length. 

1124 

1125 Parameters 

1126 ---------- 

1127 vis_weights : NDArray[np.float64] 

1128 Visibility weights array. 

1129 obs : dict 

1130 Observation metadata dictionary 

1131 psf: dict | h5py.File 

1132 Beam dictionary 

1133 params : dict 

1134 Visibility metadata dictionary 

1135 

1136 Returns 

1137 ------- 

1138 vis_weights : NDArray[np.float64] 

1139 Updated vis_weights given the modified observation metadata dictionary 

1140 obs : dict 

1141 The observation metadata dictionary, now containing the correct summary 

1142 statistics of the new flagging 

1143 """ 

1144 kx_arr = params["uu"] / obs["kpix"] 

1145 ky_arr = params["vv"] / obs["kpix"] 

1146 dist_test = np.sqrt(kx_arr**2 + ky_arr**2) * obs["kpix"] 

1147 dist_test = np.outer(obs["baseline_info"]["freq"], dist_test) 

1148 flag_dist_i = np.where( 

1149 (dist_test < obs["min_baseline"]) | (dist_test > obs["max_baseline"]) 

1150 ) 

1151 conj_i = np.where(ky_arr > 0) 

1152 if conj_i[0].size > 0: 1152 ↛ 1155line 1152 didn't jump to line 1155 because the condition on line 1152 was always true

1153 kx_arr[conj_i] = -kx_arr[conj_i] 

1154 ky_arr[conj_i] = -ky_arr[conj_i] 

1155 psf_dim = psf["dim"] 

1156 if isinstance(psf, h5py.File): 1156 ↛ 1157line 1156 didn't jump to line 1157 because the condition on line 1156 was never true

1157 psf_dim = psf_dim[0] 

1158 xcen = np.outer(obs["baseline_info"]["freq"], kx_arr) 

1159 xmin = np.floor(xcen) + obs["dimension"] / 2 - (psf_dim / 2 - 1) 

1160 ycen = np.outer(obs["baseline_info"]["freq"], ky_arr) 

1161 ymin = np.floor(ycen) + obs["elements"] / 2 - (psf_dim / 2 - 1) 

1162 

1163 range_test_x_i = np.where( 

1164 (xmin <= 0) | ((xmin + psf_dim - 1) >= obs["dimension"] - 1) 

1165 ) 

1166 if range_test_x_i[0].size > 0: 1166 ↛ 1169line 1166 didn't jump to line 1169 because the condition on line 1166 was always true

1167 xmin[range_test_x_i] = -1 

1168 ymin[range_test_x_i] = -1 

1169 range_test_y_i = np.where( 

1170 (ymin <= 0) | ((ymin + psf_dim - 1) >= obs["elements"] - 1) 

1171 ) 

1172 if range_test_y_i[0].size > 0: 1172 ↛ 1175line 1172 didn't jump to line 1175 because the condition on line 1172 was always true

1173 xmin[range_test_y_i] = -1 

1174 ymin[range_test_y_i] = -1 

1175 del range_test_x_i 

1176 del range_test_y_i 

1177 

1178 if flag_dist_i[0].size > 0: 1178 ↛ 1183line 1178 didn't jump to line 1183 because the condition on line 1178 was always true

1179 xmin[flag_dist_i] = -1 

1180 ymin[flag_dist_i] = -1 

1181 

1182 # If flag_frequencies is false, freq_use should be all 1's anyway, so this shouldn't affect anything 

1183 freq_cut_i = np.where(obs["baseline_info"]["freq_use"] == 0) 

1184 if freq_cut_i[0].size > 0: 1184 ↛ 1186line 1184 didn't jump to line 1186 because the condition on line 1184 was always true

1185 vis_weights[0 : obs["n_pol"], freq_cut_i[0], :] = 0 

1186 tile_cut_i = np.where(obs["baseline_info"]["tile_use"] == 0) 

1187 if tile_cut_i[0].size > 0: 

1188 bi_cut = array_match( 

1189 obs["baseline_info"]["tile_a"], 

1190 tile_cut_i[0] + 1, 

1191 obs["baseline_info"]["tile_b"], 

1192 ) 

1193 if np.size(bi_cut) > 0: 1193 ↛ 1196line 1193 didn't jump to line 1196 because the condition on line 1193 was always true

1194 vis_weights[0 : obs["n_pol"], :, bi_cut] = 0 

1195 

1196 time_cut_i = np.where(obs["baseline_info"]["time_use"] == 0)[0] 

1197 bin_offset = np.append(obs["baseline_info"]["bin_offset"], kx_arr.size) 

1198 time_bin = np.zeros(kx_arr.size) 

1199 for ti in range(obs["baseline_info"]["time_use"].size): 

1200 time_bin[bin_offset[ti] : bin_offset[ti + 1]] = ti 

1201 for ti in range(time_cut_i.size): 1201 ↛ 1202line 1201 didn't jump to line 1202 because the loop on line 1201 never started

1202 ti_cut = np.where(time_bin == time_cut_i[ti]) 

1203 if ti_cut[0].size > 0: 

1204 vis_weights[0 : obs["n_pol"], :, ti_cut] = 0 

1205 

1206 flag_i = np.where(vis_weights[0] <= 0) 

1207 flag_i_new = np.where(xmin < 0) 

1208 if flag_i_new[0].size > 0: 1208 ↛ 1210line 1208 didn't jump to line 1210 because the condition on line 1208 was always true

1209 vis_weights[0 : obs["n_pol"], flag_i_new[0], flag_i_new[1]] = 0 

1210 if flag_i[0].size > 0: 1210 ↛ 1214line 1210 didn't jump to line 1214 because the condition on line 1210 was always true

1211 xmin[flag_i] = -1 

1212 ymin[flag_i] = -1 

1213 

1214 if min(np.max(xmin), np.max(ymin)) < 0: 1214 ↛ 1215line 1214 didn't jump to line 1215 because the condition on line 1214 was never true

1215 obs["n_vis"] = 0 

1216 return vis_weights, obs 

1217 

1218 bin_n, _, _ = histogram(xmin + ymin * obs["dimension"], min=0) 

1219 obs["n_vis"] = np.sum(bin_n) 

1220 

1221 obs["n_time_flag"] = np.sum(1 - obs["baseline_info"]["time_use"]) 

1222 obs["n_tile_flag"] = np.sum(1 - obs["baseline_info"]["tile_use"]) 

1223 obs["n_freq_flag"] = np.sum(1 - obs["baseline_info"]["freq_use"]) 

1224 

1225 return vis_weights, obs 

1226 

1227 

1228def split_vis_weights( 

1229 obs: dict, vis_weights: NDArray[np.float64] 

1230) -> tuple[NDArray[np.float64], NDArray[np.int64]]: 

1231 """ 

1232 Separate the indices in the visibility array (data/model/res/weights) into 

1233 interleaved time samples, generally called "even" and "odd" depending on 

1234 whether the time index is even or odd. Interleaved data can be used to generate 

1235 cross power spectra and propagate uncertainties, see eq 2 of Jacobs et al. 2016. 

1236 Ensures that the same flagging is applied to both sets. 

1237 

1238 Parameters 

1239 ---------- 

1240 obs : dict 

1241 Observation metadata dictionary 

1242 vis_weights : NDArray[np.float64] 

1243 Visibility weights array 

1244 

1245 Returns 

1246 ------- 

1247 vis_weights : NDArray[np.float64] 

1248 Full visibility weights array with the same flagging applied 

1249 to both interleaved sets. 

1250 bi_use NDArray[np.int64] 

1251 Baseline index arrays for interleaved time samples, separated 

1252 by "even" and "odd" indices. 

1253 """ 

1254 # Not always used in vis_noise_calc requires this check in other cases 

1255 if obs["n_time"] < 2: 1255 ↛ 1256line 1255 didn't jump to line 1256 because the condition on line 1255 was never true

1256 return vis_weights 

1257 

1258 # Get the number of baselines, should be the last in this case 

1259 nb = vis_weights.shape[-1] 

1260 bin_end = np.zeros(obs["n_time"], dtype=np.int64) 

1261 bin_end[: obs["n_time"] - 1] = ( 

1262 obs["baseline_info"]["bin_offset"][1 : obs["n_time"]] - 1 

1263 ) 

1264 bin_end[-1] = int(nb - 1) 

1265 bin_i = np.full(nb, -1, dtype=np.int64) 

1266 for t_i in range(obs["n_time"] // 2): 

1267 bin_i[obs["baseline_info"]["bin_offset"][t_i] : bin_end[t_i] + 1] = t_i 

1268 

1269 time_start_i = int(np.min(np.nonzero(obs["baseline_info"]["time_use"])[0])) 

1270 nt3 = int(np.floor((obs["n_time"] - time_start_i) / 2) * 2) 

1271 time_use_0 = obs["baseline_info"]["time_use"][time_start_i : time_start_i + nt3 : 2] 

1272 time_use_1 = obs["baseline_info"]["time_use"][ 

1273 time_start_i + 1 : time_start_i + nt3 : 2 

1274 ] 

1275 time_use_01 = time_use_0 * time_use_1 

1276 time_use = np.zeros(obs["baseline_info"]["time_use"].shape, dtype=np.int64) 

1277 time_use[time_start_i : time_start_i + nt3 : 2] = time_use_01 

1278 time_use[time_start_i + 1 : time_start_i + nt3 : 2] = time_use_01 

1279 time_cut_i = np.where(time_use <= 0)[0] 

1280 if time_cut_i.size > 0: 1280 ↛ 1281line 1280 didn't jump to line 1281 because the condition on line 1280 was never true

1281 for cut_i in range(time_cut_i.size): 

1282 bin_i_cut = np.where(bin_i == time_cut_i[cut_i])[0] 

1283 if bin_i_cut.size > 0: 

1284 bin_i[bin_i_cut] = -1 

1285 

1286 bi_use = [np.where(bin_i % 2 == 0)[0], np.where(bin_i % 2 == 1)[0]] 

1287 

1288 # Here we ensure that both even and odd samples are the same size by 

1289 # ensuring both arrays match the smallest size 

1290 if bi_use[0].size < bi_use[1].size: 1290 ↛ 1291line 1290 didn't jump to line 1291 because the condition on line 1290 was never true

1291 bi_use[1] = bi_use[1][0 : bi_use[0].size] 

1292 elif bi_use[1].size < bi_use[0].size: 1292 ↛ 1293line 1292 didn't jump to line 1293 because the condition on line 1292 was never true

1293 bi_use[0] = bi_use[0][0 : bi_use[1].size] 

1294 

1295 for pol_i in range(obs["n_pol"]): 

1296 # In IDL they are doing x > y < w < z 

1297 flag_use = np.minimum( 

1298 np.maximum(vis_weights[pol_i, :, bi_use[0]], 0), 

1299 np.minimum(vis_weights[pol_i, :, bi_use[1]], 1), 

1300 ) 

1301 # Reset vis_weights 

1302 vis_weights[pol_i] = 0 

1303 # No keywords used for odd_only or even_only in entirety of FHD 

1304 vis_weights[pol_i, :, bi_use[0]] = flag_use 

1305 vis_weights[pol_i, :, bi_use[1]] = flag_use 

1306 

1307 return vis_weights, bi_use 

1308 

1309 

1310def vis_noise_calc( 

1311 obs: dict, 

1312 vis_arr: NDArray[np.complex128], 

1313 vis_weights: NDArray[np.float64], 

1314 bi_use: NDArray[np.int64] | None = None, 

1315) -> NDArray[np.float64]: 

1316 """ 

1317 Calculate the noise from the calibrated visibilities by taking 

1318 the difference between the imaginary parts of the interleaved 

1319 time samples. A factor of sqrt(2) is required because there is 

1320 half as many independent noise samples when calculated from just 

1321 the imaginary part. 

1322 

1323 Parameters 

1324 ---------- 

1325 obs : dict 

1326 The observation metadata dictionary 

1327 vis_arr : NDArray[np.complex128] 

1328 The vsisibility array 

1329 vis_weights : NDArray[np.float64] 

1330 The visibility weights array 

1331 bi_use : NDArray[np.int64] | None, optional 

1332 Baseline index arrays for interleaved time samples, separated 

1333 by "even" and "odd" indices, by default None 

1334 

1335 Returns 

1336 ------- 

1337 noise_arr: NDArray[np.float64] 

1338 Calculated variance of the noise as a function of polarisation 

1339 and frequency. 

1340 """ 

1341 noise_arr = np.zeros([obs["n_pol"], obs["n_freq"]]) 

1342 

1343 if obs["n_time"] < 2: 1343 ↛ 1344line 1343 didn't jump to line 1344 because the condition on line 1343 was never true

1344 return noise_arr 

1345 

1346 if bi_use is None: 1346 ↛ 1349line 1346 didn't jump to line 1349 because the condition on line 1346 was always true

1347 vis_weights_use, bi_use = split_vis_weights(obs, vis_weights) 

1348 else: 

1349 vis_weights_use = vis_weights 

1350 

1351 for pol_i in range(obs["n_pol"]): 

1352 data_diff = ( 

1353 vis_arr[pol_i, :, bi_use[0]].imag - vis_arr[pol_i, :, bi_use[1]].imag 

1354 ) 

1355 vis_weight_diff = np.maximum( 

1356 vis_weights_use[pol_i, :, bi_use[0]], 0 

1357 ) * np.maximum(vis_weights_use[pol_i, :, bi_use[1]], 0) 

1358 for fi in range(obs["n_freq"]): 

1359 ind_use = np.where(vis_weight_diff[:, fi])[0] 

1360 if ind_use.size > 0: 

1361 noise_arr[pol_i, fi] = np.std(data_diff[ind_use, fi]) / np.sqrt(2) 

1362 

1363 return noise_arr 

1364 

1365 

1366def idl_median( 

1367 x: NDArray[np.integer | np.floating | np.complexfloating], 

1368 width: int = 0, 

1369 even: bool = False, 

1370) -> float: 

1371 """ 

1372 The IDL Median function doesn't always work as you'd expect, as generally you need to use the 

1373 EVEN keyword to get the median of an even number of elements, otherwise it returns the 

1374 maximum element of the two numbers in the middle of an even sorted array. 

1375 This function replicates the IDL median function, in case you need that functionality. 

1376 Typically though, we recommend using `numpy.median` or `scipy.ndimage.median_filter`. 

1377 

1378 Parameters 

1379 ---------- 

1380 x : NDArray[np.integer | np.floating | np.complexfloating] 

1381 Data to perform median on 

1382 width : int 

1383 If set, perform a type of median filtering. 

1384 even : bool, optional 

1385 In this case it will just run numpy.median, by default False 

1386 

1387 

1388 When `width` is set. unfortunately the edge conditions when using cannot be 

1389 replicated soley with `scipy.ndimage.median_filter` so use `median_filter` and set the edge cases manually 

1390 

1391 Returns 

1392 ------- 

1393 float 

1394 The median of the array 

1395 

1396 See Also 

1397 -------- 

1398 numpy.median: Computes the median of an array 

1399 scipy.ndimage.median_filter: Applies a median filter to an array 

1400 """ 

1401 

1402 if width: 

1403 # IDL median leaves everything within width//2 pixels of the edge alone 

1404 # So just shove the outputs of median_filter everywhere else. None of 

1405 # the `modes` in median_filter capture this behaviour 

1406 output = deepcopy(x) 

1407 

1408 hw = width // 2 

1409 output[hw:-hw] = median_filter(x, size=width)[hw:-hw] 

1410 

1411 return output 

1412 

1413 else: 

1414 if even: 1414 ↛ 1415line 1414 didn't jump to line 1415 because the condition on line 1414 was never true

1415 return np.median(x) 

1416 else: 

1417 med_index = int(np.ceil(len(x) / 2)) 

1418 

1419 return np.sort(x)[med_index] 

1420 

1421 

1422def reshape_and_average_in_time( 

1423 vis_array: NDArray[np.complex128], 

1424 n_freq: int, 

1425 n_time: int, 

1426 n_baselines: int, 

1427 vis_weights: NDArray[np.float64], 

1428) -> NDArray[np.complex128]: 

1429 """Given a single polarisation 2D `vis_array` of shape (n_freq, n_time*n_baselines), 

1430 reshape into (n_freq, n_time, n_baselines), and then average in time, weighting 

1431 by `vis_weights` (must be of shape (n_freq, n_time, n_baselines)) 

1432 Returns the averaged array in shape (n_freq, n_baselines) 

1433 

1434 Parameters 

1435 ---------- 

1436 vis_array : NDArray[np.complex128] 

1437 The visibility array 

1438 n_freq : int 

1439 Number of frequencies 

1440 n_time : int 

1441 Number of time steps 

1442 n_baselines : int 

1443 Number of baselines 

1444 vis_weights : NDArray[np.float64] 

1445 The visibility weights array 

1446 

1447 Returns 

1448 ------- 

1449 reshape_array: NDArray[np.complex128] 

1450 The reshaped visibility array the same shape as the visibility weights array 

1451 """ 

1452 

1453 new_shape = (n_freq, n_time, n_baselines) 

1454 

1455 if vis_weights.shape != new_shape: 1455 ↛ 1456line 1455 didn't jump to line 1456 because the condition on line 1455 was never true

1456 exit( 

1457 f"Attempting to use weights with shape {vis_weights.shape} in `reshape_and_average_in_time`, this is not allowed" 

1458 ) 

1459 

1460 reshape_array = np.reshape(vis_array, new_shape) 

1461 reshape_array = np.sum(reshape_array * vis_weights, axis=1) 

1462 

1463 return reshape_array 

1464 

1465 

1466def region_grow( 

1467 image: NDArray[np.integer | np.floating | np.complexfloating], 

1468 roiPixels: NDArray[np.integer], 

1469 low: int | float | None = None, 

1470 high: int | float | None = None, 

1471) -> NDArray[np.integer | np.floating | np.complexfloating] | None: 

1472 """ 

1473 Replicates IDL's Region Grow, where a region of interest will grow based upon a given threshold 

1474 within a 2D array. It finds all the pixels within the array that are connected neighbors via the 

1475 threshold and blob detection using SciPy's `label` function. In this case, the standard deviation 

1476 form of this function hasn't been implemented as PyFHD will only use this function once with a threshold. 

1477 

1478 If you want to use standard deviation region growing adjusting the function can be done by potentially 

1479 implementing skimage's blob detection algorithms for the labelling and keeping the rest the same. 

1480 

1481 Parameters 

1482 ---------- 

1483 image : NDArray[np.integer | np.floating | np.complexfloating] 

1484 A 2D array of pixels 

1485 roiPixels : NDArray[np.integer] 

1486 The region of interest given as FLAT indexes i.e. array.flat 

1487 low : int | float | None, optional 

1488 The low threshold, any number below this is considered background, 

1489 If left as None, this will be the lowest value of the region of interest, by default None 

1490 high : int | float | None, optional 

1491 The high threshold, any number higher than this is considered background, 

1492 If left as None, this will be the highest value of the region of interest, by default None 

1493 

1494 Returns 

1495 ------- 

1496 growROIPixels: NDArray[np.integer | np.floating | np.complexfloating] | None 

1497 The grown region of interest that has connected neighbours by using the threshold 

1498 

1499 See Also 

1500 -------- 

1501 scipy.ndimage.label: Labels an image based off a given kernel 

1502 

1503 Notes 

1504 ----- 

1505 'scikit-image Blob Detection' : https://scikit-image.org/docs/stable/auto_examples/features_detection/plot_blob.html 

1506 """ 

1507 # Get the roi and set the low and high thresholds if they haven't been so already. 

1508 roi = image.flat[roiPixels] 

1509 if low is None: 1509 ↛ 1510line 1509 didn't jump to line 1510 because the condition on line 1509 was never true

1510 low = np.min(roi) 

1511 if high is None: 1511 ↛ 1512line 1511 didn't jump to line 1512 because the condition on line 1511 was never true

1512 high = np.max(roi) 

1513 # Replace all NaNs with Zeros 

1514 nans = np.isnan(image) 

1515 image[nans] = 0 

1516 # Get all the values that are within the threshold 

1517 threshArray = np.zeros_like(image) 

1518 threshArray[np.where((image >= low) & (image <= high))] = 1 

1519 threshArray[nans] = 0 

1520 # Do binary blob detection with the label function 

1521 labelArray, _ = label(threshArray) 

1522 # Make the edges background as the Region grow IDL does not do the edges 

1523 # of an array 

1524 if labelArray.ndim == 2: 

1525 labelArray[0, :] = 0 

1526 labelArray[:, 0] = 0 

1527 labelArray[-1, :] = 0 

1528 labelArray[:, -1] = 0 

1529 else: 

1530 labelArray[0] = 0 

1531 labelArray[-1] = 0 

1532 # Get the histogram of the labels to ascertain the neighbours we will be interested in 

1533 if np.size(roiPixels) > 1: 

1534 labels, _, _ = histogram(labelArray.flat[roiPixels], min=0) 

1535 labels = np.nonzero(labels != 0)[0] 

1536 nLabels = labels.size 

1537 else: 

1538 nLabels = 1 

1539 labels = labelArray.flat[roiPixels] 

1540 if not isinstance(labels, np.ndarray): 

1541 labels = np.array([labels], dtype=labelArray.dtype) 

1542 # Ignore the first label if it's 0 as it's the background 

1543 if labels[0] == 0: 

1544 nLabels -= 1 

1545 if nLabels > 0: 1545 ↛ 1548line 1545 didn't jump to line 1548 because the condition on line 1545 was always true

1546 labels = labels[1:] 

1547 # The histogram will have a minimum of 1 so we need to take 1 off the labels 

1548 if nLabels: 1548 ↛ 1551line 1548 didn't jump to line 1551 because the condition on line 1548 was always true

1549 labels -= 1 

1550 # Get a histogram of all the labels 

1551 labelHist, _, revInd = histogram(labelArray, min=1) 

1552 # Get the number of pixels we will be growing to 

1553 nPixels = np.sum(labelHist[labels]) if nLabels else 0 

1554 # If we have any pixels to grow, then grow 

1555 if nPixels > 0: 1555 ↛ 1571line 1555 didn't jump to line 1571 because the condition on line 1555 was always true

1556 # Only one label 

1557 if nLabels == 1: 1557 ↛ 1561line 1557 didn't jump to line 1561 because the condition on line 1557 was always true

1558 growROIPixels = revInd[revInd[labels[0]] : revInd[labels[0] + 1]] 

1559 else: 

1560 # Take in all the labels and save all the flat indexes 

1561 growROIPixels = np.empty(nPixels, dtype=np.int64) 

1562 j = 0 

1563 for i in range(nLabels): 

1564 if revInd[labels[i] + 1] <= revInd.size: 

1565 growROIPixels[j : j + labelHist[labels[i]]] = revInd[ 

1566 revInd[labels[i]] : revInd[labels[i] + 1] 

1567 ] 

1568 j = j + labelHist[labels[i]] 

1569 else: 

1570 # Return None if we didn't have anywhere to grow 

1571 growROIPixels = None 

1572 # Return the flat indexes 

1573 return growROIPixels 

1574 

1575 

1576def crosspol_split_real_imaginary( 

1577 image: NDArray[np.complex128], pol_names: list[str] | None = None 

1578) -> tuple[NDArray[np.complex128], list[str] | None]: 

1579 """ 

1580 Reformat the input full polarisation image, containing PP, PQ, QP, and QQ, 

1581 into PP, real(PQ), imaginary(PQ), and QQ. PQ and QP (i.e. XY and YX) 

1582 are complex and conjugate mirrors of one another. To make a understandable 

1583 image, we can plot the real and imaginary parts of PQ separately without 

1584 loss of information. 

1585 

1586 Parameters 

1587 ---------- 

1588 image : NDArray[np.complex128] 

1589 Image array ordered in polarisation by PP, PQ, QP, and QQ. 

1590 pol_names : list[str] | None, optional 

1591 The name of the polarisations, by default None 

1592 

1593 Returns 

1594 ------- 

1595 image : NDArray[np.complex128] 

1596 Image array ordered in polarisation by PP, real(PQ), imaginary(PQ), 

1597 and QQ 

1598 pol_names : list[str]|None] 

1599 New polarisation name array to reflect real(PQ) and imaginary(PQ) 

1600 """ 

1601 crosspol_image: NDArray[np.complex128] = image[2] 

1602 image[2] = crosspol_image.real 

1603 image[3] = crosspol_image.imag 

1604 

1605 if pol_names is not None: 

1606 crosspol_name: str = pol_names[2] 

1607 pol_names[2] = f"{crosspol_name}_real" 

1608 pol_names[3] = f"{crosspol_name}_imag" 

1609 

1610 return image, pol_names