Coverage for PyFHD/pyfhd_tools/pyfhd

1import subprocess

2from copy import deepcopy

3from logging import Logger

4from math import factorial, pi

5from sys import exit

6import h5py

7import numpy as np

8from astropy import units as u

9from astropy.coordinates import SkyCoord

10from numba import njit

11from numpy.typing import ArrayLike, NDArray

12from scipy.ndimage import label, median_filter

15@njit

16def get_bins(

17 min: int | float, max: int | float, bin_size: int

18) -> NDArray[np.float64 | np.int64]:

19 """

20 Calculates the bins for the histogram and reverse indices based on a

21 minimum and maximum value, plus a given bin size. It mirrors IDL's way of

22 calculating the bin edges. In IDL bins seem to be right hand side of the bin open

23 like IDL, even at the end. However, in comparison to NumPy, the last bin is always

24 the maximum value. This does utilize numba and parallelization to make it faster.

26 Parameters

27 ----------

28 min : int | float

29 The minimum chosen for the histogram

30 max : int | float

31 The maximum chosen for the histogram

32 bin_size : int

33 The bin size chosen for the histogram. This histogram always uses bins

34 of equal widths.

36 Returns

37 -------

38 bins: NDArray[np.float64 | np.int64]

39 A NumPy array of all the bins ranging from min (bin[0]) to the max (bin[-1]). The step

40 is bin_size.

42 See Also

43 --------

44 histogram: Calculates the bins, histogram and reverse indices

45 get_hist: Calculates histogram only

46 get_ri: Calculates the reverse indices only

47 """

49 return np.arange(min, max + bin_size, bin_size)

52@njit

53def get_hist(

54 data: NDArray[np.floating | np.integer | np.complexfloating],

55 bins: NDArray[np.float64 | np.int64],

56 min: int | float,

57 max: int | float,

58) -> NDArray[np.int64]:

59 """

60 Calculates the histogram based on the given bins and data, taking into account

61 the minimum and maximum

63 Parameters

64 ----------

65 data : NDArray[np.floating | np.integer | np.complexfloating]

66 A NumPy array that is of one dtype float, int, or complex. Cannot be object

67 bins : NDArray[np.float64 | np.int64]

68 A NumPy array of bins for the histogram

69 min : int | float

70 The minimum set for the data

71 max : int | float

72 The maximum set for the data

74 Returns

75 -------

76 hist: NDArray[np.int64]

77 A histogram of the data using the bins found in the bins array.

79 See Also

80 --------

81 histogram: Calculates the bins, histogram and reverse indices.

82 get_bins: Calculates the bins only

83 get_ri: Calculates the reverse indices only

84 """

86 bin_l = bins.size

87 # Setup the histogram

88 hist = np.zeros(bin_l, dtype=np.int64)

89 # Setup the things required for the indexing

90 n = bin_l - 1

91 bin_min = bins[0]

92 bin_max = bins[-1]

93 # In the case that the bin min and bin max is the same,

94 # then finding the index doesn't matter so set the divider

95 # to 1 (as there is only 1 bin!)

96 if bin_min == bin_max:

97 bin_divider = 1

98 else:

99 bin_divider = bin_max - bin_min

100 # Now loop through the data

101 for idx in range(data.size):

102 # Check if its inside the range we set

103 if data[idx] < min or data[idx] > max:

104 continue

105 # Calculate the index for indices and histogram

106 bin_i = int(n * (data[idx] - bin_min) / bin_divider)

107 # Add to the histogram

108 hist[bin_i] += 1

109 return hist

110

111

112@njit

113def get_ri(

114 data: NDArray[np.floating | np.integer | np.complexfloating],

115 bins: NDArray[np.float64 | np.int64],

116 hist: NDArray[np.int64],

117 min: int | float,

118 max: int | float,

119) -> NDArray[np.int64]:

120 """

121 Calculates the reverse indices of a data and histogram.

122 The function replicates IDL's REVERSE_INDICES keyword within

123 their HISTOGRAM function. The reverse indices array which is returned

124 by this function can be hard to understand at first, I will explain it here

125 and also link JD Smith's famous article on IDL's HISTOGRAM.

126

127 The reverse indices array is two vectors concatenated together. The first vector contains

128 indexes for the second vector, this vector should be the size of bins + 1.

129 The second vector contains indexes from the data itself, and should be the size of data.

130 The justification for having such an array is to quickly make adjustments to certain bins

131 without having to search the array multiple times, thus avoiding multiple O(data.size) loops.

132

133 The first vector indexes contain the starting positions of each bin in the second vector. For example,

134 between the indexes given by first_vector[0] and first_vector[1] of the second vector should be all the

135 indexes of bins[0] from inside the data. So if I wanted to make adjustments to the entire first bin,

136 and only the first bin I can use the reverse indices array, ri to do this. Let's say I wanted to flag

137 all values of bins[0] with -1 for some reason to make them invalid in other calculations with the data,

138 then I could do this:

139

140 `data[ri[ri[0] : ri[1]]] = -1`

141

142 Or more generally

143

144 `data[ri[ri[i] : ri[i + 1]]] = -1`

145

146 Where i is 0 <= i <= bins.size.

147

148 If you wish to gain a better understanding of how this get_ri function works, and the associated

149 histogram function I have created here, please use the link given in the Notes section. This

150 link will take you JD Smith's article on IDL's HISTOGRAM, it is an article which explains the

151 IDL HISTOGRAM function better than IDL's own documentation. If you must gain a deeper understanding,

152 read it once, gasp and get your shocks and many cries of why out of your system, then read it again.

153 And keep reading it till you understand, as per the editor's note on the article:

154

155 "...If you read it enough, the secrets of the command will be revealed to you. Stranger things have happened"

156

157 Parameters

158 ----------

159 data : NDArray[np.floating | np.integer | np.complexfloating]

160 A NumPy array of the data

161 bins : NDArray[np.float64 | np.int64]

162 A NumPy array containing the bins for the histogram

163 hist : NDArray[np.int64]

164 A NumPy array containing the histogram

165 min : int | float

166 The minimum for the dataset

167 max : int | float

168 The maximum for the dataset

169

170 Returns

171 -------

172 ri : NDArray[np.int64]

173 An array containing the reverse indices, which is two vectors, the first vector containing indexes for the second vector.

174 The second vector contains indexes for the data.

175

176 See Also

177 ---------

178 histogram: Calculates the bins, histogram and reverse indices.

179 get_bins: Calculates the bins only

180 get_hist: Calculates histogram only

181

182 Notes

183 ------

184 'HISTOGRAM: The Breathless Horror and Disgust' : http://www.idlcoyote.com/tips/histogram_tutorial.html

185 """

186

187 bin_l = bins.size

188 # Setup the first vector

189 data_idx = bin_l + 1

190 first_v = [bin_l + 1]

191 # Add the bins

192 for bin in hist:

193 data_idx += bin

194 first_v.append(data_idx)

195 # Setup the reverse indices

196 ri = np.zeros(bin_l + 1 + data.size, dtype=np.int64)

197 ri[0 : bin_l + 1] = first_v

198 # Create a tracking array to keep track of where we are in the data indexing

199 tracker = np.array(first_v)

200 # Setup the things required for the indexing

201 n = bin_l - 1

202 bin_min = bins[0]

203 bin_max = bins[-1]

204 if bin_min == bin_max:

205 bin_divider = 1

206 else:

207 bin_divider = bin_max - bin_min

208 # Setup counter to remove elements in case of min or max being set

209 counter = 0

210 for idx in range(data.size):

211 # Check if its inside the range we set

212 if data[idx] < min or data[idx] > max:

213 counter += 1

214 continue

215 # Calculate the index for indices and histogram

216 bin_i = int(n * (data[idx] - bin_min) / bin_divider)

217 # Record the current index and update the tracking array

218 ri[tracker[bin_i]] = idx

219 tracker[bin_i] += 1

220 ri = ri[: ri.size - counter]

221 return ri

222

223

224def histogram(

225 data: NDArray[np.floating | np.integer | np.complexfloating],

226 bin_size: int = 1,

227 num_bins: int | None = None,

228 min: int | float | None = None,

229 max: int | float | None = None,

230) -> tuple[NDArray[np.int64], NDArray[np.float64 | np.int64], NDArray[np.int64]]:

231 """

232 The histogram function combines the use of the get_bins, get_hist and get_ri

233 functions into one function. For the descriptions and docs of those functions

234 look in See Also. This function will return the histogram, bin/bin_edges and

235 the reverse indices.

236

237 Parameters

238 ----------

239 data : NDArray[np.floating | np.integer | np.complexfloating]

240 A NumPy array containing the data we want a histogram of

241 bin_size : int, optional

242 Sets the bin size for this histogram, by default 1

243 num_bins : int | None, optional

244 Set the number of bins this does override bin_size completely, by default None

245 min : int | float | None, optional

246 Set a minimum for the dataset, by default None

247 max : int | float | None, optional

248 Set a maximum for the dataset, by default None

249

250 Returns

251 -------

252 hist : NDArray[np.int64]

253 The histogram of the data

254 bins : NDArray[np.float64 | np.int64]

255 The bins of the histogram

256 ri : NDArray[np.int64]

257 The reverse indices array for the histogram and data

258

259 See Also

260 --------

261 get_bins: Calculates the bins only

262 get_hist: Calculates histogram only

263 get_ri: Calculates the reverse indices only

264 """

265

266 # If the minimum has not been set, set it

267 if min is None:

268 min = np.min(data)

269 # If the maximum has not been set, set it

270 # Check if the max argument was used, set to True, if we set max here by data turn it off.

271 if max is None:

272 max = np.max(data)

273 # If the number of bins has been set use that

274 if num_bins is not None:

275 bin_size = (max - min) / num_bins

276 # Need to add checks if max is below min or max below min

277 if min > max: 277 ↛ 278line 277 didn't jump to line 278 because the condition on line 277 was never true

278 max = min

279 if max < min: 279 ↛ 280line 279 didn't jump to line 280 because the condition on line 279 was never true

280 min = max

281 # IDL uses the bin_size as equal throughout min to max

282 bins = get_bins(min, max, bin_size)

283 # However, if we set a max, we must adjust the last bin to max according to IDL specifications

284 # And we only do this in the case max was by an argument

285 if (bins[-1] > max) or num_bins is not None:

286 bins = bins[:-1]

287 # Flatten the data

288 data_flat = data.flatten()

289 # Get the histogram

290 hist = get_hist(data_flat, bins, min, max)

291 # Get the reverse indices

292 ri = get_ri(data_flat, bins, hist, min, max)

293 # Return

294 return hist, bins, ri

295

296

297def l_m_n(

298 obs: dict,

299 psf: dict | h5py.File,

300 obsdec: float | None = None,

301 obsra: float | None = None,

302 declination_arr: NDArray[np.floating] | None = None,

303 right_ascension_arr: NDArray[np.floating] | None = None,

304) -> tuple[NDArray[np.float64], NDArray[np.float64], NDArray[np.float64]]:

305 """

306 Calculate the directional cosines l,m,n given an RA, Dec, and phase centre for

307 each pixel in the input arrays given, or from the hyperresolved RA/Dec array

308 created during beam creation.

309

310 Parameters

311 ----------

312 obs: dict

313 Observation metadata dictionary.

314 psf: dict | h5py.File

315 Beam dictionary.

316 obsdec: float | None, optional

317 The Dec phase centre for the observation, by default None and set to

318 the phase centre saved in the observation metadata dicitonary

319 (obs['obsdec'])

320 obsra: float | None, optional

321 The RA phase centre for the observation, by default None and set to

322 the phase centre saved in the observation metadata dicitonary

323 (obs['obsra'])

324 declination_arr: NDArray[np.floating] | None, optional

325 An array of declinations, by default None and set to the hyperresolved Dec

326 array in psf['image_info']['dec_arr']

327 right_ascension_arr: NDArray[np.floating] | None, optional

328 An array of right ascensions, by default None and set to the hyperresolved RA

329 array in psf['image_info']['ra_arr']

330

331 Returns

332 -------

333 l_mode : NDArray[np.float64]

334 Directional cosine l array, or the cosine of the vector that contributes

335 to the x-axis

336 m_mode : NDArray[np.float64]

337 Directional cosine m array, or the cosine of the vector that contributes

338 to the y-axis

339 n_tracked : NDArray[np.float64]

340 Directional cosine n array, or the cosine of the vector that contributes

341 to the z-axis, dependent on the phase centre.

342 """

343 # If the variables passed through are None them

344 if obsdec is None:

345 obsdec = obs["obsdec"]

346 if obsra is None:

347 obsra = obs["obsra"]

348 if declination_arr is None:

349 declination_arr = psf["image_info"]["dec_arr"]

350 if right_ascension_arr is None:

351 right_ascension_arr = psf["image_info"]["ra_arr"]

352

353 # Convert all the degrees given into radians

354 obsdec = np.radians(obsdec)

355 obsra = np.radians(obsra)

356 declination_arr = np.radians(declination_arr)

357 right_ascension_arr = np.radians(right_ascension_arr)

358

359 # Calculate l mode, m mode and the phase-tracked n mode of pixel centers

360 cdec0 = np.cos(obsdec)

361 sdec0 = np.sin(obsdec)

362 cdec = np.cos(declination_arr)

363 sdec = np.sin(declination_arr)

364 cdra = np.cos(right_ascension_arr - obsra)

365 sdra = np.sin(right_ascension_arr - obsra)

366 l_mode = sdra * cdec

367 m_mode = cdec0 * sdec - cdec * sdec0 * cdra

368 # n=1 at phase center, so reference from there for phase tracking

369 n_tracked = (sdec * sdec0 + cdec * cdec0 * cdra) - 1

370

371 # find any NaN values

372 nan_vals = np.where(np.isnan(n_tracked))

373 # If any found, replace them with 0's

374 if np.size(nan_vals) > 0: 374 ↛ 380line 374 didn't jump to line 380 because the condition on line 374 was always true

375 n_tracked[nan_vals] = 0

376 l_mode[nan_vals] = 0

377 m_mode[nan_vals] = 0

378

379 # Return the modes

380 return l_mode, m_mode, n_tracked

381

382

383def rebin_columns(

384 a: NDArray[np.integer | np.floating | np.complexfloating],

385 ax: int,

386 shape: tuple,

387 col_sizer: int,

388) -> NDArray[np.integer | np.floating | np.complexfloating]:

389 """

390 Performs expansion on the columns of a 1D or 2D array using interpolation to fill in the values that are created

391 by expanding in the space between existing values. This function assumes the rows have already been expanded

392 to the required number.

393

394 Parameters

395 ----------

396 a : NDArray[np.integer | np.floating | np.complexfloating]

397 An array to be expanded

398 ax : int

399 The axis we're expanding, almost always set to 1

400 shape : tuple

401 The shape of the original array

402 col_sizer : int

403 The number of columns we're adding in

404

405 Returns

406 -------

407 rebinned : NDArray[np.integer | np.floating | np.complexfloating]

408 An interpolated array of a containing shape[1] * col_sizer columns

409

410 See Also

411 --------

412 PyFHD.pyfhd_tools.pyfhd_utils.rebin_rows : Expand the number of rows through interpolation

413 PyFHD.pyfhd_tools.pyfhd_utils.rebin : Expand or Contract an array based on a given shape

414 """

415 # tile the range of col_sizer

416 tiles = np.tile(np.arange(col_sizer), (shape[0], shape[1] // col_sizer - 1))

417 # Get the differences between the columns

418 differences = np.diff(a, axis=ax) / col_sizer

419 # Multiply this by the tiles

420 inferences_non_pad = np.repeat(differences, col_sizer, axis=ax) * tiles

421 # Pad the zeros for the last two rows, and remove the extra zeros to make inferences same shape as desired shape

422 inferences = np.pad(inferences_non_pad, (0, col_sizer))[:-col_sizer]

423 if np.issubdtype(a.dtype, np.integer):

424 inferences = np.floor(inferences).astype("int")

425 # Now get our final array by adding the repeat of our rows rebinned to the inferences

426 rebinned = inferences + np.repeat(a, col_sizer, axis=ax)

427 return rebinned

428

429

430def rebin_rows(

431 a: NDArray[np.integer | np.floating | np.complexfloating],

432 ax: int,

433 shape: tuple,

434 old_shape: tuple,

435 row_sizer: int,

436) -> NDArray[np.integer | np.floating | np.complexfloating]:

437 """

438 Performs expansion on the rows of array `a` to the number of rows in shape[0] using interpolation to fill between any

439 new values added when adding new rows between existing values.

440

441 Parameters

442 ----------

443 a : NDArray[np.integer | np.floating | np.complexfloating]

444 The array to be rebinned

445 ax : int

446 The axis we're expanding, almost always set to 0

447 shape : tuple

448 The new shape we're expanding to

449 old_shape : tuple

450 The shape of the array a

451 row_sizer : int

452 The number of rows we're expanding by

453

454 Returns

455 -------

456 row_rebinned : NDArray[np.integer | np.floating | np.complexfloating]

457 The interpolated array with row_sizer extra columns between existing rows

458

459 See Also

460 --------

461 PyFHD.pyfhd_tools.pyfhd_utils.rebin_columns : Expand the number of columns through interpolation

462 PyFHD.pyfhd_tools.pyfhd_utils.rebin : Expand or Contract an array based on a given shape

463 """

464 # Tile the range of row_sizer

465 tiles = np.tile(

466 np.array_split(np.arange(row_sizer), row_sizer),

467 ((shape[0] - row_sizer) // row_sizer, old_shape[1]),

468 )

469 # Get the differences between values

470 differences = np.diff(a, axis=ax) / row_sizer

471 # Multiply differences array by tiles to get desired bins

472 inferences_non_pad = np.repeat(differences, row_sizer, axis=ax) * tiles

473 # Pad the inferences to get the same shape as above

474 inferences = np.pad(inferences_non_pad, (0, row_sizer))[:, :-row_sizer]

475 if np.issubdtype(a.dtype, np.integer):

476 inferences = np.floor(inferences).astype("int")

477 # Add this to the original array that has been repeated to match the size of inference

478 row_rebinned = inferences + np.repeat(a, row_sizer, axis=ax)

479 return row_rebinned

480

481

482def rebin(

483 a: NDArray[np.integer | np.floating | np.complexfloating],

484 shape: ArrayLike,

485 sample: bool = False,

486) -> NDArray[np.integer | np.floating | np.complexfloating]:

487 """

488 Resizes a 2D array by averaging or repeating elements, new dimensions must be integral factors of original dimensions.

489

490 In the case of expanding an existing array, rebin will interpolate between the original values with a linear function.

491 In the case of compressing an existing array, rebin will average

492

493 Parameters

494 ----------

495 a : NDArray[np.integer | np.floating | np.complexfloating]

496 Input array.

497 new_shape : ArrayLike

498 Shape of the output array in (rows, columns)

499 Must be a factor or multiple of a.shape

500 sample: bool, optional

501 Use this to get samples using rebin, rather than interpolation, by default False.

502

503 Returns

504 -------

505 rebinned : NDArray[np.integer | np.floating | np.complexfloating]

506 If the new shape is smaller of the input array, the data are averaged,

507 if the new shape is bigger array elements are repeated and interpolated

508

509 Examples

510 --------

511 >>> test = np.array([0,10,20,30])

512 >>> rebin(test, (1,8)) # Expand Columns

513 array([ 0, 5, 10, 15, 20, 25, 30, 30])

514 >>> rebin(test, (2,8)) # Expand Rows and Columns

515 array([[ 0, 5, 10, 15, 20, 25, 30, 30],

516 [ 0, 5, 10, 15, 20, 25, 30, 30]])

517 >>> data = np.array([[ -5, 4, 2, -8, 1],

518 [ 3, 0, 5, -5, 1],

519 [ 6, -7, 4, -4, -8],

520 [ -1, -5, -14, 2, 1]])

521 >>> rebin(data, (8,10)) # 2D Array example

522 array([[ -5, -1, 4, 3, 2, -3, -8, -4, 1, 1],

523 [ -1, 0, 2, 2, 3, -2, -7, -3, 1, 1],

524 [ 3, 1, 0, 2, 5, 0, -5, -2, 1, 1],

525 [ 4, 0, -4, 0, 4, -1, -5, -5, -4, -4],

526 [ 6, -1, -7, -2, 4, 0, -4, -6, -8, -8],

527 [ 2, -2, -6, -6, -5, -3, -1, -3, -4, -4],

528 [ -1, -3, -5, -10, -14, -6, 2, 1, 1, 1],

529 [ -1, -3, -5, -10, -14, -6, 2, 1, 1, 1]])

530 >>> rebin(data, (2,5)) # Compression

531 array([[-1, 2, 3, -6, 1],

532 [ 2, -6, -5, -1, -3]])

533 >>> to_compress = np.array([[3, 9, 7, 0, 1, 5],

534 [7, 7, 1, 9, 7, 3],

535 [9, 2, 2, 3, 1, 1],

536 [0, 3, 5, 0, 4, 3],

537 [5, 7, 7, 1, 9, 1],

538 [7, 2, 1, 1, 3, 0]])

539 >>> rebin(to_compress, (3,3)) # Compression

540 array([[6, 4, 4],

541 [3, 2, 2],

542 [5, 2, 3]])

543

544 References

545 ----------

546 [1] https://stackoverflow.com/a/8090605

547

548 See Also

549 --------

550 PyFHD.pyfhd_tools.pyfhd_utils.rebin_rows : Expand the number of rows through interpolation

551 PyFHD.pyfhd_tools.pyfhd_utils.rebin_columns : Expand the number of columns through interpolation

552 """

553 old_shape = a.shape

554 # Prevent more processing than needed if we want the same shape

555 if old_shape == shape:

556 return a

557 if len(old_shape) == 1:

558 old_shape = (1, old_shape[0])

559 # Sample the original array using rebin

560 if sample:

561 # If its a 1D array then...

562 if old_shape[0] == 1:

563 if shape[1] > old_shape[1]:

564 rebinned = np.repeat(a, shape[1] // old_shape[1], axis=0)

565 else:

566 rebinned = a[:: old_shape[1] // shape[1]]

567 # Assume its a 2D array

568 else:

569 # Do the Rows first

570 if shape[0] >= old_shape[0]:

571 # Expand Rows

572 rebinned = np.repeat(a, shape[0] // old_shape[0], axis=0)

573 else:

574 # Compress Rows

575 rebinned = a[:: old_shape[0] // shape[0], :]

576 # Then do the columns

577 if shape[1] >= old_shape[1]:

578 # Expand Columns

579 rebinned = np.repeat(rebinned, shape[1] // old_shape[1], axis=1)

580 else:

581 # Compress columns

582 rebinned = rebinned[:, :: old_shape[1] // shape[1]]

583 # Return the rebinned without adjusting dtype as none of the functions above change it

584 return rebinned

585

586 # If we are downsizing

587 elif shape[0] < old_shape[0] or shape[1] < old_shape[1]:

588 if (max(old_shape[0], shape[0]) % min(old_shape[0], shape[0]) != 0) or ( 588 ↛ 591line 588 didn't jump to line 591 because the condition on line 588 was never true

589 max(old_shape[1], shape[1]) % min(old_shape[1], shape[1]) != 0

590 ):

591 raise ValueError("Your new shape should be a factor of the original shape")

592 # If we are increasing the rows or columns and reducing the other, increase them now and change the old shape

593 if shape[0] > old_shape[0]:

594 a = np.tile(a, (shape[0], 1))

595 old_shape = a.shape

596 elif shape[1] > old_shape[1]:

597 a = np.tile(a, (1, shape[1]))

598 old_shape = a.shape

599 # Create the shape we need (rows, rows that can fit in old_shape, cols, cols that can fit into old_shape)

600 sh = shape[0], old_shape[0] // shape[0], shape[1], old_shape[1] // shape[1]

601 # Create the 4D array

602 rebinned = np.reshape(a, sh)

603 # Get the average of the columns first

604 rebinned = rebinned.mean(-1)

605 # To ensure that we get the result same as IDL

606 # it seems to fix the values after every calculation if integer

607 if np.issubdtype(a.dtype, np.integer):

608 rebinned = np.fix(rebinned).astype("int")

609 # Now get it for the rows

610 rebinned = rebinned.mean(1)

611 # If we are expecting 1D array ensure it gets returned as a 1D array

612 if shape[0] == 1:

613 rebinned = rebinned[0]

614 # To ensure that we get the result same as IDL

615 # it seems to fix the values after every calculation if integer

616 if np.issubdtype(a.dtype, np.integer):

617 rebinned = np.fix(rebinned).astype("int")

618

619 # Otherwise we are expanding

620 else:

621 if shape[0] % old_shape[0] != 0 or shape[1] % old_shape[1] != 0: 621 ↛ 622line 621 didn't jump to line 622 because the condition on line 621 was never true

622 raise ValueError(

623 "Your new shape should be a multiple of the original shape"

624 )

625 # Get the size changes of the row and column separately

626 row_sizer = shape[0] // old_shape[0]

627 col_sizer = shape[1] // old_shape[1]

628 ax = 0

629 # If 1D array then do along the columns

630 if old_shape[0] == 1:

631 rebinned = rebin_columns(a, ax, shape, col_sizer)

632 if shape[0] == 1:

633 rebinned = rebinned[0]

634 # Else its a 2D array

635 else:

636 row_rebinned = rebin_rows(a, ax, shape, old_shape, row_sizer)

637 # If it matches the new shape, then return it

638 if row_rebinned.shape == shape:

639 return row_rebinned

640 else:

641 ax = 1

642 rebinned = rebin_columns(row_rebinned, ax, shape, col_sizer)

643 return rebinned

644

645

646def weight_invert(

647 weights: (

649 ),

650 threshold: float | None = None,

651 use_abs: bool = False,

653 """

654 The weights invert function cleans the weights given by removing

655 the values that are 0, NaN or Inf ready for additional calculation.

656 If a threshold is set, then we check the values that match the threshold

657 instead of checking for zeros.

658

659 Parameters

660 ----------

662 An array of values of some dtype

663 threshold: float | None, optional

664 A real number set as the threshold for the array.

665 By default its set to None, in this case function checks

666 for zeros, by default None

667 use_abs: bool, optional

668 If True, take the absolute value (sometimes useful for complex numbers)

669 By default this is False, so will leave as a complex number and invert, by default False

670

671 Returns

672 -------

674 The weights array that has had NaNs and Infinities removed, and zeros OR

675 values that don't meet the threshold.

676 """

677 # IDL is able to treat one number as an array (because every number is aprrently an array of size 1?)

678 # As such we need to check if it's a number less than or equal to 0 and make a zeros array of size 1

679 if np.isscalar(weights):

680 result = np.zeros(1, dtype=type(weights))

681 weights = np.array([weights], dtype=type(weights))

682 else:

683 result = np.zeros_like(weights, dtype=weights.dtype)

684 weights_use = weights

685 if use_abs or np.iscomplexobj(weights_use):

686 """

687 Python and IDL use the where function on complex numbers differently.

688 On Python, if you apply a real threshold, it applies to only the real numbers,

689 and if you apply an imaginary threshold it applies to only imaginary numbers.

690 For example Python:

691 test = np.array([1j, 2 + 2j, 3j])

692 np.where(test >= 2) == array([1])

693 np.where(test >= 2j) == array([1,2])

694 Meanwhile in IDL:

695 test = COMPLEX([0,2,0],[1,2,3]) ;COMPLEX(REAL, IMAGINARY)

696 where(test ge 2) == [1, 2]

697 where(test ge COMPLEX(0,2)) == [1, 2]

698

699 IDL on the otherhand, uses the ABS function on COMPLEX numbers before using WHERE.

700 Hence the behaviour we're seeing above. This is why we also check for a complexobj

701 in the if statement

702 """

703 weights_use = np.abs(weights)

704

705 # If threshold has been set then...

706 if threshold is not None:

707 # Get the indexes which meet the threshold

708 # As indicated before IDL applies abs before using where to complex numbers

709 i_use = np.where(weights_use >= threshold)

710 else:

711 # Otherwise get where they are not zero

712 i_use = np.nonzero(weights_use)

713

714 if np.size(i_use) > 0:

715 result[i_use] = 1 / weights[i_use]

716

717 # Replace all NaNs with Zeros

718 if np.size(np.where(np.isnan(result))) != 0:

719 result[np.where(np.isnan(result))] = 0

720 # Replace all Infinities with Zeros

721 if np.size(np.where(np.isinf(result))) != 0: 721 ↛ 722line 721 didn't jump to line 722 because the condition on line 721 was never true

722 result[np.where(np.isinf(result))] = 0

723

724 # If the result is an array containing 1 result, then return the result, not an array

725 if np.size(result) == 1:

726 return result[0]

727 return result

728

729

730def array_match(

731 array_1: NDArray[np.integer | np.floating | np.complexfloating],

732 value_match: NDArray[np.integer | np.floating | np.complexfloating],

733 array_2: NDArray[np.integer | np.floating | np.complexfloating] | None = None,

734) -> NDArray[np.int64]:

735 """

736 Find the indices of the input array which match the array of values provided.

737 If a second input array is provided, then find the indices where the values

738 provided match in both input arrays. This matching can only be done between

739 integer values.

740

741 Parameters

742 ----------

743 array_1: NDArray[np.integer | np.floating | np.complexfloating]

744 The input array to match the values in

745 value_match: NDArray[np.integer | np.floating | np.complexfloating]

746 The values to find in the input array

747 array_2: NDArray[np.integer | np.floating | np.complexfloating] | None, optional

748 A second array to match the values in via a logical AND, by default is None

749

750 Returns

751 -------

752 match_indices: NDArray[np.int64]

753 The indices of the input array(s) which match the values provided

754

755 Raises

756 ------

757 ValueError

758 Gets raised if value_match is None

759 """

760

761 if value_match is None: 761 ↛ 762line 761 didn't jump to line 762 because the condition on line 761 was never true

762 raise ValueError("Value Match Should be a value not None")

763 # If array_2 has been supplied, compare which mins and maxes to use based on two arrays

764 if array_2 is not None and np.size(array_2) > 0:

765 min_use = np.min([np.min(array_1), np.min(array_2)])

766 max_use = np.max([np.max(array_1), np.max(array_2)])

767 # Also compute the histogram for array_2

768 hist2, _, ri2 = histogram(array_2, min=min_use, max=max_use)

769 else:

770 # If the second array wasn't supplied

771 min_use = np.min(array_1)

772 max_use = np.max(array_1)

773 # Supply a second hist

774 hist2 = np.arange(max_use - min_use + 1)

775 # Get the histogram for the first

776 hist1, _, ri1 = histogram(array_1, min=min_use, max=max_use)

777 # Arrays should be the same size, does addition

778 hist_combined = hist1 + hist2

779 bins = np.where(hist_combined > 0)

780

781 # Select the values to be used

782 hist_v1, bins_v1, _ = histogram(bins + min_use)

783 omin = bins_v1[0]

784 omax = bins_v1[-1]

785 hist_v2, _, _ = histogram(value_match, min=omin, max=omax)

786 vals = np.nonzero(np.bitwise_and(hist_v1, hist_v2))[0] + omin - min_use

787 n_match = vals.size

788

789 if n_match == 0:

790 return -1

791

792 ind_arr = np.zeros_like(array_1)

793 for vi in range(n_match):

794 i = vals[vi]

795 if hist1[i] > 0: 795 ↛ 797line 795 didn't jump to line 797 because the condition on line 795 was always true

796 ind_arr[ri1[ri1[i] : ri1[i + 1]]] = 1

797 if hist2[i] > 0:

798 ind_arr[ri2[ri2[i] : ri2[i + 1]]] = 1

799

800 match_indices = np.nonzero(ind_arr)[0]

801 # Return our matching indices

802 return match_indices

803

804

805def meshgrid(

806 dimension: int, elements: int, axis: int | None = None, return_integer: bool = False

807) -> NDArray[np.int64 | np.float64]:

808 """

809 Generates a 2D array of X or Y values. Could be replaced by a another function

810

811 Parameters

812 ----------

813 dimension: int

814 Sets the column size of the array to return

815 elements: int

816 Sets the row size of the array to return

817 axis = int | None, optional

818 Set axis = 1 for X values, set axis = 2 for Y values, by default is None

819 return integer: bool, optional

820 dtype is implied by dimension and/or elements

821 by default. If True, sets return array to int, by default is False

822

823 Returns

824 -------

825 result: NDArray[np.int64 | np.float64]

826 A numpy array of shape (elements, dimension) that is

827 a modified np.arange(elements * dimension).

828 """

829 if axis is None:

830 axis = elements

831 elements = dimension

832 # If elements is set as 2

833 if axis == 2:

834 # Replicate LINDGEN by doing arange and resizing, result is floored

835 result = np.reshape(

836 np.floor(np.arange(elements * dimension) / dimension), (elements, dimension)

837 )

838 else:

839 # Replicate LINDGEN by doing arange and resizing

840 result = np.reshape(

841 np.arange(elements * dimension) % dimension, (elements, dimension)

842 )

843 # If we need to return a float, then set the result and return

844 if return_integer:

845 return result.astype("int")

846 # Otherwise return as is

847 else:

848 return result

849

850

851def deriv_coefficients(n: int, divide_factorial: bool = False) -> NDArray[np.float64]:

852 """

853 Computes an array of coefficients resulting in taking the

854 n-th derivative of a function of the form x^a (a must not

855 be a positive integer less than n)

856

857 Parameters

858 ----------

859 n: int

860 Decides the length of coefficients

861 divide_factorial: bool, optional

862 Determine if we need to divide by the factorial, by default is False

863

864 Returns

865 -------

866 coeff: NDArray[np.float64]

867 An array of coefficients

868 """

869 if n <= 0:

870 return 0

871 # Set up the array

872 coeff = np.zeros(n)

873 # Set the first coefficient to 1

874 coeff[0] = 1

875 # For every coefficient

876 for m in range(1, n):

877 # Had to do m + 1 for the range as IDL coeff[1:1] == coeff[1], but Python coeff[1:1] == array([])

878 coeff[1 : m + 1] += -m * coeff[0:m]

879 # If we are to divide by the factorial do that to each coefficient

880 if divide_factorial:

881 for m in range(n):

882 coeff[m] /= factorial(m + 1)

883

884 # Return coefficients

885 return coeff

886

887

888def idl_argunique(

889 arr: NDArray[np.integer | np.floating | np.complexfloating],

890) -> NDArray[np.int64]:

891 """

892 In IDL the UNIQ function returns the indexes of the unique values within

893 an array (that is assumed to be sorted beforehand). In NumPy they use the first

894 unique index when using return index, where as in IDL they use the last index.

895

896 Parameters

897 ----------

898 arr : NDArray[np.integer | np.floating | np.complexfloating]

899 A sorted numpy array of any type.

900

901 Returns

902 -------

903 NDArray[np.int64]

904 An array containing indexes of the last occurence of the unique value in arr

905

906 Examples

907 --------

908 >>> test = np.array([10, 30, 5, 100, 200, 75, 200, 100, 30])

909 >>> idl_argunique(test)

910 array([0, 1, 3, 4, 6, 8])

911 """

912 return np.searchsorted(arr, np.unique(arr), side="right") - 1

913

914

915def angle_difference(

916 ra1: float,

917 dec1: float,

918 ra2: float,

919 dec2: float,

920 degree: bool = False,

921 nearest: bool = False,

922) -> float:

923 """

924 Calculates the angle difference between two given celestial coordinates.

925

926 Parameters

927 ----------

928 ra1 : float

929 Right Ascension of a coordinate 1 in radians or degrees

930 dec1 : float

931 Declination of a coordinate 1 in radians or degrees

932 ra2 : float

933 Right Ascension of a coordinate 2 in radians or degrees

934 dec2 : float

935 Declination of a coordinate 2 in radians or degrees

936 degree : bool, optional

937 If True, then we treate the coordinates given as degrees, by default False

938 nearest : bool, optional

939 Calculate implied angle instead, by default False

940

941 Returns

942 -------

943 relative_angle : float

944 The angle difference in degrees

945 """

946

947 if degree: 947 ↛ 950line 947 didn't jump to line 950 because the condition on line 947 was always true

948 unit = u.deg

949 else:

950 unit = u.rad

951 coord1 = SkyCoord(ra=ra1 * unit, dec=dec1 * unit)

952 coord2 = SkyCoord(ra=ra2 * unit, dec=dec2 * unit)

953 # Use built in astropy separtion instead of calculating it

954 relative_angle = coord1.separation(coord2).value

955 if nearest:

956 return max(relative_angle, 2 * pi - relative_angle)

957 else:

958 return relative_angle

959

960

961def parallactic_angle(latitude: float, hour_angle: float, dec: float) -> float:

962 """

963 Calculates the parallactic angle given latitude (usually a declination), hour_angle and another declination

964

965 Parameters

966 ----------

967 latitude : float

968 An angle in degrees, usually a declination in this package

969 hour_angle : float

970 The hour angle in degrees

971 dec : float

972 A declination in degrees

973

974 Returns

975 -------

976 parallactic_angle : float

977 The angle between the great circle through a celestial object and the zenith, and the hour circle of the object

978 """

979

980 y_term = np.sin(np.radians(hour_angle))

981 x_term = np.cos(np.radians(dec)) * np.tan(np.radians(latitude)) - np.sin(

982 np.radians(dec)

983 ) * np.cos(np.radians(hour_angle))

984 return np.degrees(np.arctan(y_term / x_term))

985

986

987def simple_deproject_w_term(

988 obs: dict,

989 params: dict,

990 vis_arr: NDArray[np.complex128],

991 direction: float,

992 logger: Logger,

993) -> NDArray[np.complex128]:

994 """

995 Applies a w-term deprojection to the visibility array

996

997 Parameters

998 ----------

999 obs : dict

1000 The observation data structure

1001 params : dict

1002 The data from the UVFITS file

1003 vis_arr : NDArray[np.complex128]

1004 The visibility array

1005 direction : float

1006 The direction we apply to the phase

1007

1008 Returns

1009 -------

1010 vis_arr : NDArray[np.complex128]

1011 The visibility array with the deprojection applied

1012 """

1013

1014 icomp = 1j

1015 zcen = np.outer(obs["baseline_info"]["freq"], params["ww"])

1016 sign = 1 if direction > 0 else -1

1017 phase = np.exp(direction * icomp * zcen)

1018 for pol_i in range(obs["n_pol"]):

1019 vis_arr[pol_i, :, :] *= phase

1020

1021 sign_str = " +1" if sign > 0 else " -1"

1022 logger.info(f"Applying simple w-term deprojection:{sign_str}")

1023

1024 return vis_arr

1025

1026

1027def resistant_mean(

1028 array: NDArray[np.integer | np.floating | np.complexfloating],

1029 deviations: int,

1030 mad_scale: float = 0.67449999999999999,

1031 sigma_coeff: NDArray[np.float64] = np.array(

1032 [

1033 0.020142000000000000,

1034 -0.23583999999999999,

1035 0.90722999999999998,

1036 -0.15404999999999999,

1037 ]

1038 ),

1039) -> int | float | complex | np.number:

1040 """

1041 The resistant_mean function translate the IDLAstro function resistant_mean from IDL to Python using NumPy.

1042 The values mad_scale and sigma_coeff are also retrieved from the same IDLAstro function when running in Double

1043 Precision Mode.

1044

1045 The resistant_mean gets the mean of an array which has had a median absolute deviation threshold applied to the

1046 absolute deviations of the array to exclude outliers.

1047

1048 If resistant_mean needs to be optimized, it can be vectorized easily enough

1049

1050 Parameters

1051 ----------

1052 array : NDArray[np.integer | np.floating | np.complexfloating]

1053 A 1 dimensional array of values, multidimensional arrays should be flattened before use

1054 deviations : int

1055 The number of median absolute deviations from the median we want use to exclude outliers

1056 mad_scale : float, optional

1057 The scale factor for the median absolute deviation, by default 0.67449999999999999

1058 sigma_coeff : NDArray[np.float64], optional

1059 The coefficients applied to the polynomial equation to the standard deviation of the points excluded by the outliers for additional exclusion, by default np.array([0.020142000000000000, -0.23583999999999999 , 0.90722999999999998 , -0.15404999999999999])

1060

1061 Returns

1062 -------

1063 resistant_mean : Number

1064 The mean of the array with outliers excluded using median absolute deviation

1065

1066 References

1067 ----------

1068 .. IDLAstro, RESISTANT_Mean, https://idlastro.gsfc.nasa.gov/ftp/pro/robust/resistant_mean.pro

1069 """

1070 # Calculate median of the real part of the array

1071 median = np.median(array.real)

1072 # Get the absolute deviation (residuals)

1073 abs_dev = np.abs(array - median)

1074 # Calculate Median Absolute Deviation

1075 # I could have used scipy's median_abs_deviation to get this, but by doing this manually I can guarantee the same behaviour as IDL

1076 mad = np.median(abs_dev) / mad_scale

1077 # Use MAD and the number of deviations

1078 mad_threshold = deviations * mad

1079 # Subset the array by the deviations and residuals

1080 no_outliers = array[np.where(abs_dev <= mad_threshold)]

1081 # If the deviations is less than 4.5, change the sigma (standard deviation of the subarray) by using a polyval with set sigma coefficient

1082 # This compensates Sigma for truncation

1083 # Calculate the standard deviation of the rela and imag separately

1084 sigma = np.std(no_outliers.real) + np.std(no_outliers.imag) * 1j

1085 # Set the deviationsX to 1.0 if it's less than 1

1086 deviationsX = max(deviations, 1.0)

1087 if deviationsX <= 4.5: 1087 ↛ 1089line 1087 didn't jump to line 1089 because the condition on line 1087 was always true

1088 sigma = sigma / np.polyval(sigma_coeff, deviationsX)

1089 sigma_threshold = sigma * deviations

1090 # Use the sigma threshold to again remove outliers from the array

1091 # Also take the absolute value of sigma_threshold to get the same behaviour as LE in IDL

1092 subarray = array[np.where(abs_dev <= np.abs(sigma_threshold))]

1093 # Get the mean of the subset array which contains no outliers

1094 return np.mean(subarray)

1095

1096

1097def run_command(cmd: str, dry_run=False):

1098 """

1099 Runs the command string `cmd` using `subprocess.run`. Returns any text output to stdout

1100

1101 Parameters

1102 ----------

1103 cmd : str

1104 The command to run on the command line

1105 dry_run : bool

1106 If True, don't actually run the command. Defaults to False (so defaults to running the command)

1107 """

1108

1109 if dry_run:

1110 stdout = "This was a dry run, not launching IDL code\n"

1111 else:

1112 stdout = subprocess.run(cmd.split(), stdout=subprocess.PIPE, text=True).stdout

1113

1114 return stdout

1115

1116

1117def vis_weights_update(

1118 vis_weights: NDArray[np.float64], obs: dict, psf: dict | h5py.File, params: dict

1119) -> tuple[NDArray[np.float64], dict]:

1120 """

1121 Update the visibility weights array to match any updates to the observation

1122 metadata dictionary, including flagged times, frequencies, tiles, and

1123 min/max baseline length.

1124

1125 Parameters

1126 ----------

1127 vis_weights : NDArray[np.float64]

1128 Visibility weights array.

1129 obs : dict

1130 Observation metadata dictionary

1131 psf: dict | h5py.File

1132 Beam dictionary

1133 params : dict

1134 Visibility metadata dictionary

1135

1136 Returns

1137 -------

1138 vis_weights : NDArray[np.float64]

1139 Updated vis_weights given the modified observation metadata dictionary

1140 obs : dict

1141 The observation metadata dictionary, now containing the correct summary

1142 statistics of the new flagging

1143 """

1144 kx_arr = params["uu"] / obs["kpix"]

1145 ky_arr = params["vv"] / obs["kpix"]

1146 dist_test = np.sqrt(kx_arr**2 + ky_arr**2) * obs["kpix"]

1147 dist_test = np.outer(obs["baseline_info"]["freq"], dist_test)

1148 flag_dist_i = np.where(

1149 (dist_test < obs["min_baseline"]) | (dist_test > obs["max_baseline"])

1150 )

1151 conj_i = np.where(ky_arr > 0)

1152 if conj_i[0].size > 0: 1152 ↛ 1155line 1152 didn't jump to line 1155 because the condition on line 1152 was always true

1153 kx_arr[conj_i] = -kx_arr[conj_i]

1154 ky_arr[conj_i] = -ky_arr[conj_i]

1155 psf_dim = psf["dim"]

1156 if isinstance(psf, h5py.File): 1156 ↛ 1157line 1156 didn't jump to line 1157 because the condition on line 1156 was never true

1157 psf_dim = psf_dim[0]

1158 xcen = np.outer(obs["baseline_info"]["freq"], kx_arr)

1159 xmin = np.floor(xcen) + obs["dimension"] / 2 - (psf_dim / 2 - 1)

1160 ycen = np.outer(obs["baseline_info"]["freq"], ky_arr)

1161 ymin = np.floor(ycen) + obs["elements"] / 2 - (psf_dim / 2 - 1)

1162

1163 range_test_x_i = np.where(

1164 (xmin <= 0) | ((xmin + psf_dim - 1) >= obs["dimension"] - 1)

1165 )

1166 if range_test_x_i[0].size > 0: 1166 ↛ 1169line 1166 didn't jump to line 1169 because the condition on line 1166 was always true

1167 xmin[range_test_x_i] = -1

1168 ymin[range_test_x_i] = -1

1169 range_test_y_i = np.where(

1170 (ymin <= 0) | ((ymin + psf_dim - 1) >= obs["elements"] - 1)

1171 )

1172 if range_test_y_i[0].size > 0: 1172 ↛ 1175line 1172 didn't jump to line 1175 because the condition on line 1172 was always true

1173 xmin[range_test_y_i] = -1

1174 ymin[range_test_y_i] = -1

1175 del range_test_x_i

1176 del range_test_y_i

1177

1178 if flag_dist_i[0].size > 0: 1178 ↛ 1183line 1178 didn't jump to line 1183 because the condition on line 1178 was always true

1179 xmin[flag_dist_i] = -1

1180 ymin[flag_dist_i] = -1

1181

1182 # If flag_frequencies is false, freq_use should be all 1's anyway, so this shouldn't affect anything

1183 freq_cut_i = np.where(obs["baseline_info"]["freq_use"] == 0)

1184 if freq_cut_i[0].size > 0: 1184 ↛ 1186line 1184 didn't jump to line 1186 because the condition on line 1184 was always true

1185 vis_weights[0 : obs["n_pol"], freq_cut_i[0], :] = 0

1186 tile_cut_i = np.where(obs["baseline_info"]["tile_use"] == 0)

1187 if tile_cut_i[0].size > 0:

1188 bi_cut = array_match(

1189 obs["baseline_info"]["tile_a"],

1190 tile_cut_i[0] + 1,

1191 obs["baseline_info"]["tile_b"],

1192 )

1193 if np.size(bi_cut) > 0: 1193 ↛ 1196line 1193 didn't jump to line 1196 because the condition on line 1193 was always true

1194 vis_weights[0 : obs["n_pol"], :, bi_cut] = 0

1195

1196 time_cut_i = np.where(obs["baseline_info"]["time_use"] == 0)[0]

1197 bin_offset = np.append(obs["baseline_info"]["bin_offset"], kx_arr.size)

1198 time_bin = np.zeros(kx_arr.size)

1199 for ti in range(obs["baseline_info"]["time_use"].size):

1200 time_bin[bin_offset[ti] : bin_offset[ti + 1]] = ti

1201 for ti in range(time_cut_i.size): 1201 ↛ 1202line 1201 didn't jump to line 1202 because the loop on line 1201 never started

1202 ti_cut = np.where(time_bin == time_cut_i[ti])

1203 if ti_cut[0].size > 0:

1204 vis_weights[0 : obs["n_pol"], :, ti_cut] = 0

1205

1206 flag_i = np.where(vis_weights[0] <= 0)

1207 flag_i_new = np.where(xmin < 0)

1208 if flag_i_new[0].size > 0: 1208 ↛ 1210line 1208 didn't jump to line 1210 because the condition on line 1208 was always true

1209 vis_weights[0 : obs["n_pol"], flag_i_new[0], flag_i_new[1]] = 0

1210 if flag_i[0].size > 0: 1210 ↛ 1214line 1210 didn't jump to line 1214 because the condition on line 1210 was always true

1211 xmin[flag_i] = -1

1212 ymin[flag_i] = -1

1213

1214 if min(np.max(xmin), np.max(ymin)) < 0: 1214 ↛ 1215line 1214 didn't jump to line 1215 because the condition on line 1214 was never true

1215 obs["n_vis"] = 0

1216 return vis_weights, obs

1217

1218 bin_n, _, _ = histogram(xmin + ymin * obs["dimension"], min=0)

1219 obs["n_vis"] = np.sum(bin_n)

1220

1221 obs["n_time_flag"] = np.sum(1 - obs["baseline_info"]["time_use"])

1222 obs["n_tile_flag"] = np.sum(1 - obs["baseline_info"]["tile_use"])

1223 obs["n_freq_flag"] = np.sum(1 - obs["baseline_info"]["freq_use"])

1224

1225 return vis_weights, obs

1226

1227

1228def split_vis_weights(

1229 obs: dict, vis_weights: NDArray[np.float64]

1230) -> tuple[NDArray[np.float64], NDArray[np.int64]]:

1231 """

1232 Separate the indices in the visibility array (data/model/res/weights) into

1233 interleaved time samples, generally called "even" and "odd" depending on

1234 whether the time index is even or odd. Interleaved data can be used to generate

1235 cross power spectra and propagate uncertainties, see eq 2 of Jacobs et al. 2016.

1236 Ensures that the same flagging is applied to both sets.

1237

1238 Parameters

1239 ----------

1240 obs : dict

1241 Observation metadata dictionary

1242 vis_weights : NDArray[np.float64]

1243 Visibility weights array

1244

1245 Returns

1246 -------

1247 vis_weights : NDArray[np.float64]

1248 Full visibility weights array with the same flagging applied

1249 to both interleaved sets.

1250 bi_use NDArray[np.int64]

1251 Baseline index arrays for interleaved time samples, separated

1252 by "even" and "odd" indices.

1253 """

1254 # Not always used in vis_noise_calc requires this check in other cases

1255 if obs["n_time"] < 2: 1255 ↛ 1256line 1255 didn't jump to line 1256 because the condition on line 1255 was never true

1256 return vis_weights

1257

1258 # Get the number of baselines, should be the last in this case

1259 nb = vis_weights.shape[-1]

1260 bin_end = np.zeros(obs["n_time"], dtype=np.int64)

1261 bin_end[: obs["n_time"] - 1] = (

1262 obs["baseline_info"]["bin_offset"][1 : obs["n_time"]] - 1

1263 )

1264 bin_end[-1] = int(nb - 1)

1265 bin_i = np.full(nb, -1, dtype=np.int64)

1266 for t_i in range(obs["n_time"] // 2):

1267 bin_i[obs["baseline_info"]["bin_offset"][t_i] : bin_end[t_i] + 1] = t_i

1268

1269 time_start_i = int(np.min(np.nonzero(obs["baseline_info"]["time_use"])[0]))

1270 nt3 = int(np.floor((obs["n_time"] - time_start_i) / 2) * 2)

1271 time_use_0 = obs["baseline_info"]["time_use"][time_start_i : time_start_i + nt3 : 2]

1272 time_use_1 = obs["baseline_info"]["time_use"][

1273 time_start_i + 1 : time_start_i + nt3 : 2

1274 ]

1275 time_use_01 = time_use_0 * time_use_1

1276 time_use = np.zeros(obs["baseline_info"]["time_use"].shape, dtype=np.int64)

1277 time_use[time_start_i : time_start_i + nt3 : 2] = time_use_01

1278 time_use[time_start_i + 1 : time_start_i + nt3 : 2] = time_use_01

1279 time_cut_i = np.where(time_use <= 0)[0]

1280 if time_cut_i.size > 0: 1280 ↛ 1281line 1280 didn't jump to line 1281 because the condition on line 1280 was never true

1281 for cut_i in range(time_cut_i.size):

1282 bin_i_cut = np.where(bin_i == time_cut_i[cut_i])[0]

1283 if bin_i_cut.size > 0:

1284 bin_i[bin_i_cut] = -1

1285

1286 bi_use = [np.where(bin_i % 2 == 0)[0], np.where(bin_i % 2 == 1)[0]]

1287

1288 # Here we ensure that both even and odd samples are the same size by

1289 # ensuring both arrays match the smallest size

1290 if bi_use[0].size < bi_use[1].size: 1290 ↛ 1291line 1290 didn't jump to line 1291 because the condition on line 1290 was never true

1291 bi_use[1] = bi_use[1][0 : bi_use[0].size]

1292 elif bi_use[1].size < bi_use[0].size: 1292 ↛ 1293line 1292 didn't jump to line 1293 because the condition on line 1292 was never true

1293 bi_use[0] = bi_use[0][0 : bi_use[1].size]

1294

1295 for pol_i in range(obs["n_pol"]):

1296 # In IDL they are doing x > y < w < z

1297 flag_use = np.minimum(

1298 np.maximum(vis_weights[pol_i, :, bi_use[0]], 0),

1299 np.minimum(vis_weights[pol_i, :, bi_use[1]], 1),

1300 )

1301 # Reset vis_weights

1302 vis_weights[pol_i] = 0

1303 # No keywords used for odd_only or even_only in entirety of FHD

1304 vis_weights[pol_i, :, bi_use[0]] = flag_use

1305 vis_weights[pol_i, :, bi_use[1]] = flag_use

1306

1307 return vis_weights, bi_use

1308

1309

1310def vis_noise_calc(

1311 obs: dict,

1312 vis_arr: NDArray[np.complex128],

1313 vis_weights: NDArray[np.float64],

1314 bi_use: NDArray[np.int64] | None = None,

1315) -> NDArray[np.float64]:

1316 """

1317 Calculate the noise from the calibrated visibilities by taking

1318 the difference between the imaginary parts of the interleaved

1319 time samples. A factor of sqrt(2) is required because there is

1320 half as many independent noise samples when calculated from just

1321 the imaginary part.

1322

1323 Parameters

1324 ----------

1325 obs : dict

1326 The observation metadata dictionary

1327 vis_arr : NDArray[np.complex128]

1328 The vsisibility array

1329 vis_weights : NDArray[np.float64]

1330 The visibility weights array

1331 bi_use : NDArray[np.int64] | None, optional

1332 Baseline index arrays for interleaved time samples, separated

1333 by "even" and "odd" indices, by default None

1334

1335 Returns

1336 -------

1337 noise_arr: NDArray[np.float64]

1338 Calculated variance of the noise as a function of polarisation

1339 and frequency.

1340 """

1341 noise_arr = np.zeros([obs["n_pol"], obs["n_freq"]])

1342

1343 if obs["n_time"] < 2: 1343 ↛ 1344line 1343 didn't jump to line 1344 because the condition on line 1343 was never true

1344 return noise_arr

1345

1346 if bi_use is None: 1346 ↛ 1349line 1346 didn't jump to line 1349 because the condition on line 1346 was always true

1347 vis_weights_use, bi_use = split_vis_weights(obs, vis_weights)

1348 else:

1349 vis_weights_use = vis_weights

1350

1351 for pol_i in range(obs["n_pol"]):

1352 data_diff = (

1353 vis_arr[pol_i, :, bi_use[0]].imag - vis_arr[pol_i, :, bi_use[1]].imag

1354 )

1355 vis_weight_diff = np.maximum(

1356 vis_weights_use[pol_i, :, bi_use[0]], 0

1357 ) * np.maximum(vis_weights_use[pol_i, :, bi_use[1]], 0)

1358 for fi in range(obs["n_freq"]):

1359 ind_use = np.where(vis_weight_diff[:, fi])[0]

1360 if ind_use.size > 0:

1361 noise_arr[pol_i, fi] = np.std(data_diff[ind_use, fi]) / np.sqrt(2)

1362

1363 return noise_arr

1364

1365

1366def idl_median(

1367 x: NDArray[np.integer | np.floating | np.complexfloating],

1368 width: int = 0,

1369 even: bool = False,

1370) -> float:

1371 """

1372 The IDL Median function doesn't always work as you'd expect, as generally you need to use the

1373 EVEN keyword to get the median of an even number of elements, otherwise it returns the

1374 maximum element of the two numbers in the middle of an even sorted array.

1375 This function replicates the IDL median function, in case you need that functionality.

1376 Typically though, we recommend using `numpy.median` or `scipy.ndimage.median_filter`.

1377

1378 Parameters

1379 ----------

1380 x : NDArray[np.integer | np.floating | np.complexfloating]

1381 Data to perform median on

1382 width : int

1383 If set, perform a type of median filtering.

1384 even : bool, optional

1385 In this case it will just run numpy.median, by default False

1386

1387

1388 When `width` is set. unfortunately the edge conditions when using cannot be

1389 replicated soley with `scipy.ndimage.median_filter` so use `median_filter` and set the edge cases manually

1390

1391 Returns

1392 -------

1393 float

1394 The median of the array

1395

1396 See Also

1397 --------

1398 numpy.median: Computes the median of an array

1399 scipy.ndimage.median_filter: Applies a median filter to an array

1400 """

1401

1402 if width:

1403 # IDL median leaves everything within width//2 pixels of the edge alone

1404 # So just shove the outputs of median_filter everywhere else. None of

1405 # the `modes` in median_filter capture this behaviour

1406 output = deepcopy(x)

1407

1408 hw = width // 2

1409 output[hw:-hw] = median_filter(x, size=width)[hw:-hw]

1410

1411 return output

1412

1413 else:

1414 if even: 1414 ↛ 1415line 1414 didn't jump to line 1415 because the condition on line 1414 was never true

1415 return np.median(x)

1416 else:

1417 med_index = int(np.ceil(len(x) / 2))

1418

1419 return np.sort(x)[med_index]

1420

1421

1422def reshape_and_average_in_time(

1423 vis_array: NDArray[np.complex128],

1424 n_freq: int,

1425 n_time: int,

1426 n_baselines: int,

1427 vis_weights: NDArray[np.float64],

1428) -> NDArray[np.complex128]:

1429 """Given a single polarisation 2D `vis_array` of shape (n_freq, n_time*n_baselines),

1430 reshape into (n_freq, n_time, n_baselines), and then average in time, weighting

1431 by `vis_weights` (must be of shape (n_freq, n_time, n_baselines))

1432 Returns the averaged array in shape (n_freq, n_baselines)

1433

1434 Parameters

1435 ----------

1436 vis_array : NDArray[np.complex128]

1437 The visibility array

1438 n_freq : int

1439 Number of frequencies

1440 n_time : int

1441 Number of time steps

1442 n_baselines : int

1443 Number of baselines

1444 vis_weights : NDArray[np.float64]

1445 The visibility weights array

1446

1447 Returns

1448 -------

1449 reshape_array: NDArray[np.complex128]

1450 The reshaped visibility array the same shape as the visibility weights array

1451 """

1452

1453 new_shape = (n_freq, n_time, n_baselines)

1454

1455 if vis_weights.shape != new_shape: 1455 ↛ 1456line 1455 didn't jump to line 1456 because the condition on line 1455 was never true

1456 exit(

1457 f"Attempting to use weights with shape {vis_weights.shape} in `reshape_and_average_in_time`, this is not allowed"

1458 )

1459

1460 reshape_array = np.reshape(vis_array, new_shape)

1461 reshape_array = np.sum(reshape_array * vis_weights, axis=1)

1462

1463 return reshape_array

1464

1465

1466def region_grow(

1467 image: NDArray[np.integer | np.floating | np.complexfloating],

1468 roiPixels: NDArray[np.integer],

1469 low: int | float | None = None,

1470 high: int | float | None = None,

1471) -> NDArray[np.integer | np.floating | np.complexfloating] | None:

1472 """

1473 Replicates IDL's Region Grow, where a region of interest will grow based upon a given threshold

1474 within a 2D array. It finds all the pixels within the array that are connected neighbors via the

1475 threshold and blob detection using SciPy's `label` function. In this case, the standard deviation

1476 form of this function hasn't been implemented as PyFHD will only use this function once with a threshold.

1477

1478 If you want to use standard deviation region growing adjusting the function can be done by potentially

1479 implementing skimage's blob detection algorithms for the labelling and keeping the rest the same.

1480

1481 Parameters

1482 ----------

1483 image : NDArray[np.integer | np.floating | np.complexfloating]

1484 A 2D array of pixels

1485 roiPixels : NDArray[np.integer]

1486 The region of interest given as FLAT indexes i.e. array.flat

1487 low : int | float | None, optional

1488 The low threshold, any number below this is considered background,

1489 If left as None, this will be the lowest value of the region of interest, by default None

1490 high : int | float | None, optional

1491 The high threshold, any number higher than this is considered background,

1492 If left as None, this will be the highest value of the region of interest, by default None

1493

1494 Returns

1495 -------

1496 growROIPixels: NDArray[np.integer | np.floating | np.complexfloating] | None

1497 The grown region of interest that has connected neighbours by using the threshold

1498

1499 See Also

1500 --------

1501 scipy.ndimage.label: Labels an image based off a given kernel

1502

1503 Notes

1504 -----

1505 'scikit-image Blob Detection' : https://scikit-image.org/docs/stable/auto_examples/features_detection/plot_blob.html

1506 """

1507 # Get the roi and set the low and high thresholds if they haven't been so already.

1508 roi = image.flat[roiPixels]

1509 if low is None: 1509 ↛ 1510line 1509 didn't jump to line 1510 because the condition on line 1509 was never true

1510 low = np.min(roi)

1511 if high is None: 1511 ↛ 1512line 1511 didn't jump to line 1512 because the condition on line 1511 was never true

1512 high = np.max(roi)

1513 # Replace all NaNs with Zeros

1514 nans = np.isnan(image)

1515 image[nans] = 0

1516 # Get all the values that are within the threshold

1517 threshArray = np.zeros_like(image)

1518 threshArray[np.where((image >= low) & (image <= high))] = 1

1519 threshArray[nans] = 0

1520 # Do binary blob detection with the label function

1521 labelArray, _ = label(threshArray)

1522 # Make the edges background as the Region grow IDL does not do the edges

1523 # of an array

1524 if labelArray.ndim == 2:

1525 labelArray[0, :] = 0

1526 labelArray[:, 0] = 0

1527 labelArray[-1, :] = 0

1528 labelArray[:, -1] = 0

1529 else:

1530 labelArray[0] = 0

1531 labelArray[-1] = 0

1532 # Get the histogram of the labels to ascertain the neighbours we will be interested in

1533 if np.size(roiPixels) > 1:

1534 labels, _, _ = histogram(labelArray.flat[roiPixels], min=0)

1535 labels = np.nonzero(labels != 0)[0]

1536 nLabels = labels.size

1537 else:

1538 nLabels = 1

1539 labels = labelArray.flat[roiPixels]

1540 if not isinstance(labels, np.ndarray):

1541 labels = np.array([labels], dtype=labelArray.dtype)

1542 # Ignore the first label if it's 0 as it's the background

1543 if labels[0] == 0:

1544 nLabels -= 1

1545 if nLabels > 0: 1545 ↛ 1548line 1545 didn't jump to line 1548 because the condition on line 1545 was always true

1546 labels = labels[1:]

1547 # The histogram will have a minimum of 1 so we need to take 1 off the labels

1548 if nLabels: 1548 ↛ 1551line 1548 didn't jump to line 1551 because the condition on line 1548 was always true

1549 labels -= 1

1550 # Get a histogram of all the labels

1551 labelHist, _, revInd = histogram(labelArray, min=1)

1552 # Get the number of pixels we will be growing to

1553 nPixels = np.sum(labelHist[labels]) if nLabels else 0

1554 # If we have any pixels to grow, then grow

1555 if nPixels > 0: 1555 ↛ 1571line 1555 didn't jump to line 1571 because the condition on line 1555 was always true

1556 # Only one label

1557 if nLabels == 1: 1557 ↛ 1561line 1557 didn't jump to line 1561 because the condition on line 1557 was always true

1558 growROIPixels = revInd[revInd[labels[0]] : revInd[labels[0] + 1]]

1559 else:

1560 # Take in all the labels and save all the flat indexes

1561 growROIPixels = np.empty(nPixels, dtype=np.int64)

1562 j = 0

1563 for i in range(nLabels):

1564 if revInd[labels[i] + 1] <= revInd.size:

1565 growROIPixels[j : j + labelHist[labels[i]]] = revInd[

1566 revInd[labels[i]] : revInd[labels[i] + 1]

1567 ]

1568 j = j + labelHist[labels[i]]

1569 else:

1570 # Return None if we didn't have anywhere to grow

1571 growROIPixels = None

1572 # Return the flat indexes

1573 return growROIPixels

1574

1575

1576def crosspol_split_real_imaginary(

1577 image: NDArray[np.complex128], pol_names: list[str] | None = None

1578) -> tuple[NDArray[np.complex128], list[str] | None]:

1579 """

1580 Reformat the input full polarisation image, containing PP, PQ, QP, and QQ,

1581 into PP, real(PQ), imaginary(PQ), and QQ. PQ and QP (i.e. XY and YX)

1582 are complex and conjugate mirrors of one another. To make a understandable

1583 image, we can plot the real and imaginary parts of PQ separately without

1584 loss of information.

1585

1586 Parameters

1587 ----------

1588 image : NDArray[np.complex128]

1589 Image array ordered in polarisation by PP, PQ, QP, and QQ.

1590 pol_names : list[str] | None, optional

1591 The name of the polarisations, by default None

1592

1593 Returns

1594 -------

1595 image : NDArray[np.complex128]

1596 Image array ordered in polarisation by PP, real(PQ), imaginary(PQ),

1597 and QQ

1598 pol_names : list[str]|None]

1599 New polarisation name array to reflect real(PQ) and imaginary(PQ)

1600 """

1601 crosspol_image: NDArray[np.complex128] = image[2]

1602 image[2] = crosspol_image.real

1603 image[3] = crosspol_image.imag

1604

1605 if pol_names is not None:

1606 crosspol_name: str = pol_names[2]

1607 pol_names[2] = f"{crosspol_name}_real"

1608 pol_names[3] = f"{crosspol_name}_imag"

1609

1610 return image, pol_names

Coverage for PyFHD/pyfhd_tools/pyfhd_utils.py: 78%

475 statements