Coverage for PyFHD/pyfhd_tools/pyfhd_utils.py: 78%
475 statements
« prev ^ index » next coverage.py v7.9.1, created at 2025-07-01 10:58 +0800
« prev ^ index » next coverage.py v7.9.1, created at 2025-07-01 10:58 +0800
1import subprocess
2from copy import deepcopy
3from logging import Logger
4from math import factorial, pi
5from sys import exit
6import h5py
7import numpy as np
8from astropy import units as u
9from astropy.coordinates import SkyCoord
10from numba import njit
11from numpy.typing import ArrayLike, NDArray
12from scipy.ndimage import label, median_filter
15@njit
16def get_bins(
17 min: int | float, max: int | float, bin_size: int
18) -> NDArray[np.float64 | np.int64]:
19 """
20 Calculates the bins for the histogram and reverse indices based on a
21 minimum and maximum value, plus a given bin size. It mirrors IDL's way of
22 calculating the bin edges. In IDL bins seem to be right hand side of the bin open
23 like IDL, even at the end. However, in comparison to NumPy, the last bin is always
24 the maximum value. This does utilize numba and parallelization to make it faster.
26 Parameters
27 ----------
28 min : int | float
29 The minimum chosen for the histogram
30 max : int | float
31 The maximum chosen for the histogram
32 bin_size : int
33 The bin size chosen for the histogram. This histogram always uses bins
34 of equal widths.
36 Returns
37 -------
38 bins: NDArray[np.float64 | np.int64]
39 A NumPy array of all the bins ranging from min (bin[0]) to the max (bin[-1]). The step
40 is bin_size.
42 See Also
43 --------
44 histogram: Calculates the bins, histogram and reverse indices
45 get_hist: Calculates histogram only
46 get_ri: Calculates the reverse indices only
47 """
49 return np.arange(min, max + bin_size, bin_size)
52@njit
53def get_hist(
54 data: NDArray[np.floating | np.integer | np.complexfloating],
55 bins: NDArray[np.float64 | np.int64],
56 min: int | float,
57 max: int | float,
58) -> NDArray[np.int64]:
59 """
60 Calculates the histogram based on the given bins and data, taking into account
61 the minimum and maximum
63 Parameters
64 ----------
65 data : NDArray[np.floating | np.integer | np.complexfloating]
66 A NumPy array that is of one dtype float, int, or complex. Cannot be object
67 bins : NDArray[np.float64 | np.int64]
68 A NumPy array of bins for the histogram
69 min : int | float
70 The minimum set for the data
71 max : int | float
72 The maximum set for the data
74 Returns
75 -------
76 hist: NDArray[np.int64]
77 A histogram of the data using the bins found in the bins array.
79 See Also
80 --------
81 histogram: Calculates the bins, histogram and reverse indices.
82 get_bins: Calculates the bins only
83 get_ri: Calculates the reverse indices only
84 """
86 bin_l = bins.size
87 # Setup the histogram
88 hist = np.zeros(bin_l, dtype=np.int64)
89 # Setup the things required for the indexing
90 n = bin_l - 1
91 bin_min = bins[0]
92 bin_max = bins[-1]
93 # In the case that the bin min and bin max is the same,
94 # then finding the index doesn't matter so set the divider
95 # to 1 (as there is only 1 bin!)
96 if bin_min == bin_max:
97 bin_divider = 1
98 else:
99 bin_divider = bin_max - bin_min
100 # Now loop through the data
101 for idx in range(data.size):
102 # Check if its inside the range we set
103 if data[idx] < min or data[idx] > max:
104 continue
105 # Calculate the index for indices and histogram
106 bin_i = int(n * (data[idx] - bin_min) / bin_divider)
107 # Add to the histogram
108 hist[bin_i] += 1
109 return hist
112@njit
113def get_ri(
114 data: NDArray[np.floating | np.integer | np.complexfloating],
115 bins: NDArray[np.float64 | np.int64],
116 hist: NDArray[np.int64],
117 min: int | float,
118 max: int | float,
119) -> NDArray[np.int64]:
120 """
121 Calculates the reverse indices of a data and histogram.
122 The function replicates IDL's REVERSE_INDICES keyword within
123 their HISTOGRAM function. The reverse indices array which is returned
124 by this function can be hard to understand at first, I will explain it here
125 and also link JD Smith's famous article on IDL's HISTOGRAM.
127 The reverse indices array is two vectors concatenated together. The first vector contains
128 indexes for the second vector, this vector should be the size of bins + 1.
129 The second vector contains indexes from the data itself, and should be the size of data.
130 The justification for having such an array is to quickly make adjustments to certain bins
131 without having to search the array multiple times, thus avoiding multiple O(data.size) loops.
133 The first vector indexes contain the starting positions of each bin in the second vector. For example,
134 between the indexes given by first_vector[0] and first_vector[1] of the second vector should be all the
135 indexes of bins[0] from inside the data. So if I wanted to make adjustments to the entire first bin,
136 and only the first bin I can use the reverse indices array, ri to do this. Let's say I wanted to flag
137 all values of bins[0] with -1 for some reason to make them invalid in other calculations with the data,
138 then I could do this:
140 `data[ri[ri[0] : ri[1]]] = -1`
142 Or more generally
144 `data[ri[ri[i] : ri[i + 1]]] = -1`
146 Where i is 0 <= i <= bins.size.
148 If you wish to gain a better understanding of how this get_ri function works, and the associated
149 histogram function I have created here, please use the link given in the Notes section. This
150 link will take you JD Smith's article on IDL's HISTOGRAM, it is an article which explains the
151 IDL HISTOGRAM function better than IDL's own documentation. If you must gain a deeper understanding,
152 read it once, gasp and get your shocks and many cries of why out of your system, then read it again.
153 And keep reading it till you understand, as per the editor's note on the article:
155 "...If you read it enough, the secrets of the command will be revealed to you. Stranger things have happened"
157 Parameters
158 ----------
159 data : NDArray[np.floating | np.integer | np.complexfloating]
160 A NumPy array of the data
161 bins : NDArray[np.float64 | np.int64]
162 A NumPy array containing the bins for the histogram
163 hist : NDArray[np.int64]
164 A NumPy array containing the histogram
165 min : int | float
166 The minimum for the dataset
167 max : int | float
168 The maximum for the dataset
170 Returns
171 -------
172 ri : NDArray[np.int64]
173 An array containing the reverse indices, which is two vectors, the first vector containing indexes for the second vector.
174 The second vector contains indexes for the data.
176 See Also
177 ---------
178 histogram: Calculates the bins, histogram and reverse indices.
179 get_bins: Calculates the bins only
180 get_hist: Calculates histogram only
182 Notes
183 ------
184 'HISTOGRAM: The Breathless Horror and Disgust' : http://www.idlcoyote.com/tips/histogram_tutorial.html
185 """
187 bin_l = bins.size
188 # Setup the first vector
189 data_idx = bin_l + 1
190 first_v = [bin_l + 1]
191 # Add the bins
192 for bin in hist:
193 data_idx += bin
194 first_v.append(data_idx)
195 # Setup the reverse indices
196 ri = np.zeros(bin_l + 1 + data.size, dtype=np.int64)
197 ri[0 : bin_l + 1] = first_v
198 # Create a tracking array to keep track of where we are in the data indexing
199 tracker = np.array(first_v)
200 # Setup the things required for the indexing
201 n = bin_l - 1
202 bin_min = bins[0]
203 bin_max = bins[-1]
204 if bin_min == bin_max:
205 bin_divider = 1
206 else:
207 bin_divider = bin_max - bin_min
208 # Setup counter to remove elements in case of min or max being set
209 counter = 0
210 for idx in range(data.size):
211 # Check if its inside the range we set
212 if data[idx] < min or data[idx] > max:
213 counter += 1
214 continue
215 # Calculate the index for indices and histogram
216 bin_i = int(n * (data[idx] - bin_min) / bin_divider)
217 # Record the current index and update the tracking array
218 ri[tracker[bin_i]] = idx
219 tracker[bin_i] += 1
220 ri = ri[: ri.size - counter]
221 return ri
224def histogram(
225 data: NDArray[np.floating | np.integer | np.complexfloating],
226 bin_size: int = 1,
227 num_bins: int | None = None,
228 min: int | float | None = None,
229 max: int | float | None = None,
230) -> tuple[NDArray[np.int64], NDArray[np.float64 | np.int64], NDArray[np.int64]]:
231 """
232 The histogram function combines the use of the get_bins, get_hist and get_ri
233 functions into one function. For the descriptions and docs of those functions
234 look in See Also. This function will return the histogram, bin/bin_edges and
235 the reverse indices.
237 Parameters
238 ----------
239 data : NDArray[np.floating | np.integer | np.complexfloating]
240 A NumPy array containing the data we want a histogram of
241 bin_size : int, optional
242 Sets the bin size for this histogram, by default 1
243 num_bins : int | None, optional
244 Set the number of bins this does override bin_size completely, by default None
245 min : int | float | None, optional
246 Set a minimum for the dataset, by default None
247 max : int | float | None, optional
248 Set a maximum for the dataset, by default None
250 Returns
251 -------
252 hist : NDArray[np.int64]
253 The histogram of the data
254 bins : NDArray[np.float64 | np.int64]
255 The bins of the histogram
256 ri : NDArray[np.int64]
257 The reverse indices array for the histogram and data
259 See Also
260 --------
261 get_bins: Calculates the bins only
262 get_hist: Calculates histogram only
263 get_ri: Calculates the reverse indices only
264 """
266 # If the minimum has not been set, set it
267 if min is None:
268 min = np.min(data)
269 # If the maximum has not been set, set it
270 # Check if the max argument was used, set to True, if we set max here by data turn it off.
271 if max is None:
272 max = np.max(data)
273 # If the number of bins has been set use that
274 if num_bins is not None:
275 bin_size = (max - min) / num_bins
276 # Need to add checks if max is below min or max below min
277 if min > max: 277 ↛ 278line 277 didn't jump to line 278 because the condition on line 277 was never true
278 max = min
279 if max < min: 279 ↛ 280line 279 didn't jump to line 280 because the condition on line 279 was never true
280 min = max
281 # IDL uses the bin_size as equal throughout min to max
282 bins = get_bins(min, max, bin_size)
283 # However, if we set a max, we must adjust the last bin to max according to IDL specifications
284 # And we only do this in the case max was by an argument
285 if (bins[-1] > max) or num_bins is not None:
286 bins = bins[:-1]
287 # Flatten the data
288 data_flat = data.flatten()
289 # Get the histogram
290 hist = get_hist(data_flat, bins, min, max)
291 # Get the reverse indices
292 ri = get_ri(data_flat, bins, hist, min, max)
293 # Return
294 return hist, bins, ri
297def l_m_n(
298 obs: dict,
299 psf: dict | h5py.File,
300 obsdec: float | None = None,
301 obsra: float | None = None,
302 declination_arr: NDArray[np.floating] | None = None,
303 right_ascension_arr: NDArray[np.floating] | None = None,
304) -> tuple[NDArray[np.float64], NDArray[np.float64], NDArray[np.float64]]:
305 """
306 Calculate the directional cosines l,m,n given an RA, Dec, and phase centre for
307 each pixel in the input arrays given, or from the hyperresolved RA/Dec array
308 created during beam creation.
310 Parameters
311 ----------
312 obs: dict
313 Observation metadata dictionary.
314 psf: dict | h5py.File
315 Beam dictionary.
316 obsdec: float | None, optional
317 The Dec phase centre for the observation, by default None and set to
318 the phase centre saved in the observation metadata dicitonary
319 (obs['obsdec'])
320 obsra: float | None, optional
321 The RA phase centre for the observation, by default None and set to
322 the phase centre saved in the observation metadata dicitonary
323 (obs['obsra'])
324 declination_arr: NDArray[np.floating] | None, optional
325 An array of declinations, by default None and set to the hyperresolved Dec
326 array in psf['image_info']['dec_arr']
327 right_ascension_arr: NDArray[np.floating] | None, optional
328 An array of right ascensions, by default None and set to the hyperresolved RA
329 array in psf['image_info']['ra_arr']
331 Returns
332 -------
333 l_mode : NDArray[np.float64]
334 Directional cosine l array, or the cosine of the vector that contributes
335 to the x-axis
336 m_mode : NDArray[np.float64]
337 Directional cosine m array, or the cosine of the vector that contributes
338 to the y-axis
339 n_tracked : NDArray[np.float64]
340 Directional cosine n array, or the cosine of the vector that contributes
341 to the z-axis, dependent on the phase centre.
342 """
343 # If the variables passed through are None them
344 if obsdec is None:
345 obsdec = obs["obsdec"]
346 if obsra is None:
347 obsra = obs["obsra"]
348 if declination_arr is None:
349 declination_arr = psf["image_info"]["dec_arr"]
350 if right_ascension_arr is None:
351 right_ascension_arr = psf["image_info"]["ra_arr"]
353 # Convert all the degrees given into radians
354 obsdec = np.radians(obsdec)
355 obsra = np.radians(obsra)
356 declination_arr = np.radians(declination_arr)
357 right_ascension_arr = np.radians(right_ascension_arr)
359 # Calculate l mode, m mode and the phase-tracked n mode of pixel centers
360 cdec0 = np.cos(obsdec)
361 sdec0 = np.sin(obsdec)
362 cdec = np.cos(declination_arr)
363 sdec = np.sin(declination_arr)
364 cdra = np.cos(right_ascension_arr - obsra)
365 sdra = np.sin(right_ascension_arr - obsra)
366 l_mode = sdra * cdec
367 m_mode = cdec0 * sdec - cdec * sdec0 * cdra
368 # n=1 at phase center, so reference from there for phase tracking
369 n_tracked = (sdec * sdec0 + cdec * cdec0 * cdra) - 1
371 # find any NaN values
372 nan_vals = np.where(np.isnan(n_tracked))
373 # If any found, replace them with 0's
374 if np.size(nan_vals) > 0: 374 ↛ 380line 374 didn't jump to line 380 because the condition on line 374 was always true
375 n_tracked[nan_vals] = 0
376 l_mode[nan_vals] = 0
377 m_mode[nan_vals] = 0
379 # Return the modes
380 return l_mode, m_mode, n_tracked
383def rebin_columns(
384 a: NDArray[np.integer | np.floating | np.complexfloating],
385 ax: int,
386 shape: tuple,
387 col_sizer: int,
388) -> NDArray[np.integer | np.floating | np.complexfloating]:
389 """
390 Performs expansion on the columns of a 1D or 2D array using interpolation to fill in the values that are created
391 by expanding in the space between existing values. This function assumes the rows have already been expanded
392 to the required number.
394 Parameters
395 ----------
396 a : NDArray[np.integer | np.floating | np.complexfloating]
397 An array to be expanded
398 ax : int
399 The axis we're expanding, almost always set to 1
400 shape : tuple
401 The shape of the original array
402 col_sizer : int
403 The number of columns we're adding in
405 Returns
406 -------
407 rebinned : NDArray[np.integer | np.floating | np.complexfloating]
408 An interpolated array of a containing shape[1] * col_sizer columns
410 See Also
411 --------
412 PyFHD.pyfhd_tools.pyfhd_utils.rebin_rows : Expand the number of rows through interpolation
413 PyFHD.pyfhd_tools.pyfhd_utils.rebin : Expand or Contract an array based on a given shape
414 """
415 # tile the range of col_sizer
416 tiles = np.tile(np.arange(col_sizer), (shape[0], shape[1] // col_sizer - 1))
417 # Get the differences between the columns
418 differences = np.diff(a, axis=ax) / col_sizer
419 # Multiply this by the tiles
420 inferences_non_pad = np.repeat(differences, col_sizer, axis=ax) * tiles
421 # Pad the zeros for the last two rows, and remove the extra zeros to make inferences same shape as desired shape
422 inferences = np.pad(inferences_non_pad, (0, col_sizer))[:-col_sizer]
423 if np.issubdtype(a.dtype, np.integer):
424 inferences = np.floor(inferences).astype("int")
425 # Now get our final array by adding the repeat of our rows rebinned to the inferences
426 rebinned = inferences + np.repeat(a, col_sizer, axis=ax)
427 return rebinned
430def rebin_rows(
431 a: NDArray[np.integer | np.floating | np.complexfloating],
432 ax: int,
433 shape: tuple,
434 old_shape: tuple,
435 row_sizer: int,
436) -> NDArray[np.integer | np.floating | np.complexfloating]:
437 """
438 Performs expansion on the rows of array `a` to the number of rows in shape[0] using interpolation to fill between any
439 new values added when adding new rows between existing values.
441 Parameters
442 ----------
443 a : NDArray[np.integer | np.floating | np.complexfloating]
444 The array to be rebinned
445 ax : int
446 The axis we're expanding, almost always set to 0
447 shape : tuple
448 The new shape we're expanding to
449 old_shape : tuple
450 The shape of the array a
451 row_sizer : int
452 The number of rows we're expanding by
454 Returns
455 -------
456 row_rebinned : NDArray[np.integer | np.floating | np.complexfloating]
457 The interpolated array with row_sizer extra columns between existing rows
459 See Also
460 --------
461 PyFHD.pyfhd_tools.pyfhd_utils.rebin_columns : Expand the number of columns through interpolation
462 PyFHD.pyfhd_tools.pyfhd_utils.rebin : Expand or Contract an array based on a given shape
463 """
464 # Tile the range of row_sizer
465 tiles = np.tile(
466 np.array_split(np.arange(row_sizer), row_sizer),
467 ((shape[0] - row_sizer) // row_sizer, old_shape[1]),
468 )
469 # Get the differences between values
470 differences = np.diff(a, axis=ax) / row_sizer
471 # Multiply differences array by tiles to get desired bins
472 inferences_non_pad = np.repeat(differences, row_sizer, axis=ax) * tiles
473 # Pad the inferences to get the same shape as above
474 inferences = np.pad(inferences_non_pad, (0, row_sizer))[:, :-row_sizer]
475 if np.issubdtype(a.dtype, np.integer):
476 inferences = np.floor(inferences).astype("int")
477 # Add this to the original array that has been repeated to match the size of inference
478 row_rebinned = inferences + np.repeat(a, row_sizer, axis=ax)
479 return row_rebinned
482def rebin(
483 a: NDArray[np.integer | np.floating | np.complexfloating],
484 shape: ArrayLike,
485 sample: bool = False,
486) -> NDArray[np.integer | np.floating | np.complexfloating]:
487 """
488 Resizes a 2D array by averaging or repeating elements, new dimensions must be integral factors of original dimensions.
490 In the case of expanding an existing array, rebin will interpolate between the original values with a linear function.
491 In the case of compressing an existing array, rebin will average
493 Parameters
494 ----------
495 a : NDArray[np.integer | np.floating | np.complexfloating]
496 Input array.
497 new_shape : ArrayLike
498 Shape of the output array in (rows, columns)
499 Must be a factor or multiple of a.shape
500 sample: bool, optional
501 Use this to get samples using rebin, rather than interpolation, by default False.
503 Returns
504 -------
505 rebinned : NDArray[np.integer | np.floating | np.complexfloating]
506 If the new shape is smaller of the input array, the data are averaged,
507 if the new shape is bigger array elements are repeated and interpolated
509 Examples
510 --------
511 >>> test = np.array([0,10,20,30])
512 >>> rebin(test, (1,8)) # Expand Columns
513 array([ 0, 5, 10, 15, 20, 25, 30, 30])
514 >>> rebin(test, (2,8)) # Expand Rows and Columns
515 array([[ 0, 5, 10, 15, 20, 25, 30, 30],
516 [ 0, 5, 10, 15, 20, 25, 30, 30]])
517 >>> data = np.array([[ -5, 4, 2, -8, 1],
518 [ 3, 0, 5, -5, 1],
519 [ 6, -7, 4, -4, -8],
520 [ -1, -5, -14, 2, 1]])
521 >>> rebin(data, (8,10)) # 2D Array example
522 array([[ -5, -1, 4, 3, 2, -3, -8, -4, 1, 1],
523 [ -1, 0, 2, 2, 3, -2, -7, -3, 1, 1],
524 [ 3, 1, 0, 2, 5, 0, -5, -2, 1, 1],
525 [ 4, 0, -4, 0, 4, -1, -5, -5, -4, -4],
526 [ 6, -1, -7, -2, 4, 0, -4, -6, -8, -8],
527 [ 2, -2, -6, -6, -5, -3, -1, -3, -4, -4],
528 [ -1, -3, -5, -10, -14, -6, 2, 1, 1, 1],
529 [ -1, -3, -5, -10, -14, -6, 2, 1, 1, 1]])
530 >>> rebin(data, (2,5)) # Compression
531 array([[-1, 2, 3, -6, 1],
532 [ 2, -6, -5, -1, -3]])
533 >>> to_compress = np.array([[3, 9, 7, 0, 1, 5],
534 [7, 7, 1, 9, 7, 3],
535 [9, 2, 2, 3, 1, 1],
536 [0, 3, 5, 0, 4, 3],
537 [5, 7, 7, 1, 9, 1],
538 [7, 2, 1, 1, 3, 0]])
539 >>> rebin(to_compress, (3,3)) # Compression
540 array([[6, 4, 4],
541 [3, 2, 2],
542 [5, 2, 3]])
544 References
545 ----------
546 [1] https://stackoverflow.com/a/8090605
548 See Also
549 --------
550 PyFHD.pyfhd_tools.pyfhd_utils.rebin_rows : Expand the number of rows through interpolation
551 PyFHD.pyfhd_tools.pyfhd_utils.rebin_columns : Expand the number of columns through interpolation
552 """
553 old_shape = a.shape
554 # Prevent more processing than needed if we want the same shape
555 if old_shape == shape:
556 return a
557 if len(old_shape) == 1:
558 old_shape = (1, old_shape[0])
559 # Sample the original array using rebin
560 if sample:
561 # If its a 1D array then...
562 if old_shape[0] == 1:
563 if shape[1] > old_shape[1]:
564 rebinned = np.repeat(a, shape[1] // old_shape[1], axis=0)
565 else:
566 rebinned = a[:: old_shape[1] // shape[1]]
567 # Assume its a 2D array
568 else:
569 # Do the Rows first
570 if shape[0] >= old_shape[0]:
571 # Expand Rows
572 rebinned = np.repeat(a, shape[0] // old_shape[0], axis=0)
573 else:
574 # Compress Rows
575 rebinned = a[:: old_shape[0] // shape[0], :]
576 # Then do the columns
577 if shape[1] >= old_shape[1]:
578 # Expand Columns
579 rebinned = np.repeat(rebinned, shape[1] // old_shape[1], axis=1)
580 else:
581 # Compress columns
582 rebinned = rebinned[:, :: old_shape[1] // shape[1]]
583 # Return the rebinned without adjusting dtype as none of the functions above change it
584 return rebinned
586 # If we are downsizing
587 elif shape[0] < old_shape[0] or shape[1] < old_shape[1]:
588 if (max(old_shape[0], shape[0]) % min(old_shape[0], shape[0]) != 0) or ( 588 ↛ 591line 588 didn't jump to line 591 because the condition on line 588 was never true
589 max(old_shape[1], shape[1]) % min(old_shape[1], shape[1]) != 0
590 ):
591 raise ValueError("Your new shape should be a factor of the original shape")
592 # If we are increasing the rows or columns and reducing the other, increase them now and change the old shape
593 if shape[0] > old_shape[0]:
594 a = np.tile(a, (shape[0], 1))
595 old_shape = a.shape
596 elif shape[1] > old_shape[1]:
597 a = np.tile(a, (1, shape[1]))
598 old_shape = a.shape
599 # Create the shape we need (rows, rows that can fit in old_shape, cols, cols that can fit into old_shape)
600 sh = shape[0], old_shape[0] // shape[0], shape[1], old_shape[1] // shape[1]
601 # Create the 4D array
602 rebinned = np.reshape(a, sh)
603 # Get the average of the columns first
604 rebinned = rebinned.mean(-1)
605 # To ensure that we get the result same as IDL
606 # it seems to fix the values after every calculation if integer
607 if np.issubdtype(a.dtype, np.integer):
608 rebinned = np.fix(rebinned).astype("int")
609 # Now get it for the rows
610 rebinned = rebinned.mean(1)
611 # If we are expecting 1D array ensure it gets returned as a 1D array
612 if shape[0] == 1:
613 rebinned = rebinned[0]
614 # To ensure that we get the result same as IDL
615 # it seems to fix the values after every calculation if integer
616 if np.issubdtype(a.dtype, np.integer):
617 rebinned = np.fix(rebinned).astype("int")
619 # Otherwise we are expanding
620 else:
621 if shape[0] % old_shape[0] != 0 or shape[1] % old_shape[1] != 0: 621 ↛ 622line 621 didn't jump to line 622 because the condition on line 621 was never true
622 raise ValueError(
623 "Your new shape should be a multiple of the original shape"
624 )
625 # Get the size changes of the row and column separately
626 row_sizer = shape[0] // old_shape[0]
627 col_sizer = shape[1] // old_shape[1]
628 ax = 0
629 # If 1D array then do along the columns
630 if old_shape[0] == 1:
631 rebinned = rebin_columns(a, ax, shape, col_sizer)
632 if shape[0] == 1:
633 rebinned = rebinned[0]
634 # Else its a 2D array
635 else:
636 row_rebinned = rebin_rows(a, ax, shape, old_shape, row_sizer)
637 # If it matches the new shape, then return it
638 if row_rebinned.shape == shape:
639 return row_rebinned
640 else:
641 ax = 1
642 rebinned = rebin_columns(row_rebinned, ax, shape, col_sizer)
643 return rebinned
646def weight_invert(
647 weights: (
648 NDArray[np.integer | np.floating | np.complexfloating] | int | float | np.number
649 ),
650 threshold: float | None = None,
651 use_abs: bool = False,
652) -> NDArray[np.integer | np.floating | np.complexfloating] | int | float | np.number:
653 """
654 The weights invert function cleans the weights given by removing
655 the values that are 0, NaN or Inf ready for additional calculation.
656 If a threshold is set, then we check the values that match the threshold
657 instead of checking for zeros.
659 Parameters
660 ----------
661 weights: NDArray[np.integer | np.floating | np.complexfloating] | int | float | np.number
662 An array of values of some dtype
663 threshold: float | None, optional
664 A real number set as the threshold for the array.
665 By default its set to None, in this case function checks
666 for zeros, by default None
667 use_abs: bool, optional
668 If True, take the absolute value (sometimes useful for complex numbers)
669 By default this is False, so will leave as a complex number and invert, by default False
671 Returns
672 -------
673 result: NDArray[np.integer | np.floating | np.complexfloating] | int | float | np.number
674 The weights array that has had NaNs and Infinities removed, and zeros OR
675 values that don't meet the threshold.
676 """
677 # IDL is able to treat one number as an array (because every number is aprrently an array of size 1?)
678 # As such we need to check if it's a number less than or equal to 0 and make a zeros array of size 1
679 if np.isscalar(weights):
680 result = np.zeros(1, dtype=type(weights))
681 weights = np.array([weights], dtype=type(weights))
682 else:
683 result = np.zeros_like(weights, dtype=weights.dtype)
684 weights_use = weights
685 if use_abs or np.iscomplexobj(weights_use):
686 """
687 Python and IDL use the where function on complex numbers differently.
688 On Python, if you apply a real threshold, it applies to only the real numbers,
689 and if you apply an imaginary threshold it applies to only imaginary numbers.
690 For example Python:
691 test = np.array([1j, 2 + 2j, 3j])
692 np.where(test >= 2) == array([1])
693 np.where(test >= 2j) == array([1,2])
694 Meanwhile in IDL:
695 test = COMPLEX([0,2,0],[1,2,3]) ;COMPLEX(REAL, IMAGINARY)
696 where(test ge 2) == [1, 2]
697 where(test ge COMPLEX(0,2)) == [1, 2]
699 IDL on the otherhand, uses the ABS function on COMPLEX numbers before using WHERE.
700 Hence the behaviour we're seeing above. This is why we also check for a complexobj
701 in the if statement
702 """
703 weights_use = np.abs(weights)
705 # If threshold has been set then...
706 if threshold is not None:
707 # Get the indexes which meet the threshold
708 # As indicated before IDL applies abs before using where to complex numbers
709 i_use = np.where(weights_use >= threshold)
710 else:
711 # Otherwise get where they are not zero
712 i_use = np.nonzero(weights_use)
714 if np.size(i_use) > 0:
715 result[i_use] = 1 / weights[i_use]
717 # Replace all NaNs with Zeros
718 if np.size(np.where(np.isnan(result))) != 0:
719 result[np.where(np.isnan(result))] = 0
720 # Replace all Infinities with Zeros
721 if np.size(np.where(np.isinf(result))) != 0: 721 ↛ 722line 721 didn't jump to line 722 because the condition on line 721 was never true
722 result[np.where(np.isinf(result))] = 0
724 # If the result is an array containing 1 result, then return the result, not an array
725 if np.size(result) == 1:
726 return result[0]
727 return result
730def array_match(
731 array_1: NDArray[np.integer | np.floating | np.complexfloating],
732 value_match: NDArray[np.integer | np.floating | np.complexfloating],
733 array_2: NDArray[np.integer | np.floating | np.complexfloating] | None = None,
734) -> NDArray[np.int64]:
735 """
736 Find the indices of the input array which match the array of values provided.
737 If a second input array is provided, then find the indices where the values
738 provided match in both input arrays. This matching can only be done between
739 integer values.
741 Parameters
742 ----------
743 array_1: NDArray[np.integer | np.floating | np.complexfloating]
744 The input array to match the values in
745 value_match: NDArray[np.integer | np.floating | np.complexfloating]
746 The values to find in the input array
747 array_2: NDArray[np.integer | np.floating | np.complexfloating] | None, optional
748 A second array to match the values in via a logical AND, by default is None
750 Returns
751 -------
752 match_indices: NDArray[np.int64]
753 The indices of the input array(s) which match the values provided
755 Raises
756 ------
757 ValueError
758 Gets raised if value_match is None
759 """
761 if value_match is None: 761 ↛ 762line 761 didn't jump to line 762 because the condition on line 761 was never true
762 raise ValueError("Value Match Should be a value not None")
763 # If array_2 has been supplied, compare which mins and maxes to use based on two arrays
764 if array_2 is not None and np.size(array_2) > 0:
765 min_use = np.min([np.min(array_1), np.min(array_2)])
766 max_use = np.max([np.max(array_1), np.max(array_2)])
767 # Also compute the histogram for array_2
768 hist2, _, ri2 = histogram(array_2, min=min_use, max=max_use)
769 else:
770 # If the second array wasn't supplied
771 min_use = np.min(array_1)
772 max_use = np.max(array_1)
773 # Supply a second hist
774 hist2 = np.arange(max_use - min_use + 1)
775 # Get the histogram for the first
776 hist1, _, ri1 = histogram(array_1, min=min_use, max=max_use)
777 # Arrays should be the same size, does addition
778 hist_combined = hist1 + hist2
779 bins = np.where(hist_combined > 0)
781 # Select the values to be used
782 hist_v1, bins_v1, _ = histogram(bins + min_use)
783 omin = bins_v1[0]
784 omax = bins_v1[-1]
785 hist_v2, _, _ = histogram(value_match, min=omin, max=omax)
786 vals = np.nonzero(np.bitwise_and(hist_v1, hist_v2))[0] + omin - min_use
787 n_match = vals.size
789 if n_match == 0:
790 return -1
792 ind_arr = np.zeros_like(array_1)
793 for vi in range(n_match):
794 i = vals[vi]
795 if hist1[i] > 0: 795 ↛ 797line 795 didn't jump to line 797 because the condition on line 795 was always true
796 ind_arr[ri1[ri1[i] : ri1[i + 1]]] = 1
797 if hist2[i] > 0:
798 ind_arr[ri2[ri2[i] : ri2[i + 1]]] = 1
800 match_indices = np.nonzero(ind_arr)[0]
801 # Return our matching indices
802 return match_indices
805def meshgrid(
806 dimension: int, elements: int, axis: int | None = None, return_integer: bool = False
807) -> NDArray[np.int64 | np.float64]:
808 """
809 Generates a 2D array of X or Y values. Could be replaced by a another function
811 Parameters
812 ----------
813 dimension: int
814 Sets the column size of the array to return
815 elements: int
816 Sets the row size of the array to return
817 axis = int | None, optional
818 Set axis = 1 for X values, set axis = 2 for Y values, by default is None
819 return integer: bool, optional
820 dtype is implied by dimension and/or elements
821 by default. If True, sets return array to int, by default is False
823 Returns
824 -------
825 result: NDArray[np.int64 | np.float64]
826 A numpy array of shape (elements, dimension) that is
827 a modified np.arange(elements * dimension).
828 """
829 if axis is None:
830 axis = elements
831 elements = dimension
832 # If elements is set as 2
833 if axis == 2:
834 # Replicate LINDGEN by doing arange and resizing, result is floored
835 result = np.reshape(
836 np.floor(np.arange(elements * dimension) / dimension), (elements, dimension)
837 )
838 else:
839 # Replicate LINDGEN by doing arange and resizing
840 result = np.reshape(
841 np.arange(elements * dimension) % dimension, (elements, dimension)
842 )
843 # If we need to return a float, then set the result and return
844 if return_integer:
845 return result.astype("int")
846 # Otherwise return as is
847 else:
848 return result
851def deriv_coefficients(n: int, divide_factorial: bool = False) -> NDArray[np.float64]:
852 """
853 Computes an array of coefficients resulting in taking the
854 n-th derivative of a function of the form x^a (a must not
855 be a positive integer less than n)
857 Parameters
858 ----------
859 n: int
860 Decides the length of coefficients
861 divide_factorial: bool, optional
862 Determine if we need to divide by the factorial, by default is False
864 Returns
865 -------
866 coeff: NDArray[np.float64]
867 An array of coefficients
868 """
869 if n <= 0:
870 return 0
871 # Set up the array
872 coeff = np.zeros(n)
873 # Set the first coefficient to 1
874 coeff[0] = 1
875 # For every coefficient
876 for m in range(1, n):
877 # Had to do m + 1 for the range as IDL coeff[1:1] == coeff[1], but Python coeff[1:1] == array([])
878 coeff[1 : m + 1] += -m * coeff[0:m]
879 # If we are to divide by the factorial do that to each coefficient
880 if divide_factorial:
881 for m in range(n):
882 coeff[m] /= factorial(m + 1)
884 # Return coefficients
885 return coeff
888def idl_argunique(
889 arr: NDArray[np.integer | np.floating | np.complexfloating],
890) -> NDArray[np.int64]:
891 """
892 In IDL the UNIQ function returns the indexes of the unique values within
893 an array (that is assumed to be sorted beforehand). In NumPy they use the first
894 unique index when using return index, where as in IDL they use the last index.
896 Parameters
897 ----------
898 arr : NDArray[np.integer | np.floating | np.complexfloating]
899 A sorted numpy array of any type.
901 Returns
902 -------
903 NDArray[np.int64]
904 An array containing indexes of the last occurence of the unique value in arr
906 Examples
907 --------
908 >>> test = np.array([10, 30, 5, 100, 200, 75, 200, 100, 30])
909 >>> idl_argunique(test)
910 array([0, 1, 3, 4, 6, 8])
911 """
912 return np.searchsorted(arr, np.unique(arr), side="right") - 1
915def angle_difference(
916 ra1: float,
917 dec1: float,
918 ra2: float,
919 dec2: float,
920 degree: bool = False,
921 nearest: bool = False,
922) -> float:
923 """
924 Calculates the angle difference between two given celestial coordinates.
926 Parameters
927 ----------
928 ra1 : float
929 Right Ascension of a coordinate 1 in radians or degrees
930 dec1 : float
931 Declination of a coordinate 1 in radians or degrees
932 ra2 : float
933 Right Ascension of a coordinate 2 in radians or degrees
934 dec2 : float
935 Declination of a coordinate 2 in radians or degrees
936 degree : bool, optional
937 If True, then we treate the coordinates given as degrees, by default False
938 nearest : bool, optional
939 Calculate implied angle instead, by default False
941 Returns
942 -------
943 relative_angle : float
944 The angle difference in degrees
945 """
947 if degree: 947 ↛ 950line 947 didn't jump to line 950 because the condition on line 947 was always true
948 unit = u.deg
949 else:
950 unit = u.rad
951 coord1 = SkyCoord(ra=ra1 * unit, dec=dec1 * unit)
952 coord2 = SkyCoord(ra=ra2 * unit, dec=dec2 * unit)
953 # Use built in astropy separtion instead of calculating it
954 relative_angle = coord1.separation(coord2).value
955 if nearest:
956 return max(relative_angle, 2 * pi - relative_angle)
957 else:
958 return relative_angle
961def parallactic_angle(latitude: float, hour_angle: float, dec: float) -> float:
962 """
963 Calculates the parallactic angle given latitude (usually a declination), hour_angle and another declination
965 Parameters
966 ----------
967 latitude : float
968 An angle in degrees, usually a declination in this package
969 hour_angle : float
970 The hour angle in degrees
971 dec : float
972 A declination in degrees
974 Returns
975 -------
976 parallactic_angle : float
977 The angle between the great circle through a celestial object and the zenith, and the hour circle of the object
978 """
980 y_term = np.sin(np.radians(hour_angle))
981 x_term = np.cos(np.radians(dec)) * np.tan(np.radians(latitude)) - np.sin(
982 np.radians(dec)
983 ) * np.cos(np.radians(hour_angle))
984 return np.degrees(np.arctan(y_term / x_term))
987def simple_deproject_w_term(
988 obs: dict,
989 params: dict,
990 vis_arr: NDArray[np.complex128],
991 direction: float,
992 logger: Logger,
993) -> NDArray[np.complex128]:
994 """
995 Applies a w-term deprojection to the visibility array
997 Parameters
998 ----------
999 obs : dict
1000 The observation data structure
1001 params : dict
1002 The data from the UVFITS file
1003 vis_arr : NDArray[np.complex128]
1004 The visibility array
1005 direction : float
1006 The direction we apply to the phase
1008 Returns
1009 -------
1010 vis_arr : NDArray[np.complex128]
1011 The visibility array with the deprojection applied
1012 """
1014 icomp = 1j
1015 zcen = np.outer(obs["baseline_info"]["freq"], params["ww"])
1016 sign = 1 if direction > 0 else -1
1017 phase = np.exp(direction * icomp * zcen)
1018 for pol_i in range(obs["n_pol"]):
1019 vis_arr[pol_i, :, :] *= phase
1021 sign_str = " +1" if sign > 0 else " -1"
1022 logger.info(f"Applying simple w-term deprojection:{sign_str}")
1024 return vis_arr
1027def resistant_mean(
1028 array: NDArray[np.integer | np.floating | np.complexfloating],
1029 deviations: int,
1030 mad_scale: float = 0.67449999999999999,
1031 sigma_coeff: NDArray[np.float64] = np.array(
1032 [
1033 0.020142000000000000,
1034 -0.23583999999999999,
1035 0.90722999999999998,
1036 -0.15404999999999999,
1037 ]
1038 ),
1039) -> int | float | complex | np.number:
1040 """
1041 The resistant_mean function translate the IDLAstro function resistant_mean from IDL to Python using NumPy.
1042 The values mad_scale and sigma_coeff are also retrieved from the same IDLAstro function when running in Double
1043 Precision Mode.
1045 The resistant_mean gets the mean of an array which has had a median absolute deviation threshold applied to the
1046 absolute deviations of the array to exclude outliers.
1048 If resistant_mean needs to be optimized, it can be vectorized easily enough
1050 Parameters
1051 ----------
1052 array : NDArray[np.integer | np.floating | np.complexfloating]
1053 A 1 dimensional array of values, multidimensional arrays should be flattened before use
1054 deviations : int
1055 The number of median absolute deviations from the median we want use to exclude outliers
1056 mad_scale : float, optional
1057 The scale factor for the median absolute deviation, by default 0.67449999999999999
1058 sigma_coeff : NDArray[np.float64], optional
1059 The coefficients applied to the polynomial equation to the standard deviation of the points excluded by the outliers for additional exclusion, by default np.array([0.020142000000000000, -0.23583999999999999 , 0.90722999999999998 , -0.15404999999999999])
1061 Returns
1062 -------
1063 resistant_mean : Number
1064 The mean of the array with outliers excluded using median absolute deviation
1066 References
1067 ----------
1068 .. IDLAstro, RESISTANT_Mean, https://idlastro.gsfc.nasa.gov/ftp/pro/robust/resistant_mean.pro
1069 """
1070 # Calculate median of the real part of the array
1071 median = np.median(array.real)
1072 # Get the absolute deviation (residuals)
1073 abs_dev = np.abs(array - median)
1074 # Calculate Median Absolute Deviation
1075 # I could have used scipy's median_abs_deviation to get this, but by doing this manually I can guarantee the same behaviour as IDL
1076 mad = np.median(abs_dev) / mad_scale
1077 # Use MAD and the number of deviations
1078 mad_threshold = deviations * mad
1079 # Subset the array by the deviations and residuals
1080 no_outliers = array[np.where(abs_dev <= mad_threshold)]
1081 # If the deviations is less than 4.5, change the sigma (standard deviation of the subarray) by using a polyval with set sigma coefficient
1082 # This compensates Sigma for truncation
1083 # Calculate the standard deviation of the rela and imag separately
1084 sigma = np.std(no_outliers.real) + np.std(no_outliers.imag) * 1j
1085 # Set the deviationsX to 1.0 if it's less than 1
1086 deviationsX = max(deviations, 1.0)
1087 if deviationsX <= 4.5: 1087 ↛ 1089line 1087 didn't jump to line 1089 because the condition on line 1087 was always true
1088 sigma = sigma / np.polyval(sigma_coeff, deviationsX)
1089 sigma_threshold = sigma * deviations
1090 # Use the sigma threshold to again remove outliers from the array
1091 # Also take the absolute value of sigma_threshold to get the same behaviour as LE in IDL
1092 subarray = array[np.where(abs_dev <= np.abs(sigma_threshold))]
1093 # Get the mean of the subset array which contains no outliers
1094 return np.mean(subarray)
1097def run_command(cmd: str, dry_run=False):
1098 """
1099 Runs the command string `cmd` using `subprocess.run`. Returns any text output to stdout
1101 Parameters
1102 ----------
1103 cmd : str
1104 The command to run on the command line
1105 dry_run : bool
1106 If True, don't actually run the command. Defaults to False (so defaults to running the command)
1107 """
1109 if dry_run:
1110 stdout = "This was a dry run, not launching IDL code\n"
1111 else:
1112 stdout = subprocess.run(cmd.split(), stdout=subprocess.PIPE, text=True).stdout
1114 return stdout
1117def vis_weights_update(
1118 vis_weights: NDArray[np.float64], obs: dict, psf: dict | h5py.File, params: dict
1119) -> tuple[NDArray[np.float64], dict]:
1120 """
1121 Update the visibility weights array to match any updates to the observation
1122 metadata dictionary, including flagged times, frequencies, tiles, and
1123 min/max baseline length.
1125 Parameters
1126 ----------
1127 vis_weights : NDArray[np.float64]
1128 Visibility weights array.
1129 obs : dict
1130 Observation metadata dictionary
1131 psf: dict | h5py.File
1132 Beam dictionary
1133 params : dict
1134 Visibility metadata dictionary
1136 Returns
1137 -------
1138 vis_weights : NDArray[np.float64]
1139 Updated vis_weights given the modified observation metadata dictionary
1140 obs : dict
1141 The observation metadata dictionary, now containing the correct summary
1142 statistics of the new flagging
1143 """
1144 kx_arr = params["uu"] / obs["kpix"]
1145 ky_arr = params["vv"] / obs["kpix"]
1146 dist_test = np.sqrt(kx_arr**2 + ky_arr**2) * obs["kpix"]
1147 dist_test = np.outer(obs["baseline_info"]["freq"], dist_test)
1148 flag_dist_i = np.where(
1149 (dist_test < obs["min_baseline"]) | (dist_test > obs["max_baseline"])
1150 )
1151 conj_i = np.where(ky_arr > 0)
1152 if conj_i[0].size > 0: 1152 ↛ 1155line 1152 didn't jump to line 1155 because the condition on line 1152 was always true
1153 kx_arr[conj_i] = -kx_arr[conj_i]
1154 ky_arr[conj_i] = -ky_arr[conj_i]
1155 psf_dim = psf["dim"]
1156 if isinstance(psf, h5py.File): 1156 ↛ 1157line 1156 didn't jump to line 1157 because the condition on line 1156 was never true
1157 psf_dim = psf_dim[0]
1158 xcen = np.outer(obs["baseline_info"]["freq"], kx_arr)
1159 xmin = np.floor(xcen) + obs["dimension"] / 2 - (psf_dim / 2 - 1)
1160 ycen = np.outer(obs["baseline_info"]["freq"], ky_arr)
1161 ymin = np.floor(ycen) + obs["elements"] / 2 - (psf_dim / 2 - 1)
1163 range_test_x_i = np.where(
1164 (xmin <= 0) | ((xmin + psf_dim - 1) >= obs["dimension"] - 1)
1165 )
1166 if range_test_x_i[0].size > 0: 1166 ↛ 1169line 1166 didn't jump to line 1169 because the condition on line 1166 was always true
1167 xmin[range_test_x_i] = -1
1168 ymin[range_test_x_i] = -1
1169 range_test_y_i = np.where(
1170 (ymin <= 0) | ((ymin + psf_dim - 1) >= obs["elements"] - 1)
1171 )
1172 if range_test_y_i[0].size > 0: 1172 ↛ 1175line 1172 didn't jump to line 1175 because the condition on line 1172 was always true
1173 xmin[range_test_y_i] = -1
1174 ymin[range_test_y_i] = -1
1175 del range_test_x_i
1176 del range_test_y_i
1178 if flag_dist_i[0].size > 0: 1178 ↛ 1183line 1178 didn't jump to line 1183 because the condition on line 1178 was always true
1179 xmin[flag_dist_i] = -1
1180 ymin[flag_dist_i] = -1
1182 # If flag_frequencies is false, freq_use should be all 1's anyway, so this shouldn't affect anything
1183 freq_cut_i = np.where(obs["baseline_info"]["freq_use"] == 0)
1184 if freq_cut_i[0].size > 0: 1184 ↛ 1186line 1184 didn't jump to line 1186 because the condition on line 1184 was always true
1185 vis_weights[0 : obs["n_pol"], freq_cut_i[0], :] = 0
1186 tile_cut_i = np.where(obs["baseline_info"]["tile_use"] == 0)
1187 if tile_cut_i[0].size > 0:
1188 bi_cut = array_match(
1189 obs["baseline_info"]["tile_a"],
1190 tile_cut_i[0] + 1,
1191 obs["baseline_info"]["tile_b"],
1192 )
1193 if np.size(bi_cut) > 0: 1193 ↛ 1196line 1193 didn't jump to line 1196 because the condition on line 1193 was always true
1194 vis_weights[0 : obs["n_pol"], :, bi_cut] = 0
1196 time_cut_i = np.where(obs["baseline_info"]["time_use"] == 0)[0]
1197 bin_offset = np.append(obs["baseline_info"]["bin_offset"], kx_arr.size)
1198 time_bin = np.zeros(kx_arr.size)
1199 for ti in range(obs["baseline_info"]["time_use"].size):
1200 time_bin[bin_offset[ti] : bin_offset[ti + 1]] = ti
1201 for ti in range(time_cut_i.size): 1201 ↛ 1202line 1201 didn't jump to line 1202 because the loop on line 1201 never started
1202 ti_cut = np.where(time_bin == time_cut_i[ti])
1203 if ti_cut[0].size > 0:
1204 vis_weights[0 : obs["n_pol"], :, ti_cut] = 0
1206 flag_i = np.where(vis_weights[0] <= 0)
1207 flag_i_new = np.where(xmin < 0)
1208 if flag_i_new[0].size > 0: 1208 ↛ 1210line 1208 didn't jump to line 1210 because the condition on line 1208 was always true
1209 vis_weights[0 : obs["n_pol"], flag_i_new[0], flag_i_new[1]] = 0
1210 if flag_i[0].size > 0: 1210 ↛ 1214line 1210 didn't jump to line 1214 because the condition on line 1210 was always true
1211 xmin[flag_i] = -1
1212 ymin[flag_i] = -1
1214 if min(np.max(xmin), np.max(ymin)) < 0: 1214 ↛ 1215line 1214 didn't jump to line 1215 because the condition on line 1214 was never true
1215 obs["n_vis"] = 0
1216 return vis_weights, obs
1218 bin_n, _, _ = histogram(xmin + ymin * obs["dimension"], min=0)
1219 obs["n_vis"] = np.sum(bin_n)
1221 obs["n_time_flag"] = np.sum(1 - obs["baseline_info"]["time_use"])
1222 obs["n_tile_flag"] = np.sum(1 - obs["baseline_info"]["tile_use"])
1223 obs["n_freq_flag"] = np.sum(1 - obs["baseline_info"]["freq_use"])
1225 return vis_weights, obs
1228def split_vis_weights(
1229 obs: dict, vis_weights: NDArray[np.float64]
1230) -> tuple[NDArray[np.float64], NDArray[np.int64]]:
1231 """
1232 Separate the indices in the visibility array (data/model/res/weights) into
1233 interleaved time samples, generally called "even" and "odd" depending on
1234 whether the time index is even or odd. Interleaved data can be used to generate
1235 cross power spectra and propagate uncertainties, see eq 2 of Jacobs et al. 2016.
1236 Ensures that the same flagging is applied to both sets.
1238 Parameters
1239 ----------
1240 obs : dict
1241 Observation metadata dictionary
1242 vis_weights : NDArray[np.float64]
1243 Visibility weights array
1245 Returns
1246 -------
1247 vis_weights : NDArray[np.float64]
1248 Full visibility weights array with the same flagging applied
1249 to both interleaved sets.
1250 bi_use NDArray[np.int64]
1251 Baseline index arrays for interleaved time samples, separated
1252 by "even" and "odd" indices.
1253 """
1254 # Not always used in vis_noise_calc requires this check in other cases
1255 if obs["n_time"] < 2: 1255 ↛ 1256line 1255 didn't jump to line 1256 because the condition on line 1255 was never true
1256 return vis_weights
1258 # Get the number of baselines, should be the last in this case
1259 nb = vis_weights.shape[-1]
1260 bin_end = np.zeros(obs["n_time"], dtype=np.int64)
1261 bin_end[: obs["n_time"] - 1] = (
1262 obs["baseline_info"]["bin_offset"][1 : obs["n_time"]] - 1
1263 )
1264 bin_end[-1] = int(nb - 1)
1265 bin_i = np.full(nb, -1, dtype=np.int64)
1266 for t_i in range(obs["n_time"] // 2):
1267 bin_i[obs["baseline_info"]["bin_offset"][t_i] : bin_end[t_i] + 1] = t_i
1269 time_start_i = int(np.min(np.nonzero(obs["baseline_info"]["time_use"])[0]))
1270 nt3 = int(np.floor((obs["n_time"] - time_start_i) / 2) * 2)
1271 time_use_0 = obs["baseline_info"]["time_use"][time_start_i : time_start_i + nt3 : 2]
1272 time_use_1 = obs["baseline_info"]["time_use"][
1273 time_start_i + 1 : time_start_i + nt3 : 2
1274 ]
1275 time_use_01 = time_use_0 * time_use_1
1276 time_use = np.zeros(obs["baseline_info"]["time_use"].shape, dtype=np.int64)
1277 time_use[time_start_i : time_start_i + nt3 : 2] = time_use_01
1278 time_use[time_start_i + 1 : time_start_i + nt3 : 2] = time_use_01
1279 time_cut_i = np.where(time_use <= 0)[0]
1280 if time_cut_i.size > 0: 1280 ↛ 1281line 1280 didn't jump to line 1281 because the condition on line 1280 was never true
1281 for cut_i in range(time_cut_i.size):
1282 bin_i_cut = np.where(bin_i == time_cut_i[cut_i])[0]
1283 if bin_i_cut.size > 0:
1284 bin_i[bin_i_cut] = -1
1286 bi_use = [np.where(bin_i % 2 == 0)[0], np.where(bin_i % 2 == 1)[0]]
1288 # Here we ensure that both even and odd samples are the same size by
1289 # ensuring both arrays match the smallest size
1290 if bi_use[0].size < bi_use[1].size: 1290 ↛ 1291line 1290 didn't jump to line 1291 because the condition on line 1290 was never true
1291 bi_use[1] = bi_use[1][0 : bi_use[0].size]
1292 elif bi_use[1].size < bi_use[0].size: 1292 ↛ 1293line 1292 didn't jump to line 1293 because the condition on line 1292 was never true
1293 bi_use[0] = bi_use[0][0 : bi_use[1].size]
1295 for pol_i in range(obs["n_pol"]):
1296 # In IDL they are doing x > y < w < z
1297 flag_use = np.minimum(
1298 np.maximum(vis_weights[pol_i, :, bi_use[0]], 0),
1299 np.minimum(vis_weights[pol_i, :, bi_use[1]], 1),
1300 )
1301 # Reset vis_weights
1302 vis_weights[pol_i] = 0
1303 # No keywords used for odd_only or even_only in entirety of FHD
1304 vis_weights[pol_i, :, bi_use[0]] = flag_use
1305 vis_weights[pol_i, :, bi_use[1]] = flag_use
1307 return vis_weights, bi_use
1310def vis_noise_calc(
1311 obs: dict,
1312 vis_arr: NDArray[np.complex128],
1313 vis_weights: NDArray[np.float64],
1314 bi_use: NDArray[np.int64] | None = None,
1315) -> NDArray[np.float64]:
1316 """
1317 Calculate the noise from the calibrated visibilities by taking
1318 the difference between the imaginary parts of the interleaved
1319 time samples. A factor of sqrt(2) is required because there is
1320 half as many independent noise samples when calculated from just
1321 the imaginary part.
1323 Parameters
1324 ----------
1325 obs : dict
1326 The observation metadata dictionary
1327 vis_arr : NDArray[np.complex128]
1328 The vsisibility array
1329 vis_weights : NDArray[np.float64]
1330 The visibility weights array
1331 bi_use : NDArray[np.int64] | None, optional
1332 Baseline index arrays for interleaved time samples, separated
1333 by "even" and "odd" indices, by default None
1335 Returns
1336 -------
1337 noise_arr: NDArray[np.float64]
1338 Calculated variance of the noise as a function of polarisation
1339 and frequency.
1340 """
1341 noise_arr = np.zeros([obs["n_pol"], obs["n_freq"]])
1343 if obs["n_time"] < 2: 1343 ↛ 1344line 1343 didn't jump to line 1344 because the condition on line 1343 was never true
1344 return noise_arr
1346 if bi_use is None: 1346 ↛ 1349line 1346 didn't jump to line 1349 because the condition on line 1346 was always true
1347 vis_weights_use, bi_use = split_vis_weights(obs, vis_weights)
1348 else:
1349 vis_weights_use = vis_weights
1351 for pol_i in range(obs["n_pol"]):
1352 data_diff = (
1353 vis_arr[pol_i, :, bi_use[0]].imag - vis_arr[pol_i, :, bi_use[1]].imag
1354 )
1355 vis_weight_diff = np.maximum(
1356 vis_weights_use[pol_i, :, bi_use[0]], 0
1357 ) * np.maximum(vis_weights_use[pol_i, :, bi_use[1]], 0)
1358 for fi in range(obs["n_freq"]):
1359 ind_use = np.where(vis_weight_diff[:, fi])[0]
1360 if ind_use.size > 0:
1361 noise_arr[pol_i, fi] = np.std(data_diff[ind_use, fi]) / np.sqrt(2)
1363 return noise_arr
1366def idl_median(
1367 x: NDArray[np.integer | np.floating | np.complexfloating],
1368 width: int = 0,
1369 even: bool = False,
1370) -> float:
1371 """
1372 The IDL Median function doesn't always work as you'd expect, as generally you need to use the
1373 EVEN keyword to get the median of an even number of elements, otherwise it returns the
1374 maximum element of the two numbers in the middle of an even sorted array.
1375 This function replicates the IDL median function, in case you need that functionality.
1376 Typically though, we recommend using `numpy.median` or `scipy.ndimage.median_filter`.
1378 Parameters
1379 ----------
1380 x : NDArray[np.integer | np.floating | np.complexfloating]
1381 Data to perform median on
1382 width : int
1383 If set, perform a type of median filtering.
1384 even : bool, optional
1385 In this case it will just run numpy.median, by default False
1388 When `width` is set. unfortunately the edge conditions when using cannot be
1389 replicated soley with `scipy.ndimage.median_filter` so use `median_filter` and set the edge cases manually
1391 Returns
1392 -------
1393 float
1394 The median of the array
1396 See Also
1397 --------
1398 numpy.median: Computes the median of an array
1399 scipy.ndimage.median_filter: Applies a median filter to an array
1400 """
1402 if width:
1403 # IDL median leaves everything within width//2 pixels of the edge alone
1404 # So just shove the outputs of median_filter everywhere else. None of
1405 # the `modes` in median_filter capture this behaviour
1406 output = deepcopy(x)
1408 hw = width // 2
1409 output[hw:-hw] = median_filter(x, size=width)[hw:-hw]
1411 return output
1413 else:
1414 if even: 1414 ↛ 1415line 1414 didn't jump to line 1415 because the condition on line 1414 was never true
1415 return np.median(x)
1416 else:
1417 med_index = int(np.ceil(len(x) / 2))
1419 return np.sort(x)[med_index]
1422def reshape_and_average_in_time(
1423 vis_array: NDArray[np.complex128],
1424 n_freq: int,
1425 n_time: int,
1426 n_baselines: int,
1427 vis_weights: NDArray[np.float64],
1428) -> NDArray[np.complex128]:
1429 """Given a single polarisation 2D `vis_array` of shape (n_freq, n_time*n_baselines),
1430 reshape into (n_freq, n_time, n_baselines), and then average in time, weighting
1431 by `vis_weights` (must be of shape (n_freq, n_time, n_baselines))
1432 Returns the averaged array in shape (n_freq, n_baselines)
1434 Parameters
1435 ----------
1436 vis_array : NDArray[np.complex128]
1437 The visibility array
1438 n_freq : int
1439 Number of frequencies
1440 n_time : int
1441 Number of time steps
1442 n_baselines : int
1443 Number of baselines
1444 vis_weights : NDArray[np.float64]
1445 The visibility weights array
1447 Returns
1448 -------
1449 reshape_array: NDArray[np.complex128]
1450 The reshaped visibility array the same shape as the visibility weights array
1451 """
1453 new_shape = (n_freq, n_time, n_baselines)
1455 if vis_weights.shape != new_shape: 1455 ↛ 1456line 1455 didn't jump to line 1456 because the condition on line 1455 was never true
1456 exit(
1457 f"Attempting to use weights with shape {vis_weights.shape} in `reshape_and_average_in_time`, this is not allowed"
1458 )
1460 reshape_array = np.reshape(vis_array, new_shape)
1461 reshape_array = np.sum(reshape_array * vis_weights, axis=1)
1463 return reshape_array
1466def region_grow(
1467 image: NDArray[np.integer | np.floating | np.complexfloating],
1468 roiPixels: NDArray[np.integer],
1469 low: int | float | None = None,
1470 high: int | float | None = None,
1471) -> NDArray[np.integer | np.floating | np.complexfloating] | None:
1472 """
1473 Replicates IDL's Region Grow, where a region of interest will grow based upon a given threshold
1474 within a 2D array. It finds all the pixels within the array that are connected neighbors via the
1475 threshold and blob detection using SciPy's `label` function. In this case, the standard deviation
1476 form of this function hasn't been implemented as PyFHD will only use this function once with a threshold.
1478 If you want to use standard deviation region growing adjusting the function can be done by potentially
1479 implementing skimage's blob detection algorithms for the labelling and keeping the rest the same.
1481 Parameters
1482 ----------
1483 image : NDArray[np.integer | np.floating | np.complexfloating]
1484 A 2D array of pixels
1485 roiPixels : NDArray[np.integer]
1486 The region of interest given as FLAT indexes i.e. array.flat
1487 low : int | float | None, optional
1488 The low threshold, any number below this is considered background,
1489 If left as None, this will be the lowest value of the region of interest, by default None
1490 high : int | float | None, optional
1491 The high threshold, any number higher than this is considered background,
1492 If left as None, this will be the highest value of the region of interest, by default None
1494 Returns
1495 -------
1496 growROIPixels: NDArray[np.integer | np.floating | np.complexfloating] | None
1497 The grown region of interest that has connected neighbours by using the threshold
1499 See Also
1500 --------
1501 scipy.ndimage.label: Labels an image based off a given kernel
1503 Notes
1504 -----
1505 'scikit-image Blob Detection' : https://scikit-image.org/docs/stable/auto_examples/features_detection/plot_blob.html
1506 """
1507 # Get the roi and set the low and high thresholds if they haven't been so already.
1508 roi = image.flat[roiPixels]
1509 if low is None: 1509 ↛ 1510line 1509 didn't jump to line 1510 because the condition on line 1509 was never true
1510 low = np.min(roi)
1511 if high is None: 1511 ↛ 1512line 1511 didn't jump to line 1512 because the condition on line 1511 was never true
1512 high = np.max(roi)
1513 # Replace all NaNs with Zeros
1514 nans = np.isnan(image)
1515 image[nans] = 0
1516 # Get all the values that are within the threshold
1517 threshArray = np.zeros_like(image)
1518 threshArray[np.where((image >= low) & (image <= high))] = 1
1519 threshArray[nans] = 0
1520 # Do binary blob detection with the label function
1521 labelArray, _ = label(threshArray)
1522 # Make the edges background as the Region grow IDL does not do the edges
1523 # of an array
1524 if labelArray.ndim == 2:
1525 labelArray[0, :] = 0
1526 labelArray[:, 0] = 0
1527 labelArray[-1, :] = 0
1528 labelArray[:, -1] = 0
1529 else:
1530 labelArray[0] = 0
1531 labelArray[-1] = 0
1532 # Get the histogram of the labels to ascertain the neighbours we will be interested in
1533 if np.size(roiPixels) > 1:
1534 labels, _, _ = histogram(labelArray.flat[roiPixels], min=0)
1535 labels = np.nonzero(labels != 0)[0]
1536 nLabels = labels.size
1537 else:
1538 nLabels = 1
1539 labels = labelArray.flat[roiPixels]
1540 if not isinstance(labels, np.ndarray):
1541 labels = np.array([labels], dtype=labelArray.dtype)
1542 # Ignore the first label if it's 0 as it's the background
1543 if labels[0] == 0:
1544 nLabels -= 1
1545 if nLabels > 0: 1545 ↛ 1548line 1545 didn't jump to line 1548 because the condition on line 1545 was always true
1546 labels = labels[1:]
1547 # The histogram will have a minimum of 1 so we need to take 1 off the labels
1548 if nLabels: 1548 ↛ 1551line 1548 didn't jump to line 1551 because the condition on line 1548 was always true
1549 labels -= 1
1550 # Get a histogram of all the labels
1551 labelHist, _, revInd = histogram(labelArray, min=1)
1552 # Get the number of pixels we will be growing to
1553 nPixels = np.sum(labelHist[labels]) if nLabels else 0
1554 # If we have any pixels to grow, then grow
1555 if nPixels > 0: 1555 ↛ 1571line 1555 didn't jump to line 1571 because the condition on line 1555 was always true
1556 # Only one label
1557 if nLabels == 1: 1557 ↛ 1561line 1557 didn't jump to line 1561 because the condition on line 1557 was always true
1558 growROIPixels = revInd[revInd[labels[0]] : revInd[labels[0] + 1]]
1559 else:
1560 # Take in all the labels and save all the flat indexes
1561 growROIPixels = np.empty(nPixels, dtype=np.int64)
1562 j = 0
1563 for i in range(nLabels):
1564 if revInd[labels[i] + 1] <= revInd.size:
1565 growROIPixels[j : j + labelHist[labels[i]]] = revInd[
1566 revInd[labels[i]] : revInd[labels[i] + 1]
1567 ]
1568 j = j + labelHist[labels[i]]
1569 else:
1570 # Return None if we didn't have anywhere to grow
1571 growROIPixels = None
1572 # Return the flat indexes
1573 return growROIPixels
1576def crosspol_split_real_imaginary(
1577 image: NDArray[np.complex128], pol_names: list[str] | None = None
1578) -> tuple[NDArray[np.complex128], list[str] | None]:
1579 """
1580 Reformat the input full polarisation image, containing PP, PQ, QP, and QQ,
1581 into PP, real(PQ), imaginary(PQ), and QQ. PQ and QP (i.e. XY and YX)
1582 are complex and conjugate mirrors of one another. To make a understandable
1583 image, we can plot the real and imaginary parts of PQ separately without
1584 loss of information.
1586 Parameters
1587 ----------
1588 image : NDArray[np.complex128]
1589 Image array ordered in polarisation by PP, PQ, QP, and QQ.
1590 pol_names : list[str] | None, optional
1591 The name of the polarisations, by default None
1593 Returns
1594 -------
1595 image : NDArray[np.complex128]
1596 Image array ordered in polarisation by PP, real(PQ), imaginary(PQ),
1597 and QQ
1598 pol_names : list[str]|None]
1599 New polarisation name array to reflect real(PQ) and imaginary(PQ)
1600 """
1601 crosspol_image: NDArray[np.complex128] = image[2]
1602 image[2] = crosspol_image.real
1603 image[3] = crosspol_image.imag
1605 if pol_names is not None:
1606 crosspol_name: str = pol_names[2]
1607 pol_names[2] = f"{crosspol_name}_real"
1608 pol_names[3] = f"{crosspol_name}_imag"
1610 return image, pol_names