Coverage for PyFHD/calibration/calibration_utils.py: 62%
655 statements
« prev ^ index » next coverage.py v7.9.1, created at 2025-07-01 10:58 +0800
« prev ^ index » next coverage.py v7.9.1, created at 2025-07-01 10:58 +0800
1import numpy as np
2from numpy.typing import NDArray
3from logging import Logger
4from PyFHD.pyfhd_tools.pyfhd_utils import (
5 resistant_mean,
6 weight_invert,
7 rebin,
8 histogram,
9)
10from copy import deepcopy
11from astropy.io import fits
12from astropy.constants import c
13from pathlib import Path
14from scipy.ndimage import uniform_filter
15import importlib_resources
18def vis_extract_autocorr(
19 obs: dict,
20 vis_arr: NDArray[np.complex128],
21 pyfhd_config: dict,
22 auto_tile_i: NDArray[np.integer] | None = None,
23) -> tuple[NDArray[np.float64], NDArray[np.integer]]:
24 """
25 Extract the auto-correlations if they exist from the full visibility array.
27 Parameters
28 ----------
29 obs : dict
30 Observation metadata dictionary
31 vis_arr : NDArray[np.complex128]
32 Uncalibrated data visiblities
33 pyfhd_config : dict
34 PyFHD's configuration dictionary containing all the options set for a PyFHD run
35 auto_tile_i : NDArray[np.integer] | None, optional
36 Index array for auto-correlation visibilities, by default None
38 Returns
39 -------
40 autocorr: NDArray[np.float64]
41 The auto-correlation visibilities
42 auto_tile_i : NDArray[np.integer]
43 The index array for auto-correlation visibilities
44 """
46 autocorr_i = np.where(
47 obs["baseline_info"]["tile_a"] == obs["baseline_info"]["tile_b"]
48 )[0]
49 if autocorr_i.size > 0: 49 ↛ 96line 49 didn't jump to line 96 because the condition on line 49 was always true
50 auto_tile_i = obs["baseline_info"]["tile_a"][autocorr_i] - 1
51 # As auto_tile_i is used for indexing we need to make it an integer array
52 auto_tile_i = auto_tile_i.astype(np.integer)
53 auto_tile_i_single = np.unique(auto_tile_i)
54 # expect it as a list of 2D arrays, so there might be trouble
55 if not pyfhd_config["cal_time_average"]:
56 freq_tile_shape = np.real(vis_arr[0][:, autocorr_i]).shape
57 auto_corr = np.zeros([obs["n_pol"]] + list(freq_tile_shape))
58 else:
59 auto_corr = np.zeros((obs["n_pol"], obs["n_freq"], auto_tile_i_single.size))
61 for pol_i in range(obs["n_pol"]):
62 # Auto-correlations by definition are enirely real, so take the real part here
63 # index this way in case people have vis_arr as one dimensional
64 # array, containing two further 2D arrays, rather than a proper 3D
65 # array. Turns out this indexing is consistent across both cases
66 auto_vals = np.real(vis_arr[pol_i][:, autocorr_i])
67 if pyfhd_config["cal_time_average"]:
68 auto_single = np.zeros((obs["n_freq"], auto_tile_i_single.size))
69 time_inds = np.where(obs["baseline_info"]["time_use"])[0]
70 for tile_i in range(auto_tile_i_single.size):
71 baseline_i = np.where(auto_tile_i == auto_tile_i_single[tile_i])[0]
72 baseline_i = baseline_i[time_inds]
73 if time_inds.size > 1: 73 ↛ 86line 73 didn't jump to line 86 because the condition on line 73 was always true
74 # OK, auto_vals is of shape (n_freq, n_autos)
75 # We want to average a subset of baseline_i within auto_vals,
76 # to average a specific auto-correlation over time
77 auto_single[:, tile_i] = (
78 np.sum(
79 auto_vals[:, baseline_i][np.arange(obs["n_freq"]), :],
80 axis=1,
81 )
82 / time_inds.size
83 )
85 else:
86 auto_single[:, tile_i] = auto_vals[:, baseline_i][
87 np.arange(obs["n_freq"]), :
88 ]
89 auto_vals = auto_single
90 auto_corr[pol_i, :, :] = auto_vals
91 if pyfhd_config["cal_time_average"]:
92 auto_tile_i = auto_tile_i_single
93 return auto_corr, auto_tile_i
94 else:
95 # Return auto_corr as 0 and auto_tile_i as an empty array
96 return np.zeros(1), np.zeros(0)
99def vis_cal_auto_init(
100 obs: dict,
101 cal: dict,
102 vis_arr: NDArray[np.complex128],
103 vis_model_arr: NDArray[np.complex128],
104 vis_auto: NDArray[np.float64],
105 vis_auto_model: NDArray[np.float64],
106 auto_tile_i: NDArray[np.integer],
107) -> NDArray[np.complex128]:
108 """
109 Initialize the calibration solutions using the autocorrelations prior to the linear least squares fit
110 for faster convergence. The auto-correlations and cross-correlations have separate formulisms for the
111 initialization.
113 Autos -- Square root of "Ratio of data autos to model autos" x "overall mean for both crosses and
114 autos of ratio of data to model" / "overall mean of ratio of data autos to model autos, with 1's
115 for crosses", results in a per-frequency, per-baseline gain scaling.
116 Crosses -- Square root of "overall mean for both crosses and autos of ratio of data to model" /
117 "overall mean of ratio of data autos to model autos, with 1's for crosses", results in a single
118 number gain scaling.
120 Parameters
121 ----------
122 obs : dict
123 Observation metadata dictionary
124 cal : dict
125 Calibration dictionary
126 vis_arr : NDArray[np.complex128]
127 Uncalibrated data visiblities
128 vis_model_arr : NDArray[np.complex128]
129 Simulated model visibilites
130 vis_auto : NDArray[np.float64]
131 Data auto-correlations
132 vis_auto_model : NDArray[np.float64]
133 Simulated model auto-correlations
134 auto_tile_i : NDArray[np.integer]
135 Index array for auto-correlation visibilities
137 Returns
138 -------
139 auto_gain : NDArray[np.complex128]
140 Initialization gain array
141 """
142 # Set out the
143 auto_scale = np.zeros((cal["n_pol"], obs["n_freq"], obs["n_tile"]))
144 auto_gain = np.ones(
145 (cal["n_pol"], obs["n_freq"], obs["n_tile"]), dtype=np.complex128
146 )
147 freq_i_use = np.where(obs["baseline_info"]["freq_use"])[0]
148 for pol_i in range(cal["n_pol"]):
149 res_mean_data = resistant_mean(np.abs(vis_arr[pol_i, freq_i_use, :]), 2)
150 res_mean_model = resistant_mean(np.abs(vis_model_arr[pol_i, freq_i_use, :]), 2)
151 auto_scale[pol_i] = np.sqrt(res_mean_data / res_mean_model)
152 auto_gain = np.ones(
153 (cal["n_pol"], obs["n_freq"], obs["n_tile"]), dtype=np.complex128
154 )
155 # Would love to get rid of the loop altogether, haven't figurted it out yet
156 for pol_i in range(cal["n_pol"]):
157 auto_gain[pol_i, :, :][:, auto_tile_i] = np.sqrt(
158 vis_auto[pol_i, :, :] * weight_invert(vis_auto_model[pol_i, :, :])
159 )
160 auto_gain[pol_i, :, :] *= auto_scale[pol_i] * weight_invert(
161 np.mean(auto_gain[pol_i, :, :])
162 )
163 auto_gain[pol_i, :, :][np.isnan(auto_gain[pol_i, :, :])] = 1
164 auto_gain[pol_i, :, :][auto_gain[pol_i, :, :] <= 0] = 1
165 return auto_gain
168def vis_calibration_flag(
169 obs: dict, cal: dict, pyfhd_config: dict, logger: Logger
170) -> dict:
171 """
172 Flag tile and frequency outliers based on the calibration solutions. First, iteratively flag a maximum of three
173 times on amplitude with three tests: 1) flag frequencies above 5 sigma, 2) flag tiles above 5 sigma, and 3) flag
174 tiles either 2x lower or 2x higher than average. Second, iteratively flag a maximum of three times on phase with
175 two tests: 1) flag tiles with slopes above 5 sigma, and 2) flag tiles with per-frequency deviations from their slope
176 above 5 sigma.
178 Parameters
179 ----------
180 obs : dict
181 Observation metadata dictionary
182 cal : dict
183 Calibration dictionary
184 pyfhd_config : dict
185 PyFHD's configuration dictionary containing all the options set for a PyFHD run
186 logger : Logger
187 PyFHD's logger for displaying errors and info to the log files
189 Returns
190 -------
191 obs: dict
192 Observation metadata dictionary
193 """
194 amp_sigma_threshold = 5
195 amp_threshold = 2
196 phase_sigma_threshold = 5
197 freq_use = obs["baseline_info"]["freq_use"].copy()
198 for pol_i in range(cal["n_pol"]):
199 tile_use_i = np.nonzero(obs["baseline_info"]["tile_use"])[0]
200 freq_use_i = np.nonzero(freq_use)[0]
201 gain = cal["gain"][pol_i]
202 phase = np.arctan2(gain.imag, gain.real)
203 amp = np.abs(gain)
205 # first flag based on overall amplitude
206 # extract_subarray is not being used as it was FHD's way of taking the fact that
207 # IDL's indexing can be weird and won't allow to index the result
208 # use np.where on gain, which will be multidimensional as well
209 amp_sub = amp[:, tile_use_i][freq_use_i, :]
210 gain_freq_fom = np.std(amp_sub, axis=1)
211 # Calculate the y values from a polynomial fit and keep the standard deviation of the error in gain_tile_fom
212 amp_sub_fit = np.zeros_like(amp_sub)
213 # Polynomial polyval can only take 1d arrays and doesn't vectorize across a 2d array.
214 for tile_i in range(tile_use_i.size):
215 amp_sub_fit[:, tile_i] = np.polynomial.polynomial.polyval(
216 freq_use_i,
217 np.polynomial.Polynomial.fit(
218 freq_use_i,
219 amp_sub[:, tile_i],
220 deg=pyfhd_config["cal_amp_degree_fit"],
221 )
222 .convert()
223 .coef,
224 )
225 # Get standard deviation of the residuals on a per tile basis
226 gain_tile_fom = np.std(amp_sub - amp_sub_fit, axis=0)
227 # Get the median of every tile, why is it call avg?
228 gain_tile_avg = np.median(amp_sub, axis=0)
229 gain_freq_fom[np.isnan(gain_freq_fom)] = 0
230 gain_tile_fom[np.isnan(gain_tile_fom)] = 0
231 freq_cut_i = np.where(gain_freq_fom == 0)[0]
232 freq_uncut_i = np.nonzero(gain_freq_fom)[0]
233 if freq_cut_i.size > 0: 233 ↛ 234line 233 didn't jump to line 234 because the condition on line 233 was never true
234 freq_use[freq_use_i][freq_cut_i] = 0
235 tile_cut_i = np.where(gain_tile_fom == 0)[0]
236 tile_uncut_i = np.nonzero(gain_tile_fom)[0]
237 if tile_cut_i.size > 0: 237 ↛ 238line 237 didn't jump to line 238 because the condition on line 237 was never true
238 obs["baseline_info"]["tile_use"][tile_use_i][tile_cut_i] = 0
239 if freq_uncut_i.size == 0 or tile_uncut_i.size == 0: 239 ↛ 240line 239 didn't jump to line 240 because the condition on line 239 was never true
240 logger.error(
241 "The frequency and tile flagging inside calibration found some values not detected in previous flagging or calibration"
242 )
244 n_addl_cut = max(freq_cut_i.size + tile_cut_i.size, 1)
245 n_cut = freq_cut_i.size + tile_cut_i.size
246 iter = 0
247 while n_addl_cut > 0 and iter < 3:
248 gain_freq_sigma = np.std(gain_freq_fom[freq_uncut_i])
249 gain_tile_sigma = np.std(gain_tile_fom[tile_uncut_i])
250 freq_cut_i = np.where(
251 (
252 gain_freq_fom
253 - np.median(gain_freq_fom[freq_uncut_i])
254 - amp_sigma_threshold * gain_freq_sigma
255 )
256 > 0
257 )[0]
258 # Update the complement of freq_cut_i (the NOT) i.e. freq_uncut_i
259 freq_uncut_i = freq_uncut_i[~np.isin(freq_uncut_i, freq_cut_i)]
260 tile_cut_test1 = (
261 gain_tile_fom
262 - np.median(gain_tile_fom[tile_uncut_i])
263 - amp_sigma_threshold * gain_tile_sigma
264 ) > 0
265 tile_cut_test2 = (
266 gain_tile_avg < np.median(gain_tile_avg) / amp_threshold
267 ) | (gain_tile_avg > np.median(gain_tile_avg) * amp_threshold)
268 tile_cut_i = np.where(tile_cut_test1 | tile_cut_test2)[0]
269 # Update the complement of tile_cut_i (the NOT) i.e. tile_uncut_i
270 tile_uncut_i = tile_uncut_i[~np.isin(tile_uncut_i, tile_cut_i)]
271 n_addl_cut = (freq_cut_i.size + tile_cut_i.size) - n_cut
272 n_cut = freq_cut_i.size + tile_cut_i.size
273 iter += 1
274 if (freq_cut_i.size) > 0: 274 ↛ 275line 274 didn't jump to line 275 because the condition on line 274 was never true
275 freq_use[freq_use_i[freq_cut_i]] = 0
276 if (tile_cut_i.size) > 0:
277 obs["baseline_info"]["tile_use"][tile_use_i[tile_cut_i]] = 0
279 # Reset freq_use_i and tile_use_i for flagging based on phase
280 tile_use_i = np.nonzero(obs["baseline_info"]["tile_use"])[0]
281 freq_use_i = np.nonzero(freq_use)[0]
283 # Start flagging based on phase
284 phase_sub = phase[:, tile_use_i][freq_use_i, :]
285 phase_slope_arr = np.empty(tile_use_i.size)
286 phase_sigma_arr = np.empty(tile_use_i.size)
287 for tile_i in range(tile_use_i.size):
288 phase_use = np.unwrap(phase_sub[:, tile_i])
289 phase_params = np.polynomial.polynomial.Polynomial.fit(
290 freq_use_i, phase_use, deg=pyfhd_config["cal_phase_degree_fit"]
291 )
292 phase_params = phase_params.convert().coef
293 phase_fit = np.polynomial.polynomial.polyval(freq_use_i, phase_params)
294 phase_sigma2 = np.std(phase_use - phase_fit)
295 # In an unusual scenario sometimes you'll get an all 0 array to fit on, which gives only a zero back for the fit
296 phase_slope_arr[tile_i] = phase_params[1] if phase_params.size > 1 else 0
297 phase_sigma_arr[tile_i] = phase_sigma2
298 iter = 0
299 n_addl_cut = 1
300 n_cut = 0
301 while n_addl_cut > 0 and iter < 3:
302 slope_sigma = np.nanstd(phase_slope_arr)
303 tile_cut_test1 = (
304 np.abs(phase_slope_arr) - np.median(np.abs(phase_slope_arr))
305 ) > phase_sigma_threshold * slope_sigma
306 tile_cut_test2 = (phase_sigma_arr - np.median(phase_sigma_arr)) > (
307 phase_sigma_threshold * np.nanstd(phase_sigma_arr)
308 )
309 tile_cut_i = np.where(tile_cut_test1 | tile_cut_test2)[0]
310 n_addl_cut = tile_cut_i.size - n_cut
311 n_cut = tile_cut_i.size
312 iter += 1
313 if tile_cut_i.size > 0:
314 obs["baseline_info"]["tile_use"][tile_use_i[tile_cut_i]] = 0
315 # If we decide not to flag the frequencies, ignore any frequency flagging
316 if pyfhd_config["flag_calibration_frequencies"]: 316 ↛ 317line 316 didn't jump to line 317 because the condition on line 316 was never true
317 obs["baseline_info"]["freq_use"] = freq_use
318 # Return the obs with an updated baseline_info on the use of tiles and frequency
319 return obs
322def transfer_bandpass(
323 obs: dict, cal: dict, pyfhd_config: dict, logger: Logger
324) -> tuple[dict, dict]:
325 """
326 Apply a previously saved bandpass via a calfits file (github:pyuvdata). Check adherance to standards,
327 and match the polarizations, frequencies, timing, pointings, and tiles.
329 Parameters
330 ----------
331 obs : dict
332 Observation metadata dictionary
333 cal : dict
334 Calibration dictionary
335 pyfhd_config : dict
336 PyFHD's configuration dictionary containing all the options set for a PyFHD run
337 logger : Logger
338 PyFHD's logger for displaying errors and info to the log files
340 Returns
341 -------
342 cal_bandpass : dict
343 Calibration dictionary with bandpass gains
344 cal_remainder : dict
345 calibration dictionary with residuals after removing the bandpass gains
346 """
347 cal_bandpass = {}
348 try:
349 calfits = fits.open(
350 Path(pyfhd_config["input_path"], pyfhd_config["cal_bp_transfer"])
351 )
352 # Get the data
353 data_array = calfits[0].data
354 # Read in the header
355 naxis = calfits[0].header["naxis"]
356 n_jones = calfits[0].header["njones"]
357 if "delay" in calfits[0].header["caltype"]:
358 raise RuntimeWarning(
359 "Input Delay calibration not supported at this time, skipping calibration bandpass transfer."
360 )
361 time_integration = calfits[0].header["inttime"]
362 freq_channel_width = calfits[0].header["chwidth"]
363 x_orient = calfits[0].header["xorient"].strip()
364 data_dims = np.empty(naxis, dtype=np.integer)
365 data_types = naxis * [""]
366 for naxis_i in range(naxis):
367 # Get the dimensions of the data
368 data_dims[naxis_i] = calfits[0].header[f"naxis{naxis_i + 1}"]
369 # Get the ctypes
370 data_types[naxis_i] = (
371 calfits[0].header[f"ctype{naxis_i + 1}"].lower().strip()
372 )
373 data_types = np.array(data_types)
374 # Get the indexes for the FITS standard checks
375 data_index = np.nonzero("narrays" == data_types)[0][0]
376 ant_index = np.nonzero("antaxis" == data_types)[0][0]
377 freq_index = np.nonzero("freqs" == data_types)[0][0]
378 time_index = np.nonzero("time" == data_types)[0][0]
379 jones_index = np.nonzero("jones" == data_types)[0][0]
380 # Deal with spec_wind_index separately as default highband file doesn't have this in
381 # May cause issues with other fits files, adjust the code then.
382 spec_wind_index = np.nonzero("if" == data_types)[0]
383 if spec_wind_index.size == 0:
384 spec_wind_index = -1
386 # Check the indexes given to see if they match standards, if not raise exception
387 if (
388 data_index != 0
389 or ant_index != 4
390 or freq_index != 3
391 or time_index != 2
392 or jones_index != 1
393 ):
394 if (
395 data_index == 0
396 and ant_index == 5
397 and freq_index == 3
398 and time_index == 2
399 and jones_index == 1
400 and spec_wind_index == 4
401 ):
402 logger.info("Calfits adheres to the Fall 2018 pyuvdata convention")
403 if calfits[0].header["naxis5"] != 1:
404 raise RuntimeWarning(
405 "Calfits file includes more than one spectral window. Note that this feature is not yet supported in PyFHD."
406 )
407 # Remove spectral window dimension for compatibility
408 data_array = np.mean(data_array, axis=0)
409 else:
410 raise RuntimeWarning(
411 "Calfits file does not appear to adhere to standard. Please see github:pyuvdata/docs/references"
412 )
414 freq_start = calfits[0].header[f"crval{freq_index + 1}"]
415 time_start = calfits[0].header[f"crval{time_index + 1}"]
416 time_delt = calfits[0].header[f"cdelt{time_index + 1}"]
417 jones_start = calfits[0].header[f"crval{jones_index + 1}"]
418 jones_delt = calfits[0].header[f"cdelt{jones_index + 1}"]
420 n_ant_data = data_array.shape[0]
421 n_freq = data_array.shape[1]
422 n_time = data_array.shape[2]
424 # Check whether the number of polarizations specified matches the observation analysis run
425 jones_type_matrix = np.zeros(data_dims[1])
426 for jones_i in range(1, data_dims[1]):
427 jones_type_matrix[jones_i - 1] = jones_start + (jones_delt * jones_i)
428 if data_dims[1] > obs["n_pol"]:
429 logger.warning(
430 "More polarizations in calibration fits file than in observation analysis. Reducing calibration to match obs."
431 )
432 data_dims[1] = obs["n_pol"]
433 jones_type_matrix = jones_type_matrix[0 : obs["n_pol"]]
434 data_array = data_array[:, :, :, 0 : obs["n_pol"], :]
435 elif data_dims[1] < obs["n_pol"]:
436 raise RuntimeWarning(
437 "Not enough polarizations defined in the calibration fits file."
438 )
440 # Switch the pol convention to FHD standard if necessary
441 if x_orient == "north" or x_orient == "south":
442 data_array[:, :, :, 0, :], data_array[:, :, :, 1, :] = (
443 data_array[:, :, :, 1, :],
444 data_array[:, :, :, 0, :],
445 )
446 if obs["n_pol"] > 2:
447 data_array[:, :, :, 2, :], data_array[:, :, :, 3, :] = (
448 data_array[:, :, :, 3, :],
449 data_array[:, :, :, 2, :],
450 )
452 # Check to see if the calibraton and observation frequency resolution match
453 if freq_channel_width != obs["freq_res"]:
454 freq_factor = obs["freq_res"] / freq_channel_width
455 if freq_factor >= 1:
456 logic_test = freq_factor - np.floor(freq_factor)
457 else:
458 logic_test = 1 / freq_factor - np.floor(1 / freq_factor)
459 if logic_test != 0:
460 raise RuntimeWarning(
461 f"Calfits input freq channel width is not easily castable to the observation, different by a factor of {freq_factor}"
462 )
463 if freq_start != obs["baseline_info"]["freq"][0]:
464 raise RuntimeWarning(
465 "Calfits input freq start is not equal to observation freq start"
466 )
467 # Downselect the data array
468 if freq_factor > 1:
469 logger.warning(
470 f"Calfits input freq channel width is different by a factor of {freq_factor}. Avergaing Down."
471 )
472 # Set flagged indices to NAN to remove them from mean calculation
473 flag_inds = np.where(np.abs(np.squeeze(data_array[:, :, :, :, 2])) == 1)
474 data_array[flag_inds][0] = np.nan
475 data_array[flag_inds][1] = np.nan
476 for channel_i in range(obs["n_freq"]):
477 data_array[:, channel_i, :, :, :] = np.nanmean(
478 data_array[
479 :,
480 max(
481 channel_i * freq_factor - np.floor(freq_factor / 2), 0
482 ) : channel_i
483 * freq_factor
484 + np.floor(freq_factor / 2.0),
485 :,
486 :,
487 :,
488 ]
489 )
490 data_array = data_array[:, 0 : obs["n_freq"], :, :, :]
491 elif freq_factor < 1:
492 logger.warning(
493 f"Calfits input freq channel width is different by a factor of {freq_factor}. Using linear interpolation"
494 )
495 # The IDL code has 5 nested loops, and I can't think of the vectorization right now in a reasonable ampount of time
496 # Someone please vectorize laster if you can
497 data_array_temp = np.zeros(
498 (data_dims[4], obs["n_freq"], n_time, n_jones, 2)
499 )
500 for data_i in range(2):
501 for jones_i in range(n_jones):
502 for times_i in range(n_time):
503 for tile_i in range(data_dims[4]):
504 for channel_i in range(obs["n_freq"] - 1):
505 start_idx = int(channel_i * (1.0 / freq_factor))
506 end_idx = int((channel_i + 1) * (1.0 / freq_factor))
507 data_array_temp[
508 tile_i,
509 start_idx:end_idx,
510 times_i,
511 jones_i,
512 data_i,
513 ] = np.interp(
514 np.arange(start_idx, end_idx, 1.0),
515 np.arange(channel_i, channel_i + 1, 1.0),
516 data_array[
517 data_i,
518 jones_i,
519 times_i,
520 channel_i : channel_i + 1,
521 tile_i,
522 ],
523 )
524 data_array_temp[:, obs["n_freq"] - 1, :, :, :] = data_array[
525 :, n_freq - 1, :, :, :
526 ]
527 data_array = np.copy(data_array_temp)
529 # Check to see what time range this needs to be applied to, and if pointings are necessary
530 if n_time != 1:
531 sec_upperlimit = 2000
532 sec_lowerlimit = 1600
533 if (
534 time_integration < sec_upperlimit and time_integration > sec_lowerlimit
535 ) or (time_delt < sec_upperlimit and time_delt > sec_lowerlimit):
536 # Calibration fits are per pointing
537 # Keep all the delay patterns in a dictionary
538 delay_patterns = {
539 (-5): [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 3, 8, 13, 18],
540 (-4): [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15],
541 (-3): [0, 3, 6, 9, 0, 3, 6, 9, 0, 3, 6, 9, 0, 3, 6, 9],
542 (-2): [0, 2, 4, 6, 0, 2, 4, 6, 0, 2, 4, 6, 0, 2, 4, 6],
543 (-1): [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3],
544 (0): [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
545 (5): [15, 10, 5, 0, 16, 11, 6, 1, 17, 12, 7, 2, 18, 13, 8, 3],
546 (4): [12, 8, 4, 0, 13, 9, 5, 1, 14, 10, 6, 2, 15, 11, 7, 3],
547 (3): list(
548 reversed([0, 3, 6, 9, 0, 3, 6, 9, 0, 3, 6, 9, 0, 3, 6, 9])
549 ),
550 (2): list(
551 reversed([0, 2, 4, 6, 0, 2, 4, 6, 0, 2, 4, 6, 0, 2, 4, 6])
552 ),
553 (1): list(
554 reversed([0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3])
555 ),
556 }
557 obs_pointing = None
558 for pointing, pattern in delay_patterns.items():
559 if np.array_equal(obs["delays"], pattern):
560 obs_pointing = pointing
561 if obs_pointing is None:
562 raise RuntimeWarning(
563 "Pointing number not within 5 pointings around zenith."
564 )
565 obs_julian_date = obs["astr"]["mjdobs"] + 2400000.5
567 # Find corresponding in the calfits
568 # Number of days since Auguest 23 2013
569 days_since_ref = np.floor(obs_julian_date) - np.floor(time_start)
570 # pointing start shift amount depending on how many days since ref
571 obs_pointing_shift_since_ref = ((24 - 23.9344699) / 24) * days_since_ref
572 # pointing start time for HH:MM:SS on Aug23 (in JD) plus comparible pointing start time for reference, using calculated shift
573 pointing_jdhms_ref = (
574 np.array(
575 [
576 0.13611,
577 0.15157,
578 0.17565,
579 0.19963,
580 0.22222,
581 0.24342593,
582 0.26453705,
583 0.28574074,
584 0.30694,
585 0.33657407,
586 ]
587 )
588 + obs_pointing_shift_since_ref
589 )
590 pointing_num_ref = [-5, -4, -3, -2, -1, 0, 1, 2, 3, 4]
591 # pointing start time for HH:MM:SS for calfits day (in JD)
592 pointing_jdhms_calfits = time_start - np.floor(time_start)
593 # find the closest index between reference and calfits
594 pointing_calfits_index = np.argmin(
595 np.abs(pointing_jdhms_ref - pointing_jdhms_calfits)
596 )
597 # make sure the index is greater than the pointing start time
598 if pointing_jdhms_calfits < pointing_jdhms_ref[pointing_calfits_index]:
599 pointing_calfits_index -= 1
601 if (
602 pointing_jdhms_calfits
603 > (
604 np.max(pointing_jdhms_ref)
605 + (pointing_jdhms_ref[1] - pointing_jdhms_ref[0])
606 )
607 ) or (
608 pointing_jdhms_calfits
609 < (
610 np.min(pointing_jdhms_ref)
611 - (pointing_jdhms_ref[1] - pointing_jdhms_ref[0])
612 )
613 ):
614 raise RuntimeWarning(
615 "Calfits does not start between five pointings before zenith and four pointings after zenith. Not suitable for pointing cal at this time."
616 )
617 # find which pointing is the start of the calfits data
618 pointing_calfits_start = pointing_num_ref[pointing_calfits_index]
619 if obs_pointing < pointing_calfits_start:
620 raise RuntimeWarning(
621 "Calfits file does not contain pointing of observation"
622 )
623 # select pointing index in calfits that matches observation
624 obs_pointing_index = abs(pointing_calfits_start - obs_pointing)
625 # choose the corresponding pointing from the calfits data array
626 data_array = data_array[:, :, obs_pointing_index, :, :]
627 elif np.floor(time_delt) == np.floor(obs["time_res"]):
628 # Calibration fits are per-timeres
629 logger.info(
630 "Averaging calfits to observation length, an FHD requirement at this time."
631 )
632 data_array_temp = np.zeros(
633 [data_dims[4], obs.n_freq, 1, data_dims[1], data_dims[0]]
634 )
635 data_array_temp[:, :, 0, :, :] = np.mean(data_array, axis=2)
636 data_array = np.copy(data_array_temp)
637 else:
638 # Calibration fits are for a random set of times
639 logger.info(
640 "Finding closest match in time between calfits and obs. Obs metadata assumed to report start time, calfits metadata assumed to report center time."
641 )
642 time_delta = time_integration / (60 * 60 * 24)
643 time_array = np.full(n_time, time_start + time_delta)
644 obs_julian_date = (
645 obs["astr"]["mjdobs"]
646 + 2400000.5
647 + ((obs["n_time"] * obs["time_res"]) / (2 * 60 * 60 * 24))
648 )
649 if (obs_julian_date < time_array[0] - 2 * time_delta) or (
650 obs_julian_date > time_array[-1] + 2 * time_delta
651 ):
652 raise RuntimeWarning(
653 "Observation does not seem to fit within the time frame of the calfits"
654 )
655 # find the closest index between calfits and observation
656 time_index = np.argmin(np.abs(obs_julian_date - time_array))
657 data_array = data_array[:, :, time_index, :, :]
659 # Check number of tiles
660 if n_ant_data != obs["n_tile"]:
661 raise RuntimeWarning(
662 "Number of antennas in calfits file does match observation antenna number"
663 )
665 # Now that the checks are done, return the cal structure
666 cal_bandpass["n_pol"] = min(obs["n_pol"], 2)
667 cal_bandpass["conv_thresh"] = 1e-7
668 cal_bandpass["gain"] = np.full(
669 (cal_bandpass["n_pol"], obs["n_tile"], obs["n_freq"]),
670 pyfhd_config["cal_gain_init"],
671 )
672 cal_bandpass["gain"][0 : obs["n_pol"], :, :] = np.squeeze(
673 data_array[:, :, 0, 0 : obs["n_pol"], 0]
674 ) + 1j * np.squeeze(data_array[:, :, 0, 0 : obs["n_pol"], 1])
675 logger.info("Calfits File has been read and cal_bandpass has been created")
676 except FileNotFoundError as e:
677 logger.error(
678 f"{pyfhd_config['cal_bp_transfer']} file wasn't found, skipping calibration bandpass transfer"
679 )
680 return {}, {}
681 except RuntimeWarning as e:
682 logger.error(e)
683 return {}, {}
684 cal_remainder = deepcopy(cal)
685 cal_remainder["gain"][0 : cal["n_pol"], :, :] = (
686 cal["gain"][0 : cal["n_pol"], :, :]
687 / cal_bandpass["gain"][0 : cal["n_pol"], :, :]
688 )
689 return cal_bandpass, cal_remainder
692def vis_cal_bandpass(
693 obs: dict, cal: dict, pyfhd_config: dict, logger: Logger
694) -> tuple[dict, dict]:
695 """
696 Reduce the degrees of freedom on the per-frequency calibration amplitudes by averaging solutions
697 together. Options include averaging over tiles which use a particular beamformer-to-receiver cable
698 lengths/types, or averaging over all tiles for a global bandpass.
700 Parameters
701 ----------
702 obs : dict
703 Observation metadata dictionary
704 cal : dict
705 Calibration dictionary
706 pyfhd_config : dict
707 PyFHD's configuration dictionary containing all the options set for a PyFHD run
708 logger : Logger
709 PyFHD's logger for displaying errors and info to the log files
711 Returns
712 -------
713 cal_bandpass : dict
714 Calibration dictionary with bandpass gains
715 cal_remainder : dict
716 calibration dictionary with residuals after removing the bandpass gains
717 """
718 freq_use = np.nonzero(obs["baseline_info"]["freq_use"])[0]
719 tile_use = np.nonzero(obs["baseline_info"]["tile_use"])[0]
720 n_pol = cal["gain"].shape[0]
721 # Set a flag for global bandpass, will turn true if too many tiles are flagged
722 global_bandpass = False
724 # Initialize cal_bandpass and cal_remainder and transfer them in, if a file has been set (fits only supported right now)
725 if pyfhd_config["cal_bp_transfer"] is not None: 725 ↛ 726line 725 didn't jump to line 726 because the condition on line 725 was never true
726 cal_bandpass, cal_remainder = transfer_bandpass(obs, cal, pyfhd_config, logger)
727 if len(cal_bandpass.keys()) != 0 and len(cal_remainder.keys()) != 0:
728 logger.info(
729 f"Calibration Bandpass FITS file {pyfhd_config['cal_bp_transfer']} transferred in for cal_bandpass and cal_remainder"
730 )
731 return cal_bandpass, cal_remainder
732 cal_bandpass = deepcopy(cal)
733 cal_remainder = deepcopy(cal)
735 cal_bandpass_gain = np.empty(cal["gain"].shape, dtype=np.complex128)
736 cal_remainder_gain = np.empty(cal["gain"].shape, dtype=np.complex128)
738 if pyfhd_config["cable_bandpass_fit"]:
739 # Using preexisting file to extract information about which tiles have which cable length
740 # cable_len = np.loadtxt(Path(pyfhd_config["input"], pyfhd_config["cable-reflection-coefficients"]), skiprows=1)[:, 2].flatten()
741 cable_len_filepath = importlib_resources.files(
742 "PyFHD.resources.instrument_config"
743 ).joinpath(f"{pyfhd_config['instrument']}_cable_reflection_coefficients.txt")
744 cable_len = np.loadtxt(cable_len_filepath, skiprows=1)[:, 2].flatten()
746 # Taking tile information and cross-matching it with the nonflagged tiles array, resulting in nonflagged tile arrays grouped by cable length
747 cable_length_ref = np.unique(cable_len)
748 tile_use_arr = [0] * cable_length_ref.size
749 for cable_i in range(cable_length_ref.size):
750 tile_use_arr[cable_i] = np.where(
751 (obs["baseline_info"]["tile_use"])
752 & (cable_len == cable_length_ref[cable_i])
753 )[0]
754 if tile_use_arr[cable_i].size == 0: 754 ↛ 755line 754 didn't jump to line 755 because the condition on line 754 was never true
755 logger.warning(
756 "Too Many flagged tiles to implement bandpass cable averaging, using global bandpass."
757 )
758 global_bandpass = True
759 # n_freq x 13 array. columns are frequency, 90m xx, 90m yy, 150m xx, 150m yy, 230m xx, 230m yy, 320m xx, 320m yy, 400m xx, 400m yy, 524m xx, 524m yy
760 bandpass_arr = np.zeros(
761 (obs["n_freq"], cal["n_pol"] * cable_length_ref.size + 1)
762 )
763 bandpass_arr[:, 0] = obs["baseline_info"]["freq"]
764 bandpass_col_count = 1
765 if pyfhd_config["auto_ratio_calibration"]: 765 ↛ 768line 765 didn't jump to line 768 because the condition on line 765 was always true
766 logger.info("auto_ratio_calibration is set, using global bandpass")
767 global_bandpass = True
768 for cable_i in range(cable_length_ref.size):
769 # This is an option to calibrate over all tiles to find the 'global' bandpass. It will be looped over by the number
770 # of cable lengths, and will redo the same calculation everytime. It is inefficient, but effective.
771 if global_bandpass: 771 ↛ 774line 771 didn't jump to line 774 because the condition on line 771 was always true
772 tile_use_cable = tile_use
773 else:
774 tile_use_cable = tile_use_arr[cable_i]
776 for pol_i in range(cal["n_pol"]):
777 gain = cal["gain"][pol_i]
778 # gain2 is a temporary variable used in place of the gain array for an added layer of safety
779 if cable_i == 0 and pol_i == 0:
780 gain2 = np.zeros(cal["gain"].shape, dtype=np.complex128)
781 # Only use gains from unflagged tiles and frequencies, and calculate the amplitude and phase
782 gain_use = gain[freq_use, :][:, tile_use_cable]
783 amp = np.abs(gain_use)
784 # amp2 is a temporary variable used in place of the amp array for an added layer of safety
785 amp2 = np.zeros((freq_use.size, tile_use_cable.size))
786 # This is the normalization loop for each tile. If the mean of gain amplitudes over all frequencies is nonzero, then divide
787 # the gain amplitudes by that number, otherwise make the gain amplitudes zero.
788 for tile_i in range(tile_use_cable.size):
789 res_mean = resistant_mean(amp[:, tile_i], 2)
790 if res_mean != 0: 790 ↛ 793line 790 didn't jump to line 793 because the condition on line 790 was always true
791 amp2[:, tile_i] = amp[:, tile_i] / res_mean
792 else:
793 amp2[:, tile_i] = 0
794 # This finds the normalized gain amplitude mean per frequency over all tiles, which is the final bandpass per cable group.
795 bandpass_single = np.empty(freq_use.size)
796 # If this is slow, resistant_mean can be vectorized
797 for f_i in range(freq_use.size):
798 bandpass_single[f_i] = resistant_mean(amp2[f_i, :], 2)
799 # Want iterative to start at 1 (to not overwrite freq) and store final bandpass per cable group.
800 bandpass_arr[freq_use, bandpass_col_count] = bandpass_single
801 bandpass_col_count += 1
802 # Fill temporary variable gain2, set equal to final bandpass per cable group for each tile that will use that bandpass.
804 # Vectorize later if possible
805 for tile_i in range(tile_use_cable.size):
806 gain2[pol_i, freq_use, tile_use_cable[tile_i]] = bandpass_single
808 # For the last bit at the end of the cable
809 if cable_i == cable_length_ref.size - 1:
810 # Set gain3 to the input gains
811 gain3 = cal["gain"][pol_i].copy()
812 # Set what will be passed back as the output gain as the final bandpass per cable type.
813 gain2_input = gain2[pol_i, :, :]
814 cal_bandpass_gain[pol_i] = gain2_input
815 # Set what will be passed back as the residual as the input gain divided by the final bandpass per cable type.
816 gain3[freq_use, :] /= gain2_input[freq_use, :]
817 cal_remainder_gain[pol_i] = gain3
818 # Add Levine Memo bandpass to the gain solutions here if you wish
819 else:
820 bandpass_arr = np.zeros((obs["n_freq"], cal["n_pol"] + 1))
821 bandpass_arr[:, 0] = obs["baseline_info"]["freq"]
822 for pol_i in range(cal["n_pol"]):
823 gain = cal["gain"][pol_i]
824 gain_use = gain[freq_use, :][:, tile_use]
825 amp = np.abs(gain_use)
826 amp2 = np.zeros((freq_use.size, tile_use.size))
827 for tile_i in range(tile_use.size):
828 res_mean = resistant_mean(amp[:, tile_i], 2)
829 if res_mean != 0: 829 ↛ 832line 829 didn't jump to line 832 because the condition on line 829 was always true
830 amp2[:, tile_i] = amp[:, tile_i] / res_mean
831 else:
832 amp2[:, tile_i] = 0
834 bandpass_single = np.empty(freq_use.size)
835 # If this is slow, resistant_mean can be vectorized
836 for f_i in range(freq_use.size):
837 bandpass_single[f_i] = resistant_mean(amp2[f_i, :], 2)
839 bandpass_arr[freq_use, pol_i + 1] = bandpass_single
840 # Work out the gain for the bandpass
841 gain2 = np.zeros(gain.shape, dtype=np.complex128)
842 gain3 = deepcopy(gain)
843 # Vectorize later if possible
844 for tile_i in range(obs["n_tile"]):
845 gain2[freq_use, tile_i] = bandpass_single
846 gain3[freq_use, tile_i] /= bandpass_single
848 cal_bandpass_gain[pol_i, :, :] = gain2
849 cal_remainder_gain[pol_i, :, :] = gain3
851 cal_bandpass["gain"] = cal_bandpass_gain
852 cal_remainder["gain"] = cal_remainder_gain
853 return cal_bandpass, cal_remainder
856def vis_cal_polyfit(
857 obs: dict,
858 cal: dict,
859 auto_ratio: NDArray[np.float64] | None,
860 pyfhd_config: dict,
861 logger: Logger,
862) -> dict:
863 """
864 Reduce the degrees of freedom on the per-frequency calibration amplitudes and phases by fitting
865 the full frequency band with polynomials of a specified degree, with options for split polynomials
866 over certain regions.
868 In addition, fit cable reflections with a text file of fits, theoretical fits given cable length/type,
869 or the finding the maximum in delay space. Any of these can then be used as an initial estimate in a
870 hyperresolved FFT of the residual calibration solutions to fit the final mode, phase, and amplitude.
871 Residual calibration solutions can optionally be made by using an incoherent mean, or a mean over
872 all tiles which *do not* have the same cable length/type, to reduce bias in the residual, and
873 the cable reflections can also be fit using just the phases to reduce dependency on the polyphase
874 filter bank shape.
876 Parameters
877 ----------
878 obs : dict
879 Observation metadata dictionary
880 cal : dict
881 Calibration dictionary
882 auto_ratio: NDArray[np.float64] | None
883 The estimation of the antenna-dependent bias through the square root of the autocorrelation
884 visibilities normalized via a reference tile
885 pyfhd_config : dict
886 PyFHD's configuration dictionary containing all the options set for a PyFHD run
887 logger : Logger
888 PyFHD's logger for displaying errors and info to the log files
890 Returns
891 -------
892 cal : dict
893 Calibration dictionary with polynomial gain fits
894 pyfhd_config : dict
895 PyFHD's configuration dictionary containing all the options set for a PyFHD run
896 Is returned in case the digital_gain_jump_polyfit was set here.
897 """
898 # Keep the og_gain_arr for calculations later
899 og_gain_arr = np.copy(cal["gain"])
900 if ( 900 ↛ 914line 900 didn't jump to line 914 because the condition on line 900 was always true
901 pyfhd_config["cal_reflection_mode_theory"]
902 or pyfhd_config["cal_reflection_mode_file"]
903 or pyfhd_config["cal_reflection_mode_delay"]
904 or pyfhd_config["cal_reflection_hyperresolve"]
905 ):
906 if ( 906 ↛ 912line 906 didn't jump to line 912 because the condition on line 906 was always true
907 pyfhd_config["cal_reflection_mode_theory"]
908 and abs(pyfhd_config["cal_reflection_mode_theory"]) > 1
909 ):
910 cal_mode_fit = pyfhd_config["cal_reflection_mode_theory"]
911 else:
912 cal_mode_fit = True
913 else:
914 cal_mode_fit = False
915 freq_use = np.where(obs["baseline_info"]["freq_use"])[0]
916 tile_use = np.where(obs["baseline_info"]["tile_use"])[0]
918 pre_dig_inds = np.where(obs["baseline_info"]["freq"][freq_use] < 187.515e6)
919 if pre_dig_inds[0].size == 0: 919 ↛ 920line 919 didn't jump to line 920 because the condition on line 919 was never true
920 logger.warning(
921 "No frequencies below 187.515MHz, using full band polyfit, digital gain jump polyfit disabled."
922 )
923 pyfhd_config["digital_gain_jump_polyfit"] = False
924 else:
925 f_d = np.max(pre_dig_inds[0])
926 f_end = freq_use.size
928 if ((f_d + 1) == f_end) and pyfhd_config["digital_gain_jump_polyfit"]: 928 ↛ 929line 928 didn't jump to line 929 because the condition on line 928 was never true
929 logger.warning(
930 "No frequency found above 187.515MHz, using full band polyfit, digital gain jump polyfit disabled."
931 )
932 pyfhd_config["digital_gain_jump_polyfit"] = False
934 # If the observation date is beyond the date where digital gain jump polyfit is required, disable it
935 # Date is 2014-07-23 (23rd July 2014) or Julian Day 2456861.5000000
936 if obs["jd0"] > 2456861.5000000 and pyfhd_config["digital_gain_jump_polyfit"]: 936 ↛ 937line 936 didn't jump to line 937 because the condition on line 936 was never true
937 logger.warning(
938 "Observation date is beyond the date where digital gain jump polyfit is required, using full band polyfit, digital gain jump polyfit disabled."
939 )
940 pyfhd_config["digital_gain_jump_polyfit"] = False
942 # If the amp_degree or phase_degree weren't used, then apply the defaults
943 if not pyfhd_config["cal_amp_degree_fit"]: 943 ↛ 944line 943 didn't jump to line 944 because the condition on line 943 was never true
944 pyfhd_config["cal_amp_degree_fit"] = 2
945 if not pyfhd_config["cal_phase_degree_fit"]: 945 ↛ 946line 945 didn't jump to line 946 because the condition on line 945 was never true
946 pyfhd_config["cal_phase_degree_fit"] = 1
948 # If you wish to find any steps that are outliers beyond 5sigma, and remove them add that code here.
949 # The cal_step_fit option isn't in the eor_defaults_wrapper or defined in the FHD dictionary.
951 # Polynomial fitting over the frequency band
952 gain_residual = np.empty((cal["n_pol"], obs["n_freq"], obs["n_tile"]))
953 # create amp_params with the shape of (n_pol, n_tile, amp_degree + 1)
954 # If we're using the digital_gain_jump_polyfit add extra dimension to amp_params
955 if pyfhd_config["digital_gain_jump_polyfit"]: 955 ↛ 965line 955 didn't jump to line 965 because the condition on line 955 was always true
956 cal["amp_params"] = np.empty(
957 (
958 cal["n_pol"],
959 obs["n_tile"],
960 pyfhd_config["cal_amp_degree_fit"],
961 pyfhd_config["cal_amp_degree_fit"],
962 )
963 )
964 else:
965 cal["amp_params"] = np.empty(
966 (cal["n_pol"], obs["n_tile"], pyfhd_config["cal_amp_degree_fit"] + 1)
967 )
968 cal["phase_params"] = np.empty(
969 (cal["n_pol"], obs["n_tile"], pyfhd_config["cal_phase_degree_fit"] + 1)
970 )
971 for pol_i in range(cal["n_pol"]):
972 gain_arr = np.copy(cal["gain"][pol_i])
973 gain_amp = np.abs(gain_arr)
974 gain_phase = np.arctan2(gain_arr.imag, gain_arr.real)
975 for tile_i in range(obs["n_tile"]):
976 gain = np.squeeze(gain_amp[freq_use, tile_i])
977 gain_fit = np.zeros(obs["n_freq"])
978 # Pre and post digital gain jump separately for highband MWA data
979 if pyfhd_config["digital_gain_jump_polyfit"]: 979 ↛ 1025line 979 didn't jump to line 1025 because the condition on line 979 was always true
980 if pre_dig_inds[0].size > 0: 980 ↛ 1020line 980 didn't jump to line 1020 because the condition on line 980 was always true
981 fit_params1 = (
982 np.polynomial.Polynomial.fit(
983 freq_use[0 : f_d + 1],
984 gain[0 : f_d + 1],
985 deg=pyfhd_config["cal_amp_degree_fit"] - 1,
986 )
987 .convert()
988 .coef
989 )
990 fit_params2 = (
991 np.polynomial.Polynomial.fit(
992 freq_use[f_d + 1 : f_end],
993 gain[f_d + 1 : f_end],
994 deg=pyfhd_config["cal_amp_degree_fit"] - 1,
995 )
996 .convert()
997 .coef
998 )
999 for di in range(pyfhd_config["cal_amp_degree_fit"]):
1000 gain_fit[freq_use[0] : freq_use[f_d] + 1] += fit_params1[di] * (
1001 # Had to use the size as use freq_use[f_d] could cause
1002 # off by one indexing errors (due to freq_use[0] potentially being
1003 # 1 or 0 which throws off the np.arange indexing)
1004 np.arange(gain_fit[freq_use[0] : freq_use[f_d] + 1].size)
1005 ** di
1006 )
1007 gain_fit[freq_use[f_d + 1] : freq_use[f_end - 1] + 1] += (
1008 fit_params2[di]
1009 * (
1010 np.arange(freq_use[f_end - 1] - freq_use[f_d + 1] + 1)
1011 + freq_use[f_d + 1]
1012 )
1013 ** di
1014 )
1015 # Do notice this is saving the coefficients on a per row basis, so fit_params1 a,b will be [pol_i, tile_i, 0, :]
1016 # While fit_params2 a, b coefficients will be in [pol_i, tile_i, 1, :]
1017 fit_params = np.vstack([fit_params1, fit_params2])
1018 cal["amp_params"][pol_i, tile_i] = fit_params
1019 else:
1020 logger.warning(
1021 "digital_gain_jump_polyfit only works with highband mwa data. Full band polyfit applied instead."
1022 )
1023 # Fit for amplitude
1024 else:
1025 fit_params = (
1026 np.polynomial.Polynomial.fit(
1027 freq_use, gain, deg=pyfhd_config["cal_amp_degree_fit"]
1028 )
1029 .convert()
1030 .coef
1031 )
1032 cal["amp_params"][pol_i, tile_i, :] = fit_params
1033 for di in range(pyfhd_config["cal_amp_degree_fit"]):
1034 gain_fit += fit_params[di] * np.arange(obs["n_freq"]) ** di
1036 gain_residual[pol_i, :, tile_i] = gain_amp[:, tile_i] - gain_fit
1038 # Fit for phase
1039 phase_use = np.unwrap(np.squeeze(gain_phase[freq_use, tile_i]))
1040 phase_params = (
1041 np.polynomial.Polynomial.fit(
1042 freq_use, phase_use, pyfhd_config["cal_phase_degree_fit"]
1043 )
1044 .convert()
1045 .coef
1046 )
1047 cal["phase_params"][pol_i, tile_i, :] = phase_params
1048 phase_fit = np.zeros(obs["n_freq"])
1049 for di in range(phase_params.size):
1050 phase_fit += phase_params[di] * np.arange(obs["n_freq"]) ** di
1051 gain_arr[:, tile_i] = gain_fit * np.exp(1j * phase_fit)
1052 cal["gain"][pol_i] = gain_arr
1053 # Cable Reflection Fitting
1054 if cal_mode_fit: 1054 ↛ 1284line 1054 didn't jump to line 1284 because the condition on line 1054 was always true
1055 if pyfhd_config["cal_reflection_mode_file"]: 1055 ↛ 1056line 1055 didn't jump to line 1056 because the condition on line 1055 was never true
1056 logger.info(
1057 "Using mwa calibration reflections fits from instrument_config/mwa_cable_reflection_coefficients.txt."
1058 )
1059 cable_len_filepath = importlib_resources.files(
1060 "PyFHD.resources.instrument_config"
1061 ).joinpath(
1062 f"{pyfhd_config['instrument']}_cable_reflection_coefficients.txt"
1063 )
1064 cable_reflections = np.loadtxt(cable_len_filepath, skiprows=1).transpose()
1065 cable_length = cable_reflections[2]
1066 tile_ref_flag = np.minimum(
1067 np.maximum(np.zeros_like(cable_reflections[4]), cable_reflections[4]),
1068 np.ones_like(cable_reflections[4]),
1069 )
1070 tile_mode_X = cable_reflections[5]
1071 tile_amp_X = cable_reflections[6]
1072 tile_phase_X = cable_reflections[7]
1073 tile_mode_Y = cable_reflections[8]
1074 tile_amp_Y = cable_reflections[9]
1075 tile_phase_Y = cable_reflections[10]
1077 # Modes in fourier transform units
1078 mode_i_arr = np.zeros((cal["n_pol"], obs["n_tile"]))
1079 mode_i_arr[0, :] = tile_mode_X * tile_ref_flag
1080 mode_i_arr[1, :] = tile_mode_Y * tile_ref_flag
1082 amp_arr = np.vstack([tile_amp_X, tile_amp_Y])
1083 phase_arr = np.vstack[[tile_phase_X, tile_phase_Y]]
1085 elif pyfhd_config["cal_reflection_mode_theory"]: 1085 ↛ 1112line 1085 didn't jump to line 1112 because the condition on line 1085 was always true
1086 logger.info(
1087 "Using theory calculation in nominal reflection mode calibration."
1088 )
1089 # Get the nominal tile lengths and velocity factors
1090 cable_len_filepath = importlib_resources.files(
1091 "PyFHD.resources.instrument_config"
1092 ).joinpath(f"{pyfhd_config['instrument']}_cable_length.txt")
1093 cable_length_data = np.loadtxt(cable_len_filepath, skiprows=1).transpose()
1094 cable_length = cable_length_data[2]
1095 cable_vf = cable_length_data[3]
1096 tile_ref_flag = np.minimum(np.maximum(0, cable_length_data[4]), 1)
1098 # Nominal Reflect Time
1099 reflect_time = (2 * cable_length) / (c.value * cable_vf)
1100 bandwidth = (
1101 (
1102 np.max(obs["baseline_info"]["freq"])
1103 - np.min(obs["baseline_info"]["freq"])
1104 )
1105 * obs["n_freq"]
1106 ) / (obs["n_freq"] - 1)
1107 # Modes in fourier transform units
1108 mode_i_arr = np.tile(
1109 bandwidth * reflect_time * tile_ref_flag, [cal["n_pol"], 1]
1110 )
1112 elif pyfhd_config["cal_reflection_mode_delay"]:
1113 logger.info(
1114 "Using calibration delay spectrum to calculate nominal reflection modes."
1115 )
1116 spec_mask = np.zeros(obs["n_freq"])
1117 spec_mask[freq_use] = 1
1118 freq_cut = np.where(spec_mask == 0)
1119 # IDL uses forward FFT by default
1120 spec_psf = np.abs(np.fft.fftn(spec_mask, norm="forward"))
1121 spec_inds = np.arange(obs["n_freq"] // 2)
1122 spec_psf = spec_psf[spec_inds]
1123 mode_test = np.zeros(obs["n_freq"] // 2)
1124 for pol_i in range(cal["n_pol"]):
1125 for ti in range(tile_use.size):
1126 tile_i = tile_use[ti]
1127 spec0 = np.abs(np.fft.fftn(gain_residual[pol_i, tile_i]))
1128 mode_test += spec0[spec_inds]
1129 psf_mask = np.zeros(obs["n_freq"] // 2)
1131 if freq_cut[0].size > 0:
1132 psf_mask[np.where(spec_psf > (np.max(spec_psf) / 1000))] = 1
1133 # Replaces IDL smooth with edge_truncate
1134 psf_mask = uniform_filter(psf_mask, size=5, mode="nearest")
1135 mask_i = np.nonzero(psf_mask)
1136 if mask_i[0].size > 0:
1137 mode_test[mask_i] = 0
1138 mode_i_arr = np.zeros((cal["n_pol"], obs["n_tile"])) + np.argmax(mode_test)
1140 # Fit only certain cable lengths
1141 # Positive length indicates fit mode, negative length indicates exclude mode
1142 # This is currently assuming cal_mode_fit is an integer or number, not an array!
1143 # If you need an array to fit or exclude cable lengths, then create another option for it
1144 # in the config and adjust the code accordingly. Ensure every config option only has one purpose.
1145 if auto_ratio is None and cal_mode_fit != 1:
1146 tile_ref_logic = np.zeros(obs["n_tile"])
1147 if cal_mode_fit > 0: 1147 ↛ 1152line 1147 didn't jump to line 1152 because the condition on line 1147 was always true
1148 cable_cut_i = np.where(cable_length != cal_mode_fit)
1149 if cable_cut_i[0].size > 0: 1149 ↛ 1155line 1149 didn't jump to line 1155 because the condition on line 1149 was always true
1150 tile_ref_logic[cable_cut_i] = 1
1151 else:
1152 cable_cut_i = np.where(cable_length == abs(cal_mode_fit))
1153 if cable_cut_i[0].size > 0:
1154 tile_ref_logic[cable_cut_i] = 1
1155 cable_cut = np.nonzero(tile_ref_logic)
1156 tile_ref_flag = np.ones(obs["n_tile"])
1157 if cable_cut[0].size > 0: 1157 ↛ 1159line 1157 didn't jump to line 1159 because the condition on line 1157 was always true
1158 tile_ref_flag[cable_cut] = 0
1159 mode_i_arr *= tile_ref_flag
1161 cal["mode_params"] = np.empty([cal["n_pol"], obs["n_tile"], 3])
1162 for pol_i in range(cal["n_pol"]):
1163 # Divide the polyfit to reveal the residual cable reflections better
1164 gain_arr = og_gain_arr[pol_i] / cal["gain"][pol_i]
1165 for ti in range(tile_use.size):
1166 tile_i = tile_use[ti]
1167 mode_i = mode_i_arr[pol_i, tile_i]
1168 if mode_i == 0:
1169 continue
1170 else:
1171 # Options to hyperresolve or fit the reflection modes/amp/phase given the nominal calculations
1172 if pyfhd_config["cal_reflection_hyperresolve"]: 1172 ↛ 1256line 1172 didn't jump to line 1256 because the condition on line 1172 was always true
1173 # start with nominal cable length
1174 mode0 = mode_i
1175 # overresolve the FT used for the fit (normal resolution would be dmode=1)
1176 dmode = 0.05
1177 # range around the central mode to test
1178 nmodes = 101
1179 # array of modes to try
1180 modes = (np.arange(nmodes) - nmodes // 2) * dmode + mode0
1181 # reshape for ease of computing
1182 modes = rebin(modes, (freq_use.size, nmodes)).T
1184 if auto_ratio is not None:
1185 # Find tiles which will *not* be accidently coherent in their cable reflection in order to reduce bias
1186 inds = np.where(
1187 (obs["baseline_info"]["tile_use"])
1188 & (mode_i_arr[pol_i, :] > 0)
1189 & ((np.abs(mode_i_arr[pol_i, :] - mode_i)) > 0.01)
1190 )
1191 # mean over frequency for each tile
1192 freq_mean = np.nanmean(auto_ratio[pol_i], axis=0)
1193 norm_autos = auto_ratio[pol_i] / rebin(
1194 freq_mean, (obs["n_freq"], obs["n_tile"])
1195 )
1196 # mean over all tiles which *are not* accidently coherent as a func of freq
1197 incoherent_mean = np.nanmean(norm_autos[:, inds[0]], axis=1)
1198 # Residual and normalized (using incoherent mean) auto-correlation
1199 resautos = (
1200 norm_autos[:, tile_i] / incoherent_mean
1201 ) - np.nanmean(norm_autos[:, tile_i] / incoherent_mean)
1202 gain_temp = rebin(
1203 resautos[freq_use], (nmodes, freq_use.size)
1204 )
1205 else:
1206 # dimension manipulation, add dim for mode fitting
1207 # Subtract the mean so aliasing is reduced in the dft cable fitting
1208 gain_temp = rebin(
1209 gain_arr[freq_use, tile_i]
1210 - np.mean(gain_arr[freq_use, tile_i]),
1211 (nmodes, freq_use.size),
1212 )
1213 # freq_use matrix to multiply/collapse in fit
1214 freq_mat = rebin(freq_use, (nmodes, freq_use.size))
1215 # Perform DFT of gains to test modes
1216 test_fits = np.sum(
1217 np.exp(1j * 2 * np.pi / obs["n_freq"] * modes * freq_mat)
1218 * gain_temp,
1219 axis=1,
1220 )
1221 # Pick out highest amplitude fit (mode_ind gives the index of the mode)
1222 amp_use = np.max(np.abs(test_fits)) / freq_use.size
1223 mode_ind = np.argmax(np.abs(test_fits))
1224 # Phase of said fit
1225 phase_use = np.arctan2(
1226 test_fits[mode_ind].imag, test_fits[mode_ind].real
1227 )
1228 mode_i = modes[mode_ind, 0]
1230 # Using the mode selected from the gains, optionally use the phase to find the amp and phase
1231 if auto_ratio is not None:
1232 # Find tiles which will not be accidently coherent in their cable reflection in order to reduce bias
1233 inds = np.where(
1234 (obs["baseline_info"]["tile_use"])
1235 & (mode_i_arr[pol_i, :] > 0)
1236 & (np.abs(mode_i_arr[pol_i, :] - mode_i) > 0.01)
1237 )
1238 residual_phase = np.arctan2(
1239 gain_arr[freq_use, :].imag, gain_arr[freq_use, :].real
1240 )
1241 incoherent_residual_phase = residual_phase[
1242 :, tile_i
1243 ] - np.nanmean(residual_phase[:, inds[0]], axis=1)
1244 test_fits = np.sum(
1245 np.exp(
1246 1j * 2 * np.pi / obs["n_freq"] * mode_i * freq_use
1247 )
1248 * incoherent_residual_phase
1249 )
1250 # Factor of 2 from fitting just the phase
1251 amp_use = 2 * np.abs(test_fits) / freq_use.size
1252 # Factor of pi/2 from just fitting the phase
1253 phase_use = (
1254 np.arctan2(test_fits.imag, test_fits.real) + np.pi / 2
1255 )
1256 elif pyfhd_config["cal_reflection_mode_file"]:
1257 # Use predetermined fits
1258 amp_use = amp_arr[pol_i, tile_i]
1259 phase_use = phase_arr[pol_i, tile_i]
1260 else:
1261 # Use nominal delay mode, but fit amplitude and phase of reflections
1262 mode_fit = np.sum(
1263 np.exp(1j * 2 * np.pi / obs["n_freq"] * mode_i * freq_use)
1264 * gain_arr[freq_use, tile_i]
1265 )
1266 amp_use = np.abs(mode_fit) / freq_use[0].size
1267 phase_use = np.arctan2(mode_fit.imag, mode_fit.real)
1269 gain_mode_fit = amp_use * np.exp(
1270 -1j
1271 * 2
1272 * np.pi
1273 * (mode_i * np.arange(obs["n_freq"]) / obs["n_freq"])
1274 + 1j * phase_use
1275 )
1276 if auto_ratio is not None:
1277 # Only fit for the cable reflection in the phases
1278 cal["gain"][pol_i, :, tile_i] *= np.exp(1j * gain_mode_fit.imag)
1279 else:
1280 cal["gain"][pol_i, :, tile_i] *= 1 + gain_mode_fit
1281 cal["mode_params"][pol_i, tile_i] = np.array(
1282 [mode_i, amp_use, phase_use]
1283 )
1284 return cal, pyfhd_config
1287def vis_cal_auto_fit(
1288 obs: dict,
1289 cal: dict,
1290 vis_auto: NDArray[np.float64],
1291 vis_auto_model: NDArray[np.float64],
1292 auto_tile_i: NDArray[np.integer],
1293) -> dict:
1294 """
1295 Solve for each tile's calibration amplitude via the square root of the ratio of the data autocorrelation
1296 to the model autocorrelation using the definition of a gain. Then, remove dependence on the correlated
1297 noise term in the autocorrelations by scaling this amplitude down to the crosscorrelations gain via a
1298 simple, linear fit. Build a full, scaled autocorrelation gain solution by adding in the phase term via
1299 the crosscorrelation gains.
1301 Parameters
1302 ----------
1303 obs : dict
1304 Observation metadata dictionary
1305 cal : dict
1306 Calibration dictionary
1307 vis_auto : NDArray[np.float64]
1308 Data autocorrelations
1309 vis_auto_model : NDArray[np.float64]
1310 Simulated model autocorrelations
1311 auto_tile_i : NDArray[np.integer]
1312 Index array of the tile array that have defined autocorrelations
1314 Returns
1315 -------
1316 cal: dict
1317 Calibration dictionary with scaled autocorrelation gains
1318 """
1319 freq_i_use = np.nonzero(obs["baseline_info"]["freq_use"])
1320 freq_i_flag = np.where(obs["baseline_info"]["freq_use"] == 0)[0]
1321 # If the number of frequencies not being used is above 0, then ignore the frequencies surrounding them.
1322 if freq_i_flag.size > 0: 1322 ↛ 1334line 1322 didn't jump to line 1334 because the condition on line 1322 was always true
1323 freq_flag = np.zeros(obs["n_freq"])
1324 freq_flag[freq_i_use] = 1
1325 for freq_i in range(freq_i_flag.size):
1326 minimum = max(0, freq_i_flag[freq_i] - 1)
1327 maximum = min(obs["n_freq"], freq_i_flag[freq_i] + 2)
1328 freq_flag[minimum:maximum] = 0
1329 freq_i_use = np.nonzero(freq_flag)
1330 # Vectorized loop for via_cal_auto_fit lines 45-55 in IDL
1331 # However the logic still indexes the full 128 tiles, so need to shove
1332 # outputs into an empty array of correct size
1333 # We're not using the cross polarizations if they are present
1334 auto_gain = np.empty((cal["n_pol"], obs["n_freq"], obs["n_tile"]))
1335 auto_gain[:, :, auto_tile_i] = np.sqrt(
1336 vis_auto[: cal["n_pol"]] * weight_invert(vis_auto_model[: cal["n_pol"]])
1337 )
1338 gain_cross = cal["gain"]
1339 fit_slope = np.empty((cal["n_pol"], obs["n_tile"]))
1340 fit_offset = np.empty_like(fit_slope)
1342 # Didn't vectorize as the polyfit won't be vectorized
1343 for pol_i in range(cal["n_pol"]):
1344 for tile in range(auto_tile_i.size):
1345 tile_idx = auto_tile_i[tile]
1346 phase_cross_single = np.arctan2(
1347 gain_cross[pol_i, :, tile_idx].imag, gain_cross[pol_i, :, tile_idx].real
1348 )
1350 gain_auto_single = np.abs(auto_gain[pol_i, :, tile_idx])
1351 gain_cross_single = np.abs(gain_cross[pol_i, :, tile_idx])
1353 # mask out any NaN values; numpy doesn't like them,
1354 # I assume the IDL equiv function just masks them?
1355 # or maybe we need to do a catch for NaNs here, and abandon all
1356 # hope for a fit if there are NaNs?
1357 notnan = np.where(
1358 (np.isnan(gain_auto_single[freq_i_use]) != True)
1359 & (np.isnan(gain_cross_single[freq_i_use]) != True)
1360 )
1361 gain_auto_single_fit = gain_auto_single[freq_i_use][notnan]
1362 gain_cross_single_fit = gain_cross_single[freq_i_use][notnan]
1364 # linfit from IDL uses chi-square error calculations to do the linear fit, instead of least squares.
1365 # The polynomial fit uses least square method
1366 x = np.vstack([gain_auto_single_fit, np.ones(gain_auto_single_fit.size)]).T
1367 fit_single = np.linalg.lstsq(x, gain_cross_single_fit, rcond=None)[0]
1368 # IDL gives the solution in terms of [A, B] while Python does [B, A] assuming we're
1369 # solving the equation y = A + Bx
1370 cal["gain"][pol_i, :, tile_idx] = (
1371 gain_auto_single * fit_single[0] + fit_single[1]
1372 ) * np.exp(1j * phase_cross_single)
1373 fit_slope[pol_i, tile_idx] = fit_single[0]
1374 fit_offset[pol_i, tile_idx] = fit_single[1]
1375 cal["auto_scale"] = np.sum(fit_slope, axis=1) / auto_tile_i.size
1376 cal["auto_params"] = np.empty([cal["n_pol"], cal["n_pol"], obs["n_tile"]])
1377 cal["auto_params"][0, :, :] = fit_offset
1378 cal["auto_params"][1, :, :] = fit_slope
1379 return cal
1382def vis_calibration_apply(
1383 vis_arr: NDArray[np.complex128],
1384 obs: dict,
1385 cal: dict,
1386 vis_model_arr: NDArray[np.complex128],
1387 vis_weights: NDArray[np.float64],
1388 logger: Logger,
1389) -> tuple[NDArray[np.complex128], dict]:
1390 """
1391 Apply the calibration solutions to the input, data visibilities to create calibrated, data visibilities using
1392 the definition of the gains.
1394 Definition of the gain:
1395 (visibility for baseline of tile i and tile j) = (gain of tile i) (gain of tile j) (model visibility for baseline of tile i and tile j)
1397 If only two othogonal polarizations were used to calibrate, calculate the phase offset between the two orthogonal dipoles to
1398 solve for a degeneracy in the cross polarizations. The formula can be derived by multiplying the gains by an equal and opposite
1399 phase in the linear least square solver and solving for the phase when the partial derivative w.r.t the offset is 0.
1401 Parameters
1402 ----------
1403 vis_arr : NDArray[np.complex128]
1404 Uncalibrated data visiblities
1405 obs : dict
1406 Observation metadata dictionary
1407 cal : dict
1408 Calibration dictionary
1409 vis_model_arr : NDArray[np.complex128]
1410 Simulated model visibilites
1411 vis_weights : NDArray[np.float64]
1412 Weights (flags) of the visibilities
1413 logger : Logger
1414 PyFHD's logger for displaying errors and info to the log files
1416 Returns
1417 -------
1418 vis_arr : NDArray[np.complex128]
1419 Calibrated visibilities
1420 cal : dict
1421 Calibration dictionary
1422 """
1423 # tile numbering starts at 1
1424 tile_a_i = obs["baseline_info"]["tile_a"] - 1
1425 tile_b_i = obs["baseline_info"]["tile_b"] - 1
1427 n_pol_vis = vis_arr.shape[0]
1428 gain_pol_arr1 = [0, 1, 0, 1]
1429 gain_pol_arr2 = [0, 1, 1, 0]
1431 # OK, it really makes sense to use native python functionality here
1432 # We're just trying to match up the frequency-dependent gains to the
1433 # correct baselines, and apply them. Can use `meshgrid` here instead of
1434 # `rebin`, which will make 2D indexing arrays, so we can directly leave
1435 # the gain arrays in the correct shape and index the directly. Using `rebin`
1436 # means we have to flatten them
1437 (
1438 inds_a_baseline,
1439 inds_a_freqs,
1440 ) = np.meshgrid(tile_a_i, np.arange(obs["n_freq"]))
1441 inds_b_baseline, inds_b_freqs = np.meshgrid(tile_b_i, np.arange(obs["n_freq"]))
1443 for pol_i in range(n_pol_vis):
1444 gain_arr1 = cal["gain"][gain_pol_arr1[pol_i], :, :]
1445 gain_arr2 = cal["gain"][gain_pol_arr2[pol_i], :, :]
1447 vis_gain = gain_arr1[inds_a_freqs, inds_a_baseline] * np.conjugate(
1448 gain_arr2[inds_b_freqs, inds_b_baseline]
1449 )
1451 vis_arr[pol_i, :, :] *= weight_invert(vis_gain, use_abs=False)
1453 # We haven't run FHD in a way that uses 4 pols yet so this is all
1454 # untested
1455 if n_pol_vis == 4:
1456 if type(vis_model_arr) == np.ndarray and type(vis_weights) == np.ndarray: 1456 ↛ 1506line 1456 didn't jump to line 1506 because the condition on line 1456 was always true
1457 # This if statement replaces vis_calibrate_crosspol_phase
1458 # as this was the only place where the function was used
1459 # Note inside vis_calibrate_crosspol_phase there is a
1460 # if n_pol_vis == 4 check hence only run if n_pol_vis == 4
1461 # This code should calculate the phase fit between the X and Y
1462 # antenna polarizations.
1463 # Use the xx flags (yy should be identitical at this point)
1465 # this is num baselines per time step
1466 n_baselines = int(len(tile_a_i) / obs["n_times"])
1467 # reshape from (n_freq, n_baselines*n_times) to (n_freq, n_times, n_baselines). Turns out due to the row major vs col major difference
1468 # between IDL and python, this shape also changes
1469 new_shape = (obs["n_freq"], obs["n_times"], n_baselines)
1470 weights_use = np.reshape(vis_weights[0, :, :], new_shape)
1471 # carried over from FHD code
1472 weights_use = np.maximum(weights_use, np.zeros_like(weights_use))
1473 weights_use = np.minimum(weights_use, np.ones_like(weights_use))
1475 # Average the visbilities in time
1476 axis_avg = 1
1477 vis_xy = np.reshape(vis_arr[2, :, :], new_shape)
1478 vis_xy = np.sum(vis_xy * weights_use, axis=axis_avg)
1479 vis_yx = np.reshape(vis_arr[3, :, :], new_shape)
1480 vis_yx = np.sum(vis_yx * weights_use, axis=axis_avg)
1482 model_xy = np.reshape(vis_model_arr[2, :, :], new_shape)
1483 model_xy = np.sum(model_xy * weights_use, axis=axis_avg)
1484 model_yx = np.reshape(vis_model_arr[3, :, :], new_shape)
1485 model_yx = np.sum(model_yx * weights_use, axis=axis_avg)
1487 # Remove Zeros
1488 weight = np.sum(weights_use, axis=axis_avg)
1489 i_use = np.nonzero(weight)
1490 vis_xy = np.squeeze(vis_xy[i_use])
1491 vis_yx = np.squeeze(vis_yx[i_use])
1492 model_xy = np.squeeze(model_xy[i_use])
1493 model_yx = np.squeeze(model_yx[i_use])
1495 vis_sum = np.sum(np.conjugate(vis_xy) * model_xy) + np.sum(
1496 vis_yx * np.conjugate(model_yx)
1497 )
1498 cross_phase = np.arctan2(vis_sum.imag, vis_sum.real)
1500 logger.info(
1501 f"Phase fit between X and Y antenna polarizations: {cross_phase}"
1502 )
1504 cal["cross_phase"] = cross_phase
1506 vis_arr[2, :, :] *= np.exp(1j * 0)
1507 vis_arr[3, :, :] *= np.exp(-1j * 0)
1509 return vis_arr, cal
1512def vis_baseline_hist(
1513 obs: dict,
1514 params: dict,
1515 vis_cal: NDArray[np.complex128],
1516 vis_model_arr: NDArray[np.complex128],
1517) -> dict:
1518 """
1519 Create diagnostic histograms of both the mean and sigma for the percent difference between calibrated
1520 data and simulated model visibilities as a function of baseline length.
1522 Parameters
1523 ----------
1524 obs : dict
1525 Observation metadata dictionary
1526 params : dict
1527 Visibility metadata dictionary
1528 vis_cal : NDArray[np.complex128]
1529 Calibrated data visibilities
1530 vis_model_arr : NDArray[np.complex128]
1531 Simulated model visibilites
1533 Returns
1534 -------
1535 vis_baseline_hist : dict
1536 Dictionary of the mean and sigma histograms for the percent difference between calibrated data
1537 and simulated model visibilities as a function of baseline length.
1538 """
1539 kx_arr = params["uu"] / obs["kpix"]
1540 ky_arr = params["vv"] / obs["kpix"]
1541 kr_arr = np.sqrt(kx_arr**2 + ky_arr**2)
1542 # take the transpose of this, given our `vis_cal` and `vis_mode_arr` are the
1543 # transpose of the original FHD code
1544 dist_arr = np.outer(kr_arr, obs["baseline_info"]["freq"]).transpose() * obs["kpix"]
1545 dist_hist, bins, dist_ri = histogram(
1546 dist_arr, min=obs["min_baseline"], max=obs["max_baseline"], bin_size=5.0
1547 )
1549 vis_res_ratio_mean = np.zeros([obs["n_pol"], bins.size])
1550 vis_res_sigma = np.zeros([obs["n_pol"], bins.size])
1552 for pol_i in range(obs["n_pol"]):
1553 for bin_i in range(bins.size):
1554 if dist_hist[bin_i] > 0: 1554 ↛ 1575line 1554 didn't jump to line 1575 because the condition on line 1554 was always true
1555 inds = dist_ri[dist_ri[bin_i] : dist_ri[bin_i + 1]]
1556 model_vals = (vis_model_arr[pol_i]).flatten()[inds]
1557 wh_noflag = np.where(np.abs(model_vals) > 0)[0]
1558 if wh_noflag.size > 0: 1558 ↛ 1561line 1558 didn't jump to line 1561 because the condition on line 1558 was always true
1559 inds = inds[wh_noflag]
1560 else:
1561 continue
1562 # if Keyword_Set(calibration_visibilities_subtract) THEN BEGIN
1563 # but calibration_visibilities_subtract isn't a function keyword
1564 # so we'll only translate the False of that statement
1565 # vis_cal_use = (vis_cal[pol_i].transpose()).flatten()[inds]
1566 vis_cal_use = (vis_cal[pol_i]).flatten()[inds]
1567 vis_res_ratio_mean[pol_i, bin_i] = np.mean(
1568 np.abs(vis_cal_use - model_vals)
1569 ) / np.mean(np.abs(model_vals))
1570 vis_res_sigma[pol_i, bin_i] = np.sqrt(
1571 np.var(np.abs(vis_cal_use - model_vals))
1572 ) / np.mean(np.abs(model_vals))
1574 else:
1575 continue
1576 return {
1577 "baseline_length": bins.size,
1578 "vis_res_ratio_mean": vis_res_ratio_mean,
1579 "vis_res_sigma": vis_res_sigma,
1580 }
1583def cal_auto_ratio_divide(
1584 obs: dict,
1585 cal: dict,
1586 vis_auto: NDArray[np.float64],
1587 auto_tile_i: NDArray[np.integer],
1588) -> tuple[dict, NDArray[np.float64]]:
1589 """
1590 Remove antenna-dependent parameters (i.e. cable reflections) from the calculated gains
1591 to reduce the bias on individual tile variation before the creation of averaged quantities
1592 like the global bandpass. Antenna-dependent parameters are estimated from the square root of the
1593 autocorrelation visibilities normalized via a reference tile.
1595 Parameters
1596 ----------
1597 obs : dict
1598 Observation metadata dictionary
1599 cal : dict
1600 Calibration dictionary
1601 vis_auto : NDArray[np.float64]
1602 Data autocorrelations
1603 auto_tile_i : NDArray[np.integer]
1604 Index array of the tile array that have defined autocorrelations
1606 Returns
1607 -------
1608 cal : dict
1609 Calibration dictionary with gains that have reduced antenna-dependent bias
1610 auto_ratio : NDArray[np.float64]
1611 The estimation of the antenna-dependent bias through the square root of the autocorrelation
1612 visibilities normalized via a reference tile.
1613 """
1615 auto_ratio = np.empty([cal["n_pol"], obs["n_freq"], obs["n_tile"]])
1616 # This should be possible to Vectorize if it's slow
1617 for pol_i in range(cal["n_pol"]):
1618 # fhd_struct_init_cal puts the ref_antenna as 1 if it's not set, which is never appears to be
1619 v0 = vis_auto[pol_i, :, auto_tile_i[cal["ref_antenna"]]]
1620 auto_ratio[pol_i, :, auto_tile_i] = np.sqrt(
1621 vis_auto[pol_i, :, np.arange(auto_tile_i.size)] * weight_invert(v0)
1622 )
1623 cal["gain"][pol_i] = cal["gain"][pol_i] * weight_invert(auto_ratio[pol_i])
1624 return cal, auto_ratio
1627def cal_auto_ratio_remultiply(
1628 cal: dict, auto_ratio: NDArray[np.float64], auto_tile_i: NDArray[np.integer]
1629) -> dict:
1630 """
1631 Return antenna-dependent parameters to the calculated gains after averaged quantities
1632 have been calculated. The antenna-dependent parameters are captured via the square root of
1633 the referenced autocorrelation visiblities calculated in cal_auto_ratio_divide.
1635 Parameters
1636 ----------
1637 obs : dict
1638 Observation metadata dictionary
1639 cal : dict
1640 Calibration dictionary
1641 auto_ratio : NDArray[np.float64]
1642 Square root of the autocorrelation visibilities normalized via a reference tile
1643 auto_tile_i : NDArray[np.integer]
1644 Index array of the tile array that have defined autocorrelations
1646 Returns
1647 -------
1648 cal: dict
1649 Calibration dictonary containing the reformed gain
1650 """
1651 # Replaced for loop in remultiply, this should remultiply by the auto_ratios
1652 cal["gain"][: cal["n_pol"], :, auto_tile_i] = cal["gain"][
1653 : cal["n_pol"], :, auto_tile_i
1654 ] * np.abs(auto_ratio[: cal["n_pol"], :, auto_tile_i])
1655 return cal
1658def calculate_adaptive_gain(
1659 gain_list: NDArray[np.float64],
1660 convergence_list: NDArray[np.float64],
1661 iter: int,
1662 base_gain: int | float,
1663 final_convergence_estimate: float | None = None,
1664):
1665 """
1666 Perform a Kalman filter to calculate the best weighting to use in the next iteration of the
1667 linear least squares fitting between the data and simulated model, which reduces the number
1668 of total iterations till convergence.
1670 The Kalman filter takes previous convergence values, or the percent change in calibration solution from
1671 the previous iteration, to calculate the relative weight to give to the next calculated iteration versus
1672 the previous iteration. For example, a gain (relative weight in this context) of 1 would give the old
1673 solution and the new solution the same weighting, and thus the next guess in the linear least square solver
1674 is the average of the new and old solution. A gain (relative weight) of 2 would give the new solution twice
1675 as much weight at the old solution, and thus the next guess in the linear least square solver is the sum of
1676 the old solution and twice the new solution, all divided by three.
1678 Parameters
1679 ----------
1680 gain_list : NDArray[np.float64]
1681 Relative weighting between the previous iteration and new iteration in the linear least squares
1682 solver for calibration solutions
1683 convergence_list : NDArray[np.float64]
1684 An array of the percent change in the calibration solutions between one iteration and the next
1685 iter : int
1686 The current iteration in the linear least squares solver
1687 base_gain : int|float
1688 The initial relative weighting between the previous iteration and new iteration in the linear
1689 least squares solver for calibration solutions
1690 final_convergence_estimate : float, optional
1691 The prediction of the percent change of the previous iteration and the forward model estimate from
1692 the Kalman filter, by default None
1694 Returns
1695 -------
1696 gain : float
1697 Relative weighting between the previous iteration and the new iteration in the calibration
1698 linear least squares solver
1699 """
1700 if iter > 2:
1701 # To calculate the best gain to use, compare the past gains that have been used
1702 # with the resulting convergences to estimate the best gain to use.
1703 # Algorithmically, this is a Kalman filter.
1704 # If forward modeling proceeds perfectly, the convergence metric should
1705 # asymptotically approach a final value.
1706 # We can estimate that value from the measured changes in convergence
1707 # weighted by the gains used in each previous iteration.
1708 # For some applications such as calibration this may be known in advance.
1709 # In calibration, it is expressed as the change in a
1710 # value, in which case the final value should be zero.
1711 if final_convergence_estimate is None: 1711 ↛ 1712line 1711 didn't jump to line 1712 because the condition on line 1711 was never true
1712 est_final_conv = np.zeros(iter - 1)
1713 for i in range(iter - 1):
1714 final_convergence_test = (
1715 (1 + gain_list[i]) * convergence_list[i + 1] - convergence_list[i]
1716 ) / gain_list[i]
1717 # The convergence metric is strictly positive, so if the estimated final convergence is
1718 # less than zero, force it to zero.
1719 est_final_conv[i] = np.max((0, final_convergence_test))
1720 # Because the estimate may slowly change over time, only use the most recent measurements.
1721 final_convergence_estimate = np.median(est_final_conv[max(iter - 5, 0) :])
1722 last_gain = gain_list[iter - 1]
1723 last_conv = convergence_list[iter - 2]
1724 new_conv = convergence_list[iter - 1]
1725 # The predicted convergence is the value we would get if the new model calculated
1726 # in the previous iteration was perfect. Recall that the updated model that is
1727 # actually used is the gain-weighted average of the new and old model,
1728 # so the convergence would be similarly weighted.
1729 predicted_conv = (final_convergence_estimate * last_gain + last_conv) / (
1730 base_gain + last_gain
1731 )
1732 # If the measured and predicted convergence are very close, that indicates
1733 # that our forward model is accurate and we can use a more aggressive gain
1734 # If the measured convergence is significantly worse (or better!) than predicted,
1735 # that indicates that the model is not converging as expected and
1736 # we should use a more conservative gain.
1737 delta = (predicted_conv - new_conv) / (
1738 (last_conv - final_convergence_estimate) / (base_gain + last_gain)
1739 )
1740 new_gain = 1 - abs(delta)
1741 # Average the gains to prevent oscillating solutions.
1742 new_gain = (new_gain + last_gain) / 2
1743 # For some reason base_gain can be a numpy float in testing so putting in a tuple solves this.
1744 gain = np.max((base_gain / 2, new_gain))
1746 else:
1747 gain = base_gain
1748 gain_list[iter] = gain
1750 return gain