Coverage for PyFHD/calibration/calibration

1import numpy as np

2from numpy.typing import NDArray

3from logging import Logger

4from PyFHD.pyfhd_tools.pyfhd_utils import (

5 resistant_mean,

6 weight_invert,

7 rebin,

8 histogram,

10from copy import deepcopy

11from astropy.io import fits

12from astropy.constants import c

13from pathlib import Path

14from scipy.ndimage import uniform_filter

15import importlib_resources

18def vis_extract_autocorr(

19 obs: dict,

20 vis_arr: NDArray[np.complex128],

21 pyfhd_config: dict,

22 auto_tile_i: NDArray[np.integer] | None = None,

23) -> tuple[NDArray[np.float64], NDArray[np.integer]]:

24 """

25 Extract the auto-correlations if they exist from the full visibility array.

27 Parameters

28 ----------

29 obs : dict

30 Observation metadata dictionary

31 vis_arr : NDArray[np.complex128]

32 Uncalibrated data visiblities

33 pyfhd_config : dict

34 PyFHD's configuration dictionary containing all the options set for a PyFHD run

35 auto_tile_i : NDArray[np.integer] | None, optional

36 Index array for auto-correlation visibilities, by default None

38 Returns

39 -------

40 autocorr: NDArray[np.float64]

41 The auto-correlation visibilities

42 auto_tile_i : NDArray[np.integer]

43 The index array for auto-correlation visibilities

44 """

46 autocorr_i = np.where(

47 obs["baseline_info"]["tile_a"] == obs["baseline_info"]["tile_b"]

48 )[0]

49 if autocorr_i.size > 0: 49 ↛ 96line 49 didn't jump to line 96 because the condition on line 49 was always true

50 auto_tile_i = obs["baseline_info"]["tile_a"][autocorr_i] - 1

51 # As auto_tile_i is used for indexing we need to make it an integer array

52 auto_tile_i = auto_tile_i.astype(np.integer)

53 auto_tile_i_single = np.unique(auto_tile_i)

54 # expect it as a list of 2D arrays, so there might be trouble

55 if not pyfhd_config["cal_time_average"]:

56 freq_tile_shape = np.real(vis_arr[0][:, autocorr_i]).shape

57 auto_corr = np.zeros([obs["n_pol"]] + list(freq_tile_shape))

58 else:

59 auto_corr = np.zeros((obs["n_pol"], obs["n_freq"], auto_tile_i_single.size))

61 for pol_i in range(obs["n_pol"]):

62 # Auto-correlations by definition are enirely real, so take the real part here

63 # index this way in case people have vis_arr as one dimensional

64 # array, containing two further 2D arrays, rather than a proper 3D

65 # array. Turns out this indexing is consistent across both cases

66 auto_vals = np.real(vis_arr[pol_i][:, autocorr_i])

67 if pyfhd_config["cal_time_average"]:

68 auto_single = np.zeros((obs["n_freq"], auto_tile_i_single.size))

69 time_inds = np.where(obs["baseline_info"]["time_use"])[0]

70 for tile_i in range(auto_tile_i_single.size):

71 baseline_i = np.where(auto_tile_i == auto_tile_i_single[tile_i])[0]

72 baseline_i = baseline_i[time_inds]

73 if time_inds.size > 1: 73 ↛ 86line 73 didn't jump to line 86 because the condition on line 73 was always true

74 # OK, auto_vals is of shape (n_freq, n_autos)

75 # We want to average a subset of baseline_i within auto_vals,

76 # to average a specific auto-correlation over time

77 auto_single[:, tile_i] = (

78 np.sum(

79 auto_vals[:, baseline_i][np.arange(obs["n_freq"]), :],

80 axis=1,

81 )

82 / time_inds.size

83 )

85 else:

86 auto_single[:, tile_i] = auto_vals[:, baseline_i][

87 np.arange(obs["n_freq"]), :

88 ]

89 auto_vals = auto_single

90 auto_corr[pol_i, :, :] = auto_vals

91 if pyfhd_config["cal_time_average"]:

92 auto_tile_i = auto_tile_i_single

93 return auto_corr, auto_tile_i

94 else:

95 # Return auto_corr as 0 and auto_tile_i as an empty array

96 return np.zeros(1), np.zeros(0)

99def vis_cal_auto_init(

100 obs: dict,

101 cal: dict,

102 vis_arr: NDArray[np.complex128],

103 vis_model_arr: NDArray[np.complex128],

104 vis_auto: NDArray[np.float64],

105 vis_auto_model: NDArray[np.float64],

106 auto_tile_i: NDArray[np.integer],

107) -> NDArray[np.complex128]:

108 """

109 Initialize the calibration solutions using the autocorrelations prior to the linear least squares fit

110 for faster convergence. The auto-correlations and cross-correlations have separate formulisms for the

111 initialization.

112

113 Autos -- Square root of "Ratio of data autos to model autos" x "overall mean for both crosses and

114 autos of ratio of data to model" / "overall mean of ratio of data autos to model autos, with 1's

115 for crosses", results in a per-frequency, per-baseline gain scaling.

116 Crosses -- Square root of "overall mean for both crosses and autos of ratio of data to model" /

117 "overall mean of ratio of data autos to model autos, with 1's for crosses", results in a single

118 number gain scaling.

119

120 Parameters

121 ----------

122 obs : dict

123 Observation metadata dictionary

124 cal : dict

125 Calibration dictionary

126 vis_arr : NDArray[np.complex128]

127 Uncalibrated data visiblities

128 vis_model_arr : NDArray[np.complex128]

129 Simulated model visibilites

130 vis_auto : NDArray[np.float64]

131 Data auto-correlations

132 vis_auto_model : NDArray[np.float64]

133 Simulated model auto-correlations

134 auto_tile_i : NDArray[np.integer]

135 Index array for auto-correlation visibilities

136

137 Returns

138 -------

139 auto_gain : NDArray[np.complex128]

140 Initialization gain array

141 """

142 # Set out the

143 auto_scale = np.zeros((cal["n_pol"], obs["n_freq"], obs["n_tile"]))

144 auto_gain = np.ones(

145 (cal["n_pol"], obs["n_freq"], obs["n_tile"]), dtype=np.complex128

146 )

147 freq_i_use = np.where(obs["baseline_info"]["freq_use"])[0]

148 for pol_i in range(cal["n_pol"]):

149 res_mean_data = resistant_mean(np.abs(vis_arr[pol_i, freq_i_use, :]), 2)

150 res_mean_model = resistant_mean(np.abs(vis_model_arr[pol_i, freq_i_use, :]), 2)

151 auto_scale[pol_i] = np.sqrt(res_mean_data / res_mean_model)

152 auto_gain = np.ones(

153 (cal["n_pol"], obs["n_freq"], obs["n_tile"]), dtype=np.complex128

154 )

155 # Would love to get rid of the loop altogether, haven't figurted it out yet

156 for pol_i in range(cal["n_pol"]):

157 auto_gain[pol_i, :, :][:, auto_tile_i] = np.sqrt(

158 vis_auto[pol_i, :, :] * weight_invert(vis_auto_model[pol_i, :, :])

159 )

160 auto_gain[pol_i, :, :] *= auto_scale[pol_i] * weight_invert(

161 np.mean(auto_gain[pol_i, :, :])

162 )

163 auto_gain[pol_i, :, :][np.isnan(auto_gain[pol_i, :, :])] = 1

164 auto_gain[pol_i, :, :][auto_gain[pol_i, :, :] <= 0] = 1

165 return auto_gain

166

167

168def vis_calibration_flag(

169 obs: dict, cal: dict, pyfhd_config: dict, logger: Logger

170) -> dict:

171 """

172 Flag tile and frequency outliers based on the calibration solutions. First, iteratively flag a maximum of three

173 times on amplitude with three tests: 1) flag frequencies above 5 sigma, 2) flag tiles above 5 sigma, and 3) flag

174 tiles either 2x lower or 2x higher than average. Second, iteratively flag a maximum of three times on phase with

175 two tests: 1) flag tiles with slopes above 5 sigma, and 2) flag tiles with per-frequency deviations from their slope

176 above 5 sigma.

177

178 Parameters

179 ----------

180 obs : dict

181 Observation metadata dictionary

182 cal : dict

183 Calibration dictionary

184 pyfhd_config : dict

185 PyFHD's configuration dictionary containing all the options set for a PyFHD run

186 logger : Logger

187 PyFHD's logger for displaying errors and info to the log files

188

189 Returns

190 -------

191 obs: dict

192 Observation metadata dictionary

193 """

194 amp_sigma_threshold = 5

195 amp_threshold = 2

196 phase_sigma_threshold = 5

197 freq_use = obs["baseline_info"]["freq_use"].copy()

198 for pol_i in range(cal["n_pol"]):

199 tile_use_i = np.nonzero(obs["baseline_info"]["tile_use"])[0]

200 freq_use_i = np.nonzero(freq_use)[0]

201 gain = cal["gain"][pol_i]

202 phase = np.arctan2(gain.imag, gain.real)

203 amp = np.abs(gain)

204

205 # first flag based on overall amplitude

206 # extract_subarray is not being used as it was FHD's way of taking the fact that

207 # IDL's indexing can be weird and won't allow to index the result

208 # use np.where on gain, which will be multidimensional as well

209 amp_sub = amp[:, tile_use_i][freq_use_i, :]

210 gain_freq_fom = np.std(amp_sub, axis=1)

211 # Calculate the y values from a polynomial fit and keep the standard deviation of the error in gain_tile_fom

212 amp_sub_fit = np.zeros_like(amp_sub)

213 # Polynomial polyval can only take 1d arrays and doesn't vectorize across a 2d array.

214 for tile_i in range(tile_use_i.size):

215 amp_sub_fit[:, tile_i] = np.polynomial.polynomial.polyval(

216 freq_use_i,

217 np.polynomial.Polynomial.fit(

218 freq_use_i,

219 amp_sub[:, tile_i],

220 deg=pyfhd_config["cal_amp_degree_fit"],

221 )

222 .convert()

223 .coef,

224 )

225 # Get standard deviation of the residuals on a per tile basis

226 gain_tile_fom = np.std(amp_sub - amp_sub_fit, axis=0)

227 # Get the median of every tile, why is it call avg?

228 gain_tile_avg = np.median(amp_sub, axis=0)

229 gain_freq_fom[np.isnan(gain_freq_fom)] = 0

230 gain_tile_fom[np.isnan(gain_tile_fom)] = 0

231 freq_cut_i = np.where(gain_freq_fom == 0)[0]

232 freq_uncut_i = np.nonzero(gain_freq_fom)[0]

233 if freq_cut_i.size > 0: 233 ↛ 234line 233 didn't jump to line 234 because the condition on line 233 was never true

234 freq_use[freq_use_i][freq_cut_i] = 0

235 tile_cut_i = np.where(gain_tile_fom == 0)[0]

236 tile_uncut_i = np.nonzero(gain_tile_fom)[0]

237 if tile_cut_i.size > 0: 237 ↛ 238line 237 didn't jump to line 238 because the condition on line 237 was never true

238 obs["baseline_info"]["tile_use"][tile_use_i][tile_cut_i] = 0

239 if freq_uncut_i.size == 0 or tile_uncut_i.size == 0: 239 ↛ 240line 239 didn't jump to line 240 because the condition on line 239 was never true

240 logger.error(

241 "The frequency and tile flagging inside calibration found some values not detected in previous flagging or calibration"

242 )

243

244 n_addl_cut = max(freq_cut_i.size + tile_cut_i.size, 1)

245 n_cut = freq_cut_i.size + tile_cut_i.size

246 iter = 0

247 while n_addl_cut > 0 and iter < 3:

248 gain_freq_sigma = np.std(gain_freq_fom[freq_uncut_i])

249 gain_tile_sigma = np.std(gain_tile_fom[tile_uncut_i])

250 freq_cut_i = np.where(

251 (

252 gain_freq_fom

253 - np.median(gain_freq_fom[freq_uncut_i])

254 - amp_sigma_threshold * gain_freq_sigma

255 )

256 > 0

257 )[0]

258 # Update the complement of freq_cut_i (the NOT) i.e. freq_uncut_i

259 freq_uncut_i = freq_uncut_i[~np.isin(freq_uncut_i, freq_cut_i)]

260 tile_cut_test1 = (

261 gain_tile_fom

262 - np.median(gain_tile_fom[tile_uncut_i])

263 - amp_sigma_threshold * gain_tile_sigma

264 ) > 0

265 tile_cut_test2 = (

266 gain_tile_avg < np.median(gain_tile_avg) / amp_threshold

267 ) | (gain_tile_avg > np.median(gain_tile_avg) * amp_threshold)

268 tile_cut_i = np.where(tile_cut_test1 | tile_cut_test2)[0]

269 # Update the complement of tile_cut_i (the NOT) i.e. tile_uncut_i

270 tile_uncut_i = tile_uncut_i[~np.isin(tile_uncut_i, tile_cut_i)]

271 n_addl_cut = (freq_cut_i.size + tile_cut_i.size) - n_cut

272 n_cut = freq_cut_i.size + tile_cut_i.size

273 iter += 1

274 if (freq_cut_i.size) > 0: 274 ↛ 275line 274 didn't jump to line 275 because the condition on line 274 was never true

275 freq_use[freq_use_i[freq_cut_i]] = 0

276 if (tile_cut_i.size) > 0:

277 obs["baseline_info"]["tile_use"][tile_use_i[tile_cut_i]] = 0

278

279 # Reset freq_use_i and tile_use_i for flagging based on phase

280 tile_use_i = np.nonzero(obs["baseline_info"]["tile_use"])[0]

281 freq_use_i = np.nonzero(freq_use)[0]

282

283 # Start flagging based on phase

284 phase_sub = phase[:, tile_use_i][freq_use_i, :]

285 phase_slope_arr = np.empty(tile_use_i.size)

286 phase_sigma_arr = np.empty(tile_use_i.size)

287 for tile_i in range(tile_use_i.size):

288 phase_use = np.unwrap(phase_sub[:, tile_i])

289 phase_params = np.polynomial.polynomial.Polynomial.fit(

290 freq_use_i, phase_use, deg=pyfhd_config["cal_phase_degree_fit"]

291 )

292 phase_params = phase_params.convert().coef

293 phase_fit = np.polynomial.polynomial.polyval(freq_use_i, phase_params)

294 phase_sigma2 = np.std(phase_use - phase_fit)

295 # In an unusual scenario sometimes you'll get an all 0 array to fit on, which gives only a zero back for the fit

296 phase_slope_arr[tile_i] = phase_params[1] if phase_params.size > 1 else 0

297 phase_sigma_arr[tile_i] = phase_sigma2

298 iter = 0

299 n_addl_cut = 1

300 n_cut = 0

301 while n_addl_cut > 0 and iter < 3:

302 slope_sigma = np.nanstd(phase_slope_arr)

303 tile_cut_test1 = (

304 np.abs(phase_slope_arr) - np.median(np.abs(phase_slope_arr))

305 ) > phase_sigma_threshold * slope_sigma

306 tile_cut_test2 = (phase_sigma_arr - np.median(phase_sigma_arr)) > (

307 phase_sigma_threshold * np.nanstd(phase_sigma_arr)

308 )

309 tile_cut_i = np.where(tile_cut_test1 | tile_cut_test2)[0]

310 n_addl_cut = tile_cut_i.size - n_cut

311 n_cut = tile_cut_i.size

312 iter += 1

313 if tile_cut_i.size > 0:

314 obs["baseline_info"]["tile_use"][tile_use_i[tile_cut_i]] = 0

315 # If we decide not to flag the frequencies, ignore any frequency flagging

316 if pyfhd_config["flag_calibration_frequencies"]: 316 ↛ 317line 316 didn't jump to line 317 because the condition on line 316 was never true

317 obs["baseline_info"]["freq_use"] = freq_use

318 # Return the obs with an updated baseline_info on the use of tiles and frequency

319 return obs

320

321

322def transfer_bandpass(

323 obs: dict, cal: dict, pyfhd_config: dict, logger: Logger

324) -> tuple[dict, dict]:

325 """

326 Apply a previously saved bandpass via a calfits file (github:pyuvdata). Check adherance to standards,

327 and match the polarizations, frequencies, timing, pointings, and tiles.

328

329 Parameters

330 ----------

331 obs : dict

332 Observation metadata dictionary

333 cal : dict

334 Calibration dictionary

335 pyfhd_config : dict

336 PyFHD's configuration dictionary containing all the options set for a PyFHD run

337 logger : Logger

338 PyFHD's logger for displaying errors and info to the log files

339

340 Returns

341 -------

342 cal_bandpass : dict

343 Calibration dictionary with bandpass gains

344 cal_remainder : dict

345 calibration dictionary with residuals after removing the bandpass gains

346 """

347 cal_bandpass = {}

348 try:

349 calfits = fits.open(

350 Path(pyfhd_config["input_path"], pyfhd_config["cal_bp_transfer"])

351 )

352 # Get the data

353 data_array = calfits[0].data

354 # Read in the header

355 naxis = calfits[0].header["naxis"]

356 n_jones = calfits[0].header["njones"]

357 if "delay" in calfits[0].header["caltype"]:

358 raise RuntimeWarning(

359 "Input Delay calibration not supported at this time, skipping calibration bandpass transfer."

360 )

361 time_integration = calfits[0].header["inttime"]

362 freq_channel_width = calfits[0].header["chwidth"]

363 x_orient = calfits[0].header["xorient"].strip()

364 data_dims = np.empty(naxis, dtype=np.integer)

365 data_types = naxis * [""]

366 for naxis_i in range(naxis):

367 # Get the dimensions of the data

368 data_dims[naxis_i] = calfits[0].header[f"naxis{naxis_i + 1}"]

369 # Get the ctypes

370 data_types[naxis_i] = (

371 calfits[0].header[f"ctype{naxis_i + 1}"].lower().strip()

372 )

373 data_types = np.array(data_types)

374 # Get the indexes for the FITS standard checks

375 data_index = np.nonzero("narrays" == data_types)[0][0]

376 ant_index = np.nonzero("antaxis" == data_types)[0][0]

377 freq_index = np.nonzero("freqs" == data_types)[0][0]

378 time_index = np.nonzero("time" == data_types)[0][0]

379 jones_index = np.nonzero("jones" == data_types)[0][0]

380 # Deal with spec_wind_index separately as default highband file doesn't have this in

381 # May cause issues with other fits files, adjust the code then.

382 spec_wind_index = np.nonzero("if" == data_types)[0]

383 if spec_wind_index.size == 0:

384 spec_wind_index = -1

385

386 # Check the indexes given to see if they match standards, if not raise exception

387 if (

388 data_index != 0

389 or ant_index != 4

390 or freq_index != 3

391 or time_index != 2

392 or jones_index != 1

393 ):

394 if (

395 data_index == 0

396 and ant_index == 5

397 and freq_index == 3

398 and time_index == 2

399 and jones_index == 1

400 and spec_wind_index == 4

401 ):

402 logger.info("Calfits adheres to the Fall 2018 pyuvdata convention")

403 if calfits[0].header["naxis5"] != 1:

404 raise RuntimeWarning(

405 "Calfits file includes more than one spectral window. Note that this feature is not yet supported in PyFHD."

406 )

407 # Remove spectral window dimension for compatibility

408 data_array = np.mean(data_array, axis=0)

409 else:

410 raise RuntimeWarning(

411 "Calfits file does not appear to adhere to standard. Please see github:pyuvdata/docs/references"

412 )

413

414 freq_start = calfits[0].header[f"crval{freq_index + 1}"]

415 time_start = calfits[0].header[f"crval{time_index + 1}"]

416 time_delt = calfits[0].header[f"cdelt{time_index + 1}"]

417 jones_start = calfits[0].header[f"crval{jones_index + 1}"]

418 jones_delt = calfits[0].header[f"cdelt{jones_index + 1}"]

419

420 n_ant_data = data_array.shape[0]

421 n_freq = data_array.shape[1]

422 n_time = data_array.shape[2]

423

424 # Check whether the number of polarizations specified matches the observation analysis run

425 jones_type_matrix = np.zeros(data_dims[1])

426 for jones_i in range(1, data_dims[1]):

427 jones_type_matrix[jones_i - 1] = jones_start + (jones_delt * jones_i)

428 if data_dims[1] > obs["n_pol"]:

429 logger.warning(

430 "More polarizations in calibration fits file than in observation analysis. Reducing calibration to match obs."

431 )

432 data_dims[1] = obs["n_pol"]

433 jones_type_matrix = jones_type_matrix[0 : obs["n_pol"]]

434 data_array = data_array[:, :, :, 0 : obs["n_pol"], :]

435 elif data_dims[1] < obs["n_pol"]:

436 raise RuntimeWarning(

437 "Not enough polarizations defined in the calibration fits file."

438 )

439

440 # Switch the pol convention to FHD standard if necessary

441 if x_orient == "north" or x_orient == "south":

442 data_array[:, :, :, 0, :], data_array[:, :, :, 1, :] = (

443 data_array[:, :, :, 1, :],

444 data_array[:, :, :, 0, :],

445 )

446 if obs["n_pol"] > 2:

447 data_array[:, :, :, 2, :], data_array[:, :, :, 3, :] = (

448 data_array[:, :, :, 3, :],

449 data_array[:, :, :, 2, :],

450 )

451

452 # Check to see if the calibraton and observation frequency resolution match

453 if freq_channel_width != obs["freq_res"]:

454 freq_factor = obs["freq_res"] / freq_channel_width

455 if freq_factor >= 1:

456 logic_test = freq_factor - np.floor(freq_factor)

457 else:

458 logic_test = 1 / freq_factor - np.floor(1 / freq_factor)

459 if logic_test != 0:

460 raise RuntimeWarning(

461 f"Calfits input freq channel width is not easily castable to the observation, different by a factor of {freq_factor}"

462 )

463 if freq_start != obs["baseline_info"]["freq"][0]:

464 raise RuntimeWarning(

465 "Calfits input freq start is not equal to observation freq start"

466 )

467 # Downselect the data array

468 if freq_factor > 1:

469 logger.warning(

470 f"Calfits input freq channel width is different by a factor of {freq_factor}. Avergaing Down."

471 )

472 # Set flagged indices to NAN to remove them from mean calculation

473 flag_inds = np.where(np.abs(np.squeeze(data_array[:, :, :, :, 2])) == 1)

474 data_array[flag_inds][0] = np.nan

475 data_array[flag_inds][1] = np.nan

476 for channel_i in range(obs["n_freq"]):

477 data_array[:, channel_i, :, :, :] = np.nanmean(

478 data_array[

479 :,

480 max(

481 channel_i * freq_factor - np.floor(freq_factor / 2), 0

482 ) : channel_i

483 * freq_factor

484 + np.floor(freq_factor / 2.0),

485 :,

486 :,

487 :,

488 ]

489 )

490 data_array = data_array[:, 0 : obs["n_freq"], :, :, :]

491 elif freq_factor < 1:

492 logger.warning(

493 f"Calfits input freq channel width is different by a factor of {freq_factor}. Using linear interpolation"

494 )

495 # The IDL code has 5 nested loops, and I can't think of the vectorization right now in a reasonable ampount of time

496 # Someone please vectorize laster if you can

497 data_array_temp = np.zeros(

498 (data_dims[4], obs["n_freq"], n_time, n_jones, 2)

499 )

500 for data_i in range(2):

501 for jones_i in range(n_jones):

502 for times_i in range(n_time):

503 for tile_i in range(data_dims[4]):

504 for channel_i in range(obs["n_freq"] - 1):

505 start_idx = int(channel_i * (1.0 / freq_factor))

506 end_idx = int((channel_i + 1) * (1.0 / freq_factor))

507 data_array_temp[

508 tile_i,

509 start_idx:end_idx,

510 times_i,

511 jones_i,

512 data_i,

513 ] = np.interp(

514 np.arange(start_idx, end_idx, 1.0),

515 np.arange(channel_i, channel_i + 1, 1.0),

516 data_array[

517 data_i,

518 jones_i,

519 times_i,

520 channel_i : channel_i + 1,

521 tile_i,

522 ],

523 )

524 data_array_temp[:, obs["n_freq"] - 1, :, :, :] = data_array[

525 :, n_freq - 1, :, :, :

526 ]

527 data_array = np.copy(data_array_temp)

528

529 # Check to see what time range this needs to be applied to, and if pointings are necessary

530 if n_time != 1:

531 sec_upperlimit = 2000

532 sec_lowerlimit = 1600

533 if (

534 time_integration < sec_upperlimit and time_integration > sec_lowerlimit

535 ) or (time_delt < sec_upperlimit and time_delt > sec_lowerlimit):

536 # Calibration fits are per pointing

537 # Keep all the delay patterns in a dictionary

538 delay_patterns = {

539 (-5): [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 3, 8, 13, 18],

540 (-4): [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15],

541 (-3): [0, 3, 6, 9, 0, 3, 6, 9, 0, 3, 6, 9, 0, 3, 6, 9],

542 (-2): [0, 2, 4, 6, 0, 2, 4, 6, 0, 2, 4, 6, 0, 2, 4, 6],

543 (-1): [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3],

544 (0): [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],

545 (5): [15, 10, 5, 0, 16, 11, 6, 1, 17, 12, 7, 2, 18, 13, 8, 3],

546 (4): [12, 8, 4, 0, 13, 9, 5, 1, 14, 10, 6, 2, 15, 11, 7, 3],

547 (3): list(

548 reversed([0, 3, 6, 9, 0, 3, 6, 9, 0, 3, 6, 9, 0, 3, 6, 9])

549 ),

550 (2): list(

551 reversed([0, 2, 4, 6, 0, 2, 4, 6, 0, 2, 4, 6, 0, 2, 4, 6])

552 ),

553 (1): list(

554 reversed([0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3])

555 ),

556 }

557 obs_pointing = None

558 for pointing, pattern in delay_patterns.items():

559 if np.array_equal(obs["delays"], pattern):

560 obs_pointing = pointing

561 if obs_pointing is None:

562 raise RuntimeWarning(

563 "Pointing number not within 5 pointings around zenith."

564 )

565 obs_julian_date = obs["astr"]["mjdobs"] + 2400000.5

566

567 # Find corresponding in the calfits

568 # Number of days since Auguest 23 2013

569 days_since_ref = np.floor(obs_julian_date) - np.floor(time_start)

570 # pointing start shift amount depending on how many days since ref

571 obs_pointing_shift_since_ref = ((24 - 23.9344699) / 24) * days_since_ref

572 # pointing start time for HH:MM:SS on Aug23 (in JD) plus comparible pointing start time for reference, using calculated shift

573 pointing_jdhms_ref = (

574 np.array(

575 [

576 0.13611,

577 0.15157,

578 0.17565,

579 0.19963,

580 0.22222,

581 0.24342593,

582 0.26453705,

583 0.28574074,

584 0.30694,

585 0.33657407,

586 ]

587 )

588 + obs_pointing_shift_since_ref

589 )

590 pointing_num_ref = [-5, -4, -3, -2, -1, 0, 1, 2, 3, 4]

591 # pointing start time for HH:MM:SS for calfits day (in JD)

592 pointing_jdhms_calfits = time_start - np.floor(time_start)

593 # find the closest index between reference and calfits

594 pointing_calfits_index = np.argmin(

595 np.abs(pointing_jdhms_ref - pointing_jdhms_calfits)

596 )

597 # make sure the index is greater than the pointing start time

598 if pointing_jdhms_calfits < pointing_jdhms_ref[pointing_calfits_index]:

599 pointing_calfits_index -= 1

600

601 if (

602 pointing_jdhms_calfits

603 > (

604 np.max(pointing_jdhms_ref)

605 + (pointing_jdhms_ref[1] - pointing_jdhms_ref[0])

606 )

607 ) or (

608 pointing_jdhms_calfits

609 < (

610 np.min(pointing_jdhms_ref)

611 - (pointing_jdhms_ref[1] - pointing_jdhms_ref[0])

612 )

613 ):

614 raise RuntimeWarning(

615 "Calfits does not start between five pointings before zenith and four pointings after zenith. Not suitable for pointing cal at this time."

616 )

617 # find which pointing is the start of the calfits data

618 pointing_calfits_start = pointing_num_ref[pointing_calfits_index]

619 if obs_pointing < pointing_calfits_start:

620 raise RuntimeWarning(

621 "Calfits file does not contain pointing of observation"

622 )

623 # select pointing index in calfits that matches observation

624 obs_pointing_index = abs(pointing_calfits_start - obs_pointing)

625 # choose the corresponding pointing from the calfits data array

626 data_array = data_array[:, :, obs_pointing_index, :, :]

627 elif np.floor(time_delt) == np.floor(obs["time_res"]):

628 # Calibration fits are per-timeres

629 logger.info(

630 "Averaging calfits to observation length, an FHD requirement at this time."

631 )

632 data_array_temp = np.zeros(

633 [data_dims[4], obs.n_freq, 1, data_dims[1], data_dims[0]]

634 )

635 data_array_temp[:, :, 0, :, :] = np.mean(data_array, axis=2)

636 data_array = np.copy(data_array_temp)

637 else:

638 # Calibration fits are for a random set of times

639 logger.info(

640 "Finding closest match in time between calfits and obs. Obs metadata assumed to report start time, calfits metadata assumed to report center time."

641 )

642 time_delta = time_integration / (60 * 60 * 24)

643 time_array = np.full(n_time, time_start + time_delta)

644 obs_julian_date = (

645 obs["astr"]["mjdobs"]

646 + 2400000.5

647 + ((obs["n_time"] * obs["time_res"]) / (2 * 60 * 60 * 24))

648 )

649 if (obs_julian_date < time_array[0] - 2 * time_delta) or (

650 obs_julian_date > time_array[-1] + 2 * time_delta

651 ):

652 raise RuntimeWarning(

653 "Observation does not seem to fit within the time frame of the calfits"

654 )

655 # find the closest index between calfits and observation

656 time_index = np.argmin(np.abs(obs_julian_date - time_array))

657 data_array = data_array[:, :, time_index, :, :]

658

659 # Check number of tiles

660 if n_ant_data != obs["n_tile"]:

661 raise RuntimeWarning(

662 "Number of antennas in calfits file does match observation antenna number"

663 )

664

665 # Now that the checks are done, return the cal structure

666 cal_bandpass["n_pol"] = min(obs["n_pol"], 2)

667 cal_bandpass["conv_thresh"] = 1e-7

668 cal_bandpass["gain"] = np.full(

669 (cal_bandpass["n_pol"], obs["n_tile"], obs["n_freq"]),

670 pyfhd_config["cal_gain_init"],

671 )

672 cal_bandpass["gain"][0 : obs["n_pol"], :, :] = np.squeeze(

673 data_array[:, :, 0, 0 : obs["n_pol"], 0]

674 ) + 1j * np.squeeze(data_array[:, :, 0, 0 : obs["n_pol"], 1])

675 logger.info("Calfits File has been read and cal_bandpass has been created")

676 except FileNotFoundError as e:

677 logger.error(

678 f"{pyfhd_config['cal_bp_transfer']} file wasn't found, skipping calibration bandpass transfer"

679 )

680 return {}, {}

681 except RuntimeWarning as e:

682 logger.error(e)

683 return {}, {}

684 cal_remainder = deepcopy(cal)

685 cal_remainder["gain"][0 : cal["n_pol"], :, :] = (

686 cal["gain"][0 : cal["n_pol"], :, :]

687 / cal_bandpass["gain"][0 : cal["n_pol"], :, :]

688 )

689 return cal_bandpass, cal_remainder

690

691

692def vis_cal_bandpass(

693 obs: dict, cal: dict, pyfhd_config: dict, logger: Logger

694) -> tuple[dict, dict]:

695 """

696 Reduce the degrees of freedom on the per-frequency calibration amplitudes by averaging solutions

697 together. Options include averaging over tiles which use a particular beamformer-to-receiver cable

698 lengths/types, or averaging over all tiles for a global bandpass.

699

700 Parameters

701 ----------

702 obs : dict

703 Observation metadata dictionary

704 cal : dict

705 Calibration dictionary

706 pyfhd_config : dict

707 PyFHD's configuration dictionary containing all the options set for a PyFHD run

708 logger : Logger

709 PyFHD's logger for displaying errors and info to the log files

710

711 Returns

712 -------

713 cal_bandpass : dict

714 Calibration dictionary with bandpass gains

715 cal_remainder : dict

716 calibration dictionary with residuals after removing the bandpass gains

717 """

718 freq_use = np.nonzero(obs["baseline_info"]["freq_use"])[0]

719 tile_use = np.nonzero(obs["baseline_info"]["tile_use"])[0]

720 n_pol = cal["gain"].shape[0]

721 # Set a flag for global bandpass, will turn true if too many tiles are flagged

722 global_bandpass = False

723

724 # Initialize cal_bandpass and cal_remainder and transfer them in, if a file has been set (fits only supported right now)

725 if pyfhd_config["cal_bp_transfer"] is not None: 725 ↛ 726line 725 didn't jump to line 726 because the condition on line 725 was never true

726 cal_bandpass, cal_remainder = transfer_bandpass(obs, cal, pyfhd_config, logger)

727 if len(cal_bandpass.keys()) != 0 and len(cal_remainder.keys()) != 0:

728 logger.info(

729 f"Calibration Bandpass FITS file {pyfhd_config['cal_bp_transfer']} transferred in for cal_bandpass and cal_remainder"

730 )

731 return cal_bandpass, cal_remainder

732 cal_bandpass = deepcopy(cal)

733 cal_remainder = deepcopy(cal)

734

735 cal_bandpass_gain = np.empty(cal["gain"].shape, dtype=np.complex128)

736 cal_remainder_gain = np.empty(cal["gain"].shape, dtype=np.complex128)

737

738 if pyfhd_config["cable_bandpass_fit"]:

739 # Using preexisting file to extract information about which tiles have which cable length

740 # cable_len = np.loadtxt(Path(pyfhd_config["input"], pyfhd_config["cable-reflection-coefficients"]), skiprows=1)[:, 2].flatten()

741 cable_len_filepath = importlib_resources.files(

742 "PyFHD.resources.instrument_config"

743 ).joinpath(f"{pyfhd_config['instrument']}_cable_reflection_coefficients.txt")

744 cable_len = np.loadtxt(cable_len_filepath, skiprows=1)[:, 2].flatten()

745

746 # Taking tile information and cross-matching it with the nonflagged tiles array, resulting in nonflagged tile arrays grouped by cable length

747 cable_length_ref = np.unique(cable_len)

748 tile_use_arr = [0] * cable_length_ref.size

749 for cable_i in range(cable_length_ref.size):

750 tile_use_arr[cable_i] = np.where(

751 (obs["baseline_info"]["tile_use"])

752 & (cable_len == cable_length_ref[cable_i])

753 )[0]

754 if tile_use_arr[cable_i].size == 0: 754 ↛ 755line 754 didn't jump to line 755 because the condition on line 754 was never true

755 logger.warning(

756 "Too Many flagged tiles to implement bandpass cable averaging, using global bandpass."

757 )

758 global_bandpass = True

759 # n_freq x 13 array. columns are frequency, 90m xx, 90m yy, 150m xx, 150m yy, 230m xx, 230m yy, 320m xx, 320m yy, 400m xx, 400m yy, 524m xx, 524m yy

760 bandpass_arr = np.zeros(

761 (obs["n_freq"], cal["n_pol"] * cable_length_ref.size + 1)

762 )

763 bandpass_arr[:, 0] = obs["baseline_info"]["freq"]

764 bandpass_col_count = 1

765 if pyfhd_config["auto_ratio_calibration"]: 765 ↛ 768line 765 didn't jump to line 768 because the condition on line 765 was always true

766 logger.info("auto_ratio_calibration is set, using global bandpass")

767 global_bandpass = True

768 for cable_i in range(cable_length_ref.size):

769 # This is an option to calibrate over all tiles to find the 'global' bandpass. It will be looped over by the number

770 # of cable lengths, and will redo the same calculation everytime. It is inefficient, but effective.

771 if global_bandpass: 771 ↛ 774line 771 didn't jump to line 774 because the condition on line 771 was always true

772 tile_use_cable = tile_use

773 else:

774 tile_use_cable = tile_use_arr[cable_i]

775

776 for pol_i in range(cal["n_pol"]):

777 gain = cal["gain"][pol_i]

778 # gain2 is a temporary variable used in place of the gain array for an added layer of safety

779 if cable_i == 0 and pol_i == 0:

780 gain2 = np.zeros(cal["gain"].shape, dtype=np.complex128)

781 # Only use gains from unflagged tiles and frequencies, and calculate the amplitude and phase

782 gain_use = gain[freq_use, :][:, tile_use_cable]

783 amp = np.abs(gain_use)

784 # amp2 is a temporary variable used in place of the amp array for an added layer of safety

785 amp2 = np.zeros((freq_use.size, tile_use_cable.size))

786 # This is the normalization loop for each tile. If the mean of gain amplitudes over all frequencies is nonzero, then divide

787 # the gain amplitudes by that number, otherwise make the gain amplitudes zero.

788 for tile_i in range(tile_use_cable.size):

789 res_mean = resistant_mean(amp[:, tile_i], 2)

790 if res_mean != 0: 790 ↛ 793line 790 didn't jump to line 793 because the condition on line 790 was always true

791 amp2[:, tile_i] = amp[:, tile_i] / res_mean

792 else:

793 amp2[:, tile_i] = 0

794 # This finds the normalized gain amplitude mean per frequency over all tiles, which is the final bandpass per cable group.

795 bandpass_single = np.empty(freq_use.size)

796 # If this is slow, resistant_mean can be vectorized

797 for f_i in range(freq_use.size):

798 bandpass_single[f_i] = resistant_mean(amp2[f_i, :], 2)

799 # Want iterative to start at 1 (to not overwrite freq) and store final bandpass per cable group.

800 bandpass_arr[freq_use, bandpass_col_count] = bandpass_single

801 bandpass_col_count += 1

802 # Fill temporary variable gain2, set equal to final bandpass per cable group for each tile that will use that bandpass.

803

804 # Vectorize later if possible

805 for tile_i in range(tile_use_cable.size):

806 gain2[pol_i, freq_use, tile_use_cable[tile_i]] = bandpass_single

807

808 # For the last bit at the end of the cable

809 if cable_i == cable_length_ref.size - 1:

810 # Set gain3 to the input gains

811 gain3 = cal["gain"][pol_i].copy()

812 # Set what will be passed back as the output gain as the final bandpass per cable type.

813 gain2_input = gain2[pol_i, :, :]

814 cal_bandpass_gain[pol_i] = gain2_input

815 # Set what will be passed back as the residual as the input gain divided by the final bandpass per cable type.

816 gain3[freq_use, :] /= gain2_input[freq_use, :]

817 cal_remainder_gain[pol_i] = gain3

818 # Add Levine Memo bandpass to the gain solutions here if you wish

819 else:

820 bandpass_arr = np.zeros((obs["n_freq"], cal["n_pol"] + 1))

821 bandpass_arr[:, 0] = obs["baseline_info"]["freq"]

822 for pol_i in range(cal["n_pol"]):

823 gain = cal["gain"][pol_i]

824 gain_use = gain[freq_use, :][:, tile_use]

825 amp = np.abs(gain_use)

826 amp2 = np.zeros((freq_use.size, tile_use.size))

827 for tile_i in range(tile_use.size):

828 res_mean = resistant_mean(amp[:, tile_i], 2)

829 if res_mean != 0: 829 ↛ 832line 829 didn't jump to line 832 because the condition on line 829 was always true

830 amp2[:, tile_i] = amp[:, tile_i] / res_mean

831 else:

832 amp2[:, tile_i] = 0

833

834 bandpass_single = np.empty(freq_use.size)

835 # If this is slow, resistant_mean can be vectorized

836 for f_i in range(freq_use.size):

837 bandpass_single[f_i] = resistant_mean(amp2[f_i, :], 2)

838

839 bandpass_arr[freq_use, pol_i + 1] = bandpass_single

840 # Work out the gain for the bandpass

841 gain2 = np.zeros(gain.shape, dtype=np.complex128)

842 gain3 = deepcopy(gain)

843 # Vectorize later if possible

844 for tile_i in range(obs["n_tile"]):

845 gain2[freq_use, tile_i] = bandpass_single

846 gain3[freq_use, tile_i] /= bandpass_single

847

848 cal_bandpass_gain[pol_i, :, :] = gain2

849 cal_remainder_gain[pol_i, :, :] = gain3

850

851 cal_bandpass["gain"] = cal_bandpass_gain

852 cal_remainder["gain"] = cal_remainder_gain

853 return cal_bandpass, cal_remainder

854

855

856def vis_cal_polyfit(

857 obs: dict,

858 cal: dict,

859 auto_ratio: NDArray[np.float64] | None,

860 pyfhd_config: dict,

861 logger: Logger,

862) -> dict:

863 """

864 Reduce the degrees of freedom on the per-frequency calibration amplitudes and phases by fitting

865 the full frequency band with polynomials of a specified degree, with options for split polynomials

866 over certain regions.

867

868 In addition, fit cable reflections with a text file of fits, theoretical fits given cable length/type,

869 or the finding the maximum in delay space. Any of these can then be used as an initial estimate in a

870 hyperresolved FFT of the residual calibration solutions to fit the final mode, phase, and amplitude.

871 Residual calibration solutions can optionally be made by using an incoherent mean, or a mean over

872 all tiles which *do not* have the same cable length/type, to reduce bias in the residual, and

873 the cable reflections can also be fit using just the phases to reduce dependency on the polyphase

874 filter bank shape.

875

876 Parameters

877 ----------

878 obs : dict

879 Observation metadata dictionary

880 cal : dict

881 Calibration dictionary

882 auto_ratio: NDArray[np.float64] | None

883 The estimation of the antenna-dependent bias through the square root of the autocorrelation

884 visibilities normalized via a reference tile

885 pyfhd_config : dict

886 PyFHD's configuration dictionary containing all the options set for a PyFHD run

887 logger : Logger

888 PyFHD's logger for displaying errors and info to the log files

889

890 Returns

891 -------

892 cal : dict

893 Calibration dictionary with polynomial gain fits

894 pyfhd_config : dict

895 PyFHD's configuration dictionary containing all the options set for a PyFHD run

896 Is returned in case the digital_gain_jump_polyfit was set here.

897 """

898 # Keep the og_gain_arr for calculations later

899 og_gain_arr = np.copy(cal["gain"])

900 if ( 900 ↛ 914line 900 didn't jump to line 914 because the condition on line 900 was always true

901 pyfhd_config["cal_reflection_mode_theory"]

902 or pyfhd_config["cal_reflection_mode_file"]

903 or pyfhd_config["cal_reflection_mode_delay"]

904 or pyfhd_config["cal_reflection_hyperresolve"]

905 ):

906 if ( 906 ↛ 912line 906 didn't jump to line 912 because the condition on line 906 was always true

907 pyfhd_config["cal_reflection_mode_theory"]

908 and abs(pyfhd_config["cal_reflection_mode_theory"]) > 1

909 ):

910 cal_mode_fit = pyfhd_config["cal_reflection_mode_theory"]

911 else:

912 cal_mode_fit = True

913 else:

914 cal_mode_fit = False

915 freq_use = np.where(obs["baseline_info"]["freq_use"])[0]

916 tile_use = np.where(obs["baseline_info"]["tile_use"])[0]

917

918 pre_dig_inds = np.where(obs["baseline_info"]["freq"][freq_use] < 187.515e6)

919 if pre_dig_inds[0].size == 0: 919 ↛ 920line 919 didn't jump to line 920 because the condition on line 919 was never true

920 logger.warning(

921 "No frequencies below 187.515MHz, using full band polyfit, digital gain jump polyfit disabled."

922 )

923 pyfhd_config["digital_gain_jump_polyfit"] = False

924 else:

925 f_d = np.max(pre_dig_inds[0])

926 f_end = freq_use.size

927

928 if ((f_d + 1) == f_end) and pyfhd_config["digital_gain_jump_polyfit"]: 928 ↛ 929line 928 didn't jump to line 929 because the condition on line 928 was never true

929 logger.warning(

930 "No frequency found above 187.515MHz, using full band polyfit, digital gain jump polyfit disabled."

931 )

932 pyfhd_config["digital_gain_jump_polyfit"] = False

933

934 # If the observation date is beyond the date where digital gain jump polyfit is required, disable it

935 # Date is 2014-07-23 (23rd July 2014) or Julian Day 2456861.5000000

936 if obs["jd0"] > 2456861.5000000 and pyfhd_config["digital_gain_jump_polyfit"]: 936 ↛ 937line 936 didn't jump to line 937 because the condition on line 936 was never true

937 logger.warning(

938 "Observation date is beyond the date where digital gain jump polyfit is required, using full band polyfit, digital gain jump polyfit disabled."

939 )

940 pyfhd_config["digital_gain_jump_polyfit"] = False

941

942 # If the amp_degree or phase_degree weren't used, then apply the defaults

943 if not pyfhd_config["cal_amp_degree_fit"]: 943 ↛ 944line 943 didn't jump to line 944 because the condition on line 943 was never true

944 pyfhd_config["cal_amp_degree_fit"] = 2

945 if not pyfhd_config["cal_phase_degree_fit"]: 945 ↛ 946line 945 didn't jump to line 946 because the condition on line 945 was never true

946 pyfhd_config["cal_phase_degree_fit"] = 1

947

948 # If you wish to find any steps that are outliers beyond 5sigma, and remove them add that code here.

949 # The cal_step_fit option isn't in the eor_defaults_wrapper or defined in the FHD dictionary.

950

951 # Polynomial fitting over the frequency band

952 gain_residual = np.empty((cal["n_pol"], obs["n_freq"], obs["n_tile"]))

953 # create amp_params with the shape of (n_pol, n_tile, amp_degree + 1)

954 # If we're using the digital_gain_jump_polyfit add extra dimension to amp_params

955 if pyfhd_config["digital_gain_jump_polyfit"]: 955 ↛ 965line 955 didn't jump to line 965 because the condition on line 955 was always true

956 cal["amp_params"] = np.empty(

957 (

958 cal["n_pol"],

959 obs["n_tile"],

960 pyfhd_config["cal_amp_degree_fit"],

961 pyfhd_config["cal_amp_degree_fit"],

962 )

963 )

964 else:

965 cal["amp_params"] = np.empty(

966 (cal["n_pol"], obs["n_tile"], pyfhd_config["cal_amp_degree_fit"] + 1)

967 )

968 cal["phase_params"] = np.empty(

969 (cal["n_pol"], obs["n_tile"], pyfhd_config["cal_phase_degree_fit"] + 1)

970 )

971 for pol_i in range(cal["n_pol"]):

972 gain_arr = np.copy(cal["gain"][pol_i])

973 gain_amp = np.abs(gain_arr)

974 gain_phase = np.arctan2(gain_arr.imag, gain_arr.real)

975 for tile_i in range(obs["n_tile"]):

976 gain = np.squeeze(gain_amp[freq_use, tile_i])

977 gain_fit = np.zeros(obs["n_freq"])

978 # Pre and post digital gain jump separately for highband MWA data

979 if pyfhd_config["digital_gain_jump_polyfit"]: 979 ↛ 1025line 979 didn't jump to line 1025 because the condition on line 979 was always true

980 if pre_dig_inds[0].size > 0: 980 ↛ 1020line 980 didn't jump to line 1020 because the condition on line 980 was always true

981 fit_params1 = (

982 np.polynomial.Polynomial.fit(

983 freq_use[0 : f_d + 1],

984 gain[0 : f_d + 1],

985 deg=pyfhd_config["cal_amp_degree_fit"] - 1,

986 )

987 .convert()

988 .coef

989 )

990 fit_params2 = (

991 np.polynomial.Polynomial.fit(

992 freq_use[f_d + 1 : f_end],

993 gain[f_d + 1 : f_end],

994 deg=pyfhd_config["cal_amp_degree_fit"] - 1,

995 )

996 .convert()

997 .coef

998 )

999 for di in range(pyfhd_config["cal_amp_degree_fit"]):

1000 gain_fit[freq_use[0] : freq_use[f_d] + 1] += fit_params1[di] * (

1001 # Had to use the size as use freq_use[f_d] could cause

1002 # off by one indexing errors (due to freq_use[0] potentially being

1003 # 1 or 0 which throws off the np.arange indexing)

1004 np.arange(gain_fit[freq_use[0] : freq_use[f_d] + 1].size)

1005 ** di

1006 )

1007 gain_fit[freq_use[f_d + 1] : freq_use[f_end - 1] + 1] += (

1008 fit_params2[di]

1009 * (

1010 np.arange(freq_use[f_end - 1] - freq_use[f_d + 1] + 1)

1011 + freq_use[f_d + 1]

1012 )

1013 ** di

1014 )

1015 # Do notice this is saving the coefficients on a per row basis, so fit_params1 a,b will be [pol_i, tile_i, 0, :]

1016 # While fit_params2 a, b coefficients will be in [pol_i, tile_i, 1, :]

1017 fit_params = np.vstack([fit_params1, fit_params2])

1018 cal["amp_params"][pol_i, tile_i] = fit_params

1019 else:

1020 logger.warning(

1021 "digital_gain_jump_polyfit only works with highband mwa data. Full band polyfit applied instead."

1022 )

1023 # Fit for amplitude

1024 else:

1025 fit_params = (

1026 np.polynomial.Polynomial.fit(

1027 freq_use, gain, deg=pyfhd_config["cal_amp_degree_fit"]

1028 )

1029 .convert()

1030 .coef

1031 )

1032 cal["amp_params"][pol_i, tile_i, :] = fit_params

1033 for di in range(pyfhd_config["cal_amp_degree_fit"]):

1034 gain_fit += fit_params[di] * np.arange(obs["n_freq"]) ** di

1035

1036 gain_residual[pol_i, :, tile_i] = gain_amp[:, tile_i] - gain_fit

1037

1038 # Fit for phase

1039 phase_use = np.unwrap(np.squeeze(gain_phase[freq_use, tile_i]))

1040 phase_params = (

1041 np.polynomial.Polynomial.fit(

1042 freq_use, phase_use, pyfhd_config["cal_phase_degree_fit"]

1043 )

1044 .convert()

1045 .coef

1046 )

1047 cal["phase_params"][pol_i, tile_i, :] = phase_params

1048 phase_fit = np.zeros(obs["n_freq"])

1049 for di in range(phase_params.size):

1050 phase_fit += phase_params[di] * np.arange(obs["n_freq"]) ** di

1051 gain_arr[:, tile_i] = gain_fit * np.exp(1j * phase_fit)

1052 cal["gain"][pol_i] = gain_arr

1053 # Cable Reflection Fitting

1054 if cal_mode_fit: 1054 ↛ 1284line 1054 didn't jump to line 1284 because the condition on line 1054 was always true

1055 if pyfhd_config["cal_reflection_mode_file"]: 1055 ↛ 1056line 1055 didn't jump to line 1056 because the condition on line 1055 was never true

1056 logger.info(

1057 "Using mwa calibration reflections fits from instrument_config/mwa_cable_reflection_coefficients.txt."

1058 )

1059 cable_len_filepath = importlib_resources.files(

1060 "PyFHD.resources.instrument_config"

1061 ).joinpath(

1062 f"{pyfhd_config['instrument']}_cable_reflection_coefficients.txt"

1063 )

1064 cable_reflections = np.loadtxt(cable_len_filepath, skiprows=1).transpose()

1065 cable_length = cable_reflections[2]

1066 tile_ref_flag = np.minimum(

1067 np.maximum(np.zeros_like(cable_reflections[4]), cable_reflections[4]),

1068 np.ones_like(cable_reflections[4]),

1069 )

1070 tile_mode_X = cable_reflections[5]

1071 tile_amp_X = cable_reflections[6]

1072 tile_phase_X = cable_reflections[7]

1073 tile_mode_Y = cable_reflections[8]

1074 tile_amp_Y = cable_reflections[9]

1075 tile_phase_Y = cable_reflections[10]

1076

1077 # Modes in fourier transform units

1078 mode_i_arr = np.zeros((cal["n_pol"], obs["n_tile"]))

1079 mode_i_arr[0, :] = tile_mode_X * tile_ref_flag

1080 mode_i_arr[1, :] = tile_mode_Y * tile_ref_flag

1081

1082 amp_arr = np.vstack([tile_amp_X, tile_amp_Y])

1083 phase_arr = np.vstack[[tile_phase_X, tile_phase_Y]]

1084

1085 elif pyfhd_config["cal_reflection_mode_theory"]: 1085 ↛ 1112line 1085 didn't jump to line 1112 because the condition on line 1085 was always true

1086 logger.info(

1087 "Using theory calculation in nominal reflection mode calibration."

1088 )

1089 # Get the nominal tile lengths and velocity factors

1090 cable_len_filepath = importlib_resources.files(

1091 "PyFHD.resources.instrument_config"

1092 ).joinpath(f"{pyfhd_config['instrument']}_cable_length.txt")

1093 cable_length_data = np.loadtxt(cable_len_filepath, skiprows=1).transpose()

1094 cable_length = cable_length_data[2]

1095 cable_vf = cable_length_data[3]

1096 tile_ref_flag = np.minimum(np.maximum(0, cable_length_data[4]), 1)

1097

1098 # Nominal Reflect Time

1099 reflect_time = (2 * cable_length) / (c.value * cable_vf)

1100 bandwidth = (

1101 (

1102 np.max(obs["baseline_info"]["freq"])

1103 - np.min(obs["baseline_info"]["freq"])

1104 )

1105 * obs["n_freq"]

1106 ) / (obs["n_freq"] - 1)

1107 # Modes in fourier transform units

1108 mode_i_arr = np.tile(

1109 bandwidth * reflect_time * tile_ref_flag, [cal["n_pol"], 1]

1110 )

1111

1112 elif pyfhd_config["cal_reflection_mode_delay"]:

1113 logger.info(

1114 "Using calibration delay spectrum to calculate nominal reflection modes."

1115 )

1116 spec_mask = np.zeros(obs["n_freq"])

1117 spec_mask[freq_use] = 1

1118 freq_cut = np.where(spec_mask == 0)

1119 # IDL uses forward FFT by default

1120 spec_psf = np.abs(np.fft.fftn(spec_mask, norm="forward"))

1121 spec_inds = np.arange(obs["n_freq"] // 2)

1122 spec_psf = spec_psf[spec_inds]

1123 mode_test = np.zeros(obs["n_freq"] // 2)

1124 for pol_i in range(cal["n_pol"]):

1125 for ti in range(tile_use.size):

1126 tile_i = tile_use[ti]

1127 spec0 = np.abs(np.fft.fftn(gain_residual[pol_i, tile_i]))

1128 mode_test += spec0[spec_inds]

1129 psf_mask = np.zeros(obs["n_freq"] // 2)

1130

1131 if freq_cut[0].size > 0:

1132 psf_mask[np.where(spec_psf > (np.max(spec_psf) / 1000))] = 1

1133 # Replaces IDL smooth with edge_truncate

1134 psf_mask = uniform_filter(psf_mask, size=5, mode="nearest")

1135 mask_i = np.nonzero(psf_mask)

1136 if mask_i[0].size > 0:

1137 mode_test[mask_i] = 0

1138 mode_i_arr = np.zeros((cal["n_pol"], obs["n_tile"])) + np.argmax(mode_test)

1139

1140 # Fit only certain cable lengths

1141 # Positive length indicates fit mode, negative length indicates exclude mode

1142 # This is currently assuming cal_mode_fit is an integer or number, not an array!

1143 # If you need an array to fit or exclude cable lengths, then create another option for it

1144 # in the config and adjust the code accordingly. Ensure every config option only has one purpose.

1145 if auto_ratio is None and cal_mode_fit != 1:

1146 tile_ref_logic = np.zeros(obs["n_tile"])

1147 if cal_mode_fit > 0: 1147 ↛ 1152line 1147 didn't jump to line 1152 because the condition on line 1147 was always true

1148 cable_cut_i = np.where(cable_length != cal_mode_fit)

1149 if cable_cut_i[0].size > 0: 1149 ↛ 1155line 1149 didn't jump to line 1155 because the condition on line 1149 was always true

1150 tile_ref_logic[cable_cut_i] = 1

1151 else:

1152 cable_cut_i = np.where(cable_length == abs(cal_mode_fit))

1153 if cable_cut_i[0].size > 0:

1154 tile_ref_logic[cable_cut_i] = 1

1155 cable_cut = np.nonzero(tile_ref_logic)

1156 tile_ref_flag = np.ones(obs["n_tile"])

1157 if cable_cut[0].size > 0: 1157 ↛ 1159line 1157 didn't jump to line 1159 because the condition on line 1157 was always true

1158 tile_ref_flag[cable_cut] = 0

1159 mode_i_arr *= tile_ref_flag

1160

1161 cal["mode_params"] = np.empty([cal["n_pol"], obs["n_tile"], 3])

1162 for pol_i in range(cal["n_pol"]):

1163 # Divide the polyfit to reveal the residual cable reflections better

1164 gain_arr = og_gain_arr[pol_i] / cal["gain"][pol_i]

1165 for ti in range(tile_use.size):

1166 tile_i = tile_use[ti]

1167 mode_i = mode_i_arr[pol_i, tile_i]

1168 if mode_i == 0:

1169 continue

1170 else:

1171 # Options to hyperresolve or fit the reflection modes/amp/phase given the nominal calculations

1172 if pyfhd_config["cal_reflection_hyperresolve"]: 1172 ↛ 1256line 1172 didn't jump to line 1256 because the condition on line 1172 was always true

1173 # start with nominal cable length

1174 mode0 = mode_i

1175 # overresolve the FT used for the fit (normal resolution would be dmode=1)

1176 dmode = 0.05

1177 # range around the central mode to test

1178 nmodes = 101

1179 # array of modes to try

1180 modes = (np.arange(nmodes) - nmodes // 2) * dmode + mode0

1181 # reshape for ease of computing

1182 modes = rebin(modes, (freq_use.size, nmodes)).T

1183

1184 if auto_ratio is not None:

1185 # Find tiles which will *not* be accidently coherent in their cable reflection in order to reduce bias

1186 inds = np.where(

1187 (obs["baseline_info"]["tile_use"])

1188 & (mode_i_arr[pol_i, :] > 0)

1189 & ((np.abs(mode_i_arr[pol_i, :] - mode_i)) > 0.01)

1190 )

1191 # mean over frequency for each tile

1192 freq_mean = np.nanmean(auto_ratio[pol_i], axis=0)

1193 norm_autos = auto_ratio[pol_i] / rebin(

1194 freq_mean, (obs["n_freq"], obs["n_tile"])

1195 )

1196 # mean over all tiles which *are not* accidently coherent as a func of freq

1197 incoherent_mean = np.nanmean(norm_autos[:, inds[0]], axis=1)

1198 # Residual and normalized (using incoherent mean) auto-correlation

1199 resautos = (

1200 norm_autos[:, tile_i] / incoherent_mean

1201 ) - np.nanmean(norm_autos[:, tile_i] / incoherent_mean)

1202 gain_temp = rebin(

1203 resautos[freq_use], (nmodes, freq_use.size)

1204 )

1205 else:

1206 # dimension manipulation, add dim for mode fitting

1207 # Subtract the mean so aliasing is reduced in the dft cable fitting

1208 gain_temp = rebin(

1209 gain_arr[freq_use, tile_i]

1210 - np.mean(gain_arr[freq_use, tile_i]),

1211 (nmodes, freq_use.size),

1212 )

1213 # freq_use matrix to multiply/collapse in fit

1214 freq_mat = rebin(freq_use, (nmodes, freq_use.size))

1215 # Perform DFT of gains to test modes

1216 test_fits = np.sum(

1217 np.exp(1j * 2 * np.pi / obs["n_freq"] * modes * freq_mat)

1218 * gain_temp,

1219 axis=1,

1220 )

1221 # Pick out highest amplitude fit (mode_ind gives the index of the mode)

1222 amp_use = np.max(np.abs(test_fits)) / freq_use.size

1223 mode_ind = np.argmax(np.abs(test_fits))

1224 # Phase of said fit

1225 phase_use = np.arctan2(

1226 test_fits[mode_ind].imag, test_fits[mode_ind].real

1227 )

1228 mode_i = modes[mode_ind, 0]

1229

1230 # Using the mode selected from the gains, optionally use the phase to find the amp and phase

1231 if auto_ratio is not None:

1232 # Find tiles which will not be accidently coherent in their cable reflection in order to reduce bias

1233 inds = np.where(

1234 (obs["baseline_info"]["tile_use"])

1235 & (mode_i_arr[pol_i, :] > 0)

1236 & (np.abs(mode_i_arr[pol_i, :] - mode_i) > 0.01)

1237 )

1238 residual_phase = np.arctan2(

1239 gain_arr[freq_use, :].imag, gain_arr[freq_use, :].real

1240 )

1241 incoherent_residual_phase = residual_phase[

1242 :, tile_i

1243 ] - np.nanmean(residual_phase[:, inds[0]], axis=1)

1244 test_fits = np.sum(

1245 np.exp(

1246 1j * 2 * np.pi / obs["n_freq"] * mode_i * freq_use

1247 )

1248 * incoherent_residual_phase

1249 )

1250 # Factor of 2 from fitting just the phase

1251 amp_use = 2 * np.abs(test_fits) / freq_use.size

1252 # Factor of pi/2 from just fitting the phase

1253 phase_use = (

1254 np.arctan2(test_fits.imag, test_fits.real) + np.pi / 2

1255 )

1256 elif pyfhd_config["cal_reflection_mode_file"]:

1257 # Use predetermined fits

1258 amp_use = amp_arr[pol_i, tile_i]

1259 phase_use = phase_arr[pol_i, tile_i]

1260 else:

1261 # Use nominal delay mode, but fit amplitude and phase of reflections

1262 mode_fit = np.sum(

1263 np.exp(1j * 2 * np.pi / obs["n_freq"] * mode_i * freq_use)

1264 * gain_arr[freq_use, tile_i]

1265 )

1266 amp_use = np.abs(mode_fit) / freq_use[0].size

1267 phase_use = np.arctan2(mode_fit.imag, mode_fit.real)

1268

1269 gain_mode_fit = amp_use * np.exp(

1270 -1j

1271 * 2

1272 * np.pi

1273 * (mode_i * np.arange(obs["n_freq"]) / obs["n_freq"])

1274 + 1j * phase_use

1275 )

1276 if auto_ratio is not None:

1277 # Only fit for the cable reflection in the phases

1278 cal["gain"][pol_i, :, tile_i] *= np.exp(1j * gain_mode_fit.imag)

1279 else:

1280 cal["gain"][pol_i, :, tile_i] *= 1 + gain_mode_fit

1281 cal["mode_params"][pol_i, tile_i] = np.array(

1282 [mode_i, amp_use, phase_use]

1283 )

1284 return cal, pyfhd_config

1285

1286

1287def vis_cal_auto_fit(

1288 obs: dict,

1289 cal: dict,

1290 vis_auto: NDArray[np.float64],

1291 vis_auto_model: NDArray[np.float64],

1292 auto_tile_i: NDArray[np.integer],

1293) -> dict:

1294 """

1295 Solve for each tile's calibration amplitude via the square root of the ratio of the data autocorrelation

1296 to the model autocorrelation using the definition of a gain. Then, remove dependence on the correlated

1297 noise term in the autocorrelations by scaling this amplitude down to the crosscorrelations gain via a

1298 simple, linear fit. Build a full, scaled autocorrelation gain solution by adding in the phase term via

1299 the crosscorrelation gains.

1300

1301 Parameters

1302 ----------

1303 obs : dict

1304 Observation metadata dictionary

1305 cal : dict

1306 Calibration dictionary

1307 vis_auto : NDArray[np.float64]

1308 Data autocorrelations

1309 vis_auto_model : NDArray[np.float64]

1310 Simulated model autocorrelations

1311 auto_tile_i : NDArray[np.integer]

1312 Index array of the tile array that have defined autocorrelations

1313

1314 Returns

1315 -------

1316 cal: dict

1317 Calibration dictionary with scaled autocorrelation gains

1318 """

1319 freq_i_use = np.nonzero(obs["baseline_info"]["freq_use"])

1320 freq_i_flag = np.where(obs["baseline_info"]["freq_use"] == 0)[0]

1321 # If the number of frequencies not being used is above 0, then ignore the frequencies surrounding them.

1322 if freq_i_flag.size > 0: 1322 ↛ 1334line 1322 didn't jump to line 1334 because the condition on line 1322 was always true

1323 freq_flag = np.zeros(obs["n_freq"])

1324 freq_flag[freq_i_use] = 1

1325 for freq_i in range(freq_i_flag.size):

1326 minimum = max(0, freq_i_flag[freq_i] - 1)

1327 maximum = min(obs["n_freq"], freq_i_flag[freq_i] + 2)

1328 freq_flag[minimum:maximum] = 0

1329 freq_i_use = np.nonzero(freq_flag)

1330 # Vectorized loop for via_cal_auto_fit lines 45-55 in IDL

1331 # However the logic still indexes the full 128 tiles, so need to shove

1332 # outputs into an empty array of correct size

1333 # We're not using the cross polarizations if they are present

1334 auto_gain = np.empty((cal["n_pol"], obs["n_freq"], obs["n_tile"]))

1335 auto_gain[:, :, auto_tile_i] = np.sqrt(

1336 vis_auto[: cal["n_pol"]] * weight_invert(vis_auto_model[: cal["n_pol"]])

1337 )

1338 gain_cross = cal["gain"]

1339 fit_slope = np.empty((cal["n_pol"], obs["n_tile"]))

1340 fit_offset = np.empty_like(fit_slope)

1341

1342 # Didn't vectorize as the polyfit won't be vectorized

1343 for pol_i in range(cal["n_pol"]):

1344 for tile in range(auto_tile_i.size):

1345 tile_idx = auto_tile_i[tile]

1346 phase_cross_single = np.arctan2(

1347 gain_cross[pol_i, :, tile_idx].imag, gain_cross[pol_i, :, tile_idx].real

1348 )

1349

1350 gain_auto_single = np.abs(auto_gain[pol_i, :, tile_idx])

1351 gain_cross_single = np.abs(gain_cross[pol_i, :, tile_idx])

1352

1353 # mask out any NaN values; numpy doesn't like them,

1354 # I assume the IDL equiv function just masks them?

1355 # or maybe we need to do a catch for NaNs here, and abandon all

1356 # hope for a fit if there are NaNs?

1357 notnan = np.where(

1358 (np.isnan(gain_auto_single[freq_i_use]) != True)

1359 & (np.isnan(gain_cross_single[freq_i_use]) != True)

1360 )

1361 gain_auto_single_fit = gain_auto_single[freq_i_use][notnan]

1362 gain_cross_single_fit = gain_cross_single[freq_i_use][notnan]

1363

1364 # linfit from IDL uses chi-square error calculations to do the linear fit, instead of least squares.

1365 # The polynomial fit uses least square method

1366 x = np.vstack([gain_auto_single_fit, np.ones(gain_auto_single_fit.size)]).T

1367 fit_single = np.linalg.lstsq(x, gain_cross_single_fit, rcond=None)[0]

1368 # IDL gives the solution in terms of [A, B] while Python does [B, A] assuming we're

1369 # solving the equation y = A + Bx

1370 cal["gain"][pol_i, :, tile_idx] = (

1371 gain_auto_single * fit_single[0] + fit_single[1]

1372 ) * np.exp(1j * phase_cross_single)

1373 fit_slope[pol_i, tile_idx] = fit_single[0]

1374 fit_offset[pol_i, tile_idx] = fit_single[1]

1375 cal["auto_scale"] = np.sum(fit_slope, axis=1) / auto_tile_i.size

1376 cal["auto_params"] = np.empty([cal["n_pol"], cal["n_pol"], obs["n_tile"]])

1377 cal["auto_params"][0, :, :] = fit_offset

1378 cal["auto_params"][1, :, :] = fit_slope

1379 return cal

1380

1381

1382def vis_calibration_apply(

1383 vis_arr: NDArray[np.complex128],

1384 obs: dict,

1385 cal: dict,

1386 vis_model_arr: NDArray[np.complex128],

1387 vis_weights: NDArray[np.float64],

1388 logger: Logger,

1389) -> tuple[NDArray[np.complex128], dict]:

1390 """

1391 Apply the calibration solutions to the input, data visibilities to create calibrated, data visibilities using

1392 the definition of the gains.

1393

1394 Definition of the gain:

1395 (visibility for baseline of tile i and tile j) = (gain of tile i) (gain of tile j) (model visibility for baseline of tile i and tile j)

1396

1397 If only two othogonal polarizations were used to calibrate, calculate the phase offset between the two orthogonal dipoles to

1398 solve for a degeneracy in the cross polarizations. The formula can be derived by multiplying the gains by an equal and opposite

1399 phase in the linear least square solver and solving for the phase when the partial derivative w.r.t the offset is 0.

1400

1401 Parameters

1402 ----------

1403 vis_arr : NDArray[np.complex128]

1404 Uncalibrated data visiblities

1405 obs : dict

1406 Observation metadata dictionary

1407 cal : dict

1408 Calibration dictionary

1409 vis_model_arr : NDArray[np.complex128]

1410 Simulated model visibilites

1411 vis_weights : NDArray[np.float64]

1412 Weights (flags) of the visibilities

1413 logger : Logger

1414 PyFHD's logger for displaying errors and info to the log files

1415

1416 Returns

1417 -------

1418 vis_arr : NDArray[np.complex128]

1419 Calibrated visibilities

1420 cal : dict

1421 Calibration dictionary

1422 """

1423 # tile numbering starts at 1

1424 tile_a_i = obs["baseline_info"]["tile_a"] - 1

1425 tile_b_i = obs["baseline_info"]["tile_b"] - 1

1426

1427 n_pol_vis = vis_arr.shape[0]

1428 gain_pol_arr1 = [0, 1, 0, 1]

1429 gain_pol_arr2 = [0, 1, 1, 0]

1430

1431 # OK, it really makes sense to use native python functionality here

1432 # We're just trying to match up the frequency-dependent gains to the

1433 # correct baselines, and apply them. Can use `meshgrid` here instead of

1434 # `rebin`, which will make 2D indexing arrays, so we can directly leave

1435 # the gain arrays in the correct shape and index the directly. Using `rebin`

1436 # means we have to flatten them

1437 (

1438 inds_a_baseline,

1439 inds_a_freqs,

1440 ) = np.meshgrid(tile_a_i, np.arange(obs["n_freq"]))

1441 inds_b_baseline, inds_b_freqs = np.meshgrid(tile_b_i, np.arange(obs["n_freq"]))

1442

1443 for pol_i in range(n_pol_vis):

1444 gain_arr1 = cal["gain"][gain_pol_arr1[pol_i], :, :]

1445 gain_arr2 = cal["gain"][gain_pol_arr2[pol_i], :, :]

1446

1447 vis_gain = gain_arr1[inds_a_freqs, inds_a_baseline] * np.conjugate(

1448 gain_arr2[inds_b_freqs, inds_b_baseline]

1449 )

1450

1451 vis_arr[pol_i, :, :] *= weight_invert(vis_gain, use_abs=False)

1452

1453 # We haven't run FHD in a way that uses 4 pols yet so this is all

1454 # untested

1455 if n_pol_vis == 4:

1456 if type(vis_model_arr) == np.ndarray and type(vis_weights) == np.ndarray: 1456 ↛ 1506line 1456 didn't jump to line 1506 because the condition on line 1456 was always true

1457 # This if statement replaces vis_calibrate_crosspol_phase

1458 # as this was the only place where the function was used

1459 # Note inside vis_calibrate_crosspol_phase there is a

1460 # if n_pol_vis == 4 check hence only run if n_pol_vis == 4

1461 # This code should calculate the phase fit between the X and Y

1462 # antenna polarizations.

1463 # Use the xx flags (yy should be identitical at this point)

1464

1465 # this is num baselines per time step

1466 n_baselines = int(len(tile_a_i) / obs["n_times"])

1467 # reshape from (n_freq, n_baselines*n_times) to (n_freq, n_times, n_baselines). Turns out due to the row major vs col major difference

1468 # between IDL and python, this shape also changes

1469 new_shape = (obs["n_freq"], obs["n_times"], n_baselines)

1470 weights_use = np.reshape(vis_weights[0, :, :], new_shape)

1471 # carried over from FHD code

1472 weights_use = np.maximum(weights_use, np.zeros_like(weights_use))

1473 weights_use = np.minimum(weights_use, np.ones_like(weights_use))

1474

1475 # Average the visbilities in time

1476 axis_avg = 1

1477 vis_xy = np.reshape(vis_arr[2, :, :], new_shape)

1478 vis_xy = np.sum(vis_xy * weights_use, axis=axis_avg)

1479 vis_yx = np.reshape(vis_arr[3, :, :], new_shape)

1480 vis_yx = np.sum(vis_yx * weights_use, axis=axis_avg)

1481

1482 model_xy = np.reshape(vis_model_arr[2, :, :], new_shape)

1483 model_xy = np.sum(model_xy * weights_use, axis=axis_avg)

1484 model_yx = np.reshape(vis_model_arr[3, :, :], new_shape)

1485 model_yx = np.sum(model_yx * weights_use, axis=axis_avg)

1486

1487 # Remove Zeros

1488 weight = np.sum(weights_use, axis=axis_avg)

1489 i_use = np.nonzero(weight)

1490 vis_xy = np.squeeze(vis_xy[i_use])

1491 vis_yx = np.squeeze(vis_yx[i_use])

1492 model_xy = np.squeeze(model_xy[i_use])

1493 model_yx = np.squeeze(model_yx[i_use])

1494

1495 vis_sum = np.sum(np.conjugate(vis_xy) * model_xy) + np.sum(

1496 vis_yx * np.conjugate(model_yx)

1497 )

1498 cross_phase = np.arctan2(vis_sum.imag, vis_sum.real)

1499

1500 logger.info(

1501 f"Phase fit between X and Y antenna polarizations: {cross_phase}"

1502 )

1503

1504 cal["cross_phase"] = cross_phase

1505

1506 vis_arr[2, :, :] *= np.exp(1j * 0)

1507 vis_arr[3, :, :] *= np.exp(-1j * 0)

1508

1509 return vis_arr, cal

1510

1511

1512def vis_baseline_hist(

1513 obs: dict,

1514 params: dict,

1515 vis_cal: NDArray[np.complex128],

1516 vis_model_arr: NDArray[np.complex128],

1517) -> dict:

1518 """

1519 Create diagnostic histograms of both the mean and sigma for the percent difference between calibrated

1520 data and simulated model visibilities as a function of baseline length.

1521

1522 Parameters

1523 ----------

1524 obs : dict

1525 Observation metadata dictionary

1526 params : dict

1527 Visibility metadata dictionary

1528 vis_cal : NDArray[np.complex128]

1529 Calibrated data visibilities

1530 vis_model_arr : NDArray[np.complex128]

1531 Simulated model visibilites

1532

1533 Returns

1534 -------

1535 vis_baseline_hist : dict

1536 Dictionary of the mean and sigma histograms for the percent difference between calibrated data

1537 and simulated model visibilities as a function of baseline length.

1538 """

1539 kx_arr = params["uu"] / obs["kpix"]

1540 ky_arr = params["vv"] / obs["kpix"]

1541 kr_arr = np.sqrt(kx_arr**2 + ky_arr**2)

1542 # take the transpose of this, given our `vis_cal` and `vis_mode_arr` are the

1543 # transpose of the original FHD code

1544 dist_arr = np.outer(kr_arr, obs["baseline_info"]["freq"]).transpose() * obs["kpix"]

1545 dist_hist, bins, dist_ri = histogram(

1546 dist_arr, min=obs["min_baseline"], max=obs["max_baseline"], bin_size=5.0

1547 )

1548

1549 vis_res_ratio_mean = np.zeros([obs["n_pol"], bins.size])

1550 vis_res_sigma = np.zeros([obs["n_pol"], bins.size])

1551

1552 for pol_i in range(obs["n_pol"]):

1553 for bin_i in range(bins.size):

1554 if dist_hist[bin_i] > 0: 1554 ↛ 1575line 1554 didn't jump to line 1575 because the condition on line 1554 was always true

1555 inds = dist_ri[dist_ri[bin_i] : dist_ri[bin_i + 1]]

1556 model_vals = (vis_model_arr[pol_i]).flatten()[inds]

1557 wh_noflag = np.where(np.abs(model_vals) > 0)[0]

1558 if wh_noflag.size > 0: 1558 ↛ 1561line 1558 didn't jump to line 1561 because the condition on line 1558 was always true

1559 inds = inds[wh_noflag]

1560 else:

1561 continue

1562 # if Keyword_Set(calibration_visibilities_subtract) THEN BEGIN

1563 # but calibration_visibilities_subtract isn't a function keyword

1564 # so we'll only translate the False of that statement

1565 # vis_cal_use = (vis_cal[pol_i].transpose()).flatten()[inds]

1566 vis_cal_use = (vis_cal[pol_i]).flatten()[inds]

1567 vis_res_ratio_mean[pol_i, bin_i] = np.mean(

1568 np.abs(vis_cal_use - model_vals)

1569 ) / np.mean(np.abs(model_vals))

1570 vis_res_sigma[pol_i, bin_i] = np.sqrt(

1571 np.var(np.abs(vis_cal_use - model_vals))

1572 ) / np.mean(np.abs(model_vals))

1573

1574 else:

1575 continue

1576 return {

1577 "baseline_length": bins.size,

1578 "vis_res_ratio_mean": vis_res_ratio_mean,

1579 "vis_res_sigma": vis_res_sigma,

1580 }

1581

1582

1583def cal_auto_ratio_divide(

1584 obs: dict,

1585 cal: dict,

1586 vis_auto: NDArray[np.float64],

1587 auto_tile_i: NDArray[np.integer],

1588) -> tuple[dict, NDArray[np.float64]]:

1589 """

1590 Remove antenna-dependent parameters (i.e. cable reflections) from the calculated gains

1591 to reduce the bias on individual tile variation before the creation of averaged quantities

1592 like the global bandpass. Antenna-dependent parameters are estimated from the square root of the

1593 autocorrelation visibilities normalized via a reference tile.

1594

1595 Parameters

1596 ----------

1597 obs : dict

1598 Observation metadata dictionary

1599 cal : dict

1600 Calibration dictionary

1601 vis_auto : NDArray[np.float64]

1602 Data autocorrelations

1603 auto_tile_i : NDArray[np.integer]

1604 Index array of the tile array that have defined autocorrelations

1605

1606 Returns

1607 -------

1608 cal : dict

1609 Calibration dictionary with gains that have reduced antenna-dependent bias

1610 auto_ratio : NDArray[np.float64]

1611 The estimation of the antenna-dependent bias through the square root of the autocorrelation

1612 visibilities normalized via a reference tile.

1613 """

1614

1615 auto_ratio = np.empty([cal["n_pol"], obs["n_freq"], obs["n_tile"]])

1616 # This should be possible to Vectorize if it's slow

1617 for pol_i in range(cal["n_pol"]):

1618 # fhd_struct_init_cal puts the ref_antenna as 1 if it's not set, which is never appears to be

1619 v0 = vis_auto[pol_i, :, auto_tile_i[cal["ref_antenna"]]]

1620 auto_ratio[pol_i, :, auto_tile_i] = np.sqrt(

1621 vis_auto[pol_i, :, np.arange(auto_tile_i.size)] * weight_invert(v0)

1622 )

1623 cal["gain"][pol_i] = cal["gain"][pol_i] * weight_invert(auto_ratio[pol_i])

1624 return cal, auto_ratio

1625

1626

1627def cal_auto_ratio_remultiply(

1628 cal: dict, auto_ratio: NDArray[np.float64], auto_tile_i: NDArray[np.integer]

1629) -> dict:

1630 """

1631 Return antenna-dependent parameters to the calculated gains after averaged quantities

1632 have been calculated. The antenna-dependent parameters are captured via the square root of

1633 the referenced autocorrelation visiblities calculated in cal_auto_ratio_divide.

1634

1635 Parameters

1636 ----------

1637 obs : dict

1638 Observation metadata dictionary

1639 cal : dict

1640 Calibration dictionary

1641 auto_ratio : NDArray[np.float64]

1642 Square root of the autocorrelation visibilities normalized via a reference tile

1643 auto_tile_i : NDArray[np.integer]

1644 Index array of the tile array that have defined autocorrelations

1645

1646 Returns

1647 -------

1648 cal: dict

1649 Calibration dictonary containing the reformed gain

1650 """

1651 # Replaced for loop in remultiply, this should remultiply by the auto_ratios

1652 cal["gain"][: cal["n_pol"], :, auto_tile_i] = cal["gain"][

1653 : cal["n_pol"], :, auto_tile_i

1654 ] * np.abs(auto_ratio[: cal["n_pol"], :, auto_tile_i])

1655 return cal

1656

1657

1658def calculate_adaptive_gain(

1659 gain_list: NDArray[np.float64],

1660 convergence_list: NDArray[np.float64],

1661 iter: int,

1662 base_gain: int | float,

1663 final_convergence_estimate: float | None = None,

1664):

1665 """

1666 Perform a Kalman filter to calculate the best weighting to use in the next iteration of the

1667 linear least squares fitting between the data and simulated model, which reduces the number

1668 of total iterations till convergence.

1669

1670 The Kalman filter takes previous convergence values, or the percent change in calibration solution from

1671 the previous iteration, to calculate the relative weight to give to the next calculated iteration versus

1672 the previous iteration. For example, a gain (relative weight in this context) of 1 would give the old

1673 solution and the new solution the same weighting, and thus the next guess in the linear least square solver

1674 is the average of the new and old solution. A gain (relative weight) of 2 would give the new solution twice

1675 as much weight at the old solution, and thus the next guess in the linear least square solver is the sum of

1676 the old solution and twice the new solution, all divided by three.

1677

1678 Parameters

1679 ----------

1680 gain_list : NDArray[np.float64]

1681 Relative weighting between the previous iteration and new iteration in the linear least squares

1682 solver for calibration solutions

1683 convergence_list : NDArray[np.float64]

1684 An array of the percent change in the calibration solutions between one iteration and the next

1685 iter : int

1686 The current iteration in the linear least squares solver

1687 base_gain : int|float

1688 The initial relative weighting between the previous iteration and new iteration in the linear

1689 least squares solver for calibration solutions

1690 final_convergence_estimate : float, optional

1691 The prediction of the percent change of the previous iteration and the forward model estimate from

1692 the Kalman filter, by default None

1693

1694 Returns

1695 -------

1696 gain : float

1697 Relative weighting between the previous iteration and the new iteration in the calibration

1698 linear least squares solver

1699 """

1700 if iter > 2:

1701 # To calculate the best gain to use, compare the past gains that have been used

1702 # with the resulting convergences to estimate the best gain to use.

1703 # Algorithmically, this is a Kalman filter.

1704 # If forward modeling proceeds perfectly, the convergence metric should

1705 # asymptotically approach a final value.

1706 # We can estimate that value from the measured changes in convergence

1707 # weighted by the gains used in each previous iteration.

1708 # For some applications such as calibration this may be known in advance.

1709 # In calibration, it is expressed as the change in a

1710 # value, in which case the final value should be zero.

1711 if final_convergence_estimate is None: 1711 ↛ 1712line 1711 didn't jump to line 1712 because the condition on line 1711 was never true

1712 est_final_conv = np.zeros(iter - 1)

1713 for i in range(iter - 1):

1714 final_convergence_test = (

1715 (1 + gain_list[i]) * convergence_list[i + 1] - convergence_list[i]

1716 ) / gain_list[i]

1717 # The convergence metric is strictly positive, so if the estimated final convergence is

1718 # less than zero, force it to zero.

1719 est_final_conv[i] = np.max((0, final_convergence_test))

1720 # Because the estimate may slowly change over time, only use the most recent measurements.

1721 final_convergence_estimate = np.median(est_final_conv[max(iter - 5, 0) :])

1722 last_gain = gain_list[iter - 1]

1723 last_conv = convergence_list[iter - 2]

1724 new_conv = convergence_list[iter - 1]

1725 # The predicted convergence is the value we would get if the new model calculated

1726 # in the previous iteration was perfect. Recall that the updated model that is

1727 # actually used is the gain-weighted average of the new and old model,

1728 # so the convergence would be similarly weighted.

1729 predicted_conv = (final_convergence_estimate * last_gain + last_conv) / (

1730 base_gain + last_gain

1731 )

1732 # If the measured and predicted convergence are very close, that indicates

1733 # that our forward model is accurate and we can use a more aggressive gain

1734 # If the measured convergence is significantly worse (or better!) than predicted,

1735 # that indicates that the model is not converging as expected and

1736 # we should use a more conservative gain.

1737 delta = (predicted_conv - new_conv) / (

1738 (last_conv - final_convergence_estimate) / (base_gain + last_gain)

1739 )

1740 new_gain = 1 - abs(delta)

1741 # Average the gains to prevent oscillating solutions.

1742 new_gain = (new_gain + last_gain) / 2

1743 # For some reason base_gain can be a numpy float in testing so putting in a tuple solves this.

1744 gain = np.max((base_gain / 2, new_gain))

1745

1746 else:

1747 gain = base_gain

1748 gain_list[iter] = gain

1749

1750 return gain

Coverage for PyFHD/calibration/calibration_utils.py: 62%

655 statements