Coverage for PyFHD/calibration/calibration_utils.py: 62%

655 statements  

« prev     ^ index     » next       coverage.py v7.9.1, created at 2025-07-01 10:58 +0800

1import numpy as np 

2from numpy.typing import NDArray 

3from logging import Logger 

4from PyFHD.pyfhd_tools.pyfhd_utils import ( 

5 resistant_mean, 

6 weight_invert, 

7 rebin, 

8 histogram, 

9) 

10from copy import deepcopy 

11from astropy.io import fits 

12from astropy.constants import c 

13from pathlib import Path 

14from scipy.ndimage import uniform_filter 

15import importlib_resources 

16 

17 

18def vis_extract_autocorr( 

19 obs: dict, 

20 vis_arr: NDArray[np.complex128], 

21 pyfhd_config: dict, 

22 auto_tile_i: NDArray[np.integer] | None = None, 

23) -> tuple[NDArray[np.float64], NDArray[np.integer]]: 

24 """ 

25 Extract the auto-correlations if they exist from the full visibility array. 

26 

27 Parameters 

28 ---------- 

29 obs : dict 

30 Observation metadata dictionary 

31 vis_arr : NDArray[np.complex128] 

32 Uncalibrated data visiblities 

33 pyfhd_config : dict 

34 PyFHD's configuration dictionary containing all the options set for a PyFHD run 

35 auto_tile_i : NDArray[np.integer] | None, optional 

36 Index array for auto-correlation visibilities, by default None 

37 

38 Returns 

39 ------- 

40 autocorr: NDArray[np.float64] 

41 The auto-correlation visibilities 

42 auto_tile_i : NDArray[np.integer] 

43 The index array for auto-correlation visibilities 

44 """ 

45 

46 autocorr_i = np.where( 

47 obs["baseline_info"]["tile_a"] == obs["baseline_info"]["tile_b"] 

48 )[0] 

49 if autocorr_i.size > 0: 49 ↛ 96line 49 didn't jump to line 96 because the condition on line 49 was always true

50 auto_tile_i = obs["baseline_info"]["tile_a"][autocorr_i] - 1 

51 # As auto_tile_i is used for indexing we need to make it an integer array 

52 auto_tile_i = auto_tile_i.astype(np.integer) 

53 auto_tile_i_single = np.unique(auto_tile_i) 

54 # expect it as a list of 2D arrays, so there might be trouble 

55 if not pyfhd_config["cal_time_average"]: 

56 freq_tile_shape = np.real(vis_arr[0][:, autocorr_i]).shape 

57 auto_corr = np.zeros([obs["n_pol"]] + list(freq_tile_shape)) 

58 else: 

59 auto_corr = np.zeros((obs["n_pol"], obs["n_freq"], auto_tile_i_single.size)) 

60 

61 for pol_i in range(obs["n_pol"]): 

62 # Auto-correlations by definition are enirely real, so take the real part here 

63 # index this way in case people have vis_arr as one dimensional 

64 # array, containing two further 2D arrays, rather than a proper 3D 

65 # array. Turns out this indexing is consistent across both cases 

66 auto_vals = np.real(vis_arr[pol_i][:, autocorr_i]) 

67 if pyfhd_config["cal_time_average"]: 

68 auto_single = np.zeros((obs["n_freq"], auto_tile_i_single.size)) 

69 time_inds = np.where(obs["baseline_info"]["time_use"])[0] 

70 for tile_i in range(auto_tile_i_single.size): 

71 baseline_i = np.where(auto_tile_i == auto_tile_i_single[tile_i])[0] 

72 baseline_i = baseline_i[time_inds] 

73 if time_inds.size > 1: 73 ↛ 86line 73 didn't jump to line 86 because the condition on line 73 was always true

74 # OK, auto_vals is of shape (n_freq, n_autos) 

75 # We want to average a subset of baseline_i within auto_vals, 

76 # to average a specific auto-correlation over time 

77 auto_single[:, tile_i] = ( 

78 np.sum( 

79 auto_vals[:, baseline_i][np.arange(obs["n_freq"]), :], 

80 axis=1, 

81 ) 

82 / time_inds.size 

83 ) 

84 

85 else: 

86 auto_single[:, tile_i] = auto_vals[:, baseline_i][ 

87 np.arange(obs["n_freq"]), : 

88 ] 

89 auto_vals = auto_single 

90 auto_corr[pol_i, :, :] = auto_vals 

91 if pyfhd_config["cal_time_average"]: 

92 auto_tile_i = auto_tile_i_single 

93 return auto_corr, auto_tile_i 

94 else: 

95 # Return auto_corr as 0 and auto_tile_i as an empty array 

96 return np.zeros(1), np.zeros(0) 

97 

98 

99def vis_cal_auto_init( 

100 obs: dict, 

101 cal: dict, 

102 vis_arr: NDArray[np.complex128], 

103 vis_model_arr: NDArray[np.complex128], 

104 vis_auto: NDArray[np.float64], 

105 vis_auto_model: NDArray[np.float64], 

106 auto_tile_i: NDArray[np.integer], 

107) -> NDArray[np.complex128]: 

108 """ 

109 Initialize the calibration solutions using the autocorrelations prior to the linear least squares fit 

110 for faster convergence. The auto-correlations and cross-correlations have separate formulisms for the 

111 initialization. 

112 

113 Autos -- Square root of "Ratio of data autos to model autos" x "overall mean for both crosses and 

114 autos of ratio of data to model" / "overall mean of ratio of data autos to model autos, with 1's 

115 for crosses", results in a per-frequency, per-baseline gain scaling. 

116 Crosses -- Square root of "overall mean for both crosses and autos of ratio of data to model" / 

117 "overall mean of ratio of data autos to model autos, with 1's for crosses", results in a single 

118 number gain scaling. 

119 

120 Parameters 

121 ---------- 

122 obs : dict 

123 Observation metadata dictionary 

124 cal : dict 

125 Calibration dictionary 

126 vis_arr : NDArray[np.complex128] 

127 Uncalibrated data visiblities 

128 vis_model_arr : NDArray[np.complex128] 

129 Simulated model visibilites 

130 vis_auto : NDArray[np.float64] 

131 Data auto-correlations 

132 vis_auto_model : NDArray[np.float64] 

133 Simulated model auto-correlations 

134 auto_tile_i : NDArray[np.integer] 

135 Index array for auto-correlation visibilities 

136 

137 Returns 

138 ------- 

139 auto_gain : NDArray[np.complex128] 

140 Initialization gain array 

141 """ 

142 # Set out the 

143 auto_scale = np.zeros((cal["n_pol"], obs["n_freq"], obs["n_tile"])) 

144 auto_gain = np.ones( 

145 (cal["n_pol"], obs["n_freq"], obs["n_tile"]), dtype=np.complex128 

146 ) 

147 freq_i_use = np.where(obs["baseline_info"]["freq_use"])[0] 

148 for pol_i in range(cal["n_pol"]): 

149 res_mean_data = resistant_mean(np.abs(vis_arr[pol_i, freq_i_use, :]), 2) 

150 res_mean_model = resistant_mean(np.abs(vis_model_arr[pol_i, freq_i_use, :]), 2) 

151 auto_scale[pol_i] = np.sqrt(res_mean_data / res_mean_model) 

152 auto_gain = np.ones( 

153 (cal["n_pol"], obs["n_freq"], obs["n_tile"]), dtype=np.complex128 

154 ) 

155 # Would love to get rid of the loop altogether, haven't figurted it out yet 

156 for pol_i in range(cal["n_pol"]): 

157 auto_gain[pol_i, :, :][:, auto_tile_i] = np.sqrt( 

158 vis_auto[pol_i, :, :] * weight_invert(vis_auto_model[pol_i, :, :]) 

159 ) 

160 auto_gain[pol_i, :, :] *= auto_scale[pol_i] * weight_invert( 

161 np.mean(auto_gain[pol_i, :, :]) 

162 ) 

163 auto_gain[pol_i, :, :][np.isnan(auto_gain[pol_i, :, :])] = 1 

164 auto_gain[pol_i, :, :][auto_gain[pol_i, :, :] <= 0] = 1 

165 return auto_gain 

166 

167 

168def vis_calibration_flag( 

169 obs: dict, cal: dict, pyfhd_config: dict, logger: Logger 

170) -> dict: 

171 """ 

172 Flag tile and frequency outliers based on the calibration solutions. First, iteratively flag a maximum of three 

173 times on amplitude with three tests: 1) flag frequencies above 5 sigma, 2) flag tiles above 5 sigma, and 3) flag 

174 tiles either 2x lower or 2x higher than average. Second, iteratively flag a maximum of three times on phase with 

175 two tests: 1) flag tiles with slopes above 5 sigma, and 2) flag tiles with per-frequency deviations from their slope 

176 above 5 sigma. 

177 

178 Parameters 

179 ---------- 

180 obs : dict 

181 Observation metadata dictionary 

182 cal : dict 

183 Calibration dictionary 

184 pyfhd_config : dict 

185 PyFHD's configuration dictionary containing all the options set for a PyFHD run 

186 logger : Logger 

187 PyFHD's logger for displaying errors and info to the log files 

188 

189 Returns 

190 ------- 

191 obs: dict 

192 Observation metadata dictionary 

193 """ 

194 amp_sigma_threshold = 5 

195 amp_threshold = 2 

196 phase_sigma_threshold = 5 

197 freq_use = obs["baseline_info"]["freq_use"].copy() 

198 for pol_i in range(cal["n_pol"]): 

199 tile_use_i = np.nonzero(obs["baseline_info"]["tile_use"])[0] 

200 freq_use_i = np.nonzero(freq_use)[0] 

201 gain = cal["gain"][pol_i] 

202 phase = np.arctan2(gain.imag, gain.real) 

203 amp = np.abs(gain) 

204 

205 # first flag based on overall amplitude 

206 # extract_subarray is not being used as it was FHD's way of taking the fact that 

207 # IDL's indexing can be weird and won't allow to index the result 

208 # use np.where on gain, which will be multidimensional as well 

209 amp_sub = amp[:, tile_use_i][freq_use_i, :] 

210 gain_freq_fom = np.std(amp_sub, axis=1) 

211 # Calculate the y values from a polynomial fit and keep the standard deviation of the error in gain_tile_fom 

212 amp_sub_fit = np.zeros_like(amp_sub) 

213 # Polynomial polyval can only take 1d arrays and doesn't vectorize across a 2d array. 

214 for tile_i in range(tile_use_i.size): 

215 amp_sub_fit[:, tile_i] = np.polynomial.polynomial.polyval( 

216 freq_use_i, 

217 np.polynomial.Polynomial.fit( 

218 freq_use_i, 

219 amp_sub[:, tile_i], 

220 deg=pyfhd_config["cal_amp_degree_fit"], 

221 ) 

222 .convert() 

223 .coef, 

224 ) 

225 # Get standard deviation of the residuals on a per tile basis 

226 gain_tile_fom = np.std(amp_sub - amp_sub_fit, axis=0) 

227 # Get the median of every tile, why is it call avg? 

228 gain_tile_avg = np.median(amp_sub, axis=0) 

229 gain_freq_fom[np.isnan(gain_freq_fom)] = 0 

230 gain_tile_fom[np.isnan(gain_tile_fom)] = 0 

231 freq_cut_i = np.where(gain_freq_fom == 0)[0] 

232 freq_uncut_i = np.nonzero(gain_freq_fom)[0] 

233 if freq_cut_i.size > 0: 233 ↛ 234line 233 didn't jump to line 234 because the condition on line 233 was never true

234 freq_use[freq_use_i][freq_cut_i] = 0 

235 tile_cut_i = np.where(gain_tile_fom == 0)[0] 

236 tile_uncut_i = np.nonzero(gain_tile_fom)[0] 

237 if tile_cut_i.size > 0: 237 ↛ 238line 237 didn't jump to line 238 because the condition on line 237 was never true

238 obs["baseline_info"]["tile_use"][tile_use_i][tile_cut_i] = 0 

239 if freq_uncut_i.size == 0 or tile_uncut_i.size == 0: 239 ↛ 240line 239 didn't jump to line 240 because the condition on line 239 was never true

240 logger.error( 

241 "The frequency and tile flagging inside calibration found some values not detected in previous flagging or calibration" 

242 ) 

243 

244 n_addl_cut = max(freq_cut_i.size + tile_cut_i.size, 1) 

245 n_cut = freq_cut_i.size + tile_cut_i.size 

246 iter = 0 

247 while n_addl_cut > 0 and iter < 3: 

248 gain_freq_sigma = np.std(gain_freq_fom[freq_uncut_i]) 

249 gain_tile_sigma = np.std(gain_tile_fom[tile_uncut_i]) 

250 freq_cut_i = np.where( 

251 ( 

252 gain_freq_fom 

253 - np.median(gain_freq_fom[freq_uncut_i]) 

254 - amp_sigma_threshold * gain_freq_sigma 

255 ) 

256 > 0 

257 )[0] 

258 # Update the complement of freq_cut_i (the NOT) i.e. freq_uncut_i 

259 freq_uncut_i = freq_uncut_i[~np.isin(freq_uncut_i, freq_cut_i)] 

260 tile_cut_test1 = ( 

261 gain_tile_fom 

262 - np.median(gain_tile_fom[tile_uncut_i]) 

263 - amp_sigma_threshold * gain_tile_sigma 

264 ) > 0 

265 tile_cut_test2 = ( 

266 gain_tile_avg < np.median(gain_tile_avg) / amp_threshold 

267 ) | (gain_tile_avg > np.median(gain_tile_avg) * amp_threshold) 

268 tile_cut_i = np.where(tile_cut_test1 | tile_cut_test2)[0] 

269 # Update the complement of tile_cut_i (the NOT) i.e. tile_uncut_i 

270 tile_uncut_i = tile_uncut_i[~np.isin(tile_uncut_i, tile_cut_i)] 

271 n_addl_cut = (freq_cut_i.size + tile_cut_i.size) - n_cut 

272 n_cut = freq_cut_i.size + tile_cut_i.size 

273 iter += 1 

274 if (freq_cut_i.size) > 0: 274 ↛ 275line 274 didn't jump to line 275 because the condition on line 274 was never true

275 freq_use[freq_use_i[freq_cut_i]] = 0 

276 if (tile_cut_i.size) > 0: 

277 obs["baseline_info"]["tile_use"][tile_use_i[tile_cut_i]] = 0 

278 

279 # Reset freq_use_i and tile_use_i for flagging based on phase 

280 tile_use_i = np.nonzero(obs["baseline_info"]["tile_use"])[0] 

281 freq_use_i = np.nonzero(freq_use)[0] 

282 

283 # Start flagging based on phase 

284 phase_sub = phase[:, tile_use_i][freq_use_i, :] 

285 phase_slope_arr = np.empty(tile_use_i.size) 

286 phase_sigma_arr = np.empty(tile_use_i.size) 

287 for tile_i in range(tile_use_i.size): 

288 phase_use = np.unwrap(phase_sub[:, tile_i]) 

289 phase_params = np.polynomial.polynomial.Polynomial.fit( 

290 freq_use_i, phase_use, deg=pyfhd_config["cal_phase_degree_fit"] 

291 ) 

292 phase_params = phase_params.convert().coef 

293 phase_fit = np.polynomial.polynomial.polyval(freq_use_i, phase_params) 

294 phase_sigma2 = np.std(phase_use - phase_fit) 

295 # In an unusual scenario sometimes you'll get an all 0 array to fit on, which gives only a zero back for the fit 

296 phase_slope_arr[tile_i] = phase_params[1] if phase_params.size > 1 else 0 

297 phase_sigma_arr[tile_i] = phase_sigma2 

298 iter = 0 

299 n_addl_cut = 1 

300 n_cut = 0 

301 while n_addl_cut > 0 and iter < 3: 

302 slope_sigma = np.nanstd(phase_slope_arr) 

303 tile_cut_test1 = ( 

304 np.abs(phase_slope_arr) - np.median(np.abs(phase_slope_arr)) 

305 ) > phase_sigma_threshold * slope_sigma 

306 tile_cut_test2 = (phase_sigma_arr - np.median(phase_sigma_arr)) > ( 

307 phase_sigma_threshold * np.nanstd(phase_sigma_arr) 

308 ) 

309 tile_cut_i = np.where(tile_cut_test1 | tile_cut_test2)[0] 

310 n_addl_cut = tile_cut_i.size - n_cut 

311 n_cut = tile_cut_i.size 

312 iter += 1 

313 if tile_cut_i.size > 0: 

314 obs["baseline_info"]["tile_use"][tile_use_i[tile_cut_i]] = 0 

315 # If we decide not to flag the frequencies, ignore any frequency flagging 

316 if pyfhd_config["flag_calibration_frequencies"]: 316 ↛ 317line 316 didn't jump to line 317 because the condition on line 316 was never true

317 obs["baseline_info"]["freq_use"] = freq_use 

318 # Return the obs with an updated baseline_info on the use of tiles and frequency 

319 return obs 

320 

321 

322def transfer_bandpass( 

323 obs: dict, cal: dict, pyfhd_config: dict, logger: Logger 

324) -> tuple[dict, dict]: 

325 """ 

326 Apply a previously saved bandpass via a calfits file (github:pyuvdata). Check adherance to standards, 

327 and match the polarizations, frequencies, timing, pointings, and tiles. 

328 

329 Parameters 

330 ---------- 

331 obs : dict 

332 Observation metadata dictionary 

333 cal : dict 

334 Calibration dictionary 

335 pyfhd_config : dict 

336 PyFHD's configuration dictionary containing all the options set for a PyFHD run 

337 logger : Logger 

338 PyFHD's logger for displaying errors and info to the log files 

339 

340 Returns 

341 ------- 

342 cal_bandpass : dict 

343 Calibration dictionary with bandpass gains 

344 cal_remainder : dict 

345 calibration dictionary with residuals after removing the bandpass gains 

346 """ 

347 cal_bandpass = {} 

348 try: 

349 calfits = fits.open( 

350 Path(pyfhd_config["input_path"], pyfhd_config["cal_bp_transfer"]) 

351 ) 

352 # Get the data 

353 data_array = calfits[0].data 

354 # Read in the header 

355 naxis = calfits[0].header["naxis"] 

356 n_jones = calfits[0].header["njones"] 

357 if "delay" in calfits[0].header["caltype"]: 

358 raise RuntimeWarning( 

359 "Input Delay calibration not supported at this time, skipping calibration bandpass transfer." 

360 ) 

361 time_integration = calfits[0].header["inttime"] 

362 freq_channel_width = calfits[0].header["chwidth"] 

363 x_orient = calfits[0].header["xorient"].strip() 

364 data_dims = np.empty(naxis, dtype=np.integer) 

365 data_types = naxis * [""] 

366 for naxis_i in range(naxis): 

367 # Get the dimensions of the data 

368 data_dims[naxis_i] = calfits[0].header[f"naxis{naxis_i + 1}"] 

369 # Get the ctypes 

370 data_types[naxis_i] = ( 

371 calfits[0].header[f"ctype{naxis_i + 1}"].lower().strip() 

372 ) 

373 data_types = np.array(data_types) 

374 # Get the indexes for the FITS standard checks 

375 data_index = np.nonzero("narrays" == data_types)[0][0] 

376 ant_index = np.nonzero("antaxis" == data_types)[0][0] 

377 freq_index = np.nonzero("freqs" == data_types)[0][0] 

378 time_index = np.nonzero("time" == data_types)[0][0] 

379 jones_index = np.nonzero("jones" == data_types)[0][0] 

380 # Deal with spec_wind_index separately as default highband file doesn't have this in 

381 # May cause issues with other fits files, adjust the code then. 

382 spec_wind_index = np.nonzero("if" == data_types)[0] 

383 if spec_wind_index.size == 0: 

384 spec_wind_index = -1 

385 

386 # Check the indexes given to see if they match standards, if not raise exception 

387 if ( 

388 data_index != 0 

389 or ant_index != 4 

390 or freq_index != 3 

391 or time_index != 2 

392 or jones_index != 1 

393 ): 

394 if ( 

395 data_index == 0 

396 and ant_index == 5 

397 and freq_index == 3 

398 and time_index == 2 

399 and jones_index == 1 

400 and spec_wind_index == 4 

401 ): 

402 logger.info("Calfits adheres to the Fall 2018 pyuvdata convention") 

403 if calfits[0].header["naxis5"] != 1: 

404 raise RuntimeWarning( 

405 "Calfits file includes more than one spectral window. Note that this feature is not yet supported in PyFHD." 

406 ) 

407 # Remove spectral window dimension for compatibility 

408 data_array = np.mean(data_array, axis=0) 

409 else: 

410 raise RuntimeWarning( 

411 "Calfits file does not appear to adhere to standard. Please see github:pyuvdata/docs/references" 

412 ) 

413 

414 freq_start = calfits[0].header[f"crval{freq_index + 1}"] 

415 time_start = calfits[0].header[f"crval{time_index + 1}"] 

416 time_delt = calfits[0].header[f"cdelt{time_index + 1}"] 

417 jones_start = calfits[0].header[f"crval{jones_index + 1}"] 

418 jones_delt = calfits[0].header[f"cdelt{jones_index + 1}"] 

419 

420 n_ant_data = data_array.shape[0] 

421 n_freq = data_array.shape[1] 

422 n_time = data_array.shape[2] 

423 

424 # Check whether the number of polarizations specified matches the observation analysis run 

425 jones_type_matrix = np.zeros(data_dims[1]) 

426 for jones_i in range(1, data_dims[1]): 

427 jones_type_matrix[jones_i - 1] = jones_start + (jones_delt * jones_i) 

428 if data_dims[1] > obs["n_pol"]: 

429 logger.warning( 

430 "More polarizations in calibration fits file than in observation analysis. Reducing calibration to match obs." 

431 ) 

432 data_dims[1] = obs["n_pol"] 

433 jones_type_matrix = jones_type_matrix[0 : obs["n_pol"]] 

434 data_array = data_array[:, :, :, 0 : obs["n_pol"], :] 

435 elif data_dims[1] < obs["n_pol"]: 

436 raise RuntimeWarning( 

437 "Not enough polarizations defined in the calibration fits file." 

438 ) 

439 

440 # Switch the pol convention to FHD standard if necessary 

441 if x_orient == "north" or x_orient == "south": 

442 data_array[:, :, :, 0, :], data_array[:, :, :, 1, :] = ( 

443 data_array[:, :, :, 1, :], 

444 data_array[:, :, :, 0, :], 

445 ) 

446 if obs["n_pol"] > 2: 

447 data_array[:, :, :, 2, :], data_array[:, :, :, 3, :] = ( 

448 data_array[:, :, :, 3, :], 

449 data_array[:, :, :, 2, :], 

450 ) 

451 

452 # Check to see if the calibraton and observation frequency resolution match 

453 if freq_channel_width != obs["freq_res"]: 

454 freq_factor = obs["freq_res"] / freq_channel_width 

455 if freq_factor >= 1: 

456 logic_test = freq_factor - np.floor(freq_factor) 

457 else: 

458 logic_test = 1 / freq_factor - np.floor(1 / freq_factor) 

459 if logic_test != 0: 

460 raise RuntimeWarning( 

461 f"Calfits input freq channel width is not easily castable to the observation, different by a factor of {freq_factor}" 

462 ) 

463 if freq_start != obs["baseline_info"]["freq"][0]: 

464 raise RuntimeWarning( 

465 "Calfits input freq start is not equal to observation freq start" 

466 ) 

467 # Downselect the data array 

468 if freq_factor > 1: 

469 logger.warning( 

470 f"Calfits input freq channel width is different by a factor of {freq_factor}. Avergaing Down." 

471 ) 

472 # Set flagged indices to NAN to remove them from mean calculation 

473 flag_inds = np.where(np.abs(np.squeeze(data_array[:, :, :, :, 2])) == 1) 

474 data_array[flag_inds][0] = np.nan 

475 data_array[flag_inds][1] = np.nan 

476 for channel_i in range(obs["n_freq"]): 

477 data_array[:, channel_i, :, :, :] = np.nanmean( 

478 data_array[ 

479 :, 

480 max( 

481 channel_i * freq_factor - np.floor(freq_factor / 2), 0 

482 ) : channel_i 

483 * freq_factor 

484 + np.floor(freq_factor / 2.0), 

485 :, 

486 :, 

487 :, 

488 ] 

489 ) 

490 data_array = data_array[:, 0 : obs["n_freq"], :, :, :] 

491 elif freq_factor < 1: 

492 logger.warning( 

493 f"Calfits input freq channel width is different by a factor of {freq_factor}. Using linear interpolation" 

494 ) 

495 # The IDL code has 5 nested loops, and I can't think of the vectorization right now in a reasonable ampount of time 

496 # Someone please vectorize laster if you can 

497 data_array_temp = np.zeros( 

498 (data_dims[4], obs["n_freq"], n_time, n_jones, 2) 

499 ) 

500 for data_i in range(2): 

501 for jones_i in range(n_jones): 

502 for times_i in range(n_time): 

503 for tile_i in range(data_dims[4]): 

504 for channel_i in range(obs["n_freq"] - 1): 

505 start_idx = int(channel_i * (1.0 / freq_factor)) 

506 end_idx = int((channel_i + 1) * (1.0 / freq_factor)) 

507 data_array_temp[ 

508 tile_i, 

509 start_idx:end_idx, 

510 times_i, 

511 jones_i, 

512 data_i, 

513 ] = np.interp( 

514 np.arange(start_idx, end_idx, 1.0), 

515 np.arange(channel_i, channel_i + 1, 1.0), 

516 data_array[ 

517 data_i, 

518 jones_i, 

519 times_i, 

520 channel_i : channel_i + 1, 

521 tile_i, 

522 ], 

523 ) 

524 data_array_temp[:, obs["n_freq"] - 1, :, :, :] = data_array[ 

525 :, n_freq - 1, :, :, : 

526 ] 

527 data_array = np.copy(data_array_temp) 

528 

529 # Check to see what time range this needs to be applied to, and if pointings are necessary 

530 if n_time != 1: 

531 sec_upperlimit = 2000 

532 sec_lowerlimit = 1600 

533 if ( 

534 time_integration < sec_upperlimit and time_integration > sec_lowerlimit 

535 ) or (time_delt < sec_upperlimit and time_delt > sec_lowerlimit): 

536 # Calibration fits are per pointing 

537 # Keep all the delay patterns in a dictionary 

538 delay_patterns = { 

539 (-5): [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 3, 8, 13, 18], 

540 (-4): [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15], 

541 (-3): [0, 3, 6, 9, 0, 3, 6, 9, 0, 3, 6, 9, 0, 3, 6, 9], 

542 (-2): [0, 2, 4, 6, 0, 2, 4, 6, 0, 2, 4, 6, 0, 2, 4, 6], 

543 (-1): [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3], 

544 (0): [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 

545 (5): [15, 10, 5, 0, 16, 11, 6, 1, 17, 12, 7, 2, 18, 13, 8, 3], 

546 (4): [12, 8, 4, 0, 13, 9, 5, 1, 14, 10, 6, 2, 15, 11, 7, 3], 

547 (3): list( 

548 reversed([0, 3, 6, 9, 0, 3, 6, 9, 0, 3, 6, 9, 0, 3, 6, 9]) 

549 ), 

550 (2): list( 

551 reversed([0, 2, 4, 6, 0, 2, 4, 6, 0, 2, 4, 6, 0, 2, 4, 6]) 

552 ), 

553 (1): list( 

554 reversed([0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]) 

555 ), 

556 } 

557 obs_pointing = None 

558 for pointing, pattern in delay_patterns.items(): 

559 if np.array_equal(obs["delays"], pattern): 

560 obs_pointing = pointing 

561 if obs_pointing is None: 

562 raise RuntimeWarning( 

563 "Pointing number not within 5 pointings around zenith." 

564 ) 

565 obs_julian_date = obs["astr"]["mjdobs"] + 2400000.5 

566 

567 # Find corresponding in the calfits 

568 # Number of days since Auguest 23 2013 

569 days_since_ref = np.floor(obs_julian_date) - np.floor(time_start) 

570 # pointing start shift amount depending on how many days since ref 

571 obs_pointing_shift_since_ref = ((24 - 23.9344699) / 24) * days_since_ref 

572 # pointing start time for HH:MM:SS on Aug23 (in JD) plus comparible pointing start time for reference, using calculated shift 

573 pointing_jdhms_ref = ( 

574 np.array( 

575 [ 

576 0.13611, 

577 0.15157, 

578 0.17565, 

579 0.19963, 

580 0.22222, 

581 0.24342593, 

582 0.26453705, 

583 0.28574074, 

584 0.30694, 

585 0.33657407, 

586 ] 

587 ) 

588 + obs_pointing_shift_since_ref 

589 ) 

590 pointing_num_ref = [-5, -4, -3, -2, -1, 0, 1, 2, 3, 4] 

591 # pointing start time for HH:MM:SS for calfits day (in JD) 

592 pointing_jdhms_calfits = time_start - np.floor(time_start) 

593 # find the closest index between reference and calfits 

594 pointing_calfits_index = np.argmin( 

595 np.abs(pointing_jdhms_ref - pointing_jdhms_calfits) 

596 ) 

597 # make sure the index is greater than the pointing start time 

598 if pointing_jdhms_calfits < pointing_jdhms_ref[pointing_calfits_index]: 

599 pointing_calfits_index -= 1 

600 

601 if ( 

602 pointing_jdhms_calfits 

603 > ( 

604 np.max(pointing_jdhms_ref) 

605 + (pointing_jdhms_ref[1] - pointing_jdhms_ref[0]) 

606 ) 

607 ) or ( 

608 pointing_jdhms_calfits 

609 < ( 

610 np.min(pointing_jdhms_ref) 

611 - (pointing_jdhms_ref[1] - pointing_jdhms_ref[0]) 

612 ) 

613 ): 

614 raise RuntimeWarning( 

615 "Calfits does not start between five pointings before zenith and four pointings after zenith. Not suitable for pointing cal at this time." 

616 ) 

617 # find which pointing is the start of the calfits data 

618 pointing_calfits_start = pointing_num_ref[pointing_calfits_index] 

619 if obs_pointing < pointing_calfits_start: 

620 raise RuntimeWarning( 

621 "Calfits file does not contain pointing of observation" 

622 ) 

623 # select pointing index in calfits that matches observation 

624 obs_pointing_index = abs(pointing_calfits_start - obs_pointing) 

625 # choose the corresponding pointing from the calfits data array 

626 data_array = data_array[:, :, obs_pointing_index, :, :] 

627 elif np.floor(time_delt) == np.floor(obs["time_res"]): 

628 # Calibration fits are per-timeres 

629 logger.info( 

630 "Averaging calfits to observation length, an FHD requirement at this time." 

631 ) 

632 data_array_temp = np.zeros( 

633 [data_dims[4], obs.n_freq, 1, data_dims[1], data_dims[0]] 

634 ) 

635 data_array_temp[:, :, 0, :, :] = np.mean(data_array, axis=2) 

636 data_array = np.copy(data_array_temp) 

637 else: 

638 # Calibration fits are for a random set of times 

639 logger.info( 

640 "Finding closest match in time between calfits and obs. Obs metadata assumed to report start time, calfits metadata assumed to report center time." 

641 ) 

642 time_delta = time_integration / (60 * 60 * 24) 

643 time_array = np.full(n_time, time_start + time_delta) 

644 obs_julian_date = ( 

645 obs["astr"]["mjdobs"] 

646 + 2400000.5 

647 + ((obs["n_time"] * obs["time_res"]) / (2 * 60 * 60 * 24)) 

648 ) 

649 if (obs_julian_date < time_array[0] - 2 * time_delta) or ( 

650 obs_julian_date > time_array[-1] + 2 * time_delta 

651 ): 

652 raise RuntimeWarning( 

653 "Observation does not seem to fit within the time frame of the calfits" 

654 ) 

655 # find the closest index between calfits and observation 

656 time_index = np.argmin(np.abs(obs_julian_date - time_array)) 

657 data_array = data_array[:, :, time_index, :, :] 

658 

659 # Check number of tiles 

660 if n_ant_data != obs["n_tile"]: 

661 raise RuntimeWarning( 

662 "Number of antennas in calfits file does match observation antenna number" 

663 ) 

664 

665 # Now that the checks are done, return the cal structure 

666 cal_bandpass["n_pol"] = min(obs["n_pol"], 2) 

667 cal_bandpass["conv_thresh"] = 1e-7 

668 cal_bandpass["gain"] = np.full( 

669 (cal_bandpass["n_pol"], obs["n_tile"], obs["n_freq"]), 

670 pyfhd_config["cal_gain_init"], 

671 ) 

672 cal_bandpass["gain"][0 : obs["n_pol"], :, :] = np.squeeze( 

673 data_array[:, :, 0, 0 : obs["n_pol"], 0] 

674 ) + 1j * np.squeeze(data_array[:, :, 0, 0 : obs["n_pol"], 1]) 

675 logger.info("Calfits File has been read and cal_bandpass has been created") 

676 except FileNotFoundError as e: 

677 logger.error( 

678 f"{pyfhd_config['cal_bp_transfer']} file wasn't found, skipping calibration bandpass transfer" 

679 ) 

680 return {}, {} 

681 except RuntimeWarning as e: 

682 logger.error(e) 

683 return {}, {} 

684 cal_remainder = deepcopy(cal) 

685 cal_remainder["gain"][0 : cal["n_pol"], :, :] = ( 

686 cal["gain"][0 : cal["n_pol"], :, :] 

687 / cal_bandpass["gain"][0 : cal["n_pol"], :, :] 

688 ) 

689 return cal_bandpass, cal_remainder 

690 

691 

692def vis_cal_bandpass( 

693 obs: dict, cal: dict, pyfhd_config: dict, logger: Logger 

694) -> tuple[dict, dict]: 

695 """ 

696 Reduce the degrees of freedom on the per-frequency calibration amplitudes by averaging solutions 

697 together. Options include averaging over tiles which use a particular beamformer-to-receiver cable 

698 lengths/types, or averaging over all tiles for a global bandpass. 

699 

700 Parameters 

701 ---------- 

702 obs : dict 

703 Observation metadata dictionary 

704 cal : dict 

705 Calibration dictionary 

706 pyfhd_config : dict 

707 PyFHD's configuration dictionary containing all the options set for a PyFHD run 

708 logger : Logger 

709 PyFHD's logger for displaying errors and info to the log files 

710 

711 Returns 

712 ------- 

713 cal_bandpass : dict 

714 Calibration dictionary with bandpass gains 

715 cal_remainder : dict 

716 calibration dictionary with residuals after removing the bandpass gains 

717 """ 

718 freq_use = np.nonzero(obs["baseline_info"]["freq_use"])[0] 

719 tile_use = np.nonzero(obs["baseline_info"]["tile_use"])[0] 

720 n_pol = cal["gain"].shape[0] 

721 # Set a flag for global bandpass, will turn true if too many tiles are flagged 

722 global_bandpass = False 

723 

724 # Initialize cal_bandpass and cal_remainder and transfer them in, if a file has been set (fits only supported right now) 

725 if pyfhd_config["cal_bp_transfer"] is not None: 725 ↛ 726line 725 didn't jump to line 726 because the condition on line 725 was never true

726 cal_bandpass, cal_remainder = transfer_bandpass(obs, cal, pyfhd_config, logger) 

727 if len(cal_bandpass.keys()) != 0 and len(cal_remainder.keys()) != 0: 

728 logger.info( 

729 f"Calibration Bandpass FITS file {pyfhd_config['cal_bp_transfer']} transferred in for cal_bandpass and cal_remainder" 

730 ) 

731 return cal_bandpass, cal_remainder 

732 cal_bandpass = deepcopy(cal) 

733 cal_remainder = deepcopy(cal) 

734 

735 cal_bandpass_gain = np.empty(cal["gain"].shape, dtype=np.complex128) 

736 cal_remainder_gain = np.empty(cal["gain"].shape, dtype=np.complex128) 

737 

738 if pyfhd_config["cable_bandpass_fit"]: 

739 # Using preexisting file to extract information about which tiles have which cable length 

740 # cable_len = np.loadtxt(Path(pyfhd_config["input"], pyfhd_config["cable-reflection-coefficients"]), skiprows=1)[:, 2].flatten() 

741 cable_len_filepath = importlib_resources.files( 

742 "PyFHD.resources.instrument_config" 

743 ).joinpath(f"{pyfhd_config['instrument']}_cable_reflection_coefficients.txt") 

744 cable_len = np.loadtxt(cable_len_filepath, skiprows=1)[:, 2].flatten() 

745 

746 # Taking tile information and cross-matching it with the nonflagged tiles array, resulting in nonflagged tile arrays grouped by cable length 

747 cable_length_ref = np.unique(cable_len) 

748 tile_use_arr = [0] * cable_length_ref.size 

749 for cable_i in range(cable_length_ref.size): 

750 tile_use_arr[cable_i] = np.where( 

751 (obs["baseline_info"]["tile_use"]) 

752 & (cable_len == cable_length_ref[cable_i]) 

753 )[0] 

754 if tile_use_arr[cable_i].size == 0: 754 ↛ 755line 754 didn't jump to line 755 because the condition on line 754 was never true

755 logger.warning( 

756 "Too Many flagged tiles to implement bandpass cable averaging, using global bandpass." 

757 ) 

758 global_bandpass = True 

759 # n_freq x 13 array. columns are frequency, 90m xx, 90m yy, 150m xx, 150m yy, 230m xx, 230m yy, 320m xx, 320m yy, 400m xx, 400m yy, 524m xx, 524m yy 

760 bandpass_arr = np.zeros( 

761 (obs["n_freq"], cal["n_pol"] * cable_length_ref.size + 1) 

762 ) 

763 bandpass_arr[:, 0] = obs["baseline_info"]["freq"] 

764 bandpass_col_count = 1 

765 if pyfhd_config["auto_ratio_calibration"]: 765 ↛ 768line 765 didn't jump to line 768 because the condition on line 765 was always true

766 logger.info("auto_ratio_calibration is set, using global bandpass") 

767 global_bandpass = True 

768 for cable_i in range(cable_length_ref.size): 

769 # This is an option to calibrate over all tiles to find the 'global' bandpass. It will be looped over by the number 

770 # of cable lengths, and will redo the same calculation everytime. It is inefficient, but effective. 

771 if global_bandpass: 771 ↛ 774line 771 didn't jump to line 774 because the condition on line 771 was always true

772 tile_use_cable = tile_use 

773 else: 

774 tile_use_cable = tile_use_arr[cable_i] 

775 

776 for pol_i in range(cal["n_pol"]): 

777 gain = cal["gain"][pol_i] 

778 # gain2 is a temporary variable used in place of the gain array for an added layer of safety 

779 if cable_i == 0 and pol_i == 0: 

780 gain2 = np.zeros(cal["gain"].shape, dtype=np.complex128) 

781 # Only use gains from unflagged tiles and frequencies, and calculate the amplitude and phase 

782 gain_use = gain[freq_use, :][:, tile_use_cable] 

783 amp = np.abs(gain_use) 

784 # amp2 is a temporary variable used in place of the amp array for an added layer of safety 

785 amp2 = np.zeros((freq_use.size, tile_use_cable.size)) 

786 # This is the normalization loop for each tile. If the mean of gain amplitudes over all frequencies is nonzero, then divide 

787 # the gain amplitudes by that number, otherwise make the gain amplitudes zero. 

788 for tile_i in range(tile_use_cable.size): 

789 res_mean = resistant_mean(amp[:, tile_i], 2) 

790 if res_mean != 0: 790 ↛ 793line 790 didn't jump to line 793 because the condition on line 790 was always true

791 amp2[:, tile_i] = amp[:, tile_i] / res_mean 

792 else: 

793 amp2[:, tile_i] = 0 

794 # This finds the normalized gain amplitude mean per frequency over all tiles, which is the final bandpass per cable group. 

795 bandpass_single = np.empty(freq_use.size) 

796 # If this is slow, resistant_mean can be vectorized 

797 for f_i in range(freq_use.size): 

798 bandpass_single[f_i] = resistant_mean(amp2[f_i, :], 2) 

799 # Want iterative to start at 1 (to not overwrite freq) and store final bandpass per cable group. 

800 bandpass_arr[freq_use, bandpass_col_count] = bandpass_single 

801 bandpass_col_count += 1 

802 # Fill temporary variable gain2, set equal to final bandpass per cable group for each tile that will use that bandpass. 

803 

804 # Vectorize later if possible 

805 for tile_i in range(tile_use_cable.size): 

806 gain2[pol_i, freq_use, tile_use_cable[tile_i]] = bandpass_single 

807 

808 # For the last bit at the end of the cable 

809 if cable_i == cable_length_ref.size - 1: 

810 # Set gain3 to the input gains 

811 gain3 = cal["gain"][pol_i].copy() 

812 # Set what will be passed back as the output gain as the final bandpass per cable type. 

813 gain2_input = gain2[pol_i, :, :] 

814 cal_bandpass_gain[pol_i] = gain2_input 

815 # Set what will be passed back as the residual as the input gain divided by the final bandpass per cable type. 

816 gain3[freq_use, :] /= gain2_input[freq_use, :] 

817 cal_remainder_gain[pol_i] = gain3 

818 # Add Levine Memo bandpass to the gain solutions here if you wish 

819 else: 

820 bandpass_arr = np.zeros((obs["n_freq"], cal["n_pol"] + 1)) 

821 bandpass_arr[:, 0] = obs["baseline_info"]["freq"] 

822 for pol_i in range(cal["n_pol"]): 

823 gain = cal["gain"][pol_i] 

824 gain_use = gain[freq_use, :][:, tile_use] 

825 amp = np.abs(gain_use) 

826 amp2 = np.zeros((freq_use.size, tile_use.size)) 

827 for tile_i in range(tile_use.size): 

828 res_mean = resistant_mean(amp[:, tile_i], 2) 

829 if res_mean != 0: 829 ↛ 832line 829 didn't jump to line 832 because the condition on line 829 was always true

830 amp2[:, tile_i] = amp[:, tile_i] / res_mean 

831 else: 

832 amp2[:, tile_i] = 0 

833 

834 bandpass_single = np.empty(freq_use.size) 

835 # If this is slow, resistant_mean can be vectorized 

836 for f_i in range(freq_use.size): 

837 bandpass_single[f_i] = resistant_mean(amp2[f_i, :], 2) 

838 

839 bandpass_arr[freq_use, pol_i + 1] = bandpass_single 

840 # Work out the gain for the bandpass 

841 gain2 = np.zeros(gain.shape, dtype=np.complex128) 

842 gain3 = deepcopy(gain) 

843 # Vectorize later if possible 

844 for tile_i in range(obs["n_tile"]): 

845 gain2[freq_use, tile_i] = bandpass_single 

846 gain3[freq_use, tile_i] /= bandpass_single 

847 

848 cal_bandpass_gain[pol_i, :, :] = gain2 

849 cal_remainder_gain[pol_i, :, :] = gain3 

850 

851 cal_bandpass["gain"] = cal_bandpass_gain 

852 cal_remainder["gain"] = cal_remainder_gain 

853 return cal_bandpass, cal_remainder 

854 

855 

856def vis_cal_polyfit( 

857 obs: dict, 

858 cal: dict, 

859 auto_ratio: NDArray[np.float64] | None, 

860 pyfhd_config: dict, 

861 logger: Logger, 

862) -> dict: 

863 """ 

864 Reduce the degrees of freedom on the per-frequency calibration amplitudes and phases by fitting 

865 the full frequency band with polynomials of a specified degree, with options for split polynomials 

866 over certain regions. 

867 

868 In addition, fit cable reflections with a text file of fits, theoretical fits given cable length/type, 

869 or the finding the maximum in delay space. Any of these can then be used as an initial estimate in a 

870 hyperresolved FFT of the residual calibration solutions to fit the final mode, phase, and amplitude. 

871 Residual calibration solutions can optionally be made by using an incoherent mean, or a mean over 

872 all tiles which *do not* have the same cable length/type, to reduce bias in the residual, and 

873 the cable reflections can also be fit using just the phases to reduce dependency on the polyphase 

874 filter bank shape. 

875 

876 Parameters 

877 ---------- 

878 obs : dict 

879 Observation metadata dictionary 

880 cal : dict 

881 Calibration dictionary 

882 auto_ratio: NDArray[np.float64] | None 

883 The estimation of the antenna-dependent bias through the square root of the autocorrelation 

884 visibilities normalized via a reference tile 

885 pyfhd_config : dict 

886 PyFHD's configuration dictionary containing all the options set for a PyFHD run 

887 logger : Logger 

888 PyFHD's logger for displaying errors and info to the log files 

889 

890 Returns 

891 ------- 

892 cal : dict 

893 Calibration dictionary with polynomial gain fits 

894 pyfhd_config : dict 

895 PyFHD's configuration dictionary containing all the options set for a PyFHD run 

896 Is returned in case the digital_gain_jump_polyfit was set here. 

897 """ 

898 # Keep the og_gain_arr for calculations later 

899 og_gain_arr = np.copy(cal["gain"]) 

900 if ( 900 ↛ 914line 900 didn't jump to line 914 because the condition on line 900 was always true

901 pyfhd_config["cal_reflection_mode_theory"] 

902 or pyfhd_config["cal_reflection_mode_file"] 

903 or pyfhd_config["cal_reflection_mode_delay"] 

904 or pyfhd_config["cal_reflection_hyperresolve"] 

905 ): 

906 if ( 906 ↛ 912line 906 didn't jump to line 912 because the condition on line 906 was always true

907 pyfhd_config["cal_reflection_mode_theory"] 

908 and abs(pyfhd_config["cal_reflection_mode_theory"]) > 1 

909 ): 

910 cal_mode_fit = pyfhd_config["cal_reflection_mode_theory"] 

911 else: 

912 cal_mode_fit = True 

913 else: 

914 cal_mode_fit = False 

915 freq_use = np.where(obs["baseline_info"]["freq_use"])[0] 

916 tile_use = np.where(obs["baseline_info"]["tile_use"])[0] 

917 

918 pre_dig_inds = np.where(obs["baseline_info"]["freq"][freq_use] < 187.515e6) 

919 if pre_dig_inds[0].size == 0: 919 ↛ 920line 919 didn't jump to line 920 because the condition on line 919 was never true

920 logger.warning( 

921 "No frequencies below 187.515MHz, using full band polyfit, digital gain jump polyfit disabled." 

922 ) 

923 pyfhd_config["digital_gain_jump_polyfit"] = False 

924 else: 

925 f_d = np.max(pre_dig_inds[0]) 

926 f_end = freq_use.size 

927 

928 if ((f_d + 1) == f_end) and pyfhd_config["digital_gain_jump_polyfit"]: 928 ↛ 929line 928 didn't jump to line 929 because the condition on line 928 was never true

929 logger.warning( 

930 "No frequency found above 187.515MHz, using full band polyfit, digital gain jump polyfit disabled." 

931 ) 

932 pyfhd_config["digital_gain_jump_polyfit"] = False 

933 

934 # If the observation date is beyond the date where digital gain jump polyfit is required, disable it 

935 # Date is 2014-07-23 (23rd July 2014) or Julian Day 2456861.5000000 

936 if obs["jd0"] > 2456861.5000000 and pyfhd_config["digital_gain_jump_polyfit"]: 936 ↛ 937line 936 didn't jump to line 937 because the condition on line 936 was never true

937 logger.warning( 

938 "Observation date is beyond the date where digital gain jump polyfit is required, using full band polyfit, digital gain jump polyfit disabled." 

939 ) 

940 pyfhd_config["digital_gain_jump_polyfit"] = False 

941 

942 # If the amp_degree or phase_degree weren't used, then apply the defaults 

943 if not pyfhd_config["cal_amp_degree_fit"]: 943 ↛ 944line 943 didn't jump to line 944 because the condition on line 943 was never true

944 pyfhd_config["cal_amp_degree_fit"] = 2 

945 if not pyfhd_config["cal_phase_degree_fit"]: 945 ↛ 946line 945 didn't jump to line 946 because the condition on line 945 was never true

946 pyfhd_config["cal_phase_degree_fit"] = 1 

947 

948 # If you wish to find any steps that are outliers beyond 5sigma, and remove them add that code here. 

949 # The cal_step_fit option isn't in the eor_defaults_wrapper or defined in the FHD dictionary. 

950 

951 # Polynomial fitting over the frequency band 

952 gain_residual = np.empty((cal["n_pol"], obs["n_freq"], obs["n_tile"])) 

953 # create amp_params with the shape of (n_pol, n_tile, amp_degree + 1) 

954 # If we're using the digital_gain_jump_polyfit add extra dimension to amp_params 

955 if pyfhd_config["digital_gain_jump_polyfit"]: 955 ↛ 965line 955 didn't jump to line 965 because the condition on line 955 was always true

956 cal["amp_params"] = np.empty( 

957 ( 

958 cal["n_pol"], 

959 obs["n_tile"], 

960 pyfhd_config["cal_amp_degree_fit"], 

961 pyfhd_config["cal_amp_degree_fit"], 

962 ) 

963 ) 

964 else: 

965 cal["amp_params"] = np.empty( 

966 (cal["n_pol"], obs["n_tile"], pyfhd_config["cal_amp_degree_fit"] + 1) 

967 ) 

968 cal["phase_params"] = np.empty( 

969 (cal["n_pol"], obs["n_tile"], pyfhd_config["cal_phase_degree_fit"] + 1) 

970 ) 

971 for pol_i in range(cal["n_pol"]): 

972 gain_arr = np.copy(cal["gain"][pol_i]) 

973 gain_amp = np.abs(gain_arr) 

974 gain_phase = np.arctan2(gain_arr.imag, gain_arr.real) 

975 for tile_i in range(obs["n_tile"]): 

976 gain = np.squeeze(gain_amp[freq_use, tile_i]) 

977 gain_fit = np.zeros(obs["n_freq"]) 

978 # Pre and post digital gain jump separately for highband MWA data 

979 if pyfhd_config["digital_gain_jump_polyfit"]: 979 ↛ 1025line 979 didn't jump to line 1025 because the condition on line 979 was always true

980 if pre_dig_inds[0].size > 0: 980 ↛ 1020line 980 didn't jump to line 1020 because the condition on line 980 was always true

981 fit_params1 = ( 

982 np.polynomial.Polynomial.fit( 

983 freq_use[0 : f_d + 1], 

984 gain[0 : f_d + 1], 

985 deg=pyfhd_config["cal_amp_degree_fit"] - 1, 

986 ) 

987 .convert() 

988 .coef 

989 ) 

990 fit_params2 = ( 

991 np.polynomial.Polynomial.fit( 

992 freq_use[f_d + 1 : f_end], 

993 gain[f_d + 1 : f_end], 

994 deg=pyfhd_config["cal_amp_degree_fit"] - 1, 

995 ) 

996 .convert() 

997 .coef 

998 ) 

999 for di in range(pyfhd_config["cal_amp_degree_fit"]): 

1000 gain_fit[freq_use[0] : freq_use[f_d] + 1] += fit_params1[di] * ( 

1001 # Had to use the size as use freq_use[f_d] could cause 

1002 # off by one indexing errors (due to freq_use[0] potentially being 

1003 # 1 or 0 which throws off the np.arange indexing) 

1004 np.arange(gain_fit[freq_use[0] : freq_use[f_d] + 1].size) 

1005 ** di 

1006 ) 

1007 gain_fit[freq_use[f_d + 1] : freq_use[f_end - 1] + 1] += ( 

1008 fit_params2[di] 

1009 * ( 

1010 np.arange(freq_use[f_end - 1] - freq_use[f_d + 1] + 1) 

1011 + freq_use[f_d + 1] 

1012 ) 

1013 ** di 

1014 ) 

1015 # Do notice this is saving the coefficients on a per row basis, so fit_params1 a,b will be [pol_i, tile_i, 0, :] 

1016 # While fit_params2 a, b coefficients will be in [pol_i, tile_i, 1, :] 

1017 fit_params = np.vstack([fit_params1, fit_params2]) 

1018 cal["amp_params"][pol_i, tile_i] = fit_params 

1019 else: 

1020 logger.warning( 

1021 "digital_gain_jump_polyfit only works with highband mwa data. Full band polyfit applied instead." 

1022 ) 

1023 # Fit for amplitude 

1024 else: 

1025 fit_params = ( 

1026 np.polynomial.Polynomial.fit( 

1027 freq_use, gain, deg=pyfhd_config["cal_amp_degree_fit"] 

1028 ) 

1029 .convert() 

1030 .coef 

1031 ) 

1032 cal["amp_params"][pol_i, tile_i, :] = fit_params 

1033 for di in range(pyfhd_config["cal_amp_degree_fit"]): 

1034 gain_fit += fit_params[di] * np.arange(obs["n_freq"]) ** di 

1035 

1036 gain_residual[pol_i, :, tile_i] = gain_amp[:, tile_i] - gain_fit 

1037 

1038 # Fit for phase 

1039 phase_use = np.unwrap(np.squeeze(gain_phase[freq_use, tile_i])) 

1040 phase_params = ( 

1041 np.polynomial.Polynomial.fit( 

1042 freq_use, phase_use, pyfhd_config["cal_phase_degree_fit"] 

1043 ) 

1044 .convert() 

1045 .coef 

1046 ) 

1047 cal["phase_params"][pol_i, tile_i, :] = phase_params 

1048 phase_fit = np.zeros(obs["n_freq"]) 

1049 for di in range(phase_params.size): 

1050 phase_fit += phase_params[di] * np.arange(obs["n_freq"]) ** di 

1051 gain_arr[:, tile_i] = gain_fit * np.exp(1j * phase_fit) 

1052 cal["gain"][pol_i] = gain_arr 

1053 # Cable Reflection Fitting 

1054 if cal_mode_fit: 1054 ↛ 1284line 1054 didn't jump to line 1284 because the condition on line 1054 was always true

1055 if pyfhd_config["cal_reflection_mode_file"]: 1055 ↛ 1056line 1055 didn't jump to line 1056 because the condition on line 1055 was never true

1056 logger.info( 

1057 "Using mwa calibration reflections fits from instrument_config/mwa_cable_reflection_coefficients.txt." 

1058 ) 

1059 cable_len_filepath = importlib_resources.files( 

1060 "PyFHD.resources.instrument_config" 

1061 ).joinpath( 

1062 f"{pyfhd_config['instrument']}_cable_reflection_coefficients.txt" 

1063 ) 

1064 cable_reflections = np.loadtxt(cable_len_filepath, skiprows=1).transpose() 

1065 cable_length = cable_reflections[2] 

1066 tile_ref_flag = np.minimum( 

1067 np.maximum(np.zeros_like(cable_reflections[4]), cable_reflections[4]), 

1068 np.ones_like(cable_reflections[4]), 

1069 ) 

1070 tile_mode_X = cable_reflections[5] 

1071 tile_amp_X = cable_reflections[6] 

1072 tile_phase_X = cable_reflections[7] 

1073 tile_mode_Y = cable_reflections[8] 

1074 tile_amp_Y = cable_reflections[9] 

1075 tile_phase_Y = cable_reflections[10] 

1076 

1077 # Modes in fourier transform units 

1078 mode_i_arr = np.zeros((cal["n_pol"], obs["n_tile"])) 

1079 mode_i_arr[0, :] = tile_mode_X * tile_ref_flag 

1080 mode_i_arr[1, :] = tile_mode_Y * tile_ref_flag 

1081 

1082 amp_arr = np.vstack([tile_amp_X, tile_amp_Y]) 

1083 phase_arr = np.vstack[[tile_phase_X, tile_phase_Y]] 

1084 

1085 elif pyfhd_config["cal_reflection_mode_theory"]: 1085 ↛ 1112line 1085 didn't jump to line 1112 because the condition on line 1085 was always true

1086 logger.info( 

1087 "Using theory calculation in nominal reflection mode calibration." 

1088 ) 

1089 # Get the nominal tile lengths and velocity factors 

1090 cable_len_filepath = importlib_resources.files( 

1091 "PyFHD.resources.instrument_config" 

1092 ).joinpath(f"{pyfhd_config['instrument']}_cable_length.txt") 

1093 cable_length_data = np.loadtxt(cable_len_filepath, skiprows=1).transpose() 

1094 cable_length = cable_length_data[2] 

1095 cable_vf = cable_length_data[3] 

1096 tile_ref_flag = np.minimum(np.maximum(0, cable_length_data[4]), 1) 

1097 

1098 # Nominal Reflect Time 

1099 reflect_time = (2 * cable_length) / (c.value * cable_vf) 

1100 bandwidth = ( 

1101 ( 

1102 np.max(obs["baseline_info"]["freq"]) 

1103 - np.min(obs["baseline_info"]["freq"]) 

1104 ) 

1105 * obs["n_freq"] 

1106 ) / (obs["n_freq"] - 1) 

1107 # Modes in fourier transform units 

1108 mode_i_arr = np.tile( 

1109 bandwidth * reflect_time * tile_ref_flag, [cal["n_pol"], 1] 

1110 ) 

1111 

1112 elif pyfhd_config["cal_reflection_mode_delay"]: 

1113 logger.info( 

1114 "Using calibration delay spectrum to calculate nominal reflection modes." 

1115 ) 

1116 spec_mask = np.zeros(obs["n_freq"]) 

1117 spec_mask[freq_use] = 1 

1118 freq_cut = np.where(spec_mask == 0) 

1119 # IDL uses forward FFT by default 

1120 spec_psf = np.abs(np.fft.fftn(spec_mask, norm="forward")) 

1121 spec_inds = np.arange(obs["n_freq"] // 2) 

1122 spec_psf = spec_psf[spec_inds] 

1123 mode_test = np.zeros(obs["n_freq"] // 2) 

1124 for pol_i in range(cal["n_pol"]): 

1125 for ti in range(tile_use.size): 

1126 tile_i = tile_use[ti] 

1127 spec0 = np.abs(np.fft.fftn(gain_residual[pol_i, tile_i])) 

1128 mode_test += spec0[spec_inds] 

1129 psf_mask = np.zeros(obs["n_freq"] // 2) 

1130 

1131 if freq_cut[0].size > 0: 

1132 psf_mask[np.where(spec_psf > (np.max(spec_psf) / 1000))] = 1 

1133 # Replaces IDL smooth with edge_truncate 

1134 psf_mask = uniform_filter(psf_mask, size=5, mode="nearest") 

1135 mask_i = np.nonzero(psf_mask) 

1136 if mask_i[0].size > 0: 

1137 mode_test[mask_i] = 0 

1138 mode_i_arr = np.zeros((cal["n_pol"], obs["n_tile"])) + np.argmax(mode_test) 

1139 

1140 # Fit only certain cable lengths 

1141 # Positive length indicates fit mode, negative length indicates exclude mode 

1142 # This is currently assuming cal_mode_fit is an integer or number, not an array! 

1143 # If you need an array to fit or exclude cable lengths, then create another option for it 

1144 # in the config and adjust the code accordingly. Ensure every config option only has one purpose. 

1145 if auto_ratio is None and cal_mode_fit != 1: 

1146 tile_ref_logic = np.zeros(obs["n_tile"]) 

1147 if cal_mode_fit > 0: 1147 ↛ 1152line 1147 didn't jump to line 1152 because the condition on line 1147 was always true

1148 cable_cut_i = np.where(cable_length != cal_mode_fit) 

1149 if cable_cut_i[0].size > 0: 1149 ↛ 1155line 1149 didn't jump to line 1155 because the condition on line 1149 was always true

1150 tile_ref_logic[cable_cut_i] = 1 

1151 else: 

1152 cable_cut_i = np.where(cable_length == abs(cal_mode_fit)) 

1153 if cable_cut_i[0].size > 0: 

1154 tile_ref_logic[cable_cut_i] = 1 

1155 cable_cut = np.nonzero(tile_ref_logic) 

1156 tile_ref_flag = np.ones(obs["n_tile"]) 

1157 if cable_cut[0].size > 0: 1157 ↛ 1159line 1157 didn't jump to line 1159 because the condition on line 1157 was always true

1158 tile_ref_flag[cable_cut] = 0 

1159 mode_i_arr *= tile_ref_flag 

1160 

1161 cal["mode_params"] = np.empty([cal["n_pol"], obs["n_tile"], 3]) 

1162 for pol_i in range(cal["n_pol"]): 

1163 # Divide the polyfit to reveal the residual cable reflections better 

1164 gain_arr = og_gain_arr[pol_i] / cal["gain"][pol_i] 

1165 for ti in range(tile_use.size): 

1166 tile_i = tile_use[ti] 

1167 mode_i = mode_i_arr[pol_i, tile_i] 

1168 if mode_i == 0: 

1169 continue 

1170 else: 

1171 # Options to hyperresolve or fit the reflection modes/amp/phase given the nominal calculations 

1172 if pyfhd_config["cal_reflection_hyperresolve"]: 1172 ↛ 1256line 1172 didn't jump to line 1256 because the condition on line 1172 was always true

1173 # start with nominal cable length 

1174 mode0 = mode_i 

1175 # overresolve the FT used for the fit (normal resolution would be dmode=1) 

1176 dmode = 0.05 

1177 # range around the central mode to test 

1178 nmodes = 101 

1179 # array of modes to try 

1180 modes = (np.arange(nmodes) - nmodes // 2) * dmode + mode0 

1181 # reshape for ease of computing 

1182 modes = rebin(modes, (freq_use.size, nmodes)).T 

1183 

1184 if auto_ratio is not None: 

1185 # Find tiles which will *not* be accidently coherent in their cable reflection in order to reduce bias 

1186 inds = np.where( 

1187 (obs["baseline_info"]["tile_use"]) 

1188 & (mode_i_arr[pol_i, :] > 0) 

1189 & ((np.abs(mode_i_arr[pol_i, :] - mode_i)) > 0.01) 

1190 ) 

1191 # mean over frequency for each tile 

1192 freq_mean = np.nanmean(auto_ratio[pol_i], axis=0) 

1193 norm_autos = auto_ratio[pol_i] / rebin( 

1194 freq_mean, (obs["n_freq"], obs["n_tile"]) 

1195 ) 

1196 # mean over all tiles which *are not* accidently coherent as a func of freq 

1197 incoherent_mean = np.nanmean(norm_autos[:, inds[0]], axis=1) 

1198 # Residual and normalized (using incoherent mean) auto-correlation 

1199 resautos = ( 

1200 norm_autos[:, tile_i] / incoherent_mean 

1201 ) - np.nanmean(norm_autos[:, tile_i] / incoherent_mean) 

1202 gain_temp = rebin( 

1203 resautos[freq_use], (nmodes, freq_use.size) 

1204 ) 

1205 else: 

1206 # dimension manipulation, add dim for mode fitting 

1207 # Subtract the mean so aliasing is reduced in the dft cable fitting 

1208 gain_temp = rebin( 

1209 gain_arr[freq_use, tile_i] 

1210 - np.mean(gain_arr[freq_use, tile_i]), 

1211 (nmodes, freq_use.size), 

1212 ) 

1213 # freq_use matrix to multiply/collapse in fit 

1214 freq_mat = rebin(freq_use, (nmodes, freq_use.size)) 

1215 # Perform DFT of gains to test modes 

1216 test_fits = np.sum( 

1217 np.exp(1j * 2 * np.pi / obs["n_freq"] * modes * freq_mat) 

1218 * gain_temp, 

1219 axis=1, 

1220 ) 

1221 # Pick out highest amplitude fit (mode_ind gives the index of the mode) 

1222 amp_use = np.max(np.abs(test_fits)) / freq_use.size 

1223 mode_ind = np.argmax(np.abs(test_fits)) 

1224 # Phase of said fit 

1225 phase_use = np.arctan2( 

1226 test_fits[mode_ind].imag, test_fits[mode_ind].real 

1227 ) 

1228 mode_i = modes[mode_ind, 0] 

1229 

1230 # Using the mode selected from the gains, optionally use the phase to find the amp and phase 

1231 if auto_ratio is not None: 

1232 # Find tiles which will not be accidently coherent in their cable reflection in order to reduce bias 

1233 inds = np.where( 

1234 (obs["baseline_info"]["tile_use"]) 

1235 & (mode_i_arr[pol_i, :] > 0) 

1236 & (np.abs(mode_i_arr[pol_i, :] - mode_i) > 0.01) 

1237 ) 

1238 residual_phase = np.arctan2( 

1239 gain_arr[freq_use, :].imag, gain_arr[freq_use, :].real 

1240 ) 

1241 incoherent_residual_phase = residual_phase[ 

1242 :, tile_i 

1243 ] - np.nanmean(residual_phase[:, inds[0]], axis=1) 

1244 test_fits = np.sum( 

1245 np.exp( 

1246 1j * 2 * np.pi / obs["n_freq"] * mode_i * freq_use 

1247 ) 

1248 * incoherent_residual_phase 

1249 ) 

1250 # Factor of 2 from fitting just the phase 

1251 amp_use = 2 * np.abs(test_fits) / freq_use.size 

1252 # Factor of pi/2 from just fitting the phase 

1253 phase_use = ( 

1254 np.arctan2(test_fits.imag, test_fits.real) + np.pi / 2 

1255 ) 

1256 elif pyfhd_config["cal_reflection_mode_file"]: 

1257 # Use predetermined fits 

1258 amp_use = amp_arr[pol_i, tile_i] 

1259 phase_use = phase_arr[pol_i, tile_i] 

1260 else: 

1261 # Use nominal delay mode, but fit amplitude and phase of reflections 

1262 mode_fit = np.sum( 

1263 np.exp(1j * 2 * np.pi / obs["n_freq"] * mode_i * freq_use) 

1264 * gain_arr[freq_use, tile_i] 

1265 ) 

1266 amp_use = np.abs(mode_fit) / freq_use[0].size 

1267 phase_use = np.arctan2(mode_fit.imag, mode_fit.real) 

1268 

1269 gain_mode_fit = amp_use * np.exp( 

1270 -1j 

1271 * 2 

1272 * np.pi 

1273 * (mode_i * np.arange(obs["n_freq"]) / obs["n_freq"]) 

1274 + 1j * phase_use 

1275 ) 

1276 if auto_ratio is not None: 

1277 # Only fit for the cable reflection in the phases 

1278 cal["gain"][pol_i, :, tile_i] *= np.exp(1j * gain_mode_fit.imag) 

1279 else: 

1280 cal["gain"][pol_i, :, tile_i] *= 1 + gain_mode_fit 

1281 cal["mode_params"][pol_i, tile_i] = np.array( 

1282 [mode_i, amp_use, phase_use] 

1283 ) 

1284 return cal, pyfhd_config 

1285 

1286 

1287def vis_cal_auto_fit( 

1288 obs: dict, 

1289 cal: dict, 

1290 vis_auto: NDArray[np.float64], 

1291 vis_auto_model: NDArray[np.float64], 

1292 auto_tile_i: NDArray[np.integer], 

1293) -> dict: 

1294 """ 

1295 Solve for each tile's calibration amplitude via the square root of the ratio of the data autocorrelation 

1296 to the model autocorrelation using the definition of a gain. Then, remove dependence on the correlated 

1297 noise term in the autocorrelations by scaling this amplitude down to the crosscorrelations gain via a 

1298 simple, linear fit. Build a full, scaled autocorrelation gain solution by adding in the phase term via 

1299 the crosscorrelation gains. 

1300 

1301 Parameters 

1302 ---------- 

1303 obs : dict 

1304 Observation metadata dictionary 

1305 cal : dict 

1306 Calibration dictionary 

1307 vis_auto : NDArray[np.float64] 

1308 Data autocorrelations 

1309 vis_auto_model : NDArray[np.float64] 

1310 Simulated model autocorrelations 

1311 auto_tile_i : NDArray[np.integer] 

1312 Index array of the tile array that have defined autocorrelations 

1313 

1314 Returns 

1315 ------- 

1316 cal: dict 

1317 Calibration dictionary with scaled autocorrelation gains 

1318 """ 

1319 freq_i_use = np.nonzero(obs["baseline_info"]["freq_use"]) 

1320 freq_i_flag = np.where(obs["baseline_info"]["freq_use"] == 0)[0] 

1321 # If the number of frequencies not being used is above 0, then ignore the frequencies surrounding them. 

1322 if freq_i_flag.size > 0: 1322 ↛ 1334line 1322 didn't jump to line 1334 because the condition on line 1322 was always true

1323 freq_flag = np.zeros(obs["n_freq"]) 

1324 freq_flag[freq_i_use] = 1 

1325 for freq_i in range(freq_i_flag.size): 

1326 minimum = max(0, freq_i_flag[freq_i] - 1) 

1327 maximum = min(obs["n_freq"], freq_i_flag[freq_i] + 2) 

1328 freq_flag[minimum:maximum] = 0 

1329 freq_i_use = np.nonzero(freq_flag) 

1330 # Vectorized loop for via_cal_auto_fit lines 45-55 in IDL 

1331 # However the logic still indexes the full 128 tiles, so need to shove 

1332 # outputs into an empty array of correct size 

1333 # We're not using the cross polarizations if they are present 

1334 auto_gain = np.empty((cal["n_pol"], obs["n_freq"], obs["n_tile"])) 

1335 auto_gain[:, :, auto_tile_i] = np.sqrt( 

1336 vis_auto[: cal["n_pol"]] * weight_invert(vis_auto_model[: cal["n_pol"]]) 

1337 ) 

1338 gain_cross = cal["gain"] 

1339 fit_slope = np.empty((cal["n_pol"], obs["n_tile"])) 

1340 fit_offset = np.empty_like(fit_slope) 

1341 

1342 # Didn't vectorize as the polyfit won't be vectorized 

1343 for pol_i in range(cal["n_pol"]): 

1344 for tile in range(auto_tile_i.size): 

1345 tile_idx = auto_tile_i[tile] 

1346 phase_cross_single = np.arctan2( 

1347 gain_cross[pol_i, :, tile_idx].imag, gain_cross[pol_i, :, tile_idx].real 

1348 ) 

1349 

1350 gain_auto_single = np.abs(auto_gain[pol_i, :, tile_idx]) 

1351 gain_cross_single = np.abs(gain_cross[pol_i, :, tile_idx]) 

1352 

1353 # mask out any NaN values; numpy doesn't like them, 

1354 # I assume the IDL equiv function just masks them? 

1355 # or maybe we need to do a catch for NaNs here, and abandon all 

1356 # hope for a fit if there are NaNs? 

1357 notnan = np.where( 

1358 (np.isnan(gain_auto_single[freq_i_use]) != True) 

1359 & (np.isnan(gain_cross_single[freq_i_use]) != True) 

1360 ) 

1361 gain_auto_single_fit = gain_auto_single[freq_i_use][notnan] 

1362 gain_cross_single_fit = gain_cross_single[freq_i_use][notnan] 

1363 

1364 # linfit from IDL uses chi-square error calculations to do the linear fit, instead of least squares. 

1365 # The polynomial fit uses least square method 

1366 x = np.vstack([gain_auto_single_fit, np.ones(gain_auto_single_fit.size)]).T 

1367 fit_single = np.linalg.lstsq(x, gain_cross_single_fit, rcond=None)[0] 

1368 # IDL gives the solution in terms of [A, B] while Python does [B, A] assuming we're 

1369 # solving the equation y = A + Bx 

1370 cal["gain"][pol_i, :, tile_idx] = ( 

1371 gain_auto_single * fit_single[0] + fit_single[1] 

1372 ) * np.exp(1j * phase_cross_single) 

1373 fit_slope[pol_i, tile_idx] = fit_single[0] 

1374 fit_offset[pol_i, tile_idx] = fit_single[1] 

1375 cal["auto_scale"] = np.sum(fit_slope, axis=1) / auto_tile_i.size 

1376 cal["auto_params"] = np.empty([cal["n_pol"], cal["n_pol"], obs["n_tile"]]) 

1377 cal["auto_params"][0, :, :] = fit_offset 

1378 cal["auto_params"][1, :, :] = fit_slope 

1379 return cal 

1380 

1381 

1382def vis_calibration_apply( 

1383 vis_arr: NDArray[np.complex128], 

1384 obs: dict, 

1385 cal: dict, 

1386 vis_model_arr: NDArray[np.complex128], 

1387 vis_weights: NDArray[np.float64], 

1388 logger: Logger, 

1389) -> tuple[NDArray[np.complex128], dict]: 

1390 """ 

1391 Apply the calibration solutions to the input, data visibilities to create calibrated, data visibilities using 

1392 the definition of the gains. 

1393 

1394 Definition of the gain: 

1395 (visibility for baseline of tile i and tile j) = (gain of tile i) (gain of tile j) (model visibility for baseline of tile i and tile j) 

1396 

1397 If only two othogonal polarizations were used to calibrate, calculate the phase offset between the two orthogonal dipoles to 

1398 solve for a degeneracy in the cross polarizations. The formula can be derived by multiplying the gains by an equal and opposite 

1399 phase in the linear least square solver and solving for the phase when the partial derivative w.r.t the offset is 0. 

1400 

1401 Parameters 

1402 ---------- 

1403 vis_arr : NDArray[np.complex128] 

1404 Uncalibrated data visiblities 

1405 obs : dict 

1406 Observation metadata dictionary 

1407 cal : dict 

1408 Calibration dictionary 

1409 vis_model_arr : NDArray[np.complex128] 

1410 Simulated model visibilites 

1411 vis_weights : NDArray[np.float64] 

1412 Weights (flags) of the visibilities 

1413 logger : Logger 

1414 PyFHD's logger for displaying errors and info to the log files 

1415 

1416 Returns 

1417 ------- 

1418 vis_arr : NDArray[np.complex128] 

1419 Calibrated visibilities 

1420 cal : dict 

1421 Calibration dictionary 

1422 """ 

1423 # tile numbering starts at 1 

1424 tile_a_i = obs["baseline_info"]["tile_a"] - 1 

1425 tile_b_i = obs["baseline_info"]["tile_b"] - 1 

1426 

1427 n_pol_vis = vis_arr.shape[0] 

1428 gain_pol_arr1 = [0, 1, 0, 1] 

1429 gain_pol_arr2 = [0, 1, 1, 0] 

1430 

1431 # OK, it really makes sense to use native python functionality here 

1432 # We're just trying to match up the frequency-dependent gains to the 

1433 # correct baselines, and apply them. Can use `meshgrid` here instead of 

1434 # `rebin`, which will make 2D indexing arrays, so we can directly leave 

1435 # the gain arrays in the correct shape and index the directly. Using `rebin` 

1436 # means we have to flatten them 

1437 ( 

1438 inds_a_baseline, 

1439 inds_a_freqs, 

1440 ) = np.meshgrid(tile_a_i, np.arange(obs["n_freq"])) 

1441 inds_b_baseline, inds_b_freqs = np.meshgrid(tile_b_i, np.arange(obs["n_freq"])) 

1442 

1443 for pol_i in range(n_pol_vis): 

1444 gain_arr1 = cal["gain"][gain_pol_arr1[pol_i], :, :] 

1445 gain_arr2 = cal["gain"][gain_pol_arr2[pol_i], :, :] 

1446 

1447 vis_gain = gain_arr1[inds_a_freqs, inds_a_baseline] * np.conjugate( 

1448 gain_arr2[inds_b_freqs, inds_b_baseline] 

1449 ) 

1450 

1451 vis_arr[pol_i, :, :] *= weight_invert(vis_gain, use_abs=False) 

1452 

1453 # We haven't run FHD in a way that uses 4 pols yet so this is all 

1454 # untested 

1455 if n_pol_vis == 4: 

1456 if type(vis_model_arr) == np.ndarray and type(vis_weights) == np.ndarray: 1456 ↛ 1506line 1456 didn't jump to line 1506 because the condition on line 1456 was always true

1457 # This if statement replaces vis_calibrate_crosspol_phase 

1458 # as this was the only place where the function was used 

1459 # Note inside vis_calibrate_crosspol_phase there is a 

1460 # if n_pol_vis == 4 check hence only run if n_pol_vis == 4 

1461 # This code should calculate the phase fit between the X and Y 

1462 # antenna polarizations. 

1463 # Use the xx flags (yy should be identitical at this point) 

1464 

1465 # this is num baselines per time step 

1466 n_baselines = int(len(tile_a_i) / obs["n_times"]) 

1467 # reshape from (n_freq, n_baselines*n_times) to (n_freq, n_times, n_baselines). Turns out due to the row major vs col major difference 

1468 # between IDL and python, this shape also changes 

1469 new_shape = (obs["n_freq"], obs["n_times"], n_baselines) 

1470 weights_use = np.reshape(vis_weights[0, :, :], new_shape) 

1471 # carried over from FHD code 

1472 weights_use = np.maximum(weights_use, np.zeros_like(weights_use)) 

1473 weights_use = np.minimum(weights_use, np.ones_like(weights_use)) 

1474 

1475 # Average the visbilities in time 

1476 axis_avg = 1 

1477 vis_xy = np.reshape(vis_arr[2, :, :], new_shape) 

1478 vis_xy = np.sum(vis_xy * weights_use, axis=axis_avg) 

1479 vis_yx = np.reshape(vis_arr[3, :, :], new_shape) 

1480 vis_yx = np.sum(vis_yx * weights_use, axis=axis_avg) 

1481 

1482 model_xy = np.reshape(vis_model_arr[2, :, :], new_shape) 

1483 model_xy = np.sum(model_xy * weights_use, axis=axis_avg) 

1484 model_yx = np.reshape(vis_model_arr[3, :, :], new_shape) 

1485 model_yx = np.sum(model_yx * weights_use, axis=axis_avg) 

1486 

1487 # Remove Zeros 

1488 weight = np.sum(weights_use, axis=axis_avg) 

1489 i_use = np.nonzero(weight) 

1490 vis_xy = np.squeeze(vis_xy[i_use]) 

1491 vis_yx = np.squeeze(vis_yx[i_use]) 

1492 model_xy = np.squeeze(model_xy[i_use]) 

1493 model_yx = np.squeeze(model_yx[i_use]) 

1494 

1495 vis_sum = np.sum(np.conjugate(vis_xy) * model_xy) + np.sum( 

1496 vis_yx * np.conjugate(model_yx) 

1497 ) 

1498 cross_phase = np.arctan2(vis_sum.imag, vis_sum.real) 

1499 

1500 logger.info( 

1501 f"Phase fit between X and Y antenna polarizations: {cross_phase}" 

1502 ) 

1503 

1504 cal["cross_phase"] = cross_phase 

1505 

1506 vis_arr[2, :, :] *= np.exp(1j * 0) 

1507 vis_arr[3, :, :] *= np.exp(-1j * 0) 

1508 

1509 return vis_arr, cal 

1510 

1511 

1512def vis_baseline_hist( 

1513 obs: dict, 

1514 params: dict, 

1515 vis_cal: NDArray[np.complex128], 

1516 vis_model_arr: NDArray[np.complex128], 

1517) -> dict: 

1518 """ 

1519 Create diagnostic histograms of both the mean and sigma for the percent difference between calibrated 

1520 data and simulated model visibilities as a function of baseline length. 

1521 

1522 Parameters 

1523 ---------- 

1524 obs : dict 

1525 Observation metadata dictionary 

1526 params : dict 

1527 Visibility metadata dictionary 

1528 vis_cal : NDArray[np.complex128] 

1529 Calibrated data visibilities 

1530 vis_model_arr : NDArray[np.complex128] 

1531 Simulated model visibilites 

1532 

1533 Returns 

1534 ------- 

1535 vis_baseline_hist : dict 

1536 Dictionary of the mean and sigma histograms for the percent difference between calibrated data 

1537 and simulated model visibilities as a function of baseline length. 

1538 """ 

1539 kx_arr = params["uu"] / obs["kpix"] 

1540 ky_arr = params["vv"] / obs["kpix"] 

1541 kr_arr = np.sqrt(kx_arr**2 + ky_arr**2) 

1542 # take the transpose of this, given our `vis_cal` and `vis_mode_arr` are the 

1543 # transpose of the original FHD code 

1544 dist_arr = np.outer(kr_arr, obs["baseline_info"]["freq"]).transpose() * obs["kpix"] 

1545 dist_hist, bins, dist_ri = histogram( 

1546 dist_arr, min=obs["min_baseline"], max=obs["max_baseline"], bin_size=5.0 

1547 ) 

1548 

1549 vis_res_ratio_mean = np.zeros([obs["n_pol"], bins.size]) 

1550 vis_res_sigma = np.zeros([obs["n_pol"], bins.size]) 

1551 

1552 for pol_i in range(obs["n_pol"]): 

1553 for bin_i in range(bins.size): 

1554 if dist_hist[bin_i] > 0: 1554 ↛ 1575line 1554 didn't jump to line 1575 because the condition on line 1554 was always true

1555 inds = dist_ri[dist_ri[bin_i] : dist_ri[bin_i + 1]] 

1556 model_vals = (vis_model_arr[pol_i]).flatten()[inds] 

1557 wh_noflag = np.where(np.abs(model_vals) > 0)[0] 

1558 if wh_noflag.size > 0: 1558 ↛ 1561line 1558 didn't jump to line 1561 because the condition on line 1558 was always true

1559 inds = inds[wh_noflag] 

1560 else: 

1561 continue 

1562 # if Keyword_Set(calibration_visibilities_subtract) THEN BEGIN 

1563 # but calibration_visibilities_subtract isn't a function keyword 

1564 # so we'll only translate the False of that statement 

1565 # vis_cal_use = (vis_cal[pol_i].transpose()).flatten()[inds] 

1566 vis_cal_use = (vis_cal[pol_i]).flatten()[inds] 

1567 vis_res_ratio_mean[pol_i, bin_i] = np.mean( 

1568 np.abs(vis_cal_use - model_vals) 

1569 ) / np.mean(np.abs(model_vals)) 

1570 vis_res_sigma[pol_i, bin_i] = np.sqrt( 

1571 np.var(np.abs(vis_cal_use - model_vals)) 

1572 ) / np.mean(np.abs(model_vals)) 

1573 

1574 else: 

1575 continue 

1576 return { 

1577 "baseline_length": bins.size, 

1578 "vis_res_ratio_mean": vis_res_ratio_mean, 

1579 "vis_res_sigma": vis_res_sigma, 

1580 } 

1581 

1582 

1583def cal_auto_ratio_divide( 

1584 obs: dict, 

1585 cal: dict, 

1586 vis_auto: NDArray[np.float64], 

1587 auto_tile_i: NDArray[np.integer], 

1588) -> tuple[dict, NDArray[np.float64]]: 

1589 """ 

1590 Remove antenna-dependent parameters (i.e. cable reflections) from the calculated gains 

1591 to reduce the bias on individual tile variation before the creation of averaged quantities 

1592 like the global bandpass. Antenna-dependent parameters are estimated from the square root of the 

1593 autocorrelation visibilities normalized via a reference tile. 

1594 

1595 Parameters 

1596 ---------- 

1597 obs : dict 

1598 Observation metadata dictionary 

1599 cal : dict 

1600 Calibration dictionary 

1601 vis_auto : NDArray[np.float64] 

1602 Data autocorrelations 

1603 auto_tile_i : NDArray[np.integer] 

1604 Index array of the tile array that have defined autocorrelations 

1605 

1606 Returns 

1607 ------- 

1608 cal : dict 

1609 Calibration dictionary with gains that have reduced antenna-dependent bias 

1610 auto_ratio : NDArray[np.float64] 

1611 The estimation of the antenna-dependent bias through the square root of the autocorrelation 

1612 visibilities normalized via a reference tile. 

1613 """ 

1614 

1615 auto_ratio = np.empty([cal["n_pol"], obs["n_freq"], obs["n_tile"]]) 

1616 # This should be possible to Vectorize if it's slow 

1617 for pol_i in range(cal["n_pol"]): 

1618 # fhd_struct_init_cal puts the ref_antenna as 1 if it's not set, which is never appears to be 

1619 v0 = vis_auto[pol_i, :, auto_tile_i[cal["ref_antenna"]]] 

1620 auto_ratio[pol_i, :, auto_tile_i] = np.sqrt( 

1621 vis_auto[pol_i, :, np.arange(auto_tile_i.size)] * weight_invert(v0) 

1622 ) 

1623 cal["gain"][pol_i] = cal["gain"][pol_i] * weight_invert(auto_ratio[pol_i]) 

1624 return cal, auto_ratio 

1625 

1626 

1627def cal_auto_ratio_remultiply( 

1628 cal: dict, auto_ratio: NDArray[np.float64], auto_tile_i: NDArray[np.integer] 

1629) -> dict: 

1630 """ 

1631 Return antenna-dependent parameters to the calculated gains after averaged quantities 

1632 have been calculated. The antenna-dependent parameters are captured via the square root of 

1633 the referenced autocorrelation visiblities calculated in cal_auto_ratio_divide. 

1634 

1635 Parameters 

1636 ---------- 

1637 obs : dict 

1638 Observation metadata dictionary 

1639 cal : dict 

1640 Calibration dictionary 

1641 auto_ratio : NDArray[np.float64] 

1642 Square root of the autocorrelation visibilities normalized via a reference tile 

1643 auto_tile_i : NDArray[np.integer] 

1644 Index array of the tile array that have defined autocorrelations 

1645 

1646 Returns 

1647 ------- 

1648 cal: dict 

1649 Calibration dictonary containing the reformed gain 

1650 """ 

1651 # Replaced for loop in remultiply, this should remultiply by the auto_ratios 

1652 cal["gain"][: cal["n_pol"], :, auto_tile_i] = cal["gain"][ 

1653 : cal["n_pol"], :, auto_tile_i 

1654 ] * np.abs(auto_ratio[: cal["n_pol"], :, auto_tile_i]) 

1655 return cal 

1656 

1657 

1658def calculate_adaptive_gain( 

1659 gain_list: NDArray[np.float64], 

1660 convergence_list: NDArray[np.float64], 

1661 iter: int, 

1662 base_gain: int | float, 

1663 final_convergence_estimate: float | None = None, 

1664): 

1665 """ 

1666 Perform a Kalman filter to calculate the best weighting to use in the next iteration of the 

1667 linear least squares fitting between the data and simulated model, which reduces the number 

1668 of total iterations till convergence. 

1669 

1670 The Kalman filter takes previous convergence values, or the percent change in calibration solution from 

1671 the previous iteration, to calculate the relative weight to give to the next calculated iteration versus 

1672 the previous iteration. For example, a gain (relative weight in this context) of 1 would give the old 

1673 solution and the new solution the same weighting, and thus the next guess in the linear least square solver 

1674 is the average of the new and old solution. A gain (relative weight) of 2 would give the new solution twice 

1675 as much weight at the old solution, and thus the next guess in the linear least square solver is the sum of 

1676 the old solution and twice the new solution, all divided by three. 

1677 

1678 Parameters 

1679 ---------- 

1680 gain_list : NDArray[np.float64] 

1681 Relative weighting between the previous iteration and new iteration in the linear least squares 

1682 solver for calibration solutions 

1683 convergence_list : NDArray[np.float64] 

1684 An array of the percent change in the calibration solutions between one iteration and the next 

1685 iter : int 

1686 The current iteration in the linear least squares solver 

1687 base_gain : int|float 

1688 The initial relative weighting between the previous iteration and new iteration in the linear 

1689 least squares solver for calibration solutions 

1690 final_convergence_estimate : float, optional 

1691 The prediction of the percent change of the previous iteration and the forward model estimate from 

1692 the Kalman filter, by default None 

1693 

1694 Returns 

1695 ------- 

1696 gain : float 

1697 Relative weighting between the previous iteration and the new iteration in the calibration 

1698 linear least squares solver 

1699 """ 

1700 if iter > 2: 

1701 # To calculate the best gain to use, compare the past gains that have been used 

1702 # with the resulting convergences to estimate the best gain to use. 

1703 # Algorithmically, this is a Kalman filter. 

1704 # If forward modeling proceeds perfectly, the convergence metric should 

1705 # asymptotically approach a final value. 

1706 # We can estimate that value from the measured changes in convergence 

1707 # weighted by the gains used in each previous iteration. 

1708 # For some applications such as calibration this may be known in advance. 

1709 # In calibration, it is expressed as the change in a 

1710 # value, in which case the final value should be zero. 

1711 if final_convergence_estimate is None: 1711 ↛ 1712line 1711 didn't jump to line 1712 because the condition on line 1711 was never true

1712 est_final_conv = np.zeros(iter - 1) 

1713 for i in range(iter - 1): 

1714 final_convergence_test = ( 

1715 (1 + gain_list[i]) * convergence_list[i + 1] - convergence_list[i] 

1716 ) / gain_list[i] 

1717 # The convergence metric is strictly positive, so if the estimated final convergence is 

1718 # less than zero, force it to zero. 

1719 est_final_conv[i] = np.max((0, final_convergence_test)) 

1720 # Because the estimate may slowly change over time, only use the most recent measurements. 

1721 final_convergence_estimate = np.median(est_final_conv[max(iter - 5, 0) :]) 

1722 last_gain = gain_list[iter - 1] 

1723 last_conv = convergence_list[iter - 2] 

1724 new_conv = convergence_list[iter - 1] 

1725 # The predicted convergence is the value we would get if the new model calculated 

1726 # in the previous iteration was perfect. Recall that the updated model that is 

1727 # actually used is the gain-weighted average of the new and old model, 

1728 # so the convergence would be similarly weighted. 

1729 predicted_conv = (final_convergence_estimate * last_gain + last_conv) / ( 

1730 base_gain + last_gain 

1731 ) 

1732 # If the measured and predicted convergence are very close, that indicates 

1733 # that our forward model is accurate and we can use a more aggressive gain 

1734 # If the measured convergence is significantly worse (or better!) than predicted, 

1735 # that indicates that the model is not converging as expected and 

1736 # we should use a more conservative gain. 

1737 delta = (predicted_conv - new_conv) / ( 

1738 (last_conv - final_convergence_estimate) / (base_gain + last_gain) 

1739 ) 

1740 new_gain = 1 - abs(delta) 

1741 # Average the gains to prevent oscillating solutions. 

1742 new_gain = (new_gain + last_gain) / 2 

1743 # For some reason base_gain can be a numpy float in testing so putting in a tuple solves this. 

1744 gain = np.max((base_gain / 2, new_gain)) 

1745 

1746 else: 

1747 gain = base_gain 

1748 gain_list[iter] = gain 

1749 

1750 return gain