Coverage for PyFHD/flagging/flagging.py: 60%

194 statements  

« prev     ^ index     » next       coverage.py v7.9.1, created at 2025-07-01 10:58 +0800

1import numpy as np 

2from numpy.typing import NDArray, ArrayLike 

3from logging import Logger 

4from PyFHD.pyfhd_tools.pyfhd_utils import idl_median, histogram 

5 

6 

7def vis_flag_tiles( 

8 obs: dict, 

9 vis_weight_arr: NDArray[np.float64], 

10 tiles_to_flag: ArrayLike, 

11 logger: Logger, 

12) -> np.ndarray: 

13 """ 

14 Flag tiles in the visibility weights array with a given array or list of tiles to flag 

15 containing the names of the tiles, NOT the indexes. 

16 

17 Parameters 

18 ---------- 

19 obs : dict 

20 The observation metadata dictionary 

21 vis_weight_arr : NDArray[np.float64] 

22 The visibility weight array 

23 tiles_to_flag : ArrayLike 

24 The tiles to flag 

25 logger : Logger 

26 PyFHD's Logger 

27 

28 Returns 

29 ------- 

30 flagged_vis_weight_arr: NDArray[np.float64] 

31 A vis_weight_arr where the tiles to flag have been set to 0 

32 """ 

33 tile_flag_list_use = np.array([], dtype=np.int64) 

34 for flag in tiles_to_flag: 

35 if type(flag) == str and flag.strip() in obs["baseline_info"]["tile_names"]: 

36 logger.info(f"Manually flagging tile {flag}") 

37 flag_idx = np.where(obs["baseline_info"]["tile_names"] == flag.strip())[0][ 

38 0 

39 ] 

40 tile_flag_list_use.append(flag_idx + 1) 

41 else: 

42 logger.warning( 

43 f"{flag} wasn't found in obs['baseline_info']['tile_names'], skipping it" 

44 ) 

45 hist_a, _, ra = histogram(obs["baseline_info"]["tile_a"], min=1) 

46 hist_b, _, rb = histogram(obs["baseline_info"]["tile_b"], min=1) 

47 hist_c, _, _ = histogram(tile_flag_list_use, min=1) 

48 # hist_A and hist_b should be the same size 

49 hist_ab = hist_a + hist_b 

50 n_bin = min(np.size(hist_ab), np.size(hist_c)) 

51 tile_cut_i = np.where((hist_ab[0:n_bin] > 0) | (hist_c[0:n_bin] > 0))[0] 

52 if np.size(tile_cut_i) > 0: 

53 for cut_idx in range(np.size(tile_cut_i)): 

54 ti = tile_cut_i[cut_idx] 

55 na = ra[ra[ti + 1] - 1] - ra[ra[ti]] 

56 if na > 0: 

57 vis_weight_arr[:, :, ra[ra[ti] : ra[ti + 1] - 1]] = 0 

58 nb = rb[rb[ti + 1] - 1] - rb[rb[ti]] 

59 if nb > 0: 

60 vis_weight_arr[:, :, rb[rb[ti] : rb[ti + 1] - 1]] = 0 

61 return vis_weight_arr 

62 

63 

64def vis_flag_basic( 

65 vis_weight_arr: NDArray[np.float64], 

66 vis_arr: NDArray[np.complex128], 

67 obs: dict, 

68 pyfhd_config: dict, 

69 logger: Logger, 

70) -> tuple[np.ndarray, dict]: 

71 """ 

72 Do some basic flagging on frequencies and tiles based on the confgiruation given by pyfhd_config 

73 such as `flag_freq_start`, `flag_freq_end`, `instrument` and `flag_tile_names`. To flag the frequencies and 

74 tiles, the arrays in `obs['baseline_info']`, *`freq_use`* and *`tile_use`* will be adjusted to 0's where 

75 the tile or frequency is flagged. The `vis_weight_arr` will be turned to 0's in the associated frequencies 

76 and tiles that have been flagged. 

77 

78 Parameters 

79 ---------- 

80 vis_weight_arr : NDArray[np.float64] 

81 The visibility weights array 

82 vis_arr : NDArray[np.complex128] 

83 The visibilities array 

84 obs : dict 

85 The observation dictionary containing the frequency and tile flag arrays 

86 params : dict 

87 _description_ 

88 pyfhd_config : dict 

89 PyFHD's configuration dictionary 

90 logger : Logger 

91 PyFHD's logger 

92 

93 Returns 

94 ------- 

95 vis_weight_arr : NDArray[np.complex128] 

96 Updated visibility weights with flagged tiles and frequencies 

97 obs : dict 

98 observation metadata dictionary containing updated frequency and tile flags 

99 """ 

100 freq_arr = obs["baseline_info"]["freq"].copy() 

101 

102 # If you wish to adjust things based on the configuration option mask_mirror_indices do that here 

103 # and add the config the pyfhd_setup. There was no description for it in FHD. 

104 

105 if pyfhd_config["flag_freq_start"]: 105 ↛ 106line 105 didn't jump to line 106 because the condition on line 105 was never true

106 logger.info( 

107 f'Flagging frequencies less than {pyfhd_config["flag_freq_start"]}MHz' 

108 ) 

109 frequency_MHz = freq_arr / 1e6 

110 freq_start_cut = np.where(frequency_MHz < pyfhd_config["flag_freq_start"]) 

111 if np.size(freq_start_cut) > 0: 

112 vis_weight_arr[:, freq_start_cut, :] = 0 

113 if pyfhd_config["flag_freq_end"]: 113 ↛ 114line 113 didn't jump to line 114 because the condition on line 113 was never true

114 logger.info( 

115 f'Flagging frequencies more than {pyfhd_config["flag_freq_end"]}MHz' 

116 ) 

117 frequency_MHz = freq_arr / 1e6 

118 freq_end_cut = np.where(frequency_MHz > pyfhd_config["flag_freq_end"]) 

119 if np.size(freq_end_cut) > 0: 

120 vis_weight_arr[:, freq_end_cut, :] = 0 

121 # This section replaces the function vis_flag_tile 

122 if len(pyfhd_config["flag_tiles"]) > 0: 122 ↛ 123line 122 didn't jump to line 123 because the condition on line 122 was never true

123 vis_weight_arr = vis_flag_tiles( 

124 obs, vis_weight_arr, pyfhd_config["flag_tiles"], logger 

125 ) 

126 # Here I'm going to assume the mwa data you're using is more than 32 tiles 

127 # If you wish to implement flagging for mwa when it had 32 tiles, do that here 

128 # Flagging based on channel width 

129 if pyfhd_config["flag_frequencies"]: 129 ↛ 141line 129 didn't jump to line 141 because the condition on line 129 was always true

130 freq_avg = 768 // obs["n_freq"] 

131 channel_edge_flag_width = np.ceil(2 / freq_avg) 

132 coarse_channel_width = 32 // freq_avg 

133 fine_channel_i = np.arange(obs["n_freq"]) % coarse_channel_width 

134 channel_edge_flag = np.where( 

135 np.minimum(fine_channel_i, (coarse_channel_width - 1) - fine_channel_i) 

136 < channel_edge_flag_width 

137 ) 

138 if np.size(channel_edge_flag) > 0: 138 ↛ 141line 138 didn't jump to line 141 because the condition on line 138 was always true

139 vis_weight_arr[:, channel_edge_flag, :] = 0 

140 

141 tile_a_i = obs["baseline_info"]["tile_a"] - 1 

142 tile_b_i = obs["baseline_info"]["tile_b"] - 1 

143 freq_use = np.ones(obs["n_freq"], dtype=np.int64) 

144 tile_use = np.ones(obs["n_tile"], dtype=np.int64) 

145 for pol_i in range(obs["n_pol"]): 

146 baseline_flag = np.max(vis_weight_arr[pol_i], axis=0) 

147 freq_flag = np.max(vis_weight_arr[pol_i], axis=1) 

148 fi_use = np.where(freq_flag > 0) 

149 bi_use = np.where(baseline_flag > 0) 

150 

151 freq_use_temp = np.zeros(obs["n_freq"], dtype=np.int64) 

152 if np.size(fi_use) > 0: 152 ↛ 154line 152 didn't jump to line 154 because the condition on line 152 was always true

153 freq_use_temp[fi_use] = 1 

154 freq_use *= freq_use_temp 

155 

156 tile_use_temp = np.zeros(obs["n_tile"], dtype=np.int64) 

157 if np.size(bi_use) > 0: 157 ↛ 160line 157 didn't jump to line 160 because the condition on line 157 was always true

158 tile_use_temp[tile_a_i[bi_use]] = 1 

159 tile_use_temp[tile_b_i[bi_use]] = 1 

160 tile_use *= tile_use_temp 

161 

162 # In the case we choose to not flag any frequencies 

163 # if pre-processing has flagged frequencies, need to unflag them if the data are nonzero 

164 # (but DON'T unflag tiles that should be flagged) 

165 if not pyfhd_config["flag_frequencies"]: 165 ↛ 166line 165 didn't jump to line 166 because the condition on line 165 was never true

166 freq_cut_i = np.where(freq_use == 0) 

167 if np.size(freq_cut_i) > 0: 

168 for pol_i in range(obs["n_pol"]): 

169 freq_flag = np.maximum(np.max(vis_weight_arr[pol_i], axis=-1), 0) 

170 freq_flag = np.minimum(freq_flag, 1) 

171 freq_unflag_i = np.where(freq_flag == 0)[0] 

172 if np.size(freq_unflag_i) > 0: 

173 baseline_flag = np.maximum(np.max(vis_weight_arr[pol_i], axis=0), 0) 

174 bi_use = np.where(baseline_flag > 0)[0] 

175 for fi in range(np.size(freq_unflag_i)): 

176 data_test = np.abs(vis_arr[pol_i][freq_unflag_i[fi], bi_use]) 

177 data_test = data_test.T 

178 unflag_i = np.where(data_test > 0) 

179 if np.size(unflag_i) > 0: 

180 vis_weight_arr[pol_i][ 

181 freq_unflag_i[fi], bi_use[unflag_i] 

182 ] = 1 

183 freq_use = np.ones(obs["n_freq"], dtype=np.int64) 

184 

185 # Time based flagging 

186 if np.min(obs["baseline_info"]["time_use"]) <= 0: 186 ↛ 187line 186 didn't jump to line 187 because the condition on line 186 was never true

187 bin_offset = obs["baseline_info"]["bin_offset"] 

188 bin_offset = np.hstack([bin_offset, np.size(obs["baseline_info"]["tile_a"])]) 

189 time_bin = np.zeros(np.size(obs["baseline_info"]["tile_a"])) 

190 for ti in range(obs["n_time"]): 

191 if obs["baseline_info"]["time_use"][ti] <= 0: 

192 vis_weight_arr[:, :, bin_offset[ti] : bin_offset[ti + 1] - 1] = 0 

193 

194 tile_use_indexes = np.where((tile_use) & (obs["baseline_info"]["tile_use"]))[0] 

195 tile_use_zeros = np.zeros_like(obs["baseline_info"]["tile_use"]) 

196 tile_use_zeros[tile_use_indexes] = 1 

197 obs["baseline_info"]["tile_use"] = tile_use_zeros 

198 freq_use_indexes = np.where((freq_use) & (obs["baseline_info"]["freq_use"]))[0] 

199 freq_use_zeros = np.zeros_like(obs["baseline_info"]["freq_use"]) 

200 freq_use_zeros[freq_use_indexes] = 1 

201 obs["baseline_info"]["freq_use"] = freq_use_zeros 

202 

203 obs["n_time_flag"] = np.count_nonzero(obs["baseline_info"]["time_use"] == 0) 

204 obs["n_tile_flag"] = np.count_nonzero(obs["baseline_info"]["tile_use"] == 0) 

205 obs["n_freq_flag"] = np.count_nonzero(obs["baseline_info"]["freq_use"] == 0) 

206 

207 if np.max(vis_weight_arr) == 0: 207 ↛ 208line 207 didn't jump to line 208 because the condition on line 207 was never true

208 raise ValueError("All data has been flagged, check your flagging parameters") 

209 

210 return vis_weight_arr, obs 

211 

212 

213def vis_flag( 

214 vis_arr: NDArray[np.complex128], 

215 vis_weights: NDArray[np.float64], 

216 obs: dict, 

217 params: dict, 

218 logger: Logger, 

219) -> tuple[np.ndarray, dict]: 

220 """ 

221 Flag frequencies, times, and tiles which are 3 sigma away from the median 

222 of the visibility amplitudes, along with baselines which are outside the 

223 supplied minimum and maximum baseline distances. 

224 

225 Parameters 

226 ---------- 

227 vis_arr : NDArray[np.complex128] 

228 The visibility array 

229 vis_weights : NDArray[np.float64] 

230 The visibility weights array 

231 obs : dict 

232 The observation dictionary 

233 params : dict 

234 The dictionary containing uu, vv, ww 

235 logger : Logger 

236 PyFHD's Logger 

237 

238 Returns 

239 ------- 

240 vis_weights : np.ndarray 

241 Flagged vis_weights 

242 obs : dict 

243 Flagged tiles and frequencies observation metadata dictionary 

244 """ 

245 flag_nsigma = 3 

246 data_abs = np.abs(vis_arr[0]) 

247 if obs["n_pol"] > 1: 247 ↛ 249line 247 didn't jump to line 249 because the condition on line 247 was always true

248 data_abs = np.sqrt(data_abs**2 + np.abs(vis_arr[1]) ** 2) 

249 n_tiles_use = max( 

250 np.max(obs["baseline_info"]["tile_a"]), np.max(obs["baseline_info"]["tile_b"]) 

251 ) 

252 uv_dist = np.sqrt(params["uu"] ** 2 + params["vv"] ** 2) * idl_median( 

253 obs["baseline_info"]["freq"] 

254 ) 

255 cut_baselines_i = np.where( 

256 (uv_dist < obs["min_baseline"]) | (uv_dist > obs["max_baseline"]) 

257 )[0] 

258 if cut_baselines_i.size > 0: 258 ↛ 261line 258 didn't jump to line 261 because the condition on line 258 was always true

259 vis_weights[: obs["n_pol"], :, cut_baselines_i] = 0 

260 

261 tile_fom = np.zeros(n_tiles_use, dtype=np.float64) 

262 for tile_i in range(n_tiles_use): 

263 tile_ai = np.where((obs["baseline_info"]["tile_a"] - 1) == tile_i) 

264 tile_bi = np.where((obs["baseline_info"]["tile_b"] - 1) == tile_i) 

265 if tile_ai[0].size == 0 and tile_bi[0].size == 0: 265 ↛ 266line 265 didn't jump to line 266 because the condition on line 265 was never true

266 continue 

267 if tile_bi[0].size > 0: 267 ↛ 273line 267 didn't jump to line 273 because the condition on line 267 was always true

268 if tile_ai[0].size > 0: 268 ↛ 271line 268 didn't jump to line 271 because the condition on line 268 was always true

269 tile_abi = np.hstack([tile_ai[0], tile_bi[0]]) 

270 else: 

271 tile_abi = tile_bi 

272 else: 

273 tile_abi = tile_ai 

274 data_subset = data_abs[:, tile_abi] 

275 for pol_i in range(min(obs["n_pol"], 2)): 

276 # Dunno why this needs two separate indexes but it works so leave it 

277 i_use = np.where((vis_weights[pol_i][:, tile_abi] > 0) & (data_subset > 0)) 

278 if i_use[0].size > 10: 

279 tile_fom[tile_i] += np.std(data_subset[i_use]) 

280 

281 freq_fom = np.zeros(obs["n_freq"]) 

282 for freq_i in range(obs["n_freq"]): 

283 data_subset = data_abs[freq_i, :] 

284 for pol_i in range(min(obs["n_pol"], 2)): 

285 i_use = np.where((vis_weights[pol_i, freq_i, :] > 0) & (data_subset > 0)) 

286 if i_use[0].size > 10: 

287 freq_fom[freq_i] += np.std(data_subset[i_use]) 

288 

289 freq_nonzero = np.nonzero(freq_fom)[0] 

290 tile_nonzero = np.nonzero(tile_fom) 

291 tile_mean = idl_median(tile_fom[tile_nonzero]) 

292 tile_dev = np.std(tile_fom[tile_nonzero]) 

293 # IDL Median with WIDTH set is doing a median_filter with the size indicating the kernel size 

294 freq_mean1 = idl_median(freq_fom[freq_nonzero], width=obs["n_freq"] // 20) 

295 freq_mean = np.zeros(obs["n_freq"]) 

296 freq_mean[freq_nonzero] = freq_mean1 

297 freq_dev = np.std(freq_fom[freq_nonzero] - freq_mean[freq_nonzero]) 

298 

299 # We actually want the complements of the where from IDL translation, adjusted conditions accordingly 

300 tile_cut0 = np.where( 

301 (np.abs(tile_mean - tile_fom) <= 2 * flag_nsigma * tile_dev) | (tile_fom != 0) 

302 )[0] 

303 freq_cut0 = np.where( 

304 (np.abs(freq_mean - freq_fom) <= 2 * flag_nsigma * freq_dev) | (freq_fom != 0) 

305 )[0] 

306 tile_mean2 = idl_median(tile_fom[tile_cut0]) 

307 tile_dev2 = np.std(tile_fom[tile_cut0]) 

308 freq_mean2 = idl_median(freq_fom, width=obs["n_freq"] // 20) 

309 freq_dev2 = np.std((freq_fom - freq_mean2)[freq_cut0]) 

310 # Currently assuming tile_cut and freq_cut are 1D 

311 tile_cut = np.where( 

312 (np.abs(tile_mean2 - tile_fom) > (flag_nsigma * tile_dev2)) | (tile_fom == 0) 

313 )[0] 

314 freq_cut = np.where( 

315 (np.abs(freq_mean2 - freq_fom) > (flag_nsigma * freq_dev2)) | (freq_fom == 0) 

316 )[0] 

317 

318 if tile_cut.size > 0: 318 ↛ 328line 318 didn't jump to line 328 because the condition on line 318 was always true

319 logger.info(f"Tiles Cut: {obs['baseline_info']['tile_names'][tile_cut]}") 

320 for bad_i in range(tile_cut.size): 

321 cut_a_i = np.where(obs["baseline_info"]["tile_a"] == tile_cut[bad_i] + 1) 

322 cut_b_i = np.where(obs["baseline_info"]["tile_b"] == tile_cut[bad_i] + 1) 

323 if cut_a_i[0].size > 0: 323 ↛ 325line 323 didn't jump to line 325 because the condition on line 323 was always true

324 vis_weights[0 : obs["n_pol"], :, cut_a_i] = 0 

325 if cut_b_i[0].size > 0: 325 ↛ 320line 325 didn't jump to line 320 because the condition on line 325 was always true

326 vis_weights[0 : obs["n_pol"], :, cut_b_i] = 0 

327 obs["baseline_info"]["tile_use"][tile_cut] = 0 

328 if freq_cut.size > 0: 328 ↛ 332line 328 didn't jump to line 332 because the condition on line 328 was always true

329 vis_weights[0 : obs["n_pol"], freq_cut] = 0 

330 obs["baseline_info"]["freq_use"][freq_cut] = 0 

331 

332 bin_offset = np.append( 

333 obs["baseline_info"]["bin_offset"], obs["baseline_info"]["tile_a"].size 

334 ) 

335 time_bin = np.zeros(obs["baseline_info"]["tile_a"].size) 

336 for ti in range(len(obs["baseline_info"]["time_use"])): 

337 time_bin[bin_offset[ti] : bin_offset[ti + 1]] = ti 

338 time_fom = np.zeros(obs["baseline_info"]["time_use"].size) 

339 for ti in range(len(obs["baseline_info"]["time_use"])): 

340 data_subset = data_abs[:, bin_offset[ti] : bin_offset[ti + 1]] 

341 for pol_i in range(min(obs["n_pol"], 2)): 

342 i_use = np.where( 

343 vis_weights[pol_i, :, bin_offset[ti] : bin_offset[ti + 1]] > 0 

344 ) 

345 if i_use[0].size > 10: 345 ↛ 341line 345 didn't jump to line 341 because the condition on line 345 was always true

346 time_fom[ti] += np.std(data_subset[i_use]) 

347 time_nonzero = np.nonzero(time_fom) 

348 time_mean = idl_median(time_fom[time_nonzero]) 

349 time_dev = np.std(time_fom[time_nonzero]) 

350 time_cut0 = np.where( 

351 (np.abs(time_mean - time_fom) <= 2 * flag_nsigma * time_dev) | (time_fom != 0) 

352 ) 

353 time_mean2 = idl_median(time_fom[time_cut0]) 

354 time_dev2 = np.std(time_fom[time_cut0]) 

355 time_cut = np.where( 

356 (np.abs(time_mean2 - time_fom) > 2 * flag_nsigma * time_dev2) | (time_fom == 0) 

357 )[0] 

358 for ti in range(time_cut.size): 358 ↛ 359line 358 didn't jump to line 359 because the loop on line 358 never started

359 ti_cut = np.where(time_bin == time_cut[ti]) 

360 if ti_cut.size > 0: 

361 vis_weights[: obs["n_pol"], :, ti_cut] = 0 

362 

363 obs["n_vis"] = (np.where(vis_weights[0] > 0)[0]).size 

364 

365 return vis_weights, obs