Coverage for PyFHD/flagging/flagging.py: 60%

1import numpy as np

2from numpy.typing import NDArray, ArrayLike

3from logging import Logger

4from PyFHD.pyfhd_tools.pyfhd_utils import idl_median, histogram

7def vis_flag_tiles(

8 obs: dict,

9 vis_weight_arr: NDArray[np.float64],

10 tiles_to_flag: ArrayLike,

11 logger: Logger,

12) -> np.ndarray:

13 """

14 Flag tiles in the visibility weights array with a given array or list of tiles to flag

15 containing the names of the tiles, NOT the indexes.

17 Parameters

18 ----------

19 obs : dict

20 The observation metadata dictionary

21 vis_weight_arr : NDArray[np.float64]

22 The visibility weight array

23 tiles_to_flag : ArrayLike

24 The tiles to flag

25 logger : Logger

26 PyFHD's Logger

28 Returns

29 -------

30 flagged_vis_weight_arr: NDArray[np.float64]

31 A vis_weight_arr where the tiles to flag have been set to 0

32 """

33 tile_flag_list_use = np.array([], dtype=np.int64)

34 for flag in tiles_to_flag:

35 if type(flag) == str and flag.strip() in obs["baseline_info"]["tile_names"]:

36 logger.info(f"Manually flagging tile {flag}")

37 flag_idx = np.where(obs["baseline_info"]["tile_names"] == flag.strip())[0][

38 0

39 ]

40 tile_flag_list_use.append(flag_idx + 1)

41 else:

42 logger.warning(

43 f"{flag} wasn't found in obs['baseline_info']['tile_names'], skipping it"

44 )

45 hist_a, _, ra = histogram(obs["baseline_info"]["tile_a"], min=1)

46 hist_b, _, rb = histogram(obs["baseline_info"]["tile_b"], min=1)

47 hist_c, _, _ = histogram(tile_flag_list_use, min=1)

48 # hist_A and hist_b should be the same size

49 hist_ab = hist_a + hist_b

50 n_bin = min(np.size(hist_ab), np.size(hist_c))

51 tile_cut_i = np.where((hist_ab[0:n_bin] > 0) | (hist_c[0:n_bin] > 0))[0]

52 if np.size(tile_cut_i) > 0:

53 for cut_idx in range(np.size(tile_cut_i)):

54 ti = tile_cut_i[cut_idx]

55 na = ra[ra[ti + 1] - 1] - ra[ra[ti]]

56 if na > 0:

57 vis_weight_arr[:, :, ra[ra[ti] : ra[ti + 1] - 1]] = 0

58 nb = rb[rb[ti + 1] - 1] - rb[rb[ti]]

59 if nb > 0:

60 vis_weight_arr[:, :, rb[rb[ti] : rb[ti + 1] - 1]] = 0

61 return vis_weight_arr

64def vis_flag_basic(

65 vis_weight_arr: NDArray[np.float64],

66 vis_arr: NDArray[np.complex128],

67 obs: dict,

68 pyfhd_config: dict,

69 logger: Logger,

70) -> tuple[np.ndarray, dict]:

71 """

72 Do some basic flagging on frequencies and tiles based on the confgiruation given by pyfhd_config

73 such as `flag_freq_start`, `flag_freq_end`, `instrument` and `flag_tile_names`. To flag the frequencies and

74 tiles, the arrays in `obs['baseline_info']`, *`freq_use`* and *`tile_use`* will be adjusted to 0's where

75 the tile or frequency is flagged. The `vis_weight_arr` will be turned to 0's in the associated frequencies

76 and tiles that have been flagged.

78 Parameters

79 ----------

80 vis_weight_arr : NDArray[np.float64]

81 The visibility weights array

82 vis_arr : NDArray[np.complex128]

83 The visibilities array

84 obs : dict

85 The observation dictionary containing the frequency and tile flag arrays

86 params : dict

87 _description_

88 pyfhd_config : dict

89 PyFHD's configuration dictionary

90 logger : Logger

91 PyFHD's logger

93 Returns

94 -------

95 vis_weight_arr : NDArray[np.complex128]

96 Updated visibility weights with flagged tiles and frequencies

97 obs : dict

98 observation metadata dictionary containing updated frequency and tile flags

99 """

100 freq_arr = obs["baseline_info"]["freq"].copy()

101

102 # If you wish to adjust things based on the configuration option mask_mirror_indices do that here

103 # and add the config the pyfhd_setup. There was no description for it in FHD.

104

105 if pyfhd_config["flag_freq_start"]: 105 ↛ 106line 105 didn't jump to line 106 because the condition on line 105 was never true

106 logger.info(

107 f'Flagging frequencies less than {pyfhd_config["flag_freq_start"]}MHz'

108 )

109 frequency_MHz = freq_arr / 1e6

110 freq_start_cut = np.where(frequency_MHz < pyfhd_config["flag_freq_start"])

111 if np.size(freq_start_cut) > 0:

112 vis_weight_arr[:, freq_start_cut, :] = 0

113 if pyfhd_config["flag_freq_end"]: 113 ↛ 114line 113 didn't jump to line 114 because the condition on line 113 was never true

114 logger.info(

115 f'Flagging frequencies more than {pyfhd_config["flag_freq_end"]}MHz'

116 )

117 frequency_MHz = freq_arr / 1e6

118 freq_end_cut = np.where(frequency_MHz > pyfhd_config["flag_freq_end"])

119 if np.size(freq_end_cut) > 0:

120 vis_weight_arr[:, freq_end_cut, :] = 0

121 # This section replaces the function vis_flag_tile

122 if len(pyfhd_config["flag_tiles"]) > 0: 122 ↛ 123line 122 didn't jump to line 123 because the condition on line 122 was never true

123 vis_weight_arr = vis_flag_tiles(

124 obs, vis_weight_arr, pyfhd_config["flag_tiles"], logger

125 )

126 # Here I'm going to assume the mwa data you're using is more than 32 tiles

127 # If you wish to implement flagging for mwa when it had 32 tiles, do that here

128 # Flagging based on channel width

129 if pyfhd_config["flag_frequencies"]: 129 ↛ 141line 129 didn't jump to line 141 because the condition on line 129 was always true

130 freq_avg = 768 // obs["n_freq"]

131 channel_edge_flag_width = np.ceil(2 / freq_avg)

132 coarse_channel_width = 32 // freq_avg

133 fine_channel_i = np.arange(obs["n_freq"]) % coarse_channel_width

134 channel_edge_flag = np.where(

135 np.minimum(fine_channel_i, (coarse_channel_width - 1) - fine_channel_i)

136 < channel_edge_flag_width

137 )

138 if np.size(channel_edge_flag) > 0: 138 ↛ 141line 138 didn't jump to line 141 because the condition on line 138 was always true

139 vis_weight_arr[:, channel_edge_flag, :] = 0

140

141 tile_a_i = obs["baseline_info"]["tile_a"] - 1

142 tile_b_i = obs["baseline_info"]["tile_b"] - 1

143 freq_use = np.ones(obs["n_freq"], dtype=np.int64)

144 tile_use = np.ones(obs["n_tile"], dtype=np.int64)

145 for pol_i in range(obs["n_pol"]):

146 baseline_flag = np.max(vis_weight_arr[pol_i], axis=0)

147 freq_flag = np.max(vis_weight_arr[pol_i], axis=1)

148 fi_use = np.where(freq_flag > 0)

149 bi_use = np.where(baseline_flag > 0)

150

151 freq_use_temp = np.zeros(obs["n_freq"], dtype=np.int64)

152 if np.size(fi_use) > 0: 152 ↛ 154line 152 didn't jump to line 154 because the condition on line 152 was always true

153 freq_use_temp[fi_use] = 1

154 freq_use *= freq_use_temp

155

156 tile_use_temp = np.zeros(obs["n_tile"], dtype=np.int64)

157 if np.size(bi_use) > 0: 157 ↛ 160line 157 didn't jump to line 160 because the condition on line 157 was always true

158 tile_use_temp[tile_a_i[bi_use]] = 1

159 tile_use_temp[tile_b_i[bi_use]] = 1

160 tile_use *= tile_use_temp

161

162 # In the case we choose to not flag any frequencies

163 # if pre-processing has flagged frequencies, need to unflag them if the data are nonzero

164 # (but DON'T unflag tiles that should be flagged)

165 if not pyfhd_config["flag_frequencies"]: 165 ↛ 166line 165 didn't jump to line 166 because the condition on line 165 was never true

166 freq_cut_i = np.where(freq_use == 0)

167 if np.size(freq_cut_i) > 0:

168 for pol_i in range(obs["n_pol"]):

169 freq_flag = np.maximum(np.max(vis_weight_arr[pol_i], axis=-1), 0)

170 freq_flag = np.minimum(freq_flag, 1)

171 freq_unflag_i = np.where(freq_flag == 0)[0]

172 if np.size(freq_unflag_i) > 0:

173 baseline_flag = np.maximum(np.max(vis_weight_arr[pol_i], axis=0), 0)

174 bi_use = np.where(baseline_flag > 0)[0]

175 for fi in range(np.size(freq_unflag_i)):

176 data_test = np.abs(vis_arr[pol_i][freq_unflag_i[fi], bi_use])

177 data_test = data_test.T

178 unflag_i = np.where(data_test > 0)

179 if np.size(unflag_i) > 0:

180 vis_weight_arr[pol_i][

181 freq_unflag_i[fi], bi_use[unflag_i]

182 ] = 1

183 freq_use = np.ones(obs["n_freq"], dtype=np.int64)

184

185 # Time based flagging

186 if np.min(obs["baseline_info"]["time_use"]) <= 0: 186 ↛ 187line 186 didn't jump to line 187 because the condition on line 186 was never true

187 bin_offset = obs["baseline_info"]["bin_offset"]

188 bin_offset = np.hstack([bin_offset, np.size(obs["baseline_info"]["tile_a"])])

189 time_bin = np.zeros(np.size(obs["baseline_info"]["tile_a"]))

190 for ti in range(obs["n_time"]):

191 if obs["baseline_info"]["time_use"][ti] <= 0:

192 vis_weight_arr[:, :, bin_offset[ti] : bin_offset[ti + 1] - 1] = 0

193

194 tile_use_indexes = np.where((tile_use) & (obs["baseline_info"]["tile_use"]))[0]

195 tile_use_zeros = np.zeros_like(obs["baseline_info"]["tile_use"])

196 tile_use_zeros[tile_use_indexes] = 1

197 obs["baseline_info"]["tile_use"] = tile_use_zeros

198 freq_use_indexes = np.where((freq_use) & (obs["baseline_info"]["freq_use"]))[0]

199 freq_use_zeros = np.zeros_like(obs["baseline_info"]["freq_use"])

200 freq_use_zeros[freq_use_indexes] = 1

201 obs["baseline_info"]["freq_use"] = freq_use_zeros

202

203 obs["n_time_flag"] = np.count_nonzero(obs["baseline_info"]["time_use"] == 0)

204 obs["n_tile_flag"] = np.count_nonzero(obs["baseline_info"]["tile_use"] == 0)

205 obs["n_freq_flag"] = np.count_nonzero(obs["baseline_info"]["freq_use"] == 0)

206

207 if np.max(vis_weight_arr) == 0: 207 ↛ 208line 207 didn't jump to line 208 because the condition on line 207 was never true

208 raise ValueError("All data has been flagged, check your flagging parameters")

209

210 return vis_weight_arr, obs

211

212

213def vis_flag(

214 vis_arr: NDArray[np.complex128],

215 vis_weights: NDArray[np.float64],

216 obs: dict,

217 params: dict,

218 logger: Logger,

219) -> tuple[np.ndarray, dict]:

220 """

221 Flag frequencies, times, and tiles which are 3 sigma away from the median

222 of the visibility amplitudes, along with baselines which are outside the

223 supplied minimum and maximum baseline distances.

224

225 Parameters

226 ----------

227 vis_arr : NDArray[np.complex128]

228 The visibility array

229 vis_weights : NDArray[np.float64]

230 The visibility weights array

231 obs : dict

232 The observation dictionary

233 params : dict

234 The dictionary containing uu, vv, ww

235 logger : Logger

236 PyFHD's Logger

237

238 Returns

239 -------

240 vis_weights : np.ndarray

241 Flagged vis_weights

242 obs : dict

243 Flagged tiles and frequencies observation metadata dictionary

244 """

245 flag_nsigma = 3

246 data_abs = np.abs(vis_arr[0])

247 if obs["n_pol"] > 1: 247 ↛ 249line 247 didn't jump to line 249 because the condition on line 247 was always true

248 data_abs = np.sqrt(data_abs**2 + np.abs(vis_arr[1]) ** 2)

249 n_tiles_use = max(

250 np.max(obs["baseline_info"]["tile_a"]), np.max(obs["baseline_info"]["tile_b"])

251 )

252 uv_dist = np.sqrt(params["uu"] ** 2 + params["vv"] ** 2) * idl_median(

253 obs["baseline_info"]["freq"]

254 )

255 cut_baselines_i = np.where(

256 (uv_dist < obs["min_baseline"]) | (uv_dist > obs["max_baseline"])

257 )[0]

258 if cut_baselines_i.size > 0: 258 ↛ 261line 258 didn't jump to line 261 because the condition on line 258 was always true

259 vis_weights[: obs["n_pol"], :, cut_baselines_i] = 0

260

261 tile_fom = np.zeros(n_tiles_use, dtype=np.float64)

262 for tile_i in range(n_tiles_use):

263 tile_ai = np.where((obs["baseline_info"]["tile_a"] - 1) == tile_i)

264 tile_bi = np.where((obs["baseline_info"]["tile_b"] - 1) == tile_i)

265 if tile_ai[0].size == 0 and tile_bi[0].size == 0: 265 ↛ 266line 265 didn't jump to line 266 because the condition on line 265 was never true

266 continue

267 if tile_bi[0].size > 0: 267 ↛ 273line 267 didn't jump to line 273 because the condition on line 267 was always true

268 if tile_ai[0].size > 0: 268 ↛ 271line 268 didn't jump to line 271 because the condition on line 268 was always true

269 tile_abi = np.hstack([tile_ai[0], tile_bi[0]])

270 else:

271 tile_abi = tile_bi

272 else:

273 tile_abi = tile_ai

274 data_subset = data_abs[:, tile_abi]

275 for pol_i in range(min(obs["n_pol"], 2)):

276 # Dunno why this needs two separate indexes but it works so leave it

277 i_use = np.where((vis_weights[pol_i][:, tile_abi] > 0) & (data_subset > 0))

278 if i_use[0].size > 10:

279 tile_fom[tile_i] += np.std(data_subset[i_use])

280

281 freq_fom = np.zeros(obs["n_freq"])

282 for freq_i in range(obs["n_freq"]):

283 data_subset = data_abs[freq_i, :]

284 for pol_i in range(min(obs["n_pol"], 2)):

285 i_use = np.where((vis_weights[pol_i, freq_i, :] > 0) & (data_subset > 0))

286 if i_use[0].size > 10:

287 freq_fom[freq_i] += np.std(data_subset[i_use])

288

289 freq_nonzero = np.nonzero(freq_fom)[0]

290 tile_nonzero = np.nonzero(tile_fom)

291 tile_mean = idl_median(tile_fom[tile_nonzero])

292 tile_dev = np.std(tile_fom[tile_nonzero])

293 # IDL Median with WIDTH set is doing a median_filter with the size indicating the kernel size

294 freq_mean1 = idl_median(freq_fom[freq_nonzero], width=obs["n_freq"] // 20)

295 freq_mean = np.zeros(obs["n_freq"])

296 freq_mean[freq_nonzero] = freq_mean1

297 freq_dev = np.std(freq_fom[freq_nonzero] - freq_mean[freq_nonzero])

298

299 # We actually want the complements of the where from IDL translation, adjusted conditions accordingly

300 tile_cut0 = np.where(

301 (np.abs(tile_mean - tile_fom) <= 2 * flag_nsigma * tile_dev) | (tile_fom != 0)

302 )[0]

303 freq_cut0 = np.where(

304 (np.abs(freq_mean - freq_fom) <= 2 * flag_nsigma * freq_dev) | (freq_fom != 0)

305 )[0]

306 tile_mean2 = idl_median(tile_fom[tile_cut0])

307 tile_dev2 = np.std(tile_fom[tile_cut0])

308 freq_mean2 = idl_median(freq_fom, width=obs["n_freq"] // 20)

309 freq_dev2 = np.std((freq_fom - freq_mean2)[freq_cut0])

310 # Currently assuming tile_cut and freq_cut are 1D

311 tile_cut = np.where(

312 (np.abs(tile_mean2 - tile_fom) > (flag_nsigma * tile_dev2)) | (tile_fom == 0)

313 )[0]

314 freq_cut = np.where(

315 (np.abs(freq_mean2 - freq_fom) > (flag_nsigma * freq_dev2)) | (freq_fom == 0)

316 )[0]

317

318 if tile_cut.size > 0: 318 ↛ 328line 318 didn't jump to line 328 because the condition on line 318 was always true

319 logger.info(f"Tiles Cut: {obs['baseline_info']['tile_names'][tile_cut]}")

320 for bad_i in range(tile_cut.size):

321 cut_a_i = np.where(obs["baseline_info"]["tile_a"] == tile_cut[bad_i] + 1)

322 cut_b_i = np.where(obs["baseline_info"]["tile_b"] == tile_cut[bad_i] + 1)

323 if cut_a_i[0].size > 0: 323 ↛ 325line 323 didn't jump to line 325 because the condition on line 323 was always true

324 vis_weights[0 : obs["n_pol"], :, cut_a_i] = 0

325 if cut_b_i[0].size > 0: 325 ↛ 320line 325 didn't jump to line 320 because the condition on line 325 was always true

326 vis_weights[0 : obs["n_pol"], :, cut_b_i] = 0

327 obs["baseline_info"]["tile_use"][tile_cut] = 0

328 if freq_cut.size > 0: 328 ↛ 332line 328 didn't jump to line 332 because the condition on line 328 was always true

329 vis_weights[0 : obs["n_pol"], freq_cut] = 0

330 obs["baseline_info"]["freq_use"][freq_cut] = 0

331

332 bin_offset = np.append(

333 obs["baseline_info"]["bin_offset"], obs["baseline_info"]["tile_a"].size

334 )

335 time_bin = np.zeros(obs["baseline_info"]["tile_a"].size)

336 for ti in range(len(obs["baseline_info"]["time_use"])):

337 time_bin[bin_offset[ti] : bin_offset[ti + 1]] = ti

338 time_fom = np.zeros(obs["baseline_info"]["time_use"].size)

339 for ti in range(len(obs["baseline_info"]["time_use"])):

340 data_subset = data_abs[:, bin_offset[ti] : bin_offset[ti + 1]]

341 for pol_i in range(min(obs["n_pol"], 2)):

342 i_use = np.where(

343 vis_weights[pol_i, :, bin_offset[ti] : bin_offset[ti + 1]] > 0

344 )

345 if i_use[0].size > 10: 345 ↛ 341line 345 didn't jump to line 341 because the condition on line 345 was always true

346 time_fom[ti] += np.std(data_subset[i_use])

347 time_nonzero = np.nonzero(time_fom)

348 time_mean = idl_median(time_fom[time_nonzero])

349 time_dev = np.std(time_fom[time_nonzero])

350 time_cut0 = np.where(

351 (np.abs(time_mean - time_fom) <= 2 * flag_nsigma * time_dev) | (time_fom != 0)

352 )

353 time_mean2 = idl_median(time_fom[time_cut0])

354 time_dev2 = np.std(time_fom[time_cut0])

355 time_cut = np.where(

356 (np.abs(time_mean2 - time_fom) > 2 * flag_nsigma * time_dev2) | (time_fom == 0)

357 )[0]

358 for ti in range(time_cut.size): 358 ↛ 359line 358 didn't jump to line 359 because the loop on line 358 never started

359 ti_cut = np.where(time_bin == time_cut[ti])

360 if ti_cut.size > 0:

361 vis_weights[: obs["n_pol"], :, ti_cut] = 0

362

363 obs["n_vis"] = (np.where(vis_weights[0] > 0)[0]).size

364

365 return vis_weights, obs