Coverage for PyFHD/flagging/flagging.py: 60%
194 statements
« prev ^ index » next coverage.py v7.9.1, created at 2025-07-01 10:58 +0800
« prev ^ index » next coverage.py v7.9.1, created at 2025-07-01 10:58 +0800
1import numpy as np
2from numpy.typing import NDArray, ArrayLike
3from logging import Logger
4from PyFHD.pyfhd_tools.pyfhd_utils import idl_median, histogram
7def vis_flag_tiles(
8 obs: dict,
9 vis_weight_arr: NDArray[np.float64],
10 tiles_to_flag: ArrayLike,
11 logger: Logger,
12) -> np.ndarray:
13 """
14 Flag tiles in the visibility weights array with a given array or list of tiles to flag
15 containing the names of the tiles, NOT the indexes.
17 Parameters
18 ----------
19 obs : dict
20 The observation metadata dictionary
21 vis_weight_arr : NDArray[np.float64]
22 The visibility weight array
23 tiles_to_flag : ArrayLike
24 The tiles to flag
25 logger : Logger
26 PyFHD's Logger
28 Returns
29 -------
30 flagged_vis_weight_arr: NDArray[np.float64]
31 A vis_weight_arr where the tiles to flag have been set to 0
32 """
33 tile_flag_list_use = np.array([], dtype=np.int64)
34 for flag in tiles_to_flag:
35 if type(flag) == str and flag.strip() in obs["baseline_info"]["tile_names"]:
36 logger.info(f"Manually flagging tile {flag}")
37 flag_idx = np.where(obs["baseline_info"]["tile_names"] == flag.strip())[0][
38 0
39 ]
40 tile_flag_list_use.append(flag_idx + 1)
41 else:
42 logger.warning(
43 f"{flag} wasn't found in obs['baseline_info']['tile_names'], skipping it"
44 )
45 hist_a, _, ra = histogram(obs["baseline_info"]["tile_a"], min=1)
46 hist_b, _, rb = histogram(obs["baseline_info"]["tile_b"], min=1)
47 hist_c, _, _ = histogram(tile_flag_list_use, min=1)
48 # hist_A and hist_b should be the same size
49 hist_ab = hist_a + hist_b
50 n_bin = min(np.size(hist_ab), np.size(hist_c))
51 tile_cut_i = np.where((hist_ab[0:n_bin] > 0) | (hist_c[0:n_bin] > 0))[0]
52 if np.size(tile_cut_i) > 0:
53 for cut_idx in range(np.size(tile_cut_i)):
54 ti = tile_cut_i[cut_idx]
55 na = ra[ra[ti + 1] - 1] - ra[ra[ti]]
56 if na > 0:
57 vis_weight_arr[:, :, ra[ra[ti] : ra[ti + 1] - 1]] = 0
58 nb = rb[rb[ti + 1] - 1] - rb[rb[ti]]
59 if nb > 0:
60 vis_weight_arr[:, :, rb[rb[ti] : rb[ti + 1] - 1]] = 0
61 return vis_weight_arr
64def vis_flag_basic(
65 vis_weight_arr: NDArray[np.float64],
66 vis_arr: NDArray[np.complex128],
67 obs: dict,
68 pyfhd_config: dict,
69 logger: Logger,
70) -> tuple[np.ndarray, dict]:
71 """
72 Do some basic flagging on frequencies and tiles based on the confgiruation given by pyfhd_config
73 such as `flag_freq_start`, `flag_freq_end`, `instrument` and `flag_tile_names`. To flag the frequencies and
74 tiles, the arrays in `obs['baseline_info']`, *`freq_use`* and *`tile_use`* will be adjusted to 0's where
75 the tile or frequency is flagged. The `vis_weight_arr` will be turned to 0's in the associated frequencies
76 and tiles that have been flagged.
78 Parameters
79 ----------
80 vis_weight_arr : NDArray[np.float64]
81 The visibility weights array
82 vis_arr : NDArray[np.complex128]
83 The visibilities array
84 obs : dict
85 The observation dictionary containing the frequency and tile flag arrays
86 params : dict
87 _description_
88 pyfhd_config : dict
89 PyFHD's configuration dictionary
90 logger : Logger
91 PyFHD's logger
93 Returns
94 -------
95 vis_weight_arr : NDArray[np.complex128]
96 Updated visibility weights with flagged tiles and frequencies
97 obs : dict
98 observation metadata dictionary containing updated frequency and tile flags
99 """
100 freq_arr = obs["baseline_info"]["freq"].copy()
102 # If you wish to adjust things based on the configuration option mask_mirror_indices do that here
103 # and add the config the pyfhd_setup. There was no description for it in FHD.
105 if pyfhd_config["flag_freq_start"]: 105 ↛ 106line 105 didn't jump to line 106 because the condition on line 105 was never true
106 logger.info(
107 f'Flagging frequencies less than {pyfhd_config["flag_freq_start"]}MHz'
108 )
109 frequency_MHz = freq_arr / 1e6
110 freq_start_cut = np.where(frequency_MHz < pyfhd_config["flag_freq_start"])
111 if np.size(freq_start_cut) > 0:
112 vis_weight_arr[:, freq_start_cut, :] = 0
113 if pyfhd_config["flag_freq_end"]: 113 ↛ 114line 113 didn't jump to line 114 because the condition on line 113 was never true
114 logger.info(
115 f'Flagging frequencies more than {pyfhd_config["flag_freq_end"]}MHz'
116 )
117 frequency_MHz = freq_arr / 1e6
118 freq_end_cut = np.where(frequency_MHz > pyfhd_config["flag_freq_end"])
119 if np.size(freq_end_cut) > 0:
120 vis_weight_arr[:, freq_end_cut, :] = 0
121 # This section replaces the function vis_flag_tile
122 if len(pyfhd_config["flag_tiles"]) > 0: 122 ↛ 123line 122 didn't jump to line 123 because the condition on line 122 was never true
123 vis_weight_arr = vis_flag_tiles(
124 obs, vis_weight_arr, pyfhd_config["flag_tiles"], logger
125 )
126 # Here I'm going to assume the mwa data you're using is more than 32 tiles
127 # If you wish to implement flagging for mwa when it had 32 tiles, do that here
128 # Flagging based on channel width
129 if pyfhd_config["flag_frequencies"]: 129 ↛ 141line 129 didn't jump to line 141 because the condition on line 129 was always true
130 freq_avg = 768 // obs["n_freq"]
131 channel_edge_flag_width = np.ceil(2 / freq_avg)
132 coarse_channel_width = 32 // freq_avg
133 fine_channel_i = np.arange(obs["n_freq"]) % coarse_channel_width
134 channel_edge_flag = np.where(
135 np.minimum(fine_channel_i, (coarse_channel_width - 1) - fine_channel_i)
136 < channel_edge_flag_width
137 )
138 if np.size(channel_edge_flag) > 0: 138 ↛ 141line 138 didn't jump to line 141 because the condition on line 138 was always true
139 vis_weight_arr[:, channel_edge_flag, :] = 0
141 tile_a_i = obs["baseline_info"]["tile_a"] - 1
142 tile_b_i = obs["baseline_info"]["tile_b"] - 1
143 freq_use = np.ones(obs["n_freq"], dtype=np.int64)
144 tile_use = np.ones(obs["n_tile"], dtype=np.int64)
145 for pol_i in range(obs["n_pol"]):
146 baseline_flag = np.max(vis_weight_arr[pol_i], axis=0)
147 freq_flag = np.max(vis_weight_arr[pol_i], axis=1)
148 fi_use = np.where(freq_flag > 0)
149 bi_use = np.where(baseline_flag > 0)
151 freq_use_temp = np.zeros(obs["n_freq"], dtype=np.int64)
152 if np.size(fi_use) > 0: 152 ↛ 154line 152 didn't jump to line 154 because the condition on line 152 was always true
153 freq_use_temp[fi_use] = 1
154 freq_use *= freq_use_temp
156 tile_use_temp = np.zeros(obs["n_tile"], dtype=np.int64)
157 if np.size(bi_use) > 0: 157 ↛ 160line 157 didn't jump to line 160 because the condition on line 157 was always true
158 tile_use_temp[tile_a_i[bi_use]] = 1
159 tile_use_temp[tile_b_i[bi_use]] = 1
160 tile_use *= tile_use_temp
162 # In the case we choose to not flag any frequencies
163 # if pre-processing has flagged frequencies, need to unflag them if the data are nonzero
164 # (but DON'T unflag tiles that should be flagged)
165 if not pyfhd_config["flag_frequencies"]: 165 ↛ 166line 165 didn't jump to line 166 because the condition on line 165 was never true
166 freq_cut_i = np.where(freq_use == 0)
167 if np.size(freq_cut_i) > 0:
168 for pol_i in range(obs["n_pol"]):
169 freq_flag = np.maximum(np.max(vis_weight_arr[pol_i], axis=-1), 0)
170 freq_flag = np.minimum(freq_flag, 1)
171 freq_unflag_i = np.where(freq_flag == 0)[0]
172 if np.size(freq_unflag_i) > 0:
173 baseline_flag = np.maximum(np.max(vis_weight_arr[pol_i], axis=0), 0)
174 bi_use = np.where(baseline_flag > 0)[0]
175 for fi in range(np.size(freq_unflag_i)):
176 data_test = np.abs(vis_arr[pol_i][freq_unflag_i[fi], bi_use])
177 data_test = data_test.T
178 unflag_i = np.where(data_test > 0)
179 if np.size(unflag_i) > 0:
180 vis_weight_arr[pol_i][
181 freq_unflag_i[fi], bi_use[unflag_i]
182 ] = 1
183 freq_use = np.ones(obs["n_freq"], dtype=np.int64)
185 # Time based flagging
186 if np.min(obs["baseline_info"]["time_use"]) <= 0: 186 ↛ 187line 186 didn't jump to line 187 because the condition on line 186 was never true
187 bin_offset = obs["baseline_info"]["bin_offset"]
188 bin_offset = np.hstack([bin_offset, np.size(obs["baseline_info"]["tile_a"])])
189 time_bin = np.zeros(np.size(obs["baseline_info"]["tile_a"]))
190 for ti in range(obs["n_time"]):
191 if obs["baseline_info"]["time_use"][ti] <= 0:
192 vis_weight_arr[:, :, bin_offset[ti] : bin_offset[ti + 1] - 1] = 0
194 tile_use_indexes = np.where((tile_use) & (obs["baseline_info"]["tile_use"]))[0]
195 tile_use_zeros = np.zeros_like(obs["baseline_info"]["tile_use"])
196 tile_use_zeros[tile_use_indexes] = 1
197 obs["baseline_info"]["tile_use"] = tile_use_zeros
198 freq_use_indexes = np.where((freq_use) & (obs["baseline_info"]["freq_use"]))[0]
199 freq_use_zeros = np.zeros_like(obs["baseline_info"]["freq_use"])
200 freq_use_zeros[freq_use_indexes] = 1
201 obs["baseline_info"]["freq_use"] = freq_use_zeros
203 obs["n_time_flag"] = np.count_nonzero(obs["baseline_info"]["time_use"] == 0)
204 obs["n_tile_flag"] = np.count_nonzero(obs["baseline_info"]["tile_use"] == 0)
205 obs["n_freq_flag"] = np.count_nonzero(obs["baseline_info"]["freq_use"] == 0)
207 if np.max(vis_weight_arr) == 0: 207 ↛ 208line 207 didn't jump to line 208 because the condition on line 207 was never true
208 raise ValueError("All data has been flagged, check your flagging parameters")
210 return vis_weight_arr, obs
213def vis_flag(
214 vis_arr: NDArray[np.complex128],
215 vis_weights: NDArray[np.float64],
216 obs: dict,
217 params: dict,
218 logger: Logger,
219) -> tuple[np.ndarray, dict]:
220 """
221 Flag frequencies, times, and tiles which are 3 sigma away from the median
222 of the visibility amplitudes, along with baselines which are outside the
223 supplied minimum and maximum baseline distances.
225 Parameters
226 ----------
227 vis_arr : NDArray[np.complex128]
228 The visibility array
229 vis_weights : NDArray[np.float64]
230 The visibility weights array
231 obs : dict
232 The observation dictionary
233 params : dict
234 The dictionary containing uu, vv, ww
235 logger : Logger
236 PyFHD's Logger
238 Returns
239 -------
240 vis_weights : np.ndarray
241 Flagged vis_weights
242 obs : dict
243 Flagged tiles and frequencies observation metadata dictionary
244 """
245 flag_nsigma = 3
246 data_abs = np.abs(vis_arr[0])
247 if obs["n_pol"] > 1: 247 ↛ 249line 247 didn't jump to line 249 because the condition on line 247 was always true
248 data_abs = np.sqrt(data_abs**2 + np.abs(vis_arr[1]) ** 2)
249 n_tiles_use = max(
250 np.max(obs["baseline_info"]["tile_a"]), np.max(obs["baseline_info"]["tile_b"])
251 )
252 uv_dist = np.sqrt(params["uu"] ** 2 + params["vv"] ** 2) * idl_median(
253 obs["baseline_info"]["freq"]
254 )
255 cut_baselines_i = np.where(
256 (uv_dist < obs["min_baseline"]) | (uv_dist > obs["max_baseline"])
257 )[0]
258 if cut_baselines_i.size > 0: 258 ↛ 261line 258 didn't jump to line 261 because the condition on line 258 was always true
259 vis_weights[: obs["n_pol"], :, cut_baselines_i] = 0
261 tile_fom = np.zeros(n_tiles_use, dtype=np.float64)
262 for tile_i in range(n_tiles_use):
263 tile_ai = np.where((obs["baseline_info"]["tile_a"] - 1) == tile_i)
264 tile_bi = np.where((obs["baseline_info"]["tile_b"] - 1) == tile_i)
265 if tile_ai[0].size == 0 and tile_bi[0].size == 0: 265 ↛ 266line 265 didn't jump to line 266 because the condition on line 265 was never true
266 continue
267 if tile_bi[0].size > 0: 267 ↛ 273line 267 didn't jump to line 273 because the condition on line 267 was always true
268 if tile_ai[0].size > 0: 268 ↛ 271line 268 didn't jump to line 271 because the condition on line 268 was always true
269 tile_abi = np.hstack([tile_ai[0], tile_bi[0]])
270 else:
271 tile_abi = tile_bi
272 else:
273 tile_abi = tile_ai
274 data_subset = data_abs[:, tile_abi]
275 for pol_i in range(min(obs["n_pol"], 2)):
276 # Dunno why this needs two separate indexes but it works so leave it
277 i_use = np.where((vis_weights[pol_i][:, tile_abi] > 0) & (data_subset > 0))
278 if i_use[0].size > 10:
279 tile_fom[tile_i] += np.std(data_subset[i_use])
281 freq_fom = np.zeros(obs["n_freq"])
282 for freq_i in range(obs["n_freq"]):
283 data_subset = data_abs[freq_i, :]
284 for pol_i in range(min(obs["n_pol"], 2)):
285 i_use = np.where((vis_weights[pol_i, freq_i, :] > 0) & (data_subset > 0))
286 if i_use[0].size > 10:
287 freq_fom[freq_i] += np.std(data_subset[i_use])
289 freq_nonzero = np.nonzero(freq_fom)[0]
290 tile_nonzero = np.nonzero(tile_fom)
291 tile_mean = idl_median(tile_fom[tile_nonzero])
292 tile_dev = np.std(tile_fom[tile_nonzero])
293 # IDL Median with WIDTH set is doing a median_filter with the size indicating the kernel size
294 freq_mean1 = idl_median(freq_fom[freq_nonzero], width=obs["n_freq"] // 20)
295 freq_mean = np.zeros(obs["n_freq"])
296 freq_mean[freq_nonzero] = freq_mean1
297 freq_dev = np.std(freq_fom[freq_nonzero] - freq_mean[freq_nonzero])
299 # We actually want the complements of the where from IDL translation, adjusted conditions accordingly
300 tile_cut0 = np.where(
301 (np.abs(tile_mean - tile_fom) <= 2 * flag_nsigma * tile_dev) | (tile_fom != 0)
302 )[0]
303 freq_cut0 = np.where(
304 (np.abs(freq_mean - freq_fom) <= 2 * flag_nsigma * freq_dev) | (freq_fom != 0)
305 )[0]
306 tile_mean2 = idl_median(tile_fom[tile_cut0])
307 tile_dev2 = np.std(tile_fom[tile_cut0])
308 freq_mean2 = idl_median(freq_fom, width=obs["n_freq"] // 20)
309 freq_dev2 = np.std((freq_fom - freq_mean2)[freq_cut0])
310 # Currently assuming tile_cut and freq_cut are 1D
311 tile_cut = np.where(
312 (np.abs(tile_mean2 - tile_fom) > (flag_nsigma * tile_dev2)) | (tile_fom == 0)
313 )[0]
314 freq_cut = np.where(
315 (np.abs(freq_mean2 - freq_fom) > (flag_nsigma * freq_dev2)) | (freq_fom == 0)
316 )[0]
318 if tile_cut.size > 0: 318 ↛ 328line 318 didn't jump to line 328 because the condition on line 318 was always true
319 logger.info(f"Tiles Cut: {obs['baseline_info']['tile_names'][tile_cut]}")
320 for bad_i in range(tile_cut.size):
321 cut_a_i = np.where(obs["baseline_info"]["tile_a"] == tile_cut[bad_i] + 1)
322 cut_b_i = np.where(obs["baseline_info"]["tile_b"] == tile_cut[bad_i] + 1)
323 if cut_a_i[0].size > 0: 323 ↛ 325line 323 didn't jump to line 325 because the condition on line 323 was always true
324 vis_weights[0 : obs["n_pol"], :, cut_a_i] = 0
325 if cut_b_i[0].size > 0: 325 ↛ 320line 325 didn't jump to line 320 because the condition on line 325 was always true
326 vis_weights[0 : obs["n_pol"], :, cut_b_i] = 0
327 obs["baseline_info"]["tile_use"][tile_cut] = 0
328 if freq_cut.size > 0: 328 ↛ 332line 328 didn't jump to line 332 because the condition on line 328 was always true
329 vis_weights[0 : obs["n_pol"], freq_cut] = 0
330 obs["baseline_info"]["freq_use"][freq_cut] = 0
332 bin_offset = np.append(
333 obs["baseline_info"]["bin_offset"], obs["baseline_info"]["tile_a"].size
334 )
335 time_bin = np.zeros(obs["baseline_info"]["tile_a"].size)
336 for ti in range(len(obs["baseline_info"]["time_use"])):
337 time_bin[bin_offset[ti] : bin_offset[ti + 1]] = ti
338 time_fom = np.zeros(obs["baseline_info"]["time_use"].size)
339 for ti in range(len(obs["baseline_info"]["time_use"])):
340 data_subset = data_abs[:, bin_offset[ti] : bin_offset[ti + 1]]
341 for pol_i in range(min(obs["n_pol"], 2)):
342 i_use = np.where(
343 vis_weights[pol_i, :, bin_offset[ti] : bin_offset[ti + 1]] > 0
344 )
345 if i_use[0].size > 10: 345 ↛ 341line 345 didn't jump to line 341 because the condition on line 345 was always true
346 time_fom[ti] += np.std(data_subset[i_use])
347 time_nonzero = np.nonzero(time_fom)
348 time_mean = idl_median(time_fom[time_nonzero])
349 time_dev = np.std(time_fom[time_nonzero])
350 time_cut0 = np.where(
351 (np.abs(time_mean - time_fom) <= 2 * flag_nsigma * time_dev) | (time_fom != 0)
352 )
353 time_mean2 = idl_median(time_fom[time_cut0])
354 time_dev2 = np.std(time_fom[time_cut0])
355 time_cut = np.where(
356 (np.abs(time_mean2 - time_fom) > 2 * flag_nsigma * time_dev2) | (time_fom == 0)
357 )[0]
358 for ti in range(time_cut.size): 358 ↛ 359line 358 didn't jump to line 359 because the loop on line 358 never started
359 ti_cut = np.where(time_bin == time_cut[ti])
360 if ti_cut.size > 0:
361 vis_weights[: obs["n_pol"], :, ti_cut] = 0
363 obs["n_vis"] = (np.where(vis_weights[0] > 0)[0]).size
365 return vis_weights, obs