Skip to content

Commit b3fedb3

Browse files
i,j to lat lon (#746)
* potential solution to the irregular grid coordinates issue: the xr_parser coordinate name check now excludes exact matches to i, j. Assumes that latitude and longitude are also included in the coordinates attribute of the file
* add check for irregular grids to pp write_ds to append additional vertex variables to the output array
* update comment for the i, j coordinate check in xr_parser
* add check for bounds and vertices in the dims of variables to add to files for irregular grids during the pp file write
* remove unused import
1 parent 1ad6c12 commit b3fedb3

File tree

2 files changed

+32
-12
lines changed

2 files changed

+32
-12
lines changed

src/preprocessor.py

+20-6
Original file line numberDiff line numberDiff line change
@@ -845,9 +845,6 @@ def normalize_group_time_vals(self, time_vals: np.ndarray) -> np.ndarray:
845845
def drop_attributes(self, xr_ds: xr.Dataset) -> xr.Dataset:
846846
""" Drop attributes that cause conflicts with xarray dataset merge"""
847847
drop_atts = ['average_T2',
848-
'time_bnds',
849-
'lat_bnds',
850-
'lon_bnds',
851848
'average_DT',
852849
'average_T1',
853850
'height',
@@ -1477,7 +1474,7 @@ def log_history_attr(self, var, ds):
14771474
ds.attrs['history'] = hist
14781475
return ds
14791476

1480-
def write_dataset(self, var, ds):
1477+
def write_dataset(self, var: varlist_util.VarlistEntry, ds: xr.Dataset):
14811478
"""Writes processed Dataset *ds* to location specified by the
14821479
``dest_path`` attribute of *var*, using xarray `to_netcdf()
14831480
<https://xarray.pydata.org/en/stable/generated/xarray.Dataset.to_netcdf.html>`__.
@@ -1486,11 +1483,27 @@ def write_dataset(self, var, ds):
14861483
os.makedirs(os.path.dirname(var.dest_path), exist_ok=True)
14871484
var_ds = ds[var.translation.name].to_dataset()
14881485
var_ds = var_ds.rename_vars(name_dict={var.translation.name: var.name})
1489-
var.log.info("Writing '%s'.", var.dest_path, tags=util.ObjectLogTag.OUT_FILE)
14901486
if var.is_static:
14911487
unlimited_dims = []
14921488
else:
14931489
unlimited_dims = [var.T.name]
1490+
# append other grid types here as needed
1491+
irregular_grids = {'tripolar'}
1492+
if ds.attrs.get('grid', None) is not None:
1493+
# search for irregular grid types
1494+
for g in irregular_grids:
1495+
grid_search = re.compile(g, re.IGNORECASE)
1496+
grid_regex_result = grid_search.search(ds.attrs.get('grid'))
1497+
if grid_regex_result is not None:
1498+
# add variables not included in xarray dataset if dims correspond to vertices and bounds
1499+
append_vars =\
1500+
(list(set([v for v in ds.variables
1501+
if 'vertices' in ds[v].dims
1502+
or 'bnds' in ds[v].dims]).difference([v for v in var_ds.variables])))
1503+
for v in append_vars:
1504+
v_dataset = ds[v].to_dataset()
1505+
var_ds = xr.merge([var_ds, v_dataset])
1506+
14941507

14951508
# The following block is retained for time comparison with dask delayed write procedure
14961509
# var_ds.to_netcdf(
@@ -1503,6 +1516,7 @@ def write_dataset(self, var, ds):
15031516

15041517
# Uncomment the timing lines and log calls if desired
15051518
# start_time = time.monotonic()
1519+
var.log.info("Writing '%s'.", var.dest_path, tags=util.ObjectLogTag.OUT_FILE)
15061520
delayed_write = var_ds.to_netcdf(
15071521
path=var.dest_path,
15081522
mode='w',
@@ -1543,6 +1557,7 @@ def write_ds(self, case_list: dict,
15431557
raise util.chain_exc(exc, f"writing data for {var.full_name}.",
15441558
util.DataPreprocessEvent)
15451559

1560+
15461561
# del ds # shouldn't be necessary
15471562

15481563
def parse_ds(self,
@@ -1648,7 +1663,6 @@ def write_pp_catalog(self,
16481663
elif not var.is_static:
16491664
d.update({'frequency': var.T.frequency.unit})
16501665
cat_entries.append(d)
1651-
16521666
# create a Pandas dataframe from the catalog entries
16531667

16541668
cat_df = pd.DataFrame(cat_entries)

src/xr_parser.py

+12-6
Original file line numberDiff line numberDiff line change
@@ -1133,14 +1133,20 @@ def reconcile_dimension_coords(self, our_var, ds):
11331133
if our_axes_set == ds_axes_set:
11341134
# check dimension coordinate names, std_names, units, bounds
11351135
for coord in our_var.dim_axes.values():
1136-
ds_coord_name = ds_axes[coord.axis]
1137-
self.reconcile_names(coord, ds, ds_coord_name, overwrite_ours=True)
1138-
if coord.axis == 'T':
1136+
# check for irregular grid coordinates and skip them if found
1137+
coord_search = re.compile('[ij]')
1138+
coord_regex_result = coord_search.fullmatch(ds_axes[coord.axis])
1139+
if coord_regex_result is None:
1140+
ds_coord_name = ds_axes[coord.axis]
1141+
self.reconcile_names(coord, ds, ds_coord_name, overwrite_ours=True)
1142+
if coord.axis == 'T':
11391143
# special case for time coordinate
1140-
self.reconcile_time_units(coord, ds[ds_coord_name])
1144+
self.reconcile_time_units(coord, ds[ds_coord_name])
1145+
else:
1146+
self.reconcile_units(coord, ds[ds_coord_name])
1147+
self.reconcile_coord_bounds(coord, ds, ds_coord_name)
11411148
else:
1142-
self.reconcile_units(coord, ds[ds_coord_name])
1143-
self.reconcile_coord_bounds(coord, ds, ds_coord_name)
1149+
continue
11441150
else:
11451151
_log.warning(f"Variable {our_var.name} has unexpected dimensionality: "
11461152
f" expected axes {list(our_axes_set)}, got {list(ds_axes_set)}.")

0 commit comments

Comments
 (0)