i,j to lat lon (#746)

wrongkindofdoctor · web-flow · commit b3fedb3adde2 · 2025-03-07T14:57:22.000-05:00
* potential solution to the irregular grid coordinates issue: the xr_parser coordinate name check now
  excludes exact matches to i,j. This assumes that latitude and longitude are also included in the coordinates attribute of the file

* add check for irregular grids to the preprocessor write_dataset method to append additional vertex variables to the output dataset

* update comment for ij coordinate check in xr_parser

* add check for bounds and vertices in the dims of variables so that those variables are added to the output files for irregular grids during the preprocessor file write

* remove unused import
diff --git a/src/preprocessor.py b/src/preprocessor.py
@@ -845,9 +845,6 @@ def normalize_group_time_vals(self, time_vals: np.ndarray) -> np.ndarray:
     def drop_attributes(self, xr_ds: xr.Dataset) -> xr.Dataset:
         """ Drop attributes that cause conflicts with xarray dataset merge"""
         drop_atts = ['average_T2',
-                     'time_bnds',
-                     'lat_bnds',
-                     'lon_bnds',
                      'average_DT',
                      'average_T1',
                      'height',
@@ -1477,7 +1474,7 @@ def log_history_attr(self, var, ds):
         ds.attrs['history'] = hist
         return ds
 
-    def write_dataset(self, var, ds):
+    def write_dataset(self, var: varlist_util.VarlistEntry, ds: xr.Dataset):
         """Writes processed Dataset *ds* to location specified by the
         ``dest_path`` attribute of *var*, using xarray `to_netcdf()
         <https://xarray.pydata.org/en/stable/generated/xarray.Dataset.to_netcdf.html>`__.
@@ -1486,11 +1483,27 @@ def write_dataset(self, var, ds):
         os.makedirs(os.path.dirname(var.dest_path), exist_ok=True)
         var_ds = ds[var.translation.name].to_dataset()
         var_ds = var_ds.rename_vars(name_dict={var.translation.name: var.name})
-        var.log.info("Writing '%s'.", var.dest_path, tags=util.ObjectLogTag.OUT_FILE)
         if var.is_static:
             unlimited_dims = []
         else:
             unlimited_dims = [var.T.name]
+        # append other grid types here as needed
+        irregular_grids = {'tripolar'}
+        if ds.attrs.get('grid', None) is not None:
+            # search for irregular grid types
+            for g in irregular_grids:
+                grid_search = re.compile(g, re.IGNORECASE)
+                grid_regex_result = grid_search.search(ds.attrs.get('grid'))
+                if grid_regex_result is not None:
+                    # add variables not included in xarray dataset if dims correspond to vertices and bounds
+                    append_vars =\
+                        (list(set([v for v in ds.variables
+                                   if 'vertices' in ds[v].dims
+                                   or 'bnds' in ds[v].dims]).difference([v for v in var_ds.variables])))
+                    for v in append_vars:
+                        v_dataset = ds[v].to_dataset()
+                        var_ds = xr.merge([var_ds, v_dataset])
+
 
         # The following block is retained for time comparison with dask delayed write procedure
         # var_ds.to_netcdf(
@@ -1503,6 +1516,7 @@ def write_dataset(self, var, ds):
 
         # Uncomment the timing lines and log calls if desired
         # start_time = time.monotonic()
+        var.log.info("Writing '%s'.", var.dest_path, tags=util.ObjectLogTag.OUT_FILE)
         delayed_write = var_ds.to_netcdf(
             path=var.dest_path,
             mode='w',
@@ -1543,6 +1557,7 @@ def write_ds(self, case_list: dict,
                     raise util.chain_exc(exc, f"writing data for {var.full_name}.",
                                          util.DataPreprocessEvent)
 
+
             # del ds  # shouldn't be necessary
 
     def parse_ds(self,
@@ -1648,7 +1663,6 @@ def write_pp_catalog(self,
                 elif not var.is_static:
                     d.update({'frequency': var.T.frequency.unit})
                 cat_entries.append(d)
-
         # create a Pandas dataframe from the catalog entries
 
         cat_df = pd.DataFrame(cat_entries)
diff --git a/src/xr_parser.py b/src/xr_parser.py
@@ -1133,14 +1133,20 @@ def reconcile_dimension_coords(self, our_var, ds):
         if our_axes_set == ds_axes_set:
             # check dimension coordinate names, std_names, units, bounds
             for coord in our_var.dim_axes.values():
-                ds_coord_name = ds_axes[coord.axis]
-                self.reconcile_names(coord, ds, ds_coord_name, overwrite_ours=True)
-                if coord.axis == 'T':
+                # check for irregular grid coordinates and skip them if found
+                coord_search = re.compile('[ij]')
+                coord_regex_result = coord_search.fullmatch(ds_axes[coord.axis])
+                if coord_regex_result is None:
+                    ds_coord_name = ds_axes[coord.axis]
+                    self.reconcile_names(coord, ds, ds_coord_name, overwrite_ours=True)
+                    if coord.axis == 'T':
                     # special case for time coordinate
-                    self.reconcile_time_units(coord, ds[ds_coord_name])
+                        self.reconcile_time_units(coord, ds[ds_coord_name])
+                    else:
+                        self.reconcile_units(coord, ds[ds_coord_name])
+                    self.reconcile_coord_bounds(coord, ds, ds_coord_name)
                 else:
-                    self.reconcile_units(coord, ds[ds_coord_name])
-                self.reconcile_coord_bounds(coord, ds, ds_coord_name)
+                    continue
         else:
             _log.warning(f"Variable {our_var.name} has unexpected dimensionality: "
                          f" expected axes {list(our_axes_set)}, got {list(ds_axes_set)}.")