Skip to content

Commit 5e3f95a

Browse files
author
Matic Lubej
committed
save columns and dtypes in parquet and vector stats
1 parent 54238c0 commit 5e3f95a

File tree

1 file changed

+5
-2
lines changed

1 file changed

+5
-2
lines changed

eogrow/utils/testing.py

+5 -2
Original file line number | Diff line number | Diff line change
@@ -203,7 +203,7 @@ def _get_coords_sample(geom: Polygon | MultiPolygon | Any) -> list[tuple[float,
203203
return [_rounder(point) for point in geom.exterior.coords[:10]] if isinstance(geom, Polygon) else None
204204

205205
stats = {
206-
"columns": list(gdf),
206+
"columns_and_dtypes": list(gdf.dtypes.astype(str).sort_index().items()),
207207
"row_count": len(gdf),
208208
"crs": str(gdf.crs),
209209
"mean_area": _prepare_value(gdf.area.mean(), np.float64),
@@ -227,7 +227,10 @@ def _get_coords_sample(geom: Polygon | MultiPolygon | Any) -> list[tuple[float,
227227

228228

229229
def _calculate_parquet_stats(data: pd.DataFrame, config: StatCalcConfig) -> JsonDict:
230-
stats = {"columns": list(data), "row_count": len(data)}
230+
stats = {
231+
"columns_and_dtypes": list(data.dtypes.astype(str).sort_index().items()),
232+
"row_count": len(data),
233+
}
231234

232235
if len(data):
233236
subsample: pd.DataFrame = data.sample(min(len(data), config.num_random_values), random_state=42)

0 commit comments

Comments
 (0)