alxmrs · alxmrs · Jun 24, 2024
diff --git a/README.md b/README.md
@@ -9,11 +9,13 @@ _Google Earth Engine Feature Collections via Dask DataFrames._
 ## How to use
 
 Install with pip:
+
 ```shell
 pip install dask-ee
 ```
 
 Then, authenticate Earth Engine:
+
 ```shell
 earthengine authenticate
 ```
@@ -26,36 +28,41 @@ import dask_ee
 ```
 
 You'll need to initialize Earth Engine before working with data:
+
 ```python
 ee.Initialize()
 ```
 
 From here, you can read Earth Engine FeatureCollections like they are DataFrames:
+
 ```python
 df = dask_ee.read_ee("WRI/GPPD/power_plants")
 df.head()
 ```
+
 These work like Pandas DataFrames, but they are lazily evaluated via [Dask](https://dask.org/).
 
 Feel free to do any analysis you wish. For example:
+
 ```python
 # Thanks @aazuspan, https://www.aazuspan.dev/blog/dask_featurecollection
 (
-    df[df.comm_year.gt(1940) & df.country.eq("USA") & df.fuel1.isin(["Coal", "Wind"])]
-    .astype({"comm_year": int})
-    .drop(columns=["geo"])
-    .groupby(["comm_year", "fuel1"])
-    .agg({"capacitymw": "sum"})
-    .reset_index()
-    .sort_values(by=["comm_year"])
-    .compute(scheduler="threads")
-    .pivot_table(index="comm_year", columns="fuel1", values="capacitymw", fill_value=0)
-    .plot()
+  df[df.comm_year.gt(1940) & df.country.eq("USA") & df.fuel1.isin(["Coal", "Wind"])]
+  .astype({"comm_year": int})
+  .drop(columns=["geometry"])
+  .groupby(["comm_year", "fuel1"])
+  .agg({"capacitymw": "sum"})
+  .reset_index()
+  .sort_values(by=["comm_year"])
+  .compute(scheduler="threads")
+  .pivot_table(index="comm_year", columns="fuel1", values="capacitymw", fill_value=0)
+  .plot()
 )
 ```
+
 ![Coal vs Wind in the US since 1940](https://raw.githubusercontent.com/alxmrs/dask-ee/main/demo.png)
 
-There are a few other useful things you can do. 
+There are a few other useful things you can do.
 
 For one, you may pass in a pre-processed `ee.FeatureCollection`. This allows full utilization
 of the Earth Engine API.
@@ -71,6 +78,7 @@ df = dask_ee.read_ee(fc)
 
 In addition, you may change the `chunksize`, which controls how many rows are included in each
 Dask partition.
+
 ```python
 df = dask_ee.read_ee("WRI/GPPD/power_plants", chunksize=7_000)
 df.head()
@@ -81,12 +89,33 @@ df.head()
 Contributions are welcome. A good way to start is to check out open [issues](https://github.com/alxmrs/dask-ee/issues)
 or file a new one. We're happy to review pull requests, too.
 
-Before writing code, please install the development dependencies (after cloning the repo): 
+Before writing code, please install the development dependencies (after cloning the repo):
+
 ```shell
 pip install -e ".[dev]"
 ```
 
+<details>
+<summary>Help! On install, I hit the error: <code>NotADirectoryError: [Errno 20] Not a directory: 'gdal-config'</code>.</summary>
+
+You may need to install `gdal` on your system to properly test `dask-geopandas`. On a Mac
+with [Homebrew](https://brew.sh), you can run:
+
+```shell
+brew install gdal
+```
+
+Or, if you're using [conda](https://conda.io/projects/conda/en/latest/user-guide/install/index.html), you can just
+directly install `dask-geopandas`:
+
+```shell
+conda install dask-geopandas -c conda-forge
+```
+
+</details>
+
 ## License
+
 ```
 Copyright 2024 Alexander S Merose
 

diff --git a/dask_ee/read.py b/dask_ee/read.py
@@ -75,9 +75,10 @@ def to_df(page: ee.FeatureCollection) -> pd.DataFrame:
 
   meta = {k: _BUILTIN_DTYPES[v] for k, v in columns.items()}
 
+  # We rename to geometry because that is the default column that geopandas looks for.
   return dd.from_map(
       to_df,
       pages,
       meta=meta,
       divisions=divisions,
-  )
+  ).rename(columns={'geo': 'geometry'})
diff --git a/dask_ee/read_integrationtest.py b/dask_ee/read_integrationtest.py
@@ -11,7 +11,9 @@
 import unittest
 
 import dask.dataframe as dd
+import dask_geopandas as dgp
 import ee
+import shapely
 
 import dask_ee
 
@@ -37,6 +39,21 @@ def test_reads_dask_dataframe(self):
     print(columns)
     print(head)
 
+  def test_read_different_dataframe(self):
+    fc = ee.FeatureCollection('TIGER/2016/Roads').limit(10_001)
+    df = dask_ee.read_ee(fc)
+
+    head = df.head()
+    columns = df.columns
+
+    self.assertIsNotNone(df)
+    self.assertIsNotNone(head)
+    self.assertIsInstance(df, dd.DataFrame)
+    self.assertEqual(df.compute().shape, (10_001, 5))
+
+    print(columns)
+    print(head)
+
   def test_works_with_defined_features(self):
     # Make a list of Features.
     features = [
@@ -52,14 +69,14 @@ def test_works_with_defined_features(self):
 
     df = dask_ee.read_ee(fc)
 
-    self.assertEqual(list(df.columns), ['geo', 'name'])
+    self.assertEqual(list(df.columns), ['geometry', 'name'])
 
   def test_works_with_a_single_feature_in_fc(self):
     from_geom = ee.FeatureCollection(ee.Geometry.Point(16.37, 48.225))
 
     df = dask_ee.read_ee(from_geom)
 
-    self.assertEqual(list(df.columns), ['geo'])
+    self.assertEqual(list(df.columns), ['geometry'])
     self.assertEqual(df.compute().shape, (1, 1))
 
   def test_can_create_random_points(self):
@@ -72,7 +89,7 @@ def test_can_create_random_points(self):
     # Note: these random points have no system:index!
     df = dask_ee.read_ee(random_points)
 
-    self.assertEqual(list(df.columns), ['geo'])
+    self.assertEqual(list(df.columns), ['geometry'])
     self.assertEqual(df.compute().shape, (1000, 1))
 
   def test_prof__read_ee(self):
@@ -83,6 +100,16 @@ def test_prof__read_ee(self):
       # Modified version of `pr.print_stats()`.
       pstats.Stats(pr).sort_stats('cumtime').print_stats()
 
+  def test_integrates_with_geopandas_dask(self):
+    fc = ee.FeatureCollection('WRI/GPPD/power_plants')
+    df = dask_ee.read_ee(fc)
+    gdf = dgp.from_dask_dataframe(df)
+
+    self.assertIsNotNone(df.compute().geometry)
+    self.assertIsInstance(df.geometry.dtype, shapely.GeometryType)
+
+    print(df.head())
+
 
 if __name__ == '__main__':
   unittest.main()
diff --git a/pyproject.toml b/pyproject.toml
@@ -37,6 +37,7 @@ tests = [
   "absl-py",
   "pytest",
   "pyink",
+  "dask-geopandas",
 ]
 dev = [
   "dask-ee[tests]",