Skip to content

Commit

Permalink
Added more unit and integration tests. (#12)
Browse files Browse the repository at this point in the history
The new tests caught one bug, which I count as a success. In
addition, this change makes sure that no test code is bundled with the
distribution. Lastly, I've added an easy hook (the `dev` install extra)
to set up a development environment.
  • Loading branch information
alxmrs authored Jun 24, 2024
1 parent 589a07e commit 6c1cad7
Show file tree
Hide file tree
Showing 6 changed files with 72 additions and 18 deletions.
1 change: 1 addition & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
global-exclude *test.py
7 changes: 6 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ Feel free to do any analysis you wish. For example:
.plot()
)
```
![Coal vs Wind in the US since 1940](demo.png)
![Coal vs Wind in the US since 1940](https://raw.githubusercontent.com/alxmrs/dask-ee/main/demo.png)

There are a few other useful things you can do.

Expand Down Expand Up @@ -81,6 +81,11 @@ df.head()
Contributions are welcome. A good way to start is to check out open [issues](https://github.com/alxmrs/dask-ee/issues)
or file a new one. We're happy to review pull requests, too.

After cloning the repo, install the development dependencies from the repository root before writing any code:
```shell
pip install -e ".[dev]"
```

## License
```
Copyright 2024 Alexander S Merose
Expand Down
14 changes: 7 additions & 7 deletions dask_ee/read.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,6 @@
}


# TODO(#4): Support 'auto' chunks, where we calculate the maximum allowed page size given the number of
# bytes in each row.
def read_ee(
fc: t.Union[ee.FeatureCollection, str],
chunksize: t.Union[int, t.Literal['auto']] = 5_000,
Expand All @@ -41,25 +39,27 @@ def read_ee(
Returns:
A dask DataFrame with paged Google Earth Engine data.
"""
# TODO(#4): Support 'auto' chunks, where we calculate the maximum allowed page size given the number of
# bytes in each row.
if chunksize == 'auto':
raise NotImplementedError('Auto chunksize is not implemented yet!')

if isinstance(fc, str):
fc = ee.FeatureCollection(fc)

if chunksize == 'auto':
raise NotImplementedError('Auto chunksize is not implemented yet!')

# Make all the getInfo() calls at once, up front.
fc_size, all_info = ee.List([fc.size(), fc.limit(0)]).getInfo()

columns = {'geo': 'Json'}
columns.update(all_info['columns'])
del columns['system:index']
if 'system:index' in columns:
del columns['system:index']

divisions = tuple(range(0, fc_size, chunksize))

# TODO(#5): Compare `toList()` to other range operations, like getting all index IDs via `getInfo()`.
pages = [ee.FeatureCollection(fc.toList(chunksize, i)) for i in divisions]
# Get the remainder, if it exists. `io_chunks` are not likely to evenly partition the data.
# Get the remainder, if it exists. `chunksize` is not likely to evenly partition the data.
d, r = divmod(fc_size, chunksize)
if r != 0:
pages.append(ee.FeatureCollection(fc.toList(r, d)))
Expand Down
58 changes: 48 additions & 10 deletions dask_ee/read_integrationtest.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,28 +23,66 @@ def setUpClass(cls):
ee.Initialize()

def test_reads_dask_dataframe(self):
fc = ee.FeatureCollection("WRI/GPPD/power_plants")
ddf = dask_ee.read_ee(fc)
fc = ee.FeatureCollection('WRI/GPPD/power_plants')
df = dask_ee.read_ee(fc)

head = ddf.head()
columns = ddf.columns
head = df.head()
columns = df.columns

self.assertIsNotNone(ddf)
self.assertIsNotNone(df)
self.assertIsNotNone(head)
self.assertIsInstance(ddf, dd.DataFrame)
self.assertEqual(ddf.compute().shape, (28_664, 23))
self.assertIsInstance(df, dd.DataFrame)
self.assertEqual(df.compute().shape, (28_664, 23))

print(columns)
print(head)

def test_works_with_defined_features(self):
# Make a list of Features.
features = [
ee.Feature(
ee.Geometry.Rectangle(30.01, 59.80, 30.59, 60.15),
{'name': 'Voronoi'},
),
ee.Feature(ee.Geometry.Point(-73.96, 40.781), {'name': 'Thiessen'}),
ee.Feature(ee.Geometry.Point(6.4806, 50.8012), {'name': 'Dirichlet'}),
]

fc = ee.FeatureCollection(features)

df = dask_ee.read_ee(fc)

self.assertEqual(list(df.columns), ['geo', 'name'])

def test_works_with_a_single_feature_in_fc(self):
from_geom = ee.FeatureCollection(ee.Geometry.Point(16.37, 48.225))

df = dask_ee.read_ee(from_geom)

self.assertEqual(list(df.columns), ['geo'])
self.assertEqual(df.compute().shape, (1, 1))

def test_can_create_random_points(self):
# Define an arbitrary region in which to compute random points.
region = ee.Geometry.Rectangle(-119.224, 34.669, -99.536, 50.064)

# Create 1000 random points in the region.
random_points = ee.FeatureCollection.randomPoints(region)

# Note: these random points have no system:index!
df = dask_ee.read_ee(random_points)

self.assertEqual(list(df.columns), ['geo'])
self.assertEqual(df.compute().shape, (1000, 1))

def test_prof__read_ee(self):
fc = ee.FeatureCollection("WRI/GPPD/power_plants")
fc = ee.FeatureCollection('WRI/GPPD/power_plants')
with cProfile.Profile() as pr:
_ = dask_ee.read_ee(fc)

# Modified version of `pr.print_stats()`.
pstats.Stats(pr).sort_stats("cumtime").print_stats()
pstats.Stats(pr).sort_stats('cumtime').print_stats()


if __name__ == "__main__":
if __name__ == '__main__':
unittest.main()
6 changes: 6 additions & 0 deletions dask_ee/read_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,12 @@ def test_can_import_read_op(self):
except ModuleNotFoundError:
self.fail('Cannot import `read_ee` function.')

def test_rejects_auto_chunks(self):
import dask_ee

with self.assertRaises(NotImplementedError):
dask_ee.read_ee('WRI/GPPD/power_plants', 'auto')


if __name__ == '__main__':
unittest.main()
4 changes: 4 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,10 @@ tests = [
"pytest",
"pyink",
]
dev = [
"dask-ee[tests]",
"build",
]

[project.urls]
Homepage = "https://github.com/alxmrs/dask-ee"
Expand Down

0 comments on commit 6c1cad7

Please sign in to comment.