From ad2b2bd48fdc352d9e90704ee324c41724ed6f8b Mon Sep 17 00:00:00 2001 From: Alex Merose Date: Sun, 23 Jun 2024 14:59:43 +0200 Subject: [PATCH] Basic reads work without `meta`. --- dask_ee/read.py | 8 ++++--- dask_ee/read_integrationtest.py | 39 +++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 3 deletions(-) create mode 100644 dask_ee/read_integrationtest.py diff --git a/dask_ee/read.py b/dask_ee/read.py index dbc8fa3..d0ff809 100644 --- a/dask_ee/read.py +++ b/dask_ee/read.py @@ -20,6 +20,7 @@ 'int32': np.int32, 'int64': np.int64, 'int8': np.int8, + 'json': dict, 'long': np.int64, 'short': np.int16, 'uint16': np.uint16, @@ -39,7 +40,8 @@ def read_ee( raise NotImplementedError('Auto io_chunks are not implemented yet!') fc_size, all_info = ee.List([fc.size(), fc]).getInfo() - columns = all_info['columns'] + columns = {'geo': 'json'} + columns.update(all_info['columns']) # TODO(#5): Compare `toList()` to other range operations, like getting all index IDs via `getInfo()`. pages = [ @@ -55,12 +57,12 @@ def to_df(page: ee.FeatureCollection) -> pd.DataFrame: } ) - meta = {k: _BUILTIN_DTYPES[v.lower()] for k, v in columns.items()} + # TODO(alxmrs): Support dask dataframe `meta` via columns. + # meta = {k: _BUILTIN_DTYPES[v.lower()] for k, v in columns.items()} divisions = tuple(range(0, fc_size, io_chunks)) return dd.from_map( to_df, pages, - meta=meta, divisions=divisions, ) diff --git a/dask_ee/read_integrationtest.py b/dask_ee/read_integrationtest.py new file mode 100644 index 0000000..d2c87cb --- /dev/null +++ b/dask_ee/read_integrationtest.py @@ -0,0 +1,39 @@ +"""Integration tests with Google Earth Engine. 
+
+Before running, please authenticate:
+```
+earthengine authenticate
+```
+
+"""
+import unittest
+
+import dask.dataframe as dd
+import ee
+
+import dask_ee
+
+
+class ReadIntegrationTests(unittest.TestCase):
+    """Checks `dask_ee.read_ee` against the live Earth Engine service."""
+
+    @classmethod
+    def setUpClass(cls):
+        # One session for the whole class; relies on prior authentication.
+        ee.Initialize()
+
+    def test_reads_dask_dataframe(self):
+        """Reading a public collection gives a dask DataFrame with data."""
+        fc = ee.FeatureCollection("WRI/GPPD/power_plants")
+        ddf = dask_ee.read_ee(fc)
+
+        # Check the type before touching any dataframe attributes.
+        self.assertIsInstance(ddf, dd.DataFrame)
+        self.assertGreater(len(ddf.columns), 0)
+
+        # `head()` evaluates the first partition, so this exercises a read.
+        self.assertFalse(ddf.head().empty)
+
+
+if __name__ == '__main__':
+    unittest.main()