Open
Description
>>> import datachain as dc
>>>
>>> ds = dc.read_parquet("example.parquet").limit(1000)
>>> ds.to_parquet("example-1000.parquet")
>>>
>>> ds2 = dc.read_parquet("example-1000.parquet")
>>> ds2.show()
Parsed by pyarrow: 0rows [00:00, ?rows/s]
Error while validating/converting type for column id with value file:///Users/dmitry/src/money-lion, original error Value 'file:///Users/dmitry/src/money-lion' with type <class 'str'> incompatible for column type Int64
NoneType: None
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/Users/dmitry/src/datachain/src/datachain/lib/dc/datachain.py", line 1546, in show
df = dc.to_pandas(flatten, include_hidden=include_hidden)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/dmitry/src/datachain/src/datachain/lib/dc/datachain.py", line 1523, in to_pandas
results = self.results(include_hidden=include_hidden)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/dmitry/src/datachain/src/datachain/lib/dc/datachain.py", line 1032, in results
return list(self.collect_flatten(include_hidden=include_hidden))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/dmitry/src/datachain/src/datachain/lib/dc/datachain.py", line 983, in collect_flatten
with self._query.ordered_select(*db_signals).as_iterable() as rows:
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/Cellar/python@3.12/3.12.9/Frameworks/Python.framework/Versions/3.12/lib/python3.12/contextlib.py", line 137, in __enter__
return next(self.gen)
^^^^^^^^^^^^^^
File "/Users/dmitry/src/datachain/src/datachain/query/dataset.py", line 1304, in as_iterable
query = self.apply_steps().select()
^^^^^^^^^^^^^^^^^^
File "/Users/dmitry/src/datachain/src/datachain/query/dataset.py", line 1250, in apply_steps
result = step.apply(
^^^^^^^^^^^
File "/Users/dmitry/src/datachain/src/datachain/query/dataset.py", line 613, in apply
self.populate_udf_table(udf_table, query)
File "/Users/dmitry/src/datachain/src/datachain/query/dataset.py", line 531, in populate_udf_table
process_udf_outputs(
File "/Users/dmitry/src/datachain/src/datachain/query/dataset.py", line 351, in process_udf_outputs
rows.append(adjust_outputs(warehouse, row, udf_col_types))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/dmitry/src/datachain/src/datachain/query/dataset.py", line 306, in adjust_outputs
row[col_name] = warehouse.convert_type(
^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/dmitry/src/datachain/src/datachain/data_storage/warehouse.py", line 152, in convert_type
raise ve
ValueError: Value 'file:///Users/dmitry/src/money-lion' with type <class 'str'> incompatible for column type Int64
Version Info
0.14.6.dev5+g30b2d2a0
Python 3.12.9