diff --git a/python/pyspark/sql/worker/plan_data_source_read.py b/python/pyspark/sql/worker/plan_data_source_read.py
index d736df6084c61..c006eeb9ae313 100644
--- a/python/pyspark/sql/worker/plan_data_source_read.py
+++ b/python/pyspark/sql/worker/plan_data_source_read.py
@@ -240,7 +240,12 @@ def data_source_read_func(iterator: Iterable[pa.RecordBatch]) -> Iterable[pa.Rec

         if not is_streaming:
             # The partitioning of python batch source read is determined before query execution.
-            partitions = reader.partitions()  # type: ignore[call-arg]
+            try:
+                partitions = reader.partitions()  # type: ignore[call-arg]
+            except NotImplementedError:
+                # Backward compatibility for data sources that raise NotImplementedError from partitions().
+                # The old reader base class raised it by default, so older implementations may still rely on it.
+                partitions = [InputPartition(None)]
             if not isinstance(partitions, list):
                 raise PySparkRuntimeError(
                     errorClass="DATA_SOURCE_TYPE_MISMATCH",