@@ -860,21 +860,20 @@ def _requires_validation_for_reading_parquet(
860860 # First, we check whether the source provides the dataframely schema. If it
861861 # does, we check whether it matches this schema. If it does, we assume that the
862862 # data adheres to the schema and we do not need to run validation.
863- metadata = (
864- pl . read_parquet_metadata (source ). get ( SCHEMA_METADATA_KEY )
863+ serialized_schema = (
864+ read_parquet_metadata_schema (source )
865865 if not isinstance (source , list )
866866 else None
867867 )
868- if metadata is not None :
869- serialized_schema = deserialize_schema (metadata )
868+ if serialized_schema is not None :
870869 if cls .matches (serialized_schema ):
871870 return False
872871
873872 # Otherwise, we definitely need to run validation. However, we emit different
874873 # information to the user depending on the value of `validate`.
875874 msg = (
876875 "current schema does not match stored schema"
877- if metadata is not None
876+ if serialized_schema is not None
878877 else "no schema to check validity can be read from the source"
879878 )
880879 if validation == "forbid" :
@@ -956,6 +955,24 @@ def _rules_match(lhs: dict[str, Rule], rhs: dict[str, Rule]) -> bool:
956955 )
957956
958957
def read_parquet_metadata_schema(
    source: str | Path | IO[bytes] | bytes,
) -> type[Schema] | None:
    """Load the dataframely schema stored in a parquet file's metadata.

    The parquet metadata of ``source`` is read and the entry stored under
    ``SCHEMA_METADATA_KEY`` is looked up. If such an entry exists, it is passed
    to :func:`deserialize_schema` and the resulting schema is returned.

    Args:
        source: Path to a parquet file or a file-like object that contains the
            metadata.

    Returns:
        The schema deserialized from the metadata, or ``None`` if the metadata
        has no entry under ``SCHEMA_METADATA_KEY``.
    """
    stored_schema = pl.read_parquet_metadata(source).get(SCHEMA_METADATA_KEY)
    # Guard clause: without a stored entry there is nothing to deserialize.
    if stored_schema is None:
        return None
    return deserialize_schema(stored_schema)
974+
975+
959976def deserialize_schema (data : str ) -> type [Schema ]:
960977 """Deserialize a schema from a JSON string.
961978
0 commit comments