Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022-2025, NVIDIA CORPORATION.
* Copyright (c) 2022-2026, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -366,12 +366,17 @@ object ParquetSchemaUtils {
caseSensitive: Boolean,
useFieldId: Boolean): DataType = {
val elementType = sparkType.elementType
// Unannotated repeated group should be interpreted as required list of required element, so
// list element type is just the group itself.
// TODO: When we drop Spark 3.1.x, this should use Parquet's LogicalTypeAnnotation
// Note that the original type is not null for leaf nodes.
//if (parquetList.getLogicalTypeAnnotation == null &&
val newSparkType = if (parquetList.isRepetition(Repetition.REPEATED)) {
// A REPEATED field that is neither LIST- nor MAP-annotated is the legacy 1-level list:
// the element type is the field itself, which may be a primitive (e.g. `repeated binary x
// (UTF8)` for array<string>) or a group. A REPEATED group that IS LIST-annotated (Thrift
// / Avro 1.7 nested-list style) must go through the LIST-wrapper branch below; otherwise
// the wrapper is passed to clipSparkType as if it were the primitive element, and
// asPrimitiveType() throws ClassCastException (issues #11589, #11592). The predicate follows
// the Parquet spec's "backward-compatibility rules": treat REPEATED as legacy unless
// explicitly annotated LIST or MAP.
val newSparkType = if (parquetList.isRepetition(Repetition.REPEATED) &&
parquetList.getOriginalType != OriginalType.LIST &&
parquetList.getOriginalType != OriginalType.MAP) {
clipSparkType(elementType, parquetList, caseSensitive, useFieldId)
} else {
val parquetListGroup = parquetList.asGroupType()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,6 @@ class RapidsTestSettings extends BackendTestSettings {
enableSuite[RapidsMathFunctionsSuite]
enableSuite[RapidsMiscFunctionsSuite]
enableSuite[RapidsParquetAvroCompatibilitySuite]
.exclude("SPARK-10136 array of primitive array", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11592"))
enableSuite[RapidsParquetColumnIndexSuite]
enableSuite[RapidsParquetCompressionCodecPrecedenceSuite]
.exclude("Create parquet table with compression", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11416"))
Expand Down Expand Up @@ -178,7 +177,6 @@ class RapidsTestSettings extends BackendTestSettings {
.exclude("schema mismatch failure error message for parquet reader", WONT_FIX_ISSUE("GPU uses a unified parquet reader path; the non-vectorized CPU error variant rooted in ParquetDecodingException is not reachable by design. See https://github.com/NVIDIA/spark-rapids/issues/11434"))
enableSuite[RapidsParquetThriftCompatibilitySuite]
.exclude("Read Parquet file generated by parquet-thrift", ADJUST_UT("https://github.com/NVIDIA/spark-rapids/pull/11591"))
.exclude("SPARK-10136 list of primitive list", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11589"))
enableSuite[RapidsParquetVectorizedSuite]
enableSuite[RapidsRandomSuite]
.exclude("random", ADJUST_UT("Replaced by testRapids version that considers partitionIndex offset"))
Expand Down