Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
2f22a2f
GH-44810: [C++][Parquet] Add arrow::Result version of parquet::arrow:…
hiroyuki-sato Nov 27, 2025
a1cf83f
Update cpp/src/parquet/arrow/arrow_reader_writer_test.cc
hiroyuki-sato Nov 28, 2025
29ab004
Update cpp/src/parquet/arrow/arrow_reader_writer_test.cc
hiroyuki-sato Nov 28, 2025
3c0bdd3
Update cpp/src/parquet/arrow/arrow_reader_writer_test.cc
hiroyuki-sato Nov 28, 2025
1088943
Update cpp/src/parquet/arrow/reader.cc
hiroyuki-sato Nov 28, 2025
65b43dc
Update cpp/src/parquet/arrow/reader.cc
hiroyuki-sato Nov 28, 2025
fc54de6
Update cpp/src/parquet/arrow/reader.h
hiroyuki-sato Nov 28, 2025
25c72e2
Status version FileReader::Make set to deprecated
hiroyuki-sato Nov 29, 2025
c6cf742
Replace with ASSERT_OK_AND_ASSIGN
hiroyuki-sato Nov 30, 2025
e75d5b8
Added error checking in case an error occurs
hiroyuki-sato Nov 30, 2025
df413a8
Added error checking in case an error occurs
hiroyuki-sato Nov 30, 2025
f9ab393
Add missing ()
hiroyuki-sato Nov 30, 2025
d669ccf
Fix variable name
hiroyuki-sato Nov 30, 2025
315687a
Use Result version FileReader::Make
hiroyuki-sato Nov 30, 2025
d0d4fb0
Replace Result version FileReader::Make
hiroyuki-sato Dec 2, 2025
b3e1887
Fix segfault
hiroyuki-sato Dec 2, 2025
25816c0
Fix lint
hiroyuki-sato Dec 2, 2025
8b941ab
Replace Result version FileReader::Make
hiroyuki-sato Dec 8, 2025
d53adad
Use auto
hiroyuki-sato Dec 9, 2025
6ad6292
Fix lint
hiroyuki-sato Dec 9, 2025
4045dc8
Add auto
hiroyuki-sato Dec 9, 2025
7cf225f
Use Result version FileReader::Make
hiroyuki-sato Dec 9, 2025
9c7e26e
Use auto
hiroyuki-sato Dec 9, 2025
98d950c
Reuse existing implementation
kou Dec 11, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 28 additions & 28 deletions cpp/src/arrow/dataset/file_parquet.cc
Original file line number Diff line number Diff line change
Expand Up @@ -509,9 +509,9 @@ Result<std::shared_ptr<parquet::arrow::FileReader>> ParquetFileFormat::GetReader
std::shared_ptr<parquet::FileMetaData> reader_metadata = reader->metadata();
auto arrow_properties =
MakeArrowReaderProperties(*this, *reader_metadata, *options, *parquet_scan_options);
std::unique_ptr<parquet::arrow::FileReader> arrow_reader;
RETURN_NOT_OK(parquet::arrow::FileReader::Make(
options->pool, std::move(reader), std::move(arrow_properties), &arrow_reader));
ARROW_ASSIGN_OR_RAISE(auto arrow_reader,
parquet::arrow::FileReader::Make(options->pool, std::move(reader),
std::move(arrow_properties)));
// R build with openSUSE155 requires an explicit shared_ptr construction
return std::shared_ptr<parquet::arrow::FileReader>(std::move(arrow_reader));
}
Expand All @@ -532,37 +532,37 @@ Future<std::shared_ptr<parquet::arrow::FileReader>> ParquetFileFormat::GetReader
source.filesystem(), options->pool);
auto self = checked_pointer_cast<const ParquetFileFormat>(shared_from_this());

return source.OpenAsync().Then(
[self = self, properties = std::move(properties), source = source,
options = options, metadata = metadata,
parquet_scan_options = parquet_scan_options](
const std::shared_ptr<io::RandomAccessFile>& input) mutable {
return parquet::ParquetFileReader::OpenAsync(input, properties, metadata)
.Then(
[=](const std::unique_ptr<parquet::ParquetFileReader>& reader) mutable
-> Result<std::shared_ptr<parquet::arrow::FileReader>> {
auto arrow_properties = MakeArrowReaderProperties(
*self, *reader->metadata(), *options, *parquet_scan_options);

std::unique_ptr<parquet::arrow::FileReader> arrow_reader;
RETURN_NOT_OK(parquet::arrow::FileReader::Make(
return source.OpenAsync().Then([self = self, properties = std::move(properties),
source = source, options = options, metadata = metadata,
parquet_scan_options = parquet_scan_options](
const std::shared_ptr<io::RandomAccessFile>&
input) mutable {
return parquet::ParquetFileReader::OpenAsync(input, properties, metadata)
.Then(
[=](const std::unique_ptr<parquet::ParquetFileReader>& reader) mutable
-> Result<std::shared_ptr<parquet::arrow::FileReader>> {
auto arrow_properties = MakeArrowReaderProperties(
*self, *reader->metadata(), *options, *parquet_scan_options);

ARROW_ASSIGN_OR_RAISE(
auto arrow_reader,
parquet::arrow::FileReader::Make(
options->pool,
// TODO(ARROW-12259): workaround since we have Future<(move-only
// type)> It *wouldn't* be safe to const_cast reader except that
// here we know there are no other waiters on the reader.
std::move(const_cast<std::unique_ptr<parquet::ParquetFileReader>&>(
reader)),
arrow_properties, &arrow_reader));

// R build with openSUSE155 requires an explicit shared_ptr construction
return std::shared_ptr<parquet::arrow::FileReader>(
std::move(arrow_reader));
},
[path = source.path()](const Status& status)
-> Result<std::shared_ptr<parquet::arrow::FileReader>> {
return WrapSourceError(status, path);
});
});
arrow_properties));

// R build with openSUSE155 requires an explicit shared_ptr construction
return std::shared_ptr<parquet::arrow::FileReader>(std::move(arrow_reader));
},
[path = source.path()](const Status& status)
-> Result<std::shared_ptr<parquet::arrow::FileReader>> {
return WrapSourceError(status, path);
});
});
}

struct SlicingGenerator {
Expand Down
69 changes: 32 additions & 37 deletions cpp/src/parquet/arrow/arrow_reader_writer_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4182,12 +4182,13 @@ void TryReadDataFile(const std::string& path,
const std::string& expected_message = "") {
auto pool = ::arrow::default_memory_pool();

std::unique_ptr<FileReader> arrow_reader;
Status s =
FileReader::Make(pool, ParquetFileReader::OpenFile(path, false), &arrow_reader);
if (s.ok()) {
Status s;
auto reader_result = FileReader::Make(pool, ParquetFileReader::OpenFile(path, false));
if (reader_result.ok()) {
std::shared_ptr<::arrow::Table> table;
s = arrow_reader->ReadTable(&table);
s = (*reader_result)->ReadTable(&table);
} else {
s = reader_result.status();
}

ASSERT_EQ(s.code(), expected_code)
Expand Down Expand Up @@ -4259,14 +4260,15 @@ TEST(TestArrowReaderAdHoc, LARGE_MEMORY_TEST(LargeStringColumn)) {
array.reset();

auto reader = ParquetFileReader::Open(std::make_shared<BufferReader>(tables_buffer));
std::unique_ptr<FileReader> arrow_reader;
ASSERT_OK(FileReader::Make(default_memory_pool(), std::move(reader), &arrow_reader));
ASSERT_OK_AND_ASSIGN(auto arrow_reader,
FileReader::Make(default_memory_pool(), std::move(reader)));
ASSERT_OK_NO_THROW(arrow_reader->ReadTable(&table));
ASSERT_OK(table->ValidateFull());

// ARROW-9297: ensure RecordBatchReader also works
reader = ParquetFileReader::Open(std::make_shared<BufferReader>(tables_buffer));
ASSERT_OK(FileReader::Make(default_memory_pool(), std::move(reader), &arrow_reader));
ASSERT_OK_AND_ASSIGN(arrow_reader,
FileReader::Make(default_memory_pool(), std::move(reader)));
ASSERT_OK_AND_ASSIGN(auto batch_reader, arrow_reader->GetRecordBatchReader());
ASSERT_OK_AND_ASSIGN(auto batched_table,
::arrow::Table::FromRecordBatchReader(batch_reader.get()));
Expand Down Expand Up @@ -4362,9 +4364,8 @@ TEST(TestArrowReaderAdHoc, LegacyTwoLevelList) {
ASSERT_EQ(kExpectedLegacyList, nodeStr.str());

// Verify Arrow schema and data
std::unique_ptr<FileReader> reader;
ASSERT_OK_NO_THROW(
FileReader::Make(default_memory_pool(), std::move(file_reader), &reader));
ASSERT_OK_AND_ASSIGN(auto reader,
FileReader::Make(default_memory_pool(), std::move(file_reader)));
std::shared_ptr<Table> table;
ASSERT_OK(reader->ReadTable(&table));
ASSERT_OK(table->ValidateFull());
Expand Down Expand Up @@ -4427,9 +4428,8 @@ TEST_P(TestArrowReaderAdHocSparkAndHvr, ReadDecimals) {

auto pool = ::arrow::default_memory_pool();

std::unique_ptr<FileReader> arrow_reader;
ASSERT_OK_NO_THROW(
FileReader::Make(pool, ParquetFileReader::OpenFile(path, false), &arrow_reader));
ASSERT_OK_AND_ASSIGN(auto arrow_reader,
FileReader::Make(pool, ParquetFileReader::OpenFile(path, false)));
std::shared_ptr<::arrow::Table> table;
ASSERT_OK_NO_THROW(arrow_reader->ReadTable(&table));

Expand Down Expand Up @@ -4494,9 +4494,8 @@ TEST(TestArrowReaderAdHoc, ReadFloat16Files) {
path += "/" + tc.filename + ".parquet";
ARROW_SCOPED_TRACE("path = ", path);

std::unique_ptr<FileReader> reader;
ASSERT_OK_NO_THROW(
FileReader::Make(pool, ParquetFileReader::OpenFile(path, false), &reader));
ASSERT_OK_AND_ASSIGN(
auto reader, FileReader::Make(pool, ParquetFileReader::OpenFile(path, false)));
std::shared_ptr<::arrow::Table> table;
ASSERT_OK_NO_THROW(reader->ReadTable(&table));

Expand Down Expand Up @@ -4539,9 +4538,8 @@ TEST(TestArrowFileReader, RecordBatchReaderEmptyRowGroups) {
default_arrow_writer_properties(), &buffer));

auto reader = ParquetFileReader::Open(std::make_shared<BufferReader>(buffer));
std::unique_ptr<FileReader> file_reader;
ASSERT_OK(
FileReader::Make(::arrow::default_memory_pool(), std::move(reader), &file_reader));
ASSERT_OK_AND_ASSIGN(auto file_reader, FileReader::Make(::arrow::default_memory_pool(),
std::move(reader)));
// This is the important part in this test.
std::vector<int> row_group_indices = {};
ASSERT_OK_AND_ASSIGN(auto record_batch_reader,
Expand All @@ -4567,9 +4565,8 @@ TEST(TestArrowFileReader, RecordBatchReaderEmptyInput) {
default_arrow_writer_properties(), &buffer));

auto reader = ParquetFileReader::Open(std::make_shared<BufferReader>(buffer));
std::unique_ptr<FileReader> file_reader;
ASSERT_OK(
FileReader::Make(::arrow::default_memory_pool(), std::move(reader), &file_reader));
ASSERT_OK_AND_ASSIGN(auto file_reader, FileReader::Make(::arrow::default_memory_pool(),
std::move(reader)));
ASSERT_OK_AND_ASSIGN(auto record_batch_reader, file_reader->GetRecordBatchReader());
std::shared_ptr<::arrow::RecordBatch> record_batch;
ASSERT_OK(record_batch_reader->ReadNext(&record_batch));
Expand All @@ -4591,9 +4588,8 @@ TEST(TestArrowColumnReader, NextBatchZeroBatchSize) {
default_arrow_writer_properties(), &buffer));

auto reader = ParquetFileReader::Open(std::make_shared<BufferReader>(buffer));
std::unique_ptr<FileReader> file_reader;
ASSERT_OK(
FileReader::Make(::arrow::default_memory_pool(), std::move(reader), &file_reader));
ASSERT_OK_AND_ASSIGN(auto file_reader, FileReader::Make(::arrow::default_memory_pool(),
std::move(reader)));
std::unique_ptr<arrow::ColumnReader> column_reader;
ASSERT_OK(file_reader->GetColumn(0, &column_reader));
std::shared_ptr<ChunkedArray> chunked_array;
Expand All @@ -4617,9 +4613,8 @@ TEST(TestArrowColumnReader, NextBatchEmptyInput) {
default_arrow_writer_properties(), &buffer));

auto reader = ParquetFileReader::Open(std::make_shared<BufferReader>(buffer));
std::unique_ptr<FileReader> file_reader;
ASSERT_OK(
FileReader::Make(::arrow::default_memory_pool(), std::move(reader), &file_reader));
ASSERT_OK_AND_ASSIGN(auto file_reader, FileReader::Make(::arrow::default_memory_pool(),
std::move(reader)));
std::unique_ptr<arrow::ColumnReader> column_reader;
ASSERT_OK(file_reader->GetColumn(0, &column_reader));
std::shared_ptr<ChunkedArray> chunked_array;
Expand Down Expand Up @@ -5141,9 +5136,9 @@ class TestArrowReadDeltaEncoding : public ::testing::Test {
std::shared_ptr<Table>* out) {
auto file = test::get_data_file(file_name);
auto pool = ::arrow::default_memory_pool();
std::unique_ptr<FileReader> parquet_reader;
ASSERT_OK(FileReader::Make(pool, ParquetFileReader::OpenFile(file, false),
&parquet_reader));
ASSERT_OK_AND_ASSIGN(
auto parquet_reader,
FileReader::Make(pool, ParquetFileReader::OpenFile(file, false)));
ASSERT_OK(parquet_reader->ReadTable(out));
ASSERT_OK((*out)->ValidateFull());
}
Expand Down Expand Up @@ -5201,9 +5196,9 @@ TEST_F(TestArrowReadDeltaEncoding, IncrementalDecodeDeltaByteArray) {
const int64_t batch_size = 100;
ArrowReaderProperties properties = default_arrow_reader_properties();
properties.set_batch_size(batch_size);
std::unique_ptr<FileReader> parquet_reader;
ASSERT_OK(FileReader::Make(pool, ParquetFileReader::OpenFile(file, false), properties,
&parquet_reader));
ASSERT_OK_AND_ASSIGN(
auto parquet_reader,
FileReader::Make(pool, ParquetFileReader::OpenFile(file, false), properties));
ASSERT_OK_AND_ASSIGN(auto rb_reader, parquet_reader->GetRecordBatchReader());

auto convert_options = ::arrow::csv::ConvertOptions::Defaults();
Expand Down Expand Up @@ -5729,8 +5724,8 @@ TEST(TestArrowReadWrite, WriteAndReadRecordBatch) {
auto read_properties = default_arrow_reader_properties();
read_properties.set_batch_size(record_batch->num_rows());
auto reader = ParquetFileReader::Open(std::make_shared<BufferReader>(buffer));
std::unique_ptr<FileReader> arrow_reader;
ASSERT_OK(FileReader::Make(pool, std::move(reader), read_properties, &arrow_reader));
ASSERT_OK_AND_ASSIGN(auto arrow_reader,
FileReader::Make(pool, std::move(reader), read_properties));

// Verify the single record batch has been sliced into two row groups by
// WriterProperties::max_row_group_length().
Expand Down
6 changes: 3 additions & 3 deletions cpp/src/parquet/arrow/arrow_statistics_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -202,9 +202,9 @@ ::arrow::Result<std::shared_ptr<::arrow::Array>> StatisticsReadArray(

auto reader =
ParquetFileReader::Open(std::make_shared<::arrow::io::BufferReader>(buffer));
std::unique_ptr<FileReader> file_reader;
ARROW_RETURN_NOT_OK(FileReader::Make(::arrow::default_memory_pool(), std::move(reader),
reader_properties, &file_reader));
ARROW_ASSIGN_OR_RAISE(auto file_reader,
FileReader::Make(::arrow::default_memory_pool(),
std::move(reader), reader_properties));
std::shared_ptr<::arrow::ChunkedArray> chunked_array;
ARROW_RETURN_NOT_OK(file_reader->ReadColumn(0, &chunked_array));
return chunked_array->chunk(0);
Expand Down
6 changes: 4 additions & 2 deletions cpp/src/parquet/arrow/fuzz_internal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -281,8 +281,10 @@ Status FuzzReader(const uint8_t* data, int64_t size) {
pq_file_reader = ParquetFileReader::Open(file, reader_properties, pq_md);
END_PARQUET_CATCH_EXCEPTIONS

std::unique_ptr<FileReader> reader;
RETURN_NOT_OK(FileReader::Make(pool, std::move(pq_file_reader), properties, &reader));
auto arrow_reader_result =
FileReader::Make(pool, std::move(pq_file_reader), properties);
RETURN_NOT_OK(arrow_reader_result.status());
auto reader = std::move(*arrow_reader_result);
st &= FuzzReadData(std::move(reader));
}
return st;
Expand Down
30 changes: 23 additions & 7 deletions cpp/src/parquet/arrow/reader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1343,14 +1343,30 @@ Status FileReader::Make(::arrow::MemoryPool* pool,
std::unique_ptr<ParquetFileReader> reader,
const ArrowReaderProperties& properties,
std::unique_ptr<FileReader>* out) {
*out = std::make_unique<FileReaderImpl>(pool, std::move(reader), properties);
return static_cast<FileReaderImpl*>(out->get())->Init();
ARROW_ASSIGN_OR_RAISE(*out, Make(pool, std::move(reader), properties));
return Status::OK();
}

Status FileReader::Make(::arrow::MemoryPool* pool,
std::unique_ptr<ParquetFileReader> reader,
std::unique_ptr<FileReader>* out) {
return Make(pool, std::move(reader), default_arrow_reader_properties(), out);
ARROW_ASSIGN_OR_RAISE(*out,
Make(pool, std::move(reader), default_arrow_reader_properties()));
return Status::OK();
}

Result<std::unique_ptr<FileReader>> FileReader::Make(
::arrow::MemoryPool* pool, std::unique_ptr<ParquetFileReader> parquet_reader,
const ArrowReaderProperties& properties) {
std::unique_ptr<FileReader> reader =
std::make_unique<FileReaderImpl>(pool, std::move(parquet_reader), properties);
RETURN_NOT_OK(static_cast<FileReaderImpl*>(reader.get())->Init());
return reader;
}

Result<std::unique_ptr<FileReader>> FileReader::Make(
::arrow::MemoryPool* pool, std::unique_ptr<ParquetFileReader> parquet_reader) {
return Make(pool, std::move(parquet_reader), default_arrow_reader_properties());
}

FileReaderBuilder::FileReaderBuilder()
Expand Down Expand Up @@ -1385,13 +1401,13 @@ FileReaderBuilder* FileReaderBuilder::properties(
}

Status FileReaderBuilder::Build(std::unique_ptr<FileReader>* out) {
return FileReader::Make(pool_, std::move(raw_reader_), properties_, out);
ARROW_ASSIGN_OR_RAISE(*out,
FileReader::Make(pool_, std::move(raw_reader_), properties_));
return Status::OK();
}

Result<std::unique_ptr<FileReader>> FileReaderBuilder::Build() {
std::unique_ptr<FileReader> out;
RETURN_NOT_OK(FileReader::Make(pool_, std::move(raw_reader_), properties_, &out));
return out;
return FileReader::Make(pool_, std::move(raw_reader_), properties_);
}

Result<std::unique_ptr<FileReader>> OpenFile(
Expand Down
13 changes: 13 additions & 0 deletions cpp/src/parquet/arrow/reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -116,16 +116,29 @@ class RowGroupReader;
class PARQUET_EXPORT FileReader {
public:
/// Factory function to create a FileReader from a ParquetFileReader and properties
/// \deprecated Deprecated in 23.0.0. Use arrow::Result version instead.
ARROW_DEPRECATED("Deprecated in 23.0.0. Use arrow::Result version instead.")
static ::arrow::Status Make(::arrow::MemoryPool* pool,
std::unique_ptr<ParquetFileReader> reader,
const ArrowReaderProperties& properties,
std::unique_ptr<FileReader>* out);

/// Factory function to create a FileReader from a ParquetFileReader
/// \deprecated Deprecated in 23.0.0. Use arrow::Result version instead.
ARROW_DEPRECATED("Deprecated in 23.0.0. Use arrow::Result version instead.")
static ::arrow::Status Make(::arrow::MemoryPool* pool,
std::unique_ptr<ParquetFileReader> reader,
std::unique_ptr<FileReader>* out);

/// Factory function to create a FileReader from a ParquetFileReader and properties
static ::arrow::Result<std::unique_ptr<FileReader>> Make(
::arrow::MemoryPool* pool, std::unique_ptr<ParquetFileReader> reader,
const ArrowReaderProperties& properties);

/// Factory function to create a FileReader from a ParquetFileReader
static ::arrow::Result<std::unique_ptr<FileReader>> Make(
::arrow::MemoryPool* pool, std::unique_ptr<ParquetFileReader> reader);

// Since the distribution of columns amongst a Parquet file's row groups may
// be uneven (the number of values in each column chunk can be different), we
// provide a column-oriented read interface. The ColumnReader hides the
Expand Down
Loading
Loading