diff --git a/cpp/src/arrow/dataset/file_parquet.cc b/cpp/src/arrow/dataset/file_parquet.cc index 7ef60618700..6e0b1ce5b96 100644 --- a/cpp/src/arrow/dataset/file_parquet.cc +++ b/cpp/src/arrow/dataset/file_parquet.cc @@ -509,9 +509,9 @@ Result> ParquetFileFormat::GetReader std::shared_ptr reader_metadata = reader->metadata(); auto arrow_properties = MakeArrowReaderProperties(*this, *reader_metadata, *options, *parquet_scan_options); - std::unique_ptr arrow_reader; - RETURN_NOT_OK(parquet::arrow::FileReader::Make( - options->pool, std::move(reader), std::move(arrow_properties), &arrow_reader)); + ARROW_ASSIGN_OR_RAISE(auto arrow_reader, + parquet::arrow::FileReader::Make(options->pool, std::move(reader), + std::move(arrow_properties))); // R build with openSUSE155 requires an explicit shared_ptr construction return std::shared_ptr(std::move(arrow_reader)); } @@ -532,37 +532,37 @@ Future> ParquetFileFormat::GetReader source.filesystem(), options->pool); auto self = checked_pointer_cast(shared_from_this()); - return source.OpenAsync().Then( - [self = self, properties = std::move(properties), source = source, - options = options, metadata = metadata, - parquet_scan_options = parquet_scan_options]( - const std::shared_ptr& input) mutable { - return parquet::ParquetFileReader::OpenAsync(input, properties, metadata) - .Then( - [=](const std::unique_ptr& reader) mutable - -> Result> { - auto arrow_properties = MakeArrowReaderProperties( - *self, *reader->metadata(), *options, *parquet_scan_options); - - std::unique_ptr arrow_reader; - RETURN_NOT_OK(parquet::arrow::FileReader::Make( + return source.OpenAsync().Then([self = self, properties = std::move(properties), + source = source, options = options, metadata = metadata, + parquet_scan_options = parquet_scan_options]( + const std::shared_ptr& + input) mutable { + return parquet::ParquetFileReader::OpenAsync(input, properties, metadata) + .Then( + [=](const std::unique_ptr& reader) mutable + -> Result> { + auto arrow_properties = MakeArrowReaderProperties( + *self, *reader->metadata(), *options, *parquet_scan_options); + + ARROW_ASSIGN_OR_RAISE( + auto arrow_reader, + parquet::arrow::FileReader::Make( options->pool, // TODO(ARROW-12259): workaround since we have Future<(move-only // type)> It *wouldn't* be safe to const_cast reader except that // here we know there are no other waiters on the reader. std::move(const_cast&>( reader)), - arrow_properties, &arrow_reader)); - - // R build with openSUSE155 requires an explicit shared_ptr construction - return std::shared_ptr( - std::move(arrow_reader)); - }, - [path = source.path()](const Status& status) - -> Result> { - return WrapSourceError(status, path); - }); - }); + arrow_properties)); + + // R build with openSUSE155 requires an explicit shared_ptr construction + return std::shared_ptr(std::move(arrow_reader)); + }, + [path = source.path()](const Status& status) + -> Result> { + return WrapSourceError(status, path); + }); + }); } struct SlicingGenerator { diff --git a/cpp/src/parquet/arrow/arrow_reader_writer_test.cc b/cpp/src/parquet/arrow/arrow_reader_writer_test.cc index 0b73ec6549a..0831fb62675 100644 --- a/cpp/src/parquet/arrow/arrow_reader_writer_test.cc +++ b/cpp/src/parquet/arrow/arrow_reader_writer_test.cc @@ -4182,12 +4182,13 @@ void TryReadDataFile(const std::string& path, const std::string& expected_message = "") { auto pool = ::arrow::default_memory_pool(); - std::unique_ptr arrow_reader; - Status s = - FileReader::Make(pool, ParquetFileReader::OpenFile(path, false), &arrow_reader); - if (s.ok()) { + Status s; + auto reader_result = FileReader::Make(pool, ParquetFileReader::OpenFile(path, false)); + if (reader_result.ok()) { std::shared_ptr<::arrow::Table> table; - s = arrow_reader->ReadTable(&table); + s = (*reader_result)->ReadTable(&table); + } else { + s = reader_result.status(); } ASSERT_EQ(s.code(), expected_code) @@ -4259,14 +4260,15 @@ TEST(TestArrowReaderAdHoc, LARGE_MEMORY_TEST(LargeStringColumn)) { array.reset(); auto reader = ParquetFileReader::Open(std::make_shared(tables_buffer)); - std::unique_ptr arrow_reader; - ASSERT_OK(FileReader::Make(default_memory_pool(), std::move(reader), &arrow_reader)); + ASSERT_OK_AND_ASSIGN(auto arrow_reader, + FileReader::Make(default_memory_pool(), std::move(reader))); ASSERT_OK_NO_THROW(arrow_reader->ReadTable(&table)); ASSERT_OK(table->ValidateFull()); // ARROW-9297: ensure RecordBatchReader also works reader = ParquetFileReader::Open(std::make_shared(tables_buffer)); - ASSERT_OK(FileReader::Make(default_memory_pool(), std::move(reader), &arrow_reader)); + ASSERT_OK_AND_ASSIGN(arrow_reader, + FileReader::Make(default_memory_pool(), std::move(reader))); ASSERT_OK_AND_ASSIGN(auto batch_reader, arrow_reader->GetRecordBatchReader()); ASSERT_OK_AND_ASSIGN(auto batched_table, ::arrow::Table::FromRecordBatchReader(batch_reader.get())); @@ -4362,9 +4364,8 @@ TEST(TestArrowReaderAdHoc, LegacyTwoLevelList) { ASSERT_EQ(kExpectedLegacyList, nodeStr.str()); // Verify Arrow schema and data - std::unique_ptr reader; - ASSERT_OK_NO_THROW( - FileReader::Make(default_memory_pool(), std::move(file_reader), &reader)); + ASSERT_OK_AND_ASSIGN(auto reader, + FileReader::Make(default_memory_pool(), std::move(file_reader))); std::shared_ptr table; ASSERT_OK(reader->ReadTable(&table)); ASSERT_OK(table->ValidateFull()); @@ -4427,9 +4428,8 @@ TEST_P(TestArrowReaderAdHocSparkAndHvr, ReadDecimals) { auto pool = ::arrow::default_memory_pool(); - std::unique_ptr arrow_reader; - ASSERT_OK_NO_THROW( - FileReader::Make(pool, ParquetFileReader::OpenFile(path, false), &arrow_reader)); + ASSERT_OK_AND_ASSIGN(auto arrow_reader, + FileReader::Make(pool, ParquetFileReader::OpenFile(path, false))); std::shared_ptr<::arrow::Table> table; ASSERT_OK_NO_THROW(arrow_reader->ReadTable(&table)); @@ -4494,9 +4494,8 @@ TEST(TestArrowReaderAdHoc, ReadFloat16Files) { path += "/" + tc.filename + ".parquet"; ARROW_SCOPED_TRACE("path = ", path); - std::unique_ptr reader; - ASSERT_OK_NO_THROW( - FileReader::Make(pool, ParquetFileReader::OpenFile(path, false), &reader)); + ASSERT_OK_AND_ASSIGN( + auto reader, FileReader::Make(pool, ParquetFileReader::OpenFile(path, false))); std::shared_ptr<::arrow::Table> table; ASSERT_OK_NO_THROW(reader->ReadTable(&table)); @@ -4539,9 +4538,8 @@ TEST(TestArrowFileReader, RecordBatchReaderEmptyRowGroups) { default_arrow_writer_properties(), &buffer)); auto reader = ParquetFileReader::Open(std::make_shared(buffer)); - std::unique_ptr file_reader; - ASSERT_OK( - FileReader::Make(::arrow::default_memory_pool(), std::move(reader), &file_reader)); + ASSERT_OK_AND_ASSIGN(auto file_reader, FileReader::Make(::arrow::default_memory_pool(), + std::move(reader))); // This is the important part in this test. std::vector row_group_indices = {}; ASSERT_OK_AND_ASSIGN(auto record_batch_reader, @@ -4567,9 +4565,8 @@ TEST(TestArrowFileReader, RecordBatchReaderEmptyInput) { default_arrow_writer_properties(), &buffer)); auto reader = ParquetFileReader::Open(std::make_shared(buffer)); - std::unique_ptr file_reader; - ASSERT_OK( - FileReader::Make(::arrow::default_memory_pool(), std::move(reader), &file_reader)); + ASSERT_OK_AND_ASSIGN(auto file_reader, FileReader::Make(::arrow::default_memory_pool(), + std::move(reader))); ASSERT_OK_AND_ASSIGN(auto record_batch_reader, file_reader->GetRecordBatchReader()); std::shared_ptr<::arrow::RecordBatch> record_batch; ASSERT_OK(record_batch_reader->ReadNext(&record_batch)); @@ -4591,9 +4588,8 @@ TEST(TestArrowColumnReader, NextBatchZeroBatchSize) { default_arrow_writer_properties(), &buffer)); auto reader = ParquetFileReader::Open(std::make_shared(buffer)); - std::unique_ptr file_reader; - ASSERT_OK( - FileReader::Make(::arrow::default_memory_pool(), std::move(reader), &file_reader)); + ASSERT_OK_AND_ASSIGN(auto file_reader, FileReader::Make(::arrow::default_memory_pool(), + std::move(reader))); std::unique_ptr column_reader; ASSERT_OK(file_reader->GetColumn(0, &column_reader)); std::shared_ptr chunked_array; @@ -4617,9 +4613,8 @@ TEST(TestArrowColumnReader, NextBatchEmptyInput) { default_arrow_writer_properties(), &buffer)); auto reader = ParquetFileReader::Open(std::make_shared(buffer)); - std::unique_ptr file_reader; - ASSERT_OK( - FileReader::Make(::arrow::default_memory_pool(), std::move(reader), &file_reader)); + ASSERT_OK_AND_ASSIGN(auto file_reader, FileReader::Make(::arrow::default_memory_pool(), + std::move(reader))); std::unique_ptr column_reader; ASSERT_OK(file_reader->GetColumn(0, &column_reader)); std::shared_ptr chunked_array; @@ -5141,9 +5136,9 @@ class TestArrowReadDeltaEncoding : public ::testing::Test { std::shared_ptr
* out) { auto file = test::get_data_file(file_name); auto pool = ::arrow::default_memory_pool(); - std::unique_ptr parquet_reader; - ASSERT_OK(FileReader::Make(pool, ParquetFileReader::OpenFile(file, false), - &parquet_reader)); + ASSERT_OK_AND_ASSIGN( + auto parquet_reader, + FileReader::Make(pool, ParquetFileReader::OpenFile(file, false))); ASSERT_OK(parquet_reader->ReadTable(out)); ASSERT_OK((*out)->ValidateFull()); } @@ -5201,9 +5196,9 @@ TEST_F(TestArrowReadDeltaEncoding, IncrementalDecodeDeltaByteArray) { const int64_t batch_size = 100; ArrowReaderProperties properties = default_arrow_reader_properties(); properties.set_batch_size(batch_size); - std::unique_ptr parquet_reader; - ASSERT_OK(FileReader::Make(pool, ParquetFileReader::OpenFile(file, false), properties, - &parquet_reader)); + ASSERT_OK_AND_ASSIGN( + auto parquet_reader, + FileReader::Make(pool, ParquetFileReader::OpenFile(file, false), properties)); ASSERT_OK_AND_ASSIGN(auto rb_reader, parquet_reader->GetRecordBatchReader()); auto convert_options = ::arrow::csv::ConvertOptions::Defaults(); @@ -5729,8 +5724,8 @@ TEST(TestArrowReadWrite, WriteAndReadRecordBatch) { auto read_properties = default_arrow_reader_properties(); read_properties.set_batch_size(record_batch->num_rows()); auto reader = ParquetFileReader::Open(std::make_shared(buffer)); - std::unique_ptr arrow_reader; - ASSERT_OK(FileReader::Make(pool, std::move(reader), read_properties, &arrow_reader)); + ASSERT_OK_AND_ASSIGN(auto arrow_reader, + FileReader::Make(pool, std::move(reader), read_properties)); // Verify the single record batch has been sliced into two row groups by // WriterProperties::max_row_group_length(). diff --git a/cpp/src/parquet/arrow/arrow_statistics_test.cc b/cpp/src/parquet/arrow/arrow_statistics_test.cc index 4b97d04fe97..27a76fd72be 100644 --- a/cpp/src/parquet/arrow/arrow_statistics_test.cc +++ b/cpp/src/parquet/arrow/arrow_statistics_test.cc @@ -202,9 +202,9 @@ ::arrow::Result> StatisticsReadArray( auto reader = ParquetFileReader::Open(std::make_shared<::arrow::io::BufferReader>(buffer)); - std::unique_ptr file_reader; - ARROW_RETURN_NOT_OK(FileReader::Make(::arrow::default_memory_pool(), std::move(reader), - reader_properties, &file_reader)); + ARROW_ASSIGN_OR_RAISE(auto file_reader, + FileReader::Make(::arrow::default_memory_pool(), + std::move(reader), reader_properties)); std::shared_ptr<::arrow::ChunkedArray> chunked_array; ARROW_RETURN_NOT_OK(file_reader->ReadColumn(0, &chunked_array)); return chunked_array->chunk(0); diff --git a/cpp/src/parquet/arrow/fuzz_internal.cc b/cpp/src/parquet/arrow/fuzz_internal.cc index 95df3b2c475..b2e295b435d 100644 --- a/cpp/src/parquet/arrow/fuzz_internal.cc +++ b/cpp/src/parquet/arrow/fuzz_internal.cc @@ -281,8 +281,10 @@ Status FuzzReader(const uint8_t* data, int64_t size) { pq_file_reader = ParquetFileReader::Open(file, reader_properties, pq_md); END_PARQUET_CATCH_EXCEPTIONS - std::unique_ptr reader; - RETURN_NOT_OK(FileReader::Make(pool, std::move(pq_file_reader), properties, &reader)); + auto arrow_reader_result = + FileReader::Make(pool, std::move(pq_file_reader), properties); + RETURN_NOT_OK(arrow_reader_result.status()); + auto reader = std::move(*arrow_reader_result); st &= FuzzReadData(std::move(reader)); } return st; diff --git a/cpp/src/parquet/arrow/reader.cc b/cpp/src/parquet/arrow/reader.cc index 34783e83368..6b700870772 100644 --- a/cpp/src/parquet/arrow/reader.cc +++ b/cpp/src/parquet/arrow/reader.cc @@ -1343,14 +1343,30 @@ Status FileReader::Make(::arrow::MemoryPool* pool, std::unique_ptr reader, const ArrowReaderProperties& properties, std::unique_ptr* out) { - *out = std::make_unique(pool, std::move(reader), properties); - return static_cast(out->get())->Init(); + ARROW_ASSIGN_OR_RAISE(*out, Make(pool, std::move(reader), properties)); + return Status::OK(); } Status FileReader::Make(::arrow::MemoryPool* pool, std::unique_ptr reader, std::unique_ptr* out) { - return Make(pool, std::move(reader), default_arrow_reader_properties(), out); + ARROW_ASSIGN_OR_RAISE(*out, + Make(pool, std::move(reader), default_arrow_reader_properties())); + return Status::OK(); +} + +Result> FileReader::Make( + ::arrow::MemoryPool* pool, std::unique_ptr parquet_reader, + const ArrowReaderProperties& properties) { + std::unique_ptr reader = + std::make_unique(pool, std::move(parquet_reader), properties); + RETURN_NOT_OK(static_cast(reader.get())->Init()); + return reader; +} + +Result> FileReader::Make( + ::arrow::MemoryPool* pool, std::unique_ptr parquet_reader) { + return Make(pool, std::move(parquet_reader), default_arrow_reader_properties()); } FileReaderBuilder::FileReaderBuilder() @@ -1385,13 +1401,13 @@ FileReaderBuilder* FileReaderBuilder::properties( } Status FileReaderBuilder::Build(std::unique_ptr* out) { - return FileReader::Make(pool_, std::move(raw_reader_), properties_, out); + ARROW_ASSIGN_OR_RAISE(*out, + FileReader::Make(pool_, std::move(raw_reader_), properties_)); + return Status::OK(); } Result> FileReaderBuilder::Build() { - std::unique_ptr out; - RETURN_NOT_OK(FileReader::Make(pool_, std::move(raw_reader_), properties_, &out)); - return out; + return FileReader::Make(pool_, std::move(raw_reader_), properties_); } Result> OpenFile( diff --git a/cpp/src/parquet/arrow/reader.h b/cpp/src/parquet/arrow/reader.h index 1342d0bfa6f..54620b3d0f5 100644 --- a/cpp/src/parquet/arrow/reader.h +++ b/cpp/src/parquet/arrow/reader.h @@ -116,16 +116,29 @@ class RowGroupReader; class PARQUET_EXPORT FileReader { public: /// Factory function to create a FileReader from a ParquetFileReader and properties + /// \deprecated Deprecated in 23.0.0. Use arrow::Result version instead. + ARROW_DEPRECATED("Deprecated in 23.0.0. Use arrow::Result version instead.") static ::arrow::Status Make(::arrow::MemoryPool* pool, std::unique_ptr reader, const ArrowReaderProperties& properties, std::unique_ptr* out); /// Factory function to create a FileReader from a ParquetFileReader + /// \deprecated Deprecated in 23.0.0. Use arrow::Result version instead. + ARROW_DEPRECATED("Deprecated in 23.0.0. Use arrow::Result version instead.") static ::arrow::Status Make(::arrow::MemoryPool* pool, std::unique_ptr reader, std::unique_ptr* out); + /// Factory function to create a FileReader from a ParquetFileReader and properties + static ::arrow::Result> Make( + ::arrow::MemoryPool* pool, std::unique_ptr reader, + const ArrowReaderProperties& properties); + + /// Factory function to create a FileReader from a ParquetFileReader + static ::arrow::Result> Make( + ::arrow::MemoryPool* pool, std::unique_ptr reader); + // Since the distribution of columns amongst a Parquet file's row groups may // be uneven (the number of values in each column chunk can be different), we // provide a column-oriented read interface. The ColumnReader hides the diff --git a/cpp/src/parquet/arrow/reader_writer_benchmark.cc b/cpp/src/parquet/arrow/reader_writer_benchmark.cc index a80386676d2..cfb458ddb38 100644 --- a/cpp/src/parquet/arrow/reader_writer_benchmark.cc +++ b/cpp/src/parquet/arrow/reader_writer_benchmark.cc @@ -296,9 +296,10 @@ static void BenchmarkReadTable(::benchmark::State& state, const Table& table, for (auto _ : state) { auto reader = ParquetFileReader::Open(std::make_shared<::arrow::io::BufferReader>(buffer)); - std::unique_ptr arrow_reader; - EXIT_NOT_OK(FileReader::Make(::arrow::default_memory_pool(), std::move(reader), - &arrow_reader)); + auto arrow_reader_result = + FileReader::Make(::arrow::default_memory_pool(), std::move(reader)); + EXIT_NOT_OK(arrow_reader_result.status()); + auto arrow_reader = std::move(*arrow_reader_result); std::shared_ptr
table; EXIT_NOT_OK(arrow_reader->ReadTable(&table)); @@ -735,9 +736,10 @@ static void BM_ReadIndividualRowGroups(::benchmark::State& state) { while (state.KeepRunning()) { auto reader = ParquetFileReader::Open(std::make_shared<::arrow::io::BufferReader>(buffer)); - std::unique_ptr arrow_reader; - EXIT_NOT_OK(FileReader::Make(::arrow::default_memory_pool(), std::move(reader), - &arrow_reader)); + auto arrow_reader_result = + FileReader::Make(::arrow::default_memory_pool(), std::move(reader)); + EXIT_NOT_OK(arrow_reader_result.status()); + auto arrow_reader = std::move(*arrow_reader_result); std::vector> tables; for (int i = 0; i < arrow_reader->num_row_groups(); i++) { @@ -770,9 +772,11 @@ static void BM_ReadMultipleRowGroups(::benchmark::State& state) { while (state.KeepRunning()) { auto reader = ParquetFileReader::Open(std::make_shared<::arrow::io::BufferReader>(buffer)); - std::unique_ptr arrow_reader; - EXIT_NOT_OK(FileReader::Make(::arrow::default_memory_pool(), std::move(reader), - &arrow_reader)); + auto arrow_reader_result = + FileReader::Make(::arrow::default_memory_pool(), std::move(reader)); + EXIT_NOT_OK(arrow_reader_result.status()); + auto arrow_reader = std::move(*arrow_reader_result); + std::shared_ptr
table; EXIT_NOT_OK(arrow_reader->ReadRowGroups(rgs, &table)); } @@ -794,10 +798,10 @@ static void BM_ReadMultipleRowGroupsGenerator(::benchmark::State& state) { while (state.KeepRunning()) { auto reader = ParquetFileReader::Open(std::make_shared<::arrow::io::BufferReader>(buffer)); - std::unique_ptr unique_reader; - EXIT_NOT_OK(FileReader::Make(::arrow::default_memory_pool(), std::move(reader), - &unique_reader)); - std::shared_ptr arrow_reader = std::move(unique_reader); + auto arrow_reader_result = + FileReader::Make(::arrow::default_memory_pool(), std::move(reader)); + EXIT_NOT_OK(arrow_reader_result.status()); + std::shared_ptr arrow_reader = std::move(*arrow_reader_result); ASSIGN_OR_ABORT(auto generator, arrow_reader->GetRecordBatchGenerator(arrow_reader, rgs, {0})); auto fut = ::arrow::CollectAsyncGenerator(generator);