-
Notifications
You must be signed in to change notification settings - Fork 981
Accelerate data page mask computation on device #20280
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 23 commits
df8100d
bc6f08b
30dbbee
73f64f2
de65183
2519f5f
f6b79c7
aef9d64
ebf460a
c62f6b7
f9e8f85
9ea83f5
38a640b
0071dc7
06dfca4
8ac9c3a
e5e12ec
3d056ed
60c789f
55948f0
a2221a5
fc6c534
488b1de
33489df
ffacb93
c820f50
2753a46
f9e6848
b4f9d5b
566a1ee
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -455,7 +455,7 @@ table_with_metadata hybrid_scan_reader_impl::materialize_filter_columns( | |
| (mask_data_pages == use_data_page_mask::YES) | ||
| ? _extended_metadata->compute_data_page_mask( | ||
| row_mask, row_group_indices, _input_columns, _rows_processed_so_far, stream) | ||
| : std::vector<std::vector<bool>>{}; | ||
| : cudf::detail::make_empty_host_vector<bool>(0, stream); | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Does this data eventually get copied to the GPU? If not, we should just use There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. No, this gets converted to the |
||
|
|
||
| prepare_data( | ||
| read_mode::READ_ALL, row_group_indices, std::move(column_chunk_buffers), data_page_mask); | ||
|
|
@@ -485,7 +485,7 @@ table_with_metadata hybrid_scan_reader_impl::materialize_payload_columns( | |
| (mask_data_pages == use_data_page_mask::YES) | ||
| ? _extended_metadata->compute_data_page_mask( | ||
| row_mask, row_group_indices, _input_columns, _rows_processed_so_far, stream) | ||
| : std::vector<std::vector<bool>>{}; | ||
| : cudf::detail::make_empty_host_vector<bool>(0, stream); | ||
|
|
||
| prepare_data( | ||
| read_mode::READ_ALL, row_group_indices, std::move(column_chunk_buffers), data_page_mask); | ||
|
|
@@ -524,7 +524,7 @@ void hybrid_scan_reader_impl::setup_chunking_for_filter_columns( | |
| (mask_data_pages == use_data_page_mask::YES) | ||
| ? _extended_metadata->compute_data_page_mask( | ||
| row_mask, row_group_indices, _input_columns, _rows_processed_so_far, _stream) | ||
| : std::vector<std::vector<bool>>{}; | ||
| : cudf::detail::make_empty_host_vector<bool>(0, _stream); | ||
|
|
||
| prepare_data( | ||
| read_mode::CHUNKED_READ, row_group_indices, std::move(column_chunk_buffers), data_page_mask); | ||
|
|
@@ -575,7 +575,7 @@ void hybrid_scan_reader_impl::setup_chunking_for_payload_columns( | |
| (mask_data_pages == use_data_page_mask::YES) | ||
| ? _extended_metadata->compute_data_page_mask( | ||
| row_mask, row_group_indices, _input_columns, _rows_processed_so_far, _stream) | ||
| : std::vector<std::vector<bool>>{}; | ||
| : cudf::detail::make_empty_host_vector<bool>(0, _stream); | ||
|
|
||
| prepare_data( | ||
| read_mode::CHUNKED_READ, row_group_indices, std::move(column_chunk_buffers), data_page_mask); | ||
|
|
@@ -656,7 +656,7 @@ void hybrid_scan_reader_impl::prepare_data( | |
| read_mode mode, | ||
| cudf::host_span<std::vector<size_type> const> row_group_indices, | ||
| std::vector<rmm::device_buffer>&& column_chunk_buffers, | ||
| cudf::host_span<std::vector<bool> const> data_page_mask) | ||
| cudf::host_span<bool const> data_page_mask) | ||
| { | ||
| // if we have not preprocessed at the whole-file level, do that now | ||
| if (not _file_preprocessed) { | ||
|
|
@@ -885,8 +885,7 @@ table_with_metadata hybrid_scan_reader_impl::finalize_output( | |
| } | ||
| } | ||
|
|
||
| void hybrid_scan_reader_impl::set_pass_page_mask( | ||
| cudf::host_span<std::vector<bool> const> data_page_mask) | ||
| void hybrid_scan_reader_impl::set_pass_page_mask(cudf::host_span<bool const> data_page_mask) | ||
| { | ||
| auto const& pass = _pass_itm_data; | ||
| auto const& chunks = pass->chunks; | ||
|
|
@@ -900,13 +899,11 @@ void hybrid_scan_reader_impl::set_pass_page_mask( | |
| return; | ||
| } | ||
|
|
||
| size_t num_inserted_data_pages = 0; | ||
| std::for_each( | ||
| thrust::counting_iterator<size_t>(0), | ||
| thrust::counting_iterator(_input_columns.size()), | ||
| [&](auto col_idx) { | ||
| auto const& col_page_mask = data_page_mask[col_idx]; | ||
| size_t num_inserted_data_pages = 0; | ||
|
|
||
| for (size_t chunk_idx = col_idx; chunk_idx < chunks.size(); chunk_idx += num_columns) { | ||
| // Insert a true value for each dictionary page | ||
| if (chunks[chunk_idx].num_dict_pages > 0) { _pass_page_mask.push_back(true); } | ||
|
|
@@ -916,21 +913,17 @@ void hybrid_scan_reader_impl::set_pass_page_mask( | |
|
|
||
| // Make sure we have enough page mask for this column chunk | ||
| CUDF_EXPECTS( | ||
| col_page_mask.size() >= num_inserted_data_pages + num_data_pages_this_col_chunk, | ||
| data_page_mask.size() >= num_inserted_data_pages + num_data_pages_this_col_chunk, | ||
| "Encountered invalid data page mask size"); | ||
|
|
||
| // Insert page mask for this column chunk | ||
| _pass_page_mask.insert( | ||
| _pass_page_mask.end(), | ||
| col_page_mask.begin() + num_inserted_data_pages, | ||
| col_page_mask.begin() + num_inserted_data_pages + num_data_pages_this_col_chunk); | ||
|
|
||
| data_page_mask.begin() + num_inserted_data_pages, | ||
| data_page_mask.begin() + num_inserted_data_pages + num_data_pages_this_col_chunk); | ||
| // Update the number of inserted data pages | ||
| num_inserted_data_pages += num_data_pages_this_col_chunk; | ||
| } | ||
| // Make sure we inserted exactly the number of data pages for this column | ||
| CUDF_EXPECTS(num_inserted_data_pages == col_page_mask.size(), | ||
| "Encountered mismatch in number of data pages and page mask size"); | ||
| }); | ||
|
|
||
| // Make sure we inserted exactly the number of pages for this pass | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We want to see an exception if the tables aren't equal, so that the user is alerted.