-
Notifications
You must be signed in to change notification settings - Fork 595
Description
Most `cuml.explainer` kernels are parameterized by the model dtype (float32, float64), leading to 2 instances of each kernel. Some are also parameterized by the prediction type (float32, float64), leading to 4 instances of each kernel (2 of those 4 are never used by `cuml`, since we cast preds to match the model type).
Here are the main kernels in `cuml.explainer` today:
kernels.csv
kernel,bytes
"void ML::Explainer::update_perm_shap_values_kernel<double, int>(double*, double const*, int, int const*)",1132
"void ML::Explainer::update_perm_shap_values_kernel<float, int>(float*, float const*, int, int const*)",1132
"void ML::Explainer::exact_rows_kernel<double, int>(float*, int, int, double*, int, double*, double*)",2488
"void ML::Explainer::exact_rows_kernel<float, int>(float*, int, int, float*, int, float*, float*)",2488
"void ML::Explainer::_fused_tile_scatter_pe<double, int>(double*, double const*, int, int, double const*, int*, int, int, bool)",2913
"void ML::Explainer::_fused_tile_scatter_pe<float, int>(float*, float const*, int, int, float const*, int*, int, int, bool)",2913
"void ML::Explainer::sampled_rows_kernel<float, int>(int*, float*, int, int, float*, int, float*, float*, unsigned long)",13160
"void ML::Explainer::sampled_rows_kernel<double, int>(int*, float*, int, int, double*, int, double*, double*, unsigned long)",13288
"void gpu_treeshap::detail::ShapKernel<(anonymous namespace)::DenseDatasetWrapper<double>, 256ul, 1024ul, (anonymous namespace)::SplitCondition<float> >((anonymous namespace)::DenseDatasetWrapper<double>, unsigned long, gpu_treeshap::PathElement<(anonymous namespace)::SplitCondition<float> > const*, unsigned long const*, unsigned long, double*)",39620
"void gpu_treeshap::detail::ShapKernel<(anonymous namespace)::DenseDatasetWrapper<float>, 256ul, 1024ul, (anonymous namespace)::SplitCondition<float> >((anonymous namespace)::DenseDatasetWrapper<float>, unsigned long, gpu_treeshap::PathElement<(anonymous namespace)::SplitCondition<float> > const*, unsigned long const*, unsigned long, double*)",39620
"void gpu_treeshap::detail::ShapKernel<(anonymous namespace)::DenseDatasetWrapper<double>, 256ul, 1024ul, (anonymous namespace)::SplitCondition<double> >((anonymous namespace)::DenseDatasetWrapper<double>, unsigned long, gpu_treeshap::PathElement<(anonymous namespace)::SplitCondition<double> > const*, unsigned long const*, unsigned long, double*)",41668
"void gpu_treeshap::detail::ShapKernel<(anonymous namespace)::DenseDatasetWrapper<float>, 256ul, 1024ul, (anonymous namespace)::SplitCondition<double> >((anonymous namespace)::DenseDatasetWrapper<float>, unsigned long, gpu_treeshap::PathElement<(anonymous namespace)::SplitCondition<double> > const*, unsigned long const*, unsigned long, double*)",41668
"void gpu_treeshap::detail::ShapInterventionalKernel<(anonymous namespace)::DenseDatasetWrapper<double>, 256ul, 100ul, (anonymous namespace)::SplitCondition<float> >((anonymous namespace)::DenseDatasetWrapper<double>, (anonymous namespace)::DenseDatasetWrapper<double>, unsigned long, gpu_treeshap::PathElement<(anonymous namespace)::SplitCondition<float> > const*, unsigned long const*, unsigned long, double*)",54824
"void gpu_treeshap::detail::ShapInterventionalKernel<(anonymous namespace)::DenseDatasetWrapper<float>, 256ul, 100ul, (anonymous namespace)::SplitCondition<float> >((anonymous namespace)::DenseDatasetWrapper<float>, (anonymous namespace)::DenseDatasetWrapper<float>, unsigned long, gpu_treeshap::PathElement<(anonymous namespace)::SplitCondition<float> > const*, unsigned long const*, unsigned long, double*)",54824
"void gpu_treeshap::detail::ShapInterventionalKernel<(anonymous namespace)::DenseDatasetWrapper<double>, 256ul, 100ul, (anonymous namespace)::SplitCondition<double> >((anonymous namespace)::DenseDatasetWrapper<double>, (anonymous namespace)::DenseDatasetWrapper<double>, unsigned long, gpu_treeshap::PathElement<(anonymous namespace)::SplitCondition<double> > const*, unsigned long const*, unsigned long, double*)",57000
"void gpu_treeshap::detail::ShapTaylorInteractionsKernel<(anonymous namespace)::DenseDatasetWrapper<double>, 256ul, 100ul, (anonymous namespace)::SplitCondition<float> >((anonymous namespace)::DenseDatasetWrapper<double>, unsigned long, gpu_treeshap::PathElement<(anonymous namespace)::SplitCondition<float> > const*, unsigned long const*, unsigned long, double*)",57056
"void gpu_treeshap::detail::ShapTaylorInteractionsKernel<(anonymous namespace)::DenseDatasetWrapper<float>, 256ul, 100ul, (anonymous namespace)::SplitCondition<float> >((anonymous namespace)::DenseDatasetWrapper<float>, unsigned long, gpu_treeshap::PathElement<(anonymous namespace)::SplitCondition<float> > const*, unsigned long const*, unsigned long, double*)",57056
"void gpu_treeshap::detail::ShapInterventionalKernel<(anonymous namespace)::DenseDatasetWrapper<float>, 256ul, 100ul, (anonymous namespace)::SplitCondition<double> >((anonymous namespace)::DenseDatasetWrapper<float>, (anonymous namespace)::DenseDatasetWrapper<float>, unsigned long, gpu_treeshap::PathElement<(anonymous namespace)::SplitCondition<double> > const*, unsigned long const*, unsigned long, double*)",57128
"void gpu_treeshap::detail::ShapTaylorInteractionsKernel<(anonymous namespace)::DenseDatasetWrapper<double>, 256ul, 100ul, (anonymous namespace)::SplitCondition<double> >((anonymous namespace)::DenseDatasetWrapper<double>, unsigned long, gpu_treeshap::PathElement<(anonymous namespace)::SplitCondition<double> > const*, unsigned long const*, unsigned long, double*)",59104
"void gpu_treeshap::detail::ShapTaylorInteractionsKernel<(anonymous namespace)::DenseDatasetWrapper<float>, 256ul, 100ul, (anonymous namespace)::SplitCondition<double> >((anonymous namespace)::DenseDatasetWrapper<float>, unsigned long, gpu_treeshap::PathElement<(anonymous namespace)::SplitCondition<double> > const*, unsigned long const*, unsigned long, double*)",59104
"void gpu_treeshap::detail::ShapInteractionsKernel<(anonymous namespace)::DenseDatasetWrapper<double>, 256ul, 100ul, (anonymous namespace)::SplitCondition<float> >((anonymous namespace)::DenseDatasetWrapper<double>, unsigned long, gpu_treeshap::PathElement<(anonymous namespace)::SplitCondition<float> > const*, unsigned long const*, unsigned long, double*)",60032
"void gpu_treeshap::detail::ShapInteractionsKernel<(anonymous namespace)::DenseDatasetWrapper<float>, 256ul, 100ul, (anonymous namespace)::SplitCondition<float> >((anonymous namespace)::DenseDatasetWrapper<float>, unsigned long, gpu_treeshap::PathElement<(anonymous namespace)::SplitCondition<float> > const*, unsigned long const*, unsigned long, double*)",60032
"void gpu_treeshap::detail::ShapInteractionsKernel<(anonymous namespace)::DenseDatasetWrapper<double>, 256ul, 100ul, (anonymous namespace)::SplitCondition<double> >((anonymous namespace)::DenseDatasetWrapper<double>, unsigned long, gpu_treeshap::PathElement<(anonymous namespace)::SplitCondition<double> > const*, unsigned long const*, unsigned long, double*)",62592
"void gpu_treeshap::detail::ShapInteractionsKernel<(anonymous namespace)::DenseDatasetWrapper<float>, 256ul, 100ul, (anonymous namespace)::SplitCondition<double> >((anonymous namespace)::DenseDatasetWrapper<float>, unsigned long, gpu_treeshap::PathElement<(anonymous namespace)::SplitCondition<double> > const*, unsigned long const*, unsigned long, double*)",62720
By my calculations, if we instead moved to compiling for only float32 (or only float64), we'd shave off 0.64 MiB (for one SM architecture). Maybe worth it, considering that `cuml.explainer` isn't as performance sensitive.
Metadata
Metadata
Assignees
Labels
No labels