uxlfoundation · icfaust · Oct 15, 2025 · Jun 11, 2025 · Jun 11, 2025 · Jun 11, 2025
@@ -52,6 +52,7 @@ def parse_tests_tree(entry, prefix=""):
     "model_selection/tests": ["test_split.py", "test_validation.py"],
     "neighbors/tests": ["test_lof.py", "test_neighbors.py", "test_neighbors_pipeline.py"],
     "svm/tests": ["test_sparse.py", "test_svm.py"],
+    "tests": "test_dummy.py",
 }
 if sklearn_check_version("1.2"):
     tests_map["tests"] = ["test_public_functions.py"]

@@ -122,6 +122,7 @@ def __repr__(self) -> str:
     "_spmd_backend",
     "covariance",
     "decomposition",
+    "dummy",
     "ensemble",
     "neighbors",
     "primitives",

@@ -78,6 +78,7 @@ ONEDAL_PY_INIT_MODULE(logistic_regression);
 #if defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20240700
 ONEDAL_PY_INIT_MODULE(finiteness_checker);
 #endif // defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20240700
+ONEDAL_PY_INIT_MODULE(dummy);
 #endif // ONEDAL_DATA_PARALLEL_SPMD
 
 #ifdef ONEDAL_DATA_PARALLEL_SPMD
@@ -138,6 +139,7 @@ PYBIND11_MODULE(_onedal_py_host, m) {
 #if defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20240700
     init_finiteness_checker(m);
 #endif // defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20240700
+    init_dummy(m);
 }
 #endif // ONEDAL_DATA_PARALLEL_SPMD
 

@@ -0,0 +1,19 @@
+# ==============================================================================
+# Copyright Contributors to the oneDAL Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+from .dummy import DummyEstimator
+
+__all__ = ["DummyEstimator"]
@@ -0,0 +1,195 @@
+/*******************************************************************************
+* Copyright Contributors to the oneDAL Project
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#include "onedal/common.hpp"
+#include "onedal/version.hpp"
+// A fake oneDAL algorithm is included via the `dummy_onedal.hpp` header. In
+// normal circumstances a header for the oneDAL algorithm would be
+// included here from the oneDAL `oneapi/dal/algo/` folder.
+#include "onedal/dummy/dummy_onedal.hpp"
+#include "oneapi/dal/table/common.hpp"
+#include "oneapi/dal/table/homogen.hpp"
+
+namespace py = pybind11;
+
+// oneDAL-python interaction code is located in this namespace
+namespace oneapi::dal::python {
+
+// pybind11 structures and functions of the 'dummy' algorithm
+namespace dummy {
+
+template <typename Task, typename Ops>
+struct method2t {
+    // `task` is not stored; passing it lets `Task` be deduced when the
+    // functor is built without explicit template arguments (see the
+    // `method2t{ Task{}, ops }` calls below). `ops` is copied into the functor.
+    method2t(const Task& task, const Ops& ops) : ops(ops) {}
+    // this functor maps the "method" entry of the params dict onto a oneDAL
+    // method type (method::dense or method::by_default) for the given
+    // floating point type. Methods and tasks are specific to each algorithm,
+    // therefore method2t is often defined for each algo.
+    // NOTE(review): `Task` is not passed to the dispatch macros below --
+    // confirm that `ops` does not need it as a template argument.
+    template <typename Float>
+    auto operator()(const py::dict& params) {
+        using namespace dal::dummy;
+        const auto method = params["method"].cast<std::string>();
+
+        // each macro handles one accepted method string; the last macro
+        // handles every other value (presumably by throwing -- the macro
+        // definitions are not part of this file).
+        ONEDAL_PARAM_DISPATCH_VALUE(method, "dense", ops, Float, method::dense);
+        ONEDAL_PARAM_DISPATCH_VALUE(method, "by_default", ops, Float, method::by_default);
+        ONEDAL_PARAM_DISPATCH_THROW_INVALID_VALUE(method);
+    }
+
+    Ops ops;
+};
+
+struct params2desc {
+    // This functor converts the params dictionary into a oneDAL descriptor.
+    // `Float`, `Method` and `Task` select the descriptor type and are
+    // supplied as template arguments rather than read from the dict here.
+    template <typename Float, typename Method, typename Task>
+    auto operator()(const py::dict& params) {
+        auto desc = dal::dummy::descriptor<Float, Method, Task>();
+
+        // conversion of the params dict to oneDAL params occurs here, except
+        // for the ``method`` and ``fptype`` entries, which are not read in
+        // this functor. Every other entry is assigned to the descriptor
+        // individually before returning.
+        const auto constant = params["constant"].cast<double>();
+        desc.set_constant(constant);
+
+        return desc;
+    }
+};
+
+// the following functions define the python interface methods for the
+// oneDAL algorithms. They are templated for the policy (which may be host,
+// dpc, or spmd), and task, which is defined per algorithm.  They are all
+// defined using lambda functions (a common occurrence for pybind11), but
+// that is not a requirement.
+template <typename Policy, typename Task>
+void init_train_ops(py::module_& m) {
+    // Registers ``train(policy, params, data)`` on the module ``m``.
+    m.def("train", [](const Policy& policy, const py::dict& params, const table& data) {
+        using namespace dal::dummy;
+        using input_t = train_input<Task>;
+        // while there is a train_ops defined for each oneDAL algorithm
+        // which supports ``train``, this is the train_ops defined in
+        // onedal/common/dispatch_utils.hpp
+        train_ops ops(policy, input_t{ data }, params2desc{});
+        // fptype2t is defined in common/dispatch_utils.hpp
+        // which operates in a similar manner to the method2t functor
+        // it selects the floating point datatype for the calculation
+        return fptype2t{ method2t{ Task{}, ops } }(params);
+    });
+}
+
+template <typename Policy, typename Task>
+void init_infer_ops(py::module_& m) {
+    // Registers ``infer(policy, params, constant, data)`` on the module
+    // ``m``. Note that the input object below is built from ``data`` first
+    // and ``constant`` second, the reverse of the lambda's argument order.
+    m.def(
+        "infer",
+        [](const Policy& policy, const py::dict& params, const table& constant, const table& data) {
+            using namespace dal::dummy;
+            using input_t = infer_input<Task>;
+
+            infer_ops ops(policy, input_t{ data, constant }, params2desc{});
+            // with the use of functors the order of operations is as
+            // follows: Task is generated, the ops is already created above,
+            // method2t is constructed, and then fptype2t is constructed.
+            // It is then evaluated in opposite order sequentially on the
+            // params dict.
+            return fptype2t{ method2t{ Task{}, ops } }(params);
+        });
+}
+
+// This defines the result C++ objects for use in python via pybind11.
+// Result object attributes should be pybind11 native types (like int,
+// float, etc.) or oneDAL tables.
+
+template <typename Task>
+void init_train_result(py::module_& m) {
+    // Exposes ``train_result<Task>`` to python as the class ``train_result``
+    // with a no-argument constructor; its ``data`` member is exposed
+    // through the DEF_ONEDAL_PY_PROPERTY macro.
+    using namespace dal::dummy;
+    using result_t = train_result<Task>;
+
+    py::class_<result_t>(m, "train_result").def(py::init()).DEF_ONEDAL_PY_PROPERTY(data, result_t);
+}
+
+template <typename Task>
+void init_infer_result(py::module_& m) {
+    // Exposes ``infer_result<Task>`` to python as the class ``infer_result``
+    // with a no-argument constructor; its ``data`` member is exposed
+    // through the DEF_ONEDAL_PY_PROPERTY macro.
+    using namespace dal::dummy;
+    using result_t = infer_result<Task>;
+
+    py::class_<result_t>(m, "infer_result").def(py::init()).DEF_ONEDAL_PY_PROPERTY(data, result_t);
+}
+
+// Declare an instantiator for each init function defined above; these are
+// the names handed to ONEDAL_PY_INSTANTIATE in the module init below.
+ONEDAL_PY_DECLARE_INSTANTIATOR(init_train_result);
+ONEDAL_PY_DECLARE_INSTANTIATOR(init_infer_result);
+ONEDAL_PY_DECLARE_INSTANTIATOR(init_train_ops);
+ONEDAL_PY_DECLARE_INSTANTIATOR(init_infer_ops);
+
+} // namespace dummy
+
+ONEDAL_PY_INIT_MODULE(dummy) {
+    using namespace dummy;
+    using namespace dal::detail;
+    using namespace dal::dummy;
+
+    // the task_list allows for multiple types of tasks (like regression
+    // and classification) templates to be evaluated. The use of 'types'
+    // is not required, and has special implications for the
+    // 'bind_default_backend' function as it creates submodules in python
+    // based on the task name. See the covariance implementation
+    // where no task_list is used and a submodule of the algorithm is not
+    // made.
+    using task_list = types<task::generate>;
+    auto sub = m.def_submodule("dummy");
+
+    // explicitly define the templates based off of the policy and task
+    // lists. These instantiations lead to a cascade of fully-resolved
+    // templates from oneDAL.  It begins by fully resolving functors defined
+    // here and the oneDAL descriptor. It then fully specifies functors in
+    // common/dispatch_utils.hpp, which starts resolving oneDAL objects
+    // for the algorithm like the train_ops/infer_ops functors defined there.
+    // This leads to a fair amount of compile-time work with oneDAL headers.
+    // For example take init_train_ops in approximate reverse order
+    // (to show how it goes from here to oneDAL):
+    //
+    // 0. Creates pybind11 interface
+    // 1. Specifies lambda defined in init_train_ops
+    // 2. Specifies fptype2t
+    // 3. Specifies method2t
+    // 4. Specifies train_ops defined in common/dispatch_utils.hpp
+    // 5. Specifies train defined in oneapi/dal/train.hpp
+    // 6. Specifies train_dispatch in oneapi/dal/detail/train_ops.hpp
+    // 7. Specifies several functors in oneapi/dal/detail/ops_dispatcher.hpp
+    // 8. Specifies train_ops defined in algorithm's train_ops.hpp
+    // 9. Specifies oneDAL train_input, train_result and descriptor structs
+    /**** finally hits objects compiled in oneDAL for the computation ****/
+    // (train_ops_dispatcher for example)
+    //
+    // It's not clear how many layers of these indirections are compiled
+    // versus optimized away. The namings in dispatch_utils.hpp are also
+    // unfortunate and confusing.
+
+    // policy_list is defined elsewhere which is dependent on the backend
+    // which is being built. Placed within a macro-check in order to prevent
+    // use with an spmd policy.
+#ifndef ONEDAL_DATA_PARALLEL_SPMD
+    ONEDAL_PY_INSTANTIATE(init_train_ops, sub, policy_list, task_list);
+    ONEDAL_PY_INSTANTIATE(init_infer_ops, sub, policy_list, task_list);
+    ONEDAL_PY_INSTANTIATE(init_train_result, sub, task_list);
+    ONEDAL_PY_INSTANTIATE(init_infer_result, sub, task_list);
+#else
+    // This is where the pybind11 init functions would be instantiated with
+    // a policy_spmd object. For example, if an init_train_ops existed for
+    // the spmd backend it would be instantiated like:
+    // ONEDAL_PY_INSTANTIATE(init_train_ops, sub, policy_spmd, task_list);
+#endif
+}
+
+ONEDAL_PY_TYPE2STR(dal::dummy::task::generate, "generate");
+
+} // namespace oneapi::dal::python
@@ -0,0 +1,137 @@
+# ==============================================================================
+# Copyright Contributors to the oneDAL Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""This file describes necessary characteristics and design patterns of onedal estimators.
+
+This can be used as a foundation for developing other estimators. Most
+comments guiding code development should be removed unless pertinent to the
+implementation."""
+
+from .._device_offload import supports_queue
+from ..common._backend import bind_default_backend
+from ..datatypes import from_table, to_table
+
+
+class DummyEstimator:
+    # This class creates a constant 2d array of specific size as an example
+
+    def __init__(self, constant=False):
+        # The __init__ method should only assign class attributes matching
+        # the input parameters (similar to sklearn).  It is not to assign
+        # any attributes which aren't related to the operation of oneDAL.
+        # This is means that it should not conform to sklearn, only to
+        # oneDAL. Don't add unnecessary attributes which only match sklearn,
+        # these should be translated by the sklearnex estimator. In this case
+        # the only parameter for the dummy algorithm is the `constant` param.
+        self.constant = constant
+        self._onedal_model = None
+
+    # see documentation on bind_default_backend. There exists three possible
+    # oneDAL pybind11 interfaces, 'host', 'dpc' and 'spmd'. These are for
+    # cpu-only, cpu and gpu, and multi-device computation respectively. Logic
+    # in the onedal module will determine which can be used at import time.
+    # It will attempt to use the `dpc` interface if possible (which enables
+    # gpu computation) but requires a SYCL runtime. If not possible it will
+    # silently fall back to the 'host' pybind11 interface. The backend
+    # binding logic will seamlessly handle this for the estimator. The 'spmd'
+    # backend is specific to onedal estimators defined in the 'spmd' folder.
+    # The binding makes the pybind11 function a method of this class with
+    # the same name (in this case ProtoTypeEstimator.compute should call
+    # the pybind11 function onedal.backend.dummy.generate.train)
+    # where backend can be one of 'host', 'dpc' or 'spmd'.
+    @bind_default_backend("dummy.generate")
+    def train(self, params, data_table): ...
+
+    @bind_default_backend("dummy.generate")
+    def infer(self, params, model, data_table): ...
+
+    @supports_queue
+    def fit(self, X, y, queue=None):
+        # convert the data to oneDAL tables in preparation for use by the
+        # oneDAL pybind11 interfaces/objects.
+        X_t, y_t = to_table(X, y)
+
+        # Generating the params dict can be centralized into a class method,
+        # but it must be named ``_get_onedal_params``. Parameter 'fptype' is
+        # specific to the pybind11 interface, and cannot be found in oneDAL
+        # documentation. This tells oneDAL what float type to use for the
+        # computation. The safest and best way to assign this value is after
+        # the input data has been converted to a oneDAL table, as the dtype
+        # is standardized (taken care of by ``to_table``).  This dtype is a
+        # ``numpy`` dtype due to its ubiquity and native support in pybind11.
+        params = {
+            "fptype": y_t.dtype,  # normally X_t.dtype is used
+            "method": "dense",
+            "constant": self.constant,
+        }
+
+        # This is the call to the oneDAL pybind11 backend, which was
+        # previously bound using ``bind_default_backend``. It returns a
+        # pybind11 Python interface to the oneDAL C++ result object.
+        result = self.train(params, y_t)
+        # In general the naming conventions of ``fit`` match to ``train``,
+        # and ``predict`` match oneDAL's ``infer``. Please refer to the oneDAL
+        # design documentation to determine the best translation (headers
+        # under oneDAL/tree/main/cpp/oneapi/dal in the oneDAL repository,
+        # like for other correlaries like ``compute`` and ``partial_train``.
+        # Generally the sklearn naming scheme for class methods should be
+        # used here, but calls to the pybind11 interfaces should follow
+        # oneDAL naming.
+
+        # Oftentimes oneDAL table objects are attributes of the oneDAL C++
+        # object. These can be converted into various common data frameworks
+        # like ``numpy`` or ``dpctl.tensor`` using ``from_table``. In this
+        # case the output is a basic python type (bool) which can be handled
+        # easily just with pybind11 without any special code. Attributes of
+        # the result object are copied to attributes of the onedal estimator
+        # object.
+
+        self.constant_, self.fit_X_, self.fit_y_ = from_table(
+            result.data, X_t, y_t, like=X
+        )
+        # The fit_X_ and fit_y_ attributes are not required and are generally
+        # discouraged. They are set in order to show the process of setting
+        # and returning array values (and is just an example).  In setting
+        # return attributes, post processing of the values beyond conversion
+        # needed for sklearn must occur in the sklearnex estimator.
+
+    def _create_model(self):
+        # While doing something rather trivial, this is closer to what may
+        # occur in other estimators which can generate models just in time.
+        # Necessary attributes are collected, converted to oneDAL tables
+        # and set to the oneDAL object. In general there should be a oneDAL
+        # model class defined with serialization and deserialization with a
+        # pybind11 interface.
+
+        # When the model is a oneDAL object (see svm), it must maintain a
+        # pybind11 interface to the `serialization` and `deserialization`
+        # oneDAL routines for proper pickling of the oneDAL object. oneDAL
+        # tables must be converted to the array type of the fitted estimator
+        # for pickling/unpickling (see any incremental estimator pybind11
+        # implementation).
+
+        # This example just treats a oneDAL table as the model.
+        return to_table(self.constant_)
+
+    @supports_queue
+    def predict(self, X, queue=None):
+        X_t = to_table(X)
+        if self._onedal_model is None:
+            self._onedal_model = self._create_model()
+
+        params = {"fptype": X_t.dtype, "method": "dense", "constant": self.constant}
+        result = self.infer(params, self._onedal_model, X_t)
+        return from_table(result.data, like=X)