-
Notifications
You must be signed in to change notification settings - Fork 184
[enhancement] Introduce DummyRegressor Estimator (prototype estimator for sklearnex design) #2534
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
3829036
1a4627c
90df3e3
c4a6fdc
b54fe71
befe555
8f08da0
7c06762
2ab1084
b34e741
7a4df2c
23a15d3
15bf789
dbfc635
eb48444
be72e87
b987736
e3fa31b
e8ceac5
0cbe504
c4e2283
abba51d
a09e238
60a8df1
85bdf39
cd54ace
dc3774c
4a6ee7d
9ba1966
f0d5427
877f963
d4c7526
940287b
ce573bf
34a42f5
199a416
98c9b36
771e314
57ef9d2
5774d20
a514023
9490f38
3986916
dfc6216
c953c69
c6e8487
02df4c8
0a55de2
541d054
c501a0c
8e3b88c
fa3444b
4f54b78
4cf36db
0b2b62b
e8f3557
99c852b
978b811
156c78d
2a4df80
999cfc1
33986aa
3c86271
40d9a49
84afc60
98914e6
4f48012
d8472eb
72be71d
853a53f
435d52a
6bd3fe6
ed96f16
63d2869
ed9ec46
e986a08
b538b2d
9d16f02
10e59e5
a0e0304
dff4169
6c10169
9c151cd
60d4324
32a2ba8
89a8c68
f1a746a
2c08ffc
48af701
658a147
5fc6e57
2836997
9016add
763b5d1
1cec67e
b4d8e9e
3b556f8
ec796e8
05ebe08
22e4c6f
6f79554
eaa6a71
3b423b4
82fda39
71b71c2
8d57f18
07488f4
0c90896
9110c97
eabaf0e
b719094
9e7c244
b99c17d
79c314e
36bdfaf
593730e
efe4576
ad43b29
05ddcdb
cab3a95
a3278b6
c8b14ed
1e97974
942984c
2ae9676
e76339e
fcc2ba2
6816ec7
ffff3c7
6604e67
79174e6
9ad56ee
8af7f77
2ce7f89
081296a
4414f9f
cf225b8
6175373
1214f0a
a29712b
2beafbf
a332d9b
e11da6d
ccd55c3
e7fcb1a
73358ec
d57ae03
617bec1
8af27d1
c81571e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,19 @@ | ||
| # ============================================================================== | ||
| # Copyright Contributors to the oneDAL Project | ||
| # | ||
| # Licensed under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
| # ============================================================================== | ||
|
|
||
| from .dummy import DummyEstimator | ||
|
|
||
| __all__ = ["DummyEstimator"] |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,195 @@ | ||
| /******************************************************************************* | ||
| * Copyright Contributors to the oneDAL Project | ||
| * | ||
| * Licensed under the Apache License, Version 2.0 (the "License"); | ||
| * you may not use this file except in compliance with the License. | ||
| * You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| *******************************************************************************/ | ||
|
|
||
| #include "onedal/common.hpp" | ||
| #include "onedal/version.hpp" | ||
| // A fake oneDAL algorithm is included via the `dummy_onedal.hpp` header. In | ||
| // normal circumstances a header for the oneDAL algorithm would be | ||
| // included here from the oneDAL `oneapi/dal/algo/` folder. | ||
| #include "onedal/dummy/dummy_onedal.hpp" | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. A comment here specifying that in practice this would instead look like #include oneapi/dal/algo/... would be useful
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. added! |
||
| #include "oneapi/dal/table/common.hpp" | ||
| #include "oneapi/dal/table/homogen.hpp" | ||
|
|
||
| namespace py = pybind11; | ||
|
|
||
| // oneDAL-python interaction code is located in this namespace | ||
| namespace oneapi::dal::python { | ||
icfaust marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| // pybind11 structures and functions of the 'dummy' algorithm | ||
| namespace dummy { | ||
icfaust marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| template <typename Task, typename Ops> | ||
| struct method2t { | ||
| method2t(const Task& task, const Ops& ops) : ops(ops) {} | ||
| // this functor converts the method param into a valid oneDAL task. | ||
| // Tasks are specific to each algorithm, therefore method2t is often | ||
| // defined for each algo. | ||
| template <typename Float> | ||
| auto operator()(const py::dict& params) { | ||
| using namespace dal::dummy; | ||
| const auto method = params["method"].cast<std::string>(); | ||
|
|
||
| ONEDAL_PARAM_DISPATCH_VALUE(method, "dense", ops, Float, method::dense); | ||
| ONEDAL_PARAM_DISPATCH_VALUE(method, "by_default", ops, Float, method::by_default); | ||
| ONEDAL_PARAM_DISPATCH_THROW_INVALID_VALUE(method); | ||
| } | ||
|
|
||
| Ops ops; | ||
| }; | ||
|
|
||
| struct params2desc { | ||
| // This functor converts the params dictionary into a oneDAL descriptor | ||
| template <typename Float, typename Method, typename Task> | ||
| auto operator()(const py::dict& params) { | ||
| auto desc = dal::dummy::descriptor<Float, Method, Task>(); | ||
|
|
||
| // conversion of the params dict to oneDAL params occurs here except | ||
| // for the ``method`` and ``fptype`` parameters. They are assigned | ||
| // to the descriptor individually here before returning. | ||
| const auto constant = params["constant"].cast<double>(); | ||
| desc.set_constant(constant); | ||
|
|
||
| return desc; | ||
| } | ||
| }; | ||
|
|
||
| // the following functions define the python interface methods for the | ||
| // oneDAL algorithms. They are templated for the policy (which may be host, | ||
| // dpc, or spmd), and task, which is defined per algorithm. They are all | ||
| // defined using lambda functions (a common occurrence for pybind11), but | ||
| // that is not a requirement. | ||
| template <typename Policy, typename Task> | ||
| void init_train_ops(py::module& m) { | ||
| m.def("train", [](const Policy& policy, const py::dict& params, const table& data) { | ||
| using namespace dal::dummy; | ||
| using input_t = train_input<Task>; | ||
| // while there is a train_ops defined for each oneDAL algorithm | ||
| // which supports ``train``, this is the train_ops defined in | ||
| // onedal/common/dispatch_utils.hpp | ||
| train_ops ops(policy, input_t{ data }, params2desc{}); | ||
| // fptype2t is defined in common/dispatch_utils.hpp | ||
| // which operates in a similar manner to the method2t functor | ||
| // it selects the floating point datatype for the calculation | ||
| return fptype2t{ method2t{ Task{}, ops } }(params); | ||
| }); | ||
| }; | ||
|
|
||
| template <typename Policy, typename Task> | ||
| void init_infer_ops(py::module_& m) { | ||
| m.def( | ||
| "infer", | ||
| [](const Policy& policy, const py::dict& params, const table& constant, const table& data) { | ||
| using namespace dal::dummy; | ||
| using input_t = infer_input<Task>; | ||
|
|
||
| infer_ops ops(policy, input_t{ data, constant }, params2desc{}); | ||
| // with the use of functors the order of operations is as | ||
| // follows: Task is generated, the ops is already created above, | ||
| // method2t is constructed, and then fptype2t is constructed. | ||
| // It is then evaluated in opposite order sequentially on the | ||
| // params dict. | ||
| return fptype2t{ method2t{ Task{}, ops } }(params); | ||
| }); | ||
| } | ||
|
|
||
| // This defines the result C++ objects for use in python via pybind11. | ||
| // Result object attributes should be pybind11 native types (like int, | ||
| // float, etc.) or oneDAL tables. | ||
|
|
||
| template <typename Task> | ||
| void init_train_result(py::module_& m) { | ||
| using namespace dal::dummy; | ||
| using result_t = train_result<Task>; | ||
|
|
||
| py::class_<result_t>(m, "train_result").def(py::init()).DEF_ONEDAL_PY_PROPERTY(data, result_t); | ||
| } | ||
|
|
||
| template <typename Task> | ||
| void init_infer_result(py::module_& m) { | ||
| using namespace dal::dummy; | ||
| using result_t = infer_result<Task>; | ||
|
|
||
| py::class_<result_t>(m, "infer_result").def(py::init()).DEF_ONEDAL_PY_PROPERTY(data, result_t); | ||
| } | ||
|
|
||
| ONEDAL_PY_DECLARE_INSTANTIATOR(init_train_result); | ||
| ONEDAL_PY_DECLARE_INSTANTIATOR(init_infer_result); | ||
| ONEDAL_PY_DECLARE_INSTANTIATOR(init_train_ops); | ||
| ONEDAL_PY_DECLARE_INSTANTIATOR(init_infer_ops); | ||
|
|
||
| } // namespace dummy | ||
|
|
||
| ONEDAL_PY_INIT_MODULE(dummy) { | ||
| using namespace dummy; | ||
| using namespace dal::detail; | ||
| using namespace dal::dummy; | ||
|
|
||
| // the task_list allows templates for multiple types of tasks (like | ||
| // regression and classification) to be evaluated. The use of 'types' | ||
| // is not required, and has special implications for the | ||
| // 'bind_default_backend' function as it creates submodules in python | ||
| // based on the task name. See the covariance implementation | ||
| // where no task_list is used and a submodule of the algorithm is not | ||
| // made. | ||
| using task_list = types<task::generate>; | ||
| auto sub = m.def_submodule("dummy"); | ||
|
|
||
| // explicitly define the templates based off of the policy and task | ||
| // lists. These instantiations lead to a cascade of fully-resolved | ||
| // templates from oneDAL. It begins by fully resolving functors defined | ||
| // here and the oneDAL descriptor. It then fully specifies functors in | ||
| // common/dispatch_utils.hpp, which starts resolving oneDAL objects | ||
| // for the algorithm like the train_ops/infer_ops functors defined there. | ||
| // This leads to a fair amount of compile-time work with oneDAL headers. | ||
| // For example take init_train_ops in approximate reverse order | ||
| // (to show how it goes from here to oneDAL): | ||
| // | ||
| // 0. Creates pybind11 interface | ||
| // 1. Specifies lambda defined in init_train_ops | ||
| // 2. Specifies fptype2t | ||
| // 3. Specifies method2t | ||
| // 4. Specifies train_ops defined in common/dispatch_utils.hpp | ||
| // 5. Specifies train defined in oneapi/dal/train.hpp | ||
| // 6. Specifies train_dispatch in oneapi/dal/detail/train_ops.hpp | ||
| // 7. Specifies several functors in oneapi/dal/detail/ops_dispatcher.hpp | ||
| // 8. Specifies train_ops defined in algorithm's train_ops.hpp | ||
| // 9. Specifies oneDAL train_input, train_result and descriptor structs | ||
| /**** finally hits objects compiled in oneDAL for the computation ****/ | ||
| // (train_ops_dispatcher for example) | ||
| // | ||
| // It's not clear how many layers of these indirections are compiled | ||
| // versus optimized away. The namings in dispatch_utils.hpp are also | ||
| // unfortunate and confusing. | ||
|
|
||
| // policy_list is defined elsewhere which is dependent on the backend | ||
| // which is being built. Placed within a macro-check in order to prevent | ||
| // use with an spmd policy. | ||
| #ifndef ONEDAL_DATA_PARALLEL_SPMD | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. what about else (if spmd to be instantiated)?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. added a comment |
||
| ONEDAL_PY_INSTANTIATE(init_train_ops, sub, policy_list, task_list); | ||
| ONEDAL_PY_INSTANTIATE(init_infer_ops, sub, policy_list, task_list); | ||
| ONEDAL_PY_INSTANTIATE(init_train_result, sub, task_list); | ||
| ONEDAL_PY_INSTANTIATE(init_infer_result, sub, task_list); | ||
| #else | ||
| // This is where the pybind11 init functions would be instantiated with | ||
| // a policy_spmd object. For example, if an init_train_ops existed for | ||
| // the spmd backend it would be instantiated like: | ||
| // ONEDAL_PY_INSTANTIATE(init_train_ops, sub, policy_spmd, task_list); | ||
| #endif | ||
| } | ||
|
|
||
| ONEDAL_PY_TYPE2STR(dal::dummy::task::generate, "generate"); | ||
|
|
||
| } // namespace oneapi::dal::python | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,137 @@ | ||
| # ============================================================================== | ||
| # Copyright Contributors to the oneDAL Project | ||
| # | ||
| # Licensed under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
| # ============================================================================== | ||
|
|
||
| """This file describes necessary characteristics and design patterns of onedal estimators. | ||
|
|
||
| This can be used as a foundation for developing other estimators. Most | ||
| comments guiding code development should be removed unless pertinent to the | ||
| implementation.""" | ||
|
|
||
| from .._device_offload import supports_queue | ||
| from ..common._backend import bind_default_backend | ||
| from ..datatypes import from_table, to_table | ||
|
|
||
|
|
||
| class DummyEstimator: | ||
| # This class creates a constant 2d array of specific size as an example | ||
|
|
||
| def __init__(self, constant=False): | ||
| # The __init__ method should only assign class attributes matching | ||
| # the input parameters (similar to sklearn). It is not to assign | ||
| # any attributes which aren't related to the operation of oneDAL. | ||
| # This means that it should not conform to sklearn, only to | ||
| # oneDAL. Don't add unnecessary attributes which only match sklearn, | ||
| # these should be translated by the sklearnex estimator. In this case | ||
| # the only parameter for the dummy algorithm is the `constant` param. | ||
| self.constant = constant | ||
| self._onedal_model = None | ||
|
|
||
| # see documentation on bind_default_backend. There exist three possible | ||
| # oneDAL pybind11 interfaces, 'host', 'dpc' and 'spmd'. These are for | ||
| # cpu-only, cpu and gpu, and multi-device computation respectively. Logic | ||
david-cortes-intel marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| # in the onedal module will determine which can be used at import time. | ||
| # It will attempt to use the `dpc` interface if possible (which enables | ||
| # gpu computation) but requires a SYCL runtime. If not possible it will | ||
| # silently fall back to the 'host' pybind11 interface. The backend | ||
| # binding logic will seamlessly handle this for the estimator. The 'spmd' | ||
| # backend is specific to onedal estimators defined in the 'spmd' folder. | ||
| # The binding makes the pybind11 function a method of this class with | ||
| # the same name (in this case DummyEstimator.train should call | ||
| # the pybind11 function onedal.backend.dummy.generate.train) | ||
| # where backend can be one of 'host', 'dpc' or 'spmd'. | ||
| @bind_default_backend("dummy.generate") | ||
| def train(self, params, data_table): ... | ||
|
|
||
| @bind_default_backend("dummy.generate") | ||
| def infer(self, params, model, data_table): ... | ||
|
|
||
| @supports_queue | ||
| def fit(self, X, y, queue=None): | ||
| # convert the data to oneDAL tables in preparation for use by the | ||
| # oneDAL pybind11 interfaces/objects. | ||
| X_t, y_t = to_table(X, y) | ||
|
|
||
| # Generating the params dict can be centralized into a class method, | ||
| # but it must be named ``_get_onedal_params``. Parameter 'fptype' is | ||
| # specific to the pybind11 interface, and cannot be found in oneDAL | ||
| # documentation. This tells oneDAL what float type to use for the | ||
| # computation. The safest and best way to assign this value is after | ||
| # the input data has been converted to a oneDAL table, as the dtype | ||
| # is standardized (taken care of by ``to_table``). This dtype is a | ||
| # ``numpy`` dtype due to its ubiquity and native support in pybind11. | ||
| params = { | ||
| "fptype": y_t.dtype, # normally X_t.dtype is used | ||
| "method": "dense", | ||
| "constant": self.constant, | ||
| } | ||
|
|
||
| # This is the call to the oneDAL pybind11 backend, which was | ||
| # previously bound using ``bind_default_backend``. It returns a | ||
| # pybind11 Python interface to the oneDAL C++ result object. | ||
| result = self.train(params, y_t) | ||
| # In general the naming conventions of ``fit`` match ``train``, | ||
david-cortes-intel marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| # and ``predict`` match oneDAL's ``infer``. Please refer to the oneDAL | ||
| # design documentation to determine the best translation (headers | ||
| # under oneDAL/tree/main/cpp/oneapi/dal in the oneDAL repository), | ||
| # as well as for other corollaries like ``compute`` and ``partial_train``. | ||
| # Generally the sklearn naming scheme for class methods should be | ||
| # used here, but calls to the pybind11 interfaces should follow | ||
| # oneDAL naming. | ||
|
|
||
| # Oftentimes oneDAL table objects are attributes of the oneDAL C++ | ||
| # object. These can be converted into various common data frameworks | ||
| # like ``numpy`` or ``dpctl.tensor`` using ``from_table``. In this | ||
| # case the output is a basic python type (bool) which can be handled | ||
| # easily just with pybind11 without any special code. Attributes of | ||
| # the result object are copied to attributes of the onedal estimator | ||
| # object. | ||
|
|
||
| self.constant_, self.fit_X_, self.fit_y_ = from_table( | ||
| result.data, X_t, y_t, like=X | ||
| ) | ||
| # The fit_X_ and fit_y_ attributes are not required and are generally | ||
| # discouraged. They are set in order to show the process of setting | ||
| # and returning array values (this is just an example). In setting | ||
| # return attributes, post processing of the values beyond conversion | ||
| # needed for sklearn must occur in the sklearnex estimator. | ||
|
|
||
| def _create_model(self): | ||
| # While doing something rather trivial, this is closer to what may | ||
| # occur in other estimators which can generate models just in time. | ||
| # Necessary attributes are collected, converted to oneDAL tables | ||
| # and set to the oneDAL object. In general there should be a oneDAL | ||
| # model class defined with serialization and deserialization with a | ||
| # pybind11 interface. | ||
|
|
||
| # When the model is a oneDAL object (see svm), it must maintain a | ||
| # pybind11 interface to the `serialization` and `deserialization` | ||
| # oneDAL routines for proper pickling of the oneDAL object. oneDAL | ||
| # tables must be converted to the array type of the fitted estimator | ||
| # for pickling/unpickling (see any incremental estimator pybind11 | ||
| # implementation). | ||
|
|
||
| # This example just treats a oneDAL table as the model. | ||
| return to_table(self.constant_) | ||
|
|
||
| @supports_queue | ||
| def predict(self, X, queue=None): | ||
| X_t = to_table(X) | ||
| if self._onedal_model is None: | ||
| self._onedal_model = self._create_model() | ||
|
|
||
| params = {"fptype": X_t.dtype, "method": "dense", "constant": self.constant} | ||
| result = self.infer(params, self._onedal_model, X_t) | ||
| return from_table(result.data, like=X) | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is the location of the files
dummy.cpp, dummy_onedal.hpp, prototype.py correct? Should those be located in the
onedal/dummy/ folder?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Also, having "dummy.py" instead of "prototype.py" might be more aligned with the rest of the codebase.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done!