Skip to content

Commit 04f35d6

Browse files
committed
Parallelize longitudinal preprocessors
1 parent 8b7046b commit 04f35d6

12 files changed

+309
-176
lines changed

lib/cpp/preprocessing/longitudinal_features_lagger.cpp

Lines changed: 26 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -6,34 +6,38 @@
66

77
#include "tick/preprocessing/longitudinal_features_lagger.h"
88

9+
910
LongitudinalFeaturesLagger::LongitudinalFeaturesLagger(
10-
const SBaseArrayDouble2dPtrList1D &features,
11-
const SArrayULongPtr n_lags)
12-
: n_intervals(features[0]->n_rows()),
13-
n_lags(n_lags),
14-
n_samples(features.size()),
15-
n_observations(n_samples * n_intervals),
16-
n_features(features[0]->n_cols()),
17-
n_lagged_features(n_lags->sum() + n_lags->size()) {
18-
col_offset = ArrayULong(n_lags->size());
19-
col_offset.init_to_zero();
20-
if (n_features != n_lags->size()) {
21-
TICK_ERROR("Features matrix column number should match n_lags length.");
22-
}
23-
if ((*n_lags)[0] >= n_intervals) {
24-
TICK_ERROR("n_lags elements must be between 0 and (n_intervals - 1).");
25-
}
11+
ulong n_intervals,
12+
SArrayULongPtr _n_lags)
13+
: n_intervals(n_intervals),
14+
n_lags(_n_lags),
15+
n_features(_n_lags->size()),
16+
n_lagged_features(_n_lags->size() + _n_lags->sum()) {
17+
if (n_lags != nullptr) compute_col_offset(n_lags);
18+
}
19+
20+
void LongitudinalFeaturesLagger::compute_col_offset(const SArrayULongPtr n_lags) {
21+
ArrayULong col_offset_temp = ArrayULong(n_lags->size());
22+
col_offset_temp.init_to_zero();
2623
for (ulong i(1); i < n_lags->size(); i++) {
2724
if ((*n_lags)[i] >= n_intervals) {
2825
TICK_ERROR("n_lags elements must be between 0 and (n_intervals - 1).");
2926
}
30-
col_offset[i] = col_offset[i - 1] + (*n_lags)[i-1] + 1;
27+
col_offset_temp[i] = col_offset_temp[i - 1] + (*n_lags)[i-1] + 1;
3128
}
29+
col_offset = col_offset_temp.as_sarray_ptr();
3230
}
3331

3432
void LongitudinalFeaturesLagger::dense_lag_preprocessor(ArrayDouble2d &features,
3533
ArrayDouble2d &out,
3634
ulong censoring) const {
35+
if (n_intervals != features.n_rows()) {
36+
TICK_ERROR("Features matrix rows count should match n_intervals.");
37+
}
38+
if (n_features != features.n_cols()) {
39+
TICK_ERROR("Features matrix column count should match n_lags length.");
40+
}
3741
if (out.n_cols() != n_lagged_features) {
3842
TICK_ERROR(
3943
"n_columns of &out should be equal to n_features + sum(n_lags).");
@@ -47,8 +51,9 @@ void LongitudinalFeaturesLagger::dense_lag_preprocessor(ArrayDouble2d &features,
4751
n_cols_feature = (*n_lags)[feature] + 1;
4852
for (ulong j = 0; j < n_intervals; j++) {
4953
row = j;
50-
col = col_offset[feature];
51-
value = features(row, feature);
54+
col = (*col_offset)[feature];
55+
// use view_row instead of (row, feature) to be const
56+
value = view_row(features, row)[feature];
5257
max_col = col + n_cols_feature;
5358
if (value != 0) {
5459
while (row < censoring && col < max_col) {
@@ -68,14 +73,15 @@ void LongitudinalFeaturesLagger::sparse_lag_preprocessor(ArrayULong &row,
6873
ArrayULong &out_col,
6974
ArrayDouble &out_data,
7075
ulong censoring) const {
76+
// TODO: add checks here? Or do them in Python?
7177
ulong j(0), r, c, offset, new_col, max_col;
7278
double value;
7379

7480
for (ulong i = 0; i < data.size(); i++) {
7581
value = data[i];
7682
r = row[i];
7783
c = col[i];
78-
offset = col_offset[c];
84+
offset = (*col_offset)[c];
7985
max_col = offset + (*n_lags)[c] + 1;
8086
new_col = offset;
8187

lib/cpp/preprocessing/sparse_longitudinal_features_product.cpp

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,6 @@
77
#include "tick/preprocessing/sparse_longitudinal_features_product.h"
88
#include <map>
99

10-
SparseLongitudinalFeaturesProduct::SparseLongitudinalFeaturesProduct(
11-
const SBaseArrayDouble2dPtrList1D &features)
12-
: n_features(features[0]->n_cols()) {}
13-
1410
ulong SparseLongitudinalFeaturesProduct::get_feature_product_col(
1511
ulong col1, ulong col2, ulong n_cols) const {
1612
if (col1 > col2) { // ensure we have the right order as the following formula

lib/include/tick/preprocessing/longitudinal_features_lagger.h

Lines changed: 27 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -15,17 +15,21 @@ class LongitudinalFeaturesLagger {
1515
protected:
1616
ulong n_intervals;
1717
SArrayULongPtr n_lags;
18-
ArrayULong col_offset;
19-
ulong n_samples;
20-
ulong n_observations;
2118
ulong n_features;
2219
ulong n_lagged_features;
20+
SArrayULongPtr col_offset;
2321

2422
public:
25-
LongitudinalFeaturesLagger(const SBaseArrayDouble2dPtrList1D &features,
26-
const SArrayULongPtr n_lags);
23+
// This exists solely for cereal/swig
24+
LongitudinalFeaturesLagger() = default;
2725

28-
void dense_lag_preprocessor(ArrayDouble2d &features, ArrayDouble2d &out,
26+
LongitudinalFeaturesLagger(ulong n_intervals,
27+
SArrayULongPtr n_lags);
28+
29+
void compute_col_offset(SArrayULongPtr n_lags);
30+
31+
void dense_lag_preprocessor(ArrayDouble2d &features,
32+
ArrayDouble2d &out,
2933
ulong censoring) const;
3034

3135
void sparse_lag_preprocessor(ArrayULong &row, ArrayULong &col,
@@ -34,14 +38,26 @@ class LongitudinalFeaturesLagger {
3438
ulong censoring) const;
3539

3640
template <class Archive>
37-
void serialize(Archive &ar) {
41+
void load(Archive &ar) {
42+
ar(CEREAL_NVP(n_intervals));
43+
ar(CEREAL_NVP(n_features));
44+
ar(CEREAL_NVP(n_lagged_features));
45+
46+
Array<ulong> temp_n_lags, temp_col_offset;
47+
ar(cereal::make_nvp("n_lags", temp_n_lags));
48+
49+
n_lags = temp_n_lags.as_sarray_ptr();
50+
col_offset = temp_col_offset.as_sarray_ptr();
51+
}
52+
53+
54+
template <class Archive>
55+
void save(Archive &ar) const {
3856
ar(CEREAL_NVP(n_intervals));
39-
ar(CEREAL_NVP(n_lags));
40-
ar(CEREAL_NVP(col_offset));
41-
ar(CEREAL_NVP(n_samples));
42-
ar(CEREAL_NVP(n_observations));
4357
ar(CEREAL_NVP(n_features));
4458
ar(CEREAL_NVP(n_lagged_features));
59+
ar(cereal::make_nvp("n_lags", *n_lags));
60+
ar(cereal::make_nvp("col_offset", *col_offset));
4561
}
4662
};
4763

lib/include/tick/preprocessing/sparse_longitudinal_features_product.h

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,11 @@ class SparseLongitudinalFeaturesProduct {
1616
ulong n_features;
1717

1818
public:
19+
// This exists solely for cereal/swig
20+
SparseLongitudinalFeaturesProduct() = default;
21+
1922
explicit SparseLongitudinalFeaturesProduct(
20-
const SBaseArrayDouble2dPtrList1D &features);
23+
const ulong n_features): n_features(n_features) {}
2124

2225
inline ulong get_feature_product_col(ulong col1, ulong col2,
2326
ulong n_cols) const;
@@ -28,7 +31,12 @@ class SparseLongitudinalFeaturesProduct {
2831
ArrayDouble &out_data) const;
2932

3033
template <class Archive>
31-
void serialize(Archive &ar) {
34+
void load(Archive &ar) {
35+
ar(CEREAL_NVP(n_features));
36+
}
37+
38+
template <class Archive>
39+
void save(Archive &ar) const {
3240
ar(CEREAL_NVP(n_features));
3341
}
3442
};

lib/swig/preprocessing/longitudinal_features_lagger.i

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4,24 +4,25 @@
44
#include "tick/preprocessing/longitudinal_features_lagger.h"
55
%}
66

7+
%include serialization.i
8+
79
class LongitudinalFeaturesLagger {
810

911
public:
10-
LongitudinalFeaturesLagger(const SBaseArrayDouble2dPtrList1D &features,
11-
const SArrayULongPtr n_lags);
12+
// This exists solely for cereal/swig
13+
LongitudinalFeaturesLagger();
14+
15+
LongitudinalFeaturesLagger(ulong n_intervals,
16+
SArrayULongPtr n_lags);
1217

1318
void dense_lag_preprocessor(ArrayDouble2d &features,
1419
ArrayDouble2d &out,
1520
ulong censoring) const;
1621

17-
void sparse_lag_preprocessor(ArrayULong &row,
18-
ArrayULong &col,
19-
ArrayDouble &data,
20-
ArrayULong &out_row,
21-
ArrayULong &out_col,
22-
ArrayDouble &out_data,
22+
void sparse_lag_preprocessor(ArrayULong &row, ArrayULong &col,
23+
ArrayDouble &data, ArrayULong &out_row,
24+
ArrayULong &out_col, ArrayDouble &out_data,
2325
ulong censoring) const;
24-
2526
};
2627

2728
TICK_MAKE_PICKLABLE(LongitudinalFeaturesLagger);

lib/swig/preprocessing/sparse_longitudinal_features_product.i

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,15 @@
44
#include "tick/preprocessing/sparse_longitudinal_features_product.h"
55
%}
66

7+
%include serialization.i
8+
79
class SparseLongitudinalFeaturesProduct {
810

911
public:
10-
SparseLongitudinalFeaturesProduct(const SBaseArrayDouble2dPtrList1D &features);
12+
// This exists solely for cereal/swig
13+
SparseLongitudinalFeaturesProduct();
14+
15+
SparseLongitudinalFeaturesProduct(const ulong n_features);
1116

1217
void sparse_features_product(ArrayULong &row,
1318
ArrayULong &col,

tick/preprocessing/base/longitudinal_preprocessor.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
from abc import ABC, abstractmethod
44
from tick.base import Base
5+
from multiprocessing import cpu_count
56

67

78
class LongitudinalPreprocessor(ABC, Base):
@@ -14,9 +15,14 @@ class LongitudinalPreprocessor(ABC, Base):
1415
set to the number of cores.
1516
"""
1617

17-
def __init__(self, n_jobs=-1):
18+
_attrinfos = {'n_jobs': {'writable': True}}
19+
20+
def __init__(self, n_jobs=1):
1821
Base.__init__(self)
19-
self.n_jobs = n_jobs
22+
if n_jobs == -1:
23+
self.n_jobs = cpu_count()
24+
else:
25+
self.n_jobs = n_jobs
2026

2127
@abstractmethod
2228
def fit(self, features, labels, censoring) -> None:

0 commit comments

Comments
 (0)