Commit 3e7d36e

shitty pickling
1 parent 9d7aaa4 commit 3e7d36e

8 files changed, +116 -56 lines changed

lib/cpp/preprocessing/longitudinal_features_lagger.cpp

Lines changed: 27 additions & 20 deletions
@@ -6,34 +6,39 @@
 
 #include "tick/preprocessing/longitudinal_features_lagger.h"
 
+
 LongitudinalFeaturesLagger::LongitudinalFeaturesLagger(
-    const SBaseArrayDouble2dPtrList1D &features,
-    const SArrayULongPtr n_lags)
-    : n_intervals(features[0]->n_rows()),
+    ulong n_intervals,
+    SArrayULongPtr n_lags)
+    : n_intervals(n_intervals),
       n_lags(n_lags),
-      n_samples(features.size()),
-      n_observations(n_samples * n_intervals),
-      n_features(features[0]->n_cols()),
-      n_lagged_features(n_lags->sum() + n_lags->size()) {
-  col_offset = ArrayULong(n_lags->size());
-  col_offset.init_to_zero();
-  if (n_features != n_lags->size()) {
-    TICK_ERROR("Features matrix column number should match n_lags length.");
-  }
-  if ((*n_lags)[0] >= n_intervals) {
-    TICK_ERROR("n_lags elements must be between 0 and (n_intervals - 1).");
-  }
+      n_features(n_lags->size()),
+      n_lagged_features(n_features + n_lags->sum()),
+      col_offset(nullptr){
+  if (n_lags != nullptr) compute_col_offset(n_lags);
+};
+
+void LongitudinalFeaturesLagger::compute_col_offset(const SArrayULongPtr n_lags) {
+  ArrayULong col_offset_temp = ArrayULong(n_lags->size());
+  col_offset_temp.init_to_zero();
   for (ulong i(1); i < n_lags->size(); i++) {
     if ((*n_lags)[i] >= n_intervals) {
       TICK_ERROR("n_lags elements must be between 0 and (n_intervals - 1).");
     }
-    col_offset[i] = col_offset[i - 1] + (*n_lags)[i-1] + 1;
+    col_offset_temp[i] = col_offset_temp[i - 1] + (*n_lags)[i-1] + 1;
   }
-}
+  col_offset = col_offset_temp.as_sarray_ptr();
+};
 
 void LongitudinalFeaturesLagger::dense_lag_preprocessor(ArrayDouble2d &features,
                                                         ArrayDouble2d &out,
                                                         ulong censoring) const {
+  if (n_features != features.n_rows()) {
+    TICK_ERROR("Features matrix rows count should match n_lags length.");
+  }
+  if (n_features != features.n_cols()) {
+    TICK_ERROR("Features matrix column count should match n_lags length.");
+  }
   if (out.n_cols() != n_lagged_features) {
     TICK_ERROR(
         "n_columns of &out should be equal to n_features + sum(n_lags).");
@@ -47,8 +52,9 @@ void LongitudinalFeaturesLagger::dense_lag_preprocessor(ArrayDouble2d &features,
     n_cols_feature = (*n_lags)[feature] + 1;
     for (ulong j = 0; j < n_intervals; j++) {
       row = j;
-      col = col_offset[feature];
-      value = features(row, feature);
+      col = (*col_offset)[feature];
+      // use view_row instead of (row, feature) to be const
+      value = view_row(features, row)[feature];
       max_col = col + n_cols_feature;
       if (value != 0) {
         while (row < censoring && col < max_col) {
@@ -68,14 +74,15 @@ void LongitudinalFeaturesLagger::sparse_lag_preprocessor(ArrayULong &row,
                                                          ArrayULong &out_col,
                                                          ArrayDouble &out_data,
                                                          ulong censoring) const {
+  // TODO: add checks here ? Or do them in Python ?
   ulong j(0), r, c, offset, new_col, max_col;
   double value;
 
   for (ulong i = 0; i < data.size(); i++) {
     value = data[i];
     r = row[i];
     c = col[i];
-    offset = col_offset[c];
+    offset = (*col_offset)[c];
     max_col = offset + (*n_lags)[c] + 1;
     new_col = offset;
 
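For reference, compute_col_offset builds the starting column of each feature's lag block in the lagged output matrix: feature i occupies columns [offset_i, offset_i + n_lags[i]], so each offset is the running sum of (n_lags[j] + 1) over the preceding features, and the total width is n_features + sum(n_lags). A minimal numpy sketch of the same computation (the function name and values are illustrative, not part of the library):

import numpy as np

def compute_col_offset(n_lags):
    # block width of feature j is n_lags[j] + 1 (the raw value plus its lags)
    widths = n_lags.astype("uint64") + 1
    # offset of feature i is the sum of the widths of features 0..i-1
    offsets = np.zeros_like(widths)
    offsets[1:] = np.cumsum(widths)[:-1]
    return offsets

# n_lags = [1, 2, 0] -> offsets [0, 2, 5]; lagged width 6 = len(n_lags) + n_lags.sum()
print(compute_col_offset(np.array([1, 2, 0], dtype="uint64")))
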
lib/include/tick/preprocessing/longitudinal_features_lagger.h

Lines changed: 29 additions & 11 deletions
@@ -15,17 +15,21 @@ class LongitudinalFeaturesLagger {
 protected:
  ulong n_intervals;
  SArrayULongPtr n_lags;
- ArrayULong col_offset;
- ulong n_samples;
- ulong n_observations;
  ulong n_features;
  ulong n_lagged_features;
+ SArrayULongPtr col_offset;
 
 public:
-  LongitudinalFeaturesLagger(const SBaseArrayDouble2dPtrList1D &features,
-                             const SArrayULongPtr n_lags);
+  // This exists soley for cereal/swig
+  LongitudinalFeaturesLagger(): LongitudinalFeaturesLagger(0, nullptr) {};
 
-  void dense_lag_preprocessor(ArrayDouble2d &features, ArrayDouble2d &out,
+  LongitudinalFeaturesLagger(ulong n_intervals,
+                             SArrayULongPtr n_lags);
+
+  void compute_col_offset(SArrayULongPtr n_lags);
+
+  void dense_lag_preprocessor(ArrayDouble2d &features,
+                              ArrayDouble2d &out,
                               ulong censoring) const;
 
   void sparse_lag_preprocessor(ArrayULong &row, ArrayULong &col,
@@ -34,14 +38,28 @@ class LongitudinalFeaturesLagger {
                                ulong censoring) const;
 
   template <class Archive>
-  void serialize(Archive &ar) {
+  void load(Archive &ar) {
+    ar(CEREAL_NVP(n_intervals));
+    ar(CEREAL_NVP(n_features));
+    ar(CEREAL_NVP(n_lagged_features));
+
+    Array<ulong> temp_n_lags, temp_col_offset;
+    ar(cereal::make_nvp("n_lags", temp_n_lags));
+    // ar(cereal::make_nvp("col_offset", temp_col_offset));
+
+    n_lags = temp_n_lags.as_sarray_ptr();
+    if (n_lags != nullptr) compute_col_offset(n_lags);
+    // col_offset = temp_col_offset.as_sarray_ptr();
+  }
+
+
+  template <class Archive>
+  void save(Archive &ar) const {
     ar(CEREAL_NVP(n_intervals));
-    ar(CEREAL_NVP(n_lags));
-    ar(CEREAL_NVP(col_offset));
-    ar(CEREAL_NVP(n_samples));
-    ar(CEREAL_NVP(n_observations));
     ar(CEREAL_NVP(n_features));
     ar(CEREAL_NVP(n_lagged_features));
+    ar(cereal::make_nvp("n_lags", *n_lags));
+    // ar(cereal::make_nvp("col_offset", *col_offset));
   }
 };
 
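Splitting serialize() into save()/load() follows the usual cereal pattern for objects with derived state: only the independent fields (n_intervals, n_features, n_lagged_features and the n_lags array) are archived, and col_offset is rebuilt from n_lags on load rather than stored (the commented-out lines keep the alternative of archiving it). A rough Python analogy of this recompute-on-load idea, not tick code:

import numpy as np

class LaggerSketch:
    """Toy object with one derived field, mirroring the C++ save/load split."""

    def __init__(self, n_intervals, n_lags):
        self.n_intervals = n_intervals
        self.n_lags = n_lags
        self.col_offset = self._compute_col_offset(n_lags)  # derived state

    @staticmethod
    def _compute_col_offset(n_lags):
        widths = n_lags + 1
        offsets = np.zeros_like(widths)
        offsets[1:] = np.cumsum(widths)[:-1]
        return offsets

    def __getstate__(self):
        # save(): persist only the independent fields
        return {"n_intervals": self.n_intervals, "n_lags": self.n_lags}

    def __setstate__(self, state):
        # load(): restore the fields, then recompute the derived offsets
        self.n_intervals = state["n_intervals"]
        self.n_lags = state["n_lags"]
        self.col_offset = self._compute_col_offset(self.n_lags)
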
lib/swig/preprocessing/longitudinal_features_lagger.i

Lines changed: 10 additions & 9 deletions
@@ -4,24 +4,25 @@
 #include "tick/preprocessing/longitudinal_features_lagger.h"
 %}
 
+%include serialization.i
+
 class LongitudinalFeaturesLagger {
 
     public:
-      LongitudinalFeaturesLagger(const SBaseArrayDouble2dPtrList1D &features,
-                                 const SArrayULongPtr n_lags);
+      // This exists soley for cereal/swig
+      LongitudinalFeaturesLagger(): LongitudinalFeaturesLagger(0, nullptr) {};
+
+      LongitudinalFeaturesLagger(ulong n_intervals,
+                                 SArrayULongPtr n_lags);
 
       void dense_lag_preprocessor(ArrayDouble2d &features,
                                   ArrayDouble2d &out,
                                   ulong censoring) const;
 
-      void sparse_lag_preprocessor(ArrayULong &row,
-                                   ArrayULong &col,
-                                   ArrayDouble &data,
-                                   ArrayULong &out_row,
-                                   ArrayULong &out_col,
-                                   ArrayDouble &out_data,
+      void sparse_lag_preprocessor(ArrayULong &row, ArrayULong &col,
+                                   ArrayDouble &data, ArrayULong &out_row,
+                                   ArrayULong &out_col, ArrayDouble &out_data,
                                    ulong censoring) const;
-
 };
 
 TICK_MAKE_PICKLABLE(LongitudinalFeaturesLagger);

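With %include serialization.i and TICK_MAKE_PICKLABLE, the SWIG-wrapped C++ lagger gets __getstate__/__setstate__ backed by the cereal save/load above, which is what lets the preprocessor cross process boundaries and motivates the multiprocessing changes below. A hedged round-trip sketch at the Python level, assuming the usual tick import path and that the wrapper's embedded C++ object is what gets serialized:

import pickle
import numpy as np
from tick.preprocessing import LongitudinalFeaturesLagger

n_lags = np.array([1, 2], dtype="uint64")
lagger = LongitudinalFeaturesLagger(n_lags=n_lags)

# round-trip through pickle; once the lagger has been fitted, this also has to
# carry the embedded C++ preprocessor, which is what the cereal save/load enables
restored = pickle.loads(pickle.dumps(lagger))
print(restored.n_lags)
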
lib/swig/preprocessing/sparse_longitudinal_features_product.i

Lines changed: 2 additions & 0 deletions
@@ -4,6 +4,8 @@
 #include "tick/preprocessing/sparse_longitudinal_features_product.h"
 %}
 
+%include serialization.i
+
 class SparseLongitudinalFeaturesProduct {
 
     public:

tick/preprocessing/base/longitudinal_preprocessor.py

Lines changed: 8 additions & 2 deletions
@@ -2,6 +2,7 @@
 
 from abc import ABC, abstractmethod
 from tick.base import Base
+from multiprocessing import cpu_count
 
 
 class LongitudinalPreprocessor(ABC, Base):
@@ -14,9 +15,14 @@ class LongitudinalPreprocessor(ABC, Base):
         set to the number of cores.
     """
 
-    def __init__(self, n_jobs=-1):
+    _attrinfos = {'n_jobs': {'writable': True}}
+
+    def __init__(self, n_jobs=1):
         Base.__init__(self)
-        self.n_jobs = n_jobs
+        if n_jobs == -1:
+            self.n_jobs = cpu_count()
+        else:
+            self.n_jobs = n_jobs
 
     @abstractmethod
     def fit(self, features, labels, censoring) -> None:

tick/preprocessing/longitudinal_features_lagger.py

Lines changed: 26 additions & 13 deletions
@@ -7,6 +7,7 @@
     as _LongitudinalFeaturesLagger
 from .utils import check_longitudinal_features_consistency,\
     check_censoring_consistency
+from multiprocessing.pool import Pool
 
 
 class LongitudinalFeaturesLagger(LongitudinalPreprocessor):
@@ -83,7 +84,7 @@ class LongitudinalFeaturesLagger(LongitudinalPreprocessor):
         }
     }
 
-    def __init__(self, n_lags, n_jobs=-1):
+    def __init__(self, n_lags, n_jobs=1):
         LongitudinalPreprocessor.__init__(self, n_jobs=n_jobs)
         if not isinstance(n_lags, np.ndarray) or n_lags.dtype != 'uint64':
             raise ValueError(
@@ -166,7 +167,6 @@ def transform(self, features, labels=None, censoring=None):
         output : `[numpy.ndarrays]` or `[csr_matrices]`, shape=(n_intervals, n_features)
             The list of features matrices with added lagged features.
         """
-
         n_samples = len(features)
         if censoring is None:
             censoring = np.full((n_samples,), self._n_intervals,
@@ -176,16 +176,28 @@
         features = check_longitudinal_features_consistency(
             features, base_shape, "float64")
         if sps.issparse(features[0]):
-            X_with_lags = [
-                self._sparse_lagger(x, int(censoring[i]))
-                for i, x in enumerate(features)
-            ]
-            # TODO: Don't get why int() is required here as censoring_i is uint64
+            if self.n_jobs > 1:
+                with Pool(self.n_jobs) as pool:
+                    X_with_lags = pool.starmap(self._sparse_lagger, zip(features, censoring))
+                    pool.start()
+                    pool.join()
+            else:
+                X_with_lags = [
+                    self._sparse_lagger(x, int(censoring[i]))
+                    for i, x in enumerate(features)
+                ]
+                # TODO: Don't get why int() is required here as censoring_i is uint64
         else:
-            X_with_lags = [
-                self._dense_lagger(x, int(censoring[i]))
-                for i, x in enumerate(features)
-            ]
+            if self.n_jobs > 1:
+                with Pool(self.n_jobs) as pool:
+                    X_with_lags = pool.starmap(self._dense_lagger, zip(features, censoring))
+                    pool.start()
+                    pool.join()
+            else:
+                X_with_lags = [
+                    self._dense_lagger(x, int(censoring[i]))
+                    for i, x in enumerate(features)
+                ]
 
         return X_with_lags, labels, censoring
 
@@ -197,14 +209,15 @@ def _dense_lagger(self, feature_matrix, censoring_i):
         return output
 
     def _sparse_lagger(self, feature_matrix, censoring_i):
+        pp = self._cpp_preprocessor
         coo = feature_matrix.tocoo()
        estimated_nnz = coo.nnz * int((self.n_lags + 1).sum())
         out_row = np.zeros((estimated_nnz,), dtype="uint64")
         out_col = np.zeros((estimated_nnz,), dtype="uint64")
         out_data = np.zeros((estimated_nnz,), dtype="float64")
-        self._cpp_preprocessor.sparse_lag_preprocessor(
+        pp.sparse_lag_preprocessor(
             coo.row.astype("uint64"), coo.col.astype("uint64"), coo.data,
-            out_row, out_col, out_data, censoring_i)
+            out_row, out_col, out_data, int(censoring_i))
         return sps.csr_matrix((out_data, (out_row, out_col)),
                               shape=(self._n_intervals,
                                      self._n_output_features))
tick/preprocessing/tests/longitudinal_features_lagger_test.py

Lines changed: 12 additions & 0 deletions
@@ -36,6 +36,18 @@ def test_sparse_pre_convolution(self):
         feat_prod = [f.todense() for f in feat_prod]
         np.testing.assert_equal(feat_prod, self.expected_output)
 
+    # def test_parallelization(self):
+    #     feat_prod, _, _ = LongitudinalFeaturesLagger(n_lags=self.n_lags, n_jobs=1) \
+    #         .fit_transform(self.sparse_features, censoring=self.censoring)
+    #     p_feat_prod, _, _ = LongitudinalFeaturesLagger(n_lags=self.n_lags, n_jobs=3)\
+    #         .fit_transform(self.sparse_features, censoring=self.censoring)
+    #
+    # def test_parallelization(self):
+    #     feat_prod, _, _ = LongitudinalFeaturesLagger(n_lags=self.n_lags, n_jobs=1) \
+    #         .fit_transform(self.features, censoring=self.censoring)
+    #     p_feat_prod, _, _ = LongitudinalFeaturesLagger(n_lags=self.n_lags, n_jobs=1)\
+    #         .fit_transform(self.features, censoring=self.censoring)
+
 
 if __name__ == "__main__":
     unittest.main()

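The commented-out tests above build a serial and a parallel result but never compare them, and the second block reuses the same method name with n_jobs=1 on both sides. A sketch of the assertion such a test could make once the parallel path works; it reuses the existing fixture names (n_lags, sparse_features, censoring) and would live inside the same TestCase:

    def test_parallelization(self):
        serial, _, _ = LongitudinalFeaturesLagger(n_lags=self.n_lags, n_jobs=1) \
            .fit_transform(self.sparse_features, censoring=self.censoring)
        parallel, _, _ = LongitudinalFeaturesLagger(n_lags=self.n_lags, n_jobs=3) \
            .fit_transform(self.sparse_features, censoring=self.censoring)
        np.testing.assert_equal([f.todense() for f in serial],
                                [f.todense() for f in parallel])
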
tick/survival/convolutional_sccs.py

Lines changed: 2 additions & 1 deletion
@@ -355,7 +355,8 @@ def fit_kfold_cv(self, features, labels, censoring, C_tv_range: tuple = (),
                 features, labels, censoring)
         # split the data with stratified KFold
         kf = StratifiedKFold(n_folds, shuffle, self.random_state)
-        labels_interval = np.nonzero(p_labels)[1]
+        # labels_interval = np.nonzero(p_labels)[1]
+        labels_interval = [np.nonzero(arr)[0][0] for arr in p_labels]
 
         # Training loop
         model_global_parameters = {

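The labels_interval change accounts for p_labels being a list of per-sample label arrays rather than one stacked 2D array: the list comprehension yields exactly one value per sample (the index of its first nonzero interval), whereas np.nonzero(p_labels)[1] yields one entry per nonzero element of a 2D array. A small self-contained illustration with made-up labels:

import numpy as np

p_labels = [np.array([0, 0, 1, 0]), np.array([0, 1, 0, 0])]

# one class label per sample: the first interval where the label is nonzero
labels_interval = [np.nonzero(arr)[0][0] for arr in p_labels]
print(labels_interval)  # [2, 1]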