Skip to content

Commit 4ae90a7

Browse files
committed
move coo_matrix to array headers
1 parent e61301f commit 4ae90a7

File tree

3 files changed

+159
-151
lines changed

3 files changed

+159
-151
lines changed
Lines changed: 154 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,154 @@
1+
2+
3+
#ifndef LIB_INCLUDE_TICK_ARRAY_COO_MATRIX_H_
4+
#define LIB_INCLUDE_TICK_ARRAY_COO_MATRIX_H_
5+
6+
template <typename T>
7+
class CooMatrix {
8+
public:
9+
CooMatrix() {}
10+
11+
CooMatrix(const ArrayULong &rows, const ArrayULong &cols, const Array<T> data) {
12+
this->rows = rows;
13+
this->cols = cols;
14+
this->data = data;
15+
checkCoo();
16+
}
17+
18+
explicit CooMatrix(std::shared_ptr<SSparseArray2d<T, RowMajor>> sparse) {
19+
rows = ArrayULong(sparse->size_sparse());
20+
nnz = 0;
21+
22+
std::vector<ulong> nnz_rows;
23+
for (ulong i = 0; i < sparse->n_rows(); i++) {
24+
nnz_rows.push_back(sparse->row_indices()[i + 1] - sparse->row_indices()[i]);
25+
}
26+
27+
ulong out_i = 0;
28+
ulong row_i = 0;
29+
for (ulong nnz_i : nnz_rows) {
30+
nnz += nnz_i;
31+
for (ulong i = 0; i < nnz_i; i++) {
32+
rows[out_i] = row_i;
33+
out_i++;
34+
if (out_i > rows.size()) TICK_ERROR("Invalid sparse matrix");
35+
}
36+
row_i++;
37+
}
38+
39+
auto toArrayULong = [](ArrayUInt &array) {
40+
ArrayULong out(array.size());
41+
for (ulong i = 0; i < array.size(); i++) out[i] = (ulong)array[i];
42+
return out;
43+
};
44+
45+
ArrayUInt temp(sparse->size_sparse(), sparse->indices());
46+
cols = toArrayULong(temp);
47+
data = Array<T>(sparse->size_sparse(), sparse->data());
48+
49+
checkCoo();
50+
}
51+
52+
void checkCoo() {
53+
if (rows.size() != cols.size() || cols.size() != data.size() || data.size() != rows.size())
54+
TICK_ERROR("CooMatrix::checkCoo row, cols, and data size are different");
55+
// more check?
56+
}
57+
58+
void clearZero() {
59+
checkCoo();
60+
61+
std::vector<ulong> out_row;
62+
std::vector<ulong> out_col;
63+
std::vector<T> out_data;
64+
for (ulong i = 0; i < rows.size(); i++) {
65+
if (rows[i] != 0 || cols[i] != 0 || data[i] != (T)0) {
66+
out_row.push_back(rows[i]);
67+
out_col.push_back(cols[i]);
68+
out_data.push_back(data[i]);
69+
}
70+
}
71+
rows = ArrayULong(out_row.size());
72+
cols = ArrayULong(out_col.size());
73+
data = Array<T>(out_data.size());
74+
75+
for (ulong i = 0; i < rows.size(); i++) {
76+
rows[i] = out_row[i];
77+
cols[i] = out_col[i];
78+
data[i] = out_data[i];
79+
}
80+
}
81+
82+
void sortByRow() {
83+
checkCoo();
84+
85+
std::vector<std::tuple<ulong, ulong, T>> sort_data;
86+
for (ulong i = 0; i < rows.size(); i++) sort_data.emplace_back(rows[i], cols[i], data[i]);
87+
88+
std::sort(sort_data.begin(), sort_data.end());
89+
90+
for (ulong i = 0; i < rows.size(); i++) {
91+
rows[i] = std::get<0>(sort_data[i]);
92+
cols[i] = std::get<1>(sort_data[i]);
93+
data[i] = std::get<2>(sort_data[i]);
94+
}
95+
96+
checkCoo();
97+
}
98+
99+
std::shared_ptr<SSparseArray2d<T>> toSparse(ulong n_rows, ulong n_cols) {
100+
checkCoo();
101+
102+
clearZero();
103+
sortByRow();
104+
105+
std::vector<unsigned int> rows_vec(n_rows + 1);
106+
std::vector<unsigned int> cols_vec;
107+
rows_vec[0] = 0;
108+
109+
std::vector<ulong> nnz_rows;
110+
for (ulong i = 0; i < n_rows; i++) {
111+
ulong nnz_this_row = 0;
112+
for (ulong j = 0; j < rows.size(); j++) {
113+
if (rows[j] == i) {
114+
nnz_this_row++;
115+
}
116+
}
117+
nnz_rows.push_back(nnz_this_row);
118+
}
119+
120+
if (nnz_rows.size() != n_rows) {
121+
TICK_ERROR("Unexcepted error nnz_rows.size() != n_rows");
122+
}
123+
124+
for (ulong i = 1; i < n_rows + 1; i++) {
125+
rows_vec[i] = rows_vec[i - 1] + nnz_rows[i - 1];
126+
}
127+
128+
ulong maxcol = 0;
129+
for (ulong i = 0; i < cols.size(); i++) {
130+
if (cols[i] > maxcol) maxcol = cols[i];
131+
cols_vec.push_back(cols[i]);
132+
}
133+
134+
unsigned int *row_ptr = new unsigned int[rows_vec.size()];
135+
unsigned int *col_ptr = new unsigned int[cols_vec.size()];
136+
T *data_ptr = new T[data.size()];
137+
138+
memcpy(row_ptr, rows_vec.data(), rows_vec.size() * sizeof(unsigned int));
139+
memcpy(col_ptr, cols_vec.data(), cols_vec.size() * sizeof(unsigned int));
140+
memcpy(data_ptr, data.data(), data.size() * sizeof(T));
141+
142+
std::shared_ptr<SSparseArray2d<T>> arrayptr = SSparseArray2d<T>::new_ptr(0, 0, 0);
143+
144+
arrayptr->set_data_indices_rowindices(data_ptr, col_ptr, row_ptr, n_rows, n_cols);
145+
return arrayptr;
146+
}
147+
148+
ArrayULong rows;
149+
ArrayULong cols;
150+
Array<T> data;
151+
ulong nnz;
152+
};
153+
154+
#endif // LIB_INCLUDE_TICK_ARRAY_COO_MATRIX_H_

lib/include/tick/preprocessing/longitudinal_features_lagger_mp.h

Lines changed: 1 addition & 148 deletions
Original file line numberDiff line numberDiff line change
@@ -11,156 +11,9 @@
1111
#include <cereal/types/polymorphic.hpp>
1212
#include "tick/base/base.h"
1313
#include "tick/base/serialization.h"
14+
#include "tick/array/coo_matrix.h"
1415
#include "tick/preprocessing/longitudinal_preprocessor.h"
1516

16-
template <typename T>
17-
class CooMatrix {
18-
public:
19-
CooMatrix() {}
20-
21-
CooMatrix(const ArrayULong &rows, const ArrayULong &cols, const Array<T> data) {
22-
this->rows = rows;
23-
this->cols = cols;
24-
this->data = data;
25-
checkCoo();
26-
}
27-
28-
explicit CooMatrix(std::shared_ptr<SSparseArray2d<T, RowMajor>> sparse) {
29-
rows = ArrayULong(sparse->size_sparse());
30-
nnz = 0;
31-
32-
std::vector<ulong> nnz_rows;
33-
for (ulong i = 0; i < sparse->n_rows(); i++) {
34-
nnz_rows.push_back(sparse->row_indices()[i + 1] - sparse->row_indices()[i]);
35-
}
36-
37-
ulong out_i = 0;
38-
ulong row_i = 0;
39-
for (ulong nnz_i : nnz_rows) {
40-
nnz += nnz_i;
41-
for (ulong i = 0; i < nnz_i; i++) {
42-
rows[out_i] = row_i;
43-
out_i++;
44-
if (out_i > rows.size()) TICK_ERROR("Invalid sparse matrix");
45-
}
46-
row_i++;
47-
}
48-
49-
auto toArrayULong = [](ArrayUInt &array) {
50-
ArrayULong out(array.size());
51-
for (ulong i = 0; i < array.size(); i++) out[i] = (ulong)array[i];
52-
return out;
53-
};
54-
55-
ArrayUInt temp(sparse->size_sparse(), sparse->indices());
56-
cols = toArrayULong(temp);
57-
data = Array<T>(sparse->size_sparse(), sparse->data());
58-
59-
checkCoo();
60-
}
61-
62-
void checkCoo() {
63-
if (rows.size() != cols.size() || cols.size() != data.size() || data.size() != rows.size())
64-
TICK_ERROR("CooMatrix::checkCoo row, cols, and data size are different");
65-
// more check?
66-
}
67-
68-
void clearZero() {
69-
checkCoo();
70-
71-
std::vector<ulong> out_row;
72-
std::vector<ulong> out_col;
73-
std::vector<T> out_data;
74-
for (ulong i = 0; i < rows.size(); i++) {
75-
if (rows[i] != 0 || cols[i] != 0 || data[i] != (T)0) {
76-
out_row.push_back(rows[i]);
77-
out_col.push_back(cols[i]);
78-
out_data.push_back(data[i]);
79-
}
80-
}
81-
rows = ArrayULong(out_row.size());
82-
cols = ArrayULong(out_col.size());
83-
data = Array<T>(out_data.size());
84-
85-
for (ulong i = 0; i < rows.size(); i++) {
86-
rows[i] = out_row[i];
87-
cols[i] = out_col[i];
88-
data[i] = out_data[i];
89-
}
90-
}
91-
92-
void sortByRow() {
93-
checkCoo();
94-
95-
std::vector<std::tuple<ulong, ulong, T>> sort_data;
96-
for (ulong i = 0; i < rows.size(); i++) sort_data.emplace_back(rows[i], cols[i], data[i]);
97-
98-
std::sort(sort_data.begin(), sort_data.end());
99-
100-
for (ulong i = 0; i < rows.size(); i++) {
101-
rows[i] = std::get<0>(sort_data[i]);
102-
cols[i] = std::get<1>(sort_data[i]);
103-
data[i] = std::get<2>(sort_data[i]);
104-
}
105-
106-
checkCoo();
107-
}
108-
109-
std::shared_ptr<SSparseArray2d<T>> toSparse(ulong n_rows, ulong n_cols) {
110-
checkCoo();
111-
112-
clearZero();
113-
sortByRow();
114-
115-
std::vector<unsigned int> rows_vec(n_rows + 1);
116-
std::vector<unsigned int> cols_vec;
117-
rows_vec[0] = 0;
118-
119-
std::vector<ulong> nnz_rows;
120-
for (ulong i = 0; i < n_rows; i++) {
121-
ulong nnz_this_row = 0;
122-
for (ulong j = 0; j < rows.size(); j++) {
123-
if (rows[j] == i) {
124-
nnz_this_row++;
125-
}
126-
}
127-
nnz_rows.push_back(nnz_this_row);
128-
}
129-
130-
if (nnz_rows.size() != n_rows) {
131-
TICK_ERROR("Unexcepted error nnz_rows.size() != n_rows");
132-
}
133-
134-
for (ulong i = 1; i < n_rows + 1; i++) {
135-
rows_vec[i] = rows_vec[i - 1] + nnz_rows[i - 1];
136-
}
137-
138-
ulong maxcol = 0;
139-
for (ulong i = 0; i < cols.size(); i++) {
140-
if (cols[i] > maxcol) maxcol = cols[i];
141-
cols_vec.push_back(cols[i]);
142-
}
143-
144-
unsigned int *row_ptr = new unsigned int[rows_vec.size()];
145-
unsigned int *col_ptr = new unsigned int[cols_vec.size()];
146-
T *data_ptr = new T[data.size()];
147-
148-
memcpy(row_ptr, rows_vec.data(), rows_vec.size() * sizeof(unsigned int));
149-
memcpy(col_ptr, cols_vec.data(), cols_vec.size() * sizeof(unsigned int));
150-
memcpy(data_ptr, data.data(), data.size() * sizeof(T));
151-
152-
std::shared_ptr<SSparseArray2d<T>> arrayptr = SSparseArray2d<T>::new_ptr(0, 0, 0);
153-
154-
arrayptr->set_data_indices_rowindices(data_ptr, col_ptr, row_ptr, n_rows, n_cols);
155-
return arrayptr;
156-
}
157-
158-
ArrayULong rows;
159-
ArrayULong cols;
160-
Array<T> data;
161-
ulong nnz;
162-
};
163-
16417
class DLL_PUBLIC LongitudinalFeaturesLagger_MP : LongitudinalPreprocessor {
16518
protected:
16619
ulong n_intervals;

lib/include/tick/preprocessing/longitudinal_preprocessor.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
1+
2+
#ifndef LIB_INCLUDE_TICK_PREPROCESSING_LONGITUDINAL_PREPROCESSOR_H_
3+
#define LIB_INCLUDE_TICK_PREPROCESSING_LONGITUDINAL_PREPROCESSOR_H_
4+
15
#include <cereal/types/base_class.hpp>
26
#include <cereal/types/polymorphic.hpp>
37
#include "tick/base/base.h"
48
#include "tick/base/serialization.h"
59

6-
#ifndef LIB_INCLUDE_TICK_PREPROCESSING_LONGITUDINAL_PREPROCESSOR_H_
7-
#define LIB_INCLUDE_TICK_PREPROCESSING_LONGITUDINAL_PREPROCESSOR_H_
8-
910
class DLL_PUBLIC LongitudinalPreprocessor {
1011
protected:
1112
size_t n_jobs;

0 commit comments

Comments
 (0)