-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathRegTree.cpp
126 lines (110 loc) · 4.44 KB
/
RegTree.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
#include "RegTree.h"

#include <algorithm>
#include <cstdlib>
#include <ctime>
#include <limits>
namespace mla {
namespace tree {
// Searches every candidate feature (or a random subset of features when the
// tree is part of an ensemble) and every split position within that feature,
// and reports the (feature, threshold) pair that minimizes the summed squared
// deviation of the two resulting partitions.
//
// Outputs (by reference):
//   nOptFeatureIndex - index of the best split feature.
//   fOptFeatureVal   - threshold; samples with value <= threshold go left.
// In/out:
//   vCurrentIndex    - indices of the training samples in this node.
//   vFeatureIndex    - candidate feature indices (shuffled in ensemble mode).
void RegressionTree::optSplitPos(int &nOptFeatureIndex,
        float &fOptFeatureVal,
        std::vector<int32_t> &vCurrentIndex,
        std::vector<int32_t> &vFeatureIndex) {
    float minDevia = std::numeric_limits<float>::max();
    // Choose the candidate feature set: a random subset in ensemble
    // (random-forest) mode, all features otherwise.
    std::vector<int32_t> vTempFeatureIndex;
    if (getEnsemble()) {
        // Seed the PRNG once per process. Re-seeding with time(NULL) on
        // every call would make all calls within the same second produce
        // identical feature subsets.
        static bool s_seeded = (srand((unsigned)time(NULL)), true);
        (void)s_seeded;
        // Unbiased Fisher-Yates shuffle: position i swaps with a uniform
        // pick from [i, n). (Swapping with rand() % n is biased.)
        for (int32_t i = 0; i < (int32_t)vFeatureIndex.size(); ++i) {
            int32_t t = i + rand() % ((int32_t)vFeatureIndex.size() - i);
            std::swap(vFeatureIndex[i], vFeatureIndex[t]);
        }
        // Clamp so an over-large configured count cannot read past the end.
        int32_t nSampleCnt = std::min((int32_t)getRandFeatureCnt(),
                                      (int32_t)vFeatureIndex.size());
        vTempFeatureIndex.assign(vFeatureIndex.begin(),
                                 vFeatureIndex.begin() + nSampleCnt);
    } else {
        vTempFeatureIndex.assign(vFeatureIndex.begin(), vFeatureIndex.end());
    }
    for (int32_t i = 0; i < (int32_t)vTempFeatureIndex.size(); i ++) {
        // Map each sample index to its value of the current feature, then
        // sort the sample indices by that value so every boundary between
        // consecutive samples is a candidate split position.
        std::map<int32_t, float> tmpFeatureValue;
        for (int32_t j = 0; j < (int32_t)vCurrentIndex.size(); j ++) {
            tmpFeatureValue[vCurrentIndex[j]] =
                getTrainingX()[vCurrentIndex[j]][vTempFeatureIndex[i]];
        }
        std::vector<int32_t> vTempCurrentIndex(vCurrentIndex.begin(), vCurrentIndex.end());
        sortIndexVec(vTempCurrentIndex, tmpFeatureValue);
        // Node-wide totals: sum(y) and sum(y^2).
        float totValue = 0.0;
        float totSqaValue = 0.0;
        for (int32_t j = 0; j < (int32_t)vTempCurrentIndex.size(); j ++) {
            float tmpVal = getTrainingY()[vTempCurrentIndex[j]];
            totValue += tmpVal;
            totSqaValue += tmpVal * tmpVal;
        }
        // Sweep the split point left to right with a running prefix sum.
        // SSE(left) + SSE(right) = sum(y^2) - sumL^2/nL - sumR^2/nR.
        float curTotVal = 0.0;
        for (int32_t j = 0; j < (int32_t)vTempCurrentIndex.size(); ++ j) {
            float tmpVal = getTrainingY()[vTempCurrentIndex[j]];
            curTotVal += tmpVal;
            float curDevia = totSqaValue - curTotVal * curTotVal / (j + 1);
            if (j + 1 != (int32_t)vTempCurrentIndex.size()) {
                curDevia -= (totValue - curTotVal) *
                            (totValue - curTotVal) /
                            ((int32_t)vTempCurrentIndex.size() - j - 1);
            }
            // NOTE(review): when consecutive samples share the same feature
            // value the threshold recorded here may disagree with the j-count
            // used for the deviance — splitData sends ALL ties left. Confirm
            // whether duplicate feature values need to be skipped here.
            if (curDevia < minDevia) {
                minDevia = curDevia;
                nOptFeatureIndex = vTempFeatureIndex[i];
                fOptFeatureVal = tmpFeatureValue[vTempCurrentIndex[j]];
            }
        }
    }
}
// Records the chosen split on node `top` and partitions the node's sample
// indices: samples whose split-feature value is <= the threshold go to
// vLeftIndex, the rest to vRightIndex. The node's label is set to the mean
// target value over all samples in the node (used as the prediction if this
// node ends up a leaf).
void RegressionTree::splitData(mla::basic::Node<float>* &top,
        const int &nOptFeatureIndex,
        const float &fOptFeatureVal,
        const std::vector<int32_t> &vTmpCurrentIndex,
        std::vector<int32_t> &vLeftIndex,
        std::vector<int32_t> &vRightIndex) {
    float label = 0.0;
    for (int32_t i = 0; i < (int32_t)vTmpCurrentIndex.size(); i ++) {
        label += getTrainingY()[vTmpCurrentIndex[i]];
    }
    // Guard the mean against an empty node: 0.0 / 0 would produce NaN and
    // poison every prediction routed through this node.
    if (!vTmpCurrentIndex.empty()) {
        label /= (int32_t)vTmpCurrentIndex.size();
    }
    top->m_nCurrentOptSplitIndex = nOptFeatureIndex;
    top->m_fCurrentOptSplitValue = fOptFeatureVal;
    top->label = label;
    // Route each sample: <= threshold goes left, otherwise right.
    for (int32_t j = 0; j < (int32_t)vTmpCurrentIndex.size(); j ++) {
        if (getTrainingX()[vTmpCurrentIndex[j]][nOptFeatureIndex] <= fOptFeatureVal) {
            vLeftIndex.push_back(vTmpCurrentIndex[j]);
        } else {
            vRightIndex.push_back(vTmpCurrentIndex[j]);
        }
    }
}
float RegressionTree::predict( const std::vector<float> &testFeatureX) {
mla::basic::Node<float>* oTreeNode = getTreeRootNode();
while (true) {
if (NULL == oTreeNode->m_oLeft && NULL == oTreeNode->m_oRight) {
return oTreeNode->label;
}
if (testFeatureX[oTreeNode->m_nCurrentOptSplitIndex] <= oTreeNode->m_fCurrentOptSplitValue) {
if (NULL == oTreeNode->m_oLeft) {
return oTreeNode->label;
} else {
oTreeNode = oTreeNode->m_oLeft;
}
} else {
if (NULL == oTreeNode->m_oRight) {
return oTreeNode->label;
} else {
oTreeNode = oTreeNode->m_oRight;
}
}
}
}
}
}