Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 8 additions & 4 deletions math/mathcore/src/TKDTreeBinning.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,9 @@ void TKDTreeBinning::SetNBins(UInt_t bins) {
fDataBins = new TKDTreeID(fDataSize, fDim, fDataSize / (fNBins - remainingData)); // TKDTree input is data size, data dimension and the content size of bins ("bucket" size)
SetTreeData();
fDataBins->Build();
// Need to keep the number of bins consistent with the number of
// terminal nodes in the kd-tree.
fNBins = fDataBins->GetNNodes() + 1;
SetBinsEdges();
SetBinsContent();
} else {
Expand Down Expand Up @@ -238,12 +241,13 @@ void TKDTreeBinning::SetTreeData() {
}

void TKDTreeBinning::SetBinsContent() {
// Sets the bins' content
// Sets the bins' content from the actual number of points in each terminal
// node of the kd-tree. All terminal nodes hold fBucketSize points except
// possibly the last one, so query the tree directly instead of guessing.
fBinsContent.resize(fNBins);
UInt_t nTreeNodes = fDataBins->GetNNodes();
for (UInt_t i = 0; i < fNBins; ++i)
fBinsContent[i] = fDataBins->GetBucketSize();
if ( fDataSize % fNBins != 0 )
fBinsContent[fNBins - 1] = fDataSize % (fNBins-1);
fBinsContent[i] = fDataBins->GetNPointsNode(i + nTreeNodes);
}

void TKDTreeBinning::SetBinsEdges() {
Expand Down
133 changes: 103 additions & 30 deletions math/mathcore/test/testkdTreeBinning.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -28,30 +28,36 @@ using std::cout;
using std::cerr;
using std::endl;

Comment thread
guitargeek marked this conversation as resolved.
void testkdTreeBinning() {
// Returns the number of detected failures (0 on success) so that the caller can
// turn it into a non-zero process exit code: ROOT's Error() only prints, it does
// not by itself make the test fail under ctest.
int testkdTreeBinning()
{

int nfail = 0;

// -----------------------------------------------------------------------------------------------
// C r e a t e r a n d o m s a m p l e
// -----------------------------------------------------------------------------------------------

const UInt_t DATASZ = 10000;
const UInt_t DATADIM = 2;
const UInt_t NBINS = 50;
constexpr std::size_t DATASZ = 10000;
constexpr std::size_t DATADIM = 2;
constexpr std::size_t NBINS = 50;

Double_t smp[DATASZ * DATADIM];
double smp[DATASZ * DATADIM];

double mu[2] = {0,2};
double sig[2] = {2,3};
TRandom3 r;
r.SetSeed(1);
for (UInt_t i = 0; i < DATADIM; ++i)
for (UInt_t j = 0; j < DATASZ; ++j)
for (std::size_t i = 0; i < DATADIM; ++i)
for (std::size_t j = 0; j < DATASZ; ++j)
smp[DATASZ * i + j] = r.Gaus(mu[i], sig[i]);

UInt_t h1bins = (UInt_t) std::sqrt(double(NBINS));
std::size_t h1bins = std::sqrt(double(NBINS));

TH2D* h1 = new TH2D("h1BinTest", "Regular binning", h1bins, -5., 5., h1bins, -5., 5.);
for (UInt_t j = 0; j < DATASZ; ++j)
for (std::size_t j = 0; j < DATASZ; ++j)
h1->Fill(smp[j], smp[DATASZ + j]);


Expand All @@ -61,20 +67,20 @@ void testkdTreeBinning() {

TKDTreeBinning* kdBins = new TKDTreeBinning(DATASZ, DATADIM, smp, NBINS);

UInt_t nbins = kdBins->GetNBins();
UInt_t dim = kdBins->GetDim();
std::size_t nbins = kdBins->GetNBins();
std::size_t dim = kdBins->GetDim();

const Double_t* binsMinEdges = kdBins->GetBinsMinEdges();
const Double_t* binsMaxEdges = kdBins->GetBinsMaxEdges();
const double *binsMinEdges = kdBins->GetBinsMinEdges();
const double *binsMaxEdges = kdBins->GetBinsMaxEdges();

TH2Poly* h2pol = new TH2Poly("h2PolyBinTest", "KDTree binning", kdBins->GetDataMin(0), kdBins->GetDataMax(0), kdBins->GetDataMin(1), kdBins->GetDataMax(1));

for (UInt_t i = 0; i < nbins; ++i) {
UInt_t edgeDim = i * dim;
for (std::size_t i = 0; i < nbins; ++i) {
std::size_t edgeDim = i * dim;
h2pol->AddBin(binsMinEdges[edgeDim], binsMinEdges[edgeDim + 1], binsMaxEdges[edgeDim], binsMaxEdges[edgeDim + 1]);
}

for (UInt_t i = 1; i <= kdBins->GetNBins(); ++i)
for (std::size_t i = 1; i <= kdBins->GetNBins(); ++i)
h2pol->SetBinContent(i, kdBins->GetBinDensity(i - 1));

int ibinMin = kdBins->GetBinMinDensity();
Expand All @@ -83,14 +89,22 @@ void testkdTreeBinning() {
std::cout << "Bin with minimum density: " << ibinMin << " density = " << kdBins->GetBinDensity(ibinMin) << " content = " << kdBins->GetBinContent(ibinMin) << std::endl;
std::cout << "Bin with maximum density: " << ibinMax << " density = " << kdBins->GetBinDensity(ibinMax) << " content = " << kdBins->GetBinContent(ibinMax) << std::endl;

if (kdBins->GetBinContent(ibinMax) != DATASZ/NBINS)
if (kdBins->GetBinContent(ibinMax) != DATASZ / NBINS) {
Error("testkdTreeBinning","Wrong bin content");
++nfail;
}

// order bins by density
kdBins->SortBinsByDensity(true);

if (kdBins->GetBinMinDensity() != 0) Error("testkdTreeBinning","Wrong minimum bin after sorting");
if (kdBins->GetBinMaxDensity() != nbins-1) Error("testkdTreeBinning","Wrong maximum bin after sorting");
if (kdBins->GetBinMinDensity() != 0) {
Error("testkdTreeBinning", "Wrong minimum bin after sorting");
++nfail;
}
if (kdBins->GetBinMaxDensity() != nbins - 1) {
Error("testkdTreeBinning", "Wrong maximum bin after sorting");
++nfail;
}

if (showGraphics) {
new TCanvas();
Expand All @@ -104,7 +118,6 @@ void testkdTreeBinning() {
double point[2] = {0,0};
// double binCenter[2];
gRandom->SetSeed(0);
bool ok = true;
for (int itimes = 0; itimes < ntimes; itimes++) {

// generate a random point in 2D
Expand All @@ -126,12 +139,12 @@ void testkdTreeBinning() {
std::cout << " point y " << point[1] << " BIN CENTER is " << binCenter[1] << " min " << binMin[1] << " max " << binMax[1] << std::endl;
}

ok &= point[0] > binMin[0] && point[0] < binMax[0];
ok &= point[1] > binMin[1] && point[1] < binMax[1];
bool ok = point[0] > binMin[0] && point[0] < binMax[0] && point[1] > binMin[1] && point[1] < binMax[1];
if (!ok) {
Error ("testkdTreeBinning::FindBin"," Point is not in the right bin " );
std::cout << " point x " << point[0] << " BIN CENTER is " << binCenter[0] << " min " << binMin[0] << " max " << binMax[0] << std::endl;
std::cout << " point y " << point[1] << " BIN CENTER is " << binCenter[1] << " min " << binMin[1] << " max " << binMax[1] << std::endl;
++nfail;
}

if (itimes < 2 && showGraphics ) {
Expand All @@ -143,21 +156,80 @@ void testkdTreeBinning() {
}

delete [] binCenter;
}
else
} else {
Error("testkdTreeBinning::FindBin"," Bin %d is not existing ",ibin);
++nfail;
}
}

return nfail;
}

// Regression test for the case where the requested number of bins does not
Comment thread
guitargeek marked this conversation as resolved.
// divide the data size evenly. In that situation the kd-tree builds more
// terminal nodes (bins) than naively expected, and FindBin() must never return
// an index outside [0, GetNBins()). See
// https://github.com/root-project/root/issues/10784 about
// TKDTreeBinning::FindBin returning non-existent bins. Returns the number of
// detected failures (0 on success) so that the caller can turn it into a
// non-zero process exit code: ROOT's Error() only prints, it does not by
// itself make the test fail under ctest.
int testkdTreeBinningFindBinRange()
{

int nfail = 0;

constexpr std::size_t DATASZ = 100500; // deliberately NOT a multiple of NBINS
constexpr std::size_t DATADIM = 5;
constexpr std::size_t NBINS = 1000;

std::vector<double> smp(DATASZ * DATADIM);
TRandom3 r;
r.SetSeed(1);
for (std::size_t i = 0; i < DATADIM; ++i)
for (std::size_t j = 0; j < DATASZ; ++j)
smp[DATASZ * i + j] = r.Uniform(-1., 1.);

TKDTreeBinning kdBins(DATASZ, DATADIM, smp, NBINS);

const std::size_t nbins = kdBins.GetNBins();

// The number of bins must match the number of terminal nodes of the kd-tree.
if ((int)nbins != kdBins.GetTree()->GetNNodes() + 1) {
Error("testkdTreeBinningFindBinRange", "GetNBins() (%zu) != number of kd-tree terminal nodes (%d)", nbins,
kdBins.GetTree()->GetNNodes() + 1);
++nfail;
}

// Every data point must be assigned to a valid bin.
std::vector<double> point(DATADIM);
for (std::size_t j = 0; j < DATASZ; ++j) {
for (std::size_t i = 0; i < DATADIM; ++i)
point[i] = smp[DATASZ * i + j];
std::size_t bin = kdBins.FindBin(point.data());
if (bin >= nbins) {
Error("testkdTreeBinningFindBinRange", "FindBin returned out-of-range bin %zu (NBins = %zu)", bin, nbins);
++nfail;
break;
}
}

return;
// The total bin content must add up to the data size.
Long64_t total = 0;
for (std::size_t i = 0; i < nbins; ++i)
total += kdBins.GetBinContent(i);
if (total != (Long64_t)DATASZ) {
Error("testkdTreeBinningFindBinRange", "Sum of bin contents (%lld) != data size (%zu)", total, DATASZ);
++nfail;
}

return nfail;
}

int main(int argc, char **argv)
{
// Parse command line arguments
for (Int_t i=1 ; i<argc ; i++) {
for (int i = 1; i < argc; i++) {
std::string arg = argv[i] ;
if (arg == "-g") {
showGraphics = true;
Expand All @@ -174,13 +246,15 @@ int main(int argc, char **argv)
cerr << endl;
return -1;
}
}
}

TApplication* theApp = nullptr;
if ( showGraphics )
theApp = new TApplication("App",&argc,argv);

testkdTreeBinning();
int nfail = testkdTreeBinning();

nfail += testkdTreeBinningFindBinRange();

if ( showGraphics )
{
Expand All @@ -189,7 +263,6 @@ int main(int argc, char **argv)
theApp = nullptr;
}

return 0;

return nfail;
}

Loading