From 38acc2ef6dde7d4c66094852de75ea3ae786689f Mon Sep 17 00:00:00 2001
From: david-cortes-intel <david.cortes@intel.com>
Date: Thu, 11 Sep 2025 09:06:31 -0700
Subject: [PATCH 1/5] use fp64 for all random forest aggregates

---
 .../dtrees/dtrees_train_data_helper.i         |  11 +-
 ..._classification_train_dense_default_impl.i | 415 +++++++++---------
 .../forest/df_train_dense_default_impl.i      | 100 +++--
 .../df_regression_train_dense_default_impl.i  | 357 ++++++++-------
 .../src/algorithms/dtrees/gbt/gbt_train_aux.i |   5 +-
 5 files changed, 460 insertions(+), 428 deletions(-)
diff --git a/cpp/daal/src/algorithms/dtrees/dtrees_train_data_helper.i b/cpp/daal/src/algorithms/dtrees/dtrees_train_data_helper.i
index a36c9da7bd2..cce5159f267 100644
--- a/cpp/daal/src/algorithms/dtrees/dtrees_train_data_helper.i
+++ b/cpp/daal/src/algorithms/dtrees/dtrees_train_data_helper.i
@@ -104,17 +104,18 @@ void shuffle(void * state, size_t n, IndexType * dst, int * auxBuf)
 template <typename algorithmFPType, typename TImpurityData>
 struct SplitData
 {
+    using intermSummFPType = typename TImpurityData::intermSummFPType;
     TImpurityData left;
     algorithmFPType featureValue;
-    algorithmFPType impurityDecrease;
+    intermSummFPType impurityDecrease;
     size_t nLeft;
     size_t iStart;
     bool featureUnordered;
-    algorithmFPType totalWeights;
-    algorithmFPType leftWeights;
+    intermSummFPType totalWeights;
+    intermSummFPType leftWeights;
 
     SplitData()
-        : impurityDecrease(-daal::services::internal::MaxVal<algorithmFPType>::get()),
+        : impurityDecrease(-daal::services::internal::MaxVal<intermSummFPType>::get()),
           left {},
           featureValue(0.0),
           nLeft(0),
@@ -122,7 +123,7 @@ struct SplitData
           totalWeights(0.0),
           leftWeights(0.0)
     {}
-    SplitData(algorithmFPType impDecr, bool bFeatureUnordered)
+    SplitData(intermSummFPType impDecr, bool bFeatureUnordered)
         : impurityDecrease(impDecr), featureUnordered(bFeatureUnordered), featureValue(0.0), nLeft(0), iStart(0), totalWeights(0.0), leftWeights(0.0)
     {}
     SplitData(const SplitData & o) = delete;
diff --git a/cpp/daal/src/algorithms/dtrees/forest/classification/df_classification_train_dense_default_impl.i b/cpp/daal/src/algorithms/dtrees/forest/classification/df_classification_train_dense_default_impl.i
index 4e157357f93..5d8aa44e2c5 100644
--- a/cpp/daal/src/algorithms/dtrees/forest/classification/df_classification_train_dense_default_impl.i
+++ b/cpp/daal/src/algorithms/dtrees/forest/classification/df_classification_train_dense_default_impl.i
@@ -56,18 +56,19 @@ template <typename algorithmFPType, CpuType cpu>
 class UnorderedRespHelperBest : public DataHelper<algorithmFPType, ClassIndexType, cpu>
 {
 public:
+    typedef double intermSummFPType;
     typedef DataHelper<algorithmFPType, ClassIndexType, cpu> super;
-    typedef typename dtrees::internal::TVector<float, cpu, dtrees::internal::ScalableAllocator<cpu> >
-        Histogramm; //not sure why this is hard-coded to float and not algorithmFPType
+    typedef typename dtrees::internal::TVector<intermSummFPType, cpu, dtrees::internal::ScalableAllocator<cpu> > Histogramm;
 
     struct ImpurityData
     {
-        double var; //impurity is a variance
+        typedef typename UnorderedRespHelperBest::intermSummFPType intermSummFPType; // qualified: a bare 'typedef T T;' changes T's meaning in class scope
+        intermSummFPType var; //impurity is a variance
         Histogramm hist;
 
         ImpurityData() {}
         ImpurityData(size_t nClasses) : hist(nClasses), var(0) {}
-        algorithmFPType value() const { return var; }
+        intermSummFPType value() const { return var; }
         void init(size_t nClasses)
         {
             var = 0;
@@ -91,40 +92,40 @@ public:
     }
 
     int findSplitByHistDefault(int nDiffFeatMax, size_t n, size_t nMinSplitPart, const ImpurityData & curImpurity, TSplitData & split,
-                               const algorithmFPType minWeightLeaf, const algorithmFPType totalWeights, const IndexType iFeature) const;
+                               const intermSummFPType minWeightLeaf, const intermSummFPType totalWeights, const IndexType iFeature) const;
 
     template <int K, bool noWeights>
     int findSplitFewClasses(int nDiffFeatMax, size_t n, size_t nMinSplitPart, const ImpurityData & curImpurity, TSplitData & split,
-                            const algorithmFPType minWeightLeaf, const algorithmFPType totalWeights, const IndexType iFeature) const;
+                            const intermSummFPType minWeightLeaf, const intermSummFPType totalWeights, const IndexType iFeature) const;
 
     template <bool noWeights>
     bool findSplitOrderedFeature(const algorithmFPType * featureVal, const IndexType * aIdx, size_t n, size_t nMinSplitPart,
                                  const algorithmFPType accuracy, const ImpurityData & curImpurity, TSplitData & split,
-                                 const algorithmFPType minWeightLeaf, const algorithmFPType totalWeights) const;
+                                 const intermSummFPType minWeightLeaf, const intermSummFPType totalWeights) const;
 
     bool findSplitCategoricalFeature(const algorithmFPType * featureVal, const IndexType * aIdx, size_t n, size_t nMinSplitPart,
                                      const algorithmFPType accuracy, const ImpurityData & curImpurity, TSplitData & split,
-                                     const algorithmFPType minWeightLeaf, const algorithmFPType totalWeights) const;
+                                     const intermSummFPType minWeightLeaf, const intermSummFPType totalWeights) const;
 
 protected: //enables specific functions for UnorderedRespHelperBest
     // Calculate impurity for right child
-    static void updateRightImpurity(ImpurityData & imp, ClassIndexType iClass, double totalWeights, double moveWeights)
+    static void updateRightImpurity(ImpurityData & imp, ClassIndexType iClass, intermSummFPType totalWeights, intermSummFPType moveWeights)
     {
-        double delta = (2. * totalWeights - moveWeights) * imp.var + 2. * (imp.hist[iClass] - totalWeights);
-        imp.var      = isZero<double, cpu>((totalWeights - moveWeights) * (totalWeights - moveWeights)) ?
-                           1. :
-                           (imp.var + moveWeights * delta / ((totalWeights - moveWeights) * (totalWeights - moveWeights)));
+        intermSummFPType delta = (2. * totalWeights - moveWeights) * imp.var + 2. * (imp.hist[iClass] - totalWeights);
+        imp.var                = isZero<intermSummFPType, cpu>((totalWeights - moveWeights) * (totalWeights - moveWeights)) ?
+                                     1. :
+                                     (imp.var + moveWeights * delta / ((totalWeights - moveWeights) * (totalWeights - moveWeights)));
         imp.hist[iClass] -= moveWeights;
     }
 
     // Calculate impurity for left and right childs
-    static void updateImpurity(ImpurityData & left, ImpurityData & right, ClassIndexType iClass, double totalWeights, double startWeights,
-                               double & moveWeights)
+    static void updateImpurity(ImpurityData & left, ImpurityData & right, ClassIndexType iClass, intermSummFPType totalWeights,
+                               intermSummFPType startWeights, intermSummFPType & moveWeights)
     {
-        double tmp = startWeights * (2. * moveWeights + left.var * startWeights) - 2. * moveWeights * left.hist[iClass];
+        intermSummFPType tmp = startWeights * (2. * moveWeights + left.var * startWeights) - 2. * moveWeights * left.hist[iClass];
         // Update impurity for left child
         left.hist[iClass] += moveWeights;
-        left.var = isZero<algorithmFPType, cpu>((startWeights + moveWeights) * (startWeights + moveWeights)) ?
+        left.var = isZero<intermSummFPType, cpu>((startWeights + moveWeights) * (startWeights + moveWeights)) ?
                        1. :
                        (tmp / ((startWeights + moveWeights) * (startWeights + moveWeights)));
         // Update impurity for right child
@@ -132,17 +133,16 @@ protected: //enables specific functions for UnorderedRespHelperBest
         moveWeights = 0.;
     }
 
-    void calcGini(double totalWeights, ImpurityData & imp) const
+    void calcGini(intermSummFPType totalWeights, ImpurityData & imp) const
     {
-        const double sqWeights = totalWeights * totalWeights;
-        const double one       = double(1);
-        const double cDiv      = isZero<double, cpu>(sqWeights) ? one : (one / sqWeights);
-        double var             = one;
+        const intermSummFPType sqWeights = totalWeights * totalWeights;
+        const intermSummFPType cDiv      = isZero<intermSummFPType, cpu>(sqWeights) ? 1.0 : (1.0 / sqWeights);
+        intermSummFPType var             = 1.0;
         PRAGMA_FORCE_SIMD
         PRAGMA_VECTOR_ALWAYS
-        for (size_t i = 0; i < _nClasses; ++i) var -= cDiv * double(imp.hist[i]) * double(imp.hist[i]);
+        for (size_t i = 0; i < _nClasses; ++i) var -= cDiv * imp.hist[i] * imp.hist[i];
         imp.var = var;
-        if (!isPositive<double, cpu>(imp.var)) imp.var = 0; //roundoff error
+        if (!isPositive<intermSummFPType, cpu>(imp.var)) imp.var = 0; //roundoff error
     }
 
 protected:
@@ -150,8 +150,8 @@ protected:
     //set of buffers for indexed features processing, used in findBestSplitForFeatureIndexed only
     size_t _nClassesThreshold;
     mutable TVector<IndexType, cpu> _idxFeatureBuf;
-    mutable TVector<algorithmFPType, cpu> _weightsFeatureBuf;
-    mutable TVector<float, cpu> _samplesPerClassBuf;
+    mutable TVector<intermSummFPType, cpu> _weightsFeatureBuf;
+    mutable TVector<intermSummFPType, cpu> _samplesPerClassBuf;
     mutable Histogramm _histLeft;
     //work variables used in memory saving mode only
     mutable ImpurityData _impLeft;
@@ -164,26 +164,26 @@ protected:
 template <typename algorithmFPType, CpuType cpu>
 int UnorderedRespHelperBest<algorithmFPType, cpu>::findSplitByHistDefault(int nDiffFeatMax, size_t n, size_t nMinSplitPart,
                                                                           const ImpurityData & curImpurity, TSplitData & split,
-                                                                          const algorithmFPType minWeightLeaf, const algorithmFPType totalWeights,
+                                                                          const intermSummFPType minWeightLeaf, const intermSummFPType totalWeights,
                                                                           const IndexType iFeature) const
 {
-    auto nFeatIdx         = _idxFeatureBuf.get();
-    auto featWeights      = _weightsFeatureBuf.get();
-    auto nSamplesPerClass = _samplesPerClassBuf.get();
+    auto nFeatIdx                       = _idxFeatureBuf.get();
+    intermSummFPType * featWeights      = _weightsFeatureBuf.get();
+    intermSummFPType * nSamplesPerClass = _samplesPerClassBuf.get();
 
-    algorithmFPType bestImpDecrease =
-        split.impurityDecrease < 0 ? split.impurityDecrease : totalWeights * (split.impurityDecrease + algorithmFPType(1.) - curImpurity.var);
+    intermSummFPType bestImpDecrease =
+        split.impurityDecrease < 0 ? split.impurityDecrease : totalWeights * (split.impurityDecrease + 1.0 - curImpurity.var);
 
     //init histogram for the left part
     _histLeft.setAll(0);
-    auto histLeft               = _histLeft.get();
-    size_t nLeft                = 0;
-    algorithmFPType leftWeights = 0.;
-    int idxFeatureBestSplit     = -1; //index of best feature value in the array of sorted feature values
+    auto histLeft                = _histLeft.get();
+    size_t nLeft                 = 0;
+    intermSummFPType leftWeights = 0.;
+    int idxFeatureBestSplit      = -1; //index of best feature value in the array of sorted feature values
     for (size_t i = 0; i < nDiffFeatMax; ++i)
     {
         if (!nFeatIdx[i]) continue;
-        algorithmFPType thisFeatWeights = featWeights[i];
+        intermSummFPType thisFeatWeights = featWeights[i];
 
         nLeft       = (split.featureUnordered ? nFeatIdx[i] : nLeft + nFeatIdx[i]);
         leftWeights = (split.featureUnordered ? thisFeatWeights : leftWeights + thisFeatWeights);
@@ -207,9 +207,9 @@ int UnorderedRespHelperBest<algorithmFPType, cpu>::findSplitByHistDefault(int nD
             for (size_t iClass = 0; iClass < _nClasses; ++iClass) histLeft[iClass] = nSamplesPerClass[i * _nClasses + iClass];
         }
 
-        auto histTotal           = curImpurity.hist.get();
-        algorithmFPType sumLeft  = 0;
-        algorithmFPType sumRight = 0;
+        auto histTotal            = curImpurity.hist.get();
+        intermSummFPType sumLeft  = 0;
+        intermSummFPType sumRight = 0;
         PRAGMA_FORCE_SIMD
         PRAGMA_VECTOR_ALWAYS
         //proximal impurity improvement
@@ -219,7 +219,7 @@ int UnorderedRespHelperBest<algorithmFPType, cpu>::findSplitByHistDefault(int nD
             sumRight += (histTotal[iClass] - histLeft[iClass]) * (histTotal[iClass] - histLeft[iClass]);
         }
 
-        const algorithmFPType decrease = sumLeft / leftWeights + sumRight / (totalWeights - leftWeights);
+        const intermSummFPType decrease = sumLeft / leftWeights + sumRight / (totalWeights - leftWeights);
         if (decrease > bestImpDecrease)
         {
             split.left.hist     = _histLeft;
@@ -232,7 +232,7 @@ int UnorderedRespHelperBest<algorithmFPType, cpu>::findSplitByHistDefault(int nD
     }
     if (idxFeatureBestSplit >= 0)
     {
-        split.impurityDecrease = curImpurity.var + bestImpDecrease / totalWeights - algorithmFPType(1);
+        split.impurityDecrease = curImpurity.var + bestImpDecrease / totalWeights - 1.0;
         split.totalWeights     = totalWeights;
     }
 
@@ -243,24 +243,24 @@ template <typename algorithmFPType, CpuType cpu>
 template <int K, bool noWeights>
 int UnorderedRespHelperBest<algorithmFPType, cpu>::findSplitFewClasses(int nDiffFeatMax, size_t n, size_t nMinSplitPart,
                                                                        const ImpurityData & curImpurity, TSplitData & split,
-                                                                       const algorithmFPType minWeightLeaf, const algorithmFPType totalWeights,
+                                                                       const intermSummFPType minWeightLeaf, const intermSummFPType totalWeights,
                                                                        const IndexType iFeature) const
 {
-    auto nSamplesPerClass = _samplesPerClassBuf.get();
-    auto nFeatIdx         = _idxFeatureBuf.get();
+    intermSummFPType * nSamplesPerClass = _samplesPerClassBuf.get();
+    auto nFeatIdx                       = _idxFeatureBuf.get();
 
-    algorithmFPType bestImpDecrease =
-        split.impurityDecrease < 0 ? split.impurityDecrease : totalWeights * (split.impurityDecrease + algorithmFPType(1.) - curImpurity.var);
+    intermSummFPType bestImpDecrease =
+        split.impurityDecrease < 0 ? split.impurityDecrease : totalWeights * (split.impurityDecrease + 1.0 - curImpurity.var);
 
     //init histogram for the left part
     _histLeft.setAll(0);
-    auto histLeft               = _histLeft.get();
-    size_t nLeft                = 0;
-    algorithmFPType leftWeights = 0.;
-    int idxFeatureBestSplit     = -1; //index of best feature value in the array of sorted feature values
+    auto histLeft                = _histLeft.get();
+    size_t nLeft                 = 0;
+    intermSummFPType leftWeights = 0.;
+    int idxFeatureBestSplit      = -1; //index of best feature value in the array of sorted feature values
     for (size_t i = 0; i < nDiffFeatMax; ++i)
     {
-        algorithmFPType thisNFeatIdx(0);
+        intermSummFPType thisNFeatIdx = 0;
         if (noWeights)
         {
             for (size_t iClass = 0; iClass < K; ++iClass)
@@ -276,7 +276,7 @@ int UnorderedRespHelperBest<algorithmFPType, cpu>::findSplitFewClasses(int nDiff
 
         if (!thisNFeatIdx) continue;
 
-        algorithmFPType thisFeatWeights(0);
+        intermSummFPType thisFeatWeights = 0;
         if (noWeights)
         {
             thisFeatWeights = thisNFeatIdx;
@@ -306,9 +306,9 @@ int UnorderedRespHelperBest<algorithmFPType, cpu>::findSplitFewClasses(int nDiff
             for (size_t iClass = 0; iClass < K; ++iClass) histLeft[iClass] = nSamplesPerClass[i * K + iClass];
         }
 
-        auto histTotal           = curImpurity.hist.get();
-        algorithmFPType sumLeft  = 0;
-        algorithmFPType sumRight = 0;
+        auto histTotal            = curImpurity.hist.get();
+        intermSummFPType sumLeft  = 0;
+        intermSummFPType sumRight = 0;
 
         //proximal impurity improvement
         for (size_t iClass = 0; iClass < K; ++iClass)
@@ -317,7 +317,7 @@ int UnorderedRespHelperBest<algorithmFPType, cpu>::findSplitFewClasses(int nDiff
             sumRight += (histTotal[iClass] - histLeft[iClass]) * (histTotal[iClass] - histLeft[iClass]);
         }
 
-        const algorithmFPType decrease = sumLeft / leftWeights + sumRight / (totalWeights - leftWeights);
+        const intermSummFPType decrease = sumLeft / leftWeights + sumRight / (totalWeights - leftWeights);
         if (decrease > bestImpDecrease)
         {
             split.left.hist     = _histLeft;
@@ -330,7 +330,7 @@ int UnorderedRespHelperBest<algorithmFPType, cpu>::findSplitFewClasses(int nDiff
     }
     if (idxFeatureBestSplit >= 0)
     {
-        split.impurityDecrease = curImpurity.var + bestImpDecrease / totalWeights - algorithmFPType(1);
+        split.impurityDecrease = curImpurity.var + bestImpDecrease / totalWeights - 1.0;
         split.totalWeights     = totalWeights;
     }
 
@@ -342,34 +342,34 @@ template <bool noWeights>
 bool UnorderedRespHelperBest<algorithmFPType, cpu>::findSplitOrderedFeature(const algorithmFPType * featureVal, const IndexType * aIdx, size_t n,
                                                                             size_t nMinSplitPart, const algorithmFPType accuracy,
                                                                             const ImpurityData & curImpurity, TSplitData & split,
-                                                                            const algorithmFPType minWeightLeaf, algorithmFPType totalWeights) const
+                                                                            const intermSummFPType minWeightLeaf, intermSummFPType totalWeights) const
 {
     ClassIndexType iClass = this->_aResponse[aIdx[0]].val;
     _impLeft.init(_nClasses);
     _impRight = curImpurity;
 
-    const bool bBestFromOtherFeatures      = isPositive<algorithmFPType, cpu>(split.impurityDecrease);
-    algorithmFPType vBestFromOtherFeatures = algorithmFPType(-1);
+    const bool bBestFromOtherFeatures       = isPositive<intermSummFPType, cpu>(split.impurityDecrease);
+    intermSummFPType vBestFromOtherFeatures = -1.0;
     if (noWeights)
     {
-        vBestFromOtherFeatures = bBestFromOtherFeatures ? algorithmFPType(n) * (curImpurity.var - split.impurityDecrease) : algorithmFPType(-1);
+        vBestFromOtherFeatures = bBestFromOtherFeatures ? static_cast<intermSummFPType>(n) * (curImpurity.var - split.impurityDecrease) : -1.0;
     }
     else
     {
-        vBestFromOtherFeatures = bBestFromOtherFeatures ? totalWeights * (curImpurity.var - split.impurityDecrease) : algorithmFPType(-1);
+        vBestFromOtherFeatures = bBestFromOtherFeatures ? totalWeights * (curImpurity.var - split.impurityDecrease) : -1.0;
     }
 
-    bool bFound           = false;
-    algorithmFPType vBest = algorithmFPType(-1);
-    IndexType iBest       = -1;
+    bool bFound            = false;
+    intermSummFPType vBest = -1.0;
+    IndexType iBest        = -1;
 
-    double nEqualRespValues      = this->_aWeights[aIdx[0]].val;
-    double iStartEqualRespValues = double(0);
-    algorithmFPType leftWeights  = algorithmFPType(0);
-    const algorithmFPType last   = featureVal[n - nMinSplitPart];
+    intermSummFPType nEqualRespValues      = this->_aWeights[aIdx[0]].val;
+    intermSummFPType iStartEqualRespValues = 0;
+    intermSummFPType leftWeights           = 0;
+    const algorithmFPType last             = featureVal[n - nMinSplitPart];
     for (size_t i = 1; i < (n - nMinSplitPart + 1); ++i)
     {
-        const algorithmFPType weights = this->_aWeights[aIdx[i]].val;
+        const intermSummFPType weights = this->_aWeights[aIdx[i]].val;
         const bool bSameFeaturePrev(featureVal[i] <= featureVal[i - 1] + accuracy);
         leftWeights += this->_aWeights[aIdx[i - 1]].val;
         if (bSameFeaturePrev || (i < nMinSplitPart) || (leftWeights < minWeightLeaf) || (totalWeights - leftWeights < minWeightLeaf))
@@ -403,19 +403,19 @@ bool UnorderedRespHelperBest<algorithmFPType, cpu>::findSplitOrderedFeature(cons
         iClass                = this->_aResponse[aIdx[i]].val;
         nEqualRespValues      = weights;
         iStartEqualRespValues = leftWeights;
-        if (!isPositive<algorithmFPType, cpu>(_impLeft.var)) _impLeft.var = 0;
-        if (!isPositive<algorithmFPType, cpu>(_impRight.var)) _impRight.var = 0;
+        if (!isPositive<intermSummFPType, cpu>(_impLeft.var)) _impLeft.var = 0;
+        if (!isPositive<intermSummFPType, cpu>(_impRight.var)) _impRight.var = 0;
 
-        const algorithmFPType v = leftWeights * _impLeft.var + (totalWeights - leftWeights) * _impRight.var;
+        const intermSummFPType v = leftWeights * _impLeft.var + (totalWeights - leftWeights) * _impRight.var;
         if (iBest < 0)
         {
-            if (bBestFromOtherFeatures && isGreater<algorithmFPType, cpu>(v, vBestFromOtherFeatures))
+            if (bBestFromOtherFeatures && isGreater<intermSummFPType, cpu>(v, vBestFromOtherFeatures))
             {
                 if (featureVal[i] < last) continue;
                 break;
             }
         }
-        else if (isGreater<algorithmFPType, cpu>(v, vBest))
+        else if (isGreater<intermSummFPType, cpu>(v, vBest))
         {
             if (featureVal[i] < last) continue;
             break;
@@ -434,8 +434,8 @@ bool UnorderedRespHelperBest<algorithmFPType, cpu>::findSplitOrderedFeature(cons
     if (bFound)
     {
         DAAL_ASSERT(iBest > 0);
-        const algorithmFPType impurityDecrease = curImpurity.var - vBest / totalWeights;
-        DAAL_CHECK_STATUS_VAR(!(isZero<algorithmFPType, cpu>(impurityDecrease)));
+        const intermSummFPType impurityDecrease = curImpurity.var - vBest / totalWeights;
+        DAAL_CHECK_STATUS_VAR(!(isZero<intermSummFPType, cpu>(impurityDecrease)));
         split.impurityDecrease = impurityDecrease;
 #ifdef DEBUG_CHECK_IMPURITY
         checkImpurity(aIdx, split.nLeft, split.left);
@@ -454,27 +454,27 @@ template <typename algorithmFPType, CpuType cpu>
 bool UnorderedRespHelperBest<algorithmFPType, cpu>::findSplitCategoricalFeature(const algorithmFPType * featureVal, const IndexType * aIdx, size_t n,
                                                                                 size_t nMinSplitPart, const algorithmFPType accuracy,
                                                                                 const ImpurityData & curImpurity, TSplitData & split,
-                                                                                const algorithmFPType minWeightLeaf,
-                                                                                const algorithmFPType totalWeights) const
+                                                                                const intermSummFPType minWeightLeaf,
+                                                                                const intermSummFPType totalWeights) const
 {
     DAAL_ASSERT(n >= 2 * nMinSplitPart);
     _impRight.init(_nClasses);
     bool bFound                       = false;
     const bool bBestFromOtherFeatures = !(split.impurityDecrease < 0);
-    algorithmFPType vBest             = -1;
+    intermSummFPType vBest            = -1;
     IndexType iBest                   = -1;
 
-    const algorithmFPType vBestFromOtherFeatures = bBestFromOtherFeatures ? totalWeights * (curImpurity.var - split.impurityDecrease) : -1;
+    const intermSummFPType vBestFromOtherFeatures = bBestFromOtherFeatures ? totalWeights * (curImpurity.var - split.impurityDecrease) : -1;
     for (size_t i = 0; i < n - nMinSplitPart;)
     {
         _impLeft.init(_nClasses);
-        auto weights                = this->_aWeights[aIdx[i]].val;
-        size_t count                = 1;
-        algorithmFPType leftWeights = weights;
-        const algorithmFPType first = featureVal[i];
-        ClassIndexType xi           = this->_aResponse[aIdx[i]].val;
-        _impLeft.hist[xi]           = weights;
-        const size_t iStart         = i;
+        auto weights                 = this->_aWeights[aIdx[i]].val;
+        size_t count                 = 1;
+        intermSummFPType leftWeights = weights;
+        const algorithmFPType first  = featureVal[i];
+        ClassIndexType xi            = this->_aResponse[aIdx[i]].val;
+        _impLeft.hist[xi]            = weights;
+        const size_t iStart          = i;
         for (++i; (i < n) && (featureVal[i] == first); ++count, ++i)
         {
             weights = this->_aWeights[aIdx[i]].val;
@@ -490,12 +490,12 @@ bool UnorderedRespHelperBest<algorithmFPType, cpu>::findSplitCategoricalFeature(
         for (size_t j = 0; j < _nClasses; ++j) _impRight.hist[j] = curImpurity.hist[j] - _impLeft.hist[j];
         calcGini(leftWeights, _impLeft);
         calcGini(totalWeights - leftWeights, _impRight);
-        const algorithmFPType v = leftWeights * _impLeft.var + (totalWeights - leftWeights) * _impRight.var;
+        const intermSummFPType v = leftWeights * _impLeft.var + (totalWeights - leftWeights) * _impRight.var;
         if (iBest < 0)
         {
-            if (bBestFromOtherFeatures && isGreater<algorithmFPType, cpu>(v, vBestFromOtherFeatures)) continue;
+            if (bBestFromOtherFeatures && isGreater<intermSummFPType, cpu>(v, vBestFromOtherFeatures)) continue;
         }
-        else if (isGreater<algorithmFPType, cpu>(v, vBest))
+        else if (isGreater<intermSummFPType, cpu>(v, vBest))
             continue;
         iBest              = i;
         vBest              = v;
@@ -510,8 +510,8 @@ bool UnorderedRespHelperBest<algorithmFPType, cpu>::findSplitCategoricalFeature(
     }
     if (bFound)
     {
-        const algorithmFPType impurityDecrease = curImpurity.var - vBest / totalWeights;
-        DAAL_CHECK_STATUS_VAR(!(isZero<algorithmFPType, cpu>(impurityDecrease)));
+        const intermSummFPType impurityDecrease = curImpurity.var - vBest / totalWeights;
+        DAAL_CHECK_STATUS_VAR(!(isZero<intermSummFPType, cpu>(impurityDecrease)));
         split.impurityDecrease = impurityDecrease;
         DAAL_ASSERT(split.nLeft >= nMinSplitPart);
         DAAL_ASSERT((n - split.nLeft) >= nMinSplitPart);
@@ -532,10 +532,10 @@ template <typename algorithmFPType, CpuType cpu, typename crtp>
 class RespHelperBase : public UnorderedRespHelperBest<algorithmFPType, cpu>
 {
 public:
+    using intermSummFPType = typename UnorderedRespHelperBest<algorithmFPType, cpu>::intermSummFPType;
     typedef ClassIndexType TResponse;
     typedef typename dtrees::internal::TreeImpClassification<> TreeType;
     typedef typename TreeType::NodeType NodeType;
-    typedef double intermSummFPType;
     using super        = UnorderedRespHelperBest<algorithmFPType, cpu>;
     using Histogramm   = typename super::Histogramm;
     using ImpurityData = typename super::ImpurityData;
@@ -560,11 +560,11 @@ public:
     }
 
     template <bool noWeights>
-    void calcImpurity(const IndexType * aIdx, size_t n, ImpurityData & imp, double & totalweights) const;
+    void calcImpurity(const IndexType * aIdx, size_t n, ImpurityData & imp, intermSummFPType & totalweights) const;
 
     bool findSplitForFeature(const algorithmFPType * featureVal, const IndexType * aIdx, size_t n, size_t nMinSplitPart,
                              const algorithmFPType accuracy, const ImpurityData & curImpurity, TSplitData & split,
-                             const algorithmFPType minWeightLeaf, const algorithmFPType totalWeights) const
+                             const intermSummFPType minWeightLeaf, const intermSummFPType totalWeights) const
     {
         const bool noWeights = !this->_weights;
         if (noWeights)
@@ -585,9 +585,9 @@ public:
     bool terminateCriteria(ImpurityData & imp, algorithmFPType impurityThreshold, size_t nSamples) const { return imp.value() < impurityThreshold; }
 
     template <typename BinIndexType>
-    int findSplitForFeatureSorted(algorithmFPType * featureBuf, IndexType iFeature, const IndexType * aIdx, size_t n, size_t nMinSplitPart,
-                                  const ImpurityData & curImpurity, TSplitData & split, const algorithmFPType minWeightLeaf,
-                                  const algorithmFPType totalWeights, const BinIndexType * binIndex) const;
+    int findSplitForFeatureSorted(intermSummFPType * featureBuf, IndexType iFeature, const IndexType * aIdx, size_t n, size_t nMinSplitPart,
+                                  const ImpurityData & curImpurity, TSplitData & split, const intermSummFPType minWeightLeaf,
+                                  const intermSummFPType totalWeights, const BinIndexType * binIndex) const;
     template <typename BinIndexType>
     void computeHistFewClassesWithoutWeights(IndexType iFeature, const IndexType * aIdx, const BinIndexType * binIndex, size_t n) const;
     template <typename BinIndexType>
@@ -597,7 +597,7 @@ public:
 
     template <bool noWeights>
     int findSplitFewClassesDispatch(int nDiffFeatMax, size_t n, size_t nMinSplitPart, const ImpurityData & curImpurity, TSplitData & split,
-                                    const algorithmFPType minWeightLeaf, const algorithmFPType totalWeights, const IndexType iFeature) const;
+                                    const intermSummFPType minWeightLeaf, const intermSummFPType totalWeights, const IndexType iFeature) const;
 
     template <bool noWeights, typename BinIndexType>
     void finalizeBestSplit(const IndexType * aIdx, const BinIndexType * binIndex, size_t n, IndexType iFeature, size_t idxFeatureValueBestSplit,
@@ -721,7 +721,8 @@ bool RespHelperBase<algorithmFPType, cpu, crtp>::init(const NumericTable * data,
 
 template <typename algorithmFPType, CpuType cpu, typename crtp>
 template <bool noWeights>
-void RespHelperBase<algorithmFPType, cpu, crtp>::calcImpurity(const IndexType * aIdx, size_t n, ImpurityData & imp, double & totalWeights) const
+void RespHelperBase<algorithmFPType, cpu, crtp>::calcImpurity(const IndexType * aIdx, size_t n, ImpurityData & imp,
+                                                              intermSummFPType & totalWeights) const
 {
     imp.init(this->_nClasses);
     if (noWeights)
@@ -730,9 +731,9 @@ void RespHelperBase<algorithmFPType, cpu, crtp>::calcImpurity(const IndexType *
         for (size_t i = 0; i < n; ++i)
         {
             const ClassIndexType iClass = this->_aResponse[aIdx[i]].val;
-            imp.hist[iClass] += algorithmFPType(1);
+            imp.hist[iClass] += 1.0;
         }
-        totalWeights = double(n);
+        totalWeights = static_cast<intermSummFPType>(n);
     }
     else
     {
@@ -775,9 +776,8 @@ template <typename BinIndexType>
 void RespHelperBase<algorithmFPType, cpu, crtp>::computeHistFewClassesWithoutWeights(IndexType iFeature, const IndexType * aIdx,
                                                                                      const BinIndexType * binIndex, size_t n) const
 {
-    const algorithmFPType one(1.0);
-    const auto aResponse  = this->_aResponse.get();
-    auto nSamplesPerClass = this->_samplesPerClassBuf.get();
+    const auto aResponse                = this->_aResponse.get();
+    intermSummFPType * nSamplesPerClass = this->_samplesPerClassBuf.get();
     {
         for (size_t i = 0; i < n; ++i)
         {
@@ -786,7 +786,7 @@ void RespHelperBase<algorithmFPType, cpu, crtp>::computeHistFewClassesWithoutWei
 
             const BinIndexType idx      = binIndex[r.idx];
             const ClassIndexType iClass = r.val;
-            nSamplesPerClass[idx * this->_nClasses + iClass] += one;
+            nSamplesPerClass[idx * this->_nClasses + iClass] += 1.0;
         }
     }
 }
@@ -799,8 +799,8 @@ void RespHelperBase<algorithmFPType, cpu, crtp>::computeHistFewClassesWithWeight
     const auto aResponse = this->_aResponse.get();
     const auto aWeights  = this->_aWeights.get();
 
-    auto nFeatIdx         = this->_idxFeatureBuf.get();
-    auto nSamplesPerClass = this->_samplesPerClassBuf.get();
+    auto nFeatIdx                       = this->_idxFeatureBuf.get();
+    intermSummFPType * nSamplesPerClass = this->_samplesPerClassBuf.get();
 
     {
         for (size_t i = 0; i < n; ++i)
@@ -809,8 +809,8 @@ void RespHelperBase<algorithmFPType, cpu, crtp>::computeHistFewClassesWithWeight
             const auto & r          = aResponse[aIdx[i]];
             const BinIndexType idx  = binIndex[r.idx];
             ++nFeatIdx[idx];
-            const auto weights          = aWeights[iSample].val;
-            const ClassIndexType iClass = r.val;
+            const intermSummFPType weights = aWeights[iSample].val;
+            const ClassIndexType iClass    = r.val;
             nSamplesPerClass[idx * this->_nClasses + iClass] += weights;
         }
     }
@@ -824,9 +824,9 @@ void RespHelperBase<algorithmFPType, cpu, crtp>::computeHistManyClasses(IndexTyp
     const auto aResponse = this->_aResponse.get();
     const auto aWeights  = this->_aWeights.get();
 
-    auto nFeatIdx         = this->_idxFeatureBuf.get();
-    auto featWeights      = this->_weightsFeatureBuf.get();
-    auto nSamplesPerClass = this->_samplesPerClassBuf.get();
+    auto nFeatIdx                       = this->_idxFeatureBuf.get();
+    intermSummFPType * featWeights      = this->_weightsFeatureBuf.get();
+    intermSummFPType * nSamplesPerClass = this->_samplesPerClassBuf.get();
 
     {
         for (size_t i = 0; i < n; ++i)
@@ -835,8 +835,8 @@ void RespHelperBase<algorithmFPType, cpu, crtp>::computeHistManyClasses(IndexTyp
             const auto & r          = aResponse[aIdx[i]];
             const BinIndexType idx  = binIndex[r.idx];
             ++nFeatIdx[idx];
-            const auto weights          = aWeights[iSample].val;
-            const ClassIndexType iClass = r.val;
+            const intermSummFPType weights = aWeights[iSample].val;
+            const ClassIndexType iClass    = r.val;
             featWeights[idx] += weights; //use for calculate leftWeights
             nSamplesPerClass[idx * this->_nClasses + iClass] += weights;
         }
@@ -847,7 +847,7 @@ template <typename algorithmFPType, CpuType cpu, typename crtp>
 template <bool noWeights>
 int RespHelperBase<algorithmFPType, cpu, crtp>::findSplitFewClassesDispatch(int nDiffFeatMax, size_t n, size_t nMinSplitPart,
                                                                             const ImpurityData & curImpurity, TSplitData & split,
-                                                                            const algorithmFPType minWeightLeaf, const algorithmFPType totalWeights,
+                                                                            const intermSummFPType minWeightLeaf, const intermSummFPType totalWeights,
                                                                             const IndexType iFeature) const
 {
     DAAL_ASSERT(this->_nClasses <= this->_nClassesThreshold);
@@ -880,10 +880,10 @@ int RespHelperBase<algorithmFPType, cpu, crtp>::findSplitFewClassesDispatch(int
 
 template <typename algorithmFPType, CpuType cpu, typename crtp>
 template <typename BinIndexType>
-int RespHelperBase<algorithmFPType, cpu, crtp>::findSplitForFeatureSorted(algorithmFPType * featureBuf, IndexType iFeature, const IndexType * aIdx,
+int RespHelperBase<algorithmFPType, cpu, crtp>::findSplitForFeatureSorted(intermSummFPType * featureBuf, IndexType iFeature, const IndexType * aIdx,
                                                                           size_t n, size_t nMinSplitPart, const ImpurityData & curImpurity,
-                                                                          TSplitData & split, const algorithmFPType minWeightLeaf,
-                                                                          const algorithmFPType totalWeights, const BinIndexType * binIndex) const
+                                                                          TSplitData & split, const intermSummFPType minWeightLeaf,
+                                                                          const intermSummFPType totalWeights, const BinIndexType * binIndex) const
 {
     const auto nDiffFeatMax = this->indexedFeatures().numIndices(iFeature);
     this->_samplesPerClassBuf.setValues(nClasses() * nDiffFeatMax, 0);
@@ -911,7 +911,7 @@ int RespHelperBase<algorithmFPType, cpu, crtp>::findSplitForFeatureSorted(algori
     else
     {
         // nSamplesPerClass, nFeatIdx and featWeights - computed
-        this->_weightsFeatureBuf.setValues(nDiffFeatMax, algorithmFPType(0));
+        this->_weightsFeatureBuf.setValues(nDiffFeatMax, intermSummFPType(0));
         this->_idxFeatureBuf.setValues(nDiffFeatMax, algorithmFPType(0));
         computeHistManyClasses(iFeature, aIdx, binIndex, n);
         idxFeatureBestSplit = static_cast<const crtp *>(this)->findSplitByHistDefault(nDiffFeatMax, n, nMinSplitPart, curImpurity, split,
@@ -929,14 +929,14 @@ void RespHelperBase<algorithmFPType, cpu, crtp>::finalizeBestSplit(const IndexTy
 {
     DAAL_ASSERT(bestSplit.nLeft > 0);
     DAAL_ASSERT(bestSplit.leftWeights > 0.);
-    algorithmFPType divL = algorithmFPType(1);
+    intermSummFPType divL = 1.0;
     if (noWeights)
     {
-        divL = algorithmFPType(1) / algorithmFPType(bestSplit.nLeft);
+        divL = 1.0 / static_cast<intermSummFPType>(bestSplit.nLeft);
     }
     else
     {
-        divL = isZero<algorithmFPType, cpu>(bestSplit.leftWeights) ? algorithmFPType(1.) : (algorithmFPType(1.) / bestSplit.leftWeights);
+        divL = isZero<intermSummFPType, cpu>(bestSplit.leftWeights) ? 1.0 : (1.0 / static_cast<intermSummFPType>(bestSplit.leftWeights));
     }
     bestSplit.left.var            = 1. - bestSplit.left.var * divL * divL; // Gini node impurity
     IndexType * bestSplitIdxRight = bestSplitIdx + bestSplit.nLeft;
@@ -986,9 +986,10 @@ template <typename algorithmFPType, CpuType cpu>
 class UnorderedRespHelperRandom : public RespHelperBase<algorithmFPType, cpu, UnorderedRespHelperRandom<algorithmFPType, cpu> >
 {
 public:
-    using Histogramm   = typename RespHelperBase<algorithmFPType, cpu, UnorderedRespHelperRandom<algorithmFPType, cpu> >::Histogramm;
-    using ImpurityData = typename RespHelperBase<algorithmFPType, cpu, UnorderedRespHelperRandom<algorithmFPType, cpu> >::ImpurityData;
-    using TSplitData   = typename RespHelperBase<algorithmFPType, cpu, UnorderedRespHelperRandom<algorithmFPType, cpu> >::TSplitData;
+    using intermSummFPType = typename RespHelperBase<algorithmFPType, cpu, UnorderedRespHelperRandom<algorithmFPType, cpu> >::intermSummFPType;
+    using Histogramm       = typename RespHelperBase<algorithmFPType, cpu, UnorderedRespHelperRandom<algorithmFPType, cpu> >::Histogramm;
+    using ImpurityData     = typename RespHelperBase<algorithmFPType, cpu, UnorderedRespHelperRandom<algorithmFPType, cpu> >::ImpurityData;
+    using TSplitData       = typename RespHelperBase<algorithmFPType, cpu, UnorderedRespHelperRandom<algorithmFPType, cpu> >::TSplitData;
 
 public:
     UnorderedRespHelperRandom(const dtrees::internal::IndexedFeatures * indexedFeatures, size_t nClasses,
@@ -999,20 +1000,20 @@ public:
     size_t genRandomBinIdx(const IndexType iFeature, const size_t minidx, const size_t maxidx) const;
 
     int findSplitByHistDefault(int nDiffFeatMax, size_t n, size_t nMinSplitPart, const ImpurityData & curImpurity, TSplitData & split,
-                               const algorithmFPType minWeightLeaf, const algorithmFPType totalWeights, const IndexType iFeature) const;
+                               const intermSummFPType minWeightLeaf, const intermSummFPType totalWeights, const IndexType iFeature) const;
 
     template <int K, bool noWeights>
     int findSplitFewClasses(int nDiffFeatMax, size_t n, size_t nMinSplitPart, const ImpurityData & curImpurity, TSplitData & split,
-                            const algorithmFPType minWeightLeaf, const algorithmFPType totalWeights, const IndexType iFeature) const;
+                            const intermSummFPType minWeightLeaf, const intermSummFPType totalWeights, const IndexType iFeature) const;
 
     template <bool noWeights>
     bool findSplitOrderedFeature(const algorithmFPType * featureVal, const IndexType * aIdx, size_t n, size_t nMinSplitPart,
                                  const algorithmFPType accuracy, const ImpurityData & curImpurity, TSplitData & split,
-                                 const algorithmFPType minWeightLeaf, const algorithmFPType totalWeights) const;
+                                 const intermSummFPType minWeightLeaf, const intermSummFPType totalWeights) const;
 
     bool findSplitCategoricalFeature(const algorithmFPType * featureVal, const IndexType * aIdx, size_t n, size_t nMinSplitPart,
                                      const algorithmFPType accuracy, const ImpurityData & curImpurity, TSplitData & split,
-                                     const algorithmFPType minWeightLeaf, const algorithmFPType totalWeights) const;
+                                     const intermSummFPType minWeightLeaf, const intermSummFPType totalWeights) const;
 };
 
 template <typename algorithmFPType, CpuType cpu>
@@ -1046,22 +1047,22 @@ size_t UnorderedRespHelperRandom<algorithmFPType, cpu>::genRandomBinIdx(const In
 template <typename algorithmFPType, CpuType cpu>
 int UnorderedRespHelperRandom<algorithmFPType, cpu>::findSplitByHistDefault(int nDiffFeatMax, size_t n, size_t nMinSplitPart,
                                                                             const ImpurityData & curImpurity, TSplitData & split,
-                                                                            const algorithmFPType minWeightLeaf, const algorithmFPType totalWeights,
+                                                                            const intermSummFPType minWeightLeaf, const intermSummFPType totalWeights,
                                                                             const IndexType iFeature) const
 {
-    auto nFeatIdx         = this->_idxFeatureBuf.get();
-    auto featWeights      = this->_weightsFeatureBuf.get();
-    auto nSamplesPerClass = this->_samplesPerClassBuf.get();
+    auto nFeatIdx                       = this->_idxFeatureBuf.get();
+    intermSummFPType * featWeights      = this->_weightsFeatureBuf.get();
+    intermSummFPType * nSamplesPerClass = this->_samplesPerClassBuf.get();
 
-    algorithmFPType bestImpDecrease =
-        split.impurityDecrease < 0 ? split.impurityDecrease : totalWeights * (split.impurityDecrease + algorithmFPType(1.) - curImpurity.var);
+    intermSummFPType bestImpDecrease =
+        split.impurityDecrease < 0 ? split.impurityDecrease : totalWeights * (split.impurityDecrease + 1.0 - curImpurity.var);
 
     //init histogram for the left part
     this->_histLeft.setAll(0);
-    auto histLeft               = this->_histLeft.get();
-    size_t nLeft                = 0;
-    algorithmFPType leftWeights = 0.;
-    int idxFeatureBestSplit     = -1; //index of best feature value in the array of sorted feature values
+    auto histLeft                = this->_histLeft.get();
+    size_t nLeft                 = 0;
+    intermSummFPType leftWeights = 0.;
+    int idxFeatureBestSplit      = -1; //index of best feature value in the array of sorted feature values
 
     size_t minidx = 0;
     size_t maxidx = nDiffFeatMax - 1;
@@ -1117,9 +1118,9 @@ int UnorderedRespHelperRandom<algorithmFPType, cpu>::findSplitByHistDefault(int
     if (!(((n - nLeft) < nMinSplitPart) || ((totalWeights - leftWeights) < minWeightLeaf) || (nLeft < nMinSplitPart)
           || (leftWeights < minWeightLeaf)))
     {
-        auto histTotal           = curImpurity.hist.get();
-        algorithmFPType sumLeft  = 0;
-        algorithmFPType sumRight = 0;
+        auto histTotal            = curImpurity.hist.get();
+        intermSummFPType sumLeft  = 0;
+        intermSummFPType sumRight = 0;
         PRAGMA_FORCE_SIMD
         PRAGMA_VECTOR_ALWAYS
         //proximal impurity improvement
@@ -1129,7 +1130,7 @@ int UnorderedRespHelperRandom<algorithmFPType, cpu>::findSplitByHistDefault(int
             sumRight += (histTotal[iClass] - histLeft[iClass]) * (histTotal[iClass] - histLeft[iClass]);
         }
 
-        const algorithmFPType decrease = sumLeft / leftWeights + sumRight / (totalWeights - leftWeights);
+        const intermSummFPType decrease = sumLeft / leftWeights + sumRight / (totalWeights - leftWeights);
         if (decrease > bestImpDecrease)
         {
             split.left.hist     = this->_histLeft;
@@ -1142,7 +1143,7 @@ int UnorderedRespHelperRandom<algorithmFPType, cpu>::findSplitByHistDefault(int
     }
     if (idxFeatureBestSplit >= 0)
     {
-        split.impurityDecrease = curImpurity.var + bestImpDecrease / totalWeights - algorithmFPType(1);
+        split.impurityDecrease = curImpurity.var + bestImpDecrease / totalWeights - 1.0;
         split.totalWeights     = totalWeights;
     }
 
@@ -1153,22 +1154,22 @@ template <typename algorithmFPType, CpuType cpu>
 template <int K, bool noWeights>
 int UnorderedRespHelperRandom<algorithmFPType, cpu>::findSplitFewClasses(int nDiffFeatMax, size_t n, size_t nMinSplitPart,
                                                                          const ImpurityData & curImpurity, TSplitData & split,
-                                                                         const algorithmFPType minWeightLeaf, const algorithmFPType totalWeights,
+                                                                         const intermSummFPType minWeightLeaf, const intermSummFPType totalWeights,
                                                                          const IndexType iFeature) const
 {
-    auto nSamplesPerClass = this->_samplesPerClassBuf.get();
-    auto nFeatIdx         = this->_idxFeatureBuf.get();
+    intermSummFPType * nSamplesPerClass = this->_samplesPerClassBuf.get();
+    auto nFeatIdx                       = this->_idxFeatureBuf.get();
 
-    algorithmFPType bestImpDecrease =
-        split.impurityDecrease < 0 ? split.impurityDecrease : totalWeights * (split.impurityDecrease + algorithmFPType(1.) - curImpurity.var);
+    intermSummFPType bestImpDecrease =
+        split.impurityDecrease < 0 ? split.impurityDecrease : totalWeights * (split.impurityDecrease + 1.0 - curImpurity.var);
 
     //init histogram for the left part
     this->_histLeft.setAll(0);
-    auto histLeft = this->_histLeft.get();
-    size_t nLeft  = 0;
-    algorithmFPType leftWeights(0);
-    algorithmFPType minWeights(0);
-    int idxFeatureBestSplit = -1; //index of best feature value in the array of sorted feature values
+    auto histLeft                = this->_histLeft.get();
+    size_t nLeft                 = 0;
+    intermSummFPType leftWeights = 0;
+    intermSummFPType minWeights  = 0;
+    int idxFeatureBestSplit      = -1; //index of best feature value in the array of sorted feature values
 
     size_t minidx = 0;
     size_t maxidx = nDiffFeatMax;
@@ -1180,9 +1181,9 @@ int UnorderedRespHelperRandom<algorithmFPType, cpu>::findSplitFewClasses(int nDi
     //solve for the min non-zero index
     if (noWeights)
     {
-        algorithmFPType thisNFeatIdx(0);
+        intermSummFPType thisNFeatIdx = 0;
         for (size_t iC = 0; iC < K; ++iC) thisNFeatIdx += nSamplesPerClass[iC];
-        while ((minidx < maxidx) && isZero<algorithmFPType, cpu>(thisNFeatIdx))
+        while ((minidx < maxidx) && isZero<intermSummFPType, cpu>(thisNFeatIdx))
         {
             minidx++;
 
@@ -1224,8 +1225,8 @@ int UnorderedRespHelperRandom<algorithmFPType, cpu>::findSplitFewClasses(int nDi
     //solve for the max non-zero index
     if (noWeights)
     {
-        algorithmFPType thisNFeatIdx(0);
-        while ((minidx < maxidx) && isZero<algorithmFPType, cpu>(thisNFeatIdx))
+        intermSummFPType thisNFeatIdx = 0;
+        while ((minidx < maxidx) && isZero<intermSummFPType, cpu>(thisNFeatIdx))
         {
             maxidx--;
             PRAGMA_FORCE_SIMD
@@ -1258,12 +1259,12 @@ int UnorderedRespHelperRandom<algorithmFPType, cpu>::findSplitFewClasses(int nDi
     if (noWeights)
     {
         //iterate idx down to a bin with values for FinalizeBestSplit
-        algorithmFPType thisNFeatIdx(0);
+        intermSummFPType thisNFeatIdx = 0;
 
         PRAGMA_FORCE_SIMD
         PRAGMA_VECTOR_ALWAYS
         for (size_t iC = 0; iC < K; ++iC) thisNFeatIdx += nSamplesPerClass[idx * K + iC];
-        while ((minidx < idx) && isZero<algorithmFPType, cpu>(thisNFeatIdx))
+        while ((minidx < idx) && isZero<intermSummFPType, cpu>(thisNFeatIdx))
         {
             idx--;
             for (size_t iClass = 0; iClass < K; ++iClass)
@@ -1272,7 +1273,7 @@ int UnorderedRespHelperRandom<algorithmFPType, cpu>::findSplitFewClasses(int nDi
             }
         }
 
-        DAAL_ASSERT(!(isZero<algorithmFPType, cpu>(thisNFeatIdx)))
+        DAAL_ASSERT(!(isZero<intermSummFPType, cpu>(thisNFeatIdx)))
 
         if (split.featureUnordered) //only need last index
         {
@@ -1292,8 +1293,7 @@ int UnorderedRespHelperRandom<algorithmFPType, cpu>::findSplitFewClasses(int nDi
 
         PRAGMA_FORCE_SIMD
         PRAGMA_VECTOR_ALWAYS
-        for (size_t iClass = 0; iClass < K; ++iClass)
-            leftWeights += histLeft[iClass]; //histleft is forced to float, and may cause issues with algorithmFPType = double
+        for (size_t iClass = 0; iClass < K; ++iClass) leftWeights += histLeft[iClass];
         nLeft = leftWeights;
     }
     else
@@ -1330,9 +1330,9 @@ int UnorderedRespHelperRandom<algorithmFPType, cpu>::findSplitFewClasses(int nDi
     if (!(((n - nLeft) < nMinSplitPart) || ((totalWeights - leftWeights) < minWeightLeaf) || (nLeft < nMinSplitPart)
           || (leftWeights < minWeightLeaf)))
     {
-        auto histTotal           = curImpurity.hist.get();
-        algorithmFPType sumLeft  = 0;
-        algorithmFPType sumRight = 0;
+        auto histTotal            = curImpurity.hist.get();
+        intermSummFPType sumLeft  = 0;
+        intermSummFPType sumRight = 0;
 
         //proximal impurity improvement
         for (size_t iClass = 0; iClass < K; ++iClass)
@@ -1341,7 +1341,7 @@ int UnorderedRespHelperRandom<algorithmFPType, cpu>::findSplitFewClasses(int nDi
             sumRight += (histTotal[iClass] - histLeft[iClass]) * (histTotal[iClass] - histLeft[iClass]);
         }
 
-        const algorithmFPType decrease = sumLeft / leftWeights + sumRight / (totalWeights - leftWeights);
+        const intermSummFPType decrease = sumLeft / leftWeights + sumRight / (totalWeights - leftWeights);
         if (decrease > bestImpDecrease)
         {
             split.left.hist     = this->_histLeft;
@@ -1354,7 +1354,7 @@ int UnorderedRespHelperRandom<algorithmFPType, cpu>::findSplitFewClasses(int nDi
     }
     if (idxFeatureBestSplit >= 0)
     {
-        split.impurityDecrease = curImpurity.var + bestImpDecrease / totalWeights - algorithmFPType(1);
+        split.impurityDecrease = curImpurity.var + bestImpDecrease / totalWeights - 1.0;
         split.totalWeights     = totalWeights;
     }
 
@@ -1366,18 +1366,19 @@ template <bool noWeights>
 bool UnorderedRespHelperRandom<algorithmFPType, cpu>::findSplitOrderedFeature(const algorithmFPType * featureVal, const IndexType * aIdx, size_t n,
                                                                               size_t nMinSplitPart, const algorithmFPType accuracy,
                                                                               const ImpurityData & curImpurity, TSplitData & split,
-                                                                              const algorithmFPType minWeightLeaf, algorithmFPType totalWeights) const
+                                                                              const intermSummFPType minWeightLeaf,
+                                                                              intermSummFPType totalWeights) const
 {
     this->_impLeft.init(this->_nClasses);
     this->_impRight = curImpurity;
 
-    const bool bBestFromOtherFeatures      = isPositive<algorithmFPType, cpu>(split.impurityDecrease);
-    algorithmFPType vBestFromOtherFeatures = bBestFromOtherFeatures ? totalWeights * (curImpurity.var - split.impurityDecrease) : algorithmFPType(-1);
+    const bool bBestFromOtherFeatures       = isPositive<intermSummFPType, cpu>(split.impurityDecrease);
+    intermSummFPType vBestFromOtherFeatures = bBestFromOtherFeatures ? totalWeights * (curImpurity.var - split.impurityDecrease) : -1.0;
 
-    bool bFound                 = false;
-    IndexType iBest             = -1;
-    algorithmFPType leftWeights = algorithmFPType(0);
-    algorithmFPType v           = algorithmFPType(0);
+    bool bFound                  = false;
+    IndexType iBest              = -1;
+    intermSummFPType leftWeights = 0;
+    intermSummFPType v           = 0;
     algorithmFPType idx;
     size_t i;
 
@@ -1417,7 +1418,7 @@ bool UnorderedRespHelperRandom<algorithmFPType, cpu>::findSplitOrderedFeature(co
         for (i = 0; i < r; ++i)
         {
             const ClassIndexType iClass = this->_aResponse[aIdx[i]].val;
-            this->_impLeft.hist[iClass] += algorithmFPType(1);
+            this->_impLeft.hist[iClass] += 1.0;
         }
         leftWeights = i;
     }
@@ -1449,13 +1450,13 @@ bool UnorderedRespHelperRandom<algorithmFPType, cpu>::findSplitOrderedFeature(co
     if ((leftWeights >= minWeightLeaf) && ((totalWeights - leftWeights) >= minWeightLeaf)) //it is a valid split with enought leaf weights
     {
         //check if bFound condition below
-        if (!isPositive<algorithmFPType, cpu>(this->_impLeft.var)) this->_impLeft.var = 0;   //set left impurity to 0 if negative
-        if (!isPositive<algorithmFPType, cpu>(this->_impRight.var)) this->_impRight.var = 0; //set right impurity to 0 if negative
+        if (!isPositive<intermSummFPType, cpu>(this->_impLeft.var)) this->_impLeft.var = 0;   //set left impurity to 0 if negative
+        if (!isPositive<intermSummFPType, cpu>(this->_impRight.var)) this->_impRight.var = 0; //set right impurity to 0 if negative
 
         v = leftWeights * this->_impLeft.var + (totalWeights - leftWeights) * this->_impRight.var; //calculate overall weighted Gini index
 
         if (!(bBestFromOtherFeatures
-              && isGreater<algorithmFPType, cpu>(v, vBestFromOtherFeatures))) //if it has a better weighted gini overwite parameters
+              && isGreater<intermSummFPType, cpu>(v, vBestFromOtherFeatures))) //if it has a better weighted gini overwite parameters
         {
             bFound             = true;
             split.left.var     = this->_impLeft.var;
@@ -1470,8 +1471,8 @@ bool UnorderedRespHelperRandom<algorithmFPType, cpu>::findSplitOrderedFeature(co
     if (bFound) //if new best found
     {
         DAAL_ASSERT(iBest > 0);
-        const algorithmFPType impurityDecrease = curImpurity.var - v / totalWeights;
-        DAAL_CHECK_STATUS_VAR(!(isZero<algorithmFPType, cpu>(impurityDecrease)));
+        const intermSummFPType impurityDecrease = curImpurity.var - v / totalWeights;
+        DAAL_CHECK_STATUS_VAR(!(isZero<intermSummFPType, cpu>(impurityDecrease)));
         split.impurityDecrease = impurityDecrease;
 #ifdef DEBUG_CHECK_IMPURITY
         checkImpurity(aIdx, split.nLeft, split.left);
@@ -1490,14 +1491,14 @@ template <typename algorithmFPType, CpuType cpu>
 bool UnorderedRespHelperRandom<algorithmFPType, cpu>::findSplitCategoricalFeature(const algorithmFPType * featureVal, const IndexType * aIdx,
                                                                                   size_t n, size_t nMinSplitPart, const algorithmFPType accuracy,
                                                                                   const ImpurityData & curImpurity, TSplitData & split,
-                                                                                  const algorithmFPType minWeightLeaf,
-                                                                                  const algorithmFPType totalWeights) const
+                                                                                  const intermSummFPType minWeightLeaf,
+                                                                                  const intermSummFPType totalWeights) const
 {
     DAAL_ASSERT(n >= 2 * nMinSplitPart);
     this->_impRight.init(this->_nClasses);
     bool bFound                       = false;
     const bool bBestFromOtherFeatures = !(split.impurityDecrease < 0);
-    algorithmFPType vBest             = -1;
+    intermSummFPType vBest            = -1;
     IndexType iBest                   = -1;
     algorithmFPType min               = featureVal[0];
     algorithmFPType max               = featureVal[0];
@@ -1521,7 +1522,7 @@ bool UnorderedRespHelperRandom<algorithmFPType, cpu>::findSplitCategoricalFeatur
     }
     //first is the closest categorical feature less than the idx O(n) computation as ordering of featureVal is unknown.
 
-    const algorithmFPType vBestFromOtherFeatures = bBestFromOtherFeatures ? totalWeights * (curImpurity.var - split.impurityDecrease) : -1;
+    const intermSummFPType vBestFromOtherFeatures = bBestFromOtherFeatures ? totalWeights * (curImpurity.var - split.impurityDecrease) : -1;
     for (size_t i = 0; i < n - nMinSplitPart;)
     {
         if (featureVal[i] != first)
@@ -1530,13 +1531,13 @@ bool UnorderedRespHelperRandom<algorithmFPType, cpu>::findSplitCategoricalFeatur
             continue;
         }
         this->_impLeft.init(this->_nClasses);
-        auto weights                = this->_aWeights[aIdx[i]].val;
-        size_t count                = 1;
-        algorithmFPType leftWeights = weights;
-        const algorithmFPType first = featureVal[i];
-        ClassIndexType xi           = this->_aResponse[aIdx[i]].val;
-        this->_impLeft.hist[xi]     = weights;
-        const size_t iStart         = i;
+        auto weights                 = this->_aWeights[aIdx[i]].val;
+        size_t count                 = 1;
+        intermSummFPType leftWeights = weights;
+        const algorithmFPType first  = featureVal[i];
+        ClassIndexType xi            = this->_aResponse[aIdx[i]].val;
+        this->_impLeft.hist[xi]      = weights;
+        const size_t iStart          = i;
         //there is an ordering to categorical features shown here that isn't described
         //its not clear if featureVal[i] == first will occur at a later point
         //but the for loop assumes that they could be grouped together in the array
@@ -1555,13 +1556,13 @@ bool UnorderedRespHelperRandom<algorithmFPType, cpu>::findSplitCategoricalFeatur
         for (size_t j = 0; j < this->_nClasses; ++j) this->_impRight.hist[j] = curImpurity.hist[j] - this->_impLeft.hist[j];
         this->calcGini(leftWeights, this->_impLeft);
         this->calcGini(totalWeights - leftWeights, this->_impRight);
-        const algorithmFPType v = leftWeights * this->_impLeft.var + (totalWeights - leftWeights) * this->_impRight.var;
+        const intermSummFPType v = leftWeights * this->_impLeft.var + (totalWeights - leftWeights) * this->_impRight.var;
 
         if (iBest < 0)
         {
-            if (bBestFromOtherFeatures && isGreater<algorithmFPType, cpu>(v, vBestFromOtherFeatures)) continue;
+            if (bBestFromOtherFeatures && isGreater<intermSummFPType, cpu>(v, vBestFromOtherFeatures)) continue;
         }
-        else if (isGreater<algorithmFPType, cpu>(v, vBest))
+        else if (isGreater<intermSummFPType, cpu>(v, vBest))
             continue;
         iBest              = i;
         vBest              = v;
@@ -1576,8 +1577,8 @@ bool UnorderedRespHelperRandom<algorithmFPType, cpu>::findSplitCategoricalFeatur
     }
     if (bFound)
     {
-        const algorithmFPType impurityDecrease = curImpurity.var - vBest / totalWeights;
-        DAAL_CHECK_STATUS_VAR(!(isZero<algorithmFPType, cpu>(impurityDecrease)));
+        const intermSummFPType impurityDecrease = curImpurity.var - vBest / totalWeights;
+        DAAL_CHECK_STATUS_VAR(!(isZero<intermSummFPType, cpu>(impurityDecrease)));
         split.impurityDecrease = impurityDecrease;
         DAAL_ASSERT(split.nLeft >= nMinSplitPart);
         DAAL_ASSERT((n - split.nLeft) >= nMinSplitPart);
@@ -1595,6 +1596,7 @@ class TreeThreadCtx : public TreeThreadCtxBase<algorithmFPType, cpu>
 {
 public:
     typedef TreeThreadCtxBase<algorithmFPType, cpu> super;
+    using intermSummFPType = typename super::intermSummFPType;
     TreeThreadCtx(algorithmFPType * _varImp = nullptr) : super(_varImp), _nClasses(0) {}
     bool init(const decision_forest::training::Parameter & par, const NumericTable * x, size_t nClasses)
     {
@@ -1645,16 +1647,16 @@ Status TreeThreadCtx<algorithmFPType, cpu>::finalizeOOBError(const NumericTable
     DAAL_CHECK_BLOCK_STATUS(y);
     Atomic<size_t> nPredicted(0);
     Atomic<size_t> nError(0);
-    const algorithmFPType eps = services::internal::EpsilonVal<algorithmFPType>::get();
+    const intermSummFPType eps = services::internal::EpsilonVal<algorithmFPType>::get();
     daal::threader_for(nSamples, nSamples, [&](size_t i) {
         const OOBClassificationData * ptr = ((const OOBClassificationData *)this->oobBuf) + i * _nClasses;
         const size_t classLabel(y.get()[i]);
         size_t maxIdx                = 0;
-        algorithmFPType sum          = static_cast<algorithmFPType>(ptr[0]);
+        intermSummFPType sum         = static_cast<intermSummFPType>(ptr[0]); // widen straight to the accumulator type, as in the loop below
         OOBClassificationData maxVal = ptr[0];
         for (size_t j = 1; j < _nClasses; ++j)
         {
-            sum += static_cast<algorithmFPType>(ptr[j]);
+            sum += static_cast<intermSummFPType>(ptr[j]);
             if (maxVal < ptr[j])
             {
                 maxVal = ptr[j];
@@ -1666,7 +1668,7 @@ Status TreeThreadCtx<algorithmFPType, cpu>::finalizeOOBError(const NumericTable
             for (size_t j = 0; j < _nClasses; ++j)
             {
                 resDecisionFunction[i * _nClasses + j] =
-                    static_cast<algorithmFPType>(ptr[j]) / services::internal::max<cpu, algorithmFPType>(sum, eps);
+                    static_cast<intermSummFPType>(ptr[j]) / services::internal::max<cpu, intermSummFPType>(sum, eps);
             }
         }
         if (maxVal == 0)
@@ -1682,9 +1684,10 @@ Status TreeThreadCtx<algorithmFPType, cpu>::finalizeOOBError(const NumericTable
         }
         if (resPerObs) resPerObs[i] = algorithmFPType(maxIdx != classLabel);
     });
-    if (res) *res = nPredicted.get() ? algorithmFPType(nError.get()) / algorithmFPType(nPredicted.get()) : 0;
+    if (res) *res = nPredicted.get() ? static_cast<intermSummFPType>(nError.get()) / static_cast<intermSummFPType>(nPredicted.get()) : 0;
     if (resAccuracy)
-        *resAccuracy = nPredicted.get() ? algorithmFPType(1) - algorithmFPType(nError.get()) / algorithmFPType(nPredicted.get()) : algorithmFPType(1);
+        *resAccuracy = nPredicted.get() ? 1.0 - static_cast<intermSummFPType>(nError.get()) / static_cast<intermSummFPType>(nPredicted.get()) :
+                                          algorithmFPType(1);
     return Status();
 }
 
diff --git a/cpp/daal/src/algorithms/dtrees/forest/df_train_dense_default_impl.i b/cpp/daal/src/algorithms/dtrees/forest/df_train_dense_default_impl.i
index 7321356c41b..cb455b59a04 100644
--- a/cpp/daal/src/algorithms/dtrees/forest/df_train_dense_default_impl.i
+++ b/cpp/daal/src/algorithms/dtrees/forest/df_train_dense_default_impl.i
@@ -163,6 +163,7 @@ template <typename algorithmFPType, CpuType cpu>
 class TreeThreadCtxBase
 {
 public:
+    typedef double intermSummFPType;
     TreeThreadCtxBase(algorithmFPType * _varImp = nullptr) : varImp(_varImp), varImpVariance(nullptr), nTrees(0), oobBuf(nullptr) {}
     ~TreeThreadCtxBase()
     {
@@ -510,11 +511,13 @@ class TrainBatchTaskBase
 {
 public:
     typedef TreeThreadCtxBase<algorithmFPType, cpu> ThreadCtxType;
+    using intermSummFPType = typename ThreadCtxType::intermSummFPType;
 
     services::Status run(engines::internal::BatchBaseImpl * engineImpl, dtrees::internal::Tree *& pTree, size_t & numElems);
 
 protected:
     typedef dtrees::internal::TVector<algorithmFPType, cpu> algorithmFPTypeArray;
+    typedef dtrees::internal::TVector<intermSummFPType, cpu> intermSummFPTypeArray;
     typedef dtrees::internal::TVector<IndexType, cpu> IndexTypeArray;
     TrainBatchTaskBase(HostAppIface * hostApp, const NumericTable * x, const NumericTable * y, const NumericTable * w, const Parameter & par,
                        const dtrees::internal::FeatureTypes & featTypes, const dtrees::internal::IndexedFeatures * indexedFeatures,
@@ -536,7 +539,7 @@ protected:
           _accuracy(daal::services::internal::EpsilonVal<algorithmFPType>::get()),
           _minSamplesSplit(2),
           _minWeightLeaf(0.),
-          _minImpurityDecrease(-daal::services::internal::EpsilonVal<algorithmFPType>::get() * x->getNumberOfRows()),
+          _minImpurityDecrease(-daal::services::internal::EpsilonVal<intermSummFPType>::get() * x->getNumberOfRows()),
           _maxLeafNodes(0),
           _useConstFeatures(false),
           _memorySavingMode(false),
@@ -556,8 +559,8 @@ protected:
                 const size_t firstRow = 0;
                 const size_t lastRow  = x->getNumberOfRows();
                 ReadRows<algorithmFPType, cpu> bd(const_cast<NumericTable *>(_weights), firstRow, lastRow - firstRow + 1);
-                const auto pbd               = bd.get();
-                algorithmFPType totalWeights = 0.0;
+                const auto pbd                = bd.get();
+                intermSummFPType totalWeights = 0.0;
                 PRAGMA_VECTOR_ALWAYS
                 for (size_t i = 0; i < lastRow; ++i)
                 {
@@ -565,13 +568,13 @@ protected:
                 }
                 _minWeightLeaf = par.minWeightFractionInLeafNode * totalWeights;
                 _minImpurityDecrease =
-                    par.minImpurityDecreaseInSplitNode * totalWeights - daal::services::internal::EpsilonVal<algorithmFPType>::get() * totalWeights;
+                    par.minImpurityDecreaseInSplitNode * totalWeights - daal::services::internal::EpsilonVal<intermSummFPType>::get() * totalWeights;
             }
             else
             {
                 _minWeightLeaf       = par.minWeightFractionInLeafNode * x->getNumberOfRows();
                 _minImpurityDecrease = par.minImpurityDecreaseInSplitNode * x->getNumberOfRows()
-                                       - daal::services::internal::EpsilonVal<algorithmFPType>::get() * x->getNumberOfRows();
+                                       - daal::services::internal::EpsilonVal<intermSummFPType>::get() * x->getNumberOfRows();
             }
             _maxLeafNodes = par.maxLeafNodes;
         }
@@ -580,25 +583,30 @@ protected:
     size_t nFeatures() const { return _data->getNumberOfColumns(); }
     typename DataHelper::NodeType::Base * buildDepthFirst(services::Status & s, size_t iStart, size_t n, size_t level,
                                                           typename DataHelper::ImpurityData & curImpurity, bool & bUnorderedFeaturesUsed,
-                                                          size_t nClasses, algorithmFPType totalWeights);
+                                                          size_t nClasses, intermSummFPType totalWeights);
     typename DataHelper::NodeType::Base * buildBestFirst(services::Status & s, size_t iStart, size_t n, size_t level,
                                                          typename DataHelper::ImpurityData & curImpurity, bool & bUnorderedFeaturesUsed,
-                                                         size_t nClasses, algorithmFPType totalWeights);
+                                                         size_t nClasses, intermSummFPType totalWeights);
     template <typename WorkItem>
     typename DataHelper::NodeType::Base * buildNode(const size_t level, const size_t nClasses, size_t & remainingSplitNodes, WorkItem & item,
                                                     typename DataHelper::ImpurityData & impurity);
 
-    algorithmFPType * featureBuf(size_t iBuf) const
+    intermSummFPType * featureBuf(size_t iBuf) const
     {
         DAAL_ASSERT(iBuf < _nFeatureBufs);
         return _aFeatureBuf[iBuf].get();
     }
+    algorithmFPType * featureBuf_FPType() const // scratch buffer for raw feature values, sized to the number of rows of _data
+    {
+        if (this->_BufFPType.size() < this->_data->getNumberOfRows()) this->_BufFPType.reset(this->_data->getNumberOfRows()); // size once, then reuse
+        return this->_BufFPType.get();
+    }
     IndexType * featureIndexBuf(size_t iBuf) const
     {
         DAAL_ASSERT(iBuf < _nFeatureBufs);
         return _aFeatureIndexBuf[iBuf].get();
     }
-    bool terminateCriteria(size_t nSamples, size_t level, typename DataHelper::ImpurityData & imp, algorithmFPType totalWeights) const
+    bool terminateCriteria(size_t nSamples, size_t level, typename DataHelper::ImpurityData & imp, intermSummFPType totalWeights) const
     {
         const daal::algorithms::decision_forest::training::interface2::Parameter * algParameter =
             dynamic_cast<const daal::algorithms::decision_forest::training::interface2::Parameter *>(&_par);
@@ -614,13 +622,13 @@ protected:
     ThreadCtxType & threadCtx() { return _threadCtx; }
     typename DataHelper::NodeType::Split * makeSplit(size_t iFeature, algorithmFPType featureValue, bool bUnordered,
                                                      typename DataHelper::NodeType::Base * left, typename DataHelper::NodeType::Base * right,
-                                                     algorithmFPType imp);
+                                                     intermSummFPType imp);
     typename DataHelper::NodeType::Leaf * makeLeaf(const IndexType * idx, size_t n, typename DataHelper::ImpurityData & imp, size_t makeLeaf);
 
     NodeSplitResult findBestSplit(size_t level, size_t iStart, size_t n, const typename DataHelper::ImpurityData & curImpurity,
-                                  IndexType & iBestFeature, typename DataHelper::TSplitData & split, algorithmFPType totalWeights);
+                                  IndexType & iBestFeature, typename DataHelper::TSplitData & split, intermSummFPType totalWeights);
     NodeSplitResult findBestSplitSerial(size_t level, size_t iStart, size_t n, const typename DataHelper::ImpurityData & curImpurity,
-                                        IndexType & iBestFeature, typename DataHelper::TSplitData & split, algorithmFPType totalWeights);
+                                        IndexType & iBestFeature, typename DataHelper::TSplitData & split, intermSummFPType totalWeights);
     NodeSplitResult simpleSplit(size_t iStart, const typename DataHelper::ImpurityData & curImpurity, IndexType & iFeatureBest,
                                 typename DataHelper::TSplitData & split);
     void addImpurityDecrease(IndexType iFeature, size_t n, const typename DataHelper::ImpurityData & curImpurity,
@@ -669,7 +677,8 @@ protected:
     services::internal::HostAppHelper _hostApp;
     typename DataHelper::TreeType _tree;
     mutable TVector<IndexType, cpu> _aSample;
-    mutable TArray<algorithmFPTypeArray, cpu> _aFeatureBuf;
+    mutable TArray<intermSummFPTypeArray, cpu> _aFeatureBuf;
+    mutable TArray<algorithmFPType, cpu> _BufFPType;
     mutable TArray<IndexTypeArray, cpu> _aFeatureIndexBuf;
 
     const NumericTable * _data;
@@ -685,13 +694,13 @@ protected:
     const BinIndexType * _binIndex;
     const FeatureTypes & _featHelper;
     algorithmFPType _accuracy;
-    algorithmFPType _impurityThreshold;
+    intermSummFPType _impurityThreshold;
     ThreadCtxType & _threadCtx;
     size_t _nClasses;
     size_t * _numElems;
     size_t _minSamplesSplit;
-    algorithmFPType _minWeightLeaf;
-    algorithmFPType _minImpurityDecrease;
+    intermSummFPType _minWeightLeaf;
+    intermSummFPType _minImpurityDecrease;
     size_t _maxLeafNodes;
     bool _memorySavingMode;
 
@@ -766,7 +775,7 @@ services::Status TrainBatchTaskBase<algorithmFPType, BinIndexType, DataHelper, H
 
     setupHostApp();
 
-    double totalWeights = double(0);
+    typename DataHelper::intermSummFPType totalWeights = 0;
     typename DataHelper::ImpurityData initialImpurity;
     const bool noWeights = !_helper.providedWeights();
     if (noWeights)
@@ -803,7 +812,7 @@ services::Status TrainBatchTaskBase<algorithmFPType, BinIndexType, DataHelper, H
 template <typename algorithmFPType, typename BinIndexType, typename DataHelper, typename HyperparameterType, CpuType cpu>
 typename DataHelper::NodeType::Split * TrainBatchTaskBase<algorithmFPType, BinIndexType, DataHelper, HyperparameterType, cpu>::makeSplit(
     size_t iFeature, algorithmFPType featureValue, bool bUnordered, typename DataHelper::NodeType::Base * left,
-    typename DataHelper::NodeType::Base * right, algorithmFPType imp)
+    typename DataHelper::NodeType::Base * right, intermSummFPType imp)
 {
     typename DataHelper::NodeType::Split * pNode = _tree.allocator().allocSplit();
     pNode->set(iFeature, featureValue, bUnordered);
@@ -825,7 +834,7 @@ typename DataHelper::NodeType::Leaf * TrainBatchTaskBase<algorithmFPType, BinInd
 template <typename algorithmFPType, typename BinIndexType, typename DataHelper, typename HyperparameterType, CpuType cpu>
 typename DataHelper::NodeType::Base * TrainBatchTaskBase<algorithmFPType, BinIndexType, DataHelper, HyperparameterType, cpu>::buildDepthFirst(
     services::Status & s, size_t iStart, size_t n, size_t level, typename DataHelper::ImpurityData & curImpurity, bool & bUnorderedFeaturesUsed,
-    size_t nClasses, algorithmFPType totalWeights)
+    size_t nClasses, intermSummFPType totalWeights)
 {
     const size_t maxFeatures = nFeatures();
     if (_hostApp.isCancelled(s, n)) return nullptr;
@@ -839,9 +848,9 @@ typename DataHelper::NodeType::Base * TrainBatchTaskBase<algorithmFPType, BinInd
     DAAL_ASSERT(split_result.status.ok());
     if (split_result.bSplitSucceeded)
     {
-        const size_t nLeft   = split.nLeft;
-        const double imp     = curImpurity.var;
-        const double impLeft = split.left.var;
+        const size_t nLeft             = split.nLeft;
+        const intermSummFPType imp     = curImpurity.var;
+        const intermSummFPType impLeft = split.left.var;
 
         // check impurity decrease
         if (split.totalWeights * split.impurityDecrease < _minImpurityDecrease) return makeLeaf(_aSample.get() + iStart, n, curImpurity, nClasses);
@@ -922,11 +931,11 @@ typename DataHelper::NodeType::Base * TrainBatchTaskBase<algorithmFPType, BinInd
     DAAL_ASSERT(split_result.status.ok());
     if (split_result.bSplitSucceeded)
     {
-        const double imp     = impurity.var;
-        const double impLeft = split.left.var;
+        const intermSummFPType imp     = impurity.var;
+        const intermSummFPType impLeft = split.left.var;
 
         // check impurity decrease
-        double improve = imp * item.totalWeights - impLeft * item.leftWeights - (item.totalWeights - item.leftWeights) * (imp - impLeft);
+        intermSummFPType improve = imp * item.totalWeights - impLeft * item.leftWeights - (item.totalWeights - item.leftWeights) * (imp - impLeft);
         if (improve < _minImpurityDecrease)
         {
             return makeLeaf(_aSample.get() + item.start, item.n, impurity, nClasses);
@@ -964,7 +973,7 @@ typename DataHelper::NodeType::Base * TrainBatchTaskBase<algorithmFPType, BinInd
 template <typename algorithmFPType, typename BinIndexType, typename DataHelper, typename HyperparameterType, CpuType cpu>
 typename DataHelper::NodeType::Base * TrainBatchTaskBase<algorithmFPType, BinIndexType, DataHelper, HyperparameterType, cpu>::buildBestFirst(
     services::Status & s, size_t iStart, size_t n, size_t level, typename DataHelper::ImpurityData & curImpurity, bool & bUnorderedFeaturesUsed,
-    size_t nClasses, algorithmFPType totalWeights)
+    size_t nClasses, intermSummFPType totalWeights)
 {
     struct WorkItem
     {
@@ -974,9 +983,9 @@ typename DataHelper::NodeType::Base * TrainBatchTaskBase<algorithmFPType, BinInd
         size_t n;
         size_t nLeft;
         size_t level;
-        double improvement;
-        algorithmFPType leftWeights;
-        algorithmFPType totalWeights;
+        intermSummFPType improvement;
+        intermSummFPType leftWeights;
+        intermSummFPType totalWeights;
         typename DataHelper::ImpurityData impurityLeft {};
         typename DataHelper::ImpurityData impurityRight {};
         typename DataHelper::NodeType::Split * node;
@@ -994,7 +1003,7 @@ typename DataHelper::NodeType::Base * TrainBatchTaskBase<algorithmFPType, BinInd
               node(nullptr)
         {}
 
-        WorkItem(bool featureUnordered, size_t start, size_t n, size_t level, algorithmFPType totalWeights)
+        WorkItem(bool featureUnordered, size_t start, size_t n, size_t level, intermSummFPType totalWeights)
             : isLeaf(true),
               featureUnordered(featureUnordered),
               start(start),
@@ -1125,7 +1134,7 @@ NodeSplitResult TrainBatchTaskBase<algorithmFPType, BinIndexType, DataHelper, Hy
 template <typename algorithmFPType, typename BinIndexType, typename DataHelper, typename HyperparameterType, CpuType cpu>
 NodeSplitResult TrainBatchTaskBase<algorithmFPType, BinIndexType, DataHelper, HyperparameterType, cpu>::findBestSplit(
     size_t level, size_t iStart, size_t n, const typename DataHelper::ImpurityData & curImpurity, IndexType & iFeatureBest,
-    typename DataHelper::TSplitData & split, algorithmFPType totalWeights)
+    typename DataHelper::TSplitData & split, intermSummFPType totalWeights)
 {
     if (n == 2)
     {
@@ -1142,7 +1151,7 @@ NodeSplitResult TrainBatchTaskBase<algorithmFPType, BinIndexType, DataHelper, Hy
 template <typename algorithmFPType, typename BinIndexType, typename DataHelper, typename HyperparameterType, CpuType cpu>
 NodeSplitResult TrainBatchTaskBase<algorithmFPType, BinIndexType, DataHelper, HyperparameterType, cpu>::findBestSplitSerial(
     size_t level, size_t iStart, size_t n, const typename DataHelper::ImpurityData & curImpurity, IndexType & iBestFeature,
-    typename DataHelper::TSplitData & bestSplit, algorithmFPType totalWeights)
+    typename DataHelper::TSplitData & bestSplit, intermSummFPType totalWeights)
 {
     services::Status st;
 
@@ -1153,7 +1162,7 @@ NodeSplitResult TrainBatchTaskBase<algorithmFPType, BinIndexType, DataHelper, Hy
     /* total number of features */
     const size_t maxFeatures = nFeatures();
     /* minimum fraction of all samples per bin */
-    const algorithmFPType qMax = 0.02;
+    const intermSummFPType qMax = 0.02;
     /* index of the best split, initialized to first index we investigate */
     IndexType * bestSplitIdx = featureIndexBuf(0) + iStart;
     /* sample index */
@@ -1200,7 +1209,8 @@ NodeSplitResult TrainBatchTaskBase<algorithmFPType, BinIndexType, DataHelper, Hy
 
         const auto iFeature = _aFeatureIdx[i];
         const bool bUseIndexedFeatures =
-            (!_memorySavingMode) && (algorithmFPType(n) > qMax * algorithmFPType(_helper.indexedFeatures().numIndices(iFeature)));
+            (!_memorySavingMode)
+            && (static_cast<intermSummFPType>(n) > qMax * static_cast<intermSummFPType>(_helper.indexedFeatures().numIndices(iFeature)));
 
         if (!_maxLeafNodes && !_useConstFeatures && !_memorySavingMode)
         {
@@ -1235,7 +1245,7 @@ NodeSplitResult TrainBatchTaskBase<algorithmFPType, BinIndexType, DataHelper, Hy
         }
         else
         {
-            algorithmFPType * featBuf = featureBuf(0) + iStart; //single thread
+            algorithmFPType * featBuf = featureBuf_FPType() + iStart; //single thread
             featureValuesToBuf(iFeature, featBuf, aIdx, n);
             if (featBuf[n - 1] - featBuf[0] <= _accuracy) //all values of the feature are the same
                 continue;
@@ -1311,7 +1321,7 @@ void TrainBatchTaskBase<algorithmFPType, BinIndexType, DataHelper, Hyperparamete
     IndexType iFeature, size_t n, const typename DataHelper::ImpurityData & curImpurity, const typename DataHelper::TSplitData & split)
 {
     DAAL_ASSERT(_threadCtx.varImp);
-    if (!isZero<algorithmFPType, cpu>(split.impurityDecrease)) _threadCtx.varImp[iFeature] += split.impurityDecrease;
+    if (!isZero<intermSummFPType, cpu>(split.impurityDecrease)) _threadCtx.varImp[iFeature] += split.impurityDecrease;
 }
 
 template <typename algorithmFPType, typename BinIndexType, typename DataHelper, typename HyperparameterType, CpuType cpu>
@@ -1336,15 +1346,15 @@ services::Status TrainBatchTaskBase<algorithmFPType, BinIndexType, DataHelper, H
             DAAL_CHECK_MALLOC(permutation.get());
             for (size_t i = 0; i < nOOB; permutation[i] = i, ++i)
                 ;
-            const size_t nTrees        = _threadCtx.nTrees;
-            const algorithmFPType div1 = algorithmFPType(1) / algorithmFPType(nTrees);
+            const size_t nTrees         = _threadCtx.nTrees;
+            const intermSummFPType div1 = 1.0 / static_cast<intermSummFPType>(nTrees);
             for (size_t i = 0, n = nFeatures(); i < n; ++i)
             {
                 shuffle<cpu>(_helper.engineImpl->getState(), nOOB, permutation.get());
                 const algorithmFPType permOOBError = computeOOBErrorPerm(t, nOOB, oobIndices.get(), permutation.get(), i);
                 const algorithmFPType diff         = (permOOBError - oobError);
                 //_threadCtx.varImp[i] is a mean of diff among all the trees
-                const algorithmFPType delta = diff - _threadCtx.varImp[i]; //old mean
+                const intermSummFPType delta = diff - _threadCtx.varImp[i]; //old mean
                 _threadCtx.varImp[i] += div1 * delta;
                 if (_threadCtx.varImpVariance) _threadCtx.varImpVariance[i] += delta * (diff - _threadCtx.varImp[i]); //new mean
             }
@@ -1376,14 +1386,14 @@ algorithmFPType TrainBatchTaskBase<algorithmFPType, BinIndexType, DataHelper, Hy
     services::internal::tmemcpy<algorithmFPType, cpu>(buf.get(), x.get(), dim);
     ReadRows<algorithmFPType, cpu> p(const_cast<NumericTable *>(_data), aInd[aPerm[0]], 1);
     buf[iPermutedFeature] = p.get()[iPermutedFeature];
-    algorithmFPType mean  = predictionError<algorithmFPType, DataHelper, cpu>(_helper, t, buf.get(), _resp, aInd[0]);
+    intermSummFPType mean = predictionError<algorithmFPType, DataHelper, cpu>(_helper, t, buf.get(), _resp, aInd[0]);
 
     for (size_t i = 1; i < n; ++i)
     {
         services::internal::tmemcpy<algorithmFPType, cpu>(buf.get(), x.set(const_cast<NumericTable *>(_data), aInd[i], 1), dim);
         buf[iPermutedFeature] = p.set(const_cast<NumericTable *>(_data), aInd[aPerm[i]], 1)[iPermutedFeature];
-        algorithmFPType val   = predictionError<algorithmFPType, DataHelper, cpu>(_helper, t, buf.get(), _resp, aInd[i]);
-        mean += (val - mean) / algorithmFPType(i + 1);
+        intermSummFPType val  = predictionError<algorithmFPType, DataHelper, cpu>(_helper, t, buf.get(), _resp, aInd[i]);
+        mean += (val - mean) / static_cast<intermSummFPType>(i + 1);
     }
     return mean;
 }
@@ -1396,11 +1406,11 @@ algorithmFPType TrainBatchTaskBase<algorithmFPType, BinIndexType, DataHelper, Hy
     //compute prediction error on each OOB row and get its mean online formulae (Welford)
     //TODO: can be threader_for() block
     ReadRows<algorithmFPType, cpu> x(const_cast<NumericTable *>(_data), aInd[0], 1);
-    algorithmFPType mean = _helper.predictionError(t, x.get(), _resp, aInd[0], _threadCtx.oobBuf);
+    intermSummFPType mean = _helper.predictionError(t, x.get(), _resp, aInd[0], _threadCtx.oobBuf);
     for (size_t i = 1; i < n; ++i)
     {
-        algorithmFPType val = _helper.predictionError(t, x.set(const_cast<NumericTable *>(_data), aInd[i], 1), _resp, aInd[i], _threadCtx.oobBuf);
-        mean += (val - mean) / algorithmFPType(i + 1);
+        intermSummFPType val = _helper.predictionError(t, x.set(const_cast<NumericTable *>(_data), aInd[i], 1), _resp, aInd[i], _threadCtx.oobBuf);
+        mean += (val - mean) / static_cast<intermSummFPType>(i + 1);
     }
     return mean;
 }
diff --git a/cpp/daal/src/algorithms/dtrees/forest/regression/df_regression_train_dense_default_impl.i b/cpp/daal/src/algorithms/dtrees/forest/regression/df_regression_train_dense_default_impl.i
index e67db03502a..8aebc6c25dc 100644
--- a/cpp/daal/src/algorithms/dtrees/forest/regression/df_regression_train_dense_default_impl.i
+++ b/cpp/daal/src/algorithms/dtrees/forest/regression/df_regression_train_dense_default_impl.i
@@ -47,6 +47,19 @@ using namespace decision_forest::training::internal;
 using namespace dtrees::internal;
 using namespace dtrees::training::internal;
 
+// IMPORTANT!!
+// Most of the computations here are done in double precision regardless of the input type.
+// This is because the code needs to do math on large numbers, sometimes multiplied
+// by a response variable or some other quantity, and this math needs to be close to exact
+// in order for results to be sensible. With float32 (24-bit significand), consecutive
+// integers are represented exactly only up to 2^24 ~= 16.8 million, and passing datasets
+// larger than this with calculations about means/variances/counts done in float32 precision
+// results in errors that can oftentimes be larger than the model predictions themselves.
+// It might be possible to use float32 for some small datasets if it can be established
+// that the calculations won't break, but since counts are multiplied or divided by
+// real numbers in the range of the response variable, it's not just a matter of making
+// the switch at exactly 2^24 rows.
+
 //computes mean2 and var2 as the mean and mse for the set of elements s2, s2 = s - s1
 //where mean, var are mean and mse for s,
 //where mean1, var1 are mean and mse for s1
@@ -57,39 +70,39 @@ using namespace dtrees::training::internal;
 //variance between 'var1' and 'var2' with weights given by 'leftWeights' and 'rightWeights',
 //respectively, which would meet the following condition:
 // var = (1 / (leftWeights + rightWeights)) * (var1*leftWeights + var2*rightWeights + (mean1 - mean2)^2 * leftWeights * rightWeights / (leftWeights + rightWeights))
-template <typename algorithmFPType, CpuType cpu>
-void subtractImpurity(algorithmFPType var, algorithmFPType mean, algorithmFPType var1, algorithmFPType mean1, algorithmFPType leftWeights,
-                      algorithmFPType & var2, algorithmFPType & mean2, algorithmFPType rightWeights)
+template <typename intermSummFPType, CpuType cpu>
+void subtractImpurity(intermSummFPType var, intermSummFPType mean, intermSummFPType var1, intermSummFPType mean1, intermSummFPType leftWeights,
+                      intermSummFPType & var2, intermSummFPType & mean2, intermSummFPType rightWeights)
 {
     //TODO: investigate reusing decision_tree::regression::training::internal::MSEDataStatistics here
-    mean2                   = mean + (leftWeights * (mean - mean1)) / rightWeights;
-    const algorithmFPType b = leftWeights / rightWeights;
-    var2                    = var + (mean - mean2) * (mean + mean2) + (var - var1 + (mean - mean1) * (mean + mean1)) * b;
+    mean2                    = mean + (leftWeights * (mean - mean1)) / rightWeights;
+    const intermSummFPType b = leftWeights / rightWeights;
+    var2                     = var + (mean - mean2) * (mean + mean2) + (var - var1 + (mean - mean1) * (mean + mean1)) * b;
     if (var2 < 0) var2 = 0;
 }
 
 //computes meanPrev as the mean of n-1 elements after removing of element x (based on mean of n elements passed as 'mean' argument)
 //instead of impurity, computes the sum of (xi - meanPrev)(xi - meanPrev) for n-1 elements
 //(based on the sum of (xi - mean)*(xi - mean) of n elements passed as 'var' argument)
-template <typename algorithmFPType, CpuType cpu>
-void calcPrevImpurity(algorithmFPType var, algorithmFPType mean, algorithmFPType & varPrev, algorithmFPType & meanPrev, algorithmFPType x,
-                      algorithmFPType totalWeights, algorithmFPType weights)
+template <typename intermSummFPType, CpuType cpu>
+void calcPrevImpurity(intermSummFPType var, intermSummFPType mean, intermSummFPType & varPrev, intermSummFPType & meanPrev, intermSummFPType x,
+                      intermSummFPType totalWeights, intermSummFPType weights)
 {
-    algorithmFPType residual = (isPositive<algorithmFPType, cpu>(totalWeights - weights) ? (totalWeights - weights) : 1.);
-    algorithmFPType delta    = (x - mean) / residual;
-    varPrev                  = var - delta * totalWeights * (x - mean) * weights;
-    meanPrev                 = mean - delta * weights;
+    intermSummFPType residual = (isPositive<intermSummFPType, cpu>(totalWeights - weights) ? (totalWeights - weights) : 1.);
+    intermSummFPType delta    = (x - mean) / residual;
+    varPrev                   = var - delta * totalWeights * (x - mean) * weights;
+    meanPrev                  = mean - delta * weights;
     if (varPrev < 0) varPrev = 0;
 }
 
 //////////////////////////////////////////////////////////////////////////////////////////
 // Service structure, contains regression error data for OOB calculation
 //////////////////////////////////////////////////////////////////////////////////////////
-template <typename algorithmFPType, CpuType cpu>
+template <typename intermSummFPType, CpuType cpu>
 struct RegErr
 {
-    algorithmFPType value = 0;
-    size_t count          = 0;
+    intermSummFPType value = 0;
+    size_t count           = 0;
     void add(const RegErr & o)
     {
         count += o.count;
@@ -105,14 +118,15 @@ class OrderedRespHelperBest : public DataHelper<algorithmFPType, algorithmFPType
 {
 public:
     typedef algorithmFPType TResponse;
-    typedef DataHelper<algorithmFPType, algorithmFPType, cpu> super;
     typedef double intermSummFPType;
+    typedef DataHelper<algorithmFPType, algorithmFPType, cpu> super;
 
     struct ImpurityData
     {
-        double var; //impurity is a variance
-        double mean;
-        double value() const { return var; }
+        typedef typename OrderedRespHelperBest::intermSummFPType intermSummFPType; // qualified, so the name keeps one meaning in this scope
+        intermSummFPType var; //impurity is a variance
+        intermSummFPType mean;
+        intermSummFPType value() const { return var; }
     };
 
     typedef SplitData<algorithmFPType, ImpurityData> TSplitData;
@@ -124,21 +138,21 @@ public:
     {}
 
     template <bool noWeights>
-    void calcImpurity(const IndexType * aIdx, size_t n, ImpurityData & imp, double & totalweights) const;
+    void calcImpurity(const IndexType * aIdx, size_t n, ImpurityData & imp, intermSummFPType & totalweights) const;
 
     template <bool noWeights, bool featureUnordered>
-    int findBestSplitByHist(size_t nDiffFeatMax, intermSummFPType sumTotal, algorithmFPType * buf, size_t n, size_t nMinSplitPart,
-                            const ImpurityData & curImpurity, TSplitData & split, const algorithmFPType minWeightLeaf,
-                            const algorithmFPType totalWeights, const IndexType iFeature) const;
+    int findBestSplitByHist(size_t nDiffFeatMax, intermSummFPType sumTotal, intermSummFPType * buf, size_t n, size_t nMinSplitPart,
+                            const ImpurityData & curImpurity, TSplitData & split, const intermSummFPType minWeightLeaf,
+                            const intermSummFPType totalWeights, const IndexType iFeature) const;
 
     template <bool noWeights>
     bool findBestSplitOrderedFeature(const algorithmFPType * featureVal, const IndexType * aIdx, size_t n, size_t nMinSplitPart,
                                      const algorithmFPType accuracy, const ImpurityData & curImpurity, TSplitData & split,
-                                     const algorithmFPType minWeightLeaf, const algorithmFPType totalWeights) const;
+                                     const intermSummFPType minWeightLeaf, const intermSummFPType totalWeights) const;
     template <bool noWeights>
     bool findBestSplitCategoricalFeature(const algorithmFPType * featureVal, const IndexType * aIdx, size_t n, size_t nMinSplitPart,
                                          const algorithmFPType accuracy, const ImpurityData & curImpurity, TSplitData & split,
-                                         const algorithmFPType minWeightLeaf, const algorithmFPType totalWeights) const;
+                                         const intermSummFPType minWeightLeaf, const intermSummFPType totalWeights) const;
 
 #ifdef DEBUG_CHECK_IMPURITY
     void checkImpurity(const IndexType * ptrIdx, size_t n, const ImpurityData & expected) const
@@ -151,12 +165,13 @@ public:
 protected:
     //buffer for the computation using indexed features
     mutable TVector<IndexType, cpu, DefaultAllocator<cpu> > _idxFeatureBuf;
-    mutable TVector<algorithmFPType, cpu, DefaultAllocator<cpu> > _weightsFeatureBuf;
+    mutable TVector<intermSummFPType, cpu, DefaultAllocator<cpu> > _weightsFeatureBuf;
 };
 
 template <typename algorithmFPType, CpuType cpu>
 template <bool noWeights>
-void OrderedRespHelperBest<algorithmFPType, cpu>::calcImpurity(const IndexType * aIdx, size_t n, ImpurityData & imp, double & totalWeights) const
+void OrderedRespHelperBest<algorithmFPType, cpu>::calcImpurity(const IndexType * aIdx, size_t n, ImpurityData & imp,
+                                                               intermSummFPType & totalWeights) const
 {
     imp.var  = 0;
     imp.mean = this->_aResponse[aIdx[0]].val;
@@ -165,12 +180,12 @@ void OrderedRespHelperBest<algorithmFPType, cpu>::calcImpurity(const IndexType *
         PRAGMA_VECTOR_ALWAYS
         for (size_t i = 1; i < n; ++i)
         {
-            const double delta = this->_aResponse[aIdx[i]].val - imp.mean; //x[i] - mean
-            imp.mean += delta / double(i + 1);
+            const intermSummFPType delta = this->_aResponse[aIdx[i]].val - imp.mean; //x[i] - mean
+            imp.mean += delta / static_cast<intermSummFPType>(i + 1);
             imp.var += delta * (this->_aResponse[aIdx[i]].val - imp.mean);
         }
-        totalWeights = double(n);
-        imp.var /= double(n); //impurity is MSE
+        totalWeights = static_cast<intermSummFPType>(n);
+        imp.var /= totalWeights; //impurity is MSE
     }
     else
     {
@@ -178,10 +193,10 @@ void OrderedRespHelperBest<algorithmFPType, cpu>::calcImpurity(const IndexType *
         PRAGMA_VECTOR_ALWAYS
         for (size_t i = 1; i < n; ++i)
         {
-            const double weights = this->_aWeights[aIdx[i]].val;
-            const double delta   = this->_aResponse[aIdx[i]].val - imp.mean; //x[i] - mean
+            const intermSummFPType weights = this->_aWeights[aIdx[i]].val;
+            const intermSummFPType delta   = this->_aResponse[aIdx[i]].val - imp.mean; //x[i] - mean
             totalWeights += weights;
-            DAAL_ASSERT(!(isZero<double, cpu>(totalWeights)));
+            DAAL_ASSERT(!(isZero<intermSummFPType, cpu>(totalWeights)));
             imp.mean += weights * delta / totalWeights;
             imp.var += weights * delta * (this->_aResponse[aIdx[i]].val - imp.mean);
         }
@@ -204,9 +219,9 @@ void OrderedRespHelperBest<algorithmFPType, cpu>::calcImpurity(const IndexType *
 
 template <typename algorithmFPType, CpuType cpu>
 template <bool noWeights, bool featureUnordered>
-int OrderedRespHelperBest<algorithmFPType, cpu>::findBestSplitByHist(size_t nDiffFeatMax, intermSummFPType sumTotal, algorithmFPType * buf, size_t n,
+int OrderedRespHelperBest<algorithmFPType, cpu>::findBestSplitByHist(size_t nDiffFeatMax, intermSummFPType sumTotal, intermSummFPType * buf, size_t n,
                                                                      size_t nMinSplitPart, const ImpurityData & curImpurity, TSplitData & split,
-                                                                     const algorithmFPType minWeightLeaf, const algorithmFPType totalWeights,
+                                                                     const intermSummFPType minWeightLeaf, const intermSummFPType totalWeights,
                                                                      const IndexType iFeature) const
 {
     auto featWeights = _weightsFeatureBuf.get();
@@ -214,15 +229,15 @@ int OrderedRespHelperBest<algorithmFPType, cpu>::findBestSplitByHist(size_t nDif
 
     intermSummFPType bestImpDecreasePart =
         split.impurityDecrease < 0 ? -1 : (split.impurityDecrease + curImpurity.mean * curImpurity.mean) * totalWeights;
-    size_t nLeft                = 0;
-    algorithmFPType leftWeights = 0.;
-    intermSummFPType sumLeft    = 0;
-    int idxFeatureBestSplit     = -1; //index of best feature value in the array of sorted feature values
+    size_t nLeft                 = 0;
+    intermSummFPType leftWeights = 0.;
+    intermSummFPType sumLeft     = 0;
+    int idxFeatureBestSplit      = -1; //index of best feature value in the array of sorted feature values
     for (size_t i = 0; i < nDiffFeatMax; ++i)
     {
         if (!nFeatIdx[i]) continue;
 
-        algorithmFPType thisFeatWeights = noWeights ? nFeatIdx[i] : featWeights[i];
+        intermSummFPType thisFeatWeights = noWeights ? nFeatIdx[i] : featWeights[i];
 
         nLeft       = (featureUnordered ? nFeatIdx[i] : nLeft + nFeatIdx[i]);
         leftWeights = (featureUnordered ? thisFeatWeights : leftWeights + thisFeatWeights);
@@ -236,7 +251,7 @@ int OrderedRespHelperBest<algorithmFPType, cpu>::findBestSplitByHist(size_t nDif
         const intermSummFPType impDecreasePart = sumLeft * sumLeft / leftWeights + sumRight * sumRight / (totalWeights - leftWeights);
         if (impDecreasePart > bestImpDecreasePart)
         {
-            split.left.mean     = algorithmFPType(sumLeft);
+            split.left.mean     = sumLeft;
             split.nLeft         = nLeft;
             split.leftWeights   = leftWeights;
             idxFeatureBestSplit = i;
@@ -257,25 +272,25 @@ template <bool noWeights>
 bool OrderedRespHelperBest<algorithmFPType, cpu>::findBestSplitOrderedFeature(const algorithmFPType * featureVal, const IndexType * aIdx, size_t n,
                                                                               size_t nMinSplitPart, const algorithmFPType accuracy,
                                                                               const ImpurityData & curImpurity, TSplitData & split,
-                                                                              const algorithmFPType minWeightLeaf,
-                                                                              const algorithmFPType totalWeights) const
+                                                                              const intermSummFPType minWeightLeaf,
+                                                                              const intermSummFPType totalWeights) const
 {
     ImpurityData left;
     ImpurityData right;
-    algorithmFPType xi = this->_aResponse[aIdx[0]].val;
-    left.var           = 0;
-    left.mean          = xi;
-    IndexType iBest    = -1;
-    algorithmFPType vBest;
+    intermSummFPType xi = this->_aResponse[aIdx[0]].val;
+    left.var            = 0;
+    left.mean           = xi;
+    IndexType iBest     = -1;
+    intermSummFPType vBest;
     auto aResponse = this->_aResponse.get();
     auto aWeights  = this->_aWeights.get();
     auto weights   = aWeights[aIdx[0]].val;
-    calcPrevImpurity<double, cpu>(curImpurity.var * totalWeights, curImpurity.mean, right.var, right.mean, xi, totalWeights, weights);
+    calcPrevImpurity<intermSummFPType, cpu>(curImpurity.var * totalWeights, curImpurity.mean, right.var, right.mean, xi, totalWeights, weights);
 #ifdef DEBUG_CHECK_IMPURITY
     checkImpurityInternal(aIdx + 1, n - 1, right);
 #endif
 
-    vBest = split.impurityDecrease < 0 ? daal::services::internal::MaxVal<algorithmFPType>::get() :
+    vBest = split.impurityDecrease < 0 ? daal::services::internal::MaxVal<intermSummFPType>::get() :
                                          (curImpurity.var - split.impurityDecrease) * totalWeights;
     if (noWeights)
     {
@@ -287,7 +302,7 @@ bool OrderedRespHelperBest<algorithmFPType, cpu>::findBestSplitOrderedFeature(co
             {
                 //can make a split
                 //nLeft == i, nRight == n - i
-                const algorithmFPType v = left.var + right.var;
+                const intermSummFPType v = left.var + right.var;
                 if (v < vBest)
                 {
                     vBest             = v;
@@ -299,12 +314,12 @@ bool OrderedRespHelperBest<algorithmFPType, cpu>::findBestSplitOrderedFeature(co
             }
 
             //update impurity and continue
-            xi                    = aResponse[aIdx[i]].val;
-            algorithmFPType delta = xi - left.mean;
-            left.mean += delta / algorithmFPType(i + 1);
+            xi                     = aResponse[aIdx[i]].val;
+            intermSummFPType delta = xi - left.mean;
+            left.mean += delta / static_cast<intermSummFPType>(i + 1);
             left.var += delta * (xi - left.mean);
             if (left.var < 0) left.var = 0;
-            calcPrevImpurity<double, cpu>(right.var, right.mean, right.var, right.mean, xi, double(n - i), 1.);
+            calcPrevImpurity<intermSummFPType, cpu>(right.var, right.mean, right.var, right.mean, xi, static_cast<intermSummFPType>(n - i), 1.);
 #ifdef DEBUG_CHECK_IMPURITY
             checkImpurityInternal(aIdx, i + 1, left);
             checkImpurityInternal(aIdx + i + 1, n - i - 1, right);
@@ -313,7 +328,7 @@ bool OrderedRespHelperBest<algorithmFPType, cpu>::findBestSplitOrderedFeature(co
     }
     else
     {
-        algorithmFPType leftWeights = weights;
+        intermSummFPType leftWeights = weights;
         for (size_t i = 1; i < (n - nMinSplitPart + 1); ++i)
         {
             weights = aWeights[aIdx[i]].val;
@@ -323,7 +338,7 @@ bool OrderedRespHelperBest<algorithmFPType, cpu>::findBestSplitOrderedFeature(co
             {
                 //can make a split
                 //nLeft == i, nRight == n - i
-                const algorithmFPType v = left.var + right.var;
+                const intermSummFPType v = left.var + right.var;
                 if (v < vBest)
                 {
                     vBest             = v;
@@ -335,12 +350,12 @@ bool OrderedRespHelperBest<algorithmFPType, cpu>::findBestSplitOrderedFeature(co
             }
 
             //update impurity and continue
-            xi                    = aResponse[aIdx[i]].val;
-            algorithmFPType delta = xi - left.mean;
-            left.mean += weights * delta / (isPositive<algorithmFPType, cpu>(leftWeights + weights) ? leftWeights + weights : 1.);
+            xi                     = aResponse[aIdx[i]].val;
+            intermSummFPType delta = xi - left.mean;
+            left.mean += weights * delta / (isPositive<intermSummFPType, cpu>(leftWeights + weights) ? leftWeights + weights : 1.);
             left.var += weights * delta * (xi - left.mean);
             if (left.var < 0) left.var = 0;
-            calcPrevImpurity<double, cpu>(right.var, right.mean, right.var, right.mean, xi, totalWeights - leftWeights, weights);
+            calcPrevImpurity<intermSummFPType, cpu>(right.var, right.mean, right.var, right.mean, xi, totalWeights - leftWeights, weights);
             leftWeights += weights;
 #ifdef DEBUG_CHECK_IMPURITY
             checkImpurityInternal(aIdx, i + 1, left);
@@ -354,7 +369,7 @@ bool OrderedRespHelperBest<algorithmFPType, cpu>::findBestSplitOrderedFeature(co
     split.impurityDecrease = curImpurity.var - vBest / totalWeights;
     split.nLeft            = iBest;
     split.totalWeights     = totalWeights;
-    split.left.var /= (isPositive<algorithmFPType, cpu>(split.leftWeights) ? split.leftWeights : 1.);
+    split.left.var /= (isPositive<intermSummFPType, cpu>(split.leftWeights) ? split.leftWeights : 1.);
     split.iStart       = 0;
     split.featureValue = featureVal[iBest - 1];
     return true;
@@ -365,13 +380,13 @@ template <bool noWeights>
 bool OrderedRespHelperBest<algorithmFPType, cpu>::findBestSplitCategoricalFeature(const algorithmFPType * featureVal, const IndexType * aIdx,
                                                                                   size_t n, size_t nMinSplitPart, const algorithmFPType accuracy,
                                                                                   const ImpurityData & curImpurity, TSplitData & split,
-                                                                                  const algorithmFPType minWeightLeaf,
-                                                                                  const algorithmFPType totalWeights) const
+                                                                                  const intermSummFPType minWeightLeaf,
+                                                                                  const intermSummFPType totalWeights) const
 {
     DAAL_ASSERT(n >= 2 * nMinSplitPart);
     ImpurityData left;
     ImpurityData right;
-    algorithmFPType vBest;
+    intermSummFPType vBest;
     bool bFound               = false;
     size_t nDiffFeatureValues = 0;
     auto aResponse            = this->_aResponse.get();
@@ -383,7 +398,7 @@ bool OrderedRespHelperBest<algorithmFPType, cpu>::findBestSplitCategoricalFeatur
         size_t count                   = 1;
         const algorithmFPType firstVal = featureVal[i];
         const size_t iStart            = i;
-        algorithmFPType leftWeights    = aWeights[aIdx[i]].val;
+        intermSummFPType leftWeights   = aWeights[aIdx[i]].val;
         for (++i; (i < n) && (featureVal[i] == firstVal); ++count, ++i)
         {
             leftWeights += aWeights[aIdx[i]].val;
@@ -394,12 +409,12 @@ bool OrderedRespHelperBest<algorithmFPType, cpu>::findBestSplitCategoricalFeatur
 
         if ((i == n) && (nDiffFeatureValues == 2) && bFound) break; //only 2 feature values, one possible split, already found
 
-        double weights = double(0);
+        intermSummFPType weights = 0;
         calcImpurity<noWeights>(aIdx + iStart, count, left, weights);
         DAAL_ASSERT(fabs(weights - leftWeights) < 0.001);
-        subtractImpurity<double, cpu>(curImpurity.var, curImpurity.mean, left.var, left.mean, leftWeights, right.var, right.mean,
-                                      totalWeights - leftWeights);
-        const algorithmFPType v = leftWeights * left.var + (totalWeights - leftWeights) * right.var;
+        subtractImpurity<intermSummFPType, cpu>(curImpurity.var, curImpurity.mean, left.var, left.mean, leftWeights, right.var, right.mean,
+                                                totalWeights - leftWeights);
+        const intermSummFPType v = leftWeights * left.var + (totalWeights - leftWeights) * right.var;
         if (!bFound || v < vBest)
         {
             vBest              = v;
@@ -414,7 +429,7 @@ bool OrderedRespHelperBest<algorithmFPType, cpu>::findBestSplitCategoricalFeatur
     }
     if (bFound)
     {
-        const algorithmFPType impurityDecrease = curImpurity.var - vBest / (isPositive<algorithmFPType, cpu>(totalWeights) ? totalWeights : 1.);
+        const intermSummFPType impurityDecrease = curImpurity.var - vBest / (isPositive<intermSummFPType, cpu>(totalWeights) ? totalWeights : 1.);
         if (split.impurityDecrease < 0 || split.impurityDecrease < impurityDecrease)
         {
             split.impurityDecrease = impurityDecrease;
@@ -455,7 +470,8 @@ template <typename algorithmFPType, CpuType cpu, typename crtp>
 class RespHelperBase : public OrderedRespHelperBest<algorithmFPType, cpu>
 {
 public:
-    typedef algorithmFPType TResponse;
+    using TResponse        = typename OrderedRespHelperBest<algorithmFPType, cpu>::TResponse;
+    using intermSummFPType = typename OrderedRespHelperBest<algorithmFPType, cpu>::intermSummFPType;
     typedef dtrees::internal::TreeImpRegression<> TreeType;
     typedef typename TreeType::NodeType NodeType;
     using ImpurityData = typename OrderedRespHelperBest<algorithmFPType, cpu>::ImpurityData;
@@ -473,27 +489,26 @@ public:
                       const NumericTable * weights) DAAL_C11_OVERRIDE;
     void convertLeftImpToRight(size_t n, const ImpurityData & total, TSplitData & split)
     {
-        subtractImpurity<double, cpu>(total.var, total.mean, split.left.var, split.left.mean, split.leftWeights, split.left.var, split.left.mean,
-                                      split.totalWeights - split.leftWeights);
+        subtractImpurity<intermSummFPType, cpu>(total.var, total.mean, split.left.var, split.left.mean, split.leftWeights, split.left.var,
+                                                split.left.mean, split.totalWeights - split.leftWeights);
         split.nLeft       = n - split.nLeft;
         split.leftWeights = split.totalWeights - split.leftWeights;
     }
 
     bool findSplitForFeature(const algorithmFPType * featureVal, const IndexType * aIdx, size_t n, size_t nMinSplitPart,
                              const algorithmFPType accuracy, const ImpurityData & curImpurity, TSplitData & split,
-                             const algorithmFPType minWeightLeaf, const algorithmFPType totalWeights) const;
+                             const intermSummFPType minWeightLeaf, const intermSummFPType totalWeights) const;
 
     template <typename BinIndexType>
-    int findSplitForFeatureSorted(algorithmFPType * featureBuf, IndexType iFeature, const IndexType * aIdx, size_t n, size_t nMinSplitPart,
-                                  const ImpurityData & curImpurity, TSplitData & split, const algorithmFPType minWeightLeaf,
-                                  const algorithmFPType totalWeights, const BinIndexType * binIndex) const;
+    int findSplitForFeatureSorted(intermSummFPType * featureBuf, IndexType iFeature, const IndexType * aIdx, size_t n, size_t nMinSplitPart,
+                                  const ImpurityData & curImpurity, TSplitData & split, const intermSummFPType minWeightLeaf,
+                                  const intermSummFPType totalWeights, const BinIndexType * binIndex) const;
 
-    typedef double intermSummFPType;
     template <typename BinIndexType>
-    void computeHistWithWeights(algorithmFPType * buf, IndexType iFeature, const IndexType * aIdx, const BinIndexType * binIndex, size_t n,
+    void computeHistWithWeights(intermSummFPType * buf, IndexType iFeature, const IndexType * aIdx, const BinIndexType * binIndex, size_t n,
                                 intermSummFPType & sumTotal) const;
     template <typename BinIndexType>
-    void computeHistWithoutWeights(algorithmFPType * buf, IndexType iFeature, const IndexType * aIdx, const BinIndexType * binIndex, size_t n,
+    void computeHistWithoutWeights(intermSummFPType * buf, IndexType iFeature, const IndexType * aIdx, const BinIndexType * binIndex, size_t n,
                                    intermSummFPType & sumTotal) const;
 
     template <bool noWeights, typename BinIndexType>
@@ -520,8 +535,8 @@ public:
         algorithmFPType val = this->predictionError(response, *y.get());
         if (oobBuf)
         {
-            ((RegErr<algorithmFPType, cpu> *)oobBuf)[iRow].value += response;
-            ((RegErr<algorithmFPType, cpu> *)oobBuf)[iRow].count++;
+            ((RegErr<intermSummFPType, cpu> *)oobBuf)[iRow].value += response;
+            ((RegErr<intermSummFPType, cpu> *)oobBuf)[iRow].count++;
         }
         return val;
     }
@@ -624,7 +639,7 @@ template <typename algorithmFPType, CpuType cpu, typename crtp>
 bool RespHelperBase<algorithmFPType, cpu, crtp>::findSplitForFeature(const algorithmFPType * featureVal, const IndexType * aIdx, size_t n,
                                                                      size_t nMinSplitPart, const algorithmFPType accuracy,
                                                                      const ImpurityData & curImpurity, TSplitData & split,
-                                                                     const algorithmFPType minWeightLeaf, const algorithmFPType totalWeights) const
+                                                                     const intermSummFPType minWeightLeaf, const intermSummFPType totalWeights) const
 {
     const bool noWeights = !this->_weights;
     if (noWeights)
@@ -651,15 +666,15 @@ void RespHelperBase<algorithmFPType, cpu, crtp>::finalizeBestSplit(const IndexTy
 {
     DAAL_ASSERT(bestSplit.nLeft > 0);
     DAAL_ASSERT(bestSplit.leftWeights > 0.);
-    algorithmFPType divL = 1.;
-    int iRowSplitVal     = -1;
-    int iNext            = -1;
-    int idxNext          = this->_aResponse.size() - 1;
-    size_t iLeft         = 0;
-    size_t iRight        = 0;
+    intermSummFPType divL = 1.;
+    int iRowSplitVal      = -1;
+    int iNext             = -1;
+    int idxNext           = this->_aResponse.size() - 1;
+    size_t iLeft          = 0;
+    size_t iRight         = 0;
     if (noWeights)
     {
-        divL = algorithmFPType(1.) / algorithmFPType(bestSplit.nLeft);
+        divL = 1.0 / static_cast<intermSummFPType>(bestSplit.nLeft);
 
         bestSplit.left.mean *= divL;
         bestSplit.left.var            = 0;
@@ -679,8 +694,8 @@ void RespHelperBase<algorithmFPType, cpu, crtp>::finalizeBestSplit(const IndexTy
             {
                 if (idx == idxFeatureValueBestSplit) iRowSplitVal = aResponse[iSample].idx;
                 DAAL_ASSERT(iLeft < bestSplit.nLeft);
-                bestSplitIdx[iLeft++]   = iSample;
-                const algorithmFPType y = aResponse[iSample].val;
+                bestSplitIdx[iLeft++]    = iSample;
+                const intermSummFPType y = aResponse[iSample].val;
                 bestSplit.left.var += (y - bestSplit.left.mean) * (y - bestSplit.left.mean);
             }
             if ((idx > idxFeatureValueBestSplit) && (idxNext > idx))
@@ -692,7 +707,8 @@ void RespHelperBase<algorithmFPType, cpu, crtp>::finalizeBestSplit(const IndexTy
     }
     else
     {
-        divL = isZero<algorithmFPType, cpu>(bestSplit.leftWeights) ? algorithmFPType(1.) : (algorithmFPType(1.) / bestSplit.leftWeights);
+        divL =
+            isZero<intermSummFPType, cpu>(bestSplit.leftWeights) ? intermSummFPType(1) : (1.0 / static_cast<intermSummFPType>(bestSplit.leftWeights));
 
         bestSplit.left.mean *= divL;
         bestSplit.left.var            = 0;
@@ -713,9 +729,9 @@ void RespHelperBase<algorithmFPType, cpu, crtp>::finalizeBestSplit(const IndexTy
             {
                 if (idx == idxFeatureValueBestSplit) iRowSplitVal = aResponse[iSample].idx;
                 DAAL_ASSERT(iLeft < bestSplit.nLeft);
-                bestSplitIdx[iLeft++]   = iSample;
-                const algorithmFPType y = aResponse[iSample].val;
-                const algorithmFPType w = aWeights[iSample].val;
+                bestSplitIdx[iLeft++]    = iSample;
+                const intermSummFPType y = aResponse[iSample].val;
+                const intermSummFPType w = aWeights[iSample].val;
                 bestSplit.left.var += w * (y - bestSplit.left.mean) * (y - bestSplit.left.mean);
             }
             if ((idx > idxFeatureValueBestSplit) && (idxNext > idx))
@@ -741,7 +757,7 @@ void RespHelperBase<algorithmFPType, cpu, crtp>::finalizeBestSplit(const IndexTy
 
 template <typename algorithmFPType, CpuType cpu, typename crtp>
 template <typename BinIndexType>
-void RespHelperBase<algorithmFPType, cpu, crtp>::computeHistWithoutWeights(algorithmFPType * buf, IndexType iFeature, const IndexType * aIdx,
+void RespHelperBase<algorithmFPType, cpu, crtp>::computeHistWithoutWeights(intermSummFPType * buf, IndexType iFeature, const IndexType * aIdx,
                                                                            const BinIndexType * binIndex, size_t n, intermSummFPType & sumTotal) const
 {
     auto nFeatIdx  = this->_idxFeatureBuf.get(); //number of indexed feature values, array
@@ -762,7 +778,7 @@ void RespHelperBase<algorithmFPType, cpu, crtp>::computeHistWithoutWeights(algor
 
 template <typename algorithmFPType, CpuType cpu, typename crtp>
 template <typename BinIndexType>
-void RespHelperBase<algorithmFPType, cpu, crtp>::computeHistWithWeights(algorithmFPType * buf, IndexType iFeature, const IndexType * aIdx,
+void RespHelperBase<algorithmFPType, cpu, crtp>::computeHistWithWeights(intermSummFPType * buf, IndexType iFeature, const IndexType * aIdx,
                                                                         const BinIndexType * binIndex, size_t n, intermSummFPType & sumTotal) const
 {
     auto nFeatIdx    = this->_idxFeatureBuf.get(); //number of indexed feature values, array
@@ -787,16 +803,16 @@ void RespHelperBase<algorithmFPType, cpu, crtp>::computeHistWithWeights(algorith
 
 template <typename algorithmFPType, CpuType cpu, typename crtp>
 template <typename BinIndexType>
-int RespHelperBase<algorithmFPType, cpu, crtp>::findSplitForFeatureSorted(algorithmFPType * buf, IndexType iFeature, const IndexType * aIdx, size_t n,
-                                                                          size_t nMinSplitPart, const ImpurityData & curImpurity, TSplitData & split,
-                                                                          const algorithmFPType minWeightLeaf, const algorithmFPType totalWeights,
-                                                                          const BinIndexType * binIndex) const
+int RespHelperBase<algorithmFPType, cpu, crtp>::findSplitForFeatureSorted(intermSummFPType * buf, IndexType iFeature, const IndexType * aIdx,
+                                                                          size_t n, size_t nMinSplitPart, const ImpurityData & curImpurity,
+                                                                          TSplitData & split, const intermSummFPType minWeightLeaf,
+                                                                          const intermSummFPType totalWeights, const BinIndexType * binIndex) const
 {
-    const auto nDiffFeatMax = this->indexedFeatures().numIndices(iFeature);
+    const size_t nDiffFeatMax = this->indexedFeatures().numIndices(iFeature);
     this->_idxFeatureBuf.setValues(nDiffFeatMax, 0);
 
     //the buffer keeps sums of responses for each of unique feature values
-    for (size_t i = 0; i < nDiffFeatMax; ++i) buf[i] = algorithmFPType(0);
+    for (size_t i = 0; i < nDiffFeatMax; ++i) buf[i] = 0;
 
     const bool noWeights      = !this->_weights;
     intermSummFPType sumTotal = 0; //total sum of responses in the set being split
@@ -818,7 +834,7 @@ int RespHelperBase<algorithmFPType, cpu, crtp>::findSplitForFeatureSorted(algori
     }
     else
     {
-        this->_weightsFeatureBuf.setValues(nDiffFeatMax, algorithmFPType(0));
+        this->_weightsFeatureBuf.setValues(nDiffFeatMax, intermSummFPType(0));
         computeHistWithWeights(buf, iFeature, aIdx, binIndex, n, sumTotal);
 
         if (split.featureUnordered)
@@ -841,9 +857,9 @@ template <typename algorithmFPType, CpuType cpu>
 class OrderedRespHelperRandom : public RespHelperBase<algorithmFPType, cpu, OrderedRespHelperRandom<algorithmFPType, cpu> >
 {
 public:
-    typedef double intermSummFPType;
-    using ImpurityData = typename RespHelperBase<algorithmFPType, cpu, OrderedRespHelperRandom<algorithmFPType, cpu> >::ImpurityData;
-    using TSplitData   = typename RespHelperBase<algorithmFPType, cpu, OrderedRespHelperRandom<algorithmFPType, cpu> >::TSplitData;
+    using intermSummFPType = typename RespHelperBase<algorithmFPType, cpu, OrderedRespHelperRandom<algorithmFPType, cpu> >::intermSummFPType;
+    using ImpurityData     = typename RespHelperBase<algorithmFPType, cpu, OrderedRespHelperRandom<algorithmFPType, cpu> >::ImpurityData;
+    using TSplitData       = typename RespHelperBase<algorithmFPType, cpu, OrderedRespHelperRandom<algorithmFPType, cpu> >::TSplitData;
 
 public:
     OrderedRespHelperRandom(const dtrees::internal::IndexedFeatures * indexedFeatures, size_t dummy,
@@ -854,18 +870,18 @@ public:
     size_t genRandomBinIdx(const IndexType iFeature, const size_t minidx, const size_t maxidx) const;
 
     template <bool noWeights, bool featureUnordered>
-    int findBestSplitByHist(size_t nDiffFeatMax, intermSummFPType sumTotal, algorithmFPType * buf, size_t n, size_t nMinSplitPart,
-                            const ImpurityData & curImpurity, TSplitData & split, const algorithmFPType minWeightLeaf,
-                            const algorithmFPType totalWeights, const IndexType iFeature) const;
+    int findBestSplitByHist(size_t nDiffFeatMax, intermSummFPType sumTotal, intermSummFPType * buf, size_t n, size_t nMinSplitPart,
+                            const ImpurityData & curImpurity, TSplitData & split, const intermSummFPType minWeightLeaf,
+                            const intermSummFPType totalWeights, const IndexType iFeature) const;
 
     template <bool noWeights>
     bool findBestSplitOrderedFeature(const algorithmFPType * featureVal, const IndexType * aIdx, size_t n, size_t nMinSplitPart,
                                      const algorithmFPType accuracy, const ImpurityData & curImpurity, TSplitData & split,
-                                     const algorithmFPType minWeightLeaf, const algorithmFPType totalWeights) const;
+                                     const intermSummFPType minWeightLeaf, const intermSummFPType totalWeights) const;
     template <bool noWeights>
     bool findBestSplitCategoricalFeature(const algorithmFPType * featureVal, const IndexType * aIdx, size_t n, size_t nMinSplitPart,
                                          const algorithmFPType accuracy, const ImpurityData & curImpurity, TSplitData & split,
-                                         const algorithmFPType minWeightLeaf, const algorithmFPType totalWeights) const;
+                                         const intermSummFPType minWeightLeaf, const intermSummFPType totalWeights) const;
 };
 
 template <typename algorithmFPType, CpuType cpu>
@@ -898,20 +914,20 @@ size_t OrderedRespHelperRandom<algorithmFPType, cpu>::genRandomBinIdx(const Inde
 
 template <typename algorithmFPType, CpuType cpu>
 template <bool noWeights, bool featureUnordered>
-int OrderedRespHelperRandom<algorithmFPType, cpu>::findBestSplitByHist(size_t nDiffFeatMax, intermSummFPType sumTotal, algorithmFPType * buf,
+int OrderedRespHelperRandom<algorithmFPType, cpu>::findBestSplitByHist(size_t nDiffFeatMax, intermSummFPType sumTotal, intermSummFPType * buf,
                                                                        size_t n, size_t nMinSplitPart, const ImpurityData & curImpurity,
-                                                                       TSplitData & split, const algorithmFPType minWeightLeaf,
-                                                                       const algorithmFPType totalWeights, const IndexType iFeature) const
+                                                                       TSplitData & split, const intermSummFPType minWeightLeaf,
+                                                                       const intermSummFPType totalWeights, const IndexType iFeature) const
 {
     auto featWeights = this->_weightsFeatureBuf.get();
     auto nFeatIdx    = this->_idxFeatureBuf.get(); //number of indexed feature values, array
 
     intermSummFPType bestImpDecreasePart =
         split.impurityDecrease < 0 ? -1 : (split.impurityDecrease + curImpurity.mean * curImpurity.mean) * totalWeights;
-    size_t nLeft                = 0;
-    algorithmFPType leftWeights = 0.;
-    intermSummFPType sumLeft    = 0;
-    int idxFeatureBestSplit     = -1; //index of best feature value in the array of sorted feature values
+    size_t nLeft                 = 0;
+    intermSummFPType leftWeights = 0.;
+    intermSummFPType sumLeft     = 0;
+    int idxFeatureBestSplit      = -1; //index of best feature value in the array of sorted feature values
 
     size_t minidx = 0;
     size_t maxidx = nDiffFeatMax - 1;
@@ -993,7 +1009,7 @@ int OrderedRespHelperRandom<algorithmFPType, cpu>::findBestSplitByHist(size_t nD
 
         if (impDecreasePart > bestImpDecreasePart)
         {
-            split.left.mean     = algorithmFPType(sumLeft);
+            split.left.mean     = sumLeft;
             split.nLeft         = nLeft;
             split.leftWeights   = leftWeights;
             idxFeatureBestSplit = idx;
@@ -1015,18 +1031,18 @@ template <bool noWeights>
 bool OrderedRespHelperRandom<algorithmFPType, cpu>::findBestSplitOrderedFeature(const algorithmFPType * featureVal, const IndexType * aIdx, size_t n,
                                                                                 size_t nMinSplitPart, const algorithmFPType accuracy,
                                                                                 const ImpurityData & curImpurity, TSplitData & split,
-                                                                                const algorithmFPType minWeightLeaf,
-                                                                                const algorithmFPType totalWeights) const
+                                                                                const intermSummFPType minWeightLeaf,
+                                                                                const intermSummFPType totalWeights) const
 {
     ImpurityData left;
     ImpurityData right;
     IndexType iBest = -1;
-    algorithmFPType vBest;
-    algorithmFPType leftWeights = 0.;
-    auto aResponse              = this->_aResponse.get();
-    auto aWeights               = this->_aWeights.get();
+    intermSummFPType vBest;
+    intermSummFPType leftWeights = 0.;
+    auto aResponse               = this->_aResponse.get();
+    auto aWeights                = this->_aWeights.get();
     algorithmFPType idx;
-    vBest = split.impurityDecrease < 0 ? daal::services::internal::MaxVal<algorithmFPType>::get() :
+    vBest = split.impurityDecrease < 0 ? daal::services::internal::MaxVal<intermSummFPType>::get() :
                                          (curImpurity.var - split.impurityDecrease) * totalWeights;
     size_t i;
 
@@ -1068,8 +1084,8 @@ bool OrderedRespHelperRandom<algorithmFPType, cpu>::findBestSplitOrderedFeature(
         PRAGMA_VECTOR_ALWAYS
         for (size_t i = 1; i < r; ++i)
         {
-            const double delta = this->_aResponse[aIdx[i]].val - left.mean; //x[i] - mean
-            left.mean += delta / double(i + 1);
+            const intermSummFPType delta = this->_aResponse[aIdx[i]].val - left.mean; //x[i] - mean
+            left.mean += delta / static_cast<intermSummFPType>(i + 1);
             left.var += delta * (this->_aResponse[aIdx[i]].val - left.mean);
             DAAL_ASSERT(left.var >= 0);
         }
@@ -1077,8 +1093,8 @@ bool OrderedRespHelperRandom<algorithmFPType, cpu>::findBestSplitOrderedFeature(
         PRAGMA_VECTOR_ALWAYS
         for (size_t i = r + 1; i < n; ++i)
         {
-            const double delta = this->_aResponse[aIdx[i]].val - right.mean; //x[i] - mean
-            right.mean += delta / double(i + 1 - r);
+            const intermSummFPType delta = this->_aResponse[aIdx[i]].val - right.mean; //x[i] - mean
+            right.mean += delta / static_cast<intermSummFPType>(i + 1 - r);
             right.var += delta * (this->_aResponse[aIdx[i]].val - right.mean);
         }
 
@@ -1090,22 +1106,22 @@ bool OrderedRespHelperRandom<algorithmFPType, cpu>::findBestSplitOrderedFeature(
         PRAGMA_VECTOR_ALWAYS
         for (size_t i = 1; i < r; ++i)
         {
-            const double weights = this->_aWeights[aIdx[i]].val;
-            const double delta   = this->_aResponse[aIdx[i]].val - left.mean; //x[i] - mean
+            const intermSummFPType weights = this->_aWeights[aIdx[i]].val;
+            const intermSummFPType delta   = this->_aResponse[aIdx[i]].val - left.mean; //x[i] - mean
             leftWeights += weights;
-            DAAL_ASSERT(!(isZero<double, cpu>(leftWeights)));
+            DAAL_ASSERT(!(isZero<intermSummFPType, cpu>(leftWeights)));
             left.mean += weights * delta / leftWeights;
             left.var += weights * delta * (this->_aResponse[aIdx[i]].val - left.mean);
         }
 
-        algorithmFPType rightWeights = this->_aWeights[aIdx[r]].val;
+        intermSummFPType rightWeights = this->_aWeights[aIdx[r]].val;
         PRAGMA_VECTOR_ALWAYS
         for (size_t i = r + 1; i < n; ++i)
         {
-            const double weights = this->_aWeights[aIdx[i]].val;
-            const double delta   = this->_aResponse[aIdx[i]].val - right.mean; //x[i] - mean
+            const intermSummFPType weights = this->_aWeights[aIdx[i]].val;
+            const intermSummFPType delta   = this->_aResponse[aIdx[i]].val - right.mean; //x[i] - mean
             rightWeights += weights;
-            DAAL_ASSERT(!(isZero<double, cpu>(rightWeights)));
+            DAAL_ASSERT(!(isZero<intermSummFPType, cpu>(rightWeights)));
             right.mean += weights * delta / rightWeights;
             right.var += weights * delta * (this->_aResponse[aIdx[i]].val - right.mean);
         }
@@ -1113,7 +1129,7 @@ bool OrderedRespHelperRandom<algorithmFPType, cpu>::findBestSplitOrderedFeature(
 
     if (!((leftWeights < minWeightLeaf) || ((totalWeights - leftWeights) < minWeightLeaf)))
     {
-        const algorithmFPType v = left.var + right.var;
+        const intermSummFPType v = left.var + right.var;
         if (v < vBest)
         {
             vBest             = v;
@@ -1129,7 +1145,7 @@ bool OrderedRespHelperRandom<algorithmFPType, cpu>::findBestSplitOrderedFeature(
     split.impurityDecrease = curImpurity.var - vBest / totalWeights;
     split.nLeft            = iBest;
     split.totalWeights     = totalWeights;
-    split.left.var /= (isPositive<algorithmFPType, cpu>(split.leftWeights) ? split.leftWeights : 1.);
+    split.left.var /= (isPositive<intermSummFPType, cpu>(split.leftWeights) ? split.leftWeights : 1.);
     split.iStart       = 0;
     split.featureValue = idx;
     return true;
@@ -1140,13 +1156,13 @@ template <bool noWeights>
 bool OrderedRespHelperRandom<algorithmFPType, cpu>::findBestSplitCategoricalFeature(const algorithmFPType * featureVal, const IndexType * aIdx,
                                                                                     size_t n, size_t nMinSplitPart, const algorithmFPType accuracy,
                                                                                     const ImpurityData & curImpurity, TSplitData & split,
-                                                                                    const algorithmFPType minWeightLeaf,
-                                                                                    const algorithmFPType totalWeights) const
+                                                                                    const intermSummFPType minWeightLeaf,
+                                                                                    const intermSummFPType totalWeights) const
 {
     DAAL_ASSERT(n >= 2 * nMinSplitPart);
     ImpurityData left;
     ImpurityData right;
-    algorithmFPType vBest;
+    intermSummFPType vBest;
     bool bFound = false;
     //size_t nDiffFeatureValues = 0;
     auto aResponse      = this->_aResponse.get();
@@ -1181,10 +1197,10 @@ bool OrderedRespHelperRandom<algorithmFPType, cpu>::findBestSplitCategoricalFeat
             i++;
             continue;
         }
-        size_t count                = 1;
-        firstVal                    = featureVal[i];
-        const size_t iStart         = i;
-        algorithmFPType leftWeights = aWeights[aIdx[i]].val;
+        size_t count                 = 1;
+        firstVal                     = featureVal[i];
+        const size_t iStart          = i;
+        intermSummFPType leftWeights = aWeights[aIdx[i]].val;
         for (++i; (i < n) && (featureVal[i] == firstVal); ++count, ++i)
         {
             leftWeights += aWeights[aIdx[i]].val;
@@ -1195,12 +1211,12 @@ bool OrderedRespHelperRandom<algorithmFPType, cpu>::findBestSplitCategoricalFeat
 
         //if ((i == n) && (nDiffFeatureValues == 2) && bFound) break; //only 2 feature values, one possible split, already found
 
-        double weights = double(0);
+        intermSummFPType weights = 0;
         this->template calcImpurity<noWeights>(aIdx + iStart, count, left, weights);
         DAAL_ASSERT(fabs(weights - leftWeights) < 0.001);
-        subtractImpurity<double, cpu>(curImpurity.var, curImpurity.mean, left.var, left.mean, leftWeights, right.var, right.mean,
-                                      totalWeights - leftWeights);
-        const algorithmFPType v = leftWeights * left.var + (totalWeights - leftWeights) * right.var;
+        subtractImpurity<intermSummFPType, cpu>(curImpurity.var, curImpurity.mean, left.var, left.mean, leftWeights, right.var, right.mean,
+                                                totalWeights - leftWeights);
+        const intermSummFPType v = leftWeights * left.var + (totalWeights - leftWeights) * right.var;
         if (!bFound || v < vBest)
         {
             vBest              = v;
@@ -1215,7 +1231,7 @@ bool OrderedRespHelperRandom<algorithmFPType, cpu>::findBestSplitCategoricalFeat
     }
     if (bFound)
     {
-        const algorithmFPType impurityDecrease = curImpurity.var - vBest / (isPositive<algorithmFPType, cpu>(totalWeights) ? totalWeights : 1.);
+        const intermSummFPType impurityDecrease = curImpurity.var - vBest / (isPositive<intermSummFPType, cpu>(totalWeights) ? totalWeights : 1.);
         if (split.impurityDecrease < 0 || split.impurityDecrease < impurityDecrease)
         {
             split.impurityDecrease = impurityDecrease;
@@ -1234,6 +1250,7 @@ class TreeThreadCtx : public TreeThreadCtxBase<algorithmFPType, cpu>
 {
 public:
     typedef TreeThreadCtxBase<algorithmFPType, cpu> super;
+    using intermSummFPType = typename super::intermSummFPType;
     TreeThreadCtx(algorithmFPType * _varImp = nullptr) : super(_varImp) {}
     bool init(const decision_forest::training::Parameter & par, const NumericTable * x, size_t /*dummy*/)
     {
@@ -1242,7 +1259,7 @@ public:
         if (par.resultsToCompute
             & (computeOutOfBagError | computeOutOfBagErrorPerObservation | computeOutOfBagErrorR2 | computeOutOfBagErrorPrediction))
         {
-            size_t sz    = sizeof(RegErr<algorithmFPType, cpu>) * x->getNumberOfRows();
+            size_t sz    = sizeof(RegErr<intermSummFPType, cpu>) * x->getNumberOfRows();
             this->oobBuf = service_calloc<byte, cpu>(sz);
             DAAL_CHECK_STATUS_VAR(this->oobBuf);
         }
@@ -1254,8 +1271,8 @@ public:
         super::reduceTo(mode, other, nVars, nSamples);
         if (this->oobBuf)
         {
-            RegErr<algorithmFPType, cpu> * dst       = (RegErr<algorithmFPType, cpu> *)other.oobBuf;
-            const RegErr<algorithmFPType, cpu> * src = (const RegErr<algorithmFPType, cpu> *)this->oobBuf;
+            RegErr<intermSummFPType, cpu> * dst       = (RegErr<intermSummFPType, cpu> *)other.oobBuf;
+            const RegErr<intermSummFPType, cpu> * src = (const RegErr<intermSummFPType, cpu> *)this->oobBuf;
             for (size_t i = 0; i < nSamples; ++i) dst[i].add(src[i]);
         }
     }
@@ -1267,12 +1284,12 @@ public:
         const size_t nSamples = resp->getNumberOfRows();
         ReadRows<algorithmFPType, cpu> y(const_cast<NumericTable *>(resp), 0, nSamples);
         DAAL_CHECK_BLOCK_STATUS(y);
-        const algorithmFPType * py         = y.get();
-        size_t nPredicted                  = 0;
-        algorithmFPType _res               = 0;
-        algorithmFPType yMean              = 0;
-        algorithmFPType sumMeanDiff        = 0;
-        RegErr<algorithmFPType, cpu> * ptr = (RegErr<algorithmFPType, cpu> *)this->oobBuf;
+        const algorithmFPType * py          = y.get();
+        size_t nPredicted                   = 0;
+        intermSummFPType _res               = 0;
+        intermSummFPType yMean              = 0;
+        intermSummFPType sumMeanDiff        = 0;
+        RegErr<intermSummFPType, cpu> * ptr = (RegErr<intermSummFPType, cpu> *)this->oobBuf;
 
         PRAGMA_FORCE_SIMD
         PRAGMA_VECTOR_ALWAYS
@@ -1286,8 +1303,8 @@ public:
         {
             if (ptr[i].count)
             {
-                ptr[i].value /= algorithmFPType(ptr[i].count);
-                const algorithmFPType oobForObs = (ptr[i].value - py[i]) * (ptr[i].value - py[i]);
+                ptr[i].value /= static_cast<intermSummFPType>(ptr[i].count);
+                const intermSummFPType oobForObs = (ptr[i].value - py[i]) * (ptr[i].value - py[i]);
 
                 if (resPerObs) resPerObs[i] = oobForObs;
                 _res += oobForObs;
@@ -1298,11 +1315,11 @@ public:
             }
             else
             {
-                if (resPerObs) resPerObs[i] = algorithmFPType(-1); //was not in OOB set of any tree and hence not predicted
-                if (resPrediction) resPrediction[i] = algorithmFPType(0);
+                if (resPerObs) resPerObs[i] = intermSummFPType(-1); //was not in OOB set of any tree and hence not predicted
+                if (resPrediction) resPrediction[i] = intermSummFPType(0);
             }
         }
-        if (res && nPredicted > 0) *res = _res / algorithmFPType(nPredicted);
+        if (res && nPredicted > 0) *res = _res / static_cast<intermSummFPType>(nPredicted);
         if (resR2) *resR2 = 1 - _res / sumMeanDiff;
         return Status();
     }
diff --git a/cpp/daal/src/algorithms/dtrees/gbt/gbt_train_aux.i b/cpp/daal/src/algorithms/dtrees/gbt/gbt_train_aux.i
index 1cd1865dd91..19760c724ef 100644
--- a/cpp/daal/src/algorithms/dtrees/gbt/gbt_train_aux.i
+++ b/cpp/daal/src/algorithms/dtrees/gbt/gbt_train_aux.i
@@ -140,8 +140,9 @@ protected:
 template <typename algorithmFPType, CpuType cpu>
 struct gh
 {
-    algorithmFPType g; //gradient
-    algorithmFPType h; //hessian
+    typedef double intermSummFPType; // needed due to templating shared with forests
+    algorithmFPType g;               //gradient
+    algorithmFPType h;               //hessian
     gh() : g(0), h(0) {}
     gh(algorithmFPType _g, algorithmFPType _h) : g(_g), h(_h) {}
     gh(const gh & o) : g(o.g), h(o.h) {}

From 0b745bc24308d600cb8b22964458dc6069dcba96 Mon Sep 17 00:00:00 2001
From: david-cortes-intel <david.cortes@intel.com>
Date: Thu, 11 Sep 2025 09:42:40 -0700
Subject: [PATCH 2/5] make compiler happy: declare intermSummFPType inside ImpurityData

---
 .../df_classification_train_dense_default_impl.i           | 7 ++++---
 .../regression/df_regression_train_dense_default_impl.i    | 4 ++--
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/cpp/daal/src/algorithms/dtrees/forest/classification/df_classification_train_dense_default_impl.i b/cpp/daal/src/algorithms/dtrees/forest/classification/df_classification_train_dense_default_impl.i
index 5d8aa44e2c5..c6c5910fa80 100644
--- a/cpp/daal/src/algorithms/dtrees/forest/classification/df_classification_train_dense_default_impl.i
+++ b/cpp/daal/src/algorithms/dtrees/forest/classification/df_classification_train_dense_default_impl.i
@@ -56,13 +56,12 @@ template <typename algorithmFPType, CpuType cpu>
 class UnorderedRespHelperBest : public DataHelper<algorithmFPType, ClassIndexType, cpu>
 {
 public:
-    typedef double intermSummFPType;
     typedef DataHelper<algorithmFPType, ClassIndexType, cpu> super;
-    typedef typename dtrees::internal::TVector<intermSummFPType, cpu, dtrees::internal::ScalableAllocator<cpu> > Histogramm;
 
     struct ImpurityData
     {
-        typedef intermSummFPType intermSummFPType;
+        typedef double intermSummFPType;
+        typedef typename dtrees::internal::TVector<intermSummFPType, cpu, dtrees::internal::ScalableAllocator<cpu> > Histogramm;
         intermSummFPType var; //impurity is a variance
         Histogramm hist;
 
@@ -76,6 +75,8 @@ public:
         }
     };
 
+    using intermSummFPType = typename ImpurityData::intermSummFPType;
+    using Histogramm = typename ImpurityData::Histogramm;
     typedef SplitData<algorithmFPType, ImpurityData> TSplitData;
 
 public:
diff --git a/cpp/daal/src/algorithms/dtrees/forest/regression/df_regression_train_dense_default_impl.i b/cpp/daal/src/algorithms/dtrees/forest/regression/df_regression_train_dense_default_impl.i
index 8aebc6c25dc..ff4a630b010 100644
--- a/cpp/daal/src/algorithms/dtrees/forest/regression/df_regression_train_dense_default_impl.i
+++ b/cpp/daal/src/algorithms/dtrees/forest/regression/df_regression_train_dense_default_impl.i
@@ -118,17 +118,17 @@ class OrderedRespHelperBest : public DataHelper<algorithmFPType, algorithmFPType
 {
 public:
     typedef algorithmFPType TResponse;
-    typedef double intermSummFPType;
     typedef DataHelper<algorithmFPType, algorithmFPType, cpu> super;
 
     struct ImpurityData
     {
-        typedef intermSummFPType intermSummFPType;
+        typedef double intermSummFPType;
         intermSummFPType var; //impurity is a variance
         intermSummFPType mean;
         intermSummFPType value() const { return var; }
     };
 
+    using intermSummFPType = typename ImpurityData::intermSummFPType;
     typedef SplitData<algorithmFPType, ImpurityData> TSplitData;
 
 public:

From cafb956f93eed46630b3caf5aeb450a1011822c0 Mon Sep 17 00:00:00 2001
From: david-cortes-intel <david.cortes@intel.com>
Date: Thu, 11 Sep 2025 09:43:47 -0700
Subject: [PATCH 3/5] linter: align using declarations

---
 .../classification/df_classification_train_dense_default_impl.i | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cpp/daal/src/algorithms/dtrees/forest/classification/df_classification_train_dense_default_impl.i b/cpp/daal/src/algorithms/dtrees/forest/classification/df_classification_train_dense_default_impl.i
index c6c5910fa80..0e13ee1b175 100644
--- a/cpp/daal/src/algorithms/dtrees/forest/classification/df_classification_train_dense_default_impl.i
+++ b/cpp/daal/src/algorithms/dtrees/forest/classification/df_classification_train_dense_default_impl.i
@@ -76,7 +76,7 @@ public:
     };
 
     using intermSummFPType = typename ImpurityData::intermSummFPType;
-    using Histogramm = typename ImpurityData::Histogramm;
+    using Histogramm       = typename ImpurityData::Histogramm;
     typedef SplitData<algorithmFPType, ImpurityData> TSplitData;
 
 public:

From 289a0221ee542058a17bb507160cb52441874bd8 Mon Sep 17 00:00:00 2001
From: david-cortes-intel <david.cortes@intel.com>
Date: Fri, 10 Oct 2025 06:38:38 -0700
Subject: [PATCH 4/5] rename variables

---
 .../dtrees/forest/df_train_dense_default_impl.i        | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/cpp/daal/src/algorithms/dtrees/forest/df_train_dense_default_impl.i b/cpp/daal/src/algorithms/dtrees/forest/df_train_dense_default_impl.i
index cb455b59a04..9747715fa0f 100644
--- a/cpp/daal/src/algorithms/dtrees/forest/df_train_dense_default_impl.i
+++ b/cpp/daal/src/algorithms/dtrees/forest/df_train_dense_default_impl.i
@@ -596,10 +596,10 @@ protected:
         DAAL_ASSERT(iBuf < _nFeatureBufs);
         return _aFeatureBuf[iBuf].get();
     }
-    algorithmFPType * featureBuf_FPType() const
+    algorithmFPType * featureBufFPType() const
     {
-        this->_BufFPType.reset(this->_data->getNumberOfRows());
-        return this->_BufFPType.get();
+        this->_bufFPType.reset(this->_data->getNumberOfRows());
+        return this->_bufFPType.get();
     }
     IndexType * featureIndexBuf(size_t iBuf) const
     {
@@ -678,7 +678,7 @@ protected:
     typename DataHelper::TreeType _tree;
     mutable TVector<IndexType, cpu> _aSample;
     mutable TArray<intermSummFPTypeArray, cpu> _aFeatureBuf;
-    mutable TArray<algorithmFPType, cpu> _BufFPType;
+    mutable TArray<algorithmFPType, cpu> _bufFPType;
     mutable TArray<IndexTypeArray, cpu> _aFeatureIndexBuf;
 
     const NumericTable * _data;
@@ -1245,7 +1245,7 @@ NodeSplitResult TrainBatchTaskBase<algorithmFPType, BinIndexType, DataHelper, Hy
         }
         else
         {
-            algorithmFPType * featBuf = featureBuf_FPType() + iStart; //single thread
+            algorithmFPType * featBuf = featureBufFPType() + iStart; //single thread
             featureValuesToBuf(iFeature, featBuf, aIdx, n);
             if (featBuf[n - 1] - featBuf[0] <= _accuracy) //all values of the feature are the same
                 continue;

From defca94583063ece4e4bde41658b81bc0bf8f3e6 Mon Sep 17 00:00:00 2001
From: David Cortes <david.cortes@intel.com>
Date: Tue, 14 Oct 2025 11:54:56 +0200
Subject: [PATCH 5/5] missing changes: remaining intermSummFPType casts, guard against zero right weights

---
 .../df_classification_train_dense_default_impl.i   | 14 ++++++++------
 .../df_regression_train_dense_default_impl.i       |  3 +--
 2 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/cpp/daal/src/algorithms/dtrees/forest/classification/df_classification_train_dense_default_impl.i b/cpp/daal/src/algorithms/dtrees/forest/classification/df_classification_train_dense_default_impl.i
index 8a3d49cbc2d..7af77495c0e 100644
--- a/cpp/daal/src/algorithms/dtrees/forest/classification/df_classification_train_dense_default_impl.i
+++ b/cpp/daal/src/algorithms/dtrees/forest/classification/df_classification_train_dense_default_impl.i
@@ -1332,8 +1332,9 @@ int UnorderedRespHelperRandom<algorithmFPType, cpu>::findSplitFewClasses(int nDi
         for (size_t iClass = 0; iClass < K; ++iClass) leftWeights += histLeft[iClass];
     }
 
-    if (!(((n - nLeft) < nMinSplitPart) || ((totalWeights - leftWeights) < minWeightLeaf) || (nLeft < nMinSplitPart) || (leftWeights < minWeightLeaf))
-        && leftWeights)
+    const intermSummFPType rightWeights = totalWeights - leftWeights;
+    if (!(((n - nLeft) < nMinSplitPart) || (rightWeights < minWeightLeaf) || (nLeft < nMinSplitPart) || (leftWeights < minWeightLeaf)) && leftWeights
+        && rightWeights > 0)
     {
         auto histTotal            = curImpurity.hist.get();
         intermSummFPType sumLeft  = 0;
@@ -1346,7 +1347,7 @@ int UnorderedRespHelperRandom<algorithmFPType, cpu>::findSplitFewClasses(int nDi
             sumRight += (histTotal[iClass] - histLeft[iClass]) * (histTotal[iClass] - histLeft[iClass]);
         }
 
-        const intermSummFPType decrease = sumLeft / leftWeights + sumRight / (totalWeights - leftWeights);
+        const intermSummFPType decrease = sumLeft / leftWeights + sumRight / rightWeights;
         if (decrease > bestImpDecrease)
         {
             split.left.hist     = this->_histLeft;
@@ -1692,10 +1693,11 @@ Status TreeThreadCtx<algorithmFPType, cpu>::finalizeOOBError(const NumericTable
         }
         if (resPerObs) resPerObs[i] = algorithmFPType(maxIdx != classLabel);
     });
-    if (res) *res = nPredicted.get() ? algorithmFPType(nError.get()) / static_cast<intermSummFPType>(nPredicted.get()) : 0;
+    if (res) *res = nPredicted.get() ? static_cast<intermSummFPType>(nError.get()) / static_cast<intermSummFPType>(nPredicted.get()) : 0;
     if (resAccuracy)
-        *resAccuracy = nPredicted.get() ? algorithmFPType(1) - algorithmFPType(nError.get()) / static_cast<intermSummFPType>(nPredicted.get()) :
-                                          algorithmFPType(1);
+        *resAccuracy = nPredicted.get() ?
+                           intermSummFPType(1) - static_cast<intermSummFPType>(nError.get()) / static_cast<intermSummFPType>(nPredicted.get()) :
+                           intermSummFPType(1);
     return Status();
 }
 
diff --git a/cpp/daal/src/algorithms/dtrees/forest/regression/df_regression_train_dense_default_impl.i b/cpp/daal/src/algorithms/dtrees/forest/regression/df_regression_train_dense_default_impl.i
index 0b3375ebf03..93f81717662 100644
--- a/cpp/daal/src/algorithms/dtrees/forest/regression/df_regression_train_dense_default_impl.i
+++ b/cpp/daal/src/algorithms/dtrees/forest/regression/df_regression_train_dense_default_impl.i
@@ -724,8 +724,7 @@ void RespHelperBase<algorithmFPType, cpu, crtp>::finalizeBestSplit(const IndexTy
     }
     else
     {
-        divL =
-            isZero<intermSummFPType, cpu>(bestSplit.leftWeights) ? intermSummFPType(1) : (1.0 / static_cast<intermSummFPType>(bestSplit.leftWeights));
+        divL = isZero<intermSummFPType, cpu>(bestSplit.leftWeights) ? intermSummFPType(1) : (1.0 / bestSplit.leftWeights);
 
         bestSplit.left.mean *= divL;
         bestSplit.left.var            = 0;