@@ -122,36 +122,89 @@ def _daal_tsne(self, P, n_samples, X_embedded):
122122
123123 return X_embedded
124124
125+ # Comment 2025-11-24: This appears to be a copy-paste from an earlier version of the original
126+ # scikit-learn with some modifications to make calls to oneDAL under a narrow subset of
127+ # allowed input parameters, copy-pasting the rest of the sklearn code when oneDAL is not
128+ # called. Note that the conditions checked here are out of sync with the latest sklearn by now.
129+ # An early 'is supported' check that offloads to stock sklearn was added later on, which results
130+ # in having a lot of dead code paths in this function that can be safely removed.
131+ # Note: this method is called from inside 'fit' from the base class in stock scikit-learn.
132+ # Hence, the offloading logic is different than in other classes, as falling back to 'fit'
133+ # from the base class would lead to a circular loop.
125134 def _fit (self , X , skip_num_points = 0 ):
126135 """Private function to fit the model using X as training data."""
127- if isinstance (self .init , str ) and self .init == "warn" :
128- warnings .warn (
129- "The default initialization in TSNE will change "
130- "from 'random' to 'pca' in 1.2." ,
131- FutureWarning ,
132- )
133- self ._init = "random"
136+
137+ _patching_status = PatchingConditionsChain ("sklearn.manifold.TSNE._tsne" )
138+ _patching_status .and_conditions (
139+ [
140+ (
141+ self .method == "barnes_hut" ,
142+ 'Used t-SNE method is not "barnes_hut" which is the only supported.' ,
143+ ),
144+ (self .n_components == 2 , "Number of components != 2." ),
145+ (self .verbose == 0 , "Verbose mode is set." ),
146+ (
147+ daal_check_version ((2021 , "P" , 600 )),
148+ "oneDAL version is lower than 2021.6." ,
149+ ),
150+ (
151+ not (
152+ isinstance (self .init , str ) and self .init == "pca" and issparse (X )
153+ ),
154+ "PCA initialization is not supported with sparse input matrices." ,
155+ ),
156+ # Note: these conditions below should result in errors, but stock scikit-learn
157+ # does not check for errors at this exact point. Hence, this offloads the erroring
158+ # out to the base class, wherever in the process they might be encountered.
159+ (
160+ np .isscalar (self .angle ) and self .angle > 0.0 and self .angle < 1.0 ,
161+ "'angle' must be between 0.0 - 1.0" ,
162+ ),
163+ (self .early_exaggeration >= 1.0 , "early_exaggeration must be at least 1" ),
164+ (
165+ (
166+ isinstance (self .init , str )
167+ and self .init
168+ in ["random" , "pca" ]
169+ + (
170+ ["warn" ]
171+ if sklearn_check_version ("1.0" )
172+ and not sklearn_check_version ("1.2" )
173+ else []
174+ )
175+ )
176+ or isinstance (self .init , np .ndarray ),
177+ "'init' must be 'exact', 'pca', or a numpy array." ,
178+ ),
179+ ]
180+ )
181+ _dal_ready = _patching_status .get_status (logs = True )
182+ if not _dal_ready :
183+ return super ()._fit (X , skip_num_points )
184+
185+ if sklearn_check_version ("1.0" ) and not sklearn_check_version ("1.2" ):
186+ if isinstance (self .init , str ) and self .init == "warn" :
187+ warnings .warn (
188+ "The default initialization in TSNE will change "
189+ "from 'random' to 'pca' in 1.2." ,
190+ FutureWarning ,
191+ )
192+ self ._init = "random"
193+ else :
194+ self ._init = self .init
134195 else :
135196 self ._init = self .init
136197
137- if isinstance (self ._init , str ) and self ._init == "pca" and issparse (X ):
138- raise TypeError (
139- "PCA initialization is currently not supported "
140- "with the sparse input matrix. Use "
141- 'init="random" instead.'
142- )
143-
144- if self .method not in ["barnes_hut" , "exact" ]:
145- raise ValueError ("'method' must be 'barnes_hut' or 'exact'" )
146- if self .angle < 0.0 or self .angle > 1.0 :
147- raise ValueError ("'angle' must be between 0.0 - 1.0" )
148- if self .learning_rate == "warn" :
149- warnings .warn (
150- "The default learning rate in TSNE will change "
151- "from 200.0 to 'auto' in 1.2." ,
152- FutureWarning ,
153- )
154- self ._learning_rate = 200.0
198+ if sklearn_check_version ("1.0" ) and not sklearn_check_version ("1.2" ):
199+ if self .learning_rate == "warn" :
200+ warnings .warn (
201+ "The default learning rate in TSNE will change "
202+ "from 200.0 to 'auto' in 1.2." ,
203+ FutureWarning ,
204+ )
205+ self ._learning_rate = 200.0
206+ else :
207+ self ._learning_rate = self .learning_rate
155208 else :
156209 self ._learning_rate = self .learning_rate
157210 if self ._learning_rate == "auto" :
@@ -227,28 +280,15 @@ def _fit(self, X, skip_num_points=0):
227280 "or provide the dense distance matrix."
228281 )
229282
230- if self .method == "barnes_hut" and self .n_components > 3 :
231- raise ValueError (
232- "'n_components' should be inferior to 4 for the "
233- "barnes_hut algorithm as it relies on "
234- "quad-tree or oct-tree."
235- )
236283 random_state = check_random_state (self .random_state )
237284
238- if self .early_exaggeration < 1.0 :
239- raise ValueError (
240- "early_exaggeration must be at least 1, but is {}" .format (
241- self .early_exaggeration
242- )
243- )
244-
245285 if not sklearn_check_version ("1.2" ):
246286 if self .n_iter < 250 :
247287 raise ValueError ("n_iter should be at least 250" )
248288
249289 n_samples = X .shape [0 ]
250290
251- neighbors_nn = None
291+ # neighbors_nn = None # <- unused variable in stock sklearn, commented out due to coverity
252292 if self .method == "exact" :
253293 # Retrieve the distance matrix, either using the precomputed one or
254294 # computing it.
@@ -278,9 +318,8 @@ def _fit(self, X, skip_num_points=0):
278318 "All distances should be positive, the " "metric given is not correct"
279319 )
280320
281- if (
282- self .metric != "euclidean"
283- and getattr (self , "square_distances" , True ) is True
321+ if self .metric != "euclidean" and (
322+ sklearn_check_version ("1.2" ) or self .square_distances is True
284323 ):
285324 distances **= 2
286325
@@ -339,15 +378,14 @@ def _fit(self, X, skip_num_points=0):
339378 # Free the memory used by the ball_tree
340379 del knn
341380
342- if (
343- getattr (self , "square_distances" , True ) is True
344- or self .metric == "euclidean"
381+ # knn returns the euclidean distance but we need it squared
382+ # to be consistent with the 'exact' method. Note that
383+ # the method was derived using the euclidean method as in the
384+ # input space. Not sure of the implication of using a different
385+ # metric.
386+ if sklearn_check_version ("1.2" ) or (
387+ self .square_distances is True or self .metric == "euclidean"
345388 ):
346- # knn return the euclidean distance but we need it squared
347- # to be consistent with the 'exact' method. Note that the
348- # the method was derived using the euclidean method as in the
349- # input space. Not sure of the implication of using a different
350- # metric.
351389 distances_nn .data **= 2
352390
353391 # compute the joint probability distribution for the input space
@@ -358,16 +396,23 @@ def _fit(self, X, skip_num_points=0):
358396 elif self ._init == "pca" :
359397 pca = PCA (
360398 n_components = self .n_components ,
361- svd_solver = "randomized" ,
362399 random_state = random_state ,
363400 )
401+ if sklearn_check_version ("1.2" ):
402+ # Always output a numpy array, no matter what is configured globally
403+ pca .set_output (transform = "default" )
364404 X_embedded = pca .fit_transform (X ).astype (np .float32 , copy = False )
365- warnings .warn (
366- "The PCA initialization in TSNE will change to "
367- "have the standard deviation of PC1 equal to 1e-4 "
368- "in 1.2. This will ensure better convergence." ,
369- FutureWarning ,
370- )
405+ if sklearn_check_version ("1.0" ) and not sklearn_check_version ("1.2" ):
406+ warnings .warn (
407+ "The PCA initialization in TSNE will change to "
408+ "have the standard deviation of PC1 equal to 1e-4 "
409+ "in 1.2. This will ensure better convergence." ,
410+ FutureWarning ,
411+ )
412+ if sklearn_check_version ("1.2" ):
413+ # PCA is rescaled so that PC1 has standard deviation 1e-4 which is
414+ # the default value for random initialization. See issue #18018.
415+ X_embedded = X_embedded / np .std (X_embedded [:, 0 ]) * 1e-4
371416 elif self ._init == "random" :
372417 # The embedding is initialized with iid samples from Gaussians with
373418 # standard deviation 1e-4.
@@ -377,40 +422,11 @@ def _fit(self, X, skip_num_points=0):
377422 else :
378423 raise ValueError ("'init' must be 'pca', 'random', or " "a numpy array" )
379424
380- # Degrees of freedom of the Student's t-distribution. The suggestion
381- # degrees_of_freedom = n_components - 1 comes from
382- # "Learning a Parametric Embedding by Preserving Local Structure"
383- # Laurens van der Maaten, 2009.
384- degrees_of_freedom = max (self .n_components - 1 , 1 )
385-
386- _patching_status = PatchingConditionsChain ("sklearn.manifold.TSNE._tsne" )
387- _patching_status .and_conditions (
388- [
389- (
390- self .method == "barnes_hut" ,
391- 'Used t-SNE method is not "barnes_hut" which is the only supported.' ,
392- ),
393- (self .n_components == 2 , "Number of components != 2." ),
394- (self .verbose == 0 , "Verbose mode is set." ),
395- (
396- daal_check_version ((2021 , "P" , 600 )),
397- "oneDAL version is lower than 2021.6." ,
398- ),
399- ]
400- )
401- _dal_ready = _patching_status .get_status (logs = True )
425+ # Note: by this point, stock sklearn would calculate degrees of freedom, but oneDAL
426+ # doesn't use them.
402427
403- if _dal_ready :
404- X_embedded = check_array (X_embedded , dtype = [np .float32 , np .float64 ])
405- return self ._daal_tsne (P , n_samples , X_embedded = X_embedded )
406- return self ._tsne (
407- P ,
408- degrees_of_freedom ,
409- n_samples ,
410- X_embedded = X_embedded ,
411- neighbors = neighbors_nn ,
412- skip_num_points = skip_num_points ,
413- )
428+ X_embedded = check_array (X_embedded , dtype = [np .float32 , np .float64 ])
429+ return self ._daal_tsne (P , n_samples , X_embedded = X_embedded )
414430
415431 fit .__doc__ = BaseTSNE .fit .__doc__
416432 fit_transform .__doc__ = BaseTSNE .fit_transform .__doc__
0 commit comments