@@ -119,6 +119,126 @@ def intercept_update_step(self, y, Xw):
119
119
return np .mean (Xw - y )
120
120
121
121
122
class WeightedQuadratic(BaseDatafit):
    r"""Weighted Quadratic datafit to handle sample weights.

    The datafit reads:

    .. math:: 1 / (2 xx \sum_(i=1)^(n_"samples") weights_i)
        \sum_(i=1)^(n_"samples") weights_i (y_i - (Xw)_i)^ 2

    Attributes
    ----------
    Xtwy : array, shape (n_features,)
        Pre-computed quantity used during the gradient evaluation.
        Equal to ``X.T @ (sample_weights * y)``.

    sample_weights : array, shape (n_samples,)
        Weights for each sample.

    Note
    ----
    The class is jit compiled at fit time using Numba compiler.
    This allows for faster computations.
    """

    def __init__(self, sample_weights):
        self.sample_weights = sample_weights

    def get_spec(self):
        # Numba jitclass attribute spec. Every attribute assigned on self
        # must be declared here; the sparse pre-computation is stored under
        # ``Xtwy`` (see ``initialize``/``initialize_sparse``).
        spec = (
            ('Xtwy', float64[:]),
            ('sample_weights', float64[:]),
        )
        return spec

    def params_to_dict(self):
        return {'sample_weights': self.sample_weights}

    def get_lipschitz(self, X, y):
        """Coordinate-wise Lipschitz constants: ||sqrt(W) X_j||^2 / sum(W)."""
        n_features = X.shape[1]
        lipschitz = np.zeros(n_features, dtype=X.dtype)
        w_sum = self.sample_weights.sum()

        for j in range(n_features):
            lipschitz[j] = (self.sample_weights * X[:, j] ** 2).sum() / w_sum

        return lipschitz

    def get_lipschitz_sparse(self, X_data, X_indptr, X_indices, y):
        """Sparse (CSC) counterpart of ``get_lipschitz``."""
        n_features = len(X_indptr) - 1
        lipschitz = np.zeros(n_features, dtype=X_data.dtype)
        w_sum = self.sample_weights.sum()

        for j in range(n_features):
            nrm2 = 0.
            for idx in range(X_indptr[j], X_indptr[j + 1]):
                nrm2 += self.sample_weights[X_indices[idx]] * X_data[idx] ** 2

            lipschitz[j] = nrm2 / w_sum

        return lipschitz

    def initialize(self, X, y):
        """Pre-compute ``Xtwy = X.T @ (sample_weights * y)``."""
        self.Xtwy = X.T @ (self.sample_weights * y)

    def initialize_sparse(self, X_data, X_indptr, X_indices, y):
        """Sparse (CSC) counterpart of ``initialize``.

        Bug fix: the result was previously stored in ``self.Xty`` while
        ``get_spec`` and the sparse gradient methods refer to ``Xtwy``,
        which broke the jit-compiled sparse path. It is now stored under
        the declared name ``Xtwy``.
        """
        n_features = len(X_indptr) - 1
        self.Xtwy = np.zeros(n_features, dtype=X_data.dtype)

        for j in range(n_features):
            # float accumulator (0.) keeps the Numba type stable
            xtwy = 0.
            for idx in range(X_indptr[j], X_indptr[j + 1]):
                xtwy += (X_data[idx] * self.sample_weights[X_indices[idx]]
                         * y[X_indices[idx]])
            self.Xtwy[j] = xtwy

    def get_global_lipschitz(self, X, y):
        """Global Lipschitz constant: ||sqrt(W) X||_2^2 / sum(W).

        Bug fix: previously computed ``norm(X.T @ sqrt(w), ord=2)``, a
        *vector* norm which underestimates the constant. The spectral norm
        of the row-scaled design matrix matches the sparse counterpart
        (``get_global_lipschitz_sparse``).
        """
        w_sum = self.sample_weights.sum()
        scaled_X = np.sqrt(self.sample_weights).reshape(-1, 1) * X
        return norm(scaled_X, ord=2) ** 2 / w_sum

    def get_global_lipschitz_sparse(self, X_data, X_indptr, X_indices, y):
        # Spectral norm of sqrt(W) X, computed on the scaled CSC data.
        return spectral_norm(
            X_data * np.sqrt(self.sample_weights[X_indices]),
            X_indptr, X_indices, len(y)) ** 2 / self.sample_weights.sum()

    def value(self, y, w, Xw):
        """Weighted least-squares value at ``Xw``."""
        w_sum = self.sample_weights.sum()
        return np.sum(self.sample_weights * (y - Xw) ** 2) / (2 * w_sum)

    def gradient_scalar(self, X, y, w, Xw, j):
        """Gradient with respect to coordinate ``j`` (dense X)."""
        return (X[:, j] @ (self.sample_weights * (Xw - y))) / self.sample_weights.sum()

    def gradient_scalar_sparse(self, X_data, X_indptr, X_indices, y, Xw, j):
        """Gradient with respect to coordinate ``j`` (sparse CSC X)."""
        XjTXw = 0.
        for i in range(X_indptr[j], X_indptr[j + 1]):
            XjTXw += X_data[i] * self.sample_weights[X_indices[i]] * Xw[X_indices[i]]
        return (XjTXw - self.Xtwy[j]) / self.sample_weights.sum()

    def gradient(self, X, y, Xw):
        """Full gradient (dense X)."""
        return X.T @ (self.sample_weights * (Xw - y)) / self.sample_weights.sum()

    def raw_grad(self, y, Xw):
        """Gradient of the datafit with respect to ``Xw``."""
        return (self.sample_weights * (Xw - y)) / self.sample_weights.sum()

    def raw_hessian(self, y, Xw):
        """Diagonal Hessian of the datafit with respect to ``Xw``."""
        return self.sample_weights / self.sample_weights.sum()

    def full_grad_sparse(self, X_data, X_indptr, X_indices, y, Xw):
        """Full gradient (sparse CSC X)."""
        n_features = X_indptr.shape[0] - 1
        grad = np.zeros(n_features, dtype=Xw.dtype)

        for j in range(n_features):
            XjTXw = 0.
            for i in range(X_indptr[j], X_indptr[j + 1]):
                XjTXw += (X_data[i] * self.sample_weights[X_indices[i]]
                          * Xw[X_indices[i]])
            grad[j] = (XjTXw - self.Xtwy[j]) / self.sample_weights.sum()
        return grad

    def intercept_update_step(self, y, Xw):
        """Weighted-mean residual, used to update the fitted intercept."""
        return np.sum(self.sample_weights * (Xw - y)) / self.sample_weights.sum()
240
+
241
+
122
242
@njit
123
243
def sigmoid (x ):
124
244
"""Vectorwise sigmoid."""
0 commit comments