Skip to content

Commit 521157e

Browse files
authored
[Cherry-Pick]Cp fit paddle26 (#1823)
1 parent dcf79e9 commit 521157e

File tree

2 files changed

+16
-6
lines changed

2 files changed

+16
-6
lines changed

paddleslim/quant/advanced/gptq.py

+13-6
Original file line numberDiff line numberDiff line change
@@ -106,8 +106,9 @@ def fasterquant(self,
         H = self.hessian
         del self.hessian
         dead = paddle.where(paddle.diag(H) == 0)
-        H[dead, dead] = 1
-        W[:, dead] = 0
+        if dead[0].shape[0] != 0:
+            H[dead, dead] = 1
+            W[:, dead] = 0
         del dead
         if actorder:
             perm = paddle.argsort(paddle.diag(H), descending=True)
@@ -122,9 +123,15 @@ def fasterquant(self,
         damp = percdamp * paddle.mean(paddle.diag(H))
         diag = paddle.arange(self.columns)
         H[diag, diag] += damp
-
-        H = paddle.inverse(H)
-        H = paddle.linalg.cholesky(H, upper=True)
+        try:
+            H = paddle.inverse(H)
+            H = paddle.linalg.cholesky(H, upper=True)
+        except:
+            print('We skip GPTQ this layer now.')
+            print(
+                'If you want GPTQ this layer, please try setting damp_percent larger or increasing the number of samples.'
+            )
+            return
         Hinv = H

         for i1 in range(0, self.columns, blocksize):
@@ -182,4 +189,4 @@ def fasterquant(self,

         self.quantized = True
         del H, Q, Hinv, W, Losses
-        paddle.device.cuda.empty_cache()
+        paddle.device.cuda.empty_cache()

paddleslim/quant/advanced/piecewise_search.py

+3
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,8 @@ def search(self, layer_name, sampled_input, act_abs_max, weight):
             mask_for_search = paddle.where(labels == centroids.argsort()[i],
                                            1., 0.)
             mask_for_ones = paddle.where(mask_for_search == 0., 1., 0.)
+            mask_for_search = mask_for_search.cast(dtype)
+            mask_for_ones = mask_for_ones.cast(dtype)

             while alpha <= alpha_max:
                 if alpha < 1:
@@ -125,6 +127,7 @@ def search(self, layer_name, sampled_input, act_abs_max, weight):
                 if smooth_scale_out is not None:
                     mask_for_ones_new = paddle.where(
                         smooth_scale_out == 0., 1., 0.)
+                    mask_for_ones_new = mask_for_ones_new.cast(dtype)
                     mask_for_ones *= mask_for_ones_new
                     smooth_scale_ = smooth_scale_out + smooth_scale
                     smooth_scale_tmp = smooth_scale_ + mask_for_ones

0 commit comments

Comments (0)