Add a block_size attribute to the quantize_nf4 operator and pass it from the Python quantizer

wanghaoshuang · wanghaoshuang · commit a6f740444df9 · 2023-11-02T16:32:48.000+08:00
diff --git a/csrc/lc/nf4.cu b/csrc/lc/nf4.cu
@@ -226,10 +226,8 @@ std::vector<paddle::Tensor> QuantizeNF4(const paddle::Tensor& input, int block_s
     }
 }
 
-
-
-
 PD_BUILD_OP(quantize_nf4)
     .Inputs({"input"})
     .Outputs({"out", "abs_max"})
+    .Attrs({"block_size: int"})
     .SetKernelFn(PD_KERNEL(QuantizeNF4));
diff --git a/paddleslim/lc/quantizers/nf4.py b/paddleslim/lc/quantizers/nf4.py
@@ -14,7 +14,8 @@ def __init__(self, block_size=64, double_quant=False):
         self.double_quant_scale = None
 
     def quantize(self, x: paddle.Tensor):
-        out, abs_max = paddleslim_ops.quantize_nf4(x)
+        out, abs_max = paddleslim_ops.quantize_nf4(
+            x, block_size=self.block_size)
         self.quant_scale = abs_max
         return out
 

Original file line number	Diff line number	Diff line change
`@@ -226,10 +226,8 @@ std::vector<paddle::Tensor> QuantizeNF4(const paddle::Tensor& input, int block_s`
`226`	`226`	`}`
`227`	`227`	`}`
`228`	`228`
`229`		`-`
`230`		`-`
`231`		`-`
`232`	`229`	`PD_BUILD_OP(quantize_nf4)`
`233`	`230`	`.Inputs({"input"})`
`234`	`231`	`.Outputs({"out", "abs_max"})`
	`232`	`+ .Attrs({"block_size: int"})`
`235`	`233`	`.SetKernelFn(PD_KERNEL(QuantizeNF4));`