Skip to content

Commit a6f7404

Browse files
committed
Add block_size attribute for nf4 operator
1 parent 3ce762b commit a6f7404

File tree

2 files changed

+3
-4
lines changed

2 files changed

+3
-4
lines changed

csrc/lc/nf4.cu

+1-3
Original file line number | Diff line number | Diff line change
@@ -226,10 +226,8 @@ std::vector<paddle::Tensor> QuantizeNF4(const paddle::Tensor& input, int block_s
226226
}
227227
}
228228

229-
230-
231-
232229
PD_BUILD_OP(quantize_nf4)
233230
.Inputs({"input"})
234231
.Outputs({"out", "abs_max"})
232+
.Attrs({"block_size: int"})
235233
.SetKernelFn(PD_KERNEL(QuantizeNF4));

paddleslim/lc/quantizers/nf4.py

+2-1
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,8 @@ def __init__(self, block_size=64, double_quant=False):
1414
self.double_quant_scale = None
1515

1616
def quantize(self, x: paddle.Tensor):
17-
out, abs_max = paddleslim_ops.quantize_nf4(x)
17+
out, abs_max = paddleslim_ops.quantize_nf4(
18+
x, block_size=self.block_size)
1819
self.quant_scale = abs_max
1920
return out
2021

0 commit comments

Comments
 (0)