Skip to content

Commit 7cac65d

Browse files
committed
Add block_size attribute for nf4 operator
1 parent 3ce762b commit 7cac65d

File tree

2 files changed

20 additions and 21 deletions (+20 -21 lines changed)

2 files changed

20 additions and 21 deletions (+20 -21 lines changed)

csrc/lc/nf4.cu

+18 -20
Original file line number | Diff line number | Diff line change
@@ -185,23 +185,23 @@ std::vector<paddle::Tensor> LaunchQuantizeNF4(const paddle::Tensor& input, int
185185

186186
auto abs_max = paddle::full({num_blocks}, 1, paddle::DataType::FLOAT32, input.place());
187187

188-
const DataType_ *in_ptr = reinterpret_cast<const DataType_*>(input.data<data_t>());
189-
unsigned char *out_ptr = output.mutable_data<unsigned char>();
190-
float *abs_max_ptr = abs_max.mutable_data<float>();
191-
192-
if(block_size == 2048) {
193-
kQuantizeBlockwiseNF4<DataType_, 2048, 4><<<num_blocks, 512>>>(in_ptr, abs_max_ptr, out_ptr, n);
194-
} else if(block_size == 1024) {
195-
kQuantizeBlockwiseNF4<DataType_, 1024, 4><<<num_blocks, 256>>>(in_ptr, abs_max_ptr, out_ptr, n);
196-
} else if(block_size == 512) {
197-
kQuantizeBlockwiseNF4<DataType_, 512, 2><<<num_blocks, 256>>>(in_ptr, abs_max_ptr, out_ptr, n);
198-
} else if(block_size == 256) {
199-
kQuantizeBlockwiseNF4<DataType_, 256, 2><<<num_blocks, 128>>>(in_ptr, abs_max_ptr, out_ptr, n);
200-
} else if(block_size == 128) {
201-
kQuantizeBlockwiseNF4<DataType_, 128, 2><<<num_blocks, 64>>>(in_ptr, abs_max_ptr, out_ptr, n);
202-
} else if(block_size == 64) {
203-
kQuantizeBlockwiseNF4<DataType_, 64, 2><<<num_blocks, 32>>>(in_ptr, abs_max_ptr, out_ptr, n);
204-
}
188+
// const DataType_ *in_ptr = reinterpret_cast<const DataType_*>(input.data<data_t>());
189+
// unsigned char *out_ptr = output.mutable_data<unsigned char>();
190+
// float *abs_max_ptr = abs_max.mutable_data<float>();
191+
192+
// if(block_size == 2048) {
193+
// kQuantizeBlockwiseNF4<DataType_, 2048, 4><<<num_blocks, 512>>>(in_ptr, abs_max_ptr, out_ptr, n);
194+
// } else if(block_size == 1024) {
195+
// kQuantizeBlockwiseNF4<DataType_, 1024, 4><<<num_blocks, 256>>>(in_ptr, abs_max_ptr, out_ptr, n);
196+
// } else if(block_size == 512) {
197+
// kQuantizeBlockwiseNF4<DataType_, 512, 2><<<num_blocks, 256>>>(in_ptr, abs_max_ptr, out_ptr, n);
198+
// } else if(block_size == 256) {
199+
// kQuantizeBlockwiseNF4<DataType_, 256, 2><<<num_blocks, 128>>>(in_ptr, abs_max_ptr, out_ptr, n);
200+
// } else if(block_size == 128) {
201+
// kQuantizeBlockwiseNF4<DataType_, 128, 2><<<num_blocks, 64>>>(in_ptr, abs_max_ptr, out_ptr, n);
202+
// } else if(block_size == 64) {
203+
// kQuantizeBlockwiseNF4<DataType_, 64, 2><<<num_blocks, 32>>>(in_ptr, abs_max_ptr, out_ptr, n);
204+
// }
205205
return {output, abs_max};
206206
}
207207

@@ -226,10 +226,8 @@ std::vector<paddle::Tensor> QuantizeNF4(const paddle::Tensor& input, int block_s
226226
}
227227
}
228228

229-
230-
231-
232229
PD_BUILD_OP(quantize_nf4)
233230
.Inputs({"input"})
234231
.Outputs({"out", "abs_max"})
232+
.Attrs({"block_size: int"})
235233
.SetKernelFn(PD_KERNEL(QuantizeNF4));

paddleslim/lc/quantizers/nf4.py

+2 -1
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,8 @@ def __init__(self, block_size=64, double_quant=False):
1414
self.double_quant_scale = None
1515

1616
def quantize(self, x: paddle.Tensor):
17-
out, abs_max = paddleslim_ops.quantize_nf4(x)
17+
out, abs_max = paddleslim_ops.quantize_nf4(
18+
x, block_size=self.block_size)
1819
self.quant_scale = abs_max
1920
return out
2021

0 commit comments

Comments
 (0)