@@ -185,23 +185,23 @@ std::vector<paddle::Tensor> LaunchQuantizeNF4(const paddle::Tensor& input, int
185
185
186
186
auto abs_max = paddle::full ({num_blocks}, 1 , paddle::DataType::FLOAT32, input.place ());
187
187
188
- const DataType_ *in_ptr = reinterpret_cast <const DataType_*>(input.data <data_t >());
189
- unsigned char *out_ptr = output.mutable_data <unsigned char >();
190
- float *abs_max_ptr = abs_max.mutable_data <float >();
191
-
192
- if (block_size == 2048 ) {
193
- kQuantizeBlockwiseNF4 <DataType_, 2048 , 4 ><<<num_blocks, 512 >>> (in_ptr, abs_max_ptr, out_ptr, n);
194
- } else if (block_size == 1024 ) {
195
- kQuantizeBlockwiseNF4 <DataType_, 1024 , 4 ><<<num_blocks, 256 >>> (in_ptr, abs_max_ptr, out_ptr, n);
196
- } else if (block_size == 512 ) {
197
- kQuantizeBlockwiseNF4 <DataType_, 512 , 2 ><<<num_blocks, 256 >>> (in_ptr, abs_max_ptr, out_ptr, n);
198
- } else if (block_size == 256 ) {
199
- kQuantizeBlockwiseNF4 <DataType_, 256 , 2 ><<<num_blocks, 128 >>> (in_ptr, abs_max_ptr, out_ptr, n);
200
- } else if (block_size == 128 ) {
201
- kQuantizeBlockwiseNF4 <DataType_, 128 , 2 ><<<num_blocks, 64 >>> (in_ptr, abs_max_ptr, out_ptr, n);
202
- } else if (block_size == 64 ) {
203
- kQuantizeBlockwiseNF4 <DataType_, 64 , 2 ><<<num_blocks, 32 >>> (in_ptr, abs_max_ptr, out_ptr, n);
204
- }
188
+ // const DataType_ *in_ptr = reinterpret_cast<const DataType_*>(input.data<data_t>());
189
+ // unsigned char *out_ptr = output.mutable_data<unsigned char>();
190
+ // float *abs_max_ptr = abs_max.mutable_data<float>();
191
+
192
+ // if(block_size == 2048) {
193
+ // kQuantizeBlockwiseNF4<DataType_, 2048, 4><<<num_blocks, 512>>>(in_ptr, abs_max_ptr, out_ptr, n);
194
+ // } else if(block_size == 1024) {
195
+ // kQuantizeBlockwiseNF4<DataType_, 1024, 4><<<num_blocks, 256>>>(in_ptr, abs_max_ptr, out_ptr, n);
196
+ // } else if(block_size == 512) {
197
+ // kQuantizeBlockwiseNF4<DataType_, 512, 2><<<num_blocks, 256>>>(in_ptr, abs_max_ptr, out_ptr, n);
198
+ // } else if(block_size == 256) {
199
+ // kQuantizeBlockwiseNF4<DataType_, 256, 2><<<num_blocks, 128>>>(in_ptr, abs_max_ptr, out_ptr, n);
200
+ // } else if(block_size == 128) {
201
+ // kQuantizeBlockwiseNF4<DataType_, 128, 2><<<num_blocks, 64>>>(in_ptr, abs_max_ptr, out_ptr, n);
202
+ // } else if(block_size == 64) {
203
+ // kQuantizeBlockwiseNF4<DataType_, 64, 2><<<num_blocks, 32>>>(in_ptr, abs_max_ptr, out_ptr, n);
204
+ // }
205
205
return {output, abs_max};
206
206
}
207
207
@@ -226,10 +226,8 @@ std::vector<paddle::Tensor> QuantizeNF4(const paddle::Tensor& input, int block_s
226
226
}
227
227
}
228
228
229
-
230
-
231
-
232
229
// Registers the custom operator `quantize_nf4`: declares one input tensor, two
// outputs (named out and abs_max), one integer attribute (block_size), and
// binds QuantizeNF4 as the kernel function.
// NOTE(review): the name strings below carry a leading space in this view,
// presumably an extraction artifact -- confirm against the real file, since
// these strings are what the framework uses to match tensors and attributes.
PD_BUILD_OP (quantize_nf4)
233
230
.Inputs({" input" })
234
231
.Outputs({" out" , " abs_max" })
232
+ .Attrs({" block_size: int" })
235
233
.SetKernelFn(PD_KERNEL(QuantizeNF4));
0 commit comments