Add prototype of QLoRA

wanghaoshuang · wanghaoshuang · commit d970e20197ef · 2023-10-27T11:46:27.000+08:00
diff --git a/paddleslim/lc/__init__.py b/paddleslim/lc/__init__.py
diff --git a/paddleslim/lc/layers/__init__.py b/paddleslim/lc/layers/__init__.py
diff --git a/paddleslim/lc/layers/linear.py b/paddleslim/lc/layers/linear.py
@@ -0,0 +1,35 @@
+import paddle
+import paddle.nn as nn
+
+
+class WeightQuantizationLinear(nn.Layer):
+    """Base class for linear layers that keep a quantized weight.
+
+    Records the shape, dtype and parameter name of the wrapped layer's
+    weight; subclasses implement ``quantize`` and ``forward``.
+
+    Args:
+        linear: The linear layer whose weight metadata is recorded.
+    """
+
+    def __init__(
+            self,
+            linear: paddle.nn.Linear, ):
+        super().__init__()
+        # Axis 0 of the weight is read as in_features, axis 1 as out_features.
+        self.in_features = linear.weight.shape[0]
+        self.out_features = linear.weight.shape[1]
+        # NOTE(review): nn.Layer does not appear to expose a public ``dtype``
+        # attribute, so fall back to the weight's dtype when it is missing.
+        self.dtype = getattr(linear, "dtype", linear.weight.dtype)
+        self.weight_name = linear.weight.name
+        # Derived name for the quantized weight parameter.
+        self.quant_weight_name = ".".join([self.weight_name, "quant_weight"])
+
+    def forward(self, x):
+        """Apply the layer to ``x``; must be implemented by subclasses."""
+        raise NotImplementedError()
+
+    def quantize(self, weight) -> paddle.Tensor:
+        """Quantize ``weight``; must be implemented by subclasses."""
+        raise NotImplementedError()
diff --git a/paddleslim/lc/layers/nf4_linear.py b/paddleslim/lc/layers/nf4_linear.py
@@ -0,0 +1,90 @@
+import paddle
+import paddle.nn as nn
+from paddleslim.lc.quantizers import NF4Quantizer
+from .linear import WeightQuantizationLinear
+
+
+class NF4Linear(WeightQuantizationLinear):
+    """Linear layer whose weight is handled by an ``NF4Quantizer``.
+
+    Args:
+        linear: The ``nn.Linear`` layer being replaced.
+        block_size: Block size passed to the quantizer.
+        double_quant: Whether to create a second scale parameter and keep
+            it in sync with the quantizer.
+    """
+
+    quant_dtype = "int4"
+    weight_dtype = "int8"
+
+    def __init__(
+            self,
+            linear: nn.Linear,
+            block_size=64,
+            double_quant=False, ):
+        super().__init__(linear)
+        self.block_size = block_size
+        self.double_quant = double_quant
+        self.quantizer = NF4Quantizer(block_size, double_quant)
+        # Keep the wrapped layer's bias (may be None) so forward() can apply it.
+        self.bias = linear.bias
+        # PaddlePaddle doesn't support an Int4 data type, so one Int8 value
+        # represents two Int4 values.
+        self.quant_weight = self.create_parameter(
+            shape=[self.out_features // 2, self.in_features],
+            attr=paddle.ParamAttr(self.quant_weight_name),
+            dtype=NF4Linear.weight_dtype,
+            is_bias=False, )
+
+        self.quant_scale_name = ".".join([self.weight_name, "quant_scale"])
+        self.quant_scale = self.create_parameter(
+            shape=[self.out_features],
+            attr=paddle.ParamAttr(self.quant_scale_name),
+            dtype="float32",  # to be fixed
+            is_bias=False, )
+        if self.double_quant:
+            self.double_quant_scale_name = ".".join(
+                [self.weight_name, "double_quant_scale"])
+            self.double_quant_scale = self.create_parameter(
+                shape=[self.out_features],
+                attr=paddle.ParamAttr(self.double_quant_scale_name),
+                dtype="float32",
+                is_bias=False, )
+
+    def quantize(self, weight):
+        """Quantize ``weight`` and copy the result and scales into parameters.
+
+        Args:
+            weight: The weight tensor handed to the quantizer.
+
+        Returns:
+            The quantized weight produced by the quantizer.
+        """
+        quantized_weight = self.quantizer.quantize(weight)
+        self.quant_weight.set_value(quantized_weight)
+        self.quant_scale.set_value(self.quantizer.quant_scale)
+        if self.double_quant:
+            self.double_quant_scale.set_value(self.quantizer.double_quant_scale)
+        return quantized_weight
+
+    def forward(self, x):
+        """Return ``x`` times the dequantized weight, plus the bias.
+
+        Args:
+            x: Input tensor.
+
+        Returns:
+            The result of the quantizer's ``matmul``.
+        """
+        # Read the scales from the parameters directly: ``state_dict`` is a
+        # method and cannot be subscripted.
+        self.quantizer.quant_scale = self.quant_scale
+        # The double-quant scale only exists when double_quant is enabled.
+        if self.double_quant:
+            self.quantizer.double_quant_scale = self.double_quant_scale
+        bias = self.bias
+        if bias is None:
+            # The quantizer's matmul needs a bias operand; zeros leave the
+            # product unchanged.
+            bias = paddle.zeros([self.out_features], dtype=x.dtype)
+        return self.quantizer.matmul(x, self.quant_weight, bias)
diff --git a/paddleslim/lc/quantizers/__init__.py b/paddleslim/lc/quantizers/__init__.py
@@ -0,0 +1 @@
+from .nf4 import NF4Quantizer
diff --git a/paddleslim/lc/quantizers/base_quantizer.py b/paddleslim/lc/quantizers/base_quantizer.py
@@ -0,0 +1,21 @@
+import paddle
+
+
+class BaseQuantizer:
+    """Interface implemented by weight quantizers."""
+
+    def quantize(self, x: paddle.Tensor):
+        """Return the quantized form of ``x``."""
+        raise NotImplementedError()
+
+    def dequantize(self, x: paddle.Tensor):
+        """Return the dequantized form of ``x``."""
+        raise NotImplementedError()
+
+    def matmul(self, x: paddle.Tensor, y: paddle.Tensor,
+               bias: paddle.Tensor = None):
+        """Multiply ``x`` by the quantized ``y``, adding ``bias`` if given.
+
+        ``bias`` is optional so callers without a bias can omit it.
+        """
+        raise NotImplementedError()
diff --git a/paddleslim/lc/quantizers/nf4.py b/paddleslim/lc/quantizers/nf4.py
@@ -0,0 +1,41 @@
+import paddle
+from .base_quantizer import BaseQuantizer
+
+
+class NF4Quantizer(BaseQuantizer):
+    """Prototype NF4 quantizer.
+
+    ``quantize`` and ``dequantize`` currently return their input unchanged.
+
+    Args:
+        block_size: Stored block size.
+        double_quant: Stored double-quantization flag.
+    """
+
+    dtype = "int4"
+
+    def __init__(self, block_size=64, double_quant=False):
+        # Zero-argument super() resolves from NF4Quantizer, so the
+        # BaseQuantizer initializer is not skipped.
+        super().__init__()
+        self.block_size = block_size
+        self.double_quant = double_quant
+        # Scales start unset.
+        self.quant_scale = None
+        self.double_quant_scale = None
+
+    def quantize(self, x: paddle.Tensor):
+        """Return ``x`` unchanged (placeholder)."""
+        return x
+
+    def dequantize(self, x: paddle.Tensor):
+        """Return ``x`` unchanged (placeholder)."""
+        return x
+
+    def matmul(self, x: paddle.Tensor, y: paddle.Tensor,
+               bias: paddle.Tensor = None):
+        """Return ``x @ dequantize(y)``, plus ``bias`` when it is not None."""
+        product = x @ self.dequantize(y)
+        if bias is None:
+            return product
+        return product + bias