@@ -93,7 +93,7 @@ def test_teq_detect_absorb_layers(self):
9393 "transformer.h.0.mlp.fc_in" : {"bits" : 8 , "group_size" : - 1 , "scheme" : "sym" },
9494 "transformer.h.0.mlp.fc_out" : {"bits" : 4 , "group_size" : 32 , "scheme" : "asym" },
9595 }
96- quantizer = TEQuantizer (quant_config = weight_config , folding = True , example_inputs = example_inputs )
96+ quantizer = TEQuantizer (quant_config = weight_config , folding = False , example_inputs = example_inputs )
9797 model = quantizer .quantize (copy .deepcopy (self .gptj ), run_fn = train )
9898 out1 = model (test_input )
9999 self .assertTrue (torch .allclose (out1 [0 ], out0 [0 ], atol = 0.03 ))
@@ -106,13 +106,14 @@ def test_teq(self):
 
         weight_config = {
             # 'op_name': (bit, group_size, scheme)
-            "transformer.h.0.mlp.fc_in": {"bits": 8, "group_size": -1, "scheme": "sym"},
+            "transformer.h.0.mlp.fc_in": {"bits": 4, "group_size": -1, "scheme": "sym"},
             "transformer.h.0.mlp.fc_out": {"bits": 4, "group_size": 32, "scheme": "asym"},
         }
-        absorb_dict = {"transformer.h.0.mlp.fc_in": ["transformer.h.0.mlp.fc_out"]}
+        # absorb_dict = {"transformer.h.0.mlp.fc_in": ["transformer.h.0.mlp.fc_out"]}
+        absorb_dict = None
 
         quantizer = TEQuantizer(
-            quant_config=weight_config, folding=True, absorb_to_layer=absorb_dict, example_inputs=example_inputs
+            quant_config=weight_config, folding=False, absorb_to_layer=absorb_dict, example_inputs=example_inputs
         )
         model = quantizer.quantize(copy.deepcopy(self.gptj), run_fn=train)
         out1 = model(test_input)
@@ -129,16 +130,17 @@ def test_teq(self):
129130 "bits" : 8 ,
130131 "group_size" : - 1 ,
131132 "use_sym" : True ,
132- "folding" : True ,
133- "absorb_to_layer" : {"transformer.h.0.mlp.fc_in" : ["transformer.h.0.mlp.fc_out" ]},
133+ "folding" : False ,
134+ # "absorb_to_layer": {"transformer.h.0.mlp.fc_in": ["transformer.h.0.mlp.fc_out"]},
135+ "absorb_to_layer" : {"transformer.h.0.mlp.fc_in" : ["transformer.h.0.mlp.fc_in" ]},
134136 },
135137 "transformer.h.0.mlp.fc_out" : {
136138 "dtype" : "int" ,
137139 "bits" : 4 ,
138140 "group_size" : 32 ,
139141 "use_sym" : False ,
140- "folding" : True ,
141- "absorb_to_layer" : {"transformer.h.0.mlp.fc_in " : ["transformer.h.0.mlp.fc_out" ]},
142+ "folding" : False ,
143+ "absorb_to_layer" : {"transformer.h.0.mlp.fc_out " : ["transformer.h.0.mlp.fc_out" ]},
142144 },
143145 },
144146 }
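
For reference, a minimal sketch (not part of this diff) of the call pattern the updated tests now exercise: TEQuantizer with folding=False and absorb_to_layer=None, so TEQ detects the absorb layers on its own. Imports and model setup are omitted; weight_config, example_inputs, train, test_input, and self.gptj are assumed to come from the test file's existing setup.

    quantizer = TEQuantizer(
        quant_config=weight_config,     # per-layer bits / group_size / scheme, as in the tests above
        folding=False,                  # the setting this change switches the tests to
        absorb_to_layer=None,           # no explicit mapping; absorb layers are detected automatically
        example_inputs=example_inputs,
    )
    model = quantizer.quantize(copy.deepcopy(self.gptj), run_fn=train)
    out1 = model(test_input)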