@@ -42,11 +42,9 @@ def test_build_chatbot_with_AMP(self):
4242 print ("\n " )
4343 self .assertIsNotNone (response )
4444
45- def test_build_chatbot_with_llm_runtime (self ):
46- loading_config = LoadingModelConfig (use_llm_runtime = True )
45+ def test_build_chatbot_with_weight_only_quant (self ):
4746 config = PipelineConfig (model_name_or_path = "facebook/opt-125m" ,
48- optimization_config = WeightOnlyQuantConfig (compute_dtype = "int8" , weight_dtype = "int8" ),
49- loading_config = loading_config
47+ optimization_config = WeightOnlyQuantizationConfig ()
5048 )
5149 chatbot = build_chatbot (config )
5250 self .assertIsNotNone (chatbot )
@@ -72,18 +70,5 @@ def test_build_chatbot_with_bitsandbytes_quant(self):
7270 print (response )
7371 self .assertIsNotNone (response )
7472
75- # run this case will cause core dump
76- # def test_build_chatbot_with_weight_only_quant(self):
77- # loading_config = LoadingModelConfig(use_llm_runtime=False)
78- # config = PipelineConfig(model_name_or_path="facebook/opt-125m",
79- # optimization_config=WeightOnlyQuantConfig(compute_dtype="fp32", weight_dtype="int4_fullrange"),
80- # loading_config=loading_config
81- # )
82- # chatbot = build_chatbot(config)
83- # self.assertIsNotNone(chatbot)
84- # response = chatbot.predict(query="Tell me about Intel Xeon Scalable Processors.")
85- # print(response)
86- # self.assertIsNotNone(response)
87-
8873if __name__ == '__main__' :
8974 unittest .main ()
0 commit comments