Skip to content
This repository was archived by the owner on Oct 25, 2024. It is now read-only.

Commit 4870a78

Browse files
committed
recover neuralchat ut
1 parent 1d45b42 commit 4870a78

File tree

1 file changed

+2
-17
lines changed

1 file changed

+2
-17
lines changed

intel_extension_for_transformers/neural_chat/tests/optimization/test_optimization.py

Lines changed: 2 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -42,11 +42,9 @@ def test_build_chatbot_with_AMP(self):
4242
print("\n")
4343
self.assertIsNotNone(response)
4444

45-
def test_build_chatbot_with_llm_runtime(self):
46-
loading_config = LoadingModelConfig(use_llm_runtime=True)
45+
def test_build_chatbot_with_weight_only_quant(self):
4746
config = PipelineConfig(model_name_or_path="facebook/opt-125m",
48-
optimization_config=WeightOnlyQuantConfig(compute_dtype="int8", weight_dtype="int8"),
49-
loading_config=loading_config
47+
optimization_config=WeightOnlyQuantizationConfig()
5048
)
5149
chatbot = build_chatbot(config)
5250
self.assertIsNotNone(chatbot)
@@ -72,18 +70,5 @@ def test_build_chatbot_with_bitsandbytes_quant(self):
7270
print(response)
7371
self.assertIsNotNone(response)
7472

75-
# run this case will cause core dump
76-
# def test_build_chatbot_with_weight_only_quant(self):
77-
# loading_config = LoadingModelConfig(use_llm_runtime=False)
78-
# config = PipelineConfig(model_name_or_path="facebook/opt-125m",
79-
# optimization_config=WeightOnlyQuantConfig(compute_dtype="fp32", weight_dtype="int4_fullrange"),
80-
# loading_config=loading_config
81-
# )
82-
# chatbot = build_chatbot(config)
83-
# self.assertIsNotNone(chatbot)
84-
# response = chatbot.predict(query="Tell me about Intel Xeon Scalable Processors.")
85-
# print(response)
86-
# self.assertIsNotNone(response)
87-
8873
if __name__ == '__main__':
8974
unittest.main()

0 commit comments

Comments (0)