Commit 08ec908

Remove itrex dependency for 2x example (#2024)
Signed-off-by: Kaihui-intel <[email protected]>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Parent: d9377b8

14 files changed: +44 -25 lines changed


Diff for: examples/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/ptq_static/main.py (+1 -1)

@@ -197,7 +197,7 @@ def replace_architectures(json_path):
         json.dump(data, file, indent=4)
 
 def eval_func(model):
-    from intel_extension_for_transformers.transformers.llm.evaluation.lm_eval import evaluate, LMEvalParser
+    from neural_compressor.evaluation.lm_eval import evaluate, LMEvalParser
 
     model_dir = model
     if isinstance(model, str) and model.endswith(".onnx"):
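This import swap recurs throughout the commit: the lm-eval wrapper that used to ship in intel-extension-for-transformers now lives in neural_compressor.evaluation. A minimal sketch of how the relocated pair is typically driven, pieced together from the calls visible in the diffs below; the tokenizer, tasks, and batch_size fields are assumptions based on other examples, not part of this commit:

from transformers import AutoModelForCausalLM, AutoTokenizer
from neural_compressor.evaluation.lm_eval import evaluate, LMEvalParser

# Any already-loaded HF model works; opt-125m is the one used elsewhere in this commit.
user_model = AutoModelForCausalLM.from_pretrained("facebook/opt-125m")
tokenizer = AutoTokenizer.from_pretrained("facebook/opt-125m")

eval_args = LMEvalParser(
    model="hf",              # backend selector, as in the diffs below
    user_model=user_model,
    tokenizer=tokenizer,     # assumed field name
    tasks="lambada_openai",  # assumed: any lm-eval task name
    batch_size=16,           # assumed
)
results = evaluate(eval_args)
acc = results["results"]["lambada_openai"]["acc"]  # same lookup as in run_clm_no_trainer.py below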

Diff for: examples/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/ptq_static/requirements.txt (+3 -2)

@@ -7,6 +7,7 @@ onnxruntime-extensions; python_version < '3.11'
 datasets
 optimum
 evaluate
-intel-extension-for-transformers >= 1.4.1
 peft
-lm-eval==0.4.2
+lm-eval==0.4.3
+numba
+pydantic

Diff for: examples/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/weight_only/main.py (+1 -1)

@@ -134,7 +134,7 @@ def replace_architectures(json_path):
         json.dump(data, file, indent=4)
 
 def eval_func(model):
-    from intel_extension_for_transformers.transformers.llm.evaluation.lm_eval import evaluate, LMEvalParser
+    from neural_compressor.evaluation.lm_eval import evaluate, LMEvalParser
 
     model_dir = model
     if isinstance(model, str) and model.endswith(".onnx"):

Diff for: examples/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/weight_only/requirements.txt (+3 -2)

@@ -7,6 +7,7 @@ onnxruntime-extensions; python_version < '3.11'
 datasets
 optimum
 evaluate
-intel-extension-for-transformers >= 1.4.1
 peft
-lm-eval==0.4.2
+lm-eval==0.4.3
+numba
+pydantic

Diff for: requirements.txt (+3 -2)

@@ -1,7 +1,6 @@
 accelerate
 datasets
 einops
-intel-extension-for-transformers
 optimum
 peft
 sentencepiece
@@ -10,4 +9,6 @@ torch
 tqdm
 tiktoken
 transformers_stream_generator
-lm_eval==0.4.2
+lm_eval==0.4.3
+numba
+pydantic

Diff for: examples/pytorch/nlp/huggingface_models/language-modeling/pruning/eager/run_clm_sparsegpt.py (+1 -1)

@@ -588,7 +588,7 @@ def group_texts(examples):
     eval_batch = args.per_device_eval_batch_size
     user_model = None if args.use_accelerate else model
 
-    from intel_extension_for_transformers.transformers.llm.evaluation.lm_eval import evaluate, LMEvalParser
+    from neural_compressor.evaluation.lm_eval import evaluate, LMEvalParser
     eval_args = LMEvalParser(
         model="hf",
         user_model=user_model,

Diff for: examples/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/requirements.txt (+3 -3)

@@ -8,6 +8,6 @@ transformers
 pytest
 wandb
 einops
-neural-compressor
-intel-extension-for-transformers
-lm_eval==0.4.2
+lm_eval==0.4.3
+numba
+pydantic

Diff for: examples/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/run_benchmark.sh (+2 -2)

@@ -89,10 +89,10 @@ function run_benchmark {
         extra_cmd=$extra_cmd" --woq_algo TEQ"
     elif [ "${topology}" = "opt_125m_ipex" ]; then
         model_name_or_path="facebook/opt-125m"
-        extra_cmd=$extra_cmd" --ipex --int8_bf16_mixed"
+        extra_cmd=$extra_cmd" --ipex"
     elif [ "${topology}" = "opt_125m_ipex_sq" ]; then
         model_name_or_path="facebook/opt-125m"
-        extra_cmd=$extra_cmd" --ipex --int8_bf16_mixed --sq --alpha 0.5"
+        extra_cmd=$extra_cmd" --ipex --sq --alpha 0.5"
     elif [ "${topology}" = "bloom_560m_ipex_sq" ]; then
         model_name_or_path="bigscience/bloom-560m"
         extra_cmd=$extra_cmd" --ipex --sq --alpha 0.5"

Diff for: examples/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/run_clm_no_trainer.py (+7 -4)

@@ -343,7 +343,10 @@ def eval_func(model):
 
     if args.ipex:
         user_model = load(os.path.abspath(os.path.expanduser(args.output_dir)))
+        from transformers import AutoTokenizer, AutoConfig
         tokenizer = AutoTokenizer.from_pretrained(args.model, trust_remote_code=args.trust_remote_code)
+        config = AutoConfig.from_pretrained(args.model, trust_remote_code=args.trust_remote_code)
+        setattr(user_model, "config", config)
     else:
         user_model, tokenizer = get_user_model()
     kwargs = {'weight_only': True} if args.approach == 'weight_only' else {}
@@ -354,7 +357,7 @@ def eval_func(model):
 if args.accuracy:
     user_model.eval()
     if args.code_generation:
-        from intel_extension_for_transformers.transformers.llm.evaluation.bigcode_eval import evaluate
+        from neural_compressor.evaluation.bigcode_eval import evaluate
         from transformers import AutoTokenizer
         tokenizer = AutoTokenizer.from_pretrained(args.model, trust_remote_code=args.trust_remote_code)
         results = evaluate(
@@ -370,7 +373,7 @@ def eval_func(model):
         else:
             acc = results["results"][task_name]["acc"]
     else:
-        from intel_extension_for_transformers.transformers.llm.evaluation.lm_eval import evaluate, LMEvalParser
+        from neural_compressor.evaluation.lm_eval import evaluate, LMEvalParser
         eval_args = LMEvalParser(
             model="hf",
             user_model=user_model,
@@ -395,7 +398,7 @@ def eval_func(model):
     samples = args.iters * args.batch_size
 
     if args.code_generation:
-        from intel_extension_for_transformers.transformers.llm.evaluation.bigcode_eval import evaluate
+        from neural_compressor.evaluation.bigcode_eval import evaluate
         from transformers import AutoTokenizer
         tokenizer = AutoTokenizer.from_pretrained(args.model, trust_remote_code=args.trust_remote_code)
         start = time.time()
@@ -413,7 +416,7 @@ def eval_func(model):
         else:
             acc = results["results"][task_name]["acc"]
     else:
-        from intel_extension_for_transformers.transformers.llm.evaluation.lm_eval import evaluate, LMEvalParser
+        from neural_compressor.evaluation.lm_eval import evaluate, LMEvalParser
         eval_args = LMEvalParser(
             model="hf",
             user_model=user_model,
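The first hunk above is the substantive one: a model restored with load() is not a Hugging Face model and carries no .config, yet the evaluation path downstream expects a user_model.config (e.g. to read the architecture), which is presumably why an AutoConfig is attached. The same pattern in isolation, with hypothetical names:

from transformers import AutoConfig

def attach_hf_config(user_model, model_name_or_path, trust_remote_code=False):
    # A loaded IPEX/TorchScript module lacks `.config`; give it one so code
    # written against HF models keeps working.
    config = AutoConfig.from_pretrained(model_name_or_path, trust_remote_code=trust_remote_code)
    setattr(user_model, "config", config)
    return user_model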

Diff for: examples/pytorch/speech_recognition/whisper_large/quantization/ptq_dynamic/fx/run_quant.sh (+2 -1)

@@ -45,7 +45,8 @@ function run_tuning {
         --tune \
         --batch_size $batch_size \
         --output_dir ${output_model} \
-        --cache_dir ${dataset_location}
+        --cache_dir ${dataset_location} \
+        --trust_remote_code
 
 }
 
Diff for: examples/pytorch/speech_recognition/whisper_large/quantization/ptq_dynamic/fx/run_whisper_large.py (+3 -1)

@@ -24,13 +24,15 @@
                     help='the folder path to save the results.')
 parser.add_argument('--cache_dir', default=None, type=str,
                     help='the folder path to save the results.')
+parser.add_argument("--trust_remote_code", action="store_true")
 
 args = parser.parse_args()
 model_name = 'openai/whisper-large'
 processor = WhisperProcessor.from_pretrained(model_name)
 model = WhisperForConditionalGeneration.from_pretrained(model_name)
 # dataset
-librispeech_test_clean = load_dataset("librispeech_asr", "clean", split="test", cache_dir=args.cache_dir)
+librispeech_test_clean = load_dataset("librispeech_asr", "clean", split="test", cache_dir=args.cache_dir,
+                                      trust_remote_code=args.trust_remote_code)
 
 # metric
 wer = load("wer")
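The new flag is threaded from run_quant.sh into load_dataset because librispeech_asr is a script-based dataset, which newer datasets releases will not load without an explicit trust_remote_code=True opt-in.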

Diff for: neural_compressor/adaptor/torch_utils/awq.py (+3)

@@ -454,6 +454,9 @@ def block_inference(self, model):
         """
         total_out = []
         for args, kwargs in zip(self.total_block_args, self.total_block_kwargs):
+            # avoid replaying layer_past, which is a DynamicCache on transformers releases newer than 4.45.1
+            if "layer_past" in kwargs.keys() and kwargs["layer_past"] is not None:
+                kwargs["layer_past"] = None
             out = model(*args, **kwargs)
             if isinstance(out, tuple):  # pragma: no cover
                 out = out[0]
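The guard exists because these args/kwargs were captured once during calibration and are replayed for every block pass: on transformers releases newer than 4.45.1 the captured layer_past is a stateful DynamicCache rather than a plain tuple, so replaying it can feed stale cache state into the forward pass. The same pattern as a standalone sketch, with hypothetical names:

def replay_block(block, captured_args, captured_kwargs):
    # Drop any KV cache captured at calibration time; newer transformers
    # make it a stateful DynamicCache that must not be replayed.
    if captured_kwargs.get("layer_past") is not None:
        captured_kwargs = {**captured_kwargs, "layer_past": None}
    return block(*captured_args, **captured_kwargs)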

Diff for: neural_compressor/adaptor/torch_utils/bf16_convert.py (+5 -3)

@@ -19,6 +19,7 @@
 import torch.nn as nn
 
 from ...utils import logger
+from .util import append_attr
 
 
 class BF16ModuleWrapper(nn.Module):
@@ -62,9 +63,10 @@ def Convert(model, tune_cfg):
 def _bf16_wrapper_model(model, bf16_ops_list, prefix=""):
     for name, child in model.named_children():
         op_name = prefix + "." + name if prefix != "" else name
+        _bf16_wrapper_model(child, bf16_ops_list, op_name)
         for bf16_op_name in bf16_ops_list:
             if op_name == bf16_op_name[0] or op_name == bf16_op_name[0].split(".module")[0]:
-                child = BF16ModuleWrapper(child)
-                setattr(model, name, child)
-        _bf16_wrapper_model(child, bf16_ops_list, op_name)
+                child_bf16 = BF16ModuleWrapper(child)
+                append_attr(child_bf16, child)
+                setattr(model, name, child_bf16)
     return model
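Two fixes land in this hunk: the recursive descent now happens before a child is replaced, so the freshly created wrapper is never descended into and double-wrapped (the test change below drops one .module level accordingly), and append_attr copies the original module's attributes onto the wrapper so callers can still read fields like stride. A minimal sketch of what such a helper could do; the real append_attr lives in neural_compressor/adaptor/torch_utils/util.py and may differ in detail:

def append_attr(dst, src):
    # Copy the wrapped module's public attributes onto the wrapper so the
    # original module's fields (e.g. `stride`, `padding`) remain visible.
    for name in dir(src):
        if name.startswith("_") or hasattr(dst, name):
            continue
        try:
            setattr(dst, name, getattr(src, name))
        except (AttributeError, TypeError):
            pass  # skip read-only or descriptor-backed attributes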

Diff for: test/adaptor/pytorch_adaptor/test_adaptor_pytorch_2x.py (+7 -2)

@@ -401,8 +401,13 @@ def test_mix_precision(self):
         ptq_fx_op_name_list["conv.*"] = {"weight": {"dtype": "bf16"}, "activation": {"dtype": "bf16"}}
         conf = PostTrainingQuantConfig(op_name_dict=ptq_fx_op_name_list)
         q_model = quantization.fit(model_origin, conf, calib_dataloader=dataloader, calib_func=eval_func)
-        self.assertEqual(q_model._model.conv.module.module.weight.dtype, torch.bfloat16)
-        self.assertEqual(q_model._model.conv.module.module.bias.dtype, torch.bfloat16)
+        self.assertEqual(q_model._model.conv.module.weight.dtype, torch.bfloat16)
+        self.assertEqual(q_model._model.conv.module.bias.dtype, torch.bfloat16)
+        self.assertEqual(
+            q_model._model.conv.stride[0],
+            1,
+            msg="GraphModule object should have the attributes of the original module.",
+        )
 
     def test_hawq_metric(self):
         # Test for hawq metric
