You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
model = AutoModel.from_pretrained(model_path, trust_remote_code=True).float()
# .half()
# Move the model to the device
#model.to(device)
model = model.eval()
response, history = model.chat(tokenizer, "hello", history=[])
print(response)
response, history = model.chat(tokenizer, "What should I do if I can't sleep at night?", history=history)
print(response)`
`---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
Cell In[1], line 13
9 # .half()
10 # Move the model to the device
11 #model.to(device)
12 model = model.eval()
---> 13 response, history = model.chat(tokenizer, "hello", history=[])
14 print(response)
15 response, history = model.chat(tokenizer, "What should I do if I can't sleep at night?", history=history)
File ~/new/environment/anaconda3/envs/sxy/lib/python3.10/site-packages/torch/autograd/grad_mode.py:27, in _DecoratorContextManager.__call__.<locals>.decorate_context(*args, **kwargs)
24 @functools.wraps(func)
25 def decorate_context(*args, **kwargs):
26 with self.clone():
---> 27 return func(*args, **kwargs)
TypeError: GenerationMixin._extract_past_from_model_output() got an unexpected keyword argument 'standardize_cache_format'`
Expected Behavior
No response
Steps To Reproduce
The code that is running is just the few lines of sample code above, and this TypeError error occurred. I suspect it's a problem with the environment, but I don't know which package specifically needs to be modified.
Environment
- OS:linux
- Python:3.10.14
- Transformers:4.28.1
- PyTorch:1.12.1+cu113
- CUDA Support (`python -c "import torch; print(torch.cuda.is_available())"`) :
True
Anything else?
No response
The text was updated successfully, but these errors were encountered:
Is there an existing issue for this?
Current Behavior
运行的就是示例代码如下,出现了下面的TypeError 报错
`from transformers import AutoTokenizer, AutoModel
import torch
model_path="/home/songxinyue/new/models/chatglm2-6b"
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
device = torch.device("cuda:2" if torch.cuda.is_available() else "cpu")
model = AutoModel.from_pretrained(model_path, trust_remote_code=True).float()
# .half()
# Move the model to the device
#model.to(device)
model = model.eval()
response, history = model.chat(tokenizer, "hello", history=[])
print(response)
response, history = model.chat(tokenizer, "What should I do if I can't sleep at night?", history=history)
print(response)`
`---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
Cell In[1], line 13
9 # .half()
10 # Move the model to the device
11 #model.to(device)
12 model = model.eval()
---> 13 response, history = model.chat(tokenizer, "hello", history=[])
14 print(response)
15 response, history = model.chat(tokenizer, "What should I do if I can't sleep at night?", history=history)
File ~/new/environment/anaconda3/envs/sxy/lib/python3.10/site-packages/torch/autograd/grad_mode.py:27, in _DecoratorContextManager.__call__.<locals>.decorate_context(*args, **kwargs)
24 @functools.wraps(func)
25 def decorate_context(*args, **kwargs):
26 with self.clone():
---> 27 return func(*args, **kwargs)
File ~/.cache/huggingface/modules/transformers_modules/chatglm2-6b/modeling_chatglm.py:1033, in ChatGLMForConditionalGeneration.chat(self, tokenizer, query, history, max_length, num_beams, do_sample, top_p, temperature, logits_processor, **kwargs)
1030 gen_kwargs = {"max_length": max_length, "num_beams": num_beams, "do_sample": do_sample, "top_p": top_p,
1031 "temperature": temperature, "logits_processor": logits_processor, **kwargs}
1032 inputs = self.build_inputs(tokenizer, query, history=history)
-> 1033 outputs = self.generate(**inputs, **gen_kwargs)
1034 outputs = outputs.tolist()[0][len(inputs["input_ids"][0]):]
1035 response = tokenizer.decode(outputs)
File ~/new/environment/anaconda3/envs/sxy/lib/python3.10/site-packages/torch/autograd/grad_mode.py:27, in _DecoratorContextManager.__call__.<locals>.decorate_context(*args, **kwargs)
24 @functools.wraps(func)
25 def decorate_context(*args, **kwargs):
26 with self.clone():
---> 27 return func(*args, **kwargs)
File ~/new/environment/anaconda3/envs/sxy/lib/python3.10/site-packages/transformers/generation/utils.py:2015, in GenerationMixin.generate(self, inputs, generation_config, logits_processor, stopping_criteria, prefix_allowed_tokens_fn, synced_gpus, assistant_model, streamer, negative_prompt_ids, negative_prompt_attention_mask, **kwargs)
2007 input_ids, model_kwargs = self._expand_inputs_for_generation(
2008 input_ids=input_ids,
2009 expand_size=generation_config.num_return_sequences,
2010 is_encoder_decoder=self.config.is_encoder_decoder,
2011 **model_kwargs,
2012 )
2014 # 12. run sample (it degenerates to greedy search when `generation_config.do_sample=False`)
-> 2015 result = self._sample(
2016 input_ids,
2017 logits_processor=prepared_logits_processor,
2018 stopping_criteria=prepared_stopping_criteria,
2019 generation_config=generation_config,
2020 synced_gpus=synced_gpus,
2021 streamer=streamer,
2022 **model_kwargs,
2023 )
2025 elif generation_mode in (GenerationMode.BEAM_SAMPLE, GenerationMode.BEAM_SEARCH):
2026 # 11. prepare beam search scorer
2027 beam_scorer = BeamSearchScorer(
2028 batch_size=batch_size,
2029 num_beams=generation_config.num_beams,
(...)
2034 max_length=generation_config.max_length,
2035 )
File ~/new/environment/anaconda3/envs/sxy/lib/python3.10/site-packages/transformers/generation/utils.py:3014, in GenerationMixin._sample(self, input_ids, logits_processor, stopping_criteria, generation_config, synced_gpus, streamer, **model_kwargs)
3012 if streamer is not None:
3013 streamer.put(next_tokens.cpu())
-> 3014 model_kwargs = self._update_model_kwargs_for_generation(
3015 outputs,
3016 model_kwargs,
3017 is_encoder_decoder=self.config.is_encoder_decoder,
3018 )
3020 unfinished_sequences = unfinished_sequences & ~stopping_criteria(input_ids, scores)
3021 this_peer_finished = unfinished_sequences.max() == 0
File ~/.cache/huggingface/modules/transformers_modules/chatglm2-6b/modeling_chatglm.py:871, in ChatGLMForConditionalGeneration._update_model_kwargs_for_generation(self, outputs, model_kwargs, is_encoder_decoder, standardize_cache_format)
863 def _update_model_kwargs_for_generation(
864 self,
865 outputs: ModelOutput,
(...)
869 ) -> Dict[str, Any]:
870 # update past_key_values
--> 871 model_kwargs["past_key_values"] = self._extract_past_from_model_output(
872 outputs, standardize_cache_format=standardize_cache_format
873 )
875 # update attention mask
876 if "attention_mask" in model_kwargs:
TypeError: GenerationMixin._extract_past_from_model_output() got an unexpected keyword argument 'standardize_cache_format'`
Expected Behavior
No response
Steps To Reproduce
The code that is running is just the few lines of sample code above, and this TypeError error occurred. I suspect it's a problem with the environment, but I don't know which package specifically needs to be modified.
Environment
Anything else?
No response
The text was updated successfully, but these errors were encountered: