from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import torch
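# Load the instruction-tuned Gemma 7B checkpoint in 4-bit so it fits on a
# single consumer GPU (requires the bitsandbytes package).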
model_name_or_id = "google/gemma-7b-it"
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
)
model = AutoModelForCausalLM.from_pretrained(
    model_name_or_id,
    device_map="auto",
    quantization_config=quantization_config,
)
tokenizer = AutoTokenizer.from_pretrained(model_name_or_id)
chat = [
    {"role": "user", "content": "Do you know what the most powerful card in Yu-Gi-Oh! is?"},
]
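# apply_chat_template wraps the message in Gemma's turn markers; with
# add_generation_prompt=True it also opens the model turn for generation.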
prompt = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
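# Sample a response: top_p/temperature trade determinism for diversity, and
# repetition_penalty > 1 discourages the model from looping on itself.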
outputs = model.generate(
    **inputs,
    max_new_tokens=128,
    do_sample=True,
    top_p=0.95,
    temperature=0.7,
    repetition_penalty=1.1,
)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
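# To print only the newly generated reply, slice off the prompt tokens
# (a minimal variant, assuming the same `inputs` and `outputs` as above):
prompt_length = inputs["input_ids"].shape[-1]
print(tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True))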