# try3/app.py: Gradio chat demo for dicta-il/dictalm-7b-instruct
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
# Load the model and tokenizer (DictaLM needs trust_remote_code for its custom model class)
tokenizer = AutoTokenizer.from_pretrained('dicta-il/dictalm-7b-instruct')
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = AutoModelForCausalLM.from_pretrained('dicta-il/dictalm-7b-instruct', trust_remote_code=True).to(device)
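# Note (assumption about the host hardware): a 7B model in the default fp32 weights
# needs roughly 28 GB of memory; if the GPU is tight, passing
# torch_dtype=torch.float16 to from_pretrained is a common way to halve that.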
# Define the chat function that generates a response from the model
def chat_with_model(prompt):
    model.eval()
    with torch.inference_mode():
        kwargs = dict(
            inputs=tokenizer(prompt, return_tensors='pt').input_ids.to(model.device),
            do_sample=True,       # sample instead of greedy decoding
            top_k=50,             # keep only the 50 most likely next tokens
            top_p=0.95,           # nucleus sampling threshold
            temperature=0.75,
            max_length=100,       # cap on prompt + generated tokens combined
            min_new_tokens=5,     # always generate at least a few tokens
        )
        output = model.generate(**kwargs)
    response_text = tokenizer.batch_decode(output, skip_special_tokens=True)[0]
    return response_text
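# Quick sanity check outside Gradio (hypothetical prompt; any short string works):
#   print(chat_with_model('Hello'))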
# Create the Gradio interface
interface = gr.Interface(fn=chat_with_model, inputs="text", outputs="text", title="Chat with DictaLM Model")
interface.launch()
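# On Hugging Face Spaces, launch() is served automatically on the Space's port;
# for local testing, interface.launch(share=True) would expose a temporary public URL.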