Spaces:

Nitzantry1
/

try3

Sleeping

try3 / app.py

Create app.py

ef9cde4 verified 6 months ago

1.06 kB

	import os
	import gradio as gr
	from transformers import AutoModelForCausalLM, AutoTokenizer
	import torch

	# Load the tokenizer and the causal-LM weights once, at start-up.
	# The checkpoint id is kept in one place so both loaders stay in sync.
	MODEL_NAME = 'dicta-il/dictalm-7b-instruct'
	# Use the GPU when one is present; otherwise fall back to CPU so that
	# start-up does not raise on hosts without a CUDA device.
	DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
	tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
	# NOTE(security): trust_remote_code=True runs Python code shipped in the
	# model repository. Keep it only for repositories you trust.
	model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, trust_remote_code=True).to(DEVICE)

	# Define the function used to chat with the model
	def chat_with_model(prompt):
	    """Generate a sampled completion for ``prompt`` with the loaded model.

	    Args:
	        prompt: Text to feed to the model (the Gradio text input).

	    Returns:
	        The first decoded sequence returned by ``model.generate`` with
	        special tokens stripped, or an empty string when ``prompt`` is
	        empty or only whitespace.
	        NOTE(review): the decoded text presumably still contains the
	        prompt itself, since the full output sequence is decoded - confirm
	        this is the desired chat behaviour.
	    """
	    # Nothing to condition on: answer with an empty string instead of
	    # handing an empty input to generate().
	    if not prompt or not prompt.strip():
	        return ""

	    model.eval()
	    with torch.inference_mode():
	        input_ids = tokenizer(prompt, return_tensors='pt').input_ids.to(model.device)
	        output = model.generate(
	            inputs=input_ids,
	            do_sample=True,
	            top_k=50,
	            top_p=0.95,
	            temperature=0.75,
	            # Limit the number of *generated* tokens. The previous
	            # max_length=100 also counted the prompt tokens, so a long
	            # prompt left no room for a reply and clashed with
	            # min_new_tokens.
	            max_new_tokens=100,
	            min_new_tokens=5,
	        )
	        response_text = tokenizer.batch_decode(output, skip_special_tokens=True)[0]
	    return response_text

	# Build the Gradio text-in / text-out interface around the chat function
	# and start serving it.
	interface = gr.Interface(
	    fn=chat_with_model,
	    inputs="text",
	    outputs="text",
	    title="Chat with DictaLM Model",
	)
	interface.launch()