# zoa-llm-api / app.py
# Hugging Face Space by osmankoc — commit 6f3a481 ("fix messages")
import spaces
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
# Hugging Face Hub id of the fine-tuned Llama-2 7B checkpoint served by this app.
MODEL_NAME = "osmankoc/llama-2-7b-zoa"
# Load the tokenizer and model once at import time so every request reuses them.
# float16 halves the weight memory; device_map="auto" lets accelerate decide
# placement (ZeroGPU moves the model to the GPU only inside @spaces.GPU calls).
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(
MODEL_NAME, torch_dtype=torch.float16, device_map="auto"
)
# ZeroGPU: the model is moved onto the GPU only while this function runs.
@spaces.GPU
def generate(prompt):
    """Generate HTML (Tailwind CSS + Shadcn UI) for *prompt* with the Llama-2 model.

    Args:
        prompt: Free-form user description of the page/markup to produce.

    Returns:
        str: Only the newly generated text — the instruction prefix and the
        user prompt are stripped before decoding.
    """
    # This base Llama-2 checkpoint has no chat template, so a plain
    # instruction string is prepended instead of tokenizer.apply_chat_template().
    instruction = (
        "You are HTML Web Developer. Generate HTML code using Tailwind CSS "
        "framework and Shadcn UI components. Add HTML tags to the code. "
        "Don't forget to use the correct classes. Don't write inline styles "
        "and descriptions. Here user's prompt: "
    )
    inputs = tokenizer(instruction + prompt, return_tensors="pt").to("cuda")
    # max_new_tokens bounds the *generated* length; max_length would also count
    # the prompt tokens, shrinking the budget as prompts grow.
    output = model.generate(**inputs, max_new_tokens=2048)
    # output[0] contains the input tokens followed by the generated ones —
    # decode only the tail so the prompt is not echoed back to the user.
    new_tokens = output[0][inputs["input_ids"].shape[1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)
# Gradio UI — a minimal prompt-in / text-out front end (works like a simple API).
prompt_box = gr.Textbox(placeholder="Enter prompt...")
result_box = gr.Textbox()
demo = gr.Interface(fn=generate, inputs=prompt_box, outputs=result_box)
demo.launch()