import spaces
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

MODEL_NAME = "osmankoc/llama-2-7b-zoa"

# Load the model and tokenizer once at import time (device_map="auto"
# requires the accelerate package)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME, torch_dtype=torch.float16, device_map="auto"
)

# With ZeroGPU, the model is moved onto the GPU only while this function runs
@spaces.GPU
def generate(prompt):
    messages = "You are HTML Web Developer. Generate HTML code using Tailwind CSS framework and Shadcn UI components. Add HTML tags to the code. Don't forget to use the correct classes. Don't write inline styles and descriptions. Here user's prompt: " + prompt
    
    inputs = tokenizer(messages, return_tensors="pt").to("cuda")
    
    # max_length caps prompt plus completion at 2500 tokens in total
    output = model.generate(**inputs, max_length=2500)
    response = tokenizer.decode(output[0], skip_special_tokens=True)
    
    return response
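
# A hedged alternative generation path, sketched under the assumption that the
# tokenizer ships a chat template (Llama-2 base fine-tunes often do not): it
# formats the instruction with tokenizer.apply_chat_template and decodes only
# the newly generated tokens. It is not wired into the UI below.
@spaces.GPU
def generate_chat(prompt):
    messages = [
        {"role": "system", "content": "You are an HTML web developer. "
         "Generate HTML code using the Tailwind CSS framework and "
         "Shadcn UI components."},
        {"role": "user", "content": prompt},
    ]
    text = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    model_inputs = tokenizer([text], return_tensors="pt").to("cuda")
    generated_ids = model.generate(**model_inputs, max_new_tokens=512)
    # Slice off the prompt so only the completion is decoded
    generated_ids = [
        out[len(inp):] for inp, out in zip(model_inputs.input_ids, generated_ids)
    ]
    return tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]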

# Gradio UI (doubles as a simple API endpoint)
demo = gr.Interface(
    fn=generate,
    inputs=gr.Textbox(placeholder="Enter prompt..."),
    outputs=gr.Textbox(),
)

demo.launch()
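
# Client-side usage sketch (run elsewhere, not in this app): a gr.Interface
# exposes its function at api_name="/predict", so the running Space can be
# called programmatically with gradio_client. The Space id below is an
# assumed placeholder; substitute the real one.
#
#     from gradio_client import Client
#
#     client = Client("osmankoc/llama-2-7b-zoa")  # assumed Space id
#     html = client.predict("a pricing page with three tiers", api_name="/predict")
#     print(html)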