import spaces
from llama_cpp import Llama
from llama_cpp_agent import LlamaCppAgent, MessagesFormatterType
from llama_cpp_agent.providers import LlamaCppPythonProvider
from llama_cpp_agent.chat_history import BasicChatHistory
from llama_cpp_agent.chat_history.messages import Roles
import gradio as gr
from huggingface_hub import hf_hub_download

# Download the GGUF model weights from the Hugging Face Hub;
# hf_hub_download returns the resolved local file path, reused below
MODEL_PATH = hf_hub_download(
    repo_id="CerebrumTech/cere-gemma-2-9b-tr",
    filename="unsloth.F16.gguf",
    local_dir="./models"
)

# Response generation function (streams tokens back to the UI)
@spaces.GPU(duration=120)
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    model,  # read-only model name shown in the UI; the GGUF path is fixed above
    max_tokens,
    temperature,
    top_p,
    top_k,
    repetition_penalty,
):
    # Prompt template used to format the chat history for the model
    chat_template = MessagesFormatterType.VICUNA

    # Load the model onto the GPU allocated by the @spaces.GPU decorator
    llm = Llama(
        model_path=MODEL_PATH,
        flash_attn=True,
        n_gpu_layers=81,
        n_batch=1024,
        n_ctx=8192,
    )
    provider = LlamaCppPythonProvider(llm)

    agent = LlamaCppAgent(
        provider,
        system_prompt=system_message,
        predefined_messages_formatter_type=chat_template,
        debug_output=True
    )

    settings = provider.get_provider_default_settings()
    settings.temperature = temperature
    settings.top_k = top_k
    settings.top_p = top_p
    settings.max_tokens = max_tokens
    settings.repeat_penalty = repetition_penalty
    settings.stream = True

    messages = BasicChatHistory()

    for user_msg, assistant_msg in history:
        user = {
            'role': Roles.user,
            'content': user_msg
        }
        assistant = {
            'role': Roles.assistant,
            'content': assistant_msg
        }
        messages.add_message(user)
        messages.add_message(assistant)

    stream = agent.get_chat_response(
        message,
        llm_sampling_settings=settings,
        chat_history=messages,
        returns_streaming_generator=True,
        print_output=False
    )

    outputs = ""
    for output in stream:
        outputs += output
        yield outputs
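
# A minimal sketch of calling the generator directly, e.g. for a local smoke
# test outside Gradio (not part of the original app; argument values are
# illustrative):
#
#   for partial in respond("Merhaba!", [], "You are a helpful assistant.",
#                          "Cere-Gemma-2-9b", max_tokens=256, temperature=0.7,
#                          top_p=0.95, top_k=40, repetition_penalty=1.1):
#       print(partial)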

# Interface creation function
def create_interface(model_name, description):
    return gr.ChatInterface(
        fn=respond,
        additional_inputs=[
            gr.Textbox(value="", label="System message"),
            gr.Textbox(value=model_name, label="Model", interactive=False),
            gr.Slider(minimum=1, maximum=4096, value=2048, step=1, label="Max tokens"),
            gr.Slider(minimum=0.1, maximum=4.0, value=0.1, step=0.1, label="Temperature"),
            gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
            gr.Slider(minimum=0, maximum=100, value=40, step=1, label="Top-k"),
            gr.Slider(minimum=0.0, maximum=2.0, value=1.1, step=0.1, label="Repetition penalty"),
        ],
        title=model_name,
        description=description,
    )

# Build the description and the interface
description = """<p align="center">CerebrumTech/cere-gemma-2-9b-tr</p>"""
interface = create_interface('Cere-Gemma-2-9b', description)

# Launch the Gradio app
if __name__ == "__main__":
    interface.launch()
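
# When running outside Hugging Face Spaces, the same app can be served on a
# fixed host and port; queue() keeps streaming responsive under concurrent
# users (a deployment sketch, not from the original Space):
#
#   interface.queue().launch(server_name="0.0.0.0", server_port=7860)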