import spaces
import json
import subprocess
from llama_cpp import Llama
from llama_cpp_agent import LlamaCppAgent, MessagesFormatterType
from llama_cpp_agent.providers import LlamaCppPythonProvider
from llama_cpp_agent.chat_history import BasicChatHistory
from llama_cpp_agent.chat_history.messages import Roles
import gradio as gr
from huggingface_hub import hf_hub_download
# Download the GGUF model weights into ./models
hf_hub_download(
    repo_id="CerebrumTech/cere-gemma-2-9b-tr",
    filename="unsloth.F16.gguf",
    local_dir="./models"
)
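# Optional sanity check (a sketch, not part of the original app): fail fast if
# the download did not produce the file the Llama() call below expects.
import os
assert os.path.exists("models/unsloth.F16.gguf"), "GGUF model file not found"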
# Response generation function; @spaces.GPU requests a ZeroGPU worker for up to 120 s per call
@spaces.GPU(duration=120)
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    model,  # displayed in the UI; the model path below is fixed
    max_tokens,
    temperature,
    top_p,
    top_k,
    repetition_penalty,
):
    chat_template = MessagesFormatterType.VICUNA

    # Load the GGUF model via llama.cpp with GPU offload
    llm = Llama(
        model_path="models/unsloth.F16.gguf",
        flash_attn=True,
        n_gpu_layers=81,
        n_batch=1024,
        n_ctx=8192,
    )
    provider = LlamaCppPythonProvider(llm)
    agent = LlamaCppAgent(
        provider,
        system_prompt=system_message,
        predefined_messages_formatter_type=chat_template,
        debug_output=True
    )
    # Sampling settings taken from the UI sliders
    settings = provider.get_provider_default_settings()
    settings.temperature = temperature
    settings.top_k = top_k
    settings.top_p = top_p
    settings.max_tokens = max_tokens
    settings.repeat_penalty = repetition_penalty
    settings.stream = True
    # Rebuild the chat history in the format llama_cpp_agent expects
    messages = BasicChatHistory()
    for user_msg, assistant_msg in history:
        user = {
            'role': Roles.user,
            'content': user_msg
        }
        assistant = {
            'role': Roles.assistant,
            'content': assistant_msg
        }
        messages.add_message(user)
        messages.add_message(assistant)
    stream = agent.get_chat_response(
        message,
        llm_sampling_settings=settings,
        chat_history=messages,
        returns_streaming_generator=True,
        print_output=False
    )

    # Gradio streams by consuming a generator that yields the cumulative text
    outputs = ""
    for output in stream:
        outputs += output
        yield outputs
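# A minimal local smoke test (a sketch; bypasses Gradio and assumes a GPU and
# the downloaded model file are available — on ZeroGPU a GPU is only allocated
# inside the @spaces.GPU-decorated call):
#
#   for partial in respond("Merhaba!", [], "You are a helpful assistant.",
#                          "Cere-Gemma-2-9b", 256, 0.1, 0.95, 40, 1.1):
#       pass
#   print(partial)  # the final cumulative response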
# Interface creation function
def create_interface(model_name, description):
    return gr.ChatInterface(
        fn=respond,
        additional_inputs=[
            gr.Textbox(value="", label="System message"),
            gr.Textbox(value=model_name, label="Model", interactive=False),
            gr.Slider(minimum=1, maximum=4096, value=2048, step=1, label="Max tokens"),
            gr.Slider(minimum=0.1, maximum=4.0, value=0.1, step=0.1, label="Temperature"),
            gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
            gr.Slider(minimum=0, maximum=100, value=40, step=1, label="Top-k"),
            gr.Slider(minimum=0.0, maximum=2.0, value=1.1, step=0.1, label="Repetition penalty"),
        ],
        title=model_name,
        description=description,
    )
# Build the description and the interface
description = """<p align="center">CerebrumTech/cere-gemma-2-9b-tr</p>"""
interface = create_interface('Cere-Gemma-2-9b', description)

# Launch the Gradio app
if __name__ == "__main__":
    interface.launch()
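# Client-side usage sketch (assumes the `gradio_client` package; the Space id
# is a placeholder). gr.ChatInterface typically exposes its endpoint under
# api_name="/chat", with the additional_inputs passed positionally:
#
#   from gradio_client import Client
#   client = Client("<user>/<space>")  # hypothetical Space id
#   reply = client.predict(
#       "Merhaba!",                      # message
#       "You are a helpful assistant.",  # system message
#       "Cere-Gemma-2-9b",               # model (display only)
#       2048, 0.1, 0.95, 40, 1.1,        # max tokens, temperature, top-p, top-k, repetition penalty
#       api_name="/chat",
#   )
#   print(reply)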