import gradio as gr
from llama_cpp import Llama

# Load the local GGUF model; mmap keeps the weights memory-mapped and
# mlock pins them in RAM so they are not swapped out.
llm = Llama(
    model_path="yugogpt-q4_0.gguf",
    n_ctx=2048,
    n_threads=4,
    n_batch=512,
    use_mlock=True,
    use_mmap=True
)
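# Optional (assumes a GPU build of llama-cpp-python is installed): offloading
# layers can speed up inference, e.g. Llama(..., n_gpu_layers=-1) to offload all layers.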

def format_chat_history(history):
    # Flatten Gradio's (user, assistant) history pairs into a plain-text
    # transcript using the same Pitanje/Odgovor labels as the prompt.
    formatted_history = ""
    for user_msg, assistant_msg in history:
        formatted_history += f"Pitanje: {user_msg}\nOdgovor: {assistant_msg}\n"
    return formatted_history
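# Example: [("pitanje", "odgovor")] becomes "Pitanje: pitanje\nOdgovor: odgovor\n".
# Note: this assumes the classic tuple-style history; a ChatInterface configured
# with type="messages" passes role/content dicts instead and would need
# different unpacking.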

def clean_text(text):
    # Drop a leaked role marker and surrounding whitespace.
    text = text.replace("ASSISTANT:", "").strip()

    # Ensure every punctuation mark is followed by a space
    # (note: this also splits decimals such as "3.14" into "3. 14").
    punctuation_marks = ['.', ',', '!', '?', ':', ';']
    for mark in punctuation_marks:
        text = text.replace(mark, mark + ' ')

    # Collapse repeated whitespace.
    words = text.split()
    text = ' '.join(words)

    return text
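# Example: clean_text("ASSISTANT: Zdravo,svete!") -> "Zdravo, svete!"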

def chat(message, history):
    system_prompt = """Ti si YugoGPT, visoko precizan AI asistent.

OSNOVNI PRINCIPI:
- Dajem konkretne i tačne informacije
- Odgovaram samo o temama koje dobro poznajem
- Koristim jasan i precizan srpski jezik
- Fokusiram se na činjenice
- Odgovaram direktno i pozitivno
- Izbegavam nagađanja"""

    chat_history = format_chat_history(history)

    full_prompt = f"""SYSTEM: {system_prompt}

KONTEKST:
{chat_history}

Pitanje: {message}
Odgovor:"""

    # Low temperature/top_p keep the answers conservative; the stop sequences
    # cut generation before the model starts writing the next "Pitanje:".
    response = llm(
        full_prompt,
        max_tokens=2048,
        temperature=0.1,
        top_p=0.1,
        repeat_penalty=1.2,
        top_k=20,
        stop=["Pitanje:", "\n\n"],
        stream=True
    )

    # Stream the partial answer to the UI as tokens arrive; cleaning the
    # accumulated text once per chunk is enough, since clean_text is idempotent.
    partial_message = ""
    for chunk in response:
        if chunk and chunk['choices'][0]['text']:
            partial_message = clean_text(partial_message + chunk['choices'][0]['text'])
            yield partial_message

demo = gr.ChatInterface(
    fn=chat,
    title="YugoGPT Stručni Asistent",
    description="Precizan izvor informacija. PAŽNJA!!! ZNA DA LAŽE!!!",
    examples=[
        "Objasnite princip rada relacione baze podataka",
        "Kako funkcioniše HTTP protokol?",
        "Opišite osnovne komponente računara"
    ]
)

if __name__ == "__main__":
    # queue() enables request queuing, which Gradio needs to stream generator output.
    demo.queue().launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False
    )