File size: 2,217 Bytes
202d927
 
 
 
d6bb9af
231afb1
fa18beb
 
 
 
202d927
 
c4b24bf
 
 
602b78e
c4b24bf
 
602b78e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
202d927
602b78e
3f4254b
602b78e
 
 
 
 
a268e29
602b78e
c4b24bf
 
fa18beb
 
c4b24bf
a268e29
c4b24bf
 
602b78e
 
a2b69dd
9411e9a
fa18beb
 
a268e29
 
fa18beb
a268e29
602b78e
b16e5ce
202d927
a2b69dd
b16e5ce
 
 
602b78e
 
 
202d927
 
9411e9a
02622b1
602b78e
fa18beb
602b78e
 
 
fa18beb
202d927
 
9411e9a
6922353
fa18beb
 
 
 
602b78e
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import re

import gradio as gr
from llama_cpp import Llama

# Load the quantized model once at import time; `chat` below reuses this
# single module-level instance for every request.
llm = Llama(
    model_path="yugogpt-q4_0.gguf",
    # Context / batch / thread sizing passed to llama.cpp.
    n_ctx=2048, n_batch=512, n_threads=4,
    # Memory-mapping and memory-locking flags for the model file.
    use_mmap=True, use_mlock=True,
)

def format_chat_history(history):
    """Render previous conversation turns as prompt text.

    Two history layouts are accepted:

    * pair style -- an iterable of ``(user_msg, assistant_msg)`` pairs;
      each pair becomes ``"Pitanje: ...\\nOdgovor: ...\\n"``.
    * message style -- an iterable of dicts with ``"role"`` and
      ``"content"`` keys; ``"user"`` entries become ``"Pitanje: ..."``
      lines and ``"assistant"`` entries become ``"Odgovor: ..."`` lines.
      Entries with any other role are skipped.

    Args:
        history: The prior turns, or ``None`` / empty when there are none.

    Returns:
        The concatenated transcript, or ``""`` when there is no history.
    """
    if not history:
        return ""

    role_labels = {"user": "Pitanje", "assistant": "Odgovor"}
    parts = []
    for turn in history:
        if isinstance(turn, dict):
            # Message-style entry: one dict per speaker turn.
            label = role_labels.get(turn.get("role"))
            if label is not None:
                parts.append(f"{label}: {turn.get('content', '')}\n")
        else:
            # Pair-style entry: (user message, assistant reply).
            user_msg, assistant_msg = turn
            parts.append(f"Pitanje: {user_msg}\nOdgovor: {assistant_msg}\n")
    return "".join(parts)

# A punctuation mark glued directly to the text that follows it.  It does NOT
# match when the next character is whitespace or more punctuation ("?!",
# "..."), nor when the mark sits between two digits ("3.14", "1,5", "10:30").
_GLUED_PUNCTUATION = re.compile(
    r"(?<!\d)[.,!?:;](?=[^\s.,!?:;])|[.,!?:;](?=[^\s\d.,!?:;])"
)


def clean_text(text):
    """Tidy model output for display.

    Steps, in order:

    1. Remove every literal ``"ASSISTANT:"`` marker.
    2. Insert a single space after a punctuation mark (``. , ! ? : ;``)
       that is immediately followed by text, leaving numbers such as
       ``3.14`` or ``10:30`` and punctuation runs such as ``?!`` intact.
    3. Collapse all whitespace runs (including newlines) to single spaces
       and trim both ends.

    Args:
        text: Raw text produced by the model; ``None`` or ``""`` is allowed.

    Returns:
        The cleaned, single-line string (``""`` for empty input).
    """
    if not text:
        return ""

    text = text.replace("ASSISTANT:", "").strip()
    text = _GLUED_PUNCTUATION.sub(r"\g<0> ", text)
    # split() with no argument splits on any whitespace run, so the join
    # both normalizes inner spacing and drops leading/trailing whitespace.
    return " ".join(text.split())

def chat(message, history):
    """Stream the model's reply to ``message`` for the chat UI.

    The prompt is assembled from a fixed system prompt, the formatted prior
    turns, and the new question, then sent to the module-level ``llm`` with
    streaming enabled.

    Args:
        message: The user's new question.
        history: Prior conversation turns, passed to
            ``format_chat_history``.

    Yields:
        The cleaned reply accumulated so far, once per non-empty streamed
        chunk, so each yielded value supersedes the previous one.
    """
    system_prompt = """Ti si YugoGPT, visoko precizan AI asistent.

OSNOVNI PRINCIPI:
- Dajem konkretne i tačne informacije
- Odgovaram samo o temama koje dobro poznajem
- Koristim jasan i precizan srpski jezik
- Fokusiram se na činjenice
- Odgovaram direktno i pozitivno
- Izbegavam nagađanja"""

    chat_history = format_chat_history(history)

    full_prompt = f"""SYSTEM: {system_prompt}

KONTEKST:
{chat_history}

Pitanje: {message}
Odgovor:"""

    # NOTE(review): the history is never truncated, so a long conversation
    # can grow the prompt toward the model's n_ctx -- confirm how the
    # library behaves at that limit.
    response = llm(
        full_prompt,
        max_tokens=2048,
        temperature=0.1,
        top_p=0.1,
        repeat_penalty=1.2,
        top_k=20,
        # Stop at the next question marker or at a blank line.
        stop=["Pitanje:", "\n\n"],
        stream=True
    )

    # Accumulate the RAW streamed text and clean the whole string on each
    # step.  Cleaning individual chunks would strip the whitespace at each
    # chunk's edges, which is what separates words, gluing them together.
    raw_answer = ""
    for chunk in response:
        if not chunk:
            continue
        piece = chunk['choices'][0]['text']
        if not piece:
            continue
        raw_answer += piece
        yield clean_text(raw_answer)

# Chat UI backed by the streaming `chat` generator; the title, description
# and example prompts are user-facing Serbian strings.
demo = gr.ChatInterface(
    chat,
    examples=[
        "Objasnite princip rada relacione baze podataka",
        "Kako funkcioniše HTTP protokol?",
        "Opišite osnovne komponente računara",
    ],
    title="YugoGPT Stručni Asistent",
    description="Precizan izvor informacija. PAŽNJA!!! ZNA DA LAŽE!!!",
)

if __name__ == "__main__":
    # Only start the server when run as a script, not when imported.
    # queue() is applied first and launch() is called on its result.
    queued_app = demo.queue()
    # 0.0.0.0 listens on all network interfaces, port 7860; no public
    # share link is requested.
    queued_app.launch(share=False, server_port=7860, server_name="0.0.0.0")