File size: 2,077 Bytes
202d927
 
 
 
d6bb9af
2ff10f4
 
 
fa18beb
2ff10f4
 
 
202d927
 
c4b24bf
 
 
2ff10f4
c4b24bf
 
202d927
2ff10f4
cfcc6a6
2ff10f4
 
 
 
 
 
c4b24bf
 
fa18beb
c4b24bf
cfcc6a6
c4b24bf
 
cfcc6a6
2ff10f4
a2b69dd
9411e9a
fa18beb
2ff10f4
 
 
fa18beb
2ff10f4
cfcc6a6
b16e5ce
202d927
a2b69dd
b16e5ce
 
 
37ef3ce
602b78e
202d927
 
9411e9a
cfcc6a6
2ff10f4
fa18beb
cfcc6a6
 
 
fa18beb
202d927
 
9411e9a
6922353
fa18beb
 
 
 
2ff10f4
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import gradio as gr
from llama_cpp import Llama

# Module-level model handle shared by every chat request.
# Loads a 4-bit-quantized GGUF model from the working directory at import time.
llm = Llama(
    model_path="yugogpt-q4_0.gguf",
    n_ctx=4096,            # Doubled context length
    n_threads=8,           # Increased threads; assumes >= 8 CPU cores — TODO confirm for target host
    n_batch=1024,          # Increased batch size (prompt-eval batch)
    use_mlock=True,        # pin model pages in RAM to avoid swapping
    use_mmap=True,         # memory-map the weights instead of reading them fully
    n_gpu_layers=0,        # Set this to higher number if GPU available
    verbose=False          # Reduced logging for better performance
)

def format_chat_history(history):
    """Render (user, assistant) turn pairs as a plain-text transcript.

    Each pair becomes two lines, ``USER: ...`` and ``A: ...``, matching the
    prompt format expected by ``chat``. Returns "" for an empty history.
    """
    turns = (
        f"USER: {user_msg}\nA: {assistant_msg}\n"
        for user_msg, assistant_msg in history
    )
    # join at C speed instead of accumulating with repeated +=
    return "".join(turns)

def chat(message, history):
    """Stream a model reply for *message*, given the prior *history*.

    Builds a single prompt (system rules + formatted history + the new user
    turn), runs the module-level ``llm`` with streaming enabled, and yields
    the growing partial reply after every received chunk, as Gradio's
    ``ChatInterface`` expects from a generator callback.
    """
    system_prompt = """Ti si YugoGPT, profesionalni AI asistent koji daje precizne i korisne informacije.

PRAVILA:
- Dajem jasne i konkretne informacije
- Koristim precizan srpski jezik
- Fokusiram se na činjenice
- Odgovaram direktno i efikasno
- Održavam profesionalan ton"""

    # Assemble the prompt; pieces concatenate to the exact layout the model
    # was prompted with before (SYSTEM / KONTEKST / USER / A:).
    full_prompt = (
        f"SYSTEM: {system_prompt}\n"
        "\n"
        "KONTEKST:\n"
        f"{format_chat_history(history)}\n"
        "\n"
        f"USER: {message}\n"
        "A: "
    )

    stream = llm(
        full_prompt,
        max_tokens=4096,    # Increased max tokens
        temperature=0.1,    # Keeping it precise
        top_p=0.1,
        repeat_penalty=1.2,
        top_k=20,
        stop=["USER:", "\n\n"],
        stream=True
    )

    accumulated = ""
    for chunk in stream:
        if not chunk:
            continue
        piece = chunk['choices'][0]['text']
        if piece:
            accumulated += piece
            # yield the full text so far — Gradio replaces, not appends
            yield accumulated

# Gradio chat UI wired to the streaming `chat` generator; titles, the
# disclaimer description, and example prompts are user-facing Serbian text.
demo = gr.ChatInterface(
    fn=chat,
    title="YugoGPT Stručni Asistent",
    description="Profesionalni izvor informacija i stručne pomoći, PAŽNJA, ZNA DA LAŽE!!!",
    examples=[
        "Koji su osnovni principi relacionih baza podataka?",
        "Objasnite kako funkcioniše HTTP protokol",
        "Koje su glavne komponente računara i njihove funkcije?"
    ]
)

if __name__ == "__main__":
    # .queue() enables request queuing, required for streaming generator
    # callbacks; bind on all interfaces at Gradio's default port without a
    # public share link.
    demo.queue().launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False
    )