Update app.py
Browse files
app.py
CHANGED
@@ -3,48 +3,47 @@ from llama_cpp import Llama
|
|
3 |
|
4 |
llm = Llama(
|
5 |
model_path="yugogpt-q4_0.gguf",
|
6 |
-
n_ctx=
|
7 |
-
n_threads=
|
8 |
-
n_batch=
|
9 |
use_mlock=True,
|
10 |
-
use_mmap=True
|
|
|
|
|
11 |
)
|
12 |
|
13 |
def format_chat_history(history):
|
14 |
formatted_history = ""
|
15 |
for user_msg, assistant_msg in history:
|
16 |
-
formatted_history += f"USER: {user_msg}\
|
17 |
return formatted_history
|
18 |
|
19 |
def chat(message, history):
|
20 |
-
system_prompt = """Ti si YugoGPT,
|
21 |
|
22 |
-
PRAVILA
|
23 |
-
- Dajem
|
24 |
-
-
|
25 |
-
-
|
26 |
-
-
|
27 |
-
-
|
28 |
-
- Fokusiram se na ključne, dokazive činjenice
|
29 |
-
- Odgovaram direktno na postavljeno pitanje"""
|
30 |
|
31 |
chat_history = format_chat_history(history)
|
32 |
-
|
33 |
full_prompt = f"""SYSTEM: {system_prompt}
|
34 |
|
35 |
KONTEKST:
|
36 |
{chat_history}
|
37 |
|
38 |
USER: {message}
|
39 |
-
|
40 |
|
41 |
response = llm(
|
42 |
full_prompt,
|
43 |
-
max_tokens=
|
44 |
-
temperature=0.
|
45 |
-
top_p=0.1,
|
46 |
repeat_penalty=1.2,
|
47 |
-
top_k=20,
|
48 |
stop=["USER:", "\n\n"],
|
49 |
stream=True
|
50 |
)
|
@@ -58,7 +57,7 @@ ASSISTANT: """
|
|
58 |
demo = gr.ChatInterface(
|
59 |
fn=chat,
|
60 |
title="YugoGPT Stručni Asistent",
|
61 |
-
description="
|
62 |
examples=[
|
63 |
"Koji su osnovni principi relacionih baza podataka?",
|
64 |
"Objasnite kako funkcioniše HTTP protokol",
|
@@ -72,3 +71,4 @@ if __name__ == "__main__":
|
|
72 |
server_port=7860,
|
73 |
share=False
|
74 |
)
|
|
|
|
3 |
|
4 |
# Single shared model instance, created once when the module is imported.
llm = Llama(
    # Quantized GGUF weights, resolved relative to the working directory.
    model_path="yugogpt-q4_0.gguf",
    # Memory handling: memory-map the model file and ask the OS to keep it
    # resident in RAM.
    use_mmap=True,
    use_mlock=True,
    # Sizing: context window (tokens), worker threads, prompt batch size.
    n_ctx=4096,
    n_threads=8,
    n_batch=1024,
    # 0 keeps every layer on the CPU; raise it when a GPU is available.
    n_gpu_layers=0,
    # Suppress llama.cpp's diagnostic output.
    verbose=False,
)
|
14 |
|
15 |
def format_chat_history(history):
    """Render prior chat turns as a plain-text transcript for the prompt.

    Args:
        history: Iterable of ``(user_msg, assistant_msg)`` pairs, oldest
            first. May be empty or ``None``. Either element of a pair may
            be ``None``; that side of the turn is then omitted instead of
            being rendered as the literal text "None".

    Returns:
        A string containing a ``USER: ...`` line followed by an ``A: ...``
        line for each turn, every line terminated by a newline. Returns
        ``""`` when there is no history.
    """
    lines = []
    for user_msg, assistant_msg in history or ():
        if user_msg is not None:
            lines.append(f"USER: {user_msg}\n")
        if assistant_msg is not None:
            lines.append(f"A: {assistant_msg}\n")
    # Join once at the end rather than growing a string with += per turn.
    return "".join(lines)
|
20 |
|
21 |
def chat(message, history):
|
22 |
+
system_prompt = """Ti si YugoGPT, profesionalni AI asistent koji daje precizne i korisne informacije.
|
23 |
|
24 |
+
PRAVILA:
|
25 |
+
- Dajem jasne i konkretne informacije
|
26 |
+
- Koristim precizan srpski jezik
|
27 |
+
- Fokusiram se na činjenice
|
28 |
+
- Odgovaram direktno i efikasno
|
29 |
+
- Održavam profesionalan ton"""
|
|
|
|
|
30 |
|
31 |
chat_history = format_chat_history(history)
|
|
|
32 |
full_prompt = f"""SYSTEM: {system_prompt}
|
33 |
|
34 |
KONTEKST:
|
35 |
{chat_history}
|
36 |
|
37 |
USER: {message}
|
38 |
+
A: """
|
39 |
|
40 |
response = llm(
|
41 |
full_prompt,
|
42 |
+
max_tokens=4096, # Increased max tokens
|
43 |
+
temperature=0.1, # Keeping it precise
|
44 |
+
top_p=0.1,
|
45 |
repeat_penalty=1.2,
|
46 |
+
top_k=20,
|
47 |
stop=["USER:", "\n\n"],
|
48 |
stream=True
|
49 |
)
|
|
|
57 |
demo = gr.ChatInterface(
|
58 |
fn=chat,
|
59 |
title="YugoGPT Stručni Asistent",
|
60 |
+
description="Profesionalni izvor informacija i stručne pomoći, PAŽNJA, ZNA DA LAŽE!!!",
|
61 |
examples=[
|
62 |
"Koji su osnovni principi relacionih baza podataka?",
|
63 |
"Objasnite kako funkcioniše HTTP protokol",
|
|
|
71 |
server_port=7860,
|
72 |
share=False
|
73 |
)
|
74 |
+
|