cigol123 committed on
Commit
2ff10f4
·
verified ·
1 Parent(s): 37ef3ce

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -21
app.py CHANGED
@@ -3,48 +3,47 @@ from llama_cpp import Llama
3
 
4
# Load the quantized YugoGPT model for CPU-only inference.
# mlock pins the weights in RAM; mmap avoids loading the whole file up front.
llm = Llama(
    model_path="yugogpt-q4_0.gguf",
    n_ctx=2048,
    n_threads=4,
    n_batch=512,
    use_mlock=True,
    use_mmap=True,
)
12
 
13
def format_chat_history(history):
    """Render prior (user, assistant) turns as USER:/ASSISTANT: prompt lines.

    history is an iterable of 2-tuples; returns a single string, one
    "USER: ...\nASSISTANT: ...\n" pair per turn (empty string for no history).
    """
    return "".join(
        f"USER: {user_msg}\nASSISTANT: {assistant_msg}\n"
        for user_msg, assistant_msg in history
    )
18
 
19
  def chat(message, history):
20
- system_prompt = """Ti si YugoGPT, visoko precizan AI asistent koji daje isključivo proverljive informacije.
21
 
22
- PRAVILA RADA:
23
- - Dajem samo činjenično tačne informacije
24
- - Kada nemam dovoljno informacija, kažem: "Nemam dovoljno informacija o tome"
25
- - Ne nagađam i ne izmišljam
26
- - Koristim jasan i precizan jezik
27
- - Primarno komuniciram na srpskom jeziku
28
- - Fokusiram se na ključne, dokazive činjenice
29
- - Odgovaram direktno na postavljeno pitanje"""
30
 
31
  chat_history = format_chat_history(history)
32
-
33
  full_prompt = f"""SYSTEM: {system_prompt}
34
 
35
  KONTEKST:
36
  {chat_history}
37
 
38
  USER: {message}
39
- ASSISTANT: """
40
 
41
  response = llm(
42
  full_prompt,
43
- max_tokens=2048,
44
- temperature=0.7, # Moderate temperature (comment previously mis-described 0.7 as "extremely low")
45
- top_p=0.1, # Very strict filtering
46
  repeat_penalty=1.2,
47
- top_k=20, # Reduced for more focused token selection
48
  stop=["USER:", "\n\n"],
49
  stream=True
50
  )
@@ -58,7 +57,7 @@ ASSISTANT: """
58
  demo = gr.ChatInterface(
59
  fn=chat,
60
  title="YugoGPT Stručni Asistent",
61
- description="Precizan izvor informacija i stručne pomoći. PAŽNJA, ZNA DA LAŽE!!!",
62
  examples=[
63
  "Koji su osnovni principi relacionih baza podataka?",
64
  "Objasnite kako funkcioniše HTTP protokol",
@@ -72,3 +71,4 @@ if __name__ == "__main__":
72
  server_port=7860,
73
  share=False
74
  )
 
 
3
 
4
# Load the quantized YugoGPT model for inference.
# mlock pins the weights in RAM; mmap avoids loading the whole file up front.
llm = Llama(
    model_path="yugogpt-q4_0.gguf",
    n_ctx=4096,        # doubled context window
    n_threads=8,       # increased CPU threads
    n_batch=1024,      # larger prompt-eval batch
    use_mlock=True,
    use_mmap=True,
    n_gpu_layers=0,    # raise this if a GPU is available
    verbose=False,     # quieter logs
)
14
 
15
def format_chat_history(history):
    """Render prior (user, assistant) turns as USER:/A: prompt lines.

    history is an iterable of 2-tuples; returns one "USER: ...\nA: ...\n"
    pair per turn, concatenated (empty string for no history).
    """
    parts = []
    for user_msg, assistant_msg in history:
        parts.append(f"USER: {user_msg}\nA: {assistant_msg}\n")
    return "".join(parts)
20
 
21
  def chat(message, history):
22
+ system_prompt = """Ti si YugoGPT, profesionalni AI asistent koji daje precizne i korisne informacije.
23
 
24
+ PRAVILA:
25
+ - Dajem jasne i konkretne informacije
26
+ - Koristim precizan srpski jezik
27
+ - Fokusiram se na činjenice
28
+ - Odgovaram direktno i efikasno
29
+ - Održavam profesionalan ton"""
 
 
30
 
31
  chat_history = format_chat_history(history)
 
32
  full_prompt = f"""SYSTEM: {system_prompt}
33
 
34
  KONTEKST:
35
  {chat_history}
36
 
37
  USER: {message}
38
+ A: """
39
 
40
  response = llm(
41
  full_prompt,
42
+ max_tokens=4096, # Increased max tokens
43
+ temperature=0.1, # Keeping it precise
44
+ top_p=0.1,
45
  repeat_penalty=1.2,
46
+ top_k=20,
47
  stop=["USER:", "\n\n"],
48
  stream=True
49
  )
 
57
  demo = gr.ChatInterface(
58
  fn=chat,
59
  title="YugoGPT Stručni Asistent",
60
+ description="Profesionalni izvor informacija i stručne pomoći, PAŽNJA, ZNA DA LAŽE!!!",
61
  examples=[
62
  "Koji su osnovni principi relacionih baza podataka?",
63
  "Objasnite kako funkcioniše HTTP protokol",
 
71
  server_port=7860,
72
  share=False
73
  )
74
+