Spaces:

cigol123
/

YUGO-GPT

Sleeping

App Files Community

cigol123 committed on Dec 18, 2024

Commit

2ff10f4

verified ·

1 Parent(s): 37ef3ce

Update app.py

Browse files

Files changed (1) hide show

app.py +21 -21

app.py CHANGED Viewed

@@ -3,48 +3,47 @@ from llama_cpp import Llama
 llm = Llama(
     model_path="yugogpt-q4_0.gguf",
-    n_ctx=2048,
-    n_threads=4,
-    n_batch=512,
     use_mlock=True,
-    use_mmap=True
 )
 def format_chat_history(history):
     formatted_history = ""
     for user_msg, assistant_msg in history:
-        formatted_history += f"USER: {user_msg}\nASSISTANT: {assistant_msg}\n"
     return formatted_history
 def chat(message, history):
-    system_prompt = """Ti si YugoGPT, visoko precizan AI asistent koji daje isključivo proverljive informacije.
-PRAVILA RADA:
-- Dajem samo činjenično tačne informacije
-- Kada nemam dovoljno informacija, kažem: "Nemam dovoljno informacija o tome"
-- Ne nagađam i ne izmišljam
-- Koristim jasan i precizan jezik
-- Primarno komuniciram na srpskom jeziku
-- Fokusiram se na ključne, dokazive činjenice
-- Odgovaram direktno na postavljeno pitanje"""
     chat_history = format_chat_history(history)
     full_prompt = f"""SYSTEM: {system_prompt}
 KONTEKST:
 {chat_history}
 USER: {message}
-ASSISTANT: """
     response = llm(
         full_prompt,
-        max_tokens=2048,
-        temperature=0.7,  # Extremely low temperature for maximum precision
-        top_p=0.1,       # Very strict filtering
         repeat_penalty=1.2,
-        top_k=20,        # Reduced for more focused token selection
         stop=["USER:", "\n\n"],
         stream=True
     )
@@ -58,7 +57,7 @@ ASSISTANT: """
 demo = gr.ChatInterface(
     fn=chat,
     title="YugoGPT Stručni Asistent",
-    description="Precizan izvor informacija i stručne pomoći. PAŽNJA, ZNA DA LAŽE!!!",
     examples=[
         "Koji su osnovni principi relacionih baza podataka?",
         "Objasnite kako funkcioniše HTTP protokol",
@@ -72,3 +71,4 @@ if __name__ == "__main__":
         server_port=7860,
         share=False
     )

 llm = Llama(
     model_path="yugogpt-q4_0.gguf",
+    n_ctx=4096,            # Doubled context length
+    n_threads=8,           # Increased threads
+    n_batch=1024,          # Increased batch size
     use_mlock=True,
+    use_mmap=True,
+    n_gpu_layers=0,        # Set this to higher number if GPU available
+    verbose=False          # Reduced logging for better performance
 )
 def format_chat_history(history):
     formatted_history = ""
     for user_msg, assistant_msg in history:
+        formatted_history += f"USER: {user_msg}\nA: {assistant_msg}\n"
     return formatted_history
 def chat(message, history):
+    system_prompt = """Ti si YugoGPT, profesionalni AI asistent koji daje precizne i korisne informacije.
+PRAVILA:
+- Dajem jasne i konkretne informacije
+- Koristim precizan srpski jezik
+- Fokusiram se na činjenice
+- Odgovaram direktno i efikasno
+- Održavam profesionalan ton"""
     chat_history = format_chat_history(history)
     full_prompt = f"""SYSTEM: {system_prompt}
 KONTEKST:
 {chat_history}
 USER: {message}
+A: """
     response = llm(
         full_prompt,
+        max_tokens=4096,    # Increased max tokens
+        temperature=0.1,    # Keeping it precise
+        top_p=0.1,
         repeat_penalty=1.2,
+        top_k=20,
         stop=["USER:", "\n\n"],
         stream=True
     )
 demo = gr.ChatInterface(
     fn=chat,
     title="YugoGPT Stručni Asistent",
+    description="Profesionalni izvor informacija i stručne pomoći, PAŽNJA, ZNA DA LAŽE!!!",
     examples=[
         "Koji su osnovni principi relacionih baza podataka?",
         "Objasnite kako funkcioniše HTTP protokol",
         server_port=7860,
         share=False
     )