Spaces:

cigol123
/

YUGO-GPT

Sleeping

cigol123 committed on Dec 18, 2024

Commit

231afb1

verified ·

1 Parent(s): f4ffeca

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,24 +1,40 @@
 import gradio as gr
 from llama_cpp import Llama
 llm = Llama(
     model_path="yugogpt-q4_0.gguf",
-    n_ctx=2048
 )
 def chat(message, history):
     response = llm.create_completion(
-        f"USER: {message}\nASSISTANT:",
         max_tokens=512,
-        temperature=0.7
     )
     return response['choices'][0]['text']
 demo = gr.ChatInterface(
     chat,
     title="YugoGPT Chat",
 )
-demo.launch(server_name="0.0.0.0", server_port=7860)

 import gradio as gr
 from llama_cpp import Llama
+# Load the GGUF model once at module level; the model file is referenced by bare filename
 llm = Llama(
     model_path="yugogpt-q4_0.gguf",
+    n_ctx=2048,
+    n_batch=512,           # Batch size, now passed explicitly (it was not set before this commit)
+    n_threads=4,           # Hard-coded thread count; adjust to the host's CPU cores
+    n_gpu_layers=0         # No GPU layers requested; set higher if a GPU is available
 )
 def chat(message, history):
+    # Single-turn prompt: only `message` is sent to the model; `history` is accepted but never used
+    full_prompt = "USER: " + message + "\nASSISTANT:"
     response = llm.create_completion(
+        full_prompt,
         max_tokens=512,
+        temperature=0.7,
+        stop=["USER:", "\n"],  # Stop strings; NOTE(review): "\n" presumably ends the reply at its first newline - confirm
+        stream=False
     )
     return response['choices'][0]['text']
 demo = gr.ChatInterface(
     chat,
     title="YugoGPT Chat",
+    examples=["Hello, how are you?", "What's the weather like?"],  # Sample prompts for the chat UI
+    cache_examples=True
 )
+# Start the Gradio app bound to 0.0.0.0 on port 7860, with share=False
+demo.launch(
+    server_name="0.0.0.0",
+    server_port=7860,
+    share=False
+)