cigol123 committed on
Commit
231afb1
·
verified ·
1 Parent(s): f4ffeca

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -5
app.py CHANGED
@@ -1,24 +1,40 @@
1
  import gradio as gr
2
  from llama_cpp import Llama
3
 
 
4
  llm = Llama(
5
  model_path="yugogpt-q4_0.gguf",
6
- n_ctx=2048
 
 
 
7
  )
8
 
9
  def chat(message, history):
 
 
 
10
  response = llm.create_completion(
11
- f"USER: {message}\nASSISTANT:",
12
  max_tokens=512,
13
- temperature=0.7
 
 
14
  )
 
15
  return response['choices'][0]['text']
16
 
17
  demo = gr.ChatInterface(
18
  chat,
19
  title="YugoGPT Chat",
 
 
20
  )
21
 
22
- demo.launch(server_name="0.0.0.0", server_port=7860)
23
-
 
 
 
 
24
 
 
1
  import gradio as gr
2
  from llama_cpp import Llama
3
 
4
# Initialize with optimized settings.
# The model is loaded once at import time so every chat request reuses the
# same in-memory instance (llama_cpp model loads are expensive).
llm = Llama(
    model_path="yugogpt-q4_0.gguf",  # 4-bit quantized GGUF weights; assumed to sit next to app.py — TODO confirm
    n_ctx=2048,      # context window in tokens available to each completion
    n_batch=512,     # Increased batch size
    n_threads=4,     # Adjust based on CPU cores
    n_gpu_layers=0   # Set to higher number if GPU available
)
12
 
13
def chat(message, history):
    """Generate a model reply for *message*, conditioned on the conversation.

    Args:
        message: The latest user message (str).
        history: Gradio chat history as a list of [user, assistant] pairs;
            may be empty or None on the first turn.

    Returns:
        str: The model's completion text for the assistant turn.
    """
    # Bug fix: the original built the prompt from *message* alone and
    # ignored `history`, so the model lost all prior context every turn.
    # Replay previous exchanges before the new message.
    turns = []
    for user_turn, assistant_turn in (history or []):
        turns.append(f"USER: {user_turn}\nASSISTANT: {assistant_turn}\n")
    turns.append(f"USER: {message}\nASSISTANT:")
    full_prompt = "".join(turns)

    response = llm.create_completion(
        full_prompt,
        max_tokens=512,
        temperature=0.7,
        # NOTE(review): stopping on "\n" truncates every reply at its first
        # newline (single-line answers only) — confirm this is intended.
        # "USER:" prevents the model from writing the next user turn itself.
        stop=["USER:", "\n"],  # Better conversation control
        stream=False
    )
    # create_completion returns an OpenAI-style dict; take the first choice.
    return response['choices'][0]['text']
26
 
27
# Build the web chat UI; gr.ChatInterface wires `chat(message, history)`
# into a two-column conversation view.
demo = gr.ChatInterface(
    chat,  # callback invoked per user turn with (message, history)
    title="YugoGPT Chat",
    examples=["Hello, how are you?", "What's the weather like?"],  # Optional examples
    # NOTE(review): cache_examples=True runs the model on every example at
    # startup to pre-render replies — confirm the launch-time cost is acceptable.
    cache_examples=True
)
33
 
34
# Launch with optimized settings
demo.launch(
    server_name="0.0.0.0",  # bind all interfaces (needed inside containers/Spaces)
    server_port=7860,       # Gradio's conventional default port
    share=False             # no public gradio.live tunnel; serve locally only
)
40