cigol123 committed
Commit 9411e9a · verified · 1 Parent(s): 56a94a4

Update app.py

Files changed (1)
  1. app.py +8 -22
app.py CHANGED
@@ -1,40 +1,26 @@
 import gradio as gr
 from llama_cpp import Llama
 
-# Initialize with optimized settings
 llm = Llama(
     model_path="yugogpt-q4_0.gguf",
     n_ctx=2048,
-    n_batch=512,  # Increased batch size
-    n_threads=4,  # Adjust based on CPU cores
-    n_gpu_layers=0  # Set to higher number if GPU available
+    n_threads=4
 )
 
 def chat(message, history):
-    # Improved prompt formatting
-    full_prompt = "USER: " + message + "\nASSISTANT:"
-
-    response = llm.create_completion(
-        full_prompt,
+    response = llm(
+        f"USER: {message}\nASSISTANT:",
         max_tokens=512,
         temperature=0.7,
-        stop=["USER:", "\n"],  # Better conversation control
-        stream=False
+        stop=["USER:", "\n"]
     )
-
     return response['choices'][0]['text']
 
 demo = gr.ChatInterface(
-    chat,
-    title="YugoGPT Chat",
-    examples=["Hello, how are you?", "What's the weather like?"],  # Optional examples
-    cache_examples=True
+    fn=chat,
+    title="YugoGPT Chat"
 )
 
-# Launch with optimized settings
-demo.launch(
-    server_name="0.0.0.0",
-    server_port=7860,
-    share=False
-)
+if __name__ == "__main__":
+    demo.launch(server_name="0.0.0.0", server_port=7860)
 
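Net effect: the commit strips the tuning knobs (n_batch, n_gpu_layers), the streaming flag, the intermediate prompt variable, the example prompts, and the extra launch options, leaving a minimal app. For reference, below is a sketch of app.py as it reads after this commit, reconstructed from the diff above; the inline comments are added here for explanation and are not part of the commit.

import gradio as gr
from llama_cpp import Llama

# Quantized YugoGPT weights; n_ctx caps the context window and
# n_threads sets how many CPU threads run inference.
llm = Llama(
    model_path="yugogpt-q4_0.gguf",
    n_ctx=2048,
    n_threads=4
)

def chat(message, history):
    # Calling llm(...) delegates to Llama.create_completion() in
    # llama-cpp-python. Generation halts at "USER:" or the first
    # newline, so each reply is a single line.
    response = llm(
        f"USER: {message}\nASSISTANT:",
        max_tokens=512,
        temperature=0.7,
        stop=["USER:", "\n"]
    )
    # history is supplied by ChatInterface but unused here, so the
    # model sees no prior turns of the conversation.
    return response['choices'][0]['text']

demo = gr.ChatInterface(
    fn=chat,
    title="YugoGPT Chat"
)

if __name__ == "__main__":
    # 0.0.0.0:7860 is the standard binding for a Gradio app on Spaces.
    demo.launch(server_name="0.0.0.0", server_port=7860)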