File size: 969 Bytes
202d927
 
 
 
d6bb9af
231afb1
9411e9a
202d927
 
 
a2b69dd
 
 
 
 
 
9411e9a
a2b69dd
 
 
 
 
 
202d927
a2b69dd
202d927
 
 
9411e9a
a2b69dd
 
202d927
 
9411e9a
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import gradio as gr
from llama_cpp import Llama

# Load the quantized YugoGPT model once at import time so every chat request
# reuses the same in-memory model (model load is the expensive step).
llm = Llama(
    model_path="yugogpt-q4_0.gguf",  # Q4_0-quantized GGUF file; must exist in the working directory
    n_ctx=2048,    # context window in tokens (prompt + completion must fit)
    n_threads=4    # CPU threads for inference — tune to the host's core count
)

def chat(message, history):
    """Generate one reply to `message` with the local llama.cpp model.

    Args:
        message: The user's latest input string.
        history: Prior turns passed in by gr.ChatInterface. Currently
            unused — each reply is generated from `message` alone, so the
            bot has no conversational memory across turns.

    Returns:
        The model's completion text, stripped of surrounding whitespace.
    """
    # The canned "Let me provide..." lead-in primes the model toward
    # longer, structured answers; it is part of the prompt, not the output.
    prompt = f"""USER: {message}
ASSISTANT: Let me provide you with a comprehensive and thoughtful response.

"""

    response = llm(
        prompt,
        max_tokens=1024,        # room for detailed answers
        temperature=0.8,        # slightly increased creativity
        # Stop on a new user turn, and also if the model hallucinates a
        # fresh "ASSISTANT:" turn instead of finishing its answer.
        stop=["USER:", "ASSISTANT:"],
        repeat_penalty=1.2,     # reduce repetition
        top_p=0.95              # maintain focus while allowing creativity
    )

    # The prompt ends with a blank line, so completions often begin with
    # leading newlines/whitespace — strip so the UI shows clean text.
    return response['choices'][0]['text'].strip()

# Wire the chat function into Gradio's built-in chat UI; gr.ChatInterface
# supplies the (message, history) calling convention that `chat` expects.
demo = gr.ChatInterface(
    fn=chat,
    title="YugoGPT Chat",
    description="Ask me anything - I'll provide detailed and thoughtful responses."
)

if __name__ == "__main__":
    # Bind to all interfaces on port 7860 (Gradio's conventional port) so
    # the app is reachable from outside the container/host it runs in.
    demo.launch(server_name="0.0.0.0", server_port=7860)