import gradio as gr
from llama_cpp import Llama
import json
import os
import time

# Function to convert message history to a Llama-3 chat prompt
def prompt_from_messages(messages):
    """Render a list of chat messages into the Llama-3 prompt format.

    Each message becomes
    ``<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>``.
    The final ``<|eot_id|>`` is stripped so the model can continue the
    last turn.

    Args:
        messages: list of dicts with 'role' and 'content' keys.

    Returns:
        The concatenated prompt string ('' for an empty message list).
    """
    eot = '<|eot_id|>'
    prompt = ''
    for message in messages:
        prompt += f"<|start_header_id|>{message['role']}<|end_header_id|>\n\n"
        # NOTE: the original appended a stray literal '{}' here (a '{{}}'
        # f-string escape), which both polluted every turn and made the
        # trailing [:-10] slice cut through '<|eot_id|>' instead of
        # removing it.
        prompt += f"{message['content']}{eot}"
    # Drop exactly the trailing '<|eot_id|>' token.
    return prompt[:-len(eot)] if prompt.endswith(eot) else prompt

# Initialize the Llama model.
# Llama.from_pretrained downloads the GGUF weights from the Hugging Face Hub
# on first run (network + disk side effect) and loads them into memory, so
# this line can take a while and requires connectivity the first time.
llm = Llama.from_pretrained(
    repo_id='gallen881/Llama-3-8B-Physics_Master-GGUF',
    filename='unsloth.Q4_K_M.gguf',  # 4-bit (Q4_K_M) quantized weights
    n_ctx=2048,  # context window in tokens shared by prompt + completion
    verbose=False  # suppress llama.cpp load/log chatter
)

# Conversation state shared across calls, seeded with the system persona
# that frames every reply.
_system_message = {
    'role': 'system',
    'content': 'You are a professional physics master. Answer physics questions directly without using any external resources.'
}
messages = [_system_message]

# Function to handle user input and generate a response
def chat_with_physics_master(user_input):
    """Append the user's question to the shared history, stream a reply
    from the model, record it, and return the full history.

    Args:
        user_input: the user's question as plain text.

    Returns:
        A list of (role, content) tuples covering the whole conversation,
        suitable for display.
    """
    global messages  # Ensure we can modify the global messages variable

    # Record the user's turn before querying the model.
    messages.append({'role': 'user', 'content': user_input})

    # Stream the completion and accumulate content tokens.
    response = llm.create_chat_completion(
        messages=messages,
        stream=True
    )

    full_response = ""
    for chunk in response:
        delta = chunk['choices'][0]['delta']
        # Streaming deltas carry either a 'role' marker or a content token;
        # only the content contributes to the reply text.
        token = delta.get('content')
        if token:
            full_response += token

    # Append the assistant turn exactly once, after the stream completes.
    # The original wrote into messages[-1], which clobbered the user's
    # message whenever the stream did not begin with a 'role' delta.
    messages.append({'role': 'assistant', 'content': full_response})

    # Return the entire chat history for display.
    return [(msg['role'], msg['content']) for msg in messages]

# Gradio interface.
# The gr.inputs / gr.outputs namespaces were deprecated in Gradio 2.x and
# removed in 3.x+; components are constructed directly. The chat display
# component is Chatbot — 'gr.outputs.Chatbox' never existed, so the
# original crashed before launching.
iface = gr.Interface(
    fn=chat_with_physics_master,
    inputs=gr.Textbox(label="Ask a question"),
    outputs=gr.Chatbot(label="Chat History"),
    title="Physics Master Chatbot",
    description="Ask **Physics Master** any physics-related question.",
)

# Launch the Gradio app (blocks and serves the UI locally).
iface.launch()