import gradio as gr
from llama_cpp import Llama

# Build a Llama-3-style prompt string from a message history.
# Note: the Gradio app below uses create_chat_completion(), which applies the
# chat template itself, so this helper is only needed for a raw-prompt path.
def prompt_from_messages(messages):
    prompt = ''
    for message in messages:
        prompt += f"<|start_header_id|>{message['role']}<|end_header_id|>\n\n"
        prompt += f"{message['content']}<|eot_id|>"
    # Drop the trailing <|eot_id|> so the model can continue the last turn
    prompt = prompt[:-len('<|eot_id|>')]
    return prompt

# Load the quantized Physics Master model from the Hugging Face Hub
llm = Llama.from_pretrained(
    repo_id='gallen881/Llama-3-8B-Physics_Master-GGUF',
    filename='unsloth.Q4_K_M.gguf',
    n_ctx=2048,
    verbose=False
)

# Chat history, seeded with the system prompt
messages = [
    {
        'role': 'system',
        'content': 'You are a professional physics master. Answer physics questions directly without using any external resources.'
    }
]

# Handle one user turn: append the question, stream the model's answer,
# and return the updated history as (user, assistant) pairs for the Chatbot.
def chat_with_physics_master(user_input):
    # Append the user message (mutating the module-level list; no `global` needed)
    messages.append({'role': 'user', 'content': user_input})

    # Stream the response from Physics Master and accumulate the tokens
    full_response = ''
    response = llm.create_chat_completion(
        messages=messages,
        stream=True
    )
    for chunk in response:
        delta = chunk['choices'][0]['delta']
        if 'role' in delta:
            # The first chunk announces the assistant role; add a placeholder message
            messages.append({'role': delta['role'], 'content': ''})
        elif 'content' in delta:
            full_response += delta['content']

    # Once the full response is received, store it in the chat history
    messages[-1]['content'] = full_response

    # gr.Chatbot expects a list of (user, assistant) pairs; skip the system prompt
    history = []
    for msg in messages:
        if msg['role'] == 'user':
            history.append([msg['content'], None])
        elif msg['role'] == 'assistant':
            history[-1][1] = msg['content']
    return history

# Gradio interface (gr.inputs.Textbox and gr.outputs.Chatbox are gone in
# current Gradio; use the gr.Textbox and gr.Chatbot components directly)
iface = gr.Interface(
    fn=chat_with_physics_master,
    inputs=gr.Textbox(label="Ask a question"),
    outputs=gr.Chatbot(label="Chat History"),
    title="Physics Master Chatbot",
    description="Ask **Physics Master** any physics-related question.",
)

# Launch the Gradio app
iface.launch()
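
# --- Sketch: using prompt_from_messages() with raw completion ---
# The app above relies on create_chat_completion(), so the helper is otherwise
# unused. As a rough illustration only (it assumes the Llama 3 chat template;
# max_tokens is an arbitrary example value), a raw-prompt call could look like:
#
#   prompt = prompt_from_messages(messages)
#   # Open the assistant turn so the model generates the answer
#   prompt += '<|start_header_id|>assistant<|end_header_id|>\n\n'
#   output = llm(prompt, max_tokens=512, stop=['<|eot_id|>'])
#   print(output['choices'][0]['text'])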