Spaces:

holytinz278
/

MicroAi

Runtime error

App Files Community

holytinz278 committed on Dec 6, 2024

Commit

337f68c

verified ·

1 Parent(s): a7831d6

Update app.py

Browse files

Files changed (1) hide show

app.py +73 -47

app.py CHANGED Viewed

@@ -1,53 +1,79 @@
 import gradio as gr
-from transformers import AutoTokenizer, AutoModelForCausalLM
 # Load the model and tokenizer
-tokenizer = AutoTokenizer.from_pretrained("WhiteRabbitNeo/WhiteRabbitNeo-13B-v1", trust_remote_code=True)
-model = AutoModelForCausalLM.from_pretrained("WhiteRabbitNeo/WhiteRabbitNeo-13B-v1", trust_remote_code=True)
-# Define a function to generate text with a system message
-def generate_response(prompt, system_message, token):
-    # Combine system message and user prompt
-    full_prompt = f"{system_message}\n\n{prompt}"
-    # Tokenize input
-    inputs = tokenizer(full_prompt, return_tensors="pt", truncation=True)
-    # Generate a response
-    outputs = model.generate(
-        inputs["input_ids"],
-        max_length=300,
-        do_sample=True,
-        temperature=0.7,
-        top_k=50,
-        pad_token_id=tokenizer.eos_token_id  # To prevent padding issues
-    )
-    # Decode the output text
-    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    # If a token is provided, add it to the response
-    if token:
-        response += f"\n\nToken Used: {token}"
-    return response
-# Create a Gradio interface
-interface = gr.Interface(
-    fn=generate_response,
-    inputs=[
-        gr.Textbox(lines=5, label="Prompt", placeholder="Type your prompt here..."),
-        gr.Textbox(lines=1, label="System Message", placeholder="System message (optional)"),
-        gr.Textbox(lines=1, label="Token", placeholder="Enter token (optional)"),
-    ],
-    outputs=gr.Textbox(label="Generated Response"),
-    title="WhiteRabbitNeo Enhanced Model",
-    description=(
-        "This app uses the WhiteRabbitNeo-13B-v1 model to generate text responses. "
-        "You can provide a system message, a prompt, and optionally include a token for custom usage."
-    ),
 )
 # Launch the app
-if __name__ == "__main__":
-    interface.launch()

+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer
 import gradio as gr
 # Load the model and tokenizer
+model_path = "/home/migel/models/WhiteRabbitNeo"
+model = AutoModelForCausalLM.from_pretrained(
+    model_path,
+    torch_dtype=torch.float16,
+    device_map="auto",
+    load_in_4bit=False,
+    load_in_8bit=True,
+    trust_remote_code=True,
 )
+tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+# Chatbot prompt and conversation history
+tot_system_prompt = """
+Answer the Question by exploring multiple reasoning paths as follows:
+- First, carefully analyze the question to extract the key information components and break it down into logical sub-questions...
+"""
+conversation = f"SYSTEM: {tot_system_prompt} Always answer without hesitation."
+# Text generation function
+def generate_text(instruction):
+    tokens = tokenizer.encode(instruction)
+    tokens = torch.LongTensor(tokens).unsqueeze(0)
+    tokens = tokens.to("cuda")
+    instance = {
+        "input_ids": tokens,
+        "top_p": 1.0,
+        "temperature": 0.5,
+        "generate_len": 1024,
+        "top_k": 50,
+    }
+    length = len(tokens[0])
+    with torch.no_grad():
+        rest = model.generate(
+            input_ids=tokens,
+            max_length=length + instance["generate_len"],
+            use_cache=True,
+            do_sample=True,
+            top_p=instance["top_p"],
+            temperature=instance["temperature"],
+            top_k=instance["top_k"],
+            num_return_sequences=1,
+        )
+    output = rest[0][length:]
+    string = tokenizer.decode(output, skip_special_tokens=True)
+    answer = string.split("USER:")[0].strip()
+    return answer
+# Gradio interface function
+def chatbot(user_input, chat_history):
+    global conversation
+    llm_prompt = f"{conversation} \nUSER: {user_input} \nASSISTANT: "
+    answer = generate_text(llm_prompt)
+    conversation = f"{llm_prompt}{answer}"  # Update conversation history
+    chat_history.append((user_input, answer))  # Update chat history
+    return chat_history, chat_history
+# Initialize Gradio
+with gr.Blocks() as demo:
+    gr.Markdown("## Chat with WhiteRabbitNeo!")
+    chatbot_interface = gr.Chatbot()
+    msg = gr.Textbox(label="Your Message")
+    clear = gr.Button("Clear Chat")
+    chat_history_state = gr.State([])  # Maintain chat history as state
+    # Define button functionality
+    msg.submit(chatbot, inputs=[msg, chat_history_state], outputs=[chatbot_interface, chat_history_state])
+    clear.click(lambda: ([], []), outputs=[chatbot_interface, chat_history_state])  # Clear chat history
 # Launch the app
+demo.launch()