# chatbot / app.py
# (The following lines are Hugging Face file-viewer page metadata captured by
# scraping, kept here as comments so the file remains valid Python:)
# Rafay17's picture
# Update app.py
# a3af99d verified
# raw / history blame / 2.97 kB
from transformers import AutoTokenizer, TextStreamer
from unsloth import FastLanguageModel
import torch
# Load the model and tokenizer.
model_name = "Rafay17/Llama3.2_1b_customModle2"  # Use your specific model name
tokenizer = AutoTokenizer.from_pretrained(model_name)
# BUG FIX: FastLanguageModel.from_pretrained returns a (model, tokenizer)
# tuple — the original assigned the whole tuple to `model`, which breaks
# for_inference() and model.generate() below. Unpack it; unsloth's tokenizer
# (which matches the patched model) replaces the AutoTokenizer one above.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_name,
    max_seq_length=512,   # keep in sync with max_length used when tokenizing
    dtype="float16",      # NOTE(review): unsloth usually takes a torch dtype or None — confirm string is accepted
    load_in_4bit=True     # 4-bit quantization to reduce VRAM usage
)
FastLanguageModel.for_inference(model)  # enable unsloth's fast-inference path right after loading
# Function to generate a response
def generate_response(input_text):
    """Stream a labeled model response for the given user input to stdout.

    Builds a fixed instruction prompt asking the model to annotate its answer
    with conversation-analysis labels, tokenizes it, and streams the generated
    tokens through a TextStreamer. Nothing is returned.

    Args:
        input_text: Raw conversation details typed by the user.
    """
    # Prepare the labeled prompt for the model
    labeled_prompt = (
        "Please provide the response with the following labels:\n"
        "Speaker: [SPEAKER]\n"
        "Text: [TEXT]\n"
        "Sentiment: [SENTIMENT]\n"
        "Emotion: [EMOTION]\n"
        "Intent: [INTENT]\n"
        "Tone: [TONE]\n"
        "Confidence Level: [CONFIDENCE]\n"
        "Frustration Level: [FRUSTRATION]\n"
        "Response Length: [LENGTH]\n"
        "Action Required: [ACTION]\n"
        "Interruption: [INTERRUPTION]\n"
        "Cooperation Level: [COOPERATION]\n"
        "Clarity: [CLARITY]\n"
        "Objective: [OBJECTIVE]\n"
        "Timeline: [TIMELINE]\n"
        "Motivation: [MOTIVATION]\n"
        "Conversation Stage: [STAGE]\n"
        "Resolution: [RESOLUTION]\n"
        "Context: [CONTEXT]\n"
        "Urgency: [URGENCY]\n"
        "Problem Type: [PROBLEM]\n"
        "Key Words: [KEYWORDS]\n"
        "Expected Detail: [DETAIL]\n"
        "Time Gap: [TIME]\n"
        "Client Expectation: [EXPECTATION]\n"
        "Channel: [CHANNEL]\n"
        "Power Relationship: [POWER]\n\n"
        f"User Input: {input_text}\n"
        "Response:"
    )
    # ROBUSTNESS FIX: the original hard-coded .to("cuda"), which crashes on
    # CPU-only hosts. Place the inputs on whatever device the model lives on.
    device = next(model.parameters()).device
    inputs = tokenizer(
        [labeled_prompt],
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=512,  # must match the max_seq_length used at load time
    ).to(device)
    # skip_prompt=True so only newly generated tokens are printed, not the prompt.
    text_streamer = TextStreamer(tokenizer, skip_prompt=True)
    # Generate the response
    with torch.no_grad():  # disable gradient tracking for inference
        model.generate(
            input_ids=inputs.input_ids,
            attention_mask=inputs.attention_mask,
            streamer=text_streamer,
            max_new_tokens=100,  # cap on generated tokens; adjust as needed
            pad_token_id=tokenizer.eos_token_id,  # avoid pad-token warnings for Llama
        )
# Function to take user input and generate output
def user_interaction():
    """Read conversation details from stdin in a loop until the user quits.

    Each entry is passed to generate_response(); typing 'exit' (any case)
    ends the loop.
    """
    prompt = "Enter conversation details (or type 'exit' to quit): "
    while (details := input(prompt)).lower() != 'exit':
        print("Generating response for input:")
        generate_response(details)
    print("Exiting the program.")
# Start the user interaction only when run as a script, so importing this
# module (e.g. from tests or another app) does not block on stdin.
if __name__ == "__main__":
    user_interaction()