# chatbot / app.py
# (The following lines are Hugging Face file-viewer page metadata captured by
# scraping, kept here as comments so the file remains valid Python:)
# Rafay17's picture
# Update app.py
# a3af99d verified
# raw / history blame / 2.97 kB
from transformers import AutoTokenizer, TextStreamer
from unsloth import FastLanguageModel
import torch
# Load the model and tokenizer.
model_name = "Rafay17/Llama3.2_1b_customModle2"  # Use your specific model name
tokenizer = AutoTokenizer.from_pretrained(model_name)
# BUG FIX: FastLanguageModel.from_pretrained returns a (model, tokenizer)
# tuple — the original assigned the whole tuple to `model`, which breaks
# for_inference() and model.generate() below. Unpack it; unsloth's tokenizer
# (which matches the patched model) replaces the AutoTokenizer one above.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_name,
    max_seq_length=512,   # keep in sync with max_length used when tokenizing
    dtype="float16",      # NOTE(review): unsloth usually takes a torch dtype or None — confirm string is accepted
    load_in_4bit=True     # 4-bit quantization to reduce VRAM usage
)
FastLanguageModel.for_inference(model)  # enable unsloth's fast-inference path right after loading
# Function to generate a response
def generate_response(input_text):
    """Stream a labeled model response for the given user input to stdout.

    Builds a fixed instruction prompt asking the model to annotate its answer
    with conversation-analysis labels, tokenizes it, and streams the generated
    tokens through a TextStreamer. Nothing is returned.

    Args:
        input_text: Raw conversation details typed by the user.
    """
    # Prepare the labeled prompt for the model
    labeled_prompt = (
        "Please provide the response with the following labels:\n"
        "Speaker: [SPEAKER]\n"
        "Text: [TEXT]\n"
        "Sentiment: [SENTIMENT]\n"
        "Emotion: [EMOTION]\n"
        "Intent: [INTENT]\n"
        "Tone: [TONE]\n"
        "Confidence Level: [CONFIDENCE]\n"
        "Frustration Level: [FRUSTRATION]\n"
        "Response Length: [LENGTH]\n"
        "Action Required: [ACTION]\n"
        "Interruption: [INTERRUPTION]\n"
        "Cooperation Level: [COOPERATION]\n"
        "Clarity: [CLARITY]\n"
        "Objective: [OBJECTIVE]\n"
        "Timeline: [TIMELINE]\n"
        "Motivation: [MOTIVATION]\n"
        "Conversation Stage: [STAGE]\n"
        "Resolution: [RESOLUTION]\n"
        "Context: [CONTEXT]\n"
        "Urgency: [URGENCY]\n"
        "Problem Type: [PROBLEM]\n"
        "Key Words: [KEYWORDS]\n"
        "Expected Detail: [DETAIL]\n"
        "Time Gap: [TIME]\n"
        "Client Expectation: [EXPECTATION]\n"
        "Channel: [CHANNEL]\n"
        "Power Relationship: [POWER]\n\n"
        f"User Input: {input_text}\n"
        "Response:"
    )
    # ROBUSTNESS FIX: the original hard-coded .to("cuda"), which crashes on
    # CPU-only hosts. Place the inputs on whatever device the model lives on.
    device = next(model.parameters()).device
    inputs = tokenizer(
        [labeled_prompt],
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=512,  # must match the max_seq_length used at load time
    ).to(device)
    # skip_prompt=True so only newly generated tokens are printed, not the prompt.
    text_streamer = TextStreamer(tokenizer, skip_prompt=True)
    # Generate the response
    with torch.no_grad():  # disable gradient tracking for inference
        model.generate(
            input_ids=inputs.input_ids,
            attention_mask=inputs.attention_mask,
            streamer=text_streamer,
            max_new_tokens=100,  # cap on generated tokens; adjust as needed
            pad_token_id=tokenizer.eos_token_id,  # avoid pad-token warnings for Llama
        )
# Function to take user input and generate output
def user_interaction():
    """Read conversation details from stdin in a loop until the user quits.

    Each entry is passed to generate_response(); typing 'exit' (any case)
    ends the loop.
    """
    prompt = "Enter conversation details (or type 'exit' to quit): "
    while (details := input(prompt)).lower() != 'exit':
        print("Generating response for input:")
        generate_response(details)
    print("Exiting the program.")
# Start the user interaction only when run as a script, so importing this
# module (e.g. from tests or another app) does not block on stdin.
if __name__ == "__main__":
    user_interaction()