# Page header captured from the Hugging Face file viewer (not program code):
#   BidhanAcharya's picture
#   Create app.py
#   3048550 verified
# torch is imported here only to probe for a GPU before loading the model.
import torch

# Hugging Face Hub checkpoint served by this app.
model_name = "BidhanAcharya/fine-tuned-sentiment-analyzer"

# Fall back to CPU so the app still starts on hosts without a CUDA device
# (a hard-coded "cuda" raises at startup there).
device = "cuda" if torch.cuda.is_available() else "cpu"

tokenizer = AutoTokenizer.from_pretrained(model_name)
# NOTE(review): analyze_sentiment parses "### Input:" / "### Response:" markers
# out of the decoded text, which presumes the prompt is echoed back as a causal
# LM would do -- confirm this checkpoint really is a seq2seq model.
model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to(device)
def prepare_inference_input(
    review: str,
    instruction: str = "You are good at reviewing positive, negative sentiment.\n\n",
) -> str:
    """Build the prompt string sent to the model for one review.

    Args:
        review: The user-supplied review text.
        instruction: Text placed before the input section; it is used
            verbatim, so it should carry its own trailing blank line.

    Returns:
        ``instruction`` followed by an ``### Input:`` section holding
        ``review`` and an empty ``### Response:`` header for the model to
        complete.
    """
    return f"{instruction}### Input:\n{review}\n### Response:"
# First "### Response:" section of the decoded text. The section ends at the
# next "###" header or at the end of the text, so a single (non-repeated)
# answer is still captured.
_RESPONSE_PATTERN = r"### Response:[ \t]*\n?(.*?)(?=\n###|\Z)"


def _extract_response(decoded_output):
    """Return the first response section of *decoded_output*, or None if empty."""
    match = re.search(_RESPONSE_PATTERN, decoded_output, re.DOTALL)
    # Only the first section is used: the model may repeat the same
    # Input/Response pair until it runs out of new tokens.
    # When the marker is absent the prompt was not echoed back, so the decoded
    # text itself is taken as the answer.
    text = match.group(1) if match else decoded_output
    return text.strip() or None


def analyze_sentiment(review):
    """Run the sentiment model on one review and return its answer.

    Args:
        review: Review text entered by the user.

    Returns:
        The model's response text with surrounding whitespace removed, or
        ``None`` when ``review`` is empty/blank or the model produced no
        usable text.
    """
    # Nothing to classify: skip the model call entirely.
    if not review or not review.strip():
        return None

    prompt = prepare_inference_input(review)

    # Put the inputs on whatever device the model was loaded on instead of
    # assuming CUDA.
    encoded = tokenizer([prompt], return_tensors="pt", padding=True).to(model.device)

    # NOTE(review): temperature/top_p only take effect when sampling is
    # enabled (do_sample=True); unless the checkpoint's generation config
    # turns sampling on, decoding here is greedy -- confirm the intent.
    output = model.generate(
        **encoded,
        max_new_tokens=128,
        use_cache=True,
        temperature=0.7,
        top_p=0.9,
    )

    # batch_decode returns a list; the batch holds a single prompt.
    decoded_output = tokenizer.batch_decode(output, skip_special_tokens=True)[0]
    return _extract_response(decoded_output)
# Gradio UI: one text box for the review, one for the model's answer.
review_box = gr.Textbox(lines=2, placeholder="Enter your review/sentiment here")
result_box = gr.Textbox(label="Sentiment Analysis Result")

# Wire the components to analyze_sentiment.
interface = gr.Interface(
    fn=analyze_sentiment,
    inputs=review_box,
    outputs=result_box,
    title="Sentiment Analysis",
    description="Enter a movie review to analyze its sentiment.",
)

# Start serving the app.
interface.launch()