# CIA_Gradio / app.py
import gradio as gr
from transformers import pipeline
from keybert import KeyBERT
import pandas as pd
import re
# Initialize Hugging Face pipelines
sentiment_pipeline = pipeline("sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment")
classification_pipeline = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
# aggregation_strategy="simple" is the current form of the deprecated grouped_entities=True
ner_pipeline = pipeline("token-classification", model="dslim/bert-base-NER", aggregation_strategy="simple")
emotion_classifier = pipeline("text-classification", model="j-hartmann/emotion-english-distilroberta-base", top_k=1)
# Reuse the zero-shot pipeline for intent detection rather than loading BART-MNLI a second time
intent_classifier = classification_pipeline
summary_generator = pipeline("summarization", model="facebook/bart-large-cnn")
# Initialize KeyBERT for topic generation
kw_model = KeyBERT()
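# Device note (assumption, not part of the original app): each pipeline above
# accepts a `device` argument; a common pattern for GPU-backed Spaces is:
#   import torch
#   device = 0 if torch.cuda.is_available() else -1
#   pipeline("sentiment-analysis", model=..., device=device)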
# Map the raw model labels to human-readable sentiments.
# cardiffnlp/twitter-roberta-base-sentiment is a three-class model (negative/neutral/positive).
label_mapping = {
    "LABEL_0": "NEGATIVE",
    "LABEL_1": "NEUTRAL",
    "LABEL_2": "POSITIVE"
}
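# Sanity-check sketch: the raw label names can be read straight off the model
# config (id2label is a standard attribute on Hugging Face model configs):
#   print(sentiment_pipeline.model.config.id2label)
#   # e.g. {0: 'LABEL_0', 1: 'LABEL_1', 2: 'LABEL_2'}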
# Function to generate topics using KeyBERT
def generate_topics_with_keybert(conversation):
    try:
        # Extract the single highest-scoring keyphrase as the topic
        keywords = kw_model.extract_keywords(conversation, keyphrase_ngram_range=(1, 2), top_n=1)
        if keywords:
            return keywords[0][0]  # (phrase, score) tuple; return the phrase
        return "Topic not identified"
    except Exception as e:
        return f"Error in topic generation: {e}"
# Function to generate summaries using Bart
def generate_summary(conversation):
    try:
        # Use the BART summarization model; truncation guards against over-long inputs
        summary = summary_generator(conversation, max_length=500, min_length=10, truncation=True)
        return summary[0]["summary_text"].strip()
    except Exception as e:
        return f"Error in summary generation: {e}"
# Function to analyze sentiment
def get_sentiment(text: str) -> str:
    try:
        result = sentiment_pipeline(text)
        sentiment_label = result[0]['label']
        return label_mapping.get(sentiment_label, "Unknown sentiment")
    except KeyError as ke:
        return f"Error in label mapping: {ke}"
    except Exception as e:
        return f"Error in sentiment analysis: {e}"
# Function to handle long conversations
def analyze_long_text_sentiment(text: str, chunk_size: int = 512) -> str:
    # Split the text into character chunks of the specified size
    chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]
    sentiments = [get_sentiment(chunk) for chunk in chunks]
    if not sentiments:
        return "Unknown sentiment"  # guard against empty input
    # Aggregate results by counting the frequency of each sentiment
    sentiment_counts = {label: sentiments.count(label) for label in set(sentiments)}
    return max(sentiment_counts, key=sentiment_counts.get)  # Return the most frequent sentiment
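# Alternative sketch (assumption): character chunks only approximate the model's
# 512-token window; chunking with the pipeline's own tokenizer tracks the real
# limit. `chunk_by_tokens` is a hypothetical helper, not part of the app.
def chunk_by_tokens(text: str, max_tokens: int = 400):
    tokenizer = sentiment_pipeline.tokenizer
    ids = tokenizer.encode(text, add_special_tokens=False)
    return [
        tokenizer.decode(ids[i:i + max_tokens], skip_special_tokens=True)
        for i in range(0, len(ids), max_tokens)
    ]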
# Function to detect emotions in short text
def get_emotion(text: str) -> str:
    try:
        result = emotion_classifier(text)
        emotion_label = result[0]['label']
        return emotion_label.capitalize()
    except Exception as e:
        return f"Error in emotion detection: {e}"
# Function to handle long conversations for emotion detection
def analyze_long_text_emotion(text: str, chunk_size: int = 512) -> str:
    # Split the text into character chunks of the specified size
    chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]
    emotions = [get_emotion(chunk) for chunk in chunks]
    if not emotions:
        return "Unknown"  # guard against empty input
    # Aggregate results by counting the frequency of each emotion
    emotion_counts = {label: emotions.count(label) for label in set(emotions)}
    return max(emotion_counts, key=emotion_counts.get)  # Return the most frequent emotion
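# Alternative aggregation sketch (assumption, not the app's method): weight each
# chunk's top label by the classifier's confidence instead of a plain majority
# vote. `analyze_long_text_emotion_weighted` is a hypothetical variant.
def analyze_long_text_emotion_weighted(text: str, chunk_size: int = 512) -> str:
    totals = {}
    for i in range(0, len(text), chunk_size):
        res = emotion_classifier(text[i:i + chunk_size])[0]  # same shape get_emotion relies on
        label = res["label"].capitalize()
        totals[label] = totals.get(label, 0.0) + res["score"]
    return max(totals, key=totals.get) if totals else "Unknown"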
# Function to classify the issue, with optional user-provided labels
def classify_issue(text, user_labels=None):
    try:
        # Default labels if the user does not provide any
        default_labels = [
            "General Inquiry",
            "Product Information and Features",
            "Feedback and Suggestions",
            "Complaint about Product or Service",
            "Account or Login Issues",
            "Billing, Payment, or Refund Issues",
            "Technical Support and Troubleshooting",
            "Subscription or Plan Changes",
            "Delivery or Shipping Issues",
            "Cancellation Requests",
            "Warranty or Guarantee Claims",
            "Promotions, Discounts, or Offers",
            "Order Tracking",
            "Customer Onboarding or Setup",
            "Other (Uncategorized)"
        ]
        labels = user_labels if user_labels else default_labels  # Use user labels or fall back to defaults
        result = classification_pipeline(text, candidate_labels=labels)
        return result['labels'][0]  # Return the top-scoring label
    except Exception as e:
        return f"Error in issue classification: {e}"
# Function to classify resolution status
def get_resolution_status(text):
    try:
        labels = ["Resolved", "Unresolved", "Escalated", "Pending"]
        result = classification_pipeline(text, candidate_labels=labels)
        return result['labels'][0]
    except Exception as e:
        return f"Error in resolution status classification: {e}"
# Function to extract named entities
def extract_entities(text):
    try:
        entities = ner_pipeline(text)
        return [{"type": entity['entity_group'], "text": entity['word']} for entity in entities]
    except Exception as e:
        return {"error": f"Error in NER: {e}"}
'''
# Retained but disabled: duplicate of get_emotion above
def detect_emotion(text):
    try:
        return emotion_classifier(text, truncation=True)[0]['label']
    except Exception as e:
        return f"Error in emotion detection: {e}"
'''
# Function to detect intent
def detect_intent(text):
    try:
        intents = ["Request for Help", "Complaint", "General Inquiry", "Feedback"]
        return intent_classifier(text, candidate_labels=intents)['labels'][0]
    except Exception as e:
        return f"Error in intent classification: {e}"
# Function to calculate agent metrics
def calculate_agent_metrics(conversation):
    try:
        # Define keywords for each metric
        agent_keywords = {
            "resolved": ["resolved", "solution provided", "fixed", "closed", "completed", "problem solved"],
            "escalated": ["escalated", "forwarded", "passed to another team", "escalate", "referred to supervisor"],
        }
        # Normalize the conversation for matching
        conversation_cleaned = re.sub(r'[^\w\s]', '', conversation.lower())
        # Count whole-word occurrences of each keyword
        metrics = {key: 0 for key in agent_keywords}
        for key, keywords in agent_keywords.items():
            for keyword in keywords:
                metrics[key] += len(re.findall(rf'\b{re.escape(keyword)}\b', conversation_cleaned))
        # Calculate the CES score: the resolved/escalated balance scaled to 5, then clamped
        resolved_count = metrics["resolved"]
        escalated_count = metrics["escalated"]
        if resolved_count + escalated_count > 0:
            ces_score = (resolved_count - escalated_count) / (resolved_count + escalated_count) * 5
            ces_score = max(1, min(5, round(ces_score)))  # Bound the CES score to 1-5
        else:
            ces_score = 3  # No keywords found: neutral CES score
        metrics["CES"] = ces_score
        # Legend explaining each metric
        legend = {
            "resolved": "Count of resolution-related keywords (e.g., 'resolved', 'solution provided').",
            "escalated": "Count of escalation-related keywords (e.g., 'escalated', 'referred to supervisor').",
            "CES": (
                "Customer Effort Score (CES):\n"
                "1 - High effort (poor experience, likely due to escalations).\n"
                "3 - Neutral effort (no significant resolutions or escalations detected).\n"
                "5 - Low effort (great experience, mostly resolved issues)."
            )
        }
        # Join metrics with their explanations on the shared "Metric" key
        metrics_df = pd.DataFrame(list(metrics.items()), columns=["Metric", "Value"])
        legend_df = pd.DataFrame(list(legend.items()), columns=["Metric", "Explanation"])
        combined_df = metrics_df.merge(legend_df, on="Metric", how="left")
        return combined_df
    except Exception as e:
        return pd.DataFrame([["Error", f"Error in agent metrics calculation: {e}"]])
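# Worked example of the CES arithmetic: resolved=3, escalated=1 gives
# (3 - 1) / (3 + 1) * 5 = 2.5; round(2.5) is 2 under Python's banker's rounding,
# and max(1, min(5, 2)) leaves the final CES at 2.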
# Main analysis function
def analyze_conversation(conversation, custom_labels):
    # Parse custom labels if provided; otherwise, use None
    user_labels = [label.strip() for label in custom_labels.split(",")] if custom_labels else None
    sentiment = analyze_long_text_sentiment(conversation)
    issue_category = classify_issue(conversation, user_labels=user_labels)
    resolution_status = get_resolution_status(conversation)
    entities = extract_entities(conversation)
    emotion = analyze_long_text_emotion(conversation)
    intent = detect_intent(conversation)
    agent_metrics = calculate_agent_metrics(conversation)
    topic = generate_topics_with_keybert(conversation)
    summary = generate_summary(conversation)
    return (
        summary,
        issue_category,
        topic,
        intent,
        sentiment,
        resolution_status,
        entities,
        emotion,
        agent_metrics,
        "Score Explanation: Higher scores indicate smoother interactions."
    )
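# Smoke-test sketch (the sample conversation and output are illustrative):
#   summary, category, *rest = analyze_conversation(
#       "Customer: My refund never arrived. Agent: I have escalated this to billing.",
#       "",
#   )
#   print(category)  # e.g. "Billing, Payment, or Refund Issues"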
interface = gr.Interface(
    fn=analyze_conversation,
    inputs=[
        gr.Textbox(lines=10, placeholder="Enter the conversation here...", label="Conversation"),
        gr.Textbox(lines=2, placeholder="Enter custom classification labels (comma-separated, optional)", label="Custom Labels")
    ],
    outputs=[
        gr.Textbox(label="Generated Summary"),
        gr.Textbox(label="Issue Category"),
        gr.Textbox(label="Generated Topic"),
        gr.Textbox(label="Intent"),
        gr.Textbox(label="Sentiment"),
        gr.Textbox(label="Resolution Status"),
        gr.JSON(label="Entities"),
        gr.Textbox(label="Emotion"),
        gr.Dataframe(label="Agent Metrics", type="pandas"),
        gr.Textbox(label="Score Explanation", interactive=False)
    ],
    title="Customer Support Conversation Analyzer",
    description=(
        "Analyze customer support conversations to extract sentiment, issue category, resolution status, "
        "entities, emotion, intent, agent metrics, topics, and summaries. You can also provide custom classification labels."
    )
)
# Launch the Gradio app
interface.launch(debug=True)
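# Deployment note (assumption): on Hugging Face Spaces, launch() needs no extra
# arguments; for a quick local demo, interface.launch(share=True) opens a
# temporary public URL instead.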