Spaces:
Sleeping
Sleeping
import os
import re
from collections import Counter

import gradio as gr
import pandas as pd
from keybert import KeyBERT
from transformers import pipeline
# Initialize Hugging Face pipelines once at import time; every request handled
# by the app reuses these module-level objects.
sentiment_pipeline = pipeline(
    "sentiment-analysis",
    model="cardiffnlp/twitter-roberta-base-sentiment",
)
classification_pipeline = pipeline(
    "zero-shot-classification",
    model="facebook/bart-large-mnli",
)
# aggregation_strategy="simple" is the supported spelling of the deprecated
# grouped_entities=True flag; results still expose 'entity_group' and 'word'.
ner_pipeline = pipeline(
    "token-classification",
    model="dslim/bert-base-NER",
    aggregation_strategy="simple",
)
emotion_classifier = pipeline(
    "text-classification",
    model="j-hartmann/emotion-english-distilroberta-base",
    top_k=1,
)
# Intent detection uses the same task and checkpoint as issue classification,
# so share the already-loaded pipeline instead of loading the model twice.
intent_classifier = classification_pipeline
summary_generator = pipeline("summarization", model="facebook/bart-large-cnn")

# Initialize KeyBERT for topic generation
kw_model = KeyBERT()
# Translate the generic class ids emitted by the
# cardiffnlp/twitter-roberta-base-sentiment checkpoint into readable labels.
# The checkpoint is a three-class model (0 = negative, 1 = neutral,
# 2 = positive), so all three ids must be present.
label_mapping = {
    "LABEL_0": "NEGATIVE",
    "LABEL_1": "NEUTRAL",
    "LABEL_2": "POSITIVE",
}
# Function to generate topics using KeyBERT
def generate_topics_with_keybert(conversation):
    """Return the top KeyBERT keyphrase (one or two words) for a conversation.

    Args:
        conversation: Raw conversation text.

    Returns:
        The highest-ranked keyphrase, ``"Topic not identified"`` when KeyBERT
        yields no keywords, or an ``"Error in topic generation: ..."`` message
        when extraction fails. The result is always a string so it can be
        shown directly in a text output.
    """
    try:
        keywords = kw_model.extract_keywords(
            conversation,
            keyphrase_ngram_range=(1, 2),
            top_n=1,
        )
        if keywords:
            # Each entry is indexed as (phrase, score); keep only the phrase.
            return keywords[0][0]
        return "Topic not identified"
    except Exception as e:
        # Best-effort UI helper: report the failure as text instead of raising.
        return f"Error in topic generation: {e}"
# Function to generate summaries using Bart
def generate_summary(conversation):
    """Summarize a conversation with the module-level summarization pipeline.

    Args:
        conversation: Raw conversation text; over-long input is truncated by
            the pipeline (``truncation=True``).

    Returns:
        The stripped summary text, or an ``"Error in summary generation: ..."``
        message when the pipeline fails. The result is always a string so it
        can be shown directly in a text output.
    """
    try:
        summary = summary_generator(
            conversation,
            max_length=500,
            min_length=10,
            truncation=True,
        )
        return summary[0]["summary_text"].strip()
    except Exception as e:
        # Best-effort UI helper: report the failure as text instead of raising.
        return f"Error in summary generation: {e}"
# Function to analyze sentiment
def get_sentiment(text: str) -> str:
    """Classify the sentiment of a short piece of text.

    The first prediction's raw label is translated through ``label_mapping``;
    a label missing from that mapping becomes ``"Unknown sentiment"``.
    Failures are reported as an error string rather than raised.
    """
    try:
        raw_label = sentiment_pipeline(text)[0]['label']
        readable = label_mapping.get(raw_label, "Unknown sentiment")
    except KeyError as missing:
        return f"Error in label mapping: {missing}"
    except Exception as err:
        return f"Error in sentiment analysis: {err}"
    return readable
# Function to handle long conversations
def analyze_long_text_sentiment(text: str, chunk_size: int = 512) -> str:
    """Return the majority sentiment across fixed-size chunks of ``text``.

    The text is cut into consecutive slices of ``chunk_size`` characters
    (characters, not tokens), each slice is scored with ``get_sentiment`` and
    the most frequent result wins. Ties go to the label seen first, which
    keeps the outcome deterministic.

    Args:
        text: Conversation text of any length.
        chunk_size: Number of characters per chunk; must be positive.

    Returns:
        The most frequent per-chunk sentiment, or ``"Unknown sentiment"`` when
        ``text`` is empty (there is nothing to vote on).

    Raises:
        ValueError: If ``chunk_size`` is not positive.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    if not text:
        # No chunks means no votes; max() on an empty tally would raise.
        return "Unknown sentiment"
    chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]
    votes = Counter(get_sentiment(chunk) for chunk in chunks)
    return votes.most_common(1)[0][0]
# Function to detect emotions in short text
def get_emotion(text: str) -> str:
    """Return the capitalized top emotion label for a short piece of text.

    Args:
        text: Text to classify.

    Returns:
        The top label with its first letter capitalized, or an
        ``"Error in emotion detection: ..."`` message when classification
        fails.
    """
    try:
        result = emotion_classifier(text)
        top = result[0]
        # NOTE(review): because the classifier is built with top_k, some
        # transformers versions return one list of scores per input
        # ([[{...}]]) instead of a flat list ([{...}]). Accept both shapes;
        # confirm against the pinned transformers version.
        if isinstance(top, list):
            top = top[0]
        return top['label'].capitalize()
    except Exception as e:
        return f"Error in emotion detection: {e}"
# Function to handle long conversations for emotion detection
def analyze_long_text_emotion(text: str, chunk_size: int = 512) -> str:
    """Return the majority emotion across fixed-size chunks of ``text``.

    The text is cut into consecutive slices of ``chunk_size`` characters
    (characters, not tokens), each slice is scored with ``get_emotion`` and
    the most frequent result wins. Ties go to the label seen first, which
    keeps the outcome deterministic.

    Args:
        text: Conversation text of any length.
        chunk_size: Number of characters per chunk; must be positive.

    Returns:
        The most frequent per-chunk emotion, or ``"Unknown emotion"`` when
        ``text`` is empty (there is nothing to vote on).

    Raises:
        ValueError: If ``chunk_size`` is not positive.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    if not text:
        # No chunks means no votes; max() on an empty tally would raise.
        return "Unknown emotion"
    chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]
    votes = Counter(get_emotion(chunk) for chunk in chunks)
    return votes.most_common(1)[0][0]
# Function to classify issue with optional user-provided labels
def classify_issue(text, user_labels=None):
    """Pick the best-matching issue category for ``text``.

    Zero-shot classification is run against ``user_labels`` when that list is
    non-empty, otherwise against the built-in support categories. Returns the
    top-ranked label, or an error string if classification fails.
    """
    try:
        # Fall back to the built-in categories when no labels were supplied.
        candidate_labels = user_labels or [
            "General Inquiry",
            "Product Information and Features",
            "Feedback and Suggestions",
            "Complaint about Product or Service",
            "Account or Login Issues",
            "Billing, Payment, or Refund Issues",
            "Technical Support and Troubleshooting",
            "Subscription or Plan Changes",
            "Delivery or Shipping Issues",
            "Cancellation Requests",
            "Warranty or Guarantee Claims",
            "Promotions, Discounts, or Offers",
            "Order Tracking",
            "Customer Onboarding or Setup",
            "Other (Uncategorized)",
        ]
        ranked = classification_pipeline(text, candidate_labels=candidate_labels)
        best_label = ranked['labels'][0]
        return best_label
    except Exception as err:
        return f"Error in issue classification: {err}"
# Function to classify resolution status
def get_resolution_status(text):
    """Classify how the conversation ended.

    Runs zero-shot classification over four fixed statuses and returns the
    top-ranked one, or an error string if classification fails.
    """
    status_options = ["Resolved", "Unresolved", "Escalated", "Pending"]
    try:
        ranked = classification_pipeline(text, candidate_labels=status_options)
        return ranked['labels'][0]
    except Exception as err:
        return f"Error in resolution status classification: {err}"
# Function to extract named entities
def extract_entities(text):
    """Run NER over ``text`` and return the entities in a compact form.

    Returns a list of ``{"type": ..., "text": ...}`` dicts built from each
    result's ``entity_group`` and ``word``, or ``{"error": ...}`` if the
    pipeline call or the conversion fails.
    """
    try:
        found = []
        for item in ner_pipeline(text):
            found.append({"type": item['entity_group'], "text": item['word']})
        return found
    except Exception as err:
        return {"error": f"Error in NER: {err}"}
''' | |
# Function to detect emotion | |
def detect_emotion(text): | |
try: | |
return emotion_classifier(text, truncation=True)[0]['label'] | |
except Exception as e: | |
return f"Error in emotion detection: {e}" | |
''' | |
# Function to detect intent
def detect_intent(text):
    """Return the most likely customer intent for ``text``.

    Runs zero-shot classification over four fixed intents and returns the
    top-ranked one, or an error string if classification fails.
    """
    intent_options = ["Request for Help", "Complaint", "General Inquiry", "Feedback"]
    try:
        ranked = intent_classifier(text, candidate_labels=intent_options)
        top_intent = ranked['labels'][0]
    except Exception as err:
        return f"Error in intent classification: {err}"
    return top_intent
# Function to calculate agent metrics
def calculate_agent_metrics(conversation):
    """Count resolution/escalation keywords and derive a 1-5 effort score.

    The conversation is lower-cased and stripped of punctuation, then each
    keyword or phrase is counted as a whole-word match. The Customer Effort
    Score (CES) maps the balance ``(resolved - escalated) / (resolved +
    escalated)``, which lies in [-1, 1], linearly onto 1..5. Only escalations
    gives 1, an even split gives 3 and only resolutions gives 5. When no
    keyword is found the score is the neutral 3.

    Args:
        conversation: Raw conversation text.

    Returns:
        A DataFrame with columns ``Metric``, ``Value`` and ``Explanation`` and
        one row each for ``resolved``, ``escalated`` and ``CES``. If anything
        fails, a one-row DataFrame holding ``"Error"`` and the error message
        is returned instead.
    """
    try:
        # Define keywords for metrics
        agent_keywords = {
            "resolved": ["resolved", "solution provided", "fixed", "closed", "completed", "problem solved"],
            "escalated": ["escalated", "forwarded", "passed to another team", "escalate", "referred to supervisor"],
        }

        # Normalize the conversation so punctuation cannot break phrase matches.
        conversation_cleaned = re.sub(r'[^\w\s]', '', conversation.lower())

        # Whole-word occurrences of every keyword, summed per metric.
        metrics = {
            key: sum(
                len(re.findall(rf'\b{re.escape(keyword)}\b', conversation_cleaned))
                for keyword in keywords
            )
            for key, keywords in agent_keywords.items()
        }

        resolved_count = metrics["resolved"]
        escalated_count = metrics["escalated"]
        total = resolved_count + escalated_count
        if total > 0:
            # balance is in [-1, 1]; 3 + 2 * balance therefore stays in [1, 5]
            # with 3 as the neutral midpoint described in the legend.
            balance = (resolved_count - escalated_count) / total
            ces_score = round(3 + 2 * balance)
        else:
            # If no keywords are found, return neutral CES score
            ces_score = 3
        metrics["CES"] = ces_score

        legend = {
            "resolved": "Count of resolution-related keywords (e.g., 'resolved', 'solution provided').",
            "escalated": "Count of escalation-related keywords (e.g., 'escalated', 'referred to supervisor').",
            "CES": (
                "Customer Effort Score (CES):\n"
                "1 - High effort (poor experience, likely due to escalations).\n"
                "3 - Neutral effort (no significant resolutions or escalations detected).\n"
                "5 - Low effort (great experience, mostly resolved issues)."
            ),
        }

        # One row per metric with its explanation alongside; this avoids the
        # duplicate "Metric" column a side-by-side concat would produce.
        return pd.DataFrame(
            [(name, value, legend[name]) for name, value in metrics.items()],
            columns=["Metric", "Value", "Explanation"],
        )
    except Exception as e:
        return pd.DataFrame([["Error", f"Error in agent metrics calculation: {e}"]])
# Main analysis function
def analyze_conversation(conversation, custom_labels):
    """Run every analysis on a conversation and collect the results.

    Args:
        conversation: Raw conversation text.
        custom_labels: Optional comma-separated issue categories. Surrounding
            whitespace is trimmed and empty entries (for example from
            ``"a,,b"`` or a trailing comma) are discarded. When nothing usable
            remains, the default categories of ``classify_issue`` apply.

    Returns:
        A 10-tuple in the order of the interface outputs: summary, issue
        category, topic, intent, sentiment, resolution status, entities,
        emotion, agent metrics and a fixed score-explanation string.
    """
    # Parse custom labels; blank entries would otherwise reach the zero-shot
    # classifier as empty candidate labels.
    stripped = (label.strip() for label in (custom_labels or "").split(","))
    user_labels = [label for label in stripped if label] or None

    sentiment = analyze_long_text_sentiment(conversation)
    issue_category = classify_issue(conversation, user_labels=user_labels)
    resolution_status = get_resolution_status(conversation)
    entities = extract_entities(conversation)
    emotion = analyze_long_text_emotion(conversation)
    intent = detect_intent(conversation)
    agent_metrics = calculate_agent_metrics(conversation)
    topic = generate_topics_with_keybert(conversation)
    summary = generate_summary(conversation)
    return (
        summary,
        issue_category,
        topic,
        intent,
        sentiment,
        resolution_status,
        entities,
        emotion,
        agent_metrics,
        "Score Explanation: Higher scores indicate smoother interactions.",
    )
# Input widgets, in the order of analyze_conversation's parameters.
_INPUT_COMPONENTS = [
    gr.Textbox(
        lines=10,
        placeholder="Enter the conversation here...",
        label="Conversation",
    ),
    gr.Textbox(
        lines=2,
        placeholder="Enter custom classification labels (comma-separated, optional)",
        label="Custom Labels",
    ),
]

# Output widgets, in the order of analyze_conversation's returned tuple.
_OUTPUT_COMPONENTS = [
    gr.Textbox(label="Generated Summary"),
    gr.Textbox(label="Issue Category"),
    gr.Textbox(label="Generated Topic"),
    gr.Textbox(label="Intent"),
    gr.Textbox(label="Sentiment"),
    gr.Textbox(label="Resolution Status"),
    gr.JSON(label="Entities"),
    gr.Textbox(label="Emotion"),
    gr.Dataframe(label="Agent Metrics", type="pandas"),
    gr.Textbox(label="Score Explanation", interactive=False),
]

# Wire the analysis function to the Gradio UI.
interface = gr.Interface(
    fn=analyze_conversation,
    inputs=_INPUT_COMPONENTS,
    outputs=_OUTPUT_COMPONENTS,
    title="Customer Support Conversation Analyzer",
    description=(
        "Analyze customer support conversations to extract sentiment, issue category, resolution status, "
        "entities, emotion, intent, agent metrics, topics, and summaries. You can also provide custom classification labels."
    ),
)

# Launch the Gradio app
interface.launch(debug=True)