import gradio as gr
from transformers import pipeline
from keybert import KeyBERT
import pandas as pd
import re

# Initialize Hugging Face pipelines
sentiment_pipeline = pipeline("sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment")
classification_pipeline = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
# grouped_entities is deprecated; aggregation_strategy="simple" merges sub-tokens into entities
ner_pipeline = pipeline("token-classification", model="dslim/bert-base-NER", aggregation_strategy="simple")
emotion_classifier = pipeline("text-classification", model="j-hartmann/emotion-english-distilroberta-base", top_k=1)
summary_generator = pipeline("summarization", model="facebook/bart-large-cnn")

# Reuse the zero-shot pipeline for intent detection instead of loading
# facebook/bart-large-mnli a second time
intent_classifier = classification_pipeline

# Initialize KeyBERT for topic generation
kw_model = KeyBERT()

# Map the model's raw labels to readable sentiments. The
# cardiffnlp/twitter-roberta-base-sentiment model has three classes:
# LABEL_0 = negative, LABEL_1 = neutral, LABEL_2 = positive.
label_mapping = {
    "LABEL_0": "NEGATIVE",
    "LABEL_1": "NEUTRAL",
    "LABEL_2": "POSITIVE",
}


# Function to generate topics using KeyBERT
def generate_topics_with_keybert(conversation):
    try:
        # Extract the single best keyword/phrase as the topic
        keywords = kw_model.extract_keywords(conversation, keyphrase_ngram_range=(1, 2), top_n=1)
        if keywords:
            return keywords[0][0]  # (keyword, score) tuples; return the top keyword
        return "Topic not identified"
    except Exception as e:
        return f"Error in topic generation: {e}"


# Function to generate summaries using BART
def generate_summary(conversation):
    try:
        summary = summary_generator(conversation, max_length=500, min_length=10, truncation=True)
        return summary[0]["summary_text"].strip()
    except Exception as e:
        return f"Error in summary generation: {e}"


# Function to analyze sentiment
def get_sentiment(text: str) -> str:
    try:
        result = sentiment_pipeline(text)
        sentiment_label = result[0]["label"]
        return label_mapping.get(sentiment_label, "Unknown sentiment")
    except Exception as e:
        return f"Error in sentiment analysis: {e}"


# Function to handle long conversations: chunk the text, classify each chunk,
# and return the most frequent sentiment
def analyze_long_text_sentiment(text: str, chunk_size: int = 512) -> str:
    chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]
    sentiments = [get_sentiment(chunk) for chunk in chunks]
    # Aggregate results by counting the frequency of each sentiment
    sentiment_counts = {label: sentiments.count(label) for label in set(sentiments)}
    return max(sentiment_counts, key=sentiment_counts.get)
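
# Note: the character-based chunking above only approximates the model's
# 512-token input limit, since tokens rarely map 1:1 to characters.
# A minimal token-aware alternative (a sketch; it assumes the tokenizer is
# reachable as sentiment_pipeline.tokenizer, which is standard for
# transformers pipelines):
def chunk_by_tokens(text: str, max_tokens: int = 510) -> list:
    # Encode once, split the token ids into model-sized windows, and decode
    # each window back to text. 510 leaves room for the special tokens the
    # model adds at inference time.
    tokenizer = sentiment_pipeline.tokenizer
    ids = tokenizer.encode(text, add_special_tokens=False)
    return [
        tokenizer.decode(ids[i:i + max_tokens], skip_special_tokens=True)
        for i in range(0, len(ids), max_tokens)
    ]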

# Function to detect emotions in short text
def get_emotion(text: str) -> str:
    try:
        result = emotion_classifier(text)
        # Depending on the transformers version, top_k=1 yields either
        # [{"label": ...}] or a nested [[{"label": ...}]]; handle both
        if result and isinstance(result, list):
            first = result[0]
            if isinstance(first, list) and first:
                first = first[0]
            if isinstance(first, dict) and "label" in first:
                return first["label"].capitalize()  # Capitalize for readability
        return "Unknown emotion"  # Fallback if no label found
    except Exception as e:
        return f"Error in emotion detection: {e}"


# Function to handle long conversations for emotion detection
def analyze_long_text_emotion(text: str, chunk_size: int = 512) -> str:
    chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]
    emotions = [get_emotion(chunk) for chunk in chunks]
    # Aggregate results by counting the frequency of each emotion
    emotion_counts = {label: emotions.count(label) for label in set(emotions)}
    return max(emotion_counts, key=emotion_counts.get)  # Most frequent emotion


# Function to classify the issue, with optional user-provided labels
def classify_issue(text, user_labels=None):
    try:
        # Default labels if the user does not provide any
        default_labels = [
            "General Inquiry",
            "Product Information and Features",
            "Feedback and Suggestions",
            "Complaint about Product or Service",
            "Account or Login Issues",
            "Billing, Payment, or Refund Issues",
            "Technical Support and Troubleshooting",
            "Subscription or Plan Changes",
            "Delivery or Shipping Issues",
            "Cancellation Requests",
            "Warranty or Guarantee Claims",
            "Promotions, Discounts, or Offers",
            "Order Tracking",
            "Customer Onboarding or Setup",
            "Other (Uncategorized)",
        ]
        labels = user_labels if user_labels else default_labels
        result = classification_pipeline(text, candidate_labels=labels)
        return result["labels"][0]  # Labels come back sorted by score; take the top one
    except Exception as e:
        return f"Error in issue classification: {e}"


# Function to classify resolution status
def get_resolution_status(text):
    try:
        labels = ["Resolved", "Unresolved", "Escalated", "Pending"]
        result = classification_pipeline(text, candidate_labels=labels)
        return result["labels"][0]
    except Exception as e:
        return f"Error in resolution status classification: {e}"


# Function to extract named entities
def extract_entities(text):
    try:
        entities = ner_pipeline(text)
        return [{"type": entity["entity_group"], "text": entity["word"]} for entity in entities]
    except Exception as e:
        return {"error": f"Error in NER: {e}"}


# Function to detect intent
def detect_intent(text):
    try:
        intents = ["Request for Help", "Complaint", "General Inquiry", "Feedback"]
        return intent_classifier(text, candidate_labels=intents)["labels"][0]
    except Exception as e:
        return f"Error in intent classification: {e}"


# Function to calculate agent metrics
def calculate_agent_metrics(conversation):
    try:
        # Keywords that signal resolution or escalation
        agent_keywords = {
            "resolved": ["resolved", "solution provided", "fixed", "closed", "completed", "problem solved"],
            "escalated": ["escalated", "forwarded", "passed to another team", "escalate", "referred to supervisor"],
        }

        # Normalize the conversation for matching
        conversation_cleaned = re.sub(r"[^\w\s]", "", conversation.lower())

        # Count occurrences of each keyword
        metrics = {key: 0 for key in agent_keywords}
        for key, keywords in agent_keywords.items():
            for keyword in keywords:
                metrics[key] += len(re.findall(rf"\b{re.escape(keyword)}\b", conversation_cleaned))

        # Calculate the CES score: (resolved - escalated) / (resolved + escalated) * 5,
        # clamped to the 1-5 range; neutral (3) when no keywords are found
        resolved_count = metrics["resolved"]
        escalated_count = metrics["escalated"]
        if resolved_count + escalated_count > 0:
            ces_score = (resolved_count - escalated_count) / (resolved_count + escalated_count) * 5
            ces_score = max(1, min(5, round(ces_score)))  # Bound CES score to 1-5
        else:
            ces_score = 3
        metrics["CES"] = ces_score

        # Legend explaining each metric
        legend = {
            "resolved": "Count of resolution-related keywords (e.g., 'resolved', 'solution provided').",
            "escalated": "Count of escalation-related keywords (e.g., 'escalated', 'referred to supervisor').",
            "CES": (
                "Customer Effort Score (CES):\n"
                "1 - High effort (poor experience, likely due to escalations).\n"
                "3 - Neutral effort (no significant resolutions or escalations detected).\n"
                "5 - Low effort (great experience, mostly resolved issues)."
            ),
        }

        # Align each metric with its explanation; a plain concat along axis=1
        # would produce two unaligned "Metric" columns
        metrics_df = pd.DataFrame(list(metrics.items()), columns=["Metric", "Value"])
        legend_df = pd.DataFrame(list(legend.items()), columns=["Metric", "Explanation"])
        combined_df = metrics_df.merge(legend_df, on="Metric", how="left")
        return combined_df
    except Exception as e:
        return pd.DataFrame([["Error", f"Error in agent metrics calculation: {e}"]], columns=["Metric", "Value"])
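
# Worked example of the CES formula above, with hypothetical counts: for
# 3 "resolved" hits and 1 "escalated" hit,
#   raw CES = (3 - 1) / (3 + 1) * 5 = 2.5
# Python's round() uses banker's rounding, so round(2.5) == 2, and the
# clamp to the 1-5 range leaves the final CES at 2.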

# Main analysis function
def analyze_conversation(conversation, custom_labels):
    # Parse custom labels if provided (dropping empty entries); otherwise, use None
    user_labels = [label.strip() for label in custom_labels.split(",") if label.strip()] if custom_labels else None

    sentiment = analyze_long_text_sentiment(conversation)
    issue_category = classify_issue(conversation, user_labels=user_labels)
    resolution_status = get_resolution_status(conversation)
    entities = extract_entities(conversation)
    emotion = analyze_long_text_emotion(conversation)
    intent = detect_intent(conversation)
    agent_metrics = calculate_agent_metrics(conversation)
    topic = generate_topics_with_keybert(conversation)
    summary = generate_summary(conversation)

    return (
        summary,
        issue_category,
        topic,
        intent,
        sentiment,
        resolution_status,
        entities,
        emotion,
        agent_metrics,
        "Score Explanation: CES ranges from 1 (high effort) to 5 (low effort); higher scores indicate smoother interactions.",
    )


interface = gr.Interface(
    fn=analyze_conversation,
    inputs=[
        gr.Textbox(lines=10, placeholder="Enter the conversation here...", label="Conversation"),
        gr.Textbox(lines=2, placeholder="Enter custom classification labels (comma-separated, optional)", label="Custom Labels"),
    ],
    outputs=[
        gr.Textbox(label="Generated Summary"),
        gr.Textbox(label="Issue Category"),
        gr.Textbox(label="Generated Topic"),
        gr.Textbox(label="Intent"),
        gr.Textbox(label="Sentiment"),
        gr.Textbox(label="Resolution Status"),
        gr.JSON(label="Entities"),
        gr.Textbox(label="Emotion"),
        gr.Dataframe(label="Agent Metrics", type="pandas"),
        gr.Textbox(label="Score Explanation", interactive=False),
    ],
    title="Customer Support Conversation Analyzer",
    description=(
        "Analyze customer support conversations to extract sentiment, issue category, resolution status, "
        "entities, emotion, intent, agent metrics, topics, and summaries. You can also provide custom classification labels."
    ),
)

# Launch the Gradio app
interface.launch(debug=True)
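
# Optional smoke test (commented out so it does not run alongside the app;
# the sample conversation below is made up for illustration):
#
#   sample = (
#       "Customer: My order arrived damaged and I want a refund. "
#       "Agent: I'm sorry about that. I've issued a refund, so the issue is resolved."
#   )
#   print(analyze_conversation(sample, ""))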