|
"""
|
|
Claude-Powered Sentiment Analyzer
|
|
Advanced sentiment analysis using Anthropic Claude API
|
|
"""
|
|
|
|
import asyncio
|
|
import logging
|
|
from typing import List, Dict, Any, Optional
|
|
from datetime import datetime
|
|
import os
|
|
import json
|
|
from anthropic import AsyncAnthropic
|
|
import time
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
class SentimentAnalyzer:
    """Advanced sentiment analysis using Claude and multiple models.

    When the Anthropic client cannot be created (missing/invalid key, import
    failure), all public entry points transparently fall back to randomized
    demo output so the surrounding pipeline keeps working.
    """

    def __init__(self):
        """Initialize sentiment analyzer with Claude API."""
        # Client stays None unless setup succeeds; public methods check this
        # and fall back to demo analysis when it is None.
        self.claude_client = None
        self.setup_claude_client()

    def setup_claude_client(self):
        """Set up the Claude API client, falling back to demo mode on failure."""
        try:
            # NOTE(review): "hackathon_demo_key" is a placeholder — real API
            # calls made with it will fail and surface through the error path.
            api_key = os.getenv("ANTHROPIC_API_KEY", "hackathon_demo_key")
            self.claude_client = AsyncAnthropic(api_key=api_key)
            logger.info("Claude API client initialized successfully")
        except Exception as e:
            logger.warning(f"Claude API setup failed: {str(e)}")
            self.claude_client = None

    async def analyze_sentiment_with_claude(self, texts: List[str], context: str = "") -> Dict[str, Any]:
        """
        Perform advanced sentiment analysis using Claude.

        Args:
            texts: List of texts to analyze
            context: Additional context for analysis

        Returns:
            Comprehensive sentiment analysis dict, or {"error": ...} on failure.
        """
        if not self.claude_client:
            return self._get_demo_claude_analysis(texts, context)

        try:
            logger.info(f"Starting Claude sentiment analysis for {len(texts)} texts")
            start_time = time.time()

            analysis_prompt = self._create_sentiment_prompt(texts, context)

            # BUG FIX: the Anthropic model id uses hyphens, not a dot —
            # "claude-3-5-sonnet-20241022". The previous "claude-3.5-sonnet-..."
            # id is rejected by the API, so every real call errored out.
            response = await self.claude_client.messages.create(
                model="claude-3-5-sonnet-20241022",
                max_tokens=2000,
                messages=[{
                    "role": "user",
                    "content": analysis_prompt
                }]
            )

            # Claude returns a list of content blocks; the JSON analysis is in
            # the first text block.
            analysis_text = response.content[0].text
            parsed_analysis = self._parse_claude_response(analysis_text)

            parsed_analysis.update({
                "analysis_method": "claude_3.5_sonnet",
                "processing_time": time.time() - start_time,
                "context": context,
                "total_texts": len(texts)
            })

            logger.info(f"Claude sentiment analysis completed in {parsed_analysis['processing_time']:.2f}s")
            return parsed_analysis

        except Exception as e:
            logger.error(f"Error in Claude sentiment analysis: {str(e)}")
            return {"error": str(e)}

    async def comprehensive_sentiment_analysis(self, data: Dict[str, Any], feature_description: str) -> Dict[str, Any]:
        """
        Perform comprehensive sentiment analysis across all data sources.

        Args:
            data: Combined collector output. Recognized (all optional) keys:
                "apps" (dict of app -> {"reviews": [...]}), "posts" (reddit),
                "articles" (news).
            feature_description: Feature being analyzed

        Returns:
            Synthesized multi-source sentiment analysis, or {"error": ...}.
        """
        try:
            source_texts = {}

            # App-store reviews: concatenate title + content per review.
            if "apps" in data:
                app_store_texts = []
                for app_name, app_data in data["apps"].items():
                    if "reviews" in app_data:
                        for review in app_data["reviews"]:
                            text = f"{review.get('title', '')} {review.get('content', '')}".strip()
                            if text:
                                app_store_texts.append(text)
                source_texts["app_store"] = app_store_texts

            # Reddit posts: title + selftext.
            if "posts" in data:
                reddit_texts = []
                for post in data["posts"]:
                    text = f"{post.get('title', '')} {post.get('selftext', '')}".strip()
                    if text:
                        reddit_texts.append(text)
                source_texts["reddit"] = reddit_texts

            # News articles: title + description.
            if "articles" in data:
                news_texts = []
                for article in data["articles"]:
                    text = f"{article.get('title', '')} {article.get('description', '')}".strip()
                    if text:
                        news_texts.append(text)
                source_texts["news"] = news_texts

            # Analyze each non-empty source independently (cap at 50 texts per
            # source to bound prompt size), then synthesize across sources.
            source_analyses = {}
            for source_name, texts in source_texts.items():
                if texts:
                    context = f"Analyzing {source_name} sentiment for feature: {feature_description}"
                    analysis = await self.analyze_sentiment_with_claude(texts[:50], context)
                    source_analyses[source_name] = analysis

            comprehensive_analysis = self._synthesize_multi_source_sentiment(
                source_analyses, feature_description
            )

            return comprehensive_analysis

        except Exception as e:
            logger.error(f"Error in comprehensive sentiment analysis: {str(e)}")
            return {"error": str(e)}

    def _create_sentiment_prompt(self, texts: List[str], context: str) -> str:
        """Create a comprehensive prompt for Claude sentiment analysis.

        Samples at most 20 texts and truncates each to 200 chars to keep the
        prompt bounded.
        """
        texts_sample = texts[:20]
        # BUG FIX: join with a real newline ("\n"). The original used "\\n",
        # i.e. the two-character sequence backslash+n, which collapsed every
        # numbered text onto one long line in the prompt.
        texts_str = "\n".join(
            f"{i+1}. {text[:200]}..." if len(text) > 200 else f"{i+1}. {text}"
            for i, text in enumerate(texts_sample)
        )

        prompt = f"""
You are an expert market sentiment analyst. Analyze the following texts for sentiment regarding product features and market reception.

Context: {context}

Texts to analyze:
{texts_str}

Please provide a comprehensive analysis in JSON format with the following structure:

{{
    "overall_sentiment": {{
        "dominant_sentiment": "positive/negative/neutral",
        "confidence": 0.0-1.0,
        "sentiment_distribution": {{
            "positive_count": number,
            "negative_count": number,
            "neutral_count": number,
            "positive_percentage": number,
            "negative_percentage": number,
            "neutral_percentage": number
        }}
    }},
    "key_themes": [
        {{
            "theme": "theme description",
            "sentiment": "positive/negative/neutral",
            "frequency": number,
            "example_quotes": ["quote1", "quote2"]
        }}
    ],
    "sentiment_drivers": {{
        "positive_drivers": ["driver1", "driver2"],
        "negative_drivers": ["driver1", "driver2"],
        "neutral_aspects": ["aspect1", "aspect2"]
    }},
    "market_insights": {{
        "user_needs": ["need1", "need2"],
        "pain_points": ["pain1", "pain2"],
        "feature_requests": ["request1", "request2"],
        "competitive_gaps": ["gap1", "gap2"]
    }},
    "confidence_indicators": {{
        "sample_size": {len(texts)},
        "text_quality": "high/medium/low",
        "consistency": 0.0-1.0,
        "reliability_score": 0.0-1.0
    }}
}}

Focus on:
1. Identifying genuine user sentiment vs. promotional content
2. Extracting actionable market insights
3. Understanding feature-specific feedback
4. Assessing market readiness and demand

Provide only the JSON response, no additional text.
"""
        return prompt

    def _parse_claude_response(self, response_text: str) -> Dict[str, Any]:
        """Parse Claude's JSON response.

        Extracts the outermost {...} span (Claude sometimes wraps the JSON in
        extra prose) and decodes it. On failure, returns a neutral fallback
        carrying the error and a truncated raw response for debugging.
        """
        try:
            cleaned_text = response_text.strip()

            # Locate the widest brace-delimited span; everything outside it is
            # assumed to be wrapper prose.
            start_idx = cleaned_text.find('{')
            end_idx = cleaned_text.rfind('}') + 1

            if start_idx != -1 and end_idx > start_idx:
                json_str = cleaned_text[start_idx:end_idx]
                parsed = json.loads(json_str)
                return parsed
            else:
                raise ValueError("No JSON found in response")

        except (json.JSONDecodeError, ValueError) as e:
            logger.error(f"Error parsing Claude response: {str(e)}")

            return {
                "overall_sentiment": {
                    "dominant_sentiment": "neutral",
                    "confidence": 0.5,
                    "sentiment_distribution": {
                        "positive_count": 0,
                        "negative_count": 0,
                        "neutral_count": 1
                    }
                },
                "error": f"Failed to parse response: {str(e)}",
                "raw_response": response_text[:500]
            }

    def _synthesize_multi_source_sentiment(self, source_analyses: Dict[str, Any],
                                           feature_description: str) -> Dict[str, Any]:
        """Synthesize sentiment analysis from multiple sources.

        Aggregates per-source sentiment counts into a cross-source consensus
        and a feature recommendation. Sources whose analysis contains an
        "error" key are excluded from aggregation.
        """
        synthesis = {
            "feature_description": feature_description,
            "source_count": len(source_analyses),
            "sources_analyzed": list(source_analyses.keys()),
            "cross_source_sentiment": {
                "consensus_sentiment": "neutral",
                "confidence": 0.0,
                "source_agreement": 0.0
            },
            "source_specific": source_analyses,
            "unified_insights": {
                "common_themes": [],
                "divergent_opinions": [],
                "market_opportunities": [],
                "risk_factors": []
            },
            "recommendation": {
                "overall_viability": "unknown",
                "confidence_level": "low",
                "key_considerations": []
            },
            "analyzed_at": datetime.now().isoformat()
        }

        valid_sources = [s for s in source_analyses.values() if "error" not in s]

        if valid_sources:
            total_positive = 0
            total_negative = 0
            total_neutral = 0
            total_confidence = 0

            for source_analysis in valid_sources:
                overall = source_analysis.get("overall_sentiment", {})
                dist = overall.get("sentiment_distribution", {})

                total_positive += dist.get("positive_count", 0)
                total_negative += dist.get("negative_count", 0)
                total_neutral += dist.get("neutral_count", 0)
                total_confidence += overall.get("confidence", 0)

            total_samples = total_positive + total_negative + total_neutral

            # BUG FIX: everything below depends on names (consensus,
            # avg_confidence, source_agreement) that only exist once we have
            # at least one sample, so the recommendation is generated inside
            # this guard — avoiding a NameError when all distributions are
            # empty. The neutral defaults above remain in that case.
            if total_samples > 0:
                pos_pct = (total_positive / total_samples) * 100
                neg_pct = (total_negative / total_samples) * 100
                neu_pct = (total_neutral / total_samples) * 100

                # Negative gets a lower threshold (40%) than positive (50%):
                # negative signal is treated as more decisive.
                if pos_pct > 50:
                    consensus = "positive"
                elif neg_pct > 40:
                    consensus = "negative"
                else:
                    consensus = "neutral"

                avg_confidence = total_confidence / len(valid_sources)
                source_agreement = self._calculate_source_agreement(valid_sources)

                synthesis["cross_source_sentiment"] = {
                    "consensus_sentiment": consensus,
                    "confidence": avg_confidence,
                    "source_agreement": source_agreement,
                    "sentiment_distribution": {
                        "positive_percentage": pos_pct,
                        "negative_percentage": neg_pct,
                        "neutral_percentage": neu_pct,
                        "total_samples": total_samples
                    }
                }

                synthesis["recommendation"] = self._generate_feature_recommendation(
                    consensus, avg_confidence, source_agreement, valid_sources
                )

        return synthesis

    def _calculate_source_agreement(self, source_analyses: List[Dict[str, Any]]) -> float:
        """Calculate agreement between different sources.

        Returns the fraction of sources sharing the most common dominant
        sentiment (1.0 when fewer than two sources).
        """
        if len(source_analyses) < 2:
            return 1.0

        sentiments = []
        for analysis in source_analyses:
            sentiment = analysis.get("overall_sentiment", {}).get("dominant_sentiment", "neutral")
            sentiments.append(sentiment)

        from collections import Counter
        sentiment_counts = Counter(sentiments)
        max_agreement = max(sentiment_counts.values())
        agreement = max_agreement / len(sentiments)

        return agreement

    def _generate_feature_recommendation(self, consensus: str, confidence: float,
                                         agreement: float, analyses: List[Dict[str, Any]]) -> Dict[str, Any]:
        """Generate feature recommendation based on sentiment analysis.

        Maps (consensus, confidence, agreement) onto a viability tier and
        collects up to 5 deduplicated considerations from per-source insights.
        """
        if consensus == "positive" and confidence > 0.7 and agreement > 0.6:
            viability = "high"
            conf_level = "high"
        elif consensus == "positive" and confidence > 0.5:
            viability = "medium"
            conf_level = "medium"
        elif consensus == "negative" and confidence > 0.6:
            viability = "low"
            conf_level = "high"
        else:
            viability = "uncertain"
            conf_level = "low"

        considerations = []
        for analysis in analyses:
            insights = analysis.get("market_insights", {})
            considerations.extend(insights.get("pain_points", []))
            considerations.extend(insights.get("feature_requests", []))

        # NOTE: set() deduplication does not preserve order; acceptable here
        # since considerations are an unordered summary.
        considerations = list(set(considerations))[:5]

        return {
            "overall_viability": viability,
            "confidence_level": conf_level,
            "consensus_sentiment": consensus,
            "key_considerations": considerations,
            "recommendation_score": (confidence + agreement) / 2,
            "sample_quality": "high" if len(analyses) >= 2 else "medium"
        }

    def _get_demo_claude_analysis(self, texts: List[str], context: str) -> Dict[str, Any]:
        """Generate demo analysis when Claude API is not available.

        Output shape mirrors the real Claude analysis; counts and scores are
        randomized, and "demo_data": True flags the result as synthetic.
        """
        import random

        positive_themes = ["easy to use", "great features", "saves time", "reliable", "innovative"]
        negative_themes = ["too expensive", "poor support", "bugs", "confusing interface", "missing features"]
        neutral_themes = ["average performance", "basic functionality", "standard quality"]

        total_texts = len(texts)
        # BUG FIX: empty input previously raised ZeroDivisionError in the
        # percentage math below; return a zero-sample neutral result instead.
        if total_texts == 0:
            return {
                "overall_sentiment": {
                    "dominant_sentiment": "neutral",
                    "confidence": 0.0,
                    "sentiment_distribution": {
                        "positive_count": 0,
                        "negative_count": 0,
                        "neutral_count": 0,
                        "positive_percentage": 0.0,
                        "negative_percentage": 0.0,
                        "neutral_percentage": 0.0
                    }
                },
                "key_themes": [],
                "sentiment_drivers": {
                    "positive_drivers": [],
                    "negative_drivers": [],
                    "neutral_aspects": []
                },
                "market_insights": {
                    "user_needs": [],
                    "pain_points": [],
                    "feature_requests": [],
                    "competitive_gaps": []
                },
                "confidence_indicators": {
                    "sample_size": 0,
                    "text_quality": "low",
                    "consistency": 0.0,
                    "reliability_score": 0.0
                },
                "demo_data": True
            }

        positive_count = random.randint(int(total_texts * 0.2), int(total_texts * 0.6))
        negative_count = random.randint(int(total_texts * 0.1), int(total_texts * 0.4))
        neutral_count = total_texts - positive_count - negative_count

        dominant = "positive" if positive_count > negative_count and positive_count > neutral_count else \
                   "negative" if negative_count > neutral_count else "neutral"

        return {
            "overall_sentiment": {
                "dominant_sentiment": dominant,
                "confidence": random.uniform(0.6, 0.9),
                "sentiment_distribution": {
                    "positive_count": positive_count,
                    "negative_count": negative_count,
                    "neutral_count": neutral_count,
                    "positive_percentage": (positive_count / total_texts) * 100,
                    "negative_percentage": (negative_count / total_texts) * 100,
                    "neutral_percentage": (neutral_count / total_texts) * 100
                }
            },
            "key_themes": [
                {
                    "theme": random.choice(positive_themes if dominant == "positive" else negative_themes),
                    "sentiment": dominant,
                    "frequency": random.randint(3, 8),
                    "example_quotes": [f"Demo quote about {context}"]
                }
            ],
            "sentiment_drivers": {
                "positive_drivers": random.sample(positive_themes, 2),
                "negative_drivers": random.sample(negative_themes, 2),
                "neutral_aspects": random.sample(neutral_themes, 2)
            },
            "market_insights": {
                "user_needs": ["better integration", "cost efficiency"],
                "pain_points": ["complexity", "limited features"],
                "feature_requests": ["mobile app", "API access"],
                "competitive_gaps": ["automation", "user experience"]
            },
            "confidence_indicators": {
                "sample_size": total_texts,
                "text_quality": "medium",
                "consistency": random.uniform(0.6, 0.9),
                "reliability_score": random.uniform(0.7, 0.9)
            },
            "demo_data": True
        }
|
|
|
|
|
|
async def test_sentiment_analyzer():
    """Smoke-test SentimentAnalyzer against a handful of sample opinions."""
    analyzer = SentimentAnalyzer()

    # Mixed-sentiment sample covering positive, negative, and neutral takes.
    sample_opinions = [
        "This feature would be amazing for our restaurant!",
        "I don't think this is worth the cost",
        "It's okay, nothing special but works fine",
        "Excellent idea, we need this ASAP",
        "Too complicated for small businesses",
    ]

    print("Testing Claude sentiment analysis...")
    result = await analyzer.analyze_sentiment_with_claude(
        sample_opinions, "AI voice ordering feature for restaurants"
    )

    dominant = result.get('overall_sentiment', {}).get('dominant_sentiment', 'unknown')
    print(f"Claude analysis: {dominant} sentiment")

    return result
|
|
|
|
if __name__ == "__main__":
    # Run the async smoke test when executed directly as a script.
    asyncio.run(test_sentiment_analyzer())
|
|
|