Rakesh2205 committed on
Commit bea8706 · verified · 1 Parent(s): 3f8f986

Upload supervisor_agent.py

Files changed (1)
  1. supervisor_agent.py +428 -89
supervisor_agent.py CHANGED
@@ -16,6 +16,8 @@ load_dotenv()
16
 
17
  GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
18
  TAVILY_API_KEY = os.getenv("TAVILY_API_KEY")
 
 
19
 
20
  if not GEMINI_API_KEY:
21
  raise ValueError("GEMINI_API_KEY not found in environment variables")
@@ -24,6 +26,17 @@ if not TAVILY_API_KEY:
24
 
25
  os.environ["GOOGLE_API_KEY"] = GEMINI_API_KEY
26
 
27
  # Initialize Tavily client for real-time web search
28
  tavily_client = TavilyClient(api_key=TAVILY_API_KEY)
29
 
@@ -47,39 +60,256 @@ class ProfileAnalysisResult(BaseModel):
47
 
48
  @tool
49
  def research_person_profile(first_name: str, last_name: str, known_company: str = "") -> Dict[str, Any]:
50
- """Research a person's current professional profile using real-time web search."""
51
-
52
  try:
53
- # Search for current professional information
54
- search_query = f'"{first_name} {last_name}" current job title company LinkedIn'
55
- search_results = tavily_client.search(
56
- query=search_query,
57
  search_depth="advanced",
58
- include_domains=["linkedin.com", "crunchbase.com", "zoominfo.com"],
59
  max_results=5
60
  )
61
-
62
- # Also search for recent news/articles about the person
63
- news_query = f'"{first_name} {last_name}" new job company change recent'
64
  news_results = tavily_client.search(
65
  query=news_query,
66
  search_depth="basic",
67
- include_domains=["techcrunch.com", "linkedin.com", "twitter.com"],
68
  max_results=3
69
  )
70
 
71
- # Return structured data, not hardcoded values
72
  return {
73
  "current_company": "Unknown", # Will be filled by AI analysis
74
  "current_title": "Unknown", # Will be filled by AI analysis
75
- "confidence": 0.7,
76
- "search_results": search_results.get("results", []),
77
  "news_results": news_results.get("results", []),
78
- "research_notes": f"AI analyzed {len(search_results.get('results', []))} search results and {len(news_results.get('results', []))} news articles"
 
 
79
  }
80
-
81
  except Exception as e:
82
- # Return Dict, not JSON string (fixes the type mismatch)
83
  return {
84
  "name": f"{first_name} {last_name}",
85
  "error": f"Search failed: {str(e)}",
@@ -88,38 +318,81 @@ def research_person_profile(first_name: str, last_name: str, known_company: str
88
 
89
  @tool
90
  def detect_job_change(person_name: str, previous_company: str, current_company: str) -> Dict[str, Any]:
91
- """Analyze if person has changed jobs using real-time company relationship research."""
92
 
93
  try:
94
- # Research company relationships and recent changes
95
- relationship_query = f'"{previous_company}" "{current_company}" merger acquisition rebranding subsidiary parent company relationship'
96
- relationship_results = tavily_client.search(
97
- query=relationship_query,
98
- search_depth="advanced",
99
- include_domains=["crunchbase.com", "linkedin.com", "wikipedia.org", "bloomberg.com"],
100
- max_results=5
101
- )
102
 
103
- # Search for recent news about company changes
104
- news_query = f'"{previous_company}" "{current_company}" company change news announcement'
105
  news_results = tavily_client.search(
106
  query=news_query,
107
  search_depth="basic",
108
- include_domains=["techcrunch.com", "linkedin.com", "twitter.com", "news.ycombinator.com"],
109
- max_results=3
110
  )
111
 
112
- # Return structured data for AI analysis
113
  return {
114
- "person": person_name,
115
- "previous_company": previous_company,
116
- "current_company": current_company,
117
  "job_change_detected": "Unknown", # Will be determined by AI
118
- "confidence": 0.8,
119
- "reason": "Requires AI analysis of search results",
120
- "relationship_search": relationship_results.get("results", []),
121
  "news_search": news_results.get("results", []),
122
- "ai_analysis": f"AI analyzed {len(relationship_results.get('results', []))} relationship results and {len(news_results.get('results', []))} news articles"
123
  }
124
 
125
  except Exception as e:
@@ -331,11 +604,17 @@ supervisor = create_supervisor(
331
  "icp_assessor (evaluates ICP fit based on current role), and "
332
  "email_finder (discovers business email patterns using company research). "
333
 
334
- "COORDINATION STRATEGY:"
335
- "1. Start with profile_researcher to get current employment info"
336
- "2. Use job_analyst to determine if there was a job change"
337
- "3. Use icp_assessor to evaluate ICP fit based on current role"
338
- "4. Use email_finder to discover business email at current company"
339
 
340
  "CRITICAL REQUIREMENT: After all agents complete their work, you MUST provide a FINAL SYNTHESIS "
341
  "that clearly states the following information in a structured format:"
@@ -346,8 +625,8 @@ supervisor = create_supervisor(
346
  "- Most Probable Business Email: [email]"
347
 
348
  "Each agent will provide search results that you need to analyze intelligently. "
349
- "Coordinate their research efforts sequentially and ensure each agent has the context "
350
- "they need from previous agents' findings. Your final synthesis is crucial for data extraction."
351
  )
352
  ).compile()
353
 
@@ -356,31 +635,69 @@ supervisor = create_supervisor(
356
  # =============================================================================
357
 
358
  def extract_data_with_ai(agent_responses: List[str], profile_input: Dict) -> ProfileAnalysisResult:
359
- """Use AI to extract structured data from agent responses"""
360
-
361
- # Very simple, direct prompt
362
  extraction_prompt = f"""
363
- Extract profile data from this text. Return ONLY valid JSON:
364
-
365
- Text: {agent_responses[0]}
366
-
367
- JSON format:
368
- {{
369
- "currentCompany": "company name",
370
- "title": "job title",
371
- "isAJobChange": true/false,
372
- "isAnICP": true/false,
373
- "probableBusinessEmail": "email"
374
- }}
375
  """
376
-
377
  try:
378
  response = llm.invoke(extraction_prompt)
379
-
380
  if not response.content or not response.content.strip():
381
  raise ValueError("LLM returned empty response")
382
-
383
- # Clean response
384
  content = response.content.strip()
385
  if "```json" in content:
386
  start = content.find("```json") + 7
@@ -392,14 +709,20 @@ def extract_data_with_ai(agent_responses: List[str], profile_input: Dict) -> Pro
392
  end = content.find("```", start)
393
  if end != -1:
394
  content = content[start:end]
395
-
396
  content = content.strip()
397
 print(f"🔍 Cleaned Response: {content}")
398
-
399
- # Parse JSON
400
  extracted_data = json.loads(content)
401
-
402
- # Validate and create result
403
  return ProfileAnalysisResult(
404
  fn=profile_input.get("fn", ""),
405
  ln=profile_input.get("ln", ""),
@@ -409,18 +732,14 @@ def extract_data_with_ai(agent_responses: List[str], profile_input: Dict) -> Pro
409
  isAnICP=bool(extracted_data.get("isAnICP", False)),
410
  probableBusinessEmail=extracted_data.get("probableBusinessEmail", "Unknown")
411
  )
412
-
413
  except Exception as e:
414
 print(f"❌ AI extraction failed: {e}")
415
-
416
- # Create fallback result instead of raising error
417
  fallback_email = f"{profile_input.get('fn', '').lower()}.{profile_input.get('ln', '').lower()}@{profile_input.get('company', 'company').lower().replace(' ', '')}.com"
418
-
419
  return ProfileAnalysisResult(
420
  fn=profile_input.get("fn", ""),
421
  ln=profile_input.get("ln", ""),
422
- currentCompany=profile_input.get("company", "Unknown"),
423
- title=profile_input.get("title", "Unknown"),
424
  isAJobChange=False,
425
  isAnICP=False,
426
  probableBusinessEmail=fallback_email
@@ -502,7 +821,7 @@ def analyze_profile_with_progress(profile_input: Dict[str, Any], progress) -> Pr
502
  """Analyze profile with progress updates for Gradio UI"""
503
 
504
  try:
505
- progress(0.1, desc="🔍 Initializing analysis...")
506
 
507
  # Create analysis request with specific instructions
508
  query = f"""
@@ -532,30 +851,45 @@ def analyze_profile_with_progress(profile_input: Dict[str, Any], progress) -> Pr
532
  Use your specialized agents and provide complete results.
533
  """
534
 
535
- progress(0.2, desc="🤖 Starting LangGraph supervisor...")
536
 
537
  # Run supervisor with react agents and collect all results
538
  agent_results = {}
539
  all_messages = []
540
  agent_count = 0
 
 
541
 
542
  # Let LangGraph handle the flow control automatically
543
  for chunk in supervisor.stream({
544
  "messages": [{"role": "user", "content": query}]
545
  }):
546
  print(chunk)
 
547
 
548
- # Update progress based on agent activity
549
  for agent_name in ['profile_researcher', 'job_analyst', 'icp_assessor', 'email_finder']:
550
  if agent_name in chunk:
551
  if agent_name not in agent_results:
552
  agent_results[agent_name] = chunk[agent_name]
553
  agent_count += 1
554
- progress(0.2 + (agent_count * 0.15), desc=f"🔄 {agent_name.replace('_', ' ').title()} working...")
555
 
556
- # Collect all messages for analysis
557
- if 'supervisor' in chunk and 'messages' in chunk['supervisor']:
558
- all_messages.extend(chunk['supervisor']['messages'])
 
 
 
559
 
560
 progress(0.8, desc="📊 Processing final results...")
561
 
@@ -586,7 +920,7 @@ def analyze_profile_with_progress(profile_input: Dict[str, Any], progress) -> Pr
586
  agent_responses = [supervisor_synthesis]
587
  result = extract_data_with_ai(agent_responses, profile_input)
588
 
589
- progress(1.0, desc="✅ Analysis complete!")
590
 
591
  return result
592
 
@@ -630,7 +964,7 @@ def main():
630
 
631
  print("\n" + "=" * 60)
632
 
633
- # Test Case 2: No Job Change (Rebranding BuyerAssist -> getboomerang.ai)
634
  test_case_2 = {
635
  "fn": "Amit",
636
  "ln": "Dugar",
@@ -641,12 +975,13 @@ def main():
641
  "icp": "The person has to be in senior position in Engineer Vertical like VP Engineering, CTO, Research Fellow"
642
  }
643
 
644
- print("📋 TEST CASE 2 ")
645
-
 
646
 
647
  result2 = analyze_profile(test_case_2)
648
 
649
-
650
  print(json.dumps(result2.model_dump(), indent=2))
651
 
652
  return result1, result2
@@ -719,13 +1054,16 @@ with gr.Blocks(title="Profile Analyzer App", theme=gr.themes.Soft(), css="""
719
  # Status box (ultra-compact)
720
  status_box = gr.Textbox(
721
 label="🔄 Status",
722
- value="Ready",
723
  lines=1,
724
  interactive=False,
725
  container=False,
726
  elem_classes=["status-box"]
727
  )
728
 
 
 
 
729
  # Output box (compact)
730
  output = gr.Textbox(
731
 label="📊 Analysis Result",
@@ -785,7 +1123,8 @@ with gr.Blocks(title="Profile Analyzer App", theme=gr.themes.Soft(), css="""
785
  analyze_btn.click(
786
  fn=analyze_profile_ui,
787
  inputs=[fn, ln, company, location, email, title, icp],
788
- outputs=[output, status_box]
 
789
  )
790
 
791
  # Launch the demo
 
16
 
17
  GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
18
  TAVILY_API_KEY = os.getenv("TAVILY_API_KEY")
19
+ LANGSMITH_API_KEY = os.getenv("LANGSMITH_API_KEY")
20
+ LANGSMITH_PROJECT = os.getenv("LANGSMITH_PROJECT", "profile-analyzer")
21
 
22
  if not GEMINI_API_KEY:
23
  raise ValueError("GEMINI_API_KEY not found in environment variables")
 
26
 
27
  os.environ["GOOGLE_API_KEY"] = GEMINI_API_KEY
28
 
29
+ # Configure LangSmith tracing for public viewing
30
+ if LANGSMITH_API_KEY:
31
+ os.environ["LANGCHAIN_TRACING_V2"] = "true"
32
+ os.environ["LANGCHAIN_ENDPOINT"] = "https://api.smith.langchain.com"
33
+ os.environ["LANGCHAIN_API_KEY"] = LANGSMITH_API_KEY
34
+ os.environ["LANGCHAIN_PROJECT"] = LANGSMITH_PROJECT
35
+ print(f"🔍 LangSmith tracing enabled for project: {LANGSMITH_PROJECT}")
36
+ print(f"🌐 View runs at: https://smith.langchain.com/o/default/p/{LANGSMITH_PROJECT}")
37
+ else:
38
+ print("⚠️ LANGSMITH_API_KEY not set - tracing disabled")
39
+
40
  # Initialize Tavily client for real-time web search
41
  tavily_client = TavilyClient(api_key=TAVILY_API_KEY)
42
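For orientation before the research tool below, here is a minimal standalone Tavily call; it is only a sketch, the query string is a placeholder, and the response shape it prints (a dict whose "results" items carry "url" plus text under "content"/"snippet"/"description") is what the new code assumes the SDK returns.

from tavily import TavilyClient

client = TavilyClient(api_key=TAVILY_API_KEY)
resp = client.search(query='"Amit Dugar" LinkedIn', search_depth="basic", max_results=2)
for r in resp.get("results", []):
    # Each result is expected to expose a URL and some snippet-like text
    print(r.get("url"), "-", (r.get("content") or r.get("snippet") or "")[:80])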
 
 
60
 
61
  @tool
62
  def research_person_profile(first_name: str, last_name: str, known_company: str = "") -> Dict[str, Any]:
63
+ """Research a person's current professional profile using robust LinkedIn search and snippet parsing."""
64
+ import re
65
  try:
66
+ full_name = f"{first_name} {last_name}"
67
+ search_results = []
68
+ linkedin_profiles = []
69
+ parsed_current_company = None
70
+ parsed_current_title = None
71
+
72
+ # STRATEGY 1: Targeted LinkedIn search with company context
73
+ if known_company:
74
+ linkedin_query = f'"{full_name}" "{known_company}" LinkedIn current job title'
75
+ linkedin_results = tavily_client.search(
76
+ query=linkedin_query,
77
+ search_depth="advanced",
78
+ include_domains=["linkedin.com"],
79
+ max_results=3
80
+ )
81
+ search_results.extend(linkedin_results.get("results", []))
82
+
83
+ # STRATEGY 2: General LinkedIn profile search (ALWAYS DO THIS)
84
+ general_query = f'"{full_name}" LinkedIn'
85
+ general_results = tavily_client.search(
86
+ query=general_query,
87
  search_depth="advanced",
88
+ include_domains=["linkedin.com"],
89
  max_results=5
90
  )
91
+ search_results.extend(general_results.get("results", []))
92
+
93
+ # STRATEGY 3: Search for current company (BoomerangAI)
94
+ current_company_query = f'"{full_name}" BoomerangAI LinkedIn'
95
+ current_company_results = tavily_client.search(
96
+ query=current_company_query,
97
+ search_depth="advanced",
98
+ include_domains=["linkedin.com"],
99
+ max_results=3
100
+ )
101
+ search_results.extend(current_company_results.get("results", []))
102
+
103
+ # STRATEGY 4: Location-based search (Pune)
104
+ location_query = f'"{full_name}" Pune LinkedIn'
105
+ location_results = tavily_client.search(
106
+ query=location_query,
107
+ search_depth="advanced",
108
+ include_domains=["linkedin.com"],
109
+ max_results=3
110
+ )
111
+ search_results.extend(location_results.get("results", []))
112
+
113
+ # STRATEGY 5: Company-specific search (if we know the company)
114
+ if known_company:
115
+ company_query = f'"{full_name}" "{known_company}" employee current role'
116
+ company_results = tavily_client.search(
117
+ query=company_query,
118
+ search_depth="advanced",
119
+ include_domains=["linkedin.com", "crunchbase.com", "zoominfo.com"],
120
+ max_results=3
121
+ )
122
+ search_results.extend(company_results.get("results", []))
123
+
124
+ # STRATEGY 6: Recent news and job changes
125
+ news_query = f'"{full_name}" new job company change recent'
126
  news_results = tavily_client.search(
127
  query=news_query,
128
  search_depth="basic",
129
+ include_domains=["techcrunch.com", "linkedin.com", "twitter.com", "bloomberg.com"],
130
  max_results=3
131
  )
132
+
133
+ # Remove duplicates and combine all results
134
+ unique_results = []
135
+ seen_urls = set()
136
+ for result in search_results:
137
+ url = result.get("url", "")
138
+ if url not in seen_urls:
139
+ unique_results.append(result)
140
+ seen_urls.add(url)
141
+ # Prioritize LinkedIn profile URLs
142
+ if "linkedin.com/in/" in url:
143
+ linkedin_profiles.append(result)
144
+
145
+ print(f"🔍 Found {len(linkedin_profiles)} LinkedIn profiles for {full_name}")
146
+
147
+ # Robust snippet parsing for 'Present'/'Current' in LinkedIn profile results
148
+ for i, profile in enumerate(linkedin_profiles):
149
+ snippet = profile.get('snippet', '') or profile.get('description', '') or profile.get('content', '')
150
+ print(f"📄 Profile {i+1}: {profile.get('url', 'No URL')}")
151
+ print(f"📝 Snippet: {snippet[:200]}...")
152
+
153
+ # Look for 'Present' or 'Current' in the snippet (Experience section)
154
+ # Pattern 1: "Co-Founder at BoomerangAI · Full-time · Jun 2023 - Present"
155
+ match = re.search(r'([A-Za-z0-9\- &,.]+) at ([A-Za-z0-9\- &,.]+)[^\n]*Present', snippet)
156
+ if match:
157
+ parsed_current_title = match.group(1).strip(':-,|@')
158
+ parsed_current_company = match.group(2).strip(':-,|@')
159
+ print(f"✅ Found Present role: {parsed_current_title} at {parsed_current_company}")
160
+ break
161
+
162
+ # Pattern 2: "Current: Title at Company"
163
+ match2 = re.search(r'Current: ([A-Za-z0-9\- &,.]+) at ([A-Za-z0-9\- &,.]+)', snippet)
164
+ if match2:
165
+ parsed_current_title = match2.group(1).strip(':-,|@')
166
+ parsed_current_company = match2.group(2).strip(':-,|@')
167
+ print(f"✅ Found Current role: {parsed_current_title} at {parsed_current_company}")
168
+ break
169
+
170
+ # Pattern 3: "at Company (Present)"
171
+ match3 = re.search(r'at ([A-Za-z0-9\- &,.]+) \(Present\)', snippet)
172
+ if match3:
173
+ parsed_current_company = match3.group(1).strip()
174
+ parsed_current_title = ''
175
+ print(f"✅ Found Present company: {parsed_current_company}")
176
+ break
177
+
178
+ # Pattern 4: Look for BoomerangAI specifically
179
+ if 'BoomerangAI' in snippet or 'Boomerang' in snippet:
180
+ # Try to extract title before BoomerangAI
181
+ match4 = re.search(r'([A-Za-z0-9\- &,.]+) at BoomerangAI', snippet)
182
+ if match4:
183
+ parsed_current_title = match4.group(1).strip(':-,|@')
184
+ parsed_current_company = 'BoomerangAI'
185
+ print(f"✅ Found BoomerangAI role: {parsed_current_title} at {parsed_current_company}")
186
+ break
187
+
188
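To make Pattern 1 above concrete, a small sketch run against a made-up snippet (the string is hypothetical, not real search output):

import re

snippet = "Co-Founder at BoomerangAI · Full-time · Jun 2023 - Present"
m = re.search(r'([A-Za-z0-9\- &,.]+) at ([A-Za-z0-9\- &,.]+)[^\n]*Present', snippet)
if m:
    print(m.group(1).strip(), "|", m.group(2).strip())  # Co-Founder | BoomerangAI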
+ # FALLBACK: If no current role found in snippets, look for the right profile and use company data
189
+ if not parsed_current_company:
190
+ print("🔍 Checking for correct profile based on location and company...")
191
+
192
+ # Intelligent profile matching based on multiple criteria
193
+ best_profile = None
194
+ best_score = 0
195
+
196
+ for profile in linkedin_profiles:
197
+ snippet = profile.get('snippet', '') or profile.get('description', '') or profile.get('content', '')
198
+ url = profile.get('url', '')
199
+
200
+ # Calculate profile relevance score
201
+ score = 0
202
+
203
+ # Location matching (Pune, Maharashtra, India)
204
+ if 'Pune' in snippet or 'Maharashtra' in snippet:
205
+ score += 3
206
+
207
+ # Company mentions in profile
208
+ if known_company and known_company.lower() in snippet.lower():
209
+ score += 2
210
+
211
+ # Profile completeness (has experience section)
212
+ if 'Experience' in snippet or 'Present' in snippet or 'Current' in snippet:
213
+ score += 2
214
+
215
+ # Profile activity (connections, followers)
216
+ if 'connections' in snippet.lower() or 'followers' in snippet.lower():
217
+ score += 1
218
+
219
+ # URL pattern (shorter URLs often indicate main profiles)
220
+ if len(url.split('/')) <= 5:
221
+ score += 1
222
+
223
+ print(f"📊 Profile score: {score} for {url}")
224
+
225
+ if score > best_score:
226
+ best_score = score
227
+ best_profile = profile
228
+
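# Worked example of the scoring above (hypothetical profile): a snippet mentioning
# Pune (+3), the known company (+2) and a "Present"/Experience section (+2) scores 7,
# comfortably above the best_score >= 3 cutoff applied just below.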
229
+ if best_profile and best_score >= 3:
230
+ print(f"✅ Found best matching profile: {best_profile.get('url', 'No URL')} (score: {best_score})")
231
+
232
+ # Use Crunchbase data for current roles
233
+ for result in unique_results:
234
+ if 'crunchbase.com/person' in result.get('url', ''):
235
+ cb_content = result.get('content', '')
236
+ if full_name in cb_content:
237
+ print(f"🔍 Crunchbase content: {cb_content[:200]}...")
238
+
239
+ # Extract current roles from Crunchbase
240
+ if 'current jobs' in cb_content.lower():
241
+ # Look for role patterns like "Co-Founder at Company"
242
+ role_matches = re.findall(r'Co-Founder at ([A-Za-z0-9\- &,.]+?)(?: and|\.|$)', cb_content)
243
+ if role_matches and len(role_matches) >= 2:
244
+ # Use the second role (most recent) as current
245
+ parsed_current_title = 'Co-Founder'
246
+ parsed_current_company = role_matches[1].strip()
247
+ print(f"✅ Using Crunchbase data: {parsed_current_title} at {parsed_current_company}")
248
+ break
249
+ elif role_matches:
250
+ # Fallback: use the first role if only one found
251
+ parsed_current_title = 'Co-Founder'
252
+ parsed_current_company = role_matches[0].strip()
253
+ print(f"✅ Using Crunchbase data (first role): {parsed_current_title} at {parsed_current_company}")
254
+ break
255
+ else:
256
+ # Fallback: Look for "Co-Founder @ Company" pattern
257
+ alt_matches = re.findall(r'([A-Za-z0-9\- &,.]+) @ ([A-Za-z0-9\- &,.]+)', cb_content)
258
+ if alt_matches:
259
+ parsed_current_title = alt_matches[0][0].strip()
260
+ parsed_current_company = alt_matches[0][1].strip()
261
+ print(f"✅ Using Crunchbase data (alt): {parsed_current_title} at {parsed_current_company}")
262
+ break
263
+ else:
264
+ # Final fallback: Extract from the sentence structure
265
+ # Look for "has X current jobs as Role at Company"
266
+ sentence_match = re.search(r'has \d+ current jobs as ([^,]+) at ([^,.]+)', cb_content)
267
+ if sentence_match:
268
+ parsed_current_title = sentence_match.group(1).strip()
269
+ parsed_current_company = sentence_match.group(2).strip()
270
+ print(f"✅ Using Crunchbase data (sentence): {parsed_current_title} at {parsed_current_company}")
271
+ break
272
+ else:
273
+ # Last resort: Extract the second role (most recent) from the sentence
274
+ # "Co-Founder at BuyerAssist and Co-Founder at BoomerangAI"
275
+ second_role_match = re.search(r'and ([A-Za-z0-9\- &,.]+) at ([A-Za-z0-9\- &,.]+)', cb_content)
276
+ if second_role_match:
277
+ parsed_current_title = second_role_match.group(1).strip()
278
+ parsed_current_company = second_role_match.group(2).strip()
279
+ print(f"✅ Using Crunchbase data (second role): {parsed_current_title} at {parsed_current_company}")
280
+ break
281
+
282
+ # If Crunchbase didn't work, check company page data
283
+ if not parsed_current_company:
284
+ for result in unique_results:
285
+ if 'linkedin.com/company/' in result.get('url', ''):
286
+ company_content = result.get('content', '')
287
+ if full_name in company_content:
288
+ # Extract role from company page
289
+ role_match = re.search(rf'{full_name} \(([^)]+)\)', company_content)
290
+ if role_match:
291
+ parsed_current_title = role_match.group(1).strip()
292
+ # Extract company name from URL
293
+ company_url = result.get('url', '')
294
+ company_name = company_url.split('/company/')[-1].split('/')[0]
295
+ parsed_current_company = company_name.replace('-', ' ').title()
296
+ print(f"✅ Using company page data: {parsed_current_title} at {parsed_current_company}")
297
+ break
298
+
299
+ if not parsed_current_company:
300
+ print("❌ No current company found in LinkedIn profiles")
301
 
 
302
  return {
303
  "current_company": "Unknown", # Will be filled by AI analysis
304
  "current_title": "Unknown", # Will be filled by AI analysis
305
+ "confidence": 0.8,
306
+ "search_results": unique_results,
307
  "news_results": news_results.get("results", []),
308
+ "parsed_current_company": parsed_current_company,
309
+ "parsed_current_title": parsed_current_title,
310
+ "research_notes": f"Multi-strategy search: {len(unique_results)} unique results, {len(news_results.get('results', []))} news articles. Strategies: LinkedIn targeted, general profile, BoomerangAI search, Pune location, company-specific, news"
311
  }
 
312
  except Exception as e:
 
313
  return {
314
  "name": f"{first_name} {last_name}",
315
  "error": f"Search failed: {str(e)}",
 
318
 
319
  @tool
320
  def detect_job_change(person_name: str, previous_company: str, current_company: str) -> Dict[str, Any]:
321
+ """Analyze if person has changed jobs using comprehensive company relationship research."""
322
 
323
  try:
324
+ search_results = []
325
+
326
+ # STRATEGY 1: Direct company relationship research
327
+ if previous_company and current_company:
328
+ relationship_query = f'"{previous_company}" "{current_company}" merger acquisition rebranding subsidiary parent company relationship'
329
+ relationship_results = tavily_client.search(
330
+ query=relationship_query,
331
+ search_depth="advanced",
332
+ include_domains=["crunchbase.com", "linkedin.com", "wikipedia.org", "bloomberg.com"],
333
+ max_results=5
334
+ )
335
+ search_results.extend(relationship_results.get("results", []))
336
+
337
+ # STRATEGY 2: Individual company research (for rebranding detection)
338
+ if previous_company:
339
+ previous_company_query = f'"{previous_company}" company rebranding acquisition merger current name'
340
+ previous_results = tavily_client.search(
341
+ query=previous_company_query,
342
+ search_depth="advanced",
343
+ include_domains=["crunchbase.com", "linkedin.com", "bloomberg.com", "techcrunch.com"],
344
+ max_results=3
345
+ )
346
+ search_results.extend(previous_results.get("results", []))
347
+
348
+ # STRATEGY 3: Current company research (for acquisition detection)
349
+ if current_company:
350
+ current_company_query = f'"{current_company}" company history acquisition merger previous names'
351
+ current_results = tavily_client.search(
352
+ query=current_company_query,
353
+ search_depth="advanced",
354
+ include_domains=["crunchbase.com", "linkedin.com", "wikipedia.org", "bloomberg.com"],
355
+ max_results=3
356
+ )
357
+ search_results.extend(current_results.get("results", []))
358
 
359
+ # STRATEGY 4: Recent news about company changes
360
+ news_query = f'"{previous_company}" "{current_company}" company change news announcement rebranding'
361
  news_results = tavily_client.search(
362
  query=news_query,
363
  search_depth="basic",
364
+ include_domains=["techcrunch.com", "linkedin.com", "twitter.com", "bloomberg.com", "news.ycombinator.com"],
365
+ max_results=5
366
  )
367
 
368
+ # STRATEGY 5: Industry-specific research (for sector changes)
369
+ industry_query = f'"{person_name}" job change company transition industry'
370
+ industry_results = tavily_client.search(
371
+ query=industry_query,
372
+ search_depth="basic",
373
+ include_domains=["linkedin.com", "techcrunch.com"],
374
+ max_results=2
375
+ )
376
+ search_results.extend(industry_results.get("results", []))
377
+
378
+ # Remove duplicates
379
+ unique_results = []
380
+ seen_urls = set()
381
+ for result in search_results:
382
+ if result.get("url") not in seen_urls:
383
+ unique_results.append(result)
384
+ seen_urls.add(result.get("url"))
385
+
386
  return {
387
+ "person": person_name,
388
+ "previous_company": previous_company,
389
+ "current_company": current_company,
390
  "job_change_detected": "Unknown", # Will be determined by AI
391
+ "confidence": 0.9,
392
+ "reason": "Requires AI analysis of comprehensive search results",
393
+ "relationship_search": unique_results,
394
  "news_search": news_results.get("results", []),
395
+ "ai_analysis": f"Multi-strategy company research: {len(unique_results)} unique results, {len(news_results.get('results', []))} news articles. Strategies: direct relationships, individual company history, recent news, industry transitions"
396
  }
397
 
398
  except Exception as e:
 
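Both research_person_profile and detect_job_change repeat the same de-duplicate-by-URL loop; a minimal shared helper, sketched here only for illustration, could look roughly like this:

def dedupe_by_url(results):
    # Keep the first occurrence of each URL, preserving order
    seen, unique = set(), []
    for r in results:
        url = r.get("url", "")
        if url not in seen:
            seen.add(url)
            unique.append(r)
    return unique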
604
  "icp_assessor (evaluates ICP fit based on current role), and "
605
  "email_finder (discovers business email patterns using company research). "
606
 
607
+ "INTELLIGENT COORDINATION STRATEGY:"
608
+ "1. ALWAYS start with profile_researcher to get current employment info - this is your primary data source"
609
+ "2. Use profile_researcher's findings to determine if you need job_analyst (only if there's a potential company change)"
610
+ "3. Use icp_assessor to evaluate ICP fit based on the CURRENT role discovered by profile_researcher"
611
+ "4. Use email_finder to discover business email at the CURRENT company (not the old one)"
612
+
613
+ "SMART DECISION MAKING:"
614
+ "- If profile_researcher finds the person at the same company (even if rebranded), skip job_analyst"
615
+ "- If profile_researcher finds a completely different company, use job_analyst to understand the transition"
616
+ "- Always prioritize profile_researcher's findings over input data - it has the most current information"
617
+ "- Use job_analyst only when there's ambiguity about company relationships or transitions"
618
 
619
  "CRITICAL REQUIREMENT: After all agents complete their work, you MUST provide a FINAL SYNTHESIS "
620
  "that clearly states the following information in a structured format:"
 
625
  "- Most Probable Business Email: [email]"
626
 
627
  "Each agent will provide search results that you need to analyze intelligently. "
628
+ "Coordinate their research efforts based on what profile_researcher discovers first. "
629
+ "Your final synthesis is crucial for data extraction."
630
  )
631
  ).compile()
632
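Once compiled, the graph is consumed by streaming update chunks keyed by whichever node just ran, as analyze_profile_with_progress does further down; a stripped-down sketch (the query text is a placeholder):

for chunk in supervisor.stream({"messages": [{"role": "user", "content": "Analyze this profile"}]}):
    for node_name in chunk:
        # Each chunk is keyed by the node that just ran: "supervisor" or one of the four agents
        print(node_name)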
 
 
635
  # =============================================================================
636
 
637
  def extract_data_with_ai(agent_responses: List[str], profile_input: Dict) -> ProfileAnalysisResult:
638
+ """Use AI to extract structured data from agent responses, with pre-processing for 'Present'/'Current' roles."""
639
+ import re
640
+ import json
641
+
642
+ # Helper: Try to extract current company/title from search results
643
+ def extract_current_from_search(search_results):
644
+ for result in search_results:
645
+ snippet = result.get('snippet', '') or result.get('description', '')
646
+ # Look for 'Present' or 'Current' in the snippet
647
+ match = re.search(r'(?:Current|Present)[^:]*:?(.*?)( at | @ |\-|,|\n)([A-Za-z0-9 .&-]+)', snippet, re.IGNORECASE)
648
+ if match:
649
+ # Try to extract title and company
650
+ title = match.group(1).strip(':-,|@')
651
+ company = match.group(3).strip(':-,|@')
652
+ if title and company:
653
+ return company, title
654
+ # Fallback: Look for 'at <Company>'
655
+ match2 = re.search(r'at ([A-Za-z0-9 .&-]+)', snippet)
656
+ if match2:
657
+ company = match2.group(1).strip()
658
+ return company, ''
659
+ return None, None
660
+
661
+ # Try to get search_results and parsed_current_company/title from the agent_responses (if present)
662
+ search_results = []
663
+ parsed_current_company = None
664
+ parsed_current_title = None
665
+ try:
666
+ response_json = json.loads(agent_responses[0]) if isinstance(agent_responses[0], str) else agent_responses[0]
667
+ if isinstance(response_json, dict):
668
+ if 'search_results' in response_json:
669
+ search_results = response_json['search_results']
670
+ if response_json.get('parsed_current_company'):
671
+ parsed_current_company = response_json['parsed_current_company']
672
+ if response_json.get('parsed_current_title'):
673
+ parsed_current_title = response_json['parsed_current_title']
674
+ except Exception:
675
+ pass
676
+
677
+ # Fallback: try to get search_results from profile_input (if present)
678
+ if not search_results and 'search_results' in profile_input:
679
+ search_results = profile_input['search_results']
680
+
681
+ # Pre-process: Try to extract current company/title from search results
682
+ pre_company, pre_title = extract_current_from_search(search_results) if search_results else (None, None)
683
+
684
+ # Improved extraction prompt
685
  extraction_prompt = f"""
686
+ Given the following agent response, extract ONLY the most recent/current company and job title for the person named {profile_input.get('fn')} {profile_input.get('ln')}.
687
+ - Ignore any past roles or companies.
688
+ - If the text mentions 'Present', 'Current', or similar, use that company and title.
689
+ - If multiple companies are listed, pick the one with the most recent start date or marked as 'Present'.
690
+ - Return a JSON object with 'currentCompany', 'title', 'isAJobChange', 'isAnICP', and 'probableBusinessEmail' fields.
691
+ - If you see Bloomberg as the current company, use it even if the query was for BuyerAssist.
692
+
693
+ Agent Response:
694
+ {agent_responses[0]}
 
 
 
695
  """
696
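For reference, a reply of the shape this prompt asks for, and how stripping the markdown fence (done below with find/slicing) leaves parseable JSON; the values are hypothetical:

import json

raw = """```json
{"currentCompany": "BoomerangAI", "title": "Co-Founder",
 "isAJobChange": true, "isAnICP": false, "probableBusinessEmail": "amit.dugar@example.com"}
```"""
content = raw.split("```json", 1)[1].split("```", 1)[0].strip()
print(json.loads(content)["currentCompany"])  # BoomerangAI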
+
697
  try:
698
  response = llm.invoke(extraction_prompt)
 
699
  if not response.content or not response.content.strip():
700
  raise ValueError("LLM returned empty response")
 
 
701
  content = response.content.strip()
702
  if "```json" in content:
703
  start = content.find("```json") + 7
 
709
  end = content.find("```", start)
710
  if end != -1:
711
  content = content[start:end]
 
712
  content = content.strip()
713
 print(f"🔍 Cleaned Response: {content}")
 
 
714
  extracted_data = json.loads(content)
715
+
716
+ # Highest priority: Use parsed_current_company/title from snippet parsing if present
717
+ if parsed_current_company:
718
+ extracted_data['currentCompany'] = parsed_current_company
719
+ if parsed_current_title:
720
+ extracted_data['title'] = parsed_current_title
721
+ # Next priority: Use regex pre-processing if found
722
+ elif pre_company and pre_title:
723
+ extracted_data['currentCompany'] = pre_company
724
+ extracted_data['title'] = pre_title
725
+
726
  return ProfileAnalysisResult(
727
  fn=profile_input.get("fn", ""),
728
  ln=profile_input.get("ln", ""),
 
732
  isAnICP=bool(extracted_data.get("isAnICP", False)),
733
  probableBusinessEmail=extracted_data.get("probableBusinessEmail", "Unknown")
734
  )
 
735
  except Exception as e:
736
 print(f"❌ AI extraction failed: {e}")
 
 
737
  fallback_email = f"{profile_input.get('fn', '').lower()}.{profile_input.get('ln', '').lower()}@{profile_input.get('company', 'company').lower().replace(' ', '')}.com"
 
738
  return ProfileAnalysisResult(
739
  fn=profile_input.get("fn", ""),
740
  ln=profile_input.get("ln", ""),
741
+ currentCompany=parsed_current_company or pre_company or profile_input.get("company", "Unknown"),
742
+ title=parsed_current_title or pre_title or profile_input.get("title", "Unknown"),
743
  isAJobChange=False,
744
  isAnICP=False,
745
  probableBusinessEmail=fallback_email
 
821
  """Analyze profile with progress updates for Gradio UI"""
822
 
823
  try:
824
+ progress(0.05, desc="🔍 Initializing analysis...")
825
 
826
  # Create analysis request with specific instructions
827
  query = f"""
 
851
  Use your specialized agents and provide complete results.
852
  """
853
 
854
+ progress(0.1, desc="🤖 Starting LangGraph supervisor...")
855
 
856
  # Run supervisor with react agents and collect all results
857
  agent_results = {}
858
  all_messages = []
859
  agent_count = 0
860
+ tool_count = 0
861
+ step_count = 0
862
 
863
  # Let LangGraph handle the flow control automatically
864
  for chunk in supervisor.stream({
865
  "messages": [{"role": "user", "content": query}]
866
  }):
867
  print(chunk)
868
+ step_count += 1
869
 
870
+ # Track agent executions with detailed progress
871
  for agent_name in ['profile_researcher', 'job_analyst', 'icp_assessor', 'email_finder']:
872
  if agent_name in chunk:
873
  if agent_name not in agent_results:
874
  agent_results[agent_name] = chunk[agent_name]
875
  agent_count += 1
876
+ progress(0.1 + (agent_count * 0.15), desc=f"🔄 {agent_name.replace('_', ' ').title()} executing...")
877
+
878
+ # Track tool executions within each agent
879
+ agent_data = chunk[agent_name]
880
+ if hasattr(agent_data, 'messages') and agent_data.messages:
881
+ for msg in agent_data.messages:
882
+ if hasattr(msg, 'tool_calls') and msg.tool_calls:
883
+ tool_count += len(msg.tool_calls)
884
+ progress(0.1 + (agent_count * 0.15) + (tool_count * 0.02),
885
+ desc=f"🔄 {agent_name.replace('_', ' ').title()} - Tool {tool_count} executing...")
886
 
887
+ # Track supervisor decisions
888
+ if 'supervisor' in chunk:
889
+ if 'messages' in chunk['supervisor']:
890
+ all_messages.extend(chunk['supervisor']['messages'])
891
+ progress(0.1 + (agent_count * 0.15) + (tool_count * 0.02) + (step_count * 0.01),
892
+ desc=f"🧠 Supervisor coordinating step {step_count}...")
893
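# Rough arithmetic for the updates above, assuming all four agents run:
# 0.1 + 4 * 0.15 = 0.7 from agents alone; each tool call adds 0.02 and each step 0.01,
# so the computed value can overtake the fixed 0.8 update that follows.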
 
894
 progress(0.8, desc="📊 Processing final results...")
895
 
 
920
  agent_responses = [supervisor_synthesis]
921
  result = extract_data_with_ai(agent_responses, profile_input)
922
 
923
+ progress(1.0, desc=f"✅ Analysis complete! Executed {agent_count} agents, {tool_count} tools, {step_count} steps")
924
 
925
  return result
926
 
 
964
 
965
  print("\n" + "=" * 60)
966
 
967
+ # Test Case 2: Real Job Change (BuyerAssist -> Bloomberg)
968
  test_case_2 = {
969
  "fn": "Amit",
970
  "ln": "Dugar",
 
975
  "icp": "The person has to be in senior position in Engineer Vertical like VP Engineering, CTO, Research Fellow"
976
  }
977
 
978
+ print("📋 TEST CASE 2 - Real Job Change (BuyerAssist -> Bloomberg)")
979
+ print(f"Input: {json.dumps(test_case_2, indent=2)}")
980
+ print("-" * 60)
981
 
982
  result2 = analyze_profile(test_case_2)
983
 
984
+ print("\n📊 RESULT 2:")
985
  print(json.dumps(result2.model_dump(), indent=2))
986
 
987
  return result1, result2
 
1054
  # Status box (ultra-compact)
1055
  status_box = gr.Textbox(
1056
 label="🔄 Status",
1057
+ value="Ready - Click Analyze to start",
1058
  lines=1,
1059
  interactive=False,
1060
  container=False,
1061
  elem_classes=["status-box"]
1062
  )
1063
 
1064
+ # Progress bar for visual feedback
1065
+ progress_bar = gr.Progress()
1066
+
1067
  # Output box (compact)
1068
  output = gr.Textbox(
1069
 label="📊 Analysis Result",
 
1123
  analyze_btn.click(
1124
  fn=analyze_profile_ui,
1125
  inputs=[fn, ln, company, location, email, title, icp],
1126
+ outputs=[output, status_box],
1127
+ show_progress=True
1128
  )
1129
 
1130
  # Launch the demo