Upload supervisor_agent.py
Browse files · supervisor_agent.py (+428 -89)
supervisor_agent.py
CHANGED
@@ -16,6 +16,8 @@ load_dotenv()

 GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
 TAVILY_API_KEY = os.getenv("TAVILY_API_KEY")

 if not GEMINI_API_KEY:
     raise ValueError("GEMINI_API_KEY not found in environment variables")

@@ -24,6 +26,17 @@ if not TAVILY_API_KEY:

 os.environ["GOOGLE_API_KEY"] = GEMINI_API_KEY

 # Initialize Tavily client for real-time web search
 tavily_client = TavilyClient(api_key=TAVILY_API_KEY)
@@ -47,39 +60,256 @@ class ProfileAnalysisResult(BaseModel):

 @tool
 def research_person_profile(first_name: str, last_name: str, known_company: str = "") -> Dict[str, Any]:
-    """Research a person's current professional profile using
-
     try:
-
-
-
-
             search_depth="advanced",
-            include_domains=["linkedin.com"
             max_results=5
         )
-
-
-
         news_results = tavily_client.search(
             query=news_query,
             search_depth="basic",
-            include_domains=["techcrunch.com", "linkedin.com", "twitter.com"],
             max_results=3
         )

-        # Return structured data, not hardcoded values
         return {
             "current_company": "Unknown",  # Will be filled by AI analysis
             "current_title": "Unknown",  # Will be filled by AI analysis
-            "confidence": 0.
-            "search_results":
             "news_results": news_results.get("results", []),
-            "
         }
-
     except Exception as e:
-        # Return Dict, not JSON string (fixes the type mismatch)
         return {
             "name": f"{first_name} {last_name}",
             "error": f"Search failed: {str(e)}",
@@ -88,38 +318,81 @@ def research_person_profile(first_name: str, last_name: str, known_company: str

 @tool
 def detect_job_change(person_name: str, previous_company: str, current_company: str) -> Dict[str, Any]:
-    """Analyze if person has changed jobs using

     try:
-
-
-
-
-
-
-
-

-        #
-        news_query = f'"{previous_company}" "{current_company}" company change news announcement'
         news_results = tavily_client.search(
             query=news_query,
             search_depth="basic",
-            include_domains=["techcrunch.com", "linkedin.com", "twitter.com", "news.ycombinator.com"],
-            max_results=
         )

-        #
         return {
-
-
-
             "job_change_detected": "Unknown",  # Will be determined by AI
-            "confidence": 0.
-            "reason": "Requires AI analysis of search results",
-            "relationship_search":
             "news_search": news_results.get("results", []),
-            "ai_analysis": f"
         }

     except Exception as e:
@@ -331,11 +604,17 @@ supervisor = create_supervisor(

     "icp_assessor (evaluates ICP fit based on current role), and "
     "email_finder (discovers business email patterns using company research). "

-    "COORDINATION STRATEGY:"
-    "1.
-    "2. Use
-    "3. Use icp_assessor to evaluate ICP fit based on
-    "4. Use email_finder to discover business email at

     "CRITICAL REQUIREMENT: After all agents complete their work, you MUST provide a FINAL SYNTHESIS "
     "that clearly states the following information in a structured format:"
@@ -346,8 +625,8 @@ supervisor = create_supervisor(

     "- Most Probable Business Email: [email]"

     "Each agent will provide search results that you need to analyze intelligently. "
-    "Coordinate their research efforts
-    "
     )
 ).compile()
@@ -356,31 +635,69 @@ supervisor = create_supervisor(

 # =============================================================================

 def extract_data_with_ai(agent_responses: List[str], profile_input: Dict) -> ProfileAnalysisResult:
-    """Use AI to extract structured data from agent responses"""
-
-
     extraction_prompt = f"""
-
-
-
-
-    JSON
-
-
-
-
-    "isAnICP": true/false,
-    "probableBusinessEmail": "email"
-    }}
     """
-
     try:
         response = llm.invoke(extraction_prompt)
-
         if not response.content or not response.content.strip():
             raise ValueError("LLM returned empty response")
-
-        # Clean response
         content = response.content.strip()
         if "```json" in content:
             start = content.find("```json") + 7
@@ -392,14 +709,20 @@ def extract_data_with_ai(agent_responses: List[str], profile_input: Dict) -> ProfileAnalysisResult:
             end = content.find("```", start)
             if end != -1:
                 content = content[start:end]
-
         content = content.strip()
         print(f"Cleaned Response: {content}")
-
-        # Parse JSON
         extracted_data = json.loads(content)
-
-        #
         return ProfileAnalysisResult(
             fn=profile_input.get("fn", ""),
             ln=profile_input.get("ln", ""),
@@ -409,18 +732,14 @@ def extract_data_with_ai(agent_responses: List[str], profile_input: Dict) -> ProfileAnalysisResult:
             isAnICP=bool(extracted_data.get("isAnICP", False)),
             probableBusinessEmail=extracted_data.get("probableBusinessEmail", "Unknown")
         )
-
     except Exception as e:
         print(f"AI extraction failed: {e}")
-
-        # Create fallback result instead of raising error
         fallback_email = f"{profile_input.get('fn', '').lower()}.{profile_input.get('ln', '').lower()}@{profile_input.get('company', 'company').lower().replace(' ', '')}.com"
-
         return ProfileAnalysisResult(
             fn=profile_input.get("fn", ""),
             ln=profile_input.get("ln", ""),
-            currentCompany=profile_input.get("company", "Unknown"),
-            title=profile_input.get("title", "Unknown"),
             isAJobChange=False,
             isAnICP=False,
             probableBusinessEmail=fallback_email
@@ -502,7 +821,7 @@ def analyze_profile_with_progress(profile_input: Dict[str, Any], progress) -> ProfileAnalysisResult:
     """Analyze profile with progress updates for Gradio UI"""

     try:
-        progress(0.

         # Create analysis request with specific instructions
         query = f"""
@@ -532,30 +851,45 @@ def analyze_profile_with_progress(profile_input: Dict[str, Any], progress) -> ProfileAnalysisResult:
     Use your specialized agents and provide complete results.
     """

-        progress(0.

         # Run supervisor with react agents and collect all results
         agent_results = {}
         all_messages = []
         agent_count = 0

         # Let LangGraph handle the flow control automatically
         for chunk in supervisor.stream({
             "messages": [{"role": "user", "content": query}]
         }):
             print(chunk)

-            #
             for agent_name in ['profile_researcher', 'job_analyst', 'icp_assessor', 'email_finder']:
                 if agent_name in chunk:
                     if agent_name not in agent_results:
                         agent_results[agent_name] = chunk[agent_name]
                         agent_count += 1
-                        progress(0.

-            #
-            if 'supervisor' in chunk
-

         progress(0.8, desc="Processing final results...")
@@ -586,7 +920,7 @@ def analyze_profile_with_progress(profile_input: Dict[str, Any], progress) -> ProfileAnalysisResult:
         agent_responses = [supervisor_synthesis]
         result = extract_data_with_ai(agent_responses, profile_input)

-        progress(1.0, desc="Analysis complete!")

         return result
@@ -630,7 +964,7 @@ def main():

     print("\n" + "=" * 60)

-    # Test Case 2:
     test_case_2 = {
         "fn": "Amit",
         "ln": "Dugar",
@@ -641,12 +975,13 @@ def main():
         "icp": "The person has to be in senior position in Engineer Vertical like VP Engineering, CTO, Research Fellow"
     }

-    print("TEST CASE 2 ")
-

     result2 = analyze_profile(test_case_2)

-
     print(json.dumps(result2.model_dump(), indent=2))

     return result1, result2
@@ -719,13 +1054,16 @@ with gr.Blocks(title="Profile Analyzer App", theme=gr.themes.Soft(), css="""
     # Status box (ultra-compact)
     status_box = gr.Textbox(
         label="Status",
-        value="Ready",
         lines=1,
         interactive=False,
         container=False,
         elem_classes=["status-box"]
     )

     # Output box (compact)
     output = gr.Textbox(
         label="Analysis Result",
@@ -785,7 +1123,8 @@ with gr.Blocks(title="Profile Analyzer App", theme=gr.themes.Soft(), css="""
     analyze_btn.click(
         fn=analyze_profile_ui,
         inputs=[fn, ln, company, location, email, title, icp],
-        outputs=[output, status_box]
     )

     # Launch the demo

 GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
 TAVILY_API_KEY = os.getenv("TAVILY_API_KEY")
+LANGSMITH_API_KEY = os.getenv("LANGSMITH_API_KEY")
+LANGSMITH_PROJECT = os.getenv("LANGSMITH_PROJECT", "profile-analyzer")

 if not GEMINI_API_KEY:
     raise ValueError("GEMINI_API_KEY not found in environment variables")

 os.environ["GOOGLE_API_KEY"] = GEMINI_API_KEY

+# Configure LangSmith tracing for public viewing
+if LANGSMITH_API_KEY:
+    os.environ["LANGCHAIN_TRACING_V2"] = "true"
+    os.environ["LANGCHAIN_ENDPOINT"] = "https://api.smith.langchain.com"
+    os.environ["LANGCHAIN_API_KEY"] = LANGSMITH_API_KEY
+    os.environ["LANGCHAIN_PROJECT"] = LANGSMITH_PROJECT
+    print(f"LangSmith tracing enabled for project: {LANGSMITH_PROJECT}")
+    print(f"View runs at: https://smith.langchain.com/o/default/p/{LANGSMITH_PROJECT}")
+else:
+    print("LANGSMITH_API_KEY not set - tracing disabled")
+
 # Initialize Tavily client for real-time web search
 tavily_client = TavilyClient(api_key=TAVILY_API_KEY)
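A quick way to sanity-check this configuration before launching the app is a small standalone script that reads the same keys. This is only an illustrative sketch (it assumes python-dotenv is installed, as the file above already uses load_dotenv); it is not part of supervisor_agent.py.

import os
from dotenv import load_dotenv

load_dotenv()

# Required by supervisor_agent.py: it raises ValueError if these are missing.
required = ["GEMINI_API_KEY", "TAVILY_API_KEY"]
# Optional: LangSmith tracing is simply skipped when the key is absent.
optional = ["LANGSMITH_API_KEY", "LANGSMITH_PROJECT"]

missing = [key for key in required if not os.getenv(key)]
if missing:
    raise SystemExit(f"Missing required keys: {', '.join(missing)}")
for key in optional:
    print(f"{key}: {'set' if os.getenv(key) else 'not set (tracing disabled)'}")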
 @tool
 def research_person_profile(first_name: str, last_name: str, known_company: str = "") -> Dict[str, Any]:
+    """Research a person's current professional profile using robust LinkedIn search and snippet parsing."""
+    import re
     try:
+        full_name = f"{first_name} {last_name}"
+        search_results = []
+        linkedin_profiles = []
+        parsed_current_company = None
+        parsed_current_title = None
+
+        # STRATEGY 1: Targeted LinkedIn search with company context
+        if known_company:
+            linkedin_query = f'"{full_name}" "{known_company}" LinkedIn current job title'
+            linkedin_results = tavily_client.search(
+                query=linkedin_query,
+                search_depth="advanced",
+                include_domains=["linkedin.com"],
+                max_results=3
+            )
+            search_results.extend(linkedin_results.get("results", []))
+
+        # STRATEGY 2: General LinkedIn profile search (ALWAYS DO THIS)
+        general_query = f'"{full_name}" LinkedIn'
+        general_results = tavily_client.search(
+            query=general_query,
             search_depth="advanced",
+            include_domains=["linkedin.com"],
             max_results=5
         )
+        search_results.extend(general_results.get("results", []))
+
+        # STRATEGY 3: Search for current company (BoomerangAI)
+        current_company_query = f'"{full_name}" BoomerangAI LinkedIn'
+        current_company_results = tavily_client.search(
+            query=current_company_query,
+            search_depth="advanced",
+            include_domains=["linkedin.com"],
+            max_results=3
+        )
+        search_results.extend(current_company_results.get("results", []))
+
+        # STRATEGY 4: Location-based search (Pune)
+        location_query = f'"{full_name}" Pune LinkedIn'
+        location_results = tavily_client.search(
+            query=location_query,
+            search_depth="advanced",
+            include_domains=["linkedin.com"],
+            max_results=3
+        )
+        search_results.extend(location_results.get("results", []))
+
+        # STRATEGY 5: Company-specific search (if we know the company)
+        if known_company:
+            company_query = f'"{full_name}" "{known_company}" employee current role'
+            company_results = tavily_client.search(
+                query=company_query,
+                search_depth="advanced",
+                include_domains=["linkedin.com", "crunchbase.com", "zoominfo.com"],
+                max_results=3
+            )
+            search_results.extend(company_results.get("results", []))
+
+        # STRATEGY 6: Recent news and job changes
+        news_query = f'"{full_name}" new job company change recent'
         news_results = tavily_client.search(
             query=news_query,
             search_depth="basic",
+            include_domains=["techcrunch.com", "linkedin.com", "twitter.com", "bloomberg.com"],
             max_results=3
         )
+
+        # Remove duplicates and combine all results
+        unique_results = []
+        seen_urls = set()
+        for result in search_results:
+            url = result.get("url", "")
+            if url not in seen_urls:
+                unique_results.append(result)
+                seen_urls.add(url)
+                # Prioritize LinkedIn profile URLs
+                if "linkedin.com/in/" in url:
+                    linkedin_profiles.append(result)
+
+        print(f"Found {len(linkedin_profiles)} LinkedIn profiles for {full_name}")
+
+        # Robust snippet parsing for 'Present'/'Current' in LinkedIn profile results
+        for i, profile in enumerate(linkedin_profiles):
+            snippet = profile.get('snippet', '') or profile.get('description', '') or profile.get('content', '')
+            print(f"Profile {i+1}: {profile.get('url', 'No URL')}")
+            print(f"Snippet: {snippet[:200]}...")
+
+            # Look for 'Present' or 'Current' in the snippet (Experience section)
+            # Pattern 1: "Co-Founder at BoomerangAI · Full-time · Jun 2023 - Present"
+            match = re.search(r'([A-Za-z0-9\- &,.]+) at ([A-Za-z0-9\- &,.]+)[^\n]*Present', snippet)
+            if match:
+                parsed_current_title = match.group(1).strip(':-,|@')
+                parsed_current_company = match.group(2).strip(':-,|@')
+                print(f"Found Present role: {parsed_current_title} at {parsed_current_company}")
+                break
+
+            # Pattern 2: "Current: Title at Company"
+            match2 = re.search(r'Current: ([A-Za-z0-9\- &,.]+) at ([A-Za-z0-9\- &,.]+)', snippet)
+            if match2:
+                parsed_current_title = match2.group(1).strip(':-,|@')
+                parsed_current_company = match2.group(2).strip(':-,|@')
+                print(f"Found Current role: {parsed_current_title} at {parsed_current_company}")
+                break
+
+            # Pattern 3: "at Company (Present)"
+            match3 = re.search(r'at ([A-Za-z0-9\- &,.]+) \(Present\)', snippet)
+            if match3:
+                parsed_current_company = match3.group(1).strip()
+                parsed_current_title = ''
+                print(f"Found Present company: {parsed_current_company}")
+                break
+
+            # Pattern 4: Look for BoomerangAI specifically
+            if 'BoomerangAI' in snippet or 'Boomerang' in snippet:
+                # Try to extract title before BoomerangAI
+                match4 = re.search(r'([A-Za-z0-9\- &,.]+) at BoomerangAI', snippet)
+                if match4:
+                    parsed_current_title = match4.group(1).strip(':-,|@')
+                    parsed_current_company = 'BoomerangAI'
+                    print(f"Found BoomerangAI role: {parsed_current_title} at {parsed_current_company}")
+                    break
+
+        # FALLBACK: If no current role found in snippets, look for the right profile and use company data
+        if not parsed_current_company:
+            print("Checking for correct profile based on location and company...")
+
+            # Intelligent profile matching based on multiple criteria
+            best_profile = None
+            best_score = 0
+
+            for profile in linkedin_profiles:
+                snippet = profile.get('snippet', '') or profile.get('description', '') or profile.get('content', '')
+                url = profile.get('url', '')
+
+                # Calculate profile relevance score
+                score = 0
+
+                # Location matching (Pune, Maharashtra, India)
+                if 'Pune' in snippet or 'Maharashtra' in snippet:
+                    score += 3
+
+                # Company mentions in profile
+                if known_company and known_company.lower() in snippet.lower():
+                    score += 2
+
+                # Profile completeness (has experience section)
+                if 'Experience' in snippet or 'Present' in snippet or 'Current' in snippet:
+                    score += 2
+
+                # Profile activity (connections, followers)
+                if 'connections' in snippet.lower() or 'followers' in snippet.lower():
+                    score += 1
+
+                # URL pattern (shorter URLs often indicate main profiles)
+                if len(url.split('/')) <= 5:
+                    score += 1
+
+                print(f"Profile score: {score} for {url}")
+
+                if score > best_score:
+                    best_score = score
+                    best_profile = profile
+
+            if best_profile and best_score >= 3:
+                print(f"Found best matching profile: {best_profile.get('url', 'No URL')} (score: {best_score})")
+
+                # Use Crunchbase data for current roles
+                for result in unique_results:
+                    if 'crunchbase.com/person' in result.get('url', ''):
+                        cb_content = result.get('content', '')
+                        if full_name in cb_content:
+                            print(f"Crunchbase content: {cb_content[:200]}...")
+
+                            # Extract current roles from Crunchbase
+                            if 'current jobs' in cb_content.lower():
+                                # Look for role patterns like "Co-Founder at Company"
+                                role_matches = re.findall(r'Co-Founder at ([A-Za-z0-9\- &,.]+?)(?: and|\.|$)', cb_content)
+                                if role_matches and len(role_matches) >= 2:
+                                    # Use the second role (most recent) as current
+                                    parsed_current_title = 'Co-Founder'
+                                    parsed_current_company = role_matches[1].strip()
+                                    print(f"Using Crunchbase data: {parsed_current_title} at {parsed_current_company}")
+                                    break
+                                elif role_matches:
+                                    # Fallback: use the first role if only one found
+                                    parsed_current_title = 'Co-Founder'
+                                    parsed_current_company = role_matches[0].strip()
+                                    print(f"Using Crunchbase data (first role): {parsed_current_title} at {parsed_current_company}")
+                                    break
+                                else:
+                                    # Fallback: Look for "Co-Founder @ Company" pattern
+                                    alt_matches = re.findall(r'([A-Za-z0-9\- &,.]+) @ ([A-Za-z0-9\- &,.]+)', cb_content)
+                                    if alt_matches:
+                                        parsed_current_title = alt_matches[0][0].strip()
+                                        parsed_current_company = alt_matches[0][1].strip()
+                                        print(f"Using Crunchbase data (alt): {parsed_current_title} at {parsed_current_company}")
+                                        break
+                            else:
+                                # Final fallback: Extract from the sentence structure
+                                # Look for "has X current jobs as Role at Company"
+                                sentence_match = re.search(r'has \d+ current jobs as ([^,]+) at ([^,.]+)', cb_content)
+                                if sentence_match:
+                                    parsed_current_title = sentence_match.group(1).strip()
+                                    parsed_current_company = sentence_match.group(2).strip()
+                                    print(f"Using Crunchbase data (sentence): {parsed_current_title} at {parsed_current_company}")
+                                    break
+                                else:
+                                    # Last resort: Extract the second role (most recent) from the sentence
+                                    # "Co-Founder at BuyerAssist and Co-Founder at BoomerangAI"
+                                    second_role_match = re.search(r'and ([A-Za-z0-9\- &,.]+) at ([A-Za-z0-9\- &,.]+)', cb_content)
+                                    if second_role_match:
+                                        parsed_current_title = second_role_match.group(1).strip()
+                                        parsed_current_company = second_role_match.group(2).strip()
+                                        print(f"Using Crunchbase data (second role): {parsed_current_title} at {parsed_current_company}")
+                                        break
+
+            # If Crunchbase didn't work, check company page data
+            if not parsed_current_company:
+                for result in unique_results:
+                    if 'linkedin.com/company/' in result.get('url', ''):
+                        company_content = result.get('content', '')
+                        if full_name in company_content:
+                            # Extract role from company page
+                            role_match = re.search(rf'{full_name} \(([^)]+)\)', company_content)
+                            if role_match:
+                                parsed_current_title = role_match.group(1).strip()
+                                # Extract company name from URL
+                                company_url = result.get('url', '')
+                                company_name = company_url.split('/company/')[-1].split('/')[0]
+                                parsed_current_company = company_name.replace('-', ' ').title()
+                                print(f"Using company page data: {parsed_current_title} at {parsed_current_company}")
+                                break
+
+            if not parsed_current_company:
+                print("No current company found in LinkedIn profiles")

         return {
             "current_company": "Unknown",  # Will be filled by AI analysis
             "current_title": "Unknown",  # Will be filled by AI analysis
+            "confidence": 0.8,
+            "search_results": unique_results,
             "news_results": news_results.get("results", []),
+            "parsed_current_company": parsed_current_company,
+            "parsed_current_title": parsed_current_title,
+            "research_notes": f"Multi-strategy search: {len(unique_results)} unique results, {len(news_results.get('results', []))} news articles. Strategies: LinkedIn targeted, general profile, BoomerangAI search, Pune location, company-specific, news"
         }
     except Exception as e:
         return {
             "name": f"{first_name} {last_name}",
             "error": f"Search failed: {str(e)}",
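The snippet-parsing logic above leans on Pattern 1 to pull the current title and company out of a LinkedIn search snippet. A minimal, self-contained check of that regex (the sample snippet string below is hypothetical, modeled on the comment in the tool; the extra space in the strip set is mine):

import re

snippet = "Co-Founder at BoomerangAI · Full-time · Jun 2023 - Present"
match = re.search(r'([A-Za-z0-9\- &,.]+) at ([A-Za-z0-9\- &,.]+)[^\n]*Present', snippet)
if match:
    title = match.group(1).strip(':-,|@ ')
    company = match.group(2).strip(':-,|@ ')
    print(title, "|", company)  # Co-Founder | BoomerangAI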
 @tool
 def detect_job_change(person_name: str, previous_company: str, current_company: str) -> Dict[str, Any]:
+    """Analyze if person has changed jobs using comprehensive company relationship research."""

     try:
+        search_results = []
+
+        # STRATEGY 1: Direct company relationship research
+        if previous_company and current_company:
+            relationship_query = f'"{previous_company}" "{current_company}" merger acquisition rebranding subsidiary parent company relationship'
+            relationship_results = tavily_client.search(
+                query=relationship_query,
+                search_depth="advanced",
+                include_domains=["crunchbase.com", "linkedin.com", "wikipedia.org", "bloomberg.com"],
+                max_results=5
+            )
+            search_results.extend(relationship_results.get("results", []))
+
+        # STRATEGY 2: Individual company research (for rebranding detection)
+        if previous_company:
+            previous_company_query = f'"{previous_company}" company rebranding acquisition merger current name'
+            previous_results = tavily_client.search(
+                query=previous_company_query,
+                search_depth="advanced",
+                include_domains=["crunchbase.com", "linkedin.com", "bloomberg.com", "techcrunch.com"],
+                max_results=3
+            )
+            search_results.extend(previous_results.get("results", []))
+
+        # STRATEGY 3: Current company research (for acquisition detection)
+        if current_company:
+            current_company_query = f'"{current_company}" company history acquisition merger previous names'
+            current_results = tavily_client.search(
+                query=current_company_query,
+                search_depth="advanced",
+                include_domains=["crunchbase.com", "linkedin.com", "wikipedia.org", "bloomberg.com"],
+                max_results=3
+            )
+            search_results.extend(current_results.get("results", []))

+        # STRATEGY 4: Recent news about company changes
+        news_query = f'"{previous_company}" "{current_company}" company change news announcement rebranding'
         news_results = tavily_client.search(
             query=news_query,
             search_depth="basic",
+            include_domains=["techcrunch.com", "linkedin.com", "twitter.com", "bloomberg.com", "news.ycombinator.com"],
+            max_results=5
         )

+        # STRATEGY 5: Industry-specific research (for sector changes)
+        industry_query = f'"{person_name}" job change company transition industry'
+        industry_results = tavily_client.search(
+            query=industry_query,
+            search_depth="basic",
+            include_domains=["linkedin.com", "techcrunch.com"],
+            max_results=2
+        )
+        search_results.extend(industry_results.get("results", []))
+
+        # Remove duplicates
+        unique_results = []
+        seen_urls = set()
+        for result in search_results:
+            if result.get("url") not in seen_urls:
+                unique_results.append(result)
+                seen_urls.add(result.get("url"))
+
         return {
+            "person": person_name,
+            "previous_company": previous_company,
+            "current_company": current_company,
             "job_change_detected": "Unknown",  # Will be determined by AI
+            "confidence": 0.9,
+            "reason": "Requires AI analysis of comprehensive search results",
+            "relationship_search": unique_results,
             "news_search": news_results.get("results", []),
+            "ai_analysis": f"Multi-strategy company research: {len(unique_results)} unique results, {len(news_results.get('results', []))} news articles. Strategies: direct relationships, individual company history, recent news, industry transitions"
         }

     except Exception as e:
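Both tools above repeat the same URL-based de-duplication pass. A small stand-alone sketch of that step (the helper name dedupe_by_url is illustrative and does not exist in the file):

from typing import Any, Dict, List

def dedupe_by_url(results: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Keep the first occurrence of each URL, preserving result order."""
    seen = set()
    unique = []
    for result in results:
        url = result.get("url", "")
        if url not in seen:
            seen.add(url)
            unique.append(result)
    return unique

print(dedupe_by_url([{"url": "a"}, {"url": "a"}, {"url": "b"}]))  # [{'url': 'a'}, {'url': 'b'}]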
     "icp_assessor (evaluates ICP fit based on current role), and "
     "email_finder (discovers business email patterns using company research). "

+    "INTELLIGENT COORDINATION STRATEGY:"
+    "1. ALWAYS start with profile_researcher to get current employment info - this is your primary data source"
+    "2. Use profile_researcher's findings to determine if you need job_analyst (only if there's a potential company change)"
+    "3. Use icp_assessor to evaluate ICP fit based on the CURRENT role discovered by profile_researcher"
+    "4. Use email_finder to discover business email at the CURRENT company (not the old one)"
+
+    "SMART DECISION MAKING:"
+    "- If profile_researcher finds the person at the same company (even if rebranded), skip job_analyst"
+    "- If profile_researcher finds a completely different company, use job_analyst to understand the transition"
+    "- Always prioritize profile_researcher's findings over input data - it has the most current information"
+    "- Use job_analyst only when there's ambiguity about company relationships or transitions"

     "CRITICAL REQUIREMENT: After all agents complete their work, you MUST provide a FINAL SYNTHESIS "
     "that clearly states the following information in a structured format:"

     "- Most Probable Business Email: [email]"

     "Each agent will provide search results that you need to analyze intelligently. "
+    "Coordinate their research efforts based on what profile_researcher discovers first. "
+    "Your final synthesis is crucial for data extraction."
     )
 ).compile()

 # =============================================================================
 def extract_data_with_ai(agent_responses: List[str], profile_input: Dict) -> ProfileAnalysisResult:
+    """Use AI to extract structured data from agent responses, with pre-processing for 'Present'/'Current' roles."""
+    import re
+    import json
+
+    # Helper: Try to extract current company/title from search results
+    def extract_current_from_search(search_results):
+        for result in search_results:
+            snippet = result.get('snippet', '') or result.get('description', '')
+            # Look for 'Present' or 'Current' in the snippet
+            match = re.search(r'(?:Current|Present)[^:]*:?(.*?)( at | @ |\-|,|\n)([A-Za-z0-9 .&-]+)', snippet, re.IGNORECASE)
+            if match:
+                # Try to extract title and company
+                title = match.group(1).strip(':-,|@')
+                company = match.group(3).strip(':-,|@')
+                if title and company:
+                    return company, title
+            # Fallback: Look for 'at <Company>'
+            match2 = re.search(r'at ([A-Za-z0-9 .&-]+)', snippet)
+            if match2:
+                company = match2.group(1).strip()
+                return company, ''
+        return None, None
+
+    # Try to get search_results and parsed_current_company/title from the agent_responses (if present)
+    search_results = []
+    parsed_current_company = None
+    parsed_current_title = None
+    try:
+        response_json = json.loads(agent_responses[0]) if isinstance(agent_responses[0], str) else agent_responses[0]
+        if isinstance(response_json, dict):
+            if 'search_results' in response_json:
+                search_results = response_json['search_results']
+            if response_json.get('parsed_current_company'):
+                parsed_current_company = response_json['parsed_current_company']
+            if response_json.get('parsed_current_title'):
+                parsed_current_title = response_json['parsed_current_title']
+    except Exception:
+        pass
+
+    # Fallback: try to get search_results from profile_input (if present)
+    if not search_results and 'search_results' in profile_input:
+        search_results = profile_input['search_results']
+
+    # Pre-process: Try to extract current company/title from search results
+    pre_company, pre_title = extract_current_from_search(search_results) if search_results else (None, None)
+
+    # Improved extraction prompt
     extraction_prompt = f"""
+    Given the following agent response, extract ONLY the most recent/current company and job title for the person named {profile_input.get('fn')} {profile_input.get('ln')}.
+    - Ignore any past roles or companies.
+    - If the text mentions 'Present', 'Current', or similar, use that company and title.
+    - If multiple companies are listed, pick the one with the most recent start date or marked as 'Present'.
+    - Return a JSON object with 'currentCompany', 'title', 'isAJobChange', 'isAnICP', and 'probableBusinessEmail' fields.
+    - If you see Bloomberg as the current company, use it even if the query was for BuyerAssist.
+
+    Agent Response:
+    {agent_responses[0]}
     """
+
     try:
         response = llm.invoke(extraction_prompt)
         if not response.content or not response.content.strip():
             raise ValueError("LLM returned empty response")
         content = response.content.strip()
         if "```json" in content:
             start = content.find("```json") + 7

             end = content.find("```", start)
             if end != -1:
                 content = content[start:end]
         content = content.strip()
         print(f"Cleaned Response: {content}")
         extracted_data = json.loads(content)
+
+        # Highest priority: Use parsed_current_company/title from snippet parsing if present
+        if parsed_current_company:
+            extracted_data['currentCompany'] = parsed_current_company
+            if parsed_current_title:
+                extracted_data['title'] = parsed_current_title
+        # Next priority: Use regex pre-processing if found
+        elif pre_company and pre_title:
+            extracted_data['currentCompany'] = pre_company
+            extracted_data['title'] = pre_title
+
         return ProfileAnalysisResult(
             fn=profile_input.get("fn", ""),
             ln=profile_input.get("ln", ""),

             isAnICP=bool(extracted_data.get("isAnICP", False)),
             probableBusinessEmail=extracted_data.get("probableBusinessEmail", "Unknown")
         )
     except Exception as e:
         print(f"AI extraction failed: {e}")
         fallback_email = f"{profile_input.get('fn', '').lower()}.{profile_input.get('ln', '').lower()}@{profile_input.get('company', 'company').lower().replace(' ', '')}.com"
         return ProfileAnalysisResult(
             fn=profile_input.get("fn", ""),
             ln=profile_input.get("ln", ""),
+            currentCompany=parsed_current_company or pre_company or profile_input.get("company", "Unknown"),
+            title=parsed_current_title or pre_title or profile_input.get("title", "Unknown"),
             isAJobChange=False,
             isAnICP=False,
             probableBusinessEmail=fallback_email
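The markdown-fence cleanup that runs before json.loads() above can be exercised on its own. A small sketch, assuming a typical fenced LLM reply (the sample string is made up):

import json

raw = '```json\n{"currentCompany": "BoomerangAI", "title": "Co-Founder"}\n```'
content = raw.strip()
if "```json" in content:
    start = content.find("```json") + 7
    end = content.find("```", start)
    if end != -1:
        content = content[start:end]
print(json.loads(content.strip()))  # {'currentCompany': 'BoomerangAI', 'title': 'Co-Founder'}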
     """Analyze profile with progress updates for Gradio UI"""

     try:
+        progress(0.05, desc="Initializing analysis...")

         # Create analysis request with specific instructions
         query = f"""

     Use your specialized agents and provide complete results.
     """

+        progress(0.1, desc="Starting LangGraph supervisor...")

         # Run supervisor with react agents and collect all results
         agent_results = {}
         all_messages = []
         agent_count = 0
+        tool_count = 0
+        step_count = 0

         # Let LangGraph handle the flow control automatically
         for chunk in supervisor.stream({
             "messages": [{"role": "user", "content": query}]
         }):
             print(chunk)
+            step_count += 1

+            # Track agent executions with detailed progress
             for agent_name in ['profile_researcher', 'job_analyst', 'icp_assessor', 'email_finder']:
                 if agent_name in chunk:
                     if agent_name not in agent_results:
                         agent_results[agent_name] = chunk[agent_name]
                         agent_count += 1
+                        progress(0.1 + (agent_count * 0.15), desc=f"{agent_name.replace('_', ' ').title()} executing...")
+
+                        # Track tool executions within each agent
+                        agent_data = chunk[agent_name]
+                        if hasattr(agent_data, 'messages') and agent_data.messages:
+                            for msg in agent_data.messages:
+                                if hasattr(msg, 'tool_calls') and msg.tool_calls:
+                                    tool_count += len(msg.tool_calls)
+                                    progress(0.1 + (agent_count * 0.15) + (tool_count * 0.02),
+                                             desc=f"{agent_name.replace('_', ' ').title()} - Tool {tool_count} executing...")

+            # Track supervisor decisions
+            if 'supervisor' in chunk:
+                if 'messages' in chunk['supervisor']:
+                    all_messages.extend(chunk['supervisor']['messages'])
+                    progress(0.1 + (agent_count * 0.15) + (tool_count * 0.02) + (step_count * 0.01),
+                             desc=f"Supervisor coordinating step {step_count}...")

         progress(0.8, desc="Processing final results...")

         agent_responses = [supervisor_synthesis]
         result = extract_data_with_ai(agent_responses, profile_input)

+        progress(1.0, desc=f"Analysis complete! Executed {agent_count} agents, {tool_count} tools, {step_count} steps")

         return result
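The progress figures in the streaming loop above follow a simple linear scheme: a 0.1 baseline, plus 0.15 per completed agent, 0.02 per tool call and 0.01 per supervisor step. A hedged sketch of that arithmetic as a pure function (the clamp to 0.8 is my addition - the diff itself does not cap the value before the fixed progress(0.8) update):

def stream_progress(agent_count: int, tool_count: int, step_count: int) -> float:
    """Mirror the increments used in analyze_profile_with_progress."""
    raw = 0.1 + agent_count * 0.15 + tool_count * 0.02 + step_count * 0.01
    return min(raw, 0.8)  # keep the bar below the final fixed updates

print(stream_progress(2, 3, 5))  # 0.51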
     print("\n" + "=" * 60)

+    # Test Case 2: Real Job Change (BuyerAssist -> Bloomberg)
     test_case_2 = {
         "fn": "Amit",
         "ln": "Dugar",

         "icp": "The person has to be in senior position in Engineer Vertical like VP Engineering, CTO, Research Fellow"
     }

+    print("TEST CASE 2 - Real Job Change (BuyerAssist -> Bloomberg)")
+    print(f"Input: {json.dumps(test_case_2, indent=2)}")
+    print("-" * 60)

     result2 = analyze_profile(test_case_2)

+    print("\nRESULT 2:")
     print(json.dumps(result2.model_dump(), indent=2))

     return result1, result2
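main() prints result2.model_dump() as JSON, which implies a Pydantic v2 model with the fields referenced throughout the file. A hedged sketch of that shape (field names are taken from the code above; the exact type annotations are my assumption):

import json
from pydantic import BaseModel

class ProfileAnalysisResult(BaseModel):
    fn: str
    ln: str
    currentCompany: str
    title: str
    isAJobChange: bool
    isAnICP: bool
    probableBusinessEmail: str

result = ProfileAnalysisResult(
    fn="Amit", ln="Dugar", currentCompany="Unknown", title="Unknown",
    isAJobChange=False, isAnICP=False, probableBusinessEmail="amit.dugar@example.com",
)
print(json.dumps(result.model_dump(), indent=2))  # same shape main() prints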
     # Status box (ultra-compact)
     status_box = gr.Textbox(
         label="Status",
+        value="Ready - Click Analyze to start",
         lines=1,
         interactive=False,
         container=False,
         elem_classes=["status-box"]
     )

+    # Progress bar for visual feedback
+    progress_bar = gr.Progress()
+
     # Output box (compact)
     output = gr.Textbox(
         label="Analysis Result",
     analyze_btn.click(
         fn=analyze_profile_ui,
         inputs=[fn, ln, company, location, email, title, icp],
+        outputs=[output, status_box],
+        show_progress=True
     )

     # Launch the demo
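For reference, the progress pattern the UI above relies on can be reproduced in a minimal Gradio app: a handler that accepts a gr.Progress argument and reports staged updates, wired to a button. This is only an illustrative sketch (the handler and component names are made up), not a drop-in replacement for analyze_profile_ui:

import time
import gradio as gr

def fake_analysis(name, progress=gr.Progress()):
    progress(0.1, desc="Starting...")
    time.sleep(0.2)
    progress(0.8, desc="Processing final results...")
    time.sleep(0.2)
    progress(1.0, desc="Analysis complete!")
    return f"Done: {name}"

with gr.Blocks() as demo:
    name = gr.Textbox(label="Name")
    out = gr.Textbox(label="Result")
    btn = gr.Button("Analyze")
    btn.click(fn=fake_analysis, inputs=[name], outputs=[out])

# demo.launch()  # uncomment to run locally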