File size: 30,086 Bytes
c96cc05
0885c85
 
 
9db0a76
0885c85
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9db0a76
 
0885c85
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9db0a76
0885c85
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9db0a76
 
0885c85
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9db0a76
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0885c85
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9db0a76
 
 
 
 
 
 
 
 
 
 
 
0885c85
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
from __future__ import annotations
import os
import chainlit as cl
import pandas as pd
from typing import List, Dict, Any, TypedDict, Callable, Annotated, Literal, Optional, Union, Tuple, TypeVar
from qdrant_client import QdrantClient
from qdrant_client.models import Distance, VectorParams, PointStruct
from langchain_core.messages import HumanMessage, AIMessage, SystemMessage
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langgraph.graph import StateGraph, END
from langchain.tools import Tool
from tavily import TavilyClient
from dotenv import load_dotenv
import json
import asyncio
import time
from functools import wraps
from pydantic import BaseModel, Field
from langchain_core.runnables import RunnableConfig
from langchain_core.runnables.utils import Output
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from dataclasses import dataclass, field
from state import FounderAnalysisState

# Pull settings from a local .env file (if one exists) into the environment
load_dotenv()

# Fail fast at import time when a required credential is missing or empty
for _required_key in ("OPENAI_API_KEY", "TAVILY_API_KEY"):
    if os.getenv(_required_key):
        continue
    raise ValueError(f"{_required_key} not found in environment variables. Please add it to your .env file.")
del _required_key

# Configuration
# Name of the Qdrant collection that stores the founder profiles.
COLLECTION_NAME = "founders"
VECTOR_DIM = 1536  # OpenAI embedding dimension
# Embedding model name handed to OpenAIEmbeddings.
EMBEDDING_MODEL = "text-embedding-3-small"
# Chat model name handed to ChatOpenAI.
LLM_MODEL = "gpt-4o-mini"
# Presumably the cap on profiles retrieved per query - TODO confirm against
# the search and filter code.
MAX_RELEVANT_CHUNKS = 3
# NOTE(review): no use of this threshold is visible in this part of the file;
# confirm whether vector search is meant to apply it as a score cut-off.
SIMILARITY_THRESHOLD = 0.75
DEFAULT_TIMEOUT = 60  # Default timeout in seconds
API_RATE_LIMIT_DELAY = 1  # Delay between API calls in seconds

# Type variable bound to plain dict-shaped state objects.
# NOTE(review): not referenced in the visible part of this file.
StateType = TypeVar("StateType", bound=Dict[str, Any])

# Decorator for adding timeouts to async functions
def async_timeout(timeout_seconds=DEFAULT_TIMEOUT):
    """Build a decorator that bounds the run time of an async callable.

    The wrapped coroutine is awaited through ``asyncio.wait_for``. When the
    limit is exceeded a chat message naming the function is sent, and then:

    * if the call received a dict for a parameter named ``state`` (passed
      positionally or by keyword), a copy of that state with an ``"error"``
      entry describing the timeout is returned;
    * otherwise ``asyncio.TimeoutError`` is re-raised.

    Args:
        timeout_seconds: Maximum number of seconds the wrapped call may take.

    Returns:
        A decorator to apply to ``async def`` functions or methods.
    """
    import inspect  # local import so this block does not depend on file-level imports

    def decorator(func):
        signature = inspect.signature(func)

        def find_state(args, kwargs):
            """Return the argument bound to ``state`` for this call, or None."""
            try:
                bound = signature.bind_partial(*args, **kwargs)
            except TypeError:
                return None
            return bound.arguments.get("state")

        @wraps(func)
        async def wrapper(*args, **kwargs):
            try:
                return await asyncio.wait_for(func(*args, **kwargs), timeout=timeout_seconds)
            except asyncio.TimeoutError:
                # Create a meaningful timeout message
                func_name = func.__name__
                await cl.Message(content=f"⏱️ Operation timed out: {func_name} took longer than {timeout_seconds} seconds").send()
                # The state is looked up by parameter name rather than only in
                # kwargs, because callers pass it positionally.
                state = find_state(args, kwargs)
                if isinstance(state, dict):
                    return {**state, "error": f"Operation timed out after {timeout_seconds} seconds"}
                raise
        return wrapper
    return decorator

# Rate limiter for API calls
async def rate_limit():
    """Suspend the calling coroutine for API_RATE_LIMIT_DELAY seconds.

    Awaited between consecutive external API requests to space them out.
    """
    pause_seconds = API_RATE_LIMIT_DELAY
    await asyncio.sleep(pause_seconds)

class VectorStore:
    """Wrapper around an in-memory Qdrant collection of founder profiles."""

    def __init__(self):
        self.client = QdrantClient(":memory:")  # In-memory Qdrant instance
        self._create_collection()

    def _create_collection(self):
        """(Re)create the founders collection with cosine-distance vectors.

        NOTE: this calls ``recreate_collection``; per the qdrant-client
        documentation that replaces any existing collection of the same name
        rather than leaving it in place.
        """
        self.client.recreate_collection(
            collection_name=COLLECTION_NAME,
            vectors_config=VectorParams(size=VECTOR_DIM, distance=Distance.COSINE)
        )

    def upsert_profiles(self, embeddings: List[List[float]], metadata: List[Dict[str, Any]]):
        """Upsert founder profiles with their embeddings and metadata.

        Point ids are the positions in the input lists (0, 1, 2, ...), so a
        later call overwrites the points stored under the same positions.

        Args:
            embeddings: One vector per profile.
            metadata: One payload dict per profile, in the same order.

        Raises:
            ValueError: If the two lists differ in length.
        """
        if len(embeddings) != len(metadata):
            raise ValueError(
                f"embeddings ({len(embeddings)}) and metadata ({len(metadata)}) must have the same length"
            )
        if not embeddings:
            return  # nothing to store

        points = [
            PointStruct(id=idx, vector=embedding, payload=payload)
            for idx, (embedding, payload) in enumerate(zip(embeddings, metadata))
        ]
        self.client.upsert(
            collection_name=COLLECTION_NAME,
            points=points
        )

    def search_profiles(self, query_vector: List[float], limit: int = 5) -> List[Dict[str, Any]]:
        """Return the payloads of the ``limit`` points closest to ``query_vector``."""
        results = self.client.search(
            collection_name=COLLECTION_NAME,
            query_vector=query_vector,
            limit=limit
        )
        return [hit.payload for hit in results]

    def get_profile_by_metadata(self, metadata_key: str, metadata_value: Any) -> List[Dict[str, Any]]:
        """Return the payloads of all points whose ``metadata_key`` equals ``metadata_value``.

        The collection is scrolled page by page until it is exhausted, so the
        result is not limited to a single page of matches.
        """
        from qdrant_client.http import models as rest

        filter_condition = rest.Filter(
            must=[
                rest.FieldCondition(
                    key=metadata_key,
                    match=rest.MatchValue(value=metadata_value)
                )
            ]
        )

        page_size = 100
        payloads: List[Dict[str, Any]] = []
        offset = None
        while True:
            # scroll() returns (points, next_offset); next_offset is None on the last page
            points, offset = self.client.scroll(
                collection_name=COLLECTION_NAME,
                scroll_filter=filter_condition,
                limit=page_size,
                offset=offset,
            )
            payloads.extend(point.payload for point in points)
            if offset is None:
                break

        return payloads

class FounderAnalysisSystem:
    def __init__(self):
        """Create the clients, vector store and workflow graph used by the system."""
        # Embedding client configured with EMBEDDING_MODEL.
        self.embeddings = OpenAIEmbeddings(model=EMBEDDING_MODEL)
        # In-memory Qdrant wrapper; its constructor (re)creates the collection.
        self.vector_store = VectorStore()
        # Chat model used for the per-profile analysis; requests use DEFAULT_TIMEOUT.
        self.llm = ChatOpenAI(model=LLM_MODEL, timeout=DEFAULT_TIMEOUT)
        # No key is passed here - presumably the client reads TAVILY_API_KEY
        # from the environment; TODO confirm against the tavily client docs.
        self.tavily_client = TavilyClient()
        # Graph definition only: within this class process_message() calls the
        # node methods directly rather than running this graph.
        self.workflow = self._create_workflow()
        # Chat message reused for progress updates; None until the first update.
        self.progress_message = None

    def _create_workflow(self) -> StateGraph:
        """Assemble the founder-analysis graph definition.

        Registers the six processing nodes, routes out of ``process_query``
        through ``query_router`` (search / filter / error), chains the
        remaining nodes linearly to ``END`` and returns the (uncompiled)
        ``StateGraph``.
        """
        # Plain dicts are used as the graph state
        graph = StateGraph(dict)

        # Node name -> bound method implementing it, in registration order
        node_handlers = (
            ("process_query", self.process_query),
            ("vector_search", self.vector_search),
            ("filter_by_metadata", self.filter_by_metadata),
            ("web_search", self.web_search),
            ("analyze_profiles", self.analyze_profiles),
            ("format_response", self.format_response),
        )
        for node_name, handler in node_handlers:
            graph.add_node(node_name, handler)

        # The router's return value selects the branch taken after process_query
        routes = {
            "search": "vector_search",
            "filter": "filter_by_metadata",
            "error": END,
        }
        graph.add_conditional_edges("process_query", self.query_router, routes)

        # Unconditional transitions
        linear_edges = (
            ("vector_search", "web_search"),
            ("filter_by_metadata", "web_search"),
            ("web_search", "analyze_profiles"),
            ("analyze_profiles", "format_response"),
            ("format_response", END),
        )
        for source, target in linear_edges:
            graph.add_edge(source, target)

        graph.set_entry_point("process_query")

        return graph

    async def update_progress(self, message, step, total_steps):
        """Update the progress message to show the system is still working"""
        progress_text = f"⏳ {message} (Step {step}/{total_steps})"
        if self.progress_message is None:
            self.progress_message = cl.Message(content=progress_text)
            await self.progress_message.send()
        else:
            # Fix: Use update() without content parameter, then set content property
            await self.progress_message.update()
            self.progress_message.content = progress_text

    @async_timeout(30)  # 30 second timeout for query processing
    async def process_query(self, state: FounderAnalysisState) -> FounderAnalysisState:
        """Reset the working state and classify the query as search or filter.

        Recognised filter forms (the ``filter`` prefix is case-insensitive):

        * ``filter:<key>:<value>`` - match ``<value>`` within field ``<key>``
        * ``filter:<value>`` / ``filter <value>`` - match across all fields
          (``filter_key`` is set to the special value ``"all_fields"``)

        Anything else is treated as a free-text search.

        Args:
            state: Workflow state; only its ``"query"`` entry is kept, every
                other field is reset. A non-dict value is replaced by an
                empty state.

        Returns:
            A new state dict with ``query_type`` (and, for filters,
            ``filter_key`` / ``filter_value``) set, or with ``error`` set when
            the filter value is missing.
        """
        # Initialize state if needed
        if not isinstance(state, dict):
            state = {}

        state.update({
            "query": state.get("query", ""),
            "query_type": "",
            "filter_key": "",
            "filter_value": "",
            "retrieved_profiles": [],
            "web_search_results": [],
            "analysis_results": [],
            "final_response": {},
            "error": ""
        })

        query = state["query"]

        # Log the processing step
        await self.update_progress("Processing your query...", 1, 5)

        filter_prefixes = ("filter:", "filter ")
        if not query.lower().startswith(filter_prefixes):
            return {**state, "query_type": "search"}

        # Drop the prefix by position (both prefixes have the same length).
        # A str.replace() here would be case-sensitive and would also delete
        # later occurrences of "filter " inside the value itself.
        filter_text = query[len(filter_prefixes[0]):].strip()

        if ":" in filter_text:
            # key:value format - split on the first colon only
            filter_key, filter_value = (part.strip() for part in filter_text.split(":", 1))

            if not filter_value:
                return {
                    **state,
                    "error": f"Please provide a value to filter by. Example: filter:{filter_key}:value"
                }

            return {
                **state,
                "query_type": "filter",
                "filter_key": filter_key,
                "filter_value": filter_value
            }

        # No key given: search across all fields
        if not filter_text:
            return {
                **state,
                "error": "Please provide a value to filter by. Example: filter:Location:San Francisco"
            }

        return {
            **state,
            "query_type": "filter",
            "filter_key": "all_fields",  # Special value to indicate searching across all fields
            "filter_value": filter_text
        }

    def query_router(self, state: FounderAnalysisState) -> str:
        """Route to the appropriate node based on query type."""
        if "error" in state and state["error"]:
            return "error"
        return state["query_type"]

    @async_timeout(45)  # 45 second timeout for vector search
    async def vector_search(self, state: FounderAnalysisState) -> FounderAnalysisState:
        """Retrieve the profiles most similar to the query text.

        Args:
            state: Workflow state; ``state["query"]`` is embedded and searched.

        Returns:
            A new state with ``retrieved_profiles`` set. ``error`` is set when
            nothing matched or when embedding/search raised.
        """
        query = state["query"]

        # Log the vector search step
        await self.update_progress("Searching for relevant founder profiles...", 2, 5)

        try:
            # Use the async embedding call so the event loop keeps running and
            # the surrounding timeout can actually fire while we wait.
            query_embedding = await self.embeddings.aembed_query(query)

            # Search for similar profiles
            profiles = self.vector_store.search_profiles(query_embedding, limit=MAX_RELEVANT_CHUNKS)
        except Exception as e:
            return {**state, "error": f"Error during vector search: {str(e)}"}

        if not profiles:
            return {
                **state,
                "retrieved_profiles": [],
                "error": "No matching profiles found."
            }

        return {**state, "retrieved_profiles": profiles}

    @async_timeout(45)  # 45 second timeout for metadata filtering
    async def filter_by_metadata(self, state: FounderAnalysisState) -> FounderAnalysisState:
        """Select profiles whose fields contain the filter value.

        Matching is a case-insensitive substring test, either on the single
        field named by ``state["filter_key"]`` or, when that key is
        ``"all_fields"``, on every field of the profile. Missing cells
        (None / NaN) never match. The whole collection is scanned page by
        page, stopping as soon as MAX_RELEVANT_CHUNKS matches are collected.

        Returns:
            A new state with ``retrieved_profiles`` set, or with ``error`` set
            when nothing matched or the scan raised.
        """
        filter_key = state["filter_key"]
        filter_value = state["filter_value"]
        search_all_fields = filter_key == "all_fields"

        # Log the filtering step
        if search_all_fields:
            await self.update_progress(f"Searching for '{filter_value}' across all profile fields...", 2, 5)
        else:
            await self.update_progress(f"Filtering profiles by {filter_key}: '{filter_value}'...", 2, 5)

        search_value = filter_value.lower()

        def contains_search_value(value):
            """Case-insensitive substring test that ignores None and NaN cells."""
            if value is None or (isinstance(value, float) and value != value):
                return False
            return search_value in str(value).lower()

        def profile_matches(profile):
            if search_all_fields:
                return any(value and contains_search_value(value) for value in profile.values())
            return filter_key in profile and contains_search_value(profile[filter_key])

        try:
            filtered_profiles = []
            offset = None
            while len(filtered_profiles) < MAX_RELEVANT_CHUNKS:
                # scroll() returns (points, next_offset); next_offset is None on the last page
                points, offset = self.vector_store.client.scroll(
                    collection_name=COLLECTION_NAME,
                    limit=100,
                    offset=offset,
                )
                for point in points:
                    profile = point.payload or {}
                    if profile_matches(profile):
                        filtered_profiles.append(profile)
                        if len(filtered_profiles) == MAX_RELEVANT_CHUNKS:
                            break
                if offset is None:
                    break
        except Exception as e:
            return {**state, "error": f"Error during metadata filtering: {str(e)}"}

        if not filtered_profiles:
            if search_all_fields:
                error_msg = f"No profiles found matching '{filter_value}' in any field"
            else:
                error_msg = f"No profiles found matching '{filter_value}' in {filter_key} field"

            return {
                **state,
                "retrieved_profiles": [],
                "error": error_msg
            }

        return {**state, "retrieved_profiles": filtered_profiles}

    @async_timeout(90)  # 90 second timeout for web search
    async def web_search(self, state: FounderAnalysisState) -> FounderAnalysisState:
        """Run one web search per retrieved profile.

        The search query is built from the profile's name, position and
        company, skipping parts that are missing. A failed search is reported
        in the chat and leaves that profile without an entry; it does not
        abort the remaining searches.

        Returns:
            A new state whose ``web_search_results`` is a list of
            ``{"profile_name": ..., "search_results": [...]}`` dicts.
        """
        profiles = state["retrieved_profiles"]

        if not profiles:
            return {**state, "web_search_results": []}

        await self.update_progress("Gathering additional information from web search...", 3, 5)

        def is_usable(value):
            """True for values worth putting in the query (not None, NaN or blank)."""
            if value is None or (isinstance(value, float) and value != value):
                return False
            return bool(str(value).strip())

        web_search_results = []
        last_index = len(profiles) - 1

        for i, profile in enumerate(profiles):
            name = profile.get("Full Name", "")
            position = profile.get("Current Position", "")
            company = profile.get("Company", "")

            # Update progress for each profile
            await self.update_progress(f"Searching web for info about {name} ({i+1}/{len(profiles)})...", 3, 5)

            search_query = " ".join(
                str(part).strip() for part in (name, position, company) if is_usable(part)
            )
            try:
                # The Tavily client is synchronous; run it in a worker thread so
                # the event loop (and the surrounding timeout) stays responsive.
                response = await asyncio.to_thread(
                    self.tavily_client.search,
                    query=search_query,
                    search_depth="advanced",
                )
                web_search_results.append({
                    "profile_name": name,
                    "search_results": response.get("results", [])
                })
            except Exception as e:
                await cl.Message(content=f"⚠️ Error searching for {name}: {str(e)}").send()

            # Rate limit between API calls, after failures as well
            if i < last_index:
                await rate_limit()

        return {**state, "web_search_results": web_search_results}

    @async_timeout(120)  # 2 minute timeout for analysis
    async def analyze_profiles(self, state: FounderAnalysisState) -> FounderAnalysisState:
        """Ask the LLM for a founder-fit analysis of each retrieved profile.

        Each prompt contains the profile and the web results gathered for
        the same name. A failed LLM call is reported in the chat and that
        profile is skipped.

        Returns:
            A new state whose ``analysis_results`` is a list of dicts with
            ``founder_name``, ``analysis``, ``profile`` and ``additional_info``.
        """
        profiles = state["retrieved_profiles"]
        web_results = state["web_search_results"]

        if not profiles:
            return {**state, "analysis_results": []}

        await self.update_progress("Analyzing profiles and generating recommendations...", 4, 5)

        analysis_results = []
        last_index = len(profiles) - 1

        for i, profile in enumerate(profiles):
            name = profile.get("Full Name", "")

            # First web-search entry recorded for this name, if any
            additional_info = next(
                (result["search_results"] for result in web_results if result["profile_name"] == name),
                [],
            )

            # Update progress for each profile
            await self.update_progress(f"Analyzing profile for {name} ({i+1}/{len(profiles)})...", 4, 5)

            analysis_prompt = f"""
            Based on the following founder profile and additional information, analyze what types of companies 
            this person would be best suited to found. Consider their experience, skills, background, and online presence.
            
            Profile: {json.dumps(profile, indent=2)}
            Additional Information: {json.dumps(additional_info, indent=2)}
            
            Provide a detailed analysis including:
            1. Recommended industry sectors based on their expertise and background
            2. Type of company (B2B, B2C, etc.) that would align with their experience
            3. Key strengths that would contribute to success as a founder
            4. Potential challenges to consider based on their profile
            5. How their network and online presence could benefit their venture
            6. Specific opportunities or niches they might be well-positioned to address
            
            Be specific and provide actionable insights based on the information available.
            """

            try:
                # Async invocation keeps the event loop free, so the surrounding
                # timeout can interrupt a slow model call.
                response = await self.llm.ainvoke([HumanMessage(content=analysis_prompt)])

                analysis_results.append({
                    "founder_name": name,
                    "analysis": response.content,
                    "profile": profile,
                    "additional_info": additional_info
                })
            except Exception as e:
                await cl.Message(content=f"⚠️ Error analyzing {name}: {str(e)}").send()

            # Rate limit between API calls, after failures as well
            if i < last_index:
                await rate_limit()

        return {**state, "analysis_results": analysis_results}

    @async_timeout(30)  # 30 second timeout for formatting
    async def format_response(self, state: FounderAnalysisState) -> FounderAnalysisState:
        """Send one chat message per analysed founder and record the outcome.

        Each message carries two text elements: a profile summary (with
        LinkedIn / Twitter / Website links when present) and the analysis.
        When there is nothing to show, the stored error (or a generic notice)
        is sent instead.

        Returns:
            A new state whose ``final_response`` has ``status`` ``"success"``
            with the results, or ``"error"`` with a message.
        """
        analysis_results = state["analysis_results"]

        await self.update_progress("Formatting final results...", 5, 5)

        # Clear the progress message
        self.progress_message = None

        if not analysis_results:
            # "error" is normally present but empty, so test truthiness
            # instead of relying on a .get() default.
            error_text = state.get("error")
            if error_text:
                await cl.Message(content=f"❌ {error_text}").send()
            else:
                await cl.Message(content="❌ No results to display.").send()
            return {**state, "final_response": {"status": "error", "message": error_text or "No results"}}

        for result in analysis_results:
            founder_name = result["founder_name"]
            profile = result["profile"]
            analysis = result["analysis"]

            # Build profile summary with basic information
            profile_summary = f"""
            🎯 Profile Summary:
            
            - Name: {profile.get('Full Name', '')}
            - Current Position: {profile.get('Current Position', '')}
            - Company: {profile.get('Company', '')}
            - Location: {profile.get('Location', '')}
            """

            # Add social profiles / websites that hold a non-blank string.
            # Non-string cells (e.g. NaN from an empty CSV cell) are skipped
            # rather than crashing on .strip().
            for label in ("LinkedIn", "Twitter", "Website"):
                raw_link = profile.get(label)
                if not isinstance(raw_link, str) or not raw_link.strip():
                    continue
                link = raw_link.strip()
                if not link.startswith('http'):
                    link = f"https://{link}"
                profile_summary += f"- {label}: {link}\n"

            # Format the analysis
            analysis_text = f"""
            📊 Analysis:
            
            {analysis}
            """

            # Create elements for structured display using Text instead of Markdown
            elements = [
                cl.Text(content=profile_summary),
                cl.Text(content=analysis_text)
            ]

            await cl.Message(
                content=f"Analysis for {founder_name}:",
                elements=elements
            ).send()

        await cl.Message(content="✅ Analysis complete!").send()

        return {**state, "final_response": {"status": "success", "results": analysis_results}}

    @async_timeout(120)  # 2 minute timeout for loading profiles
    async def load_profiles(self, file):
        """Load founder profiles from a CSV, embed them and store them.

        Every row becomes one profile dict (empty cells are stored as None).
        The text embedded for a profile lists its non-empty fields as
        ``Label: value`` lines: the well-known columns first, in a fixed
        order, then any other columns present in the CSV.

        Args:
            file: Path or file-like object accepted by ``pandas.read_csv``.

        Returns:
            The number of profiles loaded.
        """
        # Read CSV file
        df = pd.read_csv(file)

        # Convert rows to dicts, replacing pandas' NaN marker for empty cells
        # with None so it is not later treated as real text ("nan").
        profiles = [
            {key: (None if pd.isna(value) else value) for key, value in record.items()}
            for record in df.to_dict('records')
        ]
        if not profiles:
            return 0

        # (CSV column, label used in the embedded text), in output order
        labelled_fields = (
            # Core identity information
            ('Full Name', 'Name'),
            ('Current Position', 'Position'),
            ('Company', 'Company'),
            ('Location', 'Location'),
            # Contact and social media information
            ('LinkedIn', 'LinkedIn'),
            ('Twitter', 'Twitter'),
            ('Website', 'Website'),
            ('Email', 'Email'),
            # Detailed professional information
            ('About', 'About'),
            ('Skills', 'Skills'),
            ('Experience', 'Experience'),
            ('Education', 'Education'),
            # Industry or sector information
            ('Industry', 'Industry'),
            ('Sector', 'Sector'),
            # Entrepreneurial information
            ('Previous Startups', 'Previous Startups'),
            ('Funding History', 'Funding History'),
        )
        known_columns = {column for column, _ in labelled_fields}

        def has_value(value):
            """True when a cell holds something worth embedding."""
            return bool(value) and str(value).lower() != 'nan'

        texts = []
        for p in profiles:
            text_parts = [
                f"{label}: {p[column]}"
                for column, label in labelled_fields
                if has_value(p.get(column))
            ]
            # Add any additional fields that might be in the CSV
            text_parts.extend(
                f"{key}: {value}"
                for key, value in p.items()
                if key not in known_columns and has_value(value)
            )

            # Join all parts with newlines for better separation
            text = "\n".join(text_parts)
            texts.append(text)

            # Log the first few profiles to help with debugging
            if len(texts) <= 3:
                print(f"Profile {len(texts)} text representation:\n{text}\n")

        # Generate embeddings without blocking the event loop, so the
        # surrounding timeout can fire while the request is in flight
        embeddings = await self.embeddings.aembed_documents(texts)

        # Store in vector database
        self.vector_store.upsert_profiles(embeddings, profiles)

        return len(profiles)

    @async_timeout(300)  # 5 minute overall timeout for the entire process
    async def process_message(self, query: str):
        """Run one user query through the pipeline, step by step.

        The stages are called directly in order: query processing, then
        either vector search or metadata filtering, then web search, analysis
        and formatting. An error found after routing or after retrieval is
        sent to the chat and ends the run; timeouts and other exceptions are
        reported to the chat as well.
        """
        # Start each request with a fresh progress message
        self.progress_message = None

        # Plain-dict state handed from stage to stage
        state = dict(
            query=query,
            query_type="",
            filter_key="",
            filter_value="",
            retrieved_profiles=[],
            web_search_results=[],
            analysis_results=[],
            final_response={},
            error="",
        )

        try:
            state = await self.process_query(state)

            # Decide which retrieval stage (if any) handles this query
            route = self.query_router(state)
            if route == "error":
                await cl.Message(content=f"❌ {state['error']}").send()
                return

            retrieval_stages = {
                "search": self.vector_search,
                "filter": self.filter_by_metadata,
            }
            retrieve = retrieval_stages.get(route)
            if retrieve is not None:
                state = await retrieve(state)

            # Stop early when retrieval reported a problem
            if state.get("error"):
                await cl.Message(content=f"❌ {state['error']}").send()
                return

            # Remaining stages always run in this fixed order
            for stage in (self.web_search, self.analyze_profiles, self.format_response):
                state = await stage(state)

        except asyncio.TimeoutError:
            await cl.Message(content="❌ The operation timed out. Please try a simpler query or try again later.").send()
        except Exception as e:
            await cl.Message(content=f"❌ Error processing request: {str(e)}").send()

# Initialize the system
# A single module-level instance, created at import time and used by both
# Chainlit handlers below, so its vector store and progress message are
# shared by every call to those handlers.
system = FounderAnalysisSystem()

@cl.on_chat_start
async def start():
    """Greet the user, ask for a CSV of founder profiles and load it.

    On success a usage guide with example queries is sent; when no file is
    provided, loading times out or loading fails, an explanatory message is
    sent instead.
    """
    await cl.Message(
        content="👋 Welcome to the Founder Analysis System! Please upload your CSV file with founder profiles."
    ).send()

    files = await cl.AskFileMessage(
        content="Please upload your CSV file",
        accept=["text/csv"],
        max_size_mb=10
    ).send()

    if not files:
        await cl.Message(
            content="No file was uploaded. Please try again."
        ).send()
        return

    file = files[0]

    # Show loading message
    await cl.Message(content=f"⏳ Processing {file.name}...").send()

    try:
        # Load the profiles with timeout
        num_profiles = await asyncio.wait_for(system.load_profiles(file.path), timeout=120)

        usage_guide = (
            f"✅ Successfully loaded {num_profiles} founder profiles!\n\n"
            "You can now:\n\n"
            "1. **Search for founders by expertise**:\n"
            "   Example: `AI experts in healthcare`\n\n"
            "2. **Filter by specific fields**:\n"
            "   Example: `filter:Location:San Francisco`\n"
            "   Example: `filter:Skills:Machine Learning`\n\n"
            "3. **Search across all fields**:\n"
            "   Example: `filter:Stanford`\n"
            "   Example: `filter blockchain`\n\n"
            "4. **Get founder recommendations**:\n"
            "   Example: `recommend founders for fintech startup`"
        )
        await cl.Message(content=usage_guide).send()
    except asyncio.TimeoutError:
        await cl.Message(content="❌ Loading profiles timed out. The CSV file might be too large or complex.").send()
    except Exception as e:
        await cl.Message(content=f"❌ Error loading profiles: {str(e)}").send()

@cl.on_message
async def main(message: cl.Message):
    """Pass the text of each incoming chat message to the analysis system."""
    user_text = message.content
    await system.process_message(user_text)