liuyuelintop commited on
Commit
9064719
·
verified ·
1 Parent(s): fb856e1

Upload folder using huggingface_hub

Browse files
__pycache__/content.cpython-312.pyc ADDED
Binary file (4.14 kB). View file
 
app.py CHANGED
@@ -1,137 +1,351 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
  from dotenv import load_dotenv
2
  from openai import OpenAI
3
  import json
4
  import os
 
 
 
5
  import requests
6
- from pypdf import PdfReader
7
  import gradio as gr
8
 
 
9
 
10
  load_dotenv(override=True)
11
 
12
- def push(text):
13
- requests.post(
14
- "https://api.pushover.net/1/messages.json",
15
- data={
16
- "token": os.getenv("PUSHOVER_TOKEN"),
17
- "user": os.getenv("PUSHOVER_USER"),
18
- "message": text,
19
- }
20
- )
 
 
 
 
 
 
 
 
 
 
 
 
21
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
 
23
  def record_user_details(email, name="Name not provided", notes="not provided"):
24
- push(f"Recording {name} with email {email} and notes {notes}")
 
25
  return {"recorded": "ok"}
26
 
27
- def record_unknown_question(question):
28
- push(f"Recording {question}")
 
 
29
  return {"recorded": "ok"}
30
 
31
  record_user_details_json = {
32
  "name": "record_user_details",
33
- "description": "Use this tool to record that a user is interested in being in touch and provided an email address",
34
  "parameters": {
35
  "type": "object",
36
  "properties": {
37
- "email": {
38
- "type": "string",
39
- "description": "The email address of this user"
40
- },
41
- "name": {
42
- "type": "string",
43
- "description": "The user's name, if they provided it"
44
- }
45
- ,
46
- "notes": {
47
- "type": "string",
48
- "description": "Any additional information about the conversation that's worth recording to give context"
49
- }
50
  },
51
  "required": ["email"],
52
  "additionalProperties": False
53
  }
54
  }
55
 
56
- record_unknown_question_json = {
57
- "name": "record_unknown_question",
58
- "description": "Always use this tool to record any question that couldn't be answered as you didn't know the answer",
59
  "parameters": {
60
  "type": "object",
61
  "properties": {
62
- "question": {
63
- "type": "string",
64
- "description": "The question that couldn't be answered"
65
- },
66
  },
67
  "required": ["question"],
68
  "additionalProperties": False
69
  }
70
  }
71
 
72
- tools = [{"type": "function", "function": record_user_details_json},
73
- {"type": "function", "function": record_unknown_question_json}]
 
 
 
 
 
74
 
75
 
76
- class Me:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77
 
 
 
 
 
 
 
 
 
78
  def __init__(self):
 
79
  self.openai = OpenAI(
80
  api_key=os.getenv("GOOGLE_API_KEY"),
81
  base_url="https://generativelanguage.googleapis.com/v1beta/openai/"
82
  )
83
- self.name = "Yuelin Liu"
84
- reader = PdfReader("me/linkedin.pdf")
85
- self.linkedin = ""
86
- for page in reader.pages:
87
- text = page.extract_text()
88
- if text:
89
- self.linkedin += text
90
- with open("me/summary.txt", "r", encoding="utf-8") as f:
91
- self.summary = f.read()
92
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
 
94
  def handle_tool_call(self, tool_calls):
95
  results = []
96
  for tool_call in tool_calls:
97
  tool_name = tool_call.function.name
98
- arguments = json.loads(tool_call.function.arguments)
99
- print(f"Tool called: {tool_name}", flush=True)
100
- tool = globals().get(tool_name)
101
- result = tool(**arguments) if tool else {}
102
- results.append({"role": "tool","content": json.dumps(result),"tool_call_id": tool_call.id})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
  return results
104
-
105
- def system_prompt(self):
106
- system_prompt = f"You are acting as {self.name}. You are answering questions on {self.name}'s website, \
107
- particularly questions related to {self.name}'s career, background, skills and experience. \
108
- Your responsibility is to represent {self.name} for interactions on the website as faithfully as possible. \
109
- You are given a summary of {self.name}'s background and LinkedIn profile which you can use to answer questions. \
110
- Be professional and engaging, as if talking to a potential client or future employer who came across the website. \
111
- If you don't know the answer to any question, use your record_unknown_question tool to record the question that you couldn't answer, even if it's about something trivial or unrelated to career. \
112
- If the user is engaging in discussion, try to steer them towards getting in touch via email; ask for their email and record it using your record_user_details tool. "
113
-
114
- system_prompt += f"\n\n## Summary:\n{self.summary}\n\n## LinkedIn Profile:\n{self.linkedin}\n\n"
115
- system_prompt += f"With this context, please chat with the user, always staying in character as {self.name}."
116
- return system_prompt
117
-
 
 
 
 
 
 
 
 
 
 
 
 
118
  def chat(self, message, history):
119
- messages = [{"role": "system", "content": self.system_prompt()}] + history + [{"role": "user", "content": message}]
120
- done = False
121
- while not done:
122
- response = self.openai.chat.completions.create(model="gemini-2.5-flash", messages=messages, tools=tools)
123
- if response.choices[0].finish_reason=="tool_calls":
124
- message = response.choices[0].message
125
- tool_calls = message.tool_calls
126
- results = self.handle_tool_call(tool_calls)
127
- messages.append(message)
128
- messages.extend(results)
129
- else:
130
- done = True
131
- return response.choices[0].message.content
132
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
133
 
134
  if __name__ == "__main__":
135
  me = Me()
136
  gr.ChatInterface(me.chat, type="messages").launch()
137
-
 
1
+ # app.py
2
+ # Minimal, extensible chatbot with two modes: "career" and "personal".
3
+ # - Safe tools, with Pushover only for career gaps
4
+ # - Simple router: career | personal | contact_exchange | other
5
+ # - Canonical "Why hire you?" pitch (AI/Agents emphasized)
6
+ # - Rate-limited, de-duped notifications
7
+ #
8
+ # Requires a .env with:
9
+ # GOOGLE_API_KEY=...
10
+ # PUSHOVER_TOKEN=...
11
+ # PUSHOVER_USER=...
12
+
13
  from dotenv import load_dotenv
14
  from openai import OpenAI
15
  import json
16
  import os
17
+ import re
18
+ import time
19
+ from collections import deque
20
  import requests
 
21
  import gradio as gr
22
 
23
+ from content import ContentStore, Doc
24
 
25
  load_dotenv(override=True)
26
 
27
+ # ============================== Pushover utils ===============================
28
+
29
+ PUSH_WINDOW_SECONDS = 3600 # rate window (1 hour)
30
+ PUSH_MAX_IN_WINDOW = 5 # max pushes per hour
31
+ PUSH_DEDUPE_SECONDS = 6 * 3600 # suppress identical messages for 6 hours
32
+
33
+ _recent_pushes = deque() # (timestamp, message)
34
+ _last_seen = {} # message -> last_ts
35
+
36
+
37
+ def _should_push(message: str) -> bool:
38
+ now = time.time()
39
+
40
+ # De-dupe identical messages
41
+ last = _last_seen.get(message)
42
+ if last and now - last < PUSH_DEDUPE_SECONDS:
43
+ return False
44
+
45
+ # Windowed rate limit
46
+ while _recent_pushes and now - _recent_pushes[0][0] > PUSH_WINDOW_SECONDS:
47
+ _recent_pushes.popleft()
48
 
49
+ if len(_recent_pushes) >= PUSH_MAX_IN_WINDOW:
50
+ return False
51
+
52
+ _recent_pushes.append((now, message))
53
+ _last_seen[message] = now
54
+ return True
55
+
56
+
57
def push(text: str):
    """Send *text* to Pushover, honoring the rate/de-dupe gate.

    The message is truncated to Pushover's 1024-char limit. Delivery
    failures are logged and swallowed so the chat flow never breaks on
    a notification problem.
    """
    if not _should_push(text):
        return
    payload = {
        "token": os.getenv("PUSHOVER_TOKEN"),
        "user": os.getenv("PUSHOVER_USER"),
        "message": text[:1024],
    }
    try:
        requests.post(
            "https://api.pushover.net/1/messages.json",
            data=payload,
            timeout=10,
        )
    except Exception as e:
        print(f"[WARN] Pushover failed: {e}", flush=True)
72
+
73
+
74
+ # ============================== Tools (safe) =================================
75
 
76
def record_user_details(email, name="Name not provided", notes="not provided"):
    """Tool impl: log a visitor's contact details and notify via Pushover."""
    # Contact info is valuable -> notify
    summary = f"Contact: {name} | {email} | {notes}"
    push(summary)
    return {"recorded": "ok"}
80
 
81
def record_resume_gap(question, why_missing="not specified", mode="career"):
    """Tool impl: record a question the documents could not answer.

    Only gaps in "career" mode trigger a push notification; personal-mode
    gaps are acknowledged without notifying.
    """
    if mode != "career":
        return {"recorded": "ok"}
    push(f"Gap[career]: {question} | reason: {why_missing}")
    return {"recorded": "ok"}
86
 
87
# OpenAI-style JSON tool schema advertised to the model for record_user_details.
record_user_details_json = {
    "name": "record_user_details",
    "description": "Record that a user shared their email to get in touch.",
    "parameters": {
        "type": "object",
        "properties": {
            # Only the email is mandatory; name/notes enrich the notification.
            "email": {"type": "string", "description": "User email"},
            "name": {"type": "string", "description": "User name if provided"},
            "notes": {"type": "string", "description": "Context or notes from chat"},
        },
        "required": ["email"],
        "additionalProperties": False,
    },
}
101
 
102
# JSON tool schema for record_resume_gap; "mode" scopes the notification policy.
record_resume_gap_json = {
    "name": "record_resume_gap",
    "description": "Use only when a question in the active mode cannot be answered from the documents.",
    "parameters": {
        "type": "object",
        "properties": {
            "question": {"type": "string"},
            "why_missing": {"type": "string"},
            "mode": {"type": "string", "enum": ["career", "personal"], "default": "career"},
        },
        "required": ["question"],
        "additionalProperties": False,
    },
}
116
 
117
# Tool definitions sent to the model, and the local dispatch table mapping
# a tool name to its Python implementation (used by Me.handle_tool_call).
TOOLS = [
    {"type": "function", "function": schema}
    for schema in (record_user_details_json, record_resume_gap_json)
]

TOOL_IMPL = {
    "record_user_details": record_user_details,
    "record_resume_gap": record_resume_gap,
}
124
 
125
 
126
# ============================== Canonical answer =============================

# Feature flag: short-circuit "why hire you?" questions with a fixed pitch.
USE_CANONICAL_WHY_HIRE = True

# Common phrasings of the hire-me question (matched case-insensitively).
WHY_HIRE_REGEX = re.compile(
    r"(why\s+(should|would)\s*(we\s+)?hire\s+you|why\s+hire\s+you|why\s+are\s+you\s+a\s+fit)",
    re.I
)


def canonical_why_hire_pitch() -> str:
    """Return the grounded, concise pitch featuring AI/Agents work."""
    pitch = (
        "I ship reliable product fast—and I’m doubling that impact with AI. Recently I built CodeCraft, "
        "a real-time online IDE (Next.js 15, TypeScript, Convex, Clerk) deployed on Vercel, and I’ve also "
        "engineered an agentic career chatbot with tool calling, routing, and safe notifications. On the AI side, "
        "I work hands-on with LLMs for retrieval and tool use, and I prototype agentic workflows using frameworks "
        "like LangChain and modern SDKs. I focus on measurable results: tight feedback loops, lean CI/CD, and clean "
        "interfaces that make teams faster without sacrificing quality. I communicate clearly, break work into "
        "milestones, and own outcomes end-to-end. If you need someone who can pick up context quickly and turn ideas "
        "into production software—especially where AI can move the needle—I’ll add value from week one."
    )
    return pitch


def maybe_canonical(message: str, mode: str):
    """Return the canonical pitch when it applies to *message*, else None.

    Applies only in career mode, when the feature flag is on, and when
    the message matches a known hire-me phrasing.
    """
    if not (USE_CANONICAL_WHY_HIRE and mode == "career"):
        return None
    if WHY_HIRE_REGEX.search(message):
        return canonical_why_hire_pitch()
    return None
152
+
153
+
154
# ============================== Router schema ===============================

# JSON schema handed to the model when classifying each user turn.
ROUTER_SCHEMA = {
    "type": "object",
    "properties": {
        "intent": {
            "type": "string",
            "enum": ["career", "personal", "contact_exchange", "other"],
        },
        "reason": {"type": "string"},
    },
    "required": ["intent"],
}


# ============================== App core ====================================

# Stock reply for off-topic / abusive turns; keeps the bot on-message.
BOUNDARY_REPLY = (
    "I’m here to talk about my experience, projects, and skills. "
    "If you have a career-related question, I’m happy to help."
)
175
+
176
class Me:
    """Two-mode (career/personal) agentic chatbot persona.

    Routes each user turn to an intent, optionally short-circuits with a
    canonical answer, then runs a bounded model<->tool loop grounded in the
    documents loaded by ContentStore.
    """

    # Fix: hard cap on model->tool round-trips per user turn. The original
    # used `while True`, which loops forever if the model keeps emitting
    # tool_calls on every response.
    MAX_TOOL_ROUNDS = 8

    def __init__(self):
        self.name = "Yuelin Liu"
        # Gemini accessed through its OpenAI-compatible endpoint.
        self.openai = OpenAI(
            api_key=os.getenv("GOOGLE_API_KEY"),
            base_url="https://generativelanguage.googleapis.com/v1beta/openai/"
        )

        # Content store (two modes only)
        self.content = ContentStore()
        # Put career.pdf + summary.txt here (and any other work docs)
        self.content.load_folder("me/career", "career")
        # Merge everything else (hobby/life/projects/education) into personal/
        self.content.load_folder("me/personal", "personal")

        # Optional: quick startup log (comment out if noisy)
        self._log_loaded_docs()

    # ---------- Router / moderation ----------

    def classify(self, message: str):
        """Classify *message* into an intent dict.

        Returns e.g. {"intent": "career", "reason": "..."}; falls back to
        a "career" intent when the model reply is not valid JSON.
        """
        system = (
            "Classify the user's message. "
            "Return JSON with field 'intent' ∈ {career, personal, contact_exchange, other}. "
            "Use 'career' for resume/skills/projects/tech stack/salary expectations; "
            "use 'personal' for hobbies/life background/interests; "
            "use 'contact_exchange' when the user shares or asks for an email; "
            "use 'other' for off-topic/harassment/spam. Return ONLY JSON."
        )
        resp = self.openai.chat.completions.create(
            model="gemini-2.5-flash",
            messages=[
                {"role": "system", "content": system},
                {"role": "user", "content": message}
            ],
            # If response_format isn't supported, fallback below still keeps things safe.
            response_format={"type": "json_schema", "json_schema": {"name": "router", "schema": ROUTER_SCHEMA}},
            temperature=0.2,
            top_p=0.9
        )
        try:
            return json.loads(resp.choices[0].message.content)
        except Exception:
            return {"intent": "career", "reason": "fallback"}

    # ---------- Tool handler ----------

    def _safe_parse_args(self, raw: str):
        """Parse model-produced tool arguments very forgivingly.

        Returns {} (rather than raising) when the args are unusable, so a
        malformed tool call never crashes the chat turn.
        """
        try:
            return json.loads(raw)
        except Exception:
            try:
                # common hiccup: single quotes
                return json.loads(raw.replace("'", '"'))
            except Exception:
                # last resort: ignore args but keep running
                print(f"[WARN] Unable to parse tool args: {raw}", flush=True)
                return {}

    def handle_tool_call(self, tool_calls):
        """Execute each requested tool and return tool-role result messages.

        Unknown tools, unexpected parameters, and tool crashes are all
        reported back to the model instead of raising.
        """
        results = []
        for tool_call in tool_calls:
            tool_name = tool_call.function.name
            raw_args = tool_call.function.arguments or "{}"
            args = self._safe_parse_args(raw_args)

            impl = TOOL_IMPL.get(tool_name)
            if not impl:
                print(f"[WARN] Unknown tool: {tool_name}", flush=True)
                results.append({
                    "role": "tool",
                    "content": json.dumps({"error": f"unknown tool {tool_name}"}),
                    "tool_call_id": tool_call.id
                })
                continue

            try:
                out = impl(**args)
            except TypeError:
                # model sent unexpected params; retry with only the params
                # the function actually accepts
                import inspect
                sig = inspect.signature(impl)
                filtered = {k: v for k, v in args.items() if k in sig.parameters}
                try:
                    out = impl(**filtered)
                except Exception as e2:
                    print(f"[ERROR] Tool '{tool_name}' failed: {e2}", flush=True)
                    out = {"error": "tool execution failed"}
            except Exception as e:
                print(f"[ERROR] Tool '{tool_name}' crashed: {e}", flush=True)
                out = {"error": "tool execution crashed"}

            results.append({
                "role": "tool",
                "content": json.dumps(out),
                "tool_call_id": tool_call.id
            })
        return results

    # ---------- Prompt assembly ----------

    def build_context_for_mode(self, mode: str):
        """Return the concatenated document text for the active mode."""
        domain = "career" if mode == "career" else "personal"
        return self.content.join_domain_text([domain])

    def system_prompt(self, mode: str):
        """Build the grounded system prompt for *mode* ("career"/"personal")."""
        domain_text = self.build_context_for_mode(mode)
        scope = "career" if mode == "career" else "personal"
        return f"""You are acting as {self.name}.
Answer only using {scope} information below. Do not invent personal facts outside these documents.

Strict tool policy:
- Use record_resume_gap ONLY for career questions you cannot answer from these documents.
- Do NOT record or notify for off-topic, harassing, sexual, discriminatory, or spam content.
- If the user provides contact details or asks to follow up, ask for an email and call record_user_details.

Be concise and professional. Gently redirect to career topics when appropriate.

## Documents
{domain_text}
"""

    # ---------- Chat entrypoint ----------

    def chat(self, message, history):
        """Gradio entrypoint: route, maybe short-circuit, then run the tool loop."""
        try:
            route = self.classify(message)
            intent = route.get("intent", "career")
            if intent == "other":
                return BOUNDARY_REPLY
            # contact_exchange is handled with the career persona/documents
            mode = "career" if intent in ("career", "contact_exchange") else "personal"

            canon = maybe_canonical(message, mode)
            if canon:
                return canon

            messages = [{"role": "system", "content": self.system_prompt(mode)}] \
                + history + [{"role": "user", "content": message}]

            for _ in range(self.MAX_TOOL_ROUNDS):
                response = self.openai.chat.completions.create(
                    model="gemini-2.5-flash",
                    messages=messages,
                    tools=TOOLS,
                    temperature=0.2,
                    top_p=0.9
                )
                choice = response.choices[0]
                if choice.finish_reason == "tool_calls":
                    results = self.handle_tool_call(choice.message.tool_calls)
                    messages.append(choice.message)
                    messages.extend(results)
                    continue
                return choice.message.content or "Thanks—I've noted that."

            # Tool loop exceeded the cap — bail out gracefully instead of spinning.
            print("[WARN] Tool loop exceeded MAX_TOOL_ROUNDS", flush=True)
            return "Thanks—I've noted that."
        except Exception as e:
            # Fail-closed, keep UI stable
            print(f"[FATAL] Chat turn failed: {e}", flush=True)
            return "Oops, something went wrong on my side. Please ask that again—I've reset my context."

    # ---------- Optional: startup log ----------

    def _log_loaded_docs(self):
        """Print how many docs loaded per domain; helpful for sanity checks."""
        by_domain = self.content.by_domain
        for domain, docs in by_domain.items():
            print(f"[LOAD] Domain '{domain}': {len(docs)} document(s)")
            for d in docs:
                print(f" - {d.title}")
346
+
347
+ # ============================== Gradio UI ====================================
348
 
349
if __name__ == "__main__":
    # Launch the chat UI; "messages" type keeps history in OpenAI format.
    assistant = Me()
    gr.ChatInterface(assistant.chat, type="messages").launch()
 
content.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # content.py
2
+ # Simple content registry for two modes: "career" and "personal".
3
+ # Auto-loads PDF/TXT/MD files from folders and concatenates text for prompts.
4
+
5
+ import os
6
+ import glob
7
+ from dataclasses import dataclass
8
+ from typing import List, Dict, Iterable, Optional
9
+ from pypdf import PdfReader
10
+
11
+
12
@dataclass
class Doc:
    """One loaded document, tagged with the persona domain it belongs to."""
    domain: str        # "career" or "personal"
    title: str         # display name (file basename)
    text: str          # extracted plain text
    source_path: str   # original file path, for traceability


class ContentStore:
    """Registry of Docs grouped by domain ("career" / "personal").

    Auto-loads PDF/TXT/MD files from folders and concatenates their text
    for inclusion in system prompts.
    """

    def __init__(self):
        self.docs: List[Doc] = []
        self.by_domain: Dict[str, List[Doc]] = {}

    # ---------- Loading ----------

    def add_doc(self, doc: Doc):
        """Register *doc* in both the flat list and the per-domain index."""
        self.docs.append(doc)
        self.by_domain.setdefault(doc.domain, []).append(doc)

    def load_folder(self, folder: str, domain: str):
        """
        Load all files in a folder into a domain.
        Supported: .pdf, .txt, .md, .markdown

        Creates the folder if missing so a fresh checkout still boots.
        Fix: files are loaded in sorted order — glob.glob order is
        filesystem-dependent, which made prompt content nondeterministic.
        """
        os.makedirs(folder, exist_ok=True)
        for path in sorted(glob.glob(os.path.join(folder, "*"))):
            if os.path.isdir(path):
                # Subfolders are skipped; recurse manually on subdirs if needed.
                continue
            text = self._extract_text(path)
            if not text:
                continue
            title = os.path.basename(path)
            self.add_doc(Doc(domain=domain, title=title, text=text, source_path=path))

    def _extract_text(self, path: str) -> str:
        """Best-effort text extraction; returns "" for unsupported or broken files.

        Unreadable files are logged (instead of silently skipped) so missing
        prompt content is diagnosable.
        """
        lower = path.lower()
        if lower.endswith(".pdf"):
            out = []
            try:
                reader = PdfReader(path)
                for p in reader.pages:
                    t = p.extract_text()
                    if t:
                        out.append(t)
            except Exception as e:
                print(f"[WARN] Could not read PDF {path}: {e}", flush=True)
                return ""
            return "\n".join(out)
        if lower.endswith((".txt", ".md", ".markdown")):
            try:
                with open(path, "r", encoding="utf-8") as f:
                    return f.read()
            except Exception as e:
                print(f"[WARN] Could not read text file {path}: {e}", flush=True)
                return ""
        return ""

    # ---------- Retrieval ----------

    def join_domain_text(self, domains: Optional[Iterable[str]]) -> str:
        """
        Concatenate documents for the selected domains.
        If domains is None/empty, defaults to ["career"].
        """
        if not domains:
            domains = ["career"]
        chunks: List[str] = []
        for d in domains:
            for doc in self.by_domain.get(d, []):
                chunks.append(f"### {doc.title}\n{doc.text}\n")
        return "\n".join(chunks)
me/career/career.pdf ADDED
Binary file (86.9 kB). View file
 
me/career/summary.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ My name is Yuelin Liu. I see myself as a digital craftsperson, a builder who thrives on transforming complex problems into elegant, high-performance software. Based in Melbourne, my work is focused on the entire product lifecycle—from architecting scalable backends to shipping polished user interfaces.
2
+
3
+ I began by mastering the full stack, but my curiosity has pulled me towards the frontier of AI engineering. My focus now is on bridging the gap between the immense power of Large Language Models and the practical, human-centered products they can enable. I'm driven by the challenge of building not just features, but real-world solutions.