Spaces:
Running
Running
Upload folder using huggingface_hub
Browse files- __pycache__/content.cpython-312.pyc +0 -0
- app.py +294 -80
- content.py +82 -0
- me/career/career.pdf +0 -0
- me/career/summary.txt +3 -0
__pycache__/content.cpython-312.pyc
ADDED
|
Binary file (4.14 kB). View file
|
|
|
app.py
CHANGED
|
@@ -1,137 +1,351 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
from dotenv import load_dotenv
|
| 2 |
from openai import OpenAI
|
| 3 |
import json
|
| 4 |
import os
|
|
|
|
|
|
|
|
|
|
| 5 |
import requests
|
| 6 |
-
from pypdf import PdfReader
|
| 7 |
import gradio as gr
|
| 8 |
|
|
|
|
| 9 |
|
| 10 |
load_dotenv(override=True)
|
| 11 |
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
|
| 23 |
def record_user_details(email, name="Name not provided", notes="not provided"):
|
| 24 |
-
|
|
|
|
| 25 |
return {"recorded": "ok"}
|
| 26 |
|
| 27 |
-
def
|
| 28 |
-
|
|
|
|
|
|
|
| 29 |
return {"recorded": "ok"}
|
| 30 |
|
| 31 |
record_user_details_json = {
|
| 32 |
"name": "record_user_details",
|
| 33 |
-
"description": "
|
| 34 |
"parameters": {
|
| 35 |
"type": "object",
|
| 36 |
"properties": {
|
| 37 |
-
"email": {
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
},
|
| 41 |
-
"name": {
|
| 42 |
-
"type": "string",
|
| 43 |
-
"description": "The user's name, if they provided it"
|
| 44 |
-
}
|
| 45 |
-
,
|
| 46 |
-
"notes": {
|
| 47 |
-
"type": "string",
|
| 48 |
-
"description": "Any additional information about the conversation that's worth recording to give context"
|
| 49 |
-
}
|
| 50 |
},
|
| 51 |
"required": ["email"],
|
| 52 |
"additionalProperties": False
|
| 53 |
}
|
| 54 |
}
|
| 55 |
|
| 56 |
-
|
| 57 |
-
"name": "
|
| 58 |
-
"description": "
|
| 59 |
"parameters": {
|
| 60 |
"type": "object",
|
| 61 |
"properties": {
|
| 62 |
-
"question": {
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
},
|
| 66 |
},
|
| 67 |
"required": ["question"],
|
| 68 |
"additionalProperties": False
|
| 69 |
}
|
| 70 |
}
|
| 71 |
|
| 72 |
-
|
| 73 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 74 |
|
| 75 |
|
| 76 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 78 |
def __init__(self):
|
|
|
|
| 79 |
self.openai = OpenAI(
|
| 80 |
api_key=os.getenv("GOOGLE_API_KEY"),
|
| 81 |
base_url="https://generativelanguage.googleapis.com/v1beta/openai/"
|
| 82 |
)
|
| 83 |
-
self.name = "Yuelin Liu"
|
| 84 |
-
reader = PdfReader("me/linkedin.pdf")
|
| 85 |
-
self.linkedin = ""
|
| 86 |
-
for page in reader.pages:
|
| 87 |
-
text = page.extract_text()
|
| 88 |
-
if text:
|
| 89 |
-
self.linkedin += text
|
| 90 |
-
with open("me/summary.txt", "r", encoding="utf-8") as f:
|
| 91 |
-
self.summary = f.read()
|
| 92 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 93 |
|
| 94 |
def handle_tool_call(self, tool_calls):
|
| 95 |
results = []
|
| 96 |
for tool_call in tool_calls:
|
| 97 |
tool_name = tool_call.function.name
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 103 |
return results
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 118 |
def chat(self, message, history):
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 133 |
|
| 134 |
if __name__ == "__main__":
|
| 135 |
me = Me()
|
| 136 |
gr.ChatInterface(me.chat, type="messages").launch()
|
| 137 |
-
|
|
|
|
| 1 |
+
# app.py
|
| 2 |
+
# Minimal, extensible chatbot with two modes: "career" and "personal".
|
| 3 |
+
# - Safe tools, with Pushover only for career gaps
|
| 4 |
+
# - Simple router: career | personal | contact_exchange | other
|
| 5 |
+
# - Canonical "Why hire you?" pitch (AI/Agents emphasized)
|
| 6 |
+
# - Rate-limited, de-duped notifications
|
| 7 |
+
#
|
| 8 |
+
# Requires a .env with:
|
| 9 |
+
# GOOGLE_API_KEY=...
|
| 10 |
+
# PUSHOVER_TOKEN=...
|
| 11 |
+
# PUSHOVER_USER=...
|
| 12 |
+
|
| 13 |
from dotenv import load_dotenv
|
| 14 |
from openai import OpenAI
|
| 15 |
import json
|
| 16 |
import os
|
| 17 |
+
import re
|
| 18 |
+
import time
|
| 19 |
+
from collections import deque
|
| 20 |
import requests
|
|
|
|
| 21 |
import gradio as gr
|
| 22 |
|
| 23 |
+
from content import ContentStore, Doc
|
| 24 |
|
| 25 |
load_dotenv(override=True)
|
| 26 |
|
| 27 |
+
# ============================== Pushover utils ===============================

PUSH_WINDOW_SECONDS = 3600        # rate window (1 hour)
PUSH_MAX_IN_WINDOW = 5            # max pushes per hour
PUSH_DEDUPE_SECONDS = 6 * 3600    # suppress identical messages for 6 hours

_recent_pushes = deque()  # (timestamp, message) pairs inside the rate window
_last_seen = {}           # message -> timestamp of its most recent send


def _should_push(message: str) -> bool:
    """Return True if *message* may be sent now, recording it if so.

    Two filters are applied in order:
    1. De-duplication: an identical message seen within PUSH_DEDUPE_SECONDS
       is dropped.
    2. Windowed rate limit: at most PUSH_MAX_IN_WINDOW sends per
       PUSH_WINDOW_SECONDS.
    """
    now = time.time()

    # De-dupe identical messages
    last = _last_seen.get(message)
    if last and now - last < PUSH_DEDUPE_SECONDS:
        return False

    # Windowed rate limit: evict entries that fell out of the window first.
    while _recent_pushes and now - _recent_pushes[0][0] > PUSH_WINDOW_SECONDS:
        _recent_pushes.popleft()

    if len(_recent_pushes) >= PUSH_MAX_IN_WINDOW:
        return False

    # Prune expired de-dupe entries so _last_seen cannot grow without bound
    # (the original kept every distinct message forever — a slow leak in a
    # long-running Space).
    stale = [m for m, ts in _last_seen.items() if now - ts >= PUSH_DEDUPE_SECONDS]
    for m in stale:
        del _last_seen[m]

    _recent_pushes.append((now, message))
    _last_seen[message] = now
    return True
| 55 |
+
|
| 56 |
+
|
| 57 |
+
def push(text: str):
    """Send *text* as a Pushover notification, best-effort.

    Messages are gated by _should_push (de-dupe + hourly rate limit) and
    truncated to Pushover's 1024-character message limit. Network and HTTP
    failures are logged, never raised, so a notification problem can't break
    a chat turn.
    """
    if not _should_push(text):
        return
    try:
        resp = requests.post(
            "https://api.pushover.net/1/messages.json",
            data={
                "token": os.getenv("PUSHOVER_TOKEN"),
                "user": os.getenv("PUSHOVER_USER"),
                "message": text[:1024],
            },
            timeout=10,
        )
        # The original ignored HTTP error responses (e.g. bad token -> 4xx);
        # surface them in the log via the same warning path.
        resp.raise_for_status()
    except Exception as e:
        print(f"[WARN] Pushover failed: {e}", flush=True)
| 72 |
+
|
| 73 |
+
|
| 74 |
+
# ============================== Tools (safe) =================================
|
| 75 |
|
| 76 |
def record_user_details(email, name="Name not provided", notes="not provided"):
    """Record a visitor's contact details and fire a notification."""
    # A shared email is the highest-value signal this bot produces -> notify.
    push(f"Contact: {name} | {email} | {notes}")
    return {"recorded": "ok"}
| 80 |
|
| 81 |
+
def record_resume_gap(question, why_missing="not specified", mode="career"):
    """Record a question the loaded documents could not answer.

    Only career-mode gaps trigger a push notification; personal gaps are
    acknowledged without notifying.
    """
    if mode == "career":
        push(f"Gap[career]: {question} | reason: {why_missing}")
    return {"recorded": "ok"}
|
| 86 |
|
| 87 |
# OpenAI function-calling schema for record_user_details.
record_user_details_json = {
    "name": "record_user_details",
    "description": "Record that a user shared their email to get in touch.",
    "parameters": {
        "type": "object",
        "properties": {
            "email": {
                "type": "string",
                "description": "User email",
            },
            "name": {
                "type": "string",
                "description": "User name if provided",
            },
            "notes": {
                "type": "string",
                "description": "Context or notes from chat",
            },
        },
        "required": ["email"],
        "additionalProperties": False,
    },
}
|
| 101 |
|
| 102 |
+
# OpenAI function-calling schema for record_resume_gap.
record_resume_gap_json = {
    "name": "record_resume_gap",
    "description": "Use only when a question in the active mode cannot be answered from the documents.",
    "parameters": {
        "type": "object",
        "properties": {
            "question": {"type": "string"},
            "why_missing": {"type": "string"},
            "mode": {
                "type": "string",
                "enum": ["career", "personal"],
                "default": "career",
            },
        },
        "required": ["question"],
        "additionalProperties": False,
    },
}
|
| 116 |
|
| 117 |
+
# Tool registry: the schemas handed to the model, and the Python callables
# dispatched by name in Me.handle_tool_call.
TOOLS = [
    {"type": "function", "function": record_user_details_json},
    {"type": "function", "function": record_resume_gap_json},
]

TOOL_IMPL = {
    "record_user_details": record_user_details,
    "record_resume_gap": record_resume_gap,
}
|
| 124 |
|
| 125 |
|
| 126 |
+
# ============================== Canonical answer =============================

USE_CANONICAL_WHY_HIRE = True  # feature flag for the fixed pitch below

WHY_HIRE_REGEX = re.compile(
    r"(why\s+(should|would)\s*(we\s+)?hire\s+you|why\s+hire\s+you|why\s+are\s+you\s+a\s+fit)",
    re.I
)


def canonical_why_hire_pitch() -> str:
    """Return the fixed, pre-written answer used for "why hire you" questions."""
    return (
        "I ship reliable product fast—and I’m doubling that impact with AI. Recently I built CodeCraft, "
        "a real-time online IDE (Next.js 15, TypeScript, Convex, Clerk) deployed on Vercel, and I’ve also "
        "engineered an agentic career chatbot with tool calling, routing, and safe notifications. On the AI side, "
        "I work hands-on with LLMs for retrieval and tool use, and I prototype agentic workflows using frameworks "
        "like LangChain and modern SDKs. I focus on measurable results: tight feedback loops, lean CI/CD, and clean "
        "interfaces that make teams faster without sacrificing quality. I communicate clearly, break work into "
        "milestones, and own outcomes end-to-end. If you need someone who can pick up context quickly and turn ideas "
        "into production software—especially where AI can move the needle—I’ll add value from week one."
    )


def maybe_canonical(message: str, mode: str):
    """Return the canonical pitch for a career-mode "why hire you" question, else None."""
    if not (USE_CANONICAL_WHY_HIRE and mode == "career"):
        return None
    return canonical_why_hire_pitch() if WHY_HIRE_REGEX.search(message) else None
|
| 152 |
+
|
| 153 |
+
|
| 154 |
+
# ============================== Router schema ===============================

# JSON schema constraining the intent router's structured output.
ROUTER_SCHEMA = {
    "type": "object",
    "properties": {
        "intent": {
            "type": "string",
            "enum": ["career", "personal", "contact_exchange", "other"],
        },
        "reason": {"type": "string"},
    },
    "required": ["intent"],
}
|
| 167 |
+
|
| 168 |
|
| 169 |
+
# ============================== App core ====================================

# Reply used to steer off-topic or abusive messages back to career topics.
BOUNDARY_REPLY = (
    "I’m here to talk about my experience, projects, and skills. "
    "If you have a career-related question, I’m happy to help."
)
|
| 175 |
+
|
| 176 |
+
class Me:
    """Persona-driven career chatbot.

    Routes each message to a "career" or "personal" mode, answers only from
    documents loaded by ContentStore, and records leads/gaps through the
    function-calling tools in TOOL_IMPL.
    """

    # Guard against a model that keeps requesting tools forever (the original
    # `while True` loop had no upper bound).
    MAX_TOOL_ROUNDS = 8

    def __init__(self):
        self.name = "Yuelin Liu"
        # Gemini exposed through its OpenAI-compatible endpoint.
        self.openai = OpenAI(
            api_key=os.getenv("GOOGLE_API_KEY"),
            base_url="https://generativelanguage.googleapis.com/v1beta/openai/"
        )

        # Content store (two modes only)
        self.content = ContentStore()
        # Put career.pdf + summary.txt here (and any other work docs)
        self.content.load_folder("me/career", "career")
        # Merge everything else (hobby/life/projects/education) into personal/
        self.content.load_folder("me/personal", "personal")

        # Optional: quick startup log (comment out if noisy)
        self._log_loaded_docs()

    # ---------- Router / moderation ----------

    def classify(self, message: str):
        """Classify *message*; returns a dict like {"intent": ..., "reason": ...}.

        Falls back to {"intent": "career"} when the model reply is not valid
        JSON, so routing can never crash a turn.
        """
        system = (
            "Classify the user's message. "
            "Return JSON with field 'intent' ∈ {career, personal, contact_exchange, other}. "
            "Use 'career' for resume/skills/projects/tech stack/salary expectations; "
            "use 'personal' for hobbies/life background/interests; "
            "use 'contact_exchange' when the user shares or asks for an email; "
            "use 'other' for off-topic/harassment/spam. Return ONLY JSON."
        )
        resp = self.openai.chat.completions.create(
            model="gemini-2.5-flash",
            messages=[
                {"role": "system", "content": system},
                {"role": "user", "content": message}
            ],
            # If response_format isn't supported, fallback below still keeps things safe.
            response_format={"type": "json_schema", "json_schema": {"name": "router", "schema": ROUTER_SCHEMA}},
            temperature=0.2,
            top_p=0.9
        )
        try:
            return json.loads(resp.choices[0].message.content)
        except Exception:
            return {"intent": "career", "reason": "fallback"}

    # ---------- Tool handler ----------

    def _safe_parse_args(self, raw: str):
        """Parse model-produced tool arguments very forgivingly.

        Always returns a dict: non-dict JSON (e.g. a bare list) is rejected,
        because callers expand the result with **kwargs — the original could
        return a list here and then crash on args.items() in the retry path.
        """
        # Second candidate handles the common single-quote hiccup.
        for candidate in (raw, raw.replace("'", '"')):
            try:
                parsed = json.loads(candidate)
            except Exception:
                continue
            if isinstance(parsed, dict):
                return parsed
        # last resort: ignore args but keep running
        print(f"[WARN] Unable to parse tool args: {raw}", flush=True)
        return {}

    def handle_tool_call(self, tool_calls):
        """Execute each requested tool; return "tool"-role result messages."""
        results = []
        for tool_call in tool_calls:
            tool_name = tool_call.function.name
            raw_args = tool_call.function.arguments or "{}"
            args = self._safe_parse_args(raw_args)

            impl = TOOL_IMPL.get(tool_name)
            if not impl:
                print(f"[WARN] Unknown tool: {tool_name}", flush=True)
                results.append({
                    "role": "tool",
                    "content": json.dumps({"error": f"unknown tool {tool_name}"}),
                    "tool_call_id": tool_call.id
                })
                continue

            try:
                out = impl(**args)
            except TypeError:
                # model sent unexpected params; retry with only those the function accepts
                import inspect  # local: only needed on this rare path
                sig = inspect.signature(impl)
                filtered = {k: v for k, v in args.items() if k in sig.parameters}
                try:
                    out = impl(**filtered)
                except Exception as e2:
                    print(f"[ERROR] Tool '{tool_name}' failed: {e2}", flush=True)
                    out = {"error": "tool execution failed"}
            except Exception as e:
                print(f"[ERROR] Tool '{tool_name}' crashed: {e}", flush=True)
                out = {"error": "tool execution crashed"}

            results.append({
                "role": "tool",
                "content": json.dumps(out),
                "tool_call_id": tool_call.id
            })
        return results

    # ---------- Prompt assembly ----------

    def build_context_for_mode(self, mode: str):
        """Return the concatenated document text for *mode*'s domain."""
        domain = "career" if mode == "career" else "personal"
        return self.content.join_domain_text([domain])

    def system_prompt(self, mode: str):
        """Build the system prompt embedding the documents for *mode*."""
        domain_text = self.build_context_for_mode(mode)
        scope = "career" if mode == "career" else "personal"
        return f"""You are acting as {self.name}.
Answer only using {scope} information below. Do not invent personal facts outside these documents.

Strict tool policy:
- Use record_resume_gap ONLY for career questions you cannot answer from these documents.
- Do NOT record or notify for off-topic, harassing, sexual, discriminatory, or spam content.
- If the user provides contact details or asks to follow up, ask for an email and call record_user_details.

Be concise and professional. Gently redirect to career topics when appropriate.

## Documents
{domain_text}
"""

    # ---------- Chat entrypoint ----------

    def chat(self, message, history):
        """Gradio entrypoint: route, maybe short-circuit, then run the tool loop."""
        try:
            route = self.classify(message)
            intent = route.get("intent", "career")
            if intent == "other":
                return BOUNDARY_REPLY
            # contact_exchange is handled with the career documents/tools.
            mode = "career" if intent in ("career", "contact_exchange") else "personal"

            canon = maybe_canonical(message, mode)
            if canon:
                return canon

            messages = [{"role": "system", "content": self.system_prompt(mode)}] \
                + history + [{"role": "user", "content": message}]

            for _ in range(self.MAX_TOOL_ROUNDS):
                response = self.openai.chat.completions.create(
                    model="gemini-2.5-flash",
                    messages=messages,
                    tools=TOOLS,
                    temperature=0.2,
                    top_p=0.9
                )
                choice = response.choices[0]
                if choice.finish_reason == "tool_calls":
                    results = self.handle_tool_call(choice.message.tool_calls)
                    messages.append(choice.message)
                    messages.extend(results)
                    continue
                return choice.message.content or "Thanks—I've noted that."
            # Exhausted the tool budget; answer gracefully instead of looping forever.
            return "Thanks—I've noted that."
        except Exception as e:
            # Fail-closed, keep UI stable
            print(f"[FATAL] Chat turn failed: {e}", flush=True)
            return "Oops, something went wrong on my side. Please ask that again—I've reset my context."

    # ---------- Optional: startup log ----------

    def _log_loaded_docs(self):
        """Print how many docs loaded per domain; helpful for sanity checks."""
        by_domain = self.content.by_domain
        for domain, docs in by_domain.items():
            print(f"[LOAD] Domain '{domain}': {len(docs)} document(s)")
            for d in docs:
                print(f"   - {d.title}")
|
| 346 |
+
|
| 347 |
+
# ============================== Gradio UI ====================================

if __name__ == "__main__":
    # Launch the chat UI; Me() loads all documents at startup.
    agent = Me()
    gr.ChatInterface(agent.chat, type="messages").launch()
|
|
|
content.py
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# content.py
|
| 2 |
+
# Simple content registry for two modes: "career" and "personal".
|
| 3 |
+
# Auto-loads PDF/TXT/MD files from folders and concatenates text for prompts.
|
| 4 |
+
|
| 5 |
+
import os
|
| 6 |
+
import glob
|
| 7 |
+
from dataclasses import dataclass
|
| 8 |
+
from typing import List, Dict, Iterable, Optional
|
| 9 |
+
from pypdf import PdfReader
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
@dataclass
class Doc:
    """One loaded document, attributed to a domain."""
    domain: str       # "career" or "personal"
    title: str        # display name (the file's basename)
    text: str         # extracted plain text
    source_path: str  # original file path, for provenance


class ContentStore:
    """Registry of Docs grouped by domain, with folder auto-loading."""

    def __init__(self):
        self.docs: List[Doc] = []
        self.by_domain: Dict[str, List[Doc]] = {}

    # ---------- Loading ----------

    def add_doc(self, doc: Doc):
        """Register *doc* in both the flat list and the per-domain index."""
        self.docs.append(doc)
        self.by_domain.setdefault(doc.domain, []).append(doc)

    def load_folder(self, folder: str, domain: str, recurse: bool = False):
        """
        Load all files in a folder into a domain.
        Supported: .pdf, .txt, .md, .markdown

        Pass recurse=True to also descend into subfolders; the default (False)
        preserves the original behavior of skipping them.
        """
        os.makedirs(folder, exist_ok=True)  # tolerate a missing folder on first run
        for path in glob.glob(os.path.join(folder, "*")):
            if os.path.isdir(path):
                if recurse:
                    self.load_folder(path, domain, recurse=True)
                continue
            text = self._extract_text(path)
            if not text:
                continue
            title = os.path.basename(path)
            self.add_doc(Doc(domain=domain, title=title, text=text, source_path=path))

    def _extract_text(self, path: str) -> str:
        """Return plain text for a supported file, or "" on failure/unsupported."""
        lower = path.lower()
        if lower.endswith(".pdf"):
            out = []
            try:
                reader = PdfReader(path)
                for p in reader.pages:
                    t = p.extract_text()
                    if t:
                        out.append(t)
            except Exception:
                # Corrupt/unreadable PDFs are skipped silently by design.
                return ""
            return "\n".join(out)
        if lower.endswith((".txt", ".md", ".markdown")):
            try:
                with open(path, "r", encoding="utf-8") as f:
                    return f.read()
            except Exception:
                return ""
        return ""

    # ---------- Retrieval ----------

    def join_domain_text(self, domains: Optional[Iterable[str]]) -> str:
        """
        Concatenate documents for the selected domains.
        If domains is None/empty, defaults to ["career"].
        """
        if not domains:
            domains = ["career"]
        chunks: List[str] = []
        for d in domains:
            for doc in self.by_domain.get(d, []):
                chunks.append(f"### {doc.title}\n{doc.text}\n")
        return "\n".join(chunks)
|
me/career/career.pdf
ADDED
|
Binary file (86.9 kB). View file
|
|
|
me/career/summary.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
My name is Yuelin Liu. I see myself as a digital craftsperson, a builder who thrives on transforming complex problems into elegant, high-performance software. Based in Melbourne, my work is focused on the entire product lifecycle—from architecting scalable backends to shipping polished user interfaces.
|
| 2 |
+
|
| 3 |
+
I began by mastering the full stack, but my curiosity has pulled me towards the frontier of AI engineering. My focus now is on bridging the gap between the immense power of Large Language Models and the practical, human-centered products they can enable. I'm driven by the challenge of building not just features, but real-world solutions.
|