liuyuelintop commited on
Commit
9064719
·
verified ·
1 Parent(s): fb856e1

Upload folder using huggingface_hub

Browse files
__pycache__/content.cpython-312.pyc ADDED
Binary file (4.14 kB). View file
 
app.py CHANGED
@@ -1,137 +1,351 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
  from dotenv import load_dotenv
2
  from openai import OpenAI
3
  import json
4
  import os
 
 
 
5
  import requests
6
- from pypdf import PdfReader
7
  import gradio as gr
8
 
 
9
 
10
  load_dotenv(override=True)
11
 
12
- def push(text):
13
- requests.post(
14
- "https://api.pushover.net/1/messages.json",
15
- data={
16
- "token": os.getenv("PUSHOVER_TOKEN"),
17
- "user": os.getenv("PUSHOVER_USER"),
18
- "message": text,
19
- }
20
- )
 
 
 
 
 
 
 
 
 
 
 
 
21
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
 
23
  def record_user_details(email, name="Name not provided", notes="not provided"):
24
- push(f"Recording {name} with email {email} and notes {notes}")
 
25
  return {"recorded": "ok"}
26
 
27
- def record_unknown_question(question):
28
- push(f"Recording {question}")
 
 
29
  return {"recorded": "ok"}
30
 
31
  record_user_details_json = {
32
  "name": "record_user_details",
33
- "description": "Use this tool to record that a user is interested in being in touch and provided an email address",
34
  "parameters": {
35
  "type": "object",
36
  "properties": {
37
- "email": {
38
- "type": "string",
39
- "description": "The email address of this user"
40
- },
41
- "name": {
42
- "type": "string",
43
- "description": "The user's name, if they provided it"
44
- }
45
- ,
46
- "notes": {
47
- "type": "string",
48
- "description": "Any additional information about the conversation that's worth recording to give context"
49
- }
50
  },
51
  "required": ["email"],
52
  "additionalProperties": False
53
  }
54
  }
55
 
56
- record_unknown_question_json = {
57
- "name": "record_unknown_question",
58
- "description": "Always use this tool to record any question that couldn't be answered as you didn't know the answer",
59
  "parameters": {
60
  "type": "object",
61
  "properties": {
62
- "question": {
63
- "type": "string",
64
- "description": "The question that couldn't be answered"
65
- },
66
  },
67
  "required": ["question"],
68
  "additionalProperties": False
69
  }
70
  }
71
 
72
- tools = [{"type": "function", "function": record_user_details_json},
73
- {"type": "function", "function": record_unknown_question_json}]
 
 
 
 
 
74
 
75
 
76
- class Me:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77
 
 
 
 
 
 
 
 
 
78
  def __init__(self):
 
79
  self.openai = OpenAI(
80
  api_key=os.getenv("GOOGLE_API_KEY"),
81
  base_url="https://generativelanguage.googleapis.com/v1beta/openai/"
82
  )
83
- self.name = "Yuelin Liu"
84
- reader = PdfReader("me/linkedin.pdf")
85
- self.linkedin = ""
86
- for page in reader.pages:
87
- text = page.extract_text()
88
- if text:
89
- self.linkedin += text
90
- with open("me/summary.txt", "r", encoding="utf-8") as f:
91
- self.summary = f.read()
92
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
 
94
  def handle_tool_call(self, tool_calls):
95
  results = []
96
  for tool_call in tool_calls:
97
  tool_name = tool_call.function.name
98
- arguments = json.loads(tool_call.function.arguments)
99
- print(f"Tool called: {tool_name}", flush=True)
100
- tool = globals().get(tool_name)
101
- result = tool(**arguments) if tool else {}
102
- results.append({"role": "tool","content": json.dumps(result),"tool_call_id": tool_call.id})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
  return results
104
-
105
- def system_prompt(self):
106
- system_prompt = f"You are acting as {self.name}. You are answering questions on {self.name}'s website, \
107
- particularly questions related to {self.name}'s career, background, skills and experience. \
108
- Your responsibility is to represent {self.name} for interactions on the website as faithfully as possible. \
109
- You are given a summary of {self.name}'s background and LinkedIn profile which you can use to answer questions. \
110
- Be professional and engaging, as if talking to a potential client or future employer who came across the website. \
111
- If you don't know the answer to any question, use your record_unknown_question tool to record the question that you couldn't answer, even if it's about something trivial or unrelated to career. \
112
- If the user is engaging in discussion, try to steer them towards getting in touch via email; ask for their email and record it using your record_user_details tool. "
113
-
114
- system_prompt += f"\n\n## Summary:\n{self.summary}\n\n## LinkedIn Profile:\n{self.linkedin}\n\n"
115
- system_prompt += f"With this context, please chat with the user, always staying in character as {self.name}."
116
- return system_prompt
117
-
 
 
 
 
 
 
 
 
 
 
 
 
118
  def chat(self, message, history):
119
- messages = [{"role": "system", "content": self.system_prompt()}] + history + [{"role": "user", "content": message}]
120
- done = False
121
- while not done:
122
- response = self.openai.chat.completions.create(model="gemini-2.5-flash", messages=messages, tools=tools)
123
- if response.choices[0].finish_reason=="tool_calls":
124
- message = response.choices[0].message
125
- tool_calls = message.tool_calls
126
- results = self.handle_tool_call(tool_calls)
127
- messages.append(message)
128
- messages.extend(results)
129
- else:
130
- done = True
131
- return response.choices[0].message.content
132
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
133
 
134
  if __name__ == "__main__":
135
  me = Me()
136
  gr.ChatInterface(me.chat, type="messages").launch()
137
-
 
1
+ # app.py
2
+ # Minimal, extensible chatbot with two modes: "career" and "personal".
3
+ # - Safe tools, with Pushover only for career gaps
4
+ # - Simple router: career | personal | contact_exchange | other
5
+ # - Canonical "Why hire you?" pitch (AI/Agents emphasized)
6
+ # - Rate-limited, de-duped notifications
7
+ #
8
+ # Requires a .env with:
9
+ # GOOGLE_API_KEY=...
10
+ # PUSHOVER_TOKEN=...
11
+ # PUSHOVER_USER=...
12
+
13
  from dotenv import load_dotenv
14
  from openai import OpenAI
15
  import json
16
  import os
17
+ import re
18
+ import time
19
+ from collections import deque
20
  import requests
 
21
  import gradio as gr
22
 
23
+ from content import ContentStore, Doc
24
 
25
  load_dotenv(override=True)
26
 
27
+ # ============================== Pushover utils ===============================
28
+
29
+ PUSH_WINDOW_SECONDS = 3600 # rate window (1 hour)
30
+ PUSH_MAX_IN_WINDOW = 5 # max pushes per hour
31
+ PUSH_DEDUPE_SECONDS = 6 * 3600 # suppress identical messages for 6 hours
32
+
33
+ _recent_pushes = deque() # (timestamp, message)
34
+ _last_seen = {} # message -> last_ts
35
+
36
+
37
+ def _should_push(message: str) -> bool:
38
+ now = time.time()
39
+
40
+ # De-dupe identical messages
41
+ last = _last_seen.get(message)
42
+ if last and now - last < PUSH_DEDUPE_SECONDS:
43
+ return False
44
+
45
+ # Windowed rate limit
46
+ while _recent_pushes and now - _recent_pushes[0][0] > PUSH_WINDOW_SECONDS:
47
+ _recent_pushes.popleft()
48
 
49
+ if len(_recent_pushes) >= PUSH_MAX_IN_WINDOW:
50
+ return False
51
+
52
+ _recent_pushes.append((now, message))
53
+ _last_seen[message] = now
54
+ return True
55
+
56
+
57
def push(text: str):
    """Send *text* to Pushover, honoring the rate/de-dupe gate.

    The message is truncated to Pushover's 1024-char limit. Delivery
    failures are logged and swallowed so the chat flow never breaks on
    a notification problem.
    """
    if not _should_push(text):
        return
    payload = {
        "token": os.getenv("PUSHOVER_TOKEN"),
        "user": os.getenv("PUSHOVER_USER"),
        "message": text[:1024],
    }
    try:
        requests.post(
            "https://api.pushover.net/1/messages.json",
            data=payload,
            timeout=10,
        )
    except Exception as e:
        print(f"[WARN] Pushover failed: {e}", flush=True)
72
+
73
+
74
+ # ============================== Tools (safe) =================================
75
 
76
def record_user_details(email, name="Name not provided", notes="not provided"):
    """Tool impl: log a visitor's contact details and notify via Pushover."""
    # Contact info is valuable -> notify
    summary = f"Contact: {name} | {email} | {notes}"
    push(summary)
    return {"recorded": "ok"}
80
 
81
def record_resume_gap(question, why_missing="not specified", mode="career"):
    """Tool impl: record a question the documents could not answer.

    Only gaps in "career" mode trigger a push notification; personal-mode
    gaps are acknowledged without notifying.
    """
    if mode != "career":
        return {"recorded": "ok"}
    push(f"Gap[career]: {question} | reason: {why_missing}")
    return {"recorded": "ok"}
86
 
87
# OpenAI-style JSON tool schema advertised to the model for record_user_details.
record_user_details_json = {
    "name": "record_user_details",
    "description": "Record that a user shared their email to get in touch.",
    "parameters": {
        "type": "object",
        "properties": {
            # Only the email is mandatory; name/notes enrich the notification.
            "email": {"type": "string", "description": "User email"},
            "name": {"type": "string", "description": "User name if provided"},
            "notes": {"type": "string", "description": "Context or notes from chat"},
        },
        "required": ["email"],
        "additionalProperties": False,
    },
}
101
 
102
# JSON tool schema for record_resume_gap; "mode" scopes the notification policy.
record_resume_gap_json = {
    "name": "record_resume_gap",
    "description": "Use only when a question in the active mode cannot be answered from the documents.",
    "parameters": {
        "type": "object",
        "properties": {
            "question": {"type": "string"},
            "why_missing": {"type": "string"},
            "mode": {"type": "string", "enum": ["career", "personal"], "default": "career"},
        },
        "required": ["question"],
        "additionalProperties": False,
    },
}
116
 
117
# Tool definitions sent to the model, and the local dispatch table mapping
# a tool name to its Python implementation (used by Me.handle_tool_call).
TOOLS = [
    {"type": "function", "function": schema}
    for schema in (record_user_details_json, record_resume_gap_json)
]

TOOL_IMPL = {
    "record_user_details": record_user_details,
    "record_resume_gap": record_resume_gap,
}
124
 
125
 
126
# ============================== Canonical answer =============================

# Feature flag: short-circuit "why hire you?" questions with a fixed pitch.
USE_CANONICAL_WHY_HIRE = True

# Common phrasings of the hire-me question (matched case-insensitively).
WHY_HIRE_REGEX = re.compile(
    r"(why\s+(should|would)\s*(we\s+)?hire\s+you|why\s+hire\s+you|why\s+are\s+you\s+a\s+fit)",
    re.I
)


def canonical_why_hire_pitch() -> str:
    """Return the grounded, concise pitch featuring AI/Agents work."""
    pitch = (
        "I ship reliable product fast—and I’m doubling that impact with AI. Recently I built CodeCraft, "
        "a real-time online IDE (Next.js 15, TypeScript, Convex, Clerk) deployed on Vercel, and I’ve also "
        "engineered an agentic career chatbot with tool calling, routing, and safe notifications. On the AI side, "
        "I work hands-on with LLMs for retrieval and tool use, and I prototype agentic workflows using frameworks "
        "like LangChain and modern SDKs. I focus on measurable results: tight feedback loops, lean CI/CD, and clean "
        "interfaces that make teams faster without sacrificing quality. I communicate clearly, break work into "
        "milestones, and own outcomes end-to-end. If you need someone who can pick up context quickly and turn ideas "
        "into production software—especially where AI can move the needle—I’ll add value from week one."
    )
    return pitch


def maybe_canonical(message: str, mode: str):
    """Return the canonical pitch when it applies to *message*, else None.

    Applies only in career mode, when the feature flag is on, and when
    the message matches a known hire-me phrasing.
    """
    if not (USE_CANONICAL_WHY_HIRE and mode == "career"):
        return None
    if WHY_HIRE_REGEX.search(message):
        return canonical_why_hire_pitch()
    return None
152
+
153
+
154
# ============================== Router schema ===============================

# JSON schema handed to the model when classifying each user turn.
ROUTER_SCHEMA = {
    "type": "object",
    "properties": {
        "intent": {
            "type": "string",
            "enum": ["career", "personal", "contact_exchange", "other"],
        },
        "reason": {"type": "string"},
    },
    "required": ["intent"],
}


# ============================== App core ====================================

# Stock reply for off-topic / abusive turns; keeps the bot on-message.
BOUNDARY_REPLY = (
    "I’m here to talk about my experience, projects, and skills. "
    "If you have a career-related question, I’m happy to help."
)
175
+
176
class Me:
    """Two-mode (career/personal) agentic chatbot persona.

    Routes each user turn to an intent, optionally short-circuits with a
    canonical answer, then runs a bounded model<->tool loop grounded in the
    documents loaded by ContentStore.
    """

    # Fix: hard cap on model->tool round-trips per user turn. The original
    # used `while True`, which loops forever if the model keeps emitting
    # tool_calls on every response.
    MAX_TOOL_ROUNDS = 8

    def __init__(self):
        self.name = "Yuelin Liu"
        # Gemini accessed through its OpenAI-compatible endpoint.
        self.openai = OpenAI(
            api_key=os.getenv("GOOGLE_API_KEY"),
            base_url="https://generativelanguage.googleapis.com/v1beta/openai/"
        )

        # Content store (two modes only)
        self.content = ContentStore()
        # Put career.pdf + summary.txt here (and any other work docs)
        self.content.load_folder("me/career", "career")
        # Merge everything else (hobby/life/projects/education) into personal/
        self.content.load_folder("me/personal", "personal")

        # Optional: quick startup log (comment out if noisy)
        self._log_loaded_docs()

    # ---------- Router / moderation ----------

    def classify(self, message: str):
        """Classify *message* into an intent dict.

        Returns e.g. {"intent": "career", "reason": "..."}; falls back to
        a "career" intent when the model reply is not valid JSON.
        """
        system = (
            "Classify the user's message. "
            "Return JSON with field 'intent' ∈ {career, personal, contact_exchange, other}. "
            "Use 'career' for resume/skills/projects/tech stack/salary expectations; "
            "use 'personal' for hobbies/life background/interests; "
            "use 'contact_exchange' when the user shares or asks for an email; "
            "use 'other' for off-topic/harassment/spam. Return ONLY JSON."
        )
        resp = self.openai.chat.completions.create(
            model="gemini-2.5-flash",
            messages=[
                {"role": "system", "content": system},
                {"role": "user", "content": message}
            ],
            # If response_format isn't supported, fallback below still keeps things safe.
            response_format={"type": "json_schema", "json_schema": {"name": "router", "schema": ROUTER_SCHEMA}},
            temperature=0.2,
            top_p=0.9
        )
        try:
            return json.loads(resp.choices[0].message.content)
        except Exception:
            return {"intent": "career", "reason": "fallback"}

    # ---------- Tool handler ----------

    def _safe_parse_args(self, raw: str):
        """Parse model-produced tool arguments very forgivingly.

        Returns {} (rather than raising) when the args are unusable, so a
        malformed tool call never crashes the chat turn.
        """
        try:
            return json.loads(raw)
        except Exception:
            try:
                # common hiccup: single quotes
                return json.loads(raw.replace("'", '"'))
            except Exception:
                # last resort: ignore args but keep running
                print(f"[WARN] Unable to parse tool args: {raw}", flush=True)
                return {}

    def handle_tool_call(self, tool_calls):
        """Execute each requested tool and return tool-role result messages.

        Unknown tools, unexpected parameters, and tool crashes are all
        reported back to the model instead of raising.
        """
        results = []
        for tool_call in tool_calls:
            tool_name = tool_call.function.name
            raw_args = tool_call.function.arguments or "{}"
            args = self._safe_parse_args(raw_args)

            impl = TOOL_IMPL.get(tool_name)
            if not impl:
                print(f"[WARN] Unknown tool: {tool_name}", flush=True)
                results.append({
                    "role": "tool",
                    "content": json.dumps({"error": f"unknown tool {tool_name}"}),
                    "tool_call_id": tool_call.id
                })
                continue

            try:
                out = impl(**args)
            except TypeError:
                # model sent unexpected params; retry with only the params
                # the function actually accepts
                import inspect
                sig = inspect.signature(impl)
                filtered = {k: v for k, v in args.items() if k in sig.parameters}
                try:
                    out = impl(**filtered)
                except Exception as e2:
                    print(f"[ERROR] Tool '{tool_name}' failed: {e2}", flush=True)
                    out = {"error": "tool execution failed"}
            except Exception as e:
                print(f"[ERROR] Tool '{tool_name}' crashed: {e}", flush=True)
                out = {"error": "tool execution crashed"}

            results.append({
                "role": "tool",
                "content": json.dumps(out),
                "tool_call_id": tool_call.id
            })
        return results

    # ---------- Prompt assembly ----------

    def build_context_for_mode(self, mode: str):
        """Return the concatenated document text for the active mode."""
        domain = "career" if mode == "career" else "personal"
        return self.content.join_domain_text([domain])

    def system_prompt(self, mode: str):
        """Build the grounded system prompt for *mode* ("career"/"personal")."""
        domain_text = self.build_context_for_mode(mode)
        scope = "career" if mode == "career" else "personal"
        return f"""You are acting as {self.name}.
Answer only using {scope} information below. Do not invent personal facts outside these documents.

Strict tool policy:
- Use record_resume_gap ONLY for career questions you cannot answer from these documents.
- Do NOT record or notify for off-topic, harassing, sexual, discriminatory, or spam content.
- If the user provides contact details or asks to follow up, ask for an email and call record_user_details.

Be concise and professional. Gently redirect to career topics when appropriate.

## Documents
{domain_text}
"""

    # ---------- Chat entrypoint ----------

    def chat(self, message, history):
        """Gradio entrypoint: route, maybe short-circuit, then run the tool loop."""
        try:
            route = self.classify(message)
            intent = route.get("intent", "career")
            if intent == "other":
                return BOUNDARY_REPLY
            # contact_exchange is handled with the career persona/documents
            mode = "career" if intent in ("career", "contact_exchange") else "personal"

            canon = maybe_canonical(message, mode)
            if canon:
                return canon

            messages = [{"role": "system", "content": self.system_prompt(mode)}] \
                + history + [{"role": "user", "content": message}]

            for _ in range(self.MAX_TOOL_ROUNDS):
                response = self.openai.chat.completions.create(
                    model="gemini-2.5-flash",
                    messages=messages,
                    tools=TOOLS,
                    temperature=0.2,
                    top_p=0.9
                )
                choice = response.choices[0]
                if choice.finish_reason == "tool_calls":
                    results = self.handle_tool_call(choice.message.tool_calls)
                    messages.append(choice.message)
                    messages.extend(results)
                    continue
                return choice.message.content or "Thanks—I've noted that."

            # Tool loop exceeded the cap — bail out gracefully instead of spinning.
            print("[WARN] Tool loop exceeded MAX_TOOL_ROUNDS", flush=True)
            return "Thanks—I've noted that."
        except Exception as e:
            # Fail-closed, keep UI stable
            print(f"[FATAL] Chat turn failed: {e}", flush=True)
            return "Oops, something went wrong on my side. Please ask that again—I've reset my context."

    # ---------- Optional: startup log ----------

    def _log_loaded_docs(self):
        """Print how many docs loaded per domain; helpful for sanity checks."""
        by_domain = self.content.by_domain
        for domain, docs in by_domain.items():
            print(f"[LOAD] Domain '{domain}': {len(docs)} document(s)")
            for d in docs:
                print(f" - {d.title}")
346
+
347
+ # ============================== Gradio UI ====================================
348
 
349
if __name__ == "__main__":
    # Launch the chat UI; "messages" type keeps history in OpenAI format.
    assistant = Me()
    gr.ChatInterface(assistant.chat, type="messages").launch()
 
content.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # content.py
2
+ # Simple content registry for two modes: "career" and "personal".
3
+ # Auto-loads PDF/TXT/MD files from folders and concatenates text for prompts.
4
+
5
+ import os
6
+ import glob
7
+ from dataclasses import dataclass
8
+ from typing import List, Dict, Iterable, Optional
9
+ from pypdf import PdfReader
10
+
11
+
12
@dataclass
class Doc:
    """One loaded document, tagged with the persona domain it belongs to."""
    domain: str        # "career" or "personal"
    title: str         # display name (file basename)
    text: str          # extracted plain text
    source_path: str   # original file path, for traceability


class ContentStore:
    """Registry of Docs grouped by domain ("career" / "personal").

    Auto-loads PDF/TXT/MD files from folders and concatenates their text
    for inclusion in system prompts.
    """

    def __init__(self):
        self.docs: List[Doc] = []
        self.by_domain: Dict[str, List[Doc]] = {}

    # ---------- Loading ----------

    def add_doc(self, doc: Doc):
        """Register *doc* in both the flat list and the per-domain index."""
        self.docs.append(doc)
        self.by_domain.setdefault(doc.domain, []).append(doc)

    def load_folder(self, folder: str, domain: str):
        """
        Load all files in a folder into a domain.
        Supported: .pdf, .txt, .md, .markdown

        Creates the folder if missing so a fresh checkout still boots.
        Fix: files are loaded in sorted order — glob.glob order is
        filesystem-dependent, which made prompt content nondeterministic.
        """
        os.makedirs(folder, exist_ok=True)
        for path in sorted(glob.glob(os.path.join(folder, "*"))):
            if os.path.isdir(path):
                # Subfolders are skipped; recurse manually on subdirs if needed.
                continue
            text = self._extract_text(path)
            if not text:
                continue
            title = os.path.basename(path)
            self.add_doc(Doc(domain=domain, title=title, text=text, source_path=path))

    def _extract_text(self, path: str) -> str:
        """Best-effort text extraction; returns "" for unsupported or broken files.

        Unreadable files are logged (instead of silently skipped) so missing
        prompt content is diagnosable.
        """
        lower = path.lower()
        if lower.endswith(".pdf"):
            out = []
            try:
                reader = PdfReader(path)
                for p in reader.pages:
                    t = p.extract_text()
                    if t:
                        out.append(t)
            except Exception as e:
                print(f"[WARN] Could not read PDF {path}: {e}", flush=True)
                return ""
            return "\n".join(out)
        if lower.endswith((".txt", ".md", ".markdown")):
            try:
                with open(path, "r", encoding="utf-8") as f:
                    return f.read()
            except Exception as e:
                print(f"[WARN] Could not read text file {path}: {e}", flush=True)
                return ""
        return ""

    # ---------- Retrieval ----------

    def join_domain_text(self, domains: Optional[Iterable[str]]) -> str:
        """
        Concatenate documents for the selected domains.
        If domains is None/empty, defaults to ["career"].
        """
        if not domains:
            domains = ["career"]
        chunks: List[str] = []
        for d in domains:
            for doc in self.by_domain.get(d, []):
                chunks.append(f"### {doc.title}\n{doc.text}\n")
        return "\n".join(chunks)
me/career/career.pdf ADDED
Binary file (86.9 kB). View file
 
me/career/summary.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ My name is Yuelin Liu. I see myself as a digital craftsperson, a builder who thrives on transforming complex problems into elegant, high-performance software. Based in Melbourne, my work is focused on the entire product lifecycle—from architecting scalable backends to shipping polished user interfaces.
2
+
3
+ I began by mastering the full stack, but my curiosity has pulled me towards the frontier of AI engineering. My focus now is on bridging the gap between the immense power of Large Language Models and the practical, human-centered products they can enable. I'm driven by the challenge of building not just features, but real-world solutions.