Spaces:
Sleeping
Sleeping
File size: 14,403 Bytes
3dd4599 7d8ee8d 1514f66 7d8ee8d 3dd4599 a7ce579 7d8ee8d 3dd4599 d1567c0 3dd4599 01e09d8 6c2acf3 d1567c0 3dd4599 272fd5b 3dd4599 272fd5b 3dd4599 bd59709 3dd4599 d1567c0 5b6170f d2d1ae8 5b6170f 7394cfa 5b6170f 7394cfa 5b6170f 7394cfa 5b6170f 7394cfa 5b6170f 7394cfa 5b6170f 7394cfa 5b6170f 7394cfa 5b6170f 7394cfa 5b6170f 7394cfa 5b6170f 7394cfa 5b6170f 3dd4599 01e09d8 1514f66 01e09d8 ac790b7 01e09d8 ac790b7 01e09d8 1514f66 3dd4599 5b6170f 3dd4599 5b6170f 1383c28 7394cfa 6ab32e8 5b6170f 3dd4599 663028e 1a5794c 3dd4599 663028e 3dd4599 6ef46ff 663028e 3dd4599 663028e 3dd4599 663028e 17cd6ef 663028e 3dd4599 ab3a211 3dd4599 ab3a211 b6b4764 627e6af 8a45e12 3dd4599 d2d1ae8 8a45e12 3dd4599 8a45e12 3dd4599 01e09d8 3dd4599 01e09d8 3dd4599 8a45e12 7d8ee8d 3dd4599 86000ea 3dd4599 86000ea f033a76 72d37a9 3dd4599 90da7d1 5048f43 83f72c4 d2d1ae8 3dd4599 d2d1ae8 ab3a211 8a45e12 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 |
import os
import re
import json
import time
import requests
import wandb
import torch
import spaces
from tqdm.auto import tqdm
import psutil
import pymupdf
import gradio as gr
from qdrant_client import QdrantClient
from utils import download_pdf_from_gdrive, merge_strings_with_prefix
from transformers import AutoModelForCausalLM, AutoTokenizer, QuantoConfig
# NOTE: generate_answer() hands this function to
# tokenizer.apply_chat_template(tools=[rag_query]), so the signature and the
# docstring below are presumably what the model is shown as the tool
# description. Treat the docstring wording as prompt text: re-check the
# rendered prompt after editing it or after adding annotations.
def rag_query(query: str):
    """
    Allows searching the vector database which contains
    information for a man named Suvaditya for a given query
    by performing semantic search. Returns results by
    looking at his resume, which contains a plethora of
    information about him.
    Args:
        query: The query against which the search will be run,
               in the form a single string phrase no more than
               10 words.
    Returns:
        search_results: A list of results that come closest
                        to the given query semantically,
                        determined by Cosine Similarity.
    """
    # `client` is the module-level QdrantClient; "resume" is the collection
    # that the script's setup code fills with the resume chunks.
    return client.query(
        collection_name="resume",
        query_text=query
    )
def generate_answer(chat_history):
    """Produce the model's next turn for the given conversation.

    The conversation is rendered with the tokenizer's chat template (with
    `rag_query` advertised as a tool), fed through `model.generate`, and only
    the newly generated tokens are decoded.

    Args:
        chat_history: List of chat message dicts accepted by
            `tokenizer.apply_chat_template`.

    Returns:
        The decoded continuation as a string. Special tokens are NOT
        stripped by the decode call, so any end-of-turn marker the model
        emitted is still part of the text.
    """
    model_inputs = tokenizer.apply_chat_template(
        chat_history,
        tools=[rag_query],
        add_generation_prompt=True,
        return_dict=True,
        return_tensors="pt",
    ).to(model.device)

    # Everything up to this offset is the prompt echoed back by generate().
    prompt_length = model_inputs["input_ids"].shape[1]

    sequences = model.generate(
        **model_inputs,
        max_new_tokens=512,
        do_sample=True,
        top_p=0.95,
        num_beams=4,
    )

    completion_ids = sequences[0, prompt_length:]
    answer = tokenizer.decode(completion_ids)

    # Release cached CUDA blocks between requests.
    torch.cuda.empty_cache()
    return answer
def parse_tool_request(tool_call, top_k=5):
    """Run the search requested by a `<tool_call>` block, if there is one.

    Args:
        tool_call: Raw model output that may contain a
            `<tool_call>{...json...}</tool_call>` section.
        top_k: Maximum number of retrieved documents to return.

    Returns:
        A `(documents, query)` tuple, where `documents` holds at most `top_k`
        `metadata["document"]` values from the `rag_query` hits and `query`
        is the search phrase taken from the tool call. `(None, None)` is
        returned when no tool call is present or when its payload is
        unusable (invalid JSON, missing `arguments`/`query`, or a query that
        is not a non-empty string), so a badly formed tool call degrades to
        "no tool requested" instead of raising.
    """
    match_result = re.search(r"<tool_call>(.*?)</tool_call>", tool_call, re.DOTALL)
    if match_result is None:
        return None, None

    try:
        query = json.loads(match_result.group(1).strip())["arguments"]["query"]
    except (json.JSONDecodeError, KeyError, TypeError):
        # TypeError covers payloads of the wrong shape, e.g. a JSON list or
        # "arguments" given as a plain string.
        return None, None

    if not isinstance(query, str) or not query.strip():
        return None, None

    query_results = [
        query_piece.metadata["document"] for query_piece in rag_query(query)
    ]
    return query_results[:top_k], query
def update_chat_history(chat_history, tool_query, query_results):
    """Record a completed `rag_query` tool round-trip in the conversation.

    Two messages are appended to `chat_history` in place: the assistant turn
    carrying the tool call, followed by the tool turn whose content is the
    retrieved passages joined with newlines.

    Args:
        chat_history: List of chat message dicts; mutated in place.
        tool_query: Search phrase the tool was called with.
        query_results: Sequence of passage strings returned by the search.

    Returns:
        The same `chat_history` list object, now two messages longer.
    """
    tool_name = "rag_query"
    call_spec = {
        "type": "function",
        "function": {
            "name": tool_name,
            "arguments": {"query": f"{tool_query}"},
        },
    }
    chat_history.extend(
        (
            {
                "role": "assistant",
                "metadata": "🛠️ Using Qdrant Engine to search for the query 🛠️",
                "tool_calls": [call_spec],
            },
            {
                "role": "tool",
                "name": tool_name,
                "content": "\n".join(query_results),
            },
        )
    )
    return chat_history
# Entry-point guard: the resume text is only bound when the file is run as a
# script.
if __name__ == "__main__":
    # Plain-text resume used as the retrieval corpus; the setup code splits it
    # on blank lines ("\n\n") into chunks. The text is kept flush-left so no
    # indentation leaks into the chunks.
    # The percent signs and the apostrophe are written without a leading
    # backslash: `\%` and `\’` are not valid Python escape sequences, so they
    # trigger an invalid-escape warning and leave stray backslashes in the
    # indexed text.
    RESUME_DATA = """
Suvaditya Mukherjee Email: suvadity@usc.edu
Portfolio: suvadityamuk.com Mobile: (213) 827-9733
Github: github.com/suvadityamuk
Education
University of Southern California Master of Science - Computer Science (Artificial Intelligence); GPA: 3.85/4 - Los Angeles, CA, USA
August 2024 - July 2026
Courses: Machine Learning, Deep Learning, Advanced Computer Vision, Analysis of Algorithms
NMIMS Mukesh Patel School of Technology, Management and Engineering
Bachelor of Technology - Computer Science (Artificial Intelligence); GPA: 3.94/4 - Mumbai, India
August 2020 - May 2024
Courses: Deep Learning, Data Structures and Algorithms, Machine Learning, Natural Language Processing, Software Engineering,
Operating Systems, Mathematics, Computer Organization and Architecture, Computer Networks, Database Management Systems
Experience
USC Institute of Creative Technologies Los Angeles, CA, USA
Machine Learning Student Worker - Learning Sciences Lab (Part-time) September 2024 - Present
Course Generation using Generative AI: Leverage Generative AI with LangChain and OpenAI to help make novel
techniques for course generation, tutoring content generation, and OpenTutor courses to learn and teach AI for the
AIRCOEE program in collaboration with the US Department of Defense, under Prof. (Dr.) Benjamin Nye.
Cogeneration Testbed: Maintain technologies for co-generation of tutoring content using open and cloud-based LLMs
to help educators.
USC School of Cinematic Arts Los Angeles, CA, USA
Machine Learning Assistant - Interactive Games Division (Part-time) September 2024 - Present
Student Worker: Assist Prof. (Dr.) Mark Bolas to develop an introductory Python Programming course for Game
Developers.
ML Research: Find new approaches to apply Generative AI based on LLMs and Diffusion Models to solve problems at
large-scale in Creative Media, with solutions such as generating scripts and summaries based on videos.
HARMAN International Bengaluru, India
Machine Learning Intern (Full-time) December 2023 - May 2024
K-Shot Rotation-Invariant Object Detection Pipeline Development: Produced new Intellectual Property
towards achieving a robust pipeline to perform K-shot object detection without dependence on rotation alignment.
Improved pipeline with 35% better results on client data
Zero-shot Time-Series Forecasting with LLMs: Researched on how to achieve zero-shot time-series forecasting
through LLMs while building on previous developments.
Spot Instance Handler using Agentic LLMs: Built an agent-based LLM system on Gemini 1.5 Pro and LangChain
to help reduce costs by 10% incurred, by running non-critical workloads on spot-instances
Center for Visual Information Technology, IIIT-Hyderabad Hyderabad, India
Research Intern (Full-time) June 2023 - November 2023
Research: Contributed towards research along Domain Adaptation problems in Autonomous Driving under Prof. C.V.
Jawahar and Prof. Shankar Gangisetty
Code Implementations: Operated with internal tools to execute large-scale GPU training and experimentation on
Image Segmentation problems
UnifyAI (Ivy) London, United Kingdom
ML Research Engineer Intern (Full-time) January 2023 - July 2023
Demos and Examples: Developed new demos, examples, and guides to internal and external official documentation,
most notably around converting torchvision models into TFLite. Also helped in establishing programs and managing the
Google Summer of Code program as an Organization Admin
Internal AI Developer: Prototyped an AI Developer (Code-LLM) to automate and builds upon existing codebases and
speeds up internal development, along with handling self-training through Cloud resources such as GCP and AWS
Publications and Research
Presentation: Pushing the Performance Envelope : An Optimization Study for 3D Generative Modelling with
PyTorch: Work on finding techniques to optimize 3D Text-to-Image Mesh generation [Accepted at PyTorch Conference 2024]
Paper: Guiding the Student’s Learning Curve: Augmenting Knowledge Distillation with Insights from
GradCAM: Work on investigating the effects of using GradCAM representations of Teacher models as direct inputs to
Student models for quicker convergence. [Accepted]
Paper: Project Lingua Franca: Democratizing Information through Unified Optical Character Recognition
and Neural Machine Translation: Work on combined Optical Character Recognition and Neural Machine Translation for
information translation with high-impact languages as targets [Accepted]
Leadership
Google Developer Expert: Recognized and selected as a top contributor to the Google ML Developer Community. Work
towards creating detailed tutorials, delivering talks around Deep Learning, and helping beta-test new products on GCP Vertex
AI and Gemini suite of tools.
Google Summer of Code: (Org Admin and Mentor) Mentored incoming students for completing tasks, handled
communications with Google Open Source Programs Office for compliance.
"""
# RESUME_PATH = os.path.join(os.getcwd(), "Resume.pdf")
# RESUME_URL = "https://drive.google.com/file/d/1YMF9NNTG5gubwJ7ipI5JfxAJKhlD9h2v/"
# ONNX_MODEL_PATH = "https://huggingface.co/onnx-community/Qwen2.5-1.5B-Instruct/resolve/main/onnx/model.onnx_data"
# SAVE_PATH = "./model.onnx_data"
# print("Downloading ONNX model...")
# response = requests.get(ONNX_MODEL_PATH, stream=True)
# response.raise_for_status()
# total_size = int(response.headers.get('content-length', 0))
# with open(SAVE_PATH, 'wb') as file, tqdm(
# desc=os.path.basename(SAVE_PATH),
# total=total_size,
# unit='iB',
# unit_scale=True
# ) as pbar:
# for data in response.iter_content(chunk_size=8192):
# size = file.write(data)
# pbar.update(size)
# print("Downloaded ONNX model!")
# Download file
# download_pdf_from_gdrive(RESUME_URL, RESUME_PATH)
# doc = pymupdf.open(RESUME_PATH)
# fulltext = doc[0].get_text().split("\n")
# fulltext = merge_strings_with_prefix(fulltext)
# Break the resume into retrieval chunks; chunks are delimited by blank lines.
fulltext = RESUME_DATA.split("\n\n")
print(fulltext)

# In-memory Qdrant instance; set_model selects the embedding model used by
# the client's add()/query() helpers.
client = QdrantClient(":memory:")
client.set_model("sentence-transformers/all-MiniLM-L6-v2")
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# Create the "resume" collection on first use, sized for the chosen
# embedding model.
resume_collection_missing = not client.collection_exists(collection_name="resume")
if resume_collection_missing:
    client.create_collection(
        collection_name="resume",
        vectors_config=client.get_fastembed_vector_params(),
    )

# Index every chunk, using its position in `fulltext` as the point id.
chunk_ids = range(len(fulltext))
client.add(
    collection_name="resume",
    documents=fulltext,
    ids=chunk_ids,
    batch_size=100,
)
# wandb.login(
# key=os.getenv("WANDB_API_KEY")
# )
# Hugging Face Hub id of the chat model; the same id is passed to
# from_pretrained() for both the model weights and the tokenizer further down.
model_name = "Qwen/Qwen2.5-3B-Instruct"
# wandb.init(
# project="resume-rag",
# name="zerogpu-run",
# save_code=True,
# config={
# "model_name": model_name,
# "resume_url": RESUME_URL
# }
# )
# wandb.login(
# key=os.getenv("WANDB_API_KEY")
# )
@spaces.GPU
def rag_process(message, chat_history):
    """Answer one user message, consulting the resume index when asked to.

    The model is called once with the conversation so far. If its output
    contains a usable `<tool_call>`, the search is executed, the tool
    round-trip is appended to the conversation, and the model is called a
    second time to produce the final answer.

    Args:
        message: Text of the user's latest message.
        chat_history: Earlier turns as a list of role/content message dicts
            (the chat UI is configured with type="messages"). May be empty
            or None. The list is not modified.

    Returns:
        The reply text, with a trailing `<|im_end|>` end-of-turn marker
        removed when the model emitted one.
    """
    system_message = {
        "role": "system",
        "content": (
            "You are an AI assistant focused on answering questions about Suvaditya's resume.\n"
            "Only provide information that is explicitly mentioned in the resume data.\n"
            "If you're unsure about any information, refuse to answer and direct users to suvadityamuk.com.\n"
            "Be accurate and concise in your responses. "
        ),
    }

    # Build a private copy of the conversation so the caller's list is never
    # mutated. The system prompt is prepended on EVERY turn: this function
    # only returns the answer string, so the history passed back on later
    # turns does not carry the system message.
    previous_turns = list(chat_history or [])
    first_turn = previous_turns[0] if previous_turns else None
    has_system_prompt = (
        isinstance(first_turn, dict) and first_turn.get("role") == "system"
    )
    messages = previous_turns if has_system_prompt else [system_message, *previous_turns]
    messages.append({"role": "user", "content": message})

    # First pass: the model either answers directly or requests a search.
    generated_text = generate_answer(messages)

    # (None, None) means no usable tool call was found in the output.
    query_results, tool_query = parse_tool_request(generated_text)

    if query_results is not None and tool_query is not None:
        # Second pass: let the model answer with the retrieved passages in
        # context.
        messages = update_chat_history(messages, tool_query, query_results)
        generated_text = generate_answer(messages)

    # generate_answer() decodes without stripping special tokens, so a
    # finished turn ends with the chat end marker. Remove it only when it is
    # really there; slicing a fixed 10 characters would eat real text
    # whenever generation stops at the token limit instead.
    end_marker = "<|im_end|>"
    if generated_text.endswith(end_marker):
        generated_text = generated_text[: -len(end_marker)]
    return generated_text
# Load the chat model in bfloat16 with device_map="auto", then the tokenizer
# from the same checkpoint id.
model_load_options = {
    "torch_dtype": torch.bfloat16,
    "device_map": "auto",
}
model = AutoModelForCausalLM.from_pretrained(model_name, **model_load_options)
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Example prompts offered in the chat UI.
example_questions = [
    "Where did Suvaditya complete his Bachelor's Degree?",
    "Where is Suvaditya currently working?",
]

# Markdown blurb for the interface (adjacent literals are concatenated into
# one string).
interface_description = (
    "Ask any question about Suvaditya's resume and get an answer! \n\n"
    "Note: Sometimes, as always, the LLM may give wrong answers. "
    "Here's a link to my [resume](https://suvadityamuk.com/uploads/resume.pdf), "
    "if you'd like to go through it yourself! "
    "Get in touch with me through [X](https://x.com/halcyonrayes), "
    "[Gmail](mailto:suvadityamuk@gmail.com), "
    "[LinkedIn](https://www.linkedin.com/in/suvadityamukherjee), "
    "or [schedule a meeting with me here](https://cal.com/suvadityamuk)"
)

# Chat front-end: each user message is routed through rag_process, with the
# history supplied in "messages" format.
demo = gr.ChatInterface(
    fn=rag_process,
    type="messages",
    title="Suvaditya's Personal RAG, a space on ZeroGPU!",
    description=interface_description,
    examples=example_questions,
    theme="John6666/YntecDark",
)
demo.launch()
# wandb.finish() |