drewgenai committed
Commit 078c1b4 · Parent(s): f0c5aed

initial commit
.gitignore ADDED
@@ -0,0 +1,12 @@
+ __pycache__/
+ .chainlit/
+ .venv/
+ .env
+ /output/
+ /upload/
+ *.jsonl
+ /models/
+ *z*.py
+ *z*.md
+ *z*.ipynb
+ /z*
Dockerfile ADDED
@@ -0,0 +1,30 @@
+ # Get a distribution that has uv already installed
+ FROM ghcr.io/astral-sh/uv:python3.13-bookworm-slim
+
+ # Add user - this is the user that will run the app
+ # If you do not set user, the app will run as root (undesirable)
+ RUN useradd -m -u 1000 user
+ USER user
+
+ # Set the home directory and path
+ ENV HOME=/home/user \
+     PATH=/home/user/.local/bin:$PATH
+
+ ENV UVICORN_WS_PROTOCOL=websockets
+
+
+ # Set the working directory
+ WORKDIR $HOME/app
+
+ # Copy the app to the container
+ COPY --chown=user . $HOME/app
+
+ # Install the dependencies
+ # RUN uv sync --frozen
+ RUN uv sync
+
+ # Expose the port
+ EXPOSE 7860
+
+ # Run the app
+ CMD ["uv", "run", "chainlit", "run", "app.py", "--host", "0.0.0.0", "--port", "7860"]
app.py ADDED
@@ -0,0 +1,359 @@
+ import os
+ import shutil
+ import json
+ import pandas as pd
+ import chainlit as cl
+ from dotenv import load_dotenv
+ from langchain_core.documents import Document
+ from langchain_community.document_loaders import PyMuPDFLoader
+ from langchain_experimental.text_splitter import SemanticChunker
+ from langchain_community.vectorstores import Qdrant
+ from langchain_huggingface import HuggingFaceEmbeddings
+ from langchain_core.output_parsers import StrOutputParser
+ from langchain_openai import ChatOpenAI
+ from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
+ from langchain.tools import tool
+ from langchain.schema import AIMessage, HumanMessage
+ from operator import itemgetter
+ from langchain.agents import AgentExecutor, create_openai_tools_agent
+ from qdrant_client import QdrantClient
+ from qdrant_client.models import VectorParams, Distance
+
+ load_dotenv()
+
+
+ UPLOAD_PATH = "upload/"
+ OUTPUT_PATH = "output/"
+ INITIAL_DATA_PATH = "./data/Instruments_Definitions.xlsx"
+ os.makedirs(UPLOAD_PATH, exist_ok=True)
+ os.makedirs(OUTPUT_PATH, exist_ok=True)
+
+ # Initialize the embeddings model, semantic splitter, and chat model
+ model_id = "Snowflake/snowflake-arctic-embed-m"
+ embedding_model = HuggingFaceEmbeddings(model_name=model_id)
+ semantic_splitter = SemanticChunker(embedding_model, add_start_index=True, buffer_size=30)
+ llm = ChatOpenAI(model="gpt-4o-mini")
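+ # SemanticChunker splits where embedding similarity between neighboring
+ # sentence groups drops, rather than at fixed character counts; buffer_size
+ # controls how many neighboring sentences are grouped when comparing, and
+ # add_start_index records each chunk's offset in its metadata.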
+
+ # Export comparison prompt
+ export_prompt = """
+ CONTEXT:
+ {context}
+
+ QUERY:
+ {question}
+
+ You are a helpful assistant. Use the available context to answer the question.
+
+ Between these two files containing protocols, identify and match **entire assessment sections** based on conceptual similarity. Do NOT match individual questions.
+
+ ### **Output Format:**
+ Return the response in **valid JSON format** structured as a list of dictionaries, where each dictionary contains:
+ [
+     {{
+         "Derived Description": "A short name for the matched concept",
+         "Protocol_1": "Protocol 1 - Matching Element",
+         "Protocol_2": "Protocol 2 - Matching Element"
+     }},
+     ...
+ ]
+ ### **Example Output:**
+ [
+     {{
+         "Derived Description": "Pain Coping Strategies",
+         "Protocol_1": "Pain Coping Strategy Scale (PCSS-9)",
+         "Protocol_2": "Chronic Pain Adjustment Index (CPAI-10)"
+     }},
+     {{
+         "Derived Description": "Work Stress and Fatigue",
+         "Protocol_1": "Work-Related Stress Scale (WRSS-8)",
+         "Protocol_2": "Occupational Fatigue Index (OFI-7)"
+     }},
+     ...
+ ]
+
+ ### Rules:
+ 1. Only output **valid JSON** with no explanations, summaries, or markdown formatting.
+ 2. Ensure each entry in the JSON list represents a single matched data element from the two protocols.
+ 3. If no matching element is found in a protocol, leave it empty ("").
+ 4. **Do NOT include headers, explanations, or additional formatting** - only return the raw JSON list.
+ 5. Include every element from both protocols.
+ 6. If an element cannot be matched, still create the row: fill in the protocol where the element was found and put "could not match" in the other protocol column.
+ 7. Take Protocol_1 and Protocol_2 from the two uploaded files, one column per file.
+ """
+
+ compare_export_prompt = ChatPromptTemplate.from_template(export_prompt)
+
+ QUERY_PROMPT = """
+ You are a helpful assistant. Use the available context to answer the question concisely and informatively.
+
+ CONTEXT:
+ {context}
+
+ QUERY:
+ {question}
+
+ Provide a natural-language response using the given information. If you do not know the answer, say so.
+ """
+
+ query_prompt = ChatPromptTemplate.from_template(QUERY_PROMPT)
+
+
+ @tool
+ def document_query_tool(question: str) -> dict | str:
+     """Retrieves relevant document sections and answers questions based on the uploaded documents."""
+
+     retriever = cl.user_session.get("qdrant_retriever")
+     if not retriever:
+         return "Error: No documents available for retrieval. Please upload two PDF files first."
+     # with_config({"k": 10}) would be silently ignored; k must go in search_kwargs
+     retriever = retriever.vectorstore.as_retriever(search_kwargs={"k": 10})
+
+     # Use a RAG chain similar to the comparison tool
+     rag_chain = (
+         {"context": itemgetter("question") | retriever, "question": itemgetter("question")}
+         | query_prompt | llm | StrOutputParser()
+     )
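+     # The dict runs both branches on the same input: itemgetter pulls the
+     # question and pipes it through the retriever to fill {context}, while the
+     # second branch passes it through unchanged as {question}.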
+     response_text = rag_chain.invoke({"question": question})
+
+     # Get the retrieved docs for context
+     retrieved_docs = retriever.invoke(question)
+
+     return {
+         "messages": [HumanMessage(content=response_text)],
+         "context": retrieved_docs
+     }
+
+
+ @tool
+ def document_comparison_tool(question: str) -> str:
+     """Compares the two uploaded documents, identifies matched elements, exports them as JSON, formats them into a CSV, and provides a download link."""
+
+     # Retrieve the vector database retriever
+     retriever = cl.user_session.get("qdrant_retriever")
+     if not retriever:
+         return "Error: No documents available for retrieval. Please upload two PDF files first."
+
+     # Process the query using RAG
+     rag_chain = (
+         {"context": itemgetter("question") | retriever, "question": itemgetter("question")}
+         | compare_export_prompt | llm | StrOutputParser()
+     )
+     response_text = rag_chain.invoke({"question": question})
+
+     # Parse the response and save it as a CSV
+     try:
+         structured_data = json.loads(response_text)
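+         # json.loads expects the raw JSON list the prompt demands; if the model
+         # ever wraps it in markdown fences, the except branch below reports it.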
+         if not structured_data:
+             return "Error: No matched elements found."
+
+         # Define the output file path
+         file_path = os.path.join(OUTPUT_PATH, "comparison_results.csv")
+
+         # Save to CSV
+         df = pd.DataFrame(structured_data, columns=["Derived Description", "Protocol_1", "Protocol_2"])
+         df.to_csv(file_path, index=False)
+
+         # Send the message with the file directly from the tool
+         cl.run_sync(
+             cl.Message(
+                 content="Comparison complete! Download the CSV below:",
+                 elements=[cl.File(name="comparison_results.csv", path=file_path, display="inline")],
+             ).send()
+         )
+
+         # Return a simple confirmation message
+         return "Comparison results have been generated and displayed."
+
+     except json.JSONDecodeError:
+         return "Error: Response is not valid JSON."
+
+
+ # Define tools for the agent
+ tools = [document_query_tool, document_comparison_tool]
+
+ # Set up the agent with a system prompt
+ system_prompt = """You are an intelligent document analysis assistant. You have access to two tools:
+
+ 1. document_query_tool: Use this when a user wants information or has questions about the content of uploaded documents.
+ 2. document_comparison_tool: Use this when a user wants to compare elements between two uploaded documents or export comparison results.
+
+ Analyze the user's request carefully to determine which tool is most appropriate.
+ """
+
+ # Create the agent using OpenAI function calling
+ agent_prompt = ChatPromptTemplate.from_messages([
+     ("system", system_prompt),
+     MessagesPlaceholder(variable_name="chat_history"),
+     ("human", "{input}"),
+     MessagesPlaceholder(variable_name="agent_scratchpad"),
+ ])
+
+ agent = create_openai_tools_agent(
+     llm=ChatOpenAI(model="gpt-4o", temperature=0),
+     tools=tools,
+     prompt=agent_prompt
+ )
+
+ # Create the agent executor
+ agent_executor = AgentExecutor.from_agent_and_tools(
+     agent=agent,
+     tools=tools,
+     verbose=True,
+     handle_parsing_errors=True,
+ )
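+ # agent_executor.invoke({"input": ..., "chat_history": [...]}) returns a dict;
+ # the handler below reads its "output" key, which is usually the agent's final
+ # answer but may be the raw dict from document_query_tool when passed through.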
+
+
+ def initialize_vector_store():
+     """Initialize an empty Qdrant vector store"""
+     try:
+         # Create a Qdrant client for in-memory storage
+         client = QdrantClient(location=":memory:")
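+         # ":memory:" keeps vectors inside this Python process only; everything
+         # is rebuilt on restart, which suits a per-session comparison app.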
+
+         # Create the collection with the appropriate vector size:
+         # Snowflake/snowflake-arctic-embed-m produces 768-dimensional vectors
+         vector_size = 768
+
+         # Check if the collection exists; if not, create it
+         collections = client.get_collections().collections
+         collection_names = [collection.name for collection in collections]
+
+         if "document_comparison" not in collection_names:
+             client.create_collection(
+                 collection_name="document_comparison",
+                 vectors_config=VectorParams(size=vector_size, distance=Distance.COSINE)
+             )
+             print("Created new collection: document_comparison")
+
+         # Create the vector store with the client
+         vectorstore = Qdrant(
+             client=client,
+             collection_name="document_comparison",
+             embeddings=embedding_model
+         )
+         print("Vector store initialized successfully")
+         return vectorstore
+     except Exception as e:
+         print(f"Error initializing vector store: {str(e)}")
+         return None
+
+
+ async def load_reference_data(vectorstore):
+     """Load reference Excel data into the vector database"""
+     if not os.path.exists(INITIAL_DATA_PATH):
+         print(f"Warning: Initial data file {INITIAL_DATA_PATH} not found")
+         return vectorstore
+
+     try:
+         # Load the Excel file
+         df = pd.read_excel(INITIAL_DATA_PATH)
+
+         # Convert the DataFrame to documents
+         documents = []
+         for _, row in df.iterrows():
+             # Combine all columns into a single text
+             content = " ".join([f"{col}: {str(val)}" for col, val in row.items()])
+             doc = Document(page_content=content, metadata={"source": "Instruments_Definitions.xlsx"})
+             documents.append(doc)
+
+         # Add documents to the vector store
+         if documents:
+             vectorstore.add_documents(documents)
+             print(f"Successfully loaded {len(documents)} entries from {INITIAL_DATA_PATH}")
+
+         return vectorstore
+     except Exception as e:
+         print(f"Error loading reference data: {str(e)}")
+         return vectorstore
+
+
+ async def process_uploaded_files(files, vectorstore):
+     """Process uploaded PDF files and add them to the vector store"""
+     documents_with_metadata = []
+     for file in files:
+         file_path = os.path.join(UPLOAD_PATH, file.name)
+         shutil.copyfile(file.path, file_path)
+
+         loader = PyMuPDFLoader(file_path)
+         documents = loader.load()
+
+         for doc in documents:
+             source_name = file.name
+             chunks = semantic_splitter.split_text(doc.page_content)
+             for chunk in chunks:
+                 doc_chunk = Document(page_content=chunk, metadata={"source": source_name})
+                 documents_with_metadata.append(doc_chunk)
+
+     if documents_with_metadata:
+         # Add documents to the vector store
+         vectorstore.add_documents(documents_with_metadata)
+         print(f"Added {len(documents_with_metadata)} chunks from uploaded files")
+         return True
+     return False
+
+
+ @cl.on_chat_start
+ async def start():
+     # Initialize chat history for the agent
+     cl.user_session.set("chat_history", [])
+
+     # Initialize the vector store
+     vectorstore = initialize_vector_store()
+     if not vectorstore:
+         await cl.Message("Error: Could not initialize vector store.").send()
+         return
+
+     # Load reference data
+     async with cl.Step("Loading reference data"):
+         vectorstore = await load_reference_data(vectorstore)
+         cl.user_session.set("qdrant_vectorstore", vectorstore)
+         cl.user_session.set("qdrant_retriever", vectorstore.as_retriever())
+         await cl.Message("Reference data loaded successfully!").send()
+
+     # Ask for PDF uploads
+     files = await cl.AskFileMessage(
+         content="Please upload **two PDF files** for comparison:",
+         accept=["application/pdf"],
+         max_files=2
+     ).send()
+
+     # AskFileMessage returns None if the user never uploads anything
+     if not files or len(files) != 2:
+         await cl.Message("Error: You must upload exactly two PDF files.").send()
+         return
+
+     # Process the uploaded files
+     async with cl.Step("Processing uploaded files"):
+         success = await process_uploaded_files(files, vectorstore)
+         if success:
+             # Update the retriever with the latest vector store
+             cl.user_session.set("qdrant_retriever", vectorstore.as_retriever())
+             await cl.Message("Files uploaded and processed successfully! You can now enter your query.").send()
+         else:
+             await cl.Message("Error: Unable to process files. Please try again.").send()
+
+
+ @cl.on_message
+ async def handle_message(message: cl.Message):
+     # Get the chat history
+     chat_history = cl.user_session.get("chat_history", [])
+
+     # Run the agent
+     async with cl.Step("Agent thinking"):
+         response = await cl.make_async(agent_executor.invoke)(
+             {"input": message.content, "chat_history": chat_history}
+         )
+
+     # Handle the response based on the tool that was called
+     if isinstance(response["output"], dict) and "messages" in response["output"]:
+         # This is from document_query_tool
+         await cl.Message(response["output"]["messages"][0].content).send()
+     else:
+         # Generic response (including the confirmation from document_comparison_tool)
+         await cl.Message(content=str(response["output"])).send()
+
+     # Update the chat history with the new exchange (the agent's reply is an AIMessage)
+     chat_history.extend([
+         HumanMessage(content=message.content),
+         AIMessage(content=str(response["output"]))
+     ])
+     cl.user_session.set("chat_history", chat_history)
chainlit.md ADDED
@@ -0,0 +1,2 @@
+ Welcome to Chat with Your Text File
+ With this application, you can compare uploaded protocol files.
data/Instruments_Definitions.xlsx ADDED
Binary file (10 kB).
example_files/Instruments_Definitions.xlsx ADDED
Binary file (10 kB).
example_files/docx/Protocol_NOAPS v1.0.docx ADDED
Binary file (20.8 kB).
example_files/docx/Protocol_PKAS v1.0.docx ADDED
Binary file (26.2 kB).
example_files/docx/Protocol_PPMT v1.0.docx ADDED
Binary file (20.5 kB).
example_files/pdf/Protocol_NOAPS v1.0.pdf ADDED
Binary file (75 kB).
example_files/pdf/Protocol_PKAS v1.0.pdf ADDED
Binary file (140 kB).
example_files/pdf/Protocol_PPMT v1.0.pdf ADDED
Binary file (48 kB).
pyproject.toml ADDED
@@ -0,0 +1,38 @@
+ [project]
+ name = "protocol-sync"
+ version = "0.1.0"
+ description = "midterm POC huggingface project"
+ readme = "README.md"
+ requires-python = ">=3.13"
+ dependencies = [
+     "IProgress",
+     "PyMuPDF",
+     "accelerate",
+     "chainlit",
+     "huggingface_hub",
+     "ipykernel",
+     "ipywidgets",
+     "langchain",
+     "langchain-community",
+     "langchain-core",
+     "langchain-experimental",
+     "langchain-huggingface",
+     "langchain-openai",
+     "langchain-qdrant",
+     "langchain-text-splitters",
+     "langgraph",
+     "langsmith",
+     "lxml",
+     "openai",
+     "pypdf2",
+     "qdrant-client",
+     "ragas",
+     "torch",
+     "transformers",
+     "tqdm",
+     "unstructured",
+     "wandb",
+     "websockets",
+     "openpyxl",
+ ]
requirements.txt ADDED
@@ -0,0 +1,210 @@
+ accelerate==1.4.0
+ aiofiles==23.2.1
+ aiohappyeyeballs==2.4.6
+ aiohttp==3.11.13
+ aiosignal==1.3.2
+ annotated-types==0.7.0
+ anyio==4.8.0
+ appdirs==1.4.4
+ asttokens==3.0.0
+ asyncer==0.0.7
+ attrs==25.1.0
+ backoff==2.2.1
+ beautifulsoup4==4.13.3
+ bidict==0.23.1
+ certifi==2025.1.31
+ cffi==1.17.1
+ chainlit==2.2.1
+ chardet==5.2.0
+ charset-normalizer==3.4.1
+ chevron==0.14.0
+ click==8.1.8
+ comm==0.2.2
+ cryptography==44.0.1
+ dataclasses-json==0.6.7
+ datasets==3.3.2
+ debugpy==1.8.12
+ decorator==5.2.1
+ deepdiff==8.2.0
+ deprecated==1.2.18
+ dill==0.3.8
+ diskcache==5.6.3
+ distro==1.9.0
+ docker-pycreds==0.4.0
+ emoji==2.14.1
+ executing==2.2.0
+ fastapi==0.115.8
+ filelock==3.17.0
+ filetype==1.2.0
+ frozenlist==1.5.0
+ fsspec==2024.12.0
+ gitdb==4.0.12
+ gitpython==3.1.44
+ googleapis-common-protos==1.68.0
+ greenlet==3.1.1
+ grpcio==1.70.0
+ grpcio-tools==1.70.0
+ h11==0.14.0
+ h2==4.2.0
+ hpack==4.1.0
+ httpcore==1.0.7
+ httpx==0.28.1
+ httpx-sse==0.4.0
+ huggingface-hub==0.29.1
+ hyperframe==6.1.0
+ idna==3.10
+ importlib-metadata==8.5.0
+ iprogress==0.4
+ ipykernel==6.29.5
+ ipython==8.32.0
+ ipywidgets==8.1.5
+ jedi==0.19.2
+ jinja2==3.1.5
+ jiter==0.8.2
+ joblib==1.4.2
+ jsonpatch==1.33
+ jsonpath-python==1.0.6
+ jsonpointer==3.0.0
+ jupyter-client==8.6.3
+ jupyter-core==5.7.2
+ jupyterlab-widgets==3.0.13
+ langchain==0.3.15
+ langchain-community==0.3.15
+ langchain-core==0.3.31
+ langchain-experimental==0.3.4
+ langchain-huggingface==0.1.2
+ langchain-openai==0.3.1
+ langchain-qdrant==0.2.0
+ langchain-text-splitters==0.3.5
+ langdetect==1.0.9
+ langgraph==0.2.74
+ langgraph-checkpoint==2.0.16
+ langgraph-sdk==0.1.53
+ langsmith==0.3.10
+ lazify==0.4.0
+ literalai==0.1.103
+ lxml==5.3.1
+ markupsafe==3.0.2
+ marshmallow==3.26.1
+ matplotlib-inline==0.1.7
+ mpmath==1.3.0
+ msgpack==1.1.0
+ multidict==6.1.0
+ multiprocess==0.70.16
+ mypy-extensions==1.0.0
+ nest-asyncio==1.6.0
+ networkx==3.4.2
+ nltk==3.9.1
+ numpy==2.2.3
+ nvidia-cublas-cu12==12.4.5.8
+ nvidia-cuda-cupti-cu12==12.4.127
+ nvidia-cuda-nvrtc-cu12==12.4.127
+ nvidia-cuda-runtime-cu12==12.4.127
+ nvidia-cudnn-cu12==9.1.0.70
+ nvidia-cufft-cu12==11.2.1.3
+ nvidia-curand-cu12==10.3.5.147
+ nvidia-cusolver-cu12==11.6.1.9
+ nvidia-cusparse-cu12==12.3.1.170
+ nvidia-cusparselt-cu12==0.6.2
+ nvidia-nccl-cu12==2.21.5
+ nvidia-nvjitlink-cu12==12.4.127
+ nvidia-nvtx-cu12==12.4.127
+ openai==1.64.0
+ opentelemetry-api==1.29.0
+ opentelemetry-exporter-otlp==1.29.0
+ opentelemetry-exporter-otlp-proto-common==1.29.0
+ opentelemetry-exporter-otlp-proto-grpc==1.29.0
+ opentelemetry-exporter-otlp-proto-http==1.29.0
+ opentelemetry-instrumentation==0.50b0
+ opentelemetry-proto==1.29.0
+ opentelemetry-sdk==1.29.0
+ opentelemetry-semantic-conventions==0.50b0
+ orderly-set==5.3.0
+ orjson==3.10.15
+ packaging==24.2
+ pandas==2.2.3
+ parso==0.8.4
+ pexpect==4.9.0
+ pillow==11.1.0
+ platformdirs==4.3.6
+ portalocker==2.10.1
+ prompt-toolkit==3.0.50
+ propcache==0.3.0
+ protobuf==5.29.3
+ psutil==7.0.0
+ ptyprocess==0.7.0
+ pure-eval==0.2.3
+ pyarrow==19.0.1
+ pycparser==2.22
+ pydantic==2.10.6
+ pydantic-core==2.27.2
+ pydantic-settings==2.8.0
+ pygments==2.19.1
+ pyjwt==2.10.1
+ pymupdf==1.25.3
+ pypdf==5.3.0
+ pypdf2==3.0.1
+ python-dateutil==2.9.0.post0
+ python-dotenv==1.0.1
+ python-engineio==4.11.2
+ python-iso639==2025.2.18
+ python-magic==0.4.27
+ python-multipart==0.0.18
+ python-socketio==5.12.1
+ pytz==2025.1
+ pyyaml==6.0.2
+ pyzmq==26.2.1
+ qdrant-client==1.13.2
+ ragas==0.2.13
+ rapidfuzz==3.12.1
+ regex==2024.11.6
+ requests==2.32.3
+ requests-toolbelt==1.0.0
+ safetensors==0.5.2
+ scikit-learn==1.6.1
+ scipy==1.15.2
+ sentence-transformers==3.4.1
+ sentry-sdk==2.22.0
+ setproctitle==1.3.5
+ setuptools==75.8.0
+ simple-websocket==1.1.0
+ six==1.17.0
+ smmap==5.0.2
+ sniffio==1.3.1
+ soupsieve==2.6
+ sqlalchemy==2.0.38
+ stack-data==0.6.3
+ starlette==0.41.3
+ sympy==1.13.1
+ syncer==2.0.3
+ tabulate==0.9.0
+ tenacity==9.0.0
+ threadpoolctl==3.5.0
+ tiktoken==0.9.0
+ tokenizers==0.21.0
+ tomli==2.2.1
+ torch==2.6.0
+ tornado==6.4.2
+ tqdm==4.67.1
+ traitlets==5.14.3
+ transformers==4.49.0
+ triton==3.2.0
+ typing-extensions==4.12.2
+ typing-inspect==0.9.0
+ tzdata==2025.1
+ unstructured==0.14.8
+ unstructured-client==0.25.9
+ uptrace==1.29.0
+ urllib3==2.3.0
+ uvicorn==0.34.0
+ wandb==0.19.7
+ watchfiles==0.20.0
+ wcwidth==0.2.13
+ websockets==15.0
+ widgetsnbextension==4.0.13
+ wrapt==1.17.2
+ wsproto==1.2.0
+ xxhash==3.5.0
+ yarl==1.18.3
+ zipp==3.21.0
+ zstandard==0.23.0
uv.lock ADDED
The diff for this file is too large to render.