replace project2
- Dockerfile +18 -60
- app.py +108 -273
- code_analysis.py:Zone.Identifier → app.py:Zone.Identifier +0 -0
- code_analysis.py +0 -31
- prompts.py +0 -59
- prompts.py:Zone.Identifier +0 -0
- requirements.txt +0 -106
- requirements.txt:Zone.Identifier +0 -0
- states.py +0 -7
- states.py:Zone.Identifier +0 -0
- tools.py +0 -51
- tools.py:Zone.Identifier +0 -0
Dockerfile
CHANGED
@@ -1,73 +1,31 @@
-# FROM python:3.10
-# RUN useradd -m -u 1000 user
-# USER root
-# ENV HOME=/home/user \
-#     PATH=/home/user/.local/bin:$PATH
-# WORKDIR $HOME/app
-# COPY --chown=user:user . $HOME/app
-# # COPY --chown=user . $HOME/app
-# COPY requirements.txt .
-# # COPY ./requirements.txt ~/app/requirements.txt
-
-#
-
-# # RUN pip install -r requirements.
-# # Expose port
-# EXPOSE 7860
-# # RUN pip install pydantic==2.10.1 chainlit
-# # COPY . .
-
-#
-#
-# RUN chmod -R 755 /home/user/app
-# RUN chmod -R 755 /home/user/.local
-# RUN mkdir -p /home/user/app/.files && chown -R user:user /home/user/app/.files
-
-
-# USER user
-
-# CMD ["chainlit", "run", "app.py", "--host", "0.0.0.0", "--port", "7860"]
-
-# # CMD ["chainlit", "run", "app.py", "--port", "7860"]
-
-
-FROM python:3.10
-
-# Create user with specific UID
+
+# Get a distribution that has uv already installed
+FROM ghcr.io/astral-sh/uv:python3.13-bookworm-slim
+
+# Add user - this is the user that will run the app
+# If you do not set user, the app will run as root (undesirable)
 RUN useradd -m -u 1000 user
+USER user
 
-# Set
+# Set the home directory and path
 ENV HOME=/home/user \
     PATH=/home/user/.local/bin:$PATH
 
-WORKDIR $HOME/app
-
-# Copy requirements and install dependencies as root
-COPY requirements.txt .
-RUN pip install --upgrade pip && \
-    pip install --no-cache-dir -r requirements.txt
+ENV UVICORN_WS_PROTOCOL=websockets
 
-# Copy application files
-COPY . .
+# Set the working directory
+WORKDIR $HOME/app
 
-#
-mkdir -p /home/user/.local && \
-chown -R user:user /home/user
-
-RUN pip install pydantic==2.10.1 chainlit
+# Copy the app to the container
+COPY --chown=user . $HOME/app
 
+# Install the dependencies
+# RUN uv sync --frozen
+RUN uv sync
 
-# Expose port
+# Expose the port
 EXPOSE 7860
 
-#
-
-# Run the application
-CMD ["chainlit", "run", "app.py", "--host", "0.0.0.0", "--port", "7860", "--no-cache"]
-
-# CMD ["chainlit", "run", "app.py", "--host", "0.0.0.0", "--port", "7860"]
+# Run the app
+CMD ["uv", "run", "chainlit", "run", "app.py", "--host", "0.0.0.0", "--port", "7860"]
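Note: RUN uv sync resolves and installs dependencies from a pyproject.toml at the project root, so this image assumes the repo now ships one in place of the deleted requirements.txt; the commented-out "uv sync --frozen" variant would additionally require a committed uv.lock.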
app.py
CHANGED
@@ -1,304 +1,139 @@
 import os
-import
-from
-from
-import
-
-
-
-
-
-from
-from
-from typing_extensions import List, TypedDict
-from langchain_core.documents import Document
-from langchain_core.prompts import ChatPromptTemplate
-from langchain.schema.output_parser import StrOutputParser
-from langchain_core.tools import Tool, tool
-from langgraph.prebuilt import ToolNode
-from typing import TypedDict, Annotated
-from langgraph.graph.message import add_messages
-import operator
-from langchain_core.messages import BaseMessage, HumanMessage, AIMessage
-from langchain.vectorstores import Qdrant
-from langchain.embeddings import OpenAIEmbeddings
-from langchain.schema import Document
-from qdrant_client import QdrantClient
-from qdrant_client.http.models import Distance, VectorParams
-
+from typing import List
+from chainlit.types import AskFileResponse
+from aimakerspace.text_utils import CharacterTextSplitter, TextFileLoader, PDFLoader
+from aimakerspace.openai_utils.prompts import (
+    UserRolePrompt,
+    SystemRolePrompt,
+    AssistantRolePrompt,
+)
+from aimakerspace.openai_utils.embedding import EmbeddingModel
+from aimakerspace.vectordatabase import VectorDatabase
+from aimakerspace.openai_utils.chatmodel import ChatOpenAI
 import chainlit as cl
-import tempfile
-import shutil
-
-
-
-from states import AgentState
-
-
-processed_file_path = None
-document_file_path = None
-vectorstore = None
-main_chain = None
-qdrant_client = None
-
-async def on_chat_start():
-    await cl.Message(content="Welcome to the Python Code Documentation Assistant! Please upload a Python file to get started.").send()
-
-async
-
-    if message.elements and any(el.type == "file" for el in message.elements):
-        file_elements = [el for el in message.elements if el.type == "file"]
-        file_element = file_elements[0]
-        is_python_file = (
-            file_element.mime.startswith("text/x-python") or
-            file_element.name.endswith(".py") or
-            file_element.mime == "text/plain"  # Some systems identify .py as text/plain
-        )
-        if is_python_file:
-            # Send processing message
-            msg = cl.Message(content="Processing your Python file...")
-            await msg.send()
-
-            temp_dir = tempfile.mkdtemp()
-            file_path = os.path.join(temp_dir, file_element.name)
-
-            with open(file_element.path, "rb") as source_file:
-                file_content_bytes = source_file.read()
-            with open(file_path, "wb") as destination_file:
-                destination_file.write(file_content_bytes)
-
-            processed_file_path = file_path
-
+system_template = """\
+Use the following context to answer a users question. If you cannot find the answer in the context, say you don't know the answer."""
+system_role_prompt = SystemRolePrompt(system_template)
+
+user_prompt_template = """\
+Context:
+{context}
+
+Question:
+{question}
+"""
+user_role_prompt = UserRolePrompt(user_prompt_template)
+
+class RetrievalAugmentedQAPipeline:
+    def __init__(self, llm: ChatOpenAI(), vector_db_retriever: VectorDatabase) -> None:
+        self.llm = llm
+        self.vector_db_retriever = vector_db_retriever
+
+    async def arun_pipeline(self, user_query: str):
+        context_list = self.vector_db_retriever.search_by_text(user_query, k=4)
+
+        context_prompt = ""
+        for context in context_list:
+            context_prompt += context[0] + "\n"
+
+        formatted_system_prompt = system_role_prompt.create_message()
+
+        formatted_user_prompt = user_role_prompt.create_message(question=user_query, context=context_prompt)
+
+        async def generate_response():
+            async for chunk in self.llm.astream([formatted_system_prompt, formatted_user_prompt]):
+                yield chunk
+
+        return {"response": generate_response(), "context": context_list}
+
+text_splitter = CharacterTextSplitter()
+
+def process_file(file: AskFileResponse):
+    import tempfile
+    import shutil
+
+    print(f"Processing file: {file.name}")
+
+    # Create a temporary file with the correct extension
+    suffix = f".{file.name.split('.')[-1]}"
+    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
+        # Copy the uploaded file content to the temporary file
+        shutil.copyfile(file.path, temp_file.name)
+        print(f"Created temporary file at: {temp_file.name}")
+
+        # Create appropriate loader
+        if file.name.lower().endswith('.pdf'):
+            loader = PDFLoader(temp_file.name)
+        else:
+            loader = TextFileLoader(temp_file.name)
+
+        try:
+            # Load and process the documents
+            documents = loader.load_documents()
+            texts = text_splitter.split_texts(documents)
+            return texts
+        finally:
+            # Clean up the temporary file
             try:
-
-
-                imports = extract_imports(file_content, file_path)
-
-                print(f'Done reading file')
-
-                # Define describe packages graph
-                search_packages_tools = [search_pypi]
-                describe_imports_llm = ChatOpenAI(model="gpt-4o-mini")
-                # describe_imports_llm = describe_imports_llm.bind_tools(tools = search_packages_tools, tool_choice="required")
-
-                describe_imports_prompt = ChatPromptTemplate.from_messages([
-                    ("system", describe_imports),
-                    ("human", "{imports}")
-                ])
-
-                describe_imports_chain = (
-                    {"code_language": itemgetter("code_language"), "imports": itemgetter("imports")}
-                    | describe_imports_prompt | describe_imports_llm | StrOutputParser()
-                )
-
-                print(f'done defining imports chain')
-
-                # Define imports chain function
-                def call_imports_chain(state):
-                    last_message = state["messages"][-1]
-                    content = json.loads(last_message.content)
-                    chain_input = {"code_language": content['code_language'],
-                                   "imports": content['imports']}
-                    response = describe_imports_chain.invoke(chain_input)
-                    return {"messages": [AIMessage(content=response)]}
-
-                # bind model to tool or ToolNode
-                imports_tool_node = ToolNode(search_packages_tools)
-
-                # construct graph and compile
-                uncompiled_imports_graph = StateGraph(AgentState)
-                uncompiled_imports_graph.add_node("imports_agent", call_imports_chain)
-                uncompiled_imports_graph.add_node("imports_action", imports_tool_node)
-                uncompiled_imports_graph.set_entry_point("imports_agent")
-
-                def should_continue(state):
-                    last_message = state["messages"][-1]
-
-                    if last_message.tool_calls:
-                        return "imports_action"
-
-                    return END
-
-                uncompiled_imports_graph.add_conditional_edges(
-                    "imports_agent",
-                    should_continue
-                )
-
-                uncompiled_imports_graph.add_edge("imports_action", "imports_agent")
-
-                compiled_imports_graph = uncompiled_imports_graph.compile()
-
-                print(f'compiled imports graph')
-                # Invoke imports graph
-                initial_state = {
-                    "messages": [{
-                        "role": "human",
-                        "content": json.dumps({
-                            "code_language": "python",
-                            "imports": imports
-                        })
-                    }]
-                }
-
-                # await msg.update(content="Analyzing imports and generating documentation...")
-                msg.content = "Analyzing your code and generating documentation..."
-                await msg.update()
-
-                msg = cl.Message(content="Analyzing your code and generating documentation...")
-                await msg.send()
-
-                result = compiled_imports_graph.invoke(initial_state)
-
-                # Define qdrant Database
-                qdrant_client = QdrantClient(":memory:")
-
-                embedding_model = OpenAIEmbeddings(model="text-embedding-3-small")
-                embedding_dim = 1536
-
-                qdrant_client.create_collection(
-                    collection_name="description_rag_data",
-                    vectors_config=VectorParams(size=embedding_dim, distance=Distance.COSINE),
-                )
-
-                vectorstore = Qdrant(qdrant_client, collection_name="description_rag_data", embeddings=embedding_model)
-
-                # Add packages chunks
-                text = result['messages'][-1].content
-                chunks = [
-                    {"type": "Imported Packages", "name": "Imported Packages", "content": text},
-                    #{"type": "Source Code", "name": "Source Code", "content": file_content},
-
-                ]
-
-                docs = [
-                    Document(
-                        page_content=f"{chunk['type']} - {chunk['name']} - {chunk['content']}",  # Content for the model
-                        metadata={**chunk}  # Store metadata, but don't put embeddings here
-                    )
-                    for chunk in chunks
-                ]
-                vectorstore.add_documents(docs)
-                qdrant_retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
-
-                print('done adding docs to DB')
-                # define documenter chain
-                documenter_llm = ChatOpenAI(model="gpt-4o-mini")
-                documenter_llm_prompt = ChatPromptTemplate.from_messages([
-                    ("system", documenter_prompt),
-                ])
-                documenter_chain = (
-                    {"context": itemgetter("context")}
-                    | documenter_llm_prompt
-                    | documenter_llm
-                    | StrOutputParser()
-                )
-
-                print('done defining documenter chain')
-                # extract description chunks from database
-                collection_name = "description_rag_data"
-                all_points = qdrant_client.scroll(collection_name=collection_name, limit=1000)[0]  # Adjust limit if needed
-                one_chunk = all_points[0].payload
-                input_text = f"type: {one_chunk['metadata']['type']} \nname: {one_chunk['metadata']['name']} \ncontent: {one_chunk['metadata']['content']}"
-
-                print('done extracting chunks form DB')
-
-                document_response = documenter_chain.invoke({"context": input_text})
-
-                print('done invoking documenter chain and will write in docx')
-                # write packages description in word file
-                document_file_path = write_to_docx(document_response)
-
-                print('done writing docx file')
-                # Set up Main Chain for chat
-                main_llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
-
-
-                    {"context": itemgetter("query") | qdrant_retriever, "code_language": itemgetter("code_language"), "query": itemgetter("query"), }
-                    | main_llm_prompt
-                    | main_llm
-                    | StrOutputParser()
-                )
-
-                    name="documentation.docx",
-                    path=document_file_path,
-                    display="inline"
-                )
-            ]
-            print('done defining elements')
-            msg.content = "✅ Your Python file has been processed! You can download the documentation file below. How can I help you with your code?"
-            msg.elements = elements
-            await msg.update()
-
-        except Exception as e:
-            msg.content = f"❌ Error processing file: {str(e)}"
-            await msg.update()
-
-        await cl.Message(content="Please upload a Python (.py) file.").send()
-
-    elif processed_file_path and main_chain:
-        user_input = message.content
-        # Send thinking message
-        msg = cl.Message(content="Thinking...")
-        await msg.send()
-
-        try:
-            # Use main_chain to answer the query
-            # invoke main chain
-            inputs = {
-                'code_language': 'Python',
-                'query': user_input
-            }
-
-            response = main_chain.invoke(inputs)
-
-            # Update with the response
-            msg.content = response
-            await msg.update()
-
-    global processed_file_path
-    # Clean up temporary files
-    if processed_file_path and os.path.exists(os.path.dirname(processed_file_path)):
-        shutil.rmtree(os.path.dirname(processed_file_path))
+                os.unlink(temp_file.name)
+            except Exception as e:
+                print(f"Error cleaning up temporary file: {e}")
+
+
+@cl.on_chat_start
+async def on_chat_start():
+    files = None
+
+    # Wait for the user to upload a file
+    while files == None:
+        files = await cl.AskFileMessage(
+            content="Please upload a Text or PDF file to begin! test",
+            accept=["text/plain", "application/pdf"],
+            max_size_mb=2,
+            timeout=180,
+        ).send()
+
+    file = files[0]
+
+    msg = cl.Message(
+        content=f"Processing `{file.name}`..."
+    )
+    await msg.send()
+
+    # load the file
+    texts = process_file(file)
+
+    print(f"Processing {len(texts)} text chunks")
+
+    # Create a dict vector store
+    vector_db = VectorDatabase()
+    vector_db = await vector_db.abuild_from_list(texts)
+
+    chat_openai = ChatOpenAI()
+
+    # Create a chain
+    retrieval_augmented_qa_pipeline = RetrievalAugmentedQAPipeline(
+        vector_db_retriever=vector_db,
+        llm=chat_openai
+    )
+
+    # Let the user know that the system is ready
+    msg.content = f"Processing `{file.name}` done. You can now ask questions!"
+    await msg.update()
+
+    cl.user_session.set("chain", retrieval_augmented_qa_pipeline)
+
+
+@cl.on_message
+async def main(message):
+    chain = cl.user_session.get("chain")
+
+    msg = cl.Message(content="")
+    result = await chain.arun_pipeline(message.content)
+
+    async for stream_resp in result["response"]:
+        await msg.stream_token(stream_resp)
+
+    await msg.send()
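For reference, arun_pipeline returns a dict whose "response" value is an async generator, which is what the @cl.on_message handler streams token by token. A minimal sketch of driving the same pipeline outside Chainlit (assuming the RetrievalAugmentedQAPipeline class above is in scope, the aimakerspace package from this repo is installed, and OPENAI_API_KEY is set; the sample chunks are hypothetical):

import asyncio

from aimakerspace.vectordatabase import VectorDatabase
from aimakerspace.openai_utils.chatmodel import ChatOpenAI

async def demo():
    # Build a small in-memory vector store from hypothetical text chunks
    vector_db = await VectorDatabase().abuild_from_list([
        "Chainlit is a framework for building LLM chat apps.",
        "uv is a fast Python package and project manager.",
    ])
    pipeline = RetrievalAugmentedQAPipeline(llm=ChatOpenAI(), vector_db_retriever=vector_db)

    result = await pipeline.arun_pipeline("What is Chainlit?")
    async for token in result["response"]:  # consume the streamed tokens
        print(token, end="", flush=True)

asyncio.run(demo())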
code_analysis.py:Zone.Identifier → app.py:Zone.Identifier
RENAMED
File without changes
code_analysis.py
DELETED
@@ -1,31 +0,0 @@
-
-def read_python_file(file_path):
-    try:
-        with open(file_path, 'r', encoding='utf-8') as f:
-            content = f.read()
-        return content
-    except FileNotFoundError:
-        print(f"File not found: {file_path}")
-        raise
-    except IOError as e:
-        print(f"Error reading file {file_path}: {str(e)}")
-        raise
-    except Exception as e:
-        print(f"Unexpected error reading file {file_path}: {str(e)}")
-        raise
-
-def extract_imports(code, file_path):
-    try:
-
-        # Split into lines and find imports
-        import_lines = []
-        for line in code.split('\n'):
-            line = line.strip()
-            if line.startswith('import ') or line.startswith('from '):
-                import_lines.append(line)
-
-        return import_lines
-
-    except Exception as e:
-        print(f"Error extracting imports from file {file_path}: {str(e)}")
-        return []
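For reference, the deleted extract_imports was a plain line scan: it strips each line and keeps those beginning with "import " or "from ", so it also catches indented imports but only the first line of a parenthesized multi-line import. A minimal standalone sketch of the same logic:

sample = """import os
from typing import List

def main():
    import json
"""

# Same line-scan logic as the deleted helper
import_lines = []
for line in sample.split('\n'):
    line = line.strip()
    if line.startswith('import ') or line.startswith('from '):
        import_lines.append(line)

print(import_lines)  # ['import os', 'from typing import List', 'import json']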
prompts.py
DELETED
@@ -1,59 +0,0 @@
-describe_imports = """You are an expert {code_language} developer.
-Your will be given code lines that import packages.
-Your role is to give a brief description of each package
-
-You have access to the following tool and you MUST use it:
-search_pypi: Use this to get information about Python packages from PyPI.
-
-For each import:
-1. Extract the main package name
-2. Use the search_pypi tool to get package information by calling "search_pypi(package_name)"
-3. Combine the information into a clear description
-4. If the retuned value of tool is empty use your own knowledge
-5. If you have no knowledge for this package then it's description should be "I don't know details about this package"
-
-You must respond in the following JSON format:
-{{"Imported_Packages": [
-    {{"name": "package1", "desc": "brief description of package1"}},
-    {{"name": "package2", "desc": "brief description of package2"}}
-]}}
-
-Rules for the output:
-1. Use valid JSON format
-2. Package names should be the exact names from the imports
-3. Descriptions should be brief and clear
-4. Do not include any text outside the JSON structure
-"""
-
-documenter_prompt = """You are an expert code documenter.
-Your role is to write a well structured document that describes code functionality.
-
-From the given context:
-1- type: is the type of the code block (funciton, class, ..)
-2- name: is the name of the code block
-3- content: is the description of the code block
-
-Instructions:
-Write a docx document with the following structure Heading 1(type) -> Heading 2(name) -> content
-
-Rules for the output:
-1. Don't write information out of context
-2. If needed, structure long responses in lists and sections
-
-<context>
-{context}
-</context>
-"""
-
-main_prompt = """You are an expert {code_language} developer.
-Your role is to answer user's questions about code and its description that will be given to you in context.
-
-Rules for the output:
-1. Don't answer out of context questions.
-2. Provide a single, clear response using only the given context.
-3. If needed, structure long responses in lists and sections.
-
-<context>
-{context}
-</context>
-"""
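The deleted describe_imports prompt pinned the model to a strict JSON reply shape. A minimal sketch of consuming a reply in that shape; the payload below is hypothetical:

import json

reply = '{"Imported_Packages": [{"name": "requests", "desc": "HTTP library for Python"}]}'

data = json.loads(reply)  # raises json.JSONDecodeError if the model strayed from the format
for pkg in data["Imported_Packages"]:
    print(f"{pkg['name']}: {pkg['desc']}")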
prompts.py:Zone.Identifier
DELETED
File without changes
requirements.txt
DELETED
@@ -1,106 +0,0 @@
-aiofiles==23.2.1
-aiohappyeyeballs==2.4.6
-aiohttp==3.11.12
-aiosignal==1.3.2
-annotated-types==0.7.0
-anyio==4.8.0
-async-timeout==4.0.3
-asyncer==0.0.7
-attrs==25.1.0
-bidict==0.23.1
-Brotli==1.1.0
-certifi==2025.1.31
-chainlit==2.2.1
-charset-normalizer==3.4.1
-chevron==0.14.0
-click==8.1.8
-dataclasses-json==0.6.7
-Deprecated==1.2.18
-distro==1.9.0
-docx==0.2.4
-fastapi==0.115.8
-filetype==1.2.0
-frozenlist==1.5.0
-googleapis-common-protos==1.68.0
-greenlet==3.1.1
-grpcio==1.70.0
-grpcio-tools==1.70.0
-h11==0.14.0
-h2==4.2.0
-hpack==4.1.0
-httpcore==1.0.7
-httpx==0.28.1
-httpx-sse==0.4.0
-hyperframe==6.1.0
-idna==3.10
-importlib_metadata==8.5.0
-jiter==0.8.2
-jsonpatch==1.33
-jsonpointer==3.0.0
-langchain==0.3.15
-langchain-community==0.3.15
-langchain-core==0.3.31
-langchain-openai==0.3.1
-langchain-qdrant==0.2.0
-langchain-text-splitters==0.3.5
-langgraph==0.2.74
-langgraph-checkpoint==2.0.16
-langgraph-sdk==0.1.51
-langsmith==0.3.8
-Lazify==0.4.0
-literalai==0.1.103
-lxml==5.3.1
-marshmallow==3.26.1
-msgpack==1.1.0
-multidict==6.1.0
-mypy-extensions==1.0.0
-numpy==1.26.4
-openai==1.63.2
-opentelemetry-api==1.29.0
-opentelemetry-exporter-otlp==1.29.0
-opentelemetry-exporter-otlp-proto-common==1.29.0
-opentelemetry-exporter-otlp-proto-grpc==1.29.0
-opentelemetry-exporter-otlp-proto-http==1.29.0
-opentelemetry-instrumentation==0.50b0
-opentelemetry-proto==1.29.0
-opentelemetry-sdk==1.29.0
-opentelemetry-semantic-conventions==0.50b0
-orjson==3.10.15
-pillow==11.1.0
-portalocker==2.10.1
-propcache==0.2.1
-protobuf==5.29.3
-pydantic==2.10.6
-pydantic-settings==2.7.1
-pydantic_core==2.27.2
-PyJWT==2.10.1
-python-docx==1.1.2
-python-dotenv==1.0.1
-python-engineio==4.11.2
-python-multipart==0.0.18
-python-socketio==5.12.1
-PyYAML==6.0.2
-qdrant-client==1.13.2
-regex==2024.11.6
-requests==2.32.3
-requests-toolbelt==1.0.0
-simple-websocket==1.1.0
-sniffio==1.3.1
-socksio==1.0.0
-SQLAlchemy==2.0.38
-starlette==0.41.3
-syncer==2.0.3
-tenacity==9.0.0
-tiktoken==0.9.0
-tomli==2.2.1
-tqdm==4.67.1
-typing-inspect==0.9.0
-uptrace==1.29.0
-urllib3==2.3.0
-uvicorn==0.34.0
-watchfiles==0.20.0
-wrapt==1.17.2
-wsproto==1.2.0
-yarl==1.18.3
-zipp==3.21.0
-zstandard==0.23.0
requirements.txt:Zone.Identifier
DELETED
File without changes
states.py
DELETED
@@ -1,7 +0,0 @@
-from typing_extensions import List, TypedDict
-from typing import TypedDict, Annotated
-from langgraph.graph.message import add_messages
-
-
-class AgentState(TypedDict):
-    messages: Annotated[list, add_messages]
states.py:Zone.Identifier
DELETED
File without changes
tools.py
DELETED
@@ -1,51 +0,0 @@
-from langchain_core.tools import Tool, tool
-import requests
-import json
-from docx import Document
-import re
-
-
-@tool
-def search_pypi(package_name: str) -> str:
-    """Search PyPI for Python package information. Input should be the package name.
-    Args:
-        package_name: name of the package
-    """
-    print(f"Tool called for package: {package_name}")
-    base_url = "https://pypi.org/pypi"
-    try:
-        try:
-            response = requests.get(f"{base_url}/{package_name}/json")
-            response.raise_for_status()
-            info = response.json()
-        except requests.RequestException as e:
-            raise Exception(f"Error fetching PyPI info for {package_name}: {str(e)}")
-        result = json.dumps({
-            "name": info["info"]["name"],
-            "summary": info["info"]["summary"],
-        })
-        print(f"Tool result: {result}")
-        return result
-    except Exception as e:
-        return f"Could not find package information: {str(e)}"
-
-# @tool
-def write_to_docx(documentation_text: str) -> str:
-    """
-    Writes the AI-generated documentation to a .docx file and returns the file path.
-    """
-    doc = Document()
-    # doc.add_heading("Code Documentation", level=1)
-
-    lines = documentation_text.split("\n")
-    for line in lines:
-        if line.startswith("# "):  # Section Heading
-            doc.add_heading(line[2:], level=1)
-        elif line.startswith("## "):  # Subsection Heading
-            doc.add_heading(line[3:], level=2)
-        else:  # Normal paragraph
-            doc.add_paragraph(line)
-
-    file_path = "generated_documentation.docx"
-    doc.save(file_path)
-    return file_path
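The deleted search_pypi tool wrapped PyPI's public JSON API (https://pypi.org/pypi/<package>/json); the same lookup works standalone, network access assumed:

import requests

resp = requests.get("https://pypi.org/pypi/requests/json", timeout=10)
resp.raise_for_status()
info = resp.json()["info"]
print(info["name"], "-", info["summary"])  # e.g. requests - Python HTTP for Humans.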
tools.py:Zone.Identifier
DELETED
File without changes