midterm changes
Files changed:
- Dockerfile +1 -1
- app_2.py +393 -0
- app_2.py:Zone.Identifier +0 -0
- prompts.py +158 -38
- prompts.py:Zone.Identifier +0 -0
- states.py:Zone.Identifier +0 -0
- tools.py +45 -15
- tools.py:Zone.Identifier +0 -0
Dockerfile
CHANGED
@@ -93,4 +93,4 @@ RUN uv sync
 EXPOSE 7860

 # Run the app
-CMD ["uv", "run", "chainlit", "run", "
+CMD ["uv", "run", "chainlit", "run", "app_2.py", "--host", "0.0.0.0", "--port", "7860"]
app_2.py
ADDED
@@ -0,0 +1,393 @@
+import os
+import getpass
+from operator import itemgetter
+from typing import List, Dict
+import json
+import requests
+import traceback
+
+
+
+#LangChain, LangGraph
+from langchain_openai import ChatOpenAI
+from langgraph.graph import START, StateGraph, END
+from typing_extensions import List, TypedDict
+# from langchain_core.documents import Document
+from langchain_core.prompts import ChatPromptTemplate
+from langchain.schema.output_parser import StrOutputParser
+from langchain_core.tools import Tool, tool
+from langgraph.prebuilt import ToolNode
+from typing import TypedDict, Annotated
+from langgraph.graph.message import add_messages
+import operator
+from langchain_core.messages import BaseMessage, HumanMessage, AIMessage, SystemMessage
+from langchain.vectorstores import Qdrant
+from langchain.embeddings import OpenAIEmbeddings
+from langchain.schema import Document
+from qdrant_client import QdrantClient
+from qdrant_client.http.models import Distance, VectorParams
+
+
+import chainlit as cl
+import tempfile
+import shutil
+
+#helper imports
+from code_analysis import *
+from tools import search_pypi, write_to_docx
+from prompts import main_prompt, documenter_prompt, code_description_prompt
+from states import AgentState
+
+
+# read openai key
+os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY") or getpass.getpass("OpenAI API key: ")
+
+# Global variables to store processed data
+processed_file_path = None
+document_file_path = None
+vectorstore = None
+main_chain = None
+qdrant_client = None
+
+@cl.on_chat_start
+async def on_chat_start():
+    await cl.Message(content="Welcome to the Python Code Documentation Assistant! Please upload a Python file to get started.").send()
+
+@cl.on_message
+async def on_message(message: cl.Message):
+    global processed_file_path, document_file_path, vectorstore, main_chain, qdrant_client
+
+    if message.elements and any(el.type == "file" for el in message.elements):
+        file_elements = [el for el in message.elements if el.type == "file"]
+        file_element = file_elements[0]
+        is_python_file = (
+            file_element.mime.startswith("text/x-python") or
+            file_element.name.endswith(".py") or
+            file_element.mime == "text/plain"  # Some systems identify .py as text/plain
+        )
+        if is_python_file:
+            # Send processing message
+            msg = cl.Message(content="Processing your Python file...")
+            await msg.send()
+
+            print(f'file element \n {file_element} \n')
+
+            # Save uploaded file to a temporary location
+            temp_dir = tempfile.mkdtemp()
+            file_path = os.path.join(temp_dir, file_element.name)
+
+            with open(file_element.path, "rb") as source_file:
+                file_content_bytes = source_file.read()
+            with open(file_path, "wb") as destination_file:
+                destination_file.write(file_content_bytes)
+
+            processed_file_path = file_path
+
+            try:
+
+                # read file and extract imports
+                file_content = read_python_file(file_path)
+                # imports = extract_imports(file_content, file_path)
+
+                print(f'Done reading file')
+
+                # Define describe packages graph
+                search_packages_tools = [search_pypi]
+                ##################################### DESCRIBE CODE AGENT ####################################
+                describe_code_llm = ChatOpenAI(model="gpt-4o-mini")
+                # describe_imports_llm = describe_imports_llm.bind_tools(tools = search_packages_tools, tool_choice="required")
+
+                describe_code_prompt = ChatPromptTemplate.from_messages([
+                    ("system", code_description_prompt),
+                    ("human", "{code}")
+                ])
+
+                describe_code_chain = (
+                    {"code_language": itemgetter("code_language"), "code": itemgetter("code")}
+                    | describe_code_prompt | describe_code_llm | StrOutputParser()
+                )
+
+                print(f'done defining imports chain')
+
+
+                # Define describe code chain node
+                def describe_code(state):
+                    # print("Starting chain function")
+                    last_message = state["messages"][-1]
+                    # print(f'last message is \n {last_message}')
+                    content = json.loads(last_message.content)
+                    # print(f'content is {content}')
+                    # print(type(content))
+                    chain_input = {"code_language": content['code_language'],
+                                   "code": content['code']}
+                    # print(f'chain_input is {chain_input}')
+                    # print(type(chain_input))
+                    response = describe_code_chain.invoke(chain_input)
+                    # print(f"Chain response: {response}")
+                    return {"messages": [AIMessage(content=response)]}
+
+                ######################################## DOCUMENT WRITER AGENT ###################################
+                documenter_llm = ChatOpenAI(model="gpt-4o-mini")
+
+                documenter_llm_prompt = ChatPromptTemplate.from_messages([
+                    ("system", documenter_prompt),
+                    ("human", "{content}")
+                ])
+
+                documenter_chain = (
+                    {"content": itemgetter("content")}
+                    | documenter_llm_prompt
+                    | documenter_llm
+                    | StrOutputParser()
+                )
+
+                def write_document_content(state):
+                    print(state)
+                    json_content = state['messages'][-1].content
+                    json_content = json_content[json_content.find("{"):json_content.rfind("}")+1].strip()
+                    json_content = json.loads(json_content)
+                    document_response = documenter_chain.invoke({"content": json_content})
+                    return {"messages": [AIMessage(content=document_response)]}
+
+                ########################################## CONSTRUCT GRAPH ############################################################
+                class AgentState(TypedDict):
+                    messages: Annotated[list, add_messages]
+
+                uncompiled_code_graph = StateGraph(AgentState)
+                uncompiled_code_graph.add_node("code_agent", describe_code)
+                uncompiled_code_graph.add_node("write_content_agent", write_document_content)
+                uncompiled_code_graph.add_node("write_document", write_to_docx)
+
+                uncompiled_code_graph.set_entry_point("code_agent")
+                uncompiled_code_graph.add_edge("code_agent", "write_content_agent")
+                uncompiled_code_graph.add_edge("write_content_agent", "write_document")
+
+                compiled_code_graph = uncompiled_code_graph.compile()
+
+
+                initial_state = {
+                    "messages": [{
+                        "role": "human",
+                        "content": json.dumps({
+                            "code_language": "python",
+                            "code": file_content
+                        })
+                    }]
+                }
+                # bind model to tool or ToolNode
+                # imports_tool_node = ToolNode(search_packages_tools)
+
+                # construct graph and compile
+                # uncompiled_imports_graph = StateGraph(AgentState)
+                # uncompiled_imports_graph.add_node("imports_agent", call_imports_chain)
+                # uncompiled_imports_graph.add_node("imports_action", imports_tool_node)
+                # uncompiled_imports_graph.set_entry_point("imports_agent")
+
+                # def should_continue(state):
+                #     last_message = state["messages"][-1]
+
+                #     if last_message.tool_calls:
+                #         return "imports_action"
+
+                #     return END
+
+                # uncompiled_imports_graph.add_conditional_edges(
+                #     "imports_agent",
+                #     should_continue
+                # )
+
+                # uncompiled_imports_graph.add_edge("imports_action", "imports_agent")
+
+                # compiled_imports_graph = uncompiled_imports_graph.compile()
+
+                # print(f'compiled imports graph')
+                # # Invoke imports graph
+                # initial_state = {
+                #     "messages": [{
+                #         "role": "human",
+                #         "content": json.dumps({
+                #             "code_language": "python",
+                #             "imports": imports
+                #         })
+                #     }]
+                # }
+
+
+
+
+
+                # await msg.update(content="Analyzing imports and generating documentation...")
+                msg.content = "Analyzing your code and generating documentation..."
+                await msg.update()
+
+                # msg = cl.Message(content="Analyzing your code and generating documentation...")
+                # await msg.send()
+
+                documenter_result = compiled_code_graph.invoke(initial_state)
+
+                ############################################## SAVE DESCRIPTION CHUNKS IN VECTOR STORE ########################################
+                qdrant_client = QdrantClient(":memory:")
+
+                embedding_model = OpenAIEmbeddings(model="text-embedding-3-small")
+                embedding_dim = 1536
+
+                qdrant_client.create_collection(
+                    collection_name="description_rag_data",
+                    vectors_config=VectorParams(size=embedding_dim, distance=Distance.COSINE),
+                )
+
+                vectorstore = Qdrant(qdrant_client, collection_name="description_rag_data", embeddings=embedding_model)
+
+                # Add chunks
+                chunks = documenter_result['messages'][1].content
+                chunks = chunks[chunks.find("{"):chunks.rfind("}")+1].strip()
+                chunks = json.loads(chunks)
+                print(f'################################### raw chunks \n {chunks} \n ######################## \n')
+                chunks_list = []
+                for key in chunks:
+                    if isinstance(chunks[key], dict):
+                        chunks_list.append(chunks[key])
+                    elif isinstance(chunks[key], list):
+                        for value in chunks[key]:
+                            chunks_list.append(value)
+                print(f'################################### chunks_list \n {chunks_list} \n ######################## \n')
+                docs = [
+                    Document(
+                        page_content=f"{chunk.get('type', '')} - {chunk.get('name', '')} - {chunk.get('description', '')}",  # Content for the model
+                        metadata={**chunk}  # Store metadata, but don't put embeddings here
+                    )
+                    for chunk in chunks_list
+                ]
+
+
+
+                vectorstore.add_documents(docs)
+                qdrant_retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
+
+                print('done adding docs to DB')
+                #define documenter chain
+                # documenter_llm = ChatOpenAI(model="gpt-4o-mini")
+                # documenter_llm_prompt = ChatPromptTemplate.from_messages([
+                #     ("system", documenter_prompt),
+                # ])
+                # documenter_chain = (
+                #     {"context": itemgetter("context")}
+                #     | documenter_llm_prompt
+                #     | documenter_llm
+                #     | StrOutputParser()
+                # )
+
+                # print('done defining documenter chain')
+
+                #extract description chunks from database
+                # collection_name = "description_rag_data"
+                # all_points = qdrant_client.scroll(collection_name=collection_name, limit=1000)[0]  # Adjust limit if needed
+                # one_chunk = all_points[0].payload
+                # input_text = f"type: {one_chunk['metadata']['type']} \nname: {one_chunk['metadata']['name']} \ncontent: {one_chunk['metadata']['content']}"
+
+                # print('done extracting chunks form DB')
+
+                # document_response = documenter_chain.invoke({"context": input_text})
+
+                print('done invoking documenter chain and will write in docx')
+                # write packages description in word file
+                # document_file_path = write_to_docx(document_response)
+                # print (f'################################ \n documenter_result \n {documenter_result} \n ############################ \n')
+                # document_file_path = documenter_result['messages'][-1].content[0]
+                # print()
+                document_file_path = 'generated_documentation.docx'
+
+
+                print('done writing docx file')
+                # Set up Main Chain for chat
+                main_llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
+
+
+                main_llm_prompt = ChatPromptTemplate.from_messages([
+                    ("system", main_prompt),
+                    ("human", "{query}")
+                ])
+
+                main_chain = (
+                    {"context": itemgetter("query") | qdrant_retriever, "code_language": itemgetter("code_language"), "query": itemgetter("query")}
+                    | main_llm_prompt
+                    | main_llm
+                    | StrOutputParser()
+                )
+
+                print('done defining main chain')
+                # Present download button for the document
+                elements = [
+                    cl.File(
+                        name="documentation.docx",
+                        path=document_file_path,
+                        display="inline"
+                    )
+                ]
+                print('done defining elements')
+                msg.content = "✅ Your Python file has been processed! You can download the documentation file below. How can I help you with your code?"
+                msg.elements = elements
+                await msg.update()
+
+                # await msg.update(
+                #     content="✅ Your Python file has been processed! You can download the documentation file below. How can I help you with your code?",
+                #     elements=elements
+                # )
+
+            except Exception as e:
+                # await msg.update(content=f"❌ Error processing file: {str(e)}")
+                error_traceback = traceback.format_exc()
+                print(error_traceback)
+                msg.content = f"❌ Error processing file: {str(e)}"
+                await msg.update()
+
+                # msg = cl.Message(content=f"second message ❌ Error processing file: {str(e)}")
+                # await msg.send()
+
+        else:
+            await cl.Message(content="Please upload a Python (.py) file.").send()
+
+    # Handle chat messages if file has been processed
+    elif processed_file_path and main_chain:
+        user_input = message.content
+        # Send thinking message
+        msg = cl.Message(content="Thinking...")
+        await msg.send()
+
+        try:
+            # Use main_chain to answer the query
+            # invoke main chain
+            inputs = {
+                'code_language': 'Python',
+                'query': user_input
+            }
+
+            response = main_chain.invoke(inputs)
+
+            # Update with the response
+            # await msg.update(content=response)
+            msg.content = response
+            await msg.update()
+
+            # msg = cl.Message(content=response)
+            # await msg.send()
+
+        except Exception as e:
+            # await msg.update(content=f"❌ Error processing your question: {str(e)}")
+            msg.content = f"❌ Error processing your question: {str(e)}"
+            await msg.update()
+
+            # msg = cl.Message(content=f"❌ Error processing your question: {str(e)}")
+            # await msg.send()
+
+    else:
+        await cl.Message(content="Please upload a Python file first before asking questions.").send()
+
+
+@cl.on_stop
+def on_stop():
+    global processed_file_path
+    # Clean up temporary files
+    if processed_file_path and os.path.exists(os.path.dirname(processed_file_path)):
+        shutil.rmtree(os.path.dirname(processed_file_path))
+
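
The handler above wires a three-node LangGraph pipeline: describe_code produces a JSON breakdown of the uploaded file, write_document_content turns that into documentation text, and tools.write_to_docx saves the .docx. Below is a minimal sketch of the same wiring in isolation, with stub node functions in place of the real LLM chains (the stub bodies and the sample input are illustrative, not part of the commit):

import json
from typing import Annotated
from typing_extensions import TypedDict
from langchain_core.messages import AIMessage, SystemMessage
from langgraph.graph import StateGraph
from langgraph.graph.message import add_messages

class AgentState(TypedDict):
    messages: Annotated[list, add_messages]

def describe_code(state):
    # stands in for the code-description chain: read the JSON payload, return a description
    payload = json.loads(state["messages"][-1].content)
    return {"messages": [AIMessage(content=json.dumps(
        {"type": "execution", "description": f"stub description of the {payload['code_language']} code"}))]}

def write_document_content(state):
    # stands in for the documenter chain: turn the description into document text
    return {"messages": [AIMessage(content="# Documentation\nStub document body.")]}

def write_document(state):
    # stands in for tools.write_to_docx: save the text and return the filename
    return {"messages": [SystemMessage(content=["generated_documentation.docx"])]}

graph = StateGraph(AgentState)
graph.add_node("code_agent", describe_code)
graph.add_node("write_content_agent", write_document_content)
graph.add_node("write_document", write_document)
graph.set_entry_point("code_agent")
graph.add_edge("code_agent", "write_content_agent")
graph.add_edge("write_content_agent", "write_document")
compiled = graph.compile()

initial_state = {"messages": [{"role": "human",
                               "content": json.dumps({"code_language": "python", "code": "print('hi')"})}]}
result = compiled.invoke(initial_state)
print(result["messages"][-1].content)  # ['generated_documentation.docx']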
app_2.py:Zone.Identifier
ADDED
File without changes
prompts.py
CHANGED
@@ -1,49 +1,169 @@
-describe_imports = """You are an expert {code_language} developer.
-Your will be given code lines that import packages.
-Your role is to give a brief description of each package
-
-You have access to the following tool and you MUST use it:
-search_pypi: Use this to get information about Python packages from PyPI.
-
-For each import:
-1. Extract the main package name
-2. Use the search_pypi tool to get package information by calling "search_pypi(package_name)"
-3. Combine the information into a clear description
-4. If the retuned value of tool is empty use your own knowledge
-5. If you have no knowledge for this package then it's description should be "I don't know details about this package"
-
-You must respond in the following JSON format:
-{{"Imported_Packages": [
-{{"name": "package1", "desc": "brief description of package1"}},
-{{"name": "package2", "desc": "brief description of package2"}}
-]}}
-
-Rules for the output:
-1. Use valid JSON format
-2. Package names should be the exact names from the imports
-3. Descriptions should be brief and clear
-4. Do not include any text outside the JSON structure
-"""
-
-documenter_prompt = """You are an expert code documenter.
-Your role is to write a well structured document that describes code functionality.
-
-From the given context:
-1- type: is the type of the code block (funciton, class, ..)
-2- name: is the name of the code block
-3- content: is the description of the code block
-
-Instructions:
-Write a docx document with the following structure Heading 1(type) -> Heading 2(name) -> content
-
-Rules for the output:
-1. Don't write information out of context
-2. If needed, structure long responses in lists and sections
-
-<context>
-{context}
-</context>
-"""
+code_description_prompt = """
+You are an expert {code_language} developer.
+You will be given Python code lines.
+Your role is to break down its components into a specific JSON format.
+
+Input:
+A Python file containing:
+1- Imports
+2- Function definitions
+3- Execution code
+
+Output Format:
+The output should be a JSON with three main sections:
+1- Imports
+{{
+"type": "imports",
+"description": [
+{{"package1_name": "detailed description of package1"}},
+{{"package2_name": "detailed description of package2"}}
+]
+}}
+
+2- Functions
+{{
+"functions": [
+{{
+"type": "function",
+"name": "function1_name",
+"description": "detailed explanation of function1's purpose and functionality"
+}},
+{{
+"type": "function",
+"name": "function2_name",
+"description": "detailed explanation of function2's purpose and functionality"
+}}
+]
+}}
+
+3- Execution Code
+{{
+"type": "execution",
+"description": "comprehensive description of what the execution code does"
+}}
+
+Analysis Guidelines:
+1- Imports Section:
+- Identify each imported package
+- Provide a clear, concise description of the package's purpose
+- Include the standard library or third-party nature of the package
+- Explain why the package is likely being used in this code
+
+2- Functions Section:
+- List each function in the order they appear
+- Describe the function's:
+  * Primary purpose
+  * Input parameters
+  * Return value (if any)
+  * Key operations performed
+- Highlight any notable algorithms or logic within the function
+
+3- Execution Code Section:
+- Describe the overall flow of the code
+- Explain how functions are called
+- Detail any data processing, computations, or side effects
+- Provide context on the script's main objective
+
+Important Notes:
+- Use valid JSON format for output
+- Be precise and technical in descriptions
+- Use clear, professional language
+- Avoid unnecessary verbosity
+- Focus on explaining the code's functionality and purpose
+"""
+
+# describe_imports = """You are an expert {code_language} developer.
+# Your will be given code lines that import packages.
+# Your role is to give a brief description of each package
+
+# You have access to the following tool and you MUST use it:
+# search_pypi: Use this to get information about Python packages from PyPI.
+
+# For each import:
+# 1. Extract the main package name
+# 2. Use the search_pypi tool to get package information by calling "search_pypi(package_name)"
+# 3. Combine the information into a clear description
+# 4. If the retuned value of tool is empty use your own knowledge
+# 5. If you have no knowledge for this package then it's description should be "I don't know details about this package"
+
+# You must respond in the following JSON format:
+# {{"Imported_Packages": [
+# {{"name": "package1", "desc": "brief description of package1"}},
+# {{"name": "package2", "desc": "brief description of package2"}}
+# ]}}
+
+# Rules for the output:
+# 1. Use valid JSON format
+# 2. Package names should be the exact names from the imports
+# 3. Descriptions should be brief and clear
+# 4. Do not include any text outside the JSON structure
+# """
+
+# documenter_prompt = """You are an expert code documenter.
+# Your role is to write a well structured document that describes code functionality.
+
+documenter_prompt = """
+Create a comprehensive Word document from the provided JSON input describing a Python script.
+
+Document Requirements:
+1. Title should reflect the script's primary purpose
+2. Organize content into logical sections:
+   - Imports
+   - Functions
+   - Execution Mechanism
+   - Optional: Technical Insights and Potential Improvements
+
+For Each Section:
+- Explain the purpose and functionality
+- Provide technical details
+- Use professional technical writing style
+- Include function signatures and parameter descriptions
+- Break down complex descriptions into clear, concise points
+
+Formatting Guidelines:
+- Use a clean, professional Word document template
+- Ensure consistent font and spacing
+- Use bold text for emphasis
+- Create bulleted or numbered lists for detailed explanations
+- Include any available descriptions or comments from the JSON input
+
+Specific Section Handling:
+- Imports: Explain each imported library's purpose and specific use in the script
+- Functions:
+  - Provide detailed function signatures
+  - Explain input parameters
+  - Describe return values
+  - Break down the function's purpose and mechanism
+- Execution: Explain how the script is intended to run and its primary workflow
+
+Additional Recommendations:
+- If the JSON includes type information, incorporate it into the documentation
+- Add context to explain the script's overall purpose
+- Suggest potential improvements or extensions if the JSON provides enough context
+
+Final Output:
+- Fully formatted .docx file
+- Comprehensive explanation of the script
+- Technical yet readable documentation
+- Output should be the document content only without any introduction
+"""
+# From the given context:
+# 1- type: is the type of the code block (funciton, class, ..)
+# 2- name: is the name of the code block
+# 3- content: is the description of the code block
+
+# Instructions:
+# Write a docx document with the following structure Heading 1(type) -> Heading 2(name) -> content
+
+# Rules for the output:
+# 1. Don't write information out of context
+# 2. If needed, structure long responses in lists and sections
+
+# <context>
+# {context}
+# </context>
+# """
 
 main_prompt = """You are an expert {code_language} developer.
 Your role is to answer user's questions about code and its description that will be given to you in context.
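
The new code_description_prompt asks the model for a JSON breakdown that app_2.py later flattens into retrieval chunks. A hypothetical response in roughly that shape, together with the flattening loop app_2.py applies before embedding, is sketched below (the sample values and the exact top-level key names are assumptions for illustration; the prompt fixes the section contents but not the outer keys):

# Hypothetical model output in the format requested by code_description_prompt
example = {
    "imports": {
        "type": "imports",
        "description": [{"requests": "third-party HTTP client, used here to query the PyPI JSON API"}],
    },
    "functions": [
        {"type": "function", "name": "search_pypi", "description": "fetches summary metadata for a package from PyPI"},
    ],
    "execution": {"type": "execution", "description": "builds a .docx documentation file for the uploaded script"},
}

# Flattening applied in app_2.py before the chunks are embedded into Qdrant
chunks_list = []
for key in example:
    if isinstance(example[key], dict):
        chunks_list.append(example[key])
    elif isinstance(example[key], list):
        chunks_list.extend(example[key])

print(chunks_list)  # one dict per chunk: the imports block, each function, and the execution block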
prompts.py:Zone.Identifier
ADDED
File without changes
states.py:Zone.Identifier
ADDED
File without changes
tools.py
CHANGED
@@ -3,6 +3,8 @@ import requests
 import json
 from docx import Document
 import re
+from langchain_core.messages import BaseMessage, HumanMessage, AIMessage, SystemMessage
+


 @tool
@@ -30,22 +32,50 @@ def search_pypi(package_name: str) -> str:
         return f"Could not find package information: {str(e)}"

 # @tool
-def write_to_docx(documentation_text: str) -> str:
-    """
-    Writes the AI-generated documentation to a .docx file and returns the file path.
-    """
-    doc = Document()
-    # doc.add_heading("Code Documentation", level=1)
-
-    lines = documentation_text.split("\n")
-    for line in lines:
-        if line.startswith("# "): # Section Heading
-            doc.add_heading(line[2:], level=1)
-        elif line.startswith("## "): # Subsection Heading
-            doc.add_heading(line[3:], level=2)
-        else: # Normal paragraph
-            doc.add_paragraph(line)
-
-    file_path = "generated_documentation.docx"
-    doc.save(file_path)
-    return file_path
+# def write_to_docx(documentation_text: str) -> str:
+#     """
+#     Writes the AI-generated documentation to a .docx file and returns the file path.
+#     """
+#     doc = Document()
+#     # doc.add_heading("Code Documentation", level=1)
+
+#     lines = documentation_text.split("\n")
+#     for line in lines:
+#         if line.startswith("# "): # Section Heading
+#             doc.add_heading(line[2:], level=1)
+#         elif line.startswith("## "): # Subsection Heading
+#             doc.add_heading(line[3:], level=2)
+#         else: # Normal paragraph
+#             doc.add_paragraph(line)
+
+#     file_path = "generated_documentation.docx"
+#     doc.save(file_path)
+#     return file_path
+
+def write_to_docx(state):
+    text = state['messages'][-1].content
+    filename = 'generated_documentation.docx'
+    doc = Document()
+
+    lines = text.split("\n")
+    for line in lines:
+        if line.startswith("### "):
+            doc.add_heading(line[4:], level=3)
+        elif line.startswith("## "):
+            doc.add_heading(line[3:], level=2)
+        elif line.startswith("# "):
+            doc.add_heading(line[2:], level=1)
+        elif "**" in line:
+            bold_parts = re.split(r"(\*\*.*?\*\*)", line)
+            para = doc.add_paragraph()
+            for part in bold_parts:
+                if part.startswith("**") and part.endswith("**"):
+                    para.add_run(part[2:-2]).bold = True
+                else:
+                    para.add_run(part)
+        else:
+            doc.add_paragraph(line)
+
+    # Save document
+    doc.save(filename)
+    return {"messages": [SystemMessage(content=[filename])]}
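
Because write_to_docx now takes a graph state instead of a plain string, it can be attached to the LangGraph pipeline as a terminal node. A small sketch of calling it directly (the sample message content is illustrative):

from langchain_core.messages import AIMessage
from tools import write_to_docx

# The content of the last message in the state is written to generated_documentation.docx
state = {"messages": [AIMessage(content="# Title\n## Section\nSome **bold** text and a normal paragraph.")]}
result = write_to_docx(state)
print(result["messages"][0].content)  # ['generated_documentation.docx']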
tools.py:Zone.Identifier
ADDED
File without changes