Commit 060a68f · Luke31 committed · 0 parent(s) (initial commit)

Initial attempt
Files changed:
- .env.sample +1 -0
- .gitignore +4 -0
- README.md +8 -0
- pedalo/__init__.py +0 -0
- pedalo/agents/__init__.py +0 -0
- pedalo/agents/code_interpreter.py +59 -0
- pedalo/main.py +23 -0
- poetry.lock +0 -0
- pyproject.toml +26 -0
- stmain.py +46 -0
- tests/__init__.py +0 -0
.env.sample
ADDED
@@ -0,0 +1 @@
+OPENAI_API_KEY=YOUR_KEY
.gitignore
ADDED
@@ -0,0 +1,4 @@
+.env
+**/__pycache__/*
+.idea
+out/*.png
README.md
ADDED
@@ -0,0 +1,8 @@
+# PEDALO - Productive Exploratory Data Analysis using Langchain interrOgation
+
+Ask your data what you wanna know!
+
+# How to use
+
+2. `poetry install` (using Poetry 1.4.2)
+3. `poetry run streamlit run stmain.py`
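Note that the README's numbered steps start at 2; the missing first step is presumably to copy `.env.sample` to `.env` and replace `YOUR_KEY` with a real OpenAI API key (an inference from the other files in this commit, not something the README states). The full flow would then look roughly like:

    cp .env.sample .env                     # inferred step: put your real OPENAI_API_KEY here
    poetry install                          # README notes Poetry 1.4.2
    poetry run streamlit run stmain.py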
pedalo/__init__.py
ADDED
(empty file)

pedalo/agents/__init__.py
ADDED
(empty file)
pedalo/agents/code_interpreter.py
ADDED
@@ -0,0 +1,59 @@
+from dotenv import load_dotenv
+from langchain.agents import AgentType, create_pandas_dataframe_agent, initialize_agent
+from langchain.agents.agent_toolkits import create_python_agent
+from langchain.callbacks import StreamlitCallbackHandler
+from langchain.chat_models import ChatOpenAI
+from langchain.tools import PythonREPLTool, Tool
+from pandas import DataFrame
+
+model_python_agent_executor = "gpt-3.5-turbo"
+model_pandas_agent = "gpt-3.5-turbo"
+model_grand_agent = "gpt-3.5-turbo"
+
+
+def run(
+    prompt: str, df: DataFrame, st_callback: StreamlitCallbackHandler, model="gpt-4"
+) -> str:
+    # python_agent_executor = create_python_agent(
+    #     llm=ChatOpenAI(temperature=0, model=model_python_agent_executor),
+    #     tool=PythonREPLTool(),
+    #     agent_type=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
+    #     verbose=True,
+    # )
+
+    pandas_agent = create_pandas_dataframe_agent(
+        llm=ChatOpenAI(
+            temperature=0,
+            model=model_pandas_agent,
+            streaming=True,
+        ),
+        df=df,
+        verbose=True,
+        agent_type=AgentType.OPENAI_FUNCTIONS,  # AgentType.OPENAI_FUNCTIONS, #
+    )
+
+    grand_agent = initialize_agent(
+        tools=[
+            # Tool(
+            #     name="PythonAgent",
+            #     func=python_agent_executor.run,
+            #     description="""useful when you need to transform natural language and write from it python and execute the python code,
+            #     returning the results of the code execution,
+            #     DO NOT SEND PYTHON CODE TO THIS TOOL""",
+            # ),
+            Tool(
+                name="PandasAgent",
+                func=pandas_agent.run,
+                description="""useful when you need to answer question for a provided pandas dataframe, This tool already knows which dataframe to handle.
+                takes as an input the entire question and returns the answer after running calculations""",
+            ),
+        ],
+        llm=ChatOpenAI(temperature=0, model=model_grand_agent, streaming=True),
+        agent_type=AgentType.OPENAI_FUNCTIONS,
+        # agent_type=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
+        verbose=True,
+    )
+    cmd = f"""in provided pandas dataframe, answer the provided question.
+    \n{prompt}"""
+    response = pandas_agent.run(cmd, callbacks=[st_callback])
+    return response
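As committed, run() builds both a pandas dataframe agent and a grand_agent, but only pandas_agent.run(...) is actually invoked, and the model argument is never used (the module-level "gpt-3.5-turbo" constants take effect). A minimal sketch of calling this function from a Streamlit script, assuming the langchain 0.0.258 / streamlit 1.25 pins from pyproject.toml and an OPENAI_API_KEY available via .env; the sample DataFrame and question are made up for illustration:

    # Hypothetical smoke test for code_interpreter.run (not part of this commit).
    import pandas as pd
    import streamlit as st
    from dotenv import load_dotenv
    from langchain.callbacks import StreamlitCallbackHandler

    from pedalo.agents import code_interpreter

    load_dotenv()  # expects OPENAI_API_KEY in .env, as pedalo/main.py does

    # Made-up sample data standing in for an uploaded CSV.
    df = pd.DataFrame({"city": ["Zurich", "Bern", "Basel"],
                       "population": [434_000, 134_000, 178_000]})

    st_callback = StreamlitCallbackHandler(st.container())
    answer = code_interpreter.run("Which city has the largest population?", df, st_callback)
    st.write(answer)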
pedalo/main.py
ADDED
@@ -0,0 +1,23 @@
+from langchain.callbacks import StreamlitCallbackHandler
+from pandas import DataFrame
+
+from dotenv import load_dotenv
+import os
+import openai
+
+from pedalo.agents import code_interpreter
+
+load_dotenv()  # Load environment variables from .env file
+api_key = os.getenv("OPENAI_API_KEY")
+openai.api_key = api_key
+
+
+def run(
+    prompt: str, df: DataFrame, st_callback: StreamlitCallbackHandler, model="gpt-4"
+):
+    result = code_interpreter.run(prompt, df, st_callback, model)
+    return result
+
+
+if __name__ == "__main__":
+    run()
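Note that the `if __name__ == "__main__":` block calls run() without its required prompt, df, and st_callback arguments, so running the module directly would raise a TypeError as committed; in practice the entry point is stmain.py. A hedged sketch of a standalone terminal entry point; the command-line arguments and the use of langchain's StdOutCallbackHandler in place of the Streamlit callback are assumptions, not part of this commit:

    # Hypothetical CLI entry point (illustrative only; the commit keeps the bare run() call).
    import sys

    import pandas as pd
    from langchain.callbacks import StdOutCallbackHandler

    from pedalo.agents import code_interpreter

    if __name__ == "__main__":
        csv_path, question = sys.argv[1], sys.argv[2]  # e.g. data.csv "How many rows are there?"
        df = pd.read_csv(csv_path)
        # A stdout callback stands in for StreamlitCallbackHandler when running in a terminal.
        print(code_interpreter.run(question, df, StdOutCallbackHandler()))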
poetry.lock
ADDED
(diff too large to render)
pyproject.toml
ADDED
@@ -0,0 +1,26 @@
+[tool.poetry]
+name = "pedalo"
+version = "0.1.0"
+description = ""
+authors = ["Luke31 <[email protected]>"]
+readme = "README.md"
+
+[tool.poetry.dependencies]
+python = "^3.11"
+langchain = "^0.0.258"
+streamlit = "^1.25.0"
+pandas = "^2.0.3"
+python-dotenv = "^1.0.0"
+openai = "^0.27.8"
+tabulate = "^0.9.0"
+matplotlib = "^3.7.2"
+seaborn = "^0.12.2"
+neo4j = "^5.11.0"
+
+
+[tool.poetry.group.dev.dependencies]
+black = "^23.7.0"
+
+[build-system]
+requires = ["poetry-core"]
+build-backend = "poetry.core.masonry.api"
stmain.py
ADDED
@@ -0,0 +1,46 @@
+from langchain.callbacks import StreamlitCallbackHandler
+import streamlit as st
+from pandas import DataFrame
+
+from pedalo.main import run
+import pandas as pd
+
+# st.set_page_config(layout="wide")
+st.title("PEDALO - Productive Exploratory Data Analysis using Langchain interrOgation")
+st.write("Ask your data what you wanna know!")
+model = st.sidebar.radio("Which model do you wanna use?", ("gpt-4", "gpt-3.5-turbo"), index=1)
+
+uploaded_file = st.sidebar.file_uploader("Choose a file", type=["csv"])
+
+
+
+def run_df_analysis(prompt: str, df: DataFrame):
+    st_callback = StreamlitCallbackHandler(st.container())
+    response = run(prompt, df, st_callback, model)
+    st.write(response)
+
+
+def initial_analysis(df: DataFrame):
+    run_df_analysis("Give a brief outline and interpretation of the file content.", df)
+
+
+def user_interrogation(df: DataFrame):
+    user_question = st.text_input("or enter your question about the CSV data:")
+    if user_question:
+        run_df_analysis(user_question, df)
+
+
+def main():
+    if uploaded_file is not None:
+        df = pd.read_csv(uploaded_file)
+        st.write(df)
+
+        # if st.button("Start analyzing"):
+        st.write(f"Starting to analyze using model {model}...")
+        if st.button("Give initial insight"):
+            initial_analysis(df)
+        user_interrogation(df)
+    else:
+        st.write("Please upload a CSV file.")
+
+main()
tests/__init__.py
ADDED
(empty file)