Commit 060a68f · Luke31 committed · 0 parent(s) (initial commit)

Initial attempt
Files changed:
- .env.sample +1 -0
- .gitignore +4 -0
- README.md +8 -0
- pedalo/__init__.py +0 -0
- pedalo/agents/__init__.py +0 -0
- pedalo/agents/code_interpreter.py +59 -0
- pedalo/main.py +23 -0
- poetry.lock +0 -0
- pyproject.toml +26 -0
- stmain.py +46 -0
- tests/__init__.py +0 -0
.env.sample
ADDED
@@ -0,0 +1 @@
+OPENAI_API_KEY=YOUR_KEY
.gitignore
ADDED
@@ -0,0 +1,4 @@
+.env
+**/__pycache__/*
+.idea
+out/*.png
README.md
ADDED
@@ -0,0 +1,8 @@
+# PEDALO - Productive Exploratory Data Analysis using Langchain interrOgation
+
+Ask your data what you wanna know!
+
+# How to use
+
+2. `poetry install` (using Poetry 1.4.2)
+3. `poetry run streamlit run stmain.py`
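Note that the README's numbered steps start at 2; the missing first step is presumably to copy `.env.sample` to `.env` and replace `YOUR_KEY` with a real OpenAI API key (an inference from the other files in this commit, not something the README states). The full flow would then look roughly like:

    cp .env.sample .env                     # inferred step: put your real OPENAI_API_KEY here
    poetry install                          # README notes Poetry 1.4.2
    poetry run streamlit run stmain.py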
pedalo/__init__.py
ADDED
(empty file)

pedalo/agents/__init__.py
ADDED
(empty file)
pedalo/agents/code_interpreter.py
ADDED
@@ -0,0 +1,59 @@
+from dotenv import load_dotenv
+from langchain.agents import AgentType, create_pandas_dataframe_agent, initialize_agent
+from langchain.agents.agent_toolkits import create_python_agent
+from langchain.callbacks import StreamlitCallbackHandler
+from langchain.chat_models import ChatOpenAI
+from langchain.tools import PythonREPLTool, Tool
+from pandas import DataFrame
+
+model_python_agent_executor = "gpt-3.5-turbo"
+model_pandas_agent = "gpt-3.5-turbo"
+model_grand_agent = "gpt-3.5-turbo"
+
+
+def run(
+    prompt: str, df: DataFrame, st_callback: StreamlitCallbackHandler, model="gpt-4"
+) -> str:
+    # python_agent_executor = create_python_agent(
+    #     llm=ChatOpenAI(temperature=0, model=model_python_agent_executor),
+    #     tool=PythonREPLTool(),
+    #     agent_type=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
+    #     verbose=True,
+    # )
+
+    pandas_agent = create_pandas_dataframe_agent(
+        llm=ChatOpenAI(
+            temperature=0,
+            model=model_pandas_agent,
+            streaming=True,
+        ),
+        df=df,
+        verbose=True,
+        agent_type=AgentType.OPENAI_FUNCTIONS,  # AgentType.OPENAI_FUNCTIONS, #
+    )
+
+    grand_agent = initialize_agent(
+        tools=[
+            # Tool(
+            #     name="PythonAgent",
+            #     func=python_agent_executor.run,
+            #     description="""useful when you need to transform natural language and write from it python and execute the python code,
+            #     returning the results of the code execution,
+            #     DO NOT SEND PYTHON CODE TO THIS TOOL""",
+            # ),
+            Tool(
+                name="PandasAgent",
+                func=pandas_agent.run,
+                description="""useful when you need to answer question for a provided pandas dataframe, This tool already knows which dataframe to handle.
+                takes as an input the entire question and returns the answer after running calculations""",
+            ),
+        ],
+        llm=ChatOpenAI(temperature=0, model=model_grand_agent, streaming=True),
+        agent_type=AgentType.OPENAI_FUNCTIONS,
+        # agent_type=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
+        verbose=True,
+    )
+    cmd = f"""in provided pandas dataframe, answer the provided question.
+    \n{prompt}"""
+    response = pandas_agent.run(cmd, callbacks=[st_callback])
+    return response
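As committed, run() builds both a pandas dataframe agent and a grand_agent, but only pandas_agent.run(...) is actually invoked, and the model argument is never used (the module-level "gpt-3.5-turbo" constants take effect). A minimal sketch of calling this function from a Streamlit script, assuming the langchain 0.0.258 / streamlit 1.25 pins from pyproject.toml and an OPENAI_API_KEY available via .env; the sample DataFrame and question are made up for illustration:

    # Hypothetical smoke test for code_interpreter.run (not part of this commit).
    import pandas as pd
    import streamlit as st
    from dotenv import load_dotenv
    from langchain.callbacks import StreamlitCallbackHandler

    from pedalo.agents import code_interpreter

    load_dotenv()  # expects OPENAI_API_KEY in .env, as pedalo/main.py does

    # Made-up sample data standing in for an uploaded CSV.
    df = pd.DataFrame({"city": ["Zurich", "Bern", "Basel"],
                       "population": [434_000, 134_000, 178_000]})

    st_callback = StreamlitCallbackHandler(st.container())
    answer = code_interpreter.run("Which city has the largest population?", df, st_callback)
    st.write(answer)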
pedalo/main.py
ADDED
@@ -0,0 +1,23 @@
+from langchain.callbacks import StreamlitCallbackHandler
+from pandas import DataFrame
+
+from dotenv import load_dotenv
+import os
+import openai
+
+from pedalo.agents import code_interpreter
+
+load_dotenv()  # Load environment variables from .env file
+api_key = os.getenv("OPENAI_API_KEY")
+openai.api_key = api_key
+
+
+def run(
+    prompt: str, df: DataFrame, st_callback: StreamlitCallbackHandler, model="gpt-4"
+):
+    result = code_interpreter.run(prompt, df, st_callback, model)
+    return result
+
+
+if __name__ == "__main__":
+    run()
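Note that the `if __name__ == "__main__":` block calls run() without its required prompt, df, and st_callback arguments, so running the module directly would raise a TypeError as committed; in practice the entry point is stmain.py. A hedged sketch of a standalone terminal entry point; the command-line arguments and the use of langchain's StdOutCallbackHandler in place of the Streamlit callback are assumptions, not part of this commit:

    # Hypothetical CLI entry point (illustrative only; the commit keeps the bare run() call).
    import sys

    import pandas as pd
    from langchain.callbacks import StdOutCallbackHandler

    from pedalo.agents import code_interpreter

    if __name__ == "__main__":
        csv_path, question = sys.argv[1], sys.argv[2]  # e.g. data.csv "How many rows are there?"
        df = pd.read_csv(csv_path)
        # A stdout callback stands in for StreamlitCallbackHandler when running in a terminal.
        print(code_interpreter.run(question, df, StdOutCallbackHandler()))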
poetry.lock
ADDED
(diff too large to render)
pyproject.toml
ADDED
@@ -0,0 +1,26 @@
+[tool.poetry]
+name = "pedalo"
+version = "0.1.0"
+description = ""
+authors = ["Luke31 <[email protected]>"]
+readme = "README.md"
+
+[tool.poetry.dependencies]
+python = "^3.11"
+langchain = "^0.0.258"
+streamlit = "^1.25.0"
+pandas = "^2.0.3"
+python-dotenv = "^1.0.0"
+openai = "^0.27.8"
+tabulate = "^0.9.0"
+matplotlib = "^3.7.2"
+seaborn = "^0.12.2"
+neo4j = "^5.11.0"
+
+
+[tool.poetry.group.dev.dependencies]
+black = "^23.7.0"
+
+[build-system]
+requires = ["poetry-core"]
+build-backend = "poetry.core.masonry.api"
stmain.py
ADDED
@@ -0,0 +1,46 @@
+from langchain.callbacks import StreamlitCallbackHandler
+import streamlit as st
+from pandas import DataFrame
+
+from pedalo.main import run
+import pandas as pd
+
+# st.set_page_config(layout="wide")
+st.title("PEDALO - Productive Exploratory Data Analysis using Langchain interrOgation")
+st.write("Ask your data what you wanna know!")
+model = st.sidebar.radio("Which model do you wanna use?", ("gpt-4", "gpt-3.5-turbo"), index=1)
+
+uploaded_file = st.sidebar.file_uploader("Choose a file", type=["csv"])
+
+
+
+def run_df_analysis(prompt: str, df: DataFrame):
+    st_callback = StreamlitCallbackHandler(st.container())
+    response = run(prompt, df, st_callback, model)
+    st.write(response)
+
+
+def initial_analysis(df: DataFrame):
+    run_df_analysis("Give a brief outline and interpretation of the file content.", df)
+
+
+def user_interrogation(df: DataFrame):
+    user_question = st.text_input("or enter your question about the CSV data:")
+    if user_question:
+        run_df_analysis(user_question, df)
+
+
+def main():
+    if uploaded_file is not None:
+        df = pd.read_csv(uploaded_file)
+        st.write(df)
+
+        # if st.button("Start analyzing"):
+        st.write(f"Starting to analyze using model {model}...")
+        if st.button("Give initial insight"):
+            initial_analysis(df)
+        user_interrogation(df)
+    else:
+        st.write("Please upload a CSV file.")
+
+main()
tests/__init__.py
ADDED
(empty file)