Luke31 committed on
Commit
060a68f
·
0 Parent(s):

Initial attempt

Browse files
.env.sample ADDED
@@ -0,0 +1 @@
 
 
1
+ OPENAI_API_KEY=YOUR_KEY
.gitignore ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ .env
2
+ **/__pycache__/*
3
+ .idea
4
+ out/*.png
README.md ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ # PEDALO - Productive Exploratory Data Analysis using Langchain interrOgation
2
+
3
+ Ask your data what you wanna know!
4
+
5
+ # How to use
6
+
7
+ 1. Copy `.env.sample` to `.env` and set `OPENAI_API_KEY`
+ 2. `poetry install` (Using Poetry (version 1.4.2))
8
+ 3. `poetry run streamlit run stmain.py`
pedalo/__init__.py ADDED
File without changes
pedalo/agents/__init__.py ADDED
File without changes
pedalo/agents/code_interpreter.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dotenv import load_dotenv
2
+ from langchain.agents import AgentType, create_pandas_dataframe_agent, initialize_agent
3
+ from langchain.agents.agent_toolkits import create_python_agent
4
+ from langchain.callbacks import StreamlitCallbackHandler
5
+ from langchain.chat_models import ChatOpenAI
6
+ from langchain.tools import PythonREPLTool, Tool
7
+ from pandas import DataFrame
8
+
9
# OpenAI chat model names, one per kind of agent (Python REPL executor,
# pandas dataframe agent, top-level "grand" agent).
model_python_agent_executor: str = "gpt-3.5-turbo"
model_pandas_agent: str = "gpt-3.5-turbo"
model_grand_agent: str = "gpt-3.5-turbo"
12
+
13
+
14
def run(
    prompt: str,
    df: DataFrame,
    st_callback: StreamlitCallbackHandler,
    model: str = "gpt-4",
) -> str:
    """Answer a natural-language question about ``df`` with a pandas agent.

    Args:
        prompt: The question to ask about the dataframe.
        df: The dataframe the agent is allowed to inspect.
        st_callback: Callback handler that receives the agent's intermediate
            steps while it runs.
        model: Name of the OpenAI chat model that drives the agent.

    Returns:
        The agent's final answer.
    """
    pandas_agent = create_pandas_dataframe_agent(
        llm=ChatOpenAI(
            temperature=0,
            # Honour the caller's model choice instead of a fixed constant.
            model=model,
            streaming=True,
        ),
        df=df,
        verbose=True,
        agent_type=AgentType.OPENAI_FUNCTIONS,
    )

    cmd = (
        "in provided pandas dataframe, answer the provided question.\n"
        f"\n{prompt}"
    )
    return pandas_agent.run(cmd, callbacks=[st_callback])
pedalo/main.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain.callbacks import StreamlitCallbackHandler
2
+ from pandas import DataFrame
3
+
4
+ from dotenv import load_dotenv
5
+ import os
6
+ import openai
7
+
8
+ from pedalo.agents import code_interpreter
9
+
10
# Load environment variables from the .env file, then hand the OpenAI key
# found there to the openai module.
load_dotenv()
api_key = os.environ.get("OPENAI_API_KEY")
openai.api_key = api_key
13
+
14
+
15
def run(
    prompt: str, df: DataFrame, st_callback: StreamlitCallbackHandler, model="gpt-4"
):
    """Forward a question about ``df`` to ``code_interpreter.run``.

    All four arguments are passed through unchanged and the interpreter's
    result is returned as-is.
    """
    return code_interpreter.run(
        prompt=prompt,
        df=df,
        st_callback=st_callback,
        model=model,
    )
20
+
21
+
22
if __name__ == "__main__":
    # run() requires a prompt, a dataframe and a Streamlit callback, so a bare
    # call can only fail with a TypeError. Exit with a pointer to the real
    # entry point instead.
    raise SystemExit(
        "pedalo.main cannot be run directly; "
        "start the app with `poetry run streamlit run stmain.py`."
    )
poetry.lock ADDED
The diff for this file is too large to render. See raw diff
 
pyproject.toml ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [tool.poetry]
2
+ name = "pedalo"
3
+ version = "0.1.0"
4
+ description = ""
5
+ authors = ["Luke31 <[email protected]>"]
6
+ readme = "README.md"
7
+
8
+ [tool.poetry.dependencies]
9
+ python = "^3.11"
10
+ langchain = "^0.0.258"
11
+ streamlit = "^1.25.0"
12
+ pandas = "^2.0.3"
13
+ python-dotenv = "^1.0.0"
14
+ openai = "^0.27.8"
15
+ tabulate = "^0.9.0"
16
+ matplotlib = "^3.7.2"
17
+ seaborn = "^0.12.2"
18
+ neo4j = "^5.11.0"
19
+
20
+
21
+ [tool.poetry.group.dev.dependencies]
22
+ black = "^23.7.0"
23
+
24
+ [build-system]
25
+ requires = ["poetry-core"]
26
+ build-backend = "poetry.core.masonry.api"
stmain.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain.callbacks import StreamlitCallbackHandler
2
+ import streamlit as st
3
+ from pandas import DataFrame
4
+
5
+ from pedalo.main import run
6
+ import pandas as pd
7
+
8
+ # st.set_page_config(layout="wide")
9
# Page header.
st.title("PEDALO - Productive Exploratory Data Analysis using Langchain interrOgation")
st.write("Ask your data what you wanna know!")

# Sidebar controls: model choice (second option preselected) and CSV upload.
model = st.sidebar.radio(
    label="Which model do you wanna use?",
    options=("gpt-4", "gpt-3.5-turbo"),
    index=1,
)

uploaded_file = st.sidebar.file_uploader(label="Choose a file", type=["csv"])
14
+
15
+
16
+
17
def run_df_analysis(prompt: str, df: DataFrame):
    """Ask ``prompt`` about ``df`` and write the answer to the page.

    A new Streamlit container is given to the callback handler, and the
    request uses the model currently selected in the sidebar.
    """
    trace_area = st.container()
    answer = run(prompt, df, StreamlitCallbackHandler(trace_area), model)
    st.write(answer)
21
+
22
+
23
def initial_analysis(df: DataFrame):
    """Run the fixed "outline and interpretation" question against ``df``."""
    overview_prompt = "Give a brief outline and interpretation of the file content."
    run_df_analysis(overview_prompt, df)
25
+
26
+
27
def user_interrogation(df: DataFrame):
    """Show a free-text question box and analyse ``df`` once it is filled in."""
    question = st.text_input("or enter your question about the CSV data:")
    if not question:
        # Nothing entered yet, so there is nothing to ask.
        return
    run_df_analysis(question, df)
31
+
32
+
33
def main():
    """Render the page body: preview the uploaded CSV and offer the analyses.

    With no upload, only a hint is shown. A file that cannot be parsed as CSV
    is reported with ``st.error`` instead of an unhandled exception.
    """
    if uploaded_file is None:
        st.write("Please upload a CSV file.")
        return

    try:
        df = pd.read_csv(uploaded_file)
    except (
        pd.errors.EmptyDataError,
        pd.errors.ParserError,
        UnicodeDecodeError,
    ) as err:
        st.error(f"Could not read the uploaded file as CSV: {err}")
        return

    st.write(df)

    st.write(f"Starting to analyze using model {model}...")
    if st.button("Give initial insight"):
        initial_analysis(df)
    user_interrogation(df)


# This file is launched with `streamlit run`, so the page is rendered as part
# of executing the script.
main()
tests/__init__.py ADDED
File without changes