File size: 2,816 Bytes
ab007e4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8fbe7f0
ab007e4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
import streamlit as st
import pandas as pd
from pandasai import SmartDataframe
from pandasai.llm.google_gemini import GoogleGemini
import warnings
from pandasai.responses.response_parser import ResponseParser
import os
from huggingface_hub import hf_hub_download

class StreamlitResponse(ResponseParser):
    """Response parser that renders PandasAI results directly onto the page.

    Instead of returning raw values to the caller, dataframe and plot
    results are pushed straight into the Streamlit UI.
    """

    def __init__(self, context) -> None:
        super().__init__(context)

    def format_dataframe(self, result):
        """Render a dataframe result as an interactive Streamlit table."""
        st.dataframe(result["value"])

    def format_plot(self, result):
        """Render a plot result via st.image.

        NOTE(review): assumes result["value"] is an image path/bytes
        accepted by st.image — confirm with the pandasai version in use.
        """
        st.image(result["value"])


warnings.filterwarnings("ignore")

# Dataset location on the Hugging Face Hub.
repo_id = "Akankshg/ML_DATA"
filename = "EDA_DATA.parquet"

# Read the access token from the environment (set it in the Space's secrets).
# Fail fast with a clear message instead of a bare KeyError traceback.
token = os.environ.get("HUGGING_FACE_HUB_TOKEN")
if token is None:
    raise RuntimeError(
        "HUGGING_FACE_HUB_TOKEN is not set; add it to your environment/secrets."
    )

# Download the parquet file (or reuse the hub cache) and get its local path.
local_file = hf_hub_download(
    repo_id=repo_id, filename=filename, repo_type="dataset", token=token
)

# Cache the data loading process across Streamlit reruns.
@st.cache_data()
def fetch_data(path: str = local_file) -> pd.DataFrame:
    """Load the EDA parquet file into a DataFrame, memoized by Streamlit.

    Args:
        path: Filesystem path of the parquet file. Defaults to the file
            downloaded from the Hub at startup, so existing ``fetch_data()``
            callers are unaffected.

    Returns:
        The dataset as a pandas DataFrame.
    """
    return pd.read_parquet(path)

df = fetch_data()

# The Google API key is stored as a secret; GoogleGemini needs a real key,
# so surface a clear error instead of passing None through silently.
google_api_key = os.environ.get("GOOGLE_API_KEY")
if not google_api_key:
    raise RuntimeError(
        "GOOGLE_API_KEY is not set; add it to your environment/secrets."
    )

llm = GoogleGemini(api_key=google_api_key)

# Two SmartDataframe views of the same data:
#  - pandas_ai renders plots/tables directly to the page via StreamlitResponse;
#  - pandas_ai_2 returns plain values suitable for st.write (string answers).
pandas_ai = SmartDataframe(
    df, config={"llm": llm, "response_parser": StreamlitResponse, "verbose": True}
)
pandas_ai_2 = SmartDataframe(df, config={"llm": llm, "verbose": True})
# Streamlit app title and description
st.title("AI-Powered Data Analysis App")
st.write("This application allows you to interact with your dataset using natural language prompts. Just ask a question, and the AI will provide insights based on your data.")

# Display the dataset
# Show only the first rows as a preview; the full frame may be large.
st.subheader("Dataset Preview")
st.dataframe(df.head())

# User input for natural language prompt
prompt = st.text_input("Enter your prompt:", placeholder="e.g., What are the top diagnoses?")

# Process the input and display the result.
if st.button("Submit"):
    if not prompt:
        st.warning("Please enter a prompt.")
    # Case-insensitive keyword check — the original literal list missed
    # variants such as 'Plot' and 'GRAPH'.
    elif any(word in prompt.lower() for word in ("plot", "graph")):
        # Plot/graph requests: StreamlitResponse renders the figure itself,
        # so the chat return value is not displayed here.
        try:
            pandas_ai.chat(prompt)
            st.subheader("Result")
        except Exception as e:  # UI boundary: show any failure, don't crash
            st.error(f"Error: {e}. Unable to retrieve result.")
    else:
        # Plain-value requests: display whatever the LLM returns.
        try:
            result = pandas_ai_2.chat(prompt)
            st.subheader("Result")
            st.write(result)
        except Exception as e:  # UI boundary: show any failure, don't crash
            st.error(f"Error: {e}. Unable to retrieve result.")

# Add a footer
st.write("Powered by PandasAI and Google Gemini.")