raghuv-aditya committed on
Commit
4f18307
·
verified ·
1 Parent(s): a5d77fb

Transfer of files

Browse files
Files changed (3) hide show
  1. README.md +2 -14
  2. app.py +199 -0
  3. config.py +13 -0
README.md CHANGED
@@ -1,14 +1,2 @@
1
- ---
2
- title: Query Pilot
3
- emoji: ⚡
4
- colorFrom: red
5
- colorTo: red
6
- sdk: gradio
7
- sdk_version: 5.6.0
8
- app_file: app.py
9
- pinned: false
10
- license: apache-2.0
11
- short_description: AI agent for web search and data extraction.
12
- ---
13
-
14
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
+ # QueryPilot
2
+ Reflects guided query execution and retrieval
 
 
 
 
 
 
 
 
 
 
 
 
app.py ADDED
@@ -0,0 +1,199 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from modules.data_processor import process_query_and_update_csv, extract_column_name, process_query_and_update_sheets
3
+ from modules.gsheet_handler import fetch_google_sheet_data, update_google_sheet
4
+ import pandas as pd
5
+ import tempfile
6
+ from google.oauth2.service_account import Credentials
7
+ import json
8
+ import gspread
9
+
10
def preview_columns(file=None, credentials=None, sheet_id=None, sheet_name=None):
    """
    Preview the first rows and the column names of a CSV file or Google Sheet.

    Args:
        file: Uploaded CSV, given either as an object exposing its path as
            ``.name`` or as a plain path string. Takes precedence when set.
        credentials: Service-account JSON upload (object with ``.name`` or a
            path string). Only used when ``file`` is empty.
        sheet_id: Sheet identifier forwarded to ``fetch_google_sheet_data``.
        sheet_name: Worksheet name forwarded to ``fetch_google_sheet_data``.

    Returns:
        A ``(preview, columns)`` tuple. On success ``preview`` is ``df.head()``
        and ``columns`` is the list of column names. On failure ``preview`` is
        a one-cell DataFrame whose ``Error`` column holds the message and
        ``columns`` is an empty list. The function never raises.
    """
    try:
        if file:
            df = pd.read_csv(getattr(file, "name", file))
        elif credentials and sheet_id and sheet_name:
            credentials_path = getattr(credentials, "name", credentials)
            df = fetch_google_sheet_data(credentials_path, sheet_id, sheet_name)
        else:
            # The first return value is wired to a Dataframe output, so the
            # message is wrapped in a DataFrame rather than returned as a bare
            # string (which is not tabular data).
            return pd.DataFrame({"Error": ["No data source provided"]}), []

        return df.head(), list(df.columns)
    except Exception as e:  # UI boundary: report the failure instead of crashing
        return pd.DataFrame({"Error": [str(e)]}), []
25
+
26
def process_data(file=None, credentials=None, sheet_id=None, sheet_name=None, query_template=None):
    """
    Run the query template over a CSV file or Google Sheet and export the result.

    The Google Sheets branch only reads the sheet and processes the data; it
    does not write anything back to the sheet.

    Args:
        file: Uploaded CSV, given either as an object exposing its path as
            ``.name`` or as a plain path string. Takes precedence when set.
        credentials: Service-account JSON upload (object with ``.name`` or a
            path string). Only used when ``file`` is empty.
        sheet_id: Sheet identifier forwarded to ``fetch_google_sheet_data``.
        sheet_name: Worksheet name forwarded to ``fetch_google_sheet_data``.
        query_template: Template string forwarded to the processing helpers.

    Returns:
        A ``(dataframe, csv_path)`` tuple. On success ``dataframe`` is the
        processed DataFrame and ``csv_path`` is the path of a temporary CSV
        file containing it (the file is left on disk for download). On failure
        ``dataframe`` is a one-cell DataFrame whose ``Error`` column holds the
        message and ``csv_path`` is ``None``. The function never raises.
    """
    try:
        if file:
            updated_df = process_query_and_update_csv(getattr(file, "name", file), query_template)
        elif credentials and sheet_id and sheet_name:
            credentials_path = getattr(credentials, "name", credentials)
            df = fetch_google_sheet_data(credentials_path, sheet_id, sheet_name)
            updated_df = process_query_and_update_sheets(credentials_path, df, query_template)
        else:
            # The second slot feeds a file output, so it must be a real path or
            # None -- never the error text itself.
            return pd.DataFrame({"Error": ["No data source provided"]}), None

        # Reserve a temp path and close the handle before pandas reopens it;
        # delete=False keeps the file around so it can be downloaded.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as temp_file:
            csv_path = temp_file.name
        updated_df.to_csv(csv_path, index=False)
        return updated_df, csv_path
    except Exception as e:  # UI boundary: report the failure instead of crashing
        return pd.DataFrame({"Error": [str(e)]}), None
62
+
63
def update_sheet(credentials, sheet_id, sheet_name, processed_df):
    """
    Write the processed data back to the Google Sheet.

    Args:
        credentials: Service-account JSON upload, given either as an object
            exposing its path as ``.name`` or as a plain path string.
        sheet_id: Sheet identifier forwarded to ``update_google_sheet``.
        sheet_name: Worksheet name forwarded to ``update_google_sheet``.
        processed_df: Data forwarded to ``update_google_sheet``.

    Returns:
        A status string: a success message, a message naming the missing
        inputs, or the text of the exception raised by the update. The
        function never raises.
    """
    # Same precondition the preview/process handlers use; without it a missing
    # upload surfaces as "'NoneType' object has no attribute 'name'".
    if not (credentials and sheet_id and sheet_name):
        return "Credentials, sheet ID, and sheet name are all required."

    try:
        update_google_sheet(getattr(credentials, "name", credentials), sheet_id, sheet_name, processed_df)
        return "Google Sheet updated successfully!"
    except Exception as e:  # UI boundary: show the failure in the status box
        return str(e)
72
+
73
+ # Gradio Interface
74
+ # Gradio Interface with Information
75
def gradio_app():
    """
    Build the Gradio Blocks UI for the CSV / Google Sheets query processor.

    The layout is an introductory Markdown panel followed by two tabs
    ("CSV File" and "Google Sheets"). Button clicks are wired to
    ``preview_columns``, ``process_data`` and ``update_sheet``.

    Returns:
        The constructed ``gr.Blocks`` object. It is not launched here.
    """
    with gr.Blocks(theme=gr.themes.Citrus()) as app:
        # Introductory panel shown above the tabs.
        # NOTE(review): the GitHub link below still points at the placeholder
        # "your-repo-url" -- replace it with the real repository URL.
        gr.Markdown("""
        # CSV/Google Sheets Query Processor Dashboard
        This application allows you to:
        - Upload a CSV file or connect to a Google Sheet.
        - Preview the data to understand the structure and available columns.
        - Process the data by executing query templates that extract or manipulate information.
        - Download the processed data as a CSV file or update the Google Sheet directly.


        **Note**:
        This app uses my personal OpenAI API key and SERP API key, which have limited free API calls.
        If the app does not work due to API limits, you can:
        1. Visit the [GitHub Repository](https://github.com/your-repo-url).
        2. Download the project.
        3. Use your own API keys to run it locally.

        For help setting up, refer to the documentation in the GitHub repository.
        """)

        # Per-tab state. Each one receives the second return value of
        # preview_columns (the column list); no handler in this function reads
        # them back.
        csv_data_state = gr.State(None)  # Set by the CSV "Preview Columns" click
        sheet_data_state = gr.State(None)  # Set by the Sheets "Preview Columns" click

        with gr.Tabs():
            with gr.TabItem("CSV File"):
                # Usage instructions for the CSV tab
                gr.Markdown("""
                ## **CSV File Operations**
                1. Upload a CSV file to preview its columns and structure.
                2. Enter a query template using placeholders like `{ColumnName}` to extract or modify data.
                3. Process the CSV and download the updated file.

                **Sample Query Template**:
                `Get me the name of the CEO of {Company}`
                Replace `{Company}` with the column name containing company names.
                """)

                # Inputs
                csv_file = gr.File(label="Upload CSV File")
                query_template_csv = gr.Textbox(label="CSV Query Template (e.g., 'Get me the name of CEO of {Company}')")
                with gr.Row():
                    preview_button_csv = gr.Button("Preview Columns")
                    process_button_csv = gr.Button("Process Queries")

                # Outputs
                preview_output_csv = gr.Dataframe(label="CSV Data Preview")
                processed_output_csv = gr.Dataframe(label="Processed CSV Data")
                download_button_csv = gr.File(label="Download Processed CSV")

            with gr.TabItem("Google Sheets"):
                # Usage instructions for the Google Sheets tab
                gr.Markdown("""
                ## **Google Sheets Operations**
                This section allows you to connect to a Google Sheet and perform data queries.

                **Steps to Use**:
                1. **Provide Google Service Account Credentials**:
                - Create a Service Account in Google Cloud Console.
                - Download the Service Account credentials as a JSON file.
                - Share the Google Sheet with the Service Account's email (found in the JSON file under `client_email`).
                2. **Enter the Google Sheet ID**:
                - The Google Sheet ID is the part of the URL between `/d/` and `/edit`, for example:
                `https://docs.google.com/spreadsheets/d/<SheetID>/edit`
                3. **Enter the Sheet Name**:
                - This is the name of the specific worksheet (tab) within the Google Sheet, e.g., `Sheet1`.

                **Example Input**:
                - Google Sheet ID: `1aBcDeFgHiJkLmNoPqRsTuVwXyZ0123456789`
                - Sheet Name: `SalesData`

                **Sample Query Template**:
                `Get me the revenue of {Product}`
                Replace `{Product}` with the column name containing product names.
                """)

                # Inputs
                credentials = gr.File(label="Google Service Account Credentials (JSON)")
                sheet_id = gr.Textbox(label="Google Sheet ID")
                sheet_name = gr.Textbox(label="Google Sheet Name (e.g., Sheet1)")
                query_template_sheet = gr.Textbox(label="Google Sheets Query Template (e.g., 'Get me the revenue of {Product}')")
                with gr.Row():
                    preview_button_sheet = gr.Button("Preview Columns")
                    process_button_sheet = gr.Button("Process Queries")
                    update_button = gr.Button("Update Google Sheet")

                # Outputs
                preview_output_sheet = gr.Dataframe(label="Google Sheet Data Preview")
                processed_output_sheet = gr.Dataframe(label="Processed Google Sheet Data")
                download_button_sheet = gr.File(label="Download Processed CSV")
                update_status = gr.Textbox(label="Update Status", interactive=False)

        # CSV tab wiring. preview_columns / process_data take the sheet
        # arguments positionally, so gr.State(None) fills those slots with None.
        preview_button_csv.click(
            preview_columns,
            inputs=[csv_file, gr.State(None), gr.State(None), gr.State(None)],  # Pass placeholders for unused inputs
            outputs=[preview_output_csv, csv_data_state],
        )
        process_button_csv.click(
            process_data,
            inputs=[csv_file, gr.State(None), gr.State(None), gr.State(None), query_template_csv],
            outputs=[processed_output_csv, download_button_csv],
        )

        # Google Sheets tab wiring. Here the leading `file` slot is the one
        # filled with a None placeholder.
        preview_button_sheet.click(
            preview_columns,
            inputs=[gr.State(None), credentials, sheet_id, sheet_name],
            outputs=[preview_output_sheet, sheet_data_state],
        )
        process_button_sheet.click(
            process_data,
            inputs=[gr.State(None), credentials, sheet_id, sheet_name, query_template_sheet],
            outputs=[processed_output_sheet, download_button_sheet],
        )
        # The current contents of the processed-data table are what gets
        # passed to update_sheet as the data to upload.
        update_button.click(
            update_sheet,
            inputs=[credentials, sheet_id, sheet_name, processed_output_sheet],
            outputs=[update_status],
        )

    return app
195
+
196
+
197
if __name__ == "__main__":
    # Build the UI and start the Gradio server only when run as a script,
    # so importing this module has no side effects.
    app = gradio_app()
    app.launch()
config.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ # from dotenv import load_dotenv
3
+
4
+
5
+ # load_dotenv()
6
+ # Ensure your OpenAI API key is set in the environment variables.
7
+ # open_api_key = os.getenv("OPENAI_API_KEY")
8
+ # os.environ["OPENAI_API_KEY"] = api_key
9
+
10
# SerpAPI key taken from the SERPAPI_KEY environment variable (None when unset).
api_key = os.environ.get("SERPAPI_KEY")

# Location on disk where ChromaDB persists its embeddings.
PERSIST_DIRECTORY = "./chroma_db"