Spaces:
Sleeping
Sleeping
added app.py and requirements file
Browse files- app.py +228 -0
- requirements.txt +5 -0
app.py
ADDED
@@ -0,0 +1,228 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import os
|
3 |
+
import json
|
4 |
+
from google.oauth2.credentials import Credentials
|
5 |
+
from google_auth_oauthlib.flow import InstalledAppFlow
|
6 |
+
from google.auth.transport.requests import Request
|
7 |
+
from googleapiclient.discovery import build
|
8 |
+
import base64
|
9 |
+
from datetime import datetime, timedelta
|
10 |
+
|
11 |
+
# Read-only Gmail scope: the app lists and reads messages only.
SCOPES = ['https://www.googleapis.com/auth/gmail.readonly']

# OAuth client configuration for an "installed" (desktop) application.
# The client ID and secret are taken from environment variables instead of a
# credentials file; each value is None when its variable is not set.
CLIENT_CONFIG = {
    "installed": {
        "client_id": os.environ.get('GOOGLE_CLIENT_ID'),
        "client_secret": os.environ.get('GOOGLE_CLIENT_SECRET'),
        # Loopback redirect, matching the local-server sign-in flow this app
        # uses. The previous out-of-band URI ("urn:ietf:wg:oauth:2.0:oob")
        # has been deprecated by Google.
        "redirect_uris": ["http://localhost"],
        "auth_uri": "https://accounts.google.com/o/oauth2/auth",
        "token_uri": "https://oauth2.googleapis.com/token",
    }
}
|
24 |
+
|
25 |
+
# Terms that place an email in a category, keyed by category name.
# The classifier lower-cases every term before comparing, so the casing used
# here (for example 'CV') has no effect on matching.
KEYWORDS = {
    'job_related': [
        'job opportunity',
        'position',
        'career',
        'recruitment',
        'hiring',
        'interview',
        'resume',
        'CV',
        'application',
        'job posting',
        'employment',
        'role',
        'vacancy',
        'opening',
    ],
    'personal': [
        'personal',
        'private',
        'confidential',
        'family',
        'friend',
        'social',
        'invitation',
        'gathering',
        'meetup',
    ],
}
|
37 |
+
|
38 |
+
def get_gmail_service(state_dict):
    """Build an authorized Gmail API client, caching the token in ``state_dict``.

    Args:
        state_dict: Mutable dict used as a token cache. A non-empty
            ``'token'`` entry is used to rebuild the credentials; whenever
            credentials are refreshed or newly obtained, the entry is
            overwritten with their current values.

    Returns:
        The client object returned by ``build('gmail', 'v1', ...)``.

    Raises:
        ValueError: If an interactive sign-in is needed but the
            ``GOOGLE_CLIENT_ID`` / ``GOOGLE_CLIENT_SECRET`` environment
            variables were not set when ``CLIENT_CONFIG`` was built.
    """
    creds = None

    # Reuse a previously stored token when there is one.
    cached_token = state_dict.get('token')
    if cached_token:
        creds = Credentials.from_authorized_user_info(cached_token, SCOPES)

    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            # An expired token with a refresh token can be renewed silently.
            creds.refresh(Request())
        else:
            # Fail early with a readable message instead of starting an OAuth
            # flow whose client ID or secret is None.
            required = {
                'client_id': 'GOOGLE_CLIENT_ID',
                'client_secret': 'GOOGLE_CLIENT_SECRET',
            }
            missing = [
                env_name
                for key, env_name in required.items()
                if not CLIENT_CONFIG['installed'].get(key)
            ]
            if missing:
                raise ValueError(
                    "Missing Google OAuth configuration: set "
                    + ", ".join(missing)
                )

            flow = InstalledAppFlow.from_client_config(CLIENT_CONFIG, SCOPES)
            creds = flow.run_local_server(port=0)

        # Store the fresh credentials so the caller can reuse them.
        state_dict['token'] = {
            'token': creds.token,
            'refresh_token': creds.refresh_token,
            'token_uri': creds.token_uri,
            'client_id': creds.client_id,
            'client_secret': creds.client_secret,
            'scopes': creds.scopes,
        }

    return build('gmail', 'v1', credentials=creds)
|
65 |
+
|
66 |
+
def _iter_parts(payload):
    """Yield ``payload`` and every nested MIME part, parents before children."""
    pending = [payload]
    while pending:
        part = pending.pop()
        yield part
        # Reversed so that popping from the end visits children first-to-last.
        pending.extend(reversed(part.get('parts') or []))


def _pick_body_data(payload):
    """Return the encoded body data to display for ``payload``, or ''.

    Parts carrying a filename are treated as attachments and ignored. Among
    the rest, a ``text/plain`` part wins, then ``text/html``, then the first
    part of any other (or missing) MIME type that has data.
    """
    html_data = ''
    other_data = ''
    for part in _iter_parts(payload):
        if part.get('filename'):
            continue
        data = (part.get('body') or {}).get('data', '')
        if not data:
            continue
        mime_type = part.get('mimeType', '')
        if mime_type == 'text/plain':
            return data
        if mime_type == 'text/html':
            html_data = html_data or data
        else:
            other_data = other_data or data
    return html_data or other_data


def get_email_content(service, msg_id):
    """Fetch one message and return its headers, body text and attachment names.

    Args:
        service: Gmail API client exposing ``users().messages().get(...)``.
        msg_id: ID of the message to fetch.

    Returns:
        On success, a dict with keys ``'subject'``, ``'from'``, ``'date'``,
        ``'content'`` and ``'attachments'`` (a list of filenames). Missing
        headers fall back to ``'No Subject'`` / ``'No Sender'`` /
        ``'No Date'``; a message without body data gets
        ``'No content available'``.
        On any failure, a string starting with ``'Error retrieving email: '``
        (callers tell the two apart with ``isinstance(result, str)``).
    """
    try:
        message = service.users().messages().get(userId='me', id=msg_id, format='full').execute()
        payload = message['payload']

        # Header names are compared case-insensitively. Iterating in reverse
        # lets the first occurrence of a repeated header win.
        headers = {h['name'].lower(): h['value'] for h in reversed(payload['headers'])}
        subject = headers.get('subject', 'No Subject')
        from_email = headers.get('from', 'No Sender')
        date = headers.get('date', 'No Date')

        # The body may sit several levels deep (e.g. an alternative part
        # nested in a mixed part), so search the whole part tree.
        data = _pick_body_data(payload)
        if data:
            # Restore any stripped base64 padding, and replace undecodable
            # bytes rather than discarding the whole email.
            padded = data + '=' * (-len(data) % 4)
            text = base64.urlsafe_b64decode(padded).decode('utf-8', errors='replace')
        else:
            text = "No content available"

        # Attachments can also be nested, so collect them from every level.
        attachments = [
            part['filename'] for part in _iter_parts(payload) if part.get('filename')
        ]

        return {
            'subject': subject,
            'from': from_email,
            'date': date,
            'content': text,
            'attachments': attachments
        }
    except Exception as e:
        return f"Error retrieving email: {str(e)}"
|
104 |
+
|
105 |
+
def classify_email(email_data, keywords=None):
    """Classify an email as job-related and/or personal.

    Keywords are matched case-insensitively as whole words (an optional
    trailing "s"/"es" is accepted), so 'position' does not fire on
    "composition" and 'cv' does not fire on letters inside a URL.

    Args:
        email_data: Dict with ``'subject'``, ``'content'`` and
            ``'attachments'`` (a list of filenames).
        keywords: Optional dict with ``'job_related'`` and ``'personal'``
            keyword lists. Defaults to the module-level ``KEYWORDS``.

    Returns:
        Dict of booleans: ``'job_related'``, ``'personal'`` and ``'has_cv'``.
        ``'has_cv'`` is a heuristic: it is True when any attachment is a
        .pdf/.doc/.docx file or has "cv"/"resume" in its name, and it also
        forces ``'job_related'`` to True.
    """
    import re  # local import: the module's import block does not include re

    if keywords is None:
        keywords = KEYWORDS

    text = f"{email_data['subject']} {email_data['content']}".lower()

    def mentions_any(terms):
        # A term counts only when it is not glued to a letter or digit on
        # either side; "[^\W_]" means "letter or digit".
        for term in terms:
            pattern = (
                r"(?<![^\W_])" + re.escape(term.lower()) + r"(?:s|es)?(?![^\W_])"
            )
            if re.search(pattern, text):
                return True
        return False

    # Attachment names often run words together ("JohnResume.png"), so the
    # filename check deliberately stays a plain substring test.
    document_suffixes = ('.pdf', '.doc', '.docx')
    name_hints = ('cv', 'resume')
    has_cv = any(
        name.lower().endswith(document_suffixes)
        or any(hint in name.lower() for hint in name_hints)
        for name in email_data['attachments']
    )

    is_job_related = has_cv or mentions_any(keywords['job_related'])
    is_personal = mentions_any(keywords['personal'])

    return {
        'job_related': is_job_related,
        'personal': is_personal,
        'has_cv': has_cv
    }
|
125 |
+
|
126 |
+
def _format_email(email):
    """Render one classified email as the text section shown in the results."""
    lines = [
        f"📧 {email['subject']}",
        f"From: {email['from']}",
        f"Date: {email['date']}",
    ]

    # NOTE(review): the glyph on the CV tag was unrecoverable from the
    # original text; a paperclip is used - confirm the intended icon.
    tag_labels = (
        ('job_related', "🎯 Job Related"),
        ('personal', "👤 Personal"),
        ('has_cv', "📎 Has CV/Resume"),
    )
    tags = [label for key, label in tag_labels if email[key]]
    lines.append(f"Tags: {', '.join(tags)}")

    if email['attachments']:
        lines.append("Attachments:")
        lines.extend(f"- {att}" for att in email['attachments'])

    content = email['content']
    preview = content[:500] + "..." if len(content) > 500 else content
    lines.extend(["", "Content Preview:", preview, "-" * 80])

    return "\n".join(lines) + "\n\n"


def fetch_emails(days_back, include_job=True, include_personal=True, progress=gr.Progress()):
    """Fetch recent Gmail messages and return a text report of the matches.

    Args:
        days_back: How many days back to search; converted with ``int()``.
        include_job: Keep emails classified as job-related.
        include_personal: Keep emails classified as personal.
        progress: Gradio progress tracker, called once per message.

    Returns:
        A string: the formatted report, a "no emails found" message, or
        ``"Error: ..."`` if anything raised. At most 100 messages (a single
        result page) are examined.
    """
    # NOTE: the token cache lives only for this call, so credentials are not
    # carried over between calls.
    state_dict = {}
    try:
        service = get_gmail_service(state_dict)

        # Gmail's "after:" operator is given the cutoff as epoch seconds.
        cutoff = datetime.now() - timedelta(days=int(days_back))
        query = f'after:{int(cutoff.timestamp())}'
        results = service.users().messages().list(userId='me', q=query, maxResults=100).execute()
        messages = results.get('messages', [])

        if not messages:
            return "No emails found in the specified time range."

        filtered_emails = []
        unreadable = 0
        total = len(messages)

        for i, message in enumerate(messages):
            progress(i / total, desc="Processing emails...")
            email_data = get_email_content(service, message['id'])
            if isinstance(email_data, str):
                # A string result is an error message; count it so the
                # report can mention it instead of dropping it silently.
                unreadable += 1
                continue

            classification = classify_email(email_data)
            email_data.update(classification)

            if ((include_job and classification['job_related']) or
                    (include_personal and classification['personal'])):
                filtered_emails.append(email_data)

        sections = [f"Found {len(filtered_emails)} matching emails\n\n"]
        if unreadable:
            sections.append(f"Skipped {unreadable} emails that could not be read\n\n")
        sections.extend(_format_email(email) for email in filtered_emails)
        return "".join(sections)

    except Exception as e:
        return f"Error: {str(e)}"
|
187 |
+
|
188 |
+
# Create Gradio interface
|
189 |
+
def create_interface():
    """Build the Gradio Blocks UI and return it without launching it.

    The page has a heading, a row of filter controls (a days-back slider and
    two category checkboxes), a button, and a results textbox. Clicking the
    button calls ``fetch_emails`` with the three control values and writes
    its return value into the textbox.
    """
    with gr.Blocks(title="Email Filter") as demo:
        gr.Markdown("# 📧 Smart Email Filter")
        gr.Markdown("Connect to your Gmail account to filter important emails")

        with gr.Row():
            days_back = gr.Slider(
                minimum=1,
                maximum=30,
                value=7,
                step=1,
                label="Days to look back",
            )
            include_job = gr.Checkbox(
                value=True,
                label="Include Job Related Emails",
            )
            include_personal = gr.Checkbox(
                value=True,
                label="Include Personal Emails",
            )

        fetch_button = gr.Button("Connect and Fetch Emails")
        output = gr.Textbox(
            label="Results",
            lines=20,
            show_copy_button=True,
        )

        # Input order must match fetch_emails' positional parameters.
        fetch_button.click(
            fn=fetch_emails,
            inputs=[days_back, include_job, include_personal],
            outputs=output,
        )

    return demo
|
225 |
+
|
226 |
+
if __name__ == "__main__":
    # Script entry point: build the interface, then start the Gradio server.
    # The UI object is bound to the module-level name ``demo``.
    demo = create_interface()
    demo.launch()
|
requirements.txt
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
gradio==4.19.2
|
2 |
+
google-auth-oauthlib==1.2.0
|
3 |
+
google-auth-httplib2==0.2.0
|
4 |
+
google-api-python-client==2.118.0
|
5 |
+
python-dateutil==2.8.2
|