acecalisto3 committed
Commit 496defd · verified · 1 Parent(s): 67dfce6

Update app2.py

Files changed (1)
  1. app2.py +208 -131
app2.py CHANGED
@@ -1,202 +1,279 @@
  import asyncio
  import gradio as gr
- from sqlalchemy.exc import SQLAlchemyError
- from sqlalchemy.ext.asyncio import create_async_engine, AsyncSession
- from sqlalchemy.future import select
- from sqlalchemy.orm import sessionmaker
  import logging
  import os
  import sys
- from typing import List, Dict, Any
- from datetime import datetime
- from pytz import timezone
- import pytz
  import requests
  from bs4 import BeautifulSoup
- import pandas as pd
- import numpy as np
- import matplotlib.pyplot as plt
- from sklearn.model_selection import train_test_split
- from sklearn.linear_model import LinearRegression
- from sklearn import metrics
-
- # Global variables for database session and engine
  db_session = None
  engine = None
  monitoring_task = None
  logger = logging.getLogger(__name__)

- # Configure logging
- logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
-
- # Function for dynamically setting the database connection
- async def set_db_connection(host: str, port: str, user: str, password: str, db_name: str):
      global db_session, engine
      try:
-         engine = create_async_engine(f"mysql+aiomysql://{user}:{password}@{host}:{port}/{db_name}", echo=False)
-         Session = sessionmaker(engine, class_=AsyncSession, expire_on_commit=False)
-         db_session = Session()
          return "Database connection established."
      except Exception as e:
          logger.error(f"Failed to establish database connection: {e}")
          return f"Failed to connect to database: {e}"

- # Periodic feed updater with error handling and logging improvements
- async def periodic_update_with_error_handling():
      while True:
-         try:
-             await asyncio.sleep(300)  # Wait for 5 minutes before updating the feed content.
-             await update_feed_content()  # Update the feed content.
-         except Exception as e:  # Catch all exceptions for logging purposes.
-             logger.error(f"Error in periodic update: {e}")  # Improved logging message format.

- # Function to fetch RSS feed content from the provided URL with error handling and logging improvements.
- async def fetch_feed_content(feed_url: str) -> Dict[str, Any]:
      try:
-         result = await db_session.execute(select(Article).order_by(Article.timestamp.desc()).limit(20))
-         articles = result.scalars().all()  # Fetch latest articles
-         feed = {
-             'title': 'Website Changes Feed',
-             'link': feed_url,
-             'description': 'Feed of changes detected on monitored websites.',
-             'items': [{'title': article.title, 'link': article.url, 'description': article.content, 'pubDate': str(article.timestamp)} for article in articles] if articles else []
-         }
-         return feed
      except Exception as e:
-         logger.error(f"Error fetching feed content: {e}")
-         return {}

- # Function to scrape website content
  async def scrape_website(url: str) -> str:
      try:
          response = requests.get(url)
-         soup = BeautifulSoup(response.text, 'html.parser')
          return soup.get_text()
-     except Exception as e:
-         logger.error(f"Error scraping website: {e}")
          return ""

- # Function to analyze website content
  async def analyze_website_content(content: str) -> Dict[str, Any]:
      try:
-         # Perform sentiment analysis using Natural Language Processing (NLP) techniques
-         # For simplicity, we'll use a basic sentiment analysis approach
          sentiment = "Positive" if content.count("good") > content.count("bad") else "Negative"
-         return {'sentiment': sentiment}
      except Exception as e:
          logger.error(f"Error analyzing website content: {e}")
          return {}

- # Function to predict website traffic
  async def predict_website_traffic(url: str) -> Dict[str, Any]:
      try:
-         # Use machine learning model to predict website traffic
-         # For simplicity, we'll use a basic linear regression model
-         X = pd.DataFrame({'url': [url]})
-         y = pd.DataFrame({'traffic': [100]})  # Replace with actual traffic data
-         X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
-         model = LinearRegression()
-         model.fit(X_train, y_train)
-         y_pred = model.predict(X_test)
-         return {'traffic': y_pred}
      except Exception as e:
          logger.error(f"Error predicting website traffic: {e}")
          return {}

- # Function to update database status
- async def update_db_status():
      try:
-         await db_session.execute("SELECT 1")
-         return "Connected"
-     except SQLAlchemyError:
-         return "Disconnected"

- # Main application that runs Gradio UI and background tasks
  async def main():
      global db_session, monitoring_task
-     engine = None
      demo = gr.Blocks()
-     # Define the Gradio interface
      with demo:
          gr.Markdown("# Website Monitor and Chatbot")
          with gr.Row():
              with gr.Column():
                  gr.Markdown("## Database Settings")
-                 db_host = gr.Textbox(label="Database Host", placeholder="localhost", value="localhost")
-                 db_port = gr.Textbox(label="Database Port", placeholder="3306", value="3306")
-                 db_user = gr.Textbox(label="Database User", placeholder="username", value="")
-                 db_pass = gr.Textbox(label="Database Password", placeholder="password", type="password", value="")
-                 db_name = gr.Textbox(label="Database Name", placeholder="database_name", value="monitoring")
                  db_status_textbox = gr.Textbox(label="Database Status", interactive=False)
                  status_text = gr.Textbox(label="Status", interactive=False)
                  gr.Markdown("## RSS Feed Reader Settings")
-                 feed_target_url = gr.Textbox(label="RSS Feed Target URL", placeholder="http://yourwebsite.com/feed")
                  view_button = gr.Button("View Feed")
                  target_urls = gr.Textbox(label="Target URLs (comma-separated)", placeholder="https://example.com, https://another-site.com")
-                 storage_location = gr.Textbox(label="Storage Location (CSV file path)", placeholder="/path/to/your/file.csv")
                  feed_rss_checkbox = gr.Checkbox(label="Enable RSS Feed")
                  start_button = gr.Button("Start Monitoring")
                  stop_button = gr.Button("Stop Monitoring")
              with gr.Column():
                  feed_content = gr.JSON(label="RSS Feed Content")
-                 chatbot_interface = gr.Chatbot(type='messages', stateful=True)  # Enable session state
                  message_input = gr.Textbox(placeholder="Type your message here...")
                  send_button = gr.Button("Send")
                  scrape_button = gr.Button("Scrape Website")
                  analyze_button = gr.Button("Analyze Website Content")
                  predict_button = gr.Button("Predict Website Traffic")
                  scrape_output = gr.Textbox(label="Scraped Website Content", interactive=False)
-                 analyze_output = gr.JSON(label="Website Content Analysis")  # Removed interactive=False
-                 predict_output = gr.JSON(label="Website Traffic Prediction")  # Removed interactive=False
-         # Define button actions
-         async def on_start_click(target_urls_str: str, storage_loc: str, feed_enabled: bool, host: str, port: str, user: str, password: str, db_name: str):
-             global monitoring_task
-             urls = [url.strip() for url in target_urls_str.split(",")]
-             await set_db_connection(host, port, user, password, db_name)
-             monitoring_task = asyncio.create_task(start_monitoring(urls, storage_loc, feed_enabled))
-             return "Monitoring started."
-         async def on_stop_click():
-             global monitoring_task
-             if monitoring_task:
-                 monitoring_task.cancel()
-                 monitoring_task = None
-             return "Monitoring stopped."
-         async def on_view_feed_click(feed_url: str):
-             return await fetch_feed_content(feed_url)
-         async def on_scrape_click(url: str):
-             return await scrape_website(url)
-         async def on_analyze_click(content: str):
-             return await analyze_website_content(content)
-         async def on_predict_click(url: str):
-             return await predict_website_traffic(url)
-         stop_button.click(on_stop_click, outputs=[status_text])
-         view_button.click(on_view_feed_click, inputs=[feed_target_url], outputs=[feed_content])
-         scrape_button.click(on_scrape_click, inputs=[target_urls], outputs=[scrape_output])
-         analyze_button.click(on_analyze_click, inputs=[scrape_output], outputs=[analyze_output])
-         predict_button.click(on_predict_click, inputs=[target_urls], outputs=[predict_output])
-         send_button.click(chatbot_response, inputs=[message_input, chatbot_interface], outputs=[chatbot_interface, message_input])
-         # Set up the timer for periodic updates
-         feed_updater = gr.Timer(interval=300)
-         feed_updater.tick(fn=update_feed_content, outputs=feed_content)
-         # Load and check database status when the UI is loaded
-         demo.load(update_db_status, outputs=db_status_textbox)
-         asyncio.create_task(periodic_update_with_error_handling())
-     # Launch the Gradio demo with a custom theme
-     await demo.launch(theme="default", title="Website Monitor and Chatbot")
-
- # Function to handle chatbot responses with session state
- async def chatbot_response(message: str, chat_history: List[str]) -> List[str]:
-     chat_history = chat_history or []
-     response = f"Echo: {message}"
-     chat_history.append((message, response))
-     return chat_history, ""
-
- # Function to start monitoring
- async def start_monitoring(urls: List[str], storage_location: str, feed_enabled: bool):
-     # Logic to start monitoring URLs and optionally save to CSV or enable RSS
-     print(f"Starting monitoring for {urls}, saving to {storage_location}, RSS enabled: {feed_enabled}")
-     return
-
- # Launch the app using asyncio
  if __name__ == "__main__":
      asyncio.run(main())
 
  import asyncio
  import gradio as gr
  import logging
  import os
  import sys
  import requests
  from bs4 import BeautifulSoup
+ from datetime import datetime
+ from pytz import timezone
+ from typing import List, Dict, Any
+ from sqlalchemy.exc import SQLAlchemyError
+ from sqlalchemy.ext.asyncio import create_async_engine, AsyncSession
+ from sqlalchemy.future import select
+ from sqlalchemy.orm import sessionmaker
+ from dotenv import load_dotenv
+ from pydantic import BaseSettings
+
+ # --- Configuration ---
+ load_dotenv()  # Load environment variables from .env file
+
+ class Settings(BaseSettings):
+     DATABASE_HOST: str = os.getenv("DATABASE_HOST", "localhost")
+     DATABASE_PORT: int = int(os.getenv("DATABASE_PORT", 3306))
+     DATABASE_USER: str = os.getenv("DATABASE_USER", "")
+     DATABASE_PASSWORD: str = os.getenv("DATABASE_PASSWORD", "")
+     DATABASE_NAME: str = os.getenv("DATABASE_NAME", "monitoring")
+     RSS_FEED_URL: str = os.getenv("RSS_FEED_URL", "")
+     STORAGE_LOCATION: str = os.getenv("STORAGE_LOCATION", "")
+     CHATBOT_API_KEY: str = os.getenv("CHATBOT_API_KEY", "")  # Add your chatbot API key
+
+     class Config:
+         env_file = ".env"
+         env_file_encoding = "utf-8"
+
+ settings = Settings()
+
+ # --- Database Model (Example) ---
+ class Article:
+     def __init__(self, title, url, content, timestamp):
+         self.title = title
+         self.url = url
+         self.content = content
+         self.timestamp = timestamp
+
+ # --- Global Variables ---
  db_session = None
  engine = None
  monitoring_task = None
  logger = logging.getLogger(__name__)
+ logger.setLevel(logging.DEBUG)

+ # --- Database Connection ---
+ async def set_db_connection():
      global db_session, engine
      try:
+         engine = create_async_engine(
+             f"mysql+aiomysql://{settings.DATABASE_USER}:{settings.DATABASE_PASSWORD}@{settings.DATABASE_HOST}:{settings.DATABASE_PORT}/{settings.DATABASE_NAME}",
+             echo=False,
+         )
+         async_session_maker = sessionmaker(engine, class_=AsyncSession, expire_on_commit=False)
+         db_session = async_session_maker()
+         logger.info("Database connection established.")
          return "Database connection established."
      except Exception as e:
          logger.error(f"Failed to establish database connection: {e}")
          return f"Failed to connect to database: {e}"

+ # --- Website Monitoring ---
+ async def start_monitoring(urls: List[str], storage_loc: str, feed_enabled: bool):
      while True:
+         for url in urls:
+             try:
+                 content = await scrape_website(url)
+                 analysis = await analyze_website_content(content)
+                 await store_data(url, content, analysis)
+                 if feed_enabled:
+                     await update_feed_content()
+             except Exception as e:
+                 logger.error(f"Error monitoring website {url}: {e}")
+         await asyncio.sleep(300)  # Check every 5 minutes

+ async def store_data(url: str, content: str, analysis: Dict[str, Any]):
      try:
+         async with db_session as session:
+             article = Article(
+                 title=f"Change on {url}",
+                 url=url,
+                 content=content,
+                 timestamp=datetime.now(timezone("UTC")),
+             )
+             session.add(article)
+             await session.commit()
      except Exception as e:
+         logger.error(f"Error storing data: {e}")
+
+ async def update_feed_content():
+     try:
+         # ... (Your RSS feed generation logic) ...
+         pass
+     except Exception as e:
+         logger.error(f"Error updating feed content: {e}")

+ # --- Website Scraping and Analysis ---
  async def scrape_website(url: str) -> str:
      try:
          response = requests.get(url)
+         response.raise_for_status()
+         soup = BeautifulSoup(response.text, "html.parser")
          return soup.get_text()
+     except requests.exceptions.RequestException as e:
+         logger.error(f"Error scraping website {url}: {e}")
          return ""

  async def analyze_website_content(content: str) -> Dict[str, Any]:
      try:
+         # Perform sentiment analysis or other analysis
          sentiment = "Positive" if content.count("good") > content.count("bad") else "Negative"
+         return {"sentiment": sentiment}
      except Exception as e:
          logger.error(f"Error analyzing website content: {e}")
          return {}

+ # --- Website Traffic Prediction ---
  async def predict_website_traffic(url: str) -> Dict[str, Any]:
      try:
+         # ... (Your machine learning model for traffic prediction) ...
+         return {"traffic": 100}  # Placeholder
      except Exception as e:
          logger.error(f"Error predicting website traffic: {e}")
          return {}

+ # --- Chatbot Integration ---
+ async def chatbot_response(message: str, chat_history: List[Dict[str, str]]) -> List[Dict[str, str]]:
      try:
+         if not settings.CHATBOT_API_KEY:
+             raise ValueError("Chatbot API key is not set.")
+         # ... (Your code to call the chatbot API) ...
+         # Example using a hypothetical API:
+         response = requests.post(
+             "https://your-chatbot-api.com/chat",
+             json={"message": message, "api_key": settings.CHATBOT_API_KEY},
+         )
+         response.raise_for_status()
+         bot_response = response.json()["response"]
+         chat_history.append({"role": "user", "content": message})
+         chat_history.append({"role": "bot", "content": bot_response})
+         return chat_history, ""
+     except Exception as e:
+         logger.error(f"Error calling chatbot API: {e}")
+         chat_history.append({"role": "bot", "content": "Sorry, I'm having trouble responding right now."})
+         return chat_history, ""

+ # --- Database Status ---
+ async def update_db_status():
+     global db_session, engine
+     if db_session and engine:
+         try:
+             await db_session.execute(select(1))
+             return "Database connection is active."
+         except SQLAlchemyError as e:
+             return f"Database error: {e}"
+     else:
+         return "Database connection not established."
+
+ # --- Gradio UI ---
  async def main():
      global db_session, monitoring_task
      demo = gr.Blocks()
+
      with demo:
          gr.Markdown("# Website Monitor and Chatbot")
          with gr.Row():
              with gr.Column():
                  gr.Markdown("## Database Settings")
+                 # ... (Database settings are now configured in .env file) ...
                  db_status_textbox = gr.Textbox(label="Database Status", interactive=False)
                  status_text = gr.Textbox(label="Status", interactive=False)
+
                  gr.Markdown("## RSS Feed Reader Settings")
+                 # ... (RSS feed settings are now configured in .env file) ...
                  view_button = gr.Button("View Feed")
                  target_urls = gr.Textbox(label="Target URLs (comma-separated)", placeholder="https://example.com, https://another-site.com")
+                 # ... (Storage location is now configured in .env file) ...
                  feed_rss_checkbox = gr.Checkbox(label="Enable RSS Feed")
                  start_button = gr.Button("Start Monitoring")
                  stop_button = gr.Button("Stop Monitoring")
+
              with gr.Column():
                  feed_content = gr.JSON(label="RSS Feed Content")
+                 chatbot_interface = gr.Chatbot(type="messages")
                  message_input = gr.Textbox(placeholder="Type your message here...")
                  send_button = gr.Button("Send")
                  scrape_button = gr.Button("Scrape Website")
                  analyze_button = gr.Button("Analyze Website Content")
                  predict_button = gr.Button("Predict Website Traffic")
                  scrape_output = gr.Textbox(label="Scraped Website Content", interactive=False)
+                 analyze_output = gr.JSON(label="Website Content Analysis", interactive=False)
+                 predict_output = gr.JSON(label="Website Traffic Prediction", interactive=False)
+
+         # --- Button Actions ---
+         start_button.click(
+             fn=on_start_click,
+             inputs=[target_urls, feed_rss_checkbox],  # Removed database settings
+             outputs=[status_text],
+         )
+
+         stop_button.click(fn=on_stop_click, outputs=[status_text])
+
+         view_button.click(
+             fn=on_view_feed_click,
+             inputs=[],  # Removed feed_url input (now from settings)
+             outputs=[feed_content],
+         )
+
+         scrape_button.click(
+             fn=on_scrape_click,
+             inputs=[target_urls],
+             outputs=[scrape_output],
+         )
+
+         analyze_button.click(
+             fn=on_analyze_click,
+             inputs=[scrape_output],
+             outputs=[analyze_output],
+         )
+
+         predict_button.click(
+             fn=on_predict_click,
+             inputs=[target_urls],
+             outputs=[predict_output],
+         )
+
+         send_button.click(
+             fn=chatbot_response,
+             inputs=[message_input, chatbot_interface],
+             outputs=[chatbot_interface, message_input],
+         )
+
+         # --- Periodic Updates ---
+         feed_updater = gr.Timer(interval=300)  # Update every 5 minutes
+         feed_updater.tick(fn=update_feed_content, outputs=[feed_content])
+
+         # --- Load Database Status ---
+         demo.load(fn=update_db_status, outputs=[db_status_textbox])
+
+     # --- Launch Gradio ---
+     await demo.launch()
+
+
+ # --- Helper Functions ---
+ async def on_start_click(target_urls_str: str, feed_enabled: bool):
+     global monitoring_task
+     urls = [url.strip() for url in target_urls_str.split(",")]
+     await set_db_connection()
+     monitoring_task = asyncio.create_task(start_monitoring(urls, settings.STORAGE_LOCATION, feed_enabled))
+     return "Monitoring started."
+
+ async def on_stop_click():
+     global monitoring_task
+     if monitoring_task:
+         monitoring_task.cancel()
+         monitoring_task = None
+     return "Monitoring stopped."
+
+ async def on_view_feed_click():
+     return await fetch_feed_content(settings.RSS_FEED_URL)
+
+ async def on_scrape_click(url: str):
+     return await scrape_website(url)
+
+ async def on_analyze_click(content: str):
+     return await analyze_website_content(content)
+
+ async def on_predict_click(url: str):
+     return await predict_website_traffic(url)
+
+ # --- Main Execution ---
  if __name__ == "__main__":
      asyncio.run(main())
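
The rewritten app2.py reads all of its settings from a .env file via load_dotenv() and pydantic's BaseSettings (on Pydantic 2.x that class lives in the separate pydantic-settings package, so the import in this commit assumes Pydantic 1.x). A minimal .env sketch using the variable names from the Settings class; every value below is a placeholder, not part of the commit:

DATABASE_HOST=localhost
DATABASE_PORT=3306
DATABASE_USER=monitoring_user
DATABASE_PASSWORD=change-me
DATABASE_NAME=monitoring
RSS_FEED_URL=http://yourwebsite.com/feed
STORAGE_LOCATION=/path/to/your/file.csv
CHATBOT_API_KEY=your-chatbot-api-key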
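
store_data() hands Article instances to session.add(), which only works with a mapped SQLAlchemy model, while the Article defined in this commit is a plain Python class. A minimal sketch of a mapped model that would satisfy both store_data() and a select(Article) query, assuming an "articles" table (the table name and column sizes are illustrative, not part of the commit):

from datetime import datetime

from sqlalchemy import Column, DateTime, Integer, String, Text
from sqlalchemy.orm import declarative_base

Base = declarative_base()


class Article(Base):
    __tablename__ = "articles"  # assumed table name, not defined in the commit

    id = Column(Integer, primary_key=True, autoincrement=True)
    title = Column(String(255))
    url = Column(String(2048))
    content = Column(Text)
    timestamp = Column(DateTime, default=datetime.utcnow)

Creating the table would still be a separate step, for example running await conn.run_sync(Base.metadata.create_all) inside async with engine.begin() as conn once the async engine exists.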
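
on_view_feed_click() still calls fetch_feed_content(settings.RSS_FEED_URL), but this commit removes that function without adding a replacement, so the View Feed button would raise a NameError. A sketch reconstructed from the removed implementation, assuming the mapped Article model above and the module-level db_session and logger from app2.py:

from typing import Any, Dict

from sqlalchemy.future import select


async def fetch_feed_content(feed_url: str) -> Dict[str, Any]:
    try:
        # Newest 20 stored articles, mirroring the removed implementation.
        result = await db_session.execute(
            select(Article).order_by(Article.timestamp.desc()).limit(20)
        )
        articles = result.scalars().all()
        return {
            "title": "Website Changes Feed",
            "link": feed_url,
            "description": "Feed of changes detected on monitored websites.",
            "items": [
                {
                    "title": a.title,
                    "link": a.url,
                    "description": a.content,
                    "pubDate": str(a.timestamp),
                }
                for a in articles
            ],
        }
    except Exception as e:
        logger.error(f"Error fetching feed content: {e}")
        return {}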