srinuksv committed (verified)
Commit 8106f26 · Parent: 890a670

Update app.py

Files changed (1)
  1. app.py +24 -11
app.py CHANGED
@@ -1,25 +1,37 @@
 import time
-import pandas as pd
-import re
+import logging
 from fastapi import FastAPI
 from fastapi.responses import HTMLResponse
 from fastapi.staticfiles import StaticFiles
 from selenium import webdriver
-from selenium.webdriver.chrome.service import Service
 from selenium.webdriver.common.by import By
-from webdriver_manager.chrome import ChromeDriverManager
+from selenium.webdriver.chrome.service import Service
 from selenium.webdriver.chrome.options import Options
+from webdriver_manager.chrome import ChromeDriverManager
 
 app = FastAPI()
 
+# Configure logging
+logging.basicConfig(level=logging.INFO)
+
 # Serve static files
 app.mount("/static", StaticFiles(directory="static"), name="static")
 
 def scrape_upwork_data(search_query, num_jobs, page):
-    options = Options()
-    options.add_argument("--headless")  # Run in headless mode for faster scraping
-    service = Service(ChromeDriverManager().install())
-    driver = webdriver.Chrome(service=service, options=options)
+    # Setup Chrome options for remote WebDriver
+    chrome_options = webdriver.ChromeOptions()
+    chrome_options.add_argument("--headless")
+    chrome_options.add_argument("--window-size=1920x1080")
+    chrome_options.add_argument("--disable-gpu")
+    chrome_options.add_argument("--disable-extensions")
+    chrome_options.add_argument("--no-sandbox")
+    chrome_options.add_argument("--disable-dev-shm-usage")
+
+    # Setup the remote WebDriver
+    driver = webdriver.Remote(
+        command_executor='http://localhost:4444/wd/hub',
+        options=chrome_options
+    )
 
     job_listings = []
     try:
@@ -44,7 +56,8 @@ def scrape_upwork_data(search_query, num_jobs, page):
             # Check for budget (fixed price or hourly)
             try:
                 budget = job_info.find_element(By.CSS_SELECTOR, 'li[data-test="is-fixed-price"]').text.strip()
-            except:
+            except Exception as e:
+                logging.error(f'Error finding budget: {e}')
                 budget = job_info.find_element(By.CSS_SELECTOR, 'li[data-test="duration-label"]').text.strip()
 
             job_listings.append({
@@ -58,7 +71,7 @@ def scrape_upwork_data(search_query, num_jobs, page):
             })
 
         except Exception as e:
-            print(f'Error parsing job listing: {e}')
+            logging.error(f'Error parsing job listing: {e}')
 
     finally:
         driver.quit()
@@ -86,7 +99,7 @@ async def read_root():
 @app.get("/jobs", response_class=HTMLResponse)
 async def get_jobs(query: str, num_jobs: int = 50):
     jobs = []
-    for page in range(1, 3):  # Change to however many pages you want to scrape
+    for page in range(1, (num_jobs // 50) + 1):  # Scrape as many pages as needed
         job_listings = scrape_upwork_data(query, num_jobs, page)
         jobs.extend(job_listings)
 
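Note on running the updated code: scrape_upwork_data no longer launches a local Chrome (the Service and ChromeDriverManager imports are kept but now unused) and instead expects a Selenium server listening at http://localhost:4444/wd/hub, e.g. one started with the official selenium/standalone-chrome Docker image (docker run -d -p 4444:4444 --shm-size=2g selenium/standalone-chrome). A minimal smoke test for that assumption, using the same URL and options as the diff; the helper name and test URL below are illustrative, not part of the commit:

import logging
from selenium import webdriver

def check_selenium_hub(hub_url="http://localhost:4444/wd/hub"):
    """Open and close one remote session to verify the hub app.py relies on."""
    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--disable-dev-shm-usage")
    try:
        driver = webdriver.Remote(command_executor=hub_url, options=chrome_options)
    except Exception as e:  # hub not running or unreachable
        logging.error(f"Selenium hub unreachable at {hub_url}: {e}")
        return False
    try:
        driver.get("https://www.upwork.com")  # illustrative target page
        logging.info(f"Remote session OK, landed on: {driver.title!r}")
        return True
    finally:
        driver.quit()

if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    check_selenium_hub()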
 
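One behavioral detail in the new paging loop: range(1, (num_jobs // 50) + 1) floors the page count, so the default num_jobs=50 fetches one page, but num_jobs=49 fetches no pages and num_jobs=75 fetches only one. If the intent really is "as many pages as needed" at 50 results per page (an assumption; the page size is not stated in the diff), ceiling division covers the remainder. A sketch:

def pages_needed(num_jobs, per_page=50):
    """Ceiling division: smallest page range covering num_jobs results."""
    return range(1, -(-num_jobs // per_page) + 1)

assert list(pages_needed(50)) == [1]      # exactly one page
assert list(pages_needed(75)) == [1, 2]   # remainder gets a second page
assert list(pages_needed(49)) == [1]      # partial page still fetched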