Update app.py
Browse files
app.py
CHANGED
@@ -1,25 +1,37 @@
|
|
1 |
import time
|
2 |
-
import
|
3 |
-
import re
|
4 |
from fastapi import FastAPI
|
5 |
from fastapi.responses import HTMLResponse
|
6 |
from fastapi.staticfiles import StaticFiles
|
7 |
from selenium import webdriver
|
8 |
-
from selenium.webdriver.chrome.service import Service
|
9 |
from selenium.webdriver.common.by import By
|
10 |
-
from
|
11 |
from selenium.webdriver.chrome.options import Options
|
|
|
12 |
|
13 |
app = FastAPI()
|
14 |
|
|
|
|
|
|
|
15 |
# Serve static files
|
16 |
app.mount("/static", StaticFiles(directory="static"), name="static")
|
17 |
|
18 |
def scrape_upwork_data(search_query, num_jobs, page):
|
19 |
-
options
|
20 |
-
|
21 |
-
|
22 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
|
24 |
job_listings = []
|
25 |
try:
|
@@ -44,7 +56,8 @@ def scrape_upwork_data(search_query, num_jobs, page):
|
|
44 |
# Check for budget (fixed price or hourly)
|
45 |
try:
|
46 |
budget = job_info.find_element(By.CSS_SELECTOR, 'li[data-test="is-fixed-price"]').text.strip()
|
47 |
-
except:
|
|
|
48 |
budget = job_info.find_element(By.CSS_SELECTOR, 'li[data-test="duration-label"]').text.strip()
|
49 |
|
50 |
job_listings.append({
|
@@ -58,7 +71,7 @@ def scrape_upwork_data(search_query, num_jobs, page):
|
|
58 |
})
|
59 |
|
60 |
except Exception as e:
|
61 |
-
|
62 |
|
63 |
finally:
|
64 |
driver.quit()
|
@@ -86,7 +99,7 @@ async def read_root():
|
|
86 |
@app.get("/jobs", response_class=HTMLResponse)
|
87 |
async def get_jobs(query: str, num_jobs: int = 50):
|
88 |
jobs = []
|
89 |
-
for page in range(1,
|
90 |
job_listings = scrape_upwork_data(query, num_jobs, page)
|
91 |
jobs.extend(job_listings)
|
92 |
|
|
|
1 |
import time
|
2 |
+
import logging
|
|
|
3 |
from fastapi import FastAPI
|
4 |
from fastapi.responses import HTMLResponse
|
5 |
from fastapi.staticfiles import StaticFiles
|
6 |
from selenium import webdriver
|
|
|
7 |
from selenium.webdriver.common.by import By
|
8 |
+
from selenium.webdriver.chrome.service import Service
|
9 |
from selenium.webdriver.chrome.options import Options
|
10 |
+
from webdriver_manager.chrome import ChromeDriverManager
|
11 |
|
12 |
app = FastAPI()
|
13 |
|
14 |
+
# Configure logging
|
15 |
+
logging.basicConfig(level=logging.INFO)
|
16 |
+
|
17 |
# Serve static files
|
18 |
app.mount("/static", StaticFiles(directory="static"), name="static")
|
19 |
|
20 |
def scrape_upwork_data(search_query, num_jobs, page):
|
21 |
+
# Set up Chrome options for the remote WebDriver
|
22 |
+
chrome_options = webdriver.ChromeOptions()
|
23 |
+
chrome_options.add_argument("--headless")
|
24 |
+
chrome_options.add_argument("--window-size=1920x1080")
|
25 |
+
chrome_options.add_argument("--disable-gpu")
|
26 |
+
chrome_options.add_argument("--disable-extensions")
|
27 |
+
chrome_options.add_argument("--no-sandbox")
|
28 |
+
chrome_options.add_argument("--disable-dev-shm-usage")
|
29 |
+
|
30 |
+
# Set up the remote WebDriver
|
31 |
+
driver = webdriver.Remote(
|
32 |
+
command_executor='http://localhost:4444/wd/hub',
|
33 |
+
options=chrome_options
|
34 |
+
)
|
35 |
|
36 |
job_listings = []
|
37 |
try:
|
|
|
56 |
# Check for budget (fixed price or hourly)
|
57 |
try:
|
58 |
budget = job_info.find_element(By.CSS_SELECTOR, 'li[data-test="is-fixed-price"]').text.strip()
|
59 |
+
except Exception as e:
|
60 |
+
logging.error(f'Error finding budget: {e}')
|
61 |
budget = job_info.find_element(By.CSS_SELECTOR, 'li[data-test="duration-label"]').text.strip()
|
62 |
|
63 |
job_listings.append({
|
|
|
71 |
})
|
72 |
|
73 |
except Exception as e:
|
74 |
+
logging.error(f'Error parsing job listing: {e}')
|
75 |
|
76 |
finally:
|
77 |
driver.quit()
|
|
|
99 |
@app.get("/jobs", response_class=HTMLResponse)
|
100 |
async def get_jobs(query: str, num_jobs: int = 50):
|
101 |
jobs = []
|
102 |
+
for page in range(1, (num_jobs // 50) + 1): # Scrape as many pages as needed
|
103 |
job_listings = scrape_upwork_data(query, num_jobs, page)
|
104 |
jobs.extend(job_listings)
|
105 |
|