Spaces:
Runtime error
Runtime error
import os | |
import requests | |
from bs4 import BeautifulSoup | |
import json | |
from playwright.async_api import async_playwright | |
import asyncio | |
def create_design_directory(design_id): | |
"""Create a directory for the design if it doesn't exist""" | |
directory = f"designs/{design_id}" | |
if not os.path.exists(directory): | |
os.makedirs(directory) | |
return directory | |
def save_css(url, directory): | |
"""Download and save CSS file""" | |
response = requests.get(url) | |
css_path = f"{directory}/style.css" | |
with open(css_path, "w", encoding="utf-8") as f: | |
f.write(response.text) | |
def save_metadata(metadata, directory): | |
"""Save design metadata as JSON""" | |
metadata_path = f"{directory}/metadata.json" | |
with open(metadata_path, "w", encoding="utf-8") as f: | |
json.dump(metadata, f, indent=4) | |
async def take_screenshot(url, directory): | |
"""Take screenshots of the design at desktop and mobile widths""" | |
async with async_playwright() as p: | |
browser = await p.chromium.launch() | |
# Desktop screenshot (1920px width) | |
page = await browser.new_page(viewport={'width': 1920, 'height': 1080}) | |
await page.goto(url) | |
# Wait for network to be idle (no requests for at least 500ms) | |
await page.wait_for_load_state() | |
# Additional wait to ensure any animations/transitions complete | |
#await page.wait_for_timeout(2000) # 2 second delay | |
# Get full height | |
height = await page.evaluate('document.body.scrollHeight') | |
await page.set_viewport_size({'width': 1920, 'height': int(height)}) | |
await page.screenshot(path=f"{directory}/screenshot_desktop.png", full_page=True) | |
# Mobile screenshot (480px width) | |
page = await browser.new_page(viewport={'width': 480, 'height': 1080}) | |
await page.goto(url) | |
# Wait for network to be idle (no requests for at least 500ms) | |
await page.wait_for_load_state() | |
# Additional wait to ensure any animations/transitions complete | |
#await page.wait_for_timeout(2000) # 2 second delay | |
# Get full height | |
height = await page.evaluate('document.body.scrollHeight') | |
await page.set_viewport_size({'width': 480, 'height': int(height)}) | |
await page.screenshot(path=f"{directory}/screenshot_mobile.png", full_page=True) | |
await browser.close() | |
async def scrape_design(design_id): | |
"""Scrape a single design""" | |
# Create base URLs | |
design_url = f"https://www.csszengarden.com/{design_id}" | |
css_url = f"https://www.csszengarden.com/{design_id}/{design_id}.css" | |
# Create directory for this design | |
directory = create_design_directory(design_id) | |
# Get design page | |
response = requests.get(design_url) | |
print(f"{design_id}: Response status: {response.status_code}") | |
soup = BeautifulSoup(response.text, "html.parser") | |
# Extract metadata with error handling | |
try: | |
metadata = { | |
"id": design_id, | |
"url": design_url, | |
"css_url": css_url | |
} | |
except Exception as e: | |
print(f"\nError extracting metadata: {str(e)}") | |
raise | |
# Save everything | |
save_css(css_url, directory) | |
save_metadata(metadata, directory) | |
await take_screenshot(design_url, directory) | |
async def main(): | |
"""Main function to scrape multiple designs""" | |
# Create designs directory if it doesn't exist | |
if not os.path.exists("designs"): | |
os.makedirs("designs") | |
# List of design IDs to scrape | |
design_ids = ["221", "220", "219"] # Add more IDs as needed | |
for design_id in design_ids: | |
try: | |
print(f"Scraping design {design_id}...") | |
await scrape_design(design_id) | |
print(f"Successfully scraped design {design_id}") | |
except Exception as e: | |
print(f"Error scraping design {design_id}: {str(e)}") | |
if __name__ == "__main__": | |
asyncio.run(main()) |