Technologic101's picture
task: move data production
25e66b2
raw
history blame
4.02 kB
import os
import requests
from bs4 import BeautifulSoup
import json
from playwright.async_api import async_playwright
import asyncio
def create_design_directory(design_id):
"""Create a directory for the design if it doesn't exist"""
directory = f"designs/{design_id}"
if not os.path.exists(directory):
os.makedirs(directory)
return directory
def save_css(url, directory):
"""Download and save CSS file"""
response = requests.get(url)
css_path = f"{directory}/style.css"
with open(css_path, "w", encoding="utf-8") as f:
f.write(response.text)
def save_metadata(metadata, directory):
"""Save design metadata as JSON"""
metadata_path = f"{directory}/metadata.json"
with open(metadata_path, "w", encoding="utf-8") as f:
json.dump(metadata, f, indent=4)
async def take_screenshot(url, directory):
"""Take screenshots of the design at desktop and mobile widths"""
async with async_playwright() as p:
browser = await p.chromium.launch()
# Desktop screenshot (1920px width)
page = await browser.new_page(viewport={'width': 1920, 'height': 1080})
await page.goto(url)
# Wait for network to be idle (no requests for at least 500ms)
await page.wait_for_load_state()
# Additional wait to ensure any animations/transitions complete
#await page.wait_for_timeout(2000) # 2 second delay
# Get full height
height = await page.evaluate('document.body.scrollHeight')
await page.set_viewport_size({'width': 1920, 'height': int(height)})
await page.screenshot(path=f"{directory}/screenshot_desktop.png", full_page=True)
# Mobile screenshot (480px width)
page = await browser.new_page(viewport={'width': 480, 'height': 1080})
await page.goto(url)
# Wait for network to be idle (no requests for at least 500ms)
await page.wait_for_load_state()
# Additional wait to ensure any animations/transitions complete
#await page.wait_for_timeout(2000) # 2 second delay
# Get full height
height = await page.evaluate('document.body.scrollHeight')
await page.set_viewport_size({'width': 480, 'height': int(height)})
await page.screenshot(path=f"{directory}/screenshot_mobile.png", full_page=True)
await browser.close()
async def scrape_design(design_id):
"""Scrape a single design"""
# Create base URLs
design_url = f"https://www.csszengarden.com/{design_id}"
css_url = f"https://www.csszengarden.com/{design_id}/{design_id}.css"
# Create directory for this design
directory = create_design_directory(design_id)
# Get design page
response = requests.get(design_url)
print(f"{design_id}: Response status: {response.status_code}")
soup = BeautifulSoup(response.text, "html.parser")
# Extract metadata with error handling
try:
metadata = {
"id": design_id,
"url": design_url,
"css_url": css_url
}
except Exception as e:
print(f"\nError extracting metadata: {str(e)}")
raise
# Save everything
save_css(css_url, directory)
save_metadata(metadata, directory)
await take_screenshot(design_url, directory)
async def main():
"""Main function to scrape multiple designs"""
# Create designs directory if it doesn't exist
if not os.path.exists("designs"):
os.makedirs("designs")
# List of design IDs to scrape
design_ids = ["221", "220", "219"] # Add more IDs as needed
for design_id in design_ids:
try:
print(f"Scraping design {design_id}...")
await scrape_design(design_id)
print(f"Successfully scraped design {design_id}")
except Exception as e:
print(f"Error scraping design {design_id}: {str(e)}")
if __name__ == "__main__":
asyncio.run(main())