Spaces:
Paused
Paused
Remove Selenium
Browse files
- Dockerfile +10 -57
- app.py +3 -129
- entrypoint.sh +0 -12
Dockerfile
CHANGED
|
@@ -1,77 +1,30 @@
|
|
| 1 |
-
# Use
|
| 2 |
-
FROM
|
| 3 |
|
| 4 |
# Set up environment variables
|
| 5 |
-
ENV
|
| 6 |
-
|
| 7 |
-
PATH="/usr/bin:/home/user/.local/bin:$PATH" \
|
| 8 |
-
XDG_RUNTIME_DIR=/tmp/runtime-user
|
| 9 |
|
| 10 |
-
# Install system dependencies
|
| 11 |
RUN apt-get update && apt-get install -y --no-install-recommends \
|
| 12 |
ca-certificates \
|
| 13 |
curl \
|
| 14 |
-
gnupg \
|
| 15 |
-
wget \
|
| 16 |
-
unzip \
|
| 17 |
-
xvfb \
|
| 18 |
-
libxss1 \
|
| 19 |
-
libxtst6 \
|
| 20 |
-
libnss3 \
|
| 21 |
-
libatk1.0-0 \
|
| 22 |
-
libatk-bridge2.0-0 \
|
| 23 |
-
libcups2 \
|
| 24 |
-
libgtk-3-0 \
|
| 25 |
-
libgbm-dev \
|
| 26 |
-
libxshmfence1 \
|
| 27 |
-
python3.10 \
|
| 28 |
-
python3.10-dev \
|
| 29 |
-
python3.10-distutils \
|
| 30 |
-
&& rm -rf /var/lib/apt/lists/* \
|
| 31 |
-
&& ln -s /usr/bin/python3.10 /usr/bin/python3 \
|
| 32 |
-
&& ln -s /usr/bin/python3 /usr/bin/python
|
| 33 |
-
|
| 34 |
-
# Install Chrome
|
| 35 |
-
RUN curl -sS https://dl.google.com/linux/linux_signing_key.pub | gpg --dearmor -o /usr/share/keyrings/googlechrome-linux-keyring.gpg \
|
| 36 |
-
&& echo "deb [arch=amd64 signed-by=/usr/share/keyrings/googlechrome-linux-keyring.gpg] http://dl.google.com/linux/chrome/deb/ stable main" > /etc/apt/sources.list.d/google-chrome.list \
|
| 37 |
-
&& apt-get update \
|
| 38 |
-
&& apt-get install -y google-chrome-stable \
|
| 39 |
&& rm -rf /var/lib/apt/lists/*
|
| 40 |
|
| 41 |
-
#
|
| 42 |
-
RUN
|
| 43 |
-
&& wget -q "https://edgedl.me.gvt1.com/edgedl/chrome/chrome-for-testing/${CHROME_VERSION}/linux64/chromedriver-linux64.zip" \
|
| 44 |
-
&& unzip chromedriver-linux64.zip \
|
| 45 |
-
&& mv chromedriver-linux64/chromedriver /usr/local/bin/ \
|
| 46 |
-
&& chmod +x /usr/local/bin/chromedriver \
|
| 47 |
-
&& rm -rf chromedriver-linux64*
|
| 48 |
-
|
| 49 |
-
# Create non-root user and setup directories
|
| 50 |
-
RUN useradd -m -u 1000 user \
|
| 51 |
-
&& mkdir -p /tmp/runtime-user \
|
| 52 |
-
&& chown user:user /tmp/runtime-user \
|
| 53 |
-
&& chmod 700 /tmp/runtime-user
|
| 54 |
-
|
| 55 |
USER user
|
| 56 |
WORKDIR /app
|
| 57 |
|
| 58 |
# Install Python dependencies
|
| 59 |
COPY --chown=user requirements.txt .
|
| 60 |
-
RUN
|
| 61 |
-
&& pip install --no-cache-dir --user -r requirements.txt
|
| 62 |
|
| 63 |
# Copy application files
|
| 64 |
COPY --chown=user . .
|
| 65 |
|
| 66 |
-
# Expose port
|
| 67 |
EXPOSE 7860
|
| 68 |
-
HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \
|
| 69 |
-
CMD curl --fail http://localhost:7860 || exit 1
|
| 70 |
-
|
| 71 |
-
# Configure entrypoint
|
| 72 |
-
COPY --chown=user entrypoint.sh .
|
| 73 |
-
RUN chmod +x entrypoint.sh
|
| 74 |
-
ENTRYPOINT ["./entrypoint.sh"]
|
| 75 |
|
| 76 |
-
# Run application
|
| 77 |
CMD ["python3", "app.py"]
|
|
|
|
| 1 |
+
# Use Python slim image for efficiency
|
| 2 |
+
FROM python:3.10-slim
|
| 3 |
|
| 4 |
# Set up environment variables
|
| 5 |
+
ENV PYTHONUNBUFFERED=1 \
|
| 6 |
+
PATH="/home/user/.local/bin:$PATH"
|
|
|
|
|
|
|
| 7 |
|
| 8 |
+
# Install system dependencies
|
| 9 |
RUN apt-get update && apt-get install -y --no-install-recommends \
|
| 10 |
ca-certificates \
|
| 11 |
curl \
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
&& rm -rf /var/lib/apt/lists/*
|
| 13 |
|
| 14 |
+
# Create non-root user
|
| 15 |
+
RUN useradd -m -u 1000 user
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
USER user
|
| 17 |
WORKDIR /app
|
| 18 |
|
| 19 |
# Install Python dependencies
|
| 20 |
COPY --chown=user requirements.txt .
|
| 21 |
+
RUN pip install --no-cache-dir --user -r requirements.txt
|
|
|
|
| 22 |
|
| 23 |
# Copy application files
|
| 24 |
COPY --chown=user . .
|
| 25 |
|
| 26 |
+
# Expose port
|
| 27 |
EXPOSE 7860
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
|
| 29 |
+
# Run application
|
| 30 |
CMD ["python3", "app.py"]
|
app.py
CHANGED
|
@@ -3,144 +3,18 @@ from pytubefix import YouTube
|
|
| 3 |
import tempfile
|
| 4 |
import base64
|
| 5 |
import logging
|
| 6 |
-
import time
|
| 7 |
-
import random
|
| 8 |
-
import json
|
| 9 |
from io import BytesIO
|
| 10 |
-
from selenium import webdriver
|
| 11 |
-
from selenium.webdriver.chrome.options import Options
|
| 12 |
-
from selenium.webdriver.common.by import By
|
| 13 |
-
from selenium.common.exceptions import WebDriverException
|
| 14 |
|
| 15 |
# Configure logging
|
| 16 |
logging.basicConfig(level=logging.INFO)
|
| 17 |
logger = logging.getLogger(__name__)
|
| 18 |
|
| 19 |
-
# Random user agents and browser parameters
|
| 20 |
-
USER_AGENTS = [
|
| 21 |
-
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
|
| 22 |
-
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36",
|
| 23 |
-
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36"
|
| 24 |
-
]
|
| 25 |
-
|
| 26 |
-
VIEWPORT_SIZES = [
|
| 27 |
-
"1920,1080", "1366,768", "1536,864",
|
| 28 |
-
"1440,900", "1280,720", "2560,1440"
|
| 29 |
-
]
|
| 30 |
-
|
| 31 |
-
def get_headless_browser():
|
| 32 |
-
"""Configure headless Chrome with anti-detection features"""
|
| 33 |
-
chrome_options = Options()
|
| 34 |
-
chrome_options.add_argument("--headless=new")
|
| 35 |
-
chrome_options.add_argument("--no-sandbox")
|
| 36 |
-
chrome_options.add_argument("--disable-dev-shm-usage")
|
| 37 |
-
chrome_options.add_argument("--disable-blink-features=AutomationControlled")
|
| 38 |
-
chrome_options.add_argument("--disable-infobars")
|
| 39 |
-
chrome_options.add_argument("--disable-extensions")
|
| 40 |
-
chrome_options.add_argument(f"--window-size={random.choice(VIEWPORT_SIZES)}")
|
| 41 |
-
|
| 42 |
-
# Randomize user agent and language
|
| 43 |
-
user_agent = random.choice(USER_AGENTS)
|
| 44 |
-
chrome_options.add_argument(f"user-agent={user_agent}")
|
| 45 |
-
chrome_options.add_argument("--lang=en-US,en;q=0.9")
|
| 46 |
-
|
| 47 |
-
# Disable automation flags
|
| 48 |
-
chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])
|
| 49 |
-
chrome_options.add_experimental_option("useAutomationExtension", False)
|
| 50 |
-
|
| 51 |
-
# Configure WebDriver
|
| 52 |
-
driver = webdriver.Chrome(options=chrome_options)
|
| 53 |
-
|
| 54 |
-
# Mask selenium parameters
|
| 55 |
-
driver.execute_cdp_cmd("Network.setUserAgentOverride", {"userAgent": user_agent})
|
| 56 |
-
driver.execute_cdp_cmd(
|
| 57 |
-
"Page.addScriptToEvaluateOnNewDocument",
|
| 58 |
-
{
|
| 59 |
-
"source": """
|
| 60 |
-
Object.defineProperty(navigator, 'webdriver', {
|
| 61 |
-
get: () => undefined
|
| 62 |
-
});
|
| 63 |
-
"""
|
| 64 |
-
}
|
| 65 |
-
)
|
| 66 |
-
return driver
|
| 67 |
-
|
| 68 |
-
def fetch_youtube_session(url):
|
| 69 |
-
"""Simulate human-like interaction with YouTube"""
|
| 70 |
-
driver = get_headless_browser()
|
| 71 |
-
try:
|
| 72 |
-
logger.info("Starting browser session...")
|
| 73 |
-
|
| 74 |
-
# Initial navigation with random delays
|
| 75 |
-
driver.get("https://www.youtube.com")
|
| 76 |
-
time.sleep(random.uniform(1, 3))
|
| 77 |
-
|
| 78 |
-
# Navigate to target video
|
| 79 |
-
driver.get(url)
|
| 80 |
-
time.sleep(random.uniform(2, 5))
|
| 81 |
-
|
| 82 |
-
# Scroll to trigger page behavior
|
| 83 |
-
driver.execute_script("window.scrollTo(0, document.body.scrollHeight/3)")
|
| 84 |
-
time.sleep(random.uniform(1, 2))
|
| 85 |
-
driver.execute_script("window.scrollTo(0, document.body.scrollHeight/2)")
|
| 86 |
-
time.sleep(random.uniform(1, 2))
|
| 87 |
-
|
| 88 |
-
# Handle cookie consent
|
| 89 |
-
try:
|
| 90 |
-
accept_button = driver.find_element(
|
| 91 |
-
By.XPATH,
|
| 92 |
-
'//*[contains(text(), "Accept all")]'
|
| 93 |
-
)
|
| 94 |
-
accept_button.click()
|
| 95 |
-
logger.info("Accepted cookies")
|
| 96 |
-
time.sleep(random.uniform(1, 2))
|
| 97 |
-
except Exception as e:
|
| 98 |
-
logger.warning("No cookie consent found")
|
| 99 |
-
|
| 100 |
-
# Collect browser data
|
| 101 |
-
cookies = driver.get_cookies()
|
| 102 |
-
localStorage = driver.execute_script("return JSON.stringify(window.localStorage);")
|
| 103 |
-
user_agent = driver.execute_script("return navigator.userAgent;")
|
| 104 |
-
visitor_data = next(
|
| 105 |
-
(c['value'] for c in cookies if c['name'] == 'VISITOR_INFO1_LIVE'),
|
| 106 |
-
None
|
| 107 |
-
)
|
| 108 |
-
|
| 109 |
-
return {
|
| 110 |
-
"cookies": cookies,
|
| 111 |
-
"local_storage": json.loads(localStorage),
|
| 112 |
-
"user_agent": user_agent,
|
| 113 |
-
"visitor_data": visitor_data
|
| 114 |
-
}
|
| 115 |
-
|
| 116 |
-
except WebDriverException as e:
|
| 117 |
-
logger.error(f"Browser error: {str(e)}")
|
| 118 |
-
raise
|
| 119 |
-
finally:
|
| 120 |
-
driver.quit()
|
| 121 |
-
|
| 122 |
def download_audio(url):
|
| 123 |
try:
|
| 124 |
-
#
|
| 125 |
-
logger.info("
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
if not session_data["visitor_data"]:
|
| 129 |
-
raise Exception("Failed to obtain visitor data")
|
| 130 |
|
| 131 |
-
# Configure YouTube client with real browser data
|
| 132 |
-
yt = YouTube(
|
| 133 |
-
url,
|
| 134 |
-
use_po_token=True,
|
| 135 |
-
po_token_verifier=lambda: (session_data["visitor_data"], "BYPASS"),
|
| 136 |
-
headers={
|
| 137 |
-
"User-Agent": session_data["user_agent"],
|
| 138 |
-
"Accept-Language": "en-US,en;q=0.9",
|
| 139 |
-
"Referer": "https://www.youtube.com/",
|
| 140 |
-
"Origin": "https://www.youtube.com"
|
| 141 |
-
}
|
| 142 |
-
)
|
| 143 |
-
|
| 144 |
# Get best audio stream
|
| 145 |
audio_stream = yt.streams.filter(only_audio=True).order_by('abr').desc().first()
|
| 146 |
if not audio_stream:
|
|
|
|
| 3 |
import tempfile
|
| 4 |
import base64
|
| 5 |
import logging
|
|
|
|
|
|
|
|
|
|
| 6 |
from io import BytesIO
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
|
| 8 |
# Configure logging
|
| 9 |
logging.basicConfig(level=logging.INFO)
|
| 10 |
logger = logging.getLogger(__name__)
|
| 11 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
def download_audio(url):
|
| 13 |
try:
|
| 14 |
+
# Configure YouTube client
|
| 15 |
+
logger.info("Initializing YouTube object...")
|
| 16 |
+
yt = YouTube(url)
|
|
|
|
|
|
|
|
|
|
| 17 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
# Get best audio stream
|
| 19 |
audio_stream = yt.streams.filter(only_audio=True).order_by('abr').desc().first()
|
| 20 |
if not audio_stream:
|
entrypoint.sh
DELETED
|
@@ -1,12 +0,0 @@
|
|
| 1 |
-
#!/bin/bash
|
| 2 |
-
set -e
|
| 3 |
-
|
| 4 |
-
# Configure X11 to use TCP instead of UNIX sockets
|
| 5 |
-
export DISPLAY=:99
|
| 6 |
-
Xvfb $DISPLAY -screen 0 1920x1080x24 -ac +extension GLX +render -noreset -listen tcp &
|
| 7 |
-
|
| 8 |
-
# Human-like random delay
|
| 9 |
-
sleep $(( RANDOM % 3 + 1 ))
|
| 10 |
-
|
| 11 |
-
# Execute command with explicit Python path
|
| 12 |
-
exec "$@"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|