nnilayy committed on
Commit
132c75c
·
1 Parent(s): fd3bcc5

Remove Selenium

Browse files
Files changed (3) hide show
  1. Dockerfile +10 -57
  2. app.py +3 -129
  3. entrypoint.sh +0 -12
Dockerfile CHANGED
@@ -1,77 +1,30 @@
1
- # Use Ubuntu base for better compatibility
2
- FROM ubuntu:22.04
3
 
4
  # Set up environment variables
5
- ENV DEBIAN_FRONTEND=noninteractive \
6
- PYTHONUNBUFFERED=1 \
7
- PATH="/usr/bin:/home/user/.local/bin:$PATH" \
8
- XDG_RUNTIME_DIR=/tmp/runtime-user
9
 
10
- # Install system dependencies including Python 3.10
11
  RUN apt-get update && apt-get install -y --no-install-recommends \
12
  ca-certificates \
13
  curl \
14
- gnupg \
15
- wget \
16
- unzip \
17
- xvfb \
18
- libxss1 \
19
- libxtst6 \
20
- libnss3 \
21
- libatk1.0-0 \
22
- libatk-bridge2.0-0 \
23
- libcups2 \
24
- libgtk-3-0 \
25
- libgbm-dev \
26
- libxshmfence1 \
27
- python3.10 \
28
- python3.10-dev \
29
- python3.10-distutils \
30
- && rm -rf /var/lib/apt/lists/* \
31
- && ln -s /usr/bin/python3.10 /usr/bin/python3 \
32
- && ln -s /usr/bin/python3 /usr/bin/python
33
-
34
- # Install Chrome
35
- RUN curl -sS https://dl.google.com/linux/linux_signing_key.pub | gpg --dearmor -o /usr/share/keyrings/googlechrome-linux-keyring.gpg \
36
- && echo "deb [arch=amd64 signed-by=/usr/share/keyrings/googlechrome-linux-keyring.gpg] http://dl.google.com/linux/chrome/deb/ stable main" > /etc/apt/sources.list.d/google-chrome.list \
37
- && apt-get update \
38
- && apt-get install -y google-chrome-stable \
39
  && rm -rf /var/lib/apt/lists/*
40
 
41
- # Install ChromeDriver using Chrome for Testing
42
- RUN CHROME_VERSION=$(google-chrome-stable --version | awk -F '[ .]' '{print $3"."$4"."$5"."$6}') \
43
- && wget -q "https://edgedl.me.gvt1.com/edgedl/chrome/chrome-for-testing/${CHROME_VERSION}/linux64/chromedriver-linux64.zip" \
44
- && unzip chromedriver-linux64.zip \
45
- && mv chromedriver-linux64/chromedriver /usr/local/bin/ \
46
- && chmod +x /usr/local/bin/chromedriver \
47
- && rm -rf chromedriver-linux64*
48
-
49
- # Create non-root user and setup directories
50
- RUN useradd -m -u 1000 user \
51
- && mkdir -p /tmp/runtime-user \
52
- && chown user:user /tmp/runtime-user \
53
- && chmod 700 /tmp/runtime-user
54
-
55
  USER user
56
  WORKDIR /app
57
 
58
  # Install Python dependencies
59
  COPY --chown=user requirements.txt .
60
- RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.10 \
61
- && pip install --no-cache-dir --user -r requirements.txt
62
 
63
  # Copy application files
64
  COPY --chown=user . .
65
 
66
- # Expose port and healthcheck
67
  EXPOSE 7860
68
- HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \
69
- CMD curl --fail http://localhost:7860 || exit 1
70
-
71
- # Configure entrypoint
72
- COPY --chown=user entrypoint.sh .
73
- RUN chmod +x entrypoint.sh
74
- ENTRYPOINT ["./entrypoint.sh"]
75
 
76
- # Run application with explicit Python 3.10
77
  CMD ["python3", "app.py"]
 
1
+ # Use Python slim image for efficiency
2
+ FROM python:3.10-slim
3
 
4
  # Set up environment variables
5
+ ENV PYTHONUNBUFFERED=1 \
6
+ PATH="/home/user/.local/bin:$PATH"
 
 
7
 
8
+ # Install system dependencies
9
  RUN apt-get update && apt-get install -y --no-install-recommends \
10
  ca-certificates \
11
  curl \
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  && rm -rf /var/lib/apt/lists/*
13
 
14
+ # Create non-root user
15
+ RUN useradd -m -u 1000 user
 
 
 
 
 
 
 
 
 
 
 
 
16
  USER user
17
  WORKDIR /app
18
 
19
  # Install Python dependencies
20
  COPY --chown=user requirements.txt .
21
+ RUN pip install --no-cache-dir --user -r requirements.txt
 
22
 
23
  # Copy application files
24
  COPY --chown=user . .
25
 
26
+ # Expose port
27
  EXPOSE 7860
 
 
 
 
 
 
 
28
 
29
+ # Run application
30
  CMD ["python3", "app.py"]
app.py CHANGED
@@ -3,144 +3,18 @@ from pytubefix import YouTube
3
  import tempfile
4
  import base64
5
  import logging
6
- import time
7
- import random
8
- import json
9
  from io import BytesIO
10
- from selenium import webdriver
11
- from selenium.webdriver.chrome.options import Options
12
- from selenium.webdriver.common.by import By
13
- from selenium.common.exceptions import WebDriverException
14
 
15
  # Configure logging
16
  logging.basicConfig(level=logging.INFO)
17
  logger = logging.getLogger(__name__)
18
 
19
- # Random user agents and browser parameters
20
- USER_AGENTS = [
21
- "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
22
- "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36",
23
- "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36"
24
- ]
25
-
26
- VIEWPORT_SIZES = [
27
- "1920,1080", "1366,768", "1536,864",
28
- "1440,900", "1280,720", "2560,1440"
29
- ]
30
-
31
- def get_headless_browser():
32
- """Configure headless Chrome with anti-detection features"""
33
- chrome_options = Options()
34
- chrome_options.add_argument("--headless=new")
35
- chrome_options.add_argument("--no-sandbox")
36
- chrome_options.add_argument("--disable-dev-shm-usage")
37
- chrome_options.add_argument("--disable-blink-features=AutomationControlled")
38
- chrome_options.add_argument("--disable-infobars")
39
- chrome_options.add_argument("--disable-extensions")
40
- chrome_options.add_argument(f"--window-size={random.choice(VIEWPORT_SIZES)}")
41
-
42
- # Randomize user agent and language
43
- user_agent = random.choice(USER_AGENTS)
44
- chrome_options.add_argument(f"user-agent={user_agent}")
45
- chrome_options.add_argument("--lang=en-US,en;q=0.9")
46
-
47
- # Disable automation flags
48
- chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])
49
- chrome_options.add_experimental_option("useAutomationExtension", False)
50
-
51
- # Configure WebDriver
52
- driver = webdriver.Chrome(options=chrome_options)
53
-
54
- # Mask selenium parameters
55
- driver.execute_cdp_cmd("Network.setUserAgentOverride", {"userAgent": user_agent})
56
- driver.execute_cdp_cmd(
57
- "Page.addScriptToEvaluateOnNewDocument",
58
- {
59
- "source": """
60
- Object.defineProperty(navigator, 'webdriver', {
61
- get: () => undefined
62
- });
63
- """
64
- }
65
- )
66
- return driver
67
-
68
- def fetch_youtube_session(url):
69
- """Simulate human-like interaction with YouTube"""
70
- driver = get_headless_browser()
71
- try:
72
- logger.info("Starting browser session...")
73
-
74
- # Initial navigation with random delays
75
- driver.get("https://www.youtube.com")
76
- time.sleep(random.uniform(1, 3))
77
-
78
- # Navigate to target video
79
- driver.get(url)
80
- time.sleep(random.uniform(2, 5))
81
-
82
- # Scroll to trigger page behavior
83
- driver.execute_script("window.scrollTo(0, document.body.scrollHeight/3)")
84
- time.sleep(random.uniform(1, 2))
85
- driver.execute_script("window.scrollTo(0, document.body.scrollHeight/2)")
86
- time.sleep(random.uniform(1, 2))
87
-
88
- # Handle cookie consent
89
- try:
90
- accept_button = driver.find_element(
91
- By.XPATH,
92
- '//*[contains(text(), "Accept all")]'
93
- )
94
- accept_button.click()
95
- logger.info("Accepted cookies")
96
- time.sleep(random.uniform(1, 2))
97
- except Exception as e:
98
- logger.warning("No cookie consent found")
99
-
100
- # Collect browser data
101
- cookies = driver.get_cookies()
102
- localStorage = driver.execute_script("return JSON.stringify(window.localStorage);")
103
- user_agent = driver.execute_script("return navigator.userAgent;")
104
- visitor_data = next(
105
- (c['value'] for c in cookies if c['name'] == 'VISITOR_INFO1_LIVE'),
106
- None
107
- )
108
-
109
- return {
110
- "cookies": cookies,
111
- "local_storage": json.loads(localStorage),
112
- "user_agent": user_agent,
113
- "visitor_data": visitor_data
114
- }
115
-
116
- except WebDriverException as e:
117
- logger.error(f"Browser error: {str(e)}")
118
- raise
119
- finally:
120
- driver.quit()
121
-
122
  def download_audio(url):
123
  try:
124
- # Get fresh browser session data
125
- logger.info("Bypassing bot detection...")
126
- session_data = fetch_youtube_session(url)
127
-
128
- if not session_data["visitor_data"]:
129
- raise Exception("Failed to obtain visitor data")
130
 
131
- # Configure YouTube client with real browser data
132
- yt = YouTube(
133
- url,
134
- use_po_token=True,
135
- po_token_verifier=lambda: (session_data["visitor_data"], "BYPASS"),
136
- headers={
137
- "User-Agent": session_data["user_agent"],
138
- "Accept-Language": "en-US,en;q=0.9",
139
- "Referer": "https://www.youtube.com/",
140
- "Origin": "https://www.youtube.com"
141
- }
142
- )
143
-
144
  # Get best audio stream
145
  audio_stream = yt.streams.filter(only_audio=True).order_by('abr').desc().first()
146
  if not audio_stream:
 
3
  import tempfile
4
  import base64
5
  import logging
 
 
 
6
  from io import BytesIO
 
 
 
 
7
 
8
  # Configure logging
9
  logging.basicConfig(level=logging.INFO)
10
  logger = logging.getLogger(__name__)
11
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  def download_audio(url):
13
  try:
14
+ # Configure YouTube client
15
+ logger.info("Initializing YouTube object...")
16
+ yt = YouTube(url)
 
 
 
17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  # Get best audio stream
19
  audio_stream = yt.streams.filter(only_audio=True).order_by('abr').desc().first()
20
  if not audio_stream:
entrypoint.sh DELETED
@@ -1,12 +0,0 @@
1
- #!/bin/bash
2
- set -e
3
-
4
- # Configure X11 to use TCP instead of UNIX sockets
5
- export DISPLAY=:99
6
- Xvfb $DISPLAY -screen 0 1920x1080x24 -ac +extension GLX +render -noreset -listen tcp &
7
-
8
- # Human-like random delay
9
- sleep $(( RANDOM % 3 + 1 ))
10
-
11
- # Execute command with explicit Python path
12
- exec "$@"