NitinBot001 committed on
Commit
e3f5d52
·
verified ·
1 Parent(s): a5bb39e

Upload 3 files

Browse files
Files changed (3) hide show
  1. Dockerfile +38 -0
  2. main.py +364 -0
  3. requirements.txt +10 -0
Dockerfile ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Use Python 3.11 slim image
FROM python:3.11-slim

# Set working directory
WORKDIR /app

# Install system dependencies (compilers for native wheels, curl for the healthcheck)
RUN apt-get update && apt-get install -y \
    gcc \
    g++ \
    curl \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements first for better layer caching
COPY requirements.txt .

# Install Python dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Copy application code
COPY . .

# Create necessary directories
RUN mkdir -p /app/templates /app/static /tmp

# Set environment variables
ENV PYTHONPATH="/app"
ENV PYTHONUNBUFFERED=1

# Expose port
EXPOSE 7860

# Health check
# BUG FIX: probe the port the app actually listens on (7860 — see EXPOSE and
# CMD below). The original probed 8000, so the container was always reported
# unhealthy even when the API was up.
HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \
    CMD curl -f http://localhost:7860/health || exit 1

# Run the application
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
main.py ADDED
@@ -0,0 +1,364 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Standard library
import asyncio
import json
import logging
import os
from datetime import datetime
from typing import List, Optional, Dict, Any

# Third-party
import uvicorn
from fastapi import FastAPI, HTTPException, BackgroundTasks
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import HTMLResponse, FileResponse
from fastapi.staticfiles import StaticFiles
from pydantic import BaseModel, Field

# Local — scraper implementations
from app3 import PhoneDBScraper, GSMArenaScraperAlternative
16
+
17
# Configure logging
# One-time root-logger setup; the module logger is namespaced by __name__ so
# records show which module emitted them.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
20
+
21
# Create FastAPI app
app = FastAPI(
    title="Phone Specifications API",
    description="API for scraping phone specifications from PhoneDB and GSMArena",
    version="1.0.0"
)

# Add CORS middleware
# NOTE(review): allow_origins=["*"] together with allow_credentials=True is
# rejected by browsers for credentialed requests — lock origins down in
# production as the inline comment already suggests.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # In production, specify allowed origins
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Mount static files for frontend — guarded so the app still boots in
# environments where no static/ directory was bundled.
if os.path.exists("static"):
    app.mount("/static", StaticFiles(directory="static"), name="static")
40
+
41
# Pydantic models
class PhoneSearchRequest(BaseModel):
    """Request body for /api/search: one phone name plus the scraper source."""
    phone_name: str
    source: str = "gsmarena"  # "phonedb" or "gsmarena"
45
+
46
class MultiplePhoneSearchRequest(BaseModel):
    """Request body for the batch endpoints: several phone names, one source."""
    phone_names: List[str]
    source: str = "gsmarena"  # "phonedb" or "gsmarena"
49
+
50
class PhoneSpecification(BaseModel):
    """Scraped phone record.

    NOTE(review): declared but not referenced as a response_model by any route
    visible in this file — results are returned as raw dicts; confirm intent.
    """
    name: str
    brand: str
    images: List[str]
    specifications: Dict[str, Any]
    source_url: str
56
+
57
class ApiResponse(BaseModel):
    """Uniform response envelope returned by every endpoint."""
    success: bool
    message: str
    data: Optional[Any] = None
    # BUG FIX: the original default `datetime.now().isoformat()` was evaluated
    # ONCE at class-definition time, so every response carried the process
    # start timestamp. default_factory re-evaluates per instance.
    timestamp: str = Field(default_factory=lambda: datetime.now().isoformat())
62
+
63
# Global scrapers — populated by the startup hook below; None means the
# corresponding source failed to initialize and is unavailable.
phonedb_scraper = None
gsmarena_scraper = None
66
+
67
# NOTE(review): @app.on_event is deprecated in recent FastAPI in favour of
# lifespan handlers — confirm the pinned FastAPI version before migrating.
@app.on_event("startup")
async def startup_event():
    """Initialize scrapers on startup"""
    global phonedb_scraper, gsmarena_scraper
    try:
        phonedb_scraper = PhoneDBScraper()
        gsmarena_scraper = GSMArenaScraperAlternative()
        logger.info("Scrapers initialized successfully")
    except Exception as e:
        # Deliberate best-effort: the API still boots with no (or partial)
        # scrapers; /health and per-request checks surface the degraded state.
        logger.error(f"Error initializing scrapers: {e}")
77
+
78
# Routes
@app.get("/", response_class=HTMLResponse)
async def read_root():
    """Serve the main HTML page"""
    # Prefer the bundled template; fall back to a minimal inline page so the
    # root URL never 500s when templates/ was not shipped with the image.
    try:
        with open("templates/index.html", "r", encoding="utf-8") as f:
            return HTMLResponse(content=f.read())
    except FileNotFoundError:
        return HTMLResponse(content="""
        <html>
            <head><title>Phone Specs API</title></head>
            <body>
                <h1>Phone Specifications API</h1>
                <p>API is running! Visit <a href="/docs">/docs</a> for API documentation.</p>
            </body>
        </html>
        """)
95
+
96
@app.get("/health")
async def health_check():
    """Liveness probe: reports which scrapers survived startup initialization."""
    scraper_state = {
        "phonedb": phonedb_scraper is not None,
        "gsmarena": gsmarena_scraper is not None,
    }
    return ApiResponse(
        success=True,
        message="API is healthy",
        data={"status": "running", "scrapers": scraper_state},
    )
104
+
105
@app.post("/api/search", response_model=ApiResponse)
async def search_phone(request: PhoneSearchRequest):
    """Search for a single phone.

    Picks the requested scraper (falling back to whichever one initialized),
    runs the blocking scrape in the default thread-pool executor, and wraps
    the result in the standard ApiResponse envelope.

    Raises:
        HTTPException 503: no scraper is available.
        HTTPException 500: unexpected scraping failure.
    """
    try:
        logger.info(f"Searching for phone: {request.phone_name} using {request.source}")

        # Choose scraper based on source, falling back to any available one.
        if request.source.lower() == "phonedb" and phonedb_scraper:
            scraper = phonedb_scraper
        elif request.source.lower() == "gsmarena" and gsmarena_scraper:
            scraper = gsmarena_scraper
        elif gsmarena_scraper:
            scraper = gsmarena_scraper
        elif phonedb_scraper:
            scraper = phonedb_scraper
        else:
            raise HTTPException(status_code=503, detail="No scrapers available")

        # Run the blocking scrape off the event loop so it can't stall other requests.
        loop = asyncio.get_event_loop()
        result = await loop.run_in_executor(
            None,
            scraper.scrape_phone_by_name,
            request.phone_name
        )

        if result:
            return ApiResponse(
                success=True,
                message=f"Successfully found specifications for {result['name']}",
                data=result
            )
        return ApiResponse(
            success=False,
            message=f"No results found for {request.phone_name}",
            data=None
        )

    except HTTPException:
        # BUG FIX: re-raise deliberate HTTP errors (the 503 above) unchanged;
        # the original let the generic handler below rewrite them as 500s.
        raise
    except Exception as e:
        logger.error(f"Error searching for phone {request.phone_name}: {e}")
        raise HTTPException(status_code=500, detail=str(e))
149
+
150
@app.post("/api/search/multiple", response_model=ApiResponse)
async def search_multiple_phones(request: MultiplePhoneSearchRequest):
    """Search for multiple phones in one request.

    Uses the same scraper-selection fallback as /api/search and reports how
    many of the requested phones were successfully scraped.

    Raises:
        HTTPException 503: no scraper is available.
        HTTPException 500: unexpected scraping failure.
    """
    try:
        logger.info(f"Searching for {len(request.phone_names)} phones using {request.source}")

        # Choose scraper (same fallback rules as /api/search).
        if request.source.lower() == "phonedb" and phonedb_scraper:
            scraper = phonedb_scraper
        elif request.source.lower() == "gsmarena" and gsmarena_scraper:
            scraper = gsmarena_scraper
        elif gsmarena_scraper:
            scraper = gsmarena_scraper
        elif phonedb_scraper:
            scraper = phonedb_scraper
        else:
            raise HTTPException(status_code=503, detail="No scrapers available")

        # Run the blocking batch scrape in the default executor.
        loop = asyncio.get_event_loop()
        results = await loop.run_in_executor(
            None,
            scraper.scrape_multiple_phones,
            request.phone_names
        )

        success_count = len(results) if results else 0
        total_count = len(request.phone_names)

        return ApiResponse(
            success=success_count > 0,
            message=f"Successfully scraped {success_count}/{total_count} phones",
            data={
                "phones": results,
                "success_count": success_count,
                "total_count": total_count
            }
        )

    except HTTPException:
        # BUG FIX: don't convert the intentional 503 above into a 500.
        raise
    except Exception as e:
        logger.error(f"Error searching for multiple phones: {e}")
        raise HTTPException(status_code=500, detail=str(e))
193
+
194
@app.get("/api/sources")
async def get_available_sources():
    """Get available scraping sources"""
    # Describe every scraper that survived startup initialization, in the
    # same order the original listed them (PhoneDB first, then GSMArena).
    catalog = [
        (phonedb_scraper, "phonedb", "PhoneDB", "PhoneDB.net database"),
        (gsmarena_scraper, "gsmarena", "GSMArena", "GSMArena.com database"),
    ]
    sources = [
        {"id": source_id, "name": name, "description": desc, "available": True}
        for scraper, source_id, name, desc in catalog
        if scraper
    ]
    return ApiResponse(
        success=True,
        message="Available sources retrieved",
        data=sources
    )
220
+
221
@app.get("/api/export/{phone_name}")
async def export_phone_data(phone_name: str, source: str = "gsmarena"):
    """Export phone data as a downloadable JSON file.

    Args:
        phone_name: path parameter naming the phone to scrape.
        source: "phonedb" or "gsmarena" (default).

    Raises:
        HTTPException 503: requested scraper unavailable.
        HTTPException 404: phone not found.
        HTTPException 500: unexpected failure.
    """
    try:
        # Choose scraper (anything other than "phonedb" defaults to GSMArena).
        if source.lower() == "phonedb" and phonedb_scraper:
            scraper = phonedb_scraper
        else:
            scraper = gsmarena_scraper

        if not scraper:
            raise HTTPException(status_code=503, detail="Scraper not available")

        # Run the blocking scrape off the event loop.
        loop = asyncio.get_event_loop()
        result = await loop.run_in_executor(
            None,
            scraper.scrape_phone_by_name,
            phone_name
        )

        if not result:
            raise HTTPException(status_code=404, detail="Phone not found")

        # BUG FIX: the original wrote to the literal path "/tmp/(unknown)", so
        # every export clobbered the same file and the served file never
        # matched the advertised download name. Also sanitize the user-supplied
        # path parameter so it cannot escape /tmp (e.g. names containing "/"
        # or "..") — this endpoint clearly handles untrusted input.
        safe_name = "".join(c if c.isalnum() or c in "-_" else "_" for c in phone_name)
        filename = f"{safe_name}_specs.json"
        filepath = os.path.join("/tmp", filename)

        with open(filepath, 'w', encoding='utf-8') as f:
            json.dump(result, f, indent=2, ensure_ascii=False)

        return FileResponse(
            filepath,
            media_type='application/json',
            filename=filename
        )

    except HTTPException:
        # BUG FIX: preserve the intentional 404/503 statuses instead of
        # letting the generic handler rewrite them as 500s.
        raise
    except Exception as e:
        logger.error(f"Error exporting phone data: {e}")
        raise HTTPException(status_code=500, detail=str(e))
261
+
262
# Background tasks for long-running scraping jobs
# NOTE(review): this registry lives in process memory only — jobs are lost on
# restart and entries are never evicted, so it grows without bound. Fine for a
# single-worker demo; confirm before scaling out.
background_jobs = {}

@app.post("/api/scrape/background")
async def start_background_scraping(request: MultiplePhoneSearchRequest, background_tasks: BackgroundTasks):
    """Start background scraping job for multiple phones"""
    # Timestamp-based id with second resolution — two requests within the same
    # second would collide and overwrite each other's job record.
    job_id = f"job_{datetime.now().strftime('%Y%m%d_%H%M%S')}"

    # Initialize job status BEFORE scheduling the task so the status endpoint
    # can see the record immediately after this response returns.
    background_jobs[job_id] = {
        "status": "started",
        "progress": 0,
        "total": len(request.phone_names),
        "results": [],
        "started_at": datetime.now().isoformat()
    }

    # Schedule the worker via FastAPI's BackgroundTasks mechanism.
    background_tasks.add_task(
        run_background_scraping,
        job_id,
        request.phone_names,
        request.source
    )

    return ApiResponse(
        success=True,
        message="Background scraping job started",
        data={"job_id": job_id}
    )
292
+
293
@app.get("/api/scrape/status/{job_id}")
async def get_scraping_status(job_id: str):
    """Look up the live status record of a background scraping job."""
    job = background_jobs.get(job_id)
    if job is None:
        raise HTTPException(status_code=404, detail="Job not found")
    return ApiResponse(
        success=True,
        message="Job status retrieved",
        data=job
    )
304
+
305
async def run_background_scraping(job_id: str, phone_names: List[str], source: str):
    """Run background scraping job"""
    # Worker for /api/scrape/background. All progress is reported by mutating
    # the shared background_jobs[job_id] dict, which the status endpoint reads
    # concurrently — keep the mutation order (status -> progress -> results).
    try:
        # Choose scraper: explicit "phonedb" wins when that scraper exists,
        # otherwise fall back to GSMArena (which may itself be None).
        if source.lower() == "phonedb" and phonedb_scraper:
            scraper = phonedb_scraper
        else:
            scraper = gsmarena_scraper

        if not scraper:
            # Nothing initialized at startup: mark the job failed and bail.
            background_jobs[job_id]["status"] = "failed"
            background_jobs[job_id]["error"] = "Scraper not available"
            return

        background_jobs[job_id]["status"] = "running"
        results = []

        for i, phone_name in enumerate(phone_names):
            try:
                # Publish progress: index of the phone currently being scraped.
                background_jobs[job_id]["progress"] = i
                background_jobs[job_id]["current_phone"] = phone_name

                # Run the blocking scrape in the default executor so this
                # coroutine doesn't block the event loop.
                loop = asyncio.get_event_loop()
                result = await loop.run_in_executor(
                    None,
                    scraper.scrape_phone_by_name,
                    phone_name
                )

                if result:
                    results.append(result)

                # Politeness delay between requests to the scraped site.
                await asyncio.sleep(2)

            except Exception as e:
                # Per-phone failures are logged and skipped so one bad phone
                # doesn't abort the whole job.
                logger.error(f"Error scraping {phone_name} in background job: {e}")
                continue

        # Finalize the job record in one pass.
        background_jobs[job_id]["status"] = "completed"
        background_jobs[job_id]["progress"] = len(phone_names)
        background_jobs[job_id]["results"] = results
        background_jobs[job_id]["completed_at"] = datetime.now().isoformat()

    except Exception as e:
        # Catch-all so an unexpected failure is recorded on the job record
        # instead of vanishing inside the background-task machinery.
        background_jobs[job_id]["status"] = "failed"
        background_jobs[job_id]["error"] = str(e)
        logger.error(f"Background job {job_id} failed: {e}")
356
+
357
if __name__ == "__main__":
    # Dev entry point — the Docker image starts uvicorn via CMD and never
    # executes this branch, so reload=True only affects local runs.
    uvicorn.run(
        "main:app",
        host="0.0.0.0",
        port=7860,
        reload=True,
        log_level="info"
    )
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi==0.104.1
2
+ uvicorn[standard]==0.24.0
3
+ requests==2.31.0
4
+ beautifulsoup4==4.12.2
5
+ lxml==4.9.3
6
+ urllib3==2.0.7
7
+ pydantic==2.5.0
8
+ python-multipart==0.0.6
9
+ jinja2==3.1.2
10
+ aiofiles==23.2.1