|
import os |
|
from fastapi import FastAPI, HTTPException, UploadFile, Depends, Query |
|
from fastapi.security import APIKeyHeader |
|
import asyncio |
|
import json |
|
import tempfile |
|
from typing import List, Dict |
|
import logging |
|
import requests |
|
from requests_random_user_agent import UserAgent |
|
from fp.fp import FreeProxy |
|
from bs4 import BeautifulSoup |
|
|
|
# ASGI application object; the route decorators further down register on it.
app = FastAPI()

# Configure root logging at INFO and create this module's logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Name of the request header that carries the client's API key.
API_KEY_NAME = "X-API-Key"
# auto_error=False is passed so the header scheme itself does not reject
# requests; the decision is made by the key-verification dependency instead.
api_key_header = APIKeyHeader(name=API_KEY_NAME, auto_error=False)

# Shared secret that incoming keys are compared against (read from the environment).
API_KEY = os.getenv("API_KEY")
# Timeout value handed to both the proxy lookup and the outgoing HTTP request.
TIMEOUT = 0.9

# Fail fast at import time when the secret is absent or empty.
if not API_KEY:
    raise ValueError("Missing required environment variable: API_KEY")
|
|
|
def get_proxy():
    """Return the address of a randomly chosen, HTTPS-capable free proxy.

    The lookup is delegated to ``FreeProxy(...).get()`` using the module-wide
    ``TIMEOUT``. Any exception raised by that call (for example when no
    working proxy is found) propagates to the caller.
    """
    # FreeProxy's keyword for restricting the pool to HTTPS proxies is
    # ``https``; it has no ``ssl`` parameter, so passing ``ssl=True`` raised
    # TypeError before any proxy was looked up.
    finder = FreeProxy(
        timeout=TIMEOUT,
        rand=True,
        https=True,
    )
    return finder.get()
|
|
|
def _element_text(element) -> str:
    """Return the stripped text of a parsed element, or '' when it is missing."""
    return element.text.strip() if element is not None else ''


def _parse_web_result(result):
    """Build a web-result dict from one ``result__body`` node, or None if it has no link."""
    link = result.find('a', class_='result__url')
    href = link.get('href') if link is not None else None
    if not href:
        return None
    return {
        'title': _element_text(result.find('h2', class_='result__title')),
        'body': _element_text(result.find('a', class_='result__snippet')),
        'href': href,
    }


def _parse_image_result(result):
    """Build an image-result dict from one ``tile--img`` node, or None if a URL is missing."""
    image = result.find('img')
    anchor = result.find('a')
    src = image.get('src') if image is not None else None
    href = anchor.get('href') if anchor is not None else None
    if not src or not href:
        return None
    return {
        'title': _element_text(result.find('span', class_='tile--img__title')),
        # The page markup offers a single <img> source, so it serves as both
        # the image URL and the thumbnail URL.
        'image_url': src,
        'thumbnail_url': src,
        'source_url': href,
    }


def perform_duckduckgo_search(query: str, search_type: str = 'web', num_results: int = 5) -> dict:
    """Scrape DuckDuckGo's HTML endpoint through a free proxy.

    Args:
        query: Search terms, sent as the ``q`` parameter.
        search_type: ``'web'`` for web results; any other value is treated
            as an image search.
        num_results: Maximum number of results to return. Values below zero
            are treated as zero.

    Returns:
        ``{'success': True, 'results': [...]}`` on success, where each entry
        has ``title``/``body``/``href`` (web) or ``title``/``image_url``/
        ``thumbnail_url``/``source_url`` (images). On any failure the
        function does not raise; it returns
        ``{'success': False, 'error': <message>}``.
    """
    is_web = search_type == 'web'
    label = 'web' if is_web else 'image'
    params = {
        'q': query,
        'ia': 'web' if is_web else 'images',
    }
    # A negative limit would otherwise slice from the end of the result list.
    limit = max(num_results, 0)

    try:
        proxy = get_proxy()
        # NOTE(review): confirm that requests_random_user_agent really exposes
        # UserAgent().get_random_user_agent(); it is used here as imported.
        response = requests.get(
            'https://duckduckgo.com/html/',
            params=params,
            headers={'User-Agent': UserAgent().get_random_user_agent()},
            proxies={'https': proxy},
            timeout=TIMEOUT,
        )
        response.raise_for_status()

        soup = BeautifulSoup(response.text, 'html.parser')

        # NOTE(review): whether the /html/ endpoint actually serves
        # ``tile--img`` markup for ia=images cannot be verified from this
        # file -- confirm against a live response.
        if is_web:
            nodes = soup.find_all('div', class_='result__body')
            parse = _parse_web_result
        else:
            nodes = soup.find_all('div', class_='tile--img')
            parse = _parse_image_result

        # Incomplete nodes are skipped instead of aborting the whole search
        # with an AttributeError/TypeError; the limit is applied afterwards
        # so skipped nodes do not shrink the response.
        parsed = [item for item in map(parse, nodes) if item is not None]
        formatted_results = parsed[:limit]

        return {
            'success': True,
            'results': formatted_results,
        }
    except Exception as e:
        # Boundary handler: callers expect a result dict rather than an
        # exception, so log (with traceback) and report the failure.
        logger.exception("Error performing %s search: %s", label, e)
        return {
            'success': False,
            'error': f"Error performing {label} search: {e}",
        }
|
|
|
async def verify_api_key(api_key: str = Depends(api_key_header)):
    """FastAPI dependency that authenticates a request by its API key header.

    Args:
        api_key: Value extracted by ``api_key_header``; falsy when the client
            sent no key.

    Returns:
        The supplied key when it matches ``API_KEY``.

    Raises:
        HTTPException: 401 when the key is missing or does not match.
    """
    # Imported here so the function carries its own dependency.
    import secrets

    # Compare as bytes: compare_digest rejects non-ASCII str arguments, and a
    # constant-time comparison avoids leaking how much of the key matched.
    supplied = (api_key or "").encode("utf-8")
    expected = API_KEY.encode("utf-8")
    if not api_key or not secrets.compare_digest(supplied, expected):
        raise HTTPException(status_code=401, detail="Invalid API Key")
    return api_key
|
|
|
@app.get("/web-search/")
async def web_search_endpoint(query: str, num_results: int = 5, api_key: str = Depends(verify_api_key)):
    """Handle ``GET /web-search/``: run a DuckDuckGo web search.

    Args:
        query: Search terms.
        num_results: Maximum number of results to return.
        api_key: Resolved by the ``verify_api_key`` dependency; present only
            to enforce authentication.

    Returns:
        The search function's success payload
        (``{'success': True, 'results': [...]}``).

    Raises:
        HTTPException: 500 carrying the search error message on failure.
    """
    # The search does blocking network I/O (proxy lookup + requests.get).
    # Running it in the default executor keeps the event loop free to serve
    # other requests in the meantime.
    loop = asyncio.get_running_loop()
    result = await loop.run_in_executor(
        None, perform_duckduckgo_search, query, 'web', num_results
    )
    if not result['success']:
        raise HTTPException(status_code=500, detail=result['error'])
    return result
|
|
|
@app.get("/image-search/")
async def image_search_endpoint(query: str, num_results: int = 5, api_key: str = Depends(verify_api_key)):
    """Handle ``GET /image-search/``: run a DuckDuckGo image search.

    Args:
        query: Search terms.
        num_results: Maximum number of results to return.
        api_key: Resolved by the ``verify_api_key`` dependency; present only
            to enforce authentication.

    Returns:
        The search function's success payload
        (``{'success': True, 'results': [...]}``).

    Raises:
        HTTPException: 500 carrying the search error message on failure.
    """
    # The search does blocking network I/O (proxy lookup + requests.get).
    # Running it in the default executor keeps the event loop free to serve
    # other requests in the meantime.
    loop = asyncio.get_running_loop()
    result = await loop.run_in_executor(
        None, perform_duckduckgo_search, query, 'images', num_results
    )
    if not result['success']:
        raise HTTPException(status_code=500, detail=result['error'])
    return result