search / app.py
Edmond7's picture
Update app.py
90171b7 verified
raw
history blame
3.73 kB
import os
from fastapi import FastAPI, HTTPException, UploadFile, Depends, Query
from fastapi.security import APIKeyHeader
import asyncio
import json
import tempfile
from typing import List, Dict
import logging
import requests
from requests_random_user_agent import UserAgent
from fp.fp import FreeProxy
from bs4 import BeautifulSoup
# ASGI application object; the route decorators further down attach to it.
app = FastAPI()

# Logging: INFO-level root configuration plus a logger named after this module.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Clients authenticate by sending the shared secret in this request header.
# auto_error=False hands a missing header to the dependency as None instead
# of letting the security scheme reject the request on its own.
API_KEY_NAME = "X-API-Key"
api_key_header = APIKeyHeader(name=API_KEY_NAME, auto_error=False)

# Timeout, in seconds, passed both to proxy discovery and to the outbound
# search request.
TIMEOUT = 0.9

# Shared secret expected in the X-API-Key header, read from the environment.
API_KEY = os.getenv("API_KEY")

# Refuse to start without a configured key: importing the module fails fast.
if not API_KEY:
    raise ValueError("Missing required environment variable: API_KEY")
def get_proxy():
    """Pick a free proxy for the next outbound request.

    Builds a ``FreeProxy`` finder restricted to SSL-capable proxies, with
    random selection enabled and the module-wide ``TIMEOUT`` applied, and
    returns whatever its ``get()`` call yields (presumably a proxy URL
    string, since the caller passes it to ``requests`` as a proxy address).

    Any exception raised by ``FreeProxy`` propagates to the caller.
    """
    finder = FreeProxy(timeout=TIMEOUT, rand=True, ssl=True)
    return finder.get()
def _parse_web_result(node):
    """Convert one ``result__body`` node into a result dict, or ``None`` if unusable."""
    heading = node.find('h2', class_='result__title')
    link = node.find('a', class_='result__url')
    href = link.get('href') if link is not None else None
    # A result without a title or a target URL is of no use to the caller.
    if heading is None or href is None:
        return None
    snippet = node.find('a', class_='result__snippet')
    return {
        'title': heading.text.strip(),
        # A missing snippet is tolerated (empty body) rather than dropping the entry.
        'body': snippet.text.strip() if snippet is not None else '',
        'href': href,
    }


def _parse_image_result(node):
    """Convert one ``tile--img`` node into a result dict, or ``None`` if unusable."""
    caption = node.find('span', class_='tile--img__title')
    image = node.find('img')
    anchor = node.find('a')
    src = image.get('src') if image is not None else None
    href = anchor.get('href') if anchor is not None else None
    if caption is None or src is None or href is None:
        return None
    return {
        'title': caption.text.strip(),
        # The same <img> source is reported as both full image and thumbnail.
        'image_url': src,
        'thumbnail_url': src,
        'source_url': href,
    }


def perform_duckduckgo_search(query: str, search_type: str = 'web', num_results: int = 5) -> dict:
    """Scrape DuckDuckGo's HTML endpoint through a free proxy.

    Args:
        query: Search terms, sent as the ``q`` parameter.
        search_type: ``'web'`` for web results; any other value selects the
            image branch.
        num_results: Maximum number of results to return. Values below zero
            are treated as zero.

    Returns:
        ``{'success': True, 'results': [...]}`` on success, where each entry
        has ``title``/``body``/``href`` (web) or
        ``title``/``image_url``/``thumbnail_url``/``source_url`` (image).
        ``{'success': False, 'error': <message>}`` if anything fails; no
        exception escapes this function.

    Entries whose markup lacks the required elements are skipped instead of
    failing the whole search, and the limit counts only usable entries.
    """
    is_web = search_type == 'web'
    kind = 'web' if is_web else 'image'
    base_url = 'https://duckduckgo.com/html/'
    params = {
        'q': query,
        'ia': 'web' if is_web else 'images',
    }
    # A negative limit would otherwise slice from the end of the result list.
    limit = max(num_results, 0)

    try:
        proxy = get_proxy()
        response = requests.get(
            base_url,
            params=params,
            headers={'User-Agent': UserAgent().get_random_user_agent()},
            proxies={'https': proxy},  # Use only HTTPS proxy
            timeout=TIMEOUT,
        )
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')

        # NOTE(review): the image selectors presumably match image tiles;
        # confirm the HTML endpoint actually serves them for ia=images.
        if is_web:
            nodes = soup.find_all('div', class_='result__body')
            parse = _parse_web_result
        else:
            nodes = soup.find_all('div', class_='tile--img')
            parse = _parse_image_result

        formatted_results = []
        for node in nodes:
            if len(formatted_results) >= limit:
                break
            entry = parse(node)
            if entry is not None:
                formatted_results.append(entry)

        return {
            'success': True,
            'results': formatted_results,
        }
    except Exception as e:
        # Boundary handler: callers rely on a dict, never an exception.
        # logger.exception keeps the traceback for diagnosis.
        logger.exception("Error performing %s search: %s", kind, e)
        return {
            'success': False,
            'error': f"Error performing {kind} search: {str(e)}",
        }
async def verify_api_key(api_key: str = Depends(api_key_header)):
    """FastAPI dependency that authenticates a request by its API key header.

    Args:
        api_key: Value of the ``X-API-Key`` header, or ``None`` when the
            header is absent (the header scheme is created with
            ``auto_error=False``).

    Returns:
        The supplied key, unchanged, when it matches the configured secret.

    Raises:
        HTTPException: 401 when the header is missing or does not match.
    """
    # Local import keeps this block self-contained; hmac is standard library.
    import hmac

    # Compare as bytes: compare_digest rejects non-ASCII str arguments, and
    # a constant-time comparison avoids leaking how much of the key matched.
    supplied = (api_key or "").encode("utf-8")
    expected = API_KEY.encode("utf-8")
    if api_key is None or not hmac.compare_digest(supplied, expected):
        raise HTTPException(status_code=401, detail="Invalid API Key")
    return api_key
@app.get("/web-search/")
async def web_search_endpoint(query: str, num_results: int = 5, api_key: str = Depends(verify_api_key)):
    """Handle ``GET /web-search/``: run a web search and return its results.

    Args:
        query: Search terms.
        num_results: Maximum number of results to return.
        api_key: Resolved by the ``verify_api_key`` dependency; unused here
            beyond enforcing authentication.

    Returns:
        The success dict produced by ``perform_duckduckgo_search``.

    Raises:
        HTTPException: 500 carrying the search error message on failure.
    """
    # The search does blocking network I/O; run it in the default executor
    # so the event loop keeps serving other requests in the meantime.
    loop = asyncio.get_running_loop()
    result = await loop.run_in_executor(
        None, perform_duckduckgo_search, query, 'web', num_results
    )
    if not result['success']:
        raise HTTPException(status_code=500, detail=result['error'])
    return result
@app.get("/image-search/")
async def image_search_endpoint(query: str, num_results: int = 5, api_key: str = Depends(verify_api_key)):
    """Handle ``GET /image-search/``: run an image search and return its results.

    Args:
        query: Search terms.
        num_results: Maximum number of results to return.
        api_key: Resolved by the ``verify_api_key`` dependency; unused here
            beyond enforcing authentication.

    Returns:
        The success dict produced by ``perform_duckduckgo_search``.

    Raises:
        HTTPException: 500 carrying the search error message on failure.
    """
    # The search does blocking network I/O; run it in the default executor
    # so the event loop keeps serving other requests in the meantime.
    loop = asyncio.get_running_loop()
    result = await loop.run_in_executor(
        None, perform_duckduckgo_search, query, 'images', num_results
    )
    if not result['success']:
        raise HTTPException(status_code=500, detail=result['error'])
    return result