Update app.py
Browse files
app.py
CHANGED
@@ -11,6 +11,7 @@ import textract
|
|
11 |
import boto3
|
12 |
from botocore.exceptions import NoCredentialsError
|
13 |
from duckduckgo_search import DDGS
|
|
|
14 |
|
15 |
app = FastAPI()
|
16 |
|
@@ -148,31 +149,6 @@ def upload_to_s3(local_file, s3_file):
|
|
148 |
logger.error("Credentials not available")
|
149 |
return None
|
150 |
|
151 |
-
def web_search(query: str, num_results: int = 5) -> dict:
    """Run a DuckDuckGo text search and wrap the hits in a status dict.

    Args:
        query: Search terms handed to ``DDGS.text``.
        num_results: Passed to ``DDGS.text`` as ``max_results``.

    Returns:
        ``{'success': True, 'results': [...]}`` where each entry carries the
        ``'title'``, ``'body'`` and ``'href'`` of one hit, or
        ``{'success': False, 'error': <message>}`` if anything raised.
    """
    browser_headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}
    try:
        with DDGS(headers=browser_headers) as ddgs:
            # Materialise every hit before reshaping any of them.
            raw_hits = list(ddgs.text(query, max_results=num_results))
            wanted_keys = ('title', 'body', 'href')
            formatted_results = []
            for hit in raw_hits:
                formatted_results.append({key: hit[key] for key in wanted_keys})
            return {
                'success': True,
                'results': formatted_results
            }
    except Exception as e:
        # Failures are reported through the payload instead of propagating.
        logger.error(f"Error performing web search: {e}")
        return {
            'success': False,
            'error': f"Error performing web search: {str(e)}"
        }
|
175 |
-
|
176 |
def image_search(query: str, num_results: int = 5) -> dict:
|
177 |
try:
|
178 |
with DDGS(
|
@@ -239,16 +215,64 @@ async def extract_text(file: UploadFile, api_key: str = Depends(verify_api_key))
|
|
239 |
raise HTTPException(status_code=500, detail=result['error'])
|
240 |
return {"extracted_text": result['extracted_text']}
|
241 |
|
242 |
-
@app.get("/
|
243 |
-
async def
|
244 |
-
result =
|
245 |
if not result['success']:
|
246 |
raise HTTPException(status_code=500, detail=result['error'])
|
247 |
return result
|
248 |
|
249 |
-
|
250 |
-
async def
|
251 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
252 |
if not result['success']:
|
253 |
raise HTTPException(status_code=500, detail=result['error'])
|
254 |
-
return result
|
|
|
11 |
import boto3
|
12 |
from botocore.exceptions import NoCredentialsError
|
13 |
from duckduckgo_search import DDGS
|
14 |
+
from bs4 import BeautifulSoup
|
15 |
|
16 |
app = FastAPI()
|
17 |
|
|
|
149 |
logger.error("Credentials not available")
|
150 |
return None
|
151 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
152 |
def image_search(query: str, num_results: int = 5) -> dict:
|
153 |
try:
|
154 |
with DDGS(
|
|
|
215 |
raise HTTPException(status_code=500, detail=result['error'])
|
216 |
return {"extracted_text": result['extracted_text']}
|
217 |
|
218 |
+
@app.get("/image-search/")
async def image_search_endpoint(query: str, num_results: int = 5, api_key: str = Depends(verify_api_key)):
    # GET /image-search/: delegates to image_search() and returns its payload.
    # api_key is resolved through the verify_api_key dependency.
    outcome = image_search(query, num_results)
    if outcome['success']:
        return outcome
    # A failed search is surfaced as HTTP 500 carrying the helper's message.
    raise HTTPException(status_code=500, detail=outcome['error'])
|
224 |
|
225 |
+
class DuckDuckGoSearch:
    """Scrape text search results from DuckDuckGo's HTML endpoint."""

    async def search(self, query: str, num_results: int = 5) -> list:
        """Fetch the results page for *query* and return the first hits.

        Args:
            query: Raw search terms; sent as the ``q`` query parameter.
            num_results: Maximum number of results to return. Values of
                zero or less yield an empty list without any request.

        Returns:
            A list of dicts with ``"title"``, ``"body"`` and ``"href"`` keys,
            at most ``num_results`` long.

        Raises:
            Exception: If DuckDuckGo answers with a status other than 200.
        """
        if num_results <= 0:
            return []

        endpoint = "https://html.duckduckgo.com/html/"
        headers = {
            "User-Agent": "Mozilla/5.0",
            "Referer": "https://google.com/",
            # NOTE(review): presumably a region preference cookie - confirm.
            "Cookie": "kl=wt-wt",
        }

        async with aiohttp.ClientSession() as session:
            # Pass the query via ``params`` so it is URL-encoded; formatting
            # it into the URL broke on characters such as "&", "#" and "+".
            async with session.get(
                endpoint, params={"q": query}, headers=headers
            ) as response:
                if response.status != 200:
                    raise Exception("Failed to fetch data from DuckDuckGo")

                html = await response.text()

        soup = BeautifulSoup(html, "html.parser")
        results = []

        for result in soup.select(".result"):
            title = result.select_one(".result__title .result__a")
            link = result.select_one(".result__url")
            desc = result.select_one(".result__snippet")

            # Skip entries that lack any of the three parts.
            if title and link and desc:
                results.append({
                    "title": title.get_text(strip=True),
                    "body": desc.get_text(strip=True),
                    # NOTE(review): built from the displayed URL text, which
                    # may not be the full target address - confirm.
                    "href": f"https://{link.get_text(strip=True)}",
                })

            if len(results) >= num_results:
                break

        return results
|
259 |
+
|
260 |
+
async def web_search(query: str, num_results: int = 5) -> dict:
    """Search DuckDuckGo and wrap the outcome in a status dict.

    Args:
        query: Search terms forwarded to ``DuckDuckGoSearch.search``.
        num_results: Maximum number of results requested.

    Returns:
        ``{'success': True, 'results': [...]}`` on success, or
        ``{'success': False, 'error': <message>}`` if the search raised.
    """
    try:
        results = await DuckDuckGoSearch().search(query, num_results)
    except Exception as e:
        # Log before converting to a payload so failures stay visible
        # server-side, matching the other helpers in this file.
        logger.error("Error performing web search: %s", e)
        return {
            'success': False,
            'error': str(e)
        }
    return {
        'success': True,
        'results': results
    }
|
272 |
+
|
273 |
+
@app.get("/web-search/")
async def web_search_endpoint(query: str, num_results: int = 5, api_key: str = Depends(verify_api_key)):
    # GET /web-search/: awaits web_search() and returns its payload.
    # api_key is resolved through the verify_api_key dependency.
    outcome = await web_search(query, num_results)
    if outcome['success']:
        return outcome
    # A failed search is surfaced as HTTP 500 carrying the helper's message.
    raise HTTPException(status_code=500, detail=outcome['error'])
|