Edmond7 committed on
Commit
01758fd
·
verified ·
1 Parent(s): 630f1ad

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +56 -32
app.py CHANGED
@@ -11,6 +11,7 @@ import textract
11
  import boto3
12
  from botocore.exceptions import NoCredentialsError
13
  from duckduckgo_search import DDGS
 
14
 
15
  app = FastAPI()
16
 
@@ -148,31 +149,6 @@ def upload_to_s3(local_file, s3_file):
148
  logger.error("Credentials not available")
149
  return None
150
 
151
- def web_search(query: str, num_results: int = 5) -> dict:
152
- try:
153
- with DDGS(
154
- headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}
155
- ) as ddgs:
156
- results = list(ddgs.text(query, max_results=num_results))
157
- formatted_results = [
158
- {
159
- 'title': result['title'],
160
- 'body': result['body'],
161
- 'href': result['href']
162
- }
163
- for result in results
164
- ]
165
- return {
166
- 'success': True,
167
- 'results': formatted_results
168
- }
169
- except Exception as e:
170
- logger.error(f"Error performing web search: {e}")
171
- return {
172
- 'success': False,
173
- 'error': f"Error performing web search: {str(e)}"
174
- }
175
-
176
  def image_search(query: str, num_results: int = 5) -> dict:
177
  try:
178
  with DDGS(
@@ -239,16 +215,64 @@ async def extract_text(file: UploadFile, api_key: str = Depends(verify_api_key))
239
  raise HTTPException(status_code=500, detail=result['error'])
240
  return {"extracted_text": result['extracted_text']}
241
 
242
- @app.get("/web-search/")
243
- async def web_search_endpoint(query: str, num_results: int = 5, api_key: str = Depends(verify_api_key)):
244
- result = web_search(query, num_results)
245
  if not result['success']:
246
  raise HTTPException(status_code=500, detail=result['error'])
247
  return result
248
 
249
- @app.get("/image-search/")
250
- async def image_search_endpoint(query: str, num_results: int = 5, api_key: str = Depends(verify_api_key)):
251
- result = image_search(query, num_results)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
252
  if not result['success']:
253
  raise HTTPException(status_code=500, detail=result['error'])
254
- return result
 
11
  import boto3
12
  from botocore.exceptions import NoCredentialsError
13
  from duckduckgo_search import DDGS
14
+ from bs4 import BeautifulSoup
15
 
16
  app = FastAPI()
17
 
 
149
  logger.error("Credentials not available")
150
  return None
151
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
152
  def image_search(query: str, num_results: int = 5) -> dict:
153
  try:
154
  with DDGS(
 
215
  raise HTTPException(status_code=500, detail=result['error'])
216
  return {"extracted_text": result['extracted_text']}
217
 
218
@app.get("/image-search/")
async def image_search_endpoint(query: str, num_results: int = 5, api_key: str = Depends(verify_api_key)):
    # Delegates to image_search() and relays its status dict to the client.
    # `api_key` is unused in the body; it is resolved through the
    # verify_api_key dependency declared in the signature.
    outcome = image_search(query, num_results)
    if outcome['success']:
        return outcome
    # A failed search is reported as HTTP 500 carrying the helper's error text.
    raise HTTPException(status_code=500, detail=outcome['error'])
224
 
225
class DuckDuckGoSearch:
    """Scraper for the DuckDuckGo HTML results page (html.duckduckgo.com)."""

    async def search(self, query: str, num_results: int = 5) -> list:
        """Fetch the results page for *query* and return the parsed hits.

        Args:
            query: Raw search text. It is sent through aiohttp's ``params``
                so that characters such as spaces, ``&`` and ``#`` are
                percent-encoded instead of corrupting the URL.
            num_results: Maximum number of hits to return. Values <= 0
                return an empty list without issuing a request.

        Returns:
            A list of dicts, each with ``title``, ``body`` and ``href`` keys.

        Raises:
            Exception: If the response status is not 200.
        """
        # Nothing was asked for: skip the network round trip entirely.
        if num_results <= 0:
            return []

        endpoint = "https://html.duckduckgo.com/html/"
        headers = {
            "User-Agent": "Mozilla/5.0",
            "Referer": "https://google.com/",
            "Cookie": "kl=wt-wt",
        }

        async with aiohttp.ClientSession() as session:
            async with session.get(endpoint, params={"q": query}, headers=headers) as response:
                if response.status != 200:
                    raise Exception("Failed to fetch data from DuckDuckGo")

                html = await response.text()

        soup = BeautifulSoup(html, "html.parser")
        results = []

        for result in soup.select(".result"):
            title = result.select_one(".result__title .result__a")
            # Named `link` so it does not shadow the request endpoint above.
            link = result.select_one(".result__url")
            desc = result.select_one(".result__snippet")

            # Entries missing any of the three parts are skipped.
            if title and link and desc:
                results.append({
                    "title": title.get_text(strip=True),
                    "body": desc.get_text(strip=True),
                    # The displayed URL text is prefixed with a scheme;
                    # presumably it is rendered without one — verify against
                    # the live markup.
                    "href": f"https://{link.get_text(strip=True)}",
                })

            if len(results) >= num_results:
                break

        return results
259
+
260
async def web_search(query: str, num_results: int = 5) -> dict:
    """Run a DuckDuckGo text search and wrap the outcome in a status dict.

    Args:
        query: Search text forwarded to ``DuckDuckGoSearch.search``.
        num_results: Maximum number of hits requested.

    Returns:
        ``{'success': True, 'results': [...]}`` when the search completes, or
        ``{'success': False, 'error': <message>}`` when it raises. This
        function does not propagate exceptions to the caller.
    """
    try:
        results = await DuckDuckGoSearch().search(query, num_results)
    except Exception as e:
        # Boundary handler: the caller only sees the status dict, so record
        # the failure (with traceback) here or it is lost server-side.
        logger.exception("Error performing web search: %s", e)
        return {
            'success': False,
            'error': str(e),
        }
    return {
        'success': True,
        'results': results,
    }
272
+
273
@app.get("/web-search/")
async def web_search_endpoint(query: str, num_results: int = 5, api_key: str = Depends(verify_api_key)):
    # Awaits web_search() and relays its status dict to the client.
    # `api_key` is unused in the body; it is resolved through the
    # verify_api_key dependency declared in the signature.
    outcome = await web_search(query, num_results)
    if outcome['success']:
        return outcome
    # A failed search is reported as HTTP 500 carrying the helper's error text.
    raise HTTPException(status_code=500, detail=outcome['error'])