Spaces:

Edmond98
/

search

Sleeping

App Files Files Community

Edmond7 committed on Oct 15, 2024

Commit

01758fd

verified ·

1 Parent(s): 630f1ad

Update app.py

Browse files

Files changed (1) hide show

app.py +56 -32

app.py CHANGED Viewed

@@ -11,6 +11,7 @@ import textract
 import boto3
 from botocore.exceptions import NoCredentialsError
 from duckduckgo_search import DDGS
 app = FastAPI()
@@ -148,31 +149,6 @@ def upload_to_s3(local_file, s3_file):
         logger.error("Credentials not available")
         return None
-def web_search(query: str, num_results: int = 5) -> dict:
-    try:
-        with DDGS(
-            headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}
-        ) as ddgs:
-            results = list(ddgs.text(query, max_results=num_results))
-            formatted_results = [
-                {
-                    'title': result['title'],
-                    'body': result['body'],
-                    'href': result['href']
-                }
-                for result in results
-            ]
-        return {
-            'success': True,
-            'results': formatted_results
-        }
-    except Exception as e:
-        logger.error(f"Error performing web search: {e}")
-        return {
-            'success': False,
-            'error': f"Error performing web search: {str(e)}"
-        }
 def image_search(query: str, num_results: int = 5) -> dict:
     try:
         with DDGS(
@@ -239,16 +215,64 @@ async def extract_text(file: UploadFile, api_key: str = Depends(verify_api_key))
         raise HTTPException(status_code=500, detail=result['error'])
     return {"extracted_text": result['extracted_text']}
-@app.get("/web-search/")
-async def web_search_endpoint(query: str, num_results: int = 5, api_key: str = Depends(verify_api_key)):
-    result = web_search(query, num_results)
     if not result['success']:
         raise HTTPException(status_code=500, detail=result['error'])
     return result
-@app.get("/image-search/")
-async def image_search_endpoint(query: str, num_results: int = 5, api_key: str = Depends(verify_api_key)):
-    result = image_search(query, num_results)
     if not result['success']:
         raise HTTPException(status_code=500, detail=result['error'])
-    return result

 import boto3
 from botocore.exceptions import NoCredentialsError
 from duckduckgo_search import DDGS
+from bs4 import BeautifulSoup
 app = FastAPI()
         logger.error("Credentials not available")
         return None
 def image_search(query: str, num_results: int = 5) -> dict:
     try:
         with DDGS(
         raise HTTPException(status_code=500, detail=result['error'])
     return {"extracted_text": result['extracted_text']}
+@app.get("/image-search/")
+async def image_search_endpoint(query: str, num_results: int = 5, api_key: str = Depends(verify_api_key)):
+    result = image_search(query, num_results)  # helper returns a dict that carries a 'success' flag
     if not result['success']:
         raise HTTPException(status_code=500, detail=result['error'])  # report the helper's error text as an HTTP 500
     return result
+class DuckDuckGoSearch:
+    async def search(self, query: str, num_results: int = 5) -> list:
+        url = f"https://html.duckduckgo.com/html/?q={query}"
+        headers = {
+            "User-Agent": "Mozilla/5.0",
+            "Referer": "https://google.com/",
+            "Cookie": "kl=wt-wt",
+        }
+        async with aiohttp.ClientSession() as session:
+            async with session.get(url, headers=headers) as response:
+                if response.status != 200:
+                    raise Exception("Failed to fetch data from DuckDuckGo")
+                html = await response.text()
+                soup = BeautifulSoup(html, "html.parser")
+                results = []
+                for result in soup.select(".result"):
+                    title = result.select_one(".result__title .result__a")
+                    url = result.select_one(".result__url")
+                    desc = result.select_one(".result__snippet")
+                    if title and url and desc:
+                        results.append({
+                            "title": title.get_text(strip=True),
+                            "body": desc.get_text(strip=True),
+                            "href": f"https://{url.get_text(strip=True)}",
+                        })
+                    if len(results) >= num_results:
+                        break
+                return results
+async def web_search(query: str, num_results: int = 5) -> dict:
+    try:
+        results = await DuckDuckGoSearch().search(query, num_results)
+        return {
+            'success': True,
+            'results': results
+        }
+    except Exception as e:
+        return {
+            'success': False,
+            'error': str(e)
+        }
+@app.get("/web-search/")
+async def web_search_endpoint(query: str, num_results: int = 5, api_key: str = Depends(verify_api_key)):
+    result = await web_search(query, num_results)  # web_search is a coroutine returning a status dict
     if not result['success']:
         raise HTTPException(status_code=500, detail=result['error'])  # report the helper's error text as an HTTP 500
+    return result