import requests
from bs4 import BeautifulSoup
from fastapi import FastAPI
from fastapi.responses import StreamingResponse
from pydantic import BaseModel
import replicate
import os
import json
from supabase import create_client, Client

# Supabase credentials are read from the environment; both must be set
# before the app starts.
url: str = os.environ.get("DB_URL")
key: str = os.environ.get("DB_KEY")
supabase: Client = create_client(url, key)


class Item(BaseModel):
    url: str
    max_tokens: int


app = FastAPI()


def extract_article_content(url):
    """Fetch a page and return its headline and paragraph text as one string."""
    try:
        response = requests.get(url, timeout=10)
        soup = BeautifulSoup(response.text, 'html.parser')
        results = soup.find_all(['h1', 'p'])
        text = [result.text for result in results]
        return ' '.join(text)
    except Exception:
        # Any fetch/parse failure is treated as "no content".
        return ""


@app.get("/")
async def health_check():
    return {"status": "OK"}


@app.post("/summarize-v1")
async def summarize_v1(item: Item):
    """Summarize an article in one shot: collect the whole Replicate stream, then return it."""
    try:
        article = extract_article_content(item.url)
        if len(article) == 0:
            return {'summary': ""}
        event_list = []
        for event in replicate.stream(
            "snowflake/snowflake-arctic-instruct",
            input={
                "prompt": "summarize the following news article:" + article,
                "temperature": 0.2,
            },
        ):
            # Convert each event to a string and collect it.
            event_list.append(str(event))
        # After the event stream ends, join the collected events into the summary.
        return "".join(event_list)
    except requests.RequestException as e:
        return {"error": str(e), "status_code": 500}


# Earlier version of /summarize-v2 that streamed straight from Replicate;
# kept for reference.
# @app.post("/summarize-v2")
# async def summarize_v2(item: Item):
#     try:
#         article = extract_article_content(item.url)
#         if len(article) == 0:
#             return {'summary': ""}
#
#         def event_generator():
#             for event in replicate.stream(
#                 "snowflake/snowflake-arctic-instruct",
#                 input={
#                     "prompt": f"summarize this news article in {item.max_tokens} lines:" + article,
#                     "temperature": 0.2,
#                     "max_new_tokens": 1000,
#                 },
#             ):
#                 # Yield each event as a string.
#                 yield str(event)
#
#         # Use StreamingResponse to stream the events.
#         return StreamingResponse(event_generator(), media_type='text/event-stream')
#     except requests.RequestException as e:
#         return {"error": str(e), "status_code": 500}


@app.post("/summarize-v2")
async def summarize_v2(item: Item):
    """Summarize an article by proxying a streaming chat-completions API."""
    try:
        article = extract_article_content(item.url)
        if len(article) == 0:
            return {'summary': ""}
        api_url = 'https://yashxx07-hf-llm-api.hf.space/api/v1/chat/completions'
        headers = {"content-type": "application/json"}
        data = {
            "model": "mixtral-8x7b",
            "messages": [
                {
                    "role": "user",
                    "content": f"summarize this news article in {item.max_tokens} lines:" + article,
                }
            ],
            "temperature": 0.5,
            "top_p": 0.95,
            "max_tokens": -1,
            "use_cache": False,
            "stream": True,
        }

        def get_stream(url):
            s = requests.Session()
            with s.post(url, headers=headers, stream=True, json=data) as resp:
                for line in resp.iter_lines():
                    if not line:
                        continue
                    # Each SSE line looks like `data: {...}`; strip the prefix
                    # and stop at the terminating `[DONE]` sentinel.
                    payload = line.decode("utf-8").removeprefix("data:").strip()
                    if payload == "[DONE]":
                        break
                    try:
                        obj = json.loads(payload)
                        output = obj["choices"][0]["delta"]["content"]
                        yield str(output)
                    except (json.JSONDecodeError, KeyError, IndexError):
                        # Skip keep-alive or malformed chunks.
                        continue

        # Use StreamingResponse to forward tokens to the client as they arrive.
        return StreamingResponse(get_stream(api_url), media_type='text/event-stream')
    except requests.RequestException as e:
        return {"error": str(e), "status_code": 500}


@app.post("/extract-content")
async def extract_content(item: Item):
    try:
        article = extract_article_content(item.url)
        if len(article) == 0:
            return {"error": "could not extract article content"}
        return {"content": article}
    except requests.RequestException as e:
        return {"error": str(e), "status_code": 500}
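
# Example client for the streaming /summarize-v2 endpoint: a minimal sketch,
# assuming the app is served locally at http://127.0.0.1:8000 (host, port,
# and the article URL below are assumptions, not part of this file). It posts
# an Item payload and prints tokens as they arrive. Kept commented out, like
# the endpoint draft above, so it never runs at import time.
#
# import requests
#
# def stream_summary(article_url: str, max_tokens: int = 5):
#     payload = {"url": article_url, "max_tokens": max_tokens}
#     with requests.post("http://127.0.0.1:8000/summarize-v2",
#                        json=payload, stream=True) as resp:
#         # iter_content with decode_unicode=True yields text chunks as the
#         # server flushes them.
#         for chunk in resp.iter_content(chunk_size=None, decode_unicode=True):
#             print(chunk, end="", flush=True)
#
# stream_summary("https://www.example.com/some-article")  # hypothetical URL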

def insert_image(url, id):
    # The synchronous Supabase client's execute() is not awaitable, so this
    # is a plain function rather than a coroutine.
    supabase.table('news').update({'image_url': url}).eq('id', id).execute()


@app.get("/extract-images")
async def extract_images(site: str = 'abcnews.go'):
    """Backfill image URLs for a source's latest stories that lack one."""
    try:
        ii_list = []
        response = (
            supabase.table('news')
            .select("*")
            .eq('source', f'www.{site}.com')
            .is_('image_url', 'null')
            .order('published_date', desc=True)
            .limit(15)
            .execute()
        )
        for row in response.data:
            try:
                res = requests.get(row['article_url'], timeout=10)
                soup = BeautifulSoup(res.text, 'html.parser')
                results = soup.find_all('img')
                # The second <img> on the page holds the lead image for this
                # source; the index is a site-specific heuristic.
                image_url = results[1]['src']
                ii_list.append(image_url)
                insert_image(image_url, row['id'])
            except Exception as e:
                print(e)
        return {"RESULTS": ii_list}
    except requests.RequestException as e:
        return {"error": str(e), "status_code": 500}


@app.get("/extract-images-livemint")
async def extract_images_livemint():
    """Same backfill as /extract-images, with a Livemint-specific image index."""
    try:
        ii_list = []
        response = (
            supabase.table('news')
            .select("*")
            .eq('source', 'www.livemint.com')
            .is_('image_url', 'null')
            .order('published_date', desc=True)
            .limit(15)
            .execute()
        )
        for row in response.data:
            try:
                res = requests.get(row['article_url'], timeout=10)
                soup = BeautifulSoup(res.text, 'html.parser')
                results = soup.find_all('img')
                # The 19th <img> is where Livemint's lead image sits; again a
                # site-specific heuristic.
                image_url = results[18]['src']
                ii_list.append(image_url)
                insert_image(image_url, row['id'])
            except Exception as e:
                print(e)
        return {"RESULTS": ii_list}
    except requests.RequestException as e:
        return {"error": str(e), "status_code": 500}
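
# Local entry point: a minimal sketch, assuming uvicorn is installed and this
# module is the app's entry file (both are assumptions; in deployment the
# server is typically launched externally, e.g. `uvicorn main:app`).
if __name__ == "__main__":
    import uvicorn

    # Host and port are illustrative development defaults.
    uvicorn.run(app, host="127.0.0.1", port=8000)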