# Hugging Face Space (page status at capture time: Running)
# Standard library
import json
import os
import re

# Third-party
import replicate
import requests
from bs4 import BeautifulSoup
from fastapi import FastAPI  # , Request
from fastapi.responses import StreamingResponse
from pydantic import BaseModel
from supabase import Client, create_client

# Supabase credentials come from the environment. os.environ.get returns
# None when a variable is unset, in which case create_client raises at
# import time -- failing fast here is preferable to failing per-request.
url: str = os.environ.get("DB_URL")
key: str = os.environ.get("DB_KEY")
supabase: Client = create_client(url, key)
class Item(BaseModel):
    """Request body for the summarize/content endpoints.

    Fields (validated by pydantic):
        url        -- address of the news article to fetch
        max_tokens -- requested summary length; used as a line count in
                      the prompts below, not an actual token budget
    """
    url: str
    max_tokens: int
# Single FastAPI application instance.
# NOTE(review): the async handlers below look like route handlers, but no
# @app.get/@app.post decorators are visible in this copy (only a commented
# one survives) -- confirm the routes against the original deployment.
app = FastAPI()
def extract_article_content(url):
    """Fetch *url* and return the concatenated text of its <h1> and <p> tags.

    Returns "" on any failure (network error, bad HTML, ...); callers treat
    an empty string as "no article found" rather than as an error.
    """
    try:
        # FIX: added a timeout so a hung remote server cannot stall the
        # worker indefinitely; a timeout is swallowed like any other
        # failure and yields "".
        response = requests.get(url, timeout=10)
        soup = BeautifulSoup(response.text, 'html.parser')
        # Headline + paragraphs is a crude but site-agnostic extraction.
        parts = [tag.text for tag in soup.find_all(['h1', 'p'])]
        return ' '.join(parts)
    except Exception:
        # Deliberate best-effort: every failure is reported as "no content".
        return ""
async def root():
    """Health-check endpoint: report that the service is up."""
    return dict(status="OK")
async def root(item: Item):
    """Summarize the article at item.url with the snowflake-arctic model.

    Returns the whole summary as one plain string; returns
    {'summary': ""} when no article text could be extracted.
    """
    try:
        article = extract_article_content(item.url)
        if not article:
            return {'summary': ""}
        # Drain the replicate event stream, stringifying each event, and
        # splice the pieces into a single response body.
        stream = replicate.stream(
            "snowflake/snowflake-arctic-instruct",
            input={
                "prompt": "summarize this following news article:" + article,
                "temperature": 0.2,
            },
        )
        return "".join(str(event) for event in stream)
    except requests.RequestException as e:
        return {"error": str(e), "status_code": 500}
# @app.post("/summarize-v2")
# async def root(item: Item):
#     try:
#         article = extract_article_content(item.url)
#         if len(article) == 0:
#             return {'summary': ""}
#         def event_generator():
#             for event in replicate.stream("snowflake/snowflake-arctic-instruct", input={
#                 "prompt": f"summarize this news article in {item.max_tokens} lines:" + article,
#                 "temperature": 0.2,
#                 "max_new_tokens": 1000
#             }):
#                 # Yield the event as a string
#                 yield str(event)
#                 # print(str(event), end="")
#         # Use StreamingResponse to stream the events
#         return StreamingResponse(event_generator(), media_type='text/event-stream')
#     except requests.RequestException as e:
#         return {"error": str(e), "status_code": 500}
async def root(item: Item):
    """Summarize item.url via a hosted mixtral-8x7b chat-completions API,
    relaying the model's output to the client as a server-sent-event stream.

    Returns {'summary': ""} when no article text could be extracted, and an
    error dict on network failure.
    """
    try:
        article = extract_article_content(item.url)
        if len(article) == 0:
            return {'summary': ""}
        api_url = 'https://yashxx07-hf-llm-api.hf.space/api/v1/chat/completions'
        headers = {"content-type": "application/json"}
        data = {
            "model": "mixtral-8x7b",
            "messages": [
                {
                    "role": "user",
                    "content": f"summarize this news article in {item.max_tokens} lines:" + article
                }
            ],
            "temperature": 0.5,
            "top_p": 0.95,
            "max_tokens": -1,  # -1: let the server choose the limit
            "use_cache": False,
            "stream": True
        }

        def get_stream(url):
            # Stream the SSE response line by line, yielding only the
            # incremental text deltas.
            # FIX: the Session was never closed -- both it and the response
            # are now managed by `with` blocks.
            with requests.Session() as session:
                with session.post(url, headers=headers, stream=True, json=data) as resp:
                    for line in resp.iter_lines():
                        if not line:
                            continue
                        # Payload lines look like b"data: {...}"; drop the
                        # 5-byte "data:" prefix before parsing (json.loads
                        # tolerates the leftover leading space).
                        try:
                            obj = json.loads(line[5:])
                            # FIX: typo `ouput` renamed.
                            content = obj["choices"][0]["delta"]["content"]
                            yield str(content)
                        except (ValueError, KeyError, IndexError, TypeError):
                            # FIX: narrowed the bare `except:` so real bugs
                            # are no longer swallowed. Non-JSON keep-alives,
                            # the "[DONE]" sentinel, and chunks without a
                            # content delta are skipped, as before.
                            pass

        # Use StreamingResponse to stream the events
        return StreamingResponse(get_stream(api_url), media_type='text/event-stream')
    except requests.RequestException as e:
        return {"error": str(e), "status_code": 500}
async def root(item: Item):
    """Return the raw extracted text of the article at item.url."""
    try:
        article = extract_article_content(item.url)
        if not article:
            # Extraction came back empty -- surface the error marker the
            # caller expects.
            return {'ERROR': "AHHHHHHHHH"}
        return {"content": article}
    except requests.RequestException as e:
        return {"error": str(e), "status_code": 500}
async def insert_image(url , id):
    # Persist an image URL onto the existing 'news' row with the given id.
    # NOTE(review): `create_client` above builds the synchronous supabase
    # client; awaiting `.execute()` and unpacking into (data, count) matches
    # supabase-py v1 examples but would fail against the sync v2 client --
    # confirm the installed supabase-py version.
    data, count = await supabase.table('news').update({'image_url': url}).eq('id', id).execute()
async def root(site: str = 'abcnews.go'):
    """Backfill image_url for up to 15 recent image-less articles from *site*.

    For each matching 'news' row, fetches the article page, scrapes an
    <img> src, and writes it back via insert_image. Returns the list of
    image URLs that were stored.
    """
    try:
        stored = []
        response = (supabase.table('news')
                    .select("*")
                    .eq('source', f'www.{site}.com')
                    .is_('image_url', 'null')
                    .order('published_date', desc=True)
                    .limit(15)
                    .execute())
        # FIX: iterate rows directly instead of indexing via range(len(...)).
        for row in response.data:
            try:
                # FIX: added a timeout so one slow article page cannot
                # stall the whole backfill run.
                res = requests.get(row['article_url'], timeout=10)
                soup = BeautifulSoup(res.text, 'html.parser')
                images = soup.find_all(['img'])
                # NOTE(review): index 1 appears chosen empirically for this
                # site's layout (skipping the first image) -- brittle if the
                # page structure changes.
                image_url = images[1]['src']
                stored.append(image_url)
                await insert_image(image_url, row['id'])
            except Exception as e:
                # Best-effort per article: report and continue with the rest.
                print(e)
        return {"RESULTS": stored}
    except requests.RequestException as e:
        return {"error": str(e), "status_code": 500}
async def root(site: str = 'livemint'):
    """Backfill image_url for up to 15 recent image-less articles from *site*.

    Same flow as the scraper above, tuned for livemint's page layout
    (the usable article image sits at <img> index 18).
    """
    try:
        stored = []
        # FIX: actually use the `site` parameter (the original hard-coded
        # 'www.livemint.com' inside a placeholder-less f-string); the
        # default value preserves the existing behavior and makes this
        # handler consistent with the abcnews one above.
        response = (supabase.table('news')
                    .select("*")
                    .eq('source', f'www.{site}.com')
                    .is_('image_url', 'null')
                    .order('published_date', desc=True)
                    .limit(15)
                    .execute())
        # FIX: iterate rows directly instead of indexing via range(len(...)).
        for row in response.data:
            try:
                # FIX: added a timeout so one slow article page cannot
                # stall the whole backfill run.
                res = requests.get(row['article_url'], timeout=10)
                soup = BeautifulSoup(res.text, 'html.parser')
                images = soup.find_all(['img'])
                # NOTE(review): index 18 is empirical for livemint's current
                # markup and will silently break on a redesign.
                image_url = images[18]['src']
                stored.append(image_url)
                await insert_image(image_url, row['id'])
            except Exception as e:
                # Best-effort per article: report and continue with the rest.
                print(e)
        return {"RESULTS": stored}
    except requests.RequestException as e:
        return {"error": str(e), "status_code": 500}