Spaces:
Running
Running
File size: 6,334 Bytes
72e6d3f bad2aa2 f59d018 72e6d3f 7e60c15 5d8f070 74ed26d f8ee914 bad2aa2 72e6d3f 43e8e4e 09c5387 72e6d3f 43e8e4e 72e6d3f 0a66a84 72e6d3f bad2aa2 72e6d3f 7d8d250 72e6d3f 43e8e4e 085c288 4526ad3 085c288 e5868d9 051a524 e5868d9 085c288 e5868d9 4526ad3 085c288 f59d018 4526ad3 72e6d3f 74ed26d f59d018 72e6d3f f59d018 72e6d3f 74ed26d af49d90 74ed26d e340a3c 74ed26d f8eba03 74ed26d ca69c77 f59d018 af49d90 ca69c77 f59d018 7385806 ca69c77 78e320e ca69c77 f8ee914 97b207f f8ee914 2bdc593 9528425 f8ee914 2bdc593 f8ee914 63f5def f8ee914 2bdc593 f8ee914 2bdc593 f8ee914 980e4d4 63f5def 980e4d4 72e6d3f |
|
import requests
from bs4 import BeautifulSoup
from fastapi import FastAPI#, Request
from fastapi.responses import StreamingResponse
from pydantic import BaseModel
import re
import replicate
import os
import json
from supabase import create_client, Client
# Supabase connection settings come from the DB_URL / DB_KEY environment
# variables; a single shared client is built once at import time.
url: str = os.getenv("DB_URL")
key: str = os.getenv("DB_KEY")
supabase: Client = create_client(url, key)
# Request body accepted by the POST endpoints in this file.
class Item(BaseModel):
    # Address of the page whose content is fetched and processed.
    url: str
    # Interpolated into the /summarize-v2 prompt as the requested number of lines.
    max_tokens: int
# Application object; the route decorators in this file register handlers on it.
app = FastAPI()
def extract_article_content(url, timeout=10):
    """Fetch a web page and return the text of its headings and paragraphs.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block indefinitely on an unresponsive host.

    Returns:
        The text of every ``<h1>`` and ``<p>`` element joined by single
        spaces, or ``""`` when the page cannot be fetched or parsed. Callers
        treat the empty string as "no content", so this function never raises.
    """
    try:
        response = requests.get(url, timeout=timeout)
        # Treat HTTP error pages (404, 500, ...) as "no content" instead of
        # scraping the text of the error page as if it were the article.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        return ' '.join(node.text for node in soup.find_all(['h1', 'p']))
    except Exception as e:
        # Deliberately best-effort: report the failure rather than hiding it,
        # but keep the "empty string on error" contract callers rely on.
        print(f"extract_article_content failed for {url!r}: {e}")
        return ""
@app.get("/")
async def root():
    # Health-check route: always answers with the same fixed payload.
    health = dict(status="OK")
    return health
@app.post("/summarize-v1")
def root(item: Item):
    """Summarize the news article found at ``item.url``.

    Returns the complete summary text once the model has finished, or
    ``{"summary": ""}`` when no article text could be extracted.
    """
    # Declared with plain ``def`` on purpose: the scrape and the Replicate
    # stream are blocking calls, and FastAPI runs non-async handlers in a
    # worker thread instead of stalling the event loop.
    try:
        article = extract_article_content(item.url)
        if not article:
            return {'summary': ""}
        events = replicate.stream(
            "snowflake/snowflake-arctic-instruct",
            input={
                "prompt": "summarize this following news article:" + article,
                "temperature": 0.2,
            },
        )
        # Drain the whole stream and return the concatenated text in one go.
        return "".join(str(event) for event in events)
    except requests.RequestException as e:
        # NOTE(review): extract_article_content handles its own errors, and
        # Replicate failures are presumably not RequestException subclasses,
        # so this branch is likely unreachable - confirm before relying on it.
        return {"error": str(e), "status_code": 500}
# @app.post("/summarize-v2")
# async def root(item: Item):
# try:
# article = extract_article_content(item.url)
# if len(article) == 0:
# return {'summary': ""}
# def event_generator():
# for event in replicate.stream("snowflake/snowflake-arctic-instruct", input={
# "prompt": f"summarize this news article in {item.max_tokens} lines:" + article,
# "temperature": 0.2,
# "max_new_tokens" : 1000
# }):
# # Yield the event as a string
# yield str(event)
# #print(str(event), end="")
# # Use StreamingResponse to stream the events
# return StreamingResponse(event_generator(), media_type='text/event-stream')
# except requests.RequestException as e:
# return {"error": str(e), "status_code": 500}
def _content_from_sse_line(line):
    """Return the text fragment carried by one streamed line, or ``None``.

    Expects lines shaped like ``data: {json}`` whose JSON holds the fragment
    at ``choices[0].delta.content``. Anything else (lines without the
    ``data:`` prefix, a ``[DONE]`` sentinel, malformed JSON, chunks without
    content) yields ``None`` so a single odd line cannot abort the stream.
    """
    text = line.decode("utf-8", errors="replace") if isinstance(line, bytes) else line
    if not text.startswith("data:"):
        return None
    payload = text[len("data:"):].strip()
    if not payload or payload == "[DONE]":
        return None
    try:
        content = json.loads(payload)["choices"][0]["delta"]["content"]
    except (ValueError, KeyError, IndexError, TypeError):
        return None
    # A null/empty content would otherwise be emitted as the text "None".
    return str(content) if content else None


@app.post("/summarize-v2")
def root(item: Item):
    """Stream a summary of the article at ``item.url``.

    The summary is requested to be ``item.max_tokens`` lines long and is sent
    back incrementally as the upstream model produces it. Returns
    ``{"summary": ""}`` when no article text could be extracted.
    """
    # Declared with plain ``def`` on purpose: the scrape is a blocking call,
    # and FastAPI runs non-async handlers in a worker thread instead of
    # stalling the event loop.
    try:
        article = extract_article_content(item.url)
        if not article:
            return {'summary': ""}
        api_url = 'https://yashxx07-hf-llm-api.hf.space/api/v1/chat/completions'
        headers = {"content-type": "application/json"}
        data = {
            "model": "mixtral-8x7b",
            "messages": [
                {
                    "role": "user",
                    "content": f"summarize this news article in {item.max_tokens} lines:" + article
                }
            ],
            "temperature": 0.5,
            "top_p": 0.95,
            "max_tokens": -1,
            "use_cache": False,
            "stream": True
        }

        def get_stream(url):
            # Both the session and the response are closed when the stream
            # ends or the client disconnects. The timeout is (connect, read)
            # seconds so a dead upstream cannot hold the connection forever.
            with requests.Session() as session, session.post(
                url, headers=headers, json=data, stream=True, timeout=(10, 120)
            ) as resp:
                for line in resp.iter_lines():
                    if not line:
                        continue
                    fragment = _content_from_sse_line(line)
                    if fragment is not None:
                        yield fragment

        # Use StreamingResponse to forward fragments as they arrive.
        return StreamingResponse(get_stream(api_url), media_type='text/event-stream')
    except requests.RequestException as e:
        # NOTE(review): the generator runs after this function has returned,
        # so upstream errors raised while streaming are not caught here.
        return {"error": str(e), "status_code": 500}
@app.post("/extract-content")
def root(item: Item):
    """Return the heading and paragraph text scraped from ``item.url``."""
    # Declared with plain ``def`` on purpose: the scrape is a blocking call,
    # and FastAPI runs non-async handlers in a worker thread instead of
    # stalling the event loop.
    try:
        article = extract_article_content(item.url)
        if not article:
            # Payload kept exactly as before for existing clients.
            return {'ERROR': "AHHHHHHHHH"}
        return {"content": article}
    except requests.RequestException as e:
        return {"error": str(e), "status_code": 500}
async def insert_image(url, id):
    """Store ``url`` as the ``image_url`` of the ``news`` row with this ``id``.

    Args:
        url: Image address to save.
        id: Primary key of the row to update (name kept for existing callers).
    """
    # ``supabase`` is the synchronous client (it is used without ``await``
    # elsewhere in this file), so ``execute()`` returns a response object,
    # not an awaitable. Awaiting it raised TypeError after the update had
    # already been sent; the call is therefore made directly.
    supabase.table('news').update({'image_url': url}).eq('id', id).execute()
@app.get("/extract-images")
async def root(site: str = 'abcnews.go'):
    """Find and store an image for recent articles of ``site`` that lack one.

    Looks at up to 15 of the newest ``news`` rows from ``www.{site}.com``
    without an ``image_url`` and returns the image addresses it found under
    the ``RESULTS`` key.
    """
    try:
        ii_list = []
        response = (
            supabase.table('news')
            .select("*")
            .eq('source', f'www.{site}.com')
            .is_('image_url', 'null')
            .order('published_date', desc=True)
            .limit(15)
            .execute()
        )
        for row in response.data:
            try:
                # Bounded wait so one unresponsive site cannot hang the request.
                res = requests.get(row['article_url'], timeout=10)
                # Do not pick an image out of an HTTP error page.
                res.raise_for_status()
                soup = BeautifulSoup(res.text, 'html.parser')
                images = soup.find_all(['img'])
                # Site-specific heuristic: the second <img> on the page is
                # taken as the article image (presumably the first one is
                # page chrome - TODO confirm).
                image_url = images[1]['src']
                ii_list.append(image_url)
                await insert_image(image_url, row['id'])
            except Exception as e:
                # Best-effort per article: report the failure and move on.
                print(e)
        return {"RESULTS": ii_list}
    except requests.RequestException as e:
        return {"error": str(e), "status_code": 500}
@app.get("/extract-images-livemint")
async def root(site: str = 'livemint'):
    """Find and store an image for recent livemint articles that lack one.

    Looks at up to 15 of the newest ``news`` rows from ``www.livemint.com``
    without an ``image_url`` and returns the image addresses it found under
    the ``RESULTS`` key.
    """
    # NOTE: ``site`` is accepted for interface compatibility but is not used;
    # the source is always www.livemint.com.
    try:
        ii_list = []
        response = (
            supabase.table('news')
            .select("*")
            .eq('source', 'www.livemint.com')
            .is_('image_url', 'null')
            .order('published_date', desc=True)
            .limit(15)
            .execute()
        )
        for row in response.data:
            try:
                # Bounded wait so one unresponsive site cannot hang the request.
                res = requests.get(row['article_url'], timeout=10)
                # Do not pick an image out of an HTTP error page.
                res.raise_for_status()
                soup = BeautifulSoup(res.text, 'html.parser')
                images = soup.find_all(['img'])
                # Site-specific heuristic: the <img> at index 18 is taken as
                # the article image (presumably earlier ones are page
                # chrome - TODO confirm).
                image_url = images[18]['src']
                ii_list.append(image_url)
                await insert_image(image_url, row['id'])
            except Exception as e:
                # Best-effort per article: report the failure and move on.
                print(e)
        return {"RESULTS": ii_list}
    except requests.RequestException as e:
        return {"error": str(e), "status_code": 500}
|