Spaces:
Runtime error
Runtime error
initial commit: research summarizer with FLAN-T5
Browse files
app.py
CHANGED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import requests
|
| 2 |
+
import gradio as gr
|
| 3 |
+
from bs4 import BeautifulSoup
|
| 4 |
+
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
|
| 5 |
+
|
| 6 |
+
# Lightweight instruction-tuned checkpoint used for summarization; the name is
# kept in one place so the tokenizer and the model always come from the same
# checkpoint.
MODEL_NAME = "google/flan-t5-base"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)

# Text-to-text generation pipeline wrapping the model and tokenizer above.
llm_pipeline = pipeline(
    "text2text-generation",
    model=model,
    tokenizer=tokenizer,
)
|
| 11 |
+
|
| 12 |
+
def extract_arxiv_abstract(url):
    """Download an arXiv page and return the text of its abstract.

    Args:
        url: Address of the arXiv page, as typed by the user.

    Returns:
        The abstract text with the leading "Abstract:" label removed and
        whitespace normalised. On any problem a human-readable message is
        returned instead of raising: messages starting with
        "Failed to fetch abstract:" signal invalid input or a network/HTTP
        error, and "Abstract not found on this page. Please check the URL."
        signals that the page has no abstract block.
    """
    # Function-scope import so the block does not depend on file-level imports.
    from urllib.parse import urlparse

    if not isinstance(url, str) or not url.strip():
        return "Failed to fetch abstract: no URL provided."
    target = url.strip()

    try:
        parsed = urlparse(target)
        host = (parsed.hostname or "").lower()
        # The URL comes straight from a public text box; only fetch arXiv
        # pages over HTTP(S) so the server cannot be pointed at arbitrary
        # (e.g. internal) hosts.
        is_arxiv_host = host == "arxiv.org" or host.endswith(".arxiv.org")
        if parsed.scheme not in ("http", "https") or not is_arxiv_host:
            return (
                "Failed to fetch abstract: "
                "please provide an http(s) arxiv.org URL."
            )

        headers = {"User-Agent": "Mozilla/5.0"}
        # A timeout prevents a stalled connection from hanging the request
        # forever; raise_for_status turns 4xx/5xx replies into errors.
        response = requests.get(target, headers=headers, timeout=15)
        response.raise_for_status()

        soup = BeautifulSoup(response.text, "html.parser")
        abstract = soup.find("blockquote", class_="abstract")
        if abstract is None:
            return "Abstract not found on this page. Please check the URL."

        # Join text nodes with a space so words around inline tags are not
        # glued together, then collapse runs of whitespace/newlines.
        text = " ".join(abstract.get_text(" ", strip=True).split())
        # Drop only the leading label, not every "Abstract:" in the text.
        label = "Abstract:"
        if text.startswith(label):
            text = text[len(label):].lstrip()
        if not text:
            return "Abstract not found on this page. Please check the URL."
        return text
    except Exception as e:
        # UI boundary: report the failure as text rather than crashing.
        return f"Failed to fetch abstract: {e}"
|
| 24 |
+
|
| 25 |
+
def summarize_research_paper(arxiv_url):
    """Fetch the abstract behind an arXiv URL and summarise it with the model.

    Args:
        arxiv_url: Address of the arXiv page, as typed by the user.

    Returns:
        A string containing the original abstract followed by the generated
        summary, or the error message produced by extract_arxiv_abstract
        when no abstract could be obtained.
    """
    abstract_text = extract_arxiv_abstract(arxiv_url)

    # extract_arxiv_abstract reports problems as plain strings. Both prefixes
    # below are error messages, not abstracts, so they must be shown to the
    # user as-is instead of being fed to the model.
    if abstract_text.startswith(("Failed", "Abstract not found")):
        return abstract_text
    # An empty abstract gives the model nothing to summarise.
    if not abstract_text.strip():
        return "Abstract not found on this page. Please check the URL."

    prompt = f"summarize: {abstract_text}"
    summary = llm_pipeline(prompt, max_new_tokens=256)[0]['generated_text']
    return f"📄 Original Abstract:\n{abstract_text}\n\n🧠 Summary:\n{summary}"
|
| 33 |
+
|
| 34 |
+
# Single text box in, single multi-line text box out.
url_input = gr.Textbox(
    label="arXiv Paper URL",
    placeholder="https://arxiv.org/abs/2306.10001",
)
summary_output = gr.Textbox(label="Summary", lines=15)

demo = gr.Interface(
    fn=summarize_research_paper,
    inputs=url_input,
    outputs=summary_output,
    title="🧠 Research Paper Summarizer",
    description="Summarizes arXiv paper abstracts using FLAN-T5. Works fast on CPU Hugging Face Spaces.",
)

# Start serving as soon as the module is executed.
demo.launch()
|