import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import re
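# ---------------------
# Dependencies
# ---------------------
# Assumption (not part of the original file): when deployed as a Hugging Face Space,
# a requirements.txt covering the non-standard-library imports used below is needed,
# roughly:
#   gradio
#   torch
#   transformers
#   PyPDF2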
# ---------------------
# Disclaimer
# ---------------------
DISCLAIMER = """
**Disclaimer:**
This application is provided for **research and educational purposes only**.
All summaries are generated using an automated language model and may contain inaccuracies or omissions.
It is not intended to replace professional judgment, peer-reviewed references, or expert consultation.
The authors and developers assume no legal liability for any misuse, misinterpretation, or unintended consequences
arising from the use of this tool. Please use responsibly and cross-check results with credible sources.
"""
# ---------------------
# Model Setup
# ---------------------
MODEL_NAME = "allenai/scibert_scivocab_cased"  # Example model for tokenization/embedding (not used in this demo)
SUMMARIZATION_MODEL = "allenai/led-base-16384"  # Example summarization model with a large (16,384-token) context window

# Load the summarization model and tokenizer
summarizer_tokenizer = AutoTokenizer.from_pretrained(SUMMARIZATION_MODEL)
summarizer_model = AutoModelForSeq2SeqLM.from_pretrained(SUMMARIZATION_MODEL)
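# Optional device placement (a sketch, not in the original setup): move the model to
# a GPU when one is available; summarize_text below sends its tensors to
# summarizer_model.device, so this also works on CPU-only Spaces.
device = "cuda" if torch.cuda.is_available() else "cpu"
summarizer_model = summarizer_model.to(device)
summarizer_model.eval()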
# ---------------------
# Utility Functions
# ---------------------
def extract_text_from_pdf(pdf_file):
    # Extract text from a PDF file. Requires PyPDF2 (or a similar library), which
    # generally works on Hugging Face Spaces.
    try:
        import PyPDF2
        reader = PyPDF2.PdfReader(pdf_file)
        text = ""
        for page in reader.pages:
            # extract_text() can return None for pages without a text layer
            text += (page.extract_text() or "") + "\n"
        return text
    except Exception as e:
        return f"Error reading PDF: {e}"
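# Alternative sketch (assumption, not part of the original app): PyPDF2 is in
# maintenance mode and its maintained successor is the pypdf package, which keeps
# the same PdfReader interface, so a drop-in variant could look like this:
def extract_text_with_pypdf(pdf_file):
    from pypdf import PdfReader  # requires "pypdf" in requirements.txt
    reader = PdfReader(pdf_file)
    return "\n".join((page.extract_text() or "") for page in reader.pages)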
def clean_text(text):
    # Basic cleaning: collapse whitespace and trim
    text = re.sub(r'\s+', ' ', text).strip()
    return text
def summarize_text(text):
    # Summarize the given text with the LED model; inputs longer than the model's
    # 16,384-token window are truncated (see summarize_long_text below for a chunked sketch).
    inputs = summarizer_tokenizer(text, return_tensors="pt", truncation=True, max_length=16384)
    input_ids = inputs["input_ids"].to(summarizer_model.device)
    attention_mask = inputs["attention_mask"].to(summarizer_model.device)
    # Per the LED documentation, place global attention on the first token for summarization
    global_attention_mask = torch.zeros_like(input_ids)
    global_attention_mask[:, 0] = 1
    with torch.no_grad():
        summary_ids = summarizer_model.generate(
            input_ids,
            attention_mask=attention_mask,
            global_attention_mask=global_attention_mask,
            num_beams=4,
            length_penalty=2.0,
            max_length=512,
            early_stopping=True,
        )
    summary = summarizer_tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    return summary
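# Optional sketch (assumption): LED's 16,384-token window covers most single papers,
# but for longer inputs the text could be summarized chunk by chunk and the partial
# summaries joined. Not wired into the app; chunk_tokens is an illustrative value.
def summarize_long_text(text, chunk_tokens=16000):
    token_ids = summarizer_tokenizer(text, truncation=False)["input_ids"]
    chunks = [token_ids[i:i + chunk_tokens] for i in range(0, len(token_ids), chunk_tokens)]
    partial_summaries = [
        summarize_text(summarizer_tokenizer.decode(chunk, skip_special_tokens=True))
        for chunk in chunks
    ]
    return " ".join(partial_summaries)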
def analyze_text(text):
    # In a more elaborate system, you might:
    # 1. Extract main findings using IE or NER.
    # 2. Identify methods mentioned.
    # 3. Extract references (regex patterns for citations).
    # Here we just do a simple summarization (see the illustrative sketch below).
    text_clean = clean_text(text)
    if len(text_clean) < 50:
        # Return a 3-tuple so the caller can always unpack (summary, methods, references)
        return "Please provide a longer text snippet or PDF.", "", ""
    summary = summarize_text(text_clean)
    # Placeholder values for key methods and references (a real app would use NLP-based extraction)
    methods = "Key methods extraction is not yet implemented."
    references = "Reference extraction is not yet implemented."
    return summary, methods, references
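# Illustrative sketch only (assumptions, not wired into the app): the comments in
# analyze_text mention NER/IE-based findings extraction, method identification, and
# regex-based citation extraction. A crude keyword/pattern starting point could be:
METHOD_KEYWORDS = ["fMRI", "EEG", "MEG", "optogenetics", "calcium imaging", "ANOVA", "regression"]

def extract_candidate_methods(text):
    # Return the known method keywords that occur in the text (case-insensitive).
    return sorted({kw for kw in METHOD_KEYWORDS if re.search(re.escape(kw), text, re.IGNORECASE)})

def extract_candidate_references(text):
    # Match simple "(Author, 2020)" / "(Author et al., 2020)" style citations;
    # many real-world citation formats will be missed.
    return re.findall(r"\([A-Z][A-Za-z-]+(?: et al\.)?,\s*\d{4}[a-z]?\)", text)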
def process_input(pdf_file, text_snippet):
    # If a PDF is provided, extract its text
    input_text = ""
    if pdf_file is not None:
        input_text = extract_text_from_pdf(pdf_file)
    # If a text snippet is provided, append it
    if text_snippet is not None and text_snippet.strip():
        input_text = input_text + " " + text_snippet.strip()
    if not input_text.strip():
        return "No input provided.", "", ""
    summary, methods, references = analyze_text(input_text)
    return summary, methods, references
# ---------------------
# Gradio Interface
# ---------------------
with gr.Blocks() as demo:
    gr.Markdown("# NeuroLit Explorer")
    gr.Markdown(DISCLAIMER)
    gr.Markdown("**Instructions:** Upload a PDF or paste a text snippet from a neuroscience article, then click 'Summarize'.")
    with gr.Row():
        pdf_input = gr.File(label="Upload PDF")
        text_input = gr.Textbox(label="Or Paste Article Text")
    summarize_button = gr.Button("Summarize")
    summary_output = gr.Textbox(label="Summary")
    methods_output = gr.Textbox(label="Key Methods")
    references_output = gr.Textbox(label="Relevant References")
    summarize_button.click(
        fn=process_input,
        inputs=[pdf_input, text_input],
        outputs=[summary_output, methods_output, references_output],
    )

demo.launch()