import gradio as gr
import requests
import pandas as pd
import io
from docx import Document
import tempfile
import os
# NOTE(review): this install runs only after `from docx import Document` has
# already been executed earlier in the file, so it cannot rescue a missing
# python-docx package — consider declaring the dependency in the project's
# requirements instead of installing at import time.
os.system("pip install python-docx")
# Endpoint that fetch_pubmed_articles sends its search requests to.
API_BASE_URL = "https://pubmed-api-jwfq.onrender.com/search_pubmed"
global_df = None # Global variable to store search results for export
def fetch_pubmed_articles(query, max_results=10, page=1, sort_by="Year", filter_journal="All", min_year=None, max_year=None):
    """Fetch PubMed articles from the search API, then filter, sort and format them.

    Args:
        query: Free-text search query forwarded to the API.
        max_results: Number of results requested per page.
        page: Page number forwarded to the API.
        sort_by: ``"Year"`` (newest first), ``"Title"`` or ``"Journal"``;
            any other value leaves the API order untouched.
        filter_journal: Case-insensitive substring that an article's journal
            must contain; ``"All"`` or an empty value disables the filter.
        min_year: Lowest publication year to keep (falsy disables the bound).
        max_year: Highest publication year to keep (falsy disables the bound).

    Returns:
        A ``(text, df)`` tuple. On success ``text`` is the formatted result
        listing and ``df`` is a DataFrame of the kept articles. On an API
        error, an empty result set, or any exception, ``text`` is a
        human-readable message and ``df`` is ``None``.
    """
    # Generous limit so a slow backend start-up can still answer, while a
    # dead connection no longer blocks the UI forever.
    request_timeout_seconds = 60
    try:
        # Let requests build the query string so spaces, '&', '#', ... in the
        # user's query are percent-encoded instead of corrupting the URL.
        params = {
            "query": query,
            "max_results": int(max_results),
            "page": int(page),
        }
        response = requests.get(API_BASE_URL, params=params, timeout=request_timeout_seconds)
        if response.status_code != 200:
            return f"⚠️ API Error: {response.status_code} - {response.text}", None

        articles = response.json()
        if not articles:
            return "No articles found for this query.", None

        # Normalise the year to an int so filtering and sorting can compare
        # it numerically; missing or unparsable years become 0.
        for article in articles:
            try:
                article["Year"] = int(article["Year"])
            except (KeyError, TypeError, ValueError):
                article["Year"] = 0

        # Apply journal filtering
        if filter_journal and filter_journal != "All":
            wanted_journal = filter_journal.lower()
            articles = [a for a in articles if wanted_journal in a["Journal"].lower()]

        # Apply year filtering
        if min_year:
            lowest_year = int(min_year)
            articles = [a for a in articles if a["Year"] >= lowest_year]
        if max_year:
            highest_year = int(max_year)
            articles = [a for a in articles if a["Year"] <= highest_year]

        if not articles:
            # Everything was filtered out: say so instead of returning a
            # blank page and an empty DataFrame.
            return "No articles match the selected filters.", None

        # Apply sorting
        if sort_by == "Year":
            articles.sort(key=lambda x: x["Year"], reverse=True)
        elif sort_by == "Title":
            articles.sort(key=lambda x: x["Title"])
        elif sort_by == "Journal":
            articles.sort(key=lambda x: x["Journal"])

        # Format results. The abstract is wrapped in <details>/<summary> so
        # it stays collapsed until the reader clicks "Show Abstract".
        formatted_results = [
            f"## 📰 {article['Title']}\n"
            f"📖 **{article['Journal']}** ({article['Year']})\n"
            f"👨🔬 **{article['Authors']}**\n"
            f"🔗 [Read on PubMed]({article['PubMed_URL']})\n\n"
            f"<details><summary>📄 **Show Abstract**</summary>\n"
            f"{article['Abstract']}\n</details>"
            f"\n---\n"
            for article in articles
        ]

        df = pd.DataFrame(articles)
        return "\n\n".join(formatted_results), df
    except Exception as e:
        # UI boundary: report the failure as text rather than crashing the app.
        return f"⚠️ Error fetching data: {str(e)}", None
def export_results(df, format_type):
    """Export search results to a temporary CSV or DOCX file.

    A file path is returned instead of a BytesIO object to avoid a TypeError
    in Gradio.

    Args:
        df: DataFrame of articles. The DOCX export reads the ``Title``,
            ``Journal``, ``Year``, ``Authors``, ``PubMed_URL`` and
            ``Abstract`` columns.
        format_type: ``"CSV"`` or ``"DOCX"`` (matched case-insensitively).

    Returns:
        Path of the written temporary file, or ``None`` when ``df`` is
        ``None`` or empty.

    Raises:
        ValueError: If ``format_type`` is not a supported format.
    """
    if df is None or df.empty:
        return None

    # Normalise once so the file suffix and the branch taken always agree
    # (previously "csv" produced an empty ".csv" file).
    fmt = format_type.upper()
    if fmt not in ("CSV", "DOCX"):
        raise ValueError(f"Unsupported export format: {format_type!r}")

    # Only reserve a unique path here; the handle is closed before the real
    # writer opens the file by name.
    with tempfile.NamedTemporaryFile(delete=False, suffix=f".{fmt.lower()}") as temp_file:
        temp_file_path = temp_file.name

    try:
        if fmt == "CSV":
            df.to_csv(temp_file_path, index=False)
        else:
            doc = Document()
            doc.add_heading("PubMed Search Results", level=1)
            for _, row in df.iterrows():
                # str() guards against non-string titles (e.g. NaN).
                doc.add_heading(str(row["Title"]), level=2)
                doc.add_paragraph(f"📖 Journal: {row['Journal']} ({row['Year']})")
                doc.add_paragraph(f"👨🔬 Authors: {row['Authors']}")
                doc.add_paragraph(f"🔗 Link: {row['PubMed_URL']}")
                doc.add_paragraph(f"📄 Abstract: {row['Abstract']}")
                doc.add_paragraph("---")
            doc.save(temp_file_path)
    except Exception:
        # Do not leave a half-written temp file behind when the export fails.
        os.remove(temp_file_path)
        raise

    return temp_file_path  # Return file path instead of BytesIO
with gr.Blocks() as app:
    gr.Markdown("""
# 🔍 **PubMed Search Tool with Advanced Features**
## 📖 **How to Use This App**
1️⃣ **Enter a Search Query** *(e.g., "Deep Learning in Psychiatry")*
2️⃣ **Set the Number of Results & Page Number** *(Default: 10 results per page)*
3️⃣ **Choose Sorting Option** *(Year, Title, or Journal - Default: Year)*
4️⃣ **(Optional) Filter by Journal Name** *(e.g., "Nature", "JAMA")*
5️⃣ **(Optional) Filter by Year Range** *(Set min & max year, e.g., 2015 - 2023)*
6️⃣ **Click "🔍 Search" to fetch results**
7️⃣ **Click "📂 Export as CSV" or "📄 Export as Word DOCX" to save articles**
8️⃣ **Click "📄 Show Abstract" under each result to expand full abstract**
## ⚠️ **Important Notes**
- **Sorting & Filtering can be combined** *(e.g., show only "Nature" articles from 2020-2024, sorted by Title)*
""")

    # --- Search inputs -----------------------------------------------------
    with gr.Row():
        query_input = gr.Textbox(label="🔎 Search Query", placeholder="Enter topic (e.g., 'Neural Networks in Psychiatry')", lines=1)
    with gr.Row():
        max_results_input = gr.Slider(1, 50, value=10, step=1, label="📄 Number of Results per Page")
        page_input = gr.Slider(1, 200, value=1, step=1, label="📄 Page Number")
    with gr.Row():
        sort_input = gr.Dropdown(choices=["Year", "Title", "Journal"], value="Year", label="🔄 Sort By")
        journal_filter_input = gr.Textbox(label="🎯 Filter by Journal (Optional)", placeholder="Enter journal name or leave blank")
    with gr.Row():
        min_year_input = gr.Number(label="📅 Min Year", value=None)
        max_year_input = gr.Number(label="📅 Max Year", value=None)

    # --- Actions and outputs -----------------------------------------------
    with gr.Row():
        search_button = gr.Button("🔍 Search")
        export_csv_button = gr.Button("📂 Export as CSV")
        export_docx_button = gr.Button("📄 Export as Word DOCX")

    results_output = gr.HTML()
    export_csv_output = gr.File(label="Download CSV")
    export_docx_output = gr.File(label="Download Word DOCX")

    # Per-session holder for the latest search results. A module-level global
    # would be shared by every visitor of the app, so one user's export could
    # contain another user's search.
    results_state = gr.State(None)

    def search_and_display(query, max_results, page, sort_by, journal_filter, min_year, max_year):
        """Run a search; return the text to display and the DataFrame to keep for export."""
        result_text, df = fetch_pubmed_articles(query, max_results, page, sort_by, journal_filter, min_year, max_year)
        return result_text, df

    def export_csv(df=None):
        """Export the session's stored results as CSV (None when nothing is stored)."""
        return export_results(df, "CSV")

    def export_docx(df=None):
        """Export the session's stored results as DOCX (None when nothing is stored)."""
        return export_results(df, "DOCX")

    search_button.click(
        search_and_display,
        inputs=[query_input, max_results_input, page_input, sort_input, journal_filter_input, min_year_input, max_year_input],
        outputs=[results_output, results_state],
    )
    export_csv_button.click(export_csv, inputs=results_state, outputs=export_csv_output)
    export_docx_button.click(export_docx, inputs=results_state, outputs=export_docx_output)


if __name__ == "__main__":
    app.launch(inbrowser=True)