# Spaces: Sleeping
# (Hosting-page status banner captured along with this file; not part of the program.)
import html
import os
import tempfile

import gradio as gr
import pandas as pd
import requests
from Bio import PDB
# Function to format PDB headers with bullet points and HTML styling | |
def format_pdb_headers(headers, indent=0):
    """Render a (possibly nested) header mapping as an HTML fragment.

    Each key/value pair becomes one ``<div>``:

    * a dict value prints the key in bold, then its own entries rendered
      recursively with four more columns of indentation;
    * a list value prints the key in bold, then one ``- item`` line per
      element;
    * any other value prints ``<strong>key:</strong> value`` on one line.

    Args:
        headers: Mapping of header names to values.
        indent: Number of leading columns to indent every line by.

    Returns:
        The concatenated HTML string; empty when ``headers`` is empty.
    """
    # Runs of ordinary spaces collapse when HTML is rendered, so the
    # indentation is emitted as non-breaking spaces to stay visible.
    pad = "&nbsp;" * indent
    parts = []
    for header, content in headers.items():
        # Keys and values originate from a downloaded file, so they are
        # escaped before being placed into markup.
        label = html.escape(str(header))
        if isinstance(content, dict):
            parts.append(f'<div style="font-weight: bold;">{pad}{label}:</div>')
            parts.append(format_pdb_headers(content, indent + 4))
        elif isinstance(content, list):
            parts.append(f'<div style="font-weight: bold;">{pad}{label}:</div>')
            parts.extend(
                f'<div>{pad}- {html.escape(str(elem))}</div>' for elem in content
            )
        else:
            value = html.escape(str(content))
            parts.append(f'<div>{pad}<strong>{label}:</strong> {value}</div>')
    return "".join(parts)
# Function to download and parse the PDB file | |
def fetch_and_parse_pdb(url):
    """Download a PDB file and summarise it as an HTML string.

    The file is fetched with ``requests``, written to a temporary file,
    parsed with ``Bio.PDB.PDBParser``, and the temporary file is removed
    again even if parsing raises.

    Args:
        url: Address of the PDB file to download.

    Returns:
        An HTML fragment with a "Header Information" section (the parsed
        header rendered by ``format_pdb_headers``) followed by a
        "Components Information" section with one striped row per residue.
        If the request fails or the response status is not 200, a plain
        error message string is returned instead.
    """
    failure_message = "Failed to download the PDB file. Please check the URL."

    # A timeout keeps an unresponsive server from blocking the request
    # forever; malformed URLs and network errors are reported the same
    # way as a non-200 response instead of propagating.
    try:
        response = requests.get(url, timeout=30)
    except requests.RequestException:
        return failure_message
    if response.status_code != 200:
        return failure_message

    # The last URL path segment is used only as the structure's id; the
    # fallback covers URLs that end in "/".
    structure_id = url.rstrip("/").split("/")[-1] or "structure"

    # A uniquely named temporary file avoids writing into the working
    # directory under a URL-derived name.
    tmp = tempfile.NamedTemporaryFile(suffix=".pdb", delete=False)
    try:
        with tmp:
            tmp.write(response.content)
        parser = PDB.PDBParser()
        structure = parser.get_structure(structure_id, tmp.name)
    finally:
        os.remove(tmp.name)

    header_output = format_pdb_headers(structure.header)

    parts = [
        f"<h3>Header Information:</h3>{header_output}<hr>",
        "<h3>Components Information:</h3>",
    ]
    index = 0
    for model in structure:
        for chain in model:
            chain_id = html.escape(str(chain.id))
            for residue in chain:
                # Alternate the row background for readability.
                color = "#f9f9f9" if index % 2 == 0 else "#ffffff"
                res_name = html.escape(str(residue.get_resname()))
                # is_disordered() describes the residue's disorder state,
                # not whether it is absent from the file, so the column
                # is labelled "Disordered".
                disordered = bool(residue.is_disordered())
                parts.append(
                    f'<div style="background-color: {color}; padding: 5px;">'
                    f'- <strong>Chain ID:</strong> {chain_id} | '
                    f'<strong>Residue Name:</strong> {res_name} | '
                    f'<strong>Residue Number:</strong> {residue.id[1]} | '
                    f'<strong>Disordered:</strong> {disordered}</div>'
                )
                index += 1
    return "".join(parts)
# Create Gradio interface with an informative title and description | |
# Input widget, pre-filled with an example download URL.
_pdb_url_input = gr.Textbox(
    value="https://files.rcsb.org/download/1TUP.pdb",
    label="PDB File URL",
    elem_id="pdb-url",
)

# Blurb passed to the interface as its description.
_app_description = (
    "This tool allows you to input the URL of a Protein Data Bank (PDB) file "
    "and retrieve its metadata."
)

iface = gr.Interface(
    fn=fetch_and_parse_pdb,
    inputs=_pdb_url_input,
    outputs="html",  # the callback returns an HTML string for styled display
    title="PDB Metadata Explorer",
    description=_app_description,
)

# Start the web interface.
iface.launch()