import html
import io
import os

import gradio as gr
import pandas as pd
import requests
from Bio import PDB
# Function to format PDB headers as indented plain text with bullet points
def format_pdb_headers(headers, indent=0):
    """Render a (possibly nested) header mapping as indented text lines.

    Args:
        headers: Mapping of header names to values. A dict value is
            rendered recursively, a list value as one "- item" line per
            element, and anything else as "name: value".
        indent: Number of spaces placed before every line at this level;
            each nested mapping adds 4 more.

    Returns:
        The formatted text. Every emitted line ends with a newline; an
        empty mapping yields an empty string.
    """
    ind_text = " " * indent
    pieces = []
    for header, content in headers.items():
        if isinstance(content, dict):
            # The leading newline leaves a blank line ahead of each nested
            # section so it stands apart from the preceding entries.
            pieces.append(f"\n{ind_text}{header}:\n")
            pieces.append(format_pdb_headers(content, indent + 4))
        elif isinstance(content, list):
            pieces.append(f"{ind_text}{header}:\n")
            pieces.extend(f"{ind_text}- {elem}\n" for elem in content)
        else:
            pieces.append(f"{ind_text}{header}: {content}\n")
    # Join once instead of growing a string with repeated +=.
    return "".join(pieces)
# Function to download and parse the PDB file
def fetch_and_parse_pdb(url):
    """Download a PDB file and render its header and residues as HTML.

    Args:
        url: HTTP(S) address of a PDB-format file.

    Returns:
        An HTML string containing the formatted header followed by one
        row per residue, or a plain error message when the download fails.
    """
    download_error = "Failed to download the PDB file. Please check the URL."
    try:
        # A timeout keeps a stalled server from hanging the UI callback.
        response = requests.get(url, timeout=30)
    except requests.RequestException:
        # Malformed URLs, DNS failures and timeouts get the same message as
        # a non-200 reply instead of surfacing a traceback to the user.
        return download_error
    if response.status_code != 200:
        return download_error

    # Parse from memory rather than a file in the working directory: no
    # name collisions between concurrent requests, no empty filename when
    # the URL ends in "/", and nothing left behind if parsing raises.
    structure_id = url.rstrip("/").split("/")[-1] or "structure"
    parser = PDB.PDBParser()
    structure = parser.get_structure(structure_id, io.StringIO(response.text))

    # Prepare header information
    header_output = format_pdb_headers(structure.header)

    # The result is shown in an HTML component, so file-derived text is
    # escaped and layout is expressed with tags instead of bare newlines.
    parts = [
        "<h3>Header Information:</h3>",
        f"<pre>{html.escape(header_output)}</pre>",
        "<h3>Components Information:</h3>",
    ]

    row_index = 0
    for model in structure:
        for chain in model:
            for residue in chain:
                # Alternate the row background for readability.
                color = "#f9f9f9" if row_index % 2 == 0 else "#ffffff"
                chain_id = html.escape(str(chain.id))
                residue_name = html.escape(str(residue.get_resname()))
                # is_disordered() flags residues with alternate locations;
                # it does not report missing residues, so the row is
                # labelled accordingly.
                parts.append(
                    f'<div style="background-color: {color};">'
                    f"- Chain ID: {chain_id} | "
                    f"Residue Name: {residue_name} | "
                    f"Residue Number: {residue.id[1]} | "
                    f"Disordered: {residue.is_disordered()}"
                    "</div>"
                )
                row_index += 1

    return "\n".join(parts)
# Assemble the Gradio app: a single URL textbox in, rendered HTML out
_pdb_url_box = gr.Textbox(
    label="PDB File URL",
    value="https://files.rcsb.org/download/1TUP.pdb",
    elem_id="pdb-url",
)
_app_description = (
    "This tool allows you to input the URL of a Protein Data Bank (PDB) file "
    "and retrieve its metadata."
)
iface = gr.Interface(
    fn=fetch_and_parse_pdb,
    inputs=_pdb_url_box,
    outputs="html",  # the callback's string is rendered as HTML
    title="PDB Metadata Explorer",
    description=_app_description,
)
# Start serving the interface
iface.launch()