Spaces:

nooshinbah
/

PDB_Metadata_Explorer

Sleeping

File size: 3,293 Bytes

eb2312a
 
 
 
 
 
bd5ad98
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
eb2312a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bd5ad98
 
eb2312a
bd5ad98
eb2312a
bd5ad98
 
 
 
 
 
 
 
 
 
 
 
eb2312a
bd5ad98
 
eb2312a
 
 
 
bd5ad98
 
 
 
 
 
 
 
 
 
 
 
eb2312a
885b9dd
eb2312a
 
e10cecd
bd5ad98
 
885b9dd
 
bd5ad98
885b9dd
eb2312a

import html
import io
import os

import gradio as gr
import pandas as pd
import requests
from Bio import PDB

# Function to format PDB headers with bullet points and HTML styling
def format_pdb_headers(headers, indent=0):
    """Render a (possibly nested) header mapping as a flat run of HTML ``<div>`` rows.

    Args:
        headers: Mapping of header name to value. A dict value is rendered
            recursively one level deeper, a list value as one ``- item`` row
            per element, and any other value as a single ``name: value`` row.
        indent: Number of indentation characters to prefix to each row at this
            level; each nesting level adds 4.

    Returns:
        The HTML fragment as a string (empty string for an empty mapping).
    """
    # Ordinary spaces collapse when the HTML is rendered, so the nesting would
    # be invisible; non-breaking spaces keep the indentation on screen.
    pad = "&nbsp;" * indent
    parts = []
    for header, content in headers.items():
        # Names and values originate from a downloaded file: escape them so
        # they cannot inject markup into the page.
        label = html.escape(str(header))
        if isinstance(content, dict):
            parts.append(f'<div style="font-weight: bold;">{pad}{label}:</div>')
            parts.append(format_pdb_headers(content, indent + 4))
        elif isinstance(content, list):
            parts.append(f'<div style="font-weight: bold;">{pad}{label}:</div>')
            parts.extend(
                f'<div>{pad}- {html.escape(str(elem))}</div>' for elem in content
            )
        else:
            value = html.escape(str(content))
            parts.append(f'<div>{pad}<strong>{label}:</strong> {value}</div>')
    return "".join(parts)

# Function to download and parse the PDB file
def fetch_and_parse_pdb(url):
    """Download a PDB file and render its header and residue list as HTML.

    Args:
        url: URL of a PDB-format file.

    Returns:
        An HTML fragment containing a "Header Information" section followed by
        a "Components Information" section with one row per residue, or a
        plain error message string when the file cannot be downloaded or read.
    """
    # NOTE(review): the URL is user-supplied and fetched server-side; consider
    # restricting the allowed hosts if internal services are reachable.
    try:
        # A timeout keeps a stalled server from hanging the request forever.
        response = requests.get(url, timeout=30)
    except requests.RequestException:
        # Malformed/empty URLs, DNS failures, timeouts, refused connections...
        return "Failed to download the PDB file. Please check the URL."
    if response.status_code != 200:
        return "Failed to download the PDB file. Please check the URL."

    # Parse straight from memory instead of writing a URL-named file into the
    # working directory: no leftover file if parsing fails, no clash between
    # concurrent requests, and no failure on URLs ending in "/".
    structure_id = url.split("/")[-1]
    pdb_text = response.content.decode("utf-8", errors="replace")
    parser = PDB.PDBParser()
    try:
        structure = parser.get_structure(
            structure_id, io.StringIO(pdb_text, newline=None)
        )
    except ValueError:
        # e.g. Biopython rejects an empty file.
        return "Failed to parse the PDB file. Please check the URL."

    header_output = format_pdb_headers(structure.header)

    parts = [
        f"<h3>Header Information:</h3>{header_output}<hr>",
        "<h3>Components Information:</h3>",
    ]

    # One zebra-striped row per residue, in model/chain/residue order.
    row_index = 0
    for model in structure:
        for chain in model:
            # Values come from the downloaded file, so escape before embedding.
            chain_id = html.escape(str(chain.id))
            for residue in chain:
                color = "#f9f9f9" if row_index % 2 == 0 else "#ffffff"
                residue_name = html.escape(str(residue.get_resname()))
                residue_number = html.escape(str(residue.id[1]))
                # NOTE(review): this shows is_disordered() under the label
                # "Missing"; confirm whether the label should read "Disordered".
                disordered = html.escape(str(residue.is_disordered()))
                parts.append(
                    f'<div style="background-color: {color}; padding: 5px;">'
                    f'- <strong>Chain ID:</strong> {chain_id} | '
                    f'<strong>Residue Name:</strong> {residue_name} | '
                    f'<strong>Residue Number:</strong> {residue_number} | '
                    f'<strong>Missing:</strong> {disordered}</div>'
                )
                row_index += 1

    return "".join(parts)

# Assemble the UI pieces first, then wire them into a single Gradio interface.
_pdb_url_box = gr.Textbox(
    label="PDB File URL",
    value="https://files.rcsb.org/download/1TUP.pdb",
    elem_id="pdb-url",
)
_app_description = (
    "This tool allows you to input the URL of a Protein Data Bank (PDB) file "
    "and retrieve its metadata."
)

iface = gr.Interface(
    fn=fetch_and_parse_pdb,
    inputs=_pdb_url_box,
    outputs="html",  # HTML output so the styled markup is displayed
    title="PDB Metadata Explorer",
    description=_app_description,
)

# Start the app.
iface.launch()