Spaces:

nooshinbah
/

PDB_Metadata_Explorer

Sleeping

App Files Files Community

nooshinbah committed on Oct 19, 2024

Commit

bd5ad98

verified ·

1 Parent(s): 5579a62

Update app.py

Browse files

Files changed (1) hide show

app.py +48 -19

app.py CHANGED Viewed

@@ -4,6 +4,22 @@ import requests
 import os
 import pandas as pd
 # Function to download and parse the PDB file
 def fetch_and_parse_pdb(url):
     # Download the PDB file
@@ -19,39 +35,52 @@ def fetch_and_parse_pdb(url):
     # Parse the PDB file
     parser = PDB.PDBParser()
     structure = parser.get_structure(pdb_filename, pdb_filename)
-    # Prepare the header information for output
     headers = structure.header
-    header_items = []
-    for key, value in headers.items():
-        if isinstance(value, list):
-            for item in value:
-                header_items.append((key, item))
-        else:
-            header_items.append((key, value))
-    # Convert to DataFrame for better formatting
-    df = pd.DataFrame(header_items, columns=['Header', 'Content'])
     # Clean up the downloaded file
     os.remove(pdb_filename)
-    return df
 # Create Gradio interface with an informative title and description
 iface = gr.Interface(
     fn=fetch_and_parse_pdb,
     inputs="text",
-    outputs=gr.Dataframe(interactive=True),
-    title="PDB Header Explorer",
     description=(
         "This tool allows you to input the URL of a Protein Data Bank (PDB) file "
-        "and retrieve its structural metadata. The information is displayed in an "
-        "interactive table, enabling sorting and filtering for easier analysis. "
-        "Understanding PDB headers is crucial for researchers and biologists as it provides "
-        "insight into the molecular structure and function of proteins, which is foundational "
-        "in fields like drug discovery, bioinformatics, and structural biology."
     )
 )

 import os
 import pandas as pd
# Function to format PDB headers with bullet points and HTML styling
def format_pdb_headers(headers, indent=0):
    """Render a (possibly nested) header mapping as an HTML fragment.

    Each key/value pair becomes one ``<div>``. A dict value is rendered as a
    bold label followed by its own entries, formatted recursively with four
    extra columns of indentation. A list value is rendered as a bold label
    followed by one ``- item`` line per element. Any other value is rendered
    inline after a bold label.

    Args:
        headers: Mapping of header names to values (scalars, lists or
            nested mappings).
        indent: Number of indentation columns applied to every line
            produced at this nesting level.

    Returns:
        The concatenated HTML as a single string; an empty string when
        ``headers`` is empty.
    """
    # Imported locally so this function stays self-contained.
    import html

    # Ordinary leading spaces are collapsed when HTML is rendered, so
    # non-breaking spaces are needed for the indentation to be visible.
    pad = "&nbsp;" * indent
    parts = []
    for header, content in headers.items():
        # Header text originates from a downloaded file: escape it so it
        # cannot inject markup into the rendered page.
        label = html.escape(str(header))
        if isinstance(content, dict):
            parts.append(f'<div style="font-weight: bold;">{pad}{label}:</div>')
            parts.append(format_pdb_headers(content, indent + 4))
        elif isinstance(content, list):
            parts.append(f'<div style="font-weight: bold;">{pad}{label}:</div>')
            parts.extend(
                f'<div>{pad}- {html.escape(str(elem))}</div>' for elem in content
            )
        else:
            value = html.escape(str(content))
            parts.append(f'<div>{pad}<strong>{label}:</strong> {value}</div>')
    return "".join(parts)
 # Function to download and parse the PDB file
 def fetch_and_parse_pdb(url):
     # Download the PDB file
     # Parse the PDB file
     parser = PDB.PDBParser()
     structure = parser.get_structure(pdb_filename, pdb_filename)
+    # Prepare header information
     headers = structure.header
+    header_output = format_pdb_headers(headers)
+    # Prepare components information
+    components_info = []
+    for model in structure:
+        for chain in model:
+            for residue in chain:
+                residue_info = {
+                    "Chain ID": chain.id,
+                    "Residue Name": residue.get_resname(),
+                    "Residue Number": residue.id[1],
+                    "Missing": residue.is_disordered()
+                }
+                components_info.append(residue_info)
+    # Convert components info to DataFrame
+    components_df = pd.DataFrame(components_info)
     # Clean up the downloaded file
     os.remove(pdb_filename)
+    # Combine header output and components into one string with color coding
+    full_output = f"<h3>Header Information:</h3>{header_output}<hr>"
+    full_output += "<h3>Components Information:</h3>"
+    for index, row in components_df.iterrows():
+        color = "#f9f9f9" if index % 2 == 0 else "#ffffff"
+        full_output += f'<div style="background-color: {color}; padding: 5px;">'
+        full_output += f'- <strong>Chain ID:</strong> {row["Chain ID"]} | '
+        full_output += f'<strong>Residue Name:</strong> {row["Residue Name"]} | '
+        full_output += f'<strong>Residue Number:</strong> {row["Residue Number"]} | '
+        full_output += f'<strong>Missing:</strong> {row["Missing"]}</div>'
+    return full_output
 # Create Gradio interface with an informative title and description
 iface = gr.Interface(
     fn=fetch_and_parse_pdb,
     inputs="text",
+    outputs="html",  # Change output to HTML for styled display
+    title="PDB Metadata Explorer",
     description=(
         "This tool allows you to input the URL of a Protein Data Bank (PDB) file "
+        "and retrieve its metadata."
     )
 )