Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -4,6 +4,22 @@ import requests
|
|
4 |
import os
|
5 |
import pandas as pd
|
6 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
# Function to download and parse the PDB file
|
8 |
def fetch_and_parse_pdb(url):
|
9 |
# Download the PDB file
|
@@ -19,39 +35,52 @@ def fetch_and_parse_pdb(url):
|
|
19 |
# Parse the PDB file
|
20 |
parser = PDB.PDBParser()
|
21 |
structure = parser.get_structure(pdb_filename, pdb_filename)
|
22 |
-
|
23 |
-
# Prepare
|
24 |
headers = structure.header
|
25 |
-
|
26 |
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
|
34 |
-
# Convert to DataFrame
|
35 |
-
|
36 |
|
37 |
# Clean up the downloaded file
|
38 |
os.remove(pdb_filename)
|
39 |
|
40 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
41 |
|
42 |
# Create Gradio interface with an informative title and description
|
43 |
iface = gr.Interface(
|
44 |
fn=fetch_and_parse_pdb,
|
45 |
inputs="text",
|
46 |
-
outputs=
|
47 |
-
title="PDB
|
48 |
description=(
|
49 |
"This tool allows you to input the URL of a Protein Data Bank (PDB) file "
|
50 |
-
"and retrieve its
|
51 |
-
"interactive table, enabling sorting and filtering for easier analysis. "
|
52 |
-
"Understanding PDB headers is crucial for researchers and biologists as it provides "
|
53 |
-
"insight into the molecular structure and function of proteins, which is foundational "
|
54 |
-
"in fields like drug discovery, bioinformatics, and structural biology."
|
55 |
)
|
56 |
)
|
57 |
|
|
|
4 |
import os
|
5 |
import pandas as pd
|
6 |
|
7 |
+
# Function to format PDB headers with bullet points and HTML styling
|
8 |
+
def format_pdb_headers(headers, indent=0):
    """Render a (possibly nested) header mapping as an HTML fragment.

    Each key/value pair becomes one or more ``<div>`` elements:

    * a ``dict`` value emits a bold label followed by its own entries,
      rendered recursively four indent units deeper;
    * a ``list`` value emits a bold label followed by one ``- item`` row
      per element;
    * any other value emits a single ``<strong>key:</strong> value`` row.

    Keys and values are HTML-escaped before being interpolated, because
    the header text originates from a downloaded file and must not be
    able to inject markup into the rendered page.

    Args:
        headers: Mapping of header name to value (dict, list, or scalar).
        indent: Number of indent units to prefix each row with.

    Returns:
        The concatenated HTML string; empty string for an empty mapping.
    """
    # Local import keeps this block self-contained without touching the
    # file's top-level import list.
    import html

    # Plain leading spaces are collapsed by HTML rendering, so use
    # non-breaking spaces to make the nesting depth actually visible.
    pad = "&nbsp;" * indent
    parts = []
    for header, content in headers.items():
        label = html.escape(str(header))
        if isinstance(content, dict):
            parts.append(f'<div style="font-weight: bold;">{pad}{label}:</div>')
            parts.append(format_pdb_headers(content, indent + 4))
        elif isinstance(content, list):
            parts.append(f'<div style="font-weight: bold;">{pad}{label}:</div>')
            parts.extend(
                f"<div>{pad}- {html.escape(str(elem))}</div>" for elem in content
            )
        else:
            value = html.escape(str(content))
            parts.append(f"<div>{pad}<strong>{label}:</strong> {value}</div>")
    # Join once instead of growing a string with repeated `+=`.
    return "".join(parts)
|
22 |
+
|
23 |
# Function to download and parse the PDB file
|
24 |
def fetch_and_parse_pdb(url):
|
25 |
# Download the PDB file
|
|
|
35 |
# Parse the PDB file
|
36 |
parser = PDB.PDBParser()
|
37 |
structure = parser.get_structure(pdb_filename, pdb_filename)
|
38 |
+
|
39 |
+
# Prepare header information
|
40 |
headers = structure.header
|
41 |
+
header_output = format_pdb_headers(headers)
|
42 |
|
43 |
+
# Prepare components information
|
44 |
+
components_info = []
|
45 |
+
for model in structure:
|
46 |
+
for chain in model:
|
47 |
+
for residue in chain:
|
48 |
+
residue_info = {
|
49 |
+
"Chain ID": chain.id,
|
50 |
+
"Residue Name": residue.get_resname(),
|
51 |
+
"Residue Number": residue.id[1],
|
52 |
+
"Missing": residue.is_disordered()
|
53 |
+
}
|
54 |
+
components_info.append(residue_info)
|
55 |
|
56 |
+
# Convert components info to DataFrame
|
57 |
+
components_df = pd.DataFrame(components_info)
|
58 |
|
59 |
# Clean up the downloaded file
|
60 |
os.remove(pdb_filename)
|
61 |
|
62 |
+
# Combine header output and components into one string with color coding
|
63 |
+
full_output = f"<h3>Header Information:</h3>{header_output}<hr>"
|
64 |
+
full_output += "<h3>Components Information:</h3>"
|
65 |
+
for index, row in components_df.iterrows():
|
66 |
+
color = "#f9f9f9" if index % 2 == 0 else "#ffffff"
|
67 |
+
full_output += f'<div style="background-color: {color}; padding: 5px;">'
|
68 |
+
full_output += f'- <strong>Chain ID:</strong> {row["Chain ID"]} | '
|
69 |
+
full_output += f'<strong>Residue Name:</strong> {row["Residue Name"]} | '
|
70 |
+
full_output += f'<strong>Residue Number:</strong> {row["Residue Number"]} | '
|
71 |
+
full_output += f'<strong>Missing:</strong> {row["Missing"]}</div>'
|
72 |
+
|
73 |
+
return full_output
|
74 |
|
75 |
# Create Gradio interface with an informative title and description
|
76 |
iface = gr.Interface(
    title="PDB Metadata Explorer",
    description="This tool allows you to input the URL of a Protein Data Bank (PDB) file and retrieve its metadata.",
    # A single text box supplies the URL argument of the callback.
    inputs="text",
    # The callback returns an HTML string, so render the result as HTML.
    outputs="html",
    fn=fetch_and_parse_pdb,
)
|
86 |
|