nooshinbah committed on
Commit
bd5ad98
·
verified ·
1 Parent(s): 5579a62

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +48 -19
app.py CHANGED
@@ -4,6 +4,22 @@ import requests
4
  import os
5
  import pandas as pd
6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  # Function to download and parse the PDB file
8
  def fetch_and_parse_pdb(url):
9
  # Download the PDB file
@@ -19,39 +35,52 @@ def fetch_and_parse_pdb(url):
19
  # Parse the PDB file
20
  parser = PDB.PDBParser()
21
  structure = parser.get_structure(pdb_filename, pdb_filename)
22
-
23
- # Prepare the header information for output
24
  headers = structure.header
25
- header_items = []
26
 
27
- for key, value in headers.items():
28
- if isinstance(value, list):
29
- for item in value:
30
- header_items.append((key, item))
31
- else:
32
- header_items.append((key, value))
 
 
 
 
 
 
33
 
34
- # Convert to DataFrame for better formatting
35
- df = pd.DataFrame(header_items, columns=['Header', 'Content'])
36
 
37
  # Clean up the downloaded file
38
  os.remove(pdb_filename)
39
 
40
- return df
 
 
 
 
 
 
 
 
 
 
 
41
 
42
  # Create Gradio interface with an informative title and description
43
  iface = gr.Interface(
44
  fn=fetch_and_parse_pdb,
45
  inputs="text",
46
- outputs=gr.Dataframe(interactive=True),
47
- title="PDB Header Explorer",
48
  description=(
49
  "This tool allows you to input the URL of a Protein Data Bank (PDB) file "
50
- "and retrieve its structural metadata. The information is displayed in an "
51
- "interactive table, enabling sorting and filtering for easier analysis. "
52
- "Understanding PDB headers is crucial for researchers and biologists as it provides "
53
- "insight into the molecular structure and function of proteins, which is foundational "
54
- "in fields like drug discovery, bioinformatics, and structural biology."
55
  )
56
  )
57
 
 
4
  import os
5
  import pandas as pd
6
 
7
# Function to format PDB headers with bullet points and HTML styling
def format_pdb_headers(headers, indent=0):
    """Render a (possibly nested) header mapping as a string of HTML rows.

    Each entry of ``headers`` becomes one or more ``<div>`` elements:

    * a ``dict`` value produces a bold label row followed by its own entries,
      rendered recursively four columns deeper;
    * a ``list`` value produces a bold label row followed by one ``- item``
      row per element;
    * any other value produces a single ``<strong>label:</strong> value`` row.

    Args:
        headers: Mapping of header name to value. Keys and values are
            converted with ``str()`` before rendering.
        indent: Number of leading non-breaking spaces placed before the text
            of each row at this nesting level.

    Returns:
        The concatenated HTML markup, or ``""`` for an empty mapping.
    """
    # Function-scope import so the block does not depend on the file's
    # top-level import list.
    from html import escape

    # Ordinary leading spaces are collapsed by HTML renderers, which made the
    # nesting invisible; non-breaking spaces keep the indentation on screen.
    ind_text = '&nbsp;' * indent

    rows = []
    for header, content in headers.items():
        # Header text originates from a downloaded file, so it is escaped
        # before being embedded in markup.
        label = escape(str(header))
        if isinstance(content, dict):
            rows.append(f'<div style="font-weight: bold;">{ind_text}{label}:</div>')
            rows.append(format_pdb_headers(content, indent + 4))
        elif isinstance(content, list):
            rows.append(f'<div style="font-weight: bold;">{ind_text}{label}:</div>')
            rows.extend(
                f'<div>{ind_text}- {escape(str(elem))}</div>' for elem in content
            )
        else:
            rows.append(
                f'<div>{ind_text}<strong>{label}:</strong> {escape(str(content))}</div>'
            )
    return "".join(rows)
22
+
23
  # Function to download and parse the PDB file
24
  def fetch_and_parse_pdb(url):
25
  # Download the PDB file
 
35
  # Parse the PDB file
36
  parser = PDB.PDBParser()
37
  structure = parser.get_structure(pdb_filename, pdb_filename)
38
+
39
+ # Prepare header information
40
  headers = structure.header
41
+ header_output = format_pdb_headers(headers)
42
 
43
+ # Prepare components information
44
+ components_info = []
45
+ for model in structure:
46
+ for chain in model:
47
+ for residue in chain:
48
+ residue_info = {
49
+ "Chain ID": chain.id,
50
+ "Residue Name": residue.get_resname(),
51
+ "Residue Number": residue.id[1],
52
+ "Missing": residue.is_disordered()
53
+ }
54
+ components_info.append(residue_info)
55
 
56
+ # Convert components info to DataFrame
57
+ components_df = pd.DataFrame(components_info)
58
 
59
  # Clean up the downloaded file
60
  os.remove(pdb_filename)
61
 
62
+ # Combine header output and components into one string with color coding
63
+ full_output = f"<h3>Header Information:</h3>{header_output}<hr>"
64
+ full_output += "<h3>Components Information:</h3>"
65
+ for index, row in components_df.iterrows():
66
+ color = "#f9f9f9" if index % 2 == 0 else "#ffffff"
67
+ full_output += f'<div style="background-color: {color}; padding: 5px;">'
68
+ full_output += f'- <strong>Chain ID:</strong> {row["Chain ID"]} | '
69
+ full_output += f'<strong>Residue Name:</strong> {row["Residue Name"]} | '
70
+ full_output += f'<strong>Residue Number:</strong> {row["Residue Number"]} | '
71
+ full_output += f'<strong>Missing:</strong> {row["Missing"]}</div>'
72
+
73
+ return full_output
74
 
75
  # Create Gradio interface with an informative title and description
76
  iface = gr.Interface(
77
  fn=fetch_and_parse_pdb,
78
  inputs="text",
79
+ outputs="html", # Change output to HTML for styled display
80
+ title="PDB Metadata Explorer",
81
  description=(
82
  "This tool allows you to input the URL of a Protein Data Bank (PDB) file "
83
+ "and retrieve its metadata."
 
 
 
 
84
  )
85
  )
86