Spaces:

nooshinbah
/

PDB_Metadata_Explorer

Sleeping

App Files Files Community

PDB_Metadata_Explorer / app.py

nooshinbah

Update app.py

e10cecd verified 4 months ago

raw

history blame contribute delete

3.29 kB

	import html
	import os
	import tempfile

	import gradio as gr
	import pandas as pd
	import requests
	from Bio import PDB

	# Function to format PDB headers with bullet points and HTML styling
	def format_pdb_headers(headers, indent=0):
	    """Render a (possibly nested) header mapping as a string of HTML divs.

	    Args:
	        headers: Mapping of header names to values. A ``dict`` value is
	            rendered recursively, a ``list`` value as one "- item" line per
	            element, and any other value inline after the header name.
	        indent: Number of leading non-breaking spaces for this nesting
	            level; every recursive level adds 4.

	    Returns:
	        The concatenated HTML fragment, or ``""`` for an empty mapping.
	    """
	    # Plain spaces at the start of a <div> are collapsed when the HTML is
	    # rendered, which would hide the nesting; non-breaking spaces survive.
	    pad = "&nbsp;" * indent
	    parts = []
	    for header, content in headers.items():
	        # Names and values may originate from an external file, so they are
	        # escaped before being embedded in markup.
	        label = html.escape(str(header))
	        if isinstance(content, dict):
	            parts.append(f'<div style="font-weight: bold;">{pad}{label}:</div>')
	            parts.append(format_pdb_headers(content, indent + 4))
	        elif isinstance(content, list):
	            parts.append(f'<div style="font-weight: bold;">{pad}{label}:</div>')
	            parts.extend(
	                f'<div>{pad}- {html.escape(str(elem))}</div>' for elem in content
	            )
	        else:
	            value = html.escape(str(content))
	            parts.append(f'<div>{pad}<strong>{label}:</strong> {value}</div>')
	    return "".join(parts)

	# Function to download and parse the PDB file
	def fetch_and_parse_pdb(url):
	    """Download a PDB file from *url* and return its metadata as HTML.

	    The result contains the structure header (rendered by
	    ``format_pdb_headers``) followed by one row per residue showing its
	    chain ID, residue name, residue number and the value of
	    ``residue.is_disordered()``.

	    Args:
	        url: Address of the PDB file to download.

	    Returns:
	        An HTML fragment describing the structure, or a plain error message
	        when the URL is empty or the download fails.
	    """
	    failure_message = "Failed to download the PDB file. Please check the URL."

	    url = (url or "").strip()
	    if not url:
	        return failure_message

	    # A timeout keeps a stalled server from blocking the request forever;
	    # connection and malformed-URL errors are reported like a non-200 status.
	    try:
	        response = requests.get(url, timeout=30)
	    except requests.RequestException:
	        return failure_message
	    if response.status_code != 200:
	        return failure_message

	    # The last URL segment is used only as the structure identifier.
	    structure_id = url.rstrip("/").rsplit("/", 1)[-1] or "structure"

	    # Use a temporary file instead of a URL-derived name in the working
	    # directory: such a name can be empty, or can match an existing file
	    # (e.g. app.py) that would then be overwritten and deleted.
	    with tempfile.NamedTemporaryFile(suffix=".pdb", delete=False) as tmp:
	        tmp.write(response.content)
	        tmp_path = tmp.name
	    try:
	        parser = PDB.PDBParser()
	        structure = parser.get_structure(structure_id, tmp_path)
	    finally:
	        # Remove the download even when parsing raises.
	        os.remove(tmp_path)

	    header_output = format_pdb_headers(structure.header)

	    # One striped row per residue. Values are escaped because they come from
	    # the downloaded file.
	    # NOTE(review): the "Missing" label displays residue.is_disordered(),
	    # which looks like a disorder flag rather than an "absent residue"
	    # indicator - confirm the intended label.
	    rows = []
	    for model in structure:
	        for chain in model:
	            chain_id = html.escape(str(chain.id))
	            for residue in chain:
	                color = "#f9f9f9" if len(rows) % 2 == 0 else "#ffffff"
	                residue_name = html.escape(str(residue.get_resname()))
	                residue_number = html.escape(str(residue.id[1]))
	                disordered = html.escape(str(residue.is_disordered()))
	                rows.append(
	                    f'<div style="background-color: {color}; padding: 5px;">'
	                    f'- <strong>Chain ID:</strong> {chain_id} | '
	                    f'<strong>Residue Name:</strong> {residue_name} | '
	                    f'<strong>Residue Number:</strong> {residue_number} | '
	                    f'<strong>Missing:</strong> {disordered}</div>'
	                )

	    return (
	        f"<h3>Header Information:</h3>{header_output}<hr>"
	        "<h3>Components Information:</h3>"
	        + "".join(rows)
	    )

	# Create Gradio interface with an informative title and description
	iface = gr.Interface(
	    fn=fetch_and_parse_pdb,
	    inputs=gr.Textbox(
	        value="https://files.rcsb.org/download/1TUP.pdb",
	        label="PDB File URL",
	        elem_id="pdb-url",
	    ),
	    outputs="html",  # fetch_and_parse_pdb returns an HTML fragment
	    title="PDB Metadata Explorer",
	    description=(
	        "This tool allows you to input the URL of a Protein Data Bank (PDB) file "
	        "and retrieve its metadata."
	    ),
	)

	# Start the server only when this file is executed as a script, so that
	# importing the module (e.g. to reuse or test the functions above) does not
	# launch the web interface as a side effect.
	if __name__ == "__main__":
	    iface.launch()