Spaces:
Sleeping
Sleeping
| import json | |
| import os | |
| import tempfile | |
| import Bio.PDB | |
| import Bio.SeqUtils | |
| from Bio import pairwise2 | |
| from run_pretrained_model import run_on_folder | |
def get_seq_based_on_template(seq: str, template_path: str, output_path: str) -> None:
    """Write a single-chain PDB for ``seq`` using residues taken from a template.

    The first chain of the first model in ``template_path`` is read. Template
    residues that have no ``CA`` atom, or whose residue name does not map to a
    one-letter code, are ignored. ``seq`` is then globally aligned to the
    template sequence and a new chain ``A`` is built with one residue per
    position of ``seq``, numbered from 1:

    * positions aligned to a template residue receive a copy of that template
      residue (its residue name and atoms are kept as they are in the template);
    * positions aligned to a gap receive a placeholder residue holding a single
      ``CA`` atom at the origin.

    Args:
        seq: One-letter amino-acid sequence to build the structure for.
        template_path: Path of the template PDB file.
        output_path: Path the resulting PDB file is written to.

    Raises:
        ValueError: If no alignment between ``seq`` and the template sequence
            could be produced.
    """
    # Collect the usable residues of the template's first chain.
    parser = Bio.PDB.PDBParser()
    template_structure = parser.get_structure("template", template_path)
    chain = next(template_structure[0].get_chains())
    template_residues = [
        res for res in chain.get_residues()
        if "CA" in res and Bio.SeqUtils.seq1(res.get_resname()) not in ("X", "", " ")
    ]
    template_seq = "".join(Bio.SeqUtils.seq1(res.get_resname()) for res in template_residues)

    # Align the query sequence to the template sequence.
    alignments = pairwise2.align.globalxx(seq, template_seq, one_alignment_only=True)
    if not alignments:
        # Indexing an empty result would only surface as a bare IndexError;
        # this is expected to happen when either sequence is empty.
        raise ValueError(
            f"could not align the sequence (length {len(seq)}) to the template "
            f"sequence (length {len(template_seq)}) from {template_path}"
        )
    alignment = alignments[0]
    aligned_seq, aligned_template_seq = alignment.seqA, alignment.seqB

    # Build the output hierarchy: structure -> model 0 -> chain "A".
    new_structure = Bio.PDB.Structure.Structure("new_structure")
    new_model = Bio.PDB.Model.Model(0)
    new_structure.add(new_model)
    new_chain = Bio.PDB.Chain.Chain("A")  # Using chain ID 'A' for the output
    new_model.add(new_chain)

    template_ind = -1  # index of the last template residue consumed by the alignment
    seq_ind = 0  # 1-based number of the last query residue consumed by the alignment
    print(aligned_seq, aligned_template_seq, len(template_residues))
    for seq_res, template_res in zip(aligned_seq, aligned_template_seq):
        # Advance both cursors first so they stay in sync with the alignment
        # columns, including columns that produce no output residue.
        if template_res != "-":
            template_ind += 1
        if seq_res != "-":
            seq_ind += 1

        if seq_res == "-":
            # Template residue with no counterpart in the query: drop it.
            continue

        if template_res == "-":
            # Query residue with no template coverage: emit a placeholder
            # residue carrying one CA atom at the origin.
            seq_res_3_letter = Bio.SeqUtils.seq3(seq_res).upper()
            residue = Bio.PDB.Residue.Residue((' ', seq_ind, ' '), seq_res_3_letter, '')
            # The atom name (its id inside the residue) is "CA" so it agrees
            # with the full name and with the `"CA" in residue` check above.
            atom = Bio.PDB.Atom.Atom("CA", (0.0, 0.0, 0.0), 1.0, 1.0, ' ', "CA", 0, element="C")
            residue.add(atom)
        else:
            # Reuse the template residue, renumbered to the query position.
            residue = template_residues[template_ind].copy()
            residue.detach_parent()  # make sure the copy is not tied to the template chain
            residue.id = (' ', seq_ind, ' ')
        new_chain.add(residue)

    io = Bio.PDB.PDBIO()
    io.set_structure(new_structure)
    io.save(output_path)
def run_on_sample_seqs(seq1: str, template1_path: str, seq2: str, template_path2: str, output_path: str,
                       run_config_path: str) -> None:
    """Build template-based inputs for two sequences, run the model, and save the result.

    Inside a temporary directory this writes ``prot_r.pdb`` (from ``seq1`` and
    ``template1_path``) and ``prot_l.pdb`` (from ``seq2`` and
    ``template_path2``), writes a ``jsons/input.json`` file naming both, and
    calls ``run_on_folder`` with an ``output`` sub-folder. The file
    ``predictions/input_predicted_joined.pdb`` from that output folder is then
    moved to ``output_path``. The temporary directory is removed afterwards,
    even when one of the steps raises.

    Args:
        seq1: Sequence used for the ``prot_r.pdb`` input.
        template1_path: Template PDB path used together with ``seq1``.
        seq2: Sequence used for the ``prot_l.pdb`` input.
        template_path2: Template PDB path used together with ``seq2``.
        output_path: Destination path of the predicted PDB file.
        run_config_path: Configuration path forwarded to ``run_on_folder``.
    """
    # Local import: only this function needs it, for the final move.
    import shutil

    # The context manager guarantees cleanup on both success and failure.
    with tempfile.TemporaryDirectory() as temp_dir_path:
        get_seq_based_on_template(seq1, template1_path, f"{temp_dir_path}/prot_r.pdb")
        get_seq_based_on_template(seq2, template_path2, f"{temp_dir_path}/prot_l.pdb")

        # NOTE(review): the structure entries are bare file names; how
        # run_on_folder resolves them is not visible here - confirm.
        json_data = {
            "input_r_structure": "prot_r.pdb",
            "input_l_structure": "prot_l.pdb",
        }
        tmp_json_folder = f"{temp_dir_path}/jsons"
        os.makedirs(tmp_json_folder, exist_ok=True)
        # Close the file before the model runs so the JSON is fully flushed to disk.
        with open(f"{tmp_json_folder}/input.json", "w") as json_file:
            json.dump(json_data, json_file)

        tmp_output_folder = f"{temp_dir_path}/output"
        run_on_folder(tmp_json_folder, tmp_output_folder, run_config_path, skip_relaxation=True,
                      long_sequence_inference=False, skip_exists=False)

        # shutil.move falls back to copy + delete when the temporary directory
        # and output_path are on different filesystems, where os.rename fails.
        shutil.move(tmp_output_folder + "/predictions/input_predicted_joined.pdb", output_path)
        print("moved output to ", output_path)