Spaces:
Running
on
Zero
Running
on
Zero
fix: pass full residue set to knapsack
Browse files
app.py
CHANGED
|
@@ -68,12 +68,11 @@ Path(log_file).touch()
|
|
| 68 |
|
| 69 |
logger = logging.getLogger("instanovo")
|
| 70 |
logger.setLevel(logging.INFO)
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
logger.addHandler(stream_handler)
|
| 77 |
|
| 78 |
|
| 79 |
def load_models_and_knapsack():
|
|
@@ -147,33 +146,29 @@ def load_models_and_knapsack():
|
|
| 147 |
if not knapsack_exists:
|
| 148 |
logger.info("Knapsack not found or failed to load. Generating knapsack...")
|
| 149 |
try:
|
| 150 |
-
|
| 151 |
special_and_nonpositive = list(RESIDUE_SET.special_tokens) + [
|
| 152 |
-
k for k, v in
|
| 153 |
]
|
| 154 |
if special_and_nonpositive:
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
if res in residue_masses_knapsack
|
| 164 |
-
}
|
| 165 |
-
|
| 166 |
-
if not residue_masses_knapsack:
|
| 167 |
raise ValueError("No valid residues with positive mass found for knapsack generation.")
|
| 168 |
|
|
|
|
| 169 |
KNAPSACK = Knapsack.construct_knapsack(
|
| 170 |
-
residue_masses=
|
| 171 |
-
residue_indices=
|
| 172 |
max_mass=MAX_MASS,
|
| 173 |
mass_scale=MASS_SCALE,
|
| 174 |
)
|
| 175 |
logger.info(f"Knapsack generated. Saving to {KNAPSACK_DIR}...")
|
| 176 |
-
KNAPSACK_DIR.mkdir(parents=True, exist_ok=True)
|
| 177 |
KNAPSACK.save(str(KNAPSACK_DIR))
|
| 178 |
logger.info("Knapsack saved.")
|
| 179 |
except Exception as e:
|
|
@@ -717,6 +712,10 @@ with gr.Blocks(
|
|
| 717 |
* **Knapsack Beam Search:** use this for the best results and highest peptide recall, but is about 10x slower than Greedy Search.
|
| 718 |
* `delta_mass_ppm` shows the lowest absolute precursor mass error (ppm) across isotopes 0-1 for the final sequence.
|
| 719 |
* Check logs for progress, especially for large files or slower methods.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 720 |
""",
|
| 721 |
elem_classes="feedback"
|
| 722 |
)
|
|
|
|
| 68 |
|
| 69 |
logger = logging.getLogger("instanovo")
|
| 70 |
logger.setLevel(logging.INFO)
|
| 71 |
+
file_handler = logging.FileHandler(log_file)
|
| 72 |
+
file_handler.setLevel(logging.INFO)
|
| 73 |
+
formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
|
| 74 |
+
file_handler.setFormatter(formatter)
|
| 75 |
+
logger.addHandler(file_handler)
|
|
|
|
| 76 |
|
| 77 |
|
| 78 |
def load_models_and_knapsack():
|
|
|
|
| 146 |
if not knapsack_exists:
|
| 147 |
logger.info("Knapsack not found or failed to load. Generating knapsack...")
|
| 148 |
try:
|
| 149 |
+
residue_masses_for_calc = dict(RESIDUE_SET.residue_masses.copy())
|
| 150 |
special_and_nonpositive = list(RESIDUE_SET.special_tokens) + [
|
| 151 |
+
k for k, v in residue_masses_for_calc.items() if v <= 0
|
| 152 |
]
|
| 153 |
if special_and_nonpositive:
|
| 154 |
+
logger.info(f"Excluding special/non-positive mass residues from knapsack: {special_and_nonpositive}")
|
| 155 |
+
for res in set(special_and_nonpositive):
|
| 156 |
+
if res in residue_masses_for_calc:
|
| 157 |
+
del residue_masses_for_calc[res]
|
| 158 |
+
|
| 159 |
+
full_residue_indices = RESIDUE_SET.residue_to_index
|
| 160 |
+
|
| 161 |
+
if not residue_masses_for_calc: # Check if any residues are left for calculation
|
|
|
|
|
|
|
|
|
|
|
|
|
| 162 |
raise ValueError("No valid residues with positive mass found for knapsack generation.")
|
| 163 |
|
| 164 |
+
logger.info("Generating knapsack. This will take a few minutes, please be patient.")
|
| 165 |
KNAPSACK = Knapsack.construct_knapsack(
|
| 166 |
+
residue_masses=residue_masses_for_calc,
|
| 167 |
+
residue_indices=full_residue_indices,
|
| 168 |
max_mass=MAX_MASS,
|
| 169 |
mass_scale=MASS_SCALE,
|
| 170 |
)
|
| 171 |
logger.info(f"Knapsack generated. Saving to {KNAPSACK_DIR}...")
|
|
|
|
| 172 |
KNAPSACK.save(str(KNAPSACK_DIR))
|
| 173 |
logger.info("Knapsack saved.")
|
| 174 |
except Exception as e:
|
|
|
|
| 712 |
* **Knapsack Beam Search:** use this for the best results and highest peptide recall, but is about 10x slower than Greedy Search.
|
| 713 |
* `delta_mass_ppm` shows the lowest absolute precursor mass error (ppm) across isotopes 0-1 for the final sequence.
|
| 714 |
* Check logs for progress, especially for large files or slower methods.
|
| 715 |
+
|
| 716 |
+
**Links:**
|
| 717 |
+
* [InstaNovo enables diffusion-powered de novo peptide sequencing in large-scale proteomics experiments](https://www.nature.com/articles/s42256-025-01019-5), Eloff, Kalogeropoulos et al. 2025, Nature Machine Intelligence.
|
| 718 |
+
* [GitHub Repository for InstaNovo](https://github.com/instadeepai/instanovo)
|
| 719 |
""",
|
| 720 |
elem_classes="feedback"
|
| 721 |
)
|