|
import argparse |
|
import csv |
|
from pathlib import Path |
|
from typing import Optional |
|
import logging |
|
import gradio as gr |
|
import os |
|
import uuid |
|
from datetime import datetime |
|
import numpy as np |
|
|
|
import pandas as pd |
|
from joblib import Parallel, delayed |
|
from tqdm import tqdm |
|
|
|
from protenix.data.data_pipeline import DataPipeline |
|
from protenix.utils.file_io import dump_gzip_pickle |
|
from configs.configs_base import configs as configs_base |
|
from configs.configs_data import data_configs |
|
from configs.configs_inference import inference_configs |
|
from protenix.config import parse_configs |
|
from protenix.data.dataloader import KeySumBalancedSampler |
|
from protenix.data.dataset import BaseSingleDataset |
|
from runner.inference import download_infercence_cache, update_inference_configs, infer_detect, InferenceRunner |
|
from scripts.prepare_training_data import run_gen_data |
|
from torch.utils.data import DataLoader |
|
|
|
|
|
def process_data(path):
    """Generate data for ``path``, falling back to distillation mode.

    ``run_gen_data`` is first invoked with ``distillation=False``. If that
    attempt raises an ordinary exception, the failure is logged and the
    same call is repeated with ``distillation=True``. In both attempts
    ``path`` is used as the input path and the bioassembly output
    directory, and the indices CSV is written to ``<path>/output.csv``.

    Args:
        path: Directory passed to ``run_gen_data`` as input and output
            location.

    Returns:
        ``False`` if the regular run succeeded, ``True`` if the
        distillation fallback was used.

    Raises:
        Exception: Whatever the distillation fallback run raises is
            propagated to the caller unchanged.
    """

    def _generate(distillation):
        # Both attempts share every argument except the distillation flag.
        run_gen_data(
            input_path=path,
            output_indices_csv=os.path.join(path, 'output.csv'),
            bioassembly_output_dir=path,
            cluster_file=None,
            distillation=distillation,
            num_workers=1,
        )

    try:
        _generate(False)
    except Exception:
        # Deliberately not a bare ``except``: KeyboardInterrupt/SystemExit
        # must abort the job instead of silently starting a second run.
        logging.getLogger(__name__).warning(
            "run_gen_data failed for %s; retrying with distillation=True",
            path,
            exc_info=True,
        )
        print('Use Distillation')
        _generate(True)
        return True
    return False
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|