File size: 5,317 Bytes
dbdd71f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45b7f9a
dbdd71f
e530681
dbdd71f
 
 
45b7f9a
 
 
dbdd71f
45b7f9a
 
 
dbdd71f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
import pickle
import random
import shutil
from collections import Counter
from pathlib import Path

import numpy

import zipfile


SERVER_URL = "http://localhost:8000/"  # base URL of the local FHE server

# Maximum number of input rows the browser front-end will accept.
INPUT_BROWSER_LIMIT = 550

# Filesystem layout: raw data, deployment artifacts, and per-user workspaces.
DATA_DIR = Path("./data")
DEPLOYMENT_DIR = Path("./deployment")
ROOT_DIR = DEPLOYMENT_DIR / "users"

# Shared directories holding the compiled base / smoother FHE modules.
SHARED_BASE_MODULE_DIR = DEPLOYMENT_DIR / "base_modules"
SHARED_SMOOTHER_MODULE_DIR = DEPLOYMENT_DIR / "smoother_module"

# Per-user sub-directory names for evaluation keys and encrypted payloads.
KEY_SMOOTHER_MODULE_DIR = "EvaluationKey_Smoother"
KEY_BASE_MODULE_DIR = "EvaluationKey_Base_Modules"
ENCRYPTED_INPUT_DIR = "Encrypt_Input"
ENCRYPTED_OUTPUT_DIR = "Encrypt_Output"
FHE_COMPUTATION_TIMELINE = Path("server_fhe_computation_timeline.txt")  # server-side timing log

# Ancestry labels and their numeric IDs. NOTE: the IDs are not in label-list
# order — ID_POPULATION / POPULATION_ID are the authoritative mappings.
LABELS = ["European", "African", "Americas", "East Asian", "South Asian"]

ID_POPULATION = {0: "European", 3: "African", 2: "Americas", 1: "East Asian", 4: "South Asian"}

POPULATION_ID = {"European": 0, "African": 3, "Americas": 2, "East Asian": 1, "South Asian": 4}

COLORS = ["#FFD208", "#FFE46C", "#FFED9C", "#FFF6CE", "#FFD9A0"]  # chart palette, one per population

# Snapshot of load_pickle("data/meta_dict.pkl"). Key meanings (inferred from
# their use in process_data_for_base_modules — TODO confirm): A=#ancestries,
# C=total sequence length, M=window length, NW=#windows (C // M),
# CT=context size (~int(M * CTR)), CTR=context ratio, SS=smoother window size.
META = {"A": 5, "C": 1059079, "M": 10589, "NW": 100, "CT": 1059, "CTR": 0.1, "WSCM": 0.2, "SS": 75}

# Generations-back options offered when building simulated admixed genomes.
BUILD_GENS = [1, 2, 4, 6, 8, 12, 16, 24, 32, 48]

import os 

def load_pickle_from_zip(file_name, zip_path="data.zip"):
    """
    Load a pickle file from within a zip archive.

    Args:
        file_name (str): Name of the member file inside the archive.
        zip_path (str): Path to the zip archive on disk.

    Returns:
        The unpickled object.

    Raises:
        FileNotFoundError: If the zip archive does not exist.
        KeyError: If `file_name` is not a member of the archive.
    """
    if not os.path.exists(zip_path):
        raise FileNotFoundError(f"The zip file '{zip_path}' does not exist.")

    with zipfile.ZipFile(zip_path, 'r') as z:
        if file_name not in z.namelist():
            raise KeyError(f"The file '{file_name}' does not exist in the zip archive '{zip_path}'.")
        # SECURITY: pickle.load is only safe on trusted archives — it can
        # execute arbitrary code during deserialization.
        with z.open(file_name) as f:
            return pickle.load(f)

def generate_weighted_percentages():
    """Return five random percentages summing to (almost exactly) 100.

    The first entry is a dominant integer share drawn from [50, 70]; the
    remaining four split the leftover proportionally to independent
    uniform draws, rounded to two decimals.
    """
    dominant = random.randint(50, 70)
    leftover = 100 - dominant
    weights = [random.random() for _ in range(4)]

    weight_total = sum(weights)
    minors = [round(w / weight_total * leftover, 2) for w in weights]

    shares = [dominant, *minors]

    # Compensate for rounding drift so the shares total 100 again.
    drift = round(100 - sum(shares), 2)
    if drift != 0:
        shares[0] += drift  # fold the correction into the dominant share

    return shares


def select_random_ancestors():
    """Return all population IDs from ID_POPULATION in a uniformly random order."""
    ids = list(ID_POPULATION)
    random.shuffle(ids)
    return ids


def read_pickle(path):
    """Deserialize and return the object stored in the pickle file at *path*."""
    with open(path, "rb") as handle:
        return pickle.load(handle)


def compute_distribution(y, size=5):
    """Return the empirical class distribution of the label sequence *y*.

    Args:
        y: Sized iterable of integer class labels in ``range(size)``.
        size: Length of the output probability vector (default 5 populations).

    Returns:
        numpy array of length *size* with per-class relative frequencies.
    """
    total = len(y)
    dist = numpy.zeros(size)
    for label, count in Counter(y).items():
        dist[label] = count / total
    return dist


def slide_window(data, smooth_win_size, y=None):
    """Expand each position of *data* into a centered smoothing window.

    Args:
        data: Array of shape (N, W, A) — N samples, W windows, A values each.
        smooth_win_size: Width of the sliding smoothing window.
        y: Optional labels of shape (N, W), flattened alongside the windows.

    Returns:
        Tuple (X_slide, y_slide): X_slide has shape (N * W, smooth_win_size * A);
        y_slide is None when *y* is None, otherwise shape (N * W,).
    """
    n_seq, width, n_feat = data.shape

    # Mirror-pad the window axis so edge positions get full-width windows.
    half = (smooth_win_size + 1) // 2
    padded = numpy.pad(data, ((0, 0), (half, half), (0, 0)), mode="reflect")
    views = numpy.lib.stride_tricks.sliding_window_view(padded, (1, smooth_win_size, n_feat))
    flat = views[:, :width, :].reshape(n_seq * width, -1)
    labels = y.reshape(n_seq * width) if y is not None else None

    return flat, labels


# def read_vcf(vcf_file):
#     return allel.read_vcf(vcf_file, region=None, fields="*")


def clean_dir(directory):
    """Delete *directory* and all its contents; silently no-op when absent."""
    if not (directory.exists() and directory.is_dir()):
        return
    print(f"Removing existing model directory: {directory}")
    shutil.rmtree(directory)


def process_data_for_base_modules(meta, X_t):
    """Cut the full sequence into overlapping per-window slices with context.

    Args:
        meta: Metadata dict; uses "NW" (window count), "CT" (context size),
            "M" (window length) and "C" (total sequence length).
        X_t: 2-D array of shape (samples, meta["C"]).

    Returns:
        Tuple (X_b, n_windows, M_, rem): the windowed array of shape
        (samples, len(starts), M_), the window count, the context-padded
        window length, and the remainder C - M * NW not covered by windows.
    """
    n_windows = meta["NW"]  # meta["C"] // meta["M"]
    context = meta["CT"]  # int(meta["M"] * meta['CTR'])

    # Mirror-pad both ends so the first and last windows get full context.
    if context != 0.0:
        left = numpy.flip(X_t[:, 0:context], axis=1)
        right = numpy.flip(X_t[:, -context:], axis=1)
        X_t = numpy.concatenate([left, X_t, right], axis=1)

    padded_len = meta["M"] + 2 * context
    starts = numpy.arange(0, meta["C"], meta["M"])[:-2]
    X_b = numpy.lib.stride_tricks.sliding_window_view(X_t, padded_len, axis=1)[:, starts, :]
    remainder = meta["C"] - meta["M"] * n_windows

    return X_b, n_windows, padded_len, remainder


def extract_model_number(path):
    """Extract the trailing integer from a model path like ``model_12``.

    Accepts either a string or a ``pathlib.Path`` (coerced via ``str`` —
    the original crashed with an uncaught AttributeError on Path input).

    Args:
        path: Path whose final ``_``-separated token is the model number.

    Returns:
        int | None: The parsed number, or None when parsing fails.
    """
    try:
        return int(str(path).split("_")[-1])
    except (ValueError, IndexError):
        print(f"Error: Unable to extract model number from path: {path}")
        return None


def is_none(obj) -> bool:
    """
    Check if the object is None or empty.

    Args:
        obj (any): The input to be checked.

    Returns:
        bool: True if the object is None, or has a length of zero;
        False otherwise (including length-less objects such as ints).
    """
    # The original `obj is not None and ...` guard was redundant: the `or`
    # short-circuits when obj is None. Behavior is unchanged.
    return obj is None or (hasattr(obj, "__len__") and len(obj) == 0)


def load_pickle(path: str):
    """Load a pickled object from disk.

    The original annotated the return as ``numpy.array`` (a function, not a
    type); the actual return is whatever was pickled, e.g. the genome dict.

    Args:
        path (str): Path to the pickle file.

    Returns:
        The deserialized object.
    """
    with open(path, "rb") as f:
        return pickle.load(f)


def write_pickle(path: str, data) -> None:
    """Serialize *data* to *path* with pickle.

    The original annotated the return as ``numpy.array``; the function
    returns nothing.

    Args:
        path (str): Destination file path.
        data: Any picklable object.
    """
    with open(path, "wb") as f:
        pickle.dump(data, f)


def write_bytes(path, data):
    """Write *data* (bytes) to *path*, a pathlib.Path, truncating any existing file."""
    with path.open("wb") as out:
        out.write(data)


def read_bytes(path):
    """Return the full binary contents of *path*, a pathlib.Path."""
    with path.open("rb") as src:
        return src.read()