Spaces:
Sleeping
Sleeping
| # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/utils.ipynb. | |
| # %% auto 0 | |
| __all__ = ['generate_TS_df', 'normalize_columns', 'remove_constant_columns', 'ReferenceArtifact', 'PrintLayer', | |
| 'get_wandb_artifacts', 'get_pickle_artifact', 'exec_with_feather', 'py_function', | |
| 'exec_with_feather_k_output', 'exec_with_and_feather_k_output', 'learner_module_leaves', | |
| 'learner_module_leaves_subtables'] | |
| # %% ../nbs/utils.ipynb 3 | |
| from .imports import * | |
| from fastcore.all import * | |
| import wandb | |
| import pickle | |
| import pandas as pd | |
| import numpy as np | |
| #import tensorflow as tf | |
| import torch.nn as nn | |
| from fastai.basics import * | |
| # %% ../nbs/utils.ipynb 5 | |
def generate_TS_df(rows, cols):
    """Generate a DataFrame containing a multivariate time series.

    Each column represents a variable and each row a time point (sample).
    The timestamp is in the index of the dataframe, starting at the current
    time and evenly spaced at 1 second between samples.

    Args:
        rows (int): number of time points (samples).
        cols (int): number of variables (columns).

    Returns:
        pd.DataFrame: `rows` x `cols` frame of standard-normal values with a
        DatetimeIndex at 1-second frequency.
    """
    # BUG FIX: the original built the index with np.arange between two
    # separate pd.Timestamp.now() calls, so the number of rows depended on
    # the clock delta between those calls. date_range with `periods` yields
    # exactly `rows` evenly spaced timestamps.
    index = pd.date_range(start=pd.Timestamp.now(), periods=rows, freq='s')
    data = np.random.randn(rows, cols)
    return pd.DataFrame(data, index=index)
| # %% ../nbs/utils.ipynb 10 | |
def normalize_columns(df:pd.DataFrame):
    "Normalize columns from `df` to have 0 mean and 1 standard deviation"
    # The small epsilon keeps the division finite for constant columns.
    return df.sub(df.mean()).div(df.std() + 1e-7)
| # %% ../nbs/utils.ipynb 16 | |
def remove_constant_columns(df:pd.DataFrame):
    "Drop every column of `df` whose values all equal the first row's entry."
    varying = (df != df.iloc[0]).any()
    return df.loc[:, varying]
| # %% ../nbs/utils.ipynb 21 | |
class ReferenceArtifact(wandb.Artifact):
    """Wandb artifact holding a single ``file://`` reference to a pickled object.

    The object passed to the constructor is pickled, named after the hash of
    its pickle bytes, written into `folder`, and added to the artifact as a
    file reference; the hash and original type are stored in the metadata.
    """
    default_storage_path = Path('data/wandb_artifacts/') # * this path is relative to Path.home()
    "This class is meant to create an artifact with a single reference to an object \
passed as argument in the contructor. The object will be pickled, hashed and stored \
in a specified folder."
    def __init__(self, obj, name, type='object', folder=None, **kwargs):
        # obj: any picklable object; name/type/**kwargs are forwarded to wandb.Artifact.
        # folder: destination directory; defaults to Path.home()/default_storage_path.
        super().__init__(type=type, name=name, **kwargs)
        # pickle dumps the object and then hash it
        # NOTE(review): built-in hash() on bytes is salted per process
        # (PYTHONHASHSEED), so the file name is not reproducible across runs —
        # confirm this is intended; a content hash (hashlib) would be stable.
        hash_code = str(hash(pickle.dumps(obj)))
        folder = Path(ifnone(folder, Path.home()/self.default_storage_path))
        with open(f'{folder}/{hash_code}', 'wb') as f:
            pickle.dump(obj, f)
        self.add_reference(f'file://{folder}/{hash_code}')
        # Record hash and original type so `to_obj` can locate and identify it later.
        if self.metadata is None:
            self.metadata = dict()
        self.metadata['ref'] = dict()
        self.metadata['ref']['hash'] = hash_code
        self.metadata['ref']['type'] = str(obj.__class__)
| # %% ../nbs/utils.ipynb 24 | |
def to_obj(self:wandb.apis.public.Artifact):
    """Recover the object referenced by a saved `ReferenceArtifact`.

    The artifact must come from a call to `run.use_artifact` with a proper
    wandb run. Returns the unpickled object, or None (with an error message)
    when the artifact carries no ReferenceArtifact metadata.
    """
    ref_info = self.metadata.get('ref')
    if ref_info is None:
        print(f'ERROR:{self} does not come from a saved ReferenceArtifact')
        return None
    # Prefer the local storage folder; fall back to downloading the artifact.
    candidate = ReferenceArtifact.default_storage_path/ref_info['hash']
    local_path = candidate if candidate.exists() else Path(self.download()).ls()[0]
    with open(local_path, 'rb') as f:
        return pickle.load(f)
| # %% ../nbs/utils.ipynb 33 | |
import torch.nn as nn
class PrintLayer(nn.Module):
    """Identity module that prints the shape of its input (debugging aid)."""
    def __init__(self):
        super().__init__()

    def forward(self, x):
        # Side effect only: report the tensor shape, then pass x through untouched.
        print(x.shape)
        return x
| # %% ../nbs/utils.ipynb 34 | |
def export_and_get(self:Learner, keep_exported_file=False, fname='aux.pkl'):
    """
    Export the learner into an auxiliary file, load it back and return the
    loaded copy.

    Args:
        keep_exported_file (bool): when False (default), the intermediate
            file is deleted after loading.
        fname (str): name of the auxiliary export file. New optional
            parameter; defaults to the previously hard-coded 'aux.pkl'.

    Returns:
        The re-loaded Learner.
    """
    # Use a single source of truth for the file name instead of repeating
    # the literal 'aux.pkl' three times as the original did.
    aux_path = Path(fname)
    self.export(fname=fname)
    aux_learn = load_learner(fname)
    # NOTE(review): Learner.export may write relative to self.path while
    # load_learner/unlink use the cwd — same as the original; confirm paths match.
    if not keep_exported_file: aux_path.unlink()
    return aux_learn
| # %% ../nbs/utils.ipynb 35 | |
def get_wandb_artifacts(project_path, type=None, name=None, last_version=True):
    """
    Get the artifacts logged in a wandb project.
    Input:
        - `project_path` (str): entity/project_name
        - `type` (str): whether to return only one type of artifacts
        - `name` (str): Leave none to have all artifact names
        - `last_version`: whether to return only the last version of each artifact or not
    Output: List of artifacts
    """
    public_api = wandb.Api()
    # Restrict to a single artifact type when requested, otherwise walk all
    # types registered in the project.
    if type is not None:
        types = [public_api.artifact_type(type, project_path)]
    else:
        types = public_api.artifact_types(project_path)
    res = L()
    for kind in types:
        for collection in kind.collections():
            # `name is None` means: match every collection.
            if name is None or name == collection.name:
                versions = public_api.artifact_versions(
                    kind.type,
                    "/".join([kind.entity, kind.project, collection.name]),
                    per_page=1,
                )
                # NOTE(review): `next(versions)` assumes the iterator yields
                # the latest version first — confirm against the wandb API.
                # per_page=1 sets the API page size, not a result limit, so
                # L(versions) below still collects every version.
                if last_version: res += next(versions)
                else: res += L(versions)
    return list(res)
| # %% ../nbs/utils.ipynb 39 | |
def get_pickle_artifact(filename):
    "Unpickle and return the object stored in `filename`."
    with open(filename, "rb") as fh:
        return pickle.load(fh)
| # %% ../nbs/utils.ipynb 41 | |
| import pyarrow.feather as ft | |
| import pickle | |
| # %% ../nbs/utils.ipynb 42 | |
def exec_with_feather(function, path = None, print_flag = False, *args, **kwargs):
    """Read a feather file from `path`, apply `function` to it and return the result.

    Args:
        function: callable applied to the object read from `path`.
        path: path of the feather file; when None, nothing is executed and
            None is returned.
        print_flag (bool): print progress messages when True.
        *args, **kwargs: forwarded to `function` after the feather payload.

    Returns:
        The value of `function(data, *args, **kwargs)`, or None if `path` is None.
    """
    result = None
    # BUG FIX: the original tested `path is none` (lowercase), which raised
    # NameError on every call; `None` is the intended singleton.
    if path is not None:
        if print_flag: print("--> Exec with feather | reading input from ", path)
        data_in = ft.read_feather(path)  # renamed from `input` to avoid shadowing the builtin
        if print_flag: print("--> Exec with feather | Apply function ", path)
        result = function(data_in, *args, **kwargs)
    if print_flag: print("Exec with feather --> ", path)
    return result
| # %% ../nbs/utils.ipynb 43 | |
def py_function(module_name, function_name, print_flag = False):
    """Resolve `function_name` to a callable.

    Looks the name up in `__main__` first; if it is not defined there,
    imports `module_name` and fetches the attribute from that module.

    Args:
        module_name (str): module to import when the name is not in __main__.
        function_name (str): name of the function to fetch.
        print_flag (bool): print the resolved function when True.

    Returns:
        The resolved callable.

    Raises:
        ImportError: if `module_name` cannot be imported.
        AttributeError: if `function_name` is missing from the module too.
    """
    try:
        function = getattr(__import__('__main__'), function_name)
    # BUG FIX: was a bare `except:` — narrow it to the only expected failure
    # (name not defined in __main__) so real errors are not swallowed.
    except AttributeError:
        module = __import__(module_name, fromlist=[''])
        function = getattr(module, function_name)
    # BUG FIX: `print_flag` was accepted but ignored and the message printed
    # unconditionally; gate it as the parameter name implies.
    if print_flag: print("py function: ", function_name, ": ", function)
    return function
| # %% ../nbs/utils.ipynb 46 | |
import time
def exec_with_feather_k_output(function_name, module_name = "main", path = None, k_output = 0, print_flag = False, time_flag = False, *args, **kwargs):
    """Read a feather file from `path`, apply the function named
    `function_name` (resolved via `py_function`) and return output number
    `k_output`. Returns None when `path` is None; optionally prints progress
    and elapsed time."""
    result = None
    func = py_function(module_name, function_name, print_flag)
    if time_flag:
        t_start = time.time()
    if path is not None:
        if print_flag: print("--> Exec with feather | reading input from ", path)
        data = ft.read_feather(path)
        if print_flag: print("--> Exec with feather | Apply function ", path)
        result = func(data, *args, **kwargs)[k_output]
    if time_flag:
        t_end = time.time()
        print("Exec with feather | time: ", t_end-t_start)
    if print_flag: print("Exec with feather --> ", path)
    return result
| # %% ../nbs/utils.ipynb 48 | |
def exec_with_and_feather_k_output(function_name, module_name = "main", path_input = None, path_output = None, k_output = 0, print_flag = False, time_flag = False, *args, **kwargs):
    """Read a feather file, apply a named function and persist one output.

    Reads `path_input` with feather, applies the function named
    `function_name` (resolved via `py_function`), selects output `k_output`
    and writes it as feather (lz4) to `path_output`.

    Args:
        function_name (str): name of the function to resolve and apply.
        module_name (str): module to import the function from as a fallback.
        path_input: feather file to read; when None nothing is executed.
        path_output: feather file to write the selected output to.
        k_output (int): index of the function output to keep.
        print_flag (bool): print progress messages when True.
        time_flag (bool): print elapsed time when True.

    Returns:
        `path_output`.
    """
    result = None
    function = py_function(module_name, function_name, print_flag)
    if time_flag: t_start = time.time()
    if path_input is not None:
        if print_flag: print("--> Exec with feather | reading input from ", path_input)
        data_in = ft.read_feather(path_input)
        if print_flag:
            print("--> Exec with feather | Apply function ", function_name, "input type: ", type(data_in))
        result = function(data_in, *args, **kwargs)[k_output]
        # BUG FIX: the original wrote undefined names `df` and `path`
        # (NameError); the computed `result` must be written to `path_output`.
        ft.write_feather(result, path_output, compression = 'lz4')
    if time_flag:
        t_end = time.time()
        print("Exec with feather | time: ", t_end-t_start)
    if print_flag: print("Exec with feather --> ", path_output)
    return path_output
| # %% ../nbs/utils.ipynb 52 | |
def learner_module_leaves(learner):
    "Return a DataFrame describing every leaf module reachable from `learner`."
    root = list(learner.modules())[0]  # the root module
    records = []

    def _walk(module, trail=None):
        # Depth-first traversal; modules without children are the leaves.
        trail = [] if trail is None else trail
        for child_name, child in module.named_children():
            branch = trail + [type(child).__name__]
            if not list(child.children()):
                records.append([
                    ' -> '.join(branch),       # human-readable path of types
                    type(child).__name__,      # leaf module class name
                    child_name,                # attribute name within its parent
                    str(child).strip(),        # repr with constructor parameters
                ])
            _walk(child, branch)

    _walk(root)
    return pd.DataFrame(records, columns=['Path', 'Module_type', 'Module_name', 'Module'])
| # %% ../nbs/utils.ipynb 56 | |
def learner_module_leaves_subtables(learner, print_flag = False):
    """Split the leaf-module table of `learner` into two de-duplicated views.

    Args:
        learner: object whose leaf modules are listed via `learner_module_leaves`.
        print_flag (bool): when True, show the intermediate tables.
            NOTE(review): uses `display`, which is only defined in
            IPython/notebook environments — confirm intended usage context.

    Returns:
        tuple: (md_types, md_modules) — DataFrames holding the unique module
        types and the unique module parameter strings, respectively.
    """
    # BUG FIX: the original built an empty DataFrame `df` here that was never
    # used; removed.
    md = learner_module_leaves(learner).drop(
        'Path', axis = 1
    ).sort_values(
        by = 'Module_type'
    )
    if print_flag: print("The layers are of this types:")
    md_types = pd.DataFrame(md['Module_type'].drop_duplicates())
    if print_flag:
        display(md_types)
        print("And they are called with this parameters:")
    md_modules = pd.DataFrame(md['Module'].drop_duplicates())
    if print_flag: display(md_modules)
    return md_types, md_modules