```python
from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig
import torch


def load_local_model(model_path: str, device: int = -1, token: str = None):
    """
    Load a Hugging Face causal LM (CPU by default) with an optional token for private repos.

    Args:
        model_path (str): Hugging Face repo ID or local path.
        device (int): -1 for CPU, >= 0 for a CUDA device index.
        token (str): Hugging Face access token for private models.

    Returns:
        model, tokenizer
    """
    try:
        # `token=` is the current parameter name; `use_auth_token` is deprecated.
        tokenizer = AutoTokenizer.from_pretrained(model_path, token=token)
    except Exception as e:
        raise RuntimeError(f"Failed to load tokenizer: {e}") from e

    try:
        config = AutoConfig.from_pretrained(model_path, token=token)
        model = AutoModelForCausalLM.from_pretrained(
            model_path, config=config, token=token
        )
        # Device placement: use the requested GPU only if CUDA is actually
        # available; otherwise fall back to CPU.
        if device >= 0 and torch.cuda.is_available():
            model.to(f"cuda:{device}")
        else:
            model.to("cpu")
    except Exception as e:
        raise RuntimeError(f"Failed to load model: {e}") from e

    return model, tokenizer
```
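For reference, a minimal usage sketch of the helper above. The repo ID `gpt2` is just an illustrative placeholder for any small public model; a private repo would also need a valid access token passed via `token`:

```python
# Load a small public model on CPU (device=-1) and run a short generation.
# "gpt2" is a placeholder repo ID, not part of the original snippet.
model, tokenizer = load_local_model("gpt2", device=-1)

inputs = tokenizer("Hello, world", return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=20)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```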