File size: 5,499 Bytes

869063e

import os
from typing import Optional

from huggingface_hub import HfApi, create_repo
from transformers import AutoConfig, AutoModelForCausalLM, AutoProcessor


class SpecVisionModelRegistration:
    """
    Handles the registration and pushing of SpecVision model to Hugging Face Hub.

    Typical flow: construct the handler, call ``register_model_components()``
    to register the custom classes with the transformers Auto* factories, then
    call ``push_to_hub()`` to create the repository, write the model card and
    upload the files found in ``model_path``.
    """

    # Only top-level files in model_path with one of these suffixes are uploaded.
    _UPLOAD_SUFFIXES = ('.json', '.py', '.md', '.txt', '.safetensors')

    def __init__(self,
                 model_path: str,
                 repo_name: str,
                 organization: Optional[str] = None,
                 token: Optional[str] = None):
        """
        Initialize the registration handler.

        Args:
            model_path: Local path to your model files
            repo_name: Name for the Hugging Face repository
            organization: Optional organization name on Hugging Face
            token: Hugging Face API token; falls back to the HF_TOKEN
                environment variable when not given

        Raises:
            ValueError: If no token is passed and HF_TOKEN is not set.
        """
        self.model_path = model_path
        self.repo_name = repo_name
        self.organization = organization
        self.token = token or os.getenv("HF_TOKEN")

        if not self.token:
            raise ValueError("Please provide a Hugging Face token either directly or via HF_TOKEN environment variable")

        self.api = HfApi(token=self.token)

    def _full_repo_id(self) -> str:
        """Return ``organization/repo_name``, or just ``repo_name`` when no organization is set."""
        if self.organization:
            return f"{self.organization}/{self.repo_name}"
        return self.repo_name

    def register_model_components(self) -> None:
        """
        Register the SpecVision model architecture components with the transformers library.

        The custom modules are imported lazily here, so they must be importable
        (e.g. located in the current working directory or on ``sys.path``) at
        the time this method is called.
        """
        # Import your custom model classes
        from configuration_spec_vision import SpecVisionConfig
        from modeling_spec_vision import SpecVisionForCausalLM
        from processing_spec_vision import SpecVisionProcessor

        # Register the model architecture
        AutoConfig.register("spec_vision", SpecVisionConfig)
        AutoModelForCausalLM.register(SpecVisionConfig, SpecVisionForCausalLM)
        AutoProcessor.register(SpecVisionConfig, SpecVisionProcessor)

        print("✓ Successfully registered SpecVision model architecture")

    def create_huggingface_repo(self) -> str:
        """
        Create a new repository on the Hugging Face Hub.

        The repository is created as public; an already existing repository
        is accepted (``exist_ok=True``).

        Returns:
            The full repository id (``organization/repo_name`` or ``repo_name``).

        Raises:
            Exception: If the repository could not be created; the original
                error is attached as ``__cause__``.
        """
        repo_id = self._full_repo_id()

        try:
            create_repo(
                repo_id,
                token=self.token,
                private=False,
                exist_ok=True
            )
        except Exception as e:
            # Keep the generic exception type callers may rely on, but chain
            # the underlying error so the real cause stays in the traceback.
            raise Exception(f"Failed to create repository: {str(e)}") from e

        print(f"✓ Created/accessed repository: {repo_id}")
        return repo_id

    def update_model_card(self) -> None:
        """
        Create or update the model card (README.md) with necessary information.

        The card is written to ``README.md`` inside ``model_path``, overwriting
        any existing file. The usage snippet references the full repository id
        (including the organization, when set) so it can be copy-pasted as is.
        """
        repo_id = self._full_repo_id()
        model_card = f"""---
language: en
tags:
- spec-vision
- vision-language-model
- transformers
license: apache-2.0
---

# SpecVision Model

This is the SpecVision model, a vision-language model based on the transformers architecture.

## Model Description

SpecVision is designed for vision-language tasks, combining visual and textual understanding capabilities.

## Usage

```python
from transformers import AutoConfig, AutoModelForCausalLM, AutoProcessor

# Load the model and processor
model = AutoModelForCausalLM.from_pretrained("{repo_id}")
processor = AutoProcessor.from_pretrained("{repo_id}")

# Process inputs
inputs = processor(images=image, text=text, return_tensors="pt")
outputs = model(**inputs)
```

## Training and Evaluation

[Add your training and evaluation details here]

## Limitations and Biases

[Add any known limitations and biases here]
"""

        # Explicit encoding so the output does not depend on the platform default.
        with open(os.path.join(self.model_path, "README.md"), "w", encoding="utf-8") as f:
            f.write(model_card)

        print("✓ Created/updated model card")

    def push_to_hub(self) -> None:
        """
        Push the model, configurations, and related files to Hugging Face Hub.

        Creates (or reuses) the repository, regenerates the model card, then
        uploads every regular file directly inside ``model_path`` whose name
        ends with one of ``_UPLOAD_SUFFIXES``. Subdirectories are not traversed.
        """
        repo_id = self.create_huggingface_repo()

        # Update the model card first so the fresh README.md is part of the upload
        self.update_model_card()

        # Sorted for a deterministic upload order; directories are skipped
        # because upload_file expects a file path.
        filenames = sorted(
            name for name in os.listdir(self.model_path)
            if name.endswith(self._UPLOAD_SUFFIXES)
            and os.path.isfile(os.path.join(self.model_path, name))
        )

        # Upload all files
        for filename in filenames:
            self.api.upload_file(
                path_or_fileobj=os.path.join(self.model_path, filename),
                path_in_repo=filename,
                repo_id=repo_id,
                token=self.token
            )
            print(f"✓ Uploaded {filename}")

        print(f"\nModel successfully pushed to https://huggingface.co/{repo_id}")

def main():
    """
    Run the full publishing flow: register the SpecVision architecture, then push to the Hub.
    """
    # The token is read from the HF_TOKEN environment variable; replace with a
    # literal token string here if you prefer to pass it directly.
    hf_token = os.getenv("HF_TOKEN")

    settings = {
        "model_path": "./",  # assumes the script is run from the model directory
        "repo_name": "Spec-4B-Vision-V1",  # target repository name
        "organization": "SVECTOR-CORPORATION",  # owning organization on the Hub
        "token": hf_token,
    }
    handler = SpecVisionModelRegistration(**settings)

    # Step 1: make the custom classes known to the transformers Auto* factories
    handler.register_model_components()

    # Step 2: create the repo, write the model card and upload the files
    handler.push_to_hub()
    
# Run the publishing flow only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()