"""FastAPI service that copies a hardcoded Hugging Face model snapshot into a
hardcoded Hugging Face *dataset* repository, then deletes the local copy.

The transfer starts automatically on application startup; `/` and `/status`
expose health information.
"""

from fastapi import FastAPI, HTTPException, BackgroundTasks
from huggingface_hub import snapshot_download, HfApi
import os
import shutil
import asyncio
from concurrent.futures import ThreadPoolExecutor
from dotenv import load_dotenv

# Load environment variables from a local .env file (no-op if absent).
load_dotenv()

app = FastAPI(title="Hugging Face Model Transfer Service", version="1.0.0")

# Thread pool for running blocking hub operations off the event loop.
executor = ThreadPoolExecutor(max_workers=2)

# Local directory to save downloaded model data temporarily.
DOWNLOAD_DIR = "./downloaded_model_data"
os.makedirs(DOWNLOAD_DIR, exist_ok=True)

# Hardcoded model repository ID.
HARDCODED_MODEL_REPO_ID = "openai/gpt-oss-120b"  # Change this to your desired model
# Hardcoded dataset repository ID (transfer destination).
HARDCODED_DATASET_REPO_ID = "Fred808/helium_memory"  # Change this to your dataset
# Hardcoded destination path inside the dataset repository.
HARDCODED_PATH_IN_REPO = "model_data2/"

# Hugging Face access token, read from the environment.
HF_TOKEN = os.getenv("HUGGINGFACE_TOKEN")
if not HF_TOKEN:
    # BUG FIX: the message previously named HUGGINGFACE_HUB_TOKEN while the
    # code actually reads HUGGINGFACE_TOKEN — keep the diagnostic consistent
    # with the variable being checked.
    raise ValueError("HUGGINGFACE_TOKEN environment variable is not set")


def download_full_model() -> str:
    """Download the hardcoded model snapshot.

    Returns:
        The local directory the snapshot was downloaded to.

    Raises:
        Exception: re-raises whatever `snapshot_download` raised, after logging.
    """
    print(f"Downloading hardcoded model {HARDCODED_MODEL_REPO_ID}...")
    try:
        local_dir = snapshot_download(
            repo_id=HARDCODED_MODEL_REPO_ID,
            cache_dir=DOWNLOAD_DIR,
            token=HF_TOKEN
        )
        print(f"Downloaded to: {local_dir}")
        return local_dir
    except Exception as e:
        print(f"Download failed: {str(e)}")
        raise


def upload_folder_to_dataset(folder_path: str) -> None:
    """Upload `folder_path` to the hardcoded dataset repo at the hardcoded path.

    Raises:
        Exception: re-raises whatever `upload_folder` raised, after logging.
    """
    api = HfApi(token=HF_TOKEN)
    print(f"Uploading {folder_path} to {HARDCODED_DATASET_REPO_ID} at {HARDCODED_PATH_IN_REPO}...")
    try:
        api.upload_folder(
            folder_path=folder_path,
            path_in_repo=HARDCODED_PATH_IN_REPO,
            repo_id=HARDCODED_DATASET_REPO_ID,
            repo_type="dataset",
        )
        print("Upload complete!")
    except Exception as e:
        print(f"Upload failed: {str(e)}")
        raise


def cleanup_download(local_dir: str) -> None:
    """Best-effort removal of the downloaded files; failures are only logged."""
    try:
        if os.path.exists(local_dir):
            shutil.rmtree(local_dir)
            print(f"Cleaned up: {local_dir}")
    except Exception as e:
        print(f"Cleanup failed: {str(e)}")


async def transfer_model() -> None:
    """Download the hardcoded model and upload it to the dataset repository.

    Blocking hub calls run in `executor` so the event loop stays responsive.

    Raises:
        Exception: propagated from download or upload, after logging.
    """
    local_dir = None
    # BUG FIX: asyncio.get_event_loop() is deprecated inside coroutines;
    # get_running_loop() is the correct call here.
    loop = asyncio.get_running_loop()
    try:
        # Download the model (blocking -> thread pool).
        local_dir = await loop.run_in_executor(
            executor, download_full_model
        )
        # Upload to the dataset repository (blocking -> thread pool).
        await loop.run_in_executor(
            executor, upload_folder_to_dataset, local_dir
        )
        print(f"Model {HARDCODED_MODEL_REPO_ID} transferred successfully to {HARDCODED_DATASET_REPO_ID}")
    except Exception as e:
        print(f"Transfer failed: {str(e)}")
        raise
    finally:
        # ROBUSTNESS FIX: previously cleanup only ran on success, so a failed
        # upload leaked the (potentially huge) download on disk. Clean up in
        # all cases once a download directory exists.
        if local_dir:
            cleanup_download(local_dir)


@app.get("/")
async def root():
    """Health check endpoint reporting the configured source and destination."""
    return {
        "message": "Hugging Face Model Transfer Service is running",
        "hardcoded_model": HARDCODED_MODEL_REPO_ID,
        "hardcoded_dataset": HARDCODED_DATASET_REPO_ID
    }


# NOTE(review): @app.on_event is deprecated in current FastAPI in favor of
# lifespan handlers; kept as-is to avoid restructuring the app object.
@app.on_event("startup")
async def startup_event():
    """Kick off the transfer as a background task when the application starts."""
    print("Starting model transfer process...")
    asyncio.create_task(transfer_model())


@app.get("/status")
async def get_status():
    """Report service health and disk usage of the download directory.

    Raises:
        HTTPException: 500 if disk usage cannot be determined.
    """
    try:
        disk_usage = shutil.disk_usage(DOWNLOAD_DIR)
        return {
            "status": "healthy",
            "download_dir": DOWNLOAD_DIR,
            "disk_space": {
                "total": disk_usage.total,
                "used": disk_usage.used,
                "free": disk_usage.free
            },
            "model": HARDCODED_MODEL_REPO_ID,
            "dataset": HARDCODED_DATASET_REPO_ID
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Status check failed: {str(e)}")


if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)