import gradio as gr
import torch
import os
import shutil
import gc

from transformers import AutoProcessor, Qwen2_5_VLForConditionalGeneration
from peft import PeftModel
from huggingface_hub import HfApi

# Configuration - UPDATE THESE
LORA_REPO = "your-username/qwen25vl-lora-adapter"      # Your LoRA repo
OUTPUT_REPO = "your-username/qwen25vl-invoice-merged"  # Output repo
BASE_MODEL = "unsloth/Qwen2.5-VL-7B-Instruct"
HF_TOKEN = os.environ.get("HF_TOKEN")  # Set in Space secrets


def merge_model():
    """Merge the LoRA adapter with the base model and upload the result to the Hub.

    Implemented as a generator so Gradio can stream status updates to the UI.
    """
    try:
        if not HF_TOKEN:
            yield "❌ Error: HF_TOKEN is not set. Add it as a secret in the Space settings."
            return

        # Use the Space's disk efficiently
        work_dir = "/tmp/merge"
        if os.path.exists(work_dir):
            shutil.rmtree(work_dir)
        os.makedirs(work_dir)

        # Update status
        yield "Loading base model..."

        # Load the base model; device_map="auto" lets accelerate offload to CPU
        # to save GPU memory. Qwen2.5-VL checkpoints require the
        # Qwen2_5_VLForConditionalGeneration class (transformers >= 4.49).
        model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
            BASE_MODEL,
            torch_dtype=torch.float16,
            device_map="auto",
            trust_remote_code=True,
            cache_dir=work_dir,
            low_cpu_mem_usage=True,
        )

        yield "Loading processor..."
        processor = AutoProcessor.from_pretrained(
            BASE_MODEL,
            trust_remote_code=True,
            cache_dir=work_dir,
        )

        yield "Loading LoRA adapter from Hub..."
        model = PeftModel.from_pretrained(model, LORA_REPO)

        yield "Merging weights... This may take a few minutes..."
        model = model.merge_and_unload()

        # Clear GPU cache
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        gc.collect()

        yield "Saving merged model..."
        output_dir = os.path.join(work_dir, "merged")
        os.makedirs(output_dir, exist_ok=True)

        # Save with smaller shards
        model.save_pretrained(
            output_dir,
            max_shard_size="2GB",
            safe_serialization=True,
        )
        processor.save_pretrained(output_dir)

        yield "Uploading to HuggingFace Hub..."
        api = HfApi(token=HF_TOKEN)

        # Create output repo
        api.create_repo(OUTPUT_REPO, exist_ok=True, private=True)

        # Upload the merged model
        api.upload_folder(
            folder_path=output_dir,
            repo_id=OUTPUT_REPO,
            repo_type="model",
            commit_message="Merged LoRA adapter with base model",
        )

        # Cleanup
        shutil.rmtree(work_dir)
        yield f"✅ Success! Model merged and uploaded to: {OUTPUT_REPO}"

    except Exception as e:
        yield f"❌ Error: {str(e)}"
        # Cleanup on error
        if os.path.exists("/tmp/merge"):
            shutil.rmtree("/tmp/merge")


# Create Gradio interface
def create_interface():
    with gr.Blocks(title="Qwen2.5-VL LoRA Merger") as demo:
        gr.Markdown(
            f"""
            # Qwen2.5-VL LoRA Merger

            This Space merges your LoRA adapter with the base model and uploads the result to HuggingFace.

            **Configuration:**
            - Base Model: `{BASE_MODEL}`
            - LoRA Adapter: `{LORA_REPO}`
            - Output Repo: `{OUTPUT_REPO}`
            """
        )

        status = gr.Textbox(label="Status", lines=10)
        merge_btn = gr.Button("Start Merge", variant="primary")

        merge_btn.click(
            fn=merge_model,
            inputs=[],
            outputs=[status],
        )

    return demo


if __name__ == "__main__":
    demo = create_interface()
    demo.launch()