import gradio as gr
import torch
import os
# Qwen2.5-VL checkpoints load with the Qwen2_5_VL* model class (added in
# transformers >= 4.49); Qwen2VLForConditionalGeneration targets the older Qwen2-VL.
from transformers import AutoProcessor, Qwen2_5_VLForConditionalGeneration
from peft import PeftModel
from huggingface_hub import HfApi
import shutil
import gc
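
# Assumed Space requirements.txt (these pins are suggestions, not from the original file):
#   torch
#   transformers>=4.49   # first release with Qwen2.5-VL support
#   peft
#   accelerate           # needed for device_map="auto"
#   gradio
#   huggingface_hub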

# Configuration - UPDATE THESE
LORA_REPO = "your-username/qwen25vl-lora-adapter"      # Your LoRA repo
OUTPUT_REPO = "your-username/qwen25vl-invoice-merged"  # Output repo
BASE_MODEL = "unsloth/Qwen2.5-VL-7B-Instruct"
HF_TOKEN = os.environ.get("HF_TOKEN")  # Set in Space secrets
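# Notes (assumptions, not from the original Space):
# - HF_TOKEN must have write access so create_repo/upload_folder can push to OUTPUT_REPO.
# - Merging a 7B model in float16 needs roughly 15 GB of RAM/VRAM plus similar free disk
#   in /tmp for the downloaded and re-saved shards; a small CPU Space may run out of memory.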

def merge_model():
    """Merge LoRA with base model and upload to Hub."""
    try:
        if not HF_TOKEN:
            yield "❌ Error: HF_TOKEN is not set. Add a write token in the Space secrets."
            return

        # Use the Space's disk efficiently: start from a clean working directory
        work_dir = "/tmp/merge"
        if os.path.exists(work_dir):
            shutil.rmtree(work_dir)
        os.makedirs(work_dir)

        # Update status
        yield "Loading base model..."

        # Load the base model; device_map="auto" lets accelerate offload to CPU
        # to save GPU memory
        model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
            BASE_MODEL,
            torch_dtype=torch.float16,
            device_map="auto",
            trust_remote_code=True,
            cache_dir=work_dir,
            low_cpu_mem_usage=True
        )
yield "Loading processor..." | |
processor = AutoProcessor.from_pretrained( | |
BASE_MODEL, | |
trust_remote_code=True, | |
cache_dir=work_dir | |
) | |
yield "Loading LoRA adapter from Hub..." | |
model = PeftModel.from_pretrained(model, LORA_REPO) | |
yield "Merging weights... This may take a few minutes..." | |
model = model.merge_and_unload() | |
# Clear GPU cache | |
torch.cuda.empty_cache() | |
gc.collect() | |
yield "Saving merged model..." | |
output_dir = os.path.join(work_dir, "merged") | |
os.makedirs(output_dir, exist_ok=True) | |
# Save with smaller shards | |
model.save_pretrained( | |
output_dir, | |
max_shard_size="2GB", | |
safe_serialization=True | |
) | |
processor.save_pretrained(output_dir) | |
yield "Uploading to HuggingFace Hub..." | |
api = HfApi(token=HF_TOKEN) | |
# Create output repo | |
api.create_repo(OUTPUT_REPO, exist_ok=True, private=True) | |
# Upload the merged model | |
api.upload_folder( | |
folder_path=output_dir, | |
repo_id=OUTPUT_REPO, | |
repo_type="model", | |
commit_message="Merged LoRA adapter with base model" | |
) | |
        # Cleanup
        shutil.rmtree(work_dir)
        yield f"✅ Success! Model merged and uploaded to: {OUTPUT_REPO}"

    except Exception as e:
        yield f"❌ Error: {str(e)}"
        # Cleanup on error
        if os.path.exists("/tmp/merge"):
            shutil.rmtree("/tmp/merge")

# Create Gradio interface
def create_interface():
    with gr.Blocks(title="Qwen2.5-VL LoRA Merger") as demo:
        gr.Markdown(
            """
# Qwen2.5-VL LoRA Merger

This Space will merge your LoRA adapter with the base model and upload it to the HuggingFace Hub.

**Configuration:**
- Base Model: `{}`
- LoRA Adapter: `{}`
- Output Repo: `{}`
""".format(BASE_MODEL, LORA_REPO, OUTPUT_REPO)
        )

        status = gr.Textbox(label="Status", lines=10)
        merge_btn = gr.Button("Start Merge", variant="primary")

        merge_btn.click(
            fn=merge_model,
            inputs=[],
            outputs=[status]
        )

    return demo
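
# --- Optional sanity check (illustrative sketch, not part of the original Space) ---
# Once the merge finishes, the checkpoint in OUTPUT_REPO can be loaded like any other
# Qwen2.5-VL model. This helper is never called by the app; it only shows the assumed
# downstream usage.
def load_merged_model(repo_id: str = OUTPUT_REPO):
    """Load the merged model and processor from the Hub (a private repo needs HF_TOKEN)."""
    merged = Qwen2_5_VLForConditionalGeneration.from_pretrained(
        repo_id,
        torch_dtype=torch.float16,
        device_map="auto",
        token=HF_TOKEN,
    )
    merged_processor = AutoProcessor.from_pretrained(repo_id, token=HF_TOKEN)
    return merged, merged_processor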

if __name__ == "__main__":
    demo = create_interface()
    # queue() is required for generator callbacks (the streamed status updates)
    # on older Gradio versions; on Gradio 4+ queuing is already on by default.
    demo.queue()
    demo.launch()