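"""Gradio Space that merges a LoRA adapter into its Qwen2.5-VL base model and
uploads the merged weights to the Hugging Face Hub.

Assumed dependencies (a sketch; nothing is pinned here): gradio, torch, peft,
huggingface_hub, accelerate (needed for device_map="auto"), and
transformers >= 4.49 (for Qwen2_5_VLForConditionalGeneration).
"""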
import gradio as gr
import torch
import os
from transformers import AutoProcessor, Qwen2_5_VLForConditionalGeneration
from peft import PeftModel
from huggingface_hub import HfApi
import shutil
import gc

# Configuration - UPDATE THESE
LORA_REPO = "your-username/qwen25vl-lora-adapter"  # Your LoRA repo
OUTPUT_REPO = "your-username/qwen25vl-invoice-merged"  # Output repo
BASE_MODEL = "unsloth/Qwen2.5-VL-7B-Instruct"
HF_TOKEN = os.environ.get("HF_TOKEN")  # Set a write-scoped token in the Space secrets

def merge_model():
    """Merge LoRA with base model and upload to Hub"""
    try:
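        # Fail fast if no token is configured (assumption: the token needs
        # write access, since the final step pushes to the Hub).
        if not HF_TOKEN:
            yield "❌ HF_TOKEN is not set. Add a write token in the Space secrets."
            return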
        # Use Space's disk efficiently
        work_dir = "/tmp/merge"
        if os.path.exists(work_dir):
            shutil.rmtree(work_dir)
        os.makedirs(work_dir)
        
        # Update status
        yield "Loading base model..."
        
        # Load the base model; device_map="auto" places weights across GPU and
        # CPU, offloading to CPU when GPU memory is insufficient
        model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
            BASE_MODEL,
            torch_dtype=torch.float16,
            device_map="auto",
            trust_remote_code=True,
            cache_dir=work_dir,
            low_cpu_mem_usage=True
        )
        
        yield "Loading processor..."
        processor = AutoProcessor.from_pretrained(
            BASE_MODEL,
            trust_remote_code=True,
            cache_dir=work_dir
        )
        
        yield "Loading LoRA adapter from Hub..."
        model = PeftModel.from_pretrained(model, LORA_REPO)
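        # Note: the adapter is assumed to have been trained against this same
        # base checkpoint; loading it onto a different base can fail or
        # silently degrade output quality.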
        
        yield "Merging weights... This may take a few minutes..."
        model = model.merge_and_unload()
        
        # Free memory before saving (empty_cache is a safe no-op without CUDA)
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        
        yield "Saving merged model..."
        output_dir = os.path.join(work_dir, "merged")
        os.makedirs(output_dir, exist_ok=True)
        
        # Save in 2 GB safetensors shards so individual upload files stay small
        model.save_pretrained(
            output_dir,
            max_shard_size="2GB",
            safe_serialization=True
        )
        processor.save_pretrained(output_dir)
        
        yield "Uploading to HuggingFace Hub..."
        api = HfApi(token=HF_TOKEN)
        
        # Create the private output repo if it does not already exist
        api.create_repo(OUTPUT_REPO, exist_ok=True, private=True)
        
        # Upload the merged model
        api.upload_folder(
            folder_path=output_dir,
            repo_id=OUTPUT_REPO,
            repo_type="model",
            commit_message="Merged LoRA adapter with base model"
        )
        
        # Cleanup
        shutil.rmtree(work_dir)
        
        yield f"βœ… Success! Model merged and uploaded to: {OUTPUT_REPO}"
        
    except Exception as e:
        yield f"❌ Error: {e}"
        # Cleanup on error
        shutil.rmtree("/tmp/merge", ignore_errors=True)

# Create Gradio interface
def create_interface():
    with gr.Blocks(title="Qwen2.5-VL LoRA Merger") as demo:
        gr.Markdown(
            f"""
            # Qwen2.5-VL LoRA Merger

            This Space merges your LoRA adapter into the base model and uploads the result to the Hugging Face Hub.

            **Configuration:**
            - Base Model: `{BASE_MODEL}`
            - LoRA Adapter: `{LORA_REPO}`
            - Output Repo: `{OUTPUT_REPO}`
            """
        )
        
        status = gr.Textbox(label="Status", lines=10)
        merge_btn = gr.Button("Start Merge", variant="primary")
        
        merge_btn.click(
            fn=merge_model,
            inputs=[],
            outputs=[status]
        )
        
    return demo

if __name__ == "__main__":
    demo = create_interface()
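    # Generator outputs stream through Gradio's request queue; on older Gradio
    # releases you may need demo.queue() before launch() for live status updates.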
    demo.launch()