Spaces:

PhantHive
/

Phearion-bigbrain-v0.0.1

Paused

Phearion-bigbrain-v0.0.1

File size: 1,157 Bytes

8194866
c90008b
 
3bf71d2
 
8194866
ef29b9f
 
 
 
 
 
 
 
3bf71d2
98e2a1f
d2222b4
42f33f5
 
 
d2222b4
3bf71d2
d2222b4
 
d08a677
3bf71d2
c90008b
1fdbfe6
3bf71d2
a6a2fe3
 
3bf71d2
d08a677
3bf71d2

import gradio as gr
import torch
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer


# Pick the compute device once at start-up. The model and every tokenized
# prompt are placed on this device below so that they always agree.
if torch.cuda.is_available():
    device = torch.device("cuda")
    print("GPU is available!")
else:
    device = torch.device("cpu")
    print("GPU is not available, using CPU.")

# Load the model and config when the script starts
peft_model_id = "phearion/bigbrain-v0.0.1"
config = PeftConfig.from_pretrained(peft_model_id)
model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    torch_dtype=torch.bfloat16,
)
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)

# Load the LoRA adapter on top of the base model
model = PeftModel.from_pretrained(model, peft_model_id)

# The selected device used to be computed but never applied, so generation
# always ran on CPU even when a GPU was reported as available.
model = model.to(device)
# This script only serves inference; make evaluation mode explicit so that
# dropout layers are disabled regardless of the libraries' defaults.
model.eval()


def greet(text: str) -> str:
    """Generate a short completion for ``text`` with the LoRA-adapted model.

    The input is wrapped into the prompt ``"<text>" ->: ``, tokenized, and
    passed to ``model.generate`` with sampling enabled and at most 15 new
    tokens.

    Args:
        text: The raw user input coming from the Gradio text box.

    Returns:
        The first generated sequence decoded with special tokens removed.
        Presumably this includes the prompt itself, since causal language
        models return the prompt tokens followed by the new ones.
    """
    # Tokenized inputs must live on the same device as the model weights,
    # otherwise generation fails with a device mismatch on GPU hosts.
    batch = tokenizer(f"\"{text}\" ->: ", return_tensors="pt").to(device)

    # No gradients are needed for inference; skipping them saves memory.
    with torch.no_grad():
        output_tokens = model.generate(**batch, do_sample=True, max_new_tokens=15)

    return tokenizer.decode(output_tokens[0], skip_special_tokens=True)


# Wrap `greet` in a plain text-in / text-out Gradio interface and start
# serving it.
iface = gr.Interface(
    fn=greet,
    inputs="text",
    outputs="text",
)
iface.launch()