import os
import textwrap

import torch
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel, PeftConfig
from huggingface_hub import login
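# Authenticate with the Hugging Face Hub using a token from the environment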
hf_token = os.environ.get('HUGGINGFACE_TOKEN')
login(hf_token)
# Functions to Wrap the Prompt Correctly
def wrap_text(text, width=90):
    lines = text.split('\n')
    wrapped_lines = [textwrap.fill(line, width=width) for line in lines]
    wrapped_text = '\n'.join(wrapped_lines)
    return wrapped_text
def multimodal_prompt(user_input, system_prompt="You are an expert medical analyst:"):
    # Combine user input and system prompt
    formatted_input = f"{user_input}\n{system_prompt}"

    # Encode the input text
    encodeds = tokenizer(formatted_input, return_tensors="pt", add_special_tokens=False)
    model_inputs = encodeds.to(device)

    # Generate a response using the model
    output = peft_model.generate(
        **model_inputs,
        max_length=512,  # assumed overall token budget; adjust as needed
        use_cache=True,
        early_stopping=True,
        bos_token_id=peft_model.config.bos_token_id,
        eos_token_id=peft_model.config.eos_token_id,
        pad_token_id=peft_model.config.eos_token_id,
        temperature=0.1,
        do_sample=True
    )

    # Decode the response
    response_text = tokenizer.decode(output[0], skip_special_tokens=True)
    return response_text
# Define the device
device = "cuda" if torch.cuda.is_available() else "cpu"
# Use the base model's ID
base_model_id = "stabilityai/stablelm-3b-4e1t"
model_directory = "vaishakgkumar/stablemedv1"
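# The base checkpoint supplies the weights; the PEFT repo supplies the fine-tuned adapter loaded below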
# Instantiate the Tokenizer
tokenizer = AutoTokenizer.from_pretrained("stabilityai/stablelm-3b-4e1t", token=hf_token, trust_remote_code=True, padding_side="left")
# tokenizer = AutoTokenizer.from_pretrained("vaishakgkumar/stablemedv3", trust_remote_code=True, padding_side="left")
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = 'left'
# Load the base model and apply the PEFT adapter
peft_config = PeftConfig.from_pretrained(model_directory, token=hf_token)
peft_model = AutoModelForCausalLM.from_pretrained(base_model_id, token=hf_token, trust_remote_code=True)
peft_model = PeftModel.from_pretrained(peft_model, model_directory, token=hf_token)
peft_model.to(device)
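# Simple chat wrapper: formats a single-turn prompt, generates with the PEFT model, and decodes the reply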
class ChatBot:
    def __init__(self):
        self.history = []

    def predict(self, user_input, system_prompt="You are an expert analyst and provide assessment:"):
        # Build a single-turn chat prompt from the user input and system prompt
        prompt = [{'role': 'user', 'content': user_input + "\n" + system_prompt + ":"}]
        inputs = tokenizer.apply_chat_template(
            prompt,
            add_generation_prompt=True,
            return_tensors='pt'
        )

        # Generate a response using the model
        tokens = peft_model.generate(
            inputs.to(peft_model.device),
            max_new_tokens=250,
            temperature=0.8,
            do_sample=True  # enable sampling so the temperature setting takes effect
        )

        # Decode the response
        response_text = tokenizer.decode(tokens[0], skip_special_tokens=True)

        # Free up memory
        del tokens
        torch.cuda.empty_cache()

        return response_text
bot = ChatBot()
title = "StableDoc Chat"
description = """
You can use this Space to test out the current model vaishakgkumar/stablemedv3.
"""
iface = gr.Interface(
    fn=bot.predict,
    title=title,
    description=description,
    inputs=["text"],  # single text box for the user message; the system prompt keeps its default
    outputs="text",
    theme="ParityError/Anime"
)
iface.launch()