# mistral_setup1 / app.py
import gradio as gr

# Load a ready-made demo for the hosted Mistral-7B-v0.1 model from the Hugging Face Hub and launch it.
gr.load("models/mistralai/Mistral-7B-v0.1").launch()
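
# Note: gr.load("models/...") builds a demo backed by Hugging Face's hosted inference,
# so the Space itself does not need a GPU. A minimal sketch (not part of the original
# app), assuming the Space stores a token in an API_KEY secret; the hf_token argument
# is only needed for gated or private models:
#
# import os
# gr.load("models/mistralai/Mistral-7B-v0.1", hf_token=os.getenv("API_KEY")).launch()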
# Earlier approach (kept commented out): run Mistral-7B-v0.1 locally with transformers and a gr.Blocks UI.
# import gradio as gr
# from transformers import AutoModelForCausalLM, AutoTokenizer
# import torch
# import os
#
# # Get the Hugging Face token from environment variables
# hf_token = os.getenv("API_KEY")
#
# # Load model and tokenizer (use `token=`; `use_auth_token=` is deprecated)
# model_name = "mistralai/Mistral-7B-v0.1"
# model = AutoModelForCausalLM.from_pretrained(
#     model_name,
#     device_map="auto",
#     token=hf_token,
# )
# tokenizer = AutoTokenizer.from_pretrained(
#     model_name,
#     token=hf_token,
# )
#
# # Define the generation function
# def generate_response(prompt):
#     # Tokenize input text and move it to the same device as the model
#     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
#     # Generate response
#     generated_ids = model.generate(**inputs, max_new_tokens=100, do_sample=True)
#     # Decode and return response
#     return tokenizer.decode(generated_ids[0], skip_special_tokens=True)
#
# # Set up Gradio interface
# with gr.Blocks() as demo:
#     gr.Markdown("# Text Generation")
#     input_text = gr.Textbox(placeholder="Enter your input here", lines=2)
#     output_text = gr.Textbox(label="Generated Output", lines=2)
#     submit_btn = gr.Button("Generate")
#     submit_btn.click(generate_response, inputs=input_text, outputs=output_text)
#
# # Launch the interface
# if __name__ == "__main__":
#     demo.launch()
# Earlier approach (kept commented out): run Mistral-7B-v0.3 locally with a simple gr.Interface.
# import os
# import torch
# from transformers import AutoModelForCausalLM, AutoTokenizer
# import gradio as gr
#
# # Define device
# device = 'cuda' if torch.cuda.is_available() else 'cpu'
#
# # Define model and tokenizer
# model_name = "mistralai/Mistral-7B-v0.3"
# hf_token = os.getenv("API_KEY")
# # device_map="auto" already places the weights, so no extra .to(device) call is needed
# model = AutoModelForCausalLM.from_pretrained(
#     model_name,
#     token=hf_token,
#     torch_dtype=torch.float16,
#     device_map="auto",
#     low_cpu_mem_usage=True,
# )
# tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token)
#
# # Add padding token if not present
# if tokenizer.pad_token is None:
#     tokenizer.pad_token = tokenizer.eos_token
#
# def generate_response(text):
#     # Tokenize input text without max_length or truncation
#     inputs = tokenizer(
#         text,
#         return_tensors="pt",
#         padding=True,  # Ensure padding is applied if necessary
#     )
#     input_ids = inputs['input_ids'].to(device)
#     attention_mask = inputs['attention_mask'].to(device)
#
#     # Generate response
#     output = model.generate(
#         input_ids=input_ids,
#         attention_mask=attention_mask,
#         max_new_tokens=50,  # Adjust based on your needs
#     )
#     response = tokenizer.decode(output[0], skip_special_tokens=True)
#     return response
#
# # Define Gradio interface
# iface = gr.Interface(
#     fn=generate_response,
#     inputs=gr.Textbox(placeholder="Enter your input here", lines=2),
#     outputs=gr.Textbox(),
#     title="Text Generation with Mistral",
#     description="Enter some text and get a response from the Mistral-7B model.",
# )
# iface.launch()
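
# If the local-loading route is revisited on memory-constrained hardware, 4-bit
# quantization can shrink the 7B model's footprint. A minimal sketch, not part of
# the original app; it assumes the `bitsandbytes` package is installed and that the
# Space exposes the token as API_KEY:
#
# import os
# import torch
# from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
#
# bnb_config = BitsAndBytesConfig(
#     load_in_4bit=True,
#     bnb_4bit_compute_dtype=torch.float16,
# )
# model = AutoModelForCausalLM.from_pretrained(
#     "mistralai/Mistral-7B-v0.1",
#     quantization_config=bnb_config,
#     device_map="auto",
#     token=os.getenv("API_KEY"),
# )
# tokenizer = AutoTokenizer.from_pretrained(
#     "mistralai/Mistral-7B-v0.1",
#     token=os.getenv("API_KEY"),
# )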