import gradio as gr

# Load the hosted Mistral-7B model through the Hugging Face Inference API
# and launch the demo.
gr.load("models/mistralai/Mistral-7B-v0.1").launch()
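
# A minimal sketch of the same call for gated or private checkpoints, assuming
# the token lives in the Space's API_KEY secret (the name used in the older
# versions below); gr.load accepts an hf_token argument for authenticated access:
#
# import os
# gr.load("models/mistralai/Mistral-7B-v0.1", hf_token=os.getenv("API_KEY")).launch()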
# Previous version: load the model locally with transformers.
#
# import gradio as gr
# from transformers import AutoModelForCausalLM, AutoTokenizer
# import torch
# import os
#
# # Get the Hugging Face token from environment variables
# hf_token = os.getenv("API_KEY")
#
# # Load model and tokenizer ("token" replaces the deprecated "use_auth_token")
# model_name = "mistralai/Mistral-7B-v0.1"
# model = AutoModelForCausalLM.from_pretrained(
#     model_name,
#     device_map="auto",
#     token=hf_token,
# )
# tokenizer = AutoTokenizer.from_pretrained(
#     model_name,
#     token=hf_token,
# )
#
# # Define the generation function
# def generate_response(prompt):
#     # Tokenize the input and move it to wherever device_map placed the model
#     # (model.device also works on CPU-only machines, unlike a hard-coded "cuda")
#     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
#     # Generate a response
#     generated_ids = model.generate(**inputs, max_new_tokens=100, do_sample=True)
#     # Decode and return the response
#     return tokenizer.decode(generated_ids[0], skip_special_tokens=True)
#
# # Set up the Gradio interface
# with gr.Blocks() as demo:
#     gr.Markdown("# Text Generation")
#     input_text = gr.Textbox(placeholder="Enter your input here", lines=2)
#     output_text = gr.Textbox(label="Generated Output", lines=2)
#     submit_btn = gr.Button("Generate")
#     submit_btn.click(generate_response, inputs=input_text, outputs=output_text)
#
# # Launch the interface
# if __name__ == "__main__":
#     demo.launch()
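
# A hedged sketch of loading the same checkpoint in 4-bit to cut GPU memory,
# assuming bitsandbytes is installed and a CUDA device is available
# (BitsAndBytesConfig ships with transformers; the rest of the setup above
# is unchanged):
#
# from transformers import AutoModelForCausalLM, BitsAndBytesConfig
# import torch
#
# quant_config = BitsAndBytesConfig(
#     load_in_4bit=True,                     # store the weights in 4 bits
#     bnb_4bit_compute_dtype=torch.float16,  # run the matmuls in fp16
# )
# model = AutoModelForCausalLM.from_pretrained(
#     "mistralai/Mistral-7B-v0.1",
#     device_map="auto",
#     quantization_config=quant_config,
# )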
# Previous version: Mistral-7B-v0.3 behind a simple gr.Interface.
#
# import os
# import torch
# from transformers import AutoModelForCausalLM, AutoTokenizer
# import gradio as gr
#
# # Define the model and tokenizer
# model_name = "mistralai/Mistral-7B-v0.3"
# hf_token = os.getenv("API_KEY")
# model = AutoModelForCausalLM.from_pretrained(
#     model_name,
#     token=hf_token,
#     torch_dtype=torch.float16,
#     device_map="auto",       # device_map already places the weights; a trailing
#     low_cpu_mem_usage=True,  # .to(device) would conflict with accelerate's dispatch
# )
# tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token)
#
# # Add a padding token if one is not present
# if tokenizer.pad_token is None:
#     tokenizer.pad_token = tokenizer.eos_token
#
# def generate_response(text):
#     # Tokenize the input text without max_length or truncation
#     inputs = tokenizer(
#         text,
#         return_tensors="pt",
#         padding=True,  # ensure padding is applied if necessary
#     )
#     # Move inputs to wherever device_map placed the model
#     input_ids = inputs["input_ids"].to(model.device)
#     attention_mask = inputs["attention_mask"].to(model.device)
#     # Generate a response
#     output = model.generate(
#         input_ids=input_ids,
#         attention_mask=attention_mask,
#         max_new_tokens=50,  # adjust based on your needs
#     )
#     return tokenizer.decode(output[0], skip_special_tokens=True)
#
# # Define the Gradio interface
# iface = gr.Interface(
#     fn=generate_response,
#     inputs=gr.Textbox(placeholder="Enter your input here", lines=2),
#     outputs=gr.Textbox(),
#     title="Text Generation with Mistral",
#     description="Enter some text and get a response from the Mistral-7B model.",
# )
# iface.launch()
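
# A hedged sketch of token-by-token streaming with the model and tokenizer
# loaded above; TextIteratorStreamer is the standard transformers pattern,
# and Gradio streams the output of a generator function into the Textbox:
#
# from threading import Thread
# from transformers import TextIteratorStreamer
#
# def stream_response(text):
#     inputs = tokenizer(text, return_tensors="pt").to(model.device)
#     streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
#     # Run generation in a background thread so the stream can be consumed here
#     thread = Thread(target=model.generate, kwargs=dict(**inputs, streamer=streamer, max_new_tokens=50))
#     thread.start()
#     partial = ""
#     for new_text in streamer:
#         partial += new_text
#         yield partial  # Gradio updates the Textbox on every yield
#
# gr.Interface(fn=stream_response, inputs=gr.Textbox(lines=2), outputs=gr.Textbox()).launch()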