import gradio as gr

# Build and launch an auto-generated Gradio demo for the Hub-hosted Mistral-7B-v0.1 model.
gr.load("models/mistralai/Mistral-7B-v0.1").launch()
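# NOTE: the commented-out blocks below look like earlier iterations that ran the model
# locally with transformers instead of delegating to gr.load().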
# import gradio as gr
# from transformers import AutoModelForCausalLM, AutoTokenizer
# import torch
# import os
#
# # Get the Hugging Face token from environment variables
# hf_token = os.getenv("API_KEY")
#
# # Load model and tokenizer
# model_name = "mistralai/Mistral-7B-v0.1"
# model = AutoModelForCausalLM.from_pretrained(
#     model_name,
#     device_map="auto",
#     use_auth_token=hf_token
# )
# tokenizer = AutoTokenizer.from_pretrained(
#     model_name,
#     use_auth_token=hf_token
# )
#
# # Define the generation function
# def generate_response(prompt):
#     # Tokenize input text
#     inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
#     # Generate response
#     generated_ids = model.generate(**inputs, max_new_tokens=100, do_sample=True)
#     # Decode and return response
#     return tokenizer.decode(generated_ids[0], skip_special_tokens=True)
#
# # Set up Gradio interface
# with gr.Blocks() as demo:
#     gr.Markdown("# Text Generation")
#     input_text = gr.Textbox(placeholder="Enter your input here", lines=2)
#     output_text = gr.Textbox(label="Generated Output", lines=2)
#     submit_btn = gr.Button("Generate")
#     submit_btn.click(generate_response, inputs=input_text, outputs=output_text)
#
# # Launch the interface
# if __name__ == "__main__":
#     demo.launch()
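# Second commented-out variant below: Mistral-7B-v0.3 loaded in fp16 via device_map="auto",
# exposed through a gr.Interface front end instead of gr.Blocks.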
# import os
# import torch
# from transformers import AutoModelForCausalLM, AutoTokenizer
# import gradio as gr
#
# # Define device
# device = 'cuda' if torch.cuda.is_available() else 'cpu'
#
# # Define model and tokenizer
# model_name = "mistralai/Mistral-7B-v0.3"
# hf_token = os.getenv("API_KEY")
# # device_map="auto" already places the weights, so no extra .to(device) call is needed
# model = AutoModelForCausalLM.from_pretrained(
#     model_name, token=hf_token, torch_dtype=torch.float16, device_map="auto", low_cpu_mem_usage=True
# )
# tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token)
#
# # Add padding token if not present
# if tokenizer.pad_token is None:
#     tokenizer.pad_token = tokenizer.eos_token
#
# def generate_response(text):
#     # Tokenize input text without max_length and truncation
#     inputs = tokenizer(
#         text,
#         return_tensors="pt",
#         padding=True  # Ensure padding is applied if necessary
#     )
#     input_ids = inputs['input_ids'].to(device)
#     attention_mask = inputs['attention_mask'].to(device)
#
#     # Generate response
#     output = model.generate(
#         input_ids=input_ids,
#         attention_mask=attention_mask,
#         max_new_tokens=50  # Adjust based on your needs
#     )
#     response = tokenizer.decode(output[0], skip_special_tokens=True)
#     return response
#
# # Define Gradio interface
# iface = gr.Interface(
#     fn=generate_response,
#     inputs=gr.Textbox(placeholder="Enter your input here", lines=2),
#     outputs=gr.Textbox(),
#     title="Text Generation with Mistral",
#     description="Enter some text and get a response from the Mistral-7B model."
# )
#
# iface.launch()