import gradio as gr

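# Load a ready-made demo for this model from the Hugging Face Hub (served via the Inference API) and launch it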
gr.load("models/mistralai/Mistral-7B-v0.1").launch()


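# Alternative approach (commented out): run Mistral-7B-v0.1 locally with transformers and a gr.Blocks UI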
# import gradio as gr
# from transformers import AutoModelForCausalLM, AutoTokenizer
# import torch
# import os

# # Get the Hugging Face token from environment variables
# hf_token = os.getenv("API_KEY")



# # Load model and tokenizer
# model_name = "mistralai/Mistral-7B-v0.1"
# model = AutoModelForCausalLM.from_pretrained(
#     model_name,
#     device_map="auto",
#     token=hf_token  # `use_auth_token` is deprecated; recent transformers releases expect `token`
# )
# tokenizer = AutoTokenizer.from_pretrained(
#     model_name,
#     token=hf_token
# )

# # Define the generation function
# def generate_response(prompt):
#     # Tokenize the prompt and move it to whichever device the model was placed on
#     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

#     # Generate response
#     generated_ids = model.generate(**inputs, max_new_tokens=100, do_sample=True)
    
#     # Decode and return response
#     return tokenizer.decode(generated_ids[0], skip_special_tokens=True)

# # Set up Gradio interface
# with gr.Blocks() as demo:
#     gr.Markdown("# Text Generation")
#     input_text = gr.Textbox(placeholder="Enter your input here", lines=2)
#     output_text = gr.Textbox(label="Generated Output", lines=2)
#     submit_btn = gr.Button("Generate")

#     submit_btn.click(generate_response, inputs=input_text, outputs=output_text)

# # Launch the interface
# if __name__ == "__main__":
#     demo.launch()





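# Alternative approach (commented out): run Mistral-7B-v0.3 locally with transformers, explicit device handling, and a gr.Interface UI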
# import os
# import torch
# from transformers import AutoModelForCausalLM, AutoTokenizer
# import gradio as gr

# # Define device
# device = 'cuda' if torch.cuda.is_available() else 'cpu'

# # Define model and tokenizer
# model_name = "mistralai/Mistral-7B-v0.3"
# hf_token = os.getenv("API_KEY")

# # device_map="auto" already places the model weights, so no extra .to(device) call is needed
# model = AutoModelForCausalLM.from_pretrained(
#     model_name, token=hf_token, torch_dtype=torch.float16, device_map="auto", low_cpu_mem_usage=True
# )

# tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token)

# # Add padding token if not present
# if tokenizer.pad_token is None:
#     tokenizer.pad_token = tokenizer.eos_token

# def generate_response(text):
#     # Tokenize input text without max_length and truncation
#     inputs = tokenizer(
#         text,
#         return_tensors="pt",
#         padding=True  # Ensure padding is applied if necessary
#     )
    
#     input_ids = inputs['input_ids'].to(device)
#     attention_mask = inputs['attention_mask'].to(device)
    
#     # Generate response
#     output = model.generate(
#         input_ids=input_ids,
#         attention_mask=attention_mask,
#         max_new_tokens=50  # Adjust based on your needs
#     )
    
#     response = tokenizer.decode(output[0], skip_special_tokens=True)
#     return response

# # Define Gradio interface
# iface = gr.Interface(
#     fn=generate_response,
#     inputs=gr.Textbox(placeholder="Enter your input here", lines=2),
#     outputs=gr.Textbox(),
#     title="Text Generation with Mistral",
#     description="Enter some text and get a response from the Mistral-7B model."
# )

# iface.launch()