import gradio as gr
import transformers

# Load the model and tokenizer
model_name = "Qwen/CodeQwen1.5-7B-Chat"
tokenizer = transformers.AutoTokenizer.from_pretrained(model_name)
model = transformers.AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", torch_dtype="auto")
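# Note: device_map="auto" requires the `accelerate` package to be installed,
# and torch_dtype="auto" loads the weights in the checkpoint's native precision.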
# Generation hyperparameters
max_new_tokens: int = 2048
do_sample: bool = True        # sample from the distribution instead of decoding greedily
num_beams: int = 1            # no beam search
temperature: float = 0.5
top_p: float = 0.95           # nucleus sampling threshold
top_k: int = 40               # top_k is an integer count of candidate tokens, not a float
repetition_penalty: float = 1.1
pipe = transformers.pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=max_new_tokens,
    do_sample=do_sample,
    num_beams=num_beams,
    temperature=temperature,
    top_p=top_p,
    top_k=top_k,
    repetition_penalty=repetition_penalty,
)
def generate_response(input_text):
    # The system and user turns must be separate dicts; merging them into one
    # dict overwrites the earlier "role"/"content" keys and silently drops the
    # system prompt.
    messages = [
        {
            "role": "system",
            "content": "You are a helpful coding chatbot. You will answer the user's questions to the best of your ability.",
        },
        {"role": "user", "content": input_text},
    ]
    # The chat pipeline returns the whole conversation under "generated_text";
    # the last entry is the assistant's reply.
    return pipe(messages)[0]["generated_text"][-1]["content"].replace("\\n", "\n")
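# A quick smoke test before launching the UI (hypothetical prompt; adjust as needed):
# print(generate_response("Write a Python function that reverses a string."))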
# Define the Gradio interface
iface = gr.Interface(
    fn=generate_response,
    inputs="text",
    outputs="text",
    title="CodeQwen1.5-7B-Chat",
    description="A code-generation chat model from the Qwen team. Enter a prompt to get code suggestions or completions.",
)

# Launch the interface
iface.launch()
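# To expose a temporary public link instead of serving only on localhost,
# Gradio's launch() also accepts share=True:
# iface.launch(share=True)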