import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
from threading import Thread
import torch

# Load the model and tokenizer
model_name = "Qwen/Qwen2.5-Coder-1.5B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float32,
    low_cpu_mem_usage=True,
)


def generate_code(prompt, max_new_tokens):
    inputs = tokenizer(prompt, return_tensors="pt")
    # Stream decoded tokens as they are produced; skip special tokens
    # (e.g. end-of-text markers) so they don't appear in the output
    streamer = TextIteratorStreamer(tokenizer, skip_special_tokens=True)
    generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=int(max_new_tokens))
    # Run generation in a background thread so we can yield partial output
    # to the UI while the model is still generating
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()
    generated_text = ""
    for new_text in streamer:
        generated_text += new_text
        yield generated_text
    thread.join()


# Gradio interface
iface = gr.Interface(
    fn=generate_code,
    inputs=[
        gr.Textbox(lines=5, label="Enter your prompt"),
        gr.Slider(minimum=50, maximum=500, value=200, step=10, label="Max New Tokens"),
    ],
    outputs=gr.Code(language="python", label="Generated Code"),
    title="Qwen2.5-Coder-1.5B Code Generator",
    description="Enter a prompt to generate Python code using Qwen2.5-Coder-1.5B",
    examples=[
        ["Write a Python function to calculate the factorial of a number.", 200],
        ["Create a class representing a simple bank account with deposit and withdraw methods.", 300],
    ],
)

iface.launch()
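# Usage note (assuming the script is saved as app.py and gradio, transformers,
# and torch are installed):
#   python app.py
# Gradio serves the app locally (by default at http://127.0.0.1:7860);
# passing share=True to launch() would additionally create a temporary
# public link for sharing the demo.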