import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline


def load_model():
    """
    Load the DeepSeek-R1 model.

    Note: we rely on flash_attn, so this should work once
    PyTorch with CUDA and flash_attn are installed.
    """
    try:
        model = AutoModelForCausalLM.from_pretrained(
            "deepseek-ai/DeepSeek-R1",
            trust_remote_code=True,
            torch_dtype="auto",   # keep the checkpoint's dtype instead of defaulting to fp32
            device_map="auto",    # assumes `accelerate` is installed; shards weights across GPUs
        )
        tokenizer = AutoTokenizer.from_pretrained(
            "deepseek-ai/DeepSeek-R1",
            trust_remote_code=True,
        )

        return pipeline("text-generation", model=model, tokenizer=tokenizer)
    except Exception as e:
        # Signal failure with a string; process_text() checks for this sentinel.
        return f"Model Loading Error: {e}"


model_pipeline = load_model()
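# NOTE: the pipeline is built once at import time and shared by every Gradio
# request. DeepSeek-R1 is a very large mixture-of-experts model, so running it
# locally assumes a machine with substantial (multi-)GPU memory.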


def process_text(input_text):
    """
    Use the loaded DeepSeek-R1 pipeline to generate text.
    """
    if isinstance(model_pipeline, str):
        # load_model() failed and returned an error message instead of a pipeline.
        return f"Error: {model_pipeline}"
    try:
        # max_new_tokens caps only the generated continuation; the previous
        # max_length=200 also counted prompt tokens, so a long prompt could
        # leave no room for output.
        outputs = model_pipeline(input_text, max_new_tokens=200, num_return_sequences=1)
        return outputs[0]["generated_text"]
    except Exception as e:
        return f"Inference Error: {e}"


with gr.Blocks() as demo:
    gr.Markdown(
        "# DeepSeek-R1 Text Generator\n"
        "Enter a prompt and generate text using the DeepSeek-R1 model."
    )
    input_box = gr.Textbox(
        lines=5, label="Input Prompt", placeholder="Type your prompt here..."
    )
    generate_btn = gr.Button("Generate")
    output_box = gr.Textbox(
        lines=10, label="Generated Text", placeholder="Generated text appears here..."
    )

    generate_btn.click(fn=process_text, inputs=input_box, outputs=output_box)

demo.launch()
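# Note: demo.launch(share=True) would additionally create a temporary public
# link; by default the app is served on localhost only.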