import os
from threading import Thread
from typing import Iterator
import gradio as gr
import spaces
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import pipeline

# Load the lightweight appvoid/text-arco model as a Hugging Face
# text-generation pipeline at import time so it is ready before the UI starts.
model = pipeline(task="text-generation", model="appvoid/text-arco")

@spaces.GPU
def predict(prompt: str) -> str:
    """Generate a short completion for *prompt* with the text-arco model.

    Args:
        prompt: Free-form input text from the Gradio textbox.

    Returns:
        The prompt plus up to 64 newly generated tokens, as one string
        (the pipeline's ``generated_text`` includes the original prompt).
    """
    outputs = model(
        prompt,
        max_new_tokens=64,
        # do_sample=True is required for `temperature` to take effect;
        # without it generation is greedy and the value is silently ignored
        # (transformers warns about the unused sampling flag).
        do_sample=True,
        temperature=0.3,
    )
    return outputs[0]["generated_text"]

# Build and serve the minimal demo UI: one text input box, one text output box.
demo = gr.Interface(
    fn=predict,
    inputs="text",
    outputs="text",
    title="text arco",
)
demo.launch()