gradiotest / app.py
Abrahamau's picture
Update app.py
33da63d verified
raw
history blame
2.88 kB
import torch
import os
import random
import gradio as gr
from TTS.api import TTS
from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan, pipeline
import base64
from datasets import load_dataset
from diffusers import DiffusionPipeline
from huggingface_hub import login
import numpy as np
import spaces
@spaces.GPU
def guessanImage(model, image):
    """Classify an image with the selected image-classification model.

    Args:
        model: Model identifier passed to the ``transformers`` pipeline.
        image: Image to classify, or ``None`` when nothing was provided.

    Returns:
        Whatever the image-classification pipeline returns for ``image``,
        or ``None`` when no image was provided.
    """
    # Guard first: without an image there is nothing to classify, and the
    # original fell through to ``return description`` with the name unbound.
    if image is None:
        return None

    imgclassifier = pipeline("image-classification", model=model)
    return imgclassifier(image)
@spaces.GPU
def guessanAge(model, image):
    """Run the selected age-classification model on an image.

    Args:
        model: Model identifier passed to the ``transformers`` pipeline.
        image: Image to classify, or ``None`` when nothing was provided.

    Returns:
        Whatever the image-classification pipeline returns for ``image``,
        or ``None`` when no image was provided.
    """
    # Guard first so a missing image neither loads a model nor hits the
    # unbound ``description`` the original returned in that case.
    if image is None:
        return None

    imgclassifier = pipeline("image-classification", model=model)
    return imgclassifier(image)
@spaces.GPU(duration=120)
def text2speech(text, sample):
    """Synthesize ``text`` with XTTS v2, using ``sample`` as the speaker reference.

    Args:
        text: Text to speak. Empty or ``None`` yields no audio.
        sample: Speaker reference forwarded to ``speaker_wav``.
            NOTE(review): the TTS API is expected to want an audio file
            path here -- confirm the UI component delivers one.

    Returns:
        ``None`` when there is no text; otherwise a
        ``(sample_rate, samples)`` tuple, which is the in-memory form the
        Gradio audio output accepts (a bare list of samples is rejected).

    Raises:
        gr.Error: If no speaker sample was provided.
    """
    if not text:
        return None
    if sample is None:
        # Fail with a user-visible message instead of an error from inside
        # the TTS library.
        raise gr.Error("Please record a voice sample first.")

    tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2")
    # Speak the caller's text; the original always synthesized "Hello world!".
    wav = tts.tts(text=text, speaker_wav=sample, language="en")
    return tts.synthesizer.output_sample_rate, np.asarray(wav, dtype=np.float32)
@spaces.GPU
def ImageGenFromText(text, model):
    """Generate one 512x512 image from a text prompt with a diffusers pipeline.

    Args:
        text: Prompt. Empty or ``None`` yields no image.
        model: Model identifier passed to ``DiffusionPipeline.from_pretrained``.

    Returns:
        The first image produced by the pipeline, or ``None`` when no
        prompt was given.
    """
    # Nothing to do without a prompt; the original reached ``return image``
    # with the name unbound in this case.
    if not text:
        return None

    # The Hub token comes from the "fluxauth" environment variable. Only log
    # in when it is set, so a missing variable does not call login(token=None).
    api_key = os.getenv("fluxauth")
    if api_key:
        login(token=api_key)

    dtype = torch.bfloat16
    device = "cuda" if torch.cuda.is_available() else "cpu"

    # Fresh random seed per request, drawn from the int32 range.
    max_seed = np.iinfo(np.int32).max
    seed = random.randint(0, max_seed)
    generator = torch.Generator().manual_seed(seed)

    pipe = DiffusionPipeline.from_pretrained(model, torch_dtype=dtype).to(device)
    result = pipe(
        prompt=text,
        width=512,
        height=512,
        num_inference_steps=4,
        generator=generator,
        guidance_scale=0.0,
    )
    return result.images[0]
# --- Tab 1: general image classification -----------------------------------
radio1 = gr.Radio(
    ["microsoft/resnet-50", "google/vit-base-patch16-224", "apple/mobilevit-small"],
    value="microsoft/resnet-50",
    label="Select a Classifier",
    info="Image Classifier",
)
tab1 = gr.Interface(
    fn=guessanImage,
    inputs=[radio1, gr.Image(type="pil")],
    outputs=["text"],
)

# --- Tab 2: age classification ----------------------------------------------
radio2 = gr.Radio(
    ["nateraw/vit-age-classifier"],
    value="nateraw/vit-age-classifier",
    label="Select an Age Classifier",
    info="Age Classifier",
)
tab2 = gr.Interface(
    fn=guessanAge,
    inputs=[radio2, gr.Image(type="pil")],
    outputs=["text"],
)

# --- Tab 3: text to speech ---------------------------------------------------
textbox = gr.Textbox(value="good morning pineapple! looking very good very nice!")
tab3 = gr.Interface(
    fn=text2speech,
    # text2speech forwards the recording to the TTS ``speaker_wav`` argument,
    # so request a file path rather than the default in-memory numpy form.
    inputs=[textbox, gr.Audio(sources=["microphone"], type="filepath")],
    outputs=["audio"],
)

# --- Tab 4: text to image ----------------------------------------------------
radio4 = gr.Radio(
    ["black-forest-labs/FLUX.1-schnell"],
    value="black-forest-labs/FLUX.1-schnell",
    label="Select",
    info="text to image",
)
tab4 = gr.Interface(
    fn=ImageGenFromText,
    inputs=["text", radio4],
    outputs=["image"],
)

# Combine the four interfaces into one tabbed app and start serving it.
demo = gr.TabbedInterface(
    [tab1, tab2, tab3, tab4],
    ["Describe", "Estimate Age", "Speak", "Generate Image"],
)
demo.launch()