Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -5,6 +5,7 @@ import gradio as gr
|
|
| 5 |
import spaces
|
| 6 |
import os
|
| 7 |
import uuid
|
|
|
|
| 8 |
|
| 9 |
# Importing the model-related functions
|
| 10 |
from stable_audio_tools import get_pretrained_model
|
|
@@ -17,12 +18,18 @@ def load_model():
|
|
| 17 |
print("Model loaded successfully.")
|
| 18 |
return model, model_config
|
| 19 |
|
|
|
|
|
|
|
|
|
|
| 20 |
# Function to set up, generate, and process the audio
|
| 21 |
@spaces.GPU(duration=120) # Allocate GPU only when this function is called
|
| 22 |
def generate_audio(prompt, seconds_total=30, steps=100, cfg_scale=7):
|
| 23 |
-
print(f"Prompt
|
| 24 |
-
print(f"Settings: Duration={seconds_total}s, Steps={steps}, CFG Scale={cfg_scale}")
|
| 25 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 27 |
print(f"Using device: {device}")
|
| 28 |
|
|
@@ -42,7 +49,7 @@ def generate_audio(prompt, seconds_total=30, steps=100, cfg_scale=7):
|
|
| 42 |
|
| 43 |
# Set up text and timing conditioning
|
| 44 |
conditioning = [{
|
| 45 |
-
"prompt":
|
| 46 |
"seconds_start": 0,
|
| 47 |
"seconds_total": seconds_total
|
| 48 |
}]
|
|
@@ -88,17 +95,16 @@ footer {
|
|
| 88 |
}
|
| 89 |
"""
|
| 90 |
|
| 91 |
-
|
| 92 |
# Setting up the Gradio Interface
|
| 93 |
interface = gr.Interface(theme="Nymbo/Nymbo_Theme", css=css,
|
| 94 |
fn=generate_audio,
|
| 95 |
inputs=[
|
| 96 |
-
gr.Textbox(label="
|
| 97 |
-
gr.Slider(0, 47, value=30, label="
|
| 98 |
-
gr.Slider(10, 150, value=100, step=10, label="
|
| 99 |
-
gr.Slider(1, 15, value=7, step=0.1, label="CFG
|
| 100 |
],
|
| 101 |
-
outputs=gr.Audio(type="filepath", label="
|
| 102 |
)
|
| 103 |
|
| 104 |
# Pre-load the model to avoid multiprocessing issues
|
|
|
|
| 5 |
import spaces
|
| 6 |
import os
|
| 7 |
import uuid
|
| 8 |
+
from transformers import pipeline
|
| 9 |
|
| 10 |
# Importing the model-related functions
|
| 11 |
from stable_audio_tools import get_pretrained_model
|
|
|
|
| 18 |
print("Model loaded successfully.")
|
| 19 |
return model, model_config
|
| 20 |
|
| 21 |
+
# 번역 모델 로드
|
| 22 |
+
translator = pipeline("translation", model="Helsinki-NLP/opus-mt-ko-en")
|
| 23 |
+
|
| 24 |
# Function to set up, generate, and process the audio
|
| 25 |
@spaces.GPU(duration=120) # Allocate GPU only when this function is called
|
| 26 |
def generate_audio(prompt, seconds_total=30, steps=100, cfg_scale=7):
|
| 27 |
+
print(f"Original Prompt: {prompt}")
|
|
|
|
| 28 |
|
| 29 |
+
# 한글 텍스트를 영어로 번역
|
| 30 |
+
translated_prompt = translator(prompt, max_length=512)[0]['translation_text']
|
| 31 |
+
print(f"Translated Prompt: {translated_prompt}")
|
| 32 |
+
|
| 33 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 34 |
print(f"Using device: {device}")
|
| 35 |
|
|
|
|
| 49 |
|
| 50 |
# Set up text and timing conditioning
|
| 51 |
conditioning = [{
|
| 52 |
+
"prompt": translated_prompt,
|
| 53 |
"seconds_start": 0,
|
| 54 |
"seconds_total": seconds_total
|
| 55 |
}]
|
|
|
|
| 95 |
}
|
| 96 |
"""
|
| 97 |
|
|
|
|
| 98 |
# Setting up the Gradio Interface
|
| 99 |
interface = gr.Interface(theme="Nymbo/Nymbo_Theme", css=css,
|
| 100 |
fn=generate_audio,
|
| 101 |
inputs=[
|
| 102 |
+
gr.Textbox(label="프롬프트", placeholder="여기에 텍스트 프롬프트를 입력하세요"),
|
| 103 |
+
gr.Slider(0, 47, value=30, label="오디오 길이 (초)"),
|
| 104 |
+
gr.Slider(10, 150, value=100, step=10, label="디퓨전 단계 수"),
|
| 105 |
+
gr.Slider(1, 15, value=7, step=0.1, label="CFG 스케일")
|
| 106 |
],
|
| 107 |
+
outputs=gr.Audio(type="filepath", label="생성된 오디오"),
|
| 108 |
)
|
| 109 |
|
| 110 |
# Pre-load the model to avoid multiprocessing issues
|