Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -18,6 +18,8 @@ import shutil
|
|
18 |
import matplotlib.pyplot as plt
|
19 |
import gc # Import the garbage collector
|
20 |
from audio import *
|
|
|
|
|
21 |
# Download necessary NLTK data
|
22 |
try:
|
23 |
nltk.data.find('tokenizers/punkt')
|
@@ -70,8 +72,6 @@ def load_min_dalle_model(models_root: str = 'pretrained', fp16: bool = True):
|
|
70 |
# Initialize the MinDalle model
|
71 |
min_dalle_model = load_min_dalle_model()
|
72 |
|
73 |
-
|
74 |
-
|
75 |
def generate_image_with_min_dalle(
|
76 |
model: MinDalle,
|
77 |
text: str,
|
@@ -114,10 +114,6 @@ def generate_image_with_min_dalle(
|
|
114 |
from pydub import AudioSegment
|
115 |
import os
|
116 |
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
# Function to generate video from text
|
122 |
def get_output_video(text):
|
123 |
print("DEBUG: Starting get_output_video function...")
|
@@ -169,31 +165,46 @@ def get_output_video(text):
|
|
169 |
assert len(generated_images) == len(sentences), "Mismatch in number of images and sentences."
|
170 |
sub_names = [nltk.tokenize.sent_tokenize(sentence) for sentence in sentences]
|
171 |
|
172 |
-
# Add subtitles to images
|
173 |
-
def
|
|
|
|
|
|
|
|
|
|
|
174 |
draw = ImageDraw.Draw(image)
|
175 |
-
image_width,
|
176 |
y_text = text_start_height
|
177 |
-
lines = textwrap.wrap(text, width=
|
178 |
for line in lines:
|
179 |
line_width, line_height = font.getbbox(line)[2:]
|
180 |
-
draw.text(((image_width - line_width) / 2, y_text),
|
181 |
-
|
182 |
-
y_text += line_height
|
183 |
|
184 |
def add_text_to_img(text1, image_input):
|
185 |
print(f"DEBUG: Adding text to image: '{text1}'")
|
186 |
-
|
|
|
|
|
|
|
187 |
path_font = "/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf"
|
188 |
if not os.path.exists(path_font):
|
189 |
path_font = "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf"
|
|
|
190 |
|
191 |
-
font = ImageFont.truetype(path_font, fontsize)
|
192 |
text_color = (255, 255, 0)
|
193 |
-
|
194 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
195 |
return image_input
|
196 |
|
|
|
|
|
197 |
generated_images_sub = []
|
198 |
for k, image in enumerate(generated_images):
|
199 |
text_to_add = sub_names[k][0]
|
@@ -201,6 +212,8 @@ def get_output_video(text):
|
|
201 |
generated_images_sub.append(result)
|
202 |
result.save(f"image_{k}/generated_image_with_subtitles.png")
|
203 |
|
|
|
|
|
204 |
# Generate audio for each subtitle
|
205 |
mp3_names = []
|
206 |
mp3_lengths = []
|
@@ -277,4 +290,4 @@ with demo:
|
|
277 |
button_gen_video.click(fn=get_output_video, inputs=input_start_text, outputs=output_interpolation)
|
278 |
|
279 |
# Launch the Gradio app
|
280 |
-
demo.launch(debug=True, share=
|
|
|
18 |
import matplotlib.pyplot as plt
|
19 |
import gc # Import the garbage collector
|
20 |
from audio import *
|
21 |
+
import os
|
22 |
+
|
23 |
# Download necessary NLTK data
|
24 |
try:
|
25 |
nltk.data.find('tokenizers/punkt')
|
|
|
72 |
# Initialize the MinDalle model
|
73 |
min_dalle_model = load_min_dalle_model()
|
74 |
|
|
|
|
|
75 |
def generate_image_with_min_dalle(
|
76 |
model: MinDalle,
|
77 |
text: str,
|
|
|
114 |
from pydub import AudioSegment
|
115 |
import os
|
116 |
|
|
|
|
|
|
|
|
|
117 |
# Function to generate video from text
|
118 |
def get_output_video(text):
|
119 |
print("DEBUG: Starting get_output_video function...")
|
|
|
165 |
assert len(generated_images) == len(sentences), "Mismatch in number of images and sentences."
|
166 |
sub_names = [nltk.tokenize.sent_tokenize(sentence) for sentence in sentences]
|
167 |
|
168 |
+
# Add subtitles to images with dynamic adjustments
|
169 |
+
def get_dynamic_wrap_width(font, text, image_width, padding):
    """Estimate how many characters of `text` fit on one line of the image.

    Args:
        font: font object exposing PIL's ``getbbox(str)`` interface.
        text: the text to be wrapped; the average glyph width of its
            characters drives the estimate.
        image_width: total image width in pixels.
        padding: horizontal padding in pixels applied on each side.

    Returns:
        int: a character count suitable for ``textwrap.wrap(width=...)``,
        always at least 1.
    """
    # Guard: empty text would divide by zero below (original bug); one
    # character per line is the safe minimum.
    if not text:
        return 1
    # Average advance width of the glyphs actually present in `text`.
    avg_char_width = sum(font.getbbox(c)[2] for c in text) / len(text)
    # Whitespace-only text can yield a zero average width — avoid a second
    # ZeroDivisionError on the floor division.
    if avg_char_width <= 0:
        return 1
    # int(): float floor-division would otherwise leak a float out of this
    # helper, while textwrap's `width` is conventionally an integer.
    return max(1, int((image_width - padding * 2) // avg_char_width))
|
173 |
+
|
174 |
+
def draw_multiple_line_text(image, text, font, text_color, text_start_height, padding=10):
    """Render `text` onto `image` as horizontally centred, wrapped lines.

    The image is modified in place; nothing is returned.

    Args:
        image: PIL image to draw on.
        text: subtitle text to render.
        font: PIL font used both for the wrap-width estimate and drawing.
        text_color: fill colour tuple passed to ``ImageDraw.text``.
        text_start_height: y pixel coordinate of the first rendered line.
        padding: vertical gap in pixels added after each line; also forwarded
            to the wrap-width estimate as horizontal padding.
    """
    canvas = ImageDraw.Draw(image)
    img_w = image.size[0]
    wrap_cols = get_dynamic_wrap_width(font, text, img_w, padding)
    current_y = text_start_height
    for chunk in textwrap.wrap(text, width=wrap_cols):
        _, _, chunk_w, chunk_h = font.getbbox(chunk)
        # Centre each line horizontally, then advance the baseline.
        canvas.text(((img_w - chunk_w) / 2, current_y), chunk, font=font, fill=text_color)
        current_y += chunk_h + padding
|
|
|
183 |
|
184 |
def add_text_to_img(text1, image_input):
    """Overlay subtitle `text1` near the bottom edge of `image_input`.

    Args:
        text1: subtitle text to draw.
        image_input: PIL image; drawn on in place and also returned so the
            call site can chain on the result.

    Returns:
        The same image object with the subtitle rendered onto it.
    """
    print(f"DEBUG: Adding text to image: '{text1}'")
    # Scale the font with image width so subtitles stay legible at any
    # resolution; 800 px is the reference width, 10 pt the floor.
    base_font_size = 30
    image_width, image_height = image_input.size
    scaled_font_size = max(10, int(base_font_size * (image_width / 800)))

    # Prefer Liberation Sans, fall back to DejaVu (both common on Linux).
    # NOTE(review): if neither font exists, ImageFont.truetype below will
    # still raise — confirm the deployment image ships one of these.
    path_font = "/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf"
    if not os.path.exists(path_font):
        path_font = "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf"
    font = ImageFont.truetype(path_font, scaled_font_size)

    text_color = (255, 255, 0)
    padding = 10

    # Estimate the text block height from the wrapped line count so the
    # block can be anchored 20 px above the bottom edge.
    line_height = font.getbbox("A")[3] + padding
    wrapped_lines = textwrap.wrap(text1, get_dynamic_wrap_width(font, text1, image_width, padding))
    total_text_height = len(wrapped_lines) * line_height
    # Clamp: very long text on a short image would otherwise produce a
    # negative start height and draw above the canvas (original bug).
    text_start_height = max(0, image_height - total_text_height - 20)

    draw_multiple_line_text(image_input, text1, font, text_color, text_start_height, padding)
    return image_input
|
205 |
|
206 |
+
|
207 |
+
# Process images with subtitles
|
208 |
generated_images_sub = []
|
209 |
for k, image in enumerate(generated_images):
|
210 |
text_to_add = sub_names[k][0]
|
|
|
212 |
generated_images_sub.append(result)
|
213 |
result.save(f"image_{k}/generated_image_with_subtitles.png")
|
214 |
|
215 |
+
|
216 |
+
|
217 |
# Generate audio for each subtitle
|
218 |
mp3_names = []
|
219 |
mp3_lengths = []
|
|
|
290 |
button_gen_video.click(fn=get_output_video, inputs=input_start_text, outputs=output_interpolation)
|
291 |
|
292 |
# Launch the Gradio app
|
293 |
+
demo.launch(debug=True, share=True)
|