ruslanmv committed (verified)
Commit 18c8652 · Parent(s): e3c35a3

Update app.py

Files changed (1):
  1. app.py (+31, -18)
app.py CHANGED

@@ -18,6 +18,8 @@ import shutil
 import matplotlib.pyplot as plt
 import gc # Import the garbage collector
 from audio import *
+import os
+
 # Download necessary NLTK data
 try:
     nltk.data.find('tokenizers/punkt')
@@ -70,8 +72,6 @@ def load_min_dalle_model(models_root: str = 'pretrained', fp16: bool = True):
 # Initialize the MinDalle model
 min_dalle_model = load_min_dalle_model()
 
-
-
 def generate_image_with_min_dalle(
     model: MinDalle,
     text: str,
@@ -114,10 +114,6 @@ def generate_image_with_min_dalle(
 from pydub import AudioSegment
 import os
 
-
-
-
-
 # Function to generate video from text
 def get_output_video(text):
     print("DEBUG: Starting get_output_video function...")
@@ -169,31 +165,46 @@ def get_output_video(text):
     assert len(generated_images) == len(sentences), "Mismatch in number of images and sentences."
     sub_names = [nltk.tokenize.sent_tokenize(sentence) for sentence in sentences]
 
-    # Add subtitles to images
-    def draw_multiple_line_text(image, text, font, text_color, text_start_height):
+    # Add subtitles to images with dynamic adjustments
+    def get_dynamic_wrap_width(font, text, image_width, padding):
+        # Estimate the number of characters per line dynamically
+        avg_char_width = sum(font.getbbox(c)[2] for c in text) / len(text)
+        return max(1, (image_width - padding * 2) // avg_char_width)
+
+    def draw_multiple_line_text(image, text, font, text_color, text_start_height, padding=10):
         draw = ImageDraw.Draw(image)
-        image_width, image_height = image.size
+        image_width, _ = image.size
         y_text = text_start_height
-        lines = textwrap.wrap(text, width=40)
+        lines = textwrap.wrap(text, width=get_dynamic_wrap_width(font, text, image_width, padding))
         for line in lines:
             line_width, line_height = font.getbbox(line)[2:]
-            draw.text(((image_width - line_width) / 2, y_text),
-                      line, font=font, fill=text_color)
-            y_text += line_height
+            draw.text(((image_width - line_width) / 2, y_text), line, font=font, fill=text_color)
+            y_text += line_height + padding
 
     def add_text_to_img(text1, image_input):
         print(f"DEBUG: Adding text to image: '{text1}'")
-        fontsize = 30
+        # Scale font size dynamically
+        base_font_size = 30
+        image_width, image_height = image_input.size
+        scaled_font_size = max(10, int(base_font_size * (image_width / 800))) # Adjust 800 based on typical image width
         path_font = "/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf"
         if not os.path.exists(path_font):
             path_font = "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf"
 
-        font = ImageFont.truetype(path_font, fontsize)
+        font = ImageFont.truetype(path_font, scaled_font_size)
         text_color = (255, 255, 0)
-        text_start_height = image_input.height - (fontsize * len(textwrap.wrap(text1, width=40))) - 20
-        draw_multiple_line_text(image_input, text1, font, text_color, text_start_height)
+        padding = 10
+
+        # Estimate starting height dynamically
+        line_height = font.getbbox("A")[3] + padding
+        total_text_height = len(textwrap.wrap(text1, get_dynamic_wrap_width(font, text1, image_width, padding))) * line_height
+        text_start_height = image_height - total_text_height - 20
+
+        draw_multiple_line_text(image_input, text1, font, text_color, text_start_height, padding)
         return image_input
 
+
+    # Process images with subtitles
     generated_images_sub = []
     for k, image in enumerate(generated_images):
         text_to_add = sub_names[k][0]
@@ -201,6 +212,8 @@ def get_output_video(text):
         generated_images_sub.append(result)
         result.save(f"image_{k}/generated_image_with_subtitles.png")
 
+
+
     # Generate audio for each subtitle
     mp3_names = []
     mp3_lengths = []
@@ -277,4 +290,4 @@ with demo:
     button_gen_video.click(fn=get_output_video, inputs=input_start_text, outputs=output_interpolation)
 
 # Launch the Gradio app
-demo.launch(debug=True, share=False)
+demo.launch(debug=True, share=True)
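For reviewers who want to check the new subtitle sizing without running the whole Space, below is a minimal standalone sketch (not part of the commit) that reuses the helpers introduced above on a blank Pillow canvas. The font lookup mirrors the paths in app.py, while load_font, the test text, and the output filename are made up for illustration. One adjustment is flagged in a comment: the wrap width is cast to int, because textwrap.wrap expects an integer width and the floor division in the diff still yields a float when the average glyph width is fractional.

# Standalone sketch (not part of the commit): exercises the dynamic subtitle
# helpers on a blank Pillow image so wrapping and font scaling can be eyeballed
# without running the full Gradio app. Assumes Pillow is installed.
import os
import textwrap
from PIL import Image, ImageDraw, ImageFont


def get_dynamic_wrap_width(font, text, image_width, padding):
    # Average glyph width drives how many characters fit on one line.
    avg_char_width = sum(font.getbbox(c)[2] for c in text) / len(text)
    # int() added here: textwrap.wrap expects an integer width, and the floor
    # division still returns a float when avg_char_width is fractional.
    return max(1, int((image_width - padding * 2) // avg_char_width))


def draw_multiple_line_text(image, text, font, text_color, text_start_height, padding=10):
    draw = ImageDraw.Draw(image)
    image_width, _ = image.size
    y_text = text_start_height
    lines = textwrap.wrap(text, width=get_dynamic_wrap_width(font, text, image_width, padding))
    for line in lines:
        line_width, line_height = font.getbbox(line)[2:]
        draw.text(((image_width - line_width) / 2, y_text), line, font=font, fill=text_color)
        y_text += line_height + padding


def load_font(size):
    # Same lookup order as the commit, plus a fallback for systems
    # that lack both font packages.
    for path in (
        "/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf",
        "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf",
    ):
        if os.path.exists(path):
            return ImageFont.truetype(path, size)
    return ImageFont.load_default()


if __name__ == "__main__":
    text = "A quick check of how the dynamically wrapped subtitle is centred and padded."
    image = Image.new("RGB", (512, 512), color=(20, 20, 20))  # square test canvas standing in for a generated frame

    base_font_size = 30
    scaled_font_size = max(10, int(base_font_size * (image.width / 800)))  # same formula as the diff
    font = load_font(scaled_font_size)

    padding = 10
    line_height = font.getbbox("A")[3] + padding
    n_lines = len(textwrap.wrap(text, get_dynamic_wrap_width(font, text, image.width, padding)))
    text_start_height = image.height - n_lines * line_height - 20

    draw_multiple_line_text(image, text, font, (255, 255, 0), text_start_height, padding)
    image.save("subtitle_preview.png")
    print(f"font={scaled_font_size}pt, lines={n_lines}, start_y={text_start_height}")

As a worked instance of the scaling formula: on a 512-px-wide canvas it gives max(10, int(30 * 512 / 800)) = 19 pt, so narrower frames get a proportionally smaller font while very small images are clamped at 10 pt.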