Xu Xuenan committed on
Commit
f7325de
·
1 Parent(s): d0fc9a1

Update app.py

Browse files
Files changed (2) hide show
  1. app.py +62 -5
  2. mm_story_agent/__init__.py +24 -14
app.py CHANGED
@@ -85,7 +85,7 @@ def write_story_fn(story_topic, main_role, scene,
85
  # story_data, story_accordion, story_content
86
  return pages, gr.update(visible=True), pages[current_page], gr.update()
87
 
88
- @spaces.GPU(duration=600)
89
  def modality_assets_generation_fn(
90
  height, width, image_seed, sound_guidance_scale, sound_seed,
91
  n_candidate_per_text, music_duration,
@@ -119,6 +119,57 @@ def modality_assets_generation_fn(
119
  # image gallery
120
  return gr.update(visible=True, value=images, columns=[len(images)], rows=[1], height="auto")
121
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
122
  def compose_storytelling_video_fn(
123
  fade_duration, slide_duration, zoom_speed, move_ratio,
124
  sound_volume, music_volume, bg_speech_ratio, fps,
@@ -228,10 +279,16 @@ if __name__ == "__main__":
228
  inputs=[gr.State("Generating Modality Assets")],
229
  outputs=video_generation_information
230
  ).then(
231
- fn=modality_assets_generation_fn,
232
- inputs=[height, width, image_seed, sound_guidance_scale, sound_seed,
233
- n_candidate_per_text, music_duration,
234
- story_data],
 
 
 
 
 
 
235
  outputs=[image_gallery]
236
  ).then(
237
  fn=set_generating_progress_text,
 
85
  # story_data, story_accordion, story_content
86
  return pages, gr.update(visible=True), pages[current_page], gr.update()
87
 
88
+ @spaces.GPU()
89
  def modality_assets_generation_fn(
90
  height, width, image_seed, sound_guidance_scale, sound_seed,
91
  n_candidate_per_text, music_duration,
 
119
  # image gallery
120
  return gr.update(visible=True, value=images, columns=[len(images)], rows=[1], height="auto")
121
 
122
def speech_generation_fn(story_data):
    """Generate the speech assets for the story.

    Builds a fresh ``MMStoryAgent`` and calls its ``generate_speech`` with the
    ``config`` object defined outside this function and ``story_data``.
    Nothing is returned.

    NOTE(review): the event chain visible in this file registers this
    function with ``.then(fn=speech_generation_fn,)`` and no ``inputs``
    list -- confirm that ``story_data`` is actually supplied at runtime.
    """
    MMStoryAgent().generate_speech(config, story_data)
125
+
126
@spaces.GPU(duration=120)
def sound_generation_fn(sound_guidance_scale, sound_seed, n_candidate_per_text,
                        story_data, progress=gr.Progress(track_tqdm=True)):
    """Store the sound options in ``config`` and generate the sound assets.

    Args:
        sound_guidance_scale: Written to
            ``sound_generation.call_cfg.guidance_scale``.
        sound_seed: Written to ``sound_generation.call_cfg.seed``.
        n_candidate_per_text: Written to
            ``sound_generation.call_cfg.n_candidate_per_text``.
        story_data: Forwarded to ``MMStoryAgent.generate_sound``.
        progress: Gradio progress tracker; not referenced in the body.
    """
    call_overrides = {
        "guidance_scale": sound_guidance_scale,
        "seed": sound_seed,
        "n_candidate_per_text": n_candidate_per_text,
    }
    # deep_update's return value is ignored, so it presumably merges the
    # overrides into ``config`` in place -- confirm against its definition.
    deep_update(config, {"sound_generation": {"call_cfg": call_overrides}})
    MMStoryAgent().generate_sound(config, story_data)
140
+
141
@spaces.GPU(duration=120)
def music_generation_fn(music_duration,
                        story_data, progress=gr.Progress(track_tqdm=True)):
    """Store the music duration in ``config`` and generate the music asset.

    Args:
        music_duration: Written to ``music_generation.call_cfg.duration``.
        story_data: Forwarded to ``MMStoryAgent.generate_music``.
        progress: Gradio progress tracker; not referenced in the body.
    """
    call_overrides = {"duration": music_duration}
    # deep_update's return value is ignored, so it presumably merges the
    # overrides into ``config`` in place -- confirm against its definition.
    deep_update(config, {"music_generation": {"call_cfg": call_overrides}})
    MMStoryAgent().generate_music(config, story_data)
153
+
154
@spaces.GPU(duration=120)
def image_generation_fn(height, width, image_seed,
                        story_data, progress=gr.Progress(track_tqdm=True)):
    """Store the image options in ``config``, generate images, update gallery.

    Args:
        height: Written to ``image_generation.obj_cfg.height``.
        width: Written to ``image_generation.obj_cfg.width``.
        image_seed: Written to ``image_generation.call_cfg.seed``.
        story_data: Forwarded to ``MMStoryAgent.generate_image``.
        progress: Gradio progress tracker; not referenced in the body.

    Returns:
        A ``gr.update(...)`` that makes the gallery visible and shows the
        generated images in a single row with one column per image.
    """
    image_overrides = {
        "obj_cfg": {
            "height": height,
            "width": width,
        },
        "call_cfg": {
            "seed": image_seed,
        },
    }
    # deep_update's return value is ignored, so it presumably merges the
    # overrides into ``config`` in place -- confirm against its definition.
    deep_update(config, {"image_generation": image_overrides})

    generated = MMStoryAgent().generate_image(config, story_data)
    gallery_images = generated["images"]
    return gr.update(
        visible=True,
        value=gallery_images,
        columns=[len(gallery_images)],
        rows=[1],
        height="auto",
    )
172
+
173
  def compose_storytelling_video_fn(
174
  fade_duration, slide_duration, zoom_speed, move_ratio,
175
  sound_volume, music_volume, bg_speech_ratio, fps,
 
279
  inputs=[gr.State("Generating Modality Assets")],
280
  outputs=video_generation_information
281
  ).then(
282
+ fn=speech_generation_fn,
283
+ ).then(
284
+ fn=sound_generation_fn,
285
+ inputs=[sound_guidance_scale, sound_seed, n_candidate_per_text, story_data]
286
+ ).then(
287
+ fn=music_generation_fn,
288
+ inputs=[music_duration, story_data]
289
+ ).then(
290
+ fn=image_generation_fn,
291
+ inputs=[height, width, image_seed, story_data],
292
  outputs=[image_gallery]
293
  ).then(
294
  fn=set_generating_progress_text,
mm_story_agent/__init__.py CHANGED
@@ -34,6 +34,22 @@ class MMStoryAgent:
34
  pages = story_writer.call(config["story_setting"])
35
  return pages
36
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  def generate_modality_assets(self, config, pages):
38
  script_data = {"pages": [{"story": page} for page in pages]}
39
  story_dir = Path(config["story_dir"])
@@ -45,22 +61,16 @@ class MMStoryAgent:
45
  for modality in self.modalities:
46
  agents[modality] = self.modality_agent_class[modality](config[modality + "_generation"])
47
 
48
- # processes = []
49
- # return_dict = mp.Manager().dict()
50
 
51
- # for modality in self.modalities:
52
- # p = mp.Process(target=self.call_modality_agent, args=(agents[modality], pages, story_dir / modality, return_dict), daemon=False)
53
- # processes.append(p)
54
- # p.start()
55
-
56
- # for p in processes:
57
- # p.join()
58
-
59
-
60
- return_dict = {}
61
-
62
  for modality in self.modalities:
63
- self.call_modality_agent(agents[modality], pages, story_dir / modality, return_dict)
 
 
 
 
 
64
 
65
  for modality, result in return_dict.items():
66
  try:
 
34
  pages = story_writer.call(config["story_setting"])
35
  return pages
36
 
37
def generate_speech(self, config, pages):
    """Generate speech assets for the story pages.

    Args:
        config: Configuration mapping; reads ``config["speech_generation"]``
            and ``config["story_dir"]``.
        pages: Story pages forwarded to the speech agent's ``call``.

    Returns:
        Whatever ``CosyVoiceAgent.call`` returns (previously discarded).
    """
    speech_agent = CosyVoiceAgent(config["speech_generation"])
    # Wrap in Path, as generate_modality_assets does: if story_dir is a plain
    # string, ``str / str`` raises TypeError.
    return speech_agent.call(pages, Path(config["story_dir"]) / "speech")
40
+
41
def generate_sound(self, config, pages):
    """Generate sound-effect assets for the story pages.

    Args:
        config: Configuration mapping; reads ``config["sound_generation"]``
            and ``config["story_dir"]``.
        pages: Story pages forwarded to the sound agent's ``call``.

    Returns:
        Whatever ``AudioLDM2Agent.call`` returns (previously discarded).
    """
    sound_agent = AudioLDM2Agent(config["sound_generation"])
    # Wrap in Path, as generate_modality_assets does: if story_dir is a plain
    # string, ``str / str`` raises TypeError.
    return sound_agent.call(pages, Path(config["story_dir"]) / "sound")
44
+
45
def generate_music(self, config, pages):
    """Generate the background-music asset for the story pages.

    Args:
        config: Configuration mapping; reads ``config["music_generation"]``
            and ``config["story_dir"]``.
        pages: Story pages forwarded to the music agent's ``call``.

    Returns:
        Whatever ``MusicGenAgent.call`` returns (previously discarded).
    """
    music_agent = MusicGenAgent(config["music_generation"])
    # Wrap in Path, as generate_modality_assets does: if story_dir is a plain
    # string, ``str / str`` raises TypeError.
    return music_agent.call(pages, Path(config["story_dir"]) / "music")
48
+
49
def generate_image(self, config, pages):
    """Generate the story images and return the image agent's result.

    Args:
        config: Configuration mapping; reads ``config["image_generation"]``
            and ``config["story_dir"]``.
        pages: Story pages forwarded to the image agent's ``call``.

    Returns:
        Whatever ``StoryDiffusionAgent.call`` returns. The method used to
        return ``None`` implicitly, which cannot be subscripted.
        NOTE(review): the app-side caller reads ``result["images"]`` --
        confirm the agent's result exposes that key.
    """
    image_agent = StoryDiffusionAgent(config["image_generation"])
    # Wrap in Path, as generate_modality_assets does: if story_dir is a plain
    # string, ``str / str`` raises TypeError.
    return image_agent.call(pages, Path(config["story_dir"]) / "image")
52
+
53
  def generate_modality_assets(self, config, pages):
54
  script_data = {"pages": [{"story": page} for page in pages]}
55
  story_dir = Path(config["story_dir"])
 
61
  for modality in self.modalities:
62
  agents[modality] = self.modality_agent_class[modality](config[modality + "_generation"])
63
 
64
+ processes = []
65
+ return_dict = mp.Manager().dict()
66
 
 
 
 
 
 
 
 
 
 
 
 
67
  for modality in self.modalities:
68
+ p = mp.Process(target=self.call_modality_agent, args=(agents[modality], pages, story_dir / modality, return_dict), daemon=False)
69
+ processes.append(p)
70
+ p.start()
71
+
72
+ for p in processes:
73
+ p.join()
74
 
75
  for modality, result in return_dict.items():
76
  try: