Spaces:
Running
on
L4
Running
on
L4
Xu Xuenan
committed on
Commit
·
f7325de
1
Parent(s):
d0fc9a1
Update app.py
Browse files- app.py +62 -5
- mm_story_agent/__init__.py +24 -14
app.py
CHANGED
@@ -85,7 +85,7 @@ def write_story_fn(story_topic, main_role, scene,
|
|
85 |
# story_data, story_accordion, story_content
|
86 |
return pages, gr.update(visible=True), pages[current_page], gr.update()
|
87 |
|
88 |
-
@spaces.GPU(
|
89 |
def modality_assets_generation_fn(
|
90 |
height, width, image_seed, sound_guidance_scale, sound_seed,
|
91 |
n_candidate_per_text, music_duration,
|
@@ -119,6 +119,57 @@ def modality_assets_generation_fn(
|
|
119 |
# image gallery
|
120 |
return gr.update(visible=True, value=images, columns=[len(images)], rows=[1], height="auto")
|
121 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
122 |
def compose_storytelling_video_fn(
|
123 |
fade_duration, slide_duration, zoom_speed, move_ratio,
|
124 |
sound_volume, music_volume, bg_speech_ratio, fps,
|
@@ -228,10 +279,16 @@ if __name__ == "__main__":
|
|
228 |
inputs=[gr.State("Generating Modality Assets")],
|
229 |
outputs=video_generation_information
|
230 |
).then(
|
231 |
-
fn=
|
232 |
-
|
233 |
-
|
234 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
235 |
outputs=[image_gallery]
|
236 |
).then(
|
237 |
fn=set_generating_progress_text,
|
|
|
85 |
# story_data, story_accordion, story_content
|
86 |
return pages, gr.update(visible=True), pages[current_page], gr.update()
|
87 |
|
88 |
+
@spaces.GPU()
|
89 |
def modality_assets_generation_fn(
|
90 |
height, width, image_seed, sound_guidance_scale, sound_seed,
|
91 |
n_candidate_per_text, music_duration,
|
|
|
119 |
# image gallery
|
120 |
return gr.update(visible=True, value=images, columns=[len(images)], rows=[1], height="auto")
|
121 |
|
122 |
def speech_generation_fn(story_data):
    """Run the speech-synthesis stage of the pipeline for *story_data*.

    Delegates to MMStoryAgent.generate_speech using the module-level config.
    """
    agent = MMStoryAgent()
    agent.generate_speech(config, story_data)
|
126 |
@spaces.GPU(duration=120)
def sound_generation_fn(sound_guidance_scale, sound_seed, n_candidate_per_text,
                        story_data, progress=gr.Progress(track_tqdm=True)):
    """Generate per-page sound effects for the story.

    Merges the UI-provided sound settings into the global config, then
    delegates to MMStoryAgent.generate_sound.
    """
    overrides = {
        "sound_generation": {
            "call_cfg": {
                "guidance_scale": sound_guidance_scale,
                "seed": sound_seed,
                "n_candidate_per_text": n_candidate_per_text,
            }
        }
    }
    deep_update(config, overrides)
    agent = MMStoryAgent()
    agent.generate_sound(config, story_data)
140 |
+
|
141 |
@spaces.GPU(duration=120)
def music_generation_fn(music_duration,
                        story_data, progress=gr.Progress(track_tqdm=True)):
    """Generate background music for the story.

    Merges the UI-provided duration into the global config, then delegates
    to MMStoryAgent.generate_music.
    """
    overrides = {
        "music_generation": {
            "call_cfg": {
                "duration": music_duration,
            }
        }
    }
    deep_update(config, overrides)
    agent = MMStoryAgent()
    agent.generate_music(config, story_data)
|
153 |
+
|
154 |
@spaces.GPU(duration=120)
def image_generation_fn(height, width, image_seed,
                        story_data, progress=gr.Progress(track_tqdm=True)):
    """Generate one illustration per story page and show them in the gallery.

    Merges the UI-provided image settings into the global config, runs
    MMStoryAgent.generate_image, and returns a gr.update that lays the
    resulting images out as a single visible row.
    """
    overrides = {
        "image_generation": {
            "obj_cfg": {
                "height": height,
                "width": width,
            },
            "call_cfg": {
                "seed": image_seed,
            },
        },
    }
    deep_update(config, overrides)
    agent = MMStoryAgent()
    result = agent.generate_image(config, story_data)
    images = result["images"]
    # One row, one column per page image.
    return gr.update(visible=True, value=images, columns=[len(images)], rows=[1], height="auto")
|
172 |
+
|
173 |
def compose_storytelling_video_fn(
|
174 |
fade_duration, slide_duration, zoom_speed, move_ratio,
|
175 |
sound_volume, music_volume, bg_speech_ratio, fps,
|
|
|
279 |
inputs=[gr.State("Generating Modality Assets")],
|
280 |
outputs=video_generation_information
|
281 |
).then(
|
282 |
+
fn=speech_generation_fn,
|
283 |
+
).then(
|
284 |
+
fn=sound_generation_fn,
|
285 |
+
inputs=[sound_guidance_scale, sound_seed, n_candidate_per_text, story_data]
|
286 |
+
).then(
|
287 |
+
fn=music_generation_fn,
|
288 |
+
inputs=[music_duration, story_data]
|
289 |
+
).then(
|
290 |
+
fn=image_generation_fn,
|
291 |
+
inputs=[height, width, image_seed, story_data],
|
292 |
outputs=[image_gallery]
|
293 |
).then(
|
294 |
fn=set_generating_progress_text,
|
mm_story_agent/__init__.py
CHANGED
@@ -34,6 +34,22 @@ class MMStoryAgent:
|
|
34 |
pages = story_writer.call(config["story_setting"])
|
35 |
return pages
|
36 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
37 |
def generate_modality_assets(self, config, pages):
|
38 |
script_data = {"pages": [{"story": page} for page in pages]}
|
39 |
story_dir = Path(config["story_dir"])
|
@@ -45,22 +61,16 @@ class MMStoryAgent:
|
|
45 |
for modality in self.modalities:
|
46 |
agents[modality] = self.modality_agent_class[modality](config[modality + "_generation"])
|
47 |
|
48 |
-
|
49 |
-
|
50 |
|
51 |
-
# for modality in self.modalities:
|
52 |
-
# p = mp.Process(target=self.call_modality_agent, args=(agents[modality], pages, story_dir / modality, return_dict), daemon=False)
|
53 |
-
# processes.append(p)
|
54 |
-
# p.start()
|
55 |
-
|
56 |
-
# for p in processes:
|
57 |
-
# p.join()
|
58 |
-
|
59 |
-
|
60 |
-
return_dict = {}
|
61 |
-
|
62 |
for modality in self.modalities:
|
63 |
-
self.call_modality_agent(agents[modality], pages, story_dir / modality, return_dict)
|
|
|
|
|
|
|
|
|
|
|
64 |
|
65 |
for modality, result in return_dict.items():
|
66 |
try:
|
|
|
34 |
pages = story_writer.call(config["story_setting"])
|
35 |
return pages
|
36 |
|
37 |
+
def generate_speech(self, config, pages):
|
38 |
+
speech_agent = CosyVoiceAgent(config["speech_generation"])
|
39 |
+
speech_agent.call(pages, config["story_dir"] / "speech")
|
40 |
+
|
41 |
+
def generate_sound(self, config, pages):
|
42 |
+
sound_agent = AudioLDM2Agent(config["sound_generation"])
|
43 |
+
sound_agent.call(pages, config["story_dir"] / "sound")
|
44 |
+
|
45 |
+
def generate_music(self, config, pages):
|
46 |
+
music_agent = MusicGenAgent(config["music_generation"])
|
47 |
+
music_agent.call(pages, config["story_dir"] / "music")
|
48 |
+
|
49 |
+
def generate_image(self, config, pages):
|
50 |
+
image_agent = StoryDiffusionAgent(config["image_generation"])
|
51 |
+
image_agent.call(pages, config["story_dir"] / "image")
|
52 |
+
|
53 |
def generate_modality_assets(self, config, pages):
|
54 |
script_data = {"pages": [{"story": page} for page in pages]}
|
55 |
story_dir = Path(config["story_dir"])
|
|
|
61 |
for modality in self.modalities:
|
62 |
agents[modality] = self.modality_agent_class[modality](config[modality + "_generation"])
|
63 |
|
64 |
+
processes = []
|
65 |
+
return_dict = mp.Manager().dict()
|
66 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
67 |
for modality in self.modalities:
|
68 |
+
p = mp.Process(target=self.call_modality_agent, args=(agents[modality], pages, story_dir / modality, return_dict), daemon=False)
|
69 |
+
processes.append(p)
|
70 |
+
p.start()
|
71 |
+
|
72 |
+
for p in processes:
|
73 |
+
p.join()
|
74 |
|
75 |
for modality, result in return_dict.items():
|
76 |
try:
|