# NOTE(review): the following header is residue scraped from the HuggingFace
# Space web page (status banner, file size, commit hashes, line-number
# gutter). Preserved as comments so the module remains valid Python.
# Spaces: Runtime error | File size: 4,540 Bytes
# Commits: 2f3d8a1 8d024ac 8af42ca aa0a087 984a212 f508df4 8933be3 afd8ffa 3384297 a066be5
import gradio as gr
import os
from gradio_client import Client, handle_file
import numpy as np
import tempfile
import imageio
import torch
from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler
# Load the zeroscope text-to-video pipeline in half precision; fp16 halves
# VRAM use at negligible quality cost for inference.
pipe = DiffusionPipeline.from_pretrained("cerspense/zeroscope_v2_576w", torch_dtype=torch.float16)
# Swap in the DPM-Solver++ multistep scheduler (faster convergence per step).
pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
# Offload model components to CPU when idle so the pipeline fits on smaller GPUs.
pipe.enable_model_cpu_offload()
# HF token used to authenticate against the Kosmos-2 captioning Space.
hf_token = os.environ.get("HF_TOKEN")
def get_caption(image_in):
    """Generate a detailed caption for an image via the Kosmos-2 Space API.

    Parameters
    ----------
    image_in : str
        Path to the input image file.

    Returns
    -------
    str
        Caption text assembled from the Kosmos-2 token stream.
    """
    kosmos2_client = Client("fffiloni/Kosmos-2-API", hf_token=hf_token)
    kosmos2_result = kosmos2_client.predict(
        image_input=handle_file(image_in),
        text_input="Detailed",
        api_name="/generate_predictions"
    )
    print(f"KOSMOS2 RETURNS: {kosmos2_result}")
    # Second element of the API result holds the token list.
    data = kosmos2_result[1]
    # Join tokens, skipping the first entry — presumably a marker/BOS token;
    # TODO(review): confirm against the Kosmos-2 API response schema.
    sentence = ''.join(item['token'] for item in data[1:])
    return sentence
def export_to_video(frames: np.ndarray, fps: int) -> str:
    """Encode an array of float frames into an MP4 file and return its path.

    Parameters
    ----------
    frames : np.ndarray
        Frames with float pixel values in [0, 1]; shape presumably
        (num_frames, H, W, C) — confirm against the pipeline output.
    fps : int
        Frames per second of the output video.

    Returns
    -------
    str
        Path to the written MP4 file (persistent temporary file; caller
        owns cleanup since ``delete=False``).
    """
    # Scale [0, 1] floats to [0, 255] uint8 pixels, clamping out-of-range values.
    frames = np.clip(frames * 255, 0, 255).astype(np.uint8)
    out_file = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
    # Close the handle right away: only the path is needed below, and leaving
    # the descriptor open leaks it (and blocks the writer on Windows).
    out_file.close()
    writer = imageio.get_writer(out_file.name, format="FFMPEG", fps=fps)
    try:
        for frame in frames:
            writer.append_data(frame)
    finally:
        # Ensure the container is finalized even if a frame write fails.
        writer.close()
    return out_file.name
def infer(image_init, progress=gr.Progress(track_tqdm=True)):
    """Run the full image -> caption -> video pipeline.

    Returns the generated caption and the path of the rendered video,
    in that order, for display in the UI.
    """
    caption = get_caption(image_init)
    generation = pipe(
        caption,
        num_inference_steps=40,
        height=320,
        width=576,
        num_frames=24,
    )
    rendered_path = export_to_video(generation.frames[0], 12)
    print(rendered_path)
    return caption, rendered_path
css = """
#col-container {max-width: 510px; margin-left: auto; margin-right: auto;}
a {text-decoration-line: underline; font-weight: 600;}
.animate-spin {
animation: spin 1s linear infinite;
}
@keyframes spin {
from {
transform: rotate(0deg);
}
to {
transform: rotate(360deg);
}
}
#share-btn-container {
display: flex;
padding-left: 0.5rem !important;
padding-right: 0.5rem !important;
background-color: #000000;
justify-content: center;
align-items: center;
border-radius: 9999px !important;
max-width: 13rem;
}
#share-btn-container:hover {
background-color: #060606;
}
#share-btn {
all: initial;
color: #ffffff;
font-weight: 600;
cursor:pointer;
font-family: 'IBM Plex Sans', sans-serif;
margin-left: 0.5rem !important;
padding-top: 0.5rem !important;
padding-bottom: 0.5rem !important;
right:0;
}
#share-btn * {
all: unset;
}
#share-btn-container div:nth-child(-n+2){
width: auto !important;
min-height: 0px !important;
}
#share-btn-container .wrap {
display: none !important;
}
#share-btn-container.hidden {
display: none!important;
}
img[src*='#center'] {
display: block;
margin: auto;
}
"""
# Build the Gradio UI: image input -> Submit -> caption textbox + video output.
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown(
            """
            <h1 style="text-align: center;">Zeroscope Image-to-Video</h1>
            <p style="text-align: center;">
            A watermark-free Modelscope-based video model optimized for producing high-quality 16:9 compositions and a smooth video output. <br />
            This demo is a variation that lets you upload an image as reference for video generation.
            </p>

            [![Duplicate this Space](https://huggingface.co/datasets/huggingface/badges/raw/main/duplicate-this-space-sm.svg#center)](https://huggingface.co/spaces/fffiloni/zeroscope-img-to-video?duplicate=true)
            """
        )
        # type="filepath" so infer() receives a path it can forward to the
        # Kosmos-2 client unchanged.
        image_init = gr.Image(label="Image Init", type="filepath", sources=["upload"], elem_id="image-init")
        #inference_steps = gr.Slider(label="Inference Steps", minimum=10, maximum=100, step=1, value=40, interactive=False)
        submit_btn = gr.Button("Submit")
        coca_cap = gr.Textbox(label="Caption", placeholder="Kosmos-2 caption will be displayed here", elem_id="coca-cap-in")
        video_result = gr.Video(label="Video Output", elem_id="video-output")

    # Wire the button to the pipeline; outputs map to (caption, video path).
    submit_btn.click(
        fn=infer,
        inputs=[image_init],
        outputs=[coca_cap, video_result],
        show_api=False
    )

# Queue requests (GPU inference is serialized) and hide the API page.
demo.queue(max_size=12).launch(show_api=False)
# (end of scraped page residue)