import gradio as gr
from PIL import Image
import cv2 as cv
import torch
from RealESRGAN import RealESRGAN
import tempfile
import numpy as np
import tqdm
import ffmpeg
import spaces

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

@spaces.GPU(duration=60)
def infer_image(img: Image.Image, size_modifier: int) -> Image.Image:
    if img is None:
        raise Exception("Image not uploaded")

    width, height = img.size
    if width >= 5000 or height >= 5000:
        raise Exception("The image is too large.")

    model = RealESRGAN(device, scale=size_modifier)
    model.load_weights(f'weights/RealESRGAN_x{size_modifier}.pth', download=False)

    result = model.predict(img.convert('RGB'))
    print(f"Image upscaled x{size_modifier} on {device} ... OK")
    return result

@spaces.GPU(duration=120)
def infer_video(video_filepath: str, size_modifier: int) -> str:
    model = RealESRGAN(device, scale=size_modifier)
    model.load_weights(f'weights/RealESRGAN_x{size_modifier}.pth', download=False)

    cap = cv.VideoCapture(video_filepath)

    tmpfile = tempfile.NamedTemporaryFile(suffix='.mp4', delete=False)
    vid_output = tmpfile.name
    tmpfile.close()

    # Check whether the input video has an audio stream
    probe = ffmpeg.probe(video_filepath)
    has_audio = any(stream['codec_type'] == 'audio' for stream in probe['streams'])

    if has_audio:
        # Extract the audio track so it can be muxed back in after upscaling
        audio_file = video_filepath.replace(".mp4", ".wav")
        ffmpeg.input(video_filepath).output(audio_file, format='wav', ac=1).run(overwrite_output=True)

    vid_writer = cv.VideoWriter(
        vid_output,
        fourcc=cv.VideoWriter.fourcc(*'mp4v'),
        fps=cap.get(cv.CAP_PROP_FPS),
        frameSize=(
            int(cap.get(cv.CAP_PROP_FRAME_WIDTH)) * size_modifier,
            int(cap.get(cv.CAP_PROP_FRAME_HEIGHT)) * size_modifier,
        ),
    )

    # Upscale the video frame by frame
    n_frames = int(cap.get(cv.CAP_PROP_FRAME_COUNT))
    for _ in tqdm.tqdm(range(n_frames)):
        ret, frame = cap.read()
        if not ret:
            break

        # OpenCV reads BGR; Real-ESRGAN expects an RGB PIL image
        frame = cv.cvtColor(frame, cv.COLOR_BGR2RGB)
        frame = Image.fromarray(frame)

        upscaled_frame = model.predict(frame.convert('RGB'))

        # Convert back to BGR for the OpenCV writer
        upscaled_frame = np.array(upscaled_frame)
        upscaled_frame = cv.cvtColor(upscaled_frame, cv.COLOR_RGB2BGR)
        vid_writer.write(upscaled_frame)

    cap.release()
    vid_writer.release()

    if has_audio:
        # Mux the upscaled video stream with the original audio track in a
        # single pass (the previous two-step re-encode overwrote the upscaled
        # file with an audio-only output)
        final_output = vid_output.replace(".mp4", "_upscaled.mp4")
        video_stream = ffmpeg.input(vid_output)
        audio_stream = ffmpeg.input(audio_file)
        ffmpeg.output(
            video_stream, audio_stream, final_output,
            vcodec='libx264', acodec='aac', audio_bitrate='320k',
        ).run(overwrite_output=True)

    print(f"Video file : {video_filepath}")
    return final_output if has_audio else vid_output

input_image = gr.Image(type='pil', label='Input Image')
input_model_image = gr.Radio([('x2', 2), ('x4', 4), ('x8', 8)], type="value", value=4, label="Model Upscale/Enhance Type")
submit_image_button = gr.Button('Submit')
output_image = gr.Image(type='pil', label='Output Image')

tab_img = gr.Interface(
    fn=infer_image,
    inputs=[input_image, input_model_image],
    outputs=output_image,
    title="Real-ESRGAN",
    description="""Gradio UI for the Real-ESRGAN PyTorch implementation. To use it, simply upload your image and choose the model. Read more at the links below. Please click Submit only once.
Credits: [Nick088](https://linktr.ee/Nick088), Xinntao, Tencent, Geeve George, ai-forever, daroche

Github Repo

" ) input_video = gr.Video(label='Input Video') input_model_video = gr.Radio([('x2', 2), ('x4', 4), ('x8', 8)], type="value", value=2, label="Model Upscale/Enhance Type") submit_video_button = gr.Button('Submit') output_video = gr.Video(label='Output Video', autoplay = True, loop = True) tab_vid = gr.Interface( fn=infer_video, inputs=[input_video, input_model_video], outputs=output_video, title="Real-ESRGAN", description="Gradio UI for Real-ESRGAN Pytorch version. To use it, simply upload your video and choose the model. Read more at the links below. Please click submit only once
Credits: [Nick088](https://linktr.ee/Nick088), Xinntao, Tencent, Geeve George, ai-forever, daroche

Real-ESRGAN: Training Real-World Blind Super-Resolution with Pure Synthetic Data | Github Repo

", examples=[ [ "RealESRGAN_examples/Example1.mp4", 2 ] ], cache_examples=True, cache_mode='lazy' ) demo = gr.TabbedInterface([tab_img, tab_vid], ["Image", "Video"]) demo.launch(mcp_server=True, debug=True, show_error=True, share=True)