Spaces:
Running
Running
#!/usr/bin/env python3
"""
demux_media.py — Separate audio, video, and subtitles from any media container without re-encoding.

Usage:
    python demux_media.py /path/to/input.mp4 --outdir out --subs
"""
import argparse | |
import json | |
import subprocess | |
from pathlib import Path | |
# Lookup tables: FFmpeg codec name -> common file extension.

# Audio codecs.  Codecs missing from this table are handled by the caller.
AUDIO_EXT = dict(
    aac="m4a",
    mp3="mp3",
    flac="flac",
    opus="opus",
    vorbis="ogg",
    eac3="eac3",
    ac3="ac3",
    dts="dts",
    pcm_s16le="wav",
    pcm_s24le="wav",
    alac="m4a",
)
# Subtitle codecs: FFmpeg codec name -> file extension.
SUB_EXT = dict(
    subrip="srt",
    ass="ass",
    ssa="ssa",
    webvtt="vtt",
    mov_text="srt",
)
def run(cmd):
    """Execute *cmd* and return the finished process with its output captured.

    Both stdout and stderr are collected (as bytes) on the returned
    ``subprocess.CompletedProcess``.  A non-zero exit status raises
    ``subprocess.CalledProcessError``.
    """
    pipe = subprocess.PIPE
    completed = subprocess.run(cmd, check=True, stdout=pipe, stderr=pipe)
    return completed
def ffprobe_streams(infile):
    """Return the stream descriptions that ffprobe reports for *infile*.

    ffprobe is asked for each stream's index, codec type and codec name plus
    the ``language`` and ``title`` tags, printed as JSON.  The return value is
    the ``"streams"`` entry of that JSON, or an empty list if it is absent.
    A failing ffprobe raises ``subprocess.CalledProcessError`` (via ``run``).
    """
    wanted_entries = "stream=index,codec_type,codec_name:stream_tags=language,title"
    probe = run(
        ["ffprobe", "-v", "error", "-show_entries", wanted_entries, "-of", "json", str(infile)]
    )
    report = json.loads(probe.stdout.decode("utf-8"))
    return report.get("streams", [])
def sanitize(s: str) -> str:
    """Return *s* made safe for use inside a file name.

    Alphanumeric characters and the punctuation ``.``, ``_``, ``-`` and ``+``
    are kept; every other character is replaced by an underscore.
    """
    allowed_punct = "._-+"
    cleaned = []
    for ch in s:
        keep = ch.isalnum() or ch in allowed_punct
        cleaned.append(ch if keep else "_")
    return "".join(cleaned)
def _stream_labels(stream):
    """Return ``(lang, title_part)`` file-name fragments from a stream's tags.

    *lang* is the lower-cased ``language`` tag, or ``"und"`` when the tag is
    missing or empty.  *title_part* is ``"_<title>"``, or ``""`` when the
    stream has no title.  Both are passed through ``sanitize`` because tag
    values come from the media file's metadata and end up in file names.
    """
    tags = stream.get("tags", {}) or {}
    lang = sanitize(str(tags.get("language") or "und").lower())
    title = str(tags.get("title") or "")
    title_part = f"_{sanitize(title)}" if title else ""
    return lang, title_part


def _extract_stream(infile, stream_index, out_path, codec_args):
    """Run ffmpeg to write input stream *stream_index* of *infile* to *out_path*.

    *codec_args* is the list of codec options placed before the output path.
    Raises ``subprocess.CalledProcessError`` if ffmpeg exits non-zero.
    """
    cmd = ["ffmpeg", "-y", "-i", str(infile), "-map", f"0:{stream_index}", *codec_args, str(out_path)]
    subprocess.run(cmd, check=True)


def main():
    """Command-line entry point: split a media file into per-stream files.

    Writes the first video stream, every audio stream and (with ``--subs``)
    every subtitle stream of the input into the output directory.  Streams
    are stream-copied; the one exception is ``mov_text`` subtitles, which are
    converted to SubRip text so they can be stored as ``.srt``.

    Raises:
        SystemExit: if the input path does not exist.
        subprocess.CalledProcessError: if ffprobe or ffmpeg fails.
    """
    p = argparse.ArgumentParser()
    p.add_argument("input", type=Path, help="Input media file (any container: mp4, mkv, avi, mov, …)")
    p.add_argument("--outdir", type=Path, default=Path("."), help="Output directory")
    p.add_argument("--subs", action="store_true", help="Also extract subtitle tracks")
    args = p.parse_args()

    infile = args.input
    outdir = args.outdir

    # Validate the input first so a bad path does not leave an empty
    # output directory behind.
    if not infile.exists():
        raise SystemExit(f"Input not found: {infile}")
    outdir.mkdir(parents=True, exist_ok=True)

    streams = ffprobe_streams(infile)
    base = sanitize(infile.stem)
    # Keep the same container type.  An input without a suffix would give a
    # file name ending in "." that ffmpeg cannot map to a muxer, so fall back
    # to Matroska in that case.
    container_ext = infile.suffix.lstrip(".") or "mkv"

    # --- Extract VIDEO only ---
    video_streams = [s for s in streams if s.get("codec_type") == "video"]
    if video_streams:
        video_out = outdir / f"{base}_video_only.{container_ext}"
        cmd = ["ffmpeg", "-y", "-i", str(infile), "-map", "0:v:0", "-c", "copy", str(video_out)]
        print("Extracting video ->", video_out.name)
        subprocess.run(cmd, check=True)
    else:
        print("No video streams found.")

    # --- Extract AUDIO tracks ---
    audio_streams = [s for s in streams if s.get("codec_type") == "audio"]
    for s in audio_streams:
        stream_index = s["index"]
        codec = s.get("codec_name", "audio")
        lang, title_part = _stream_labels(s)
        ext = AUDIO_EXT.get(codec, codec)
        out_path = outdir / f"{base}_a{stream_index}_{lang}{title_part}.{ext}"
        print(f"Extracting audio #{stream_index} ({codec}, {lang}) -> {out_path.name}")
        _extract_stream(infile, stream_index, out_path, ["-c", "copy"])
    if not audio_streams:
        print("No audio streams found.")

    # --- Extract SUBTITLES (optional) ---
    # Bound unconditionally so the "nothing extracted" check below is valid
    # whether or not --subs was given.
    sub_streams = []
    if args.subs:
        sub_streams = [s for s in streams if s.get("codec_type") == "subtitle"]
        for s in sub_streams:
            stream_index = s["index"]
            codec = s.get("codec_name", "sub")
            lang, title_part = _stream_labels(s)
            ext = SUB_EXT.get(codec, f"{codec}.sub")
            out_path = outdir / f"{base}_s{stream_index}_{lang}{title_part}.{ext}"
            # FFmpeg's SRT muxer only accepts SubRip/plain-text streams, so
            # MP4 timed text (mov_text) cannot be stream-copied into .srt;
            # convert the text format instead.
            codec_args = ["-c:s", "srt"] if codec == "mov_text" else ["-c", "copy"]
            print(f"Extracting subtitle #{stream_index} ({codec}, {lang}) -> {out_path.name}")
            _extract_stream(infile, stream_index, out_path, codec_args)
    if not sub_streams:
        print("No subtitle streams found or --subs not set.")

    print("Done.")
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()