# EchoMorph.ai / demux.py
# harshitmahour360's picture
# Upload 6 files
# a4e56ca verified
#!/usr/bin/env python3
"""
demux.py — Separate the audio, video, and subtitle streams of any media container without re-encoding.

Usage:
    python demux.py /path/to/input.mp4 --outdir out --subs
"""
import argparse
import json
import subprocess
from pathlib import Path
# Lookup table: FFmpeg audio codec name -> file extension used for the
# extracted track.
AUDIO_EXT = {
    # Lossy codecs
    "aac": "m4a",
    "mp3": "mp3",
    # Lossless / open codecs
    "flac": "flac",
    "opus": "opus",
    "vorbis": "ogg",
    # Dolby / DTS codecs keep their codec name as the extension
    "eac3": "eac3",
    "ac3": "ac3",
    "dts": "dts",
    # Little-endian PCM variants are written as WAV
    "pcm_s16le": "wav",
    "pcm_s24le": "wav",
    # Apple Lossless shares the .m4a extension with AAC
    "alac": "m4a",
}
# Lookup table: FFmpeg subtitle codec name -> file extension used for the
# extracted track.
SUB_EXT = {
    "subrip": "srt",
    "ass": "ass",
    "ssa": "ssa",
    "webvtt": "vtt",
    # MP4 timed text is given the .srt extension as well
    "mov_text": "srt",
}
def run(cmd):
    """Execute *cmd* and return the completed process.

    Both stdout and stderr are captured (as bytes) on the returned
    ``subprocess.CompletedProcess``. Because ``check=True`` is passed, a
    non-zero exit status raises ``subprocess.CalledProcessError``.
    """
    completed = subprocess.run(
        cmd,
        check=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    return completed
def ffprobe_streams(infile):
    """Return the stream descriptions that ffprobe reports for *infile*.

    ffprobe is asked for JSON output restricted to each stream's index,
    codec_type, codec_name, and its language/title tags. The result is the
    decoded ``"streams"`` list, or ``[]`` when that key is absent from the
    output. Any exception raised by ``run`` propagates to the caller.
    """
    wanted_fields = "stream=index,codec_type,codec_name:stream_tags=language,title"
    probe_cmd = [
        "ffprobe",
        "-v", "error",
        "-show_entries", wanted_fields,
        "-of", "json",
        str(infile),
    ]
    raw_json = run(probe_cmd).stdout.decode("utf-8")
    return json.loads(raw_json).get("streams", [])
def sanitize(s: str) -> str:
    """Return *s* with every disallowed character replaced by ``_``.

    Alphanumeric characters (as judged by ``str.isalnum``) and the four
    punctuation characters ``.``, ``_``, ``-`` and ``+`` are kept unchanged;
    anything else becomes an underscore. The result has the same length as
    the input.
    """
    keep = "._-+"
    cleaned = []
    for ch in s:
        if ch.isalnum() or ch in keep:
            cleaned.append(ch)
        else:
            cleaned.append("_")
    return "".join(cleaned)
def _track_output_path(outdir, base, prefix, stream, ext):
    """Return ``(lang, path)`` for the output file of one audio/subtitle stream."""
    tags = stream.get("tags", {}) or {}
    # Tag values come from the media file itself, so they are sanitized before
    # becoming part of a filesystem path (a "/" in a tag must not escape outdir).
    # An empty language tag is treated like a missing one.
    lang = sanitize((tags.get("language") or "und").lower())
    title = tags.get("title", "")
    title_part = f"_{sanitize(title)}" if title else ""
    filename = f"{base}_{prefix}{stream['index']}_{lang}{title_part}.{ext}"
    return lang, outdir / filename


def _ffmpeg_extract(infile, stream_spec, out_path, codec_args=("-c", "copy")):
    """Run ffmpeg to write the stream selected by *stream_spec* to *out_path*."""
    cmd = ["ffmpeg", "-y", "-i", str(infile), "-map", stream_spec, *codec_args, str(out_path)]
    subprocess.run(cmd, check=True)


def main():
    """Command-line entry point: split a media file into per-stream files.

    Parses the command line, probes the input with ffprobe, then writes:

    * the first video stream to ``<base>_video_only.<container ext>``,
    * every audio stream to ``<base>_a<index>_<lang>[_<title>].<ext>``,
    * with ``--subs``, every subtitle stream to
      ``<base>_s<index>_<lang>[_<title>].<ext>``.

    Streams are stream-copied, except ``mov_text`` subtitles, which are
    converted to SubRip so they can be stored in the ``.srt`` file that
    ``SUB_EXT`` assigns to them.

    Raises:
        SystemExit: if the input path does not exist or ffprobe fails.
        subprocess.CalledProcessError: if an ffmpeg invocation exits non-zero.
    """
    p = argparse.ArgumentParser()
    p.add_argument("input", type=Path, help="Input media file (any container: mp4, mkv, avi, mov, …)")
    p.add_argument("--outdir", type=Path, default=Path("."), help="Output directory")
    p.add_argument("--subs", action="store_true", help="Also extract subtitle tracks")
    args = p.parse_args()

    infile = args.input
    outdir = args.outdir

    # Validate the input before touching the filesystem so that a mistyped
    # path does not leave an empty output directory behind.
    if not infile.exists():
        raise SystemExit(f"Input not found: {infile}")
    outdir.mkdir(parents=True, exist_ok=True)

    try:
        streams = ffprobe_streams(infile)
    except subprocess.CalledProcessError as err:
        # ffprobe's stderr was captured rather than shown, so surface it here.
        stderr = err.stderr or b""
        detail = stderr.decode("utf-8", errors="replace") if isinstance(stderr, bytes) else str(stderr)
        raise SystemExit(f"ffprobe failed for {infile}: {detail.strip()}") from err

    base = sanitize(infile.stem)
    # Keep the input's container type; without a suffix ffmpeg could not pick
    # an output format, so fall back to Matroska.
    container_ext = infile.suffix.lstrip(".") or "mkv"

    # --- Extract VIDEO only ---
    video_streams = [s for s in streams if s.get("codec_type") == "video"]
    if video_streams:
        video_out = outdir / f"{base}_video_only.{container_ext}"
        print("Extracting video ->", video_out.name)
        _ffmpeg_extract(infile, "0:v:0", video_out)
    else:
        print("No video streams found.")

    # --- Extract AUDIO tracks ---
    audio_streams = [s for s in streams if s.get("codec_type") == "audio"]
    for s in audio_streams:
        stream_index = s["index"]
        codec = s.get("codec_name", "audio")
        # Unknown codecs use their own name as the extension.
        ext = AUDIO_EXT.get(codec, codec)
        lang, out_path = _track_output_path(outdir, base, "a", s, ext)
        print(f"Extracting audio #{stream_index} ({codec}, {lang}) -> {out_path.name}")
        _ffmpeg_extract(infile, f"0:{stream_index}", out_path)
    if not audio_streams:
        print("No audio streams found.")

    # --- Extract SUBTITLES (optional) ---
    # Initialised up front so the final check is valid whether or not --subs
    # was given.
    sub_streams = []
    if args.subs:
        sub_streams = [s for s in streams if s.get("codec_type") == "subtitle"]
        for s in sub_streams:
            stream_index = s["index"]
            codec = s.get("codec_name", "sub")
            # NOTE(review): codecs missing from SUB_EXT (e.g. bitmap formats)
            # fall through to "<codec>.sub"; confirm ffmpeg can mux them there.
            ext = SUB_EXT.get(codec, f"{codec}.sub")
            # The SRT muxer only accepts SubRip/plain-text packets, so MP4
            # timed text has to be converted instead of stream-copied.
            codec_args = ("-c:s", "srt") if codec == "mov_text" else ("-c", "copy")
            lang, out_path = _track_output_path(outdir, base, "s", s, ext)
            print(f"Extracting subtitle #{stream_index} ({codec}, {lang}) -> {out_path.name}")
            _ffmpeg_extract(infile, f"0:{stream_index}", out_path, codec_args)
    if not sub_streams:
        print("No subtitle streams found or --subs not set.")

    print("Done.")


if __name__ == "__main__":
    main()