"""
builder.py
------------------
Builds yt-dlp CLI commands from a DownloadConfig.
"""
from __future__ import annotations
import logging
from pathlib import Path
from typing import TYPE_CHECKING, List
if TYPE_CHECKING:
from .basemodels import DownloadConfig
from .enums import AudioFormat, Quality
logger = logging.getLogger(__name__)
__all__ = ["build_download_command"]
# Maps every Quality preset to the yt-dlp ``-f`` selector it stands for.
_QUALITY_FORMAT: dict[str, str] = {
    Quality.BEST: "bestvideo*+bestaudio/best",
    Quality.WORST: "worstvideo*+worstaudio/worst",
    Quality.AUDIO_ONLY: "bestaudio/best",
    Quality.VIDEO_ONLY: "bestvideo/best",
    # The resolution presets share one selector shape; only the height cap
    # (in pixels) differs, so they are generated from a single template.
    **{
        preset: f"bestvideo[height<={cap}]+bestaudio/best[height<={cap}]/best"
        for preset, cap in (
            (Quality.LOW_144P, 144),
            (Quality.LOW_240P, 240),
            (Quality.SD_480P, 480),
            (Quality.HD_720P, 720),
            (Quality.HD_1080P, 1080),
            (Quality.HD_1440P, 1440),
            (Quality.UHD_4K, 2160),
            (Quality.UHD_8K, 4320),
        )
    },
}
# Audio extensions that can be requested directly in the ``-f`` selector
# because they exist as real source streams on platforms like YouTube.
# Any other requested format is a *transcoded* output: filtering on its ext
# would match no stream, so for those we select plain bestaudio and leave the
# conversion to --audio-format.
_NATIVE_AUDIO_EXTS = frozenset(("m4a", "mp3", "ogg", "opus", "webm", "aac"))
# Output audio format -> FFmpeg audio codec that must be requested explicitly
# (via ``-c:a``) for lossless/PCM containers. Without an explicit codec,
# FFmpeg silently defaults to opus inside the container.
_AUDIO_FORMAT_CODEC: dict[str, str] = dict(
    wav="pcm_s16le",
    flac="flac",
    alac="alac",
    aiff="pcm_s16le",
)
def _format_selector(config: "DownloadConfig") -> str:
    """
    Return the yt-dlp ``-f`` format selector for *config*.

    Video downloads look the quality preset up in ``_QUALITY_FORMAT`` and
    fall back to ``bestvideo*+bestaudio/best`` for unknown presets. Audio
    extraction always selects an audio stream, preferring the requested
    extension only when it is listed in ``_NATIVE_AUDIO_EXTS``.

    :param config: DownloadConfig instance.
    """
    preset = str(config.quality)

    # Video path: the selector depends on the quality preset alone.
    if not config.extract_audio:
        return _QUALITY_FORMAT.get(preset, "bestvideo*+bestaudio/best")

    requested = config.audio_format
    if requested and str(requested) != AudioFormat.COPY:
        ext = str(requested)
        # Restrict by ext only when such a source stream can actually exist;
        # otherwise fetch bestaudio and let --audio-format do the transcode.
        if ext in _NATIVE_AUDIO_EXTS:
            return f"bestaudio[ext={ext}]/bestaudio/best"

    return "bestaudio/best"
def build_download_command(
    ytdlp_path: str,
    ffmpeg_path: str,
    url: str,
    config: "DownloadConfig",
) -> List[str]:
    """
    Build a complete yt-dlp CLI command.

    Arguments are emitted in a fixed order: FFmpeg location, format
    selector, output template, network options, audio extraction,
    remux/recode, post-processor arguments, optional FFmpeg external
    downloader, thumbnail/subtitle/metadata flags, overwrite policy,
    ``--newline``, custom options and finally the URL.

    ``--external-downloader ffmpeg`` (with ``-progress pipe:1``) is added
    only for non-audio downloads whose encoding config names an explicit
    video or audio codec. In that case FFmpeg's progress output lands on
    yt-dlp's stdout and can be parsed in real-time by AsyncYT's line reader.

    :param ytdlp_path: Path to yt-dlp binary.
    :param ffmpeg_path: Path to ffmpeg binary.
    :param url: Target URL. It is emitted after a ``--`` separator so that a
        value starting with ``-`` is never parsed as a yt-dlp option.
    :param config: DownloadConfig instance.
    :returns: The argument list, starting with ``ytdlp_path`` and ending
        with ``url``.
    """
    cmd: List[str] = [ytdlp_path]

    # 1. FFmpeg location
    cmd += ["--ffmpeg-location", ffmpeg_path]

    # 2. Format / quality
    cmd += ["-f", _format_selector(config)]

    # 3. Output template
    output_dir = Path(config.output_path).resolve()
    if config.custom_filename:
        template = str(output_dir / config.custom_filename)
    else:
        template = str(output_dir / "%(title)s.%(ext)s")
    cmd += ["-o", template]

    # 4. Network / reliability
    if config.proxy:
        cmd += ["--proxy", config.proxy]
    if config.rate_limit:
        cmd += ["-r", config.rate_limit]
    cmd += ["--retries", str(config.retries)]
    cmd += ["--fragment-retries", str(config.fragment_retries)]
    if config.cookies_file:
        cmd += ["--cookies", config.cookies_file]

    # 5. Audio extraction
    if config.extract_audio:
        cmd += ["--extract-audio"]
        if config.audio_format and str(config.audio_format) != AudioFormat.COPY:
            cmd += ["--audio-format", str(config.audio_format)]

    # ``encoding`` is optional on the config object, hence the getattr.
    encoding = getattr(config, "encoding", None)

    # True only for a video download where the user explicitly asked for a
    # video or audio codec. Computed once and shared by steps 6 and 8; the
    # ``extract_audio`` check comes first so the audio path never evaluates
    # the codec fields here.
    needs_reencode = (
        not config.extract_audio
        and encoding is not None
        and bool(
            (encoding.video and encoding.video.codec)
            or (encoding.audio and encoding.audio.codec)
        )
    )

    # 6. Container remux / recode
    if not config.extract_audio and config.video_format:
        vfmt = str(config.video_format)
        if needs_reencode:
            cmd += ["--recode-video", vfmt]
        else:
            cmd += ["--remux-video", vfmt]

    # 7. Custom encoding via --postprocessor-args
    #
    # For audio formats that require a specific FFmpeg codec (wav, flac,
    # alac, aiff), we inject -c:a so FFmpeg doesn't silently default to
    # opus. The user's explicit AudioEncodingConfig.codec always wins;
    # we only inject the implicit codec when none is set.
    if config.extract_audio:
        audio_fmt = str(config.audio_format) if config.audio_format else None
        implicit_codec = _AUDIO_FORMAT_CODEC.get(audio_fmt or "")

        if encoding is not None:
            # User supplied an EncodingConfig — use it, but patch in the
            # implicit codec if they didn't specify one themselves.
            if implicit_codec and (
                encoding.audio is None or not encoding.audio.codec
            ):
                # Local import, as in the original code.
                from .encoding import AudioEncodingConfig

                patched_audio = (
                    encoding.audio or AudioEncodingConfig()
                ).model_copy(update={"codec": implicit_codec})
                # Work on a copy so the caller's config is left untouched.
                encoding = encoding.model_copy(update={"audio": patched_audio})

            ppa = encoding.build_extract_audio_ppa()
            if ppa:
                cmd += ["--postprocessor-args", ppa]
        elif implicit_codec:
            # No EncodingConfig at all — inject the bare minimum so FFmpeg
            # produces the correct codec inside the container.
            ppa = f"ExtractAudio+ffmpeg_o:-c:a {implicit_codec}"
            cmd += ["--postprocessor-args", ppa]
    elif encoding is not None:
        # Video path — only touch postprocessor-args when encoding is set.
        ppa_vc = encoding.build_video_convertor_ppa()
        if ppa_vc:
            cmd += ["--postprocessor-args", ppa_vc]
        ppa_mg = encoding.build_merger_ppa()
        if ppa_mg:
            cmd += ["--postprocessor-args", ppa_mg]

    # 8. External downloader → FFmpeg with real-time -progress output
    #
    # Used ONLY when the user explicitly requested a re-encode. For plain
    # downloads and simple remux/container-change yt-dlp's built-in
    # downloader is kept, which avoids an extra FFmpeg pass.
    #
    # Enabling this unconditionally caused two problems:
    #   1. FFmpeg was invoked even when no encoding was needed (slow, wasteful).
    #   2. A second FFmpeg pass was triggered by yt-dlp postprocessors
    #      (embed-thumbnail, embed-subs, embed-metadata), producing a
    #      double-encode artefact visible in the logs as out_time resetting.
    #
    # It is never set for audio-only downloads because yt-dlp handles
    # extraction via a postprocessor.
    if needs_reencode:
        cmd += ["--external-downloader", "ffmpeg"]
        cmd += [
            "--external-downloader-args",
            "ffmpeg:-progress pipe:1 -loglevel error",
        ]

    # 9. Thumbnail
    if config.write_thumbnail:
        cmd += ["--write-thumbnail"]
    if config.embed_thumbnail:
        cmd += ["--embed-thumbnail"]

    # 10. Subtitles
    if config.embed_subs:
        cmd += ["--embed-subs"]
    if config.write_subs:
        cmd += ["--write-subs"]
    if config.embed_subs or config.write_subs:
        cmd += ["--sub-langs", config.subtitle_lang]

    # 11. Metadata
    if config.embed_metadata:
        cmd += ["--embed-metadata"]

    # 12. Misc
    if config.write_info_json:
        cmd += ["--write-info-json"]
    if config.write_live_chat:
        # NOTE(review): yt-dlp exposes live chat as the ``live_chat`` subtitle
        # language; confirm --sub-langs includes it when this flag is set.
        cmd += ["--write-subs", "--sub-format", "json3"]

    # 13. Overwrite behaviour (the flag is read from the encoding config)
    overwrite = getattr(encoding, "overwrite", False) if encoding else False
    if overwrite:
        cmd += ["--force-overwrites"]
    else:
        cmd += ["--no-overwrites"]

    # 14. Progress output (newline mode for easy line-by-line parsing)
    cmd += ["--newline"]

    # 15. Custom yt-dlp options: ``some_key`` becomes ``--some-key``.
    #     True  -> bare flag; False / None -> omitted; anything else -> value.
    for key, value in (config.custom_options or {}).items():
        if value is None or value is False:
            # None used to be rendered as the literal argument "None".
            continue
        flag = f"--{key.replace('_', '-')}"
        if value is True:
            cmd.append(flag)
        else:
            cmd += [flag, str(value)]

    # 16. URL (always last). The "--" ends option parsing so a URL or video
    #     ID starting with "-" cannot be interpreted as a yt-dlp option.
    cmd += ["--", url]

    logger.debug("yt-dlp command: %s", " ".join(cmd))
    return cmd