# Source code for asyncyt.builder

"""
builder.py
------------------
Builds yt-dlp CLI commands from a DownloadConfig.
"""

from __future__ import annotations

import logging
from pathlib import Path
from typing import TYPE_CHECKING, List

if TYPE_CHECKING:
    from .basemodels import DownloadConfig

from .enums import AudioFormat, Quality

# Module-level logger; build_download_command logs the assembled command
# through it at DEBUG level.
logger = logging.getLogger(__name__)

# Public API of this module: only the command builder is exported.
__all__ = ["build_download_command"]

# Maps each Quality preset to the yt-dlp ``-f`` selector that implements it.
# The resolution presets all share one selector shape and differ only in the
# maximum height, so they are generated from (preset, height cap) pairs.
_QUALITY_FORMAT: dict[str, str] = {
    Quality.BEST: "bestvideo*+bestaudio/best",
    Quality.WORST: "worstvideo*+worstaudio/worst",
    Quality.AUDIO_ONLY: "bestaudio/best",
    Quality.VIDEO_ONLY: "bestvideo/best",
    **{
        preset: f"bestvideo[height<={cap}]+bestaudio/best[height<={cap}]/best"
        for preset, cap in (
            (Quality.LOW_144P, 144),
            (Quality.LOW_240P, 240),
            (Quality.SD_480P, 480),
            (Quality.HD_720P, 720),
            (Quality.HD_1080P, 1080),
            (Quality.HD_1440P, 1440),
            (Quality.UHD_4K, 2160),
            (Quality.UHD_8K, 4320),
        )
    },
}

# Audio extensions that can exist as source streams on platforms like YouTube.
# Any other requested audio format is a *transcoded* output: filtering the -f
# selector by such an ext would match no stream, and yt-dlp would fall back to
# bestaudio (usually opus/webm) regardless of what was asked for.
_NATIVE_AUDIO_EXTS = frozenset(("aac", "m4a", "mp3", "ogg", "opus", "webm"))

# Output audio format -> FFmpeg ``-c:a`` codec needed to produce that
# lossless/PCM container correctly. Without an explicit codec, FFmpeg
# silently defaults to opus inside the container.
_AUDIO_FORMAT_CODEC: dict[str, str] = dict(
    wav="pcm_s16le",
    flac="flac",
    alac="alac",
    aiff="pcm_s16le",
)


def _format_selector(config: "DownloadConfig") -> str:
    """Return the yt-dlp ``-f`` selector string for *config*.

    Video downloads look up ``str(config.quality)`` in ``_QUALITY_FORMAT`` and
    default to ``bestvideo*+bestaudio/best``. Audio extraction always selects
    ``bestaudio``; an ``ext`` filter is prepended only when the requested
    audio format is listed in ``_NATIVE_AUDIO_EXTS``.
    """
    preset_key = str(config.quality)

    if not config.extract_audio:
        return _QUALITY_FORMAT.get(preset_key, "bestvideo*+bestaudio/best")

    requested = config.audio_format
    if not requested:
        return "bestaudio/best"

    ext = str(requested)
    # Restrict by ext only for formats that exist as source streams; anything
    # else is fetched as bestaudio and transcoded later via --audio-format.
    if ext != AudioFormat.COPY and ext in _NATIVE_AUDIO_EXTS:
        return f"bestaudio[ext={ext}]/bestaudio/best"
    return "bestaudio/best"


def _custom_option_args(options: "dict | None") -> List[str]:
    """Translate a ``custom_options`` mapping into extra yt-dlp arguments.

    Keys become ``--kebab-case`` flags. ``True`` emits the bare flag,
    ``False`` and ``None`` emit nothing, a list/tuple repeats the flag once
    per item, and any other value is passed as ``str(value)``.
    """
    args: List[str] = []
    for key, value in (options or {}).items():
        flag = f"--{key.replace('_', '-')}"
        if value is True:
            args.append(flag)
        elif value is False or value is None:
            # Nothing to pass; stringifying None would hand yt-dlp "None".
            continue
        elif isinstance(value, (list, tuple)):
            for item in value:
                args += [flag, str(item)]
        else:
            args += [flag, str(value)]
    return args


def build_download_command(
    ytdlp_path: str,
    ffmpeg_path: str,
    url: str,
    config: "DownloadConfig",
) -> List[str]:
    """
    Build a complete yt-dlp CLI command.

    The command is assembled in a fixed order: binary, FFmpeg location,
    format selector, output template, network options, audio extraction,
    container remux/recode, postprocessor args, optional external downloader,
    thumbnail/subtitle/metadata flags, overwrite policy, ``--newline``,
    custom options and finally the URL.

    FFmpeg is used as the external downloader (``--external-downloader
    ffmpeg`` with ``-progress pipe:1``) only for non-audio downloads whose
    encoding config sets a video or audio codec, so that its progress output
    can be parsed line-by-line by the caller. All other downloads use
    yt-dlp's built-in downloader.

    :param ytdlp_path: Path to yt-dlp binary.
    :param ffmpeg_path: Path to ffmpeg binary.
    :param url: Target URL.
    :param config: DownloadConfig instance.
    :returns: The argument list, starting with ``ytdlp_path`` and ending
        with ``url``.
    """
    cmd: List[str] = [ytdlp_path]

    # 1. FFmpeg location
    cmd += ["--ffmpeg-location", ffmpeg_path]

    # 2. Format / quality
    cmd += ["-f", _format_selector(config)]

    # 3. Output template
    output_dir = Path(config.output_path).resolve()
    filename = config.custom_filename or "%(title)s.%(ext)s"
    cmd += ["-o", str(output_dir / filename)]

    # 4. Network / reliability
    if config.proxy:
        cmd += ["--proxy", config.proxy]
    if config.rate_limit:
        cmd += ["-r", config.rate_limit]
    cmd += ["--retries", str(config.retries)]
    cmd += ["--fragment-retries", str(config.fragment_retries)]
    if config.cookies_file:
        cmd += ["--cookies", config.cookies_file]

    # 5. Audio extraction
    if config.extract_audio:
        cmd += ["--extract-audio"]
        if config.audio_format and str(config.audio_format) != AudioFormat.COPY:
            cmd += ["--audio-format", str(config.audio_format)]

    encoding = getattr(config, "encoding", None)

    # A re-encode is requested only for non-audio downloads whose encoding
    # config names a video or audio codec. Computed once, before the audio
    # path below may patch ``encoding``; it drives both the remux/recode
    # choice (6) and the external downloader (8).
    wants_reencode = bool(
        not config.extract_audio
        and encoding is not None
        and (
            (encoding.video and encoding.video.codec)
            or (encoding.audio and encoding.audio.codec)
        )
    )

    # 6. Container remux / recode
    if not config.extract_audio and config.video_format:
        container_flag = "--recode-video" if wants_reencode else "--remux-video"
        cmd += [container_flag, str(config.video_format)]

    # 7. Custom encoding via --postprocessor-args
    #
    # For audio formats that require a specific FFmpeg codec (wav, flac,
    # alac, aiff), we inject -c:a so FFmpeg doesn't silently default to
    # opus. The user's explicit AudioEncodingConfig.codec always wins;
    # we only inject the implicit codec when none is set.
    if config.extract_audio:
        audio_fmt = str(config.audio_format) if config.audio_format else None
        implicit_codec = _AUDIO_FORMAT_CODEC.get(audio_fmt or "")

        if encoding is not None:
            # User supplied an EncodingConfig — use it, but patch in the
            # implicit codec if they didn't specify one themselves.
            if implicit_codec and (
                encoding.audio is None or not encoding.audio.codec
            ):
                from .encoding import AudioEncodingConfig

                patched_audio = (
                    encoding.audio or AudioEncodingConfig()
                ).model_copy(update={"codec": implicit_codec})
                encoding = encoding.model_copy(update={"audio": patched_audio})

            ppa = encoding.build_extract_audio_ppa()
            if ppa:
                cmd += ["--postprocessor-args", ppa]
        elif implicit_codec:
            # No EncodingConfig at all — inject the bare minimum so FFmpeg
            # produces the correct codec inside the container.
            ppa = f"ExtractAudio+ffmpeg_o:-c:a {implicit_codec}"
            cmd += ["--postprocessor-args", ppa]
    elif encoding is not None:
        # Video path — only touch postprocessor-args when encoding is set.
        ppa_vc = encoding.build_video_convertor_ppa()
        if ppa_vc:
            cmd += ["--postprocessor-args", ppa_vc]
        ppa_mg = encoding.build_merger_ppa()
        if ppa_mg:
            cmd += ["--postprocessor-args", ppa_mg]

    # 8. External downloader → FFmpeg with real-time -progress output
    #
    # We ONLY use --external-downloader ffmpeg when the user has explicitly
    # requested a re-encode (encoding.video.codec or encoding.audio.codec is
    # set). For plain downloads and simple remux/container-change we let
    # yt-dlp use its built-in downloader so we don't trigger an extra FFmpeg
    # pass and don't double-encode.
    #
    # Enabling this unconditionally caused two problems:
    #   1. FFmpeg was invoked even when no encoding was needed (slow, wasteful).
    #   2. A second FFmpeg pass was triggered by yt-dlp postprocessors
    #      (embed-thumbnail, embed-subs, embed-metadata), producing a
    #      double-encode artefact visible in the logs as out_time resetting.
    #
    # NOTE: we never set --external-downloader for audio-only downloads
    # because yt-dlp handles extraction via a postprocessor.
    if wants_reencode:
        cmd += ["--external-downloader", "ffmpeg"]
        cmd += [
            "--external-downloader-args",
            "ffmpeg:-progress pipe:1 -loglevel error",
        ]

    # 9. Thumbnail
    if config.write_thumbnail:
        cmd += ["--write-thumbnail"]
    if config.embed_thumbnail:
        cmd += ["--embed-thumbnail"]

    # 10. Subtitles
    if config.embed_subs:
        cmd += ["--embed-subs"]
    if config.write_subs:
        cmd += ["--write-subs"]
    if config.embed_subs or config.write_subs:
        cmd += ["--sub-langs", config.subtitle_lang]

    # 11. Metadata
    if config.embed_metadata:
        cmd += ["--embed-metadata"]

    # 12. Misc
    if config.write_info_json:
        cmd += ["--write-info-json"]
    if config.write_live_chat:
        # NOTE(review): this adds only --write-subs/--sub-format; nothing here
        # adds a live-chat entry to --sub-langs — confirm callers supply it
        # through config.subtitle_lang or custom_options.
        cmd += ["--write-subs", "--sub-format", "json3"]

    # 13. Overwrite behaviour
    overwrite = getattr(encoding, "overwrite", False) if encoding else False
    cmd += ["--force-overwrites"] if overwrite else ["--no-overwrites"]

    # 14. Progress output (newline mode for easy line-by-line parsing)
    cmd += ["--newline"]

    # 15. Custom yt-dlp options
    cmd += _custom_option_args(config.custom_options)

    # 16. URL (always last)
    cmd.append(url)

    logger.debug("yt-dlp command: %s", " ".join(cmd))
    return cmd