feat: enhance SaveVideo with DynamicCombo for codec-specific options

Major changes:
- SaveVideo now uses DynamicCombo to show codec-specific encoding options
- When selecting h264 or vp9, quality and speed inputs appear dynamically
- Each codec has unique ADVANCED options (shown in 'Show Advanced' section):
  - h264: Profile (baseline/main/high), Tune (film/animation/grain/etc.)
  - vp9: Row Multi-threading, Tile Columns
- Quality maps to CRF internally with codec-appropriate ranges
- Speed presets map to FFmpeg presets

New types:
- VideoSpeedPreset enum with user-friendly names
- quality_to_crf() helper function

This demonstrates both DynamicCombo (codec-specific inputs) and advanced
widgets (profile/tune/row_mt/tile_columns hidden by default).

Amp-Thread-ID: https://ampcode.com/threads/T-019bce44-e743-7349-8bad-d3027d456fb1
Co-authored-by: Amp <amp@ampcode.com>
This commit is contained in:
bymyself
2026-01-17 16:23:00 -08:00
parent 36b9673a33
commit 86317e32b8
6 changed files with 389 additions and 42 deletions

View File

@@ -7,7 +7,7 @@ from comfy_api.internal.singleton import ProxiedSingleton
from comfy_api.internal.async_to_sync import create_sync_class
from ._input import ImageInput, AudioInput, MaskInput, LatentInput, VideoInput
from ._input_impl import VideoFromFile, VideoFromComponents
from ._util import VideoCodec, VideoContainer, VideoComponents, MESH, VOXEL
from ._util import VideoCodec, VideoContainer, VideoComponents, VideoSpeedPreset, MESH, VOXEL
from . import _io_public as io
from . import _ui_public as ui
from comfy_execution.utils import get_executing_context
@@ -103,6 +103,7 @@ class Types:
VideoCodec = VideoCodec
VideoContainer = VideoContainer
VideoComponents = VideoComponents
VideoSpeedPreset = VideoSpeedPreset
MESH = MESH
VOXEL = VOXEL

View File

@@ -10,7 +10,7 @@ import json
import numpy as np
import math
import torch
from .._util import VideoContainer, VideoCodec, VideoComponents
from .._util import VideoContainer, VideoCodec, VideoComponents, VideoSpeedPreset, quality_to_crf
def container_to_output_format(container_format: str | None) -> str | None:
@@ -250,10 +250,16 @@ class VideoFromFile(VideoInput):
path: str | io.BytesIO,
format: VideoContainer = VideoContainer.AUTO,
codec: VideoCodec = VideoCodec.AUTO,
metadata: Optional[dict] = None
metadata: Optional[dict] = None,
quality: Optional[int] = None,
speed: Optional[VideoSpeedPreset] = None,
profile: Optional[str] = None,
tune: Optional[str] = None,
row_mt: bool = True,
tile_columns: Optional[int] = None,
):
if isinstance(self.__file, io.BytesIO):
self.__file.seek(0) # Reset the BytesIO object to the beginning
self.__file.seek(0)
with av.open(self.__file, mode='r') as container:
container_format = container.format.name
video_encoding = container.streams.video[0].codec.name if len(container.streams.video) > 0 else None
@@ -262,6 +268,10 @@ class VideoFromFile(VideoInput):
reuse_streams = False
if codec != VideoCodec.AUTO and codec != video_encoding and video_encoding is not None:
reuse_streams = False
if quality is not None or speed is not None:
reuse_streams = False
if profile is not None or tune is not None or tile_columns is not None:
reuse_streams = False
if not reuse_streams:
components = self.get_components_internal(container)
@@ -270,7 +280,13 @@ class VideoFromFile(VideoInput):
path,
format=format,
codec=codec,
metadata=metadata
metadata=metadata,
quality=quality,
speed=speed,
profile=profile,
tune=tune,
row_mt=row_mt,
tile_columns=tile_columns,
)
streams = container.streams
@@ -330,54 +346,128 @@ class VideoFromComponents(VideoInput):
path: str,
format: VideoContainer = VideoContainer.AUTO,
codec: VideoCodec = VideoCodec.AUTO,
metadata: Optional[dict] = None
metadata: Optional[dict] = None,
quality: Optional[int] = None,
speed: Optional[VideoSpeedPreset] = None,
profile: Optional[str] = None,
tune: Optional[str] = None,
row_mt: bool = True,
tile_columns: Optional[int] = None,
):
if format != VideoContainer.AUTO and format != VideoContainer.MP4:
raise ValueError("Only MP4 format is supported for now")
if codec != VideoCodec.AUTO and codec != VideoCodec.H264:
raise ValueError("Only H264 codec is supported for now")
"""
Save video to file with optional encoding parameters.
Args:
path: Output file path
format: Container format (mp4, webm, or auto)
codec: Video codec (h264, vp9, or auto)
metadata: Optional metadata dict to embed
quality: Quality percentage 0-100 (100=best). Maps to CRF internally.
speed: Encoding speed preset. Slower = better compression.
profile: H.264 profile (baseline, main, high)
tune: H.264 tune option (film, animation, grain, etc.)
row_mt: VP9 row-based multi-threading
tile_columns: VP9 tile columns (power of 2)
"""
resolved_format = format
resolved_codec = codec
if resolved_format == VideoContainer.AUTO:
resolved_format = VideoContainer.MP4
if resolved_codec == VideoCodec.AUTO:
if resolved_format == VideoContainer.WEBM:
resolved_codec = VideoCodec.VP9
else:
resolved_codec = VideoCodec.H264
if resolved_format == VideoContainer.WEBM and resolved_codec == VideoCodec.H264:
raise ValueError("H264 codec is not supported with WebM container")
if resolved_format == VideoContainer.MP4 and resolved_codec == VideoCodec.VP9:
raise ValueError("VP9 codec is not supported with MP4 container")
codec_map = {
VideoCodec.H264: "libx264",
VideoCodec.VP9: "libvpx-vp9",
}
ffmpeg_codec = codec_map.get(resolved_codec, "libx264")
extra_kwargs = {}
if isinstance(format, VideoContainer) and format != VideoContainer.AUTO:
extra_kwargs["format"] = format.value
with av.open(path, mode='w', options={'movflags': 'use_metadata_tags'}, **extra_kwargs) as output:
# Add metadata before writing any streams
if resolved_format != VideoContainer.AUTO:
extra_kwargs["format"] = resolved_format.value
container_options = {}
if resolved_format == VideoContainer.MP4:
container_options["movflags"] = "use_metadata_tags"
with av.open(path, mode='w', options=container_options, **extra_kwargs) as output:
if metadata is not None:
for key, value in metadata.items():
output.metadata[key] = json.dumps(value)
frame_rate = Fraction(round(self.__components.frame_rate * 1000), 1000)
# Create a video stream
video_stream = output.add_stream('h264', rate=frame_rate)
video_stream = output.add_stream(ffmpeg_codec, rate=frame_rate)
video_stream.width = self.__components.images.shape[2]
video_stream.height = self.__components.images.shape[1]
video_stream.pix_fmt = 'yuv420p'
# Create an audio stream
if resolved_codec == VideoCodec.VP9:
video_stream.pix_fmt = 'yuv420p'
video_stream.bit_rate = 0
else:
video_stream.pix_fmt = 'yuv420p'
if quality is not None:
crf = quality_to_crf(quality, ffmpeg_codec)
video_stream.options['crf'] = str(crf)
if speed is not None and speed != VideoSpeedPreset.AUTO:
if isinstance(speed, str):
speed = VideoSpeedPreset(speed)
preset = speed.to_ffmpeg_preset(ffmpeg_codec)
if resolved_codec == VideoCodec.VP9:
video_stream.options['cpu-used'] = preset
else:
video_stream.options['preset'] = preset
# H.264-specific options
if resolved_codec == VideoCodec.H264:
if profile is not None:
video_stream.options['profile'] = profile
if tune is not None:
video_stream.options['tune'] = tune
# VP9-specific options
if resolved_codec == VideoCodec.VP9:
if row_mt:
video_stream.options['row-mt'] = '1'
if tile_columns is not None:
video_stream.options['tile-columns'] = str(tile_columns)
audio_sample_rate = 1
audio_stream: Optional[av.AudioStream] = None
if self.__components.audio:
audio_sample_rate = int(self.__components.audio['sample_rate'])
audio_stream = output.add_stream('aac', rate=audio_sample_rate)
audio_codec = 'libopus' if resolved_format == VideoContainer.WEBM else 'aac'
audio_stream = output.add_stream(audio_codec, rate=audio_sample_rate)
# Encode video
for i, frame in enumerate(self.__components.images):
img = (frame * 255).clamp(0, 255).byte().cpu().numpy() # shape: (H, W, 3)
frame = av.VideoFrame.from_ndarray(img, format='rgb24')
frame = frame.reformat(format='yuv420p') # Convert to YUV420P as required by h264
packet = video_stream.encode(frame)
img = (frame * 255).clamp(0, 255).byte().cpu().numpy()
video_frame = av.VideoFrame.from_ndarray(img, format='rgb24')
video_frame = video_frame.reformat(format='yuv420p')
packet = video_stream.encode(video_frame)
output.mux(packet)
# Flush video
packet = video_stream.encode(None)
output.mux(packet)
if audio_stream and self.__components.audio:
waveform = self.__components.audio['waveform']
waveform = waveform[:, :, :math.ceil((audio_sample_rate / frame_rate) * self.__components.images.shape[0])]
frame = av.AudioFrame.from_ndarray(waveform.movedim(2, 1).reshape(1, -1).float().numpy(), format='flt', layout='mono' if waveform.shape[1] == 1 else 'stereo')
frame.sample_rate = audio_sample_rate
frame.pts = 0
output.mux(audio_stream.encode(frame))
# Flush encoder
audio_frame = av.AudioFrame.from_ndarray(
waveform.movedim(2, 1).reshape(1, -1).float().numpy(),
format='flt',
layout='mono' if waveform.shape[1] == 1 else 'stereo'
)
audio_frame.sample_rate = audio_sample_rate
audio_frame.pts = 0
output.mux(audio_stream.encode(audio_frame))
output.mux(audio_stream.encode(None))

View File

@@ -1,4 +1,4 @@
from .video_types import VideoContainer, VideoCodec, VideoComponents
from .video_types import VideoContainer, VideoCodec, VideoComponents, VideoSpeedPreset, quality_to_crf
from .geometry_types import VOXEL, MESH
from .image_types import SVG
@@ -7,6 +7,8 @@ __all__ = [
"VideoContainer",
"VideoCodec",
"VideoComponents",
"VideoSpeedPreset",
"quality_to_crf",
"VOXEL",
"MESH",
"SVG",

View File

@@ -8,6 +8,7 @@ from .._input import ImageInput, AudioInput
class VideoCodec(str, Enum):
AUTO = "auto"
H264 = "h264"
VP9 = "vp9"
@classmethod
def as_input(cls) -> list[str]:
@@ -16,9 +17,11 @@ class VideoCodec(str, Enum):
"""
return [member.value for member in cls]
class VideoContainer(str, Enum):
AUTO = "auto"
MP4 = "mp4"
WEBM = "webm"
@classmethod
def as_input(cls) -> list[str]:
@@ -36,8 +39,71 @@ class VideoContainer(str, Enum):
value = cls(value)
if value == VideoContainer.MP4 or value == VideoContainer.AUTO:
return "mp4"
if value == VideoContainer.WEBM:
return "webm"
return ""
class VideoSpeedPreset(str, Enum):
    """Encoding speed presets - slower = better compression at same quality.

    Members are ordered from the fastest encode (``FASTEST``) to the slowest,
    most thorough encode (``BEST``). ``AUTO`` resolves to the same setting as
    ``BALANCED`` when converted with :meth:`to_ffmpeg_preset`.
    """

    AUTO = "auto"
    FASTEST = "Fastest"
    FAST = "Fast"
    BALANCED = "Balanced"
    QUALITY = "Quality"
    BEST = "Best"

    @classmethod
    def as_input(cls) -> list[str]:
        """Return every preset value, in declaration order."""
        return [member.value for member in cls]

    def to_ffmpeg_preset(self, codec: str = "h264") -> str:
        """Convert to the FFmpeg speed setting string for the given codec.

        Args:
            codec: Codec or encoder name. ``"vp9"`` / ``"libvpx-vp9"`` select
                the VP9 table; any other value selects the H.264 table.

        Returns:
            For VP9, a ``cpu-used`` level as a string (``"0"``-``"4"``).
            For everything else, an x264 ``preset`` name.
        """
        h264_map = {
            VideoSpeedPreset.FASTEST: "ultrafast",
            VideoSpeedPreset.FAST: "veryfast",
            VideoSpeedPreset.BALANCED: "medium",
            VideoSpeedPreset.QUALITY: "slow",
            VideoSpeedPreset.BEST: "veryslow",
            VideoSpeedPreset.AUTO: "medium",
        }
        # libvpx's `cpu-used` runs the opposite way to what the names suggest:
        # HIGHER values encode faster at lower quality, and 0 is the slowest,
        # most thorough setting. FASTEST must therefore map to the largest
        # number and BEST to 0, mirroring the ultrafast..veryslow ordering of
        # the H.264 table above.
        vp9_map = {
            VideoSpeedPreset.FASTEST: "4",
            VideoSpeedPreset.FAST: "3",
            VideoSpeedPreset.BALANCED: "2",
            VideoSpeedPreset.QUALITY: "1",
            VideoSpeedPreset.BEST: "0",
            VideoSpeedPreset.AUTO: "2",
        }
        if codec in ("vp9", "libvpx-vp9"):
            return vp9_map.get(self, "2")
        return h264_map.get(self, "medium")
def quality_to_crf(quality: int, codec: str = "h264") -> int:
    """
    Translate a 0-100 quality percentage into a CRF value for ``codec``.

    Args:
        quality: 0-100 where 100 is best quality; values outside that range
            are clamped to it.
        codec: Codec or encoder name (``h264``/``libx264`` or
            ``vp9``/``libvpx-vp9``).

    Returns:
        The CRF for the codec (lower CRF = better quality). Codecs other
        than the ones listed above always yield 23.
    """
    clamped = max(0, min(100, quality))

    # (CRF at quality 0, CRF span covered between quality 0 and quality 100)
    if codec in ("h264", "libx264"):
        # quality 100 -> CRF 12, quality 0 -> CRF 40
        worst_crf, crf_span = 40, 28
    elif codec in ("vp9", "libvpx-vp9"):
        # quality 100 -> CRF 15, quality 0 -> CRF 50
        worst_crf, crf_span = 50, 35
    else:
        # Unrecognised codec: fixed default, independent of quality.
        return 23

    # int() truncates toward zero, so fractional CRFs drop to the lower value.
    return int(worst_crf - (clamped / 100) * crf_span)
@dataclass
class VideoComponents:
"""