diff --git a/comfy_api/latest/__init__.py b/comfy_api/latest/__init__.py index b0fa14ff6..7932efe76 100644 --- a/comfy_api/latest/__init__.py +++ b/comfy_api/latest/__init__.py @@ -7,7 +7,7 @@ from comfy_api.internal.singleton import ProxiedSingleton from comfy_api.internal.async_to_sync import create_sync_class from ._input import ImageInput, AudioInput, MaskInput, LatentInput, VideoInput from ._input_impl import VideoFromFile, VideoFromComponents -from ._util import VideoCodec, VideoContainer, VideoComponents, MESH, VOXEL +from ._util import VideoCodec, VideoContainer, VideoComponents, VideoSpeedPreset, MESH, VOXEL from . import _io_public as io from . import _ui_public as ui from comfy_execution.utils import get_executing_context @@ -103,6 +103,7 @@ class Types: VideoCodec = VideoCodec VideoContainer = VideoContainer VideoComponents = VideoComponents + VideoSpeedPreset = VideoSpeedPreset MESH = MESH VOXEL = VOXEL diff --git a/comfy_api/latest/_input_impl/video_types.py b/comfy_api/latest/_input_impl/video_types.py index ea35c6062..ffd105c18 100644 --- a/comfy_api/latest/_input_impl/video_types.py +++ b/comfy_api/latest/_input_impl/video_types.py @@ -10,7 +10,7 @@ import json import numpy as np import math import torch -from .._util import VideoContainer, VideoCodec, VideoComponents +from .._util import VideoContainer, VideoCodec, VideoComponents, VideoSpeedPreset, quality_to_crf def container_to_output_format(container_format: str | None) -> str | None: @@ -250,10 +250,16 @@ class VideoFromFile(VideoInput): path: str | io.BytesIO, format: VideoContainer = VideoContainer.AUTO, codec: VideoCodec = VideoCodec.AUTO, - metadata: Optional[dict] = None + metadata: Optional[dict] = None, + quality: Optional[int] = None, + speed: Optional[VideoSpeedPreset] = None, + profile: Optional[str] = None, + tune: Optional[str] = None, + row_mt: bool = True, + tile_columns: Optional[int] = None, ): if isinstance(self.__file, io.BytesIO): - self.__file.seek(0) # Reset the BytesIO 
object to the beginning + self.__file.seek(0) with av.open(self.__file, mode='r') as container: container_format = container.format.name video_encoding = container.streams.video[0].codec.name if len(container.streams.video) > 0 else None @@ -262,6 +268,10 @@ class VideoFromFile(VideoInput): reuse_streams = False if codec != VideoCodec.AUTO and codec != video_encoding and video_encoding is not None: reuse_streams = False + if quality is not None or speed is not None: + reuse_streams = False + if profile is not None or tune is not None or tile_columns is not None: + reuse_streams = False if not reuse_streams: components = self.get_components_internal(container) @@ -270,7 +280,13 @@ class VideoFromFile(VideoInput): path, format=format, codec=codec, - metadata=metadata + metadata=metadata, + quality=quality, + speed=speed, + profile=profile, + tune=tune, + row_mt=row_mt, + tile_columns=tile_columns, ) streams = container.streams @@ -330,54 +346,128 @@ class VideoFromComponents(VideoInput): path: str, format: VideoContainer = VideoContainer.AUTO, codec: VideoCodec = VideoCodec.AUTO, - metadata: Optional[dict] = None + metadata: Optional[dict] = None, + quality: Optional[int] = None, + speed: Optional[VideoSpeedPreset] = None, + profile: Optional[str] = None, + tune: Optional[str] = None, + row_mt: bool = True, + tile_columns: Optional[int] = None, ): - if format != VideoContainer.AUTO and format != VideoContainer.MP4: - raise ValueError("Only MP4 format is supported for now") - if codec != VideoCodec.AUTO and codec != VideoCodec.H264: - raise ValueError("Only H264 codec is supported for now") + """ + Save video to file with optional encoding parameters. + + Args: + path: Output file path + format: Container format (mp4, webm, or auto) + codec: Video codec (h264, vp9, or auto) + metadata: Optional metadata dict to embed + quality: Quality percentage 0-100 (100=best). Maps to CRF internally. + speed: Encoding speed preset. Slower = better compression. 
+ profile: H.264 profile (baseline, main, high) + tune: H.264 tune option (film, animation, grain, etc.) + row_mt: VP9 row-based multi-threading + tile_columns: VP9 tile columns (power of 2) + """ + resolved_format = format + resolved_codec = codec + + if resolved_format == VideoContainer.AUTO: + resolved_format = VideoContainer.MP4 + if resolved_codec == VideoCodec.AUTO: + if resolved_format == VideoContainer.WEBM: + resolved_codec = VideoCodec.VP9 + else: + resolved_codec = VideoCodec.H264 + + if resolved_format == VideoContainer.WEBM and resolved_codec == VideoCodec.H264: + raise ValueError("H264 codec is not supported with WebM container") + if resolved_format == VideoContainer.MP4 and resolved_codec == VideoCodec.VP9: + raise ValueError("VP9 codec is not supported with MP4 container") + + codec_map = { + VideoCodec.H264: "libx264", + VideoCodec.VP9: "libvpx-vp9", + } + ffmpeg_codec = codec_map.get(resolved_codec, "libx264") + extra_kwargs = {} - if isinstance(format, VideoContainer) and format != VideoContainer.AUTO: - extra_kwargs["format"] = format.value - with av.open(path, mode='w', options={'movflags': 'use_metadata_tags'}, **extra_kwargs) as output: - # Add metadata before writing any streams + if resolved_format != VideoContainer.AUTO: + extra_kwargs["format"] = resolved_format.value + + container_options = {} + if resolved_format == VideoContainer.MP4: + container_options["movflags"] = "use_metadata_tags" + + with av.open(path, mode='w', options=container_options, **extra_kwargs) as output: if metadata is not None: for key, value in metadata.items(): output.metadata[key] = json.dumps(value) frame_rate = Fraction(round(self.__components.frame_rate * 1000), 1000) - # Create a video stream - video_stream = output.add_stream('h264', rate=frame_rate) + video_stream = output.add_stream(ffmpeg_codec, rate=frame_rate) video_stream.width = self.__components.images.shape[2] video_stream.height = self.__components.images.shape[1] - video_stream.pix_fmt = 
'yuv420p' - # Create an audio stream + if resolved_codec == VideoCodec.VP9: + video_stream.pix_fmt = 'yuv420p' + video_stream.bit_rate = 0 + else: + video_stream.pix_fmt = 'yuv420p' + + if quality is not None: + crf = quality_to_crf(quality, ffmpeg_codec) + video_stream.options['crf'] = str(crf) + + if speed is not None and speed != VideoSpeedPreset.AUTO: + if isinstance(speed, str): + speed = VideoSpeedPreset(speed) + preset = speed.to_ffmpeg_preset(ffmpeg_codec) + if resolved_codec == VideoCodec.VP9: + video_stream.options['cpu-used'] = preset + else: + video_stream.options['preset'] = preset + + # H.264-specific options + if resolved_codec == VideoCodec.H264: + if profile is not None: + video_stream.options['profile'] = profile + if tune is not None: + video_stream.options['tune'] = tune + + # VP9-specific options + if resolved_codec == VideoCodec.VP9: + if row_mt: + video_stream.options['row-mt'] = '1' + if tile_columns is not None: + video_stream.options['tile-columns'] = str(tile_columns) + audio_sample_rate = 1 audio_stream: Optional[av.AudioStream] = None if self.__components.audio: audio_sample_rate = int(self.__components.audio['sample_rate']) - audio_stream = output.add_stream('aac', rate=audio_sample_rate) + audio_codec = 'libopus' if resolved_format == VideoContainer.WEBM else 'aac' + audio_stream = output.add_stream(audio_codec, rate=audio_sample_rate) - # Encode video for i, frame in enumerate(self.__components.images): - img = (frame * 255).clamp(0, 255).byte().cpu().numpy() # shape: (H, W, 3) - frame = av.VideoFrame.from_ndarray(img, format='rgb24') - frame = frame.reformat(format='yuv420p') # Convert to YUV420P as required by h264 - packet = video_stream.encode(frame) + img = (frame * 255).clamp(0, 255).byte().cpu().numpy() + video_frame = av.VideoFrame.from_ndarray(img, format='rgb24') + video_frame = video_frame.reformat(format='yuv420p') + packet = video_stream.encode(video_frame) output.mux(packet) - # Flush video packet = 
video_stream.encode(None) output.mux(packet) if audio_stream and self.__components.audio: waveform = self.__components.audio['waveform'] waveform = waveform[:, :, :math.ceil((audio_sample_rate / frame_rate) * self.__components.images.shape[0])] - frame = av.AudioFrame.from_ndarray(waveform.movedim(2, 1).reshape(1, -1).float().numpy(), format='flt', layout='mono' if waveform.shape[1] == 1 else 'stereo') - frame.sample_rate = audio_sample_rate - frame.pts = 0 - output.mux(audio_stream.encode(frame)) - - # Flush encoder + audio_frame = av.AudioFrame.from_ndarray( + waveform.movedim(2, 1).reshape(1, -1).float().numpy(), + format='flt', + layout='mono' if waveform.shape[1] == 1 else 'stereo' + ) + audio_frame.sample_rate = audio_sample_rate + audio_frame.pts = 0 + output.mux(audio_stream.encode(audio_frame)) output.mux(audio_stream.encode(None)) diff --git a/comfy_api/latest/_util/__init__.py b/comfy_api/latest/_util/__init__.py index 6313eb01b..b93bca1aa 100644 --- a/comfy_api/latest/_util/__init__.py +++ b/comfy_api/latest/_util/__init__.py @@ -1,4 +1,4 @@ -from .video_types import VideoContainer, VideoCodec, VideoComponents +from .video_types import VideoContainer, VideoCodec, VideoComponents, VideoSpeedPreset, quality_to_crf from .geometry_types import VOXEL, MESH from .image_types import SVG @@ -7,6 +7,8 @@ __all__ = [ "VideoContainer", "VideoCodec", "VideoComponents", + "VideoSpeedPreset", + "quality_to_crf", "VOXEL", "MESH", "SVG", diff --git a/comfy_api/latest/_util/video_types.py b/comfy_api/latest/_util/video_types.py index fd3b5a510..b4a63ac16 100644 --- a/comfy_api/latest/_util/video_types.py +++ b/comfy_api/latest/_util/video_types.py @@ -8,6 +8,7 @@ from .._input import ImageInput, AudioInput class VideoCodec(str, Enum): AUTO = "auto" H264 = "h264" + VP9 = "vp9" @classmethod def as_input(cls) -> list[str]: @@ -16,9 +17,11 @@ class VideoCodec(str, Enum): """ return [member.value for member in cls] + class VideoContainer(str, Enum): AUTO = "auto" MP4 = "mp4" 
# --- comfy_api/latest/_util/video_types.py -------------------------------


class VideoSpeedPreset(str, Enum):
    """Codec-independent encoding speed presets.

    Slower presets spend more encoding time to get better compression at the
    same quality. ``AUTO`` means "no explicit choice"; if it is converted
    anyway it resolves to the same encoder setting as ``BALANCED``.
    """
    AUTO = "auto"
    FASTEST = "Fastest"
    FAST = "Fast"
    BALANCED = "Balanced"
    QUALITY = "Quality"
    BEST = "Best"

    @classmethod
    def as_input(cls) -> list[str]:
        """Return every preset value, in declaration order, for combo inputs."""
        return [member.value for member in cls]

    def to_ffmpeg_preset(self, codec: str = "h264") -> str:
        """Translate this preset into the encoder-specific option value.

        Args:
            codec: Codec name. ``"vp9"`` / ``"libvpx-vp9"`` select the VP9
                table; every other value (including unknown codecs) falls
                back to the x264 table.

        Returns:
            For VP9, the ``cpu-used`` level as a string ("0".."4"); otherwise
            an x264 ``preset`` name such as ``"medium"``.
        """
        if codec in ("vp9", "libvpx-vp9"):
            return _VP9_CPU_USED.get(self, "2")
        return _X264_PRESETS.get(self, "medium")


# x264 `preset` names, fastest to slowest.
_X264_PRESETS = {
    VideoSpeedPreset.FASTEST: "ultrafast",
    VideoSpeedPreset.FAST: "veryfast",
    VideoSpeedPreset.BALANCED: "medium",
    VideoSpeedPreset.QUALITY: "slow",
    VideoSpeedPreset.BEST: "veryslow",
    VideoSpeedPreset.AUTO: "medium",
}

# libvpx-vp9 `cpu-used` levels. In libvpx a HIGHER cpu-used value means a
# FASTER (lower-effort) encode and 0 is the slowest / most thorough setting,
# so the fastest preset must map to the largest number.
_VP9_CPU_USED = {
    VideoSpeedPreset.FASTEST: "4",
    VideoSpeedPreset.FAST: "3",
    VideoSpeedPreset.BALANCED: "2",
    VideoSpeedPreset.QUALITY: "1",
    VideoSpeedPreset.BEST: "0",
    VideoSpeedPreset.AUTO: "2",
}


def quality_to_crf(quality: int, codec: str = "h264") -> int:
    """Map a 0-100 quality percentage to a codec-appropriate CRF value.

    The mapping is linear and the result is truncated toward zero, so the
    midpoint is 26 for H.264 and 32 for VP9.

    Args:
        quality: 0-100 where 100 is best quality. Values outside the range
            are clamped.
        codec: Codec name (``h264``/``libx264`` or ``vp9``/``libvpx-vp9``).

    Returns:
        CRF for the codec (lower = better quality): 12..40 for H.264,
        15..50 for VP9, and a fixed 23 for any other codec.
    """
    quality = max(0, min(100, quality))

    if codec in ("h264", "libx264"):
        # quality 100 -> CRF 12, quality 0 -> CRF 40
        return int(40 - (quality / 100) * 28)
    if codec in ("vp9", "libvpx-vp9"):
        # quality 100 -> CRF 15, quality 0 -> CRF 50
        return int(50 - (quality / 100) * 35)
    # Unknown codec: quality is ignored and a fixed CRF is returned.
    return 23


# --- comfy_extras/nodes_video.py: SaveVideo.define_schema -----------------


@classmethod
def define_schema(cls):
    """Build the SaveVideo node schema.

    The ``codec`` input is a dynamic combo: each codec option carries its own
    nested inputs. The h264 and vp9 options both expose inputs with the ids
    ``quality`` and ``speed``; the remaining inputs are codec-specific.
    """
    # H264-specific inputs
    h264_quality = io.Int.Input(
        "quality",
        default=80,
        min=0,
        max=100,
        step=1,
        display_name="Quality",
        # Midpoint matches quality_to_crf(50, "h264") == 26.
        tooltip="Output quality (0-100). Higher = better quality, larger files. "
                "Internally maps to CRF: 100→CRF 12, 50→CRF 26, 0→CRF 40.",
    )
    h264_speed = io.Combo.Input(
        "speed",
        options=Types.VideoSpeedPreset.as_input(),
        default="auto",
        display_name="Encoding Speed",
        tooltip="Encoding speed preset. Slower = better compression at same quality. "
                "Maps to FFmpeg presets: Fastest=ultrafast, Balanced=medium, Best=veryslow.",
    )
    h264_profile = io.Combo.Input(
        "profile",
        options=["auto", "baseline", "main", "high"],
        default="auto",
        display_name="Profile",
        tooltip="H.264 profile. 'baseline' for max compatibility (older devices), "
                "'main' for standard use, 'high' for best quality/compression.",
        advanced=True,
    )
    h264_tune = io.Combo.Input(
        "tune",
        options=["auto", "film", "animation", "grain", "stillimage", "fastdecode", "zerolatency"],
        default="auto",
        display_name="Tune",
        tooltip="Optimize encoding for specific content types. "
                "'film' for live action, 'animation' for cartoons/anime, 'grain' to preserve film grain.",
        advanced=True,
    )

    # VP9-specific inputs
    vp9_quality = io.Int.Input(
        "quality",
        default=80,
        min=0,
        max=100,
        step=1,
        display_name="Quality",
        # Midpoint matches quality_to_crf(50, "vp9") == 32 (32.5 truncated).
        tooltip="Output quality (0-100). Higher = better quality, larger files. "
                "Internally maps to CRF: 100→CRF 15, 50→CRF 32, 0→CRF 50.",
    )
    vp9_speed = io.Combo.Input(
        "speed",
        options=Types.VideoSpeedPreset.as_input(),
        default="auto",
        display_name="Encoding Speed",
        # Higher cpu-used = faster encode in libvpx.
        tooltip="Encoding speed. Slower = better compression. "
                "Maps to VP9 cpu-used: Fastest=4, Balanced=2, Best=0.",
    )
    vp9_row_mt = io.Boolean.Input(
        "row_mt",
        default=True,
        display_name="Row Multi-threading",
        tooltip="Enable row-based multi-threading for faster encoding on multi-core CPUs.",
        advanced=True,
    )
    vp9_tile_columns = io.Combo.Input(
        "tile_columns",
        options=["auto", "0", "1", "2", "3", "4"],
        default="auto",
        display_name="Tile Columns",
        tooltip="Number of tile columns (as power of 2). More tiles = faster encoding "
                "but slightly worse compression. 'auto' picks based on resolution.",
        advanced=True,
    )

    return io.Schema(
        node_id="SaveVideo",
        display_name="Save Video",
        category="image/video",
        description="Saves video to the output directory. "
                    "When format/codec/quality differ from source, the video is re-encoded.",
        inputs=[
            io.Video.Input("video", tooltip="The video to save."),
            io.String.Input(
                "filename_prefix",
                default="video/ComfyUI",
                tooltip="The prefix for the file to save. "
                        "Supports formatting like %date:yyyy-MM-dd%.",
            ),
            io.DynamicCombo.Input("codec", options=[
                io.DynamicCombo.Option("auto", []),
                io.DynamicCombo.Option("h264", [h264_quality, h264_speed, h264_profile, h264_tune]),
                io.DynamicCombo.Option("vp9", [vp9_quality, vp9_speed, vp9_row_mt, vp9_tile_columns]),
            ], tooltip="Video codec. 'auto' preserves source when possible. "
                       "h264 outputs MP4, vp9 outputs WebM."),
        ],
        hidden=[io.Hidden.prompt, io.Hidden.extra_pnginfo],
        is_output_node=True,
    )
Types.VideoSpeedPreset(speed_str) if speed_str else None + width, height = video.get_dimensions() full_output_folder, filename, counter, subfolder, filename_prefix = folder_paths.get_save_image_path( filename_prefix, @@ -91,6 +202,7 @@ class SaveVideo(io.ComfyNode): width, height ) + saved_metadata = None if not args.disable_metadata: metadata = {} @@ -100,12 +212,20 @@ class SaveVideo(io.ComfyNode): metadata["prompt"] = cls.hidden.prompt if len(metadata) > 0: saved_metadata = metadata - file = f"{filename}_{counter:05}_.{Types.VideoContainer.get_extension(format)}" + + extension = Types.VideoContainer.get_extension(resolved_format) + file = f"{filename}_{counter:05}_.{extension}" video.save_to( os.path.join(full_output_folder, file), - format=Types.VideoContainer(format), - codec=codec, - metadata=saved_metadata + format=resolved_format, + codec=resolved_codec, + metadata=saved_metadata, + quality=quality, + speed=speed, + profile=profile if profile != "auto" else None, + tune=tune if tune != "auto" else None, + row_mt=row_mt, + tile_columns=int(tile_columns) if tile_columns != "auto" else None, ) return io.NodeOutput(ui=ui.PreviewVideo([ui.SavedResult(file, subfolder, io.FolderType.output)])) diff --git a/tests-unit/comfy_api_test/video_types_test.py b/tests-unit/comfy_api_test/video_types_test.py index b25fcb1ca..7d7e52b54 100644 --- a/tests-unit/comfy_api_test/video_types_test.py +++ b/tests-unit/comfy_api_test/video_types_test.py @@ -6,7 +6,7 @@ import av import io from fractions import Fraction from comfy_api.input_impl.video_types import VideoFromFile, VideoFromComponents -from comfy_api.util.video_types import VideoComponents +from comfy_api.util.video_types import VideoComponents, VideoSpeedPreset, quality_to_crf from comfy_api.input.basic_types import AudioInput from av.error import InvalidDataError @@ -237,3 +237,71 @@ def test_duration_consistency(video_components): manual_duration = float(components.images.shape[0] / components.frame_rate) assert 
class TestVideoSpeedPreset:
    """Tests for the VideoSpeedPreset enum and its encoder option mapping."""

    def test_as_input_returns_all_values(self):
        """as_input() returns all preset values in declaration order"""
        values = VideoSpeedPreset.as_input()
        assert values == ["auto", "Fastest", "Fast", "Balanced", "Quality", "Best"]

    def test_to_ffmpeg_preset_h264(self):
        """H.264 presets map to x264 preset names"""
        assert VideoSpeedPreset.FASTEST.to_ffmpeg_preset("h264") == "ultrafast"
        assert VideoSpeedPreset.FAST.to_ffmpeg_preset("h264") == "veryfast"
        assert VideoSpeedPreset.BALANCED.to_ffmpeg_preset("h264") == "medium"
        assert VideoSpeedPreset.QUALITY.to_ffmpeg_preset("h264") == "slow"
        assert VideoSpeedPreset.BEST.to_ffmpeg_preset("h264") == "veryslow"
        assert VideoSpeedPreset.AUTO.to_ffmpeg_preset("h264") == "medium"

    def test_to_ffmpeg_preset_vp9(self):
        """VP9 presets map to cpu-used levels.

        libvpx treats a higher cpu-used value as a faster encode (0 is the
        slowest, most thorough setting), so FASTEST must be the largest value.
        """
        assert VideoSpeedPreset.FASTEST.to_ffmpeg_preset("vp9") == "4"
        assert VideoSpeedPreset.FAST.to_ffmpeg_preset("vp9") == "3"
        assert VideoSpeedPreset.BALANCED.to_ffmpeg_preset("vp9") == "2"
        assert VideoSpeedPreset.QUALITY.to_ffmpeg_preset("vp9") == "1"
        assert VideoSpeedPreset.BEST.to_ffmpeg_preset("vp9") == "0"
        assert VideoSpeedPreset.AUTO.to_ffmpeg_preset("vp9") == "2"

    def test_to_ffmpeg_preset_vp9_faster_means_higher_cpu_used(self):
        """cpu-used never decreases when moving from BEST towards FASTEST"""
        slow_to_fast = [
            VideoSpeedPreset.BEST,
            VideoSpeedPreset.QUALITY,
            VideoSpeedPreset.BALANCED,
            VideoSpeedPreset.FAST,
            VideoSpeedPreset.FASTEST,
        ]
        levels = [int(preset.to_ffmpeg_preset("vp9")) for preset in slow_to_fast]
        assert levels == sorted(levels)
        assert levels[0] < levels[-1]

    def test_to_ffmpeg_preset_libvpx_vp9(self):
        """libvpx-vp9 codec string also maps to VP9 presets"""
        assert VideoSpeedPreset.BALANCED.to_ffmpeg_preset("libvpx-vp9") == "2"

    def test_to_ffmpeg_preset_default_to_h264(self):
        """Unknown codecs default to H.264 mapping"""
        assert VideoSpeedPreset.BALANCED.to_ffmpeg_preset("unknown") == "medium"


class TestQualityToCrf:
    """Tests for the quality_to_crf helper function."""

    def test_h264_quality_boundaries(self):
        """H.264 quality maps to correct CRF range (12-40)"""
        assert quality_to_crf(100, "h264") == 12
        assert quality_to_crf(0, "h264") == 40
        assert quality_to_crf(50, "h264") == 26

    def test_h264_libx264_alias(self):
        """libx264 codec string uses H.264 mapping"""
        assert quality_to_crf(100, "libx264") == 12

    def test_vp9_quality_boundaries(self):
        """VP9 quality maps to correct CRF range (15-50)"""
        assert quality_to_crf(100, "vp9") == 15
        assert quality_to_crf(0, "vp9") == 50
        # 32.5 is truncated, not rounded.
        assert quality_to_crf(50, "vp9") == 32

    def test_vp9_libvpx_alias(self):
        """libvpx-vp9 codec string uses VP9 mapping"""
        assert quality_to_crf(100, "libvpx-vp9") == 15

    def test_quality_clamping(self):
        """Quality values outside 0-100 are clamped"""
        assert quality_to_crf(150, "h264") == 12
        assert quality_to_crf(-50, "h264") == 40

    def test_unknown_codec_fallback(self):
        """Unknown codecs return default CRF 23"""
        assert quality_to_crf(50, "unknown_codec") == 23