From 17b43c2b87eba43f0f071471b855e0ed659a2627 Mon Sep 17 00:00:00 2001 From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com> Date: Thu, 5 Mar 2026 13:31:28 -0800 Subject: [PATCH] LTX audio vae novram fixes. (#12796) --- comfy/ldm/lightricks/vocoders/vocoder.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/comfy/ldm/lightricks/vocoders/vocoder.py b/comfy/ldm/lightricks/vocoders/vocoder.py index a0e03cada..2481d8bdd 100644 --- a/comfy/ldm/lightricks/vocoders/vocoder.py +++ b/comfy/ldm/lightricks/vocoders/vocoder.py @@ -82,7 +82,7 @@ class LowPassFilter1d(nn.Module): _, C, _ = x.shape if self.padding: x = F.pad(x, (self.pad_left, self.pad_right), mode=self.padding_mode) - return F.conv1d(x, self.filter.expand(C, -1, -1), stride=self.stride, groups=C) + return F.conv1d(x, comfy.model_management.cast_to(self.filter.expand(C, -1, -1), dtype=x.dtype, device=x.device), stride=self.stride, groups=C) class UpSample1d(nn.Module): @@ -191,7 +191,7 @@ class Snake(nn.Module): self.eps = 1e-9 def forward(self, x): - a = self.alpha.unsqueeze(0).unsqueeze(-1) + a = comfy.model_management.cast_to(self.alpha.unsqueeze(0).unsqueeze(-1), dtype=x.dtype, device=x.device) if self.alpha_logscale: a = torch.exp(a) return x + (1.0 / (a + self.eps)) * torch.sin(x * a).pow(2) @@ -218,8 +218,8 @@ class SnakeBeta(nn.Module): self.eps = 1e-9 def forward(self, x): - a = self.alpha.unsqueeze(0).unsqueeze(-1) - b = self.beta.unsqueeze(0).unsqueeze(-1) + a = comfy.model_management.cast_to(self.alpha.unsqueeze(0).unsqueeze(-1), dtype=x.dtype, device=x.device) + b = comfy.model_management.cast_to(self.beta.unsqueeze(0).unsqueeze(-1), dtype=x.dtype, device=x.device) if self.alpha_logscale: a = torch.exp(a) b = torch.exp(b) @@ -597,7 +597,7 @@ class _STFTFn(nn.Module): y = y.unsqueeze(1) # (B, 1, T) left_pad = max(0, self.win_length - self.hop_length) # causal: left-only y = F.pad(y, (left_pad, 0)) - spec = F.conv1d(y, self.forward_basis, stride=self.hop_length, padding=0) + spec = F.conv1d(y, comfy.model_management.cast_to(self.forward_basis, dtype=y.dtype, device=y.device), stride=self.hop_length, padding=0) n_freqs = spec.shape[1] // 2 real, imag = spec[:, :n_freqs], spec[:, n_freqs:] magnitude = torch.sqrt(real ** 2 + imag ** 2) @@ -648,7 +648,7 @@ class MelSTFT(nn.Module): """ magnitude, phase = self.stft_fn(y) energy = torch.norm(magnitude, dim=1) - mel = torch.matmul(self.mel_basis.to(magnitude.dtype), magnitude) + mel = torch.matmul(comfy.model_management.cast_to(self.mel_basis, dtype=magnitude.dtype, device=y.device), magnitude) log_mel = torch.log(torch.clamp(mel, min=1e-5)) return log_mel, magnitude, phase, energy