From e419ae3e96bacb03a305623cc24cc563f2796067 Mon Sep 17 00:00:00 2001 From: Akshan Krithick Date: Sun, 31 May 2026 12:10:16 -0700 Subject: [PATCH 1/7] refactor vq tests --- tests/models/autoencoders/test_models_vq.py | 81 ++++++++++++--------- 1 file changed, 48 insertions(+), 33 deletions(-) diff --git a/tests/models/autoencoders/test_models_vq.py b/tests/models/autoencoders/test_models_vq.py index b88d24d1f2d8..ce1606f0e859 100644 --- a/tests/models/autoencoders/test_models_vq.py +++ b/tests/models/autoencoders/test_models_vq.py @@ -13,43 +13,39 @@ # See the License for the specific language governing permissions and # limitations under the License. -import unittest - +import pytest import torch from diffusers import VQModel +from diffusers.utils.torch_utils import randn_tensor -from ...testing_utils import backend_manual_seed, enable_full_determinism, floats_tensor, torch_device -from ..test_modeling_common import ModelTesterMixin -from .testing_utils import AutoencoderTesterMixin +from ...testing_utils import backend_manual_seed, enable_full_determinism, torch_device +from ..testing_utils import BaseModelTesterConfig, MemoryTesterMixin, ModelTesterMixin, TrainingTesterMixin +from .testing_utils import NewAutoencoderTesterMixin enable_full_determinism() -class VQModelTests(ModelTesterMixin, AutoencoderTesterMixin, unittest.TestCase): - model_class = VQModel - main_input_name = "sample" - +class VQModelTesterConfig(BaseModelTesterConfig): @property - def dummy_input(self, sizes=(32, 32)): - batch_size = 4 - num_channels = 3 - - image = floats_tensor((batch_size, num_channels) + sizes).to(torch_device) + def model_class(self): + return VQModel - return {"sample": image} + @property + def main_input_name(self) -> str: + return "sample" @property - def input_shape(self): + def output_shape(self) -> tuple: return (3, 32, 32) @property - def output_shape(self): - return (3, 32, 32) + def generator(self): + return torch.Generator("cpu").manual_seed(0) - def prepare_init_args_and_inputs_for_common(self): - init_dict = { + def get_init_dict(self) -> dict: + return { "block_out_channels": [8, 16], "norm_num_groups": 8, "in_channels": 3, @@ -58,24 +54,23 @@ def prepare_init_args_and_inputs_for_common(self): "up_block_types": ["UpDecoderBlock2D", "UpDecoderBlock2D"], "latent_channels": 3, } - inputs_dict = self.dummy_input - return init_dict, inputs_dict - @unittest.skip("Test not supported.") - def test_forward_signature(self): - pass + def get_dummy_inputs(self) -> dict: + batch_size = 4 + num_channels = 3 + sizes = (32, 32) + image = randn_tensor((batch_size, num_channels, *sizes), generator=self.generator, device=torch_device) + return {"sample": image} - @unittest.skip("Test not supported.") - def test_training(self): - pass +class TestVQModel(VQModelTesterConfig, ModelTesterMixin): def test_from_pretrained_hub(self): model, loading_info = VQModel.from_pretrained("fusing/vqgan-dummy", output_loading_info=True) - self.assertIsNotNone(model) - self.assertEqual(len(loading_info["missing_keys"]), 0) + assert model is not None + assert len(loading_info["missing_keys"]) == 0 model.to(torch_device) - image = model(**self.dummy_input) + image = model(**self.get_dummy_inputs()) assert image is not None, "Make sure output is not None" @@ -95,7 +90,7 @@ def test_output_pretrained(self): # fmt: off expected_output_slice = torch.tensor([-0.0153, -0.4044, -0.1880, -0.5161, -0.2418, -0.4072, -0.1612, -0.0633, -0.0143]) # fmt: on - self.assertTrue(torch.allclose(output_slice, expected_output_slice, atol=1e-3)) + assert torch.allclose(output_slice, expected_output_slice, atol=1e-3) def test_loss_pretrained(self): model = VQModel.from_pretrained("fusing/vqgan-dummy") @@ -111,4 +106,24 @@ def test_loss_pretrained(self): # fmt: off expected_output = torch.tensor([0.1936]) # fmt: on - self.assertTrue(torch.allclose(output, expected_output, atol=1e-3)) + assert torch.allclose(output, expected_output, atol=1e-3) + + +class TestVQModelTraining(VQModelTesterConfig, TrainingTesterMixin): + """Training tests for VQModel.""" + + @pytest.mark.skip("Test not supported.") + def test_training(self): + super().test_training() + + @pytest.mark.skip("Test not supported.") + def test_training_with_ema(self): + super().test_training_with_ema() + + +class TestVQModelMemory(VQModelTesterConfig, MemoryTesterMixin): + """Memory optimization tests for VQModel.""" + + +class TestVQModelSlicingTiling(VQModelTesterConfig, NewAutoencoderTesterMixin): + """Slicing and tiling tests for VQModel.""" From 17f1bdc908538a405e1cbb1a90a162841c83ef39 Mon Sep 17 00:00:00 2001 From: Akshan Krithick Date: Sun, 31 May 2026 12:10:16 -0700 Subject: [PATCH 2/7] refactor autoencoder_kl_kvae_video tests --- .../test_models_autoencoder_kl_kvae_video.py | 123 +++++++++--------- 1 file changed, 64 insertions(+), 59 deletions(-) diff --git a/tests/models/autoencoders/test_models_autoencoder_kl_kvae_video.py b/tests/models/autoencoders/test_models_autoencoder_kl_kvae_video.py index 7e9eebb87cf4..1e04c6be5a5b 100644 --- a/tests/models/autoencoders/test_models_autoencoder_kl_kvae_video.py +++ b/tests/models/autoencoders/test_models_autoencoder_kl_kvae_video.py @@ -13,24 +13,48 @@ # See the License for the specific language governing permissions and # limitations under the License. -import unittest +import pytest +import torch from diffusers import AutoencoderKLKVAEVideo +from diffusers.utils.torch_utils import randn_tensor -from ...testing_utils import enable_full_determinism, floats_tensor, torch_device -from ..test_modeling_common import ModelTesterMixin -from .testing_utils import AutoencoderTesterMixin +from ...testing_utils import enable_full_determinism, torch_device +from ..testing_utils import BaseModelTesterConfig, MemoryTesterMixin, ModelTesterMixin, TrainingTesterMixin +from .testing_utils import NewAutoencoderTesterMixin enable_full_determinism() -class AutoencoderKLKVAEVideoTests(ModelTesterMixin, AutoencoderTesterMixin, unittest.TestCase): - model_class = AutoencoderKLKVAEVideo - main_input_name = "sample" - base_precision = 1e-2 +def _run_nondeterministic(fn): + # reflection_pad3d_backward_out_cuda has no deterministic CUDA implementation; + # temporarily relax the requirement for tests that do backward passes. + torch.use_deterministic_algorithms(False) + try: + fn() + finally: + torch.use_deterministic_algorithms(True) + + +class AutoencoderKLKVAEVideoTesterConfig(BaseModelTesterConfig): + @property + def model_class(self): + return AutoencoderKLKVAEVideo + + @property + def main_input_name(self) -> str: + return "sample" + + @property + def output_shape(self) -> tuple: + return (3, 3, 16, 16) + + @property + def generator(self): + return torch.Generator("cpu").manual_seed(0) - def get_autoencoder_kl_kvae_video_config(self): + def get_init_dict(self) -> dict: return { "ch": 32, "ch_mult": (1, 2), @@ -41,78 +65,59 @@ def get_autoencoder_kl_kvae_video_config(self): "temporal_compress_times": 2, } - @property - def dummy_input(self): + def get_dummy_inputs(self) -> dict: batch_size = 2 num_frames = 3 # satisfies (T-1) % temporal_compress_times == 0 with temporal_compress_times=2 num_channels = 3 sizes = (16, 16) - - video = floats_tensor((batch_size, num_channels, num_frames) + sizes).to(torch_device) - + video = randn_tensor( + (batch_size, num_channels, num_frames, *sizes), generator=self.generator, device=torch_device + ) return {"sample": video} - @property - def input_shape(self): - return (3, 3, 16, 16) - - @property - def output_shape(self): - return (3, 3, 16, 16) - - def prepare_init_args_and_inputs_for_common(self): - init_dict = self.get_autoencoder_kl_kvae_video_config() - inputs_dict = self.dummy_input - return init_dict, inputs_dict - def test_gradient_checkpointing_is_applied(self): - expected_set = { - "KVAECachedEncoder3D", - "KVAECachedDecoder3D", - } - super().test_gradient_checkpointing_is_applied(expected_set=expected_set) +class TestAutoencoderKLKVAEVideo(AutoencoderKLKVAEVideoTesterConfig, ModelTesterMixin): + base_precision = 1e-2 - @unittest.skip("Unsupported test.") + @pytest.mark.skip("Unsupported test.") def test_outputs_equivalence(self): - pass + super().test_outputs_equivalence() - @unittest.skip( + @pytest.mark.skip( "Multi-GPU inference is not supported due to the stateful cache_dict passing through the forward pass." ) def test_model_parallelism(self): - pass + super().test_model_parallelism() - @unittest.skip( - "Multi-GPU inference is not supported due to the stateful cache_dict passing through the forward pass." - ) - def test_sharded_checkpoints_device_map(self): - pass - def _run_nondeterministic(self, fn): - # reflection_pad3d_backward_out_cuda has no deterministic CUDA implementation; - # temporarily relax the requirement for training tests that do backward passes. - import torch +class TestAutoencoderKLKVAEVideoTraining(AutoencoderKLKVAEVideoTesterConfig, TrainingTesterMixin): + """Training tests for AutoencoderKLKVAEVideo.""" - torch.use_deterministic_algorithms(False) - try: - fn() - finally: - torch.use_deterministic_algorithms(True) + def test_gradient_checkpointing_is_applied(self): + expected_set = {"KVAECachedEncoder3D", "KVAECachedDecoder3D"} + super().test_gradient_checkpointing_is_applied(expected_set=expected_set) def test_training(self): - self._run_nondeterministic(super().test_training) + _run_nondeterministic(super().test_training) - def test_ema_training(self): - self._run_nondeterministic(super().test_ema_training) + def test_training_with_ema(self): + _run_nondeterministic(super().test_training_with_ema) - @unittest.skip( + @pytest.mark.skip( "Gradient checkpointing recomputes the forward pass, but the model uses a stateful cache_dict " "that is mutated during the first forward. On recomputation the cache is already populated, " - "causing a different execution path and numerically different gradients. " - "GC still reduces peak memory usage; gradient correctness in the presence of GC is a known limitation." + "causing a different execution path and numerically different gradients." ) - def test_effective_gradient_checkpointing(self): - pass + def test_gradient_checkpointing_equivalence(self): + super().test_gradient_checkpointing_equivalence() def test_layerwise_casting_training(self): - self._run_nondeterministic(super().test_layerwise_casting_training) + _run_nondeterministic(super().test_layerwise_casting_training) + + +class TestAutoencoderKLKVAEVideoMemory(AutoencoderKLKVAEVideoTesterConfig, MemoryTesterMixin): + """Memory optimization tests for AutoencoderKLKVAEVideo.""" + + +class TestAutoencoderKLKVAEVideoSlicingTiling(AutoencoderKLKVAEVideoTesterConfig, NewAutoencoderTesterMixin): + """Slicing and tiling tests for AutoencoderKLKVAEVideo.""" From 331fa74514e77ec2f2f38886c181e4e584f7be2c Mon Sep 17 00:00:00 2001 From: Akshan Krithick Date: Sun, 31 May 2026 12:10:16 -0700 Subject: [PATCH 3/7] refactor autoencoder_oobleck tests --- .../test_models_autoencoder_oobleck.py | 141 ++++++++---------- 1 file changed, 59 insertions(+), 82 deletions(-) diff --git a/tests/models/autoencoders/test_models_autoencoder_oobleck.py b/tests/models/autoencoders/test_models_autoencoder_oobleck.py index d10e8ba33a12..96b70bc29bb4 100644 --- a/tests/models/autoencoders/test_models_autoencoder_oobleck.py +++ b/tests/models/autoencoders/test_models_autoencoder_oobleck.py @@ -16,34 +16,47 @@ import gc import unittest +import pytest import torch from datasets import load_dataset from parameterized import parameterized from diffusers import AutoencoderOobleck +from diffusers.utils.torch_utils import randn_tensor from ...testing_utils import ( backend_empty_cache, enable_full_determinism, - floats_tensor, slow, torch_all_close, torch_device, ) -from ..test_modeling_common import ModelTesterMixin -from .testing_utils import AutoencoderTesterMixin +from ..testing_utils import BaseModelTesterConfig, MemoryTesterMixin, ModelTesterMixin, TrainingTesterMixin +from .testing_utils import NewAutoencoderTesterMixin enable_full_determinism() -class AutoencoderOobleckTests(ModelTesterMixin, AutoencoderTesterMixin, unittest.TestCase): - model_class = AutoencoderOobleck - main_input_name = "sample" - base_precision = 1e-2 +class AutoencoderOobleckTesterConfig(BaseModelTesterConfig): + @property + def model_class(self): + return AutoencoderOobleck + + @property + def main_input_name(self) -> str: + return "sample" + + @property + def output_shape(self) -> tuple: + return (2, 24) + + @property + def generator(self): + return torch.Generator("cpu").manual_seed(0) - def get_autoencoder_oobleck_config(self, block_out_channels=None): - init_dict = { + def get_init_dict(self) -> dict: + return { "encoder_hidden_size": 12, "decoder_channels": 12, "decoder_input_channels": 6, @@ -51,33 +64,46 @@ def get_autoencoder_oobleck_config(self, block_out_channels=None): "downsampling_ratios": [2, 4], "channel_multiples": [1, 2], } - return init_dict - @property - def dummy_input(self): + def get_dummy_inputs(self) -> dict: batch_size = 4 num_channels = 2 seq_len = 24 + waveform = randn_tensor((batch_size, num_channels, seq_len), generator=self.generator, device=torch_device) + return {"sample": waveform, "sample_posterior": False} - waveform = floats_tensor((batch_size, num_channels, seq_len)).to(torch_device) - return {"sample": waveform, "sample_posterior": False} +class TestAutoencoderOobleck(AutoencoderOobleckTesterConfig, ModelTesterMixin): + base_precision = 1e-2 - @property - def input_shape(self): - return (2, 24) - @property - def output_shape(self): - return (2, 24) +class TestAutoencoderOobleckTraining(AutoencoderOobleckTesterConfig, TrainingTesterMixin): + """Training tests for AutoencoderOobleck.""" + + +class TestAutoencoderOobleckMemory(AutoencoderOobleckTesterConfig, MemoryTesterMixin): + """Memory optimization tests for AutoencoderOobleck.""" + + @pytest.mark.skip( + "Test not supported because of 'weight_norm_fwd_first_dim_kernel' not implemented for 'Float8_e4m3fn'" + ) + def test_layerwise_casting_training(self): + super().test_layerwise_casting_training() + + @pytest.mark.skip( + "The convolution layers of AutoencoderOobleck are wrapped with torch.nn.utils.weight_norm. " + "This causes the hook's pre_forward to not cast the module weights to compute_dtype." + ) + def test_layerwise_casting_memory(self): + super().test_layerwise_casting_memory() + - def prepare_init_args_and_inputs_for_common(self): - init_dict = self.get_autoencoder_oobleck_config() - inputs_dict = self.dummy_input - return init_dict, inputs_dict +class TestAutoencoderOobleckSlicingTiling(AutoencoderOobleckTesterConfig, NewAutoencoderTesterMixin): + """Slicing and tiling tests for AutoencoderOobleck.""" def test_enable_disable_slicing(self): - init_dict, inputs_dict = self.prepare_init_args_and_inputs_for_common() + init_dict = self.get_init_dict() + inputs_dict = self.get_dummy_inputs() torch.manual_seed(0) model = self.model_class(**init_dict).to(torch_device) @@ -91,55 +117,23 @@ def test_enable_disable_slicing(self): model.enable_slicing() output_with_slicing = model(**inputs_dict, generator=torch.manual_seed(0))[0] - self.assertLess( - (output_without_slicing.detach().cpu().numpy() - output_with_slicing.detach().cpu().numpy()).max(), - 0.5, - "VAE slicing should not affect the inference results", - ) + assert ( + output_without_slicing.detach().cpu().numpy() - output_with_slicing.detach().cpu().numpy() + ).max() < 0.5, "VAE slicing should not affect the inference results" torch.manual_seed(0) model.disable_slicing() output_without_slicing_2 = model(**inputs_dict, generator=torch.manual_seed(0))[0] - self.assertEqual( - output_without_slicing.detach().cpu().numpy().all(), - output_without_slicing_2.detach().cpu().numpy().all(), - "Without slicing outputs should match with the outputs when slicing is manually disabled.", - ) - - @unittest.skip("No attention module used in this model") - def test_set_attn_processor_for_determinism(self): - return - - @unittest.skip( - "Test not supported because of 'weight_norm_fwd_first_dim_kernel' not implemented for 'Float8_e4m3fn'" - ) - def test_layerwise_casting_training(self): - return super().test_layerwise_casting_training() - - @unittest.skip( - "The convolution layers of AutoencoderOobleck are wrapped with torch.nn.utils.weight_norm. This causes the hook's pre_forward to not " - "cast the module weights to compute_dtype (as required by forward pass). As a result, forward pass errors out. To fix:\n" - "1. Make sure `nn::Module::to` works with `torch.nn.utils.weight_norm` wrapped convolution layer.\n" - "2. Unskip this test." - ) - def test_layerwise_casting_inference(self): - pass - - @unittest.skip( - "The convolution layers of AutoencoderOobleck are wrapped with torch.nn.utils.weight_norm. This causes the hook's pre_forward to not " - "cast the module weights to compute_dtype (as required by forward pass). As a result, forward pass errors out. To fix:\n" - "1. Make sure `nn::Module::to` works with `torch.nn.utils.weight_norm` wrapped convolution layer.\n" - "2. Unskip this test." - ) - def test_layerwise_casting_memory(self): - pass + assert ( + output_without_slicing.detach().cpu().numpy().all() + == output_without_slicing_2.detach().cpu().numpy().all() + ), "Without slicing outputs should match with the outputs when slicing is manually disabled." @slow class AutoencoderOobleckIntegrationTests(unittest.TestCase): def tearDown(self): - # clean up the VRAM after each test super().tearDown() gc.collect() backend_empty_cache(torch_device) @@ -148,9 +142,7 @@ def _load_datasamples(self, num_samples): ds = load_dataset( "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True ) - # automatic decoding with librispeech speech_samples = ds.sort("id").select(range(num_samples))[:num_samples]["audio"] - return torch.nn.utils.rnn.pad_sequence( [torch.from_numpy(x["array"]) for x in speech_samples], batch_first=True ) @@ -158,25 +150,14 @@ def _load_datasamples(self, num_samples): def get_audio(self, audio_sample_size=2097152, fp16=False): dtype = torch.float16 if fp16 else torch.float32 audio = self._load_datasamples(2).to(torch_device).to(dtype) - - # pad / crop to audio_sample_size audio = torch.nn.functional.pad(audio[:, :audio_sample_size], pad=(0, audio_sample_size - audio.shape[-1])) - - # todo channel audio = audio.unsqueeze(1).repeat(1, 2, 1).to(torch_device) - return audio def get_oobleck_vae_model(self, model_id="stabilityai/stable-audio-open-1.0", fp16=False): torch_dtype = torch.float16 if fp16 else torch.float32 - - model = AutoencoderOobleck.from_pretrained( - model_id, - subfolder="vae", - torch_dtype=torch_dtype, - ) + model = AutoencoderOobleck.from_pretrained(model_id, subfolder="vae", torch_dtype=torch_dtype) model.to(torch_device) - return model def get_generator(self, seed=0): @@ -206,7 +187,6 @@ def test_stable_diffusion(self, seed, expected_slice, expected_mean_absolute_dif output_slice = sample[-1, 1, 5:10].cpu() expected_output_slice = torch.tensor(expected_slice) - assert torch_all_close(output_slice, expected_output_slice, atol=1e-5) def test_stable_diffusion_mode(self): @@ -237,13 +217,10 @@ def test_stable_diffusion_encode_decode(self, seed, expected_slice, expected_mea z = posterior.sample(generator=generator) sample = model.decode(z).sample - # (batch_size, latent_dim, sequence_length) assert posterior.mean.shape == (audio.shape[0], model.config.decoder_input_channels, 1024) - assert sample.shape == audio.shape assert ((sample - audio).abs().mean() - expected_mean_absolute_diff).abs() <= 1e-6 output_slice = sample[-1, 1, 5:10].cpu() expected_output_slice = torch.tensor(expected_slice) - assert torch_all_close(output_slice, expected_output_slice, atol=1e-5) From cae36fb9477c5c54d1874255e9283275fcfda365 Mon Sep 17 00:00:00 2001 From: Akshan Krithick Date: Sun, 31 May 2026 12:10:16 -0700 Subject: [PATCH 4/7] refactor consistency_decoder_vae tests --- .../test_models_consistency_decoder_vae.py | 86 +++++++++---------- 1 file changed, 40 insertions(+), 46 deletions(-) diff --git a/tests/models/autoencoders/test_models_consistency_decoder_vae.py b/tests/models/autoencoders/test_models_consistency_decoder_vae.py index ef04d151ecd1..ef37797bad12 100644 --- a/tests/models/autoencoders/test_models_consistency_decoder_vae.py +++ b/tests/models/autoencoders/test_models_consistency_decoder_vae.py @@ -30,22 +30,33 @@ torch_all_close, torch_device, ) -from ..test_modeling_common import ModelTesterMixin -from .testing_utils import AutoencoderTesterMixin +from ..testing_utils import BaseModelTesterConfig, MemoryTesterMixin, ModelTesterMixin, TrainingTesterMixin +from .testing_utils import NewAutoencoderTesterMixin enable_full_determinism() -class ConsistencyDecoderVAETests(ModelTesterMixin, AutoencoderTesterMixin, unittest.TestCase): - model_class = ConsistencyDecoderVAE - main_input_name = "sample" - base_precision = 1e-2 - forward_requires_fresh_args = True +class ConsistencyDecoderVAETesterConfig(BaseModelTesterConfig): + @property + def model_class(self): + return ConsistencyDecoderVAE + + @property + def main_input_name(self) -> str: + return "sample" + + @property + def output_shape(self) -> tuple: + return (3, 32, 32) - def get_consistency_vae_config(self, block_out_channels=None, norm_num_groups=None): - block_out_channels = block_out_channels or [2, 4] - norm_num_groups = norm_num_groups or 2 + @property + def generator(self): + return torch.Generator("cpu").manual_seed(0) + + def get_init_dict(self) -> dict: + block_out_channels = [2, 4] + norm_num_groups = 2 return { "encoder_block_out_channels": block_out_channels, "encoder_in_channels": 3, @@ -69,48 +80,43 @@ def get_consistency_vae_config(self, block_out_channels=None, norm_num_groups=No "latent_channels": 4, } - def inputs_dict(self, seed=None): - if seed is None: - generator = torch.Generator("cpu").manual_seed(0) - else: - generator = torch.Generator("cpu").manual_seed(seed) - image = randn_tensor((4, 3, 32, 32), generator=generator, device=torch.device(torch_device)) - + def get_dummy_inputs(self) -> dict: + generator = torch.Generator("cpu").manual_seed(0) + image = randn_tensor((4, 3, 32, 32), generator=generator, device=torch_device) return {"sample": image, "generator": generator} - @property - def input_shape(self): - return (3, 32, 32) - @property - def output_shape(self): - return (3, 32, 32) +class TestConsistencyDecoderVAE(ConsistencyDecoderVAETesterConfig, ModelTesterMixin): + base_precision = 1e-2 + + +class TestConsistencyDecoderVAETraining(ConsistencyDecoderVAETesterConfig, TrainingTesterMixin): + """Training tests for ConsistencyDecoderVAE.""" - @property - def init_dict(self): - return self.get_consistency_vae_config() - def prepare_init_args_and_inputs_for_common(self): - return self.init_dict, self.inputs_dict() +class TestConsistencyDecoderVAEMemory(ConsistencyDecoderVAETesterConfig, MemoryTesterMixin): + """Memory optimization tests for ConsistencyDecoderVAE.""" + + +class TestConsistencyDecoderVAESlicingTiling(ConsistencyDecoderVAETesterConfig, NewAutoencoderTesterMixin): + """Slicing and tiling tests for ConsistencyDecoderVAE.""" @slow class ConsistencyDecoderVAEIntegrationTests(unittest.TestCase): def setUp(self): - # clean up the VRAM before each test super().setUp() gc.collect() backend_empty_cache(torch_device) def tearDown(self): - # clean up the VRAM after each test super().tearDown() gc.collect() backend_empty_cache(torch_device) @torch.no_grad() def test_encode_decode(self): - vae = ConsistencyDecoderVAE.from_pretrained("openai/consistency-decoder") # TODO - update + vae = ConsistencyDecoderVAE.from_pretrained("openai/consistency-decoder") vae.to(torch_device) image = load_image( @@ -122,16 +128,14 @@ def test_encode_decode(self): ) latent = vae.encode(image).latent_dist.mean - sample = vae.decode(latent, generator=torch.Generator("cpu").manual_seed(0)).sample actual_output = sample[0, :2, :2, :2].flatten().cpu() expected_output = torch.tensor([-0.0141, -0.0014, 0.0115, 0.0086, 0.1051, 0.1053, 0.1031, 0.1024]) - assert torch_all_close(actual_output, expected_output, atol=5e-3) def test_sd(self): - vae = ConsistencyDecoderVAE.from_pretrained("openai/consistency-decoder") # TODO - update + vae = ConsistencyDecoderVAE.from_pretrained("openai/consistency-decoder") pipe = StableDiffusionPipeline.from_pretrained( "stable-diffusion-v1-5/stable-diffusion-v1-5", vae=vae, safety_checker=None ) @@ -146,13 +150,10 @@ def test_sd(self): actual_output = out[:2, :2, :2].flatten().cpu() expected_output = torch.tensor([0.7686, 0.8228, 0.6489, 0.7455, 0.8661, 0.8797, 0.8241, 0.8759]) - assert torch_all_close(actual_output, expected_output, atol=5e-3) def test_encode_decode_f16(self): - vae = ConsistencyDecoderVAE.from_pretrained( - "openai/consistency-decoder", torch_dtype=torch.float16 - ) # TODO - update + vae = ConsistencyDecoderVAE.from_pretrained("openai/consistency-decoder", torch_dtype=torch.float16) vae.to(torch_device) image = load_image( @@ -166,7 +167,6 @@ def test_encode_decode_f16(self): ) latent = vae.encode(image).latent_dist.mean - sample = vae.decode(latent, generator=torch.Generator("cpu").manual_seed(0)).sample actual_output = sample[0, :2, :2, :2].flatten().cpu() @@ -174,13 +174,10 @@ def test_encode_decode_f16(self): [-0.0111, -0.0125, -0.0017, -0.0007, 0.1257, 0.1465, 0.1450, 0.1471], dtype=torch.float16, ) - assert torch_all_close(actual_output, expected_output, atol=5e-3) def test_sd_f16(self): - vae = ConsistencyDecoderVAE.from_pretrained( - "openai/consistency-decoder", torch_dtype=torch.float16 - ) # TODO - update + vae = ConsistencyDecoderVAE.from_pretrained("openai/consistency-decoder", torch_dtype=torch.float16) pipe = StableDiffusionPipeline.from_pretrained( "stable-diffusion-v1-5/stable-diffusion-v1-5", torch_dtype=torch.float16, @@ -201,7 +198,6 @@ def test_sd_f16(self): [0.0000, 0.0249, 0.0000, 0.0000, 0.1709, 0.2773, 0.0471, 0.1035], dtype=torch.float16, ) - assert torch_all_close(actual_output, expected_output, atol=5e-3) def test_vae_tiling(self): @@ -219,7 +215,6 @@ def test_vae_tiling(self): generator=torch.Generator("cpu").manual_seed(0), ).images[0] - # make sure tiled vae decode yields the same result pipe.enable_vae_tiling() out_2 = pipe( "horse", @@ -230,7 +225,6 @@ def test_vae_tiling(self): assert torch_all_close(out_1, out_2, atol=5e-3) - # test that tiled decode works with various shapes shapes = [(1, 4, 73, 97), (1, 4, 97, 73), (1, 4, 49, 65), (1, 4, 65, 49)] with torch.no_grad(): for shape in shapes: From 3168001cc67fae70fa1070639e7f1aecae2d849b Mon Sep 17 00:00:00 2001 From: Akshan Krithick Date: Sun, 31 May 2026 12:10:16 -0700 Subject: [PATCH 5/7] refactor autoencoder_tiny tests --- .../test_models_autoencoder_tiny.py | 128 ++++++++---------- 1 file changed, 57 insertions(+), 71 deletions(-) diff --git a/tests/models/autoencoders/test_models_autoencoder_tiny.py b/tests/models/autoencoders/test_models_autoencoder_tiny.py index 68232aa12fdf..741a31a15f65 100644 --- a/tests/models/autoencoders/test_models_autoencoder_tiny.py +++ b/tests/models/autoencoders/test_models_autoencoder_tiny.py @@ -17,35 +17,48 @@ import gc import unittest +import pytest import torch from parameterized import parameterized from diffusers import AutoencoderTiny +from diffusers.utils.torch_utils import randn_tensor from ...testing_utils import ( backend_empty_cache, enable_full_determinism, - floats_tensor, load_hf_numpy, slow, torch_all_close, torch_device, ) -from ..test_modeling_common import ModelTesterMixin -from .testing_utils import AutoencoderTesterMixin +from ..testing_utils import BaseModelTesterConfig, MemoryTesterMixin, ModelTesterMixin, TrainingTesterMixin +from .testing_utils import NewAutoencoderTesterMixin enable_full_determinism() -class AutoencoderTinyTests(ModelTesterMixin, AutoencoderTesterMixin, unittest.TestCase): - model_class = AutoencoderTiny - main_input_name = "sample" - base_precision = 1e-2 +class AutoencoderTinyTesterConfig(BaseModelTesterConfig): + @property + def model_class(self): + return AutoencoderTiny + + @property + def main_input_name(self) -> str: + return "sample" + + @property + def output_shape(self) -> tuple: + return (3, 32, 32) + + @property + def generator(self): + return torch.Generator("cpu").manual_seed(0) - def get_autoencoder_tiny_config(self, block_out_channels=None): - block_out_channels = (len(block_out_channels) * [32]) if block_out_channels is not None else [32, 32] - init_dict = { + def get_init_dict(self) -> dict: + block_out_channels = [32, 32] + return { "in_channels": 3, "out_channels": 3, "encoder_block_out_channels": block_out_channels, @@ -53,53 +66,36 @@ def get_autoencoder_tiny_config(self, block_out_channels=None): "num_encoder_blocks": [b // min(block_out_channels) for b in block_out_channels], "num_decoder_blocks": [b // min(block_out_channels) for b in reversed(block_out_channels)], } - return init_dict - @property - def dummy_input(self): + def get_dummy_inputs(self) -> dict: batch_size = 4 num_channels = 3 sizes = (32, 32) - - image = floats_tensor((batch_size, num_channels) + sizes).to(torch_device) - + image = randn_tensor((batch_size, num_channels, *sizes), generator=self.generator, device=torch_device) return {"sample": image} - @property - def input_shape(self): - return (3, 32, 32) - - @property - def output_shape(self): - return (3, 32, 32) - - def prepare_init_args_and_inputs_for_common(self): - init_dict = self.get_autoencoder_tiny_config() - inputs_dict = self.dummy_input - return init_dict, inputs_dict - @unittest.skip("Model doesn't yet support smaller resolution.") - def test_enable_disable_tiling(self): - pass +class TestAutoencoderTiny(AutoencoderTinyTesterConfig, ModelTesterMixin): + base_precision = 1e-2 - @unittest.skip("Test not supported.") + @pytest.mark.skip("Test not supported.") def test_outputs_equivalence(self): - pass + super().test_outputs_equivalence() - @unittest.skip("Test not supported.") - def test_forward_with_norm_groups(self): - pass + +class TestAutoencoderTinyTraining(AutoencoderTinyTesterConfig, TrainingTesterMixin): + """Training tests for AutoencoderTiny.""" def test_gradient_checkpointing_is_applied(self): expected_set = {"DecoderTiny", "EncoderTiny"} super().test_gradient_checkpointing_is_applied(expected_set=expected_set) - def test_effective_gradient_checkpointing(self): + def test_gradient_checkpointing_equivalence(self): if not self.model_class._supports_gradient_checkpointing: - return # Skip test if model does not support gradient checkpointing + return - # enable deterministic behavior for gradient checkpointing - init_dict, inputs_dict = self.prepare_init_args_and_inputs_for_common() + init_dict = self.get_init_dict() + inputs_dict = self.get_dummy_inputs() inputs_dict_copy = copy.deepcopy(inputs_dict) torch.manual_seed(0) model = self.model_class(**init_dict) @@ -108,18 +104,13 @@ def test_effective_gradient_checkpointing(self): assert not model.is_gradient_checkpointing and model.training out = model(**inputs_dict).sample - # run the backwards pass on the model. For backwards pass, for simplicity purpose, - # we won't calculate the loss and rather backprop on out.sum() model.zero_grad() - labels = torch.randn_like(out) loss = (out - labels).mean() loss.backward() - # re-instantiate the model now enabling gradient checkpointing torch.manual_seed(0) model_2 = self.model_class(**init_dict) - # clone model model_2.load_state_dict(model.state_dict()) model_2.to(torch_device) model_2.enable_gradient_checkpointing() @@ -127,43 +118,45 @@ def test_effective_gradient_checkpointing(self): assert model_2.is_gradient_checkpointing and model_2.training out_2 = model_2(**inputs_dict_copy).sample - # run the backwards pass on the model. For backwards pass, for simplicity purpose, - # we won't calculate the loss and rather backprop on out.sum() model_2.zero_grad() loss_2 = (out_2 - labels).mean() loss_2.backward() - # compare the output and parameters gradients - self.assertTrue((loss - loss_2).abs() < 1e-3) + assert (loss - loss_2).abs() < 1e-3 named_params = dict(model.named_parameters()) named_params_2 = dict(model_2.named_parameters()) - for name, param in named_params.items(): if "encoder.layers" in name: continue - self.assertTrue(torch_all_close(param.grad.data, named_params_2[name].grad.data, atol=3e-2)) + assert torch_all_close(param.grad.data, named_params_2[name].grad.data, atol=3e-2) - @unittest.skip( - "The forward pass of AutoencoderTiny creates a torch.float32 tensor. This causes inference in compute_dtype=torch.bfloat16 to fail. To fix:\n" - "1. Change the forward pass to be dtype agnostic.\n" - "2. Unskip this test." - ) - def test_layerwise_casting_inference(self): - pass - @unittest.skip( - "The forward pass of AutoencoderTiny creates a torch.float32 tensor. This causes inference in compute_dtype=torch.bfloat16 to fail. To fix:\n" - "1. Change the forward pass to be dtype agnostic.\n" - "2. Unskip this test." +class TestAutoencoderTinyMemory(AutoencoderTinyTesterConfig, MemoryTesterMixin): + """Memory optimization tests for AutoencoderTiny.""" + + @pytest.mark.skip( + "The forward pass of AutoencoderTiny creates a torch.float32 tensor. " + "This causes inference in compute_dtype=torch.bfloat16 to fail." ) def test_layerwise_casting_memory(self): - pass + super().test_layerwise_casting_memory() + + +class TestAutoencoderTinySlicingTiling(AutoencoderTinyTesterConfig, NewAutoencoderTesterMixin): + """Slicing and tiling tests for AutoencoderTiny.""" + + @pytest.mark.skip("Model does not yet support smaller resolution.") + def test_enable_disable_tiling(self): + super().test_enable_disable_tiling() + + @pytest.mark.skip("Test not supported.") + def test_forward_with_norm_groups(self): + super().test_forward_with_norm_groups() @slow class AutoencoderTinyIntegrationTests(unittest.TestCase): def tearDown(self): - # clean up the VRAM after each test super().tearDown() gc.collect() backend_empty_cache(torch_device) @@ -178,7 +171,6 @@ def get_sd_image(self, seed=0, shape=(4, 3, 512, 512), fp16=False): def get_sd_vae_model(self, model_id="hf-internal-testing/taesd-diffusers", fp16=False): torch_dtype = torch.float16 if fp16 else torch.float32 - model = AutoencoderTiny.from_pretrained(model_id, torch_dtype=torch_dtype) model.to(torch_device).eval() return model @@ -211,26 +203,20 @@ def test_stable_diffusion(self): output_slice = sample[-1, -2:, -2:, :2].flatten().float().cpu() expected_output_slice = torch.tensor([0.0093, 0.6385, -0.1274, 0.1631, -0.1762, 0.5232, -0.3108, -0.0382]) - assert torch_all_close(output_slice, expected_output_slice, atol=3e-3) @parameterized.expand([(True,), (False,)]) def test_tae_roundtrip(self, enable_tiling): - # load the autoencoder model = self.get_sd_vae_model() if enable_tiling: model.enable_tiling() - # make a black image with a white square in the middle, - # which is large enough to split across multiple tiles image = -torch.ones(1, 3, 1024, 1024, device=torch_device) image[..., 256:768, 256:768] = 1.0 - # round-trip the image through the autoencoder with torch.no_grad(): sample = model(image).sample - # the autoencoder reconstruction should match original image, sorta def downscale(x): return torch.nn.functional.avg_pool2d(x, model.spatial_scale_factor) From ce58c5ba1ae4952853a0866ba833f5a6dbe27bcd Mon Sep 17 00:00:00 2001 From: Akshan Krithick Date: Sun, 31 May 2026 12:18:26 -0700 Subject: [PATCH 6/7] refactor autoencoder_vidtok tests --- .../test_models_autoencoder_vidtok.py | 145 +++++++++--------- 1 file changed, 75 insertions(+), 70 deletions(-) diff --git a/tests/models/autoencoders/test_models_autoencoder_vidtok.py b/tests/models/autoencoders/test_models_autoencoder_vidtok.py index 70932f2b55aa..0b1bd49b5ba7 100644 --- a/tests/models/autoencoders/test_models_autoencoder_vidtok.py +++ b/tests/models/autoencoders/test_models_autoencoder_vidtok.py @@ -13,26 +13,38 @@ # See the License for the specific language governing permissions and # limitations under the License. -import unittest - +import pytest import torch from diffusers import AutoencoderVidTok -from diffusers.utils.testing_utils import ( - floats_tensor, - torch_device, -) +from diffusers.utils.torch_utils import randn_tensor -from ...testing_utils import IS_GITHUB_ACTIONS -from ..test_modeling_common import ModelTesterMixin, UNetTesterMixin +from ...testing_utils import IS_GITHUB_ACTIONS, enable_full_determinism, torch_device +from ..testing_utils import BaseModelTesterConfig, MemoryTesterMixin, ModelTesterMixin, TrainingTesterMixin +from .testing_utils import NewAutoencoderTesterMixin -class AutoencoderVidTokTests(ModelTesterMixin, UNetTesterMixin, unittest.TestCase): - model_class = AutoencoderVidTok - main_input_name = "sample" - base_precision = 1e-2 +enable_full_determinism() + + +class AutoencoderVidTokTesterConfig(BaseModelTesterConfig): + @property + def model_class(self): + return AutoencoderVidTok + + @property + def main_input_name(self) -> str: + return "sample" + + @property + def output_shape(self) -> tuple: + return (3, 16, 32, 32) + + @property + def generator(self): + return torch.Generator("cpu").manual_seed(0) - def get_autoencoder_vidtok_config(self): + def get_init_dict(self) -> dict: return { "is_causal": False, "in_channels": 3, @@ -46,32 +58,47 @@ def get_autoencoder_vidtok_config(self): "codebook_size": 262144, } - @property - def dummy_input(self): + def get_dummy_inputs(self) -> dict: batch_size = 4 num_frames = 16 num_channels = 3 sizes = (32, 32) + image = randn_tensor( + (batch_size, num_channels, num_frames, *sizes), generator=self.generator, device=torch_device + ) + return {"sample": image} - image = floats_tensor((batch_size, num_channels, num_frames) + sizes).to(torch_device) - return {"sample": image} +class TestAutoencoderVidTok(AutoencoderVidTokTesterConfig, ModelTesterMixin): + base_precision = 1e-2 - @property - def input_shape(self): - return (3, 16, 32, 32) + @pytest.mark.skip("Unsupported test.") + def test_outputs_equivalence(self): + super().test_outputs_equivalence() - @property - def output_shape(self): - return (3, 16, 32, 32) - def prepare_init_args_and_inputs_for_common(self): - init_dict = self.get_autoencoder_vidtok_config() - inputs_dict = self.dummy_input - return init_dict, inputs_dict +class TestAutoencoderVidTokTraining(AutoencoderVidTokTesterConfig, TrainingTesterMixin): + """Training tests for AutoencoderVidTok.""" + + def test_gradient_checkpointing_is_applied(self): + expected_set = {"VidTokEncoder3D", "VidTokDecoder3D"} + super().test_gradient_checkpointing_is_applied(expected_set=expected_set) + + @pytest.mark.skipif(IS_GITHUB_ACTIONS, reason="Skipping test inside GitHub Actions environment") + def test_layerwise_casting_training(self): + super().test_layerwise_casting_training() + + +class TestAutoencoderVidTokMemory(AutoencoderVidTokTesterConfig, MemoryTesterMixin): + """Memory optimization tests for AutoencoderVidTok.""" + + +class TestAutoencoderVidTokSlicingTiling(AutoencoderVidTokTesterConfig, NewAutoencoderTesterMixin): + """Slicing and tiling tests for AutoencoderVidTok.""" def test_enable_disable_tiling(self): - init_dict, inputs_dict = self.prepare_init_args_and_inputs_for_common() + init_dict = self.get_init_dict() + inputs_dict = self.get_dummy_inputs() torch.manual_seed(0) model = self.model_class(**init_dict).to(torch_device) @@ -83,28 +110,24 @@ def test_enable_disable_tiling(self): model.enable_tiling() output_with_tiling = model(**inputs_dict, generator=torch.manual_seed(0))[0] - self.assertLess( - (output_without_tiling.detach().cpu().numpy() - output_with_tiling.detach().cpu().numpy()).max(), - 0.5, - "VAE tiling should not affect the inference results", - ) + assert ( + output_without_tiling.detach().cpu().numpy() - output_with_tiling.detach().cpu().numpy() + ).max() < 0.5, "VAE tiling should not affect the inference results" torch.manual_seed(0) model.disable_tiling() output_without_tiling_2 = model(**inputs_dict, generator=torch.manual_seed(0))[0] - self.assertEqual( - output_without_tiling.detach().cpu().numpy().all(), - output_without_tiling_2.detach().cpu().numpy().all(), - "Without tiling outputs should match with the outputs when tiling is manually disabled.", - ) + assert ( + output_without_tiling.detach().cpu().numpy().all() == output_without_tiling_2.detach().cpu().numpy().all() + ), "Without tiling outputs should match with the outputs when tiling is manually disabled." def test_enable_disable_slicing(self): - init_dict, inputs_dict = self.prepare_init_args_and_inputs_for_common() + init_dict = self.get_init_dict() + inputs_dict = self.get_dummy_inputs() torch.manual_seed(0) model = self.model_class(**init_dict).to(torch_device) - inputs_dict.update({"return_dict": False}) torch.manual_seed(0) @@ -114,50 +137,32 @@ def test_enable_disable_slicing(self): model.enable_slicing() output_with_slicing = model(**inputs_dict, generator=torch.manual_seed(0))[0] - self.assertLess( - (output_without_slicing.detach().cpu().numpy() - output_with_slicing.detach().cpu().numpy()).max(), - 0.5, - "VAE slicing should not affect the inference results", - ) + assert ( + output_without_slicing.detach().cpu().numpy() - output_with_slicing.detach().cpu().numpy() + ).max() < 0.5, "VAE slicing should not affect the inference results" torch.manual_seed(0) model.disable_slicing() output_without_slicing_2 = model(**inputs_dict, generator=torch.manual_seed(0))[0] - self.assertEqual( - output_without_slicing.detach().cpu().numpy().all(), - output_without_slicing_2.detach().cpu().numpy().all(), - "Without slicing outputs should match with the outputs when slicing is manually disabled.", - ) - - def test_gradient_checkpointing_is_applied(self): - expected_set = { - "VidTokEncoder3D", - "VidTokDecoder3D", - } - super().test_gradient_checkpointing_is_applied(expected_set=expected_set) + assert ( + output_without_slicing.detach().cpu().numpy().all() + == output_without_slicing_2.detach().cpu().numpy().all() + ), "Without slicing outputs should match when slicing is manually disabled." def test_forward_with_norm_groups(self): - r"""VidTok uses layernorm instead of groupnorm.""" - init_dict, inputs_dict = self.prepare_init_args_and_inputs_for_common() + """VidTok uses layernorm instead of groupnorm.""" + init_dict = self.get_init_dict() + inputs_dict = self.get_dummy_inputs() model = self.model_class(**init_dict) model.to(torch_device) model.eval() with torch.no_grad(): output = model(**inputs_dict) - if isinstance(output, dict): output = output.to_tuple()[0] - self.assertIsNotNone(output) + assert output is not None expected_shape = inputs_dict["sample"].shape - self.assertEqual(output.shape, expected_shape, "Input and output shapes do not match") - - @unittest.skip("Unsupported test.") - def test_outputs_equivalence(self): - pass - - @unittest.skipIf(IS_GITHUB_ACTIONS, reason="Skipping test inside GitHub Actions environment") - def test_layerwise_casting_training(self): - super().test_layerwise_casting_training() + assert output.shape == expected_shape, "Input and output shapes do not match" From e531f623acafe19af91090182a47b1931b963f9b Mon Sep 17 00:00:00 2001 From: Akshan Krithick Date: Mon, 1 Jun 2026 19:15:51 -0700 Subject: [PATCH 7/7] remove unused base_precision and test_outputs_equivalence skips --- .../autoencoders/test_models_autoencoder_kl_kvae_video.py | 6 ------ .../models/autoencoders/test_models_autoencoder_oobleck.py | 2 +- tests/models/autoencoders/test_models_autoencoder_tiny.py | 6 +----- tests/models/autoencoders/test_models_autoencoder_vidtok.py | 4 +--- .../autoencoders/test_models_consistency_decoder_vae.py | 2 +- 5 files changed, 4 insertions(+), 16 deletions(-) diff --git a/tests/models/autoencoders/test_models_autoencoder_kl_kvae_video.py b/tests/models/autoencoders/test_models_autoencoder_kl_kvae_video.py index 1e04c6be5a5b..d810145c4da8 100644 --- a/tests/models/autoencoders/test_models_autoencoder_kl_kvae_video.py +++ b/tests/models/autoencoders/test_models_autoencoder_kl_kvae_video.py @@ -77,12 +77,6 @@ def get_dummy_inputs(self) -> dict: class TestAutoencoderKLKVAEVideo(AutoencoderKLKVAEVideoTesterConfig, ModelTesterMixin): - base_precision = 1e-2 - - @pytest.mark.skip("Unsupported test.") - def test_outputs_equivalence(self): - super().test_outputs_equivalence() - @pytest.mark.skip( "Multi-GPU inference is not supported due to the stateful cache_dict passing through the forward pass." ) diff --git a/tests/models/autoencoders/test_models_autoencoder_oobleck.py b/tests/models/autoencoders/test_models_autoencoder_oobleck.py index 96b70bc29bb4..ccd67bc7c704 100644 --- a/tests/models/autoencoders/test_models_autoencoder_oobleck.py +++ b/tests/models/autoencoders/test_models_autoencoder_oobleck.py @@ -74,7 +74,7 @@ def get_dummy_inputs(self) -> dict: class TestAutoencoderOobleck(AutoencoderOobleckTesterConfig, ModelTesterMixin): - base_precision = 1e-2 + pass class TestAutoencoderOobleckTraining(AutoencoderOobleckTesterConfig, TrainingTesterMixin): diff --git a/tests/models/autoencoders/test_models_autoencoder_tiny.py b/tests/models/autoencoders/test_models_autoencoder_tiny.py index 741a31a15f65..7fdab4aeb910 100644 --- a/tests/models/autoencoders/test_models_autoencoder_tiny.py +++ b/tests/models/autoencoders/test_models_autoencoder_tiny.py @@ -76,11 +76,7 @@ def get_dummy_inputs(self) -> dict: class TestAutoencoderTiny(AutoencoderTinyTesterConfig, ModelTesterMixin): - base_precision = 1e-2 - - @pytest.mark.skip("Test not supported.") - def test_outputs_equivalence(self): - super().test_outputs_equivalence() + pass class TestAutoencoderTinyTraining(AutoencoderTinyTesterConfig, TrainingTesterMixin): diff --git a/tests/models/autoencoders/test_models_autoencoder_vidtok.py b/tests/models/autoencoders/test_models_autoencoder_vidtok.py index 0b1bd49b5ba7..eb2863121a21 100644 --- a/tests/models/autoencoders/test_models_autoencoder_vidtok.py +++ b/tests/models/autoencoders/test_models_autoencoder_vidtok.py @@ -70,9 +70,7 @@ def get_dummy_inputs(self) -> dict: class TestAutoencoderVidTok(AutoencoderVidTokTesterConfig, ModelTesterMixin): - base_precision = 1e-2 - - @pytest.mark.skip("Unsupported test.") + @pytest.mark.skip("VidTok output structure not compatible with recursive output check.") def test_outputs_equivalence(self): super().test_outputs_equivalence() diff --git a/tests/models/autoencoders/test_models_consistency_decoder_vae.py b/tests/models/autoencoders/test_models_consistency_decoder_vae.py index ef37797bad12..0edb713d9a1f 100644 --- a/tests/models/autoencoders/test_models_consistency_decoder_vae.py +++ b/tests/models/autoencoders/test_models_consistency_decoder_vae.py @@ -87,7 +87,7 @@ def get_dummy_inputs(self) -> dict: class TestConsistencyDecoderVAE(ConsistencyDecoderVAETesterConfig, ModelTesterMixin): - base_precision = 1e-2 + pass class TestConsistencyDecoderVAETraining(ConsistencyDecoderVAETesterConfig, TrainingTesterMixin):