Skip to content

Commit 7dc52ea

Browse files
authored
[Quantization] dtype fix for GGUF + fix BnB tests (#11159)
* update * update * update * update
1 parent 739d6ec commit 7dc52ea

File tree

3 files changed

+8
-4
lines changed

3 files changed

+8
-4
lines changed

src/diffusers/loaders/single_file_model.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -282,6 +282,7 @@ def from_single_file(cls, pretrained_model_link_or_path_or_dict: Optional[str] =
282282
if quantization_config is not None:
283283
hf_quantizer = DiffusersAutoQuantizer.from_config(quantization_config)
284284
hf_quantizer.validate_environment()
285+
torch_dtype = hf_quantizer.update_torch_dtype(torch_dtype)
285286

286287
else:
287288
hf_quantizer = None

tests/quantization/bnb/test_mixed_int8.py

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -90,13 +90,16 @@ class Base8bitTests(unittest.TestCase):
9090

9191
def get_dummy_inputs(self):
9292
prompt_embeds = load_pt(
93-
"https://huggingface.co/datasets/hf-internal-testing/bnb-diffusers-testing-artifacts/resolve/main/prompt_embeds.pt"
93+
"https://huggingface.co/datasets/hf-internal-testing/bnb-diffusers-testing-artifacts/resolve/main/prompt_embeds.pt",
94+
map_location="cpu",
9495
)
9596
pooled_prompt_embeds = load_pt(
96-
"https://huggingface.co/datasets/hf-internal-testing/bnb-diffusers-testing-artifacts/resolve/main/pooled_prompt_embeds.pt"
97+
"https://huggingface.co/datasets/hf-internal-testing/bnb-diffusers-testing-artifacts/resolve/main/pooled_prompt_embeds.pt",
98+
map_location="cpu",
9799
)
98100
latent_model_input = load_pt(
99-
"https://huggingface.co/datasets/hf-internal-testing/bnb-diffusers-testing-artifacts/resolve/main/latent_model_input.pt"
101+
"https://huggingface.co/datasets/hf-internal-testing/bnb-diffusers-testing-artifacts/resolve/main/latent_model_input.pt",
102+
map_location="cpu",
100103
)
101104

102105
input_dict_for_transformer = {

tests/quantization/gguf/test_gguf.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -57,7 +57,7 @@ def test_gguf_linear_layers(self):
5757
if isinstance(module, torch.nn.Linear) and hasattr(module.weight, "quant_type"):
5858
assert module.weight.dtype == torch.uint8
5959
if module.bias is not None:
60-
assert module.bias.dtype == torch.float32
60+
assert module.bias.dtype == self.torch_dtype
6161

6262
def test_gguf_memory_usage(self):
6363
quantization_config = GGUFQuantizationConfig(compute_dtype=self.torch_dtype)

0 commit comments

Comments (0)