pytorch/ao (Public)

enable smoothquant for int8 static tensor #3468


Open
jcaip wants to merge 40 commits into main from jcaip/enable-smoothquant

Changes from 1 commit (40 commits total)
48cdb61  Int8Tensor migration (jcaip, Dec 1, 2025)
0b73aed  ruff fixes (jcaip, Dec 1, 2025)
1e49945  add init (jcaip, Dec 1, 2025)
669b6ee  fix ruff again (jcaip, Dec 1, 2025)
9071526  update (jcaip, Dec 1, 2025)
1539e0f  wip (jcaip, Dec 2, 2025)
d9a2b1b  Merge branch 'main' into jcaip/int8-tensor (jcaip, Dec 3, 2025)
673f228  undo update tests (jcaip, Dec 3, 2025)
739fd64  fix ruff (jcaip, Dec 3, 2025)
750db1a  fix varname (jcaip, Dec 3, 2025)
9410488  fix typing (jcaip, Dec 3, 2025)
45a3a76  add tests (jcaip, Dec 3, 2025)
4e2f09c  fix dtype (jcaip, Dec 3, 2025)
dd80cca  fix ci (jcaip, Dec 3, 2025)
7f73062  address granularity cr (jcaip, Dec 4, 2025)
ac6a2b6  update _choose_quant_func_and_quantize_tensor (jcaip, Dec 4, 2025)
f28df4a  make block size required attribute (jcaip, Dec 4, 2025)
328585e  made dtype required as well (jcaip, Dec 4, 2025)
ce4d568  address nits (jcaip, Dec 4, 2025)
a665d45  skip per tensor weight only test for now (jcaip, Dec 4, 2025)
0338016  add static quant (jcaip, Dec 3, 2025)
ee39691  add static quant (jcaip, Dec 4, 2025)
9eb0aa9  update (jcaip, Dec 5, 2025)
d4a1514  static quant working eager + compile (jcaip, Dec 6, 2025)
3cdea56  remove file (jcaip, Dec 6, 2025)
fa9022d  added asserts (jcaip, Dec 6, 2025)
8ce5cde  undo smoothquant change (jcaip, Dec 6, 2025)
6f64121  fix return (jcaip, Dec 6, 2025)
8ae921d  Merge branch 'main' into jcaip/static-quant-rebased (jcaip, Dec 7, 2025)
5b9e243  got smoothquant + int8 static working (jcaip, Dec 8, 2025)
7a0e38f  generalized smoothquat code (jcaip, Dec 8, 2025)
3d18edf  free tests (jcaip, Dec 8, 2025)
9e07f8b  fix static scale check (jcaip, Dec 8, 2025)
4274e02  update (jcaip, Dec 8, 2025)
b5309eb  address cr feedback (jcaip, Dec 9, 2025)
a732fee  Merge branch 'jcaip/static-quant-rebased' into jcaip/enable-smoothquant (jcaip, Dec 9, 2025)
0c23589  Merge branch 'main' into jcaip/enable-smoothquant (jcaip, Dec 9, 2025)
0872986  update (jcaip, Dec 17, 2025)
049830f  fix ruff (jcaip, Dec 17, 2025)
2586ab6  fix varname (jcaip, Dec 18, 2025)
fix static scale check
jcaip committed Dec 8, 2025
commit 9e07f8b0839b1a441f836d3800c3546e8626d79d


test/prototype/test_smoothquant.py (7 changes: 6 additions & 1 deletion)

@@ -106,7 +106,12 @@ def test_smoothquant_accuracy(self, alpha, base_config, device, input_dtype):
         # Step 1. Basic quantization
         basic_model = deepcopy(m)
         if isinstance(base_config, Int8StaticActivationInt8WeightConfig):
-            quantize_(basic_model, Int8DynamicActivationInt8WeightConfig(version=2))
+            quantize_(
+                basic_model,
+                Int8DynamicActivationInt8WeightConfig(
+                    version=2, granularity=base_config.granularity
+                ),
+            )
         else:
             quantize_(basic_model, base_config)
         out_basic = basic_model(*x)
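The change above makes the dynamic-quant baseline use the same granularity as the static config under test, so the two runs differ only in when activation scales are computed. As a rough, hypothetical illustration of what granularity means for int8 scales (the function and names below are ours, not torchao's):

```python
# Hypothetical sketch: the `granularity` setting decides how many int8
# scales a weight matrix gets.
def int8_scales(matrix, per_row=False):
    """Absmax int8 scales: one per row (PerRow) or one overall (PerTensor)."""
    if per_row:
        # PerRow: each row gets its own absmax-derived scale.
        return [max(abs(v) for v in row) / 127 for row in matrix]
    # PerTensor: a single scale for the whole matrix.
    return [max(abs(v) for row in matrix for v in row) / 127]

w = [[1.0, -2.0], [0.5, 4.0]]
print(len(int8_scales(w)))                # per-tensor: 1 scale
print(len(int8_scales(w, per_row=True)))  # per-row: 2 scales
```

If the baseline and the static path used different granularities, their quantization error would differ for reasons unrelated to static vs. dynamic activation scaling, which is what the fix avoids.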
torchao/prototype/smoothquant/core.py (4 changes: 2 additions & 2 deletions)

@@ -52,7 +52,7 @@ def calculate_qparams(self, weight_quant_kwargs=None):
         inputs = [inp.to(self.device) for inp in self.inputs]
         acc = torch.cat(inputs, dim=0)
         # Reshape if needed: [batch, seq, features] -> [batch*seq, features]
-        temp = acc
+        example_input_for_quantization = acc
         if acc.ndim > 2:
             acc = acc.view(-1, acc.shape[-1])

@@ -71,7 +71,7 @@ def calculate_qparams(self, weight_quant_kwargs=None):

         if weight_quant_kwargs is not None:
             quant_smooth_activation = _choose_quant_func_and_quantize_tensor(
-                temp / smoothing_factor, weight_quant_kwargs
+                example_input_for_quantization / smoothing_factor, weight_quant_kwargs
             )
             return smoothing_factor, quant_smooth_activation.scale
         else:
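The hunks above rename the cached pre-smoothing activation and quantize it after dividing by the smoothing factor. For context, a minimal sketch of the standard SmoothQuant recipe as described in the SmoothQuant paper (not torchao's implementation):

```python
# Per input channel j, the SmoothQuant smoothing factor is
#   s_j = max|X_j| ** alpha / max|W_j| ** (1 - alpha)
# Activations are divided by s and weights multiplied by s, shifting
# quantization difficulty from activations onto weights.
def smoothing_factor(act_absmax, weight_absmax, alpha=0.5):
    return [
        (a ** alpha) / (w ** (1.0 - alpha))
        for a, w in zip(act_absmax, weight_absmax)
    ]

# Channel 0 has a large activation outlier (8.0) and a small weight
# range (0.5), so it gets a large smoothing factor.
s = smoothing_factor([8.0, 2.0], [0.5, 2.0], alpha=0.5)
```

Dividing the example input by `smoothing_factor` before quantization, as the diff does, means the computed static activation scale matches the smoothed activations the model will actually see at inference time.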
torchao/quantization/quantize_/workflows/int8/int8_tensor.py (12 changes: 5 additions & 7 deletions)

@@ -3,7 +3,7 @@
 #
 # This source code is licensed under the BSD 3-Clause license found in the
 # LICENSE file in the root directory of this source tree.
-
+import math
 from dataclasses import dataclass
 from typing import List, Optional

@@ -140,12 +140,10 @@ def from_hp(
     else:
         # Scale can be provided in the case of static quant
        assert scale.ndim == hp_tensor.ndim
-        # if isinstance(granularity, PerTensor):
-        #     assert scale.numel() == 1
-        # elif isinstance(granularity, PerRow):
-        #     breakpoint()
-        #     assert scale.numel() == block_size[-1]
-
+        num_expected_values = math.prod(
+            [num_dim // bs for (bs, num_dim) in zip(block_size, hp_tensor.shape)]
+        )
+        assert scale.numel() == num_expected_values
        zero_point = torch.zeros_like(scale, dtype=torch.int8)

        int_data = quantize_affine(
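This is the "fix static scale check" commit itself: the commented-out per-granularity branches (including a leftover `breakpoint()`) are replaced by a single formula that validates the provided static scale for any block size. A standalone sketch of the same arithmetic:

```python
import math

# Mirrors the new check: the expected number of scale values is the
# product over dimensions of (dim_size // block_size).
def expected_scale_numel(shape, block_size):
    return math.prod(d // b for b, d in zip(block_size, shape))

shape = (128, 256)
print(expected_scale_numel(shape, (128, 256)))  # per-tensor block: 1 scale
print(expected_scale_numel(shape, (1, 256)))    # per-row block: 128 scales
```

One formula covers per-tensor, per-row, and any future blockwise granularity, which is why the hard-coded `PerTensor`/`PerRow` branches could be deleted.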
