Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings
/ao Public

enable smoothquant for int8 static tensor #3468

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
jcaip wants to merge 40 commits into main
base: main
Choose a base branch
Loading
from jcaip/enable-smoothquant
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
40 commits
Select commit. Hold shift + click to select a range
48cdb61
Int8Tensor migration
jcaipDec 1, 2025
0b73aed
ruff fixes
jcaipDec 1, 2025
1e49945
add init
jcaipDec 1, 2025
669b6ee
fix ruff again
jcaipDec 1, 2025
9071526
update
jcaipDec 1, 2025
1539e0f
wip
jcaipDec 2, 2025
d9a2b1b
Merge branch 'main' into jcaip/int8-tensor
jcaipDec 3, 2025
673f228
undo update tests
jcaipDec 3, 2025
739fd64
fix ruff
jcaipDec 3, 2025
750db1a
fix varname
jcaipDec 3, 2025
9410488
fix typing
jcaipDec 3, 2025
45a3a76
add tests
jcaipDec 3, 2025
4e2f09c
fix dtype
jcaipDec 3, 2025
dd80cca
fix ci
jcaipDec 3, 2025
7f73062
address granularity cr
jcaipDec 4, 2025
ac6a2b6
update _choose_quant_func_and_quantize_tensor
jcaipDec 4, 2025
f28df4a
make block size required attribute
jcaipDec 4, 2025
328585e
made dtype required as well
jcaipDec 4, 2025
ce4d568
address nits
jcaipDec 4, 2025
a665d45
skip per tensor weight only test for now
jcaipDec 4, 2025
0338016
add static quant
jcaipDec 3, 2025
ee39691
add static quant
jcaipDec 4, 2025
9eb0aa9
update
jcaipDec 5, 2025
d4a1514
static quant working eager + compile
jcaipDec 6, 2025
3cdea56
remove file
jcaipDec 6, 2025
fa9022d
added asserts
jcaipDec 6, 2025
8ce5cde
undo smoothquant change
jcaipDec 6, 2025
6f64121
fix return
jcaipDec 6, 2025
8ae921d
Merge branch 'main' into jcaip/static-quant-rebased
jcaipDec 7, 2025
5b9e243
got smoothquant + int8 static working
jcaipDec 8, 2025
7a0e38f
generalized smoothquat code
jcaipDec 8, 2025
3d18edf
free tests
jcaipDec 8, 2025
9e07f8b
fix static scale check
jcaipDec 8, 2025
4274e02
update
jcaipDec 8, 2025
b5309eb
address cr feedback
jcaipDec 9, 2025
a732fee
Merge branch 'jcaip/static-quant-rebased' into jcaip/enable-smoothquant
jcaipDec 9, 2025
0c23589
Merge branch 'main' into jcaip/enable-smoothquant
jcaipDec 9, 2025
0872986
update
jcaipDec 17, 2025
049830f
fix ruff
jcaipDec 17, 2025
2586ab6
fix varname
jcaipDec 18, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
PrevPrevious commit
NextNext commit
ruff fixes
  • Loading branch information
@jcaip
jcaip committed Dec 1, 2025
commit 0b73aed8bea8f26ae60a94f23e885f1a09ed0196

Some comments aren't visible on the classic Files Changed page.

3 changes: 1 addition & 2 deletions — test/quantization/test_quant_api.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -30,13 +30,12 @@
AffineQuantizedTensor,
Int4CPULayout,
Int4XPULayout,
PlainLayout,
TensorCoreTiledLayout,
)
from torchao.quantization import (
Float8Tensor,
Int8Tensor,
Int4TilePackedTo4dTensor,
Int8Tensor,
IntxUnpackedToInt8Tensor,
LinearActivationQuantizedTensor,
PerGroup,
Expand Down
2 changes: 1 addition & 1 deletion — torchao/quantization/__init__.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -93,12 +93,12 @@
)
from .quantize_.workflows import (
Float8Tensor,
Int8Tensor,
Int4MarlinSparseTensor,
Int4PlainInt32Tensor,
Int4PreshuffledTensor,
Int4Tensor,
Int4TilePackedTo4dTensor,
Int8Tensor,
IntxOpaqueTensor,
IntxUnpackedToInt8Tensor,
)
Expand Down
14 changes: 8 additions & 6 deletions — torchao/quantization/quant_api.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -15,7 +15,6 @@
and mixed GEMM kernels
"""

from torchao.quantization.quantize_.workflows.int8.int8_tensor import (
    QuantizeTensorToInt8Kwargs,
)
import logging
import re
import types
Expand DownExpand Up@@ -82,14 +81,17 @@
Int4PlainInt32Tensor,
Int4PreshuffledTensor,
Int4Tensor,
Int8Tensor,
Int4TilePackedTo4dTensor,
Int8Tensor,
IntxChooseQParamsAlgorithm,
IntxOpaqueTensor,
IntxPackingFormat,
IntxUnpackedToInt8Tensor,
QuantizeTensorToFloat8Kwargs,
)
from torchao.quantization.quantize_.workflows.int8.int8_tensor import (
QuantizeTensorToInt8Kwargs,
)
from torchao.quantization.transform_module import (
_QUANTIZE_CONFIG_HANDLER,
register_quantize_module_handler,
Expand DownExpand Up@@ -1583,16 +1585,16 @@ def get_weight_block_size(x):
new_weight = to_linear_activation_quantized(new_weight, input_quant_func)
return new_weight
else:
activation_granularity, weight_granularity = _normalize_granularity(config.granularity)
activation_granularity, weight_granularity = _normalize_granularity(
config.granularity
)
act_quant_kwargs = QuantizeTensorToInt8Kwargs(
activation_granularity,
# hp_value_lb=activation_value_lb,
# hp_value_ub=activation_value_ub,
)
new_weight = Int8Tensor.from_hp(
weight,
granularity=weight_granularity,
act_quant_kwargs=act_quant_kwargs
weight, granularity=weight_granularity, act_quant_kwargs=act_quant_kwargs
)
return new_weight

Expand Down
8 changes: 4 additions & 4 deletions — torchao/quantization/quantize_/workflows/__init__.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -2,10 +2,6 @@
Float8Tensor,
QuantizeTensorToFloat8Kwargs,
)
from .int8.int8_tensor import (
Int8Tensor,
QuantizeTensorToInt8Kwargs,
)
from .int4.int4_choose_qparams_algorithm import Int4ChooseQParamsAlgorithm
from .int4.int4_marlin_sparse_tensor import (
Int4MarlinSparseTensor,
Expand All@@ -21,6 +17,10 @@
Int4Tensor,
)
from .int4.int4_tile_packed_to_4d_tensor import Int4TilePackedTo4dTensor
from .int8.int8_tensor import (
Int8Tensor,
QuantizeTensorToInt8Kwargs,
)
from .intx.intx_choose_qparams_algorithm import IntxChooseQParamsAlgorithm
from .intx.intx_opaque_tensor import (
IntxOpaqueTensor,
Expand Down
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -5,7 +5,7 @@
# LICENSE file in the root directory of this source tree.

from dataclasses import dataclass
from typing import Optional, List
from typing import List, Optional

import torch
from torch.utils._python_dispatch import return_and_correct_aliasing
Expand DownExpand Up@@ -34,6 +34,7 @@ class QuantizeTensorToInt8Kwargs(QuantizeTensorKwargs):
Args:
granularity: the granularity for the Tensor, currently either PerRow() or PerTensor()
"""

granularity: Granularity = PerRow()
hp_value_lb: Optional[float] = None
hp_value_ub: Optional[float] = None
Expand DownExpand Up@@ -314,7 +315,7 @@ def _(func, types, args, kwargs):
Int8Tensor(
sliced_qdata,
sliced_scale,
block_size=self.block_size[1:],
block_size=self.block_size[1:],
act_quant_kwargs=self.act_quant_kwargs,
dtype=self.dtype,
),
Expand Down
Loading

[8]ページ先頭

©2009-2025 Movatter.jp