pytorch/ao (Public)

enable smoothquant for int8 static tensor #3468


Open
jcaip wants to merge 40 commits into main from jcaip/enable-smoothquant
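
For context: SmoothQuant moves activation outliers into the weights by rescaling each input channel before int8 quantization, so both tensors quantize with less error. A minimal sketch of that smoothing step under the usual formulation (illustrative names, not the torchao API):

    import torch

    def smoothquant_factors(act_absmax, weight, alpha=0.5):
        # Per-input-channel smoothing: s_j = max|X_j|^alpha / max|W_j|^(1 - alpha).
        # act_absmax: (K,) calibrated activation absmax; weight: (N, K).
        w_absmax = weight.abs().amax(dim=0).clamp(min=1e-5)
        return act_absmax.clamp(min=1e-5).pow(alpha) / w_absmax.pow(1 - alpha)

    # Applying X / s and W * s leaves X @ W.T mathematically unchanged,
    # but flattens activation outliers so static int8 scales fit better.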
Changes from 1 commit

Commits (40):
48cdb61  Int8Tensor migration (jcaip, Dec 1, 2025)
0b73aed  ruff fixes (jcaip, Dec 1, 2025)
1e49945  add init (jcaip, Dec 1, 2025)
669b6ee  fix ruff again (jcaip, Dec 1, 2025)
9071526  update (jcaip, Dec 1, 2025)
1539e0f  wip (jcaip, Dec 2, 2025)
d9a2b1b  Merge branch 'main' into jcaip/int8-tensor (jcaip, Dec 3, 2025)
673f228  undo update tests (jcaip, Dec 3, 2025)
739fd64  fix ruff (jcaip, Dec 3, 2025)
750db1a  fix varname (jcaip, Dec 3, 2025)
9410488  fix typing (jcaip, Dec 3, 2025)
45a3a76  add tests (jcaip, Dec 3, 2025)
4e2f09c  fix dtype (jcaip, Dec 3, 2025)
dd80cca  fix ci (jcaip, Dec 3, 2025)
7f73062  address granularity cr (jcaip, Dec 4, 2025)
ac6a2b6  update _choose_quant_func_and_quantize_tensor (jcaip, Dec 4, 2025)
f28df4a  make block size required attribute (jcaip, Dec 4, 2025)
328585e  made dtype required as well (jcaip, Dec 4, 2025)
ce4d568  address nits (jcaip, Dec 4, 2025)
a665d45  skip per tensor weight only test for now (jcaip, Dec 4, 2025)
0338016  add static quant (jcaip, Dec 3, 2025)
ee39691  add static quant (jcaip, Dec 4, 2025)
9eb0aa9  update (jcaip, Dec 5, 2025)
d4a1514  static quant working eager + compile (jcaip, Dec 6, 2025)
3cdea56  remove file (jcaip, Dec 6, 2025)
fa9022d  added asserts (jcaip, Dec 6, 2025)
8ce5cde  undo smoothquant change (jcaip, Dec 6, 2025)
6f64121  fix return (jcaip, Dec 6, 2025)
8ae921d  Merge branch 'main' into jcaip/static-quant-rebased (jcaip, Dec 7, 2025)
5b9e243  got smoothquant + int8 static working (jcaip, Dec 8, 2025)
7a0e38f  generalized smoothquat code (jcaip, Dec 8, 2025)
3d18edf  free tests (jcaip, Dec 8, 2025)
9e07f8b  fix static scale check (jcaip, Dec 8, 2025)
4274e02  update (jcaip, Dec 8, 2025)
b5309eb  address cr feedback (jcaip, Dec 9, 2025)
a732fee  Merge branch 'jcaip/static-quant-rebased' into jcaip/enable-smoothquant (jcaip, Dec 9, 2025)
0c23589  Merge branch 'main' into jcaip/enable-smoothquant (jcaip, Dec 9, 2025)
0872986  update (jcaip, Dec 17, 2025)
049830f  fix ruff (jcaip, Dec 17, 2025)
2586ab6  fix varname (jcaip, Dec 18, 2025)
Commit: fix ci
jcaip committed Dec 3, 2025
commit dd80cca3fc7f95b453ebe148b688d15b5e844877


test/quantization/quantize_/workflows/int8/test_int8_tensor.py: 33 changes (13 additions, 20 deletions)
@@ -27,12 +27,6 @@
 INT8_TEST_CONFIGS = [
     Int8WeightOnlyConfig(version=2, granularity=PerTensor()),
     Int8WeightOnlyConfig(version=2, granularity=PerRow()),
-    Int8DynamicActivationInt8WeightConfig(
-        version=2, granularity=PerTensor(), act_mapping_type=MappingType.ASYMMETRIC
-    ),
-    Int8DynamicActivationInt8WeightConfig(
-        version=2, granularity=PerRow(), act_mapping_type=MappingType.ASYMMETRIC
-    ),
     Int8DynamicActivationInt8WeightConfig(
         version=2, granularity=PerTensor(), act_mapping_type=MappingType.SYMMETRIC
     ),
@@ -77,13 +71,8 @@ def test_creation_and_attributes(self, config):
         elif isinstance(config.granularity, PerTensor):
             self.assertEqual(w.scale.shape, (1, 1))
 
-        if config.act_mapping_type == MappingType.SYMMETRIC:
-            self.assertEqual(w.zero_point, None)
-        elif config.act_mapping_type == MappingType.ASYMMETRIC:
-            if isinstance(config.granularity, PerRow):
-                self.assertEqual(w.zero_point.shape, (w.shape[0], 1))
-            elif isinstance(config.granularity, PerTensor):
-                self.assertEqual(w.zero_point.shape, (1, 1))
+        if hasattr(config, "act_mapping_type"):
+            self.assertEqual(w.act_quant_kwargs.mapping_type, config.act_mapping_type)
 
     @common_utils.parametrize("dtype", [torch.bfloat16, torch.float32])
     @common_utils.parametrize("compile", [True, False])
@@ -103,6 +92,8 @@ def test_int8_linear_variants(
         sizes: tuple,
     ):
         """Test linear operation supports including shape and compile"""
+        torch.compiler.reset()
+
         M, N, K = sizes
         input_tensor = torch.randn(*M, K, dtype=dtype, device="cuda")
         model = ToyTwoLinearModel(K, N, K, dtype=dtype, device="cuda").eval()
@@ -118,7 +109,6 @@ def test_int8_linear_variants(
         self.assertEqual(model_q.linear2.weight.scale.ndim, 2)
 
         if compile:
-            torch.compiler.reset()
             model_q = torch.compile(model_q, fullgraph=True)
 
         output_fp = model(input_tensor)
@@ -146,21 +136,24 @@ def test_slice(self, config, device, dtype):
 
         self.assertEqual(weight1.qdata, dummy.weight.qdata.narrow(0, 0, slice_sizes[0]))
         self.assertEqual(weight2.qdata, dummy.weight.qdata.narrow(1, 0, slice_sizes[1]))
-        self.assertEqual(weight1.scale, dummy.weight.scale.narrow(0, 0, slice_sizes[0]))
+
+        if isinstance(config.granularity, PerRow):
+            self.assertEqual(
+                weight1.scale, dummy.weight.scale.narrow(0, 0, slice_sizes[0])
+            )
+
         self.assertEqual(weight2.scale, dummy.weight.scale)
         with self.assertRaises(NotImplementedError):
             _ = dummy.weight[::2]
 
     @common_utils.parametrize("config", INT8_TEST_CONFIGS)
-    @common_utils.parametrize("granularity", [PerTensor(), PerRow()])
-    def test_index_select(self, config, granularity):
+    def test_index_select(self, config):
         """test that `x_0 = x[0]` works when `x` is a 2D quantized tensor."""
         N, K = 256, 512
         x = torch.randn(N, K, device="cuda", dtype=torch.bfloat16)
         linear = torch.nn.Linear(K, N, bias=False, dtype=torch.bfloat16, device="cuda")
         linear.weight.data = x
 
-        config = config(version=2, granularity=granularity)
         quantize_(linear, config)
 
         x_int8 = linear.weight
@@ -172,11 +165,11 @@ def test_index_select(self, config, granularity):
         )
 
         # Test block_size granularity
-        if isinstance(granularity, PerRow):
+        if isinstance(config.granularity, PerRow):
             self.assertEqual(
                 list(get_block_size(x_int8.shape, config.granularity)), [1, K]
             )
-        elif isinstance(granularity, PerTensor):
+        elif isinstance(config.granularity, PerTensor):
             self.assertEqual(
                 list(get_block_size(x_int8.shape, config.granularity)), [N, K]
             )
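
The [1, K] and [N, K] assertions above encode the block-size convention: a block spans the elements that share one scale, so PerRow gives one scale per row and PerTensor one scale for the whole tensor. A standalone sketch of that semantics (illustrative, not the torchao get_block_size implementation):

    import torch

    def scales_for_block_size(x, block_size, qmax=127):
        # Reshape so each (bn, bk) block reduces to a single absmax scale.
        N, K = x.shape
        bn, bk = block_size  # (1, K): per-row; (N, K): per-tensor
        blocks = x.reshape(N // bn, bn, K // bk, bk)
        return blocks.abs().amax(dim=(1, 3)) / qmax

    w = torch.randn(256, 512)
    print(scales_for_block_size(w, (1, 512)).shape)    # per-row: torch.Size([256, 1])
    print(scales_for_block_size(w, (256, 512)).shape)  # per-tensor: torch.Size([1, 1])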
torchao/quantization/quant_api.py: 2 changes (1 addition, 1 deletion)
@@ -1608,7 +1608,7 @@ def get_weight_block_size(x):
             granularity=config.granularity,
             act_quant_kwargs=QuantizeTensorToInt8Kwargs(
                 granularity=act_granularity,
-                act_mapping_type=config.act_mapping_type,
+                mapping_type=config.act_mapping_type,
             ),
         )
 
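
After this rename, the dataclass field is mapping_type while the user-facing config keeps act_mapping_type and forwards it, as the call site above shows. A hypothetical usage sketch (module paths inferred from the diff, not verified against a release):

    from torchao.quantization.granularity import PerRow
    from torchao.quantization.quant_primitives import MappingType
    from torchao.quantization.quantize_.workflows.int8.int8_tensor import (
        QuantizeTensorToInt8Kwargs,
    )

    # Dynamic per-row symmetric activation quantization, matching the
    # renamed field in this commit.
    act_kwargs = QuantizeTensorToInt8Kwargs(
        granularity=PerRow(),
        mapping_type=MappingType.SYMMETRIC,
    )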
torchao/quantization/quantize_/workflows/int8/int8_tensor.py: 14 changes (9 additions, 5 deletions)
@@ -37,12 +37,14 @@ class QuantizeTensorToInt8Kwargs(QuantizeTensorKwargs):
     """
 
     granularity: Granularity = PerRow()
-    act_mapping_type: MappingType = MappingType.SYMMETRIC
+    mapping_type: MappingType = MappingType.SYMMETRIC
 
 
 class Int8Tensor(TorchAOBaseTensor):
     """
-    int8 quantized tensor with plain layout
+    int8 quantized tensor with plain layout.
+
+    Currently only Symmetric quantization is supported.
 
     Tensor Attributes:
         qdata: (N, K) or (B, N, K) int8 quantized weight data (2D or 3D)
@@ -73,7 +75,7 @@ def __new__(
     ):
         kwargs = {
             "device": qdata.device,
-            "dtype": dtype,
+            "dtype": dtype or scale.dtype,
             "requires_grad": False,
         }
         return torch.Tensor._make_wrapper_subclass(cls, qdata.shape, **kwargs)
@@ -110,14 +112,15 @@ def from_hp(
         hp_tensor: torch.Tensor,
         granularity: Granularity = PerRow(),
         act_quant_kwargs: Optional[QuantizeTensorToInt8Kwargs] = None,
+        mapping_type=MappingType.SYMMETRIC,
     ):
         """Create Int8Tensor from high-precision tensor"""
         block_size = get_block_size(hp_tensor.shape, granularity)
         block_size = list(block_size)
 
         scale, zero_point = choose_qparams_affine(
             input=hp_tensor,
-            mapping_type=MappingType.SYMMETRIC,
+            mapping_type=mapping_type,
             block_size=block_size,
             target_dtype=torch.int8,
             quant_min=-128,
@@ -179,7 +182,8 @@ def _(func, types, args, kwargs):
 
     if weight_tensor.act_quant_kwargs is not None:
         activation_tensor = Int8Tensor.from_hp(
-            activation_tensor, weight_tensor.act_quant_kwargs.granularity
+            activation_tensor,
+            granularity=weight_tensor.act_quant_kwargs.granularity,
         )
         # Dynamic activation quantization path
 
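
For intuition, the symmetric path that from_hp delegates to choose_qparams_affine reduces to an absmax scale plus a rounded clamp. A self-contained sketch of that round trip (not the torchao internals):

    import torch

    x = torch.randn(4, 8)
    scale = (x.abs().amax(dim=1, keepdim=True) / 127).clamp(min=1e-8)  # PerRow scales
    q = torch.clamp(torch.round(x / scale), -128, 127).to(torch.int8)  # zero_point is 0
    x_hat = q.to(torch.float32) * scale                                # dequantize
    assert (x - x_hat).abs().max() <= scale.max()  # error within one quantization step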