Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings
/ao Public

enable smoothquant for int8 static tensor #3468

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
jcaip wants to merge 40 commits into main
base: main
Choose a base branch
Loading
from jcaip/enable-smoothquant
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
40 commits
Select commit. Hold shift + click to select a range
48cdb61
Int8Tensor migration
jcaipDec 1, 2025
0b73aed
ruff fixes
jcaipDec 1, 2025
1e49945
add init
jcaipDec 1, 2025
669b6ee
fix ruff again
jcaipDec 1, 2025
9071526
update
jcaipDec 1, 2025
1539e0f
wip
jcaipDec 2, 2025
d9a2b1b
Merge branch 'main' into jcaip/int8-tensor
jcaipDec 3, 2025
673f228
undo update tests
jcaipDec 3, 2025
739fd64
fix ruff
jcaipDec 3, 2025
750db1a
fix varname
jcaipDec 3, 2025
9410488
fix typing
jcaipDec 3, 2025
45a3a76
add tests
jcaipDec 3, 2025
4e2f09c
fix dtype
jcaipDec 3, 2025
dd80cca
fix ci
jcaipDec 3, 2025
7f73062
address granularity cr
jcaipDec 4, 2025
ac6a2b6
update _choose_quant_func_and_quantize_tensor
jcaipDec 4, 2025
f28df4a
make block size required attribute
jcaipDec 4, 2025
328585e
made dtype required as well
jcaipDec 4, 2025
ce4d568
address nits
jcaipDec 4, 2025
a665d45
skip per tensor weight only test for now
jcaipDec 4, 2025
0338016
add static quant
jcaipDec 3, 2025
ee39691
add static quant
jcaipDec 4, 2025
9eb0aa9
update
jcaipDec 5, 2025
d4a1514
static quant working eager + compile
jcaipDec 6, 2025
3cdea56
remove file
jcaipDec 6, 2025
fa9022d
added asserts
jcaipDec 6, 2025
8ce5cde
undo smoothquant change
jcaipDec 6, 2025
6f64121
fix return
jcaipDec 6, 2025
8ae921d
Merge branch 'main' into jcaip/static-quant-rebased
jcaipDec 7, 2025
5b9e243
got smoothquant + int8 static working
jcaipDec 8, 2025
7a0e38f
generalized smoothquat code
jcaipDec 8, 2025
3d18edf
free tests
jcaipDec 8, 2025
9e07f8b
fix static scale check
jcaipDec 8, 2025
4274e02
update
jcaipDec 8, 2025
b5309eb
address cr feedback
jcaipDec 9, 2025
a732fee
Merge branch 'jcaip/static-quant-rebased' into jcaip/enable-smoothquant
jcaipDec 9, 2025
0c23589
Merge branch 'main' into jcaip/enable-smoothquant
jcaipDec 9, 2025
0872986
update
jcaipDec 17, 2025
049830f
fix ruff
jcaipDec 17, 2025
2586ab6
fix varname
jcaipDec 18, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
PrevPrevious commit
NextNext commit
ruff fixes
  • Loading branch information
@jcaip
jcaip committed Dec 1, 2025
commit 0b73aed8bea8f26ae60a94f23e885f1a09ed0196

Some comments aren't visible on the classic Files Changed page.

3 changes: 1 addition & 2 deletions — test/quantization/test_quant_api.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -30,13 +30,12 @@
AffineQuantizedTensor,
Int4CPULayout,
Int4XPULayout,
PlainLayout,
TensorCoreTiledLayout,
)
from torchao.quantization import (
Float8Tensor,
Int8Tensor,
Int4TilePackedTo4dTensor,
Int8Tensor,
IntxUnpackedToInt8Tensor,
LinearActivationQuantizedTensor,
PerGroup,
Expand Down
2 changes: 1 addition & 1 deletion — torchao/quantization/__init__.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -93,12 +93,12 @@
)
from .quantize_.workflows import (
Float8Tensor,
Int8Tensor,
Int4MarlinSparseTensor,
Int4PlainInt32Tensor,
Int4PreshuffledTensor,
Int4Tensor,
Int4TilePackedTo4dTensor,
Int8Tensor,
IntxOpaqueTensor,
IntxUnpackedToInt8Tensor,
)
Expand Down
14 changes: 8 additions & 6 deletions — torchao/quantization/quant_api.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -15,7 +15,6 @@
and mixed GEMM kernels
"""

from torchao.quantization.quantize_.workflows.int8.int8_tensor import (
    QuantizeTensorToInt8Kwargs,
)
import logging
import re
import types
Expand DownExpand Up@@ -82,14 +81,17 @@
Int4PlainInt32Tensor,
Int4PreshuffledTensor,
Int4Tensor,
Int8Tensor,
Int4TilePackedTo4dTensor,
Int8Tensor,
IntxChooseQParamsAlgorithm,
IntxOpaqueTensor,
IntxPackingFormat,
IntxUnpackedToInt8Tensor,
QuantizeTensorToFloat8Kwargs,
)
from torchao.quantization.quantize_.workflows.int8.int8_tensor import (
QuantizeTensorToInt8Kwargs,
)
from torchao.quantization.transform_module import (
_QUANTIZE_CONFIG_HANDLER,
register_quantize_module_handler,
Expand DownExpand Up@@ -1583,16 +1585,16 @@ def get_weight_block_size(x):
new_weight = to_linear_activation_quantized(new_weight, input_quant_func)
return new_weight
else:
activation_granularity, weight_granularity = _normalize_granularity(config.granularity)
activation_granularity, weight_granularity = _normalize_granularity(
config.granularity
)
act_quant_kwargs = QuantizeTensorToInt8Kwargs(
activation_granularity,
# hp_value_lb=activation_value_lb,
# hp_value_ub=activation_value_ub,
)
new_weight = Int8Tensor.from_hp(
weight,
granularity=weight_granularity,
act_quant_kwargs=act_quant_kwargs
weight, granularity=weight_granularity, act_quant_kwargs=act_quant_kwargs
)
return new_weight

Expand Down
8 changes: 4 additions & 4 deletions — torchao/quantization/quantize_/workflows/__init__.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -2,10 +2,6 @@
Float8Tensor,
QuantizeTensorToFloat8Kwargs,
)
from .int8.int8_tensor import (
Int8Tensor,
QuantizeTensorToInt8Kwargs,
)
from .int4.int4_choose_qparams_algorithm import Int4ChooseQParamsAlgorithm
from .int4.int4_marlin_sparse_tensor import (
Int4MarlinSparseTensor,
Expand All@@ -21,6 +17,10 @@
Int4Tensor,
)
from .int4.int4_tile_packed_to_4d_tensor import Int4TilePackedTo4dTensor
from .int8.int8_tensor import (
Int8Tensor,
QuantizeTensorToInt8Kwargs,
)
from .intx.intx_choose_qparams_algorithm import IntxChooseQParamsAlgorithm
from .intx.intx_opaque_tensor import (
IntxOpaqueTensor,
Expand Down
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -5,7 +5,7 @@
# LICENSE file in the root directory of this source tree.

from dataclasses import dataclass
from typing import Optional, List
from typing import List, Optional

import torch
from torch.utils._python_dispatch import return_and_correct_aliasing
Expand DownExpand Up@@ -34,6 +34,7 @@ class QuantizeTensorToInt8Kwargs(QuantizeTensorKwargs):
Args:
granularity: the granularity for the Tensor, currently either PerRow() or PerTensor()
"""

granularity: Granularity = PerRow()
hp_value_lb: Optional[float] = None
hp_value_ub: Optional[float] = None
Expand DownExpand Up@@ -314,7 +315,7 @@ def _(func, types, args, kwargs):
Int8Tensor(
sliced_qdata,
sliced_scale,
block_size=self.block_size[1:],
block_size=self.block_size[1:],
act_quant_kwargs=self.act_quant_kwargs,
dtype=self.dtype,
),
Expand Down
Loading

[8]ページ先頭

©2009-2025 Movatter.jp