Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 15721f1

Browse files
committed
Tidy code
Signed-off-by: Zongfei Jing <20381269+zongfeijing@users.noreply.github.com>
1 parent 73045d8, commit 15721f1

File tree

6 files changed

+10
-9
lines changed

6 files changed

+10
-9
lines changed

‎examples/llm-api/quickstart_advanced.py‎

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -74,7 +74,7 @@ def add_llm_args(parser):
7474
parser.add_argument('--moe_tp_size',type=int,default=-1)
7575
parser.add_argument('--moe_cluster_size',type=int,default=-1)
7676
parser.add_argument(
77-
'--low_precision_combine',
77+
'--use_low_precision_moe_combine',
7878
default=False,
7979
action='store_true',
8080
help='Use low precision combine in MoE (only for NVFP4 quantization)')
@@ -234,7 +234,7 @@ def setup_llm(args, **kwargs):
234234
enable_piecewise_cuda_graph= \
235235
args.use_piecewise_cuda_graph)
236236
ifargs.use_torch_compileelseNone,
237-
moe_config=MoeConfig(backend=args.moe_backend,low_precision_combine=args.low_precision_combine),
237+
moe_config=MoeConfig(backend=args.moe_backend,use_low_precision_moe_combine=args.use_low_precision_moe_combine),
238238
sampler_type=args.sampler_type,
239239
max_seq_len=args.max_seq_len,
240240
max_batch_size=args.max_batch_size,

‎tensorrt_llm/_torch/model_config.py‎

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -118,7 +118,7 @@ class ModelConfig(Generic[TConfig]):
118118
# IF true, disables FC2+finalize fusion in CUTLASS MoE backend
119119
moe_disable_finalize_fusion:bool=False
120120
# If true, use low precision combine in MoE operations (only for NVFP4 quantization)
121-
moe_low_precision_combine:bool=False
121+
use_low_precision_moe_combine:bool=False
122122

123123
allreduce_strategy:AllReduceStrategy=AllReduceStrategy.AUTO
124124

‎tensorrt_llm/_torch/modules/fused_moe/fused_moe_wide_ep.py‎

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -189,7 +189,7 @@ def __init__(
189189
self.use_postquant_alltoall= (os.environ.get(
190190
"TRTLLM_MOE_POST_QUANT_ALLTOALLV","1")
191191
=="1")andqm.has_nvfp4()
192-
self.use_low_precision_combine=model_config.moe_low_precision_combineandqm.has_nvfp4(
192+
self.use_low_precision_combine=model_config.use_low_precision_moe_combineandqm.has_nvfp4(
193193
)
194194

195195
ifself.alltoall_method_type==AlltoallMethodType.MNNVL:

‎tensorrt_llm/_torch/pyexecutor/config.py‎

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -60,7 +60,7 @@ class PyTorchConfig:
6060
moe_backend:str='CUTLASS'
6161

6262
moe_disable_finalize_fusion:bool=False
63-
moe_low_precision_combine:bool=False
63+
use_low_precision_moe_combine:bool=False
6464

6565
enable_mixed_sampler:bool=False
6666
"""

‎tensorrt_llm/_torch/pyexecutor/model_engine.py‎

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -307,8 +307,8 @@ def __init__(
307307
moe_backend=pytorch_backend_config.moe_backend,
308308
moe_disable_finalize_fusion=pytorch_backend_config.
309309
moe_disable_finalize_fusion,
310-
moe_low_precision_combine=pytorch_backend_config.
311-
moe_low_precision_combine,
310+
use_low_precision_moe_combine=pytorch_backend_config.
311+
use_low_precision_moe_combine,
312312
load_format=pytorch_backend_config.load_format,
313313
max_num_tokens=max_num_tokens,
314314
moe_max_num_tokens=pytorch_backend_config.moe_max_num_tokens,

‎tensorrt_llm/llmapi/llm_args.py‎

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -191,7 +191,7 @@ class MoeConfig(StrictBaseModel):
191191
"Disable FC2+finalize kernel fusion in CUTLASS MoE backend. Setting this to True recovers deterministic numerical behavior with top-k > 2."
192192
)
193193

194-
low_precision_combine:bool=Field(
194+
use_low_precision_moe_combine:bool=Field(
195195
default=False,
196196
description=
197197
"Use low precision combine in MoE operations (only for NVFP4 quantization). When enabled, uses lower precision for combining expert outputs to improve performance."
@@ -2592,7 +2592,8 @@ def get_pytorch_backend_config(self) -> "PyTorchConfig":
25922592
moe_load_balancer=self.moe_config.load_balancer,
25932593
attn_backend=self.attn_backend,
25942594
moe_backend=self.moe_config.backend,
2595-
moe_low_precision_combine=self.moe_config.low_precision_combine,
2595+
use_low_precision_moe_combine=self.moe_config.
2596+
use_low_precision_moe_combine,
25962597
enable_mixed_sampler=self.enable_mixed_sampler,
25972598
sampler_type=self.sampler_type,
25982599
kv_cache_dtype=self.kv_cache_config.dtype,

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp