Commit 15721f1

committed

Tidy code

Signed-off-by: Zongfei Jing <20381269+zongfeijing@users.noreply.github.com>

1 parent 73045d8, commit 15721f1 (Copy full SHA for 15721f1)

File tree

6 files changed

+10

-9

lines changed

examples/llm-api
- quickstart_advanced.py
tensorrt_llm
- _torch
  - model_config.py
  - modules/fused_moe
    - fused_moe_wide_ep.py
  - pyexecutor
    - config.py
    - model_engine.py
- llmapi
  - llm_args.py

6 files changed

+10

-9

lines changed

`‎examples/llm-api/quickstart_advanced.py‎`

Lines changed: 2 additions & 2 deletions

Original file line number	Diff line number	Diff line change
`@@ -74,7 +74,7 @@ def add_llm_args(parser):`
`74`	`74`	`parser.add_argument('--moe_tp_size',type=int,default=-1)`
`75`	`75`	`parser.add_argument('--moe_cluster_size',type=int,default=-1)`
`76`	`76`	`parser.add_argument(`
`77`		`-'--low_precision_combine',`
	`77`	`+'--use_low_precision_moe_combine',`
`78`	`78`	`default=False,`
`79`	`79`	`action='store_true',`
`80`	`80`	`help='Use low precision combine in MoE (only for NVFP4 quantization)')`
`@@ -234,7 +234,7 @@ def setup_llm(args, **kwargs):`
`234`	`234`	`enable_piecewise_cuda_graph= \`
`235`	`235`	`args.use_piecewise_cuda_graph)`
`236`	`236`	`if args.use_torch_compile else None,`
`237`		`-moe_config=MoeConfig(backend=args.moe_backend,low_precision_combine=args.low_precision_combine),`
	`237`	`+moe_config=MoeConfig(backend=args.moe_backend,use_low_precision_moe_combine=args.use_low_precision_moe_combine),`
`238`	`238`	`sampler_type=args.sampler_type,`
`239`	`239`	`max_seq_len=args.max_seq_len,`
`240`	`240`	`max_batch_size=args.max_batch_size,`

`‎tensorrt_llm/_torch/model_config.py‎`

Lines changed: 1 addition & 1 deletion

Original file line number	Diff line number	Diff line change
`@@ -118,7 +118,7 @@ class ModelConfig(Generic[TConfig]):`
`118`	`118`	`# IF true, disables FC2+finalize fusion in CUTLASS MoE backend`
`119`	`119`	`moe_disable_finalize_fusion:bool=False`
`120`	`120`	`# If true, use low precision combine in MoE operations (only for NVFP4 quantization)`
`121`		`-moe_low_precision_combine:bool=False`
	`121`	`+use_low_precision_moe_combine:bool=False`
`122`	`122`
`123`	`123`	`allreduce_strategy:AllReduceStrategy=AllReduceStrategy.AUTO`
`124`	`124`

`‎tensorrt_llm/_torch/modules/fused_moe/fused_moe_wide_ep.py‎`

Lines changed: 1 addition & 1 deletion

Original file line number	Diff line number	Diff line change
`@@ -189,7 +189,7 @@ def __init__(`
`189`	`189`	`self.use_postquant_alltoall= (os.environ.get(`
`190`	`190`	`"TRTLLM_MOE_POST_QUANT_ALLTOALLV","1")`
`191`	`191`	`== "1") and qm.has_nvfp4()`
`192`		`-self.use_low_precision_combine = model_config.moe_low_precision_combine and qm.has_nvfp4(`
	`192`	`+self.use_low_precision_combine = model_config.use_low_precision_moe_combine and qm.has_nvfp4(`
`193`	`193`	`)`
`194`	`194`
`195`	`195`	`if self.alltoall_method_type == AlltoallMethodType.MNNVL:`

`‎tensorrt_llm/_torch/pyexecutor/config.py‎`

Lines changed: 1 addition & 1 deletion

Original file line number	Diff line number	Diff line change
`@@ -60,7 +60,7 @@ class PyTorchConfig:`
`60`	`60`	`moe_backend:str='CUTLASS'`
`61`	`61`
`62`	`62`	`moe_disable_finalize_fusion:bool=False`
`63`		`-moe_low_precision_combine:bool=False`
	`63`	`+use_low_precision_moe_combine:bool=False`
`64`	`64`
`65`	`65`	`enable_mixed_sampler:bool=False`
`66`	`66`	`"""`

`‎tensorrt_llm/_torch/pyexecutor/model_engine.py‎`

Lines changed: 2 additions & 2 deletions

Original file line number	Diff line number	Diff line change
`@@ -307,8 +307,8 @@ def __init__(`
`307`	`307`	`moe_backend=pytorch_backend_config.moe_backend,`
`308`	`308`	`moe_disable_finalize_fusion=pytorch_backend_config.`
`309`	`309`	`moe_disable_finalize_fusion,`
`310`		`-moe_low_precision_combine=pytorch_backend_config.`
`311`		`-moe_low_precision_combine,`
	`310`	`+use_low_precision_moe_combine=pytorch_backend_config.`
	`311`	`+use_low_precision_moe_combine,`
`312`	`312`	`load_format=pytorch_backend_config.load_format,`
`313`	`313`	`max_num_tokens=max_num_tokens,`
`314`	`314`	`moe_max_num_tokens=pytorch_backend_config.moe_max_num_tokens,`

`‎tensorrt_llm/llmapi/llm_args.py‎`

Lines changed: 3 additions & 2 deletions

Original file line number	Diff line number	Diff line change
`@@ -191,7 +191,7 @@ class MoeConfig(StrictBaseModel):`
`191`	`191`	`"Disable FC2+finalize kernel fusion in CUTLASS MoE backend. Setting this to True recovers deterministic numerical behavior with top-k > 2."`
`192`	`192`	`)`
`193`	`193`
`194`		`-low_precision_combine:bool=Field(`
	`194`	`+use_low_precision_moe_combine:bool=Field(`
`195`	`195`	`default=False,`
`196`	`196`	`description=`
`197`	`197`	`"Use low precision combine in MoE operations (only for NVFP4 quantization). When enabled, uses lower precision for combining expert outputs to improve performance."`
`@@ -2592,7 +2592,8 @@ def get_pytorch_backend_config(self) -> "PyTorchConfig":`
`2592`	`2592`	`moe_load_balancer=self.moe_config.load_balancer,`
`2593`	`2593`	`attn_backend=self.attn_backend,`
`2594`	`2594`	`moe_backend=self.moe_config.backend,`
`2595`		`-moe_low_precision_combine=self.moe_config.low_precision_combine,`
	`2595`	`+use_low_precision_moe_combine=self.moe_config.`
	`2596`	`+use_low_precision_moe_combine,`
`2596`	`2597`	`enable_mixed_sampler=self.enable_mixed_sampler,`
`2597`	`2598`	`sampler_type=self.sampler_type,`
`2598`	`2599`	`kv_cache_dtype=self.kv_cache_config.dtype,`

0 commit comments

Comments

(0)

Movatterモバイル変換

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Commit 15721f1

File tree

6 files changed

6 files changed

`‎examples/llm-api/quickstart_advanced.py‎`

`‎tensorrt_llm/_torch/model_config.py‎`

`‎tensorrt_llm/_torch/modules/fused_moe/fused_moe_wide_ep.py‎`

`‎tensorrt_llm/_torch/pyexecutor/config.py‎`

`‎tensorrt_llm/_torch/pyexecutor/model_engine.py‎`

`‎tensorrt_llm/llmapi/llm_args.py‎`

0 commit comments