Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 57079ce

Browse files
authored
[None][chroe] Rename TensorRT-LLM to TensorRT LLM for source code. (#7851)
Signed-off-by: nv-guomingz <137257613+nv-guomingz@users.noreply.github.com>
1 parent 68b7900 commit 57079ce

File tree

148 files changed

+311
-311
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

148 files changed

+311
-311
lines changed

‎README.md‎

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ TensorRT LLM
2525
*[08/01] Scaling Expert Parallelism in TensorRT LLM (Part 2: Performance Status and Optimization)
2626
[➡️ link](./docs/source/blogs/tech_blog/blog8_Scaling_Expert_Parallelism_in_TensorRT-LLM_part2.md)
2727

28-
*[07/26] N-Gram Speculative Decoding in TensorRT-LLM
28+
*[07/26] N-Gram Speculative Decoding in TensorRT LLM
2929
[➡️ link](./docs/source/blogs/tech_blog/blog7_NGram_performance_Analysis_And_Auto_Enablement.md)
3030

3131
*[06/19] Disaggregated Serving in TensorRT LLM

‎benchmarks/cpp/bertBenchmark.cpp‎

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ void benchmarkBert(std::string const& modelName, std::filesystem::path const& da
135135

136136
intmain(int argc,char* argv[])
137137
{
138-
cxxopts::Optionsoptions("TensorRT-LLM C++ Runtime Benchmark","TensorRT-LLM C++ Runtime Benchmark for BERT.");
138+
cxxopts::Options options("TensorRT LLM C++ Runtime Benchmark", "TensorRT LLM C++ Runtime Benchmark for BERT.");
139139
options.add_options()("h,help","Print usage");
140140
options.add_options()(
141141
"m,model","Model name specified for engines.", cxxopts::value<std::string>()->default_value("bert_base"));

‎benchmarks/cpp/disaggServerBenchmark.cpp‎

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1145,7 +1145,7 @@ void benchmark(std::vector<std::filesystem::path> const& contextEngineDirs,
11451145
intmain(int argc,char* argv[])
11461146

11471147
{
1148-
cxxopts::Optionsoptions("TensorRT-LLm DisaggServer Benchmark");
1148+
cxxopts::Optionsoptions("TensorRT LLM DisaggServer Benchmark");
11491149
options.add_options()("h,help","Print usage");
11501150
options.add_options()("context_engine_dirs","Directories that store context engines,separator is a ,",
11511151
cxxopts::value<std::vector<std::string>>());

‎benchmarks/cpp/gptManagerBenchmark.cpp‎

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1055,7 +1055,7 @@ void benchmarkExecutor(std::optional<std::filesystem::path> const& decoderEngine
10551055
intmain(int argc,char* argv[])
10561056
{
10571057
cxxopts::Optionsoptions(
1058-
"TensorRT-LLM BatchManager Benchmark","TensorRT-LLM BatchManager Benchmark for GPT and GPT-like models.");
1058+
"TensorRT LLM BatchManager Benchmark", "TensorRT LLM BatchManager Benchmark for GPT and GPT-like models.");
10591059
options.add_options()("h,help","Print usage");
10601060
options.add_options()("engine_dir, decoder_engine_dir","Directory that store the engines of decoder models.",
10611061
cxxopts::value<std::string>());

‎cpp/include/tensorrt_llm/deep_gemm/compiler.cuh‎

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -217,7 +217,7 @@ std::vector<std::filesystem::path> getJitIncludeDirs()
217217
}
218218
else
219219
{
220-
TLLM_LOG_WARNING("Failed to find TensorRT-LLM installation, DeepGEMM will be disabled.");
220+
TLLM_LOG_WARNING("Failed to find TensorRT LLM installation, DeepGEMM will be disabled.");
221221
}
222222
}
223223
return includeDirs;

‎cpp/tensorrt_llm/batch_manager/cacheTransceiver.cpp‎

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,7 @@ CacheTransceiver::CacheTransceiver(kv_cache_manager::BaseKVCacheManager* cacheMa
165165
{
166166
void* ret =dllGetSym(handle, name);
167167
TLLM_CHECK_WITH_INFO(ret !=nullptr,
168-
"Unable to load UCX wrapper library symbol, possible cause is that TensorRT-LLM library is not"
168+
"Unable to load UCX wrapper library symbol, possible cause is that TensorRT LLM library is not "
169169
"built with UCX support, please rebuild in UCX-enabled environment.");
170170
return ret;
171171
};

‎cpp/tensorrt_llm/kernels/cutlass_kernels/fp4_gemm/fp4_gemm_template.h‎

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ size_t dispatchNVFP4xNVFP4GemmClusterShapeSm100(T* D, void const* A, void const*
105105
break;
106106
default:
107107
throwstd::runtime_error(
108-
"[TensorRT-LLM Error][FP4][dispatch_gemm_cluster_shape] Config is invalid for FP4 GEMM.");
108+
"[TensorRT LLM Error][FP4][dispatch_gemm_cluster_shape] Config is invalid for FP4 GEMM.");
109109
break;
110110
}
111111
}
@@ -146,15 +146,15 @@ size_t dispatchNVFP4xNVFP4GemmCTAShapeSm100(T* D, void const* A, void const* B,
146146
occupancy);
147147
break;
148148
case tkc::CutlassTileConfigSM100::Undefined:
149-
throwstd::runtime_error("[TensorRT-LLM Error][FP4][dispatch_gemm_cta_shape] Gemm config undefined.");
149+
throw std::runtime_error("[TensorRT LLM Error][FP4][dispatch_gemm_cta_shape] Gemm config undefined.");
150150
break;
151151
case tkc::CutlassTileConfigSM100::ChooseWithHeuristic:
152152
throwstd::runtime_error(
153-
"[TensorRT-LLM Error][FP4][dispatch_gemm_cta_shape] Gemm config should have already been set by"
153+
"[TensorRT LLM Error][FP4][dispatch_gemm_cta_shape] Gemm config should have already been set by "
154154
"heuristic.");
155155
break;
156156
default:
157-
throwstd::runtime_error("[TensorRT-LLM Error][FP4][dispatch_gemm_cta_shape] Config is invalid for FP4 GEMM.");
157+
throw std::runtime_error("[TensorRT LLM Error][FP4][dispatch_gemm_cta_shape] Config is invalid for FP4 GEMM.");
158158
break;
159159
}
160160
}
@@ -177,7 +177,7 @@ size_t dispatchNVFP4xNVFP4GemmClusterShapeSm120(T* D, void const* A, void const*
177177
break;
178178
default:
179179
throwstd::runtime_error(
180-
"[TensorRT-LLM Error][FP4][dispatch_gemm_cluster_shape] Config is invalid for FP4 GEMM.");
180+
"[TensorRT LLM Error][FP4][dispatch_gemm_cluster_shape] Config is invalid for FP4 GEMM.");
181181
break;
182182
}
183183
}
@@ -205,16 +205,16 @@ size_t dispatchNVFP4xNVFP4GemmCTAShapeSm120(T* D, void const* A, void const* B,
205205
occupancy);
206206
break;
207207
case tkc::CutlassTileConfigSM120::Undefined:
208-
throwstd::runtime_error("[TensorRT-LLM Error][FP4][sm120][dispatch_gemm_cta_shape] Gemm config undefined.");
208+
throw std::runtime_error("[TensorRT LLM Error][FP4][sm120][dispatch_gemm_cta_shape] Gemm config undefined.");
209209
break;
210210
case tkc::CutlassTileConfigSM120::ChooseWithHeuristic:
211211
throwstd::runtime_error(
212-
"[TensorRT-LLM Error][FP4][sm120][dispatch_gemm_cta_shape] Gemm config should have already been set by"
212+
"[TensorRT LLM Error][FP4][sm120][dispatch_gemm_cta_shape] Gemm config should have already been set by "
213213
"heuristic.");
214214
break;
215215
default:
216216
throwstd::runtime_error(
217-
"[TensorRT-LLM Error][FP4][sm120][dispatch_gemm_cta_shape] Config is invalid for FP4 GEMM.");
217+
"[TensorRT LLM Error][FP4][sm120][dispatch_gemm_cta_shape] Config is invalid for FP4 GEMM.");
218218
break;
219219
}
220220
}
@@ -257,7 +257,7 @@ size_t dispatchMXFP8xMXFP4GemmClusterShapeSm100(T* D, void const* A, void const*
257257
break;
258258
default:
259259
throwstd::runtime_error(
260-
"[TensorRT-LLM Error][FP4][dispatch_gemm_cluster_shape] Config is invalid for FP4 GEMM.");
260+
"[TensorRT LLM Error][FP4][dispatch_gemm_cluster_shape] Config is invalid for FP4 GEMM.");
261261
break;
262262
}
263263
}
@@ -293,15 +293,15 @@ size_t dispatchMXFP8xMXFP4GemmCTAShapeSm100(T* D, void const* A, void const* B,
293293
occupancy);
294294
break;
295295
case tkc::CutlassTileConfigSM100::Undefined:
296-
throwstd::runtime_error("[TensorRT-LLM Error][FP4][dispatch_gemm_cta_shape] Gemm config undefined.");
296+
throw std::runtime_error("[TensorRT LLM Error][FP4][dispatch_gemm_cta_shape] Gemm config undefined.");
297297
break;
298298
case tkc::CutlassTileConfigSM100::ChooseWithHeuristic:
299299
throwstd::runtime_error(
300-
"[TensorRT-LLM Error][FP4][dispatch_gemm_cta_shape] Gemm config should have already been set by"
300+
"[TensorRT LLM Error][FP4][dispatch_gemm_cta_shape] Gemm config should have already been set by "
301301
"heuristic.");
302302
break;
303303
default:
304-
throwstd::runtime_error("[TensorRT-LLM Error][FP4][dispatch_gemm_cta_shape] Config is invalid for FP4 GEMM.");
304+
throw std::runtime_error("[TensorRT LLM Error][FP4][dispatch_gemm_cta_shape] Config is invalid for FP4 GEMM.");
305305
break;
306306
}
307307
}
@@ -338,7 +338,7 @@ size_t CutlassFp4GemmRunner<T, fp4GemmType>::dispatchToArch(T* D, void const* A,
338338
else
339339
{
340340
throwstd::runtime_error(
341-
"[TensorRT-LLM Error][CutlassFp4GemmRunner][GEMM Dispatch] Arch unsupported for CUTLASS FP4 GEMM");
341+
"[TensorRT LLM Error][CutlassFp4GemmRunner][GEMM Dispatch] Arch unsupported for CUTLASS FP4 GEMM");
342342
}
343343
}
344344
elseifconstexpr (fp4GemmType == FP4GemmType::W4A4_NVFP4_NVFP4)
@@ -356,13 +356,13 @@ size_t CutlassFp4GemmRunner<T, fp4GemmType>::dispatchToArch(T* D, void const* A,
356356
else
357357
{
358358
throwstd::runtime_error(
359-
"[TensorRT-LLM Error][CutlassFp4GemmRunner][GEMM Dispatch] Arch unsupported for CUTLASS FP4 GEMM");
359+
"[TensorRT LLM Error][CutlassFp4GemmRunner][GEMM Dispatch] Arch unsupported for CUTLASS FP4 GEMM");
360360
}
361361
}
362362
else
363363
{
364364
throwstd::runtime_error(
365-
"[TensorRT-LLM Error][CutlassFp4GemmRunner][GEMM Dispatch] FP4 Gemm type unsupported for CUTLASS FP4 GEMM");
365+
"[TensorRT LLM Error][CutlassFp4GemmRunner][GEMM Dispatch] FP4 Gemm type unsupported for CUTLASS FP4 GEMM");
366366
}
367367
}
368368

‎cpp/tensorrt_llm/kernels/cutlass_kernels/fp4_gemm/mxfp8_mxfp4_gemm_template_sm100.h‎

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ size_t genericMXFP8xMXFP4GemmKernelLauncher(void* D, void const* A, void const*
9393
int* occupancy)
9494
{
9595
throwstd::runtime_error(
96-
"[TensorRT-LLM Error][FP4 gemm Runner] TensorRT-LLM is not compiled with support for this Architecture.");
96+
"[TensorRT LLM Error][FP4 gemm Runner] TensorRT LLM is not compiled with support for this Architecture.");
9797
}
9898

9999
#else
@@ -250,7 +250,7 @@ size_t genericMXFP8xMXFP4GemmKernelLauncher(void* D, void const* A, void const*
250250
{
251251
std::string errMsg ="SMEM size exceeds maximum allowed. Required" +std::to_string(smem_size) +", got"
252252
+std::to_string(mMaxSmemSize);
253-
throwstd::runtime_error("[TensorRT-LLM Error][FP4 gemm Runner]" + errMsg);
253+
throw std::runtime_error("[TensorRT LLM Error][FP4 gemm Runner] " + errMsg);
254254
}
255255
/* // Return workspace size*/
256256
if (!A && !B && !D)
@@ -261,28 +261,28 @@ size_t genericMXFP8xMXFP4GemmKernelLauncher(void* D, void const* A, void const*
261261
{
262262
std::stringerrMsg("Requested workspace size insufficient. Required"
263263
+std::to_string(gemm.get_workspace_size(args)) +", got" +std::to_string(workspaceBytes));
264-
throwstd::runtime_error("[TensorRT-LLM Error][FP4 gemm Runner]" + errMsg);
264+
throw std::runtime_error("[TensorRT LLM Error][FP4 gemm Runner] " + errMsg);
265265
}
266266
auto can_implement = gemm.can_implement(args);
267267
if (can_implement != cutlass::Status::kSuccess)
268268
{
269269
std::string errMsg ="MXFP8xMXFP4 Gemm cutlass kernel will fail for params. Error:"
270270
+std::string(cutlassGetStatusString(can_implement));
271-
throwstd::runtime_error("[TensorRT-LLM Error][FP4 gemm Runner]" + errMsg);
271+
throw std::runtime_error("[TensorRT LLM Error][FP4 gemm Runner] " + errMsg);
272272
}
273273
auto initStatus = gemm.initialize(args, workspace, stream);
274274
if (initStatus != cutlass::Status::kSuccess)
275275
{
276276
std::string errMsg ="Failed to initialize cutlass MXFP8xMXFP4 gemm. Error:"
277277
+std::string(cutlassGetStatusString(initStatus));
278-
throwstd::runtime_error("[TensorRT-LLM Error][MXFP8xMXFP4 gemm Runner]" + errMsg);
278+
throw std::runtime_error("[TensorRT LLM Error][MXFP8xMXFP4 gemm Runner] " + errMsg);
279279
}
280280
auto runStatus = gemm.run(args, workspace, stream,nullptr,tensorrt_llm::common::getEnvEnablePDL());
281281
if (runStatus != cutlass::Status::kSuccess)
282282
{
283283
std::string errMsg
284284
="Failed to run cutlass MXFP8xMXFP4 gemm. Error:" +std::string(cutlassGetStatusString(runStatus));
285-
throwstd::runtime_error("[TensorRT-LLM Error][MXFP8xMXFP4 gemm Runner]" + errMsg);
285+
throw std::runtime_error("[TensorRT LLM Error][MXFP8xMXFP4 gemm Runner] " + errMsg);
286286
}
287287
return gemm.get_workspace_size(args);
288288
}

‎cpp/tensorrt_llm/kernels/cutlass_kernels/fp4_gemm/nvfp4_nvfp4_gemm_template_sm100.h‎

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ size_t genericFp4GemmKernelLauncher(void* D, void const* A, void const* B, void
107107
int* occupancy) \
108108
{ \
109109
throwstd::runtime_error( \
110-
"[TensorRT-LLM Error][FP4 gemm Runner] TensorRT-LLM is not compiled with support for this Architecture."); \
110+
"[TensorRT LLM Error][FP4 gemm Runner] TensorRT LLM is not compiled with support for this Architecture."); \
111111
}
112112

113113
#else
@@ -268,7 +268,7 @@ size_t genericFp4GemmKernelLauncher(void* D, void const* A, void const* B, void
268268
{ \
269269
std::string errMsg ="SMEM size exceeds maximum allowed. Required" +std::to_string(smem_size) +", got" \
270270
+std::to_string(mMaxSmemSize); \
271-
throwstd::runtime_error("[TensorRT-LLM Error][FP4 gemm Runner]" + errMsg); \
271+
throw std::runtime_error("[TensorRT LLM Error][FP4 gemm Runner] " + errMsg); \
272272
} \
273273
/* // Return workspace size*/ \
274274
if (!A && !B && !D) \
@@ -279,28 +279,28 @@ size_t genericFp4GemmKernelLauncher(void* D, void const* A, void const* B, void
279279
{ \
280280
std::stringerrMsg("Requested workspace size insufficient. Required" \
281281
+std::to_string(gemm.get_workspace_size(args)) +", got" +std::to_string(workspaceBytes)); \
282-
throwstd::runtime_error("[TensorRT-LLM Error][FP4 gemm Runner]" + errMsg); \
282+
throw std::runtime_error("[TensorRT LLM Error][FP4 gemm Runner] " + errMsg); \
283283
} \
284284
auto can_implement = gemm.can_implement(args); \
285285
if (can_implement != cutlass::Status::kSuccess) \
286286
{ \
287287
std::string errMsg ="FP4 Gemm cutlass kernel will fail for params. Error:" \
288288
+std::string(cutlassGetStatusString(can_implement)); \
289-
throwstd::runtime_error("[TensorRT-LLM Error][FP4 gemm Runner]" + errMsg); \
289+
throw std::runtime_error("[TensorRT LLM Error][FP4 gemm Runner] " + errMsg); \
290290
} \
291291
auto initStatus = gemm.initialize(args, workspace, stream); \
292292
if (initStatus != cutlass::Status::kSuccess) \
293293
{ \
294294
std::string errMsg \
295295
="Failed to initialize cutlass FP4 gemm. Error:" +std::string(cutlassGetStatusString(initStatus)); \
296-
throwstd::runtime_error("[TensorRT-LLM Error][FP4 gemm Runner]" + errMsg); \
296+
throw std::runtime_error("[TensorRT LLM Error][FP4 gemm Runner] " + errMsg); \
297297
} \
298298
auto runStatus = gemm.run(args, workspace, stream,nullptr,tensorrt_llm::common::getEnvEnablePDL()); \
299299
if (runStatus != cutlass::Status::kSuccess) \
300300
{ \
301301
std::string errMsg \
302302
="Failed to run cutlass FP4 gemm. Error:" +std::string(cutlassGetStatusString(runStatus)); \
303-
throwstd::runtime_error("[TensorRT-LLM Error][FP4 gemm Runner]" + errMsg); \
303+
throw std::runtime_error("[TensorRT LLM Error][FP4 gemm Runner] " + errMsg); \
304304
} \
305305
return gemm.get_workspace_size(args); \
306306
}

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp