Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 57079ce

Browse files
authored
[None][chroe] Rename TensorRT-LLM to TensorRT LLM for source code. (#7851)
Signed-off-by: nv-guomingz <137257613+nv-guomingz@users.noreply.github.com>
1 parent 68b7900 commit 57079ce

File tree

148 files changed

+311
-311
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

148 files changed

+311
-311
lines changed

‎README.md‎

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ TensorRT LLM
2525
*[08/01] Scaling Expert Parallelism in TensorRT LLM (Part 2: Performance Status and Optimization)
2626
[➡️ link](./docs/source/blogs/tech_blog/blog8_Scaling_Expert_Parallelism_in_TensorRT-LLM_part2.md)
2727

28-
*[07/26] N-Gram Speculative Decoding in TensorRT-LLM
28+
*[07/26] N-Gram Speculative Decoding in TensorRT LLM
2929
[➡️ link](./docs/source/blogs/tech_blog/blog7_NGram_performance_Analysis_And_Auto_Enablement.md)
3030

3131
*[06/19] Disaggregated Serving in TensorRT LLM

‎benchmarks/cpp/bertBenchmark.cpp‎

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ void benchmarkBert(std::string const& modelName, std::filesystem::path const& da
135135

136136
intmain(int argc,char* argv[])
137137
{
138-
cxxopts::Optionsoptions("TensorRT-LLM C++ Runtime Benchmark","TensorRT-LLM C++ Runtime Benchmark for BERT.");
138+
cxxopts::Options options("TensorRT LLM C++ Runtime Benchmark", "TensorRT LLM C++ Runtime Benchmark for BERT.");
139139
options.add_options()("h,help","Print usage");
140140
options.add_options()(
141141
"m,model","Model name specified for engines.", cxxopts::value<std::string>()->default_value("bert_base"));

‎benchmarks/cpp/disaggServerBenchmark.cpp‎

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1145,7 +1145,7 @@ void benchmark(std::vector<std::filesystem::path> const& contextEngineDirs,
11451145
intmain(int argc,char* argv[])
11461146

11471147
{
1148-
cxxopts::Optionsoptions("TensorRT-LLm DisaggServer Benchmark");
1148+
cxxopts::Optionsoptions("TensorRT LLM DisaggServer Benchmark");
11491149
options.add_options()("h,help","Print usage");
11501150
options.add_options()("context_engine_dirs","Directories that store context engines,separator is a ,",
11511151
cxxopts::value<std::vector<std::string>>());

‎benchmarks/cpp/gptManagerBenchmark.cpp‎

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1055,7 +1055,7 @@ void benchmarkExecutor(std::optional<std::filesystem::path> const& decoderEngine
10551055
intmain(int argc,char* argv[])
10561056
{
10571057
cxxopts::Optionsoptions(
1058-
"TensorRT-LLM BatchManager Benchmark","TensorRT-LLM BatchManager Benchmark for GPT and GPT-like models.");
1058+
"TensorRT LLM BatchManager Benchmark", "TensorRT LLM BatchManager Benchmark for GPT and GPT-like models.");
10591059
options.add_options()("h,help","Print usage");
10601060
options.add_options()("engine_dir, decoder_engine_dir","Directory that store the engines of decoder models.",
10611061
cxxopts::value<std::string>());

‎cpp/include/tensorrt_llm/deep_gemm/compiler.cuh‎

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -217,7 +217,7 @@ std::vector<std::filesystem::path> getJitIncludeDirs()
217217
}
218218
else
219219
{
220-
TLLM_LOG_WARNING("Failed to find TensorRT-LLM installation, DeepGEMM will be disabled.");
220+
TLLM_LOG_WARNING("Failed to find TensorRT LLM installation, DeepGEMM will be disabled.");
221221
}
222222
}
223223
return includeDirs;

‎cpp/tensorrt_llm/batch_manager/cacheTransceiver.cpp‎

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,7 @@ CacheTransceiver::CacheTransceiver(kv_cache_manager::BaseKVCacheManager* cacheMa
165165
{
166166
void* ret =dllGetSym(handle, name);
167167
TLLM_CHECK_WITH_INFO(ret !=nullptr,
168-
"Unable to load UCX wrapper library symbol, possible cause is that TensorRT-LLM library is not"
168+
"Unable to load UCX wrapper library symbol, possible cause is that TensorRT LLM library is not "
169169
"built with UCX support, please rebuild in UCX-enabled environment.");
170170
return ret;
171171
};

‎cpp/tensorrt_llm/kernels/cutlass_kernels/fp4_gemm/fp4_gemm_template.h‎

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ size_t dispatchNVFP4xNVFP4GemmClusterShapeSm100(T* D, void const* A, void const*
105105
break;
106106
default:
107107
throwstd::runtime_error(
108-
"[TensorRT-LLM Error][FP4][dispatch_gemm_cluster_shape] Config is invalid for FP4 GEMM.");
108+
"[TensorRT LLM Error][FP4][dispatch_gemm_cluster_shape] Config is invalid for FP4 GEMM.");
109109
break;
110110
}
111111
}
@@ -146,15 +146,15 @@ size_t dispatchNVFP4xNVFP4GemmCTAShapeSm100(T* D, void const* A, void const* B,
146146
occupancy);
147147
break;
148148
case tkc::CutlassTileConfigSM100::Undefined:
149-
throwstd::runtime_error("[TensorRT-LLM Error][FP4][dispatch_gemm_cta_shape] Gemm config undefined.");
149+
throw std::runtime_error("[TensorRT LLM Error][FP4][dispatch_gemm_cta_shape] Gemm config undefined.");
150150
break;
151151
case tkc::CutlassTileConfigSM100::ChooseWithHeuristic:
152152
throwstd::runtime_error(
153-
"[TensorRT-LLM Error][FP4][dispatch_gemm_cta_shape] Gemm config should have already been set by"
153+
"[TensorRT LLM Error][FP4][dispatch_gemm_cta_shape] Gemm config should have already been set by "
154154
"heuristic.");
155155
break;
156156
default:
157-
throwstd::runtime_error("[TensorRT-LLM Error][FP4][dispatch_gemm_cta_shape] Config is invalid for FP4 GEMM.");
157+
throw std::runtime_error("[TensorRT LLM Error][FP4][dispatch_gemm_cta_shape] Config is invalid for FP4 GEMM.");
158158
break;
159159
}
160160
}
@@ -177,7 +177,7 @@ size_t dispatchNVFP4xNVFP4GemmClusterShapeSm120(T* D, void const* A, void const*
177177
break;
178178
default:
179179
throwstd::runtime_error(
180-
"[TensorRT-LLM Error][FP4][dispatch_gemm_cluster_shape] Config is invalid for FP4 GEMM.");
180+
"[TensorRT LLM Error][FP4][dispatch_gemm_cluster_shape] Config is invalid for FP4 GEMM.");
181181
break;
182182
}
183183
}
@@ -205,16 +205,16 @@ size_t dispatchNVFP4xNVFP4GemmCTAShapeSm120(T* D, void const* A, void const* B,
205205
occupancy);
206206
break;
207207
case tkc::CutlassTileConfigSM120::Undefined:
208-
throwstd::runtime_error("[TensorRT-LLM Error][FP4][sm120][dispatch_gemm_cta_shape] Gemm config undefined.");
208+
throw std::runtime_error("[TensorRT LLM Error][FP4][sm120][dispatch_gemm_cta_shape] Gemm config undefined.");
209209
break;
210210
case tkc::CutlassTileConfigSM120::ChooseWithHeuristic:
211211
throwstd::runtime_error(
212-
"[TensorRT-LLM Error][FP4][sm120][dispatch_gemm_cta_shape] Gemm config should have already been set by"
212+
"[TensorRT LLM Error][FP4][sm120][dispatch_gemm_cta_shape] Gemm config should have already been set by "
213213
"heuristic.");
214214
break;
215215
default:
216216
throwstd::runtime_error(
217-
"[TensorRT-LLM Error][FP4][sm120][dispatch_gemm_cta_shape] Config is invalid for FP4 GEMM.");
217+
"[TensorRT LLM Error][FP4][sm120][dispatch_gemm_cta_shape] Config is invalid for FP4 GEMM.");
218218
break;
219219
}
220220
}
@@ -257,7 +257,7 @@ size_t dispatchMXFP8xMXFP4GemmClusterShapeSm100(T* D, void const* A, void const*
257257
break;
258258
default:
259259
throwstd::runtime_error(
260-
"[TensorRT-LLM Error][FP4][dispatch_gemm_cluster_shape] Config is invalid for FP4 GEMM.");
260+
"[TensorRT LLM Error][FP4][dispatch_gemm_cluster_shape] Config is invalid for FP4 GEMM.");
261261
break;
262262
}
263263
}
@@ -293,15 +293,15 @@ size_t dispatchMXFP8xMXFP4GemmCTAShapeSm100(T* D, void const* A, void const* B,
293293
occupancy);
294294
break;
295295
case tkc::CutlassTileConfigSM100::Undefined:
296-
throwstd::runtime_error("[TensorRT-LLM Error][FP4][dispatch_gemm_cta_shape] Gemm config undefined.");
296+
throw std::runtime_error("[TensorRT LLM Error][FP4][dispatch_gemm_cta_shape] Gemm config undefined.");
297297
break;
298298
case tkc::CutlassTileConfigSM100::ChooseWithHeuristic:
299299
throwstd::runtime_error(
300-
"[TensorRT-LLM Error][FP4][dispatch_gemm_cta_shape] Gemm config should have already been set by"
300+
"[TensorRT LLM Error][FP4][dispatch_gemm_cta_shape] Gemm config should have already been set by "
301301
"heuristic.");
302302
break;
303303
default:
304-
throwstd::runtime_error("[TensorRT-LLM Error][FP4][dispatch_gemm_cta_shape] Config is invalid for FP4 GEMM.");
304+
throw std::runtime_error("[TensorRT LLM Error][FP4][dispatch_gemm_cta_shape] Config is invalid for FP4 GEMM.");
305305
break;
306306
}
307307
}
@@ -338,7 +338,7 @@ size_t CutlassFp4GemmRunner<T, fp4GemmType>::dispatchToArch(T* D, void const* A,
338338
else
339339
{
340340
throwstd::runtime_error(
341-
"[TensorRT-LLM Error][CutlassFp4GemmRunner][GEMM Dispatch] Arch unsupported for CUTLASS FP4 GEMM");
341+
"[TensorRT LLM Error][CutlassFp4GemmRunner][GEMM Dispatch] Arch unsupported for CUTLASS FP4 GEMM");
342342
}
343343
}
344344
elseifconstexpr (fp4GemmType == FP4GemmType::W4A4_NVFP4_NVFP4)
@@ -356,13 +356,13 @@ size_t CutlassFp4GemmRunner<T, fp4GemmType>::dispatchToArch(T* D, void const* A,
356356
else
357357
{
358358
throwstd::runtime_error(
359-
"[TensorRT-LLM Error][CutlassFp4GemmRunner][GEMM Dispatch] Arch unsupported for CUTLASS FP4 GEMM");
359+
"[TensorRT LLM Error][CutlassFp4GemmRunner][GEMM Dispatch] Arch unsupported for CUTLASS FP4 GEMM");
360360
}
361361
}
362362
else
363363
{
364364
throwstd::runtime_error(
365-
"[TensorRT-LLM Error][CutlassFp4GemmRunner][GEMM Dispatch] FP4 Gemm type unsupported for CUTLASS FP4 GEMM");
365+
"[TensorRT LLM Error][CutlassFp4GemmRunner][GEMM Dispatch] FP4 Gemm type unsupported for CUTLASS FP4 GEMM");
366366
}
367367
}
368368

‎cpp/tensorrt_llm/kernels/cutlass_kernels/fp4_gemm/mxfp8_mxfp4_gemm_template_sm100.h‎

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ size_t genericMXFP8xMXFP4GemmKernelLauncher(void* D, void const* A, void const*
9393
int* occupancy)
9494
{
9595
throwstd::runtime_error(
96-
"[TensorRT-LLM Error][FP4 gemm Runner] TensorRT-LLM is not compiled with support for this Architecture.");
96+
"[TensorRT LLM Error][FP4 gemm Runner] TensorRT LLM is not compiled with support for this Architecture.");
9797
}
9898

9999
#else
@@ -250,7 +250,7 @@ size_t genericMXFP8xMXFP4GemmKernelLauncher(void* D, void const* A, void const*
250250
{
251251
std::string errMsg ="SMEM size exceeds maximum allowed. Required" +std::to_string(smem_size) +", got"
252252
+std::to_string(mMaxSmemSize);
253-
throwstd::runtime_error("[TensorRT-LLM Error][FP4 gemm Runner]" + errMsg);
253+
throw std::runtime_error("[TensorRT LLM Error][FP4 gemm Runner] " + errMsg);
254254
}
255255
/* // Return workspace size*/
256256
if (!A && !B && !D)
@@ -261,28 +261,28 @@ size_t genericMXFP8xMXFP4GemmKernelLauncher(void* D, void const* A, void const*
261261
{
262262
std::stringerrMsg("Requested workspace size insufficient. Required"
263263
+std::to_string(gemm.get_workspace_size(args)) +", got" +std::to_string(workspaceBytes));
264-
throwstd::runtime_error("[TensorRT-LLM Error][FP4 gemm Runner]" + errMsg);
264+
throw std::runtime_error("[TensorRT LLM Error][FP4 gemm Runner] " + errMsg);
265265
}
266266
auto can_implement = gemm.can_implement(args);
267267
if (can_implement != cutlass::Status::kSuccess)
268268
{
269269
std::string errMsg ="MXFP8xMXFP4 Gemm cutlass kernel will fail for params. Error:"
270270
+std::string(cutlassGetStatusString(can_implement));
271-
throwstd::runtime_error("[TensorRT-LLM Error][FP4 gemm Runner]" + errMsg);
271+
throw std::runtime_error("[TensorRT LLM Error][FP4 gemm Runner] " + errMsg);
272272
}
273273
auto initStatus = gemm.initialize(args, workspace, stream);
274274
if (initStatus != cutlass::Status::kSuccess)
275275
{
276276
std::string errMsg ="Failed to initialize cutlass MXFP8xMXFP4 gemm. Error:"
277277
+std::string(cutlassGetStatusString(initStatus));
278-
throwstd::runtime_error("[TensorRT-LLM Error][MXFP8xMXFP4 gemm Runner]" + errMsg);
278+
throw std::runtime_error("[TensorRT LLM Error][MXFP8xMXFP4 gemm Runner] " + errMsg);
279279
}
280280
auto runStatus = gemm.run(args, workspace, stream,nullptr,tensorrt_llm::common::getEnvEnablePDL());
281281
if (runStatus != cutlass::Status::kSuccess)
282282
{
283283
std::string errMsg
284284
="Failed to run cutlass MXFP8xMXFP4 gemm. Error:" +std::string(cutlassGetStatusString(runStatus));
285-
throwstd::runtime_error("[TensorRT-LLM Error][MXFP8xMXFP4 gemm Runner]" + errMsg);
285+
throw std::runtime_error("[TensorRT LLM Error][MXFP8xMXFP4 gemm Runner] " + errMsg);
286286
}
287287
return gemm.get_workspace_size(args);
288288
}

‎cpp/tensorrt_llm/kernels/cutlass_kernels/fp4_gemm/nvfp4_nvfp4_gemm_template_sm100.h‎

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ size_t genericFp4GemmKernelLauncher(void* D, void const* A, void const* B, void
107107
int* occupancy) \
108108
{ \
109109
throwstd::runtime_error( \
110-
"[TensorRT-LLM Error][FP4 gemm Runner] TensorRT-LLM is not compiled with support for this Architecture."); \
110+
"[TensorRT LLM Error][FP4 gemm Runner] TensorRT LLM is not compiled with support for this Architecture."); \
111111
}
112112

113113
#else
@@ -268,7 +268,7 @@ size_t genericFp4GemmKernelLauncher(void* D, void const* A, void const* B, void
268268
{ \
269269
std::string errMsg ="SMEM size exceeds maximum allowed. Required" +std::to_string(smem_size) +", got" \
270270
+std::to_string(mMaxSmemSize); \
271-
throwstd::runtime_error("[TensorRT-LLM Error][FP4 gemm Runner]" + errMsg); \
271+
throw std::runtime_error("[TensorRT LLM Error][FP4 gemm Runner] " + errMsg); \
272272
} \
273273
/* // Return workspace size*/ \
274274
if (!A && !B && !D) \
@@ -279,28 +279,28 @@ size_t genericFp4GemmKernelLauncher(void* D, void const* A, void const* B, void
279279
{ \
280280
std::stringerrMsg("Requested workspace size insufficient. Required" \
281281
+std::to_string(gemm.get_workspace_size(args)) +", got" +std::to_string(workspaceBytes)); \
282-
throwstd::runtime_error("[TensorRT-LLM Error][FP4 gemm Runner]" + errMsg); \
282+
throw std::runtime_error("[TensorRT LLM Error][FP4 gemm Runner] " + errMsg); \
283283
} \
284284
auto can_implement = gemm.can_implement(args); \
285285
if (can_implement != cutlass::Status::kSuccess) \
286286
{ \
287287
std::string errMsg ="FP4 Gemm cutlass kernel will fail for params. Error:" \
288288
+std::string(cutlassGetStatusString(can_implement)); \
289-
throwstd::runtime_error("[TensorRT-LLM Error][FP4 gemm Runner]" + errMsg); \
289+
throw std::runtime_error("[TensorRT LLM Error][FP4 gemm Runner] " + errMsg); \
290290
} \
291291
auto initStatus = gemm.initialize(args, workspace, stream); \
292292
if (initStatus != cutlass::Status::kSuccess) \
293293
{ \
294294
std::string errMsg \
295295
="Failed to initialize cutlass FP4 gemm. Error:" +std::string(cutlassGetStatusString(initStatus)); \
296-
throwstd::runtime_error("[TensorRT-LLM Error][FP4 gemm Runner]" + errMsg); \
296+
throw std::runtime_error("[TensorRT LLM Error][FP4 gemm Runner] " + errMsg); \
297297
} \
298298
auto runStatus = gemm.run(args, workspace, stream,nullptr,tensorrt_llm::common::getEnvEnablePDL()); \
299299
if (runStatus != cutlass::Status::kSuccess) \
300300
{ \
301301
std::string errMsg \
302302
="Failed to run cutlass FP4 gemm. Error:" +std::string(cutlassGetStatusString(runStatus)); \
303-
throwstd::runtime_error("[TensorRT-LLM Error][FP4 gemm Runner]" + errMsg); \
303+
throw std::runtime_error("[TensorRT LLM Error][FP4 gemm Runner] " + errMsg); \
304304
} \
305305
return gemm.get_workspace_size(args); \
306306
}

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp