Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 81e9d5c

Browse files
committed
rename chunked_mode to chunked_logits
Signed-off-by: Yibin Li <109242046+yibinl-nvidia@users.noreply.github.com>
1 parent b20f62d commit 81e9d5c

File tree

2 files changed

+15
-14
lines changed

2 files changed

+15
-14
lines changed

‎tensorrt_llm/_torch/pyexecutor/handle_logits.py‎

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -79,8 +79,9 @@ def __call__(
7979
logits_view = logits[logits_begin:logits_end].reshape(
8080
1, beam_width, -1)
8181
llm_req.py_result.append_generation_logits(logits_view)
82-
83-
# Finalize any remaining logits transfers for all requests (chunked mode)
82+
83+
# Finalize any remaining logits transfers for all requests in chunked mode
8484
for llm_req in chain(context_requests, generation_requests):
85-
if llm_req.py_return_generation_logits or llm_req.py_return_context_logits:
86-
llm_req.py_result.post_processing_transfer()
85+
if llm_req.py_use_chunked_logits:
86+
if llm_req.py_return_generation_logits or llm_req.py_return_context_logits:
87+
llm_req.py_result.post_processing_transfer()

‎tensorrt_llm/_torch/pyexecutor/llm_request.py‎

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ def __init__(self,
4747
seq_length: int,
4848
use_device_memory=True,
4949
should_exclude_last=False,
50-
chunked_mode=False,
50+
use_chunked_logits=False,
5151
streaming=False,
5252
chunk_size=8):
5353
if should_exclude_last:
@@ -57,7 +57,7 @@ def __init__(self,
5757
self.seq_length = seq_length
5858
self.use_device_memory = use_device_memory
5959
self._should_exclude_last = should_exclude_last
60-
self.chunked_mode = chunked_mode
60+
self.use_chunked_logits = use_chunked_logits
6161
self.chunk_size = chunk_size
6262
self.streaming = streaming
6363
self._logits_indices = []
@@ -68,7 +68,7 @@ def __init__(self,
6868
self.vocab_size = -1
6969

7070
# Chunked mode: device-side fragments
71-
if chunked_mode:
71+
if use_chunked_logits:
7272
self._device_fragments: List[torch.Tensor] = []
7373
self._current_position = 0
7474


@@ -105,7 +105,7 @@ def append(self, logits: torch.Tensor):
105105
logits = logits.unsqueeze(1)
106106
assert logits.ndim == 3, f"Bad logits shape, expect [num_tokens, beam_width, vocab_size], got {logits.shape}"
107107

108-
if self.chunked_mode:
108+
if self.use_chunked_logits:
109109
if self.beam_width == -1:
110110
self._init_chunked_storage(logits)
111111
self._add_fragment(logits)
@@ -181,7 +181,7 @@ def _transfer_chunk_to_host(self):
181181

182182
def finalize_transfer(self):
183183
"""Force transfer of any remaining fragments to host (for chunked mode)"""
184-
if self.chunked_mode and hasattr(
184+
if self.use_chunked_logits and hasattr(
185185
self, '_device_fragments') and self._device_fragments:
186186
self._transfer_chunk_to_host()
187187

@@ -243,20 +243,20 @@ def __init__(self,
243243
return_context_logits: bool = False,
244244
return_generation_logits: bool = False,
245245
exclude_last_generation_logits: bool = False,
246-
chunked_mode: bool = False,
246+
use_chunked_logits: bool = True,
247247
chunk_size: int = 8):
248248
self._streaming = streaming
249249
self._context_logits = LogitsStorage(
250250
prompt_len,
251251
use_device_memory,
252-
chunked_mode=chunked_mode,
252+
use_chunked_logits=use_chunked_logits,
253253
streaming=streaming,
254254
chunk_size=chunk_size) if return_context_logits else None
255255
self._generation_logits = LogitsStorage(
256256
max_new_tokens,
257257
use_device_memory,
258258
exclude_last_generation_logits,
259-
chunked_mode=chunked_mode,
259+
use_chunked_logits=use_chunked_logits,
260260
streaming=streaming,
261261
chunk_size=chunk_size) if return_generation_logits else None
262262
self._log_probs = LogProbStorage() if return_log_probs else None
@@ -394,7 +394,7 @@ def __init__(
394394
is_draft: bool = False,
395395
seq_slot: Optional[int] = None,
396396
target_seq_slot: Optional[int] = None,
397-
use_chunked_logits: bool = False,
397+
use_chunked_logits: bool = True,
398398
logits_chunk_size: int = 8,
399399
**kwargs):
400400

@@ -466,7 +466,7 @@ def __init__(
466466
return_context_logits,
467467
return_generation_logits,
468468
exclude_last_generation_logits,
469-
chunked_mode=use_chunked_logits,
469+
use_chunked_logits=use_chunked_logits,
470470
chunk_size=logits_chunk_size)
471471
self.child_requests = []
472472

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp