Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 9458741

Browse files
committed
rename chunked_mode to chunked_logits
Signed-off-by: Yibin Li <109242046+yibinl-nvidia@users.noreply.github.com>
1 parent 9788010 · commit 9458741

File tree

2 files changed

+14
-13
lines changed

2 files changed

+14
-13
lines changed

‎tensorrt_llm/_torch/pyexecutor/handle_logits.py‎

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,8 @@ def __call__(
8080
1,beam_width,-1)
8181
llm_req.py_result.append_generation_logits(logits_view)
8282

83-
# Finalize any remaining logits transfers for all requests(chunked mode)
83+
# Finalize any remaining logits transfers for all requestsinchunked mode
8484
forllm_reqinchain(context_requests,generation_requests):
85-
ifllm_req.py_return_generation_logitsorllm_req.py_return_context_logits:
86-
llm_req.py_result.post_processing_transfer()
85+
ifllm_req.py_use_chunked_logits:
86+
ifllm_req.py_return_generation_logitsorllm_req.py_return_context_logits:
87+
llm_req.py_result.post_processing_transfer()

‎tensorrt_llm/_torch/pyexecutor/llm_request.py‎

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ def __init__(self,
4545
seq_length:int,
4646
use_device_memory=True,
4747
should_exclude_last=False,
48-
chunked_mode=False,
48+
use_chunked_logits=False,
4949
streaming=False,
5050
chunk_size=8):
5151
ifshould_exclude_last:
@@ -55,7 +55,7 @@ def __init__(self,
5555
self.seq_length=seq_length
5656
self.use_device_memory=use_device_memory
5757
self._should_exclude_last=should_exclude_last
58-
self.chunked_mode=chunked_mode
58+
self.use_chunked_logits=use_chunked_logits
5959
self.chunk_size=chunk_size
6060
self.streaming=streaming
6161
self._logits_indices= []
@@ -66,7 +66,7 @@ def __init__(self,
6666
self.vocab_size=-1
6767

6868
# Chunked mode: device-side fragments
69-
ifchunked_mode:
69+
ifuse_chunked_logits:
7070
self._device_fragments:List[torch.Tensor]= []
7171
self._current_position=0
7272

@@ -103,7 +103,7 @@ def append(self, logits: torch.Tensor):
103103
logits=logits.unsqueeze(1)
104104
assertlogits.ndim==3,f"Bad logits shape, expect [num_tokens, beam_width, vocab_size], got{logits.shape}"
105105

106-
ifself.chunked_mode:
106+
ifself.use_chunked_logits:
107107
ifself.beam_width==-1:
108108
self._init_chunked_storage(logits)
109109
self._add_fragment(logits)
@@ -179,7 +179,7 @@ def _transfer_chunk_to_host(self):
179179

180180
deffinalize_transfer(self):
181181
"""Force transfer of any remaining fragments to host (for chunked mode)"""
182-
ifself.chunked_modeandhasattr(
182+
ifself.use_chunked_logitsandhasattr(
183183
self,'_device_fragments')andself._device_fragments:
184184
self._transfer_chunk_to_host()
185185

@@ -241,20 +241,20 @@ def __init__(self,
241241
return_context_logits:bool=False,
242242
return_generation_logits:bool=False,
243243
exclude_last_generation_logits:bool=False,
244-
chunked_mode:bool=False,
244+
use_chunked_logits:bool=True,
245245
chunk_size:int=8):
246246
self._streaming=streaming
247247
self._context_logits=LogitsStorage(
248248
prompt_len,
249249
use_device_memory,
250-
chunked_mode=chunked_mode,
250+
use_chunked_logits=use_chunked_logits,
251251
streaming=streaming,
252252
chunk_size=chunk_size)ifreturn_context_logitselseNone
253253
self._generation_logits=LogitsStorage(
254254
max_new_tokens,
255255
use_device_memory,
256256
exclude_last_generation_logits,
257-
chunked_mode=chunked_mode,
257+
use_chunked_logits=use_chunked_logits,
258258
streaming=streaming,
259259
chunk_size=chunk_size)ifreturn_generation_logitselseNone
260260
self._log_probs=LogProbStorage()ifreturn_log_probselseNone
@@ -392,7 +392,7 @@ def __init__(
392392
is_draft:bool=False,
393393
seq_slot:Optional[int]=None,
394394
target_seq_slot:Optional[int]=None,
395-
use_chunked_logits:bool=False,
395+
use_chunked_logits:bool=True,
396396
logits_chunk_size:int=8,
397397
**kwargs):
398398

@@ -464,7 +464,7 @@ def __init__(
464464
return_context_logits,
465465
return_generation_logits,
466466
exclude_last_generation_logits,
467-
chunked_mode=use_chunked_logits,
467+
use_chunked_logits=use_chunked_logits,
468468
chunk_size=logits_chunk_size)
469469
self.child_requests= []
470470

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp