Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 89889fb

Browse files
authored
[https://nvbugs/5369366] [fix] Report failing requests (#7060)
Signed-off-by: Rashid Kaleem <4079439+arekay@users.noreply.github.com>
1 parent 08a0e06 commit 89889fb

File tree

2 files changed

+30
-4
lines changed

2 files changed

+30
-4
lines changed

‎tensorrt_llm/serve/scripts/backend_request_func.py‎

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ class RequestFuncOutput:
4646
prompt_len: int = 0
4747
error: str = ""
4848
avg_decoded_tokens_per_iter: float = 0.0  # Average tokens decoded per iteration
49+
exception_type: str = None  # unset
4950

5051

5152
async def async_request_trt_llm(
@@ -132,10 +133,11 @@ async def async_request_trt_llm(
132133
else:
133134
output.error = response.reason or ""
134135
output.success = False
135-
except Exception:
136+
except Exception as e:
136137
output.success = False
137138
exc_info = sys.exc_info()
138139
output.error = "".join(traceback.format_exception(*exc_info))
140+
output.exception_type = e.__class__.__name__
139141
finally:
140142
if session is None:
141143
await request_session.close()
@@ -259,12 +261,14 @@ async def async_request_openai_completions(
259261
output.avg_decoded_tokens_per_iter = choice[
260262
    "avg_decoded_tokens_per_iter"]
261263
else:
264+
print(f"HTTP Error {response.status}: {response}")
262265
output.error = response.reason or ""
263266
output.success = False
264-
except Exception:
267+
except Exception as e:
265268
output.success = False
266269
exc_info = sys.exc_info()
267270
output.error = "".join(traceback.format_exception(*exc_info))
271+
output.exception_type = e.__class__.__name__
268272
finally:
269273
if session is None:
270274
await request_session.close()
@@ -392,12 +396,14 @@ async def async_request_openai_chat_completions(
392396
"avg_decoded_tokens_per_iter"]
393397

394398
else:
399+
# TODO: Need to store the status code to debug and report
395400
output.error = response.reason or ""
396401
output.success = False
397-
except Exception:
402+
except Exception as e:
398403
output.success = False
399404
exc_info = sys.exc_info()
400405
output.error = "".join(traceback.format_exception(*exc_info))
406+
output.exception_type = e.__class__.__name__
401407
finally:
402408
if session is None:
403409
await request_session.close()

‎tensorrt_llm/serve/scripts/benchmark_serving.py‎

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,12 @@ def calculate_metrics(
144144
e2els: list[float] = []
145145
tput_user: list[float] = []
146146
latest_avg_decoded_tokens_per_iter: float = 0.0
147+
error_counts: dict[str, int] = {}
147148
for i in range(len(outputs)):
149+
if outputs[i].exception_type:
150+
exception_type = outputs[i].exception_type
151+
error_counts[exception_type] = error_counts.get(exception_type,
152+
                                                0) + 1
148153
if outputs[i].success:
149154
output_len = outputs[i].output_tokens
150155
if not output_len:
@@ -179,6 +184,11 @@ def calculate_metrics(
179184
else:
180185
actual_output_lens.append(0)
181186

187+
total_error_count = sum(error_counts.values())
188+
for exception_type, count in error_counts.items():
189+
    print(f"Error type: {exception_type}, Count: {count} requests")
190+
print(f"Total failed requests: {total_error_count}")
191+
182192
if goodput_config_dict:
183193
valid_metrics = []
184194
slo_values = []
@@ -336,7 +346,8 @@ async def benchmark(
336346
print(f"Burstiness factor: {burstiness} ({distribution})")
337347
print(f"Maximum request concurrency: {max_concurrency}")
338348

339-
pbar = None if disable_tqdm else tqdm(total=len(input_requests))
349+
pbar = None if disable_tqdm else tqdm(total=len(input_requests),
350+
                                      desc="Benchmarking")
340351

341352
# This can be used once the minimum Python version is 3.10 or higher,
342353
# and it will simplify the code in limited_request_func.
@@ -433,7 +444,10 @@ async def limited_request_func(request_func_input, streaming, pbar,
433444
)
434445

435446
print("{s:{c}^{n}}".format(s=' Serving Benchmark Result ', n=50, c='='))
447+
print("{:<40} {:<10}".format("Total requests:", len(outputs)))
436448
print("{:<40} {:<10}".format("Successful requests:", metrics.completed))
449+
print("{:<40} {:<10}".format("Failed requests:",
450+
                             len(outputs) - metrics.completed))
437451
print("{:<40} {:<10.2f}".format("Benchmark duration (s):",
438452
                                benchmark_duration))
439453
print("{:<40} {:<10}".format("Total input tokens:", metrics.total_input))
@@ -455,6 +469,12 @@ async def limited_request_func(request_func_input, streaming, pbar,
455469
if metrics.avg_decoded_tokens_per_iter > 0.0:
456470
    print("{:<40} {:<10.2f}".format("Avg Decoded Tokens per Iter:",
457471
                                    metrics.avg_decoded_tokens_per_iter))
472+
if len(outputs) - metrics.completed > 0:
473+
    print(
474+
        f"=======================!FAILED REQUESTS!=======================")
475+
    print(f"Total failed requests: {len(outputs) - metrics.completed}")
476+
    print(
477+
        f"=====================!CHECK LOG FOR ERRORS!====================")
458478

459479
result = {
460480
"duration": benchmark_duration,

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp