Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 89889fb

Browse files
authored
[https://nvbugs/5369366] [fix] Report failing requests (#7060)
Signed-off-by: Rashid Kaleem <4079439+arekay@users.noreply.github.com>
1 parent 08a0e06 commit 89889fb

File tree

2 files changed

+30
-4
lines changed

2 files changed

+30
-4
lines changed

‎tensorrt_llm/serve/scripts/backend_request_func.py‎

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ class RequestFuncOutput:
4646
prompt_len: int = 0
4747
error: str = ""
4848
avg_decoded_tokens_per_iter: float = 0.0  # Average tokens decoded per iteration
49+
exception_type: str = None  # unset
4950

5051

5152
async def async_request_trt_llm(
@@ -132,10 +133,11 @@ async def async_request_trt_llm(
132133
else:
133134
output.error = response.reason or ""
134135
output.success = False
135-
except Exception:
136+
except Exception as e:
136137
output.success = False
137138
exc_info = sys.exc_info()
138139
output.error = "".join(traceback.format_exception(*exc_info))
140+
output.exception_type = e.__class__.__name__
139141
finally:
140142
if session is None:
141143
await request_session.close()
@@ -259,12 +261,14 @@ async def async_request_openai_completions(
259261
output.avg_decoded_tokens_per_iter = choice[
260262
    "avg_decoded_tokens_per_iter"]
261263
else:
264+
print(f"HTTP Error {response.status}: {response}")
262265
output.error = response.reason or ""
263266
output.success = False
264-
except Exception:
267+
except Exception as e:
265268
output.success = False
266269
exc_info = sys.exc_info()
267270
output.error = "".join(traceback.format_exception(*exc_info))
271+
output.exception_type = e.__class__.__name__
268272
finally:
269273
if session is None:
270274
await request_session.close()
@@ -392,12 +396,14 @@ async def async_request_openai_chat_completions(
392396
"avg_decoded_tokens_per_iter"]
393397

394398
else:
399+
# TODO: Need to store the status code to debug and report
395400
output.error = response.reason or ""
396401
output.success = False
397-
except Exception:
402+
except Exception as e:
398403
output.success = False
399404
exc_info = sys.exc_info()
400405
output.error = "".join(traceback.format_exception(*exc_info))
406+
output.exception_type = e.__class__.__name__
401407
finally:
402408
if session is None:
403409
await request_session.close()

‎tensorrt_llm/serve/scripts/benchmark_serving.py‎

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,12 @@ def calculate_metrics(
144144
e2els: list[float] = []
145145
tput_user: list[float] = []
146146
latest_avg_decoded_tokens_per_iter: float = 0.0
147+
error_counts: dict[str, int] = {}
147148
for i in range(len(outputs)):
149+
if outputs[i].exception_type:
150+
exception_type = outputs[i].exception_type
151+
error_counts[exception_type] = error_counts.get(exception_type,
152+
                                                0) + 1
148153
if outputs[i].success:
149154
output_len = outputs[i].output_tokens
150155
if not output_len:
@@ -179,6 +184,11 @@ def calculate_metrics(
179184
else:
180185
actual_output_lens.append(0)
181186

187+
total_error_count = sum(error_counts.values())
188+
for exception_type, count in error_counts.items():
189+
    print(f"Error type: {exception_type}, Count: {count} requests")
190+
print(f"Total failed requests: {total_error_count}")
191+
182192
if goodput_config_dict:
183193
valid_metrics = []
184194
slo_values = []
@@ -336,7 +346,8 @@ async def benchmark(
336346
print(f"Burstiness factor: {burstiness} ({distribution})")
337347
print(f"Maximum request concurrency: {max_concurrency}")
338348

339-
pbar = None if disable_tqdm else tqdm(total=len(input_requests))
349+
pbar = None if disable_tqdm else tqdm(total=len(input_requests),
350+
                                      desc="Benchmarking")
340351

341352
# This can be used once the minimum Python version is 3.10 or higher,
342353
# and it will simplify the code in limited_request_func.
@@ -433,7 +444,10 @@ async def limited_request_func(request_func_input, streaming, pbar,
433444
)
434445

435446
print("{s:{c}^{n}}".format(s=' Serving Benchmark Result ', n=50, c='='))
447+
print("{:<40} {:<10}".format("Total requests:", len(outputs)))
436448
print("{:<40} {:<10}".format("Successful requests:", metrics.completed))
449+
print("{:<40} {:<10}".format("Failed requests:",
450+
                             len(outputs) - metrics.completed))
437451
print("{:<40} {:<10.2f}".format("Benchmark duration (s):",
438452
                                benchmark_duration))
439453
print("{:<40} {:<10}".format("Total input tokens:", metrics.total_input))
@@ -455,6 +469,12 @@ async def limited_request_func(request_func_input, streaming, pbar,
455469
if metrics.avg_decoded_tokens_per_iter > 0.0:
456470
    print("{:<40} {:<10.2f}".format("Avg Decoded Tokens per Iter:",
457471
                                    metrics.avg_decoded_tokens_per_iter))
472+
if len(outputs) - metrics.completed > 0:
473+
    print(
474+
        f"=======================!FAILED REQUESTS!=======================")
475+
    print(f"Total failed requests: {len(outputs) - metrics.completed}")
476+
    print(
477+
        f"=====================!CHECK LOG FOR ERRORS!====================")
458478

459479
result = {
460480
"duration": benchmark_duration,

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp