@@ -144,7 +144,12 @@ def calculate_metrics(
144144e2els :list [float ]= []
145145tput_user :list [float ]= []
146146latest_avg_decoded_tokens_per_iter :float = 0.0
147+ error_counts :dict [str ,int ]= {}
147148for i in range (len (outputs )):
149+ if outputs [i ].exception_type :
150+ exception_type = outputs [i ].exception_type
151+ error_counts [exception_type ]= error_counts .get (exception_type ,
152+ 0 )+ 1
148153if outputs [i ].success :
149154output_len = outputs [i ].output_tokens
150155if not output_len :
@@ -179,6 +184,11 @@ def calculate_metrics(
179184else :
180185actual_output_lens .append (0 )
181186
187+ total_error_count = sum (error_counts .values ())
188+ for exception_type ,count in error_counts .items ():
189+ print (f"Error type:{ exception_type } , Count:{ count } requests" )
190+ print (f"Total failed requests:{ total_error_count } " )
191+
182192if goodput_config_dict :
183193valid_metrics = []
184194slo_values = []
@@ -336,7 +346,8 @@ async def benchmark(
336346print (f"Burstiness factor:{ burstiness } ({ distribution } )" )
337347print (f"Maximum request concurrency:{ max_concurrency } " )
338348
339- pbar = None if disable_tqdm else tqdm (total = len (input_requests ))
349+ pbar = None if disable_tqdm else tqdm (total = len (input_requests ),
350+ desc = "Benchmarking" )
340351
341352# This can be used once the minimum Python version is 3.10 or higher,
342353# and it will simplify the code in limited_request_func.
@@ -433,7 +444,10 @@ async def limited_request_func(request_func_input, streaming, pbar,
433444 )
434445
435446print ("{s:{c}^{n}}" .format (s = ' Serving Benchmark Result ' ,n = 50 ,c = '=' ))
447+ print ("{:<40} {:<10}" .format ("Total requests:" ,len (outputs )))
436448print ("{:<40} {:<10}" .format ("Successful requests:" ,metrics .completed ))
449+ print ("{:<40} {:<10}" .format ("Failed requests:" ,
450+ len (outputs )- metrics .completed ))
437451print ("{:<40} {:<10.2f}" .format ("Benchmark duration (s):" ,
438452benchmark_duration ))
439453print ("{:<40} {:<10}" .format ("Total input tokens:" ,metrics .total_input ))
@@ -455,6 +469,12 @@ async def limited_request_func(request_func_input, streaming, pbar,
455469if metrics .avg_decoded_tokens_per_iter > 0.0 :
456470print ("{:<40} {:<10.2f}" .format ("Avg Decoded Tokens per Iter:" ,
457471metrics .avg_decoded_tokens_per_iter ))
472+ if len (outputs )- metrics .completed > 0 :
473+ print (
474+ f"=======================!FAILED REQUESTS!=======================" )
475+ print (f"Total failed requests:{ len (outputs )- metrics .completed } " )
476+ print (
477+ f"=====================!CHECK LOG FOR ERRORS!====================" )
458478
459479result = {
460480"duration" :benchmark_duration ,