1111import argparse
1212from torch .utils .data import Dataset ,DataLoader
1313import json
14+
# Enable the built-in cuDNN auto-tuner so that the best algorithm for the
# current hardware is selected.
# See https://discuss.pytorch.org/t/what-does-torch-backends-cudnn-benchmark-do/5936
# If you inspect a run with a profiler, several convolution methods (Winograd,
# FFT, etc.) are tried on the first iteration and the best one is then kept
# for the device.
torch.backends.cudnn.benchmark = True
1819
1920
20-
# Maps each torchvision model module to the names of the constructors that are
# benchmarked from it. `[1:]` skips the first exported name of each list
# (presumably the model class itself, leaving only the factory functions --
# verify against the installed torchvision version).
MODEL_LIST = {
    models.mnasnet: models.mnasnet.__all__[1:],
    models.resnet: models.resnet.__all__[1:],
    models.densenet: models.densenet.__all__[1:],
    models.squeezenet: models.squeezenet.__all__[1:],
    models.vgg: models.vgg.__all__[1:],
    # The v2 and v3 export lists live in the same module, so they must be
    # merged under a single key: a dict literal keeps only the last value
    # for a repeated key, which would silently drop the v2 models.
    models.mobilenet: models.mobilenet.mv2_all[1:] + models.mobilenet.mv3_all[1:],
    models.shufflenetv2: models.shufflenetv2.__all__[1:],
}
3131
# Each entry is used as a method name via getattr(obj, precision)(), so it
# must match the tensor/module casting method exactly (no stray whitespace).
precisions = ["float", "half", "double"]
# On Volta and later architectures, half precision may use tensor cores.
# Because of the gradient overflow problem, apex is recommended for practical use.
device_name = str(torch.cuda.get_device_name(0))

# Training settings
parser = argparse.ArgumentParser(description="PyTorch Benchmarking")
parser.add_argument(
    "--WARM_UP", "-w", type=int, default=5, required=False, help="Num of warm up"
)
parser.add_argument(
    "--NUM_TEST", "-n", type=int, default=50, required=False, help="Num of Test"
)
parser.add_argument(
    "--BATCH_SIZE", "-b", type=int, default=12, required=False, help="Num of batch size"
)
parser.add_argument(
    "--NUM_CLASSES", "-c", type=int, default=1000, required=False, help="Num of class"
)
parser.add_argument(
    "--NUM_GPU", "-g", type=int, default=1, required=False, help="Num of gpus"
)
parser.add_argument(
    "--folder", "-f", type=str, default="result", required=False, help="folder to save results"
)
args = parser.parse_args()
# The batch size given on the command line is multiplied by the GPU count, so
# the batch handed to the (possibly DataParallel-wrapped) model grows with it.
args.BATCH_SIZE *= args.NUM_GPU
4752
class RandomDataset(Dataset):
    """In-memory dataset of random tensors standing in for images.

    All samples are drawn once at construction with ``torch.randn`` and stored
    in a single tensor of shape ``(3, 224, 224, length)``; sample ``i`` is the
    slice along the last dimension.
    """

    def __init__(self, length):
        """Allocate ``length`` random samples of shape ``(3, 224, 224)``."""
        super().__init__()
        self.len = length
        self.data = torch.randn(3, 224, 224, length)

    def __getitem__(self, index):
        """Return the sample at ``index`` as a ``(3, 224, 224)`` tensor."""
        return self.data[:, :, :, index]

    def __len__(self):
        """Return the number of samples requested at construction."""
        return self.len
5764
58- rand_loader = DataLoader (dataset = RandomDataset (args .BATCH_SIZE * (args .WARM_UP + args .NUM_TEST )),
59- batch_size = args .BATCH_SIZE ,shuffle = False ,num_workers = 8 )
60- def train (precision = 'single' ):
65+
66+ rand_loader = DataLoader (
67+ dataset = RandomDataset (args .BATCH_SIZE * (args .WARM_UP + args .NUM_TEST )),
68+ batch_size = args .BATCH_SIZE ,
69+ shuffle = False ,
70+ num_workers = 8 ,
71+ )
72+
73+
74+ def train (precision = "single" ):
6175"""use fake image for training speed test"""
6276target = torch .LongTensor (args .BATCH_SIZE ).random_ (args .NUM_CLASSES ).cuda ()
6377criterion = nn .CrossEntropyLoss ()
@@ -66,108 +80,119 @@ def train(precision='single'):
6680for model_name in MODEL_LIST [model_type ]:
6781model = getattr (model_type ,model_name )(pretrained = False )
6882if args .NUM_GPU > 1 :
69- model = nn .DataParallel (model ,device_ids = range (args .NUM_GPU ))
70- model = getattr (model ,precision )()
71- model = model .to (' cuda' )
83+ model = nn .DataParallel (model ,device_ids = range (args .NUM_GPU ))
84+ model = getattr (model ,precision )()
85+ model = model .to (" cuda" )
7286durations = []
73- print (f' Benchmarking Training{ precision } precision type{ model_name } ' )
74- for step ,img in enumerate (rand_loader ):
75- img = getattr (img ,precision )()
87+ print (f" Benchmarking Training{ precision } precision type{ model_name } " )
88+ for step ,img in enumerate (rand_loader ):
89+ img = getattr (img ,precision )()
7690torch .cuda .synchronize ()
7791start = time .time ()
7892model .zero_grad ()
79- prediction = model (img .to (' cuda' ))
93+ prediction = model (img .to (" cuda" ))
8094loss = criterion (prediction ,target )
8195loss .backward ()
8296torch .cuda .synchronize ()
8397end = time .time ()
8498if step >= args .WARM_UP :
85- durations .append ((end - start )* 1000 )
86- print (f' { model_name } model average train time :{ sum (durations )/ len (durations )} ms' )
99+ durations .append ((end - start )* 1000 )
100+ print (f" { model_name } model average train time :{ sum (durations )/ len (durations )} ms" )
87101del model
88102benchmark [model_name ]= durations
89103return benchmark
90104
def inference(precision="float"):
    """Time the forward pass of every model in MODEL_LIST on random images.

    Each model is built with ``pretrained=False``, wrapped in
    ``nn.DataParallel`` when ``args.NUM_GPU > 1``, cast by calling the method
    named by ``precision``, moved to the GPU and switched to eval mode. Every
    batch of ``rand_loader`` is timed between two ``torch.cuda.synchronize()``
    calls, and the first ``args.WARM_UP`` steps are not recorded.

    Args:
        precision: Name of the casting method applied to both the model and
            the input batch (e.g. ``"float"``, ``"half"``, ``"double"``).

    Returns:
        dict mapping each model name to the list of recorded per-step
        durations in milliseconds.
    """
    benchmark = {}
    with torch.no_grad():
        for model_type, model_names in MODEL_LIST.items():
            for model_name in model_names:
                model = getattr(model_type, model_name)(pretrained=False)
                if args.NUM_GPU > 1:
                    model = nn.DataParallel(model, device_ids=range(args.NUM_GPU))
                model = getattr(model, precision)()
                model = model.to("cuda")
                model.eval()
                durations = []
                print(f"Benchmarking Inference {precision} precision type {model_name} ")
                for step, img in enumerate(rand_loader):
                    img = getattr(img, precision)()
                    torch.cuda.synchronize()
                    # perf_counter is monotonic, so the interval cannot be
                    # distorted by wall-clock adjustments.
                    start = time.perf_counter()
                    # The host-to-device copy happens inside the timed region.
                    model(img.to("cuda"))
                    torch.cuda.synchronize()
                    end = time.perf_counter()
                    if step >= args.WARM_UP:
                        durations.append((end - start) * 1000)
                if durations:
                    average = sum(durations) / len(durations)
                    print(f"{model_name} model average inference time : {average}ms")
                else:
                    # No step was timed (e.g. NUM_TEST is 0); avoid dividing by zero.
                    print(f"{model_name} model average inference time : n/a (no timed steps)")
                del model
                benchmark[model_name] = durations
    return benchmark
117134
135+
if __name__ == "__main__":
    # Script entry point: record the system/GPU configuration, run the
    # training and inference benchmarks for every precision, and write one
    # CSV per (precision, mode) into the output folder.
    folder_name = args.folder

    # Tag the result files with the device name and the GPU count.
    device_name = f"{device_name}_{args.NUM_GPU}_gpus_"

    # Built with join rather than backslash continuations so that no source
    # indentation leaks into the recorded text.
    system_configs = "\n".join(
        [
            str(platform.uname()),
            str(psutil.cpu_freq()),
            f"cpu_count: {psutil.cpu_count()}",
            f"memory_available: {psutil.virtual_memory().available}",
        ]
    )

    gpu_labels = [
        "Number of GPUs on current device : ",
        "CUDA Version : ",
        "Cudnn Version : ",
        "Device Name : ",
    ]
    gpu_values = [
        torch.cuda.device_count(),
        torch.version.cuda,
        torch.backends.cudnn.version(),
        torch.cuda.get_device_name(0),
    ]
    gpu_configs = [label + str(value) for label, value in zip(gpu_labels, gpu_values)]

    os.makedirs(folder_name, exist_ok=True)
    with open(os.path.join(folder_name, "config.json"), "w") as f:
        json.dump(vars(args), f, indent=2)

    start_time = datetime.datetime.now().strftime("%Y/%m/%d %H:%M:%S")
    print(f"benchmark start : {start_time}")
    for line in gpu_configs:
        print(line)
    print(system_configs)

    system_info_path = os.path.join(folder_name, "system_info.txt")
    with open(system_info_path, "w") as f:
        f.write(f"benchmark start : {start_time}\n")
        f.write("system_configs\n\n")
        f.write(system_configs)
        f.write("\ngpu_configs\n\n")
        f.writelines(line + "\n" for line in gpu_configs)

    for precision in precisions:
        train_result_df = pd.DataFrame(train(precision))
        path = f"{folder_name}/{device_name}_{precision}_model_train_benchmark.csv"
        train_result_df.to_csv(path, index=False)

        inference_result_df = pd.DataFrame(inference(precision))
        path = f"{folder_name}/{device_name}_{precision}_model_inference_benchmark.csv"
        inference_result_df.to_csv(path, index=False)

    end_time = datetime.datetime.now().strftime("%Y/%m/%d %H:%M:%S")
    print(f"benchmark end : {end_time}")
    # Append, so the start-of-run information written above is kept.
    with open(system_info_path, "a") as f:
        f.write(f"benchmark end : {end_time}\n")