Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit f810c57

Browse files
committed
Fixed issue ryujaehun#18
- cope with mobilenet v3 related error
- issue #18
- edit code format using black
1 parent e3df158 commit f810c57

File tree

1 file changed

+98
-73
lines changed

1 file changed

+98
-73
lines changed

‎benchmark_models.py‎

Lines changed: 98 additions & 73 deletions
Original file line number | Diff line number | Diff line change
@@ -11,53 +11,67 @@
1111
importargparse
1212
fromtorch.utils.dataimportDataset,DataLoader
1313
importjson
14+
1415
torch.backends.cudnn.benchmark=True
1516
# https://discuss.pytorch.org/t/what-does-torch-backends-cudnn-benchmark-do/5936
16-
# This flag allows you to enable the inbuilt cudnn auto-tuner to find the best algorithm to use for your hardware.
17+
# This flag allows you to enable the inbuilt cudnn auto-tuner to find the best algorithm to use for your hardware.
1718
# If you check it using the profile tool, the cnn method such as winograd, fft, etc. is used for the first iteration and the best operation is selected for the device.
1819

1920

20-
2121
MODEL_LIST= {
22-
23-
models.mnasnet:models.mnasnet.__all__[1:],
22+
models.mnasnet:models.mnasnet.__all__[1:],
2423
models.resnet:models.resnet.__all__[1:],
2524
models.densenet:models.densenet.__all__[1:],
2625
models.squeezenet:models.squeezenet.__all__[1:],
2726
models.vgg:models.vgg.__all__[1:],
28-
models.mobilenet:models.mobilenet.__all__[1:],
29-
models.shufflenetv2:models.shufflenetv2.__all__[1:]
27+
models.mobilenet:models.mobilenet.mv2_all[1:],
28+
models.mobilenet:models.mobilenet.mv3_all[1:],
29+
models.shufflenetv2:models.shufflenetv2.__all__[1:],
3030
}
3131

32-
precisions=["float","half",'double']
32+
precisions=["float","half","double"]
3333
# For post-voltaic architectures, there is a possibility to use tensor-core at half precision.
3434
# Due to the gradient overflow problem, apex is recommended for practical use.
35-
device_name=str(torch.cuda.get_device_name(0))
35+
device_name=str(torch.cuda.get_device_name(0))
3636
# Training settings
37-
parser=argparse.ArgumentParser(description='PyTorch Benchmarking')
38-
parser.add_argument('--WARM_UP','-w',type=int,default=5,required=False,help="Num of warm up")
39-
parser.add_argument('--NUM_TEST','-n',type=int,default=50,required=False,help="Num of Test")
40-
parser.add_argument('--BATCH_SIZE','-b',type=int,default=12,required=False,help='Num of batch size')
41-
parser.add_argument('--NUM_CLASSES','-c',type=int,default=1000,required=False,help='Num of class')
42-
parser.add_argument('--NUM_GPU','-g',type=int,default=1,required=False,help='Num of gpus')
43-
parser.add_argument('--folder','-f',type=str,default='result',required=False,help='folder to save results')
37+
parser=argparse.ArgumentParser(description="PyTorch Benchmarking")
38+
parser.add_argument("--WARM_UP","-w",type=int,default=5,required=False,help="Num of warm up")
39+
parser.add_argument("--NUM_TEST","-n",type=int,default=50,required=False,help="Num of Test")
40+
parser.add_argument(
41+
"--BATCH_SIZE","-b",type=int,default=12,required=False,help="Num of batch size"
42+
)
43+
parser.add_argument(
44+
"--NUM_CLASSES","-c",type=int,default=1000,required=False,help="Num of class"
45+
)
46+
parser.add_argument("--NUM_GPU","-g",type=int,default=1,required=False,help="Num of gpus")
47+
parser.add_argument(
48+
"--folder","-f",type=str,default="result",required=False,help="folder to save results"
49+
)
4450
args=parser.parse_args()
45-
args.BATCH_SIZE*=args.NUM_GPU
46-
classRandomDataset(Dataset):
51+
args.BATCH_SIZE*=args.NUM_GPU
4752

48-
def__init__(self,length):
53+
54+
classRandomDataset(Dataset):
55+
def__init__(self,length):
4956
self.len=length
50-
self.data=torch.randn(3,224,224,length)
57+
self.data=torch.randn(3,224,224,length)
5158

5259
def__getitem__(self,index):
53-
returnself.data[:,:,:,index]
60+
returnself.data[:, :, :,index]
5461

5562
def__len__(self):
5663
returnself.len
5764

58-
rand_loader=DataLoader(dataset=RandomDataset(args.BATCH_SIZE*(args.WARM_UP+args.NUM_TEST)),
59-
batch_size=args.BATCH_SIZE,shuffle=False,num_workers=8)
60-
deftrain(precision='single'):
65+
66+
rand_loader=DataLoader(
67+
dataset=RandomDataset(args.BATCH_SIZE* (args.WARM_UP+args.NUM_TEST)),
68+
batch_size=args.BATCH_SIZE,
69+
shuffle=False,
70+
num_workers=8,
71+
)
72+
73+
74+
deftrain(precision="single"):
6175
"""use fake image for training speed test"""
6276
target=torch.LongTensor(args.BATCH_SIZE).random_(args.NUM_CLASSES).cuda()
6377
criterion=nn.CrossEntropyLoss()
@@ -66,108 +80,119 @@ def train(precision='single'):
6680
formodel_nameinMODEL_LIST[model_type]:
6781
model=getattr(model_type,model_name)(pretrained=False)
6882
ifargs.NUM_GPU>1:
69-
model=nn.DataParallel(model,device_ids=range(args.NUM_GPU))
70-
model=getattr(model,precision)()
71-
model=model.to('cuda')
83+
model=nn.DataParallel(model,device_ids=range(args.NUM_GPU))
84+
model=getattr(model,precision)()
85+
model=model.to("cuda")
7286
durations= []
73-
print(f'Benchmarking Training{precision} precision type{model_name}')
74-
forstep,imginenumerate(rand_loader):
75-
img=getattr(img,precision)()
87+
print(f"Benchmarking Training{precision} precision type{model_name}")
88+
forstep,imginenumerate(rand_loader):
89+
img=getattr(img,precision)()
7690
torch.cuda.synchronize()
7791
start=time.time()
7892
model.zero_grad()
79-
prediction=model(img.to('cuda'))
93+
prediction=model(img.to("cuda"))
8094
loss=criterion(prediction,target)
8195
loss.backward()
8296
torch.cuda.synchronize()
8397
end=time.time()
8498
ifstep>=args.WARM_UP:
85-
durations.append((end-start)*1000)
86-
print(f'{model_name} model average train time :{sum(durations)/len(durations)}ms')
99+
durations.append((end-start)*1000)
100+
print(f"{model_name} model average train time :{sum(durations)/len(durations)}ms")
87101
delmodel
88102
benchmark[model_name]=durations
89103
returnbenchmark
90104

91-
definference(precision='float'):
105+
106+
definference(precision="float"):
92107
benchmark= {}
93108
withtorch.no_grad():
94109
formodel_typeinMODEL_LIST.keys():
95110
formodel_nameinMODEL_LIST[model_type]:
96111
model=getattr(model_type,model_name)(pretrained=False)
97112
ifargs.NUM_GPU>1:
98-
model=nn.DataParallel(model,device_ids=range(args.NUM_GPU))
99-
model=getattr(model,precision)()
100-
model=model.to('cuda')
113+
model=nn.DataParallel(model,device_ids=range(args.NUM_GPU))
114+
model=getattr(model,precision)()
115+
model=model.to("cuda")
101116
model.eval()
102117
durations= []
103-
print(f'Benchmarking Inference{precision} precision type{model_name}')
104-
forstep,imginenumerate(rand_loader):
105-
img=getattr(img,precision)()
118+
print(f"Benchmarking Inference{precision} precision type{model_name}")
119+
forstep,imginenumerate(rand_loader):
120+
img=getattr(img,precision)()
106121
torch.cuda.synchronize()
107122
start=time.time()
108-
model(img.to('cuda'))
123+
model(img.to("cuda"))
109124
torch.cuda.synchronize()
110125
end=time.time()
111126
ifstep>=args.WARM_UP:
112-
durations.append((end-start)*1000)
113-
print(f'{model_name} model average inference time :{sum(durations)/len(durations)}ms')
127+
durations.append((end-start)*1000)
128+
print(
129+
f"{model_name} model average inference time :{sum(durations)/len(durations)}ms"
130+
)
114131
delmodel
115132
benchmark[model_name]=durations
116133
returnbenchmark
117134

135+
118136
f"{platform.uname()}\n{psutil.cpu_freq()}\ncpu_count:{psutil.cpu_count()}\nmemory_available:{psutil.virtual_memory().available}"
119137

120138

121-
if__name__=='__main__':
122-
folder_name=args.folder
123-
124-
device_name=f"{device_name}_{args.NUM_GPU}_gpus_"
125-
system_configs=f"{platform.uname()}\n\
139+
if__name__=="__main__":
140+
folder_name=args.folder
141+
142+
device_name=f"{device_name}_{args.NUM_GPU}_gpus_"
143+
system_configs=f"{platform.uname()}\n\
126144
{psutil.cpu_freq()}\n\
127145
cpu_count:{psutil.cpu_count()}\n\
128146
memory_available:{psutil.virtual_memory().available}"
129-
gpu_configs=[torch.cuda.device_count(),torch.version.cuda,torch.backends.cudnn.version(),torch.cuda.get_device_name(0)]
130-
gpu_configs=list(map(str,gpu_configs))
131-
temp=['Number of GPUs on current device : ','CUDA Version : ','Cudnn Version : ','Device Name : ']
147+
gpu_configs= [
148+
torch.cuda.device_count(),
149+
torch.version.cuda,
150+
torch.backends.cudnn.version(),
151+
torch.cuda.get_device_name(0),
152+
]
153+
gpu_configs=list(map(str,gpu_configs))
154+
temp= [
155+
"Number of GPUs on current device : ",
156+
"CUDA Version : ",
157+
"Cudnn Version : ",
158+
"Device Name : ",
159+
]
132160

133161
os.makedirs(folder_name,exist_ok=True)
134-
withopen(os.path.join(folder_name,'config.json'),'w')asf:
162+
withopen(os.path.join(folder_name,"config.json"),"w")asf:
135163
json.dump(vars(args),f,indent=2)
136164
now=datetime.datetime.now()
137-
138-
start_time=now.strftime('%Y/%m/%d %H:%M:%S')
139-
140-
print(f'benchmark start :{start_time}')
141165

142-
foridx,valueinenumerate(zip(temp,gpu_configs)):
143-
gpu_configs[idx]=''.join(value)
166+
start_time=now.strftime("%Y/%m/%d %H:%M:%S")
167+
168+
print(f"benchmark start :{start_time}")
169+
170+
foridx,valueinenumerate(zip(temp,gpu_configs)):
171+
gpu_configs[idx]="".join(value)
144172
print(gpu_configs[idx])
145173
print(system_configs)
146174

147-
withopen(os.path.join(folder_name,"system_info.txt"),"w")asf:
148-
f.writelines(f'benchmark start :{start_time}\n')
149-
f.writelines('system_configs\n\n')
175+
withopen(os.path.join(folder_name,"system_info.txt"),"w")asf:
176+
f.writelines(f"benchmark start :{start_time}\n")
177+
f.writelines("system_configs\n\n")
150178
f.writelines(system_configs)
151-
f.writelines('\ngpu_configs\n\n')
152-
f.writelines(s+'\n'forsingpu_configs)
179+
f.writelines("\ngpu_configs\n\n")
180+
f.writelines(s+"\n"forsingpu_configs)
153181

154-
155182
forprecisioninprecisions:
156-
train_result=train(precision)
183+
train_result=train(precision)
157184
train_result_df=pd.DataFrame(train_result)
158-
path=f'{folder_name}/{device_name}_{precision}_model_train_benchmark.csv'
185+
path=f"{folder_name}/{device_name}_{precision}_model_train_benchmark.csv"
159186
train_result_df.to_csv(path,index=False)
160187

161-
inference_result=inference(precision)
188+
inference_result=inference(precision)
162189
inference_result_df=pd.DataFrame(inference_result)
163-
path=f'{folder_name}/{device_name}_{precision}_model_inference_benchmark.csv'
190+
path=f"{folder_name}/{device_name}_{precision}_model_inference_benchmark.csv"
164191
inference_result_df.to_csv(path,index=False)
165192

166193
now=datetime.datetime.now()
167194

168-
end_time=now.strftime('%Y/%m/%d %H:%M:%S')
169-
print(f'benchmark end :{end_time}')
170-
withopen(os.path.join(folder_name,"system_info.txt"),"a")asf:
171-
f.writelines(f'benchmark end :{end_time}\n')
172-
173-
195+
end_time=now.strftime("%Y/%m/%d %H:%M:%S")
196+
print(f"benchmark end :{end_time}")
197+
withopen(os.path.join(folder_name,"system_info.txt"),"a")asf:
198+
f.writelines(f"benchmark end :{end_time}\n")

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp