33from shutil import copyfile
44from dnlp .data_process .process_cws import ProcessCWS
55
def init() -> None:
    """Ensure the model output directory exists.

    Creates ``../dnlp/models/`` (resolved against the current working
    directory), including any missing parent directories. Calling this
    when the directory is already present is a no-op.

    Raises:
        FileExistsError: If the path exists but is not a directory.
    """
    model_path = '../dnlp/models/'
    # exist_ok=True avoids the check-then-create race of a separate
    # os.path.exists() test followed by os.makedirs().
    os.makedirs(model_path, exist_ok=True)
610
def copy() -> None:
    """Copy the raw corpus files into the package data folder.

    Copies the ``pku_*`` and ``msr_*`` training/test ``.utf8`` files from
    ``../../datasets/`` to ``../dnlp/data/cws/``. Both paths are resolved
    against the current working directory. The destination folder is
    created if it is missing, and existing destination files are
    overwritten.

    Raises:
        FileNotFoundError: If one of the source files does not exist.
    """
    src_folder = '../../datasets/'
    dst_base_folder = '../dnlp/data/cws/'
    # exist_ok=True avoids the check-then-create race of a separate
    # os.path.exists() test followed by os.makedirs().
    os.makedirs(dst_base_folder, exist_ok=True)
    files = [
        'pku_training.utf8',
        'pku_test.utf8',
        'msr_training.utf8',
        'msr_test.utf8',
    ]
    for f in files:
        copyfile(os.path.join(src_folder, f), os.path.join(dst_base_folder, f))
1519
@@ -19,9 +23,13 @@ def build_cws_datasets():
1923if not os .path .exists (base_folder ):
2024os .makedirs (base_folder )
2125ProcessCWS (files = ('pku_training.utf8' ,),base_folder = base_folder ,name = 'pku_training' )
22- dict_path = base_folder + 'pku_training_dict.utf8'
23- ProcessCWS (files = ('pku_test.utf8' ,),dict_path = dict_path ,base_folder = base_folder ,name = 'pku_test' ,mode = 'test' )
26+ ProcessCWS (files = ('msr_training.utf8' ,),base_folder = base_folder ,name = 'msr_training' )
27+ pku_dict_path = base_folder + 'pku_training_dict.utf8'
28+ ProcessCWS (files = ('pku_test.utf8' ,),dict_path = pku_dict_path ,base_folder = base_folder ,name = 'pku_test' ,mode = 'test' )
29+ msr_dict_path = base_folder + 'msr_training_dict.utf8'
30+ ProcessCWS (files = ('msr_test.utf8' ,),dict_path = msr_dict_path ,base_folder = base_folder ,name = 'msr_test' ,mode = 'test' )
2431
if __name__ == '__main__':
    # Run the preparation steps in order: create the model directory,
    # copy the raw corpus files, then build the processed datasets.
    for step in (init, copy, build_cws_datasets):
        step()