99
1010
1111class ProcessEMR (object ):
12- def __init__ (self ,base_folder :str ,dict_path :str = '' ,mode = 'train' ):
12+ def __init__ (self ,base_folder :str ,dict_path :str = '' ,mode = 'train' , directed = False ):
1313self .base_folder = base_folder
1414self .data_folder = base_folder + 'emr_paper/'
1515self .relation_name_file = base_folder + 'rel_names'
1616self .relation_pair_file = base_folder + 'rel_pairs'
17+ self .directed = directed
1718with open (self .relation_name_file ,'rb' )as f :
1819self .category_name = pickle .load (f )
1920# self.reversed_category_name = dict(zip(self.category_name.values(),self.category_name.keys()))
2021self .mode = mode
21- if self .mode == 'train' :
22+ if self .mode == 'train' :
2223self .window = 5
2324else :
2425self .window = 100
2526self .dict_path = dict_path
2627self .files = self .get_files ()
27- self .annotations = self .read_annotations ()
28+ self .annotations = self .read_annotations (directed )
2829self .dictionary = self .read_dictionary ()
2930self .statistics ()
3031self .relation_categories = {'PartOf' :'部位' ,'PropertyOf' :'性质' ,'DegreeOf' :'程度' ,'QualityValue' :'定性值' ,
@@ -40,8 +41,8 @@ def __init__(self, base_folder: str, dict_path: str = '', mode='train'):
4041self .relation_category_labels [relation_category ]= relation_category_index
4142relation_category_index += 1
4243print (len (self .relation_category_labels ))
43- with open (self .base_folder + 'relation_index.pickle' ,'wb' )as f :
44- pickle .dump (self .relation_category_labels ,f )
44+ with open (self .base_folder + 'relation_index.pickle' ,'wb' )as f :
45+ pickle .dump (self .relation_category_labels ,f )
4546self .two_categories = self .generate_re_two_training_data ()
4647self .multi_categories = self .generate_re_mutli_training_data ()
4748self .save_data ()
@@ -89,13 +90,17 @@ def map_to_indices(self, words):
8990return list (map (lambda w :self .dictionary [w ]if w in self .dictionary else self .dictionary [UNK ],words ))
9091
def save_data(self):
    """Pickle the two-category and multi-category datasets to disk.

    Both files are written under ``self.base_folder`` and are prefixed
    with ``self.mode``. When ``self.directed`` is truthy the
    ``*_directed.pickle`` file names are used, otherwise the plain
    ``*.pickle`` names are used.
    """
    # Pick the pair of file-name suffixes matching the directed flag.
    if self.directed:
        suffixes = ('_two_directed.pickle', '_multi_directed.pickle')
    else:
        suffixes = ('_two.pickle', '_multi.pickle')

    # Same order as the suffixes: two-category data first, then multi.
    payloads = (self.two_categories, self.multi_categories)

    prefix = self.base_folder + self.mode
    for suffix, payload in zip(suffixes, payloads):
        with open(prefix + suffix, 'wb') as f:
            pickle.dump(payload, f)

99104
100105def read_dictionary (self ,reverse = False ):
101106dictionary = {}
@@ -116,7 +121,7 @@ def get_files(self):
116121files .add (os .path .splitext (l )[0 ])
117122return files
118123
119- def read_annotations (self ):
124+ def read_annotations (self , directed = False ):
120125all_sentences = []
121126for file in self .files :
122127filename = self .data_folder + self .mode + '/' + file
@@ -150,7 +155,7 @@ def read_annotations(self):
150155print ('fuck your world' )
151156sentence ['new_entities' ][entity ['index' ]]= entity
152157
153- data = self .read_relation_in_single_file (filename + '.ann' ,sentence_dict )
158+ data = self .read_relation_in_single_file (filename + '.ann' ,sentence_dict , directed )
154159all_sentences .extend (data .values ())
155160return all_sentences
156161