Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit d34d342

Browse files
add some codes
1 parent 5f9a927 commit d34d342

File tree

9 files changed

+288
-127
lines changed

9 files changed

+288
-127
lines changed

‎python/dnlp/config/re_config.py‎

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,9 @@
11
# -*- coding:utf-8 -*-
22

33
classRECNNConfig(object):
4-
def__init__(self,window_size:tuple=(3,4,),filter_size:int=150,learning_rate:float=0.1,dropout_rate:float=0.5,
5-
lam:float=1e-4,word_embed_size:int=300,position_embed_size:int=50,batch_length:int=85,
6-
batch_size:int=50):
4+
def__init__(self,window_size:tuple=(3,4,),filter_size:int=150,learning_rate:float=0.05,dropout_rate:float=0.5,
5+
lam:float=5e-4,word_embed_size:int=300,position_embed_size:int=50,batch_length:int=85,
6+
batch_size:int=20):
77
self.__window_size=window_size
88
self.__filter_size=filter_size
99
self.__learning_rate=learning_rate

‎python/dnlp/core/re_cnn.py‎

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -107,7 +107,7 @@ def max_pooling(self, x, window_size):
107107
returntf.nn.max_pool(x,ksize=[1,self.batch_length-window_size+1,1,1],
108108
strides=[1,1,1,1],padding='VALID')
109109

110-
deffit(self,epochs=40,interval=5):
110+
deffit(self,epochs=50,interval=5):
111111
withtf.Session()assess:
112112
tf.global_variables_initializer().run()
113113
sess.graph.finalize()

‎python/dnlp/core/word2vec.py‎

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@
55
importtensorflowastf
66

77

8-
classSkipGram(object):
8+
classWord2Vec(object):
99
def__init__(self,src_filename:str,dest_filename:str,window_size:int=4,mode='skip_gram',batch_size:int=128,
1010
embed_size:int=100,num_sampled:int=64,steps:int=50000):
1111
withopen(src_filename,'rb')asf:
@@ -34,7 +34,8 @@ def train(self):
3434

3535
embed=tf.nn.embedding_lookup(self.embeddings,train_inputs)
3636
ifself.mode=='cbow':
37-
embed=tf.reduce_sum(embed,1)
37+
# embed = tf.reduce_sum(embed, 1)
38+
embed=tf.reduce_mean(embed,1)
3839

3940
nce_weights=tf.Variable(
4041
tf.truncated_normal([self.vocab_size,self.embed_size],

‎python/dnlp/data_process/process_embedding_pretrain.py‎

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@
88
classEmbeddingPertrainProcess(Preprocessor):
99
def__init__(self,base_folder:str,files:tuple,dict_path:str,skip_window:int,
1010
output_name:str,mode:str='character',algorithm='skip_gram'):
11-
Preprocessor.__init__(self,base_folder=base_folder,files=files,dict_path=dict_path)
11+
Preprocessor.__init__(self,base_folder=base_folder,dict_path=dict_path)
1212
self.skip_window=skip_window
1313
self.files=files
1414
self.output_name=output_name

‎python/dnlp/data_process/process_emr.py‎

Lines changed: 18 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -9,22 +9,23 @@
99

1010

1111
classProcessEMR(object):
12-
def__init__(self,base_folder:str,dict_path:str='',mode='train'):
12+
def__init__(self,base_folder:str,dict_path:str='',mode='train',directed=False):
1313
self.base_folder=base_folder
1414
self.data_folder=base_folder+'emr_paper/'
1515
self.relation_name_file=base_folder+'rel_names'
1616
self.relation_pair_file=base_folder+'rel_pairs'
17+
self.directed=directed
1718
withopen(self.relation_name_file,'rb')asf:
1819
self.category_name=pickle.load(f)
1920
# self.reversed_category_name = dict(zip(self.category_name.values(),self.category_name.keys()))
2021
self.mode=mode
21-
ifself.mode=='train':
22+
ifself.mode=='train':
2223
self.window=5
2324
else:
2425
self.window=100
2526
self.dict_path=dict_path
2627
self.files=self.get_files()
27-
self.annotations=self.read_annotations()
28+
self.annotations=self.read_annotations(directed)
2829
self.dictionary=self.read_dictionary()
2930
self.statistics()
3031
self.relation_categories= {'PartOf':'部位','PropertyOf':'性质','DegreeOf':'程度','QualityValue':'定性值',
@@ -40,8 +41,8 @@ def __init__(self, base_folder: str, dict_path: str = '', mode='train'):
4041
self.relation_category_labels[relation_category]=relation_category_index
4142
relation_category_index+=1
4243
print(len(self.relation_category_labels))
43-
withopen(self.base_folder+'relation_index.pickle','wb')asf:
44-
pickle.dump(self.relation_category_labels,f)
44+
withopen(self.base_folder+'relation_index.pickle','wb')asf:
45+
pickle.dump(self.relation_category_labels,f)
4546
self.two_categories=self.generate_re_two_training_data()
4647
self.multi_categories=self.generate_re_mutli_training_data()
4748
self.save_data()
@@ -89,13 +90,17 @@ def map_to_indices(self, words):
8990
returnlist(map(lambdaw:self.dictionary[w]ifwinself.dictionaryelseself.dictionary[UNK],words))
9091

9192
defsave_data(self):
92-
withopen(self.base_folder+self.mode+'_two.pickle','wb')asf:
93-
pickle.dump(self.two_categories,f)
94-
95-
withopen(self.base_folder+self.mode+'_multi.pickle','wb')asf:
96-
pickle.dump(self.multi_categories,f)
97-
93+
ifself.directed:
94+
two_path=self.base_folder+self.mode+'_two_directed.pickle'
95+
multi_path=self.base_folder+self.mode+'_multi_directed.pickle'
96+
else:
97+
two_path=self.base_folder+self.mode+'_two.pickle'
98+
multi_path=self.base_folder+self.mode+'_multi.pickle'
99+
withopen(two_path,'wb')asf:
100+
pickle.dump(self.two_categories,f)
98101

102+
withopen(multi_path,'wb')asf:
103+
pickle.dump(self.multi_categories,f)
99104

100105
defread_dictionary(self,reverse=False):
101106
dictionary= {}
@@ -116,7 +121,7 @@ def get_files(self):
116121
files.add(os.path.splitext(l)[0])
117122
returnfiles
118123

119-
defread_annotations(self):
124+
defread_annotations(self,directed=False):
120125
all_sentences= []
121126
forfileinself.files:
122127
filename=self.data_folder+self.mode+'/'+file
@@ -150,7 +155,7 @@ def read_annotations(self):
150155
print('fuck your world')
151156
sentence['new_entities'][entity['index']]=entity
152157

153-
data=self.read_relation_in_single_file(filename+'.ann',sentence_dict)
158+
data=self.read_relation_in_single_file(filename+'.ann',sentence_dict,directed)
154159
all_sentences.extend(data.values())
155160
returnall_sentences
156161

‎python/dnlp/utils/evaluation.py‎

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -122,7 +122,11 @@ def evaluate_ner(model, data_path:str):
122122
r=c_count/r_count
123123
f1=2*p*r/(p+r)
124124
print(p,r,f1)
125-
returnp,r,f1
125+
126+
returnfmt(p*100),fmt(r*100),fmt(f1*100)
126127
# average = 'macro'
127128
# print(precision_score(all_labels_true, all_labels_predict, average=average))
128129
# print(recall_score(all_labels_true, all_labels_predict, average=average))
130+
131+
deffmt(f):
132+
return'{0:.2f}'.format(f)

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp