Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit1976ddd

Browse files
add max margin loss
1 parent90891ae commit1976ddd

File tree

11 files changed

+1131
-107
lines changed

11 files changed

+1131
-107
lines changed

‎python/dnlp/config/config.py‎

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@
33

44
classDnnCrfConfig(object):
55
def__init__(self,*,skip_left:int=0,skip_right:int=2,embed_size:int=100,hidden_units:int=150,
6-
learning_rate:float=0.2,lam:float=1e-4,dropout_rate:float=0.2,batch_length:int=300,
7-
batch_size=20):
6+
learning_rate:float=0.005,lam:float=1e-4,dropout_rate:float=0.2,batch_length:int=300,
7+
batch_size:int=30,hinge_rate:float=0.2):
88
self.__skip_left=skip_left
99
self.__skip_right=skip_right
1010
self.__embed_size=embed_size
@@ -14,6 +14,7 @@ def __init__(self, *, skip_left: int = 0, skip_right: int = 2, embed_size: int =
1414
self.__dropout_rate=dropout_rate
1515
self.__batch_length=batch_length
1616
self.__batch_size=batch_size
17+
self.__hinge_rate=hinge_rate
1718

1819
@property
1920
defskip_left(self):
@@ -51,6 +52,9 @@ def batch_length(self):
5152
defbatch_size(self):
5253
returnself.__batch_size
5354

55+
@property
56+
defhinge_rate(self):
57+
returnself.__hinge_rate
5458

5559
classMMTNNConfig(object):
5660
def__init__(self,*,skip_left:int=2,skip_right:int=2,character_embed_size:int=50,

‎python/dnlp/config/re_config.py‎

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
# -*- coding:utf-8 -*-


class RECNNConfig(object):
    """Hyperparameter container for the relation-extraction CNN (RECNN).

    All values are captured at construction time and exposed through
    read-only properties, matching the convention of the other config
    classes in this package (e.g. DnnCrfConfig).

    :param window_size: convolution window widths, one filter bank per width
    :param filter_size: number of filters per window width
    :param learning_rate: optimizer step size
    :param dropout_rate: dropout keep-out probability applied during training
    :param lam: L2 regularization coefficient
    :param word_embed_size: dimensionality of word embeddings
    :param position_embed_size: dimensionality of position embeddings
    :param batch_length: fixed (padded) sentence length per batch
    :param batch_size: sentences per training batch
    """

    def __init__(self, window_size: tuple = (3, 4,), filter_size: int = 150, learning_rate: float = 0.01,
                 dropout_rate: float = 0.5, lam: float = 1e-3, word_embed_size: int = 300,
                 position_embed_size: int = 50, batch_length: int = 85, batch_size: int = 20):
        # Name-mangled attributes keep the values effectively read-only;
        # access goes through the properties below.
        self.__window_size = window_size
        self.__filter_size = filter_size
        self.__learning_rate = learning_rate
        self.__dropout_rate = dropout_rate
        self.__lam = lam
        self.__word_embed_size = word_embed_size
        self.__position_embed_size = position_embed_size
        self.__batch_length = batch_length
        self.__batch_size = batch_size

    @property
    def window_size(self):
        return self.__window_size

    @property
    def filter_size(self):
        return self.__filter_size

    @property
    def learning_rate(self):
        return self.__learning_rate

    @property
    def dropout_rate(self):
        return self.__dropout_rate

    @property
    def lam(self):
        return self.__lam

    @property
    def word_embed_size(self):
        return self.__word_embed_size

    @property
    def position_embed_size(self):
        return self.__position_embed_size

    @property
    def batch_length(self):
        return self.__batch_length

    @property
    def batch_size(self):
        return self.__batch_size

‎python/dnlp/core/dnn_crf.py‎

Lines changed: 106 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -3,18 +3,20 @@
33
importnumpyasnp
44
importmath
55
importos
6+
importtime
67
fromdnlp.core.dnn_crf_baseimportDnnCrfBase
7-
fromdnlp.config.configimportDnnCrfConfig
8+
fromdnlp.configimportDnnCrfConfig
89

910

1011
classDnnCrf(DnnCrfBase):
1112
def__init__(self,*,config:DnnCrfConfig=None,task='cws',data_path:str='',dtype:type=tf.float32,
12-
mode:str='train',dropout_position:str='input',predict:str='ll',nn:str,model_path:str='',
13+
mode:str='train',dropout_position:str='input',train:str='mm',predict:str='ll',nn:str,
14+
model_path:str='',
1315
embedding_path:str='',remark:str=''):
1416
ifmodenotin ['train','predict']:
1517
raiseException('mode error')
1618
ifnnnotin ['mlp','rnn','lstm','bilstm','gru']:
17-
raiseException('name ofneural network entered is not supported')
19+
raiseException('neural network name entered is not supported')
1820

1921
DnnCrfBase.__init__(self,config,data_path,mode,model_path)
2022
self.dtype=dtype
@@ -24,6 +26,7 @@ def __init__(self, *, config: DnnCrfConfig = None, task='cws', data_path: str =
2426
self.remark=remark
2527
self.embedding_path=embedding_path
2628
self.graph=tf.Graph()
29+
self.train=train
2730
withself.graph.as_default():
2831
# 构建
2932
# tf.reset_default_graph()
@@ -35,6 +38,12 @@ def __init__(self, *, config: DnnCrfConfig = None, task='cws', data_path: str =
3538
ifmode=='train':
3639
self.input=tf.placeholder(tf.int32, [self.batch_size,self.batch_length,self.windows_size])
3740
self.real_indices=tf.placeholder(tf.int32, [self.batch_size,self.batch_length])
41+
# self.sentence_inputs = tf.data.Dataset.from_tensor_slices(self.sentences).repeat(-1).batch(self.batch_size)
42+
# self.label_inputs = tf.data.Dataset.from_tensor_slices(self.labels).repeat(-1).batch(self.batch_size)
43+
# self.length_inputs = tf.data.Dataset.from_tensor_slices(self.sentence_lengths).repeat(-1).batch(self.batch_size)
44+
# self.sentence_iterator = self.sentence_inputs.make_initializable_iterator()
45+
# self.label_iterator = self.label_inputs.make_initializable_iterator()
46+
# self.length_iterator = self.length_inputs.make_initializable_iterator()
3847
else:
3948
self.input=tf.placeholder(tf.int32, [None,self.windows_size])
4049

@@ -43,7 +52,7 @@ def __init__(self, *, config: DnnCrfConfig = None, task='cws', data_path: str =
4352
# 查找表层
4453
self.embedding_layer=self.get_embedding_layer()
4554
# 执行drpout
46-
ifdropout_position=='input':
55+
ifmode=='train'anddropout_position=='input':
4756
self.embedding_layer=self.get_dropout_layer(self.embedding_layer)
4857
# 隐藏层
4958
ifnn=='mlp':
@@ -56,10 +65,11 @@ def __init__(self, *, config: DnnCrfConfig = None, task='cws', data_path: str =
5665
self.hidden_layer=self.get_gru_layer(self.embedding_layer)
5766
else:
5867
self.hidden_layer=self.get_rnn_layer(self.embedding_layer)
59-
ifdropout_position=='hidden':
68+
ifmode=='train'anddropout_position=='hidden':
6069
self.hidden_layer=self.get_dropout_layer(self.hidden_layer)
6170
# 输出层
6271
self.output=self.get_output_layer(self.hidden_layer)
72+
# self.output = tf.nn.softmax(self.output,2)
6373

6474
ifmode=='predict':
6575
ifpredict!='ll':
@@ -69,18 +79,46 @@ def __init__(self, *, config: DnnCrfConfig = None, task='cws', data_path: str =
6979
self.sess.run(tf.global_variables_initializer())
7080
tf.train.Saver().restore(save_path=self.model_path,sess=self.sess)
7181
else:
72-
self.crf_loss,_=tf.contrib.crf.crf_log_likelihood(self.output,self.real_indices,self.seq_length,
73-
self.transition)
74-
#self.loss = -self.loss
7582
self.regularization=tf.contrib.layers.apply_regularization(tf.contrib.layers.l2_regularizer(self.lam),
76-
self.params )
77-
self.loss=-self.crf_loss/self.batch_size+self.regularization
83+
self.params)
84+
iftrain=='ll':
85+
self.crf_loss,_=tf.contrib.crf.crf_log_likelihood(self.output,self.real_indices,self.seq_length,
86+
self.transition)
87+
# self.loss = -self.loss
88+
self.loss=-self.crf_loss/self.batch_size+self.regularization
89+
# self.optimizer = tf.train.AdagradOptimizer(self.learning_rate)
90+
# self.optimizer.minimize(self.loss)
91+
# self.train = self.optimizer.minimize(self.loss)
92+
else:
93+
self.true_seq=tf.placeholder(tf.int32, [self.batch_size,self.batch_length])
94+
self.pred_seq=tf.placeholder(tf.int32, [self.batch_size,self.batch_length])
95+
self.output_placeholder=tf.placeholder(self.dtype, [self.batch_size,self.batch_length,self.tags_count])
96+
batch_index=np.repeat(np.expand_dims(np.arange(0,self.batch_size),1),self.batch_length,1)
97+
sent_index=np.repeat(np.expand_dims(np.arange(0,self.batch_length),0),self.batch_size,0)
98+
true_index=tf.stack([batch_index,sent_index,self.true_seq],axis=2)
99+
pred_index=tf.stack([batch_index,sent_index,self.pred_seq ],axis=2)
100+
state_difference=tf.reduce_sum(
101+
tf.gather_nd(self.output_placeholder,pred_index)-tf.gather_nd(self.output_placeholder,true_index),
102+
axis=1)
103+
# r = tf.stack([self.true_seq[:, :-1], self.true_seq[:, 1:]], 2)
104+
transition_difference=tf.reduce_sum(
105+
tf.gather_nd(self.transition,tf.stack([self.pred_seq[:, :-1],self.pred_seq[:,1:]],2))-tf.gather_nd(
106+
self.transition,tf.stack([self.true_seq[:, :-1],self.true_seq[:,1:]],2)),axis=1)
107+
init_transition_difference=tf.gather_nd(self.transition_init,
108+
tf.expand_dims(self.pred_seq[:,0],1))-tf.gather_nd(
109+
self.transition_init,tf.expand_dims(self.true_seq[:,0],1))
110+
hinge_loss=tf.count_nonzero(self.pred_seq-self.true_seq,axis=1,dtype=self.dtype)
111+
self.score_diff=state_difference+transition_difference+init_transition_difference+self.hinge_rate*hinge_loss
112+
self.loss=tf.reduce_sum(tf.maximum(0.0,self.score_diff))/self.batch_size+self.regularization
113+
self.learning_rate=0.01
114+
self.optimizer=tf.train.GradientDescentOptimizer(self.learning_rate)
78115
self.optimizer=tf.train.AdagradOptimizer(self.learning_rate)
79-
self.new_optimizer=tf.train.AdamOptimizer()
116+
#self.new_optimizer = tf.train.AdamOptimizer()
80117
gvs=self.optimizer.compute_gradients(self.loss)
81-
cliped_grad= [(tf.clip_by_norm(grad,5)ifgradisnotNoneelsegrad,var)forgrad,varingvs]
82-
self.train=self.optimizer.apply_gradients(cliped_grad)# self.optimizer.minimize(self.loss)
83-
# self.train = self.optimizer.minimize(self.loss)
118+
cliped_grad= [(tf.clip_by_norm(grad,10)ifgradisnotNoneelsegrad,var)forgrad,varingvs]
119+
# self.train_model = self.optimizer.apply_gradients(cliped_grad)
120+
self.train_model=self.optimizer.minimize(self.loss)
121+
84122
current_dir=os.path.dirname(__file__)
85123
dest_dir=os.path.realpath(os.path.join(current_dir,'..\\data\\logs'))
86124
self.train_writer=tf.summary.FileWriter(dest_dir,flush_secs=10)
@@ -89,18 +127,63 @@ def __init__(self, *, config: DnnCrfConfig = None, task='cws', data_path: str =
89127
self.merged=tf.summary.merge_all()
90128

91129
deffit(self,epochs:int=50,interval:int=10):
130+
ifself.train=='ll':
131+
self.fit_ll(epochs,interval)
132+
else:
133+
self.fit_mm(epochs,interval)
134+
135+
deffit_ll(self,epochs:int=50,interval:int=10):
92136
withtf.Session(graph=self.graph)assess:
93137
tf.global_variables_initializer().run()
138+
# sess.run(self.sentence_iterator.initializer)
139+
# sess.run(self.label_iterator.initializer)
140+
# sess.run(self.length_iterator.initializer)
141+
# sentence = self.sentence_iterator.get_next()
142+
# label = self.label_iterator.get_next()
143+
# length = self.length_iterator.get_next()
94144
saver=tf.train.Saver(max_to_keep=epochs)
95145
forepochinrange(1,epochs+1):
96146
print('epoch:',epoch)
97147
j=0
98148
foriinrange(self.batch_count):
99-
characters,labels,lengths=self.get_batch()
100-
feed_dict= {self.input:characters,self.real_indices:labels,self.seq_length:lengths}
101-
_,summary,loss=sess.run([self.train,self.merged,self.mean_loss],feed_dict=feed_dict)
149+
sentences,labels,lengths=self.get_batch()
150+
# sentences = sess.run(sentence)
151+
# labels = sess.run(label)
152+
# lengths = sess.run(length)
153+
feed_dict= {self.input:sentences,self.real_indices:labels,self.seq_length:lengths}
154+
_,summary,loss=sess.run([self.train_model,self.merged,self.mean_loss],feed_dict=feed_dict)
102155
self.train_writer.add_summary(summary,j)
103156
j+=1
157+
ifepoch%interval==0:
158+
ifnotself.embedding_path:
159+
ifself.remark:
160+
model_path='../dnlp/models/emr/{0}-{1}-{2}-{3}.ckpt'.format(self.task,self.nn,self.remark,epoch)
161+
else:
162+
model_path='../dnlp/models/emr/{0}-{1}-{2}.ckpt'.format(self.task,self.nn,epoch)
163+
else:
164+
model_path='../dnlp/models/emr/{0}-{1}-embedding-{2}.ckpt'.format(self.task,self.nn,epoch)
165+
saver.save(sess,model_path)
166+
self.save_config(model_path)
167+
self.train_writer.close()
168+
169+
deffit_mm(self,epochs:int=50,interval:int=1):
170+
withtf.Session(graph=self.graph)assess:
171+
tf.global_variables_initializer().run()
172+
saver=tf.train.Saver(max_to_keep=epochs)
173+
forepochinrange(1,epochs+1):
174+
print('epoch:',epoch)
175+
start=time.time()
176+
foriinrange(self.batch_count):
177+
sentences,labels,lengths=self.get_batch()
178+
transition=self.transition.eval()
179+
transition_init=self.transition_init.eval()
180+
feed_dict= {self.input:sentences,self.seq_length:lengths}
181+
output=sess.run(self.output,feed_dict=feed_dict)
182+
pred_seq= []
183+
foriinrange(self.batch_size):
184+
pred_seq.append(self.viterbi(output[i, :lengths[i], :].T,transition,transition_init,self.batch_length))
185+
feed_dict= {self.true_seq:labels,self.pred_seq:pred_seq,self.output_placeholder:output}
186+
sess.run(self.train_model,feed_dict=feed_dict)
104187
ifepoch%interval==0:
105188
ifnotself.embedding_path:
106189
ifself.remark:
@@ -111,16 +194,17 @@ def fit(self, epochs: int = 50, interval: int = 10):
111194
model_path='../dnlp/models/{0}-{1}-embedding-{2}.ckpt'.format(self.task,self.nn,epoch)
112195
saver.save(sess,model_path)
113196
self.save_config(model_path)
114-
self.train_writer.close()
197+
print('epoch time', (time.time()-start)/60)
115198

116199
defpredict(self,sentence:str,return_labels=False):
117200
ifself.mode!='predict':
118201
raiseException('mode is not allowed to predict')
119202

120203
input=self.indices2input(self.sentence2indices(sentence))
121204
runner= [self.output,self.transition,self.transition_init]
122-
output,trans,trans_init=self.sess.run(runner,feed_dict={self.input:input})
123-
labels=self.viterbi(output,trans,trans_init)
205+
output,trans,trans_init=self.sess.run(runner,feed_dict={self.input:input,self.seq_length:[len(sentence)]})
206+
output=np.squeeze(output,0)
207+
labels=self.viterbi(output.T,trans,trans_init)
124208
ifself.task=='cws':
125209
result=self.tags2words(sentence,labels)
126210
else:
@@ -152,7 +236,7 @@ def predict_ll(self, sentence: str, return_labels=False):
152236

153237
defget_embedding_layer(self)->tf.Tensor:
154238
ifself.embedding_path:
155-
embeddings=tf.Variable(np.load(self.embedding_path),trainable=False,name='embeddings')
239+
embeddings=tf.Variable(np.load(self.embedding_path),trainable=True,name='embeddings')
156240
else:
157241
embeddings=self.__get_variable([self.dict_size,self.embed_size],'embeddings')
158242
self.params.append(embeddings)
@@ -178,7 +262,7 @@ def get_rnn_layer(self, layer: tf.Tensor) -> tf.Tensor:
178262

179263
defget_lstm_layer(self,layer:tf.Tensor)->tf.Tensor:
180264
lstm=tf.nn.rnn_cell.BasicLSTMCell(self.hidden_units)
181-
lstm_output,lstm_out_state=tf.nn.dynamic_rnn(lstm,layer,dtype=self.dtype)
265+
lstm_output,lstm_out_state=tf.nn.dynamic_rnn(lstm,layer,sequence_length=self.seq_length,dtype=self.dtype)
182266
self.params+= [vforvintf.global_variables()ifv.name.startswith('rnn')]
183267
returnlstm_output
184268

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp