Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit ed9ddb0

Browse files
update codes
1 parent cc05df8 commit ed9ddb0

File tree

7 files changed

+70
-23
lines changed

7 files changed

+70
-23
lines changed

‎python/dnlp/config/sequence_labeling_config.py‎

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@
33

44
classDnnCrfConfig(object):
55
def__init__(self,*,skip_left:int=0,skip_right:int=0,embed_size:int=100,hidden_units:int=150,
6-
learning_rate:float=0.005,lam:float=1e-4,dropout_rate:float=0.2,batch_length:int=300,
6+
learning_rate:float=0.01,lam:float=1e-4,dropout_rate:float=0.2,batch_length:int=300,
77
batch_size:int=30,hinge_rate:float=0.2):
88
self.__skip_left=skip_left
99
self.__skip_right=skip_right

‎python/dnlp/core/dnn_crf.py‎

Lines changed: 17 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@
1010

1111
classDnnCrf(DnnCrfBase):
1212
def__init__(self,*,config:DnnCrfConfig=None,task='cws',data_path:str='',dtype:type=tf.float32,
13-
mode:str='train',dropout_position:str='input',train:str='mm',predict:str='ll',nn:str,
13+
mode:str='train',dropout_position:str='input',train:str='mm',predict:str='mm',nn:str,
1414
model_path:str='',
1515
embedding_path:str='',remark:str=''):
1616
ifmodenotin ['train','predict']:
@@ -75,7 +75,8 @@ def __init__(self, *, config: DnnCrfConfig = None, task='cws', data_path: str =
7575
ifmode=='predict':
7676
ifpredict!='ll':
7777
self.output=tf.squeeze(tf.transpose(self.output),axis=2)
78-
self.seq,self.best_score=tf.contrib.crf.crf_decode(self.output,self.transition,self.seq_length)
78+
ifpredict=='ll':
79+
self.seq,self.best_score=tf.contrib.crf.crf_decode(self.output,self.transition,self.seq_length)
7980
self.sess=tf.Session()
8081
self.sess.run(tf.global_variables_initializer())
8182
tf.train.Saver().restore(save_path=self.model_path,sess=self.sess)
@@ -105,16 +106,16 @@ def __init__(self, *, config: DnnCrfConfig = None, task='cws', data_path: str =
105106
tf.expand_dims(self.pred_seq[:,0],1))-tf.gather_nd(
106107
self.transition_init,tf.expand_dims(self.true_seq[:,0],1))
107108
self.hinge_loss=tf.count_nonzero(self.pred_seq-self.true_seq,axis=1,dtype=self.dtype)
108-
self.seq,self.best_score=tf.contrib.crf.crf_decode(self.output,self.transition,self.seq_length)
109+
#self.seq, self.best_score = tf.contrib.crf.crf_decode(self.output, self.transition, self.seq_length)
109110
# self.score_diff = self.state_difference + self.transition_difference + self.init_transition_difference + self.hinge_rate*self.hinge_loss
110111
self.score_diff=self.state_difference+self.transition_difference+self.hinge_rate*self.hinge_loss
111112
self.loss=tf.reduce_sum(tf.maximum(0.0,self.score_diff))/self.batch_size+self.regularization
112-
self.learning_rate=0.005
113-
self.optimizer=tf.train.GradientDescentOptimizer(self.learning_rate)
114-
#self.optimizer = tf.train.AdagradOptimizer(self.learning_rate)
113+
#self.learning_rate = 0.005
114+
#self.optimizer = tf.train.GradientDescentOptimizer(self.learning_rate)
115+
self.optimizer=tf.train.AdagradOptimizer(self.learning_rate)
115116
# self.new_optimizer = tf.train.AdamOptimizer()
116-
gvs=self.optimizer.compute_gradients(self.loss)
117-
cliped_grad= [(tf.clip_by_norm(grad,10)ifgradisnotNoneelsegrad,var)forgrad,varingvs]
117+
#gvs = self.optimizer.compute_gradients(self.loss)
118+
#cliped_grad = [(tf.clip_by_norm(grad, 10) if grad is not None else grad, var) for grad, var in gvs]
118119
# self.train_model = self.optimizer.apply_gradients(cliped_grad)
119120
self.train_model=self.optimizer.minimize(self.loss)
120121

@@ -172,7 +173,7 @@ def fit_mm(self, epochs: int = 50, interval: int = 1):
172173
forepochinrange(1,epochs+1):
173174
print('epoch:',epoch)
174175
start=time.time()
175-
foriinrange(self.batch_count):
176+
forjinrange(self.batch_count):
176177
sentences,labels,lengths=self.get_batch()
177178
transition=self.transition.eval()
178179
transition_init=self.transition_init.eval()
@@ -182,11 +183,13 @@ def fit_mm(self, epochs: int = 50, interval: int = 1):
182183
# seq = sess.run(self.seq, feed_dict=feed_dict)
183184
foriinrange(self.batch_size):
184185
# seq = sess.run(self.seq,feed_dict=feed_dict)
185-
pred_seq.append(self.viterbi(output[i, :lengths[i], :].T,transition,transition_init,self.batch_length))
186+
seq=self.viterbi(output[i, :lengths[i], :].T,transition,transition_init,labels[i],
187+
self.batch_length,True)
188+
pred_seq.append(seq)
186189
# pred_seq.append(seq)
187190
feed_dict= {self.true_seq:labels,self.pred_seq:pred_seq,self.output_placeholder:output}
188-
ifepoch>2:
189-
self.eval_params(sess,feed_dict)
191+
#if epoch > 2:
192+
# self.eval_params(sess, feed_dict)
190193
sess.run(self.train_model,feed_dict=feed_dict)
191194
ifepoch%interval==0:
192195
ifnotself.embedding_path:
@@ -214,8 +217,8 @@ def predict(self, sentence: str, return_labels=False):
214217
input=self.indices2input(self.sentence2indices(sentence))
215218
runner= [self.output,self.transition,self.transition_init]
216219
output,trans,trans_init=self.sess.run(runner,feed_dict={self.input:input,self.seq_length: [len(sentence)]})
217-
output=np.squeeze(output,0)
218-
labels=self.viterbi(output.T,trans,trans_init)
220+
#output = np.squeeze(output, 0)
221+
labels=self.viterbi(output,trans,trans_init)
219222
ifself.task=='cws':
220223
result=self.tags2words(sentence,labels)
221224
else:

‎python/dnlp/core/dnn_crf_base.py‎

Lines changed: 17 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -103,27 +103,38 @@ def get_batch(self) -> (np.ndarray, np.ndarray, np.ndarray):
103103
self.batch_start=new_start
104104
returnself.indices2input(chs_batch),np.array(lls_batch,dtype=np.int32),np.array(len_batch,dtype=np.int32)
105105

106-
defviterbi(self,emission:np.ndarray,transition:np.ndarray,transition_init:np.ndarray,labels:np.ndarray=None,padding_length=-1):
106+
defviterbi(self,emission:np.ndarray,transition:np.ndarray,transition_init:np.ndarray,labels:np.ndarray=None,
107+
padding_length=-1,is_constraint=False):
108+
constraint= [[1,2],[1,2],[0,3],[0,3]]
107109
length=emission.shape[1]
108110
ifpadding_length==-1:
109111
padding_length=length
110112
path=np.ones([self.tags_count,length],dtype=np.int32)*-1
111113
corr_path=np.zeros([padding_length],dtype=np.int32)
112114
path_score=np.ones([self.tags_count,length],dtype=np.float64)* (np.finfo('f').min/2)
113115
path_score[:,0]=transition_init+emission[:,0]
116+
path[:,0]=np.arange(0,4)
117+
iflabelsisnotNone:
118+
foriinrange(self.tags_count):
119+
ifi!=labels[0]:
120+
path_score[i,0]+=self.hinge_rate
114121

115122
forposinrange(1,length):
116123
fortinrange(self.tags_count):
117124
forprevinrange(self.tags_count):
125+
ifis_constraintandtnotinconstraint[prev]:
126+
continue
127+
# prev = path[prev_index, pos-1]
118128
temp=path_score[prev][pos-1]+transition[prev][t]+emission[t][pos]
119-
iflabels[pos-1]!=prev:
120-
temp+=self.hinge_rate
129+
iflabelsisnotNone:
130+
iflabels[pos]!=t:
131+
temp+=self.hinge_rate
121132
iftemp>=path_score[t][pos]:
122133
path[t][pos]=prev
123134
path_score[t][pos]=temp
124-
foriinrange(self.tags_count):
125-
ifi!=labels[length-1]:
126-
path_score[i][length-1]+=self.tags_count
135+
#for i in range(self.tags_count):
136+
# if i!= labels[length-1]:
137+
# path_score[i][length-1]+=self.tags_count
127138
max_index=np.argmax(path_score[:,-1])
128139
corr_path[length-1]=max_index
129140
foriinrange(length-1,0,-1):

‎python/dnlp/utils/evaluation.py‎

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -84,7 +84,7 @@ def evaluate_cws(model, data_path: str):
8484
forsentence,labelinzip(characters,labels_true):
8585
iflen(sentence)<=3:
8686
continue
87-
words,labels_predict=model.predict_ll(sentence,return_labels=True)
87+
words,labels_predict=model.predict(sentence,return_labels=True)
8888
all_labels_predict.extend(labels_predict)
8989
all_labels_true.extend(label)
9090
c,p,r=get_cws_statistics(label,labels_predict)

‎python/scripts/cws_ner.py‎

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,7 @@ def train_cws():
2222
deftest_cws():
2323
# sentence = '小明来自南京师范大学'
2424
sentence='中国人民决心继承邓小平同志的遗志,继续把建设有中国特色社会主义事业推向前进。'
25-
model_path='../dnlp/models/cws-lstm-2.ckpt'
25+
model_path='../dnlp/models/cws-lstm-1.ckpt'
2626
config=DnnCrfConfig()
2727
dnncrf=DnnCrf(config=config,mode='predict',model_path=model_path,nn='lstm')
2828
res,labels=dnncrf.predict(sentence,return_labels=True)

‎python/test/test_dnn_crf_base.py‎

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,8 @@
1+
# -*- coding:utf-8 -*-
2+
fromunittestimportTestCase
3+
fromdnlp.core.dnn_crf_baseimport*
4+
classTestDnnCRFBase(TestCase):
5+
deftest_viterbi(self):
6+
transition= []
7+
transtion_init= []
8+

‎requirements.txt‎

Lines changed: 25 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,25 @@
1+
bleach==1.5.0
2+
cycler==0.10.0
3+
enum34==1.1.6
4+
html5lib==0.9999999
5+
joblib==0.11
6+
Markdown==2.6.9
7+
matplotlib==2.1.1
8+
numpy==1.14.0
9+
pandas==0.21.1
10+
protobuf==3.5.0.post1
11+
pyparsing==2.2.0
12+
python-crfsuite==0.9.5
13+
python-dateutil==2.6.1
14+
pytz==2017.3
15+
scikit-learn==0.19.1
16+
scipy==1.0.0
17+
six==1.11.0
18+
sklearn==0.0
19+
sklearn-crfsuite==0.3.6
20+
tabulate==0.8.2
21+
tensorflow-gpu==1.4.0
22+
tensorflow-tensorboard==0.4.0rc3
23+
tqdm==4.19.5
24+
Werkzeug==0.12.2
25+
XlsxWriter==1.0.2

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp