# -*- coding: UTF-8 -*-
import tensorflow as tf
import numpy as np
import math
from dnlp.core.dnn_crf_base import DnnCrfBase
from dnlp.config import DnnCrfConfig


class DnnCrfEmr(DnnCrfBase):
  def __init__(self, *, config: DnnCrfConfig = None, data_path: str = '', dtype: type = tf.float32,
               task: str = 'ner', mode: str = 'train', nn: str, model_path: str = ''):
    if mode not in ['train', 'predict']:
      raise Exception('mode must be either "train" or "predict"')
    if nn not in ['mlp', 'rnn', 'lstm', 'bilstm', 'gru']:
      raise Exception('unsupported neural network type: ' + nn)

    DnnCrfBase.__init__(self, config, data_path, mode, model_path)
    self.dtype = dtype
    self.mode = mode
    self.nn = nn
    self.task = task

    # Build the graph
    tf.reset_default_graph()
    self.transition = self.__get_variable([self.tags_count, self.tags_count], 'transition')
    self.transition_init = self.__get_variable([self.tags_count], 'transition_init')
    self.params = [self.transition, self.transition_init]
    # Input layer
    if mode == 'train':
      self.input = tf.placeholder(tf.int32, [self.batch_size, self.batch_length, self.windows_size])
      self.real_indices = tf.placeholder(tf.int32, [self.batch_size, self.batch_length])
      self.seq_length = tf.placeholder(tf.int32, [None])
    else:
      self.input = tf.placeholder(tf.int32, [None, self.windows_size])

    # Embedding lookup layer
    self.embedding_layer = self.get_embedding_layer()
    # Hidden layer
    if nn == 'mlp':
      self.hidden_layer = self.get_mlp_layer(tf.transpose(self.embedding_layer))
    elif nn == 'lstm':
      self.hidden_layer = self.get_lstm_layer(tf.transpose(self.embedding_layer))
    elif nn == 'gru':
      self.hidden_layer = self.get_gru_layer(tf.transpose(self.embedding_layer))
    else:
      self.hidden_layer = self.get_rnn_layer(tf.transpose(self.embedding_layer))
    # Output layer
    self.output = self.get_output_layer(self.hidden_layer)

    if mode == 'predict':
      self.output = tf.squeeze(self.output, axis=1)
      self.sess = tf.Session()
      self.sess.run(tf.global_variables_initializer())
      tf.train.Saver().restore(save_path=self.model_path, sess=self.sess)
    else:
      # Build the training ops
      # Training placeholders: indices of the correct (corr) and currently
      # predicted (curr) emission/transition scores used in the update
      self.ll_corr = tf.placeholder(tf.int32, shape=[None, 3])
      self.ll_curr = tf.placeholder(tf.int32, shape=[None, 3])
      self.trans_corr = tf.placeholder(tf.int32, [None, 2])
      self.trans_curr = tf.placeholder(tf.int32, [None, 2])
      self.trans_init_corr = tf.placeholder(tf.int32, [None, 1])
      self.trans_init_curr = tf.placeholder(tf.int32, [None, 1])
      # Loss function and optimizer
      self.loss, self.loss_with_init = self.get_loss()
      self.optimizer = tf.train.AdagradOptimizer(self.learning_rate)
      self.train = self.optimizer.minimize(self.loss)
      self.train_with_init = self.optimizer.minimize(self.loss_with_init)

  def fit(self, epochs: int = 100, interval: int = 20):
    with tf.Session() as sess:
      tf.global_variables_initializer().run()
      saver = tf.train.Saver(max_to_keep=100)
      for epoch in range(1, epochs + 1):
        print('epoch:', epoch)
        for _ in range(self.batch_count):
          characters, labels, lengths = self.get_batch()
          self.fit_batch(characters, labels, lengths, sess)
        if epoch % interval == 0:
          model_path = '../dnlp/models/emr_old/{0}-{1}.ckpt'.format(self.nn, epoch)
          saver.save(sess, model_path)
          self.save_config(model_path)
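
  # fit_batch applies a structured-perceptron style update: decode the best tag
  # path with Viterbi, then raise the scores and transitions along the gold path
  # and lower them along the predicted path wherever the two paths disagree.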
  def fit_batch(self, characters, labels, lengths, sess):
    scores = sess.run(self.output, feed_dict={self.input: characters})
    transition = self.transition.eval(session=sess)
    transition_init = self.transition_init.eval(session=sess)
    update_labels_pos = None
    update_labels_neg = None
    current_labels = []
    trans_pos_indices = []
    trans_neg_indices = []
    trans_init_pos_indices = []
    trans_init_neg_indices = []
    for i in range(self.batch_size):
      current_label = self.viterbi(scores[:, :lengths[i], i], transition, transition_init)
      current_labels.append(current_label)
      # Positions where the predicted path disagrees with the gold labels
      diff_tag = np.subtract(labels[i, :lengths[i]], current_label)
      update_index = np.where(diff_tag != 0)[0]
      update_length = len(update_index)
      if update_length == 0:
        continue
      update_label_pos = np.stack([labels[i, update_index], update_index, i * np.ones([update_length])], axis=-1)
      update_label_neg = np.stack([current_label[update_index], update_index, i * np.ones([update_length])], axis=-1)
      if update_labels_pos is not None:
        update_labels_pos = np.concatenate((update_labels_pos, update_label_pos))
        update_labels_neg = np.concatenate((update_labels_neg, update_label_neg))
      else:
        update_labels_pos = update_label_pos
        update_labels_neg = update_label_neg

      trans_pos_index, trans_neg_index, trans_init_pos, trans_init_neg, update_init = self.generate_transition_update_index(
        labels[i, :lengths[i]], current_labels[i])

      trans_pos_indices.extend(trans_pos_index)
      trans_neg_indices.extend(trans_neg_index)

      if update_init:
        trans_init_pos_indices.append(trans_init_pos)
        trans_init_neg_indices.append(trans_init_neg)

    if update_labels_pos is not None and update_labels_neg is not None:
      feed_dict = {self.input: characters, self.ll_curr: update_labels_neg, self.ll_corr: update_labels_pos,
                   self.trans_curr: trans_neg_indices, self.trans_corr: trans_pos_indices}

      if not trans_init_pos_indices:
        sess.run(self.train, feed_dict)
      else:
        feed_dict[self.trans_init_corr] = trans_init_pos_indices
        feed_dict[self.trans_init_curr] = trans_init_neg_indices
        sess.run(self.train_with_init, feed_dict)

  def generate_transition_update_index(self, correct_labels, current_labels):
    if correct_labels.shape != current_labels.shape:
      raise ValueError('sequence lengths are not equal')

    before_corr = correct_labels[0]
    before_curr = current_labels[0]
    update_init = False

    trans_init_pos = None
    trans_init_neg = None
    trans_pos = []
    trans_neg = []

    # A mismatch at the first position requires an initial-transition update
    if before_corr != before_curr:
      trans_init_pos = [before_corr]
      trans_init_neg = [before_curr]
      update_init = True

    # Record every transition whose endpoints differ between the two paths
    for corr_label, curr_label in zip(correct_labels[1:], current_labels[1:]):
      if corr_label != curr_label or before_corr != before_curr:
        trans_pos.append([before_corr, corr_label])
        trans_neg.append([before_curr, curr_label])
      before_corr = corr_label
      before_curr = curr_label

    return trans_pos, trans_neg, trans_init_pos, trans_init_neg, update_init
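
  # Worked example (illustration only): with gold tags [0, 1, 2] and predicted
  # tags [0, 2, 2], the first tags match so update_init stays False; at position
  # 1 the transition (0 -> 1) is reinforced and (0 -> 2) penalised; at position
  # 2 the previous tags still differ, so (1 -> 2) and (2 -> 2) are updated too.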

  def predict_ll(self, sentence: str, return_labels=False):
    if self.mode != 'predict':
      raise Exception('prediction is only available in predict mode')

    input = self.indices2input(self.sentence2indices(sentence))
    runner = [self.output, self.transition, self.transition_init]
    output, trans, trans_init = self.sess.run(runner, feed_dict={self.input: input})
    labels = self.viterbi(output, trans, trans_init)
    if self.task == 'cws':
      result = self.tags2words(sentence, labels)
    else:
      result = self.tags2entities(sentence, labels)
    if not return_labels:
      return result
    else:
      return result, self.tag2sequences(labels)

  def get_embedding_layer(self) -> tf.Tensor:
    embeddings = self.__get_variable([self.dict_size, self.embed_size], 'embeddings')
    self.params.append(embeddings)
    if self.mode == 'train':
      input_size = [self.batch_size, self.batch_length, self.concat_embed_size]
      layer = tf.reshape(tf.nn.embedding_lookup(embeddings, self.input), input_size)
    else:
      layer = tf.reshape(tf.nn.embedding_lookup(embeddings, self.input), [1, -1, self.concat_embed_size])
    return layer

  def get_mlp_layer(self, layer: tf.Tensor) -> tf.Tensor:
    hidden_weight = self.__get_variable([self.hidden_units, self.concat_embed_size], 'hidden_weight')
    hidden_bias = self.__get_variable([self.hidden_units, 1, 1], 'hidden_bias')
    self.params += [hidden_weight, hidden_bias]
    layer = tf.sigmoid(tf.tensordot(hidden_weight, layer, [[1], [0]]) + hidden_bias)
    return layer

  def get_rnn_layer(self, layer: tf.Tensor) -> tf.Tensor:
    rnn = tf.nn.rnn_cell.BasicRNNCell(self.hidden_units)
    rnn_output, rnn_out_state = tf.nn.dynamic_rnn(rnn, layer, dtype=self.dtype)
    self.params += [v for v in tf.global_variables() if v.name.startswith('rnn')]
    return tf.transpose(rnn_output)

  def get_lstm_layer(self, layer: tf.Tensor) -> tf.Tensor:
    lstm = tf.nn.rnn_cell.BasicLSTMCell(self.hidden_units)
    lstm_output, lstm_out_state = tf.nn.dynamic_rnn(lstm, layer, dtype=self.dtype)
    self.params += [v for v in tf.global_variables() if v.name.startswith('rnn')]
    return tf.transpose(lstm_output)

  def get_gru_layer(self, layer: tf.Tensor) -> tf.Tensor:
    gru = tf.nn.rnn_cell.GRUCell(self.hidden_units)
    gru_output, gru_out_state = tf.nn.dynamic_rnn(gru, layer, dtype=self.dtype)
    self.params += [v for v in tf.global_variables() if v.name.startswith('rnn')]
    return tf.transpose(gru_output)

  def get_dropout_layer(self, layer: tf.Tensor) -> tf.Tensor:
    return tf.layers.dropout(layer, self.dropout_rate)

  def get_output_layer(self, layer: tf.Tensor) -> tf.Tensor:
    output_weight = self.__get_variable([self.tags_count, self.hidden_units], 'output_weight')
    output_bias = self.__get_variable([self.tags_count, 1, 1], 'output_bias')
    self.params += [output_weight, output_bias]
    return tf.tensordot(output_weight, layer, [[1], [0]]) + output_bias

  def get_loss(self) -> (tf.Tensor, tf.Tensor):
    # Hinge-style loss: score of the predicted (curr) entries minus the gold (corr) entries
    output_loss = tf.reduce_sum(tf.gather_nd(self.output, self.ll_curr) - tf.gather_nd(self.output, self.ll_corr))
    trans_loss = tf.reduce_sum(tf.gather_nd(self.transition, self.trans_curr) - tf.gather_nd(self.transition, self.trans_corr))
    trans_i_curr = tf.gather_nd(self.transition_init, self.trans_init_curr)
    trans_i_corr = tf.gather_nd(self.transition_init, self.trans_init_corr)
    trans_init_loss = tf.reduce_sum(trans_i_curr - trans_i_corr)
    loss = output_loss + trans_loss
    regu = tf.contrib.layers.apply_regularization(tf.contrib.layers.l2_regularizer(self.lam), self.params)
    l1 = loss + regu
    l2 = l1 + trans_init_loss
    return l1, l2

  def __get_variable(self, size, name) -> tf.Variable:
    return tf.Variable(tf.truncated_normal(size, stddev=1.0 / math.sqrt(size[-1]), dtype=self.dtype), name=name)
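

# A minimal usage sketch (not part of the original module). The data and model
# paths below are hypothetical placeholders; adjust them to your own setup.
if __name__ == '__main__':
  # Train an LSTM-CRF tagger, saving a checkpoint every 20 epochs
  model = DnnCrfEmr(config=DnnCrfConfig(), data_path='../dnlp/data/emr/emr_training.pickle',
                    nn='lstm', task='ner')
  model.fit(epochs=100, interval=20)

  # For inference, construct the model in predict mode from a saved checkpoint:
  # model = DnnCrfEmr(config=DnnCrfConfig(), mode='predict', nn='lstm', task='ner',
  #                   model_path='../dnlp/models/emr_old/lstm-100.ckpt')
  # print(model.predict_ll('some EMR sentence'))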