# -*- coding:utf-8 -*-
import numpy as np
import pickle
from itertools import accumulate, permutations
from dnlp.config.sequence_labeling_config import DnnCrfConfig
from dnlp.core.dnn_crf import DnnCrf
from dnlp.core.re_cnn import RECNN
from dnlp.config.re_config import RECNNConfig
from dnlp.utils.constant import UNK, BATCH_PAD


def read_dictionary(dict_path: str, reverse=False):
  dictionary = {}
  with open(dict_path, encoding='utf8') as d:
    items = d.readlines()
    for item in items:
      pair = item.split(' ')
      dictionary[pair[0]] = int(pair[1])
  if reverse:
    return dictionary, dict(zip(dictionary.values(), dictionary.keys()))
  else:
    return dictionary
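
# Note: read_dictionary assumes the dictionary file holds one space-separated
# "token index" pair per line (e.g. "患者 7", illustrative only); this layout is
# inferred from how the lines are parsed above, not documented elsewhere.
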
BASE_FOLDER = '../dnlp/data/emr/'
DICT_PATH = BASE_FOLDER + 'emr_merged_word_dict.utf8'
DICTIONARY = read_dictionary(DICT_PATH)
with open(BASE_FOLDER + 'rel_names', 'rb') as f:
  REL_PAIR_NAMES = pickle.load(f)
REL_PAIR_NAMES = dict(zip(REL_PAIR_NAMES.values(), REL_PAIR_NAMES.keys()))
for rel_name in REL_PAIR_NAMES:
  REL_PAIR_NAMES[rel_name] = REL_PAIR_NAMES[rel_name].split(':')

REL_NAMES = {'PartOf': '部位', 'PropertyOf': '性质', 'DegreeOf': '程度', 'QualityValue': '定性值',
             'QuantityValue': '定量值', 'UnitOf': '单位', 'TimeOf': '持续时间', 'StartTime': '开始时间',
             'EndTime': '结束时间', 'Moment': '时间点', 'DateOf': '日期', 'ResultOf': '结果',
             'LocationOf': '地点', 'DiseaseTypeOf': '疾病分型分期', 'SpecOf': '规格', 'UsageOf': '用法',
             'DoseOf': '用量', 'FamilyOf': '家族成员', 'ModifierOf': '其他修饰词', 'UseMedicine': '用药',
             'LeadTo': '导致', 'Find': '发现', 'Confirm': '证实', 'Adopt': '采取', 'Take': '用药',
             'Limit': '限定', 'AlongWith': '伴随', 'Complement': '补足'}
REL_NAME_LIST = list(REL_NAMES.keys())
ENTITY_NAMES = {'Sign': '体征', 'Symptom': '症状', 'Part': '部位', 'Property': '属性', 'Degree': '程度',
                'Quality': '定性值', 'Quantity': '定量值', 'Unit': '单位', 'Time': '时间', 'Date': '日期',
                'Result': '结果', 'Disease': '疾病', 'DiseaseType': '疾病类型', 'Examination': '检查',
                'Location': '地址', 'Medicine': '药物', 'Spec': '规格', 'Usage': '用法', 'Dose': '用量',
                'Treatment': '治疗', 'Family': '家族史', 'Modifier': '修饰词'}

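
# Overall flow (as implemented below): cws() segments a sentence and ner() tags
# its named entities; prepare_rel() turns every ordered entity pair into a
# candidate with word ids and relative-position features; a binary RECNN model
# filters the candidates that carry a relation, and a 28-class RECNN model
# assigns the relation type, which get_rel_result() maps back to readable names.
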
def ner(sentence):
  data_path = ''
  model_path = '../dnlp/models/emr/ner-lstm-50.ckpt'
  config = DnnCrfConfig(skip_left=1, skip_right=1)
  lstmcrf = DnnCrf(config=config, task='ner', model_path=model_path, mode='predict', data_path=data_path, nn='lstm',
                   remark='lstm')
  return lstmcrf.predict_ll(sentence)

def cws(sentence):
  config = DnnCrfConfig(skip_left=1, skip_right=1)
  model_path = '../dnlp/models/emr/cws-lstm-emr_cws-50.ckpt'
  dnncrf = DnnCrf(config=config, model_path=model_path, mode='predict', nn='lstm', task='cws', remark='emr_cws')
  return dnncrf.predict_ll(sentence)

def prepare_rel(sentence, batch_length=85):
  cws_res = cws(sentence)
  ner_res = ner(sentence)
  lengths = list(accumulate([len(w) for w in cws_res]))  # cumulative character offsets of the words (currently unused)
  ne_candidates = []
  words = list(map(lambda w: DICTIONARY[w] if w in DICTIONARY else DICTIONARY[UNK], cws_res))
  if len(words) < batch_length:
    words += [DICTIONARY[BATCH_PAD]] * (batch_length - len(words))
  else:
    words = words[:batch_length]
  for ne, s in ner_res:
    # list.index raises ValueError instead of returning -1, so test membership first.
    if ne in cws_res:
      ne_candidates.append(cws_res.index(ne))
    else:
      print('named entity {0} not found in segmentation result'.format(ne))
  rel_candidates = list(permutations(ne_candidates, 2))
  primary, secondary = generate_rel(rel_candidates, batch_length)
  rel_count = len(rel_candidates)
  return np.array([words] * rel_count), primary, secondary, [cws_res] * rel_count, rel_candidates

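# generate_rel() builds the relative-position features passed to the RECNN models:
# position j gets the value j - entity_index + batch_length - 1, so the entity's
# own position always maps to batch_length - 1 and every value stays within
# [0, 2 * batch_length - 2]. For example, with batch_length = 5 and an entity at
# index 2, np.arange(5) - 2 + 4 gives [2, 3, 4, 5, 6].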
def generate_rel(rel_candidates, batch_length):
  primary = []
  secondary = []
  for f, s in rel_candidates:
    primary.append(np.arange(batch_length) - f + batch_length - 1)
    secondary.append(np.arange(batch_length) - s + batch_length - 1)
  return np.array(primary), np.array(secondary)


def rel_extract(sentences):
  words = []
  rel_pairs = []
  sentence_words = []
  primary = []
  secondary = []
  for sentence in sentences:
    w, p, s, ww, pp = prepare_rel(sentence)
    sentence_words.extend(w)
    primary.extend(p)
    secondary.extend(s)
    words.extend(ww)
    rel_pairs.extend(pp)
  config_two = RECNNConfig(window_size=(2, 3, 4))
  config_multi = RECNNConfig(window_size=(2, 3, 4))
  model_path_two = '../dnlp/models/re_two/50-2_3_4_directed.ckpt'
  model_path_multi = '../dnlp/models/re_multi/50-2_3_4_directed.ckpt'
  recnn2 = RECNN(config=config_two, dict_path=DICT_PATH, mode='test', model_path=model_path_two, relation_count=2,
                 data_mode='test')
  recnn = RECNN(config=config_multi, dict_path=DICT_PATH, mode='test', model_path=model_path_multi, relation_count=28,
                data_mode='test')
  # The binary model is assumed to return one label per candidate, with a nonzero
  # label meaning "this pair holds a relation"; keep only those candidates.
  two_res = recnn2.predict(sentence_words, primary, secondary)
  true_words = [w for w, r in zip(words, two_res) if r]
  true_rel_pairs = [p for p, r in zip(rel_pairs, two_res) if r]
  true_sentence_words = [sw for sw, r in zip(sentence_words, two_res) if r]
  true_primary = [p for p, r in zip(primary, two_res) if r]
  true_secondary = [s for s, r in zip(secondary, two_res) if r]
  multi_res = recnn.predict(true_sentence_words, true_primary, true_secondary)
  return get_rel_result(true_words, true_rel_pairs, multi_res)

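# get_rel_result() maps each predicted relation back to surface forms: the word
# indices in rel_pairs select the primary/secondary words, REL_NAME_LIST decodes
# the predicted relation id, and REL_PAIR_NAMES / ENTITY_NAMES supply the Chinese
# display names. The returned layout below is an assumption; the original code
# left the result assembly unfinished.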
def get_rel_result(words, rel_pairs, rel_types):
  result = {}
  for sentence_words, (primary_idx, secondary_idx), rel_type in zip(words, rel_pairs, rel_types):
    rel_type_name = REL_NAME_LIST[rel_type]
    primary = sentence_words[primary_idx]
    secondary = sentence_words[secondary_idx]
    primary_type, secondary_type = REL_PAIR_NAMES[rel_type_name]
    primary_type = ENTITY_NAMES[primary_type]
    secondary_type = ENTITY_NAMES[secondary_type]
    # Assumed layout: relation display name -> list of (primary word, primary type, secondary word, secondary type).
    result.setdefault(REL_NAMES[rel_type_name], []).append((primary, primary_type, secondary, secondary_type))
  return result


def export():
  pass


def get_sentences(filename):
  with open('../dnlp/data/emr/emr_paper/train/' + filename, encoding='utf-8') as f:
    sentences = [l + '。' for l in f.read().split('。')]
    if sentences[-1] == '。':
      sentences = sentences[:-1]
    else:
      sentences[-1] = sentences[-1][:-1]
    return sentences
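
# For example, a file containing '腹痛3天。呕吐1小时。' yields
# ['腹痛3天。', '呕吐1小时。']; when the text has no trailing '。', the else branch
# strips the extra delimiter that was appended to the last fragment.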


if __name__ == '__main__':
  sentences = get_sentences('996716_admission.txt')
  rel_extract(sentences)