Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit d6bbcb0

Browse files
add pipeline script
1 parent 0095bca, commit d6bbcb0

File tree

1 file changed

+137
-12
lines changed

1 file changed

+137
-12
lines changed

‎python/scripts/pipeline.py‎

Lines changed: 137 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -1,29 +1,154 @@
11
# -*- coding:utf-8 -*-
2+
importnumpyasnp
3+
importpickle
4+
fromitertoolsimportaccumulate,permutations
25
fromdnlp.config.sequence_labeling_configimportDnnCrfConfig
36
fromdnlp.core.dnn_crfimportDnnCrf
7+
fromdnlp.core.re_cnnimportRECNN
8+
fromdnlp.config.re_configimportRECNNConfig
9+
fromdnlp.utils.constantimportUNK,BATCH_PAD
10+
11+
12+
def read_dictionary(dict_path: str, reverse=False):
    """Load a ``word index`` dictionary file.

    Each non-blank line is split on single spaces; the first field is the
    key and the second field is converted with ``int`` and used as the value.

    Args:
        dict_path: Path of the UTF-8 encoded dictionary file.
        reverse: When true, also return the inverse (index -> word) mapping.

    Returns:
        The word -> index dict, or a ``(forward, inverse)`` tuple when
        ``reverse`` is true.
    """
    dictionary = {}
    with open(dict_path, encoding='utf8') as d:
        for item in d:
            # Blank lines (typically a trailing one) carry no entry; without
            # this guard they fail with IndexError on the second field.
            if not item.strip():
                continue
            pair = item.split(' ')
            # int() tolerates the trailing newline left on the last field.
            dictionary[pair[0]] = int(pair[1])
    if reverse:
        return dictionary, {index: word for word, index in dictionary.items()}
    return dictionary
23+
24+
25+
# Folder (relative path) that holds the EMR data files read below.
BASE_FOLDER = '../dnlp/data/emr/'
DICT_PATH = BASE_FOLDER + 'emr_merged_word_dict.utf8'
# Word -> integer id mapping, read once when the module is imported.
DICTIONARY = read_dictionary(DICT_PATH)
# NOTE(review): pickle.load can execute arbitrary code; only safe while
# 'rel_names' is a trusted, locally produced file.
with open(BASE_FOLDER + 'rel_names', 'rb') as f:
    REL_PAIR_NAMES = pickle.load(f)
# Swap keys and values of the loaded mapping.
REL_PAIR_NAMES = dict(zip(REL_PAIR_NAMES.values(), REL_PAIR_NAMES.keys()))
# Replace every value with its ':'-separated parts. Only values are
# reassigned, so mutating while iterating over the keys is safe here.
for rel_name in REL_PAIR_NAMES:
    REL_PAIR_NAMES[rel_name] = REL_PAIR_NAMES[rel_name].split(':')

# Relation identifier -> Chinese display label.
REL_NAMES = {'PartOf': '部位', 'PropertyOf': '性质', 'DegreeOf': '程度', 'QualityValue': '定性值',
             'QuantityValue': '定量值', 'UnitOf': '单位', 'TimeOf': '持续时间', 'StartTime': '开始时间',
             'EndTime': '结束时间', 'Moment': '时间点', 'DateOf': '日期', 'ResultOf': '结果',
             'LocationOf': '地点', 'DiseaseTypeOf': '疾病分型分期', 'SpecOf': '规格', 'UsageOf': '用法',
             'DoseOf': '用量', 'FamilyOf': '家族成员', 'ModifierOf': '其他修饰词', 'UseMedicine': '用药',
             'LeadTo': '导致', 'Find': '发现', 'Confirm': '证实', 'Adopt': '采取', 'Take': '用药',
             'Limit': '限定', 'AlongWith': '伴随', 'Complement': '补足'}
# Relation identifiers in insertion order; get_rel_result indexes this list
# with the predicted relation type.
REL_NAME_LIST = list(REL_NAMES.keys())
# Entity identifier -> Chinese display label.
ENTITY_NAMES = {'Sign': '体征', 'Symptom': '症状', 'Part': '部位', 'Property': '属性', 'Degree': '程度',
                'Quality': '定性值', 'Quantity': '定量值', 'Unit': '单位', 'Time': '时间', 'Date': '日期',
                'Result': '结果',
                'Disease': '疾病', 'DiseaseType': '疾病类型', 'Examination': '检查', 'Location': '地址',
                'Medicine': '药物', 'Spec': '规格', 'Usage': '用法', 'Dose': '用量', 'Treatment': '治疗',
                'Family': '家族史',
                'Modifier': '修饰词'}
49+
50+
451
def ner(sentence):
    """Run the LSTM-CRF named-entity model on one sentence.

    A ``DnnCrf`` is built in predict mode from the 50-epoch NER checkpoint
    on every call, and the result of its ``predict_ll`` is returned as-is.
    """
    tagger = DnnCrf(
        config=DnnCrfConfig(skip_left=1, skip_right=1),
        task='ner',
        model_path='../dnlp/models/emr/ner-lstm-50.ckpt',
        mode='predict',
        data_path='',
        nn='lstm',
        remark='lstm',
    )
    return tagger.predict_ll(sentence)
58+
59+
960
def cws(sentence):
    """Run the LSTM-CRF word-segmentation model on one sentence.

    A ``DnnCrf`` is built in predict mode from the 50-epoch EMR
    segmentation checkpoint on every call, and the result of its
    ``predict_ll`` is returned as-is.
    """
    segmenter = DnnCrf(
        config=DnnCrfConfig(skip_left=1, skip_right=1),
        model_path='../dnlp/models/emr/cws-lstm-emr_cws-50.ckpt',
        mode='predict',
        nn='lstm',
        task='cws',
        remark='emr_cws',
    )
    return segmenter.predict_ll(sentence)
1465

15-
def prepare_rel(sentence, batch_length=85):
    """Build relation-classifier inputs for every ordered entity pair.

    The sentence is segmented with ``cws`` and tagged with ``ner``. Segmented
    words are mapped to dictionary ids (unknown words use the ``UNK`` id) and
    padded with the ``BATCH_PAD`` id, or truncated, to ``batch_length``. Each
    recognised entity is located in the segmentation result, and every
    ordered pair of entity positions becomes one candidate.

    Args:
        sentence: Text of a single sentence.
        batch_length: Fixed length of the id sequence fed to the model.

    Returns:
        A 5-tuple ``(word_ids, primary, secondary, words, rel_candidates)``:
        the id sequence repeated once per candidate, the two arrays from
        ``generate_rel``, the segmented words repeated once per candidate,
        and the list of ``(primary_index, secondary_index)`` pairs.
    """
    import warnings

    cws_res = cws(sentence)
    ner_res = ner(sentence)

    words = [DICTIONARY[w] if w in DICTIONARY else DICTIONARY[UNK] for w in cws_res]
    if len(words) < batch_length:
        words += [DICTIONARY[BATCH_PAD]] * (batch_length - len(words))
    else:
        words = words[:batch_length]

    ne_candidates = []
    for ne, _ in ner_res:
        # list.index raises ValueError when the entity is not a segmented
        # word (it never returns -1), so that case must be caught explicitly.
        try:
            ne_candidates.append(cws_res.index(ne))
        except ValueError:
            warnings.warn('entity %r not found in segmentation result; skipped' % (ne,))

    rel_candidates = list(permutations(ne_candidates, 2))
    primary, secondary = generate_rel(rel_candidates, batch_length)
    rel_count = len(rel_candidates)
    return np.array([words] * rel_count), primary, secondary, [cws_res] * rel_count, rel_candidates
88+
89+
90+
def generate_rel(rel_candidates, batch_length):
    """Compute relative-position rows for each candidate pair.

    For a pair ``(f, s)`` the primary row is
    ``arange(batch_length) - f + batch_length - 1`` and the secondary row is
    the same expression with ``s``.

    Args:
        rel_candidates: Iterable of ``(primary_index, secondary_index)`` pairs.
        batch_length: Length of each position row.

    Returns:
        Two integer arrays of shape ``(len(rel_candidates), batch_length)``.
        An empty candidate list gives shape ``(0, batch_length)`` rather than
        a shapeless empty float array.
    """
    # reshape(-1, 2) keeps the pair axis even when there are no candidates.
    pairs = np.asarray(list(rel_candidates), dtype=int).reshape(-1, 2)
    base = np.arange(batch_length) + batch_length - 1
    # Broadcast the (n, 1) index columns against the (batch_length,) base row.
    primary = base - pairs[:, :1]
    secondary = base - pairs[:, 1:]
    return primary, secondary
1797

1898

19-
defrel():
99+
def rel_extract(sentences):
    """Extract relations from sentences with a two-stage classifier.

    Candidates from ``prepare_rel`` are first passed to a 2-class ``RECNN``;
    the candidates it marks truthy are then passed to a 28-class ``RECNN``,
    and the outcome is handed to ``get_rel_result``.

    Args:
        sentences: Iterable of sentence strings.

    Returns:
        Whatever ``get_rel_result`` returns for the kept candidates.
    """
    token_lists = []
    rel_pairs = []
    id_rows = []
    primary = []
    secondary = []
    for sentence in sentences:
        ids, prim, sec, tokens, pairs = prepare_rel(sentence)
        id_rows.extend(ids)
        primary.extend(prim)
        secondary.extend(sec)
        token_lists.extend(tokens)
        rel_pairs.extend(pairs)

    # Nothing to classify: skip loading the checkpoints altogether.
    if not rel_pairs:
        return get_rel_result([], [], [])

    config_two = RECNNConfig(window_size=(2, 3, 4))
    config_multi = RECNNConfig(window_size=(2, 3, 4))
    model_path_two = '../dnlp/models/re_two/50-2_3_4_directed.ckpt'
    model_path_multi = '../dnlp/models/re_multi/50-2_3_4_directed.ckpt'
    recnn2 = RECNN(config=config_two, dict_path=DICT_PATH, mode='test', model_path=model_path_two,
                   relation_count=2, data_mode='test')
    # The multi-class model gets its own config object (the original passed
    # config_two to both; the two configs are built identically).
    recnn = RECNN(config=config_multi, dict_path=DICT_PATH, mode='test', model_path=model_path_multi,
                  relation_count=28, data_mode='test')

    two_res = recnn2.predict(id_rows, primary, secondary)
    # Keep the POSITIONS whose binary prediction is truthy. The original
    # indexed the inputs with the prediction values themselves.
    # NOTE(review): assumes predict() returns one label per input row,
    # in input order - confirm against RECNN.predict.
    kept = [pos for pos, flag in enumerate(two_res) if flag]
    true_words = [token_lists[pos] for pos in kept]
    true_rel_pairs = [rel_pairs[pos] for pos in kept]
    true_id_rows = [id_rows[pos] for pos in kept]
    true_primary = [primary[pos] for pos in kept]
    true_secondary = [secondary[pos] for pos in kept]

    multi_res = recnn.predict(true_id_rows, true_primary, true_secondary)
    return get_rel_result(true_words, true_rel_pairs, multi_res)
126+
127+
def get_rel_result(words, rel_pairs, rel_types):
    """Resolve predicted relations into words and type labels (unfinished).

    For each candidate the relation name, the two entity words and the two
    entity-type labels are looked up, but nothing is stored in ``result``
    and nothing is returned, so the function currently always yields None.

    Args:
        words: One sequence of sentence words per candidate.
        rel_pairs: One ``(primary_idx, secondary_idx)`` pair per candidate,
            indexing into the matching ``words`` entry.
        rel_types: One index into ``REL_NAME_LIST`` per candidate.
    """
    result = {}
    for sentence_words, (primary_idx, secondary_idx), rel_type in zip(words, rel_pairs, rel_types):
        rel_type_name = REL_NAME_LIST[rel_type]
        primary = sentence_words[primary_idx]
        secondary = sentence_words[secondary_idx]
        # NOTE(review): presumes REL_PAIR_NAMES is keyed by relation name and
        # holds exactly two entity-type identifiers per entry - confirm
        # against the contents of the 'rel_names' pickle.
        primary_type, secondary_type = REL_PAIR_NAMES[rel_type_name]
        primary_type = ENTITY_NAMES[primary_type]
        secondary_type = ENTITY_NAMES[secondary_type]
        # result[]
137+
138+
139+
def export():
    """Placeholder: not implemented yet; does nothing."""
    pass
21141

22142
def get_sentences(filename, folder='../dnlp/data/emr/emr_paper/train/'):
    """Read a text file and split it into sentences on the '。' full stop.

    Every sentence keeps its terminating '。'. Text after the last '。' is
    returned as a final sentence without one, unless it is empty or only
    whitespace (for example a trailing newline), in which case it is dropped.

    Args:
        filename: Name of the file inside ``folder``.
        folder: Directory containing the file; defaults to the EMR training
            folder this script was written for.

    Returns:
        List of sentence strings, in file order.
    """
    import os

    with open(os.path.join(folder, filename), encoding='utf-8') as f:
        pieces = f.read().split('。')
    # The last piece is whatever follows the final '。' (possibly nothing).
    tail = pieces.pop()
    sentences = [piece + '。' for piece in pieces]
    if tail.strip():
        sentences.append(tail)
    return sentences
150+
25151

26152
if __name__ == '__main__':
    # Script entry point: split one sample admission record into sentences
    # and run relation extraction over all of them in a single call.
    sentences = get_sentences('996716_admission.txt')
    rel_extract(sentences)

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp