Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commitf4f64ec

Browse files
refactor the project
1 parented9ddb0 commitf4f64ec

File tree

99 files changed

+2190
-3760
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

99 files changed

+2190
-3760
lines changed

‎.gitignore‎

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -165,5 +165,4 @@ ENV/
165165
.mypy_cache/
166166

167167
tmp/
168-
python/dnlp/data/
169-
python/dnlp/models/
168+
data/

‎ReadMe.md‎

Lines changed: 6 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -6,52 +6,15 @@
66

77
*`python >= 3.5`
88
*`tensorflow >= 1.3.0`
9-
*`sklearn`
10-
*`scipy`
119

1210
##项目结构
1311
本项目的核心代码位于`dnlp`目录下:
1412

15-
```bash
16-
python/dnlp
17-
│ cws.py# 分词
18-
│ ner.py# 命名实体识别
19-
│ rel_extract.py# 关系抽取
20-
│ __init__.py
21-
22-
├─config
23-
│ config.py# 配置项
24-
│ __init__.py
25-
26-
├─core# 核心功能模块
27-
│ │ dnn_crf.py# 基于dnn-crf的序列标注
28-
│ │ dnn_crf_base.py# 基于dnn-crf的序列标注的基类
29-
│ │ mmtnn.py# max-margin tensor nural network模型
30-
│ │ re_cnn.py# 基于cnn的关系抽取
31-
│ │ __init__.py
32-
33-
├─data_process# 训练和测试数据的预处理
34-
│ processor.py# 基类
35-
│ process_cws.py# 对分词的预处理
36-
│ process_emr.py
37-
│ process_ner.py# 对命名实体识别的预处理
38-
│ process_pos.py# 对词性标注的预处理
39-
│ __init__.py
40-
41-
42-
├─models# 保存训练后的模型
43-
44-
├─scripts# 运行脚本,包括初始化数据集和训练测试等等
45-
│ init_datasets.py# 初始化训练数据
46-
│ cws_ner.py# 进行分词和命名实体识别的训练和使用
47-
│ __init__.py
48-
49-
├─tests# 单元测试
50-
├─utils# 公用函数
51-
constant.py# 一些常量
52-
__init__.py
53-
54-
```
13+
*`dnlp\data_process`: 数据预处理。
14+
*`dnlp\seq_label`: 序列标注的模型代码,可用于分词、词性标注和实体识别。
15+
*`dnlp\rel_extract`: 关系抽取的模型代码。
16+
*`dnlp\joint_extract`: 实体和关系联合抽取的模型代码。
17+
*`dnlp\runner`: 运行脚本。
5518

5619
##运行
5720

@@ -78,7 +41,7 @@ python python\scripts\cws_ner.py -p
7841
*[Max-Margin Tensor Neural Network for Chinese Word Segmentation](http://www.aclweb.org/anthology/P14-1028) (待实现,文件[`mmtnn.py`](https://github.com/supercoderhawk/DeepLearning_NLP/blob/master/python/dnlp/core/mmtnn.py))
7942

8043
##实体关系抽取
81-
*[relation extraction: perspective from convolutional neural networks](http://aclweb.org/anthology/W15-1506)待实现,文件[`re_cnn.py`](https://github.com/supercoderhawk/DeepLearning_NLP/blob/master/python/dnlp/core/re_cnn.py)
44+
*[relation extraction: perspective from convolutional neural networks](http://aclweb.org/anthology/W15-1506)已实现,文件[`re_cnn.py`](https://github.com/supercoderhawk/DeepLearning_NLP/blob/master/python/dnlp/core/re_cnn.py)
8245

8346

8447
##ToDo-List
File renamed without changes.

‎dnlp/config/__init__.py‎

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
#-*- coding: UTF-8 -*-
2+
fromdnlp.config.seq_label_configimportNeuralNetworkCRFConfig
3+
fromdnlp.config.rel_extract_configimportRECNNConfig
File renamed without changes.

‎dnlp/config/seq_label_config.py‎

Lines changed: 177 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,177 @@
1+
# -*- coding: UTF-8 -*-
2+
fromdnlp.utils.constantimportLOSS_LOG_LIKEHOOD,NNCRF_DROPOUT_EMBEDDING
3+
4+
5+
class NeuralNetworkCRFConfig(object):
    """Read-only bundle of settings for the neural-network CRF model.

    Every constructor argument is keyword-only and is exposed afterwards
    through a read-only property of the same name.  Three extra properties
    are derived from the stored values:

    * ``concat_window_size`` -- ``skip_left + skip_right + 1``
    * ``concat_embed_size``  -- ``concat_window_size * word_embed_size``
    * ``tag_count``          -- ``len(label_schema)``

    NOTE(review): the precise meaning of each setting is defined by the
    model code that consumes this object, not by this class -- confirm
    there before changing a default.
    """

    def __init__(self, *,
                 skip_left: int = 0,
                 skip_right: int = 0,
                 word_embed_size: int = 100,
                 hidden_layers: tuple = ({'type': 'lstm', 'units': 150},),
                 learning_rate: float = 0.05,
                 regularization_rate: float = 1e-4,
                 dropout_rate: float = 0.2,
                 dropout_position: str = NNCRF_DROPOUT_EMBEDDING,
                 batch_length: int = 300,
                 batch_size: int = 20,
                 hinge_rate: float = 0.2,
                 dict_path: str = '',
                 model_name: str = '',
                 training_filename: str = '',
                 label_schema: str = 'BIES',
                 loss_function_name: str = LOSS_LOG_LIKEHOOD):
        # Imported locally so this class does not depend on a module-level
        # import being present.
        import copy

        self.__skip_left = skip_left
        self.__skip_right = skip_right
        self.__word_embed_size = word_embed_size
        # The default for ``hidden_layers`` is a single tuple created once,
        # when the function is defined, and it holds a mutable dict.  Keep a
        # private deep copy so that editing one config's layer description
        # cannot leak into the shared default or into the caller's object.
        self.__hidden_layers = copy.deepcopy(hidden_layers)
        self.__learning_rate = learning_rate
        self.__regularization_rate = regularization_rate
        self.__dropout_rate = dropout_rate
        self.__dropout_position = dropout_position
        self.__batch_length = batch_length
        self.__batch_size = batch_size
        self.__hinge_rate = hinge_rate
        self.__dict_path = dict_path
        self.__model_name = model_name
        self.__training_filename = training_filename
        self.__label_schema = label_schema
        self.__loss_function_name = loss_function_name

    @property
    def skip_left(self):
        """Return the ``skip_left`` value given at construction."""
        return self.__skip_left

    @property
    def skip_right(self):
        """Return the ``skip_right`` value given at construction."""
        return self.__skip_right

    @property
    def word_embed_size(self):
        """Return the ``word_embed_size`` value given at construction."""
        return self.__word_embed_size

    @property
    def hidden_layers(self):
        """Return this config's own copy of the hidden-layer descriptions."""
        return self.__hidden_layers

    @property
    def learning_rate(self):
        """Return the ``learning_rate`` value given at construction."""
        return self.__learning_rate

    @property
    def regularization_rate(self):
        """Return the ``regularization_rate`` value given at construction."""
        return self.__regularization_rate

    @property
    def dropout_rate(self):
        """Return the ``dropout_rate`` value given at construction."""
        return self.__dropout_rate

    @property
    def dropout_position(self):
        """Return the ``dropout_position`` value given at construction."""
        return self.__dropout_position

    @property
    def batch_length(self):
        """Return the ``batch_length`` value given at construction."""
        return self.__batch_length

    @property
    def batch_size(self):
        """Return the ``batch_size`` value given at construction."""
        return self.__batch_size

    @property
    def hinge_rate(self):
        """Return the ``hinge_rate`` value given at construction."""
        return self.__hinge_rate

    @property
    def dict_path(self):
        """Return the ``dict_path`` value given at construction."""
        return self.__dict_path

    @property
    def model_name(self):
        """Return the ``model_name`` value given at construction."""
        return self.__model_name

    @property
    def training_filename(self):
        """Return the ``training_filename`` value given at construction."""
        return self.__training_filename

    @property
    def concat_embed_size(self):
        """Return ``(skip_right + skip_left + 1) * word_embed_size``."""
        return (self.__skip_right + self.__skip_left + 1) * self.__word_embed_size

    @property
    def concat_window_size(self):
        """Return ``skip_left + skip_right + 1``."""
        return self.__skip_left + self.__skip_right + 1

    @property
    def label_schema(self):
        """Return the ``label_schema`` value given at construction."""
        return self.__label_schema

    @property
    def tag_count(self):
        """Return the length of ``label_schema`` (4 for the default ``'BIES'``)."""
        return len(self.__label_schema)

    @property
    def loss_function_name(self):
        """Return the ``loss_function_name`` value given at construction."""
        return self.__loss_function_name
115+
116+
117+
class MMTNNConfig(object):
    """Immutable-by-convention settings holder for the MMTNN model.

    The constructor takes keyword-only arguments and stores each one in a
    private attribute; every value is then readable (but not assignable)
    through a property of the same name.
    """

    def __init__(
            self,
            *,
            skip_left: int = 2,
            skip_right: int = 2,
            character_embed_size: int = 50,
            label_embed_size: int = 50,
            hidden_unit: int = 150,
            learning_rate: float = 0.2,
            lam: float = 10e-4,
            dropout_rate: float = 0.4,
            batch_length: int = 150,
            batch_size: int = 20
    ):
        # Context-window settings.
        self.__skip_left = skip_left
        self.__skip_right = skip_right

        # Embedding and layer sizes.
        self.__character_embed_size = character_embed_size
        self.__label_embed_size = label_embed_size
        self.__hidden_unit = hidden_unit

        # Optimisation settings.
        self.__learning_rate = learning_rate
        self.__lam = lam
        self.__dropout_rate = dropout_rate

        # Batching settings.
        self.__batch_length = batch_length
        self.__batch_size = batch_size

    @property
    def skip_left(self):
        """Value passed as ``skip_left`` (default 2)."""
        return self.__skip_left

    @property
    def skip_right(self):
        """Value passed as ``skip_right`` (default 2)."""
        return self.__skip_right

    @property
    def character_embed_size(self):
        """Value passed as ``character_embed_size`` (default 50)."""
        return self.__character_embed_size

    @property
    def label_embed_size(self):
        """Value passed as ``label_embed_size`` (default 50)."""
        return self.__label_embed_size

    @property
    def hidden_unit(self):
        """Value passed as ``hidden_unit`` (default 150)."""
        return self.__hidden_unit

    @property
    def learning_rate(self):
        """Value passed as ``learning_rate`` (default 0.2)."""
        return self.__learning_rate

    @property
    def lam(self):
        """Value passed as ``lam`` (default ``10e-4``, i.e. 0.001)."""
        return self.__lam

    @property
    def dropout_rate(self):
        """Value passed as ``dropout_rate`` (default 0.4)."""
        return self.__dropout_rate

    @property
    def batch_length(self):
        """Value passed as ``batch_length`` (default 150)."""
        return self.__batch_length

    @property
    def batch_size(self):
        """Value passed as ``batch_size`` (default 20)."""
        return self.__batch_size

‎dnlp/configuration.yaml‎

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# Settings grouped under the `nn_crf` key.
# NOTE(review): the key names match keyword arguments of
# NeuralNetworkCRFConfig; presumably they override its defaults -- confirm
# against the code that loads this file.
nn_crf:
  skip_right: 1
  skip_left: 1
  learning_rate: 0.01
5+

‎dnlp/data_model/__init__.py‎

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
# -*- coding: utf-8 -*-

‎dnlp/data_model/entity.py‎

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
# -*- coding: utf-8 -*-
2+
3+
class Entity(object):
    """A piece of text tagged with an entity type.

    Args:
        text: The entity's text.
        offset: Position associated with the entity.  NOTE(review):
            presumably the start index of ``text`` in its source string --
            confirm against callers.
        entity_type: The label assigned to the entity.
    """

    def __init__(self, text, offset, entity_type):
        self.__text = text
        # ``offset`` used to be accepted and then dropped; keep it so the
        # value callers pass in can be read back.
        self.__offset = offset
        self.__entity_type = entity_type

    @property
    def text(self):
        """Return the entity's text."""
        return self.__text

    @property
    def offset(self):
        """Return the offset supplied at construction."""
        return self.__offset

    @property
    def entity_type(self):
        """Return the entity's type label."""
        return self.__entity_type
File renamed without changes.

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp