Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit228bf77

Browse files
author
Montana Low
committed
docs
1 parenteeae31c commit228bf77

File tree

1 file changed

+139
-13
lines changed

1 file changed

+139
-13
lines changed

‎pgml/pgml/model.py‎

Lines changed: 139 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,32 @@
1010
frompgml.sqlimportq
1111

1212
classProject(object):
13+
"""
14+
Use projects to refine multiple models of a particular dataset on a specific objective.
15+
16+
Attributes:
17+
id (int): a unique identifier
18+
name (str): a human friendly unique identifier
19+
objective (str): the purpose of this project
20+
created_at (Timestamp): when this project was created
21+
updated_at (Timestamp): when this project was last updated
22+
"""
23+
1324
_cache= {}
1425

26+
def__init__(self):
27+
self._deployed_model=None
28+
1529
@classmethod
16-
deffind(cls,id):
30+
deffind(cls,id:int):
31+
"""
32+
Get a Project from the database.
33+
34+
Args:
35+
id (int): the project id
36+
Returns:
37+
Project or None: instantiated from the database if found
38+
"""
1739
result=plpy.execute(f"""
1840
SELECT *
1941
FROM pgml.projects
@@ -29,7 +51,18 @@ def find(cls, id):
2951
returnproject
3052

3153
@classmethod
32-
deffind_by_name(cls,name):
54+
deffind_by_name(cls,name:str):
55+
"""
56+
Get a Project from the database by name.
57+
58+
This is the preferred API to retrieve projects, and they are cached by
59+
name to avoid needing to go to the database on every usage.
60+
61+
Args:
62+
name (str): the project name
63+
Returns:
64+
Project or None: instantiated from the database if found
65+
"""
3366
ifnameincls._cache:
3467
returncls._cache[name]
3568

@@ -48,7 +81,17 @@ def find_by_name(cls, name):
4881
returnproject
4982

5083
@classmethod
51-
defcreate(cls,name,objective):
84+
defcreate(cls,name:str,objective:str):
85+
"""
86+
Create a Project and save it to the database.
87+
88+
Args:
89+
name (str): a human friendly identifier
90+
objective (str): valid values are ["regression", "classification"].
91+
Returns:
92+
Project: instantiated from the database
93+
"""
94+
5295
project=Project()
5396
project.__dict__=dict(plpy.execute(f"""
5497
INSERT INTO pgml.projects (name, objective)
@@ -59,18 +102,48 @@ def create(cls, name, objective):
59102
cls._cache[name]=project
60103
returnproject
61104

62-
def__init__(self):
63-
self._deployed_model=None
64-
65105
@property
66106
defdeployed_model(self):
107+
"""
108+
Returns:
109+
Model: that should currently be used for predictions
110+
"""
67111
ifself._deployed_modelisNone:
68112
self._deployed_model=Model.find_deployed(self.id)
69113
returnself._deployed_model
70114

71115
classSnapshot(object):
116+
"""
117+
Snapshots capture a set of training & test data for repeatability.
118+
119+
Attributes:
120+
id (int): a unique identifier
121+
relation_name (str): the name of the table or view to snapshot
122+
y_column_name (str): the label for training data
123+
test_size (float or int, optional): If float, should be between 0.0 and 1.0 and represent the proportion of the dataset to include in the test split. If int, represents the absolute number of test samples. If None, the value is set to the complement of the train size. If train_size is also None, it will be set to 0.25.
124+
test_sampling (str, optional): How to sample to create the test data. Defaults to "random". Valid values are ["first", "last", "random"].
125+
status (str): The current status of the snapshot, e.g. 'new' or 'created'
126+
created_at (Timestamp): when this snapshot was created
127+
updated_at (Timestamp): when this snapshot was last updated
128+
"""
72129
@classmethod
73-
defcreate(cls,relation_name,y_column_name,test_size,test_sampling):
130+
defcreate(cls,relation_name:str,y_column_name:str,test_size:floatorint,test_sampling:str):
131+
"""
132+
Create a Snapshot and save it to the database.
133+
134+
This creates both a metadata record in the snapshots table, as well as creating a new table
135+
that holds a snapshot of all the data currently present in the relation so that training
136+
runs may be repeated, or further analysis may be conducted against the input.
137+
138+
Args:
139+
relation_name (str): the name of the table or view to snapshot
140+
y_column_name (str): the label for training data
141+
test_size (float or int, optional): If float, should be between 0.0 and 1.0 and represent the proportion of the dataset to include in the test split. If int, represents the absolute number of test samples. If None, the value is set to the complement of the train size. If train_size is also None, it will be set to 0.25.
142+
test_sampling (str, optional): How to sample to create the test data. Defaults to "random". Valid values are ["first", "last", "random"].
143+
Returns:
144+
Snapshot: metadata instantiated from the database
145+
"""
146+
74147
snapshot=Snapshot()
75148
snapshot.__dict__=dict(plpy.execute(f"""
76149
INSERT INTO pgml.snapshots (relation_name, y_column_name, test_size, test_sampling, status)
@@ -90,6 +163,10 @@ def create(cls, relation_name, y_column_name, test_size, test_sampling):
90163
returnsnapshot
91164

92165
defdata(self):
166+
"""
167+
Returns:
168+
list, list, list, list: All rows from the snapshot split into X_train, X_test, y_train, y_test sets.
169+
"""
93170
data=plpy.execute(f"""
94171
SELECT *
95172
FROM pgml."snapshot_{self.id}"
@@ -141,11 +218,35 @@ def data(self):
141218
# TODO normalize and clean data
142219

143220
classModel(object):
221+
"""Models use an algorithm on a snapshot of data to record the parameters learned.
222+
223+
Attributes:
224+
project (Project): the project the model belongs to
225+
snapshot (str): the snapshot that provides the training and test data
226+
algorithm_name (str): the name of the algorithm used to train this model
227+
status (str): The current status of the model, e.g. 'new', 'training' or 'successful'
228+
created_at (Timestamp): when this model was created
229+
updated_at (Timestamp): when this model was last updated
230+
mean_squared_error (float):
231+
r2_score (float):
232+
pickle (bytes): the serialized version of the model parameters
233+
algorithm: the in memory version of the model parameters that can make predictions
234+
"""
144235
@classmethod
145-
defcreate(cls,project,snapshot,algorithm_name):
236+
defcreate(cls,project:Project,snapshot:Snapshot,algorithm_name:str):
237+
"""
238+
Create a Model and save it to the database.
239+
240+
Args:
241+
project (Project): the project the model belongs to
242+
snapshot (Snapshot): the snapshot that provides the training and test data
243+
algorithm_name (str):
244+
Returns:
245+
Model: instantiated from the database
246+
"""
146247
result=plpy.execute(f"""
147248
INSERT INTO pgml.models (project_id, snapshot_id, algorithm_name, status)
148-
VALUES ({q(project.id)},{q(snapshot.id)},{q(algorithm_name)}, 'training')
249+
VALUES ({q(project.id)},{q(snapshot.id)},{q(algorithm_name)}, 'new')
149250
RETURNING *
150251
""")
151252
model=Model()
@@ -155,7 +256,13 @@ def create(cls, project, snapshot, algorithm_name):
155256
returnmodel
156257

157258
@classmethod
158-
deffind_deployed(cls,project_id):
259+
deffind_deployed(cls,project_id:int):
260+
"""
261+
Args:
262+
project_id (int): The project id
263+
Returns:
264+
Model: that should currently be used for predictions of the project
265+
"""
159266
result=plpy.execute(f"""
160267
SELECT models.*
161268
FROM pgml.models
@@ -179,6 +286,10 @@ def __init__(self):
179286

180287
@property
181288
defproject(self):
289+
"""
290+
Returns:
291+
Project: that this model belongs to
292+
"""
182293
ifself._projectisNone:
183294
self._project=Project.find(self.project_id)
184295
returnself._project
@@ -197,7 +308,13 @@ def algorithm(self):
197308

198309
returnself._algorithm
199310

200-
deffit(self,snapshot):
311+
deffit(self,snapshot:Snapshot):
312+
"""
313+
Learns the parameters of this model and records them in the database.
314+
315+
Args:
316+
snapshot (Snapshot): dataset used to train this model
317+
"""
201318
X_train,X_test,y_train,y_test=snapshot.data()
202319

203320
# Train the model
@@ -220,12 +337,21 @@ def fit(self, snapshot):
220337
""")[0])
221338

222339
defdeploy(self):
340+
"""Promote this model to the active version for the project that will be used for predictions"""
223341
plpy.execute(f"""
224342
INSERT INTO pgml.deployments (project_id, model_id)
225343
VALUES ({q(self.project_id)},{q(self.id)})
226344
""")
227345

228-
defpredict(self,data):
346+
defpredict(self,data:list):
347+
"""Use the model for a set of features.
348+
349+
Args:
350+
data (list): list of features to form a single prediction for
351+
352+
Returns:
353+
float or int: scores for regressions or ints for classifications
354+
"""
229355
returnself.algorithm.predict(data)
230356

231357

@@ -236,7 +362,7 @@ def train(
236362
y_column_name:str,
237363
test_size:floatorint=0.1,
238364
test_sampling:str="random"
239-
)->None:
365+
):
240366
"""Create a regression model from a table or view filled with training data.
241367
242368
Args:

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp