NotificationsYou must be signed in to change notification settings
Fork352
Star6.6k

Commit228bf77

Montana Low

committed

docs

1 parenteeae31c commit228bf77Copy full SHA for 228bf77

File tree

1 file changed

+139

-13

lines changed

pgml/pgml
- model.py

1 file changed

+139

-13

lines changed

`‎pgml/pgml/model.py‎`

Lines changed: 139 additions & 13 deletions

Original file line number	Diff line number	Diff line change
`@@ -10,10 +10,32 @@`
`10`	`10`	`frompgml.sqlimportq`
`11`	`11`
`12`	`12`	`classProject(object):`
	`13`	`+"""`
	`14`	`+ Use projects to refine multiple models of a particular dataset on a specific objective.`
	`15`	`+`
	`16`	`+ Attributes:`
	`17`	`+ id (int): a unique identifier`
	`18`	`+ name (str): a human friendly unique identifier`
	`19`	`+ objective (str): the purpose of this project`
	`20`	`+ created_at (Timestamp): when this project was created`
	`21`	`+ updated_at (Timestamp): when this project was last updated`
	`22`	`+ """`
	`23`	`+`
`13`	`24`	`_cache= {}`
`14`	`25`
	`26`	`+def__init__(self):`
	`27`	`+self._deployed_model=None`
	`28`	`+`
`15`	`29`	`@classmethod`
`16`		`-deffind(cls,id):`
	`30`	`+deffind(cls,id:int):`
	`31`	`+"""`
	`32`	`+ Get a Project from the database.`
	`33`	`+`
	`34`	`+ Args:`
	`35`	`+ id (int): the project id`
	`36`	`+ Returns:`
	`37`	`+ Project or None: instantiated from the database if found`
	`38`	`+ """`
`17`	`39`	`result=plpy.execute(f"""`
`18`	`40`	`SELECT *`
`19`	`41`	`FROM pgml.projects`
`@@ -29,7 +51,18 @@ def find(cls, id):`
`29`	`51`	`returnproject`
`30`	`52`
`31`	`53`	`@classmethod`
`32`		`-deffind_by_name(cls,name):`
	`54`	`+deffind_by_name(cls,name:str):`
	`55`	`+"""`
	`56`	`+ Get a Project from the database by name.`
	`57`	`+`
	`58`	`+ This is the preferred API to retrieve projects, and they are cached by`
	`59`	`+ name to avoid needing to go to the database on every usage.`
	`60`	`+`
	`61`	`+ Args:`
	`62`	`+ name (str): the project name`
	`63`	`+ Returns:`
	`64`	`+ Project or None: instantiated from the database if found`
	`65`	`+ """`
`33`	`66`	`ifnameincls._cache:`
`34`	`67`	`returncls._cache[name]`
`35`	`68`
`@@ -48,7 +81,17 @@ def find_by_name(cls, name):`
`48`	`81`	`returnproject`
`49`	`82`
`50`	`83`	`@classmethod`
`51`		`-defcreate(cls,name,objective):`
	`84`	`+defcreate(cls,name:str,objective:str):`
	`85`	`+"""`
	`86`	`+ Create a Project and save it to the database.`
	`87`	`+`
	`88`	`+ Args:`
	`89`	`+ name (str): a human friendly identifier`
	`90`	`+ objective (str): valid values are ["regression", "classification"].`
	`91`	`+ Returns:`
	`92`	`+ Project: instantiated from the database`
	`93`	`+ """`
	`94`	`+`
`52`	`95`	`project=Project()`
`53`	`96`	`project.__dict__=dict(plpy.execute(f"""`
`54`	`97`	`INSERT INTO pgml.projects (name, objective)`
`@@ -59,18 +102,48 @@ def create(cls, name, objective):`
`59`	`102`	`cls._cache[name]=project`
`60`	`103`	`returnproject`
`61`	`104`
`62`		`-def__init__(self):`
`63`		`-self._deployed_model=None`
`64`		`-`
`65`	`105`	`@property`
`66`	`106`	`defdeployed_model(self):`
	`107`	`+"""`
	`108`	`+ Returns:`
	`109`	`+ Model: that should currently be used for predictions`
	`110`	`+ """`
`67`	`111`	`ifself._deployed_modelisNone:`
`68`	`112`	`self._deployed_model=Model.find_deployed(self.id)`
`69`	`113`	`returnself._deployed_model`
`70`	`114`
`71`	`115`	`classSnapshot(object):`
	`116`	`+"""`
	`117`	`+ Snapshots capture a set of training & test data for repeatability.`
	`118`	`+`
	`119`	`+ Attributes:`
	`120`	`+ id (int): a unique identifier`
	`121`	`+ relation_name (str): the name of the table or view to snapshot`
	`122`	`+ y_column_name (str): the label for training data`
	`123`	`+ test_size (float or int, optional): If float, should be between 0.0 and 1.0 and represent the proportion of the dataset to include in the test split. If int, represents the absolute number of test samples. If None, the value is set to the complement of the train size. If train_size is also None, it will be set to 0.25.`
	`124`	`+ test_sampling (str, optional): How to sample to create the test data. Defaults to "random". Valid values are ["first", "last", "random"].`
	`125`	`+ status (str): The current status of the snapshot, e.g. 'new' or 'created'`
	`126`	`+ created_at (Timestamp): when this snapshot was created`
	`127`	`+ updated_at (Timestamp): when this snapshot was last updated`
	`128`	`+ """`
`72`	`129`	`@classmethod`
`73`		`-defcreate(cls,relation_name,y_column_name,test_size,test_sampling):`
	`130`	`+defcreate(cls,relation_name:str,y_column_name:str,test_size:floatorint,test_sampling:str):`
	`131`	`+"""`
	`132`	`+ Create a Snapshot and save it to the database.`
	`133`	`+`
	`134`	`+ This creates both a metadata record in the snapshots table, as well as creating a new table`
	`135`	`+ that holds a snapshot of all the data currently present in the relation so that training`
	`136`	`+ runs may be repeated, or further analysis may be conducted against the input.`
	`137`	`+`
	`138`	`+ Args:`
	`139`	`+ relation_name (str): the name of the table or view to snapshot`
	`140`	`+ y_column_name (str): the label for training data`
	`141`	`+ test_size (float or int, optional): If float, should be between 0.0 and 1.0 and represent the proportion of the dataset to include in the test split. If int, represents the absolute number of test samples. If None, the value is set to the complement of the train size. If train_size is also None, it will be set to 0.25.`
	`142`	`+ test_sampling (str, optional): How to sample to create the test data. Defaults to "random". Valid values are ["first", "last", "random"].`
	`143`	`+ Returns:`
	`144`	`+ Snapshot: metadata instantiated from the database`
	`145`	`+ """`
	`146`	`+`
`74`	`147`	`snapshot=Snapshot()`
`75`	`148`	`snapshot.__dict__=dict(plpy.execute(f"""`
`76`	`149`	`INSERT INTO pgml.snapshots (relation_name, y_column_name, test_size, test_sampling, status)`
`@@ -90,6 +163,10 @@ def create(cls, relation_name, y_column_name, test_size, test_sampling):`
`90`	`163`	`returnsnapshot`
`91`	`164`
`92`	`165`	`defdata(self):`
	`166`	`+"""`
	`167`	`+ Returns:`
	`168`	`+ list, list, list, list: All rows from the snapshot split into X_train, X_test, y_train, y_test sets.`
	`169`	`+ """`
`93`	`170`	`data=plpy.execute(f"""`
`94`	`171`	`SELECT *`
`95`	`172`	`FROM pgml."snapshot_{self.id}"`
`@@ -141,11 +218,35 @@ def data(self):`
`141`	`218`	`# TODO normalize and clean data`
`142`	`219`
`143`	`220`	`classModel(object):`
	`221`	`+"""Models use an algorithm on a snapshot of data to record the parameters learned.`
	`222`	`+`
	`223`	`+ Attributes:`
	`224`	`+ project (Project): the project the model belongs to`
	`225`	`+ snapshot (str): the snapshot that provides the training and test data`
	`226`	`+ algorithm_name (str): the name of the algorithm used to train this model`
	`227`	`+ status (str): The current status of the model, e.g. 'new', 'training' or 'successful'`
	`228`	`+ created_at (Timestamp): when this model was created`
	`229`	`+ updated_at (Timestamp): when this model was last updated`
	`230`	`+ mean_squared_error (float):`
	`231`	`+ r2_score (float):`
	`232`	`+ pickle (bytes): the serialized version of the model parameters`
	`233`	`+ algorithm: the in memory version of the model parameters that can make predictions`
	`234`	`+ """`
`144`	`235`	`@classmethod`
`145`		`-defcreate(cls,project,snapshot,algorithm_name):`
	`236`	`+defcreate(cls,project:Project,snapshot:Snapshot,algorithm_name:str):`
	`237`	`+"""`
	`238`	`+ Create a Model and save it to the database.`
	`239`	`+`
	`240`	`+ Args:`
	`241`	`+ project (Project):`
	`242`	`+ snapshot (Snapshot):`
	`243`	`+ algorithm_name (str):`
	`244`	`+ Returns:`
	`245`	`+ Model: instantiated from the database`
	`246`	`+ """`
`146`	`247`	`result=plpy.execute(f"""`
`147`	`248`	`INSERT INTO pgml.models (project_id, snapshot_id, algorithm_name, status)`
`148`		`- VALUES ({q(project.id)},{q(snapshot.id)},{q(algorithm_name)}, 'training')`
	`249`	`+ VALUES ({q(project.id)},{q(snapshot.id)},{q(algorithm_name)}, 'new')`
`149`	`250`	`RETURNING *`
`150`	`251`	`""")`
`151`	`252`	`model=Model()`
`@@ -155,7 +256,13 @@ def create(cls, project, snapshot, algorithm_name):`
`155`	`256`	`returnmodel`
`156`	`257`
`157`	`258`	`@classmethod`
`158`		`-deffind_deployed(cls,project_id):`
	`259`	`+deffind_deployed(cls,project_id:int):`
	`260`	`+"""`
	`261`	`+ Args:`
	`262`	`+ project_id (int): The project id`
	`263`	`+ Returns:`
	`264`	`+ Model: that should currently be used for predictions of the project`
	`265`	`+ """`
`159`	`266`	`result=plpy.execute(f"""`
`160`	`267`	`SELECT models.*`
`161`	`268`	`FROM pgml.models`
`@@ -179,6 +286,10 @@ def __init__(self):`
`179`	`286`
`180`	`287`	`@property`
`181`	`288`	`defproject(self):`
	`289`	`+"""`
	`290`	`+ Returns:`
	`291`	`+ Project: that this model belongs to`
	`292`	`+ """`
`182`	`293`	`ifself._projectisNone:`
`183`	`294`	`self._project=Project.find(self.project_id)`
`184`	`295`	`returnself._project`
`@@ -197,7 +308,13 @@ def algorithm(self):`
`197`	`308`
`198`	`309`	`returnself._algorithm`
`199`	`310`
`200`		`-deffit(self,snapshot):`
	`311`	`+deffit(self,snapshot:Snapshot):`
	`312`	`+"""`
	`313`	`+ Learns the parameters of this model and records them in the database.`
	`314`	`+`
	`315`	`+ Args:`
	`316`	`+ snapshot (Snapshot): dataset used to train this model`
	`317`	`+ """`
`201`	`318`	`X_train,X_test,y_train,y_test=snapshot.data()`
`202`	`319`
`203`	`320`	`# Train the model`
`@@ -220,12 +337,21 @@ def fit(self, snapshot):`
`220`	`337`	`""")[0])`
`221`	`338`
`222`	`339`	`defdeploy(self):`
	`340`	`+"""Promote this model to the active version for the project that will be used for predictions"""`
`223`	`341`	`plpy.execute(f"""`
`224`	`342`	`INSERT INTO pgml.deployments (project_id, model_id)`
`225`	`343`	`VALUES ({q(self.project_id)},{q(self.id)})`
`226`	`344`	`""")`
`227`	`345`
`228`		`-defpredict(self,data):`
	`346`	`+defpredict(self,data:list):`
	`347`	`+"""Use the model for a set of features.`
	`348`	`+`
	`349`	`+ Args:`
	`350`	`+ data (list): list of features to form a single prediction for`
	`351`	`+`
	`352`	`+ Returns:`
	`353`	`+ float or int: scores for regressions or ints for classifications`
	`354`	`+ """`
`229`	`355`	`returnself.algorithm.predict(data)`
`230`	`356`
`231`	`357`
`@@ -236,7 +362,7 @@ def train(`
`236`	`362`	`y_column_name:str,`
`237`	`363`	`test_size:floatorint=0.1,`
`238`	`364`	`test_sampling:str="random"`
`239`		`-)->None:`
	`365`	`+):`
`240`	`366`	`"""Create a regression model from a table or view filled with training data.`
`241`	`367`
`242`	`368`	`Args:`

0 commit comments

Comments

(0)

Movatterモバイル変換

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Commit228bf77

File tree

1 file changed

1 file changed

`‎pgml/pgml/model.py‎`

0 commit comments