Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

add the california housing example#16

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to ourterms of service andprivacy statement. We’ll occasionally send you account related emails.

Already on GitHub?Sign in to your account

Merged
levkk merged 1 commit intomasterfrommontana/housing
Apr 18, 2022
Merged
Show file tree
Hide file tree
Changes fromall commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletionsREADME.md
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -211,4 +211,9 @@ Run the test:
psql -f sql/test.sql
```

One liner:
```
cd pgml; sudo python3 setup.py install; cd ../; sudo -u postgres psql -f sql/test.sql
```

Make sure to run it exactly like this, from the root directory of the repo.
41 changes: 41 additions & 0 deletionsexamples/california_housing/run.sql
View file
Open in desktop
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
-- This example trains models on the sklean california_housing dataset
-- which is a copy of the test set from the StatLib repository
-- https://www.dcc.fc.up.pt/~ltorgo/Regression/cal_housing.html
--
-- This demonstrates using a table with individual columns as features
-- for regression.
SELECT pgml.load_dataset('california_housing');

-- view the dataset
SELECT * from pgml.california_housing;

-- train a simple model to classify the data
SELECT pgml.train('California Housing Prediction', 'regression', 'pgml.california_housing', 'target');

-- check out the predictions
SELECT target, pgml.predict('California Housing Prediction', ARRAY[median_income, house_age, avg_rooms, avg_bedrooms, population, avg_occupants, latitude, longitude]) AS prediction
FROM pgml.california_housing
LIMIT 10;

-- -- train some more models with different algorithms
SELECT pgml.train('California Housing Prediction', 'regression', 'pgml.california_housing', 'target', 'svm');
SELECT pgml.train('California Housing Prediction', 'regression', 'pgml.california_housing', 'target', 'random_forest');
SELECT pgml.train('California Housing Prediction', 'regression', 'pgml.california_housing', 'target', 'gradient_boosting_trees');
-- TODO SELECT pgml.train('California Housing Prediction', 'regression', 'pgml.california_housing', 'target', 'dense_neural_network');
-- -- check out all that hard work
SELECT * FROM pgml.trained_models;

-- deploy the random_forest model for prediction use
SELECT pgml.deploy('California Housing Prediction', 'random_forest');
-- check out that throughput
SELECT * FROM pgml.deployed_models;

-- do some hyper param tuning
-- TODO SELECT pgml.hypertune(100, 'California Housing Prediction', 'regression', 'pgml.california_housing', 'target', 'gradient_boosted_trees');
-- deploy the "best" model for prediction use
SELECT pgml.deploy('California Housing Prediction', 'best_fit');

-- check out the improved predictions
SELECT target, pgml.predict('California Housing Prediction', ARRAY[median_income, house_age, avg_rooms, avg_bedrooms, population, avg_occupants, latitude, longitude]) AS prediction
FROM pgml.california_housing
LIMIT 10;
3 changes: 3 additions & 0 deletionsexamples/digits/run.sql
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -2,6 +2,9 @@
-- which is a copy of the test set of the UCI ML hand-written digits datasets
-- https://archive.ics.uci.edu/ml/datasets/Optical+Recognition+of+Handwritten+Digits
--
-- This demonstrates using a table with a single array feature column
-- for classification.
--
-- The final result after a few seconds of training is not terrible. Maybe not perfect
-- enough for mission critical applications, but it's telling how quickly "off the shelf"
-- solutions can solve problems these days.
Expand Down
28 changes: 26 additions & 2 deletionspgml/pgml/datasets.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,45 @@
import plpy
from sklearn.datasets import load_digits as d
import sklearn.datasets

from pgml.sql import q
from pgml.exceptions import PgMLException

def load(source: str):
if source == "digits":
load_digits()
elif source == "california_housing":
load_california_housing()
else:
raise PgMLException(f"Invalid dataset name: {source}. Valid values are ['digits'].")
return "OK"

def load_digits():
dataset =d()
dataset =sklearn.datasets.load_digits()
a = plpy.execute("DROP TABLE IF EXISTS pgml.digits")
a = plpy.execute("CREATE TABLE pgml.digits (image SMALLINT[], target INTEGER)")
a = plpy.execute(f"""COMMENT ON TABLE pgml.digits IS {q(dataset["DESCR"])}""")
for X, y in zip(dataset["data"], dataset["target"]):
X = ",".join("%i" % x for x in list(X))
plpy.execute(f"""INSERT INTO pgml.digits (image, target) VALUES ('{{{X}}}', {y})""")

def load_california_housing():
dataset = sklearn.datasets.fetch_california_housing()
a = plpy.execute("DROP TABLE IF EXISTS pgml.california_housing")
a = plpy.execute("""
CREATE TABLE pgml.california_housing (
median_income FLOAT4, -- median income in block group
house_age FLOAT4, -- median house age in block group
avg_rooms FLOAT4, -- average number of rooms per household
avg_bedrooms FLOAT4, -- average number of bedrooms per household
population FLOAT4, -- block group population
avg_occupants FLOAT4, -- average number of household members
latitude FLOAT4, -- block group latitude
longitude FLOAT4, -- block group longitudetarget INTEGER
target FLOAT4
)""")
a = plpy.execute(f"""COMMENT ON TABLE pgml.california_housing IS {q(dataset["DESCR"])}""")
for X, y in zip(dataset["data"], dataset["target"]):
plpy.execute(f"""
INSERT INTO pgml.california_housing (median_income, house_age, avg_rooms, avg_bedrooms, population, avg_occupants, latitude, longitude, target)
VALUES ({q(X[0])}, {q(X[1])}, {q(X[2])}, {q(X[3])}, {q(X[4])}, {q(X[5])}, {q(X[6])}, {q(X[7])}, {q(y)})""")

2 changes: 1 addition & 1 deletionsql/install.sql
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -136,7 +136,7 @@ $$ LANGUAGE plpython3u;
---
--- Predict
---
CREATE OR REPLACE FUNCTION pgml.predict(project_name TEXT, featuresNUMERIC[])
CREATE OR REPLACE FUNCTION pgml.predict(project_name TEXT, featuresDOUBLE PRECISION[])
RETURNS DOUBLE PRECISION
AS $$
from pgml.model import Project
Expand Down

[8]ページ先頭

©2009-2025 Movatter.jp