Sep 13, 2022 · Sep 13, 2022 · Sep 13, 2022 · Sep 13, 2022 · Sep 13, 2022 · Sep 13, 2022
diff --git a/pgml-extension/pgml_rust/src/api.rs b/pgml-extension/pgml_rust/src/api.rs
 use std::fmt::Write;
 use std::str::FromStr;

 use pgx::*;
 use crate::orm::Strategy;
 use crate::orm::Task;

 #[allow(clippy::too_many_arguments)]
 #[pg_extern]
 fn train(
    project_name: &str,
        vec![(PgBuiltInOids::TEXTOID.oid(), project_name.into_datum())],
    );

    let mut deploy = false;
    if deployed_metrics.is_none() {
        deploy = true;
    } else {
        let deployed_metrics = deployed_metrics.unwrap().0;
        let deployed_metrics = deployed_metrics.as_object().unwrap();
        if project.task == Task::classification
            && deployed_metrics.get("f1").unwrap().as_f64()
                < new_metrics.get("f1").unwrap().as_f64()
        {
            deploy = true;
        }
        if project.task == Task::regression
            && deployed_metrics.get("r2").unwrap().as_f64()
                < new_metrics.get("r2").unwrap().as_f64()
        {
            deploy = true;
    let mut deploy = true;
    if let Some(deployed_metrics) = deployed_metrics {
        let deployed_metrics = deployed_metrics.0.as_object().unwrap();
        match project.task {
            Task::classification => {
                if deployed_metrics.get("f1").unwrap().as_f64()
                    > new_metrics.get("f1").unwrap().as_f64()
                {
                    deploy = false;
                }
            }
            Task::regression => {
                if deployed_metrics.get("r2").unwrap().as_f64()
                    > new_metrics.get("r2").unwrap().as_f64()
                {
                    deploy = false;
                }
            }
        }
    }

        vec![(PgBuiltInOids::TEXTOID.oid(), project_name.into_datum())],
    );
    let project_id =
        project_id.expect(format!("Project named `{}` does not exist.", project_name).as_str());
        project_id.unwrap_or_else(|| panic!("Project named `{}` does not exist.", project_name));
    let task = Task::from_str(&task.unwrap()).unwrap();

    let mut sql = "SELECT models.id, models.algorithm::TEXT FROM pgml_rust.models JOIN pgml_rust.projects ON projects.id = models.project_id".to_string();
    let mut predicate = "\nWHERE projects.name = $1".to_string();
    match algorithm {
        Some(algorithm) => {
            predicate += &format!(
                "\nAND algorithm::TEXT = '{}'",
                algorithm.to_string().as_str()
            )
        }
        _ => (),
    if let Some(algorithm) = algorithm {
        let _ = write!(
            predicate,
            "\nAND algorithm::TEXT = '{}'",
            algorithm.to_string().as_str()
        );
    }
    match strategy {
        Strategy::best_score => match task {
            Task::regression => {
                sql += &format!("{predicate}\nORDER BY models.metrics->>'r2' DESC NULLS LAST");
                let _ = write!(
                    sql,
                    "{predicate}\nORDER BY models.metrics->>'r2' DESC NULLS LAST"
                );
            }
            Task::classification => {
                sql += &format!("{predicate}\nORDER BY models.metrics->>'f1' DESC NULLS LAST");
                let _ = write!(
                    sql,
                    "{predicate}\nORDER BY models.metrics->>'f1' DESC NULLS LAST"
                );
            }
        },
        Strategy::most_recent => {
 sql += &format!("{predicate}\nORDER by models.created_at DESC");
 let _ = write!(sql,"{predicate}\nORDER by models.created_at DESC");
        }
        Strategy::rollback => {
            sql += &format!(
            let _ = write!(
                sql,
                "
                JOIN pgml_rust.deployments ON deployments.project_id = projects.id
                    AND deployments.model_id = models.id
    limit: Option<default!(i64, "NULL")>,
 ) -> impl std::iter::Iterator<Item = (name!(table_name, String), name!(rows, i64))> {
    // cast limit since pgx doesn't support usize
    let limit: Option<usize> = match limit {
        Some(limit) => Some(limit.try_into().unwrap()),
        None => None,
    };
    let limit: Option<usize> = limit.map(|limit| limit.try_into().unwrap());
    let (name, rows) = match source {
        "breast_cancer" => crate::orm::dataset::load_breast_cancer(limit),
        "diabetes" => crate::orm::dataset::load_diabetes(limit),
diff --git a/pgml-extension/pgml_rust/src/lib.rs b/pgml-extension/pgml_rust/src/lib.rs

    match guard.get(&model_id) {
        Some(data) => {
            let bst = Booster::load_buffer(&data).unwrap();
            let bst = Booster::load_buffer(data).unwrap();
            let dmat = DMatrix::from_dense(&features, 1).unwrap();

            bst.predict(&dmat).unwrap()[0]

    match guard.get(&model_id) {
        Some(data) => {
            let bst = Booster::load_buffer(&data).unwrap();
            let bst = Booster::load_buffer(data).unwrap();
            let dmat = DMatrix::from_dense(&features, num_rows as usize).unwrap();

            bst.predict(&dmat).unwrap()
diff --git a/pgml-extension/pgml_rust/src/orm/algorithm.rs b/pgml-extension/pgml_rust/src/orm/algorithm.rs
 use pgx::*;
 use serde::Deserialize;

 #[derive(PostgresEnum, Copy, Clone, PartialEq, Debug, Deserialize)]
 #[derive(PostgresEnum, Copy, Clone,Eq,PartialEq, Debug, Deserialize)]
 #[allow(non_camel_case_types)]
 pub enum Algorithm {
    linear,
diff --git a/pgml-extension/pgml_rust/src/orm/dataset.rs b/pgml-extension/pgml_rust/src/orm/dataset.rs

    pub fn distinct_labels(&self) -> u32 {
        let mut v = HashSet::new();
        // Treat the f32 values as u32 for std::cmp::Eq. We don't
        // Treat the f32 values as u32 for std::cmp::Eq. We don't
        // care about the nuance of nan equality here, they should
        // already be filtered out upstream.
        self.y.iter().for_each(|i| if !i.is_nan() { v.insert(i.to_bits()); });
        self.y.iter().for_each(|i| {
            if !i.is_nan() {
                v.insert(i.to_bits());
            }
        });
        v.len().try_into().unwrap()
    }
 }
        None => diabetes.num_samples,
    };
    for i in 0..limit {
        let age = diabetes.data[(i * diabetes.num_features) + 0];
        let age = diabetes.data[(i * diabetes.num_features)];
        let sex = diabetes.data[(i * diabetes.num_features) + 1];
        let bmi = diabetes.data[(i * diabetes.num_features) + 2];
        let bp = diabetes.data[(i * diabetes.num_features) + 3];
        let target = digits.target[i];
        // shape the image in a 2d array
        let mut image = vec![vec![0.; 8]; 8];
        #[allow(clippy::needless_range_loop)] // x & y are in fact used
        for x in 0..8 {
            for y in 0..8 {
                image[x][y] = digits.data[(i * 64) + (x * 8) + y];
        None => iris.num_samples,
    };
    for i in 0..limit {
        let sepal_length = iris.data[(i * iris.num_features) + 0];
        let sepal_length = iris.data[(i * iris.num_features)];
        let sepal_width = iris.data[(i * iris.num_features) + 1];
        let petal_length = iris.data[(i * iris.num_features) + 2];
        let petal_width = iris.data[(i * iris.num_features) + 3];
            vec![
                (
                    PgBuiltInOids::FLOAT4OID.oid(),
                    breast_cancer.data[(i * breast_cancer.num_features) + 0].into_datum(),
                    breast_cancer.data[(i * breast_cancer.num_features)].into_datum(),
                ),
                (
                    PgBuiltInOids::FLOAT4OID.oid(),
diff --git a/pgml-extension/pgml_rust/src/orm/estimator.rs b/pgml-extension/pgml_rust/src/orm/estimator.rs
 use crate::orm::Dataset;
 use crate::orm::Task;

 #[allow(clippy::type_complexity)]
 static DEPLOYED_ESTIMATORS_BY_PROJECT_NAME: Lazy<Mutex<HashMap<String, Arc<Box<dyn Estimator>>>>> =
    Lazy::new(|| Mutex::new(HashMap::new()));

 pub fn find_deployed_estimator_by_project_name(name: &str) -> Arc<Box<dyn Estimator>> {
    {
        let estimators = DEPLOYED_ESTIMATORS_BY_PROJECT_NAME.lock().unwrap();
        let estimator = estimators.get(name);
        ifestimator.is_some() {
            return estimator.unwrap().clone();
        iflet Some(estimator) = estimator {
            return estimator.clone();
        }
    }

        LIMIT 1;",
        vec![(PgBuiltInOids::TEXTOID.oid(), name.into_datum())],
    );
    let task = Task::from_str(
        &task.expect(
            format!(
                "Project {} does not have a trained and deployed model.",
                name
            )
            .as_str(),
        ),
    )
    let task = Task::from_str(&task.unwrap_or_else(|| {
        panic!(
            "Project {} does not have a trained and deployed model.",
            name
        )
    }))
    .unwrap();
    let algorithm = Algorithm::from_str(
        &algorithm.expect(
            format!(
                "Project {} does not have a trained and deployed model.",
                name
            )
            .as_str(),
        ),
    )
    let algorithm = Algorithm::from_str(&algorithm.unwrap_or_else(|| {
        panic!(
            "Project {} does not have a trained and deployed model.",
            name
        )
    }))
    .unwrap();
    let data = data.expect(
 format!(
    let data = data.unwrap_or_else(|| {
 panic!(
            "Project {} does not have a trained and deployed model.",
            name
        )
        .as_str(),
    );
    });

    let e: Box<dyn Estimator> = match task {
        Task::regression => match algorithm {
    smartcore::api::Predictor::predict(predictor, &features).unwrap()[0]
 }

 fn calc_metrics(y_test: &Array1<f32>, y_hat: &Array1<f32>, distinct_labels: u32, task: Task) -> HashMap<String, f32> {
 fn calc_metrics(
    y_test: &Array1<f32>,
    y_hat: &Array1<f32>,
    distinct_labels: u32,
    task: Task,
 ) -> HashMap<String, f32> {
    let mut results = HashMap::new();
    match task {
        Task::regression => {
diff --git a/pgml-extension/pgml_rust/src/orm/model.rs b/pgml-extension/pgml_rust/src/orm/model.rs
                  (PgBuiltInOids::JSONBOID.oid(), search_args.into_datum()),
              ])
          ).first();
            if result.len() > 0 {
            if!result.is_empty() {
                model = Some(Model {
                    id: result.get_datum(1).unwrap(),
                    project_id: result.get_datum(2).unwrap(),
                    hyperparams: result.get_datum(5).unwrap(),
                    status: result.get_datum(6).unwrap(),
                    metrics: result.get_datum(7),
                    search: search, // TODO
                    search, // TODO
                    search_params: result.get_datum(9).unwrap(),
                    search_args: result.get_datum(10).unwrap(),
                    created_at: result.get_datum(11).unwrap(),
        });
        let mut model = model.unwrap();
        let dataset = snapshot.dataset();
        model.fit(&project, &dataset);
        model.test(&project, &dataset);
        model.fit(project, &dataset);
        model.test(project, &dataset);
        model
    }

                        .unwrap(),
                    )),
                };
                let bytes: Vec<u8> = rmp_serde::to_vec(&*estimator.as_ref().unwrap()).unwrap();
                let bytes: Vec<u8> = rmp_serde::to_vec(estimator.as_ref().unwrap()).unwrap();
                Spi::get_one_with_args::<i64>(
                  "INSERT INTO pgml_rust.files (model_id, path, part, data) VALUES($1, 'estimator.rmp', 0, $2) RETURNING id",
                  vec![
            }
            Algorithm::xgboost => {
                let mut dtrain =
                    DMatrix::from_dense(&dataset.x_train(), dataset.num_train_rows).unwrap();
                    DMatrix::from_dense(dataset.x_train(), dataset.num_train_rows).unwrap();
                let mut dtest =
                    DMatrix::from_dense(&dataset.x_test(), dataset.num_test_rows).unwrap();
                dtrain.set_labels(&dataset.y_train()).unwrap();
                dtest.set_labels(&dataset.y_test()).unwrap();
                    DMatrix::from_dense(dataset.x_test(), dataset.num_test_rows).unwrap();
                dtrain.set_labels(dataset.y_train()).unwrap();
                dtest.set_labels(dataset.y_test()).unwrap();

                // specify datasets to evaluate against during training
                let evaluation_sets = &[(&dtrain, "train"), (&dtest, "test")];
                        .objective(match project.task {
                            Task::regression => xgboost::parameters::learning::Objective::RegLinear,
                            Task::classification => {
                                xgboost::parameters::learning::Objective::MultiSoftmax(dataset.distinct_labels())
                            },
                                xgboost::parameters::learning::Objective::MultiSoftmax(
                                    dataset.distinct_labels(),
                                )
                            }
                        })
                        .build()
                        .unwrap();
    }

    fn test(&mut self, project: &Project, dataset: &Dataset) {
        let metrics = self
            .estimator
            .as_ref()
            .unwrap()
            .test(project.task, &dataset);
        self.metrics = Some(JsonB(json!(metrics.clone())));
        let metrics = self.estimator.as_ref().unwrap().test(project.task, dataset);
        self.metrics = Some(JsonB(json!(metrics)));
        Spi::get_one_with_args::<i64>(
            "UPDATE pgml_rust.models SET metrics = $1 WHERE id = $2 RETURNING id",
            vec![
diff --git a/pgml-extension/pgml_rust/src/orm/project.rs b/pgml-extension/pgml_rust/src/orm/project.rs
                    (PgBuiltInOids::INT8OID.oid(), id.into_datum()),
                ])
            ).first();
            if result.len() > 0 {
            if!result.is_empty() {
                project = Some(Project {
                    id: result.get_datum(1).unwrap(),
                    name: result.get_datum(2).unwrap(),
                    (PgBuiltInOids::TEXTOID.oid(), name.into_datum()),
                ])
            ).first();
            if result.len() > 0 {
            if!result.is_empty() {
                project = Some(Project {
                    id: result.get_datum(1).unwrap(),
                    name: result.get_datum(2).unwrap(),
                    (PgBuiltInOids::TEXTOID.oid(), task.to_string().into_datum()),
                ])
            ).first();
            if result.len() > 0 {
            if!result.is_empty() {
                project = Some(Project {
                    id: result.get_datum(1).unwrap(),
                    name: result.get_datum(2).unwrap(),
diff --git a/pgml-extension/pgml_rust/src/orm/sampling.rs b/pgml-extension/pgml_rust/src/orm/sampling.rs
 use pgx::*;
 use serde::Deserialize;

 #[derive(PostgresEnum, Copy, Clone, PartialEq, Debug, Deserialize)]
 #[derive(PostgresEnum, Copy, Clone,Eq,PartialEq, Debug, Deserialize)]
 #[allow(non_camel_case_types)]
 pub enum Sampling {
    random,
Original file line number	Diff line number	Diff line change
		@@ -1,3 +1,4 @@
		use std::fmt::Write;
		use std::str::FromStr;

		use pgx::*;
Expand All		@@ -11,6 +12,7 @@ use crate::orm::Snapshot;
		use crate::orm::Strategy;
		use crate::orm::Task;

		#[allow(clippy::too_many_arguments)]
		#[pg_extern]
		fn train(
		project_name: &str,
Expand DownExpand Up		@@ -76,23 +78,24 @@ fn train(
		vec![(PgBuiltInOids::TEXTOID.oid(), project_name.into_datum())],
		);

		let mut deploy = false;
		if deployed_metrics.is_none() {
		deploy = true;
		} else {
		let deployed_metrics = deployed_metrics.unwrap().0;
		let deployed_metrics = deployed_metrics.as_object().unwrap();
		if project.task == Task::classification
		&& deployed_metrics.get("f1").unwrap().as_f64()
		< new_metrics.get("f1").unwrap().as_f64()
		{
		deploy = true;
		}
		if project.task == Task::regression
		&& deployed_metrics.get("r2").unwrap().as_f64()
		< new_metrics.get("r2").unwrap().as_f64()
		{
		deploy = true;
		let mut deploy = true;
		if let Some(deployed_metrics) = deployed_metrics {
		let deployed_metrics = deployed_metrics.0.as_object().unwrap();
		match project.task {
		Task::classification => {
		if deployed_metrics.get("f1").unwrap().as_f64()
		> new_metrics.get("f1").unwrap().as_f64()
		{
		deploy = false;
		}
		}
		Task::regression => {
		if deployed_metrics.get("r2").unwrap().as_f64()
		> new_metrics.get("r2").unwrap().as_f64()
		{
		deploy = false;
		}
		}
		}
		}

Expand DownExpand Up		@@ -133,34 +136,39 @@ fn deploy(
		vec![(PgBuiltInOids::TEXTOID.oid(), project_name.into_datum())],
		);
		let project_id =
		project_id.expect(format!("Project named `{}` does not exist.", project_name).as_str());
		project_id.unwrap_or_else(\|\| panic!("Project named `{}` does not exist.", project_name));
		let task = Task::from_str(&task.unwrap()).unwrap();

		let mut sql = "SELECT models.id, models.algorithm::TEXT FROM pgml_rust.models JOIN pgml_rust.projects ON projects.id = models.project_id".to_string();
		let mut predicate = "\nWHERE projects.name = $1".to_string();
		match algorithm {
		Some(algorithm) => {
		predicate += &format!(
		"\nAND algorithm::TEXT = '{}'",
		algorithm.to_string().as_str()
		)
		}
		_ => (),
		if let Some(algorithm) = algorithm {
		let _ = write!(
		predicate,
		"\nAND algorithm::TEXT = '{}'",
		algorithm.to_string().as_str()
		);
		}
		match strategy {
		Strategy::best_score => match task {
		Task::regression => {
		sql += &format!("{predicate}\nORDER BY models.metrics->>'r2' DESC NULLS LAST");
		let _ = write!(
Copy link Contributor levkkSep 13, 2022 Choose a reason for hiding this comment The reason will be displayed to describe this comment to others.Learn more. What come on clippy, this seems totally fine. Copy link ContributorAuthor montanalowSep 13, 2022 Choose a reason for hiding this comment The reason will be displayed to describe this comment to others.Learn more. format! does an extra allocation compared to write... 🤷‍♂️
		sql,
		"{predicate}\nORDER BY models.metrics->>'r2' DESC NULLS LAST"
		);
		}
		Task::classification => {
		sql += &format!("{predicate}\nORDER BY models.metrics->>'f1' DESC NULLS LAST");
		let _ = write!(
		sql,
		"{predicate}\nORDER BY models.metrics->>'f1' DESC NULLS LAST"
		);
		}
		},
		Strategy::most_recent => {
		sql += &format!("{predicate}\nORDER by models.created_at DESC");
		let _ = write!(sql,"{predicate}\nORDER by models.created_at DESC");
		}
		Strategy::rollback => {
		sql += &format!(
		let _ = write!(
		sql,
		"
		JOIN pgml_rust.deployments ON deployments.project_id = projects.id
		AND deployments.model_id = models.id
Expand DownExpand Up		@@ -230,10 +238,7 @@ fn load_dataset(
		limit: Option<default!(i64, "NULL")>,
		) -> impl std::iter::Iterator<Item = (name!(table_name, String), name!(rows, i64))> {
		// cast limit since pgx doesn't support usize
		let limit: Option<usize> = match limit {
		Some(limit) => Some(limit.try_into().unwrap()),
		None => None,
		};
		let limit: Option<usize> = limit.map(\|limit\| limit.try_into().unwrap());
Copy link Contributor levkkSep 13, 2022 Choose a reason for hiding this comment The reason will be displayed to describe this comment to others.Learn more. Weird. Copy link ContributorAuthor montanalowSep 13, 2022 Choose a reason for hiding this comment The reason will be displayed to describe this comment to others.Learn more. yeah, turns out you can map things like this...
		let (name, rows) = match source {
		"breast_cancer" => crate::orm::dataset::load_breast_cancer(limit),
		"diabetes" => crate::orm::dataset::load_diabetes(limit),
Expand Down
Original file line number	Diff line number	Diff line change
Expand Up		@@ -28,7 +28,7 @@ fn model_predict(model_id: i64, features: Vec<f32>) -> f32 {

		match guard.get(&model_id) {
		Some(data) => {
		let bst = Booster::load_buffer(&data).unwrap();
		let bst = Booster::load_buffer(data).unwrap();
		let dmat = DMatrix::from_dense(&features, 1).unwrap();

		bst.predict(&dmat).unwrap()[0]
Expand DownExpand Up		@@ -66,7 +66,7 @@ fn model_predict_batch(model_id: i64, features: Vec<f32>, num_rows: i32) -> Vec<

		match guard.get(&model_id) {
		Some(data) => {
		let bst = Booster::load_buffer(&data).unwrap();
		let bst = Booster::load_buffer(data).unwrap();
Copy link Contributor levkkSep 13, 2022 Choose a reason for hiding this comment The reason will be displayed to describe this comment to others.Learn more. This was a clippy warning? Should of been a compilation error... Copy link ContributorAuthor montanalowSep 13, 2022 Choose a reason for hiding this comment The reason will be displayed to describe this comment to others.Learn more. The compiler handles the extra dereference for you.
		let dmat = DMatrix::from_dense(&features, num_rows as usize).unwrap();

		bst.predict(&dmat).unwrap()
Expand Down
Original file line number	Diff line number	Diff line change
		@@ -1,7 +1,7 @@
		use pgx::*;
		use serde::Deserialize;

		#[derive(PostgresEnum, Copy, Clone, PartialEq, Debug, Deserialize)]
		#[derive(PostgresEnum, Copy, Clone,Eq,PartialEq, Debug, Deserialize)]
		#[allow(non_camel_case_types)]
		pub enum Algorithm {
		linear,
Expand Down
Original file line number	Diff line number	Diff line change
Expand Up		@@ -30,10 +30,14 @@ impl Dataset {

		pub fn distinct_labels(&self) -> u32 {
		let mut v = HashSet::new();
		// Treat the f32 values as u32 for std::cmp::Eq. We don't
		// Treat the f32 values as u32 for std::cmp::Eq. We don't
		// care about the nuance of nan equality here, they should
		// already be filtered out upstream.
		self.y.iter().for_each(\|i\| if !i.is_nan() { v.insert(i.to_bits()); });
		self.y.iter().for_each(\|i\| {
		if !i.is_nan() {
		v.insert(i.to_bits());
		}
		});
		v.len().try_into().unwrap()
		}
		}
Expand DownExpand Up		@@ -86,7 +90,7 @@ pub fn load_diabetes(limit: Option<usize>) -> (String, i64) {
		None => diabetes.num_samples,
		};
		for i in 0..limit {
		let age = diabetes.data[(i * diabetes.num_features) + 0];
		let age = diabetes.data[(i * diabetes.num_features)];
Copy link Contributor levkkSep 13, 2022 Choose a reason for hiding this comment The reason will be displayed to describe this comment to others.Learn more. Hah!
		let sex = diabetes.data[(i * diabetes.num_features) + 1];
		let bmi = diabetes.data[(i * diabetes.num_features) + 2];
		let bp = diabetes.data[(i * diabetes.num_features) + 3];
Expand DownExpand Up		@@ -153,6 +157,7 @@ pub fn load_digits(limit: Option<usize>) -> (String, i64) {
		let target = digits.target[i];
		// shape the image in a 2d array
		let mut image = vec![vec![0.; 8]; 8];
		#[allow(clippy::needless_range_loop)] // x & y are in fact used
Copy link Contributor levkkSep 13, 2022 Choose a reason for hiding this comment The reason will be displayed to describe this comment to others.Learn more. Oh no, don't get me started with these haha! Rubocop was plenty, thank you
		for x in 0..8 {
		for y in 0..8 {
		image[x][y] = digits.data[(i * 64) + (x * 8) + y];
Expand DownExpand Up		@@ -213,7 +218,7 @@ pub fn load_iris(limit: Option<usize>) -> (String, i64) {
		None => iris.num_samples,
		};
		for i in 0..limit {
		let sepal_length = iris.data[(i * iris.num_features) + 0];
		let sepal_length = iris.data[(i * iris.num_features)];
		let sepal_width = iris.data[(i * iris.num_features) + 1];
		let petal_length = iris.data[(i * iris.num_features) + 2];
		let petal_width = iris.data[(i * iris.num_features) + 3];
Expand DownExpand Up		@@ -354,7 +359,7 @@ pub fn load_breast_cancer(limit: Option<usize>) -> (String, i64) {
		vec![
		(
		PgBuiltInOids::FLOAT4OID.oid(),
		breast_cancer.data[(i * breast_cancer.num_features) + 0].into_datum(),
		breast_cancer.data[(i * breast_cancer.num_features)].into_datum(),
		),
		(
		PgBuiltInOids::FLOAT4OID.oid(),
Expand Down
Original file line number	Diff line number	Diff line change
Expand Up		@@ -13,15 +13,16 @@ use crate::orm::Algorithm;
		use crate::orm::Dataset;
		use crate::orm::Task;

		#[allow(clippy::type_complexity)]
Copy link Contributor levkkSep 13, 2022• edited Loading Choose a reason for hiding this comment The reason will be displayed to describe this comment to others.Learn more. Clippy needs to get it together.
		static DEPLOYED_ESTIMATORS_BY_PROJECT_NAME: Lazy<Mutex<HashMap<String, Arc<Box<dyn Estimator>>>>> =
		Lazy::new(\|\| Mutex::new(HashMap::new()));

		pub fn find_deployed_estimator_by_project_name(name: &str) -> Arc<Box<dyn Estimator>> {
		{
		let estimators = DEPLOYED_ESTIMATORS_BY_PROJECT_NAME.lock().unwrap();
		let estimator = estimators.get(name);
		ifestimator.is_some() {
		return estimator.unwrap().clone();
		iflet Some(estimator) = estimator {
		return estimator.clone();
		}
		}

Expand All		@@ -40,33 +41,26 @@ pub fn find_deployed_estimator_by_project_name(name: &str) -> Arc<Box<dyn Estima
		LIMIT 1;",
		vec![(PgBuiltInOids::TEXTOID.oid(), name.into_datum())],
		);
		let task = Task::from_str(
		&task.expect(
		format!(
		"Project {} does not have a trained and deployed model.",
		name
		)
		.as_str(),
		),
		)
		let task = Task::from_str(&task.unwrap_or_else(\|\| {
Copy link Contributor levkkSep 13, 2022 Choose a reason for hiding this comment The reason will be displayed to describe this comment to others.Learn more. Ok what's up with these. Why does not like`expect`? Copy link ContributorAuthor montanalowSep 13, 2022 Choose a reason for hiding this comment The reason will be displayed to describe this comment to others.Learn more. expect input is always executed which may have side effects or allocations. or_else is only executed if needed.
		panic!(
		"Project {} does not have a trained and deployed model.",
		name
		)
		}))
		.unwrap();
		let algorithm = Algorithm::from_str(
		&algorithm.expect(
		format!(
		"Project {} does not have a trained and deployed model.",
		name
		)
		.as_str(),
		),
		)
		let algorithm = Algorithm::from_str(&algorithm.unwrap_or_else(\|\| {
		panic!(
		"Project {} does not have a trained and deployed model.",
		name
		)
		}))
		.unwrap();
		let data = data.expect(
		format!(
		let data = data.unwrap_or_else(\|\| {
		panic!(
		"Project {} does not have a trained and deployed model.",
		name
		)
		.as_str(),
		);
		});

		let e: Box<dyn Estimator> = match task {
		Task::regression => match algorithm {
Expand DownExpand Up		@@ -125,7 +119,12 @@ fn predict_smartcore(
		smartcore::api::Predictor::predict(predictor, &features).unwrap()[0]
		}

		fn calc_metrics(y_test: &Array1<f32>, y_hat: &Array1<f32>, distinct_labels: u32, task: Task) -> HashMap<String, f32> {
		fn calc_metrics(
		y_test: &Array1<f32>,
		y_hat: &Array1<f32>,
		distinct_labels: u32,
		task: Task,
		) -> HashMap<String, f32> {
		let mut results = HashMap::new();
		match task {
		Task::regression => {
Expand Down
Original file line number	Diff line number	Diff line change
Expand Up		@@ -60,7 +60,7 @@ impl Model {
		(PgBuiltInOids::JSONBOID.oid(), search_args.into_datum()),
		])
		).first();
		if result.len() > 0 {
		if!result.is_empty() {
Copy link Contributor levkkSep 13, 2022 Choose a reason for hiding this comment The reason will be displayed to describe this comment to others.Learn more. He got you there.
		model = Some(Model {
		id: result.get_datum(1).unwrap(),
		project_id: result.get_datum(2).unwrap(),
Expand All		@@ -69,7 +69,7 @@ impl Model {
		hyperparams: result.get_datum(5).unwrap(),
		status: result.get_datum(6).unwrap(),
		metrics: result.get_datum(7),
		search: search, // TODO
		search, // TODO
		search_params: result.get_datum(9).unwrap(),
		search_args: result.get_datum(10).unwrap(),
		created_at: result.get_datum(11).unwrap(),
Expand All		@@ -82,8 +82,8 @@ impl Model {
		});
		let mut model = model.unwrap();
		let dataset = snapshot.dataset();
		model.fit(&project, &dataset);
		model.test(&project, &dataset);
		model.fit(project, &dataset);
		model.test(project, &dataset);
		model
		}

Expand DownExpand Up		@@ -119,7 +119,7 @@ impl Model {
		.unwrap(),
		)),
		};
		let bytes: Vec<u8> = rmp_serde::to_vec(&*estimator.as_ref().unwrap()).unwrap();
		let bytes: Vec<u8> = rmp_serde::to_vec(estimator.as_ref().unwrap()).unwrap();
		Spi::get_one_with_args::<i64>(
		"INSERT INTO pgml_rust.files (model_id, path, part, data) VALUES($1, 'estimator.rmp', 0, $2) RETURNING id",
		vec![
Expand All		@@ -131,11 +131,11 @@ impl Model {
		}
		Algorithm::xgboost => {
		let mut dtrain =
		DMatrix::from_dense(&dataset.x_train(), dataset.num_train_rows).unwrap();
		DMatrix::from_dense(dataset.x_train(), dataset.num_train_rows).unwrap();
		let mut dtest =
		DMatrix::from_dense(&dataset.x_test(), dataset.num_test_rows).unwrap();
		dtrain.set_labels(&dataset.y_train()).unwrap();
		dtest.set_labels(&dataset.y_test()).unwrap();
		DMatrix::from_dense(dataset.x_test(), dataset.num_test_rows).unwrap();
		dtrain.set_labels(dataset.y_train()).unwrap();
		dtest.set_labels(dataset.y_test()).unwrap();

		// specify datasets to evaluate against during training
		let evaluation_sets = &[(&dtrain, "train"), (&dtest, "test")];
Expand All		@@ -146,8 +146,10 @@ impl Model {
		.objective(match project.task {
		Task::regression => xgboost::parameters::learning::Objective::RegLinear,
		Task::classification => {
		xgboost::parameters::learning::Objective::MultiSoftmax(dataset.distinct_labels())
		},
		xgboost::parameters::learning::Objective::MultiSoftmax(
		dataset.distinct_labels(),
		)
		}
		})
		.build()
		.unwrap();
Expand DownExpand Up		@@ -207,12 +209,8 @@ impl Model {
		}

		fn test(&mut self, project: &Project, dataset: &Dataset) {
		let metrics = self
		.estimator
		.as_ref()
		.unwrap()
		.test(project.task, &dataset);
		self.metrics = Some(JsonB(json!(metrics.clone())));
		let metrics = self.estimator.as_ref().unwrap().test(project.task, dataset);
		self.metrics = Some(JsonB(json!(metrics)));
		Spi::get_one_with_args::<i64>(
		"UPDATE pgml_rust.models SET metrics = $1 WHERE id = $2 RETURNING id",
		vec![
Expand Down
Original file line number	Diff line number	Diff line change
Expand Up		@@ -25,7 +25,7 @@ impl Project {
		(PgBuiltInOids::INT8OID.oid(), id.into_datum()),
		])
		).first();
		if result.len() > 0 {
		if!result.is_empty() {
		project = Some(Project {
		id: result.get_datum(1).unwrap(),
		name: result.get_datum(2).unwrap(),
Expand All		@@ -50,7 +50,7 @@ impl Project {
		(PgBuiltInOids::TEXTOID.oid(), name.into_datum()),
		])
		).first();
		if result.len() > 0 {
		if!result.is_empty() {
		project = Some(Project {
		id: result.get_datum(1).unwrap(),
		name: result.get_datum(2).unwrap(),
Expand All		@@ -76,7 +76,7 @@ impl Project {
		(PgBuiltInOids::TEXTOID.oid(), task.to_string().into_datum()),
		])
		).first();
		if result.len() > 0 {
		if!result.is_empty() {
		project = Some(Project {
		id: result.get_datum(1).unwrap(),
		name: result.get_datum(2).unwrap(),
Expand Down