- Notifications
You must be signed in to change notification settings - Fork 328
Refactor the initialization of GUC parameters. #1360
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Uh oh!
There was an error while loading. Please reload this page.
Changes from all commits
4612b4f
0e479a6
0432f03
f6ddf45
1bf1ff3
File filter
Filter by extension
Conversations
Uh oh!
There was an error while loading. Please reload this page.
Jump to
Uh oh!
There was an error while loading. Please reload this page.
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Uh oh!
There was an error while loading. Please reload this page.
Original file line number | Diff line number | Diff line change | ||||||||
---|---|---|---|---|---|---|---|---|---|---|
@@ -1,47 +1,54 @@ | ||||||||||
use anyhow::{bail, Error}; | ||||||||||
use pgrx::GucSetting; | ||||||||||
#[cfg(any(test, feature = "pg_test"))] | ||||||||||
use pgrx::{pg_schema, pg_test}; | ||||||||||
use serde_json::Value; | ||||||||||
use std::ffi::CStr; | ||||||||||
use crate::config::{PGML_HF_TRUST_REMOTE_CODE, PGML_HF_TRUST_WHITELIST, PGML_HF_WHITELIST}; | ||||||||||
/// Verify that the model in the task JSON is allowed based on the huggingface whitelists. | ||||||||||
pub fn verify_task(task: &Value) -> Result<(), Error> { | ||||||||||
let task_model = match get_model_name(task) { | ||||||||||
Some(model) => model.to_string(), | ||||||||||
None => return Ok(()), | ||||||||||
}; | ||||||||||
let whitelisted_models = config_csv_list(&PGML_HF_WHITELIST.1); | ||||||||||
let model_is_allowed = whitelisted_models.is_empty() || whitelisted_models.contains(&task_model); | ||||||||||
if !model_is_allowed { | ||||||||||
bail!( | ||||||||||
"model {} is not whitelisted. Consider adding to {} in postgresql.conf", | ||||||||||
task_model, | ||||||||||
PGML_HF_WHITELIST.0 | ||||||||||
); | ||||||||||
} | ||||||||||
let task_trust = get_trust_remote_code(task); | ||||||||||
let trust_remote_code = PGML_HF_TRUST_REMOTE_CODE.1.get(); | ||||||||||
let trusted_models = config_csv_list(&PGML_HF_TRUST_WHITELIST.1); | ||||||||||
let model_is_trusted = trusted_models.is_empty() || trusted_models.contains(&task_model); | ||||||||||
let remote_code_allowed = trust_remote_code && model_is_trusted; | ||||||||||
if !remote_code_allowed && task_trust == Some(true) { | ||||||||||
bail!( | ||||||||||
"model {} is not trusted to run remote code. Consider setting {} = 'true' or adding {} to {}", | ||||||||||
task_model, | ||||||||||
PGML_HF_TRUST_REMOTE_CODE.0, | ||||||||||
task_model, | ||||||||||
PGML_HF_TRUST_WHITELIST.0 | ||||||||||
); | ||||||||||
} | ||||||||||
Ok(()) | ||||||||||
} | ||||||||||
fn config_csv_list(csv_list: &GucSetting<Option<&'static CStr>>) -> Vec<String> { | ||||||||||
matchcsv_list.get() { | ||||||||||
Some(value) => value | ||||||||||
.to_string_lossy() | ||||||||||
.trim_matches('"') | ||||||||||
.split(',') | ||||||||||
.filter_map(|s| if s.is_empty() { None } else { Some(s.to_string()) }) | ||||||||||
@@ -122,7 +129,7 @@ mod tests { | ||||||||||
#[pg_test] | ||||||||||
fn test_empty_whitelist() { | ||||||||||
let model = "Salesforce/xgen-7b-8k-inst"; | ||||||||||
set_config(PGML_HF_WHITELIST.0, "").unwrap(); | ||||||||||
let task_json = format!(json_template!(), model, false); | ||||||||||
let task: Value = serde_json::from_str(&task_json).unwrap(); | ||||||||||
assert!(verify_task(&task).is_ok()); | ||||||||||
@@ -131,12 +138,12 @@ mod tests { | ||||||||||
#[pg_test] | ||||||||||
fn test_nonempty_whitelist() { | ||||||||||
let model = "Salesforce/xgen-7b-8k-inst"; | ||||||||||
set_config(PGML_HF_WHITELIST.0, model).unwrap(); | ||||||||||
let task_json = format!(json_template!(), model, false); | ||||||||||
let task: Value = serde_json::from_str(&task_json).unwrap(); | ||||||||||
assert!(verify_task(&task).is_ok()); | ||||||||||
set_config(PGML_HF_WHITELIST.0, "other_model").unwrap(); | ||||||||||
let task_json = format!(json_template!(), model, false); | ||||||||||
let task: Value = serde_json::from_str(&task_json).unwrap(); | ||||||||||
assert!(verify_task(&task).is_err()); | ||||||||||
@@ -145,18 +152,18 @@ mod tests { | ||||||||||
#[pg_test] | ||||||||||
fn test_trusted_model() { | ||||||||||
let model = "Salesforce/xgen-7b-8k-inst"; | ||||||||||
set_config(PGML_HF_WHITELIST.0, model).unwrap(); | ||||||||||
set_config(PGML_HF_TRUST_WHITELIST.0, model).unwrap(); | ||||||||||
let task_json = format!(json_template!(), model, false); | ||||||||||
let task: Value = serde_json::from_str(&task_json).unwrap(); | ||||||||||
assert!(verify_task(&task).is_ok()); | ||||||||||
let task_json = format!(json_template!(), model, true); | ||||||||||
let task: Value = serde_json::from_str(&task_json).unwrap(); | ||||||||||
assert!(verify_task(&task).is_err()); | ||||||||||
Contributor Author There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. In the initial commit of this test, this step should fail with postgresml/pgml-extension/src/bindings/transformers/whitelist.rs Lines 189 to 192 in f4e87c5
| ||||||||||
set_config(PGML_HF_TRUST_REMOTE_CODE.0, "true").unwrap(); | ||||||||||
let task_json = format!(json_template!(), model, false); | ||||||||||
let task: Value = serde_json::from_str(&task_json).unwrap(); | ||||||||||
assert!(verify_task(&task).is_ok()); | ||||||||||
@@ -169,8 +176,8 @@ mod tests { | ||||||||||
#[pg_test] | ||||||||||
fn test_untrusted_model() { | ||||||||||
let model = "Salesforce/xgen-7b-8k-inst"; | ||||||||||
set_config(PGML_HF_WHITELIST.0, model).unwrap(); | ||||||||||
set_config(PGML_HF_TRUST_WHITELIST.0, "other_model").unwrap(); | ||||||||||
let task_json = format!(json_template!(), model, false); | ||||||||||
let task: Value = serde_json::from_str(&task_json).unwrap(); | ||||||||||
@@ -180,7 +187,7 @@ mod tests { | ||||||||||
let task: Value = serde_json::from_str(&task_json).unwrap(); | ||||||||||
assert!(verify_task(&task).is_err()); | ||||||||||
set_config(PGML_HF_TRUST_REMOTE_CODE.0, "true").unwrap(); | ||||||||||
let task_json = format!(json_template!(), model, false); | ||||||||||
let task: Value = serde_json::from_str(&task_json).unwrap(); | ||||||||||
assert!(verify_task(&task).is_ok()); | ||||||||||
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,16 +1,58 @@ | ||
use lazy_static::lazy_static; | ||
use pgrx::{GucContext, GucFlags, GucRegistry, GucSetting}; | ||
use std::ffi::CStr; | ||
#[cfg(any(test, feature = "pg_test"))] | ||
use pgrx::{pg_schema, pg_test}; | ||
lazy_static! { | ||
pub static ref PGML_VENV: (&'static str, GucSetting<Option<&'static CStr>>) = | ||
("pgml.venv", GucSetting::<Option<&'static CStr>>::new(None)); | ||
pub static ref PGML_HF_WHITELIST: (&'static str, GucSetting<Option<&'static CStr>>) = ( | ||
"pgml.huggingface_whitelist", | ||
GucSetting::<Option<&'static CStr>>::new(None), | ||
); | ||
pub static ref PGML_HF_TRUST_REMOTE_CODE: (&'static str, GucSetting<bool>) = | ||
("pgml.huggingface_trust_remote_code", GucSetting::<bool>::new(false)); | ||
pub static ref PGML_HF_TRUST_WHITELIST: (&'static str, GucSetting<Option<&'static CStr>>) = ( | ||
"pgml.huggingface_trust_remote_code_whitelist", | ||
GucSetting::<Option<&'static CStr>>::new(None), | ||
); | ||
} | ||
pub fn initialize_server_params() { | ||
GucRegistry::define_string_guc( | ||
PGML_VENV.0, | ||
"Python's virtual environment path", | ||
"", | ||
&PGML_VENV.1, | ||
GucContext::Userset, | ||
GucFlags::default(), | ||
); | ||
GucRegistry::define_string_guc( | ||
PGML_HF_WHITELIST.0, | ||
"Models allowed to be downloaded from huggingface", | ||
"", | ||
&PGML_HF_WHITELIST.1, | ||
GucContext::Userset, | ||
GucFlags::default(), | ||
); | ||
GucRegistry::define_bool_guc( | ||
PGML_HF_TRUST_REMOTE_CODE.0, | ||
"Whether model can execute remote codes", | ||
"", | ||
&PGML_HF_TRUST_REMOTE_CODE.1, | ||
GucContext::Userset, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others.Learn more. Shall we make these GUCs
| ||
GucFlags::default(), | ||
); | ||
GucRegistry::define_string_guc( | ||
PGML_HF_TRUST_WHITELIST.0, | ||
"Models allowed to execute remote codes when pgml.hugging_face_trust_remote_code = 'on'", | ||
"", | ||
&PGML_HF_TRUST_WHITELIST.1, | ||
GucContext::Userset, | ||
GucFlags::default(), | ||
); | ||
} | ||
#[cfg(any(test, feature = "pg_test"))] | ||
@@ -26,17 +68,11 @@ pub fn set_config(name: &str, value: &str) -> Result<(), pgrx::spi::Error> { | ||
mod tests { | ||
use super::*; | ||
#[pg_test] | ||
fn read_pgml_huggingface_whitelist() { | ||
let name = "pgml.huggingface_whitelist"; | ||
let value = "meta-llama/Llama-2-7b"; | ||
set_config(name, value).unwrap(); | ||
assert_eq!(PGML_HF_WHITELIST.1.get().unwrap().to_string_lossy(), value); | ||
} | ||
} |