Notifications: You must be signed in to change notification settings
Fork 352
Star 6.6k

Commit d017cd6

committed

swap out vLLM model if new

1 parent ca7e4ad, commit d017cd6. Copy full SHA for d017cd6

File tree

4 files changed

+88

-28

lines changed

pgml-extension/src
- api.rs
- bindings/vllm

4 files changed

+88

-28

lines changed

`‎pgml-extension/src/api.rs‎`

Lines changed: 1 addition & 26 deletions

Original file line number	Diff line number	Diff line change
`@@ -2,15 +2,13 @@ use std::fmt::Write;`
`2`	`2`	`use std::str::FromStr;`
`3`	`3`
`4`	`4`	`use ndarray::Zip;`
`5`		`-use once_cell::sync::OnceCell;`
`6`	`5`	`use pgrx::iter::{SetOfIterator,TableIterator};`
`7`	`6`	`use pgrx::*;`
`8`	`7`	`use serde_json::Value;`
`9`	`8`
`10`	`9`	`#[cfg(feature ="python")]`
`11`	`10`	`use serde_json::json;`
`12`	`11`
`13`		`-use crate::bindings::vllm::{LLMBuilder, LLM};`
`14`	`12`	`#[cfg(feature ="python")]`
`15`	`13`	`use crate::orm::*;`
`16`	`14`
`@@ -642,30 +640,7 @@ fn transform(mut task: Value, args: Value, inputs: Vec<&str>) -> anyhow::Result<`
`642`	`640`	`});`
`643`	`641`
`644`	`642`	`if use_vllm{`
`645`		`-crate::bindings::python::activate().unwrap();`
`646`		`-`
`647`		`-static LAZY_LLM: OnceCell<LLM> = OnceCell::new();`
`648`		`-let llm = LAZY_LLM.get_or_init(move \|\| {`
`649`		`-let builder = match LLMBuilder::try_from(task) {`
`650`		`-Ok(b) => b,`
`651`		`-Err(e) =>error!("{e}"),`
`652`		`-};`
`653`		`- builder.build().unwrap()`
`654`		`-});`
`655`		`-`
`656`		`-let outputs = llm`
`657`		`-.generate(&inputs,None)?`
`658`		`-.iter()`
`659`		`-.map(\|o\|{`
`660`		`- o.outputs()`
`661`		`-.unwrap()`
`662`		`-.iter()`
`663`		`-.map(\|o\| o.text().unwrap())`
`664`		`-.collect::<Vec<_>>()`
`665`		`-})`
`666`		`-.collect::<Vec<Vec<_>>>();`
`667`		`-`
`668`		`-Ok(json!(outputs))`
	`643`	`+Ok(crate::bindings::vllm::vllm_inference(&task,&inputs)?)`
`669`	`644`	`}else{`
`670`	`645`	`if let Some(map) = task.as_object_mut() {`
`671`	`646`	`// pop backend keyword, if present`

`‎pgml-extension/src/bindings/vllm/inference.rs‎`

Lines changed: 75 additions & 0 deletions

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,75 @@`
	`1`	`+use parking_lot::Mutex;`
	`2`	`+use pyo3::prelude::*;`
	`3`	`+use serde_json::{json,Value};`
	`4`	`+`
	`5`	`+use super::LLM;`
	`6`	`+`
	`7`	`+static MODEL: Mutex<Option<LLM>> = Mutex::new(None);`
	`8`	`+`
	`9`	`+pub fn vllm_inference(task: &Value, inputs: &[&str]) -> PyResult<Value> {`
	`10`	`+crate::bindings::python::activate().expect("python venv activate");`
	`11`	`+let mut model = MODEL.lock();`
	`12`	`+`
	`13`	`+let llm = match get_model_name(&model, task) {`
	`14`	`+ModelName::Same => model.as_mut().expect("ModelName::Same as_mut"),`
	`15`	`+ModelName::Different(name) =>{`
	`16`	`+if let Some(llm) = model.take() {`
	`17`	`+// delete old model, exists`
	`18`	`+destroy_model_parallel(llm)?;`
	`19`	`+}`
	`20`	`+// make new model`
	`21`	`+let llm =LLM::new(&name)?;`
	`22`	`+ model.insert(llm)`
	`23`	`+}`
	`24`	`+};`
	`25`	`+`
	`26`	`+let outputs = llm`
	`27`	`+.generate(&inputs,None)?`
	`28`	`+.iter()`
	`29`	`+.map(\|o\|{`
	`30`	`+ o.outputs()`
	`31`	`+.expect("RequestOutput::outputs()")`
	`32`	`+.iter()`
	`33`	`+.map(\|o\| o.text().expect("CompletionOutput::text()"))`
	`34`	`+.collect::<Vec<_>>()`
	`35`	`+})`
	`36`	`+.collect::<Vec<Vec<_>>>();`
	`37`	`+`
	`38`	`+Ok(json!(outputs))`
	`39`	`+}`
	`40`	`+`
	`41`	`+fn get_model_name<M>(model: &M, task: &Value) -> ModelName`
	`42`	`+where`
	`43`	`+M: std::ops::Deref<Target =Option<LLM>>,`
	`44`	`+{`
	`45`	`+match task`
	`46`	`+.as_object()`
	`47`	`+.and_then(\|obj\| obj.get("model").and_then(\|m\| m.as_str()))`
	`48`	`+{`
	`49`	`+Some(name) =>match model.as_ref(){`
	`50`	`+Some(llm)if llm.model() == name =>ModelName::Same,`
	`51`	`+ _ =>ModelName::Different(name.to_string()),`
	`52`	`+},`
	`53`	`+None =>ModelName::Same,`
	`54`	`+}`
	`55`	`+}`
	`56`	`+`
	`57`	`+enum ModelName {`
	`58`	`+Same,`
	`59`	`+Different(String),`
	`60`	`+}`
	`61`	`+`
	`62`	`+// See https://github.com/vllm-project/vllm/issues/565#issuecomment-1725174811`
	`63`	`+fn destroy_model_parallel(llm: LLM) -> PyResult<()> {`
	`64`	`+Python::with_gil(\|py\|{`
	`65`	`+PyModule::import(py,"vllm")?`
	`66`	`+.getattr("model_executor")?`
	`67`	`+.getattr("parallel_utils")?`
	`68`	`+.getattr("parallel_state")?`
	`69`	`+.getattr("destroy_model_parallel")?`
	`70`	`+.call0()?;`
	`71`	`+drop(llm);`
	`72`	`+PyModule::import(py,"gc")?.getattr("collect")?.call0()?;`
	`73`	`+Ok(())`
	`74`	`+})`
	`75`	`+}`

`‎pgml-extension/src/bindings/vllm/llm.rs‎`

Lines changed: 10 additions & 2 deletions

Original file line number	Diff line number	Diff line change
`@@ -29,6 +29,7 @@ pub enum Quantization {`
`29`	`29`	`}`
`30`	`30`
`31`	`31`	`pub struct LLM {`
	`32`	`+model:String,`
`32`	`33`	`inner:PyObject,`
`33`	`34`	`}`
`34`	`35`
`@@ -133,7 +134,7 @@ impl LLMBuilder {`
`133`	`134`	`pub fn build(self) -> PyResult<LLM> {`
`134`	`135`	`let inner =Python::with_gil(\|py\| ->PyResult<PyObject>{`
`135`	`136`	`let kwargs =PyDict::new(py);`
`136`		`- kwargs.set_item("model",self.model)?;`
	`137`	`+ kwargs.set_item("model",self.model.clone())?;`
`137`	`138`	`kwargs.set_item("tokenizer",self.tokenizer)?;`
`138`	`139`	`kwargs.set_item("tokenizer_mode",self.tokenizer_mode)?;`
`139`	`140`	`kwargs.set_item("trust_remote_code",self.trust_remote_code)?;`
`@@ -149,7 +150,10 @@ impl LLMBuilder {`
`149`	`150`	`vllm.getattr("LLM")?.call((),Some(kwargs))?.extract()`
`150`	`151`	`})?;`
`151`	`152`
`152`		`-Ok(LLM{ inner})`
	`153`	`+Ok(LLM{`
	`154`	`+ inner,`
	`155`	`+model:self.model,`
	`156`	`+})`
`153`	`157`	`}`
`154`	`158`	`}`
`155`	`159`
`@@ -184,6 +188,10 @@ impl LLM {`
`184`	`188`	`.extract(py)`
`185`	`189`	`})`
`186`	`190`	`}`
	`191`	`+`
	`192`	`+pub fn model(&self) -> &str {`
	`193`	`+self.model.as_str()`
	`194`	`+}`
`187`	`195`	`}`
`188`	`196`
`189`	`197`	`impl ToPyObject for TokenizerMode {`

`‎pgml-extension/src/bindings/vllm/mod.rs‎`

Lines changed: 2 additions & 0 deletions

Original file line number	Diff line number	Diff line change
`@@ -1,9 +1,11 @@`
`1`	`1`	//! Rust bindings to the Python package `vllm`.
`2`	`2`
	`3`	`+mod inference;`
`3`	`4`	`mod llm;`
`4`	`5`	`mod outputs;`
`5`	`6`	`mod params;`
`6`	`7`
	`8`	`+pub use inference::*;`
`7`	`9`	`pub use llm::*;`
`8`	`10`	`pub use outputs::*;`
`9`	`11`	`pub use params::*;`

0 commit comments

Comments

(0)

Movatterモバイル変換

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Commit d017cd6

File tree

4 files changed

4 files changed

`‎pgml-extension/src/api.rs‎`

`‎pgml-extension/src/bindings/vllm/inference.rs‎`

`‎pgml-extension/src/bindings/vllm/llm.rs‎`

`‎pgml-extension/src/bindings/vllm/mod.rs‎`

0 commit comments