- Notifications
You must be signed in to change notification settings - Fork 328
Silas add ranking#1498
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Uh oh!
There was an error while loading. Please reload this page.
Silas add ranking#1498
Changes from all commits
File filter
Filter by extension
Conversations
Uh oh!
There was an error while loading.Please reload this page.
Jump to
Uh oh!
There was an error while loading.Please reload this page.
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Uh oh!
There was an error while loading.Please reload this page.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
-- src/api.rs:613
-- pgml::api::rank
--
-- Declares the SQL-callable function pgml."rank".
--   Arguments: a transformer (TEXT), a query (TEXT), an array of documents
--              (TEXT[]) and an optional jsonb "kwargs" that defaults to '{}'.
--   Returns:   a table with columns corpus_id (bigint), score (double
--              precision) and text (TEXT; optional on the Rust side, per the
--              type noted on that column).
-- The function is marked IMMUTABLE, STRICT (PostgreSQL returns NULL without
-- calling it when any argument is NULL) and PARALLEL SAFE, and is bound to the
-- 'rank_wrapper' symbol of the extension's shared library.
CREATE  FUNCTION pgml."rank"(
	"transformer" TEXT, /* &str */
	"query" TEXT, /* &str */
	"documents" TEXT[], /* alloc::vec::Vec<&str> */
	"kwargs" jsonb DEFAULT '{}' /* pgrx::datum::json::JsonB */
) RETURNS TABLE (
	"corpus_id" bigint,  /* i64 */
	"score" double precision,  /* f64 */
	"text" TEXT  /* core::option::Option<alloc::string::String> */
)
IMMUTABLE STRICT PARALLEL SAFE
LANGUAGE c /* Rust */
AS 'MODULE_PATHNAME', 'rank_wrapper';
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -12,7 +12,7 @@ | ||
import orjson | ||
from rouge import Rouge | ||
from sacrebleu.metrics import BLEU | ||
from sentence_transformers import SentenceTransformer, CrossEncoder | ||
from sklearn.metrics import ( | ||
mean_squared_error, | ||
r2_score, | ||
@@ -500,6 +500,33 @@ def transform(task, args, inputs, stream=False): | ||
return orjson.dumps(pipe(inputs, **args), default=orjson_default).decode() | ||
def create_cross_encoder(transformer):
    """Build a ``CrossEncoder`` for ``transformer``.

    ``transformer`` is handed to the ``CrossEncoder`` constructor unchanged;
    no other constructor options are supplied here.
    """
    cross_encoder = CrossEncoder(transformer)
    return cross_encoder
def rank_using(model, query, documents, kwargs):
    """Rank ``documents`` against ``query`` with an already-loaded ``model``.

    Args:
        model: Object exposing ``rank(query, documents, **kwargs)`` that
            yields dicts containing a ``"score"`` entry.
        query: Query passed through to ``model.rank``.
        documents: Documents passed through to ``model.rank``.
        kwargs: Keyword arguments for ``model.rank``; either a mapping or a
            JSON string, which is decoded with ``orjson`` first.

    Returns:
        A list with one dict per result. ``"score"`` comes first and holds
        the value of ``score.item()``; the remaining keys of each result
        follow in their original order.
    """
    if isinstance(kwargs, str):
        kwargs = orjson.loads(kwargs)

    ranked = []
    for hit in model.rank(query, documents, **kwargs):
        # The score is a numpy float32 before we convert it
        score = hit.pop("score").item()
        ranked.append({"score": score, **hit})
    return ranked
def rank(transformer, query, documents, kwargs):
    """Rank ``documents`` against ``query`` using a cached cross encoder.

    Args:
        transformer: Key used to look up (or create and cache) the model in
            ``__cache_sentence_transformer_by_name``.
        query: Query forwarded to ``rank_using``.
        documents: Documents forwarded to ``rank_using``.
        kwargs: JSON string; decoded with ``orjson`` and forwarded to
            ``rank_using``, which passes it on to ``model.rank``.

    Returns:
        Whatever ``rank_using`` returns for the resolved model.
    """
    kwargs = orjson.loads(kwargs)

    # NOTE(review): from the PR discussion -- the decoded kwargs only reach
    # ``model.rank``. Forwarding them to the model constructor as well would
    # need a separate argument (or popping specific keys first); passing them
    # straight through would raise an unexpected-keyword-argument error.
    # NOTE(review): the cache is defined outside this function; its name
    # suggests it may also hold other model types under the same keys --
    # TODO confirm a transformer name cannot collide.
    if transformer in __cache_sentence_transformer_by_name:
        model = __cache_sentence_transformer_by_name[transformer]
    else:
        __cache_sentence_transformer_by_name[transformer] = create_cross_encoder(
            transformer
        )
        model = __cache_sentence_transformer_by_name[transformer]

    return rank_using(model, query, documents, kwargs)
def create_embedding(transformer): | ||
return SentenceTransformer(transformer) | ||