Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Adding embed_array for getting the embeddings of multiple strings #686

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
montanalow merged 15 commits into postgresml:master from jsaied99:embeddings_inputs
Jun 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
15 commits
Select commit. Hold shift + click to select a range
be7052e
working postgres
jsaied99 Jun 5, 2023
2d949fb
Fixing Dockerfile.local for local deployment
jsaied99 Jun 5, 2023
1ebfd01
removing command from compose
jsaied99 Jun 5, 2023
b20ed8d
adding inputs as an array for embeddings
jsaied99 Jun 5, 2023
e0738dc
fixing function overloading issue
jsaied99 Jun 5, 2023
f614404
adding vec<vec>
jsaied99 Jun 5, 2023
9425b63
handling inputs
jsaied99 Jun 5, 2023
039da67
handling inputs loads instead of dumps
jsaied99 Jun 5, 2023
03abdcd
fixing instance of json
jsaied99 Jun 5, 2023
c5f7798
Merge branch 'master' into embeddings_inputs
jsaied99 Jun 5, 2023
6ffabc9
adding name so same funciton name
jsaied99 Jun 5, 2023
0b0e666
Changing inner func name to embed_batch
jsaied99 Jun 5, 2023
4b2e14d
adding fixes from comments
jsaied99 Jun 5, 2023
78bda85
adding Vec<Vec<f32>>
jsaied99 Jun 5, 2023
9111a49
fixing compilation errors
jsaied99 Jun 5, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 14 additions & 2 deletions pgml-extension/src/api.rs
View file
Open in desktop
Original file line number | Diff line number | Diff line change
Expand Up @@ -563,9 +563,21 @@ fn load_dataset(
TableIterator::new(vec![(name, rows)].into_iter())
}

#[pg_extern(immutable, parallel_safe)]
#[pg_extern(immutable, parallel_safe, name = "embed")]
pub fn embed(transformer: &str, text: &str, kwargs: default!(JsonB, "'{}'")) -> Vec<f32> {
crate::bindings::transformers::embed(transformer, text, &kwargs.0)
embed_batch(transformer, Vec::from([text]), kwargs)
.first()
.unwrap()
.to_vec()
}

#[pg_extern(immutable, parallel_safe, name = "embed")]
pub fn embed_batch(
transformer: &str,
inputs: Vec<&str>,
kwargs: default!(JsonB, "'{}'"),
) -> Vec<Vec<f32>> {
crate::bindings::transformers::embed(transformer, &inputs, &kwargs.0)
}

#[pg_extern(immutable, parallel_safe)]
Expand Down
19 changes: 12 additions & 7 deletions pgml-extension/src/bindings/transformers.py
View file
Open in desktop
Original file line number | Diff line number | Diff line change
Expand Up @@ -110,25 +110,30 @@ def transform(task, args, inputs):
return json.dumps(pipe(inputs, **args), cls=NumpyJSONEncoder)


def embed(transformer, text, kwargs):
def embed(transformer, inputs, kwargs):

inputs = json.loads(inputs)
kwargs = json.loads(kwargs)
ensure_device(kwargs)
instructor = transformer.startswith("hkunlp/instructor")

if instructor:
klass = INSTRUCTOR
text = [[kwargs.pop("instruction"), text]]

texts_with_instructions = []
instruction = kwargs.pop("instruction")
for text in inputs:
texts_with_instructions.append([instruction, text])

inputs = texts_with_instructions
else:
klass = SentenceTransformer

if transformer not in __cache_sentence_transformer_by_name:
__cache_sentence_transformer_by_name[transformer] = klass(transformer)
model = __cache_sentence_transformer_by_name[transformer]

result = model.encode(text, **kwargs)
if instructor:
result = result[0]

return result
return model.encode(inputs, **kwargs)


def load_dataset(name, subset, limit: None, kwargs: "{}"):
Expand Down
20 changes: 10 additions & 10 deletions pgml-extension/src/bindings/transformers.rs
View file
Open in desktop
Original file line number | Diff line number | Diff line change
Expand Up @@ -35,14 +35,13 @@ pub fn transform(
let results = Python::with_gil(|py| -> String {
let transform: Py<PyAny> = PY_MODULE.getattr(py, "transform").unwrap().into();

let result = transform
.call1(
let result = transform.call1(
py,
PyTuple::new(
py,
PyTuple::new(
py,
&[task.into_py(py), args.into_py(py), inputs.into_py(py)],
),
);
&[task.into_py(py), args.into_py(py), inputs.into_py(py)],
),
);

let result = match result {
Err(e) => {
Expand All @@ -57,11 +56,12 @@ pub fn transform(
serde_json::from_str(&results).unwrap()
}

pub fn embed(transformer: &str,text:&str, kwargs: &serde_json::Value) -> Vec<f32> {
pub fn embed(transformer: &str,inputs: Vec<&str>, kwargs: &serde_json::Value) -> Vec<Vec<f32>> {
crate::bindings::venv::activate();

let kwargs = serde_json::to_string(kwargs).unwrap();
Python::with_gil(|py| -> Vec<f32> {
let inputs = serde_json::to_string(&inputs).unwrap();
Python::with_gil(|py| -> Vec<Vec<f32>> {
let embed: Py<PyAny> = PY_MODULE.getattr(py, "embed").unwrap().into();
embed
.call1(
Expand All @@ -70,7 +70,7 @@ pub fn embed(transformer: &str, text: &str, kwargs: &serde_json::Value) -> Vec<f
py,
&[
transformer.to_string().into_py(py),
text.to_string().into_py(py),
inputs.into_py(py),
kwargs.into_py(py),
],
),
Expand Down

[8]ページ先頭

©2009-2025 Movatter.jp