Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit d86f40e

Browse files
montanalow and levkk authored
mindsdb vs postgresml blog post (#704)
Co-authored-by: Lev Kokotov <levkk@users.noreply.github.com>
1 parent 4b921f3 commit d86f40e

File tree

10 files changed

+345
-34
lines changed

10 files changed

+345
-34
lines changed

‎pgml-dashboard/src/api/docs.rs‎

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -80,6 +80,8 @@ async fn blog_handler<'a>(path: PathBuf, cluster: Cluster) -> Result<ResponseOk,
8080
cluster,
8181
&path,
8282
vec![
83+
NavLink::new("MindsDB vs PostgresML")
84+
.href("/blog/mindsdb-vs-postgresml"),
8385
NavLink::new("Introducing PostgresML Python SDK: Build End-to-End Vector Search Applications without OpenAI and Pinecone")
8486
.href("/blog/introducing-postgresml-python-sdk-build-end-to-end-vector-search-applications-without-openai-and-pinecone"),
8587
NavLink::new("PostgresML raises $4.7M to launch serverless AI application databases based on Postgres")

‎pgml-dashboard/static/blog/mindsdb-vs-postgresml.md‎

Lines changed: 313 additions & 0 deletions
Large diffs are not rendered by default.
692 KB
Loading
290 KB
Loading
145 KB
Loading

‎pgml-dashboard/templates/layout/nav/top.html‎

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,7 @@
2121
<aclass="nav-link"href="/docs/guides/setup/quick_start_with_docker/">Docs</a>
2222
</li>
2323
<liclass="nav-item d-flex align-items-center">
24-
<aclass="nav-link"href="/blog/postgresml-raises-4.7M-to-launch-serverless-ai-application-databases-based-on-postgres">Blog</a>
24+
<aclass="nav-link"href="/blog/mindsdb-vs-postgresml">Blog</a>
2525
</li>
2626
<liclass="nav-item d-flex align-items-center">
2727
<aclass="nav-link"href="https://github.com/postgresml/postgresml"target="_blank">Open Source</a>

‎pgml-extension/requirements.txt‎

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@ deepspeed==0.9.2
44
huggingface-hub==0.14.1
55
InstructorEmbedding==1.0.0
66
lightgbm==3.3.5
7+
orjson==3.9.0
78
pandas==2.0.1
89
rich==13.3.5
910
rouge==1.0.1

‎pgml-extension/src/api.rs‎

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -577,7 +577,7 @@ pub fn embed_batch(
577577
inputs:Vec<&str>,
578578
kwargs:default!(JsonB,"'{}'"),
579579
) ->Vec<Vec<f32>>{
580-
crate::bindings::transformers::embed(transformer,&inputs,&kwargs.0)
580+
crate::bindings::transformers::embed(transformer, inputs,&kwargs.0)
581581
}
582582

583583
#[pg_extern(immutable, parallel_safe)]
@@ -602,11 +602,11 @@ pub fn chunk(
602602
pubfntransform_json(
603603
task:JsonB,
604604
args:default!(JsonB,"'{}'"),
605-
inputs:default!(Vec<String>,"ARRAY[]::TEXT[]"),
605+
inputs:default!(Vec<&str>,"ARRAY[]::TEXT[]"),
606606
cache:default!(bool,false),
607607
) ->JsonB{
608608
JsonB(crate::bindings::transformers::transform(
609-
&task.0,&args.0,&inputs,
609+
&task.0,&args.0, inputs,
610610
))
611611
}
612612

@@ -616,14 +616,14 @@ pub fn transform_json(
616616
pubfntransform_string(
617617
task:String,
618618
args:default!(JsonB,"'{}'"),
619-
inputs:default!(Vec<String>,"ARRAY[]::TEXT[]"),
619+
inputs:default!(Vec<&str>,"ARRAY[]::TEXT[]"),
620620
cache:default!(bool,false),
621621
) ->JsonB{
622622
letmut task_map =HashMap::new();
623623
task_map.insert("task", task);
624624
let task_json =json!(task_map);
625625
JsonB(crate::bindings::transformers::transform(
626-
&task_json,&args.0,&inputs,
626+
&task_json,&args.0, inputs,
627627
))
628628
}
629629

‎pgml-extension/src/bindings/transformers.py‎

Lines changed: 20 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -1,12 +1,12 @@
1-
importos
2-
importjson
31
importmath
2+
importos
43
importshutil
54
importtime
6-
importnumpyasnp
75

86
importdatasets
97
fromInstructorEmbeddingimportINSTRUCTOR
8+
importnumpy
9+
importorjson
1010
fromrougeimportRouge
1111
fromsacrebleu.metricsimportBLEU
1212
fromsentence_transformersimportSentenceTransformer
@@ -42,7 +42,6 @@
4242
__cache_sentence_transformer_by_name= {}
4343
__cache_transform_pipeline_by_task= {}
4444

45-
4645
DTYPE_MAP= {
4746
"uint8":torch.uint8,
4847
"int8":torch.int8,
@@ -58,6 +57,10 @@
5857
"bool":torch.bool,
5958
}
6059

60+
deforjson_default(obj):
61+
ifisinstance(obj,numpy.float32):
62+
returnfloat(obj)
63+
raiseTypeError
6164

6265
defconvert_dtype(kwargs):
6366
if"torch_dtype"inkwargs:
@@ -78,18 +81,10 @@ def ensure_device(kwargs):
7881
else:
7982
kwargs["device"]="cpu"
8083

81-
82-
classNumpyJSONEncoder(json.JSONEncoder):
83-
defdefault(self,obj):
84-
ifisinstance(obj,np.float32):
85-
returnfloat(obj)
86-
returnsuper().default(obj)
87-
88-
8984
deftransform(task,args,inputs):
90-
task=json.loads(task)
91-
args=json.loads(args)
92-
inputs=json.loads(inputs)
85+
task=orjson.loads(task)
86+
args=orjson.loads(args)
87+
inputs=orjson.loads(inputs)
9388

9489
key=",".join([f"{key}:{val}"for (key,val)insorted(task.items())])
9590
ifkeynotin__cache_transform_pipeline_by_task:
@@ -103,17 +98,18 @@ def transform(task, args, inputs):
10398
pipe=__cache_transform_pipeline_by_task[key]
10499

105100
ifpipe.task=="question-answering":
106-
inputs= [json.loads(input)forinputininputs]
101+
inputs= [orjson.loads(input)forinputininputs]
107102

108103
convert_eos_token(pipe.tokenizer,args)
109104

110-
returnjson.dumps(pipe(inputs,**args),cls=NumpyJSONEncoder)
105+
results=pipe(inputs,**args)
106+
107+
returnorjson.dumps(results,default=orjson_default).decode()
111108

112109

113110
defembed(transformer,inputs,kwargs):
114-
115-
inputs=json.loads(inputs)
116-
kwargs=json.loads(kwargs)
111+
kwargs=orjson.loads(kwargs)
112+
117113
ensure_device(kwargs)
118114
instructor=transformer.startswith("hkunlp/instructor")
119115

@@ -137,7 +133,7 @@ def embed(transformer, inputs, kwargs):
137133

138134

139135
defload_dataset(name,subset,limit:None,kwargs:"{}"):
140-
kwargs=json.loads(kwargs)
136+
kwargs=orjson.loads(kwargs)
141137

142138
iflimit:
143139
dataset=datasets.load_dataset(
@@ -164,7 +160,7 @@ def load_dataset(name, subset, limit: None, kwargs: "{}"):
164160
else:
165161
raisePgMLException(f"Unhandled dataset type:{type(dataset)}")
166162

167-
returnjson.dumps({"data":data,"types":types})
163+
returnorjson.dumps({"data":data,"types":types}).decode()
168164

169165

170166
deftokenize_text_classification(tokenizer,max_length,x,y):
@@ -421,7 +417,7 @@ def compute_metrics_text_generation(model, tokenizer, hyperparams, y):
421417

422418

423419
deftune(task,hyperparams,path,x_train,x_test,y_train,y_test):
424-
hyperparams=json.loads(hyperparams)
420+
hyperparams=orjson.loads(hyperparams)
425421
model_name=hyperparams.pop("model_name")
426422
tokenizer=AutoTokenizer.from_pretrained(model_name)
427423

@@ -562,7 +558,7 @@ def generate(model_id, data, config):
562558
result=get_transformer_by_model_id(model_id)
563559
tokenizer=result["tokenizer"]
564560
model=result["model"]
565-
config=json.loads(config)
561+
config=orjson.loads(config)
566562
all_preds= []
567563

568564
batch_size=1# TODO hyperparams

‎pgml-extension/src/bindings/transformers.rs‎

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -24,13 +24,13 @@ static PY_MODULE: Lazy<Py<PyModule>> = Lazy::new(|| {
2424
pubfntransform(
2525
task:&serde_json::Value,
2626
args:&serde_json::Value,
27-
inputs:&Vec<String>,
27+
inputs:Vec<&str>,
2828
) -> serde_json::Value{
2929
crate::bindings::venv::activate();
3030

3131
let task = serde_json::to_string(task).unwrap();
3232
let args = serde_json::to_string(args).unwrap();
33-
let inputs = serde_json::to_string(inputs).unwrap();
33+
let inputs = serde_json::to_string(&inputs).unwrap();
3434

3535
let results =Python::with_gil(|py| ->String{
3636
let transform:Py<PyAny> =PY_MODULE.getattr(py,"transform").unwrap().into();
@@ -56,11 +56,10 @@ pub fn transform(
5656
serde_json::from_str(&results).unwrap()
5757
}
5858

59-
pubfnembed(transformer:&str,inputs:&[&str],kwargs:&serde_json::Value) ->Vec<Vec<f32>>{
59+
pubfnembed(transformer:&str,inputs:Vec<&str>,kwargs:&serde_json::Value) ->Vec<Vec<f32>>{
6060
crate::bindings::venv::activate();
6161

6262
let kwargs = serde_json::to_string(kwargs).unwrap();
63-
let inputs = serde_json::to_string(&inputs).unwrap();
6463
Python::with_gil(|py| ->Vec<Vec<f32>>{
6564
let embed:Py<PyAny> =PY_MODULE.getattr(py,"embed").unwrap().into();
6665
embed

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp