Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit650f3cd

Browse files
authored
Add pgml-rds-proxy (#1412)
1 parenta5349e4 commit650f3cd

File tree

10 files changed

+258
-2
lines changed

10 files changed

+258
-2
lines changed
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
name:Build and release pgml-rds-proxy Docker image
2+
3+
on:
4+
workflow_dispatch:
5+
jobs:
6+
publish-proxy-docker-image:
7+
strategy:
8+
matrix:
9+
os:["buildjet-4vcpu-ubuntu-2204"]
10+
runs-on:${{ matrix.os }}
11+
defaults:
12+
run:
13+
working-directory:packages/pgml-rds-proxy
14+
steps:
15+
-uses:actions/checkout@v2
16+
-name:Login to GitHub Container Registry
17+
uses:docker/login-action@v1
18+
with:
19+
registry:ghcr.io
20+
username:${{ github.actor }}
21+
password:${{ secrets.GITHUB_TOKEN }}
22+
-name:Build and push Docker image
23+
run:|
24+
bash build-docker-image.sh

‎packages/pgml-rds-proxy/Dockerfile‎

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
FROM ubuntu:22.04
2+
ENV PGCAT_VERSION=2.0.0-alpha18
3+
RUN apt update && \
4+
apt install -y curl postgresql-client-common postgresql-client-14 && \
5+
apt clean
6+
WORKDIR /pgml-rds-proxy
7+
COPY --chown=root:root download-pgcat.sh download-pgcat.sh
8+
COPY --chown=root:root run.sh run.sh
9+
RUN bash download-pgcat.sh
10+
ENTRYPOINT ["bash","run.sh"]

‎packages/pgml-rds-proxy/README.md‎

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
#pgml-rds-proxy
2+
3+
A pgcat-based PostgreSQL proxy that allows to use PostgresML functions on managed PostgreSQL databases that may not have Internet access, like AWS RDS.
4+
5+
##Getting started
6+
7+
A Docker image is provided and is the easiest way to get started. To run the image, you can simply:
8+
9+
```bash
10+
docker run \
11+
-e DATABASE_URL=postgres://pg:ml@sql.cloud.postgresml.org:38042/pgml \
12+
-p 6432:6432 \
13+
ghcr.io/postgresml/pgml-rds-proxy:latest
14+
```
15+
16+
**Note:** Replace the`DATABASE_URL` above with the`DATABASE_URL` of your own PostgresML database.
17+
18+
If you're running this on EC2, make sure the instance is placed inside the same VPC as your RDS database and that the RDS database is allowed to make outbound connections to the EC2 instance.
19+
The example above starts the proxy process on port 6432, so for your security group configuration, make sure the database can make outbound connections to the EC2 instance using TCP on port 6432.
20+
21+
###Configure FDW
22+
23+
We'll be using the Foreign Data Wrapper extension to connect from your RDS database to PostgresML, forwarding the connection through the proxy. If you're running the proxy on EC2, take note of the private IP
24+
or DNS entry of the instance.
25+
26+
Before proceeding, make sure you have the following extensions installed into your RDS database:
27+
28+
```postgresql
29+
CREATE EXTENSION IF NOT EXISTS dblink;
30+
CREATE EXTENSION IF NOT EXISTS postgres_fdw;
31+
```
32+
33+
Both of these require superuser, so make sure you're running these commands with a user that has the`rds_superuser` role.
34+
35+
To create a foreign data wrapper connection, take your PostgresML host and port and replace the host with the private IP or DNS entry of the instance.
36+
37+
```postgresql
38+
CREATE SERVER postgresml
39+
FOREIGN DATA WRAPPER postgres_fdw
40+
OPTIONS (
41+
host '127.0.0.1',
42+
port '6432',
43+
dbname 'pgml'
44+
);
45+
```
46+
47+
Replace the value for`host` with the private IP or DNS entry of the EC2 instance running the proxy. Replace the`dbname` with the name of the database from your PostgresML database`DATABASE_URL`.
48+
49+
####User mapping
50+
51+
PostgresML and the proxy requires authentication. For each user that will use the connection, create a user mapping, like so:
52+
53+
```postgresql
54+
CREATE USER MAPPING
55+
FOR CURRENT_USER
56+
SERVER postgresml
57+
OPTIONS (
58+
user 'pg',
59+
password 'ml'
60+
);
61+
```
62+
63+
Replace the values for`user` and`password` with the values from your PostgresML database`DATABASE_URL`. This example contains values that will only work with our demo server and aren't suitable for production.`CURRENT_USER` is a special PostgreSQL variable that's replaced by the name of the user running the command. If you want to create this mapping for other users, replace it with the name of the user/role.
64+
65+
###Test the connection
66+
67+
To test the connection, you can use`dblink`:
68+
69+
```
70+
SELECT
71+
*
72+
FROM
73+
dblink(
74+
'postgresml',
75+
'SELECT * FROM pgml.embed(''intfloat/e5-small'', ''embed this text'') AS embedding'
76+
) AS t1(embedding real[386]);
77+
```
78+
79+
If everything is configured correctly, you should see an array of 386 floating points, your first embedding generated using PostgresML on AWS RDS. Both dblink and the proxy makes efficient use of connections, so queries will be executed as fast as the network connection allows.
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
#!/bin/bash
2+
#
3+
#
4+
#
5+
set -ex
6+
7+
docker run --privileged --rm tonistiigi/binfmt --install all
8+
docker buildx create --use --name mybuilder||true
9+
docker buildx build \
10+
--platform linux/amd64,linux/arm64 \
11+
--tag ghcr.io/postgresml/pgml-rds-proxy:latest \
12+
--progress plain \
13+
--no-cache \
14+
--push \
15+
.
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
#!/bin/bash
2+
#
3+
# Download the right version of pgcat for the architecture.
4+
#
5+
# Author: PostgresML <team@postgresml.org>
6+
# License: MIT
7+
#
8+
architecture=$(arch)
9+
name=$(uname)
10+
url="https://static.postgresml.org/packages/pgcat"
11+
version="$PGCAT_VERSION"
12+
bin_name="pgcat2-$version.bin"
13+
14+
if [[-z"$version" ]];then
15+
echo"PGCAT_VERSION environment variable is not set"
16+
exit 1
17+
fi
18+
19+
if [["$architecture"=="aarch64"&&"$name"=="Linux" ]];then
20+
url="${url}/arm64/$bin_name"
21+
elif [["$architecture"=="x86_64"&&"$name"=="Linux" ]];then
22+
url="${url}/amd64/$bin_name"
23+
else
24+
echo"Unsupported platform:${name}${architecture}"
25+
exit 1
26+
fi
27+
28+
echo"Downloading pgcat from$url"
29+
curl -L -o /usr/local/bin/pgcat${url}
30+
chmod +x /usr/local/bin/pgcat

‎packages/pgml-rds-proxy/run.sh‎

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
#!/bin/bash
2+
#
3+
# Configure pgcat from a DATABASE_URL environment variable and run it as PID 1.
4+
# This will regenerate the configuration file every time so modifications to it won't be saved.
5+
#
6+
# If you want to modify the configuration file, generate it first and then run pgcat with `--config <path to file>` instead.
7+
#
8+
# Author: PostgresML <team@postgresml.org>
9+
# License: MIT
10+
#
11+
exec /usr/local/bin/pgcat --database-url${DATABASE_URL}

‎pgml-sdks/pgml/Cargo.lock‎

Lines changed: 2 additions & 1 deletion
Some generated files are not rendered by default. Learn more aboutcustomizing how changed files appear on GitHub.

‎pgml-sdks/pgml/Cargo.toml‎

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ ctrlc = "3"
4545
inquire ="0.6"
4646
parking_lot ="0.12.1"
4747
once_cell ="1.19.0"
48+
url ="2.5.0"
4849

4950
[features]
5051
default = []

‎pgml-sdks/pgml/src/cli.rs‎

Lines changed: 55 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ use pyo3::prelude::*;
1010
use sqlx::{Acquire,Executor};
1111
use std::io::Write;
1212

13-
/// PostgresML CLI
13+
/// PostgresML CLI: configure your PostgresML deployments & create connections to remote data sources.
1414
#[cfg(feature ="python")]
1515
#[derive(Parser,Debug,Clone)]
1616
#[command(author, version, about, long_about =None, name ="pgml", bin_name ="pgml")]
@@ -97,6 +97,13 @@ enum Subcommands {
9797
#[arg(long)]
9898
database_url:Option<String>,
9999
},
100+
101+
/// Connect your database to PostgresML via dblink.
102+
Remote{
103+
/// DATABASE_URL.
104+
#[arg(long, short)]
105+
database_url:Option<String>,
106+
},
100107
}
101108

102109
enumLevel{
@@ -212,6 +219,10 @@ async fn cli_internal() -> anyhow::Result<()> {
212219
)
213220
.await?;
214221
}
222+
223+
Subcommands::Remote{ database_url} =>{
224+
remote(database_url).await?;
225+
}
215226
};
216227

217228
Ok(())
@@ -326,6 +337,49 @@ async fn connect(
326337
Ok(())
327338
}
328339

340+
asyncfnremote(database_url:Option<String>) -> anyhow::Result<()>{
341+
let database_url =user_input!(database_url,"PostgresML DATABASE_URL");
342+
let database_url = url::Url::parse(&database_url)?;
343+
let user = database_url.username();
344+
if user.is_empty(){
345+
anyhow::bail!("user not found in DATABASE_URL");
346+
}
347+
348+
let password = database_url.password();
349+
let password =if password.is_none(){
350+
anyhow::bail!("password not found in DATABASE_URL");
351+
}else{
352+
password.unwrap()
353+
};
354+
355+
let host = database_url.host_str();
356+
let host =if host.is_none(){
357+
anyhow::bail!("host not found in DATABASE_URL");
358+
}else{
359+
host.unwrap()
360+
};
361+
362+
let port = database_url.port();
363+
let port =if port.is_none(){
364+
"6432".to_string()
365+
}else{
366+
port.unwrap().to_string()
367+
};
368+
369+
let database = database_url.path().replace("/","");
370+
371+
let sql =include_str!("sql/remote.sql")
372+
.replace("{user}", user)
373+
.replace("{password}", password)
374+
.replace("{host}", host)
375+
.replace("{db_name}","postgresml")
376+
.replace("{database_name}",&database)
377+
.replace("{port}",&port);
378+
379+
println!("{}", syntax_highlight(&sql));
380+
Ok(())
381+
}
382+
329383
fnsyntax_highlight(text:&str) ->String{
330384
if !std::io::stdout().is_terminal(){
331385
return text.to_owned();

‎pgml-sdks/pgml/src/sql/remote.sql‎

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
2+
CREATE EXTENSION IF NOT EXISTS postgres_fdw;
3+
CREATE EXTENSION IF NOT EXISTS dblink;
4+
5+
CREATE SERVER"{db_name}"
6+
FOREIGN DATA WRAPPER postgres_fdw
7+
OPTIONS (
8+
host'{host}',
9+
port'{port}',
10+
dbname'{database_name}'
11+
);
12+
13+
CREATEUSERMAPPING
14+
FORCURRENT_USER
15+
SERVER"{db_name}"
16+
OPTIONS (
17+
user'{user}',
18+
password'{password}'
19+
);
20+
21+
SELECT*FROM dblink(
22+
'{db_name}',
23+
'SELECT pgml.embed(''intfloat/e5-small'',''test postgresml embedding'') AS embedding'
24+
)AS t(embeddingreal[386]);
25+
26+
CREATEFUNCTIONpgml_embed_e5_small(text) RETURNSreal[386]AS $$
27+
SELECT*FROM dblink(
28+
'{db_name}',
29+
'SELECT pgml.embed(''intfloat/e5-small'','''|| $1||''') AS embedding'
30+
)AS t(embeddingreal[386]);
31+
$$ LANGUAGE SQL;

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp