Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Native Parameters: reintroduce INLINE approach with tests #267

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
susodapop merged 14 commits into main from fix-params
Nov 1, 2023
Merged
Show file tree
Hide file tree
Changes fromall commits
Commits
Show all changes
14 commits
Select commitHold shift + click to select a range
9d88c51
Strip out protocol check and add use_inline_params config for the
Oct 30, 2023
3c70e1b
Move parameter tests into a dedicated class rather than a mixin
Oct 30, 2023
f40e221
Update all e2e tests per guidance from YD.
Oct 30, 2023
316cae3
Black the codebase
Oct 30, 2023
148e76b
Update docstring
Oct 30, 2023
f0e1a51
Update sqlalchemy dialect to always use NATIVE ParamApproach, as this
Oct 30, 2023
52f1167
Update use_inline_params so that a warning log is emitted if inline
Oct 31, 2023
395ef73
Add protocol patch to our local e2e tests as well.
Oct 31, 2023
8a8bf1f
Fix some types
Oct 31, 2023
4f9138d
After PR review: move ParameterApproach.NONE calculation into
Oct 31, 2023
5312acb
Add docstring per PR review
Oct 31, 2023
31a0463
Black source files
Oct 31, 2023
bf78371
Update comment for clarity
Oct 31, 2023
ba848b4
Black source files
Nov 1, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletionsdocs/parameters.md
View file
Open in desktop
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
`<placeholder>`
5 changes: 4 additions & 1 deletionsrc/databricks/sql/__init__.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -5,7 +5,10 @@
# PEP 249 module globals
apilevel="2.0"
threadsafety=1# Threads may share the module, but not connections.
paramstyle="named"# Python extended format codes, e.g. ...WHERE name=%(name)s

# Python extended format codes, e.g. ...WHERE name=%(name)s
# Note that when we switch to ParameterApproach.NATIVE, paramstyle will be `named`
paramstyle="pyformat"


classDBAPITypeObject(object):
Expand Down
168 changes: 151 additions & 17 deletionssrc/databricks/sql/client.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -19,16 +19,25 @@
ExecuteResponse,
ParamEscaper,
named_parameters_to_tsparkparams,
inject_parameters,
ParameterApproach,
)
from databricks.sql.types import Row
from databricks.sql.auth.auth import get_python_sql_connector_auth_provider
from databricks.sql.experimental.oauth_persistence import OAuthPersistence

from databricks.sql.thrift_api.TCLIService.ttypes import (
TSparkParameter,
)


# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# 104857600 bytes == 100 * 1024 * 1024 (100 MiB).
DEFAULT_RESULT_BUFFER_SIZE_BYTES = 104857600
# NOTE(review): presumably the default number of rows fetched per batch — confirm against Cursor.
DEFAULT_ARRAY_SIZE = 100000

# Shared empty list handed to the backend when a query carries no native
# (server-bound) parameters. It is a single module-level object, so callers
# must treat it as read-only.
NO_NATIVE_PARAMS: List = []


class Connection:
def __init__(
Expand DownExpand Up@@ -65,6 +74,12 @@ def __init__(
:param schema: An optional initial schema to use. Requires DBR version 9.0+

Other Parameters:
use_inline_params: `boolean`, optional (default is True)
When True, parameterized calls to cursor.execute() will try to render parameter values inline with the
query text instead of using native bound parameters supported in DBR 14.1 and above. This connector will attempt to
sanitise parameterized inputs to prevent SQL injection. Before you can switch this to False, you must
update your queries to use the PEP-249 `named` paramstyle instead of the `pyformat` paramstyle used
in INLINE mode.
auth_type: `str`, optional
`databricks-oauth` : to use oauth with fine-grained permission scopes, set to `databricks-oauth`.
This is currently in private preview for Databricks accounts on AWS.
Expand DownExpand Up@@ -207,6 +222,9 @@ def read(self) -> Optional[OAuthToken]:
logger.info("Successfully opened session " + str(self.get_session_id_hex()))
self._cursors = [] # type: List[Cursor]

self._suppress_inline_warning = "use_inline_params" in kwargs
self.use_inline_params = kwargs.get("use_inline_params", True)

def __enter__(self):
    # Context-manager entry: hand back this same object for use in a `with` block.
    return self

Expand DownExpand Up@@ -358,6 +376,100 @@ def __iter__(self):
else:
raise Error("There is no active result set")

def _determine_parameter_approach(
    self, params: Optional[Union[List, Dict[str, Any]]] = None
) -> ParameterApproach:
    """Decide whether query parameters are sent natively, rendered inline, or absent.

    Decision order:
      1. If ``params`` is None, ParameterApproach.NONE is returned.
      2. If ``self.connection.use_inline_params`` is truthy, ParameterApproach.INLINE
         is returned. When the server also supports native parameters and the user
         did not explicitly pass ``use_inline_params`` when connecting, a warning
         recommending native parameters is logged first.
      3. Otherwise, if the server supports native parameters,
         ParameterApproach.NATIVE is returned.
      4. Otherwise NotSupportedError is raised.

    :param params:
        The parameters passed to execute(), or None when there are none.

    :returns: a ParameterApproach member.
    :raises NotSupportedError: native parameters were requested but the server
        does not support them.
    """

    if params is None:
        return ParameterApproach.NONE

    server_supports_native_approach = (
        self.connection.server_parameterized_queries_enabled(
            self.connection.protocol_version
        )
    )

    if self.connection.use_inline_params:
        if (
            server_supports_native_approach
            and not self.connection._suppress_inline_warning
        ):
            # Adjacent string literals are concatenated verbatim, so each
            # fragment needs its own trailing space; without it the sentences
            # run together and the URL fuses with the following word.
            logger.warning(
                "This query will be executed with inline parameters. "
                "Consider using native parameters. "
                "Learn more: https://github.com/databricks/databricks-sql-python/tree/main/docs/parameters.md "
                "To suppress this warning, pass use_inline_params=True when creating the connection."
            )
        return ParameterApproach.INLINE

    if server_supports_native_approach:
        return ParameterApproach.NATIVE

    raise NotSupportedError(
        "Parameterized operations are not supported by this server. DBR 14.1 is required."
    )

def _prepare_inline_parameters(
    self, stmt: str, params: Optional[Union[List, Dict[str, Any]]]
) -> Tuple[str, List]:
    """Render parameter values into the query text for the INLINE approach.

    :stmt:
        SQL text whose parameter markers use the PEP-249 `pyformat` paramstyle,
        e.g. `%(param)s`.

    :params:
        The values to render. A Dict must be keyed by the marker names used in
        :stmt:; a List must contain exactly one value per marker in :stmt:.

    Returns a tuple of:
        stmt: :stmt: with every marker substituted by its escaped literal value
        params: the module-level empty list NO_NATIVE_PARAMS, since nothing is
            bound natively when parameters are inlined
    """

    # Escape first, then substitute the escaped values into the statement.
    return (
        inject_parameters(stmt, self.escaper.escape_args(params)),
        NO_NATIVE_PARAMS,
    )

def _prepare_native_parameters(
    self, stmt: str, params: Optional[Union[List[Any], Dict[str, Any]]]
) -> Tuple[str, List[TSparkParameter]]:
    """Return a statement and a list of native parameters to be passed to thrift_backend for execution

    :stmt:
        A string SQL query containing parameter markers of PEP-249 paramstyle `named`.
        For example `:param`.

    :params:
        An iterable of parameter values to be sent natively. If passed as a Dict, the keys
        must match the names of the markers included in :stmt:. If passed as a List, its length
        must equal the count of parameter markers in :stmt:. In list form, any member of the list
        can be wrapped in a DbsqlParameter class.

    Returns a tuple of:
        stmt: the passed statement, returned unchanged (the markers are not rendered here;
            the parameters travel separately from the query text)
        params: a list of TSparkParameters that will be passed in native mode
    """

    # Use a separate local rather than rebinding `params`, whose declared type
    # differs from the converted result.
    native_params = named_parameters_to_tsparkparams(params)  # type: ignore

    return stmt, native_params

def _close_and_clear_active_result_set(self):
try:
if self.active_result_set:
Expand DownExpand Up@@ -515,40 +627,62 @@ def _handle_staging_remove(self, presigned_url: str, headers: dict = None):
def execute(
self,
operation: str,
parameters: Optional[Union[List[Any], Dict[str,str]]] = None,
parameters: Optional[Union[List[Any], Dict[str,Any]]] = None,
) -> "Cursor":
"""
Execute a query and wait for execution to complete.
Parameters should be given in extended param format style: %(...)<s|d|f>.
For example:
operation = "SELECT * FROM table WHERE field = %(some_value)s"
parameters = {"some_value": "foo"}
Will result in the query "SELECT * FROM table WHERE field = 'foo' being sent to the server

The parameterisation behaviour of this method depends on which parameter approach is used:
- With INLINE mode (default), parameters are rendered inline with the query text
- With NATIVE mode, parameters are sent to the server separately for binding

This behaviour is controlled by the `use_inline_params` argument passed when building a connection.

The syntax for these approaches is different:

If the connection was instantiated with use_inline_params=False, then parameters
should be given in PEP-249 `named` paramstyle like :param_name

If the connection was instantiated with use_inline_params=True (default), then parameters
should be given in PEP-249 `pyformat` paramstyle like %(param_name)s

```python
inline_operation = "SELECT * FROM table WHERE field = %(some_value)s"
native_operation = "SELECT * FROM table WHERE field = :some_value"
parameters = {"some_value": "foo"}
```

Both will result in the query equivalent to "SELECT * FROM table WHERE field = 'foo'
being sent to the server

:returns self
"""
if parameters is None:
parameters = []

elif not Connection.server_parameterized_queries_enabled(
self.connection.protocol_version
):
raise NotSupportedError(
"Parameterized operations are not supported by this server. DBR 14.1 is required."
param_approach = self._determine_parameter_approach(parameters)
if param_approach == ParameterApproach.NONE:
prepared_params = NO_NATIVE_PARAMS
prepared_operation = operation

elif param_approach == ParameterApproach.INLINE:
prepared_operation, prepared_params = self._prepare_inline_parameters(
operation, parameters
)
elif param_approach == ParameterApproach.NATIVE:
prepared_operation, prepared_params = self._prepare_native_parameters(
operation, parameters
)
else:
parameters = named_parameters_to_tsparkparams(parameters)

self._check_not_closed()
self._close_and_clear_active_result_set()
execute_response = self.thrift_backend.execute_command(
operation=operation,
operation=prepared_operation,
session_handle=self.connection._session_handle,
max_rows=self.arraysize,
max_bytes=self.buffer_size_bytes,
lz4_compression=self.connection.lz4_compression,
cursor=self,
use_cloud_fetch=self.connection.use_cloud_fetch,
parameters=parameters,
parameters=prepared_params,
)
self.active_result_set = ResultSet(
self.connection,
Expand Down
10 changes: 9 additions & 1 deletionsrc/databricks/sql/utils.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -25,6 +25,12 @@
BIT_MASKS = [1, 2, 4, 8, 16, 32, 64, 128]


class ParameterApproach(Enum):
    """How the parameters of a query are delivered to the server."""

    # Parameter values are rendered into the query text itself.
    INLINE = 1
    # Parameters are sent to the server separately from the query text.
    NATIVE = 2
    # The query was issued without parameters.
    NONE = 3


class ResultSetQueue(ABC):
@abstractmethod
def next_n_rows(self, num_rows: int) -> pyarrow.Table:
Expand DownExpand Up@@ -627,7 +633,9 @@ def calculate_decimal_cast_string(input: Decimal) -> str:
return f"DECIMAL({overall},{after})"


def named_parameters_to_tsparkparams(parameters: Union[List[Any], Dict[str, str]]):
def named_parameters_to_tsparkparams(
parameters: Union[List[Any], Dict[str, str]]
) -> List[TSparkParameter]:
tspark_params = []
if isinstance(parameters, dict):
dbsql_params = named_parameters_to_dbsqlparams_v1(parameters)
Expand Down
20 changes: 19 additions & 1 deletionsrc/databricks/sqlalchemy/base.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -59,7 +59,7 @@ class DatabricksDialect(default.DefaultDialect):
non_native_boolean_check_constraint: bool = False
supports_identity_columns: bool = True
supports_schemas: bool = True
paramstyle: str = "named"
default_paramstyle: str = "named"
div_is_floordiv: bool = False
supports_default_values: bool = False
supports_server_side_cursors: bool = False
Expand All@@ -85,6 +85,21 @@ class DatabricksDialect(default.DefaultDialect):
def dbapi(cls):
return sql

def _force_paramstyle_to_native_mode(self):
"""This method can be removed after databricks-sql-connector wholly switches to NATIVE ParamApproach.

This is a hack to trick SQLAlchemy into using a different paramstyle
than the one declared by this module in src/databricks/sql/__init__.py

This method is called _after_ the dialect has been initialised, which is important because otherwise
our users would need to include a `paramstyle` argument in their SQLAlchemy connection string.

This dialect is written to support NATIVE queries. Although the INLINE approach can technically work,
the same behaviour can be achieved within SQLAlchemy itself using its literal_processor methods.
"""

self.paramstyle = self.default_paramstyle

def create_connect_args(self, url):
# TODO: can schema be provided after HOST?
# Expected URI format is: databricks+thrift://token:dapi***@***.cloud.databricks.com?http_path=/sql/***
Expand All@@ -95,11 +110,14 @@ def create_connect_args(self, url):
"http_path": url.query.get("http_path"),
"catalog": url.query.get("catalog"),
"schema": url.query.get("schema"),
"use_inline_params": False,
}

self.schema = kwargs["schema"]
self.catalog = kwargs["catalog"]

self._force_paramstyle_to_native_mode()

return [], kwargs

def get_columns(
Expand Down
14 changes: 14 additions & 0 deletionssrc/databricks/sqlalchemy/test/test_suite.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -3,6 +3,20 @@
then are overridden by our local skip markers in _regression, _unsupported, and _future.
"""


def start_protocol_patch():
    """Patch Connection.server_parameterized_queries_enabled so it always returns True.

    The patcher is started here and is not stopped by this function.
    See tests/test_parameterized_queries.py for more information about this patch.
    """
    from unittest.mock import patch

    target = "databricks.sql.client.Connection.server_parameterized_queries_enabled"
    patch(target, return_value=True).start()


start_protocol_patch()

# type: ignore
# fmt: off
from sqlalchemy.testing.suite import *
Expand Down
4 changes: 4 additions & 0 deletionssrc/databricks/sqlalchemy/test_local/e2e/test_basic.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -22,6 +22,10 @@
exceptImportError:
fromsqlalchemy.ext.declarativeimportdeclarative_base

fromdatabricks.sqlalchemy.test.test_suiteimportstart_protocol_patch

start_protocol_patch()


USER_AGENT_TOKEN="PySQL e2e Tests"

Expand Down
Loading

[8]ページ先頭

©2009-2025 Movatter.jp