- Notifications
You must be signed in to change notification settings - Fork126
SQLAlchemy 2: Stop skipping all type tests#242
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub?Sign in to your account
Uh oh!
There was an error while loading. Please reload this page.
Changes fromall commits
8c05fd8681593575368a5da9b9f2b7afd1bf4a59f5c817509d3a76a4d4afb60ab8e28faf1a49e53eef60a56b083aa1cae34671ff977221c2bc0495de233058214523b80ae0ddb31eef4c8332da8200e2312d081b0d5be1cFile filter
Filter by extension
Conversations
Uh oh!
There was an error while loading.Please reload this page.
Jump to
Uh oh!
There was an error while loading.Please reload this page.
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,214 @@ | ||
| import sqlalchemy | ||
ContributorAuthor There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others.Learn more. Note for reviewers: this PR renamed | ||
| from sqlalchemy.ext.compiler import compiles | ||
| from typing import Union | ||
| from datetime import datetime, time | ||
| from databricks.sql.utils import ParamEscaper | ||
@compiles(sqlalchemy.types.Enum, "databricks")
@compiles(sqlalchemy.types.String, "databricks")
@compiles(sqlalchemy.types.Text, "databricks")
@compiles(sqlalchemy.types.Time, "databricks")
@compiles(sqlalchemy.types.Unicode, "databricks")
@compiles(sqlalchemy.types.UnicodeText, "databricks")
@compiles(sqlalchemy.types.Uuid, "databricks")
def compile_string_databricks(type_, compiler, **kw):
    """Render every string-like SQLAlchemy type as the Databricks STRING type.

    The default compilations are incompatible with / abnormal for Databricks:

        Enum        -> VARCHAR
        String      -> VARCHAR[LENGTH]
        Text        -> VARCHAR[LENGTH]
        Time        -> TIME
        Unicode     -> VARCHAR[LENGTH]
        UnicodeText -> TEXT
        Uuid        -> CHAR[32]

    Databricks SQL compiles all of these to the single STRING type.
    """
    return "STRING"
@compiles(sqlalchemy.types.Integer, "databricks")
def compile_integer_databricks(type_, compiler, **kw):
    """Render Integer columns as "INT": Databricks spells the type "INT",
    not SQLAlchemy's default "INTEGER"."""
    return "INT"
@compiles(sqlalchemy.types.LargeBinary, "databricks")
def compile_binary_databricks(type_, compiler, **kw):
    """Render LargeBinary columns as "BINARY": Databricks spells the type
    "BINARY", not SQLAlchemy's default "BLOB"."""
    return "BINARY"
@compiles(sqlalchemy.types.Numeric, "databricks")
def compile_numeric_databricks(type_, compiler, **kw):
    """Render Numeric columns as DECIMAL rather than the default "NUMERIC",
    which Databricks does not use.

    SQLAlchemy's built-in visit_DECIMAL already captures precision and scale,
    so Numeric compilation is simply delegated to the Decimal() implementation.
    """
    return compiler.visit_DECIMAL(type_, **kw)
@compiles(sqlalchemy.types.DateTime, "databricks")
def compile_datetime_databricks(type_, compiler, **kw):
    """Render DateTime columns as "TIMESTAMP_NTZ".

    We override the default DateTime compilation because Databricks has no
    "DATETIME" type. We emit TIMESTAMP_NTZ (timestamp without time zone)
    rather than plain "TIMESTAMP" so that stored values are timezone-naive.
    NOTE(review): DatabricksDateTimeNoTimezoneType in this module strips any
    tzinfo from values read back, matching this choice.
    """
    return "TIMESTAMP_NTZ"
@compiles(sqlalchemy.types.ARRAY, "databricks")
def compile_array_databricks(type_, compiler, **kw):
    """Render ARRAY(item_type) columns as "ARRAY<item_type>".

    SQLAlchemy's default ARRAY only implements compilation for PostgreSQL,
    so the generic type cannot compile at all without this override. The
    Postgres approach — compile the contained item type and wrap it — works
    for Databricks SQL as well, so we duplicate it here.

    ``type_`` is a sqlalchemy.types.ARRAY instance, which always carries an
    ``item_type`` attribute that is itself a TypeEngine:
    https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.ARRAY
    """
    item_sql = compiler.process(type_.item_type, **kw)
    return "ARRAY<{}>".format(item_sql)
class DatabricksDateTimeNoTimezoneType(sqlalchemy.types.TypeDecorator):
    """DateTime decorator that drops the timezone from values read back.

    The datetime that pysql builds from a TIMESTAMP_NTZ column carries an
    'Etc/UTC' timezone, but SQLAlchemy's test suite assumes that
    sqlalchemy.types.DateTime yields datetime.datetime objects _without_ any
    timezone set, so we strip the timezone off values received from pysql.

    It's not clear whether DBR sends a timezone to pysql or pysql adds one.
    This could be a bug.
    """

    impl = sqlalchemy.types.DateTime
    cache_ok = True

    def process_result_value(self, value: Union[None, datetime], dialect):
        # None passes through; anything else comes back timezone-naive.
        return None if value is None else value.replace(tzinfo=None)
class DatabricksTimeType(sqlalchemy.types.TypeDecorator):
    """Databricks has no native TIME type, so Time() values are stored as strings."""

    impl = sqlalchemy.types.Time
    cache_ok = True

    TIME_WITH_MICROSECONDS_FMT = "%H:%M:%S.%f"
    TIME_NO_MICROSECONDS_FMT = "%H:%M:%S"

    def process_bind_param(self, value: Union[time, None], dialect) -> Union[None, str]:
        """Serialise an outgoing datetime.time to an %H:%M:%S.%f string."""
        if value is None:
            return None
        return value.strftime(self.TIME_WITH_MICROSECONDS_FMT)

    # mypy dislikes this workaround: TypeEngine declares that
    # process_literal_param returns a string, but we return a time object.
    def process_literal_param(self, value, dialect) -> time:  # type: ignore
        """Pass the datetime.time through unchanged for literal rendering.

        It's not clear why this is necessary. Without it, SQLAlchemy's
        TimeTest:test_literal fails because the string literal renderer
        receives a str() object and calls .isoformat() on it; with it, the
        same renderer receives a datetime.time() object and succeeds.

        UPDATE: after coping with the literal_processor override in
        DatabricksStringType, a similar mechanism seems to be at play here —
        two different processors are called in sequence, likely a byproduct
        of Databricks lacking a true TIME type. The string representation of
        Time() types appears to affect the literal rendering process somehow,
        but since the tests pass, this is left as-is.
        """
        return value

    def process_result_value(
        self, value: Union[None, str], dialect
    ) -> Union[time, None]:
        """Parse a stored string back into a datetime.time() object."""
        if value is None:
            return None
        try:
            parsed = datetime.strptime(value, self.TIME_WITH_MICROSECONDS_FMT)
        except ValueError:
            # No fractional seconds present: retry with the short format.
            parsed = datetime.strptime(value, self.TIME_NO_MICROSECONDS_FMT)
        return parsed.time()
class DatabricksStringType(sqlalchemy.types.TypeDecorator):
    """String type with Databricks-compatible literal escaping.

    We implement our own String() type because SQLAlchemy's default escapes a
    single-quote by doubling it, whereas Databricks escapes literal strings
    with a backslash. The default escaping therefore breaks Databricks SQL,
    so all escaping is routed through ParamEscaper instead.
    """

    impl = sqlalchemy.types.String
    cache_ok = True
    pe = ParamEscaper()

    def process_literal_param(self, value, dialect) -> str:
        """Escape ``value`` via ParamEscaper, replicating our legacy inline
        escaping logic; SQLAlchemy's backslash handling doesn't work for
        Databricks."""
        return self.pe.escape_string(value)

    def literal_processor(self, dialect):
        """Return a literal processor that skips String's quote-doubling.

        The SQLAlchemy docs _specifically_ say not to override this method,
        but we do it to stop any processing of the literal beyond what
        process_literal_param() already performs.

        Whatever TypeEngine.process_literal_param does happens _before_ and
        _in addition to_ the class's impl.literal_processor(). The stock
        String.literal_processor() doubles every single-quote in the string,
        which raises a syntax error in Databricks — and is redundant, since
        ParamEscaper() already implements all the escaping we need. We should
        consider opening an issue on the SQLAlchemy project in case we are
        using it wrong.

        See type_api.py::TypeEngine.literal_processor:

        ```python
        def process(value: Any) -> str:
            return fixed_impl_processor(
                fixed_process_literal_param(value, dialect)
            )
        ```

        That call to fixed_impl_processor wraps the result of
        fixed_process_literal_param (the process_literal_param defined in our
        Databricks dialect).

        https://docs.sqlalchemy.org/en/20/core/custom_types.html#sqlalchemy.types.TypeDecorator.literal_processor
        """

        def process(value):
            # Copy of the default String.literal_processor() behaviour minus
            # its double-escaping of single-quotes.
            escaped = self.process_literal_param(value, dialect="databricks")
            if dialect.identifier_preparer._double_percents:
                escaped = escaped.replace("%", "%%")
            return "%s" % escaped

        return process
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,34 +1,96 @@ | ||
| """ | ||
ContributorAuthor There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others.Learn more. I removed this comment since we now have a better understanding of how to use That said, I think our approach going forward should be to use ContributorAuthor There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others.Learn more. | ||
| The complete list of requirements is provided by SQLAlchemy here: | ||
| https://github.com/sqlalchemy/sqlalchemy/blob/main/lib/sqlalchemy/testing/requirements.py | ||
| When SQLAlchemy skips a test because a requirement is closed() it gives a generic skip message. | ||
| To make these failures more actionable, we only define requirements in this file that we wish to | ||
| force to be open(). If a test should be skipped on Databricks, it will be specifically marked skip | ||
| in test_suite.py with a Databricks-specific reason. | ||
| See the special note about the array_type exclusion below. | ||
| """ | ||
| import sqlalchemy.testing.requirements | ||
| import sqlalchemy.testing.exclusions | ||
class Requirements(sqlalchemy.testing.requirements.SuiteRequirements):
    """Databricks-specific overrides of SQLAlchemy's suite requirements.

    Only requirements we force open() are declared here (plus the special
    array_type case); everything else is skipped explicitly in test_suite.py
    with a Databricks-specific reason.
    """

    @property
    def date_historic(self):
        """Dialect can represent datetime.datetime() values with historic
        (pre-1970) dates."""
        return sqlalchemy.testing.exclusions.open()

    @property
    def datetime_historic(self):
        """Dialect can represent datetime.datetime() values with historic
        (pre-1970) values."""
        return sqlalchemy.testing.exclusions.open()

    @property
    def datetime_literals(self):
        """Dialect can render a date, time, or datetime as a literal string,
        e.g. via the TypeEngine.literal_processor() method."""
        return sqlalchemy.testing.exclusions.open()

    @property
    def timestamp_microseconds(self):
        """Dialect can represent datetime.datetime() with microseconds, but
        only when TIMESTAMP is used."""
        return sqlalchemy.testing.exclusions.open()

    @property
    def time_microseconds(self):
        """Dialect can represent datetime.time() values with microseconds.

        Declaring this isn't strictly needed; it is included for
        completeness. Databricks has no TIME type, so SQLAlchemy compiles
        Time() columns as STRING Databricks data types, and our custom time
        type converts those strings to and from time() representations —
        meaning we can store _any_ precision SQLAlchemy needs. This
        requirement defaults to ON for all dialects except mssql, mysql,
        mariadb, and oracle.
        """
        return sqlalchemy.testing.exclusions.open()

    @property
    def infinity_floats(self):
        """The Float type can persist and load float('inf'), float('-inf')."""
        return sqlalchemy.testing.exclusions.open()

    @property
    def precision_numerics_retains_significant_digits(self):
        """A precision numeric type returns empty significant digits, i.e. a
        value such as 10.000 comes back in Decimal form with the .000
        maintained."""
        return sqlalchemy.testing.exclusions.open()

    @property
    def precision_numerics_many_significant_digits(self):
        """Backend supports values with many digits on both sides, such as
        319438950232418390.273596, 87673.594069654243."""
        return sqlalchemy.testing.exclusions.open()

    @property
    def array_type(self):
        """Databricks does support ARRAY types, but pysql cannot bind them,
        so they are unusable from SQLAlchemy.

        Due to a bug in SQLAlchemy, this exclusion _must_ be defined as
        closed() here, or the test runner crashes the pytest process with an
        AttributeError.
        """
        return sqlalchemy.testing.exclusions.closed()
Uh oh!
There was an error while loading.Please reload this page.