SQLAlchemy 2: Stop skipping all type tests #242

Merged
susodapop merged 25 commits into main from sqlalchemy-test-evaluation
Oct 10, 2023
8c05fd8
Rename types.py →_types.py
Oct 6, 2023
6815935
Mark BINARY sqlalchemy tests as skip because connector doesn't suppor…
Oct 6, 2023
75368a5
Stop skipping DateHistoric tests
Oct 6, 2023
da9b9f2
Stop skipping DateTest tests
Oct 6, 2023
b7afd1b
Stop skipping DateTimeHistoric tests
Oct 7, 2023
f4a59f5
Stop skipping DateTimeTest
Oct 7, 2023
c817509
Stop skipping TimeTest
Oct 7, 2023
d3a76a4
Stop skipping DateTimeCoercedToDateTimeTest
Oct 7, 2023
d4afb60
Stop skipping TimestampMicrosecondsTest
Oct 7, 2023
ab8e28f
Stop skipping DateTimeMicrosecondsTest
Oct 7, 2023
af1a49e
Stop skipping StringTest
Oct 8, 2023
53eef60
Stop skipping TextTest
Oct 8, 2023
a56b083
Stop skipping TimeMicrosecondsTest
Oct 8, 2023
aa1cae3
Stop skipping NumericTest
Oct 8, 2023
4671ff9
Stop skipping Boolean test
Oct 8, 2023
77221c2
Fix: must add array_type exclusion to this file else the entire test …
Oct 8, 2023
bc0495d
Black all files changed in this PR
Oct 8, 2023
e233058
Update test running instructions
Oct 8, 2023
214523b
Remove outdated comment now that we better understand how to use this…
Oct 8, 2023
80ae0dd
Add marks for the type tests that we've reviewed. This way we can re-run
Oct 9, 2023
b31eef4
(1/x) Remove .closed() exclusions and replace with explicit skips
Oct 10, 2023
c8332da
Add an open exclusion to allow test_many_significant_digits to run.
Oct 10, 2023
8200e23
(2/x) Remove .closed() exclusions and replace with explicit skips
Oct 10, 2023
12d081b
Fix linting
Oct 10, 2023
0d5be1c
Fix black linting
Oct 10, 2023
11 changes: 5 additions & 6 deletions CONTRIBUTING.md
@@ -148,16 +148,15 @@ The suites marked `[not documented]` require additional configuration which will

SQLAlchemy provides reusable tests for testing dialect implementations.

To run these tests, assuming the environment variables needed for e2e tests are set, do the following:

 ```
-cd src/databricks/sqlalchemy
-poetry run python -m pytest test/sqlalchemy_dialect_compliance.py --dburi \
+poetry shell
+cd src/databricks/sqlalchemy/test
+python -m pytest test_suite.py --dburi \
 "databricks://token:$access_token@$host?http_path=$http_path&catalog=$catalog&schema=$schema"
 ```

-Some of these of these tests fail currently. We're working on getting
-relavent tests passing and others skipped.
+Some of these tests fail currently. We're working on getting relevant tests passing and others skipped. The tests that we've already reviewed and verified
+are decorated with a pytest marker called `reviewed`. To only run these tests and check for regressions, you can add `-m reviewed` to the invocation command above.

### Code formatting

9 changes: 7 additions & 2 deletions src/databricks/sqlalchemy/__init__.py
@@ -13,7 +13,7 @@
from databricks import sql

# This import is required to process our @compiles decorators
-import databricks.sqlalchemy.types
+import databricks.sqlalchemy._types as dialect_type_impl


from databricks.sqlalchemy.base import (
@@ -48,6 +48,12 @@ class DatabricksDialect(default.DefaultDialect):
non_native_boolean_check_constraint: bool = False
paramstyle: str = "named"

colspecs = {
sqlalchemy.types.DateTime: dialect_type_impl.DatabricksDateTimeNoTimezoneType,
sqlalchemy.types.Time: dialect_type_impl.DatabricksTimeType,
sqlalchemy.types.String: dialect_type_impl.DatabricksStringType,
}

@classmethod
def dbapi(cls):
return sql
@@ -130,7 +136,6 @@ def get_columns(self, connection, table_name, schema=None, **kwargs):
columns = []

for col in resp:

# Taken from PyHive. This removes added type info from decimals and maps
_col_type = re.search(r"^\w+", col.TYPE_NAME).group(0)
this_column = {
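The `re.search(r"^\w+", ...)` call above can be exercised on its own. A stdlib-only sketch (the helper name `base_type` is hypothetical, introduced here for illustration):

```python
import re

def base_type(type_name: str) -> str:
    # Strip parameterization from a type name, e.g. "DECIMAL(10,2)" -> "DECIMAL",
    # mirroring the re.search(r"^\w+", col.TYPE_NAME) call in get_columns()
    return re.search(r"^\w+", type_name).group(0)

print(base_type("DECIMAL(10,2)"))  # DECIMAL
```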
214 changes: 214 additions & 0 deletions src/databricks/sqlalchemy/_types.py
@@ -0,0 +1,214 @@
import sqlalchemy
[Comment from @susodapop (Contributor, Author), Oct 9, 2023, edited:
Note for reviewers: this PR renamed types.py → _types.py, which makes this file appear brand new. But most of the contents of the file are identical to main. Reviewing commit-by-commit you'll see the file moved in a dedicated commit. Subsequent changes to the file are linked to un-skipping the tests that those changes enabled to pass.]

from sqlalchemy.ext.compiler import compiles

from typing import Union

from datetime import datetime, time


from databricks.sql.utils import ParamEscaper


@compiles(sqlalchemy.types.Enum, "databricks")
@compiles(sqlalchemy.types.String, "databricks")
@compiles(sqlalchemy.types.Text, "databricks")
@compiles(sqlalchemy.types.Time, "databricks")
@compiles(sqlalchemy.types.Unicode, "databricks")
@compiles(sqlalchemy.types.UnicodeText, "databricks")
@compiles(sqlalchemy.types.Uuid, "databricks")
def compile_string_databricks(type_, compiler, **kw):
"""
We override the default compilation for Enum(), String(), Text(), and Time() because SQLAlchemy
defaults to incompatible / abnormal compiled names

Enum -> VARCHAR
String -> VARCHAR[LENGTH]
Text -> VARCHAR[LENGTH]
Time -> TIME
Unicode -> VARCHAR[LENGTH]
UnicodeText -> TEXT
Uuid -> CHAR[32]

But all of these types will be compiled to STRING in Databricks SQL
"""
return "STRING"


@compiles(sqlalchemy.types.Integer, "databricks")
def compile_integer_databricks(type_, compiler, **kw):
"""
We need to override the default Integer compilation rendering because Databricks uses "INT" instead of "INTEGER"
"""
return "INT"


@compiles(sqlalchemy.types.LargeBinary, "databricks")
def compile_binary_databricks(type_, compiler, **kw):
"""
We need to override the default LargeBinary compilation rendering because Databricks uses "BINARY" instead of "BLOB"
"""
return "BINARY"


@compiles(sqlalchemy.types.Numeric, "databricks")
def compile_numeric_databricks(type_, compiler, **kw):
"""
We need to override the default Numeric compilation rendering because Databricks uses "DECIMAL" instead of "NUMERIC"

The built-in visit_DECIMAL behaviour captures the precision and scale. Here we're just mapping calls to compile Numeric
to the SQLAlchemy Decimal() implementation
"""
return compiler.visit_DECIMAL(type_, **kw)


@compiles(sqlalchemy.types.DateTime, "databricks")
def compile_datetime_databricks(type_, compiler, **kw):
"""
We need to override the default DateTime compilation rendering because Databricks uses "TIMESTAMP_NTZ" instead of "DATETIME"
"""
return "TIMESTAMP_NTZ"


@compiles(sqlalchemy.types.ARRAY, "databricks")
def compile_array_databricks(type_, compiler, **kw):
"""
SQLAlchemy's default ARRAY can't compile as it's only implemented for Postgresql.
The Postgres implementation works for Databricks SQL, so we duplicate that here.

:type_:
This is an instance of sqlalchemy.types.ARRAY which always includes an item_type attribute
which is itself an instance of TypeEngine

https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.ARRAY
"""

inner = compiler.process(type_.item_type, **kw)

return f"ARRAY<{inner}>"


class DatabricksDateTimeNoTimezoneType(sqlalchemy.types.TypeDecorator):
"""The datetime that pysql creates when it receives the contents of a TIMESTAMP_NTZ
includes a timezone of 'Etc/UTC'. But since SQLAlchemy's test suite assumes that
the sqlalchemy.types.DateTime type will return a datetime.datetime _without_ any
timezone set, we need to strip the timezone off the value received from pysql.

It's not clear if DBR sends a timezone to pysql or if pysql is adding it. This could be a bug.
"""

impl = sqlalchemy.types.DateTime

cache_ok = True

def process_result_value(self, value: Union[None, datetime], dialect):
if value is None:
return None
return value.replace(tzinfo=None)
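The stripping done in process_result_value() can be illustrated with plain datetime objects. A stdlib-only sketch, independent of SQLAlchemy and pysql (the sample value is invented):

```python
from datetime import datetime, timezone

# A value as pysql might hand it back: tz-aware, pinned to UTC
aware = datetime(2023, 10, 9, 12, 30, 45, tzinfo=timezone.utc)

# replace(tzinfo=None) drops the timezone without shifting the wall-clock time
naive = aware.replace(tzinfo=None)

assert naive.tzinfo is None
assert (naive.hour, naive.minute, naive.second) == (12, 30, 45)
```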


class DatabricksTimeType(sqlalchemy.types.TypeDecorator):
"""Databricks has no native TIME type. So we store it as a string."""

impl = sqlalchemy.types.Time
cache_ok = True

TIME_WITH_MICROSECONDS_FMT = "%H:%M:%S.%f"
TIME_NO_MICROSECONDS_FMT = "%H:%M:%S"

def process_bind_param(self, value: Union[time, None], dialect) -> Union[None, str]:
"""Values sent to the database are converted to %H:%M:%S strings."""
if value is None:
return None
return value.strftime(self.TIME_WITH_MICROSECONDS_FMT)

# mypy doesn't like this workaround because TypeEngine wants process_literal_param to return a string
def process_literal_param(self, value, dialect) -> time: # type: ignore
"""It's not clear to me why this is necessary. Without it, SQLAlchemy's Timetest:test_literal fails
because the string literal renderer receives a str() object and calls .isoformat() on it.

Whereas this method receives a datetime.time() object which is subsequently passed to that
same renderer. And that works.

UPDATE: After coping with the literal_processor override in DatabricksStringType, I suspect a similar
mechanism is at play. Two different processors are called in sequence. This is likely a byproduct
of Databricks not having a true TIME type. I think the string representation of Time() types is
somehow affecting the literal rendering process. But as long as this passes the tests, I'm not
worried about it.
"""
return value

def process_result_value(
self, value: Union[None, str], dialect
) -> Union[time, None]:
"""Values received from the database are parsed into datetime.time() objects"""
if value is None:
return None

try:
_parsed = datetime.strptime(value, self.TIME_WITH_MICROSECONDS_FMT)
except ValueError:
# If the string doesn't have microseconds, try parsing it without them
_parsed = datetime.strptime(value, self.TIME_NO_MICROSECONDS_FMT)

return _parsed.time()
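Taken together, process_bind_param() and process_result_value() amount to a string round trip. A stdlib-only sketch of the same logic as standalone functions (hypothetical names, same format strings as the class above):

```python
from datetime import datetime, time

TIME_WITH_MICROSECONDS_FMT = "%H:%M:%S.%f"
TIME_NO_MICROSECONDS_FMT = "%H:%M:%S"

def time_to_str(value: time) -> str:
    # Mirrors process_bind_param: always render with microseconds
    return value.strftime(TIME_WITH_MICROSECONDS_FMT)

def str_to_time(value: str) -> time:
    # Mirrors process_result_value: try the microseconds format first,
    # then fall back to the plain HH:MM:SS format
    try:
        parsed = datetime.strptime(value, TIME_WITH_MICROSECONDS_FMT)
    except ValueError:
        parsed = datetime.strptime(value, TIME_NO_MICROSECONDS_FMT)
    return parsed.time()

assert str_to_time(time_to_str(time(12, 34, 56, 789012))) == time(12, 34, 56, 789012)
assert str_to_time("08:15:00") == time(8, 15)
```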


class DatabricksStringType(sqlalchemy.types.TypeDecorator):
"""We have to implement our own String() type because SQLAlchemy's default implementation
wants to escape single-quotes with a doubled single-quote. Databricks uses a backslash for
escaping of literal strings. And SQLAlchemy's default escaping breaks Databricks SQL.
"""

impl = sqlalchemy.types.String
cache_ok = True
pe = ParamEscaper()

def process_literal_param(self, value, dialect) -> str:
"""SQLAlchemy's default string escaping for backslashes doesn't work for databricks. The logic here
implements the same logic as our legacy inline escaping logic.
"""

return self.pe.escape_string(value)

def literal_processor(self, dialect):
"""We manually override this method to prevent further processing of the string literal beyond
what happens in the process_literal_param() method.

The SQLAlchemy docs _specifically_ say to not override this method.
[Comment from @susodapop (Contributor, Author): Note for reviewers, I checked a couple other open source dialects and we are not the only dialect that directly implements the literal_processor method for the String() type. I presume other dialect authors do so for the same reason that we do.]


It appears that any processing that happens from TypeEngine.process_literal_param happens _before_
and _in addition to_ whatever the class's impl.literal_processor() method does. The String.literal_processor()
method performs a string replacement that doubles any single-quote in the contained string. This raises a syntax
error in Databricks. And it's not necessary because ParamEscaper() already implements all the escaping we need.

We should consider opening an issue on the SQLAlchemy project to see if I'm using it wrong.

See type_api.py::TypeEngine.literal_processor:

```python
def process(value: Any) -> str:
return fixed_impl_processor(
fixed_process_literal_param(value, dialect)
)
```

That call to fixed_impl_processor wraps the result of fixed_process_literal_param (which is the
process_literal_param defined in our Databricks dialect)

https://docs.sqlalchemy.org/en/20/core/custom_types.html#sqlalchemy.types.TypeDecorator.literal_processor
"""

def process(value):
"""This is a copy of the default String.literal_processor() method but stripping away
its double-escaping behaviour for single-quotes.
"""

_step1 = self.process_literal_param(value, dialect="databricks")
if dialect.identifier_preparer._double_percents:
_step2 = _step1.replace("%", "%%")
else:
_step2 = _step1

return "%s" % _step2

return process
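The difference between the two escaping styles discussed above can be shown with two toy functions. This is a simplified sketch: `doubled_quote_escape` imitates SQLAlchemy's default String.literal_processor behaviour, and `backslash_escape` only approximates what ParamEscaper does (the real class handles more cases):

```python
def doubled_quote_escape(value: str) -> str:
    # SQLAlchemy's default style: double any embedded single quote
    return "'%s'" % value.replace("'", "''")

def backslash_escape(value: str) -> str:
    # Databricks SQL style: backslash-escape backslashes and single quotes
    return "'%s'" % value.replace("\\", "\\\\").replace("'", "\\'")

assert doubled_quote_escape("it's") == "'it''s'"   # valid in most SQL dialects
assert backslash_escape("it's") == "'it\\'s'"      # what Databricks SQL expects
```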
104 changes: 83 additions & 21 deletions src/databricks/sqlalchemy/requirements.py
@@ -1,34 +1,96 @@
"""
This module is supposedly used by the compliance tests to control which tests are run based on database capabilities.
However, based on some experimentation that does not appear to be consistently the case. Until we better understand
when these requirements are and are not implemented, we prefer to manually capture the exact nature of the failures
and errors.

Once we better understand how to use requirements.py, an example exclusion will look like this:

import sqlalchemy.testing.requirements
import sqlalchemy.testing.exclusions

class Requirements(sqlalchemy.testing.requirements.SuiteRequirements):
@property
def __some_example_requirement(self):
return sqlalchemy.testing.exclusions.closed


[Comment from @susodapop (Contributor, Author):
I removed this comment since we now have a better understanding of how to use requirements.py to dictate which features are skipped.

That said, I think our approach going forward should be to use requirements.py only to permit tests to run (i.e. only to include exclusions.open() calls). For the tests that we skip because of limitations of Databricks, I think we should manually call these out in test_suite.py and provide a more precise skip message. Because if the test is skipped because of requirements.py, the output from pytest simply says "test not enabled for any dialect" which could be misleading.]

[Comment from @susodapop (Contributor, Author): I've implemented this fix in b31eef4 and 8200e23 👍]

The complete list of requirements is provided by SQLAlchemy here:

https://github.com/sqlalchemy/sqlalchemy/blob/main/lib/sqlalchemy/testing/requirements.py

When SQLAlchemy skips a test because a requirement is closed() it gives a generic skip message.
To make these failures more actionable, we only define requirements in this file that we wish to
force to be open(). If a test should be skipped on Databricks, it will be specifically marked skip
in test_suite.py with a Databricks-specific reason.

See the special note about the array_type exclusion below.
"""

import sqlalchemy.testing.requirements
import sqlalchemy.testing.exclusions

-import logging
-logger = logging.getLogger(__name__)
-logger.warning("requirements.py is not currently employed by Databricks dialect")
-class Requirements(sqlalchemy.testing.requirements.SuiteRequirements):
-    pass

class Requirements(sqlalchemy.testing.requirements.SuiteRequirements):
@property
def date_historic(self):
"""target dialect supports representation of Python
datetime.datetime() objects with historic (pre 1970) values."""

return sqlalchemy.testing.exclusions.open()

@property
def datetime_historic(self):
"""target dialect supports representation of Python
datetime.datetime() objects with historic (pre 1970) values."""

return sqlalchemy.testing.exclusions.open()

@property
def datetime_literals(self):
"""target dialect supports rendering of a date, time, or datetime as a
literal string, e.g. via the TypeEngine.literal_processor() method.

"""

return sqlalchemy.testing.exclusions.open()

@property
def timestamp_microseconds(self):
"""target dialect supports representation of Python
datetime.datetime() with microsecond objects but only
if TIMESTAMP is used."""

return sqlalchemy.testing.exclusions.open()

[Comment from @susodapop (Contributor, Author): Note for reviewers: the docstrings in this file are duplicated directly from SQLAlchemy's base requirements.py file. Any additional commentary specific to Databricks appears at the end of the docstring.]

@property
def time_microseconds(self):
"""target dialect supports representation of Python
datetime.time() with microsecond objects.

This requirement declaration isn't needed but I've included it here for completeness.
Since Databricks doesn't have a TIME type, SQLAlchemy will compile Time() columns
as STRING Databricks data types. And we use a custom time type to render those strings
between str() and datetime.time() representations. Therefore we can store _any_ precision
that SQLAlchemy needs. The time_microseconds requirement defaults to ON for all dialects
except mssql, mysql, mariadb, and oracle.
"""

return sqlalchemy.testing.exclusions.open()

@property
def infinity_floats(self):
"""The Float type can persist and load float('inf'), float('-inf')."""

return sqlalchemy.testing.exclusions.open()

@property
def precision_numerics_retains_significant_digits(self):
"""A precision numeric type will return empty significant digits,
i.e. a value such as 10.000 will come back in Decimal form with
the .000 maintained."""

return sqlalchemy.testing.exclusions.open()

@property
def precision_numerics_many_significant_digits(self):
"""target backend supports values with many digits on both sides,
such as 319438950232418390.273596, 87673.594069654243

"""
return sqlalchemy.testing.exclusions.open()

@property
def array_type(self):
"""While Databricks does support ARRAY types, pysql cannot bind them. So
we cannot use them with SQLAlchemy

Due to a bug in SQLAlchemy, we _must_ define this exclusion as closed() here or else the
test runner will crash the pytest process due to an AttributeError
"""

return sqlalchemy.testing.exclusions.closed()
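The open()/closed() gating used throughout this file can be sketched without SQLAlchemy. This is a toy re-implementation for illustration only; the real sqlalchemy.testing.exclusions API is richer, and all names here are invented:

```python
class Exclusion:
    """Toy stand-in for a sqlalchemy.testing.exclusions rule."""
    def __init__(self, enabled: bool):
        self.enabled = enabled

def open_rule() -> Exclusion:    # analogous to exclusions.open()
    return Exclusion(True)

def closed_rule() -> Exclusion:  # analogous to exclusions.closed()
    return Exclusion(False)

class ToyRequirements:
    @property
    def datetime_literals(self):
        return open_rule()

    @property
    def array_type(self):
        return closed_rule()

reqs = ToyRequirements()
# A test runner would skip any test whose requirement is not enabled
assert reqs.datetime_literals.enabled is True
assert reqs.array_type.enabled is False
```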