11import enum
22
33import pytest
4- from sqlalchemy .types import (
5- BigInteger ,
6- Boolean ,
7- Date ,
8- DateTime ,
9- Double ,
10- Enum ,
11- Float ,
12- Integer ,
13- Interval ,
14- LargeBinary ,
15- MatchType ,
16- Numeric ,
17- PickleType ,
18- SchemaType ,
19- SmallInteger ,
20- String ,
21- Text ,
22- Time ,
23- TypeEngine ,
24- Unicode ,
25- UnicodeText ,
26- Uuid ,
27- )
4+ import sqlalchemy
285
296from databricks .sqlalchemy import DatabricksDialect
307
@@ -55,43 +32,49 @@ class DatabricksDataType(enum.Enum):
# Defines the way that SQLAlchemy CamelCase types are compiled into Databricks SQL types.
# Note: I wish I could define this within the TestCamelCaseTypesCompilation class, but pytest doesn't like that.
camel_case_type_map = {
    sqlalchemy.types.BigInteger: DatabricksDataType.BIGINT,
    sqlalchemy.types.LargeBinary: DatabricksDataType.BINARY,
    sqlalchemy.types.Boolean: DatabricksDataType.BOOLEAN,
    sqlalchemy.types.Date: DatabricksDataType.DATE,
    sqlalchemy.types.DateTime: DatabricksDataType.TIMESTAMP,
    sqlalchemy.types.Double: DatabricksDataType.DOUBLE,
    sqlalchemy.types.Enum: DatabricksDataType.STRING,
    sqlalchemy.types.Float: DatabricksDataType.FLOAT,
    sqlalchemy.types.Integer: DatabricksDataType.INT,
    sqlalchemy.types.Interval: DatabricksDataType.TIMESTAMP,
    sqlalchemy.types.Numeric: DatabricksDataType.DECIMAL,
    sqlalchemy.types.PickleType: DatabricksDataType.BINARY,
    sqlalchemy.types.SmallInteger: DatabricksDataType.SMALLINT,
    sqlalchemy.types.String: DatabricksDataType.STRING,
    sqlalchemy.types.Text: DatabricksDataType.STRING,
    sqlalchemy.types.Time: DatabricksDataType.STRING,
    sqlalchemy.types.Unicode: DatabricksDataType.STRING,
    sqlalchemy.types.UnicodeText: DatabricksDataType.STRING,
    sqlalchemy.types.Uuid: DatabricksDataType.STRING,
}
7855
def dict_as_tuple_list(d: dict) -> list:
    """Return a list of ``(key, value)`` tuples from a dictionary.

    Used to feed mapping-shaped test fixtures into ``pytest.mark.parametrize``,
    which expects a sequence of argument tuples.
    """
    # list(d.items()) is equivalent to the original comprehension
    # [(key, value) for key, value in d.items()] — no need to unpack and repack.
    return list(d.items())
8160
8261
8362class CompilationTestBase :
8463dialect = DatabricksDialect ()
8564
86- def _assert_compiled_value (self ,type_ :TypeEngine ,expected :DatabricksDataType ):
65+ def _assert_compiled_value (
66+ self ,type_ :sqlalchemy .types .TypeEngine ,expected :DatabricksDataType
67+ ):
8768"""Assert that when type_ is compiled for the databricks dialect, it renders the DatabricksDataType name.
8869
8970 This method initialises the type_ with no arguments.
9071 """
9172compiled_result = type_ ().compile (dialect = self .dialect )# type: ignore
9273assert compiled_result == expected .name
9374
94- def _assert_compiled_value_explicit (self ,type_ :TypeEngine ,expected :str ):
75+ def _assert_compiled_value_explicit (
76+ self ,type_ :sqlalchemy .types .TypeEngine ,expected :str
77+ ):
9578"""Assert that when type_ is compiled for the databricks dialect, it renders the expected string.
9679
9780 This method expects an initialised type_ so that we can test how a TypeEngine created with arguments
@@ -117,12 +100,57 @@ class TestCamelCaseTypesCompilation(CompilationTestBase):
117100 [1]: https://docs.sqlalchemy.org/en/20/core/type_basics.html#generic-camelcase-types
118101 """
119102
120- @pytest .mark .parametrize ("type_, expected" ,_as_tuple_list )
103+ @pytest .mark .parametrize ("type_, expected" ,dict_as_tuple_list ( camel_case_type_map ) )
121104def test_bare_camel_case_types_compile (self ,type_ ,expected ):
122105self ._assert_compiled_value (type_ ,expected )
123106
124107def test_numeric_renders_as_decimal_with_precision (self ):
125- self ._assert_compiled_value_explicit (Numeric (10 ),"DECIMAL(10)" )
108+ self ._assert_compiled_value_explicit (
109+ sqlalchemy .types .Numeric (10 ),"DECIMAL(10)"
110+ )
126111
127112def test_numeric_renders_as_decimal_with_precision_and_scale (self ):
128- self ._assert_compiled_value_explicit (Numeric (10 ,2 ),"DECIMAL(10, 2)" )
113+ return self ._assert_compiled_value_explicit (
114+ sqlalchemy .types .Numeric (10 ,2 ),"DECIMAL(10, 2)"
115+ )
116+
117+
# Maps SQLAlchemy's backend-specific UPPERCASE types to the Databricks SQL
# type each one should compile into.
uppercase_type_map = {
    sqlalchemy.types.ARRAY: DatabricksDataType.ARRAY,
    sqlalchemy.types.BIGINT: DatabricksDataType.BIGINT,
    sqlalchemy.types.BINARY: DatabricksDataType.BINARY,
    sqlalchemy.types.BOOLEAN: DatabricksDataType.BOOLEAN,
    sqlalchemy.types.DATE: DatabricksDataType.DATE,
    sqlalchemy.types.DECIMAL: DatabricksDataType.DECIMAL,
    sqlalchemy.types.DOUBLE: DatabricksDataType.DOUBLE,
    sqlalchemy.types.FLOAT: DatabricksDataType.FLOAT,
    sqlalchemy.types.INT: DatabricksDataType.INT,
    sqlalchemy.types.SMALLINT: DatabricksDataType.SMALLINT,
    sqlalchemy.types.TIMESTAMP: DatabricksDataType.TIMESTAMP,
}
131+
132+
class TestUppercaseTypesCompilation(CompilationTestBase):
    """Per the sqlalchemy documentation[^1], uppercase types are considered to be specific to some
    database backends. These tests verify that the types compile into valid Databricks SQL type strings.

    [1]: https://docs.sqlalchemy.org/en/20/core/type_basics.html#backend-specific-uppercase-datatypes
    """

    @pytest.mark.parametrize("type_, expected", dict_as_tuple_list(uppercase_type_map))
    def test_bare_uppercase_types_compile(self, type_, expected):
        # BUGFIX: the original guard was `isinstance(type_, type(sqlalchemy.types.ARRAY))`.
        # Every `type_` parametrized here is a class, and `type(ARRAY)` is ARRAY's
        # metaclass, so that condition matched *every* parameter and the assertion
        # below never ran. Compare identity against the ARRAY class itself.
        if type_ is sqlalchemy.types.ARRAY:
            # ARRAY cannot be initialised without an item definition, so it is
            # exercised separately below. It stays in uppercase_type_map for clarity.
            pytest.skip("ARRAY requires an item type; covered by the dedicated test")
        # No `return` here: pytest warns on non-None returns from tests.
        self._assert_compiled_value(type_, expected)

    def test_array_string_renders_as_array_of_string(self):
        """SQLAlchemy's ARRAY type requires an item definition. And their docs indicate that they've only tested
        it with Postgres since that's the only first-class dialect with support for ARRAY.

        https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.ARRAY
        """
        self._assert_compiled_value_explicit(
            sqlalchemy.types.ARRAY(sqlalchemy.types.String), "ARRAY<STRING>"
        )