11import enum
22
33import pytest
4- from sqlalchemy .types import (
5- BigInteger ,
6- Boolean ,
7- Date ,
8- DateTime ,
9- Double ,
10- Enum ,
11- Float ,
12- Integer ,
13- Interval ,
14- LargeBinary ,
15- MatchType ,
16- Numeric ,
17- PickleType ,
18- SchemaType ,
19- SmallInteger ,
20- String ,
21- Text ,
22- Time ,
23- TypeEngine ,
24- Unicode ,
25- UnicodeText ,
26- Uuid ,
27- )
4+ import sqlalchemy
285
296from databricks .sqlalchemy import DatabricksDialect
307
@@ -55,43 +32,49 @@ class DatabricksDataType(enum.Enum):
# Defines the way that SQLAlchemy CamelCase types are compiled into Databricks SQL types.
# Note: I wish I could define this within the TestCamelCaseTypesCompilation class, but pytest doesn't like that.
camel_case_type_map = {
    sqlalchemy.types.BigInteger: DatabricksDataType.BIGINT,
    sqlalchemy.types.LargeBinary: DatabricksDataType.BINARY,
    sqlalchemy.types.Boolean: DatabricksDataType.BOOLEAN,
    sqlalchemy.types.Date: DatabricksDataType.DATE,
    sqlalchemy.types.DateTime: DatabricksDataType.TIMESTAMP,
    sqlalchemy.types.Double: DatabricksDataType.DOUBLE,
    sqlalchemy.types.Enum: DatabricksDataType.STRING,
    sqlalchemy.types.Float: DatabricksDataType.FLOAT,
    sqlalchemy.types.Integer: DatabricksDataType.INT,
    sqlalchemy.types.Interval: DatabricksDataType.TIMESTAMP,
    sqlalchemy.types.Numeric: DatabricksDataType.DECIMAL,
    sqlalchemy.types.PickleType: DatabricksDataType.BINARY,
    sqlalchemy.types.SmallInteger: DatabricksDataType.SMALLINT,
    sqlalchemy.types.String: DatabricksDataType.STRING,
    sqlalchemy.types.Text: DatabricksDataType.STRING,
    sqlalchemy.types.Time: DatabricksDataType.STRING,
    sqlalchemy.types.Unicode: DatabricksDataType.STRING,
    sqlalchemy.types.UnicodeText: DatabricksDataType.STRING,
    sqlalchemy.types.Uuid: DatabricksDataType.STRING,
}
7855
def dict_as_tuple_list(d: dict) -> list:
    """Return a list of ``(key, value)`` tuples from a dictionary.

    Used to feed mapping-shaped test fixtures into ``pytest.mark.parametrize``,
    which expects a sequence of argument tuples.
    """
    # list(d.items()) is equivalent to the original comprehension
    # [(key, value) for key, value in d.items()] — no need to unpack and repack.
    return list(d.items())
8160
8261
8362class CompilationTestBase :
8463dialect = DatabricksDialect ()
8564
86- def _assert_compiled_value (self ,type_ :TypeEngine ,expected :DatabricksDataType ):
65+ def _assert_compiled_value (
66+ self ,type_ :sqlalchemy .types .TypeEngine ,expected :DatabricksDataType
67+ ):
8768"""Assert that when type_ is compiled for the databricks dialect, it renders the DatabricksDataType name.
8869
8970 This method initialises the type_ with no arguments.
9071 """
9172compiled_result = type_ ().compile (dialect = self .dialect )# type: ignore
9273assert compiled_result == expected .name
9374
94- def _assert_compiled_value_explicit (self ,type_ :TypeEngine ,expected :str ):
75+ def _assert_compiled_value_explicit (
76+ self ,type_ :sqlalchemy .types .TypeEngine ,expected :str
77+ ):
9578"""Assert that when type_ is compiled for the databricks dialect, it renders the expected string.
9679
9780 This method expects an initialised type_ so that we can test how a TypeEngine created with arguments
@@ -117,12 +100,57 @@ class TestCamelCaseTypesCompilation(CompilationTestBase):
117100 [1]: https://docs.sqlalchemy.org/en/20/core/type_basics.html#generic-camelcase-types
118101 """
119102
120- @pytest .mark .parametrize ("type_, expected" ,_as_tuple_list )
103+ @pytest .mark .parametrize ("type_, expected" ,dict_as_tuple_list ( camel_case_type_map ) )
121104def test_bare_camel_case_types_compile (self ,type_ ,expected ):
122105self ._assert_compiled_value (type_ ,expected )
123106
124107def test_numeric_renders_as_decimal_with_precision (self ):
125- self ._assert_compiled_value_explicit (Numeric (10 ),"DECIMAL(10)" )
108+ self ._assert_compiled_value_explicit (
109+ sqlalchemy .types .Numeric (10 ),"DECIMAL(10)"
110+ )
126111
127112def test_numeric_renders_as_decimal_with_precision_and_scale (self ):
128- self ._assert_compiled_value_explicit (Numeric (10 ,2 ),"DECIMAL(10, 2)" )
113+ return self ._assert_compiled_value_explicit (
114+ sqlalchemy .types .Numeric (10 ,2 ),"DECIMAL(10, 2)"
115+ )
116+
117+
# Maps SQLAlchemy's backend-specific UPPERCASE types to the Databricks SQL
# type each one should compile into.
uppercase_type_map = {
    sqlalchemy.types.ARRAY: DatabricksDataType.ARRAY,
    sqlalchemy.types.BIGINT: DatabricksDataType.BIGINT,
    sqlalchemy.types.BINARY: DatabricksDataType.BINARY,
    sqlalchemy.types.BOOLEAN: DatabricksDataType.BOOLEAN,
    sqlalchemy.types.DATE: DatabricksDataType.DATE,
    sqlalchemy.types.DECIMAL: DatabricksDataType.DECIMAL,
    sqlalchemy.types.DOUBLE: DatabricksDataType.DOUBLE,
    sqlalchemy.types.FLOAT: DatabricksDataType.FLOAT,
    sqlalchemy.types.INT: DatabricksDataType.INT,
    sqlalchemy.types.SMALLINT: DatabricksDataType.SMALLINT,
    sqlalchemy.types.TIMESTAMP: DatabricksDataType.TIMESTAMP,
}
131+
132+
class TestUppercaseTypesCompilation(CompilationTestBase):
    """Per the sqlalchemy documentation[^1], uppercase types are considered to be specific to some
    database backends. These tests verify that the types compile into valid Databricks SQL type strings.

    [1]: https://docs.sqlalchemy.org/en/20/core/type_basics.html#backend-specific-uppercase-datatypes
    """

    @pytest.mark.parametrize("type_, expected", dict_as_tuple_list(uppercase_type_map))
    def test_bare_uppercase_types_compile(self, type_, expected):
        # BUGFIX: the original guard was `isinstance(type_, type(sqlalchemy.types.ARRAY))`.
        # Every `type_` parametrized here is a class, and `type(ARRAY)` is ARRAY's
        # metaclass, so that condition matched *every* parameter and the assertion
        # below never ran. Compare identity against the ARRAY class itself.
        if type_ is sqlalchemy.types.ARRAY:
            # ARRAY cannot be initialised without an item definition, so it is
            # exercised separately below. It stays in uppercase_type_map for clarity.
            pytest.skip("ARRAY requires an item type; covered by the dedicated test")
        # No `return` here: pytest warns on non-None returns from tests.
        self._assert_compiled_value(type_, expected)

    def test_array_string_renders_as_array_of_string(self):
        """SQLAlchemy's ARRAY type requires an item definition. And their docs indicate that they've only tested
        it with Postgres since that's the only first-class dialect with support for ARRAY.

        https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.ARRAY
        """
        self._assert_compiled_value_explicit(
            sqlalchemy.types.ARRAY(sqlalchemy.types.String), "ARRAY<STRING>"
        )