Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Introduce row_limit param #607

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
varun-edachali-dbx merged 13 commits into sea-migration from max-rows-sea
Jul 7, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
13 commits
Select commit. Hold shift + click to select a range
bb3e94d
introduce row_limit
varun-edachali-dbx — Jun 19, 2025
dc3a1fb
move use_sea init to Session constructor
varun-edachali-dbx — Jun 19, 2025
eba17c1
more explicit typing
varun-edachali-dbx — Jun 19, 2025
00e57e7
add row_limit to Thrift backend
varun-edachali-dbx — Jun 19, 2025
304ef0e
formatting (black)
varun-edachali-dbx — Jun 19, 2025
414117a
add e2e test for thrift resultRowLimit
varun-edachali-dbx — Jun 20, 2025
2058907
explicitly convert extra cursor params to dict
varun-edachali-dbx — Jun 20, 2025
c1c0e7d
Merge branch 'sea-migration' into max-rows-sea
varun-edachali-dbx — Jun 26, 2025
215d886
Merge branch 'sea-migration' into max-rows-sea
varun-edachali-dbx — Jun 26, 2025
084da7a
remove excess tests
varun-edachali-dbx — Jun 26, 2025
ef69180
Merge branch 'sea-migration' into max-rows-sea
varun-edachali-dbx — Jul 3, 2025
4104d75
Merge branch 'sea-migration' into max-rows-sea
varun-edachali-dbx — Jul 3, 2025
c94d453
add docstring for row_limit
varun-edachali-dbx — Jul 7, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions — src/databricks/sql/backend/databricks_client.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -85,6 +85,7 @@ def execute_command(
parameters: List,
async_op: bool,
enforce_embedded_schema_correctness: bool,
row_limit: Optional[int] = None,
) -> Union["ResultSet", None]:
"""
Executes a SQL command or query within the specified session.
Expand All@@ -103,6 +104,7 @@ def execute_command(
parameters: List of parameters to bind to the query
async_op: Whether to execute the command asynchronously
enforce_embedded_schema_correctness: Whether to enforce schema correctness
row_limit: Maximum number of rows in the operation result.

Returns:
If async_op is False, returns a ResultSet object containing the
Expand Down
3 changes: 2 additions & 1 deletion — src/databricks/sql/backend/sea/backend.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -405,6 +405,7 @@ def execute_command(
parameters: List[Dict[str, Any]],
async_op: bool,
enforce_embedded_schema_correctness: bool,
row_limit: Optional[int] = None,
) -> Union[SeaResultSet, None]:
"""
Execute a SQL command using the SEA backend.
Expand DownExpand Up@@ -462,7 +463,7 @@ def execute_command(
format=format,
wait_timeout=(WaitTimeout.ASYNC if async_op else WaitTimeout.SYNC).value,
on_wait_timeout="CONTINUE",
row_limit=max_rows,
row_limit=row_limit,
parameters=sea_parameters if sea_parameters else None,
result_compression=result_compression,
)
Expand Down
4 changes: 3 additions & 1 deletion — src/databricks/sql/backend/thrift_backend.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -4,7 +4,7 @@
import math
import time
import threading
from typing import List, Union, Any, TYPE_CHECKING
from typing import List, Optional, Union, Any, TYPE_CHECKING

if TYPE_CHECKING:
from databricks.sql.client import Cursor
Expand DownExpand Up@@ -929,6 +929,7 @@ def execute_command(
parameters=[],
async_op=False,
enforce_embedded_schema_correctness=False,
row_limit: Optional[int] = None,
) -> Union["ResultSet", None]:
thrift_handle = session_id.to_thrift_handle()
if not thrift_handle:
Expand DownExpand Up@@ -969,6 +970,7 @@ def execute_command(
useArrowNativeTypes=spark_arrow_types,
parameters=parameters,
enforceEmbeddedSchemaCorrectness=enforce_embedded_schema_correctness,
resultRowLimit=row_limit,
)
resp = self.make_request(self._client.ExecuteStatement, req)

Expand Down
28 changes: 20 additions & 8 deletions — src/databricks/sql/client.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -335,8 +335,14 @@ def cursor(
self,
arraysize: int = DEFAULT_ARRAY_SIZE,
buffer_size_bytes: int = DEFAULT_RESULT_BUFFER_SIZE_BYTES,
row_limit: Optional[int] = None,
) -> "Cursor":
"""
Args:
arraysize: The maximum number of rows in direct results.
buffer_size_bytes: The maximum number of bytes in direct results.
row_limit: The maximum number of rows in the result.

Return a new Cursor object using the connection.

Will throw an Error if the connection has been closed.
Expand All@@ -349,6 +355,7 @@ def cursor(
self.session.backend,
arraysize=arraysize,
result_buffer_size_bytes=buffer_size_bytes,
row_limit=row_limit,
)
self._cursors.append(cursor)
return cursor
Expand DownExpand Up@@ -382,6 +389,7 @@ def __init__(
backend: DatabricksClient,
result_buffer_size_bytes: int = DEFAULT_RESULT_BUFFER_SIZE_BYTES,
arraysize: int = DEFAULT_ARRAY_SIZE,
row_limit: Optional[int] = None,
) -> None:
"""
These objects represent a database cursor, which is used to manage the context of a fetch
Expand All@@ -391,16 +399,18 @@ def __init__(
visible by other cursors or connections.
"""

self.connection = connection
self.rowcount = -1 # Return -1 as this is not supported
self.buffer_size_bytes = result_buffer_size_bytes
self.connection: Connection = connection

self.rowcount: int = -1 # Return -1 as this is not supported
self.buffer_size_bytes: int = result_buffer_size_bytes
self.active_result_set: Union[ResultSet, None] = None
self.arraysize = arraysize
self.arraysize: int = arraysize
self.row_limit: Optional[int] = row_limit
# Note that Cursor closed => active result set closed, but not vice versa
self.open = True
self.executing_command_id = None
self.backend = backend
self.active_command_id = None
self.open: bool = True
self.executing_command_id: Optional[CommandId] = None
self.backend: DatabricksClient = backend
self.active_command_id: Optional[CommandId] = None
self.escaper = ParamEscaper()
self.lastrowid = None

Expand DownExpand Up@@ -779,6 +789,7 @@ def execute(
parameters=prepared_params,
async_op=False,
enforce_embedded_schema_correctness=enforce_embedded_schema_correctness,
row_limit=self.row_limit,
)

if self.active_result_set and self.active_result_set.is_staging_operation:
Expand DownExpand Up@@ -835,6 +846,7 @@ def execute_async(
parameters=prepared_params,
async_op=True,
enforce_embedded_schema_correctness=enforce_embedded_schema_correctness,
row_limit=self.row_limit,
)

return self
Expand Down
60 changes: 58 additions & 2 deletions — tests/e2e/test_driver.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -113,10 +113,12 @@ def connection(self, extra_params=()):
conn.close()

@contextmanager
def cursor(self, extra_params=()):
def cursor(self, extra_params=(), extra_cursor_params=()):
with self.connection(extra_params) as conn:
cursor = conn.cursor(
arraysize=self.arraysize, buffer_size_bytes=self.buffer_size_bytes
arraysize=self.arraysize,
buffer_size_bytes=self.buffer_size_bytes,
**dict(extra_cursor_params),
)
try:
yield cursor
Expand DownExpand Up@@ -943,6 +945,60 @@ def test_catalogs_returns_arrow_table(self):
results = cursor.fetchall_arrow()
assert isinstance(results, pyarrow.Table)

def test_row_limit_with_larger_result(self):
"""Test that row_limit properly constrains results when query would return more rows"""
row_limit = 1000
with self.cursor(extra_cursor_params={"row_limit": row_limit}) as cursor:
# Execute a query that returns more than row_limit rows
cursor.execute("SELECT * FROM range(2000)")
rows = cursor.fetchall()

# Check if the number of rows is limited to row_limit
assert len(rows) == row_limit, f"Expected {row_limit} rows, got {len(rows)}"

def test_row_limit_with_smaller_result(self):
"""Test that row_limit doesn't affect results when query returns fewer rows than limit"""
row_limit = 100
expected_rows = 50
with self.cursor(extra_cursor_params={"row_limit": row_limit}) as cursor:
# Execute a query that returns fewer than row_limit rows
cursor.execute(f"SELECT * FROM range({expected_rows})")
rows = cursor.fetchall()

# Check if all rows are returned (not limited by row_limit)
assert (
len(rows) == expected_rows
), f"Expected {expected_rows} rows, got {len(rows)}"

@skipUnless(pysql_supports_arrow(), "arrow test needs arrow support")
def test_row_limit_with_arrow_larger_result(self):
"""Test that row_limit properly constrains arrow results when query would return more rows"""
row_limit = 800
with self.cursor(extra_cursor_params={"row_limit": row_limit}) as cursor:
# Execute a query that returns more than row_limit rows
cursor.execute("SELECT * FROM range(1500)")
arrow_table = cursor.fetchall_arrow()

# Check if the number of rows in the arrow table is limited to row_limit
assert (
arrow_table.num_rows == row_limit
), f"Expected {row_limit} rows, got {arrow_table.num_rows}"

@skipUnless(pysql_supports_arrow(), "arrow test needs arrow support")
def test_row_limit_with_arrow_smaller_result(self):
"""Test that row_limit doesn't affect arrow results when query returns fewer rows than limit"""
row_limit = 200
expected_rows = 100
with self.cursor(extra_cursor_params={"row_limit": row_limit}) as cursor:
# Execute a query that returns fewer than row_limit rows
cursor.execute(f"SELECT * FROM range({expected_rows})")
arrow_table = cursor.fetchall_arrow()

# Check if all rows are returned (not limited by row_limit)
assert (
arrow_table.num_rows == expected_rows
), f"Expected {expected_rows} rows, got {arrow_table.num_rows}"


# use a RetrySuite to encapsulate these tests which we'll typically want to run together; however keep
# the 429/503 subsuites separate since they execute under different circumstances.
Expand Down
Loading

[8]ページ先頭

©2009-2025 Movatter.jp