@@ -49,6 +49,11 @@ def add_file_links(
4949for link in t_spark_arrow_result_links :
5050if link .rowCount <= 0 :
5151continue
52+ logger .debug (
53+ "ResultFileDownloadManager.add_file_links: start offset {}, row count: {}" .format (
54+ link .startRowOffset ,link .rowCount
55+ )
56+ )
5257self .download_handlers .append (
5358ResultSetDownloadHandler (self .downloadable_result_settings ,link )
5459 )
@@ -88,6 +93,12 @@ def get_next_downloaded_file(
8893
8994# Check (and wait) for download status
9095if self ._check_if_download_successful (handler ):
96+ link = handler .result_link
97+ logger .debug (
98+ "ResultFileDownloadManager: file found for row index {}: start {}, row count: {}" .format (
99+ next_row_offset ,link .startRowOffset ,link .rowCount
100+ )
101+ )
91102# Buffer should be empty so set buffer to new ArrowQueue with result_file
92103result = DownloadedFile (
93104handler .result_file ,
@@ -97,40 +108,78 @@ def get_next_downloaded_file(
97108self .download_handlers .pop (idx )
98109# Return True upon successful download to continue loop and not force a retry
99110return result
111+ else :
112+ logger .debug (
113+ "ResultFileDownloadManager: cannot find file for row index {}" .format (
114+ next_row_offset
115+ )
116+ )
117+
100118# Download was not successful for next download item, force a retry
101119self ._shutdown_manager ()
102120return None
103121
def _remove_past_handlers(self, next_row_offset: int):
    """Drop every handler whose result link ends at or before ``next_row_offset``.

    A handler is kept only when ``startRowOffset + rowCount`` of its result
    link is greater than ``next_row_offset``; every other handler is removed
    from ``self.download_handlers``. The list object is updated in place and
    the relative order of the kept handlers is unchanged.

    Args:
        next_row_offset: Offset of the next row to be fetched.
    """
    # Lazy %-style arguments: the message is only rendered when DEBUG is enabled.
    logger.debug(
        "ResultFileDownloadManager: removing past handlers, current offset: %s",
        next_row_offset,
    )
    # Any link in which its start to end range doesn't include the next row
    # to be fetched does not need downloading.
    kept_handlers = []
    for handler in self.download_handlers:
        result_link = handler.result_link
        logger.debug(
            "- checking result link: start %s, row count: %s, current offset: %s",
            result_link.startRowOffset,
            result_link.rowCount,
            next_row_offset,
        )
        if result_link.startRowOffset + result_link.rowCount > next_row_offset:
            kept_handlers.append(handler)
    # Slice assignment mutates the existing list (as the former pop() loop did)
    # but filters in a single pass instead of shifting elements on every removal.
    self.download_handlers[:] = kept_handlers
113141
def _schedule_downloads(self):
    """Submit every not-yet-scheduled handler to ``self.thread_pool``.

    Handlers already flagged ``is_download_scheduled`` are skipped. When a
    submission raises, the error is logged and scheduling stops: the failing
    handler and all handlers after it keep their current flag.
    """
    # Schedule downloads for all download handlers if not already scheduled.
    logger.debug("ResultFileDownloadManager: schedule downloads")
    for handler in self.download_handlers:
        if handler.is_download_scheduled:
            continue
        try:
            # Lazy %-style arguments: rendered only when DEBUG is enabled.
            logger.debug(
                "- start: %s, row count: %s",
                handler.result_link.startRowOffset,
                handler.result_link.rowCount,
            )
            self.thread_pool.submit(handler.run)
        except Exception as e:
            logger.error(e)
            break
        # Only reached when submit() succeeded for this handler.
        handler.is_download_scheduled = True
125159
def _find_next_file_index(self, next_row_offset: int):
    """Return the index of the handler holding the file for ``next_row_offset``.

    Only handlers flagged ``is_download_scheduled`` whose result link has
    ``startRowOffset`` equal to ``next_row_offset`` are considered. Every
    match is logged at DEBUG level.

    Args:
        next_row_offset: Offset of the next row to be fetched.

    Returns:
        The index into ``self.download_handlers`` of the first match, or
        ``None`` when no handler matches.
    """
    # Lazy %-style arguments: rendered only when DEBUG is enabled.
    logger.debug(
        "ResultFileDownloadManager: trying to find file for row %s",
        next_row_offset,
    )
    # Get the handler index of the next file in order
    next_indices = [
        i
        for i, handler in enumerate(self.download_handlers)
        if handler.is_download_scheduled
        # TODO: shouldn't `next_row_offset` be tested against the range, not just start row offset?
        and handler.result_link.startRowOffset == next_row_offset
    ]

    for i in next_indices:
        link = self.download_handlers[i].result_link
        logger.debug(
            "- found file: start %s, row count %s",
            link.startRowOffset,
            link.rowCount,
        )

    return next_indices[0] if next_indices else None
135184
136185def _check_if_download_successful (self ,handler :ResultSetDownloadHandler ):