Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit f5f4a1d

Browse files
committed
fixes
Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
1 parent ac8c9cb · commit f5f4a1d

File tree

5 files changed

+167
-190
lines changed

5 files changed

+167
-190
lines changed

‎modin/experimental/core/execution/native/implementations/hdk_on_native/test/test_dataframe.py‎

Lines changed: 41 additions & 41 deletions
Original file line number | Diff line number | Diff line change
@@ -18,14 +18,14 @@
1818
importpandas
1919
importpyarrow
2020
importpytest
21-
frompandas._testingimportensure_clean
2221
frompandas.core.dtypes.commonimportis_list_like
2322
frompyhdkimport__version__ashdk_version
2423

2524
frommodin.configimportStorageFormat
2625
frommodin.pandas.test.utilsimport (
2726
create_test_dfs,
2827
default_to_pandas_ignore_string,
28+
get_unique_filename,
2929
io_ops_bad_exc,
3030
random_state,
3131
test_data,
@@ -324,17 +324,17 @@ def test_read_csv_datetime(
324324

325325
@pytest.mark.parametrize("engine", [None,"arrow"])
326326
@pytest.mark.parametrize("parse_dates", [None,True,False])
327-
deftest_read_csv_datetime_tz(self,engine,parse_dates):
328-
withensure_clean(".csv")asfile:
329-
withopen(file,"w")asf:
330-
f.write("test\n2023-01-01T00:00:00.000-07:00")
327+
deftest_read_csv_datetime_tz(self,engine,parse_dates,tmp_path):
328+
unique_filename=get_unique_filename(extension="csv",data_dir=tmp_path)
329+
withopen(unique_filename,"w")asf:
330+
f.write("test\n2023-01-01T00:00:00.000-07:00")
331331

332-
eval_io(
333-
fn_name="read_csv",
334-
filepath_or_buffer=file,
335-
md_extra_kwargs={"engine":engine},
336-
parse_dates=parse_dates,
337-
)
332+
eval_io(
333+
fn_name="read_csv",
334+
filepath_or_buffer=unique_filename,
335+
md_extra_kwargs={"engine":engine},
336+
parse_dates=parse_dates,
337+
)
338338

339339
@pytest.mark.parametrize("engine", [None,"arrow"])
340340
@pytest.mark.parametrize(
@@ -382,26 +382,26 @@ def test_read_csv_col_handling(
382382
"c1.1,c1,c1.1,c1,c1.1,c1.2,c1.2,c2",
383383
],
384384
)
385-
deftest_read_csv_duplicate_cols(self,cols):
385+
deftest_read_csv_duplicate_cols(self,cols,tmp_path):
386386
deftest(df,lib,**kwargs):
387387
data=f"{cols}\n"
388-
withensure_clean(".csv")asfname:
389-
withopen(fname,"w")asf:
390-
f.write(data)
391-
returnlib.read_csv(fname)
388+
unique_filename=get_unique_filename(extension="csv",data_dir=tmp_path)
389+
withopen(unique_filename,"w")asf:
390+
f.write(data)
391+
returnlib.read_csv(unique_filename)
392392

393393
run_and_compare(test,data={})
394394

395-
deftest_read_csv_dtype_object(self):
395+
deftest_read_csv_dtype_object(self,tmp_path):
396396
withpytest.warns(UserWarning)aswarns:
397-
withensure_clean(".csv")asfile:
398-
withopen(file,"w")asf:
399-
f.write("test\ntest")
397+
unique_filename=get_unique_filename(extension="csv",data_dir=tmp_path)
398+
withopen(unique_filename,"w")asf:
399+
f.write("test\ntest")
400400

401-
deftest(**kwargs):
402-
returnpd.read_csv(file,dtype={"test":"object"})
401+
deftest(**kwargs):
402+
returnpd.read_csv(unique_filename,dtype={"test":"object"})
403403

404-
run_and_compare(test,data={})
404+
run_and_compare(test,data={})
405405
forwarninwarns.list:
406406
assertnotre.match(r".*defaulting to pandas.*",str(warn))
407407

@@ -870,30 +870,30 @@ def concat(df1, df2, lib, **kwargs):
870870
@pytest.mark.parametrize("transform", [True,False])
871871
@pytest.mark.parametrize("sort_last", [True,False])
872872
# RecursionError in case of concatenation of big number of frames
873-
deftest_issue_5889(self,transform,sort_last):
874-
withensure_clean(".csv")asfile:
875-
data= {"a": [1,2,3],"b": [1,2,3]}iftransformelse {"a": [1,2,3]}
876-
pandas.DataFrame(data).to_csv(file,index=False)
873+
deftest_issue_5889(self,transform,sort_last,tmp_path):
874+
unique_filename=get_unique_filename(extension="csv",data_dir=tmp_path)
875+
data= {"a": [1,2,3],"b": [1,2,3]}iftransformelse {"a": [1,2,3]}
876+
pandas.DataFrame(data).to_csv(unique_filename,index=False)
877877

878-
deftest_concat(lib,**kwargs):
879-
iftransform:
878+
deftest_concat(lib,**kwargs):
879+
iftransform:
880880

881-
defread_csv():
882-
returnlib.read_csv(file)["b"]
881+
defread_csv():
882+
returnlib.read_csv(unique_filename)["b"]
883883

884-
else:
884+
else:
885885

886-
defread_csv():
887-
returnlib.read_csv(file)
886+
defread_csv():
887+
returnlib.read_csv(unique_filename)
888888

889-
df=read_csv()
890-
for_inrange(100):
891-
df=lib.concat([df,read_csv()])
892-
ifsort_last:
893-
df=lib.concat([df,read_csv()],sort=True)
894-
returndf
889+
df=read_csv()
890+
for_inrange(100):
891+
df=lib.concat([df,read_csv()])
892+
ifsort_last:
893+
df=lib.concat([df,read_csv()],sort=True)
894+
returndf
895895

896-
run_and_compare(test_concat,data={})
896+
run_and_compare(test_concat,data={})
897897

898898

899899
classTestGroupby:

‎modin/experimental/pandas/test/test_io_exp.py‎

Lines changed: 28 additions & 40 deletions
Original file line number | Diff line number | Diff line change
@@ -18,13 +18,13 @@
1818
importnumpyasnp
1919
importpandas
2020
importpytest
21-
frompandas._testingimportensure_clean
2221

2322
importmodin.experimental.pandasaspd
24-
frommodin.configimportAsyncReadMode,Engine
23+
frommodin.configimportEngine
2524
frommodin.pandas.test.utilsimport (
2625
df_equals,
2726
eval_general,
27+
get_unique_filename,
2828
parse_dates_values_by_id,
2929
test_data,
3030
time_parsing_csv_path,
@@ -355,7 +355,7 @@ def test_xml_glob(tmp_path, filename):
355355
reason=f"{Engine.get()} does not have experimental read_custom_text API",
356356
)
357357
@pytest.mark.parametrize("set_async_read_mode", [False,True],indirect=True)
358-
deftest_read_custom_json_text(set_async_read_mode):
358+
deftest_read_custom_json_text(set_async_read_mode,tmp_path):
359359
def_generate_json(file_name,nrows,ncols):
360360
data=np.random.rand(nrows,ncols)
361361
df=pandas.DataFrame(data,columns=[f"col{x}"forxinrange(ncols)])
@@ -374,33 +374,27 @@ def _custom_parser(io_input, **kwargs):
374374
result[key].append(obj[key])
375375
returnpandas.DataFrame(result).rename(columns={"col0":"testID"})
376376

377-
withensure_clean()asfilename:
378-
_generate_json(filename,64,8)
377+
unique_filename=get_unique_filename(data_dir=tmp_path)
378+
_generate_json(unique_filename,64,8)
379379

380-
df1=pd.read_custom_text(
381-
filename,
382-
columns=["testID","col1","col3"],
383-
custom_parser=_custom_parser,
384-
is_quoting=False,
385-
)
386-
df2=pd.read_json(filename,lines=True)[["col0","col1","col3"]].rename(
387-
columns={"col0":"testID"}
388-
)
389-
ifAsyncReadMode.get():
390-
# If read operations are asynchronous, then the dataframes
391-
# check should be inside `ensure_clean` context
392-
# because the file may be deleted before actual reading starts
393-
df_equals(df1,df2)
394-
ifnotAsyncReadMode.get():
395-
df_equals(df1,df2)
380+
df1=pd.read_custom_text(
381+
unique_filename,
382+
columns=["testID","col1","col3"],
383+
custom_parser=_custom_parser,
384+
is_quoting=False,
385+
)
386+
df2=pd.read_json(unique_filename,lines=True)[["col0","col1","col3"]].rename(
387+
columns={"col0":"testID"}
388+
)
389+
df_equals(df1,df2)
396390

397391

398392
@pytest.mark.skipif(
399393
Engine.get()notin ("Ray","Unidist","Dask"),
400394
reason=f"{Engine.get()} does not have experimental API",
401395
)
402396
@pytest.mark.parametrize("set_async_read_mode", [False,True],indirect=True)
403-
deftest_read_evaluated_dict(set_async_read_mode):
397+
deftest_read_evaluated_dict(set_async_read_mode,tmp_path):
404398
def_generate_evaluated_dict(file_name,nrows,ncols):
405399
result= {}
406400
keys= [f"col{x}"forxinrange(ncols)]
@@ -430,23 +424,17 @@ def columns_callback(io_input, **kwargs):
430424
break
431425
returncolumns
432426

433-
withensure_clean()asfilename:
434-
_generate_evaluated_dict(filename,64,8)
427+
unique_filename=get_unique_filename(data_dir=tmp_path)
428+
_generate_evaluated_dict(unique_filename,64,8)
435429

436-
df1=pd.read_custom_text(
437-
filename,
438-
columns=["col1","col2"],
439-
custom_parser=_custom_parser,
440-
)
441-
assertdf1.shape== (64,2)
430+
df1=pd.read_custom_text(
431+
unique_filename,
432+
columns=["col1","col2"],
433+
custom_parser=_custom_parser,
434+
)
435+
assertdf1.shape== (64,2)
442436

443-
df2=pd.read_custom_text(
444-
filename,columns=columns_callback,custom_parser=_custom_parser
445-
)
446-
ifAsyncReadMode.get():
447-
# If read operations are asynchronous, then the dataframes
448-
# check should be inside `ensure_clean` context
449-
# because the file may be deleted before actual reading starts
450-
df_equals(df1,df2)
451-
ifnotAsyncReadMode.get():
452-
df_equals(df1,df2)
437+
df2=pd.read_custom_text(
438+
unique_filename,columns=columns_callback,custom_parser=_custom_parser
439+
)
440+
df_equals(df1,df2)

‎modin/pandas/test/dataframe/test_indexing.py‎

Lines changed: 11 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,6 @@
1717
importnumpyasnp
1818
importpandas
1919
importpytest
20-
frompandas._testingimportensure_clean
2120
frompandas.testingimportassert_index_equal
2221

2322
importmodin.pandasaspd
@@ -35,6 +34,7 @@
3534
df_equals,
3635
eval_general,
3736
generate_multiindex,
37+
get_unique_filename,
3838
int_arg_keys,
3939
int_arg_values,
4040
name_contains,
@@ -2207,14 +2207,16 @@ def test___setitem__partitions_aligning():
22072207
df_equals(md_df,pd_df)
22082208

22092209

2210-
deftest___setitem__with_mismatched_partitions():
2211-
withensure_clean(".csv")asfname:
2212-
np.savetxt(fname,np.random.randint(0,100,size=(200_000,99)),delimiter=",")
2213-
modin_df=pd.read_csv(fname)
2214-
pandas_df=pandas.read_csv(fname)
2215-
modin_df["new"]=pd.Series(list(range(len(modin_df))))
2216-
pandas_df["new"]=pandas.Series(list(range(len(pandas_df))))
2217-
df_equals(modin_df,pandas_df)
2210+
deftest___setitem__with_mismatched_partitions(tmp_path):
2211+
unique_filename=get_unique_filename(extension="csv",data_dir=tmp_path)
2212+
np.savetxt(
2213+
unique_filename,np.random.randint(0,100,size=(200_000,99)),delimiter=","
2214+
)
2215+
modin_df=pd.read_csv(unique_filename)
2216+
pandas_df=pandas.read_csv(unique_filename)
2217+
modin_df["new"]=pd.Series(list(range(len(modin_df))))
2218+
pandas_df["new"]=pandas.Series(list(range(len(pandas_df))))
2219+
df_equals(modin_df,pandas_df)
22182220

22192221

22202222
deftest___setitem__mask():

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp