Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit f5f4a1d

Browse files
committed
fixes
Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
1 parent ac8c9cb · commit f5f4a1d

File tree

5 files changed

+167
-190
lines changed

5 files changed

+167
-190
lines changed

‎modin/experimental/core/execution/native/implementations/hdk_on_native/test/test_dataframe.py‎

Lines changed: 41 additions & 41 deletions
Original file line number | Diff line number | Diff line change
@@ -18,14 +18,14 @@
1818
importpandas
1919
importpyarrow
2020
importpytest
21-
frompandas._testingimportensure_clean
2221
frompandas.core.dtypes.commonimportis_list_like
2322
frompyhdkimport__version__ashdk_version
2423

2524
frommodin.configimportStorageFormat
2625
frommodin.pandas.test.utilsimport (
2726
create_test_dfs,
2827
default_to_pandas_ignore_string,
28+
get_unique_filename,
2929
io_ops_bad_exc,
3030
random_state,
3131
test_data,
@@ -324,17 +324,17 @@ def test_read_csv_datetime(
324324

325325
@pytest.mark.parametrize("engine", [None,"arrow"])
326326
@pytest.mark.parametrize("parse_dates", [None,True,False])
327-
deftest_read_csv_datetime_tz(self,engine,parse_dates):
328-
withensure_clean(".csv")asfile:
329-
withopen(file,"w")asf:
330-
f.write("test\n2023-01-01T00:00:00.000-07:00")
327+
deftest_read_csv_datetime_tz(self,engine,parse_dates,tmp_path):
328+
unique_filename=get_unique_filename(extension="csv",data_dir=tmp_path)
329+
withopen(unique_filename,"w")asf:
330+
f.write("test\n2023-01-01T00:00:00.000-07:00")
331331

332-
eval_io(
333-
fn_name="read_csv",
334-
filepath_or_buffer=file,
335-
md_extra_kwargs={"engine":engine},
336-
parse_dates=parse_dates,
337-
)
332+
eval_io(
333+
fn_name="read_csv",
334+
filepath_or_buffer=unique_filename,
335+
md_extra_kwargs={"engine":engine},
336+
parse_dates=parse_dates,
337+
)
338338

339339
@pytest.mark.parametrize("engine", [None,"arrow"])
340340
@pytest.mark.parametrize(
@@ -382,26 +382,26 @@ def test_read_csv_col_handling(
382382
"c1.1,c1,c1.1,c1,c1.1,c1.2,c1.2,c2",
383383
],
384384
)
385-
deftest_read_csv_duplicate_cols(self,cols):
385+
deftest_read_csv_duplicate_cols(self,cols,tmp_path):
386386
deftest(df,lib,**kwargs):
387387
data=f"{cols}\n"
388-
withensure_clean(".csv")asfname:
389-
withopen(fname,"w")asf:
390-
f.write(data)
391-
returnlib.read_csv(fname)
388+
unique_filename=get_unique_filename(extension="csv",data_dir=tmp_path)
389+
withopen(unique_filename,"w")asf:
390+
f.write(data)
391+
returnlib.read_csv(unique_filename)
392392

393393
run_and_compare(test,data={})
394394

395-
deftest_read_csv_dtype_object(self):
395+
deftest_read_csv_dtype_object(self,tmp_path):
396396
withpytest.warns(UserWarning)aswarns:
397-
withensure_clean(".csv")asfile:
398-
withopen(file,"w")asf:
399-
f.write("test\ntest")
397+
unique_filename=get_unique_filename(extension="csv",data_dir=tmp_path)
398+
withopen(unique_filename,"w")asf:
399+
f.write("test\ntest")
400400

401-
deftest(**kwargs):
402-
returnpd.read_csv(file,dtype={"test":"object"})
401+
deftest(**kwargs):
402+
returnpd.read_csv(unique_filename,dtype={"test":"object"})
403403

404-
run_and_compare(test,data={})
404+
run_and_compare(test,data={})
405405
forwarninwarns.list:
406406
assertnotre.match(r".*defaulting to pandas.*",str(warn))
407407

@@ -870,30 +870,30 @@ def concat(df1, df2, lib, **kwargs):
870870
@pytest.mark.parametrize("transform", [True,False])
871871
@pytest.mark.parametrize("sort_last", [True,False])
872872
# RecursionError in case of concatenation of big number of frames
873-
deftest_issue_5889(self,transform,sort_last):
874-
withensure_clean(".csv")asfile:
875-
data= {"a": [1,2,3],"b": [1,2,3]}iftransformelse {"a": [1,2,3]}
876-
pandas.DataFrame(data).to_csv(file,index=False)
873+
deftest_issue_5889(self,transform,sort_last,tmp_path):
874+
unique_filename=get_unique_filename(extension="csv",data_dir=tmp_path)
875+
data= {"a": [1,2,3],"b": [1,2,3]}iftransformelse {"a": [1,2,3]}
876+
pandas.DataFrame(data).to_csv(unique_filename,index=False)
877877

878-
deftest_concat(lib,**kwargs):
879-
iftransform:
878+
deftest_concat(lib,**kwargs):
879+
iftransform:
880880

881-
defread_csv():
882-
returnlib.read_csv(file)["b"]
881+
defread_csv():
882+
returnlib.read_csv(unique_filename)["b"]
883883

884-
else:
884+
else:
885885

886-
defread_csv():
887-
returnlib.read_csv(file)
886+
defread_csv():
887+
returnlib.read_csv(unique_filename)
888888

889-
df=read_csv()
890-
for_inrange(100):
891-
df=lib.concat([df,read_csv()])
892-
ifsort_last:
893-
df=lib.concat([df,read_csv()],sort=True)
894-
returndf
889+
df=read_csv()
890+
for_inrange(100):
891+
df=lib.concat([df,read_csv()])
892+
ifsort_last:
893+
df=lib.concat([df,read_csv()],sort=True)
894+
returndf
895895

896-
run_and_compare(test_concat,data={})
896+
run_and_compare(test_concat,data={})
897897

898898

899899
classTestGroupby:

‎modin/experimental/pandas/test/test_io_exp.py‎

Lines changed: 28 additions & 40 deletions
Original file line number | Diff line number | Diff line change
@@ -18,13 +18,13 @@
1818
importnumpyasnp
1919
importpandas
2020
importpytest
21-
frompandas._testingimportensure_clean
2221

2322
importmodin.experimental.pandasaspd
24-
frommodin.configimportAsyncReadMode,Engine
23+
frommodin.configimportEngine
2524
frommodin.pandas.test.utilsimport (
2625
df_equals,
2726
eval_general,
27+
get_unique_filename,
2828
parse_dates_values_by_id,
2929
test_data,
3030
time_parsing_csv_path,
@@ -355,7 +355,7 @@ def test_xml_glob(tmp_path, filename):
355355
reason=f"{Engine.get()} does not have experimental read_custom_text API",
356356
)
357357
@pytest.mark.parametrize("set_async_read_mode", [False,True],indirect=True)
358-
deftest_read_custom_json_text(set_async_read_mode):
358+
deftest_read_custom_json_text(set_async_read_mode,tmp_path):
359359
def_generate_json(file_name,nrows,ncols):
360360
data=np.random.rand(nrows,ncols)
361361
df=pandas.DataFrame(data,columns=[f"col{x}"forxinrange(ncols)])
@@ -374,33 +374,27 @@ def _custom_parser(io_input, **kwargs):
374374
result[key].append(obj[key])
375375
returnpandas.DataFrame(result).rename(columns={"col0":"testID"})
376376

377-
withensure_clean()asfilename:
378-
_generate_json(filename,64,8)
377+
unique_filename=get_unique_filename(data_dir=tmp_path)
378+
_generate_json(unique_filename,64,8)
379379

380-
df1=pd.read_custom_text(
381-
filename,
382-
columns=["testID","col1","col3"],
383-
custom_parser=_custom_parser,
384-
is_quoting=False,
385-
)
386-
df2=pd.read_json(filename,lines=True)[["col0","col1","col3"]].rename(
387-
columns={"col0":"testID"}
388-
)
389-
ifAsyncReadMode.get():
390-
# If read operations are asynchronous, then the dataframes
391-
# check should be inside `ensure_clean` context
392-
# because the file may be deleted before actual reading starts
393-
df_equals(df1,df2)
394-
ifnotAsyncReadMode.get():
395-
df_equals(df1,df2)
380+
df1=pd.read_custom_text(
381+
unique_filename,
382+
columns=["testID","col1","col3"],
383+
custom_parser=_custom_parser,
384+
is_quoting=False,
385+
)
386+
df2=pd.read_json(unique_filename,lines=True)[["col0","col1","col3"]].rename(
387+
columns={"col0":"testID"}
388+
)
389+
df_equals(df1,df2)
396390

397391

398392
@pytest.mark.skipif(
399393
Engine.get()notin ("Ray","Unidist","Dask"),
400394
reason=f"{Engine.get()} does not have experimental API",
401395
)
402396
@pytest.mark.parametrize("set_async_read_mode", [False,True],indirect=True)
403-
deftest_read_evaluated_dict(set_async_read_mode):
397+
deftest_read_evaluated_dict(set_async_read_mode,tmp_path):
404398
def_generate_evaluated_dict(file_name,nrows,ncols):
405399
result= {}
406400
keys= [f"col{x}"forxinrange(ncols)]
@@ -430,23 +424,17 @@ def columns_callback(io_input, **kwargs):
430424
break
431425
returncolumns
432426

433-
withensure_clean()asfilename:
434-
_generate_evaluated_dict(filename,64,8)
427+
unique_filename=get_unique_filename(data_dir=tmp_path)
428+
_generate_evaluated_dict(unique_filename,64,8)
435429

436-
df1=pd.read_custom_text(
437-
filename,
438-
columns=["col1","col2"],
439-
custom_parser=_custom_parser,
440-
)
441-
assertdf1.shape== (64,2)
430+
df1=pd.read_custom_text(
431+
unique_filename,
432+
columns=["col1","col2"],
433+
custom_parser=_custom_parser,
434+
)
435+
assertdf1.shape== (64,2)
442436

443-
df2=pd.read_custom_text(
444-
filename,columns=columns_callback,custom_parser=_custom_parser
445-
)
446-
ifAsyncReadMode.get():
447-
# If read operations are asynchronous, then the dataframes
448-
# check should be inside `ensure_clean` context
449-
# because the file may be deleted before actual reading starts
450-
df_equals(df1,df2)
451-
ifnotAsyncReadMode.get():
452-
df_equals(df1,df2)
437+
df2=pd.read_custom_text(
438+
unique_filename,columns=columns_callback,custom_parser=_custom_parser
439+
)
440+
df_equals(df1,df2)

‎modin/pandas/test/dataframe/test_indexing.py‎

Lines changed: 11 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,6 @@
1717
importnumpyasnp
1818
importpandas
1919
importpytest
20-
frompandas._testingimportensure_clean
2120
frompandas.testingimportassert_index_equal
2221

2322
importmodin.pandasaspd
@@ -35,6 +34,7 @@
3534
df_equals,
3635
eval_general,
3736
generate_multiindex,
37+
get_unique_filename,
3838
int_arg_keys,
3939
int_arg_values,
4040
name_contains,
@@ -2207,14 +2207,16 @@ def test___setitem__partitions_aligning():
22072207
df_equals(md_df,pd_df)
22082208

22092209

2210-
deftest___setitem__with_mismatched_partitions():
2211-
withensure_clean(".csv")asfname:
2212-
np.savetxt(fname,np.random.randint(0,100,size=(200_000,99)),delimiter=",")
2213-
modin_df=pd.read_csv(fname)
2214-
pandas_df=pandas.read_csv(fname)
2215-
modin_df["new"]=pd.Series(list(range(len(modin_df))))
2216-
pandas_df["new"]=pandas.Series(list(range(len(pandas_df))))
2217-
df_equals(modin_df,pandas_df)
2210+
deftest___setitem__with_mismatched_partitions(tmp_path):
2211+
unique_filename=get_unique_filename(extension="csv",data_dir=tmp_path)
2212+
np.savetxt(
2213+
unique_filename,np.random.randint(0,100,size=(200_000,99)),delimiter=","
2214+
)
2215+
modin_df=pd.read_csv(unique_filename)
2216+
pandas_df=pandas.read_csv(unique_filename)
2217+
modin_df["new"]=pd.Series(list(range(len(modin_df))))
2218+
pandas_df["new"]=pandas.Series(list(range(len(pandas_df))))
2219+
df_equals(modin_df,pandas_df)
22182220

22192221

22202222
deftest___setitem__mask():

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp