Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

TEST-#7151: Remove usage of pandas._testing private module #6988

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
anmyachev wants to merge 3 commits into modin-project:master
base:master
Choose a base branch
Loading
from anmyachev:fix-testsw
Draft
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next commit
TEST-#7151: Remove usage of pandas._testing private module
Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
  • Loading branch information
@anmyachev
anmyachev committed Apr 5, 2024
commit 9322b35288f8f27505b728b99896a0d166c73428
18 changes: 5 additions & 13 deletions modin/conftest.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -16,7 +16,6 @@

import os
import platform
import shutil
import subprocess
import sys
import time
Expand DownExpand Up@@ -340,16 +339,15 @@ def fixture(tmp_path):


@pytest.fixture
def make_parquet_file():
def make_parquet_file(tmp_path):
"""Pytest fixture factory that makes a parquet file/dir for testing.

Yields:
Function that generates a parquet file/dir
"""
filenames = []

def _make_parquet_file(
filename,
filename=None,
nrows=NROWS,
ncols=2,
force=True,
Expand All@@ -369,6 +367,8 @@ def _make_parquet_file(
partitioned_columns: Create a partitioned directory using pandas.
row_group_size: Maximum size of each row group.
"""
if filename is None:
filename = get_unique_filename(extension=".parquet", data_dir=tmp_path)
if force or not os.path.exists(filename):
df = pandas.DataFrame(
{f"col{x + 1}": np.arange(nrows) for x in range(ncols)}
Expand All@@ -395,19 +395,11 @@ def _make_parquet_file(
)
else:
df.to_parquet(filename, row_group_size=row_group_size)
filenames.append(filename)
return filename

# Return function that generates parquet files
yield _make_parquet_file

# Delete parquet file that was created
for path in filenames:
if os.path.exists(path):
if os.path.isdir(path):
shutil.rmtree(path)
else:
os.remove(path)


@pytest.fixture
def make_sql_connection():
Expand Down
111 changes: 50 additions & 61 deletions modin/pandas/test/test_io.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -1387,44 +1387,41 @@ def _test_read_parquet(
"Skipping empty filters error case to avoid race condition - see #6460"
)

with ensure_clean(".parquet") as unique_filename:
unique_filename = path_type(unique_filename)
make_parquet_file(
filename=unique_filename,
row_group_size=row_group_size,
range_index_start=range_index_start,
range_index_step=range_index_step,
range_index_name=range_index_name,
)
unique_filename = make_parquet_file(
row_group_size=row_group_size,
range_index_start=range_index_start,
range_index_step=range_index_step,
range_index_name=range_index_name,
)
unique_filename = path_type(unique_filename)

eval_io(
fn_name="read_parquet",
# read_parquet kwargs
engine=engine,
path=unique_filename,
columns=columns,
filters=filters,
)
eval_io(
fn_name="read_parquet",
# read_parquet kwargs
engine=engine,
path=unique_filename,
columns=columns,
filters=filters,
)

@pytest.mark.parametrize(
"dtype_backend", [lib.no_default, "numpy_nullable", "pyarrow"]
)
def test_read_parquet_dtype_backend(self, engine, make_parquet_file, dtype_backend):
with ensure_clean(".parquet") as unique_filename:
make_parquet_file(filename=unique_filename, row_group_size=100)
unique_filename = make_parquet_file(row_group_size=100)

def comparator(df1, df2):
df_equals(df1, df2)
df_equals(df1.dtypes, df2.dtypes)
def comparator(df1, df2):
df_equals(df1, df2)
df_equals(df1.dtypes, df2.dtypes)

eval_io(
fn_name="read_parquet",
# read_parquet kwargs
engine=engine,
path=unique_filename,
dtype_backend=dtype_backend,
comparator=comparator,
)
eval_io(
fn_name="read_parquet",
# read_parquet kwargs
engine=engine,
path=unique_filename,
dtype_backend=dtype_backend,
comparator=comparator,
)

# Tests issue #6778
def test_read_parquet_no_extension(self, engine, make_parquet_file):
Expand DownExpand Up@@ -1496,23 +1493,20 @@ def test_read_parquet_range_index(
def test_read_parquet_list_of_files_5698(self, engine, make_parquet_file):
if engine == "fastparquet" and os.name == "nt":
pytest.xfail(reason="https://github.com/pandas-dev/pandas/issues/51720")
with ensure_clean(".parquet") as f1, ensure_clean(
".parquet"
) as f2, ensure_clean(".parquet") as f3:
for f in [f1, f2, f3]:
make_parquet_file(filename=f)
eval_io(fn_name="read_parquet", path=[f1, f2, f3], engine=engine)

def test_read_parquet_indexing_by_column(self, tmp_path, engine, make_parquet_file):
filenames = [None] * 3
for i in range(3):
filenames[i] = make_parquet_file()
eval_io(fn_name="read_parquet", path=filenames, engine=engine)

def test_read_parquet_indexing_by_column(self, engine, make_parquet_file):
# Test indexing into a column of Modin with various parquet file row lengths.
# Specifically, tests for https://github.com/modin-project/modin/issues/3527
# which fails when min_partition_size < nrows < min_partition_size * (num_partitions - 1)

nrows = (
MinPartitionSize.get() + 1
) # Use the minimal guaranteed failing value for nrows.
unique_filename = get_unique_filename(extension="parquet", data_dir=tmp_path)
make_parquet_file(filename=unique_filename, nrows=nrows)
unique_filename = make_parquet_file(nrows=nrows)

parquet_df = pd.read_parquet(unique_filename, engine=engine)
for col in parquet_df.columns:
Expand DownExpand Up@@ -1731,17 +1725,14 @@ def test_read_parquet_directory_range_index_consistent_metadata(
)
def test_read_parquet_partitioned_directory(
self,
tmp_path,
make_parquet_file,
columns,
filters,
range_index_start,
range_index_step,
engine,
):
unique_filename = get_unique_filename(extension=None, data_dir=tmp_path)
make_parquet_file(
filename=unique_filename,
unique_filename = make_parquet_file(
partitioned_columns=["col1"],
range_index_start=range_index_start,
range_index_step=range_index_step,
Expand DownExpand Up@@ -2063,11 +2054,10 @@ def test_read_parquet_s3_with_column_partitioning(
# TODO(https://github.com/modin-project/modin/issues/3655): Get rid of this
# comment once we turn all default to pandas messages into errors.
def test_read_parquet_relative_to_user_home(make_parquet_file):
with ensure_clean(".parquet") as unique_filename:
make_parquet_file(filename=unique_filename)
_check_relative_io(
"read_parquet", unique_filename, "path", storage_default=("Hdk",)
)
unique_filename = make_parquet_file()
_check_relative_io(
"read_parquet", unique_filename, "path", storage_default=("Hdk",)
)


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
Expand DownExpand Up@@ -2756,20 +2746,19 @@ def test_fwf_file_usecols(self, make_fwf_file, usecols):
"dtype_backend", [lib.no_default, "numpy_nullable", "pyarrow"]
)
def test_read_fwf_dtype_backend(self, make_fwf_file, dtype_backend):
with ensure_clean(".fwf") as unique_filename:
make_fwf_file(filename=unique_filename)
unique_filename = make_fwf_file()

def comparator(df1, df2):
df_equals(df1, df2)
df_equals(df1.dtypes, df2.dtypes)
def comparator(df1, df2):
df_equals(df1, df2)
df_equals(df1.dtypes, df2.dtypes)

eval_io(
fn_name="read_fwf",
# read_csv kwargs
filepath_or_buffer=unique_filename,
dtype_backend=dtype_backend,
comparator=comparator,
)
eval_io(
fn_name="read_fwf",
# read_csv kwargs
filepath_or_buffer=unique_filename,
dtype_backend=dtype_backend,
comparator=comparator,
)

def test_fwf_file_chunksize(self, make_fwf_file):
unique_filename = make_fwf_file()
Expand Down

[8]ページ先頭

©2009-2025 Movatter.jp