Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit f59a423

Browse files
authored
Merge pull request #10536 from eric-wieser/fix-10394
BUG: Resize bytes_ columns in genfromtxt (backport of #10401)
2 parents 54abb9a + 0a87861, commit f59a423

File tree

2 files changed

+27
-17
lines changed

2 files changed

+27
-17
lines changed

‎numpy/lib/npyio.py‎

Lines changed: 20 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -2042,7 +2042,6 @@ def tobytes_first(x, conv):
20422042
strcolidx= [ifor (i,v)inenumerate(column_types)
20432043
ifv==np.unicode_]
20442044

2045-
type_str=np.unicode_
20462045
ifbyte_convertersandstrcolidx:
20472046
# convert strings back to bytes for backward compatibility
20482047
warnings.warn(
@@ -2058,33 +2057,37 @@ def encode_unicode_cols(row_tup):
20582057

20592058
try:
20602059
data= [encode_unicode_cols(r)forrindata]
2061-
type_str=np.bytes_
20622060
exceptUnicodeEncodeError:
20632061
pass
2062+
else:
2063+
foriinstrcolidx:
2064+
column_types[i]=np.bytes_
20642065

2066+
# Update string types to be the right length
2067+
sized_column_types=column_types[:]
2068+
fori,col_typeinenumerate(column_types):
2069+
ifnp.issubdtype(col_type,np.character):
2070+
n_chars=max(len(row[i])forrowindata)
2071+
sized_column_types[i]= (col_type,n_chars)
20652072

2066-
# ... and take the largest number of chars.
2067-
foriinstrcolidx:
2068-
max_line_length=max(len(row[i])forrowindata)
2069-
column_types[i]=np.dtype((type_str,max_line_length))
2070-
#
20712073
ifnamesisNone:
2072-
# If the dtype is uniform, don't define names, else use ''
2073-
base=set([c.typeforcinconvertersifc._checked])
2074+
# If the dtype is uniform (before sizing strings)
2075+
base=set([
2076+
c_type
2077+
forc,c_typeinzip(converters,column_types)
2078+
ifc._checked])
20742079
iflen(base)==1:
2075-
ifstrcolidx:
2076-
(ddtype,mdtype)= (type_str,bool)
2077-
else:
2078-
(ddtype,mdtype)= (list(base)[0],bool)
2080+
uniform_type,=base
2081+
(ddtype,mdtype)= (uniform_type,bool)
20792082
else:
20802083
ddtype= [(defaultfmt%i,dt)
2081-
for (i,dt)inenumerate(column_types)]
2084+
for (i,dt)inenumerate(sized_column_types)]
20822085
ifusemask:
20832086
mdtype= [(defaultfmt%i,bool)
2084-
for (i,dt)inenumerate(column_types)]
2087+
for (i,dt)inenumerate(sized_column_types)]
20852088
else:
2086-
ddtype=list(zip(names,column_types))
2087-
mdtype=list(zip(names, [bool]*len(column_types)))
2089+
ddtype=list(zip(names,sized_column_types))
2090+
mdtype=list(zip(names, [bool]*len(sized_column_types)))
20882091
output=np.array(data,dtype=ddtype)
20892092
ifusemask:
20902093
outputmask=np.array(masks,dtype=mdtype)

‎numpy/lib/tests/test_io.py‎

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2056,6 +2056,13 @@ def test_recfromcsv(self):
20562056
assert_(isinstance(test,np.recarray))
20572057
assert_equal(test,control)
20582058

2059+
#gh-10394
2060+
data=TextIO('color\n"red"\n"blue"')
2061+
test=np.recfromcsv(data,converters={0:lambdax:x.strip(b'\"')})
2062+
control=np.array([('red',), ('blue',)],dtype=[('color', (bytes,4))])
2063+
assert_equal(test.dtype,control.dtype)
2064+
assert_equal(test,control)
2065+
20592066
deftest_max_rows(self):
20602067
# Test the `max_rows` keyword argument.
20612068
data='1 2\n3 4\n5 6\n7 8\n9 10\n'

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp