deftest_count(self):
	self.checkequal(3,'aaa','count','a')
	self.checkequal(0,'aaa','count','b')
	self.checkequal(3,'aaa','count','a')
	self.checkequal(0,'aaa','count','b')
	self.checkequal(3,'aaa','count','a')
	self.checkequal(0,'aaa','count','b')
	self.checkequal(0,'aaa','count','b')
	self.checkequal(2,'aaa','count','a',1)
	self.checkequal(0,'aaa','count','a',10)
	self.checkequal(1,'aaa','count','a',-1)
	self.checkequal(3,'aaa','count','a',-10)
	self.checkequal(1,'aaa','count','a',0,1)
	self.checkequal(3,'aaa','count','a',0,10)
	self.checkequal(2,'aaa','count','a',0,-1)
	self.checkequal(0,'aaa','count','a',0,-10)
	self.checkequal(3,'aaa','count','',1)
	self.checkequal(1,'aaa','count','',3)
	self.checkequal(0,'aaa','count','',10)
	self.checkequal(2,'aaa','count','',-1)
	self.checkequal(4,'aaa','count','',-10)

	self.checkequal(1,'','count','')
	self.checkequal(0,'','count','',1,1)
	self.checkequal(0,'','count','',sys.maxsize,0)

	self.checkequal(0,'','count','xx')
	self.checkequal(0,'','count','xx',1,1)
	self.checkequal(0,'','count','xx',sys.maxsize,0)

	self.checkraises(TypeError,'hello','count')

	ifself.contains_bytes:
	self.checkequal(0,'hello','count',42)
	else:
	self.checkraises(TypeError,'hello','count',42)

	# For a variety of combinations,
	# verify that str.count() matches an equivalent function
	# replacing all occurrences and then differencing the string lengths
	charset= ['','a','b']
	digits=7
	base=len(charset)
	teststrings=set()
	foriinrange(base**digits):
	entry= []
	forjinrange(digits):
	i,m=divmod(i,base)
	entry.append(charset[m])
	teststrings.add(''.join(entry))
	teststrings= [self.fixtype(ts)fortsinteststrings]
	foriinteststrings:
	n=len(i)
	forjinteststrings:
	r1=i.count(j)
	ifj:
	r2,rem=divmod(n-len(i.replace(j,self.fixtype(''))),
	len(j))
	else:
	r2,rem=len(i)+1,0
	ifremorr1!=r2:
	self.assertEqual(rem,0,'%s != 0 for %s'% (rem,i))
	self.assertEqual(r1,r2,'%s != %s for %s'% (r1,r2,i))

Question: what is the historical context on whyPyUnicode_Count is not reused inunicode_count? They look pretty similar:

cpython/Objects/unicodeobject.c

Lines 8968 to 9040 incbdeda8

	Py_ssize_t
	PyUnicode_Count(PyObject*str,
	PyObject*substr,
	Py_ssize_tstart,
	Py_ssize_tend)
	{
	Py_ssize_tresult;
	intkind1,kind2;
	constvoidbuf1=NULL,buf2=NULL;
	Py_ssize_tlen1,len2;

	if (ensure_unicode(str)<0\|\|ensure_unicode(substr)<0)
	return-1;

	kind1=PyUnicode_KIND(str);
	kind2=PyUnicode_KIND(substr);
	if (kind1<kind2)
	return0;

	len1=PyUnicode_GET_LENGTH(str);
	len2=PyUnicode_GET_LENGTH(substr);
	ADJUST_INDICES(start,end,len1);
	if (end-start<len2)
	return0;

	buf1=PyUnicode_DATA(str);
	buf2=PyUnicode_DATA(substr);
	if (kind2!=kind1) {
	buf2=unicode_askind(kind2,buf2,len2,kind1);
	if (!buf2)
	gotoonError;
	}

	switch (kind1) {
	casePyUnicode_1BYTE_KIND:
	if (PyUnicode_IS_ASCII(str)&&PyUnicode_IS_ASCII(substr))
	result=asciilib_count(
	((constPy_UCS1*)buf1)+start,end-start,
	buf2,len2,PY_SSIZE_T_MAX
	);
	else
	result=ucs1lib_count(
	((constPy_UCS1*)buf1)+start,end-start,
	buf2,len2,PY_SSIZE_T_MAX
	);
	break;
	casePyUnicode_2BYTE_KIND:
	result=ucs2lib_count(
	((constPy_UCS2*)buf1)+start,end-start,
	buf2,len2,PY_SSIZE_T_MAX
	);
	break;
	casePyUnicode_4BYTE_KIND:
	result=ucs4lib_count(
	((constPy_UCS4*)buf1)+start,end-start,
	buf2,len2,PY_SSIZE_T_MAX
	);
	break;
	default:
	Py_UNREACHABLE();
	}

	assert((kind2!=kind1)== (buf2!=PyUnicode_DATA(substr)));
	if (kind2!=kind1)
	PyMem_Free((void*)buf2);

	returnresult;
	onError:
	assert((kind2!=kind1)== (buf2!=PyUnicode_DATA(substr)));
	if (kind2!=kind1)
	PyMem_Free((void*)buf2);
	return-1;
	}

And

cpython/Objects/unicodeobject.c

Lines 10854 to 10916 incbdeda8

	staticPyObject*
	unicode_count(PyObjectself,PyObjectargs)
	{
	PyObjectsubstring=NULL;/ initialize to fix a compiler warning */
	Py_ssize_tstart=0;
	Py_ssize_tend=PY_SSIZE_T_MAX;
	PyObject*result;
	intkind1,kind2;
	constvoidbuf1,buf2;
	Py_ssize_tlen1,len2,iresult;

	if (!parse_args_finds_unicode("count",args,&substring,&start,&end))
	returnNULL;

	kind1=PyUnicode_KIND(self);
	kind2=PyUnicode_KIND(substring);
	if (kind1<kind2)
	returnPyLong_FromLong(0);

	len1=PyUnicode_GET_LENGTH(self);
	len2=PyUnicode_GET_LENGTH(substring);
	ADJUST_INDICES(start,end,len1);
	if (end-start<len2)
	returnPyLong_FromLong(0);

	buf1=PyUnicode_DATA(self);
	buf2=PyUnicode_DATA(substring);
	if (kind2!=kind1) {
	buf2=unicode_askind(kind2,buf2,len2,kind1);
	if (!buf2)
	returnNULL;
	}
	switch (kind1) {
	casePyUnicode_1BYTE_KIND:
	iresult=ucs1lib_count(
	((constPy_UCS1*)buf1)+start,end-start,
	buf2,len2,PY_SSIZE_T_MAX
	);
	break;
	casePyUnicode_2BYTE_KIND:
	iresult=ucs2lib_count(
	((constPy_UCS2*)buf1)+start,end-start,
	buf2,len2,PY_SSIZE_T_MAX
	);
	break;
	casePyUnicode_4BYTE_KIND:
	iresult=ucs4lib_count(
	((constPy_UCS4*)buf1)+start,end-start,
	buf2,len2,PY_SSIZE_T_MAX
	);
	break;
	default:
	Py_UNREACHABLE();
	}

	result=PyLong_FromSsize_t(iresult);

	assert((kind2==kind1)== (buf2==PyUnicode_DATA(substring)));
	if (kind2!=kind1)
	PyMem_Free((void*)buf2);

	returnresult;
	}

Issue:Metabug: Improving C-level coverage #94808

pythongh-94808: CoverPyUnicode_Countin CAPI

22aee9a

bedevere-bot added the awaiting review label

Sep 19, 2022

sobolevn added tests

Tests in the Lib/test dir

skip news labels

Sep 19, 2022

sobolevn requested a review fromencukou

September 19, 2022 09:49

Copy link

Contributor

mdboom commentedSep 22, 2022

Question: what is the historical context on why PyUnicode_Count is not reused in unicode_count?

It looks like these both date to the same commit d57fd91 from 2000-03-10. They were pretty different then, but are almost the same now. I see some benefit in makingunicode_count callPyUnicode_Count to make sure they remain consistent, but I could also see someone seeing this as "churn for churn's sake".

Note there is alsoanylib_count which is a subset ofunicode_count andPyUnicode_Count.

There are a few other instances of this kind of thing I've come across looking at coverage -- it would be good to get a core developer's take on whether merging internal and external functions where they are clearly wrappable like this would be welcome.

encukou approved these changes

Oct 6, 2022

View reviewed changes

bedevere-bot added awaiting merge and removed awaiting review labels

Oct 6, 2022

encukou merged commite63d7da intopython:main

Oct 6, 2022

bedevere-bot removed the awaiting merge label

Oct 6, 2022

Copy link

Member

encukou commentedOct 6, 2022

Apparentlyunicode_count missedan optimization in 2011, otherwise they're equivalent (except arg parsing & converting the return value). Merging them could add the optimization tounicode_count.
If you want to work on that, note that there's alsoanylib_count that duplicates the mainswitch.

Copy link

MemberAuthor

sobolevn commentedOct 6, 2022

Thanks! Yes, I would like to do that! I will open a new issue for it.

carljm added a commit to carljm/cpython that referenced this pull request

Oct 6, 2022

Merge branch 'main' into dictwatch

7938d6f

* main:pythonGH-88050: fix race in closing subprocess pipe in asyncio  (python#97951)pythongh-93738: Disallow pre-v3 syntax in the C domain (python#97962)pythongh-95986: Fix the example using match keyword (python#95989)pythongh-97897: Prevent os.mkfifo and os.mknod segfaults with macOS 13 SDK (pythonGH-97944)pythongh-94808: Cover `PyUnicode_Count` in CAPI (python#96929)pythongh-94808: Cover `PyObject_PyBytes` case with custom `__bytes__` method (python#96610)pythongh-95691: Doc BufferedWriter and BufferedReader (python#95703)pythonGH-88968: Add notes about socket ownership transfers (python#97936)pythongh-96865: [Enum] fix Flag to use CONFORM boundary (pythonGH-97528)

sobolevn mentioned this pull request

Oct 6, 2022

UnifyPyUncode_Count andunicode_count#97982

Closed

carljm added a commit to carljm/cpython that referenced this pull request

Oct 8, 2022

Merge branch 'main' into typewatch

1af2a98

* main: (53 commits)pythongh-94808: Coverage: Test that maximum indentation level is handled (python#95926)pythonGH-88050: fix race in closing subprocess pipe in asyncio  (python#97951)pythongh-93738: Disallow pre-v3 syntax in the C domain (python#97962)pythongh-95986: Fix the example using match keyword (python#95989)pythongh-97897: Prevent os.mkfifo and os.mknod segfaults with macOS 13 SDK (pythonGH-97944)pythongh-94808: Cover `PyUnicode_Count` in CAPI (python#96929)pythongh-94808: Cover `PyObject_PyBytes` case with custom `__bytes__` method (python#96610)pythongh-95691: Doc BufferedWriter and BufferedReader (python#95703)pythonGH-88968: Add notes about socket ownership transfers (python#97936)pythongh-96865: [Enum] fix Flag to use CONFORM boundary (pythonGH-97528)pythongh-65961: Raise `DeprecationWarning` when `__package__` differs from `__spec__.parent` (python#97879)  docs(typing): add "see PEP 675" to LiteralString (python#97926)pythongh-97850: Remove all known instances of module_repr() (python#97876)  I changed my surname early this year (python#96671)pythongh-93738: Documentation C syntax (:c:type:<C type> -> :c:expr:<C type>) (python#97768)pythongh-91539: improve performance of get_proxies_environment  (python#91566)  build(deps): bump actions/stale from 5 to 6 (python#97701)pythonGH-95172 Make the same version `versionadded` oneline (python#95172)pythongh-88050: Fix asyncio subprocess to kill process cleanly when process is blocked (python#32073)pythongh-93738: Documentation C syntax (Function glob patterns -> literal markup) (python#97774)  ...

mpage pushed a commit to mpage/cpython that referenced this pull request

Oct 11, 2022

pythongh-94808: CoverPyUnicode_Countin CAPI (python#96929)

d458682

Labels

skip news tests

Tests in the Lib/test dir

Movatterモバイル変換

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Uh oh!

gh-94808: Cover`PyUnicode_Count` in CAPI#96929

gh-94808: Cover`PyUnicode_Count` in CAPI#96929

Uh oh!

Conversation

sobolevn commentedSep 19, 2022•
edited by bedevere-bot
Loading

Uh oh!