Apr 18, 2025 · Apr 29, 2023 · Apr 29, 2023 · Apr 29, 2023 · Apr 30, 2023 · Apr 30, 2023
diff --git a/Include/internal/pycore_unicodeobject.h b/Include/internal/pycore_unicodeobject.h
    Py_UCS4 *maxchar,
    int forward);

 /* Dedent a string.
   Behaviour is expected to be an exact match of `textwrap.dedent`.
   Return a new reference on success, NULL with exception set on error.
   */
 PyAPI_FUNC(PyObject*) _PyUnicode_Dedent(PyObject *unicode);

 /* --- Misc functions ----------------------------------------------------- */

 extern PyObject* _PyUnicode_FormatLong(PyObject *, int, int, int);
diff --git a/Lib/test/test_cmd_line.py b/Lib/test/test_cmd_line.py
        )
        self.assertEqual(res2int(res), (6000, 6000))

    def test_cmd_dedent(self):
        """Check that ``-c`` auto-dedents its argument like textwrap.dedent.

        Each snippet is run twice in a child interpreter: once exactly as
        written (still indented) and once after an explicit
        ``textwrap.dedent``.  Both runs must exit with status 0, print the
        same text, and that text must equal the expected output.
        """
        from textwrap import dedent
        # NOTE: the whitespace inside these literals is the test input;
        # do not re-indent them.
        test_cases = [
            (
                """
                    print('space-auto-dedent')
                """,
                "space-auto-dedent",
            ),
            (
                dedent(
                    """
                ^^^print('tab-auto-dedent')
                """
                ).replace("^", "\t"),
                "tab-auto-dedent",
            ),
            (
                dedent(
                    """
                ^^if 1:
                ^^^^print('mixed-auto-dedent-1')
                ^^print('mixed-auto-dedent-2')
                """
                ).replace("^", "\t \t"),
                "mixed-auto-dedent-1\nmixed-auto-dedent-2",
            ),
            (
                '''
                    data = """$

                    this data has an empty newline above and a newline with spaces below $
                                            $
                    """$
                    if 1:         $
                        print(repr(data))$
                '''.replace(
                    "$", ""
                ),
                # Note: entirely blank lines are normalized to \n, even if they
                # are part of a data string. This is consistent with
                # textwrap.dedent behavior, but might not be intuitive.
                "'\\n\\nthis data has an empty newline above and a newline with spaces below \\n\\n'",
            ),
        ]

        def run_snippet(snippet):
            # Run ``python -c snippet``; require exit status 0 and return the
            # stripped, UTF-8 decoded stdout.
            args = sys.executable, '-c', snippet
            proc = subprocess.run(args, stdout=subprocess.PIPE)
            self.assertEqual(proc.returncode, 0, proc)
            return proc.stdout.strip().decode(encoding='utf-8')

        for code, expected in test_cases:
            # subTest keeps the remaining snippets running after a failure
            # and reports which snippet failed.
            with self.subTest(code=code):
                # Run the auto-dedent case
                output1 = run_snippet(code)

                # Manually dedent beforehand, check the result is the same.
                output2 = run_snippet(dedent(code))

                self.assertEqual(output1, output2)
                self.assertEqual(output1.replace('\r\n', '\n'), expected)

    def test_cmd_dedent_failcase(self):
        """``-c`` must fail when the indentation mixes tabs and spaces."""
        from textwrap import dedent
        # '-' and '+' are placeholders for the two indentation characters.
        template = dedent(
            '''
            -+if 1:
            +-++ print('will fail')
            ''')
        # First '-' is a space and '+' a tab, then the other way round.
        for minus_plus in (' \t', '\t '):
            substitution = str.maketrans('-+', minus_plus)
            assert_python_failure('-c', template.translate(substitution))

    def test_cpu_count(self):
        code = "import os; print(os.cpu_count(), os.process_cpu_count())"
        res = assert_python_ok('-X', 'cpu_count=4321', '-c', code)
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2023-04-29-23-15-38.gh-issue-103997.BS3uVt.rst b/Misc/NEWS.d/next/Core_and_Builtins/2023-04-29-23-15-38.gh-issue-103997.BS3uVt.rst
 String arguments passed to "-c" are now automatically dedented as if by
 :func:`textwrap.dedent`. This allows "python -c" invocations to be indented
 in shell scripts without causing indentation errors. (Patch by Jon Crall and
 Steven Sun)
diff --git a/Modules/main.c b/Modules/main.c
 #include "pycore_pylifecycle.h"   // _Py_PreInitializeFromPyArgv()
 #include "pycore_pystate.h"       // _PyInterpreterState_GET()
 #include "pycore_pythonrun.h"     // _PyRun_AnyFileObject()
 #include "pycore_unicodeobject.h" // _PyUnicode_Dedent()

 /* Includes for exit_sigint() */
 #include <stdio.h>                // perror()
        return pymain_exit_err_print();
    }

    Py_SETREF(unicode, _PyUnicode_Dedent(unicode));
    if (unicode == NULL) {
        goto error;
    }

    bytes = PyUnicode_AsUTF8String(unicode);
    Py_DECREF(unicode);
    if (bytes == NULL) {
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
    return Py_BuildValue("(N)", copy);
 }

 /* Dedent a string.

   Strip the longest prefix of spaces and tabs that is shared by every line
   containing a non-whitespace character, and turn every line made only of
   spaces and tabs into an empty line.  When a common margin exists the
   result is intended to match `textwrap.dedent`.

   Fast paths: if the string is empty, if some non-blank line starts in
   column 0, if the non-blank lines share no leading whitespace, or if there
   is no non-blank line at all, `unicode` itself is returned (with a new
   reference) and whitespace-only lines are left untouched.

   Return a new reference on success, NULL with exception set on error.
   */
 PyAPI_FUNC(PyObject *)
 _PyUnicode_Dedent(PyObject *unicode)
 {
    /* Work on the UTF-8 form: ' ', '\t' and '\n' are single ASCII bytes
       that never occur inside a multi-byte UTF-8 sequence, so scanning
       byte by byte is safe. */
    Py_ssize_t src_len = 0;
    const char *src = PyUnicode_AsUTF8AndSize(unicode, &src_len);
    if (!src) {
        return NULL;
    }
    if (src_len <= 0) {
        Py_INCREF(unicode);
        return unicode;
    }

    const char *end = src + src_len;

    // [candidate_start, candidate_start + candidate_len)
    // describes the current longest common leading whitespace
    const char *candidate_start = NULL;
    Py_ssize_t candidate_len = 0;

    /* Pass 1: find the common leading whitespace of all non-blank lines.
       The `++iter` of the for statement steps over the '\n' that ended
       the line just scanned. */
    for (const char *iter = src; iter < end; ++iter) {
        const char *line_start = iter;
        const char *leading_whitespace_end = NULL;

        // scan the whole line
        while (iter < end && *iter != '\n') {
            if (!leading_whitespace_end && *iter != ' ' && *iter != '\t') {
                /* `iter` points to the first non-whitespace character
                   in this line */
                if (iter == line_start) {
                    // some line has no indent, fast exit!
                    Py_INCREF(unicode);
                    return unicode;
                }
                leading_whitespace_end = iter;
            }
            ++iter;
        }

        // if this line has all white space, skip it
        if (!leading_whitespace_end) {
            continue;
        }

        if (!candidate_start) {
            // first non-blank line: its whole indent is the candidate
            candidate_start = line_start;
            candidate_len = leading_whitespace_end - line_start;
            assert(candidate_len > 0);
        } else {
            /* Shrink the candidate to the prefix it shares with this line.

               [line_start, leading_whitespace_end) is the leading whitespace
               of this line,

               [candidate_start, candidate_start + candidate_len)
               is the current longest common leading whitespace. */
            Py_ssize_t new_candidate_len = 0;

            for (const char *candidate_iter = candidate_start,
                            *line_iter = line_start;
                 candidate_iter < candidate_start + candidate_len &&
                 line_iter < leading_whitespace_end;
                 ++candidate_iter, ++line_iter) {
                if (*candidate_iter != *line_iter) {
                    break;
                }
                ++new_candidate_len;
            }

            candidate_len = new_candidate_len;
            if (candidate_len == 0) {
                // No common things now, fast exit!
                Py_INCREF(unicode);
                return unicode;
            }
        }
    }

    assert(candidate_len >= 0);
    /* Final check for strings that contain nothing but whitespace. */
    if (candidate_len == 0) {
        Py_INCREF(unicode);
        return unicode;
    }

    /* Pass 2: build the dedented copy.  The output is never longer than
       the input, so `src_len` bytes are always enough. */
    char *dest = PyMem_Malloc(src_len);
    if (!dest) {
        PyErr_NoMemory();
        return NULL;
    }
    char *dest_iter = dest;

    for (const char *iter = src; iter < end; ++iter) {
        const char *line_start = iter;
        bool in_leading_space = true;

        // iterate over a line to find the end of a line
        while (iter < end && *iter != '\n') {
            if (in_leading_space && *iter != ' ' && *iter != '\t') {
                in_leading_space = false;
            }
            ++iter;
        }

        // invariant: *iter == '\n' or iter == end
        bool append_newline = iter < end;

        /* A whitespace-only line becomes an empty line: keep only its
           '\n', if it has one.  This must also cover a final blank line
           without a trailing '\n': such a line may be shorter than the
           common indent, and letting it reach the copy below would give
           a negative length to memcpy(). */
        if (in_leading_space) {
            if (append_newline) {
                *dest_iter++ = '\n';
            }
            continue;
        }

        /* copy [line_start + candidate_len, iter) to buffer, then
           conditionally append '\n'.  Every non-blank line starts with
           the common indent, so the length cannot be negative. */

        Py_ssize_t new_line_len = iter - line_start - candidate_len;
        assert(new_line_len >= 0);
        memcpy(dest_iter, line_start + candidate_len, new_line_len);

        dest_iter += new_line_len;

        if (append_newline) {
            *dest_iter++ = '\n';
        }
    }

    Py_ssize_t dest_len = dest_iter - dest;

    // NULL (with an exception set) is passed through to the caller as is
    PyObject *res = PyUnicode_FromStringAndSize(dest, dest_len);
    PyMem_Free(dest);
    return res;
 }

 static PyMethodDef unicode_methods[] = {
    UNICODE_ENCODE_METHODDEF
    UNICODE_REPLACE_METHODDEF
Original file line number	Diff line number	Diff line change
Expand Up		@@ -247,6 +247,12 @@ extern Py_ssize_t _PyUnicode_InsertThousandsGrouping(
		Py_UCS4 *maxchar,
		int forward);

		/* Dedent a string.
		Behaviour is expected to be an exact match of `textwrap.dedent`.
		Return a new reference on success, NULL with exception set on error.
		*/
		PyAPI_FUNC(PyObject*) _PyUnicode_Dedent(PyObject *unicode);
sunmy2019 marked this conversation as resolved. OutdatedShow resolvedHide resolved

		/* --- Misc functions ----------------------------------------------------- */

		extern PyObject* _PyUnicode_FormatLong(PyObject *, int, int, int);
Expand Down
Original file line number	Diff line number	Diff line change
Expand Up		@@ -1051,6 +1051,81 @@ def test_int_max_str_digits(self):
		)
		self.assertEqual(res2int(res), (6000, 6000))

		def test_cmd_dedent(self):
		# test that -c auto-dedents its arguments
		from textwrap import dedent
sunmy2019 marked this conversation as resolved. OutdatedShow resolvedHide resolved
		test_cases = [
sunmy2019 marked this conversation as resolved. Show resolvedHide resolved
		(
		"""
		print('space-auto-dedent')
		""",
		"space-auto-dedent",
		),
		(
		dedent(
		"""
		^^^print('tab-auto-dedent')
		"""
		).replace("^", "\t"),
		"tab-auto-dedent",
		),
		(
		dedent(
		"""
		^^if 1:
		^^^^print('mixed-auto-dedent-1')
		^^print('mixed-auto-dedent-2')
		"""
		).replace("^", "\t \t"),
		"mixed-auto-dedent-1\nmixed-auto-dedent-2",
		),
		(
		'''
		data = """$

		this data has an empty newline above and a newline with spaces below $
		$
		"""$
		if 1: $
		print(repr(data))$
		'''.replace(
		"$", ""
		),
		# Note: entirely blank lines are normalized to \n, even if they
		# are part of a data string. This is consistent with
		# textwrap.dedent behavior, but might not be intuitive.
		"'\\n\\nthis data has an empty newline above and a newline with spaces below \\n\\n'",
		),
		]
		for code, expected in test_cases:
		# Run the auto-dedent case
		args1 = sys.executable, '-c', code
		proc1 = subprocess.run(args1, stdout=subprocess.PIPE)
		self.assertEqual(proc1.returncode, 0, proc1)
		output1 = proc1.stdout.strip().decode(encoding='utf-8')

		# Manually dedent beforehand, check the result is the same.
		args2 = sys.executable, '-c', dedent(code)
		proc2 = subprocess.run(args2, stdout=subprocess.PIPE)
		self.assertEqual(proc2.returncode, 0, proc2)
		output2 = proc2.stdout.strip().decode(encoding='utf-8')

		self.assertEqual(output1, output2)
		self.assertEqual(output1.replace('\r\n', '\n'), expected)

		def test_cmd_dedent_failcase(self):
		# Mixing tabs and spaces is not allowed
		from textwrap import dedent
		template = dedent(
		'''
		-+if 1:
		+-++ print('will fail')
		''')
		code = template.replace('-', ' ').replace('+', '\t')
		assert_python_failure('-c', code)
		code = template.replace('-', '\t').replace('+', ' ')
		assert_python_failure('-c', code)

		def test_cpu_count(self):
		code = "import os; print(os.cpu_count(), os.process_cpu_count())"
		res = assert_python_ok('-X', 'cpu_count=4321', '-c', code)
Expand Down
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,4 @@
		String arguments passed to "-c" are now automatically dedented as if by
picnixz marked this conversation as resolved. Show resolvedHide resolved
		:func:`textwrap.dedent`. This allows "python -c" invocations to be indented
picnixz marked this conversation as resolved. Show resolvedHide resolved
		in shell scripts without causing indentation errors. (Patch by Jon Crall and
		Steven Sun)
picnixz marked this conversation as resolved. Show resolvedHide resolved
Original file line number	Diff line number	Diff line change
Expand Up		@@ -11,6 +11,7 @@
		#include "pycore_pylifecycle.h" // _Py_PreInitializeFromPyArgv()
		#include "pycore_pystate.h" // _PyInterpreterState_GET()
		#include "pycore_pythonrun.h" // _PyRun_AnyFileObject()
		#include "pycore_unicodeobject.h" // _PyUnicode_Dedent()

		/* Includes for exit_sigint() */
		#include <stdio.h> // perror()
Expand DownExpand Up		@@ -244,6 +245,11 @@ pymain_run_command(wchar_t *command)
		return pymain_exit_err_print();
		}

		Py_SETREF(unicode, _PyUnicode_Dedent(unicode));
		if (unicode == NULL) {
		goto error;
		}

		bytes = PyUnicode_AsUTF8String(unicode);
		Py_DECREF(unicode);
		if (bytes == NULL) {
Expand Down
Original file line number	Diff line number	Diff line change
Expand Up		@@ -14270,6 +14270,147 @@ unicode_getnewargs(PyObject v, PyObject Py_UNUSED(ignored))
		return Py_BuildValue("(N)", copy);
		}

		/* Dedent a string.
		Behaviour is expected to be an exact match of `textwrap.dedent`.
		Return a new reference on success, NULL with exception set on error.
		*/
		PyAPI_FUNC(PyObject *)
sunmy2019 marked this conversation as resolved. OutdatedShow resolvedHide resolved
		_PyUnicode_Dedent(PyObject *unicode)
		{
		Py_ssize_t src_len = 0;
		const char *src = PyUnicode_AsUTF8AndSize(unicode, &src_len);
		if (!src) {
		return NULL;
		}
		if (src_len <= 0) {
sunmy2019 marked this conversation as resolved. OutdatedShow resolvedHide resolved
		Py_INCREF(unicode);
		return unicode;
		}

		const char *end = src + src_len;

		// [candidate_start, candidate_start + candidate_len)
		// describes the current longest common leading whitespace
		const char *candidate_start = NULL;
		Py_ssize_t candidate_len = 0;

		for (const char *iter = src; iter < end; ++iter) {
		const char *line_start = iter;
		const char *leading_whitespace_end = NULL;

		// scan the whole line
		while (iter < end && *iter != '\n') {
		if (!leading_whitespace_end && *iter != ' ' && *iter != '\t') {
		/* `iter` points to the first non-whitespace character
		in this line */
		if (iter == line_start) {
		// some line has no indent, fast exit!
		Py_INCREF(unicode);
		return unicode;
		}
		leading_whitespace_end = iter;
		}
		++iter;
		}

		// if this line has all white space, skip it
		if (!leading_whitespace_end) {
		continue;
		}

		if (!candidate_start) {
		// update the first leading whitespace
		candidate_start = line_start;
		candidate_len = leading_whitespace_end - line_start;
		assert(candidate_len > 0);
		} else {
		/* We then compare with the current longest leading whitespace.
sunmy2019 marked this conversation as resolved. OutdatedShow resolvedHide resolved

		[line_start, leading_whitespace_end) is the leading whitespace of
		this line,

		[candidate_start, candidate_start + candidate_len)
		is the leading whitespace of the current longest leading
		whitespace. */
		Py_ssize_t new_candidate_len = 0;

sunmy2019 marked this conversation as resolved. Show resolvedHide resolved
		for (const char *candidate_iter = candidate_start,
		*line_iter = line_start;
		candidate_iter < candidate_start + candidate_len &&
		line_iter < leading_whitespace_end;
		++candidate_iter, ++line_iter) {
		if (*candidate_iter != *line_iter) {
		break;
		}
		++new_candidate_len;
		}
sunmy2019 marked this conversation as resolved. OutdatedShow resolvedHide resolved

		candidate_len = new_candidate_len;
		if (candidate_len == 0) {
		// No common things now, fast exit!
		Py_INCREF(unicode);
		return unicode;
		}
		}
		}

		assert(candidate_len >= 0);
		/* Final check for strings that contain nothing but whitespace. */
		if (candidate_len == 0) {
		Py_INCREF(unicode);
		return unicode;
		}

sunmy2019 marked this conversation as resolved. Show resolvedHide resolved
		// now we should trigger a dedent
		char *dest = PyMem_Malloc(src_len);
		if (!dest) {
		PyErr_NoMemory();
		return NULL;
		}
		char *dest_iter = dest;

		for (const char *iter = src; iter < end; ++iter) {
		const char *line_start = iter;
		bool in_leading_space = true;

		// iterate over a line to find the end of a line
		while (iter < end && *iter != '\n') {
		if (in_leading_space && *iter != ' ' && *iter != '\t') {
		in_leading_space = false;
		}
		++iter;
		}

		// invariant: *iter == '\n' or iter == end
		bool append_newline = iter < end;

		// if this line has all white space, write '\n' and continue
		if (in_leading_space && append_newline) {
		*dest_iter++ = '\n';
		continue;
		}

		/* copy [new_line_start + candidate_len, iter) to buffer, then
		conditionally append '\n' */

		Py_ssize_t new_line_len = iter - line_start - candidate_len;
		assert(new_line_len >= 0);
		memcpy(dest_iter, line_start + candidate_len, new_line_len);

		dest_iter += new_line_len;

		if (append_newline) {
		*dest_iter++ = '\n';
		}
		}

		Py_ssize_t dest_len = dest_iter - dest;

		PyObject *res = PyUnicode_FromStringAndSize(dest, dest_len);
methane marked this conversation as resolved. OutdatedShow resolvedHide resolved
		PyMem_Free(dest);
		return res;
		}

		static PyMethodDef unicode_methods[] = {
		UNICODE_ENCODE_METHODDEF
		UNICODE_REPLACE_METHODDEF
Expand Down