gh-132380: Use unicode hash/compare for the mcache. #133669


Open

nascheme wants to merge 5 commits into python:main from nascheme:mcache-str-hash
@@ -0,0 +1,2 @@
For free-threaded build, allow non-interned strings to be cached in the type
lookup cache. This helps lock contention by reducing cache misses.

Contributor review comment:

I don't think this caches non-interned strings. It seems to me that it allows non-interned strings as the lookup key, but the cache still only contains interned strings.
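As a small illustration of the distinction the review comment draws (a standalone sketch, not part of the PR; the class C, the attribute "method", and the runtime-built name below are made up for the example): compiled attribute access uses interned constant strings, which a pointer-identity cache key can match, while a name built at runtime is an equal but distinct, non-interned object that only a content-based hash/compare can match against the cached entry.

import sys

class C:
    def method(self):
        return 42

obj = C()

# Compiled attribute access (obj.method) uses an interned constant string,
# so a pointer-identity cache key can match it on any build.
obj.method()

# A name assembled at runtime is a new, non-interned str that compares equal
# to "method" but is not the same object.
name = "".join(["met", "hod"])
assert name == "method"
assert name is not sys.intern("method")

# With the pointer-based key this getattr() cannot hit the method cache; with
# the unicode hash/compare scheme it can match the cached (interned) entry by
# value, per the PR description.
getattr(obj, name)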
71 changes: 60 additions & 11 deletions Objects/typeobject.c
@@ -43,13 +43,62 @@ class object "PyObject *" "&PyBaseObject_Type"
   MCACHE_MAX_ATTR_SIZE, since it might be a problem if very large
   strings are used as attribute names. */
#define MCACHE_MAX_ATTR_SIZE 100
#define MCACHE_HASH(version, name_hash)                                 \
        (((unsigned int)(version) ^ (unsigned int)(name_hash))          \
         & ((1 << MCACHE_SIZE_EXP) - 1))

#define MCACHE_HASH_METHOD(type, name)                                  \
    MCACHE_HASH(FT_ATOMIC_LOAD_UINT32_RELAXED((type)->tp_version_tag),  \
                ((Py_ssize_t)(name)) >> 3)
static inline unsigned int
mcache_name_hash(PyTypeObject *type, PyObject *name)
{
    Py_hash_t name_hash;
#if Py_GIL_DISABLED
    // Cache misses are relatively more expensive for the free-threaded build.
    // So we use the unicode string hash and unicode compare for caching
    // names. This allows caching of non-interned strings.
    assert(PyUnicode_CheckExact(name));
    name_hash = _PyObject_HashFast(name);
    // should not fail to hash an exact unicode object
    assert(name_hash != -1);
#else
    // Use the pointer value of the string for the hash and the compare. This
    // is faster but non-interned strings can't use the cache.
    name_hash = ((Py_hash_t)(name)) >> 3;
#endif
    unsigned int version = FT_ATOMIC_LOAD_UINT32_RELAXED((type)->tp_version_tag);
    return (((unsigned int)(version) ^ (unsigned int)(name_hash)) &
            ((1 << MCACHE_SIZE_EXP) - 1));
}

static inline struct type_cache_entry *
mcache_get_entry(PyTypeObject *type, PyObject *name, struct type_cache *cache)
{
#ifdef Py_GIL_DISABLED
    if (!PyUnicode_CheckExact(name)) {
        return NULL;
    }
#endif
    unsigned int h = mcache_name_hash(type, name);
    return &cache->hashtable[h];
}

static inline int
mcache_name_eq(PyObject *entry_name, PyObject *name)
{
#ifdef Py_GIL_DISABLED
    if (entry_name == name) {
        return 1;
    }
    if (entry_name == NULL || entry_name == Py_None) {
        return 0;
    }
    assert(PyUnicode_CheckExact(entry_name));
    assert(PyUnicode_CheckExact(name));
    if (_PyObject_HashFast(entry_name) != _PyObject_HashFast(name)) {
        return 0;
    }
    return _PyUnicode_Equal(entry_name, name);
#else
    return entry_name == name;
#endif
}

#define MCACHE_CACHEABLE_NAME(name)                             \
        PyUnicode_CheckExact(name) &&                           \
        (PyUnicode_GET_LENGTH(name) <= MCACHE_MAX_ATTR_SIZE)
@@ -5721,17 +5770,16 @@ _PyType_LookupRefAndVersion(PyTypeObject *type, PyObject *name, unsigned int *ve
unsigned int
_PyType_LookupStackRefAndVersion(PyTypeObject *type, PyObject *name, _PyStackRef *out)
{
    unsigned int h = MCACHE_HASH_METHOD(type, name);
    struct type_cache *cache = get_type_cache();
    struct type_cache_entry *entry = &cache->hashtable[h];
    struct type_cache_entry *entry = mcache_get_entry(type, name, cache);
#ifdef Py_GIL_DISABLED
    // synchronize-with other writing threads by doing an acquire load on the sequence
    while (1) {
    while (entry != NULL) {
        uint32_t sequence = _PySeqLock_BeginRead(&entry->sequence);
        uint32_t entry_version = _Py_atomic_load_uint32_acquire(&entry->version);
        uint32_t type_version = _Py_atomic_load_uint32_acquire(&type->tp_version_tag);
        if (entry_version == type_version &&
            _Py_atomic_load_ptr_relaxed(&entry->name) == name) {
            mcache_name_eq(_Py_atomic_load_ptr_relaxed(&entry->name), name)) {
            OBJECT_STAT_INC_COND(type_cache_hits, !is_dunder_name(name));
            OBJECT_STAT_INC_COND(type_cache_dunder_hits, is_dunder_name(name));
            if (_Py_TryXGetStackRef(&entry->value, out)) {
@@ -5752,7 +5800,7 @@ _PyType_LookupStackRefAndVersion(PyTypeObject *type, PyObject *name, _PyStackRef
        }
    }
#else
    if (entry->version == type->tp_version_tag && entry->name == name) {
    if (entry->version == type->tp_version_tag && mcache_name_eq(entry->name, name)) {
        assert(type->tp_version_tag);
        OBJECT_STAT_INC_COND(type_cache_hits, !is_dunder_name(name));
        OBJECT_STAT_INC_COND(type_cache_dunder_hits, is_dunder_name(name));
@@ -5804,6 +5852,7 @@ _PyType_LookupStackRefAndVersion(PyTypeObject *type, PyObject *name, _PyStackRef

    if (has_version) {
#if Py_GIL_DISABLED
        assert(entry != NULL);
        update_cache_gil_disabled(entry, name, assigned_version, res);
#else
        PyObject *old_value = update_cache(entry, name, assigned_version, res);