Uh oh!
There was an error while loading. Please reload this page.
- Notifications
You must be signed in to change notification settings - Fork 32.4k
Closed
Description
Currently, `_asyncio` is built as a shared library module, which causes it to use slower function calls to get the thread state. If it is built as a static module instead, it can read the current thread state directly via the faster segment registers. This affects both free-threading and normal builds; however, on free-threading builds critical sections use the thread state heavily, so the impact is larger.
The function calls to `_PyThreadState_GetCurrent` are completely eliminated after the change, and the thread state is read directly via the `fs` register.
Normal build:
Before:
(gdb) disassemble _asyncio__get_running_loop
Dump of assembler code for function _asyncio__get_running_loop:
   0x00007ffff73e1d80 <+0>: push %rax
   0x00007ffff73e1d81 <+1>: call 0x7ffff73e1500 <_PyThreadState_GetCurrent@plt>
   0x00007ffff73e1d86 <+6>: mov 0x358(%rax),%rax
   0x00007ffff73e1d8d <+13>: test %rax,%rax
   0x00007ffff73e1d90 <+16>: je 0x7ffff73e1da2 <_asyncio__get_running_loop+34>
   0x00007ffff73e1d92 <+18>: mov (%rax),%ecx
   0x00007ffff73e1d94 <+20>: cmp $0xbfffffff,%ecx
   0x00007ffff73e1d9a <+26>: ja 0x7ffff73e1da0 <_asyncio__get_running_loop+32>
   0x00007ffff73e1d9c <+28>: inc %ecx
   0x00007ffff73e1d9e <+30>: mov %ecx,(%rax)
   0x00007ffff73e1da0 <+32>: pop %rcx
   0x00007ffff73e1da1 <+33>: ret
   0x00007ffff73e1da2 <+34>: mov 0xb1ef(%rip),%rax # 0x7ffff73ecf98
   0x00007ffff73e1da9 <+41>: pop %rcx
   0x00007ffff73e1daa <+42>: ret
End of assembler dump.
(gdb)
After:
(gdb) disassemble _asyncio__get_running_loop
Dump of assembler code for function _asyncio__get_running_loop:
   0x0000555555853c80 <+0>: mov $0xfffffffffffffff0,%rax
   0x0000555555853c87 <+7>: mov %fs:(%rax),%rax
   0x0000555555853c8b <+11>: mov 0x358(%rax),%rax
   0x0000555555853c92 <+18>: test %rax,%rax
   0x0000555555853c95 <+21>: je 0x555555853ca6 <_asyncio__get_running_loop+38>
   0x0000555555853c97 <+23>: mov (%rax),%ecx
   0x0000555555853c99 <+25>: cmp $0xbfffffff,%ecx
   0x0000555555853c9f <+31>: ja 0x555555853ca5 <_asyncio__get_running_loop+37>
   0x0000555555853ca1 <+33>: inc %ecx
   0x0000555555853ca3 <+35>: mov %ecx,(%rax)
   0x0000555555853ca5 <+37>: ret
   0x0000555555853ca6 <+38>: lea 0x1f377b(%rip),%rax # 0x555555a47428 <_Py_NoneStruct>
   0x0000555555853cad <+45>: ret
End of assembler dump.
free-threading:
Before:
(gdb) disassemble _asyncio_Future_done
Dump of assembler code for function _asyncio_Future_done:
   0x00007ffff7412c30 <+0>: push %r14
   0x00007ffff7412c32 <+2>: push %rbx
   0x00007ffff7412c33 <+3>: sub $0x18,%rsp
   0x00007ffff7412c37 <+7>: mov %rdi,%rbx
   0x00007ffff7412c3a <+10>: lea 0xa(%rdi),%r14
   0x00007ffff7412c3e <+14>: mov $0x1,%cl
   0x00007ffff7412c40 <+16>: xor %eax,%eax
   0x00007ffff7412c42 <+18>: lock cmpxchg %cl,0xa(%rdi)
   0x00007ffff7412c47 <+23>: jne 0x7ffff7412c74 <_asyncio_Future_done+68>
   0x00007ffff7412c49 <+25>: call 0x7ffff740c540 <_PyThreadState_GetCurrent@plt>
   0x00007ffff7412c4e <+30>: mov %r14,0x10(%rsp)
   0x00007ffff7412c53 <+35>: mov 0xb0(%rax),%rcx
   0x00007ffff7412c5a <+42>: mov %rcx,0x8(%rsp)
   0x00007ffff7412c5f <+47>: lea 0x8(%rsp),%rcx
   0x00007ffff7412c64 <+52>: mov %rcx,0xb0(%rax)
   0x00007ffff7412c6b <+59>: cmpq $0x0,0x20(%rbx)
   0x00007ffff7412c70 <+64>: jne 0x7ffff7412c88 <_asyncio_Future_done+88>
   0x00007ffff7412c72 <+66>: jmp 0x7ffff7412c8e <_asyncio_Future_done+94>
   0x00007ffff7412c74 <+68>: lea 0x8(%rsp),%rdi
   0x00007ffff7412c79 <+73>: mov %r14,%rsi
   0x00007ffff7412c7c <+76>: call 0x7ffff740c130 <_PyCriticalSection_BeginSlow@plt>
   0x00007ffff7412c81 <+81>: cmpq $0x0,0x20(%rbx)
   0x00007ffff7412c86 <+86>: je 0x7ffff7412c8e <_asyncio_Future_done+94>
   0x00007ffff7412c88 <+88>: cmpl $0x0,0x78(%rbx)
   0x00007ffff7412c8c <+92>: jne 0x7ffff7412ca1 <_asyncio_Future_done+113>
   0x00007ffff7412c8e <+94>: mov 0x92f3(%rip),%rbx # 0x7ffff741bf88
   0x00007ffff7412c95 <+101>: mov 0x10(%rsp),%rdi
   0x00007ffff7412c9a <+106>: test %rdi,%rdi
   0x00007ffff7412c9d <+109>: jne 0x7ffff7412cb2 <_asyncio_Future_done+130>
   0x00007ffff7412c9f <+111>: jmp 0x7ffff7412cdf <_asyncio_Future_done+175>
   0x00007ffff7412ca1 <+113>: mov 0x92f8(%rip),%rbx # 0x7ffff741bfa0
   0x00007ffff7412ca8 <+120>: mov 0x10(%rsp),%rdi
   0x00007ffff7412cad <+125>: test %rdi,%rdi
   0x00007ffff7412cb0 <+128>: je 0x7ffff7412cdf <_asyncio_Future_done+175>
   0x00007ffff7412cb2 <+130>: xor %ecx,%ecx
   0x00007ffff7412cb4 <+132>: mov $0x1,%al
   0x00007ffff7412cb6 <+134>: lock cmpxchg %cl,(%rdi)
   0x00007ffff7412cba <+138>: je 0x7ffff7412cc1 <_asyncio_Future_done+145>
   0x00007ffff7412cbc <+140>: call 0x7ffff740c550 <PyMutex_Unlock@plt>
   0x00007ffff7412cc1 <+145>: call 0x7ffff740c540 <_PyThreadState_GetCurrent@plt>
   0x00007ffff7412cc6 <+150>: mov 0x8(%rsp),%rcx
   0x00007ffff7412ccb <+155>: mov %rcx,0xb0(%rax)
   0x00007ffff7412cd2 <+162>: test $0x1,%cl
   0x00007ffff7412cd5 <+165>: je 0x7ffff7412cdf <_asyncio_Future_done+175>
   0x00007ffff7412cd7 <+167>: mov %rax,%rdi
   0x00007ffff7412cda <+170>: call 0x7ffff740c390 <_PyCriticalSection_Resume@plt>
   0x00007ffff7412cdf <+175>: mov %rbx,%rax
   0x00007ffff7412ce2 <+178>: add $0x18,%rsp
   0x00007ffff7412ce6 <+182>: pop %rbx
   0x00007ffff7412ce7 <+183>: pop %r14
   0x00007ffff7412ce9 <+185>: ret
End of assembler dump.
After:
(gdb) disassemble _asyncio_Future_done
Dump of assembler code for function _asyncio_Future_done:
   0x0000555555892fc0 <+0>: push %rbx
   0x0000555555892fc1 <+1>: sub $0x10,%rsp
   0x0000555555892fc5 <+5>: mov %rdi,%rbx
   0x0000555555892fc8 <+8>: lea 0xa(%rdi),%rsi
   0x0000555555892fcc <+12>: mov $0x1,%cl
   0x0000555555892fce <+14>: xor %eax,%eax
   0x0000555555892fd0 <+16>: lock cmpxchg %cl,0xa(%rdi)
   0x0000555555892fd5 <+21>: jne 0x555555893005 <_asyncio_Future_done+69>
   0x0000555555892fd7 <+23>: mov $0xfffffffffffffff0,%rax
   0x0000555555892fde <+30>: mov %fs:(%rax),%rax
   0x0000555555892fe2 <+34>: mov %rsi,0x8(%rsp)
   0x0000555555892fe7 <+39>: mov 0xb0(%rax),%rcx
   0x0000555555892fee <+46>: mov %rcx,(%rsp)
   0x0000555555892ff2 <+50>: mov %rsp,%rcx
   0x0000555555892ff5 <+53>: mov %rcx,0xb0(%rax)
   0x0000555555892ffc <+60>: cmpq $0x0,0x20(%rbx)
   0x0000555555893001 <+65>: jne 0x555555893014 <_asyncio_Future_done+84>
   0x0000555555893003 <+67>: jmp 0x55555589301a <_asyncio_Future_done+90>
   0x0000555555893005 <+69>: mov %rsp,%rdi
   0x0000555555893008 <+72>: call 0x5555557a4970 <_PyCriticalSection_BeginSlow>
   0x000055555589300d <+77>: cmpq $0x0,0x20(%rbx)
   0x0000555555893012 <+82>: je 0x55555589301a <_asyncio_Future_done+90>
   0x0000555555893014 <+84>: cmpl $0x0,0x78(%rbx)
   0x0000555555893018 <+88>: jne 0x555555893034 <_asyncio_Future_done+116>
   0x000055555589301a <+90>: lea 0x1e9f57(%rip),%rbx # 0x555555a7cf78 <_Py_FalseStruct>
   0x0000555555893021 <+97>: mov 0x8(%rsp),%rdi
   0x0000555555893026 <+102>: test %rdi,%rdi
   0x0000555555893029 <+105>: jne 0x555555893045 <_asyncio_Future_done+133>
   0x000055555589302b <+107>: mov %rbx,%rax
   0x000055555589302e <+110>: add $0x10,%rsp
   0x0000555555893032 <+114>: pop %rbx
   0x0000555555893033 <+115>: ret
   0x0000555555893034 <+116>: lea 0x1e9f0d(%rip),%rbx # 0x555555a7cf48 <_Py_TrueStruct>
   0x000055555589303b <+123>: mov 0x8(%rsp),%rdi
   0x0000555555893040 <+128>: test %rdi,%rdi
   0x0000555555893043 <+131>: je 0x55555589302b <_asyncio_Future_done+107>
   0x0000555555893045 <+133>: xor %ecx,%ecx
   0x0000555555893047 <+135>: mov $0x1,%al
   0x0000555555893049 <+137>: lock cmpxchg %cl,(%rdi)
   0x000055555589304d <+141>: je 0x555555893054 <_asyncio_Future_done+148>
   0x000055555589304f <+143>: call 0x5555557e2ec0 <PyMutex_Unlock>
   0x0000555555893054 <+148>: mov (%rsp),%rax
   0x0000555555893058 <+152>: mov $0xfffffffffffffff0,%rcx
   0x000055555589305f <+159>: mov %fs:(%rcx),%rdi
   0x0000555555893063 <+163>: mov %rax,0xb0(%rdi)
   0x000055555589306a <+170>: test $0x1,%al
   0x000055555589306c <+172>: je 0x55555589302b <_asyncio_Future_done+107>
   0x000055555589306e <+174>: call 0x5555557a4ae0 <_PyCriticalSection_Resume>
   0x0000555555893073 <+179>: mov %rbx,%rax
   0x0000555555893076 <+182>: add $0x10,%rsp
   0x000055555589307a <+186>: pop %rbx
   0x000055555589307b <+187>: ret
End of assembler dump.