/*
 * Copyright (C) 2008 The Android Open Source Project
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <pthread.h>

#include <errno.h>
#include <limits.h>
#include <stdatomic.h>
#include <stdlib.h>
#include <string.h>
#include <sys/cdefs.h>
#include <sys/mman.h>
#include <unistd.h>

#include "pthread_internal.h"

#include "private/bionic_constants.h"
#include "private/bionic_fortify.h"
#include "private/bionic_futex.h"
#include "private/bionic_systrace.h"
#include "private/bionic_time_conversions.h"
#include "private/bionic_tls.h"

/* A mutex attribute holds the following fields:
 *
 * bits:   name       description
 * 0-3     type       type of mutex
 * 4       shared     process-shared flag
 * 5       protocol   whether it is a priority inheritance mutex
 */
#define MUTEXATTR_TYPE_MASK      0x000f
#define MUTEXATTR_SHARED_MASK    0x0010
#define MUTEXATTR_PROTOCOL_MASK  0x0020

#define MUTEXATTR_PROTOCOL_SHIFT 5
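
// Worked example (illustration only; assumes PTHREAD_MUTEX_RECURSIVE == 1 and
// PTHREAD_PRIO_INHERIT == 1, as in bionic's <pthread.h>): a process-shared,
// recursive, priority-inheritance attribute is encoded as
//   PTHREAD_MUTEX_RECURSIVE | MUTEXATTR_SHARED_MASK |
//       (PTHREAD_PRIO_INHERIT << MUTEXATTR_PROTOCOL_SHIFT)
//   == 0x0001 | 0x0010 | 0x0020 == 0x0031.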

int pthread_mutexattr_init(pthread_mutexattr_t* attr)
{
    *attr = PTHREAD_MUTEX_DEFAULT;
    return 0;
}

int pthread_mutexattr_destroy(pthread_mutexattr_t* attr)
{
    *attr = -1;
    return 0;
}

int pthread_mutexattr_gettype(const pthread_mutexattr_t* attr, int* type_p)
{
    int type = (*attr & MUTEXATTR_TYPE_MASK);

    if (type < PTHREAD_MUTEX_NORMAL || type > PTHREAD_MUTEX_ERRORCHECK) {
        return EINVAL;
    }

    *type_p = type;
    return 0;
}

int pthread_mutexattr_settype(pthread_mutexattr_t* attr, int type)
{
    if (type < PTHREAD_MUTEX_NORMAL || type > PTHREAD_MUTEX_ERRORCHECK) {
        return EINVAL;
    }

    *attr = (*attr & ~MUTEXATTR_TYPE_MASK) | type;
    return 0;
}

/* Process-shared mutexes are supported, but not robustly: there is no cleanup
 * if a process dies while holding one (see below). */

int pthread_mutexattr_setpshared(pthread_mutexattr_t* attr, int pshared)
{
    switch (pshared) {
    case PTHREAD_PROCESS_PRIVATE:
        *attr &= ~MUTEXATTR_SHARED_MASK;
        return 0;

    case PTHREAD_PROCESS_SHARED:
        /* Our current implementation of pthread actually supports shared
         * mutexes but won't clean up if a process dies with the mutex held.
         * Nevertheless, it's better than nothing. Shared mutexes are used
         * by surfaceflinger and audioflinger.
         */
        *attr |= MUTEXATTR_SHARED_MASK;
        return 0;
    }
    return EINVAL;
}

int pthread_mutexattr_getpshared(const pthread_mutexattr_t* attr, int* pshared) {
    *pshared = (*attr & MUTEXATTR_SHARED_MASK) ? PTHREAD_PROCESS_SHARED : PTHREAD_PROCESS_PRIVATE;
    return 0;
}

int pthread_mutexattr_setprotocol(pthread_mutexattr_t* attr, int protocol) {
    if (protocol != PTHREAD_PRIO_NONE && protocol != PTHREAD_PRIO_INHERIT) {
        return EINVAL;
    }
    *attr = (*attr & ~MUTEXATTR_PROTOCOL_MASK) | (protocol << MUTEXATTR_PROTOCOL_SHIFT);
    return 0;
}

int pthread_mutexattr_getprotocol(const pthread_mutexattr_t* attr, int* protocol) {
    *protocol = (*attr & MUTEXATTR_PROTOCOL_MASK) >> MUTEXATTR_PROTOCOL_SHIFT;
    return 0;
}
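
// How a caller would request a priority-inheritance mutex with these
// attributes; a minimal usage sketch, not part of bionic itself:
//
//   pthread_mutexattr_t attr;
//   pthread_mutexattr_init(&attr);
//   pthread_mutexattr_setprotocol(&attr, PTHREAD_PRIO_INHERIT);
//   pthread_mutex_t m;
//   pthread_mutex_init(&m, &attr);
//   pthread_mutexattr_destroy(&attr);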

// Priority Inheritance mutex implementation
struct PIMutex {
    // mutex type, can be 0 (normal), 1 (recursive), 2 (errorcheck), constant during lifetime
    uint8_t type;
    // process-shared flag, constant during lifetime
    bool shared;
    // number of times the owner has recursively locked the mutex, minus one
    uint16_t counter;
    // owner_tid is read/written by both userspace code and kernel code. It includes three fields:
    // FUTEX_WAITERS, FUTEX_OWNER_DIED and FUTEX_TID_MASK.
    atomic_int owner_tid;
};

static inline __always_inline int PIMutexTryLock(PIMutex& mutex) {
    pid_t tid = __get_thread()->tid;
    // Handle common case first.
    int old_owner = 0;
    if (__predict_true(atomic_compare_exchange_strong_explicit(&mutex.owner_tid,
                                                               &old_owner, tid,
                                                               memory_order_acquire,
                                                               memory_order_relaxed))) {
        return 0;
    }
    if (tid == (old_owner & FUTEX_TID_MASK)) {
        // We already own this mutex.
        if (mutex.type == PTHREAD_MUTEX_NORMAL) {
            return EBUSY;
        }
        if (mutex.type == PTHREAD_MUTEX_ERRORCHECK) {
            return EDEADLK;
        }
        if (mutex.counter == 0xffff) {
            return EAGAIN;
        }
        mutex.counter++;
        return 0;
    }
    return EBUSY;
}

// Inlining this function in pthread_mutex_lock() adds the cost of stack frame instructions on
// ARM/ARM64, which adds up to 20 percent overhead. So make it noinline.
static int __attribute__((noinline)) PIMutexTimedLock(PIMutex& mutex,
                                                      bool use_realtime_clock,
                                                      const timespec* abs_timeout) {
    int ret = PIMutexTryLock(mutex);
    if (__predict_true(ret == 0)) {
        return 0;
    }
    if (ret == EBUSY) {
        char trace_msg[64];
        const pid_t owner = atomic_load_explicit(&mutex.owner_tid, memory_order_relaxed)
                & FUTEX_TID_MASK;
        snprintf(trace_msg, sizeof(trace_msg),
                 "Contending for pthread mutex owned by tid: %d", owner);
        ScopedTrace trace(trace_msg);
        ret = -__futex_pi_lock_ex(&mutex.owner_tid, mutex.shared, use_realtime_clock, abs_timeout);
    }
    return ret;
}

static int PIMutexUnlock(PIMutex& mutex) {
    pid_t tid = __get_thread()->tid;
    int old_owner = tid;
    // Handle common case first.
    if (__predict_true(mutex.type == PTHREAD_MUTEX_NORMAL)) {
        if (__predict_true(atomic_compare_exchange_strong_explicit(&mutex.owner_tid,
                                                                   &old_owner, 0,
                                                                   memory_order_release,
                                                                   memory_order_relaxed))) {
            return 0;
        }
    } else {
        old_owner = atomic_load_explicit(&mutex.owner_tid, memory_order_relaxed);
    }

    if (tid != (old_owner & FUTEX_TID_MASK)) {
        // The mutex can only be unlocked by the thread that owns it.
        return EPERM;
    }
    if (mutex.type == PTHREAD_MUTEX_RECURSIVE) {
        if (mutex.counter != 0u) {
            --mutex.counter;
            return 0;
        }
    }
    if (old_owner == tid) {
        // No thread is waiting.
        if (__predict_true(atomic_compare_exchange_strong_explicit(&mutex.owner_tid,
                                                                   &old_owner, 0,
                                                                   memory_order_release,
                                                                   memory_order_relaxed))) {
            return 0;
        }
    }
    return -__futex_pi_unlock(&mutex.owner_tid, mutex.shared);
}

static int PIMutexDestroy(PIMutex& mutex) {
    // The mutex should be in unlocked state (owner_tid == 0) when destroyed.
    // Store 0xffffffff to make the mutex unusable.
    int old_owner = 0;
    if (atomic_compare_exchange_strong_explicit(&mutex.owner_tid, &old_owner, 0xffffffff,
                                                memory_order_relaxed, memory_order_relaxed)) {
        return 0;
    }
    return EBUSY;
}

#if !defined(__LP64__)

namespace PIMutexAllocator {
// pthread_mutex_t has only 4 bytes in 32-bit programs, which are not enough to hold PIMutex.
// So we use malloc to allocate PIMutexes and use 16 bits of pthread_mutex_t as an index to find
// the allocated PIMutex. This allows at most 65536 PI mutexes.
// When calling operations like pthread_mutex_lock/unlock, the 16-bit index is mapped to the
// corresponding PIMutex. To make the map operation fast, we use a lockless mapping method:
// once a PIMutex is allocated, the data used to map its index to the PIMutex isn't changed until
// the PIMutex is destroyed.
// Below are the data structures:
//   // struct Node contains a PIMutex.
//   typedef Node NodeArray[256];
//   typedef NodeArray* NodeArrayP;
//   NodeArrayP nodes[256];
//
// A 16-bit index is mapped to a Node as below:
//   (*nodes[index >> 8])[index & 0xff]
//
// Also use a free list to allow O(1) reuse of recycled PIMutexes.
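//
// For example (illustration only): id 0x1234 resolves to (*nodes[0x12])[0x34].
// Neither nodes[0x12] nor the NodeArray it points to is freed or moved while any
// id in it is live, which is why lookups need no lock.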

union Node {
    PIMutex mutex;
    int next_free_id;  // If not -1, refers to the next node in the free PIMutex list.
};
typedef Node NodeArray[256];
typedef NodeArray* NodeArrayP;

// lock protects the fields below.
static Lock lock;
static NodeArrayP* nodes;
static int next_to_alloc_id;
static int first_free_id = -1;  // If not -1, refers to the first node in the free PIMutex list.

static inline __always_inline Node& IdToNode(int id) {
    return (*nodes[id >> 8])[id & 0xff];
}

static inline __always_inline PIMutex& IdToPIMutex(int id) {
    return IdToNode(id).mutex;
}

static int AllocIdLocked() {
    if (first_free_id != -1) {
        int result = first_free_id;
        first_free_id = IdToNode(result).next_free_id;
        return result;
    }
    if (next_to_alloc_id >= 0x10000) {
        return -1;
    }
    int array_pos = next_to_alloc_id >> 8;
    int node_pos = next_to_alloc_id & 0xff;
    if (node_pos == 0) {
        if (array_pos == 0) {
            nodes = static_cast<NodeArray**>(calloc(256, sizeof(NodeArray*)));
            if (nodes == nullptr) {
                return -1;
            }
        }
        nodes[array_pos] = static_cast<NodeArray*>(malloc(sizeof(NodeArray)));
        if (nodes[array_pos] == nullptr) {
            return -1;
        }
    }
    return next_to_alloc_id++;
}

// On success, returns an id referring to a PIMutex; otherwise returns -1.
// A valid id is in range [0, 0xffff].
static int AllocId() {
    lock.lock();
    int result = AllocIdLocked();
    lock.unlock();
    if (result != -1) {
        memset(&IdToPIMutex(result), 0, sizeof(PIMutex));
    }
    return result;
}

static void FreeId(int id) {
    lock.lock();
    IdToNode(id).next_free_id = first_free_id;
    first_free_id = id;
    lock.unlock();
}

}  // namespace PIMutexAllocator

#endif  // !defined(__LP64__)


/* Convenience macro, creates a mask of 'bits' bits that starts from
 * the 'shift'-th least significant bit in a 32-bit word.
 *
 * Examples: FIELD_MASK(0,4)  -> 0xf
 *           FIELD_MASK(16,9) -> 0x1ff0000
 */
#define FIELD_MASK(shift,bits)           (((1 << (bits)) - 1) << (shift))

/* This one is used to create a bit pattern from a given field value */
#define FIELD_TO_BITS(val,shift,bits)    (((val) & ((1 << (bits)) - 1)) << (shift))

/* And this one does the opposite, i.e. extract a field's value from a bit pattern */
#define FIELD_FROM_BITS(val,shift,bits)  (((val) >> (shift)) & ((1 << (bits)) - 1))
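
/* Worked example (illustration only): FIELD_TO_BITS(5, 2, 11) == 5 << 2 == 0x14,
 * and FIELD_FROM_BITS(0x14, 2, 11) == 5 recovers the field value.
 */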

/* Convenience macros.
 *
 * These are used to form or modify the bit pattern of a given mutex value.
 */

/* Mutex state:
 *
 * 0 for unlocked
 * 1 for locked, no waiters
 * 2 for locked, maybe waiters
 */
#define MUTEX_STATE_SHIFT  0
#define MUTEX_STATE_LEN    2

#define MUTEX_STATE_MASK          FIELD_MASK(MUTEX_STATE_SHIFT, MUTEX_STATE_LEN)
#define MUTEX_STATE_FROM_BITS(v)  FIELD_FROM_BITS(v, MUTEX_STATE_SHIFT, MUTEX_STATE_LEN)
#define MUTEX_STATE_TO_BITS(v)    FIELD_TO_BITS(v, MUTEX_STATE_SHIFT, MUTEX_STATE_LEN)

#define MUTEX_STATE_UNLOCKED            0  /* must be 0 to match PTHREAD_MUTEX_INITIALIZER */
#define MUTEX_STATE_LOCKED_UNCONTENDED  1  /* must be 1 due to atomic dec in unlock operation */
#define MUTEX_STATE_LOCKED_CONTENDED    2  /* must be 1 + LOCKED_UNCONTENDED due to atomic dec */

#define MUTEX_STATE_BITS_UNLOCKED            MUTEX_STATE_TO_BITS(MUTEX_STATE_UNLOCKED)
#define MUTEX_STATE_BITS_LOCKED_UNCONTENDED  MUTEX_STATE_TO_BITS(MUTEX_STATE_LOCKED_UNCONTENDED)
#define MUTEX_STATE_BITS_LOCKED_CONTENDED    MUTEX_STATE_TO_BITS(MUTEX_STATE_LOCKED_CONTENDED)

// Return true iff the mutex is unlocked.
#define MUTEX_STATE_BITS_IS_UNLOCKED(v)  (((v) & MUTEX_STATE_MASK) == MUTEX_STATE_BITS_UNLOCKED)

// Return true iff the mutex is locked with no waiters.
#define MUTEX_STATE_BITS_IS_LOCKED_UNCONTENDED(v)  (((v) & MUTEX_STATE_MASK) == MUTEX_STATE_BITS_LOCKED_UNCONTENDED)

// Return true iff the mutex is locked with maybe waiters.
#define MUTEX_STATE_BITS_IS_LOCKED_CONTENDED(v)  (((v) & MUTEX_STATE_MASK) == MUTEX_STATE_BITS_LOCKED_CONTENDED)

/* Used to flip from LOCKED_UNCONTENDED to LOCKED_CONTENDED */
#define MUTEX_STATE_BITS_FLIP_CONTENTION(v)  ((v) ^ (MUTEX_STATE_BITS_LOCKED_CONTENDED ^ MUTEX_STATE_BITS_LOCKED_UNCONTENDED))
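
// For illustration: the XOR mask above is 1 ^ 2 == 3, so flipping toggles the
// state field between LOCKED_UNCONTENDED and LOCKED_CONTENDED without touching
// any other bits, e.g. 0x4001 (recursive, locked, no waiters) becomes 0x4002.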

/* Mutex counter:
 *
 * We need to check for overflow before incrementing, and we also need to
 * detect when the counter is 0.
 */
#define MUTEX_COUNTER_SHIFT  2
#define MUTEX_COUNTER_LEN    11
#define MUTEX_COUNTER_MASK   FIELD_MASK(MUTEX_COUNTER_SHIFT, MUTEX_COUNTER_LEN)

#define MUTEX_COUNTER_BITS_WILL_OVERFLOW(v)  (((v) & MUTEX_COUNTER_MASK) == MUTEX_COUNTER_MASK)
#define MUTEX_COUNTER_BITS_IS_ZERO(v)        (((v) & MUTEX_COUNTER_MASK) == 0)

/* Used to increment the counter directly after overflow has been checked */
#define MUTEX_COUNTER_BITS_ONE  FIELD_TO_BITS(1, MUTEX_COUNTER_SHIFT, MUTEX_COUNTER_LEN)

/* Mutex shared bit flag
 *
 * This flag is set to indicate that the mutex is shared among processes.
 * This changes the futex opcode we use for futex wait/wake operations
 * (non-shared operations are much faster).
 */
#define MUTEX_SHARED_SHIFT  13
#define MUTEX_SHARED_MASK   FIELD_MASK(MUTEX_SHARED_SHIFT, 1)

/* Mutex type:
 * We support normal, recursive and errorcheck mutexes.
 */
#define MUTEX_TYPE_SHIFT  14
#define MUTEX_TYPE_LEN    2
#define MUTEX_TYPE_MASK   FIELD_MASK(MUTEX_TYPE_SHIFT, MUTEX_TYPE_LEN)

#define MUTEX_TYPE_TO_BITS(t)  FIELD_TO_BITS(t, MUTEX_TYPE_SHIFT, MUTEX_TYPE_LEN)

#define MUTEX_TYPE_BITS_NORMAL      MUTEX_TYPE_TO_BITS(PTHREAD_MUTEX_NORMAL)
#define MUTEX_TYPE_BITS_RECURSIVE   MUTEX_TYPE_TO_BITS(PTHREAD_MUTEX_RECURSIVE)
#define MUTEX_TYPE_BITS_ERRORCHECK  MUTEX_TYPE_TO_BITS(PTHREAD_MUTEX_ERRORCHECK)
// Use a special mutex type to mark priority inheritance mutexes.
#define PI_MUTEX_STATE  MUTEX_TYPE_TO_BITS(3)

// A PI mutex contains the following fields:
//   Atomic(uint16_t) state;
//   PIMutex pi_mutex;  // uint16_t pi_mutex_id in 32-bit programs
//
// state holds the following fields:
//
//   bits:   name     description
//   15-14   type     mutex type, should be 3
//   13-0    padding  should be 0
//
// pi_mutex holds the state of a PI mutex.
// pi_mutex_id holds an integer used to find the state of a PI mutex.
//
// A Non-PI mutex contains the following fields:
//   Atomic(uint16_t) state;
//   atomic_int owner_tid;  // Atomic(uint16_t) in 32-bit programs
//
// state holds the following fields:
//
//   bits:   name     description
//   15-14   type     mutex type, can be 0 (normal), 1 (recursive), 2 (errorcheck)
//   13      shared   process-shared flag
//   12-2    counter  number of times the owner has recursively locked the mutex, minus one
//   1-0     state    lock state (0, 1 or 2)
//
// bits 15-13 are constant during the lifetime of the mutex.
//
// owner_tid is used only in recursive and errorcheck Non-PI mutexes to hold the mutex owner
// thread id.
//
// PI mutexes and Non-PI mutexes are distinguished by checking the type field in state.
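//
// Worked example (illustration only): a process-shared recursive Non-PI mutex,
// locked twice by its owner with no waiters, has
//   type = 1 (<< 14) | shared = 1 (<< 13) | counter = 1 (<< 2) | state = 1
//   == 0x4000 | 0x2000 | 0x0004 | 0x0001 == 0x6005.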
#if defined(__LP64__)
struct pthread_mutex_internal_t {
    _Atomic(uint16_t) state;
    uint16_t __pad;
    union {
        atomic_int owner_tid;
        PIMutex pi_mutex;
    };
    char __reserved[28];

    PIMutex& ToPIMutex() {
        return pi_mutex;
    }

    void FreePIMutex() {
    }
} __attribute__((aligned(4)));

#else
struct pthread_mutex_internal_t {
    _Atomic(uint16_t) state;
    union {
        _Atomic(uint16_t) owner_tid;
        uint16_t pi_mutex_id;
    };

    PIMutex& ToPIMutex() {
        return PIMutexAllocator::IdToPIMutex(pi_mutex_id);
    }

    void FreePIMutex() {
        PIMutexAllocator::FreeId(pi_mutex_id);
    }
} __attribute__((aligned(4)));
#endif

static_assert(sizeof(pthread_mutex_t) == sizeof(pthread_mutex_internal_t),
              "pthread_mutex_t should actually be pthread_mutex_internal_t in implementation.");

// For binary compatibility with old versions of pthread_mutex_t, we can't use stricter
// alignment than 4-byte alignment.
static_assert(alignof(pthread_mutex_t) == 4,
              "pthread_mutex_t should fulfill the alignment of pthread_mutex_internal_t.");

static inline pthread_mutex_internal_t* __get_internal_mutex(pthread_mutex_t* mutex_interface) {
    return reinterpret_cast<pthread_mutex_internal_t*>(mutex_interface);
}

int pthread_mutex_init(pthread_mutex_t* mutex_interface, const pthread_mutexattr_t* attr) {
    pthread_mutex_internal_t* mutex = __get_internal_mutex(mutex_interface);

    memset(mutex, 0, sizeof(pthread_mutex_internal_t));

    if (__predict_true(attr == nullptr)) {
        atomic_store_explicit(&mutex->state, MUTEX_TYPE_BITS_NORMAL, memory_order_relaxed);
        return 0;
    }

    uint16_t state = 0;
    if ((*attr & MUTEXATTR_SHARED_MASK) != 0) {
        state |= MUTEX_SHARED_MASK;
    }

    switch (*attr & MUTEXATTR_TYPE_MASK) {
    case PTHREAD_MUTEX_NORMAL:
        state |= MUTEX_TYPE_BITS_NORMAL;
        break;
    case PTHREAD_MUTEX_RECURSIVE:
        state |= MUTEX_TYPE_BITS_RECURSIVE;
        break;
    case PTHREAD_MUTEX_ERRORCHECK:
        state |= MUTEX_TYPE_BITS_ERRORCHECK;
        break;
    default:
        return EINVAL;
    }

    if (((*attr & MUTEXATTR_PROTOCOL_MASK) >> MUTEXATTR_PROTOCOL_SHIFT) == PTHREAD_PRIO_INHERIT) {
#if !defined(__LP64__)
        if (state & MUTEX_SHARED_MASK) {
            return EINVAL;
        }
        int id = PIMutexAllocator::AllocId();
        if (id == -1) {
            return ENOMEM;
        }
        mutex->pi_mutex_id = id;
#endif
        atomic_store_explicit(&mutex->state, PI_MUTEX_STATE, memory_order_relaxed);
        PIMutex& pi_mutex = mutex->ToPIMutex();
        pi_mutex.type = *attr & MUTEXATTR_TYPE_MASK;
        pi_mutex.shared = (*attr & MUTEXATTR_SHARED_MASK) != 0;
    } else {
        atomic_store_explicit(&mutex->state, state, memory_order_relaxed);
        atomic_store_explicit(&mutex->owner_tid, 0, memory_order_relaxed);
    }
    return 0;
}

// Namespace for Non-PI mutex routines.
namespace NonPI {

static inline __always_inline int NormalMutexTryLock(pthread_mutex_internal_t* mutex,
                                                     uint16_t shared) {
    const uint16_t unlocked = shared | MUTEX_STATE_BITS_UNLOCKED;
    const uint16_t locked_uncontended = shared | MUTEX_STATE_BITS_LOCKED_UNCONTENDED;

    uint16_t old_state = unlocked;
    if (__predict_true(atomic_compare_exchange_strong_explicit(&mutex->state, &old_state,
                           locked_uncontended, memory_order_acquire, memory_order_relaxed))) {
        return 0;
    }
    return EBUSY;
}

/*
 * Lock a normal Non-PI mutex.
 *
 * As noted above, there are three states:
 *   0 (unlocked, no contention)
 *   1 (locked, no contention)
 *   2 (locked, contention)
 *
 * Non-recursive mutexes don't use the thread-id or counter fields, and the
 * "type" value is zero, so the only bits that will be set are the ones in
 * the lock state field.
 */
static inline __always_inline int NormalMutexLock(pthread_mutex_internal_t* mutex,
                                                  uint16_t shared,
                                                  bool use_realtime_clock,
                                                  const timespec* abs_timeout_or_null) {
    if (__predict_true(NormalMutexTryLock(mutex, shared) == 0)) {
        return 0;
    }
    int result = check_timespec(abs_timeout_or_null, true);
    if (result != 0) {
        return result;
    }

    ScopedTrace trace("Contending for pthread mutex");

    const uint16_t unlocked = shared | MUTEX_STATE_BITS_UNLOCKED;
    const uint16_t locked_contended = shared | MUTEX_STATE_BITS_LOCKED_CONTENDED;

    // We want to go to sleep until the mutex is available, which requires
    // promoting it to locked_contended. We need to swap in the new state
    // and then wait until somebody wakes us up.
    // An atomic_exchange is used to compete with other threads for the lock.
    // If it returns unlocked, we have acquired the lock; otherwise another
    // thread still holds the lock and we should wait again.
    // If the lock is acquired, an acquire fence is needed to make all memory
    // accesses made by other threads visible to the current CPU.
    while (atomic_exchange_explicit(&mutex->state, locked_contended,
                                    memory_order_acquire) != unlocked) {
        if (__futex_wait_ex(&mutex->state, shared, locked_contended, use_realtime_clock,
                            abs_timeout_or_null) == -ETIMEDOUT) {
            return ETIMEDOUT;
        }
    }
    return 0;
}
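
// Illustrative state-field lifecycle for a normal, non-shared mutex (a sketch
// of the transitions implemented above, using the MUTEX_STATE_* values):
//
//   lock, fast path:  0 -> 1  (compare_exchange in NormalMutexTryLock)
//   lock, slow path:  exchange state to 2, futex-wait while the exchange
//                     doesn't return 0
//   unlock:           1 -> 0 with no wake, or 2 -> 0 followed by one futex wake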

/*
 * Release a normal Non-PI mutex. The caller is responsible for determining
 * that we are in fact the owner of this lock.
 */
static inline __always_inline void NormalMutexUnlock(pthread_mutex_internal_t* mutex,
                                                     uint16_t shared) {
    const uint16_t unlocked = shared | MUTEX_STATE_BITS_UNLOCKED;
    const uint16_t locked_contended = shared | MUTEX_STATE_BITS_LOCKED_CONTENDED;

    // We use an atomic_exchange to release the lock. If the locked_contended
    // state is returned, some thread is waiting for the lock and we need to
    // wake up one of them.
    // A release fence is required to make previous stores visible to the next
    // lock owner threads.
    if (atomic_exchange_explicit(&mutex->state, unlocked,
                                 memory_order_release) == locked_contended) {
        // Wake up one waiting thread. We don't know which thread will be
        // woken or when it'll start executing -- futexes make no guarantees
        // here. There may not even be a thread waiting.
        //
        // The newly-woken thread will replace the unlocked state we just set above
        // with locked_contended state, which means that when it eventually releases
        // the mutex it will also call FUTEX_WAKE. This results in one extra wake
        // call whenever a lock is contended, but lets us avoid forgetting anyone
        // without requiring us to track the number of sleepers.
        //
        // It's possible for another thread to sneak in and grab the lock between
        // the exchange above and the wake call below. If the new thread is "slow"
        // and holds the lock for a while, we'll wake up a sleeper, which will swap
        // in locked_uncontended state and then go back to sleep since the lock is
        // still held. If the new thread is "fast", running to completion before
        // we call wake, the thread we eventually wake will find an unlocked mutex
        // and will execute. Either way we have correct behavior and nobody is
        // orphaned on the wait queue.
        //
        // The pthread_mutex_internal_t object may have been deallocated between the
        // atomic exchange and the wake call. In that case, this wake call could
        // target unmapped memory or memory used by an otherwise unrelated futex
        // operation. Even if the kernel avoids spurious futex wakeups from its
        // point of view, this wake call could trigger a spurious wakeup in any
        // futex accessible from this process. References:
        // - https://lkml.org/lkml/2014/11/27/472
        // - http://austingroupbugs.net/view.php?id=811#c2267
        __futex_wake_ex(&mutex->state, shared, 1);
    }
}

/* This common inlined function is used to increment the counter of a recursive Non-PI mutex.
 *
 * If the counter overflows, it will return EAGAIN.
 * Otherwise, it atomically increments the counter and returns 0.
 */
static inline __always_inline int RecursiveIncrement(pthread_mutex_internal_t* mutex,
                                                     uint16_t old_state) {
    // Detect recursive lock overflow and return EAGAIN.
    // This is safe because only the owner thread can modify the
    // counter bits in the mutex value.
    if (MUTEX_COUNTER_BITS_WILL_OVERFLOW(old_state)) {
        return EAGAIN;
    }

    // Other threads are able to change the lower bits (e.g. promoting it to "contended"),
    // but the mutex counter will not overflow. So we use an atomic_fetch_add operation here.
    // The mutex is already locked by the current thread, so we don't need an acquire fence.
    atomic_fetch_add_explicit(&mutex->state, MUTEX_COUNTER_BITS_ONE, memory_order_relaxed);
    return 0;
}
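
// Counter arithmetic, for illustration: MUTEX_COUNTER_BITS_ONE == 1 << 2 == 0x4,
// so relocking a recursive mutex whose state is 0x4005 (type 1, counter 1,
// locked_uncontended) takes it to 0x4009 (counter 2); the low state bits are
// untouched.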

// Wait on a recursive or errorcheck Non-PI mutex.
static inline __always_inline int RecursiveOrErrorcheckMutexWait(pthread_mutex_internal_t* mutex,
                                                                 uint16_t shared,
                                                                 uint16_t old_state,
                                                                 bool use_realtime_clock,
                                                                 const timespec* abs_timeout) {
// __futex_wait always waits on a 32-bit value, but state is 16-bit. For a normal mutex, the
// owner_tid field in mutex is not used. On 64-bit devices, the __pad field in mutex is not used.
// But when a recursive or errorcheck mutex is used on 32-bit devices, we need to add the
// owner_tid value to the value argument for __futex_wait, otherwise we may always get EAGAIN.
#if defined(__LP64__)
    return __futex_wait_ex(&mutex->state, shared, old_state, use_realtime_clock, abs_timeout);

#else
    // This implementation works only when the layout of pthread_mutex_internal_t matches the
    // expectation below. It is also based on the assumption that Android always runs on
    // little-endian devices.
    static_assert(offsetof(pthread_mutex_internal_t, state) == 0, "");
    static_assert(offsetof(pthread_mutex_internal_t, owner_tid) == 2, "");

    uint32_t owner_tid = atomic_load_explicit(&mutex->owner_tid, memory_order_relaxed);
    return __futex_wait_ex(&mutex->state, shared, (owner_tid << 16) | old_state,
                           use_realtime_clock, abs_timeout);
#endif
}
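
// For illustration: with owner_tid == 0x0457 and old_state == 0x4002, the 32-bit
// futex value on a little-endian 32-bit device is (0x0457 << 16) | 0x4002
// == 0x04574002, matching the in-memory layout {state, owner_tid}.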

// Lock a Non-PI mutex.
static int MutexLockWithTimeout(pthread_mutex_internal_t* mutex, bool use_realtime_clock,
                                const timespec* abs_timeout_or_null) {
    uint16_t old_state = atomic_load_explicit(&mutex->state, memory_order_relaxed);
    uint16_t mtype = (old_state & MUTEX_TYPE_MASK);
    uint16_t shared = (old_state & MUTEX_SHARED_MASK);

    // Handle common case first.
    if (__predict_true(mtype == MUTEX_TYPE_BITS_NORMAL)) {
        return NormalMutexLock(mutex, shared, use_realtime_clock, abs_timeout_or_null);
    }

    // Do we already own this recursive or error-check mutex?
    pid_t tid = __get_thread()->tid;
    if (tid == atomic_load_explicit(&mutex->owner_tid, memory_order_relaxed)) {
        if (mtype == MUTEX_TYPE_BITS_ERRORCHECK) {
            return EDEADLK;
        }
        return RecursiveIncrement(mutex, old_state);
    }

    const uint16_t unlocked = mtype | shared | MUTEX_STATE_BITS_UNLOCKED;
    const uint16_t locked_uncontended = mtype | shared | MUTEX_STATE_BITS_LOCKED_UNCONTENDED;
    const uint16_t locked_contended = mtype | shared | MUTEX_STATE_BITS_LOCKED_CONTENDED;

    // First, if the mutex is unlocked, try to quickly acquire it.
    // In the optimistic case where this works, set the state to locked_uncontended.
    if (old_state == unlocked) {
        // If exchanged successfully, an acquire fence is required to make
        // all memory accesses made by other threads visible to the current CPU.
        if (__predict_true(atomic_compare_exchange_strong_explicit(&mutex->state, &old_state,
                               locked_uncontended, memory_order_acquire, memory_order_relaxed))) {
            atomic_store_explicit(&mutex->owner_tid, tid, memory_order_relaxed);
            return 0;
        }
    }

    ScopedTrace trace("Contending for pthread mutex");

    while (true) {
        if (old_state == unlocked) {
            // NOTE: We put the state to locked_contended since we _know_ there
            // is contention when we are in this loop. This ensures all waiters
            // will be unlocked.

            // If exchanged successfully, an acquire fence is required to make
            // all memory accesses made by other threads visible to the current CPU.
            if (__predict_true(atomic_compare_exchange_weak_explicit(&mutex->state,
                                                                     &old_state, locked_contended,
                                                                     memory_order_acquire,
                                                                     memory_order_relaxed))) {
                atomic_store_explicit(&mutex->owner_tid, tid, memory_order_relaxed);
                return 0;
            }
            continue;
        } else if (MUTEX_STATE_BITS_IS_LOCKED_UNCONTENDED(old_state)) {
            // We should set it to locked_contended before going to sleep, to make
            // sure waiters will be woken up eventually.

            int new_state = MUTEX_STATE_BITS_FLIP_CONTENTION(old_state);
            if (__predict_false(!atomic_compare_exchange_weak_explicit(&mutex->state,
                                                                       &old_state, new_state,
                                                                       memory_order_relaxed,
                                                                       memory_order_relaxed))) {
                continue;
            }
            old_state = new_state;
        }

        int result = check_timespec(abs_timeout_or_null, true);
        if (result != 0) {
            return result;
        }
        // We are in locked_contended state; sleep until someone wakes us up.
        if (RecursiveOrErrorcheckMutexWait(mutex, shared, old_state, use_realtime_clock,
                                           abs_timeout_or_null) == -ETIMEDOUT) {
            return ETIMEDOUT;
        }
        old_state = atomic_load_explicit(&mutex->state, memory_order_relaxed);
    }
}

}  // namespace NonPI

static inline __always_inline bool IsMutexDestroyed(uint16_t mutex_state) {
    return mutex_state == 0xffff;
}

// Inlining this function in pthread_mutex_lock() adds the cost of stack frame instructions on
// ARM64. So make it noinline.
static int __attribute__((noinline)) HandleUsingDestroyedMutex(pthread_mutex_t* mutex,
                                                               const char* function_name) {
    if (android_get_application_target_sdk_version() >= 28) {
        __fortify_fatal("%s called on a destroyed mutex (%p)", function_name, mutex);
    }
    return EBUSY;
}

int pthread_mutex_lock(pthread_mutex_t* mutex_interface) {
#if !defined(__LP64__)
    // Some apps depend on being able to pass NULL as a mutex and get EINVAL
    // back. Don't need to worry about it for LP64 since the ABI is brand new,
    // but keep compatibility for LP32. http://b/19995172.
    if (mutex_interface == nullptr) {
        return EINVAL;
    }
#endif

    pthread_mutex_internal_t* mutex = __get_internal_mutex(mutex_interface);
    uint16_t old_state = atomic_load_explicit(&mutex->state, memory_order_relaxed);
    uint16_t mtype = (old_state & MUTEX_TYPE_MASK);
    // Avoid slowing down fast path of normal mutex lock operation.
    if (__predict_true(mtype == MUTEX_TYPE_BITS_NORMAL)) {
        uint16_t shared = (old_state & MUTEX_SHARED_MASK);
        if (__predict_true(NonPI::NormalMutexTryLock(mutex, shared) == 0)) {
            return 0;
        }
    }
    if (old_state == PI_MUTEX_STATE) {
        PIMutex& m = mutex->ToPIMutex();
        // Handle common case first.
        if (__predict_true(PIMutexTryLock(m) == 0)) {
            return 0;
        }
        return PIMutexTimedLock(mutex->ToPIMutex(), false, nullptr);
    }
    if (__predict_false(IsMutexDestroyed(old_state))) {
        return HandleUsingDestroyedMutex(mutex_interface, __FUNCTION__);
    }
    return NonPI::MutexLockWithTimeout(mutex, false, nullptr);
}

int pthread_mutex_unlock(pthread_mutex_t* mutex_interface) {
#if !defined(__LP64__)
    // Some apps depend on being able to pass NULL as a mutex and get EINVAL
    // back. Don't need to worry about it for LP64 since the ABI is brand new,
    // but keep compatibility for LP32. http://b/19995172.
    if (mutex_interface == nullptr) {
        return EINVAL;
    }
#endif

    pthread_mutex_internal_t* mutex = __get_internal_mutex(mutex_interface);
    uint16_t old_state = atomic_load_explicit(&mutex->state, memory_order_relaxed);
    uint16_t mtype = (old_state & MUTEX_TYPE_MASK);
    uint16_t shared = (old_state & MUTEX_SHARED_MASK);

    // Handle common case first.
    if (__predict_true(mtype == MUTEX_TYPE_BITS_NORMAL)) {
        NonPI::NormalMutexUnlock(mutex, shared);
        return 0;
    }
    if (old_state == PI_MUTEX_STATE) {
        return PIMutexUnlock(mutex->ToPIMutex());
    }
    if (__predict_false(IsMutexDestroyed(old_state))) {
        return HandleUsingDestroyedMutex(mutex_interface, __FUNCTION__);
    }

    // Do we already own this recursive or error-check mutex?
    pid_t tid = __get_thread()->tid;
    if (tid != atomic_load_explicit(&mutex->owner_tid, memory_order_relaxed)) {
        return EPERM;
    }

    // If the counter is > 0, we can simply decrement it atomically.
    // Since only the owner thread can change the counter bits (other threads
    // can only mutate the lower state bits), an atomic_fetch_sub is enough.
    if (!MUTEX_COUNTER_BITS_IS_ZERO(old_state)) {
        // We still own the mutex, so a release fence is not needed.
        atomic_fetch_sub_explicit(&mutex->state, MUTEX_COUNTER_BITS_ONE, memory_order_relaxed);
        return 0;
    }

    // The counter is 0, so we are going to unlock the mutex by resetting its
    // state to unlocked. We need to perform an atomic_exchange in order to read
    // the current state, which will be locked_contended if there may be waiters
    // to wake.
    // A release fence is required to make previous stores visible to the next
    // lock owner threads.
    atomic_store_explicit(&mutex->owner_tid, 0, memory_order_relaxed);
    const uint16_t unlocked = mtype | shared | MUTEX_STATE_BITS_UNLOCKED;
    old_state = atomic_exchange_explicit(&mutex->state, unlocked, memory_order_release);
    if (MUTEX_STATE_BITS_IS_LOCKED_CONTENDED(old_state)) {
        __futex_wake_ex(&mutex->state, shared, 1);
    }

    return 0;
}

int pthread_mutex_trylock(pthread_mutex_t* mutex_interface) {
    pthread_mutex_internal_t* mutex = __get_internal_mutex(mutex_interface);

    uint16_t old_state = atomic_load_explicit(&mutex->state, memory_order_relaxed);
    uint16_t mtype = (old_state & MUTEX_TYPE_MASK);

    // Handle common case first.
    if (__predict_true(mtype == MUTEX_TYPE_BITS_NORMAL)) {
        uint16_t shared = (old_state & MUTEX_SHARED_MASK);
        return NonPI::NormalMutexTryLock(mutex, shared);
    }
    if (old_state == PI_MUTEX_STATE) {
        return PIMutexTryLock(mutex->ToPIMutex());
    }
    if (__predict_false(IsMutexDestroyed(old_state))) {
        return HandleUsingDestroyedMutex(mutex_interface, __FUNCTION__);
    }

    // Do we already own this recursive or error-check mutex?
    pid_t tid = __get_thread()->tid;
    if (tid == atomic_load_explicit(&mutex->owner_tid, memory_order_relaxed)) {
        if (mtype == MUTEX_TYPE_BITS_ERRORCHECK) {
            return EBUSY;
        }
        return NonPI::RecursiveIncrement(mutex, old_state);
    }

    uint16_t shared = (old_state & MUTEX_SHARED_MASK);
    const uint16_t unlocked = mtype | shared | MUTEX_STATE_BITS_UNLOCKED;
    const uint16_t locked_uncontended = mtype | shared | MUTEX_STATE_BITS_LOCKED_UNCONTENDED;

    // Same as pthread_mutex_lock, except that we don't want to wait, and
    // the only operation that can succeed is a single compare_exchange to acquire the
    // lock if it is released / not owned by anyone. No need for a complex loop.
    // If exchanged successfully, an acquire fence is required to make
    // all memory accesses made by other threads visible to the current CPU.
    old_state = unlocked;
    if (__predict_true(atomic_compare_exchange_strong_explicit(&mutex->state, &old_state,
                                                               locked_uncontended,
                                                               memory_order_acquire,
                                                               memory_order_relaxed))) {
        atomic_store_explicit(&mutex->owner_tid, tid, memory_order_relaxed);
        return 0;
    }
    return EBUSY;
}

#if !defined(__LP64__)
// This exists only for backward binary compatibility on 32-bit platforms.
// (This function never existed for LP64.)
extern "C" int pthread_mutex_lock_timeout_np(pthread_mutex_t* mutex_interface, unsigned ms) {
    timespec ts;
    timespec_from_ms(ts, ms);
    timespec abs_timeout;
    absolute_timespec_from_timespec(abs_timeout, ts, CLOCK_MONOTONIC);
    int error = NonPI::MutexLockWithTimeout(__get_internal_mutex(mutex_interface), false,
                                            &abs_timeout);
    if (error == ETIMEDOUT) {
        error = EBUSY;
    }
    return error;
}
#endif

static int __pthread_mutex_timedlock(pthread_mutex_t* mutex_interface, bool use_realtime_clock,
                                     const timespec* abs_timeout, const char* function) {
    pthread_mutex_internal_t* mutex = __get_internal_mutex(mutex_interface);
    uint16_t old_state = atomic_load_explicit(&mutex->state, memory_order_relaxed);
    uint16_t mtype = (old_state & MUTEX_TYPE_MASK);
    // Handle common case first.
    if (__predict_true(mtype == MUTEX_TYPE_BITS_NORMAL)) {
        uint16_t shared = (old_state & MUTEX_SHARED_MASK);
        if (__predict_true(NonPI::NormalMutexTryLock(mutex, shared) == 0)) {
            return 0;
        }
    }
    if (old_state == PI_MUTEX_STATE) {
        return PIMutexTimedLock(mutex->ToPIMutex(), use_realtime_clock, abs_timeout);
    }
    if (__predict_false(IsMutexDestroyed(old_state))) {
        return HandleUsingDestroyedMutex(mutex_interface, function);
    }
    return NonPI::MutexLockWithTimeout(mutex, use_realtime_clock, abs_timeout);
}

int pthread_mutex_timedlock(pthread_mutex_t* mutex_interface, const struct timespec* abs_timeout) {
    return __pthread_mutex_timedlock(mutex_interface, true, abs_timeout, __FUNCTION__);
}

int pthread_mutex_timedlock_monotonic_np(pthread_mutex_t* mutex_interface,
                                         const struct timespec* abs_timeout) {
    return __pthread_mutex_timedlock(mutex_interface, false, abs_timeout, __FUNCTION__);
}

int pthread_mutex_clocklock(pthread_mutex_t* mutex_interface, clockid_t clock,
                            const struct timespec* abs_timeout) {
    switch (clock) {
    case CLOCK_MONOTONIC:
        return __pthread_mutex_timedlock(mutex_interface, false, abs_timeout, __FUNCTION__);
    case CLOCK_REALTIME:
        return __pthread_mutex_timedlock(mutex_interface, true, abs_timeout, __FUNCTION__);
    default: {
        pthread_mutex_internal_t* mutex = __get_internal_mutex(mutex_interface);
        uint16_t old_state = atomic_load_explicit(&mutex->state, memory_order_relaxed);
        if (IsMutexDestroyed(old_state)) {
            return HandleUsingDestroyedMutex(mutex_interface, __FUNCTION__);
        }
        return EINVAL;
    }
    }
}
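
// A hypothetical caller sketch (not part of bionic): waiting up to one second
// on the monotonic clock, then handling the timeout:
//
//   timespec ts;
//   clock_gettime(CLOCK_MONOTONIC, &ts);
//   ts.tv_sec += 1;
//   if (pthread_mutex_clocklock(&m, CLOCK_MONOTONIC, &ts) == ETIMEDOUT) {
//     // lock not acquired
//   }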

int pthread_mutex_destroy(pthread_mutex_t* mutex_interface) {
    pthread_mutex_internal_t* mutex = __get_internal_mutex(mutex_interface);
    uint16_t old_state = atomic_load_explicit(&mutex->state, memory_order_relaxed);
    if (__predict_false(IsMutexDestroyed(old_state))) {
        return HandleUsingDestroyedMutex(mutex_interface, __FUNCTION__);
    }
    if (old_state == PI_MUTEX_STATE) {
        int result = PIMutexDestroy(mutex->ToPIMutex());
        if (result == 0) {
            mutex->FreePIMutex();
            atomic_store(&mutex->state, 0xffff);
        }
        return result;
    }
    // Store 0xffff to make the mutex unusable. Although the POSIX standard says it is
    // undefined behavior to destroy a locked mutex, we prefer not to change mutex->state
    // in that situation.
    if (MUTEX_STATE_BITS_IS_UNLOCKED(old_state) &&
        atomic_compare_exchange_strong_explicit(&mutex->state, &old_state, 0xffff,
                                                memory_order_relaxed, memory_order_relaxed)) {
        return 0;
    }
    return EBUSY;
}