Document number: N2406=07-0266

Howard E. Hinnant
2007-09-09

Mutex, Lock, Condition Variable Rationale

Contents

Introduction

This paper adds rationale for the design decisions made for mutexes, locks and condition variables. The TR2-targeted shared mutexes and locks are included here only to help show how the complete package works together. Namespace tr2 is used to indicate the targeting intent.

Threads, thread pools and futures are not addressed in this paper.

Unique Ownership Mutexes and Locks

Below is the proposed synopsis of the header <mutex>. Below the synopsis an informal description and rationale is given for each of the components and design decisions.

<mutex> Synopsis

namespace std {

// A basic mutex
class mutex
{
public:
    mutex();
    ~mutex();

    mutex(const mutex&) = delete;
    mutex& operator=(const mutex&) = delete;

    void lock();
    bool try_lock();
    void unlock();

    typedef unspecified native_handle_type;  // conditionally present.  example: pthread_mutex_t*
    native_handle_type native_handle();      // conditionally present
};

// Add recursive functionality to the basic mutex
class recursive_mutex
{
public:
    recursive_mutex();
    ~recursive_mutex();

    recursive_mutex(const recursive_mutex&) = delete;
    recursive_mutex& operator=(const recursive_mutex&) = delete;

    void lock();
    bool try_lock();
    void unlock();

    typedef unspecified native_handle_type;  // conditionally present.  example: pthread_mutex_t*
    native_handle_type native_handle();      // conditionally present
};

// Add timed lock functionality to the basic mutex
class timed_mutex
{
public:
    timed_mutex();
    ~timed_mutex();

    timed_mutex(const timed_mutex&) = delete;
    timed_mutex& operator=(const timed_mutex&) = delete;

    void lock();
    bool try_lock();
    bool timed_lock(nanoseconds rel_time);
    void unlock();

    typedef unspecified native_handle_type;  // conditionally present.  example: pthread_mutex_t*
    native_handle_type native_handle();      // conditionally present
};

// Add timed lock functionality to the recursive mutex
class recursive_timed_mutex
{
public:
    recursive_timed_mutex();
    ~recursive_timed_mutex();

    recursive_timed_mutex(const recursive_timed_mutex&) = delete;
    recursive_timed_mutex& operator=(const recursive_timed_mutex&) = delete;

    void lock();
    bool try_lock();
    bool timed_lock(nanoseconds rel_time);
    void unlock();

    typedef unspecified native_handle_type;  // conditionally present.  example: pthread_mutex_t*
    native_handle_type native_handle();      // conditionally present
};

// An exception type for lock related errors
class lock_error
    : public exception
{
public:
    virtual const char* what() const throw();
};

// Tag types for lock construction options
struct do_not_lock_type    {};
struct try_to_lock_type    {};
struct already_locked_type {};

extern do_not_lock_type    do_not_lock;
extern try_to_lock_type    try_to_lock;
extern already_locked_type already_locked;


// RAII wrapper for locking and unlocking a mutex within a scope.
// It always references a mutex with the lock owned.
template <class Mutex>
class scoped_lock
{
public:
    typedef Mutex mutex_type;

    explicit scoped_lock(mutex_type& m);
    scoped_lock(mutex_type& m, already_locked_type);
    ~scoped_lock();

    scoped_lock(scoped_lock const&) = delete;
    scoped_lock& operator=(scoped_lock const&) = delete;
};

// Forward declaration of proposed TR2 locks
namespace tr2 {

template <class Mutex> class shared_lock;
template <class Mutex> class upgrade_lock;

} // tr2

// A movable (multi-scope) RAII wrapper for locking and unlocking mutexes.
// It may or may not reference a mutex, and if it does, the
//    lock may not be owned.
template <class Mutex>
class unique_lock
{
public:
    typedef Mutex mutex_type;

    unique_lock();
    explicit unique_lock(mutex_type& m);
    unique_lock(mutex_type& m, do_not_lock_type);
    unique_lock(mutex_type& m, try_to_lock_type);
    unique_lock(mutex_type& m, already_locked_type);
    unique_lock(mutex_type& m, nanoseconds rel_t);
    ~unique_lock();

    unique_lock(unique_lock const&) = delete;
    unique_lock& operator=(unique_lock const&) = delete;

    unique_lock(unique_lock&& u);
    unique_lock& operator=(unique_lock&& u);

// TR2 constructors begin
    // convert upgrade ownership to unique ownership
    explicit unique_lock(tr2::upgrade_lock<mutex_type>&&);
    explicit unique_lock(const tr2::upgrade_lock<mutex_type>&) = delete;

    unique_lock(tr2::upgrade_lock<mutex_type>&&, try_to_lock_type);
    unique_lock(const tr2::upgrade_lock<mutex_type>&, try_to_lock_type) = delete;

    unique_lock(tr2::upgrade_lock<mutex_type>&&, nanoseconds rel_t);
    unique_lock(const tr2::upgrade_lock<mutex_type>&, nanoseconds rel_t) = delete;

    // convert shared ownership to unique ownership
    unique_lock(tr2::shared_lock<mutex_type>&&, try_to_lock_type);
    unique_lock(const tr2::shared_lock<mutex_type>&, try_to_lock_type) = delete;

    unique_lock(tr2::shared_lock<mutex_type>&&, nanoseconds rel_t));
    unique_lock(const tr2::shared_lock<mutex_type>&, nanoseconds rel_t)) = delete;
// TR2 constructors end

    void lock();
    bool try_lock();
    bool timed_lock(nanoseconds rel_time);
    void unlock();

    bool owns_lock() const;
    mutex_type* mutex() const;

    void swap(unique_lock&& u);
    mutex_type* release();
};

template <class Mutex> void swap(unique_lock<Mutex>&  x, unique_lock<Mutex>&  y);
template <class Mutex> void swap(unique_lock<Mutex>&& x, unique_lock<Mutex>&  y);
template <class Mutex> void swap(unique_lock<Mutex>&  x, unique_lock<Mutex>&& y);

// Generic locking algorithms used to avoid deadlock.
template <class L1, class L2, class ...L3> int try_lock(L1&, L2&, L3&...);
template <class L1, class L2, class ...L3> void lock(L1&, L2&, L3&...);
}  // std

Mutex Rationale

The overall intent of the above design is to allow user defined mutexes to work with standard defined locks, and to also allow standard defined mutexes to work with user defined locks. The standard defined mutexes and locks create mutex and lock concepts, which if followed allow for the interoperability between user defined and standard defined components (much like our existing containers and algorithms interoperate with user defined containers and algorithms).

Examples of user-defined mutexes might include debugging mutexes, mutexes which atomically lock and destroy within the destructor to aid in orderly shut down, and read / write mutexes with different priority policies for readers and writers.

Examples of user-defined locks might include the bundling of mutexes and files to create locked files which both lock and open on lock() and close and unlock on unlock().

There are four standard-defined mutexes:

  1. mutex
  2. recursive_mutex
  3. timed_mutex
  4. recursive_timed_mutex

Like boost, and unlike POSIX, this design separates these functionalities into several distinct types which form a hierarchy of concepts. The rationale for the different types is to keep the most common, and simplest mutex type as small and efficient as possible. Each successive concept adds both functionality, and potentially expense. As mutexes are a very low level facility, it is very important that efficiency on a variety of platforms be a priority.

High level facilities can build on this low level, adding functionality (and expense) as required. But higher level layers can not subtract expense from the lower level layers.

Unlike boost, try_mutex is not separated out. The reason for this is because adding the try-functionality adds no expense to all known mutex implementations. Furthermore the try_lock functionality is known to be valuable and heavily used in practice. Therefore the try_lock functionality is included in all of the mutex types (as part of the base Mutex concept).

Unlike boost, the mutexes have public member functions for lock(), unlock(), etc. This is necessary to support one of the primary goals: User defined mutexes can be used with standard defined locks. If there were no interface for the user defined mutex to implement, there would be no way for a standard defined lock to communicate with the user defined mutex.

Like both boost and POSIX, the mutex types are neither copyable nor movable. Because several threads will access the mutex simultaneously, it would be bad for one thread to move it while another is locking or unlocking it.

Reference mutex implementation on POSIX

A reference implementation is given to show the intent of the proposal. Part of the intent is to be a thin wrapper around OS primitives (such as pthread_mutex_t). For brevity, only a mutex implementation is shown, and not an implementation for the other three mutex types. Also, only for brevity, code is inlined. Asserts are used to indicate requirements on the client (such as one can't destruct a locked mutex).

class mutex
{
    pthread_mutex_t mut_;
public:
    mutex()
    {
        error_code::value_type ec = pthread_mutex_init(&mut_, 0);
        if (ec)
            throw system_error(ec, native_category, "mutex constructor failed");
    }

    ~mutex()
    {
        int e = pthread_mutex_destroy(&mut_);
        assert(e == 0);
    }

    mutex(const mutex&) = delete;
    mutex& operator=(const mutex&) = delete;

    void lock()
    {
        error_code::value_type ec = pthread_mutex_lock(&mut_);
        if (ec)
            throw system_error(ec, native_category, "mutex lock failed");
    }

    bool try_lock()
    {
        return pthread_mutex_trylock(&mut_) == 0;
    }

    void unlock()
    {
        int e = pthread_mutex_unlock(&mut_);
        assert(e == 0);
    }

    typedef pthread_mutex_t* native_handle_type;
    native_handle_type native_handle() {return &mut_;}
};

Lock Rationale

The lock types are used as RAII devices for the locked state of a mutex. That is, the locks don't own the mutexes they reference. They just own the lock on the mutex.

Unlike boost, the locks are namespace scope objects to enable user defined mutexes to work with standard defined locks. If the locks existed only as nested types of the standard mutex types, then user defined mutexes would have to reinvent the lock type for every new mutex type.

Boost has several lock concepts:

This proposal simplifies the above list of "lock concepts" down to two:

(Two more locks are proposed for TR2 to support read/write locks).

The boost list of locks is necessary because each mutex specifies its own list of lock types. In this design the locks are namespace scope objects, templated on the mutex type, and designed to work with a wide variety of mutex types. Thus each different lock template is not designed for a specific mutex, but for specific use cases instead.

The scoped_lock is meant to address the most common use case, which is also the simplest use case: Lock the mutex at the beginning of a scope and unlock it at the end of the scope. That mutex could be a mutex, or a recursive_timed_mutex, or a MySpecialMutex. scoped_lock does not care. All it requires is that the mutex template parameter provide lock() and unlock(). The scoped_lock constructor lock()'s the mutex, and the scoped_lock destructor unlock()'s the mutex. It is as simple as that, and nothing more.

std::mutex mut;

void foo()
{
    std::scoped_lock<std::mutex> _(mut);  // mut.lock() called here
    // do protected work here
}   // mut.unlock() called here, no matter how foo() is exited

An invariant of scoped_lock is that it always owns the lock on the referenced mutex. The boost::mutex::scoped_lock does not have this invariant. An advantage of this invariant is that there is no if statement in ~scoped_lock() which tests whether the mutex needs to be unlocked or not. In this design ~scoped_lock() always unlocks the mutex because it always owns the lock on the mutex.

On rare occasions, the client may have a mutex that it already owns the lock, and wants to transfer the ownership of that lock into a scoped_lock. This functionality can be added to scoped_lock with zero cost. One simply adds an extra constructor which does not lock the mutex (because it is already locked):

std::mutex mut;

void foo()
{
    // for whatever reasons this thread already owns the lock on mut
    std::scoped_lock<std::mutex> _(mut, std::already_locked);  // mut.lock() not called here
    // do protected work here
}   // mut.unlock() called here, no matter how foo() is exited

See the implementation of gen_cond_var later in this paper for a real-life use case of the already_locked scoped_lock constructor.

Despite the fact that scoped_lock efficiently covers most of the use cases for locking and unlocking a mutex, it does not cover all of the use cases. To cover the rest of these use cases unique_lock is introduced. This adds both functionality and a small amount of expense. It no longer has an invariant that it always owns the lock on the referenced mutex. Indeed it may not even reference a mutex. And if it does reference a mutex, it may or may not own the lock on it, so unique_lock must add a bool data member and test that before it tries to unlock the mutex (such as in ~unique_lock()).

unique_lock is more similar to the boost::mutex::scoped_lock, but adds functionality without adding expense over and above that required for boost::mutex::scoped_lock. Part of this added functionality is move semantics, heretofore unavailable to boost code. Thus unique_lock can be returned from factory functions and stored in containers.

unique_lock can perform the functionality of scoped_lock, but with the added expense of the if statement in the destructor, and with the added expense imposed upon the code reviewer who now has to scan the entire function to know if mutex lock ownership is transferred away from this unique_lock or not. When your use case fits into the limited functionality of scoped_lock it is best to use scoped_lock not only for efficiency reasons, but also for documentation reasons. You more clearly state what you are doing with the mutex.

unique_lock has an array of constructors allowing the client to choose among:

It is proposed for TR2 that the list of unique_lock constructors be expanded to include converting shared or upgrade ownership to unique ownership (in support of converting read/write mutexes).

Like boost, this lock supports member lock(), try_lock(), unlock() and timed_lock() member functions. Unlike boost, these are all in the same lock "concept" whether the mutex type supports this functionality or not. If the mutex doesn't support timed_lock (for example), no harm is done unless the timed_lock member function of unique_lock is instantiated. This is no different than vector only requiring its value_type to be DefaultConstructible if certain vector members are instantiated.

owns_lock() tests whether the unique_lock owns the lock on the mutex or not. Boost calls this member locked(). I prefer owns_lock() because this member doesn't tell whether the mutex is locked or not. Another thread may own the lock on the mutex at the time. This member function only tells whether this unique_lock owns the locks on the mutex or not.

The mutex() member returns a pointer to the referenced mutex (or null if there is no referenced mutex). Some algorithms may need access to the referenced mutex, say for example, to take its address for mutex ordering or error checking purposes.

The swap member is included for full move semantics support.

The release() member releases the lock ownership of the mutex to the client, without unlocking the mutex (transfers lock ownership out of the unique_lock). This is necessary for transferring lock ownership from a unique_lock to a user-defined lock.

Rationale for generic locking algorithms

Occasionally a programmer finds it necessary to lock two (or more) mutexes at once. If such an operation is not done properly there exists a risk of deadlock. By using std::lock(l1, l2, ...) the programmer can safely lock multiple locks or mutexes without fear of deadlock. The only requirements on these lock types is that they support lock(), unlock(), and try_lock(). The locks need not all be of the same type. This algorithm can be used to easily build class lock types which refer to multiple mutexes (say Lock2). The lock() member function of Lock2 might use std::lock to lock all of the mutexes. One could then use Lock2 in a (generalized) condition variable wait statement. This is another good example of a user defined lock (which works with standard defined mutexes).

template <class M1, class M2>
class Lock2
{
    M1& m1_;
    M2& m2_;
public:
    Lock2(M1& m1, M2& m2) : m1_(m1), m2_(m2)
    {
        lock();
    }

    ~Lock2() {unlock();}

    Lock2(const Lock2&) = delete;
    Lock2& operator=(const Lock2&) = delete;

    void lock() {std::lock(m1_, m2_);}

    void unlock()
    {
        m1_.unlock();
        m2_.unlock();
    }
};
Generalizing Lock2<M1, M2> to MultiLock<M1, M2, ...Mn> is an interesting exercise and the existence of std::lock (and tuple) greatly eases the implementation burden of MultiLock<M1, M2, ...Mn>.

Condition Variables

Below is the proposed synopsis of the header <cond_var>. Below the synopsis an informal description and rationale is given for each of the components and design decisions.

<cond_var> Synopsis

namespace std {

// A basic condition variable.
// It can wait only on a unique_lock<mutex>.
class cond_var
{
public:
   
    cond_var();
    ~cond_var();

    cond_var(const cond_var&) = delete;
    cond_var& operator=(const cond_var&) = delete;

    void notify_one();
    void notify_all();
    void wait(unique_lock<mutex>& lock);
    template <class Predicate>
        void wait(unique_lock<mutex>& lock, Predicate pred);
    bool timed_wait(unique_lock<mutex>& lock, const utc_time& abs_time);
    template <class Predicate>
        bool timed_wait(unique_lock<mutex>& lock, const utc_time& abs_time, Predicate pred);
};

// A generalized condition variable.
// It can wait on anything which supports lock() and unlock().
class gen_cond_var
{
public:
   
    gen_cond_var();
    ~gen_cond_var();

    gen_cond_var(const gen_cond_var&) = delete;
    gen_cond_var& operator=(const gen_cond_var&) = delete;

    void notify_one();
    void notify_all();
    template <class Lock>
        void wait(Lock& lock);
    template <class Lock, class Predicate>
        void wait(Lock& lock, Predicate pred);
    template <class Lock>
        bool timed_wait(Lock& lock, const utc_time& abs_time);
    template <class Lock, class Predicate>
        bool timed_wait(Lock& lock, const utc_time& abs_time, Predicate pred);
};

}  // std

Condition Variable Rationale

The condition variable has been a most difficult class. While it is an indispensable part of the multithreaded tool set, it is also extremely low level, and has an inherent complexity to its interface. This interface is not of my choosing, but is the product of several decades of research and experience in the multithreaded arena, independent of C++. Condition variables are arguably lower level than the mutex. Because of their low level, efficiency is a major concern:

High level facilities can build on this low level, adding functionality (and expense) as required. But higher level layers can not subtract expense from the lower level layers.

For those platforms which offer a native condition variable (e.g. pthread_cond_t), C++ should offer as thin a wrapper as possible around that OS functionality.

At the same time, the condition variable is very powerful if generalized. A generalized (but more expensive) condition variable could offer the programmer a sufficiently higher level of abstraction to enable significantly more powerful multithreaded programming paradigms (analogous to moving from assembly to C).

I have studied condition variables extensively over several years within the context of C++. This study has included boost::condition and several variations of it. I even implemented a version of the boost::condition variables for the CodeWarrior (Metrowerks/Motorola/Freescale) C++ library. I have come to the following conclusions:

  1. I do not believe user defined condition variables are practical except as adaptors layered over the standard supplied condition variable.
  2. Standard supplied condition variables can interoperate with user defined mutexes/locks, but this functionality will definitely add expense to the condition variable over a native condition variable (e.g. pthread_cond_t).
  3. The semantics of boost::condition do not interoperate with user defined mutexes/locks.
  4. The syntactic interface of boost::condition can be specified to interoperate with user defined mutexes/locks but at an added expense over the native condition variable (e.g. pthread_cond_t).
  5. Well over 90% of the condition variable use cases in current day code require only the semantics of those equivalent to pthread_cond_t.
  6. It is not an intrinsic error to wait on a single condition variable with multiple mutexes unless more than one mutex is waited upon at the same time. Multiple threads can wait on the same condition variable with the same mutex at the same time.
  7. There exist use cases for waiting on the same condition variable with multiple mutexes. The simplest example involves only one waiting thread which uses a different mutex on each wait.
  8. There exist use cases for waiting on the same condition variable with the same mutex but locked in different ways simultaneously. For example one thread might want to wait on a shared_mutex locked with unique ownership, while another thread waits on the same shared_mutex locked with shared ownership. A third thread might not care if it is signaling a reader or a writer.

Because of these conclusions I believe we need to support both a razor thin layer over OS supplied condition variables, and also supply a more generalized condition variable which can work with user defined mutexes/locks. These are at least two different types of condition variables.

I have experimented with templating the condition variable but have discovered problems with this approach.

Because the condition specialized on the native mutex type can not have the same interface as the primary condition template (it can't wait on any lock type), specialization is not appropriate for this application (reference the vector<bool> example).

The only conclusion I can come to which supports both a razor thin layer over the native OS condition variable, and a generalized condition variable which works with user defined mutexes/locks (such as the Lock2 example) is two distinct types:

cond_var : A condition variable that can wait on nothing but unique_lock<mutex> (or perhaps mutex).
gen_cond_var : A condition variable that can wait on anything which supports lock() and unlock().

Boost uses the name condition for a condition variable. With our recent addition of conditional to the type traits library I fear that using condition will be confusing. cond_var is a readable abreviation of condition_variable.

cond_var

Below is an example implementation of cond_var on top of pthread_cond_t. The reference implementation is meant to demonstrate how thinly cond_var maps to pthread_cond_t (or whatever native OS condition variable is available).

class cond_var
{
    pthread_cond_t cv_;
public:
   
    cond_var()
    {
        error_code::value_type ec = pthread_cond_init(&cv_, 0);
        if (ec)
            throw system_error(ec, native_category, "cond_var constructor failed");
    }

    ~cond_var()
    {
        int ec = pthread_cond_destroy(&cv_);
        assert(ec == 0);
    }

    cond_var(const cond_var&) = delete;
    cond_var& operator=(const cond_var&) = delete;

    void notify_one()
    {
        error_code::value_type ec = pthread_cond_signal(&cv_);
        if (ec)
            throw system_error(ec, native_category, "cond_var notify_one failed");
    }

    void notify_all()
    {
        error_code::value_type ec = pthread_cond_broadcast(&cv_);
        if (ec)
            throw system_error(ec, native_category, "cond_var notify_all failed");
    }

    void wait(unique_lock<mutex>& lock)
    {
        error_code::value_type ec = pthread_cond_wait(&cv_, lock.mutex()->native_handle());
        if (ec)
            throw system_error(ec, native_category, "cond_var wait failed");
    }

    template <class Predicate>
        void wait(unique_lock<mutex>& lock, Predicate pred)
        {
            while (!pred())
                wait(lock);
        }

    bool timed_wait(unique_lock<mutex>& lock, const utc_time& abs_time)
    {
        timespec tm = convert_to_timespec(abs_time);
        error_code::value_type ec = pthread_cond_timedwait(&cv_, lock.mutex()->native_handle(), &tm);
        if (ec != 0 && ec != ETIMEDOUT)
            throw system_error(ec, native_category, "cond_var timed_wait failed");
        return ec == 0;
    }

    template <class Predicate>
        bool timed_wait(unique_lock<mutex>& lock, const utc_time& abs_time, Predicate pred)
        {
            while (!pred())
                if (!timed_wait(lock, abs_time))
                    return pred();
            return true;
        }
};

The above assumes that either there is no cancellation / interruption in the C++ threading API, or that it is handled by pthread_cancel. If we do have interruption which needs to be handled independently of pthread_cancel on some platform, the above wait function would simply need to register the cond_var with thread local data set aside so that other threads would know which cond_var to notify in case it needed to interrupt the waiting thread. Perhaps something like:

struct done_with_cv
{
    void operator()(thread_state* ts) {ts->done_with_cv();}
};

void wait(unique_lock<mutex>& lock)
{
    thread_state* ts = get_thread_state();
    if (ts)
        ts->wait_on_cv(&cv_);
    unique_ptr<thread_state, done_with_cv> _(ts);
    error_code::value_type ec = pthread_cond_wait(&cv_, lock.mutex()->native_handle());
    if (ec)
        throw system_error(ec, native_category, "cond_var wait failed");
}

For those OS's which do not support a native condition variable, but do support mutexes and semaphores, one has to use Alexander Terekhov's "algorithm 8a". This requires two semaphores, a mutex and three counters.

gen_cond_var

Once a minimalist but portable layer over a condition variable exists, then it becomes possible to build higher-level layers on top of cond_var. Those clients who don't need or want this higher-level functionality should program directly to the lowest-level cond_var to avoid unwanted costs associated with the higher-level functionality (e.g. see the shared_mutex reference implementation below).

The first higher-level layer I would like to explore is gen_cond_var. This class is capable of waiting on any object that supports lock() and unlock(). For example given the earlier user-defined Lock2 example, one could with gen_cond_var, compose two mutexes with a single lock and wait on that lock:

std::mutex m1;
std::timed_mutex m2;
std::gen_cond_var cv;
...

void foo()
{
    Lock2<std::mutex, std::timed_mutex> lk(m1, m2);
    // m1 and m2 locked here
    while (not_ready_to_proceed())
        cv.wait(lk);  // m1 and m2 unlocked while sleeping
    // m1 and m2 locked here
    ...
}   // m1 and m2 unlocked here

The first question one may reasonably ask about gen_cond_var is: This looks great. But why does this rise to the level of needing to be standardized? Can't user's portably create gen_cond_var themselves?

I'd like to answer the second part of that question first: Yes, anyone could portably create gen_cond_var himself, layered on top of cond_var. That is the good news. The bad news is that while the implementation is relatively brief, the code is exceptionally hard to get right. Here's a first cut:

class gen_cond_var
{
    cond_var cv_;
    mutex    mut_;
public:
   
    gen_cond_var() = default;
    ~gen_cond_var() = default;

    gen_cond_var(const gen_cond_var&) = delete;
    gen_cond_var& operator=(const gen_cond_var&) = delete;

    void notify_one()
    {
        cv_.notify_one();
    }

    void notify_all()
    {
        cv_notify_all();
    }

    template 
        void wait(Lock& lock)
        {
            unique_lock lk(mut_);
            lock.unlock();
            cv_.wait(lk);
            lock.lock();
        }

    template 
        void wait(Lock& lock, Predicate pred)
        {
            while (!pred())
                wait(lock);
        }

    template 
        bool timed_wait(Lock& lock, const utc_time& abs_time)
        {
            unique_lock lk(mut_);
            lock.unlock();
            cv_.timed_wait(lk, abs_time);
            lock.lock();
        }

    template 
        bool timed_wait(Lock& lock, const utc_time& abs_time, Predicate pred)
        {
            while (!pred())
                if (!timed_wait(lock, abs_time))
                    return pred();
            return true;
        }
};

The first problem is fairly obvious: cv_.wait(lk) in wait might throw and that would result in the external lock being incorrectly left unlocked as wait exited exceptionally. The fix is simple, and the same for both wait and timed_wait:

struct lock_it
{
    template <class Lock>
        void operator()(Lock* lk) {lk->lock();}
};

...

    template <class Lock>
        void wait(Lock& lock)
        {
            unique_lock<mutex> lk(mut_);
            lock.unlock();
            unique_ptr<Lock, lock_it> _(&lock);
            cv_.wait(lk);
        }

Now we're exception safe, and I would expect most programmers to get this far. But there looms a more subtle multithread related problem. Consider two threads executing nearly simultaneously within wait() and notify_one().

Thread AThread B
lock.lock()  
check predicate, decide to wait  
enter gen_cond_var::wait  
unique_lock<mutex> lk(mut_)  
lock.unlock();  
  lock.lock()
  change predicate, wake Thread A
  enter gen_cond_var::notify_one
  cv_.notify_one()
  exit gen_cond_var::notify_one
  lock.unlock()
cv_.wait(lk)  
 
Thread A never wakes! Thread B believes it has notified Thread A

Thread A waits forever waiting for Thread B to change the predicate, even though Thread B has already changed the predicate and notified A. The problem is that the unlock/wait sequence is no longer atomic. This is exactly the reason the condition variable was invented for in the first place. To make the unlock/wait sequence atomic, it is important to realize that it only needs to be atomic with respect to the notify functions. Now we have:

struct lock_it
{
    template <class Lock>
        void operator()(Lock* lk) {lk->lock();}
};

class gen_cond_var
{
    cond_var cv_;
    mutex    mut_;
public:
   
    gen_cond_var() = default;
    ~gen_cond_var() = default;

    gen_cond_var(const gen_cond_var&) = delete;
    gen_cond_var& operator=(const gen_cond_var&) = delete;

    void notify_one()
    {
        scoped_lock<mutex> _(mut_);
        cv_.notify_one();
    }

    void notify_all()
    {
        scoped_lock<mutex> _(mut_);
        cv_notify_all();
    }

    template 
        void wait(Lock& external)
        {
            unique_lock<mutex> lk(mut_);
            external.unlock();
            unique_ptr<Lock, lock_it> lock_last(&external);
            cv_.wait(lk);
        }

    template 
        void wait(Lock& lock, Predicate pred)
        {
            while (!pred())
                wait(lock);
        }

    template 
        bool timed_wait(Lock& external, const utc_time& abs_time)
        {
            unique_lock lk(mut_);
            external.unlock();
            unique_ptr<Lock, lock_it> lock_last(&external);
            cv_.timed_wait(lk, abs_time);
        }

    template 
        bool timed_wait(Lock& lock, const utc_time& abs_time, Predicate pred)
        {
            while (!pred())
                if (!timed_wait(lock, abs_time))
                    return pred();
            return true;
        }
};

We are now exception safe. And we have atomic unlock/wait with respect to the notify functions. Now we're good to go, right? Wrong! I personally got this far, but two experts I highly respect didn't, at least on the first try. And the above code is still buggy. It took me another year of study before I even knew I still had a bug. My test cases would very rarely deadlock. At times I wrote it off as an operating system bug. But persistent analysis turned the blame for this very-difficult-to-reproduce behavior on myself. Consider:

Thread AThread B
external.lock()  
check predicate, decide to wait  
enter gen_cond_var::wait  
unique_lock<mutex> lk(mut_)  
external.unlock();  
cv_.wait(lk)mut_.unlock()
mut_.lock()
external.lock()
external.lock() check predicate, decide to wait
enter gen_cond_var::wait
unique_lock<mutex> lk(mut_)
Thread A deadlocked! Thread B deadlocked!

In a nutshell, as Thread A exits the wait it is locking the internal mutex, and then the external lock. If Thread B begins a wait at about the same time, it is locking these two mutexes in the opposite order: external and then internal. This is the classic recipe for deadlock!

The fix is to unlock the internal mutex first as you exit the wait and then lock the external mutex. The final correct code is as follows:

struct lock_it
{
    template <class Lock>
        void operator()(Lock* lk) {lk->lock();}
};

class gen_cond_var
{
    cond_var cv_;
    mutex    mut_;
public:
   
    gen_cond_var() = default;
    ~gen_cond_var() = default;

    gen_cond_var(const gen_cond_var&) = delete;
    gen_cond_var& operator=(const gen_cond_var&) = delete;

    void notify_one()
    {
        scoped_lock<mutex> _(mut_);
        cv_.notify_one();
    }

    void notify_all()
    {
        scoped_lock<mutex> _(mut_);
        cv_notify_all();
    }

    template 
        void wait(Lock& external)
        {
            unique_lock<mutex> internal(mut_);
            external.unlock();
            unique_ptr<Lock, lock_it>       lock_last(&external);
            scoped_lock<unique_lock<mutex>> unlock_next(internal, already_locked);
            cv_.wait(internal);
        }  // mut_.unlock(), external.lock()

    template 
        void wait(Lock& lock, Predicate pred)
        {
            while (!pred())
                wait(lock);
        }

    template 
        bool timed_wait(Lock& external, const utc_time& abs_time)
        {
            unique_lock<mutex> internal(mut_);
            external.unlock();
            unique_ptr<Lock, lock_it>       lock_last(&external);
            scoped_lock<unique_lock<mutex>> unlock_next(internal, already_locked);
            cv_.timed_wait(internal, abs_time);
        }  // mut_.unlock(), external.lock()

    template 
        bool timed_wait(Lock& lock, const utc_time& abs_time, Predicate pred)
        {
            while (!pred())
                if (!timed_wait(lock, abs_time))
                    return pred();
            return true;
        }
};

Finally we have:

I'm sure there are people more experienced with condition variables than than I am for whom this tutorial was a fairly boring and obvious exercise. However I am personally impressed that gen_cond_var is sufficiently difficult for the average programmer to get right that standardizing this adaptor is appropriate. gen_cond_var is arguably very useful. There are no obvious alternative options for implementing gen_cond_var. And reinventing it is extremely error prone, even for experts.

Constrained condition variables

The majority of use cases with condition variables involve only one mutex/lock for the lifetime of the condition variable. Thus there is a desire to explicitly bind the desired mutex to the condition variable, and either check that the correct mutex is passed to wait, or just eliminate the parameter to wait altogether.

It is important to recognize that this one-to-one binding between condition variable and mutex is a very common use case, but is not the only use case. Therefore it would be inappropriate for the constrained condition variable to be the C++ client's only option.

A constrained condition variable might be templated on the condition variable type so that it could work with a cond_var or gen_cond_var or even another condition variable adaptor. It also might be templated on the mutex type to which it will be bound. Below is a first cut implementation of a constrained condition variable:

template <class CondVar, class Mutex>
class constrained_cond_var
{
    CondVar cv_;
    Mutex& mut_;

    void on_error()
    {
        throw std::runtime_error("cv - mutex mismatch");
    }
public:
    explicit constrained_cond_var(Mutex& mut) : mut_(mut) {}

    void notify_one() {cv_.notify_one();}
    void notify_all() {cv_.notify_all();}

    template <class Lock>
        void wait(Lock& lock)
        {
            if (lock.mutex() != &mut_)
                on_error();
            cv_.wait(lock);
        }

    template <class Lock, class Predicate>
        void wait(Lock& lock, Predicate pred)
        {
            if (lock.mutex() != &mut_)
                on_error();
            cv_.wait(lock, pred);
        }

    template <class Lock>
        bool timed_wait(Lock& lock, const std::utc_time& abs_time)
        {
            if (lock.mutex() != &mut_)
                on_error();
            return cv_.timed_wait(lock, abs_time);
        }

    template <class Lock, class Predicate>
        bool timed_wait(Lock& lock, const std::utc_time& abs_time, Predicate pred)
        {
            if (lock.mutex() != &mut_)
                on_error();
            return cv_.timed_wait(lock, abs_time, pred);
        }
};

There are several things to notice about the above code:

There are several alternatives, and they are all fairly easy to build on top of cond_var and/or gen_cond_var. Therefore I do not think it is appropriate to standardize a constrained condition variable. Nor do I believe we could gain consensus on what it should look like, at least not at this time.

Shared / Unique Ownership Mutexes and Locks

This section explores and motivates the TR2-targeted shared (read/write) mutexes. The reason for including these in this paper is to impress upon the reader how the entire package works together. Some of the design choices for the C++0X-targeted components are not obvious until one considers these TR2 components (e.g. cond_var, gen_cond_var and namespace scope templated locks).

<shared_mutex> Synopsis

namespace std {
namespace tr2 {

// A mutex supporting both unique (write) and shared (read) ownership
class shared_mutex
{
public:

    shared_mutex();
    ~shared_mutex();

    shared_mutex(const shared_mutex&) = delete;
    shared_mutex& operator=(const shared_mutex&) = delete;

// Unique ownership

    void lock();
    bool try_lock();
    bool timed_lock(nanoseconds rel_time);
    void unlock();

// Shared ownership

    void lock_shared();
    bool try_lock_shared();
    bool timed_lock_shared(nanoseconds rel_time);
    void unlock_shared();
};

// A mutex supporting both unique and shared ownership,
//   and the ability to convert between unique and shared ownership.
// Upgrade ownership is exclusive among unique and upgrade ownerships, but shares with
//   shared ownership.  It alone can perform a non-timed, non-try conversion to unique ownership.
class upgrade_mutex
{
public:

    upgrade_mutex();
    ~upgrade_mutex();

    upgrade_mutex(const upgrade_mutex&) = delete;
    upgrade_mutex& operator=(const upgrade_mutex&) = delete;

// Unique ownership

    void lock();
    bool try_lock();
    bool timed_lock(nanoseconds rel_time);
    void unlock();

// Shared ownership

    void lock_shared();
    bool try_lock_shared();
    bool timed_lock_shared(nanoseconds rel_time);
    void unlock_shared();

// Upgrade ownership

    void lock_upgrade();
    bool try_lock_upgrade();
    bool timed_lock_upgrade(nanoseconds rel_time);
    void unlock_upgrade();

// Shared <-> Unique

    bool try_unlock_shared_and_lock();
    bool timed_unlock_shared_and_lock(nanoseconds rel_time);
    void unlock_and_lock_shared();

// Shared <-> Upgrade

    bool try_unlock_shared_and_lock_upgrade();
    bool timed_unlock_shared_and_lock_upgrade(nanoseconds rel_time);
    void unlock_upgrade_and_lock_shared();

// Upgrade <-> Unique

    void unlock_upgrade_and_lock();  // This conversion is unique to upgrade ownership
    bool try_unlock_upgrade_and_lock();
    bool timed_unlock_upgrade_and_lock(nanoseconds rel_time);
    void unlock_and_lock_upgrade();
};

// A movable (multi-scope) RAII wrapper for share-locking and share-unlocking mutexes supporting
//   shared ownership such as shared_mutex and upgrade_mutex.  unique_lock is used for locking and unlocking
//   such mutexes for unique (write) ownership.
template <class Mutex>
class shared_lock
{
public:
    typedef Mutex mutex_type;

    shared_lock();
    explicit shared_lock(mutex_type& m);
    shared_lock(mutex_type& m, do_not_lock_type);
    shared_lock(mutex_type& m, try_to_lock_type);
    shared_lock(mutex_type& m, already_locked_type);
    shared_lock(mutex_type& m, nanoseconds rel_t);
    ~shared_lock();

    shared_lock(shared_lock const&) = delete;
    shared_lock& operator=(shared_lock const&) = delete;

    shared_lock(shared_lock&& u);
    shared_lock& operator=(shared_lock&& u);

    // convert upgrade ownership to shared ownership
    explicit shared_lock(upgrade_lock<mutex_type>&&);
    explicit shared_lock(const upgrade_lock<mutex_type>&) = delete;

    // convert unique ownership to shared ownership
    explicit shared_lock(unique_lock<mutex_type>&&);
    explicit shared_lock(const unique_lock<mutex_type>&) = delete;

    void lock();                         // calls lock_shared()
    bool try_lock();                     // calls try_lock_shared()
    bool timed_lock(nanoseconds rel_t);  // calls timed_lock_shared()
    void unlock();                       // calls unlock_shared()

    bool owns_lock() const;
    mutex_type* mutex() const;

    void swap(shared_lock&& u);
    mutex_type* release();
};

template <class Mutex> void swap(shared_lock<Mutex>&  x, shared_lock<Mutex>&  y);
template <class Mutex> void swap(shared_lock<Mutex>&& x, shared_lock<Mutex>&  y);
template <class Mutex> void swap(shared_lock<Mutex>&  x, shared_lock<Mutex>&& y);

// A movable (multi-scope) RAII wrapper for upgrade-locking and upgrade-unlocking mutexes supporting
//   upgrade ownership such as upgrade_mutex.  unique_lock is used for locking and unlocking
//   such mutexes for unique (write) ownership.  shared_lock is used for locking and unlocking
//   such mutexes for shared ownership.
template <class Mutex>
class upgrade_lock
{
public:
    typedef Mutex mutex_type;

    upgrade_lock();
    explicit upgrade_lock(mutex_type& m);
    upgrade_lock(mutex_type& m, do_not_lock_type);
    upgrade_lock(mutex_type& m, try_to_lock_type);
    upgrade_lock(mutex_type& m, already_locked_type);
    upgrade_lock(mutex_type& m, nanoseconds rel_t);
    ~upgrade_lock();

    upgrade_lock(upgrade_lock const&) = delete;
    upgrade_lock& operator=(upgrade_lock const&) = delete;

    upgrade_lock(upgrade_lock&& u);
    upgrade_lock& operator=(upgrade_lock&& u);

    // convert shared ownership to upgrade ownership
    upgrade_lock(shared_lock<mutex_type>&&, try_to_lock_type);
    upgrade_lock(const shared_lock<mutex_type>&, try_to_lock_type) = delete;

    upgrade_lock(shared_lock<mutex_type>&&, nanoseconds rel_t);
    upgrade_lock(const shared_lock<mutex_type>&, nanoseconds rel_t) = delete;

    // convert unique ownership to upgrade ownership
    explicit upgrade_lock(unique_lock<mutex_type>&&);
    explicit upgrade_lock(const unique_lock<mutex_type>&) = delete;

    void lock();                         // calls lock_upgrade()
    bool try_lock();                     // calls try_lock_upgrade()
    bool timed_lock(nanoseconds rel_t);  // calls timed_lock_upgrade()
    void unlock();                       // calls unlock_upgrade()

    bool owns_lock() const;
    mutex_type* mutex() const;

    void swap(upgrade_lock&& u);
    mutex_type* release();
};

template <class Mutex> void swap(upgrade_lock<Mutex>&  x, upgrade_lock<Mutex>&  y);
template <class Mutex> void swap(upgrade_lock<Mutex>&& x, upgrade_lock<Mutex>&  y);
template <class Mutex> void swap(upgrade_lock<Mutex>&  x, upgrade_lock<Mutex>&& y);

template <class ToLock, class FromLock>
class transfer_lock
{
    ToLock to_lock_;       // exposition only
    FromLock& from_lock_;  // exposition only
public:
    typedef typename FromLock::mutex_type mutex_type;

    explicit transfer_lock(FromLock& fl);  // Transfers ownership from FromLock to ToLock
    ~transfer_lock();                      // Transfers ownership from ToLock to FromLock

    transfer_lock(const transfer_lock&) = delete;
    transfer_lock& operator=(const transfer_lock&) = delete;

    void lock();                 // Transfers ownership from FromLock to ToLock
    void try_lock();             // Tries to transfers ownership from FromLock to ToLock
    void unlock();               // Transfers ownership from ToLock to FromLock

    bool owns_lock() const;
    mutex_type* mutex() const;
};

}  // tr2
}  // std

shared_mutex Rationale

shared_mutex is roughly equivalent to pthread_rwlock_t. It is a "read/write" mutex. The reason the name shared was chosen over read or read-write is to emphasize the behavior of the mutex, instead of an example use case of the mutex. There are many more reasons for a group of threads to share ownership of a resource other than "reading it".

There are more naming differences between shared_mutex and pthread_rwlock_t, largely due to generic programming concerns. Here is a comparison of shared_mutex names and their POSIX counterparts:

shared_mutex pthread_rwlock_t Semantics
lock wrlock Lock the mutex in unique ownership mode
try_lock trywrlock Try lock the mutex in unique ownership mode
timed_lock timedwrlock Timed lock the mutex in unique ownership mode
unlock unlock unlock the mutex from unique ownership mode
lock_shared rdlock Lock the mutex in shared ownership mode
try_lock_shared tryrdlock Try lock the mutex in shared ownership mode
timed_lock_shared timedrdlock Timed lock the mutex in shared ownership mode
unlock_shared unlock unlock the mutex from shared ownership mode

The most important point to notice in the above table is that the names for locking a shared_mutex in unique_ownership mode are identical to the names used for the unique ownership mutexes (mutex, recursive_mutex, timed_mutex, recursive_timed_mutex). This means that shared_mutex can be used in generic mutex code which assumes unique ownership semantics. A prime example of such generic code is unique_lock. The recommended technique for locking a shared_mutex in unique ownership mode is to construct a unique_lock<shared_mutex>.

A second difference to notice is that shared_mutex uses different names for unlocking from unique ownership as opposed to unlocking from shared ownership. POSIX uses the same name for both operations. The different names are desired because these are two distinct operations. Client code either knows how the mutex is locked (and thus how to unlock it), or is using an adaptor/lock wrapped around the shared_mutex that knows how it is locked (e.g. a shared_lock or unique_lock).

Additionally, some operating systems (e.g. Windows) have different names for unlocking shared vs unlocking unique as well. Use of different names in the C++ API allows for a more efficient binding to such OS API's.

When viewed in terms of concepts, timed_mutex is a mutex that adds one more signature (timed_lock). And shared_mutex is a timed_mutex that adds four more signatures (for shared locking/unlocking).

Like POSIX, shared_mutex allows recursive shared mode locking, but not recursive unique ownership mode locking. A recursive shared mutex could be built upon shared_mutex if desired, but this is not proposed.

shared_mutex Reference Implementation

shared_mutex can certainly be implemented on top of an OS supplied read-write mutex. However a portable subset of the implementation is shown here for the purpose of motivating the existence of cond_var: the razor-thin layer over the OS condition variable.

A secondary motivation is to explain the lack of reader-writer priority policies in shared_mutex. This is due to an algorithm credited to Alexander Terekhov which lets the OS decide which thread is the next to get the lock without caring whether a unique lock or shared lock is being sought. This results in a complete lack of reader or writer starvation. It is simply fair.

Below is most of the implementation of shared_mutex demonstrating that with this proposal, very high quality synchronization devices can be easily coded, and with the same efficiency as if they were coded straight to the OS. The timed-locking functions are omitted only for brevity. They are similar to the locking functions but make use of cond_var::timed_wait. Dependence on the lower-level C++ API is highlighted.

class shared_mutex
{
    mutex    mut_;
    cond_var gate1_;
    cond_var gate2_;
    unsigned state_;

    static const unsigned write_entered_ = 1U << (sizeof(unsigned)*CHAR_BIT - 1);
    static const unsigned n_readers_ = ~write_entered_;

public:

    shared_mutex() : state_(0) {}

// Exclusive ownership

    void lock();
    bool try_lock();
    bool timed_lock(nanoseconds rel_time);
    void unlock();

// Shared ownership

    void lock_shared();
    bool try_lock_shared();
    bool timed_lock_shared(nanoseconds rel_time);
    void unlock_shared();
};

// Exclusive ownership

void
shared_mutex::lock()
{
    std::this_thread::disable_interruption _;
    unique_lock<mutex> lk(mut_);
    while (state_ & write_entered_)
        gate1_.wait(lk);
    state_ |= write_entered_;
    while (state_ & n_readers_)
        gate2_.wait(lk);
}

bool
shared_mutex::try_lock()
{
    unique_lock<mutex> lk(mut_, try_to_lock);
    if (lk.owns_lock() && state_ == 0)
    {
        state_ = write_entered_;
        return true;
    }
    return false;
}

void
shared_mutex::unlock()
{
    {
    scoped_lock<mutex> _(mut_);
    state_ = 0;
    }
    gate1_.notify_all();
}

// Shared ownership

void
shared_mutex::lock_shared()
{
    std::this_thread::disable_interruption _;
    unique_lock<mutex> lk(mut_);
    while ((state_ & write_entered_) || (state_ & n_readers_) == n_readers_)
        gate1_.wait(lk);
    unsigned num_readers = (state_ & n_readers_) + 1;
    state_ &= ~n_readers_;
    state_ |= num_readers;
}

bool
shared_mutex::try_lock_shared()
{
    unique_lock<mutex> lk(mut_, try_to_lock);
    unsigned num_readers = state_ & n_readers_;
    if (lk.owns_lock() && !(state_ & write_entered_) && num_readers != n_readers_)
    {
        ++num_readers;
        state_ &= ~n_readers_;
        state_ |= num_readers;
        return true;
    }
    return false;
}

void
shared_mutex::unlock_shared()
{
    scoped_lock<mutex> _(mut_);
    unsigned num_readers = (state_ & n_readers_) - 1;
    state_ &= ~n_readers_;
    state_ |= num_readers;
    if (state_ & write_entered_)
    {
        if (num_readers == 0)
            gate2_.notify_one();
    }
    else
    {
        if (num_readers == n_readers_ - 1)
            gate1_.notify_one();
    }
}

shared_lock Rationale

shared_lock is a movable RAII wrapper for locking a mutex in shared lock mode. It is very analogous to unique_lock. The chief difference is that when you lock() a shared_lock it calls lock_shared() on the referenced mutex.

The reason that the member functions of shared_lock use the member functions lock, try_lock, timed_lock, and unlock, as opposed to lock_shared, try_lock_shared, timed_lock_shared, and unlock_shared, is to facilitate generic code for locks. For example the standard defined generic locking algorithm which locks multiple locks without deadlock can just as easily work on a shared_lock<shared_mutex> as a unique_lock<mutex>.

The shared_lock constructors which convert from unique_lock and from upgrade_lock will not compile for shared_lock<shared_mutex>. In order to use these constructors one must instantiate shared_lock with a mutex type which supports unlock_and_lock_shared and unlock_upgrade_and_lock_shared respecitively. upgrade_mutex is a mutex which meets these requirements.

Similarly, shared_mutex does not support the interface required to convert a shared_lock to a unique_lock. Again, this is what upgrade_mutex is designed to do.

Here is example code which locks a shared_mutex in both unique and shared ownership modes:

std::tr2::shared_mutex mut;

void example_reader()
{
    std::tr2::shared_lock<std::tr2::shared_mutex> _(mut);
    // mut is now shared-locked
    // ...
}   // mut is now unlocked

void example_writer()
{
    std::scoped_lock<std::tr2::shared_mutex> _(mut);
    // mut is now unique-locked
    // ...
}   // mut is now unlocked

Here is example code which waits on a shared_mutex in both unique and shared ownership modes:

std::tr2::shared_mutex mut;
std::gen_cond_var cv;

void wait_in_shared_ownership_mode()
{
    std::tr2::shared_lock<std::tr2::shared_mutex> shared_lk(mut);
    // mut is now shared-locked
    // ...
    while (not_ready_to_proceed())
        cv.wait(shared_lk);  // shared-lock released while waiting
    // mut is now shared-locked
    // ...
}   // mut is now unlocked

void wait_in_unique_ownership_mode()
{
    std::unique_lock<std::tr2::shared_mutex> lk(mut);
    // mut is now unique-locked
    // ...
    while (not_ready_to_proceed())
        cv.wait(lk);  // unique-lock released while waiting
    // mut is now unique-locked
    // ...
}   // mut is now unlocked

Here is example code which implements a copy assignment operator for a class, shared-locking the source (which it will not modify), and unique-locking the target. Each instance of the class carries a member shared_mutex to protect the instance data. Care must be taken to avoid deadlock if one thread assigns a1 = a2; at the same time that another thread assigns a2 = a1;.

class A
{
    typedef std::tr2::shared_mutex         mutex_t;
    typedef std::tr2::shared_lock<mutex_t> ReadLock;
    typedef std::unique_lock<mutex_t>      WriteLock;

    mutex_t mut_;
    // ... more data ...
public:
    // ...
    A& operator=(const A& a)
    {
        if (this != &a)
        {
            WriteLock this_lock(mut_, do_not_lock);
            ReadLock that_lock(a.mut_, do_not_lock);
            std::lock(this_lock, that_lock);  // lock both locks "atomically" (without deadlock)
            // mut_ is now unique-locked and a.mut_ is now share-locked
            // ... now safe to assign data ...
        }   // mut_ and a.mut_ now unlocked, even if there was an exception
        return *this;
    }
    // ...
};

upgrade_mutex Rationale

In the mutex concept hierarchy, upgrade_mutex is a shared_mutex and adds a third ownership mode (unique ownership, shared ownership and now upgrade ownership), and adds the ability to convert between these three ownership modes. The reason upgrade_mutex exists at all, is to facilitate converting shared ownership to unique ownership without unlocking.

The value in converting shared ownership to unique ownership without unlocking is that there may have been considerable work done under shared ownership that would have to be redone if the shared lock is released. In this use case the client needs to be sure that he is the next owner with unique ownership rights. To understand why a third ownership mode is needed to achieve this, consider:

Thread A Thread B Thread C
mut.lock_shared(); mut.lock_shared(); mut.lock_shared();
mut.unlock_shared_and_lock(); mut.unlock_shared_and_lock(); mut.unlock_shared_and_lock();
blocked
blocked
blocked
Thread A deadlocked Thread B deadlocked Thread C deadlocked

We can promise Thread A or Thread B or Thread C that they are the next one to get unique ownership rights, but we can't make that promise to all threads (or even two). Therefore the hypothetical unlock_shared_and_lock() member shown above does not exist. So how does a third ownership mode (upgrade ownership) help this?

Upgrade ownership is a privileged form of shared ownership. It shares ownership with other threads that have shared ownership of the mutex. But it does not share ownership with other threads wanting upgrade ownership (much less unique ownership). This allows the programmer to choose a single shared ownership thread and designate it as the one thread among all other shared ownership threads that can convert its ownership to unique:

Thread A Thread B Thread C
mut.lock_shared(); mut.lock_shared(); mut.lock_upgrade();
working
working
working
mut.unlock_upgrade_and_lock();
mut.unlock_shared()
working without ownership
mut.unlock_shared();
blocked
working without ownership
working with unique ownership

The next question one may reasonably ask is: If only one thread has shared ownership, why can't I then convert it to unique ownership? The answer is you can. This is the purpose of the try_unlock_shared_and_lock() member of upgrade_mutex. We can't promise to give the owner of shared ownership the next unique ownership. But we can try. Additionally there is even a timed_unlock_shared_and_lock() member of upgrade_mutex in case you want to wait a little while before giving up. Naturally this is plenty of rope for the client to hang himself if he abuses this interface with long wait times (this is C++, where the tools have good handles, but are sharp nonetheless).

In summary with upgrade_mutex, we have three ownership modes, and the ability to convert among all three of these modes:

unique
  |   \
  |    \
  |    upgrade
  |    /
  |   /
shared

Ownership conversions heading down this graph are non-blocking. There are try and timed-conversions heading up the graph. And only between upgrade and unique there is an upward indefinitely blocking ownership conversion. At any one time, there can be only one unique ownership owner, which shares ownership with no one else. Or there can be one upgrade ownership owner which will share with zero or more shared ownership owners. Or there can be zero or more shared ownership owners.

This tool is only useful when there are multiple shared ownership owners, and only a few of those desire the ability to guarantee conversion to unique ownership. If all of the shared ownership owners require this guarantee, then the system degenerates into one of mutually exclusive ownership, and mutex becomes the tool of choice.

upgrade_mutex Implementation

So this sounds very powerful, but how much does it cost?

This is truly amazing but the cost is virtually identical to the reference implementation shown for shared_mutex. The Terekhov algorithm needs only minor tweaking to adjust for the introduction of the upgrade ownership mode, and all of the conversions. There are no subtleties (this is a cake walk compared to gen_cond_var!). Below is shown the upgrade_mutex data layout, and a few of the member functions. Note that the data layout is identical to that of shared_mutex! Only a single bit from the unsigned state_ is taken to record the presence of an upgrade owner. The result is that one can have only half as many simultaneous shared owners (1,073,741,823 instead of 2,147,483,647 with a 32 bit unsigned state_). The common member functions of shared_mutex and upgrade_mutex are no more expensive in upgrade_mutex. Compile time constants are changed. But the complexity or expense of the member functions do not change. The expense of locking in the upgrade ownership mode is comparable to locking in shared ownership mode. And the expense of converting among the ownership modes is comparable to locking and unlocking shared_mutex in its two ownership modes (actually faster).

class upgrade_mutex
{
    mutex    mut_;
    cond_var gate1_;
    cond_var gate2_;
    unsigned state_;

    static const unsigned write_entered_ = 1U << (sizeof(unsigned)*CHAR_BIT - 1);
    static const unsigned upgradable_entered_ = write_entered_ >> 1;
    static const unsigned n_readers_ = ~(write_entered_ | upgradable_entered_);

public:

    upgrade_mutex() : state_(0) {}
    // ...
};

void
upgrade_mutex::lock()
{
    std::this_thread::disable_interruption _;
    unique_lock<mutex> lk(mut_);
    while (state_ & (write_entered_ | upgradable_entered_))
        gate1_.wait(lk);
    state_ |= write_entered_;
    while (state_ & n_readers_)
        gate2_.wait(lk);
}

void
upgrade_mutex::unlock()
{
    {
    scoped_lock<mutex> _(mut_);
    state_ = 0;
    }
    gate1_.notify_all();
}

void
upgrade_mutex::lock_shared()
{
    std::this_thread::disable_interruption _;
    unique_lock<mutex> lk(mut_);
    while ((state_ & write_entered_) || (state_ & n_readers_) == n_readers_)
        gate1_.wait(lk);
    unsigned num_readers = (state_ & n_readers_) + 1;
    state_ &= ~n_readers_;
    state_ |= num_readers;
}

void
upgrade_mutex::lock_upgrade()
{
    std::this_thread::disable_interruption _;
    unique_lock<mutex> lk(mut_);
    while ((state_ & (write_entered_ | upgradable_entered_)) || 
           (state_ & n_readers_) == n_readers_)
        gate1_.wait(lk);
    unsigned num_readers = (state_ & n_readers_) + 1;
    state_ &= ~n_readers_;
    state_ |= upgradable_entered_ | num_readers;
}

void
upgrade_mutex::unlock_upgrade_and_lock()
{
    std::this_thread::disable_interruption _;
    unique_lock<mutex> lk(mut_);
    unsigned num_readers = (state_ & n_readers_) - 1;
    state_ &= ~(upgradable_entered_ | n_readers_);
    state_ |= write_entered_ | num_readers;
    while (state_ & n_readers_)
        gate2_.wait(lk);
}

upgrade_lock Rationale

upgrade_lock is very similar to shared_lock and unique_lock. It exists to serve as a RAII wrapper to lock a mutex satisfying the concept of an upgrade mutex in upgrade ownership mode (just like shared_lock only locks its mutex in shared ownership mode). If you want to lock an upgrade_mutex in unique ownership mode, use unique_lock<upgrade_mutex>. If you want to lock an upgrade_mutex in shared ownership mode, use shared_lock<upgrade_mutex>. At the end of the day, an upgrade_mutex is a mutex. And it also is a shared_mutex. It just also happens to be a little bit more.

Also recall that the reason for upgrade_mutex to exist is to facilitate conversions among the various ownership modes. The locks (unique_lock, shared_lock, upgrade_lock) represent the ownership modes. One could use upgrade_mutex without locks and manually convert among the ownership modes. But this is tedious and error prone (especially within the context of exceptions). The locks exist to homogenize the syntax for locking and unlocking various types of mutexes, and for converting among the different types of ownership with a uniform syntax. This not only makes the upgrade_mutex easier to use, and enables generic locking algorithms (such as std::lock), it also enables generic lock conversion algorithms (such as std::tr2::transfer_lock).

If this all sounds complicated, that is only because I am a poor writer. The key drivers here are: learn one lock, and you now know them all. Learn how to convert between two locks, and you now know how to convert between any two locks. The upgrade_mutex interface is complicated. The locks greatly simplify dealing with this interface. Example code follows:

typedef std::tr2::upgrade_mutex       Mutex;
typedef std::tr2::shared_lock<Mutex>  ReadLock;
typedef std::tr2::upgrade_lock<Mutex> UpgradeLock;
typedef std::unique_lock<Mutex>       WriteLock;

Mutex mut;

void reader()
{
    ReadLock read_lock(mut);
    // mut is now share-locked
    // ...
}   // mut is now unlocked

void writer()
{
    WriteLock write_lock(mut);
    // mut is now unique-locked
    // ...
}   // mut is now unlocked

void reader_writer()
{
    UpgradeLock read_lock(mut);
    // mut is now share-locked, but with privilege to upgrade
    // ...
    WriteLock write_lock(std::move(read_lock));
    // mut is now unique-locked
    // ...
}   // mut is now unlocked

void reader_writer_reader()
{
    UpgradeLock upgrade_lock(mut);
    // mut is now share-locked, but with privilege to upgrade
    // ...
    WriteLock write_lock(std::move(read_lock));
    // mut is now unique-locked
    // ...
    upgrade_lock = std::move(write_lock);
    // mut is now share-locked, but with privilege to upgrade
    // ...
    ReadLock read_lock(std::move(upgrade_lock));
    // mut is now share-locked, without the privilege to upgrade
    // ...
}   // mut is now unlocked

The only clients I expect to have to deal with the upgrade_mutex interface directly are those wishing to write user-defined mutexes satisfying this interface. Everyone else can use the homogeneous interface of the locks demonstrated above. For those clients writing user-defined mutexes with the upgrade_mutex interface, they will not have to replicate the lock infrastructure. All of the standard-defined locks will work with their mutex exactly as they do with std::tr2::upgrade_mutex thanks to the standardized interface for std::tr2::upgrade_mutex.

transfer_lock Rationale

In the above description of upgrade_lock and converting ownership as a function progresses, no regard was given to if statements. That is, sometimes you might want to conditionally convert ownership for a portion of a function, and then revert that ownership conversion later in the function. And it all has to be exception safe. Here's a preliminary sketch:

typedef std::tr2::upgrade_mutex       Mutex;
typedef std::tr2::shared_lock<Mutex>  ReadLock;
typedef std::tr2::upgrade_lock<Mutex> UpgradeLock;
typedef std::unique_lock<Mutex>       WriteLock;

Mutex mut;

void foo()
{
    // Here I need shared ownership of mut
    // ...
    if (some_predicate())
    {
        // Here I need unique ownership of mut
        // ...
    }
    // Here I need shared ownership of mut
    // ...
    if (some_other_predicate())
    {
        // Here I need unique ownership of mut
        // ...
    }
    // Here I need shared ownership of mut, and will not need further upgradability
    // ...
}   // Here mut needs to be unlocked, no matter what

This is the job transfer_lock is designed for. It is a very simple lock adaptor. Its constructor takes a lvalue lock and transfers ownership (via the homogeneous lock ownership transfer interface) to another embedded lock. On destruction it transfers ownership back to the lock received in the constructor. This adaptor is only simple because of the homogenized lock conversion interface outlined earlier. transfer_lock is a prime example of generic lock conversion algorithms.

Here is how the above foo might be coded:

typedef std::tr2::upgrade_mutex       Mutex;
typedef std::tr2::shared_lock<Mutex>  ReadLock;
typedef std::tr2::upgrade_lock<Mutex> UpgradeLock;
typedef std::unique_lock<Mutex>       WriteLock;

Mutex mut;

void foo()
{
    UpgradeLock upgrade_lock(mut);
    // Here I need shared ownership of mut
    // ...
    if (some_predicate())
    {
        std::tr2::transfer_lock<WriteLock, UpgradeLock> _(upgrade_lock);
        // Here I need unique ownership of mut
        // ...
    }
    // Here I need shared ownership of mut
    // ...
    if (some_other_predicate())
    {
        std::tr2::transfer_lock<WriteLock, UpgradeLock> _(upgrade_lock);
        // Here I need unique ownership of mut
        // ...
    }
    ReadLock _(std::move(upgrade_lock));
    // Here I need shared ownership of mut, and will not need further upgradability
    // ...
}   // Here mut needs to be unlocked, no matter what

transfer_lock Implementation

The implementation of transfer_lock is presented just for the purpose of demonstrating how simple it is. And the only reason it is simple is because of the generic lock conversion syntax adopted by the locks.

template <class ToLock, class FromLock>
class transfer_lock
{
    ToLock to_lock_;
    FromLock& from_lock_;
public:
    typedef typename ToLock::mutex_type mutex_type;

    explicit transfer_lock(FromLock& fl) : to_lock_(std::move(fl)), from_lock_(fl) {}
    ~transfer_lock() {from_lock_ = FromLock(std::move(to_lock_));}

    transfer_lock(const transfer_lock&) = delete;
    transfer_lock& operator=(const transfer_lock&) = delete;

    void lock()     {to_lock_ = ToLock(std::move(from_lock_));}
    void try_lock() {to_lock_ = ToLock(std::move(from_lock_), std::try_to_lock);}
    void unlock()   {from_lock_ = FromLock(std::move(to_lock_));}

    bool owns_lock() const {return to_lock_.owns();}
    mutex_type* mutex() const {return to_lock_.mutex();}
};

Acknowledgements

This document is the result of years worth of study. Many people and organizations have contributed. I would like to especially acknowledge my former employer Freescale (formerly Motorola, formerly Metrowerks) for their support in the early stages of this work. And I would like to acknowledge Apple for their continued support of this work. Without this strong and continuous visionary support, this work would not be presented here for your consideration.