Daniel P. Berrangé <berrange@redhat.com> pointed out that the coroutine
pool size heuristic is very conservative. Instead of halving
max_map_count, he suggested reserving 5,000 mappings for non-coroutine
users based on observations of guests he has access to.
Fixes: 86a637e48104 ("coroutine: cap per-thread local pool size")
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Message-id: 20240320181232.1464819-1-stefanha@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
		
	
			
		
			
				
	
	
		
			402 lines
		
	
	
		
			11 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			402 lines
		
	
	
		
			11 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
/*
 | 
						|
 * QEMU coroutines
 | 
						|
 *
 | 
						|
 * Copyright IBM, Corp. 2011
 | 
						|
 *
 | 
						|
 * Authors:
 | 
						|
 *  Stefan Hajnoczi    <stefanha@linux.vnet.ibm.com>
 | 
						|
 *  Kevin Wolf         <kwolf@redhat.com>
 | 
						|
 *
 | 
						|
 * This work is licensed under the terms of the GNU LGPL, version 2 or later.
 | 
						|
 * See the COPYING.LIB file in the top-level directory.
 | 
						|
 *
 | 
						|
 */
 | 
						|
 | 
						|
#include "qemu/osdep.h"
 | 
						|
#include "trace.h"
 | 
						|
#include "qemu/thread.h"
 | 
						|
#include "qemu/atomic.h"
 | 
						|
#include "qemu/coroutine_int.h"
 | 
						|
#include "qemu/coroutine-tls.h"
 | 
						|
#include "qemu/cutils.h"
 | 
						|
#include "block/aio.h"
 | 
						|
 | 
						|
/* Upper bound on the number of coroutines held by one CoroutinePoolBatch */
enum {
    COROUTINE_POOL_BATCH_MAX_SIZE = 128,
};
 | 
						|
 | 
						|
/*
 | 
						|
 * Coroutine creation and deletion is expensive so a pool of unused coroutines
 | 
						|
 * is kept as a cache. When the pool has coroutines available, they are
 | 
						|
 * recycled instead of creating new ones from scratch. Coroutines are added to
 | 
						|
 * the pool upon termination.
 | 
						|
 *
 | 
						|
 * The pool is global but each thread maintains a small local pool to avoid
 | 
						|
 * global pool contention. Threads fetch and return batches of coroutines from
 | 
						|
 * the global pool to maintain their local pool. The local pool holds up to two
 | 
						|
 * batches whereas the maximum size of the global pool is controlled by the
 | 
						|
 * qemu_coroutine_inc_pool_size() API.
 | 
						|
 *
 | 
						|
 * .-----------------------------------.
 | 
						|
 * | Batch 1 | Batch 2 | Batch 3 | ... | global_pool
 | 
						|
 * `-----------------------------------'
 | 
						|
 *
 | 
						|
 * .-------------------.
 | 
						|
 * | Batch 1 | Batch 2 | per-thread local_pool (maximum 2 batches)
 | 
						|
 * `-------------------'
 | 
						|
 */
 | 
						|
typedef struct CoroutinePoolBatch {
 | 
						|
    /* Batches are kept in a list */
 | 
						|
    QSLIST_ENTRY(CoroutinePoolBatch) next;
 | 
						|
 | 
						|
    /* This batch holds up to @COROUTINE_POOL_BATCH_MAX_SIZE coroutines */
 | 
						|
    QSLIST_HEAD(, Coroutine) list;
 | 
						|
    unsigned int size;
 | 
						|
} CoroutinePoolBatch;
 | 
						|
 | 
						|
typedef QSLIST_HEAD(, CoroutinePoolBatch) CoroutinePool;
 | 
						|
 | 
						|
/* Host operating system limit on number of pooled coroutines */
 | 
						|
static unsigned int global_pool_hard_max_size;
 | 
						|
 | 
						|
static QemuMutex global_pool_lock; /* protects the following variables */
 | 
						|
static CoroutinePool global_pool = QSLIST_HEAD_INITIALIZER(global_pool);
 | 
						|
static unsigned int global_pool_size;
 | 
						|
static unsigned int global_pool_max_size = COROUTINE_POOL_BATCH_MAX_SIZE;
 | 
						|
 | 
						|
QEMU_DEFINE_STATIC_CO_TLS(CoroutinePool, local_pool);
 | 
						|
QEMU_DEFINE_STATIC_CO_TLS(Notifier, local_pool_cleanup_notifier);
 | 
						|
 | 
						|
static CoroutinePoolBatch *coroutine_pool_batch_new(void)
 | 
						|
{
 | 
						|
    CoroutinePoolBatch *batch = g_new(CoroutinePoolBatch, 1);
 | 
						|
 | 
						|
    QSLIST_INIT(&batch->list);
 | 
						|
    batch->size = 0;
 | 
						|
    return batch;
 | 
						|
}
 | 
						|
 | 
						|
/*
 * Destroy every coroutine still held by @batch, then free @batch itself.
 * @batch must already be unlinked from any pool.
 */
static void coroutine_pool_batch_delete(CoroutinePoolBatch *batch)
{
    Coroutine *victim;

    /* Pop coroutines off the front until the list is drained */
    while ((victim = QSLIST_FIRST(&batch->list)) != NULL) {
        QSLIST_REMOVE_HEAD(&batch->list, pool_next);
        qemu_coroutine_delete(victim);
    }

    g_free(batch);
}
 | 
						|
 | 
						|
/*
 * Thread-exit notifier callback: release every batch in the calling thread's
 * local pool. Neither @n nor @value is used.
 */
static void local_pool_cleanup(Notifier *n, void *value)
{
    CoroutinePool *pool = get_ptr_local_pool();
    CoroutinePoolBatch *head;

    /* Unlink and destroy batches one at a time from the front */
    while ((head = QSLIST_FIRST(pool)) != NULL) {
        QSLIST_REMOVE_HEAD(pool, next);
        coroutine_pool_batch_delete(head);
    }
}
 | 
						|
 | 
						|
/* Ensure the atexit notifier is registered */
 | 
						|
static void local_pool_cleanup_init_once(void)
 | 
						|
{
 | 
						|
    Notifier *notifier = get_ptr_local_pool_cleanup_notifier();
 | 
						|
    if (!notifier->notify) {
 | 
						|
        notifier->notify = local_pool_cleanup;
 | 
						|
        qemu_thread_atexit_add(notifier);
 | 
						|
    }
 | 
						|
}
 | 
						|
 | 
						|
/* Helper to get the next unused coroutine from the local pool */
 | 
						|
static Coroutine *coroutine_pool_get_local(void)
 | 
						|
{
 | 
						|
    CoroutinePool *local_pool = get_ptr_local_pool();
 | 
						|
    CoroutinePoolBatch *batch = QSLIST_FIRST(local_pool);
 | 
						|
    Coroutine *co;
 | 
						|
 | 
						|
    if (unlikely(!batch)) {
 | 
						|
        return NULL;
 | 
						|
    }
 | 
						|
 | 
						|
    co = QSLIST_FIRST(&batch->list);
 | 
						|
    QSLIST_REMOVE_HEAD(&batch->list, pool_next);
 | 
						|
    batch->size--;
 | 
						|
 | 
						|
    if (batch->size == 0) {
 | 
						|
        QSLIST_REMOVE_HEAD(local_pool, next);
 | 
						|
        coroutine_pool_batch_delete(batch);
 | 
						|
    }
 | 
						|
    return co;
 | 
						|
}
 | 
						|
 | 
						|
/* Get the next batch from the global pool */
 | 
						|
static void coroutine_pool_refill_local(void)
 | 
						|
{
 | 
						|
    CoroutinePool *local_pool = get_ptr_local_pool();
 | 
						|
    CoroutinePoolBatch *batch;
 | 
						|
 | 
						|
    WITH_QEMU_LOCK_GUARD(&global_pool_lock) {
 | 
						|
        batch = QSLIST_FIRST(&global_pool);
 | 
						|
 | 
						|
        if (batch) {
 | 
						|
            QSLIST_REMOVE_HEAD(&global_pool, next);
 | 
						|
            global_pool_size -= batch->size;
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    if (batch) {
 | 
						|
        QSLIST_INSERT_HEAD(local_pool, batch, next);
 | 
						|
        local_pool_cleanup_init_once();
 | 
						|
    }
 | 
						|
}
 | 
						|
 | 
						|
/* Add a batch of coroutines to the global pool */
 | 
						|
static void coroutine_pool_put_global(CoroutinePoolBatch *batch)
 | 
						|
{
 | 
						|
    WITH_QEMU_LOCK_GUARD(&global_pool_lock) {
 | 
						|
        unsigned int max = MIN(global_pool_max_size,
 | 
						|
                               global_pool_hard_max_size);
 | 
						|
 | 
						|
        if (global_pool_size < max) {
 | 
						|
            QSLIST_INSERT_HEAD(&global_pool, batch, next);
 | 
						|
 | 
						|
            /* Overshooting the max pool size is allowed */
 | 
						|
            global_pool_size += batch->size;
 | 
						|
            return;
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    /* The global pool was full, so throw away this batch */
 | 
						|
    coroutine_pool_batch_delete(batch);
 | 
						|
}
 | 
						|
 | 
						|
/* Get the next unused coroutine from the pool or return NULL */
 | 
						|
static Coroutine *coroutine_pool_get(void)
 | 
						|
{
 | 
						|
    Coroutine *co;
 | 
						|
 | 
						|
    co = coroutine_pool_get_local();
 | 
						|
    if (!co) {
 | 
						|
        coroutine_pool_refill_local();
 | 
						|
        co = coroutine_pool_get_local();
 | 
						|
    }
 | 
						|
    return co;
 | 
						|
}
 | 
						|
 | 
						|
/*
 * Return @co to the calling thread's local pool.
 *
 * @co goes into the first batch. When that batch is full a new batch is
 * started; if the local pool already held a second batch, the full one is
 * passed to the global pool first.
 */
static void coroutine_pool_put(Coroutine *co)
{
    CoroutinePool *pool = get_ptr_local_pool();
    CoroutinePoolBatch *head = QSLIST_FIRST(pool);

    /* First use on this thread, or the local pool has been drained */
    if (unlikely(head == NULL)) {
        head = coroutine_pool_batch_new();
        QSLIST_INSERT_HEAD(pool, head, next);
        local_pool_cleanup_init_once();
    }

    if (unlikely(head->size >= COROUTINE_POOL_BATCH_MAX_SIZE)) {
        /* A second batch behind the full one means the local pool is full */
        bool local_pool_full = QSLIST_NEXT(head, next) != NULL;

        if (local_pool_full) {
            QSLIST_REMOVE_HEAD(pool, next);
            coroutine_pool_put_global(head);
        }

        head = coroutine_pool_batch_new();
        QSLIST_INSERT_HEAD(pool, head, next);
    }

    QSLIST_INSERT_HEAD(&head->list, co, pool_next);
    head->size++;
}
 | 
						|
 | 
						|
/*
 * Obtain a coroutine that will run @entry with @opaque as its argument.
 *
 * A pooled coroutine is reused when CONFIG_COROUTINE_POOL is enabled and one
 * is available; otherwise a new coroutine is allocated.
 *
 * Returns: the coroutine, with an empty wakeup queue
 */
Coroutine *qemu_coroutine_create(CoroutineEntry *entry, void *opaque)
{
    Coroutine *co =
        IS_ENABLED(CONFIG_COROUTINE_POOL) ? coroutine_pool_get() : NULL;

    if (co == NULL) {
        co = qemu_coroutine_new();
    }

    QSIMPLEQ_INIT(&co->co_queue_wakeup);
    co->entry_arg = opaque;
    co->entry = entry;

    return co;
}
 | 
						|
 | 
						|
/*
 * Dispose of a terminated coroutine: recycle it into the pool when
 * CONFIG_COROUTINE_POOL is enabled, otherwise destroy it.
 */
static void coroutine_delete(Coroutine *co)
{
    co->caller = NULL;

    if (!IS_ENABLED(CONFIG_COROUTINE_POOL)) {
        qemu_coroutine_delete(co);
        return;
    }

    coroutine_pool_put(co);
}
 | 
						|
 | 
						|
/*
 * Switch from the current coroutine into @co with @ctx recorded as its
 * AioContext, then keep running coroutines that were queued for wakeup in
 * the meantime until none remain.
 *
 * Aborts if a coroutine to be entered is marked as scheduled or already has
 * a caller.
 */
void qemu_aio_coroutine_enter(AioContext *ctx, Coroutine *co)
{
    QSIMPLEQ_HEAD(, Coroutine) runq = QSIMPLEQ_HEAD_INITIALIZER(runq);
    Coroutine *from = qemu_coroutine_self();

    QSIMPLEQ_INSERT_TAIL(&runq, co, co_queue_next);

    /* Run co and any queued coroutines */
    while (!QSIMPLEQ_EMPTY(&runq)) {
        Coroutine *to = QSIMPLEQ_FIRST(&runq);
        const char *scheduled;
        CoroutineAction ret;

        /*
         * Load the coroutine pointer before loading its ->scheduled field;
         * pairs with qatomic_cmpxchg in qemu_co_sleep(), aio_co_schedule()
         * etc.
         */
        smp_read_barrier_depends();

        scheduled = qatomic_read(&to->scheduled);

        QSIMPLEQ_REMOVE_HEAD(&runq, co_queue_next);

        trace_qemu_aio_coroutine_enter(ctx, from, to, to->entry_arg);

        /*
         * Entering a coroutine that has already been scheduled would run it
         * twice, potentially even after the coroutine has been deleted.
         */
        if (scheduled) {
            fprintf(stderr,
                    "%s: Co-routine was already scheduled in '%s'\n",
                    __func__, scheduled);
            abort();
        }

        if (to->caller) {
            fprintf(stderr, "Co-routine re-entered recursively\n");
            abort();
        }

        to->caller = from;
        to->ctx = ctx;

        /*
         * Store to->ctx before anything that stores to.  Matches
         * barrier in aio_co_wake and qemu_co_mutex_wake.
         */
        smp_wmb();

        ret = qemu_coroutine_switch(from, to, COROUTINE_ENTER);

        /*
         * Depth-first order: coroutines woken by @to are placed ahead of the
         * ones that were already waiting in the run queue.
         */
        QSIMPLEQ_PREPEND(&runq, &to->co_queue_wakeup);

        if (ret == COROUTINE_TERMINATE) {
            assert(!to->locks_held);
            trace_qemu_coroutine_terminate(to);
            coroutine_delete(to);
        } else if (ret != COROUTINE_YIELD) {
            /* Any other action is not expected here */
            abort();
        }
    }
}
 | 
						|
 | 
						|
/* Enter @co using the calling thread's current AioContext */
void qemu_coroutine_enter(Coroutine *co)
{
    AioContext *current_ctx = qemu_get_current_aio_context();

    qemu_aio_coroutine_enter(current_ctx, co);
}
 | 
						|
 | 
						|
/* Enter @co unless qemu_coroutine_entered() reports it as already entered */
void qemu_coroutine_enter_if_inactive(Coroutine *co)
{
    if (qemu_coroutine_entered(co)) {
        return;
    }

    qemu_coroutine_enter(co);
}
 | 
						|
 | 
						|
/*
 * Suspend the running coroutine and transfer control back to the coroutine
 * that entered it. Aborts when the running coroutine has no caller.
 */
void coroutine_fn qemu_coroutine_yield(void)
{
    Coroutine *self = qemu_coroutine_self();
    Coroutine *caller = self->caller;

    trace_qemu_coroutine_yield(self, caller);

    if (caller == NULL) {
        fprintf(stderr, "Co-routine is yielding to no one\n");
        abort();
    }

    /* Clear the link before switching away */
    self->caller = NULL;
    qemu_coroutine_switch(self, caller, COROUTINE_YIELD);
}
 | 
						|
 | 
						|
/* Returns: true if @co currently has a caller recorded, false otherwise */
bool qemu_coroutine_entered(Coroutine *co)
{
    return co->caller != NULL;
}
 | 
						|
 | 
						|
/* Returns: the AioContext stored in @co when it was last entered */
AioContext *qemu_coroutine_get_aio_context(Coroutine *co)
{
    AioContext *ctx = co->ctx;

    return ctx;
}
 | 
						|
 | 
						|
void qemu_coroutine_inc_pool_size(unsigned int additional_pool_size)
 | 
						|
{
 | 
						|
    QEMU_LOCK_GUARD(&global_pool_lock);
 | 
						|
    global_pool_max_size += additional_pool_size;
 | 
						|
}
 | 
						|
 | 
						|
void qemu_coroutine_dec_pool_size(unsigned int removing_pool_size)
 | 
						|
{
 | 
						|
    QEMU_LOCK_GUARD(&global_pool_lock);
 | 
						|
    global_pool_max_size -= removing_pool_size;
 | 
						|
}
 | 
						|
 | 
						|
static unsigned int get_global_pool_hard_max_size(void)
 | 
						|
{
 | 
						|
#ifdef __linux__
 | 
						|
    g_autofree char *contents = NULL;
 | 
						|
    int max_map_count;
 | 
						|
 | 
						|
    /*
 | 
						|
     * Linux processes can have up to max_map_count virtual memory areas
 | 
						|
     * (VMAs). mmap(2), mprotect(2), etc fail with ENOMEM beyond this limit. We
 | 
						|
     * must limit the coroutine pool to a safe size to avoid running out of
 | 
						|
     * VMAs.
 | 
						|
     */
 | 
						|
    if (g_file_get_contents("/proc/sys/vm/max_map_count", &contents, NULL,
 | 
						|
                            NULL) &&
 | 
						|
        qemu_strtoi(contents, NULL, 10, &max_map_count) == 0) {
 | 
						|
        /*
 | 
						|
         * This is an upper bound that avoids exceeding max_map_count. Leave a
 | 
						|
         * fixed amount for non-coroutine users like library dependencies,
 | 
						|
         * vhost-user, etc. Each coroutine takes up 2 VMAs so halve the
 | 
						|
         * remaining amount.
 | 
						|
         */
 | 
						|
        if (max_map_count > 5000) {
 | 
						|
            return (max_map_count - 5000) / 2;
 | 
						|
        } else {
 | 
						|
            /* Disable the global pool but threads still have local pools */
 | 
						|
            return 0;
 | 
						|
        }
 | 
						|
    }
 | 
						|
#endif
 | 
						|
 | 
						|
    return UINT_MAX;
 | 
						|
}
 | 
						|
 | 
						|
static void __attribute__((constructor)) qemu_coroutine_init(void)
 | 
						|
{
 | 
						|
    qemu_mutex_init(&global_pool_lock);
 | 
						|
    global_pool_hard_max_size = get_global_pool_hard_max_size();
 | 
						|
}
 |