diff --git a/MAINTAINERS b/MAINTAINERS index 4b025a7b63..89f274f85e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2650,6 +2650,7 @@ F: util/aio-*.c F: util/aio-*.h F: util/fdmon-*.c F: block/io.c +F: block/plug.c F: migration/block* F: include/block/aio.h F: include/block/aio-wait.h diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c index 4a7db82a71..0289a96569 100644 --- a/accel/tcg/cpu-exec.c +++ b/accel/tcg/cpu-exec.c @@ -175,7 +175,6 @@ struct tb_desc { tb_page_addr_t page_addr0; uint32_t flags; uint32_t cflags; - uint32_t trace_vcpu_dstate; }; static bool tb_lookup_cmp(const void *p, const void *d) @@ -187,7 +186,6 @@ static bool tb_lookup_cmp(const void *p, const void *d) tb_page_addr0(tb) == desc->page_addr0 && tb->cs_base == desc->cs_base && tb->flags == desc->flags && - tb->trace_vcpu_dstate == desc->trace_vcpu_dstate && tb_cflags(tb) == desc->cflags) { /* check next page if needed */ tb_page_addr_t tb_phys_page1 = tb_page_addr1(tb); @@ -228,7 +226,6 @@ static TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc, desc.cs_base = cs_base; desc.flags = flags; desc.cflags = cflags; - desc.trace_vcpu_dstate = *cpu->trace_dstate; desc.pc = pc; phys_pc = get_page_addr_code(desc.env, pc); if (phys_pc == -1) { @@ -236,7 +233,7 @@ static TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc, } desc.page_addr0 = phys_pc; h = tb_hash_func(phys_pc, (cflags & CF_PCREL ? 0 : pc), - flags, cflags, *cpu->trace_dstate); + flags, cs_base, cflags); return qht_lookup_custom(&tb_ctx.htable, &desc, h, tb_lookup_cmp); } @@ -263,7 +260,6 @@ static inline TranslationBlock *tb_lookup(CPUState *cpu, target_ulong pc, jc->array[hash].pc == pc && tb->cs_base == cs_base && tb->flags == flags && - tb->trace_vcpu_dstate == *cpu->trace_dstate && tb_cflags(tb) == cflags)) { return tb; } @@ -282,7 +278,6 @@ static inline TranslationBlock *tb_lookup(CPUState *cpu, target_ulong pc, tb->pc == pc && tb->cs_base == cs_base && tb->flags == flags && - tb->trace_vcpu_dstate == *cpu->trace_dstate && tb_cflags(tb) == cflags)) { return tb; } diff --git a/accel/tcg/tb-hash.h b/accel/tcg/tb-hash.h index 83dc610e4c..2ba2193731 100644 --- a/accel/tcg/tb-hash.h +++ b/accel/tcg/tb-hash.h @@ -61,10 +61,10 @@ static inline unsigned int tb_jmp_cache_hash_func(target_ulong pc) #endif /* CONFIG_SOFTMMU */ static inline -uint32_t tb_hash_func(tb_page_addr_t phys_pc, target_ulong pc, uint32_t flags, - uint32_t cf_mask, uint32_t trace_vcpu_dstate) +uint32_t tb_hash_func(tb_page_addr_t phys_pc, target_ulong pc, + uint32_t flags, uint64_t flags2, uint32_t cf_mask) { - return qemu_xxhash7(phys_pc, pc, flags, cf_mask, trace_vcpu_dstate); + return qemu_xxhash8(phys_pc, pc, flags2, flags, cf_mask); } #endif diff --git a/accel/tcg/tb-maint.c b/accel/tcg/tb-maint.c index 991746f80f..892eecda2d 100644 --- a/accel/tcg/tb-maint.c +++ b/accel/tcg/tb-maint.c @@ -50,7 +50,6 @@ static bool tb_cmp(const void *ap, const void *bp) a->cs_base == b->cs_base && a->flags == b->flags && (tb_cflags(a) & ~CF_INVALID) == (tb_cflags(b) & ~CF_INVALID) && - a->trace_vcpu_dstate == b->trace_vcpu_dstate && tb_page_addr0(a) == tb_page_addr0(b) && tb_page_addr1(a) == tb_page_addr1(b)); } @@ -888,7 +887,7 @@ static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list) /* remove the TB from the hash list */ phys_pc = tb_page_addr0(tb); h = tb_hash_func(phys_pc, (orig_cflags & CF_PCREL ? 0 : tb->pc), - tb->flags, orig_cflags, tb->trace_vcpu_dstate); + tb->flags, tb->cs_base, orig_cflags); if (!qht_remove(&tb_ctx.htable, tb, h)) { return; } @@ -969,7 +968,7 @@ TranslationBlock *tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc, /* add in the hash table */ h = tb_hash_func(phys_pc, (tb->cflags & CF_PCREL ? 0 : tb->pc), - tb->flags, tb->cflags, tb->trace_vcpu_dstate); + tb->flags, tb->cs_base, tb->cflags); qht_insert(&tb_ctx.htable, tb, h, &existing_tb); /* remove TB from the page(s) if we couldn't insert it */ diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c index e4e2195a8c..0e52fa1126 100644 --- a/accel/tcg/translate-all.c +++ b/accel/tcg/translate-all.c @@ -657,11 +657,6 @@ void libafl_add_backdoor_hook(void (*exec)(target_ulong id, uint64_t data), //// --- End LibAFL code --- -/* Make sure all possible CPU event bits fit in tb->trace_vcpu_dstate */ -QEMU_BUILD_BUG_ON(CPU_TRACE_DSTATE_MAX_EVENTS > - sizeof_field(TranslationBlock, trace_vcpu_dstate) - * BITS_PER_BYTE); - TBContext tb_ctx; /* @@ -1005,7 +1000,6 @@ TranslationBlock *libafl_gen_edge(CPUState *cpu, target_ulong src_block, tb->cs_base = cs_base; tb->flags = flags; tb->cflags = cflags; - tb->trace_vcpu_dstate = *cpu->trace_dstate; //tb_set_page_addr0(tb, phys_pc); //tb_set_page_addr1(tb, -1); tcg_ctx->gen_tb = tb; @@ -1148,7 +1142,6 @@ TranslationBlock *tb_gen_code(CPUState *cpu, tb->cs_base = cs_base; tb->flags = flags; tb->cflags = cflags; - tb->trace_vcpu_dstate = *cpu->trace_dstate; tb_set_page_addr0(tb, phys_pc); tb_set_page_addr1(tb, -1); tcg_ctx->gen_tb = tb; diff --git a/block/blkio.c b/block/blkio.c index 72117fa005..527323d625 100644 --- a/block/blkio.c +++ b/block/blkio.c @@ -17,6 +17,7 @@ #include "qemu/error-report.h" #include "qapi/qmp/qdict.h" #include "qemu/module.h" +#include "sysemu/block-backend.h" #include "exec/memory.h" /* for ram_block_discard_disable() */ #include "block/block-io.h" @@ -320,16 +321,30 @@ static void blkio_detach_aio_context(BlockDriverState *bs) NULL, NULL, NULL); } -/* Call with s->blkio_lock held to submit I/O after enqueuing a new request */ -static void blkio_submit_io(BlockDriverState *bs) +/* + * Called by blk_io_unplug() or immediately if not plugged. Called without + * blkio_lock. + */ +static void blkio_unplug_fn(void *opaque) { - if (qatomic_read(&bs->io_plugged) == 0) { - BDRVBlkioState *s = bs->opaque; + BDRVBlkioState *s = opaque; + WITH_QEMU_LOCK_GUARD(&s->blkio_lock) { blkioq_do_io(s->blkioq, NULL, 0, 0, NULL); } } +/* + * Schedule I/O submission after enqueuing a new request. Called without + * blkio_lock. + */ +static void blkio_submit_io(BlockDriverState *bs) +{ + BDRVBlkioState *s = bs->opaque; + + blk_io_plug_call(blkio_unplug_fn, s); +} + static int coroutine_fn blkio_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes) { @@ -340,9 +355,9 @@ blkio_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes) WITH_QEMU_LOCK_GUARD(&s->blkio_lock) { blkioq_discard(s->blkioq, offset, bytes, &cod, 0); - blkio_submit_io(bs); } + blkio_submit_io(bs); qemu_coroutine_yield(); return cod.ret; } @@ -373,9 +388,9 @@ blkio_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes, WITH_QEMU_LOCK_GUARD(&s->blkio_lock) { blkioq_readv(s->blkioq, offset, iov, iovcnt, &cod, 0); - blkio_submit_io(bs); } + blkio_submit_io(bs); qemu_coroutine_yield(); if (use_bounce_buffer) { @@ -418,9 +433,9 @@ static int coroutine_fn blkio_co_pwritev(BlockDriverState *bs, int64_t offset, WITH_QEMU_LOCK_GUARD(&s->blkio_lock) { blkioq_writev(s->blkioq, offset, iov, iovcnt, &cod, blkio_flags); - blkio_submit_io(bs); } + blkio_submit_io(bs); qemu_coroutine_yield(); if (use_bounce_buffer) { @@ -439,9 +454,9 @@ static int coroutine_fn blkio_co_flush(BlockDriverState *bs) WITH_QEMU_LOCK_GUARD(&s->blkio_lock) { blkioq_flush(s->blkioq, &cod, 0); - blkio_submit_io(bs); } + blkio_submit_io(bs); qemu_coroutine_yield(); return cod.ret; } @@ -467,22 +482,13 @@ static int coroutine_fn blkio_co_pwrite_zeroes(BlockDriverState *bs, WITH_QEMU_LOCK_GUARD(&s->blkio_lock) { blkioq_write_zeroes(s->blkioq, offset, bytes, &cod, blkio_flags); - blkio_submit_io(bs); } + blkio_submit_io(bs); qemu_coroutine_yield(); return cod.ret; } -static void coroutine_fn blkio_co_io_unplug(BlockDriverState *bs) -{ - BDRVBlkioState *s = bs->opaque; - - WITH_QEMU_LOCK_GUARD(&s->blkio_lock) { - blkio_submit_io(bs); - } -} - typedef enum { BMRR_OK, BMRR_SKIP, @@ -667,25 +673,60 @@ static int blkio_virtio_blk_common_open(BlockDriverState *bs, { const char *path = qdict_get_try_str(options, "path"); BDRVBlkioState *s = bs->opaque; - int ret; + bool fd_supported = false; + int fd, ret; if (!path) { error_setg(errp, "missing 'path' option"); return -EINVAL; } - ret = blkio_set_str(s->blkio, "path", path); - qdict_del(options, "path"); - if (ret < 0) { - error_setg_errno(errp, -ret, "failed to set path: %s", - blkio_get_error_msg()); - return ret; - } - if (!(flags & BDRV_O_NOCACHE)) { error_setg(errp, "cache.direct=off is not supported"); return -EINVAL; } + + if (blkio_get_int(s->blkio, "fd", &fd) == 0) { + fd_supported = true; + } + + /* + * If the libblkio driver supports fd passing, let's always use qemu_open() + * to open the `path`, so we can handle fd passing from the management + * layer through the "/dev/fdset/N" special path. + */ + if (fd_supported) { + int open_flags; + + if (flags & BDRV_O_RDWR) { + open_flags = O_RDWR; + } else { + open_flags = O_RDONLY; + } + + fd = qemu_open(path, open_flags, errp); + if (fd < 0) { + return -EINVAL; + } + + ret = blkio_set_int(s->blkio, "fd", fd); + if (ret < 0) { + error_setg_errno(errp, -ret, "failed to set fd: %s", + blkio_get_error_msg()); + qemu_close(fd); + return ret; + } + } else { + ret = blkio_set_str(s->blkio, "path", path); + if (ret < 0) { + error_setg_errno(errp, -ret, "failed to set path: %s", + blkio_get_error_msg()); + return ret; + } + } + + qdict_del(options, "path"); + return 0; } @@ -1004,7 +1045,6 @@ static void blkio_refresh_limits(BlockDriverState *bs, Error **errp) .bdrv_co_pwritev = blkio_co_pwritev, \ .bdrv_co_flush_to_disk = blkio_co_flush, \ .bdrv_co_pwrite_zeroes = blkio_co_pwrite_zeroes, \ - .bdrv_co_io_unplug = blkio_co_io_unplug, \ .bdrv_refresh_limits = blkio_refresh_limits, \ .bdrv_register_buf = blkio_register_buf, \ .bdrv_unregister_buf = blkio_unregister_buf, \ diff --git a/block/block-backend.c b/block/block-backend.c index 241f643507..4009ed5fed 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -2582,28 +2582,6 @@ void blk_add_insert_bs_notifier(BlockBackend *blk, Notifier *notify) notifier_list_add(&blk->insert_bs_notifiers, notify); } -void coroutine_fn blk_co_io_plug(BlockBackend *blk) -{ - BlockDriverState *bs = blk_bs(blk); - IO_CODE(); - GRAPH_RDLOCK_GUARD(); - - if (bs) { - bdrv_co_io_plug(bs); - } -} - -void coroutine_fn blk_co_io_unplug(BlockBackend *blk) -{ - BlockDriverState *bs = blk_bs(blk); - IO_CODE(); - GRAPH_RDLOCK_GUARD(); - - if (bs) { - bdrv_co_io_unplug(bs); - } -} - BlockAcctStats *blk_get_stats(BlockBackend *blk) { IO_CODE(); diff --git a/block/file-posix.c b/block/file-posix.c index 0ab158efba..ac1ed54811 100644 --- a/block/file-posix.c +++ b/block/file-posix.c @@ -2550,36 +2550,6 @@ static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, int64_t offset, return raw_co_prw(bs, offset, bytes, qiov, QEMU_AIO_WRITE); } -static void coroutine_fn raw_co_io_plug(BlockDriverState *bs) -{ - BDRVRawState __attribute__((unused)) *s = bs->opaque; -#ifdef CONFIG_LINUX_AIO - if (s->use_linux_aio) { - laio_io_plug(); - } -#endif -#ifdef CONFIG_LINUX_IO_URING - if (s->use_linux_io_uring) { - luring_io_plug(); - } -#endif -} - -static void coroutine_fn raw_co_io_unplug(BlockDriverState *bs) -{ - BDRVRawState __attribute__((unused)) *s = bs->opaque; -#ifdef CONFIG_LINUX_AIO - if (s->use_linux_aio) { - laio_io_unplug(s->aio_max_batch); - } -#endif -#ifdef CONFIG_LINUX_IO_URING - if (s->use_linux_io_uring) { - luring_io_unplug(); - } -#endif -} - static int coroutine_fn raw_co_flush_to_disk(BlockDriverState *bs) { BDRVRawState *s = bs->opaque; @@ -3924,8 +3894,6 @@ BlockDriver bdrv_file = { .bdrv_co_copy_range_from = raw_co_copy_range_from, .bdrv_co_copy_range_to = raw_co_copy_range_to, .bdrv_refresh_limits = raw_refresh_limits, - .bdrv_co_io_plug = raw_co_io_plug, - .bdrv_co_io_unplug = raw_co_io_unplug, .bdrv_attach_aio_context = raw_aio_attach_aio_context, .bdrv_co_truncate = raw_co_truncate, @@ -4296,8 +4264,6 @@ static BlockDriver bdrv_host_device = { .bdrv_co_copy_range_from = raw_co_copy_range_from, .bdrv_co_copy_range_to = raw_co_copy_range_to, .bdrv_refresh_limits = raw_refresh_limits, - .bdrv_co_io_plug = raw_co_io_plug, - .bdrv_co_io_unplug = raw_co_io_unplug, .bdrv_attach_aio_context = raw_aio_attach_aio_context, .bdrv_co_truncate = raw_co_truncate, @@ -4434,8 +4400,6 @@ static BlockDriver bdrv_host_cdrom = { .bdrv_co_pwritev = raw_co_pwritev, .bdrv_co_flush_to_disk = raw_co_flush_to_disk, .bdrv_refresh_limits = cdrom_refresh_limits, - .bdrv_co_io_plug = raw_co_io_plug, - .bdrv_co_io_unplug = raw_co_io_unplug, .bdrv_attach_aio_context = raw_aio_attach_aio_context, .bdrv_co_truncate = raw_co_truncate, @@ -4562,8 +4526,6 @@ static BlockDriver bdrv_host_cdrom = { .bdrv_co_pwritev = raw_co_pwritev, .bdrv_co_flush_to_disk = raw_co_flush_to_disk, .bdrv_refresh_limits = cdrom_refresh_limits, - .bdrv_co_io_plug = raw_co_io_plug, - .bdrv_co_io_unplug = raw_co_io_unplug, .bdrv_attach_aio_context = raw_aio_attach_aio_context, .bdrv_co_truncate = raw_co_truncate, diff --git a/block/io.c b/block/io.c index 540bf8d26d..f2dfc7c405 100644 --- a/block/io.c +++ b/block/io.c @@ -3223,43 +3223,6 @@ void *qemu_try_blockalign0(BlockDriverState *bs, size_t size) return mem; } -void coroutine_fn bdrv_co_io_plug(BlockDriverState *bs) -{ - BdrvChild *child; - IO_CODE(); - assert_bdrv_graph_readable(); - - QLIST_FOREACH(child, &bs->children, next) { - bdrv_co_io_plug(child->bs); - } - - if (qatomic_fetch_inc(&bs->io_plugged) == 0) { - BlockDriver *drv = bs->drv; - if (drv && drv->bdrv_co_io_plug) { - drv->bdrv_co_io_plug(bs); - } - } -} - -void coroutine_fn bdrv_co_io_unplug(BlockDriverState *bs) -{ - BdrvChild *child; - IO_CODE(); - assert_bdrv_graph_readable(); - - assert(bs->io_plugged); - if (qatomic_fetch_dec(&bs->io_plugged) == 1) { - BlockDriver *drv = bs->drv; - if (drv && drv->bdrv_co_io_unplug) { - drv->bdrv_co_io_unplug(bs); - } - } - - QLIST_FOREACH(child, &bs->children, next) { - bdrv_co_io_unplug(child->bs); - } -} - /* Helper that undoes bdrv_register_buf() when it fails partway through */ static void GRAPH_RDLOCK bdrv_register_buf_rollback(BlockDriverState *bs, void *host, size_t size, diff --git a/block/io_uring.c b/block/io_uring.c index 3a77480e16..69d9820928 100644 --- a/block/io_uring.c +++ b/block/io_uring.c @@ -16,6 +16,7 @@ #include "block/raw-aio.h" #include "qemu/coroutine.h" #include "qapi/error.h" +#include "sysemu/block-backend.h" #include "trace.h" /* Only used for assertions. */ @@ -41,7 +42,6 @@ typedef struct LuringAIOCB { } LuringAIOCB; typedef struct LuringQueue { - int plugged; unsigned int in_queue; unsigned int in_flight; bool blocked; @@ -267,7 +267,7 @@ static void luring_process_completions_and_submit(LuringState *s) { luring_process_completions(s); - if (!s->io_q.plugged && s->io_q.in_queue > 0) { + if (s->io_q.in_queue > 0) { ioq_submit(s); } } @@ -301,29 +301,17 @@ static void qemu_luring_poll_ready(void *opaque) static void ioq_init(LuringQueue *io_q) { QSIMPLEQ_INIT(&io_q->submit_queue); - io_q->plugged = 0; io_q->in_queue = 0; io_q->in_flight = 0; io_q->blocked = false; } -void luring_io_plug(void) +static void luring_unplug_fn(void *opaque) { - AioContext *ctx = qemu_get_current_aio_context(); - LuringState *s = aio_get_linux_io_uring(ctx); - trace_luring_io_plug(s); - s->io_q.plugged++; -} - -void luring_io_unplug(void) -{ - AioContext *ctx = qemu_get_current_aio_context(); - LuringState *s = aio_get_linux_io_uring(ctx); - assert(s->io_q.plugged); - trace_luring_io_unplug(s, s->io_q.blocked, s->io_q.plugged, - s->io_q.in_queue, s->io_q.in_flight); - if (--s->io_q.plugged == 0 && - !s->io_q.blocked && s->io_q.in_queue > 0) { + LuringState *s = opaque; + trace_luring_unplug_fn(s, s->io_q.blocked, s->io_q.in_queue, + s->io_q.in_flight); + if (!s->io_q.blocked && s->io_q.in_queue > 0) { ioq_submit(s); } } @@ -370,14 +358,16 @@ static int luring_do_submit(int fd, LuringAIOCB *luringcb, LuringState *s, QSIMPLEQ_INSERT_TAIL(&s->io_q.submit_queue, luringcb, next); s->io_q.in_queue++; - trace_luring_do_submit(s, s->io_q.blocked, s->io_q.plugged, - s->io_q.in_queue, s->io_q.in_flight); - if (!s->io_q.blocked && - (!s->io_q.plugged || - s->io_q.in_flight + s->io_q.in_queue >= MAX_ENTRIES)) { - ret = ioq_submit(s); - trace_luring_do_submit_done(s, ret); - return ret; + trace_luring_do_submit(s, s->io_q.blocked, s->io_q.in_queue, + s->io_q.in_flight); + if (!s->io_q.blocked) { + if (s->io_q.in_flight + s->io_q.in_queue >= MAX_ENTRIES) { + ret = ioq_submit(s); + trace_luring_do_submit_done(s, ret); + return ret; + } + + blk_io_plug_call(luring_unplug_fn, s); } return 0; } diff --git a/block/linux-aio.c b/block/linux-aio.c index 916f001e32..561c71a9ae 100644 --- a/block/linux-aio.c +++ b/block/linux-aio.c @@ -15,6 +15,7 @@ #include "qemu/event_notifier.h" #include "qemu/coroutine.h" #include "qapi/error.h" +#include "sysemu/block-backend.h" /* Only used for assertions. */ #include "qemu/coroutine_int.h" @@ -46,7 +47,6 @@ struct qemu_laiocb { }; typedef struct { - int plugged; unsigned int in_queue; unsigned int in_flight; bool blocked; @@ -236,7 +236,7 @@ static void qemu_laio_process_completions_and_submit(LinuxAioState *s) { qemu_laio_process_completions(s); - if (!s->io_q.plugged && !QSIMPLEQ_EMPTY(&s->io_q.pending)) { + if (!QSIMPLEQ_EMPTY(&s->io_q.pending)) { ioq_submit(s); } } @@ -277,7 +277,6 @@ static void qemu_laio_poll_ready(EventNotifier *opaque) static void ioq_init(LaioQueue *io_q) { QSIMPLEQ_INIT(&io_q->pending); - io_q->plugged = 0; io_q->in_queue = 0; io_q->in_flight = 0; io_q->blocked = false; @@ -354,31 +353,11 @@ static uint64_t laio_max_batch(LinuxAioState *s, uint64_t dev_max_batch) return max_batch; } -void laio_io_plug(void) +static void laio_unplug_fn(void *opaque) { - AioContext *ctx = qemu_get_current_aio_context(); - LinuxAioState *s = aio_get_linux_aio(ctx); + LinuxAioState *s = opaque; - s->io_q.plugged++; -} - -void laio_io_unplug(uint64_t dev_max_batch) -{ - AioContext *ctx = qemu_get_current_aio_context(); - LinuxAioState *s = aio_get_linux_aio(ctx); - - assert(s->io_q.plugged); - s->io_q.plugged--; - - /* - * Why max batch checking is performed here: - * Another BDS may have queued requests with a higher dev_max_batch and - * therefore in_queue could now exceed our dev_max_batch. Re-check the max - * batch so we can honor our device's dev_max_batch. - */ - if (s->io_q.in_queue >= laio_max_batch(s, dev_max_batch) || - (!s->io_q.plugged && - !s->io_q.blocked && !QSIMPLEQ_EMPTY(&s->io_q.pending))) { + if (!s->io_q.blocked && !QSIMPLEQ_EMPTY(&s->io_q.pending)) { ioq_submit(s); } } @@ -410,10 +389,12 @@ static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset, QSIMPLEQ_INSERT_TAIL(&s->io_q.pending, laiocb, next); s->io_q.in_queue++; - if (!s->io_q.blocked && - (!s->io_q.plugged || - s->io_q.in_queue >= laio_max_batch(s, dev_max_batch))) { - ioq_submit(s); + if (!s->io_q.blocked) { + if (s->io_q.in_queue >= laio_max_batch(s, dev_max_batch)) { + ioq_submit(s); + } else { + blk_io_plug_call(laio_unplug_fn, s); + } } return 0; diff --git a/block/meson.build b/block/meson.build index 486dda8b85..fb4332bd66 100644 --- a/block/meson.build +++ b/block/meson.build @@ -23,6 +23,7 @@ block_ss.add(files( 'mirror.c', 'nbd.c', 'null.c', + 'plug.c', 'qapi.c', 'qcow2-bitmap.c', 'qcow2-cache.c', diff --git a/block/nvme.c b/block/nvme.c index 17937d398d..7ca85bc44a 100644 --- a/block/nvme.c +++ b/block/nvme.c @@ -25,6 +25,7 @@ #include "qemu/vfio-helpers.h" #include "block/block-io.h" #include "block/block_int.h" +#include "sysemu/block-backend.h" #include "sysemu/replay.h" #include "trace.h" @@ -119,7 +120,6 @@ struct BDRVNVMeState { int blkshift; uint64_t max_transfer; - bool plugged; bool supports_write_zeroes; bool supports_discard; @@ -282,7 +282,7 @@ static void nvme_kick(NVMeQueuePair *q) { BDRVNVMeState *s = q->s; - if (s->plugged || !q->need_kick) { + if (!q->need_kick) { return; } trace_nvme_kick(s, q->index); @@ -387,10 +387,6 @@ static bool nvme_process_completion(NVMeQueuePair *q) NvmeCqe *c; trace_nvme_process_completion(s, q->index, q->inflight); - if (s->plugged) { - trace_nvme_process_completion_queue_plugged(s, q->index); - return false; - } /* * Support re-entrancy when a request cb() function invokes aio_poll(). @@ -480,6 +476,15 @@ static void nvme_trace_command(const NvmeCmd *cmd) } } +static void nvme_unplug_fn(void *opaque) +{ + NVMeQueuePair *q = opaque; + + QEMU_LOCK_GUARD(&q->lock); + nvme_kick(q); + nvme_process_completion(q); +} + static void nvme_submit_command(NVMeQueuePair *q, NVMeRequest *req, NvmeCmd *cmd, BlockCompletionFunc cb, void *opaque) @@ -496,8 +501,7 @@ static void nvme_submit_command(NVMeQueuePair *q, NVMeRequest *req, q->sq.tail * NVME_SQ_ENTRY_BYTES, cmd, sizeof(*cmd)); q->sq.tail = (q->sq.tail + 1) % NVME_QUEUE_SIZE; q->need_kick++; - nvme_kick(q); - nvme_process_completion(q); + blk_io_plug_call(nvme_unplug_fn, q); qemu_mutex_unlock(&q->lock); } @@ -1567,27 +1571,6 @@ static void nvme_attach_aio_context(BlockDriverState *bs, } } -static void coroutine_fn nvme_co_io_plug(BlockDriverState *bs) -{ - BDRVNVMeState *s = bs->opaque; - assert(!s->plugged); - s->plugged = true; -} - -static void coroutine_fn nvme_co_io_unplug(BlockDriverState *bs) -{ - BDRVNVMeState *s = bs->opaque; - assert(s->plugged); - s->plugged = false; - for (unsigned i = INDEX_IO(0); i < s->queue_count; i++) { - NVMeQueuePair *q = s->queues[i]; - qemu_mutex_lock(&q->lock); - nvme_kick(q); - nvme_process_completion(q); - qemu_mutex_unlock(&q->lock); - } -} - static bool nvme_register_buf(BlockDriverState *bs, void *host, size_t size, Error **errp) { @@ -1664,9 +1647,6 @@ static BlockDriver bdrv_nvme = { .bdrv_detach_aio_context = nvme_detach_aio_context, .bdrv_attach_aio_context = nvme_attach_aio_context, - .bdrv_co_io_plug = nvme_co_io_plug, - .bdrv_co_io_unplug = nvme_co_io_unplug, - .bdrv_register_buf = nvme_register_buf, .bdrv_unregister_buf = nvme_unregister_buf, }; diff --git a/block/plug.c b/block/plug.c new file mode 100644 index 0000000000..98a155d2f4 --- /dev/null +++ b/block/plug.c @@ -0,0 +1,159 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Block I/O plugging + * + * Copyright Red Hat. + * + * This API defers a function call within a blk_io_plug()/blk_io_unplug() + * section, allowing multiple calls to batch up. This is a performance + * optimization that is used in the block layer to submit several I/O requests + * at once instead of individually: + * + * blk_io_plug(); <-- start of plugged region + * ... + * blk_io_plug_call(my_func, my_obj); <-- deferred my_func(my_obj) call + * blk_io_plug_call(my_func, my_obj); <-- another + * blk_io_plug_call(my_func, my_obj); <-- another + * ... + * blk_io_unplug(); <-- end of plugged region, my_func(my_obj) is called once + * + * This code is actually generic and not tied to the block layer. If another + * subsystem needs this functionality, it could be renamed. + */ + +#include "qemu/osdep.h" +#include "qemu/coroutine-tls.h" +#include "qemu/notify.h" +#include "qemu/thread.h" +#include "sysemu/block-backend.h" + +/* A function call that has been deferred until unplug() */ +typedef struct { + void (*fn)(void *); + void *opaque; +} UnplugFn; + +/* Per-thread state */ +typedef struct { + unsigned count; /* how many times has plug() been called? */ + GArray *unplug_fns; /* functions to call at unplug time */ +} Plug; + +/* Use get_ptr_plug() to fetch this thread-local value */ +QEMU_DEFINE_STATIC_CO_TLS(Plug, plug); + +/* Called at thread cleanup time */ +static void blk_io_plug_atexit(Notifier *n, void *value) +{ + Plug *plug = get_ptr_plug(); + g_array_free(plug->unplug_fns, TRUE); +} + +/* This won't involve coroutines, so use __thread */ +static __thread Notifier blk_io_plug_atexit_notifier; + +/** + * blk_io_plug_call: + * @fn: a function pointer to be invoked + * @opaque: a user-defined argument to @fn() + * + * Call @fn(@opaque) immediately if not within a blk_io_plug()/blk_io_unplug() + * section. + * + * Otherwise defer the call until the end of the outermost + * blk_io_plug()/blk_io_unplug() section in this thread. If the same + * @fn/@opaque pair has already been deferred, it will only be called once upon + * blk_io_unplug() so that accumulated calls are batched into a single call. + * + * The caller must ensure that @opaque is not freed before @fn() is invoked. + */ +void blk_io_plug_call(void (*fn)(void *), void *opaque) +{ + Plug *plug = get_ptr_plug(); + + /* Call immediately if we're not plugged */ + if (plug->count == 0) { + fn(opaque); + return; + } + + GArray *array = plug->unplug_fns; + if (!array) { + array = g_array_new(FALSE, FALSE, sizeof(UnplugFn)); + plug->unplug_fns = array; + blk_io_plug_atexit_notifier.notify = blk_io_plug_atexit; + qemu_thread_atexit_add(&blk_io_plug_atexit_notifier); + } + + UnplugFn *fns = (UnplugFn *)array->data; + UnplugFn new_fn = { + .fn = fn, + .opaque = opaque, + }; + + /* + * There won't be many, so do a linear search. If this becomes a bottleneck + * then a binary search (glib 2.62+) or different data structure could be + * used. + */ + for (guint i = 0; i < array->len; i++) { + if (memcmp(&fns[i], &new_fn, sizeof(new_fn)) == 0) { + return; /* already exists */ + } + } + + g_array_append_val(array, new_fn); +} + +/** + * blk_io_plug: Defer blk_io_plug_call() functions until blk_io_unplug() + * + * blk_io_plug/unplug are thread-local operations. This means that multiple + * threads can simultaneously call plug/unplug, but the caller must ensure that + * each unplug() is called in the same thread of the matching plug(). + * + * Nesting is supported. blk_io_plug_call() functions are only called at the + * outermost blk_io_unplug(). + */ +void blk_io_plug(void) +{ + Plug *plug = get_ptr_plug(); + + assert(plug->count < UINT32_MAX); + + plug->count++; +} + +/** + * blk_io_unplug: Run any pending blk_io_plug_call() functions + * + * There must have been a matching blk_io_plug() call in the same thread prior + * to this blk_io_unplug() call. + */ +void blk_io_unplug(void) +{ + Plug *plug = get_ptr_plug(); + + assert(plug->count > 0); + + if (--plug->count > 0) { + return; + } + + GArray *array = plug->unplug_fns; + if (!array) { + return; + } + + UnplugFn *fns = (UnplugFn *)array->data; + + for (guint i = 0; i < array->len; i++) { + fns[i].fn(fns[i].opaque); + } + + /* + * This resets the array without freeing memory so that appending is cheap + * in the future. + */ + g_array_set_size(array, 0); +} diff --git a/block/trace-events b/block/trace-events index 32665158d6..6f121b7636 100644 --- a/block/trace-events +++ b/block/trace-events @@ -64,9 +64,8 @@ file_paio_submit(void *acb, void *opaque, int64_t offset, int count, int type) " # io_uring.c luring_init_state(void *s, size_t size) "s %p size %zu" luring_cleanup_state(void *s) "%p freed" -luring_io_plug(void *s) "LuringState %p plug" -luring_io_unplug(void *s, int blocked, int plugged, int queued, int inflight) "LuringState %p blocked %d plugged %d queued %d inflight %d" -luring_do_submit(void *s, int blocked, int plugged, int queued, int inflight) "LuringState %p blocked %d plugged %d queued %d inflight %d" +luring_unplug_fn(void *s, int blocked, int queued, int inflight) "LuringState %p blocked %d queued %d inflight %d" +luring_do_submit(void *s, int blocked, int queued, int inflight) "LuringState %p blocked %d queued %d inflight %d" luring_do_submit_done(void *s, int ret) "LuringState %p submitted to kernel %d" luring_co_submit(void *bs, void *s, void *luringcb, int fd, uint64_t offset, size_t nbytes, int type) "bs %p s %p luringcb %p fd %d offset %" PRId64 " nbytes %zd type %d" luring_process_completion(void *s, void *aiocb, int ret) "LuringState %p luringcb %p ret %d" @@ -141,7 +140,6 @@ nvme_kick(void *s, unsigned q_index) "s %p q #%u" nvme_dma_flush_queue_wait(void *s) "s %p" nvme_error(int cmd_specific, int sq_head, int sqid, int cid, int status) "cmd_specific %d sq_head %d sqid %d cid %d status 0x%x" nvme_process_completion(void *s, unsigned q_index, int inflight) "s %p q #%u inflight %d" -nvme_process_completion_queue_plugged(void *s, unsigned q_index) "s %p q #%u" nvme_complete_command(void *s, unsigned q_index, int cid) "s %p q #%u cid %d" nvme_submit_command(void *s, unsigned q_index, int cid) "s %p q #%u cid %d" nvme_submit_command_raw(int c0, int c1, int c2, int c3, int c4, int c5, int c6, int c7) "%02x %02x %02x %02x %02x %02x %02x %02x" diff --git a/bsd-user/freebsd/os-syscall.c b/bsd-user/freebsd/os-syscall.c index c8f998ecec..de36c4b71c 100644 --- a/bsd-user/freebsd/os-syscall.c +++ b/bsd-user/freebsd/os-syscall.c @@ -528,10 +528,8 @@ abi_long do_freebsd_syscall(void *cpu_env, int num, abi_long arg1, abi_long arg5, abi_long arg6, abi_long arg7, abi_long arg8) { - CPUState *cpu = env_cpu(cpu_env); abi_long ret; - trace_guest_user_syscall(cpu, num, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8); if (do_strace) { print_freebsd_syscall(num, arg1, arg2, arg3, arg4, arg5, arg6); } @@ -541,7 +539,6 @@ abi_long do_freebsd_syscall(void *cpu_env, int num, abi_long arg1, if (do_strace) { print_freebsd_syscall_ret(num, ret); } - trace_guest_user_syscall_ret(cpu, num, ret); return ret; } diff --git a/docs/about/deprecated.rst b/docs/about/deprecated.rst index e934e0a13a..0743459862 100644 --- a/docs/about/deprecated.rst +++ b/docs/about/deprecated.rst @@ -218,6 +218,22 @@ instruction per translated block" mode (which can be set on the command line or via the HMP, but not via QMP). The information remains available via the HMP 'info jit' command. +QEMU Machine Protocol (QMP) events +---------------------------------- + +``MEM_UNPLUG_ERROR`` (since 6.2) +'''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +Use the more generic event ``DEVICE_UNPLUG_GUEST_ERROR`` instead. + +``vcpu`` trace events (since 8.1) +''''''''''''''''''''''''''''''''' + +The ability to instrument QEMU helper functions with vCPU-aware trace +points was removed in 7.0. However QMP still exposed the vcpu +parameter. This argument has now been deprecated and the remaining +remaining trace points that used it are selected just by name. + Human Monitor Protocol (HMP) commands ------------------------------------- @@ -251,15 +267,6 @@ it. Since all recent x86 hardware from the past >10 years is capable of the 64-bit x86 extensions, a corresponding 64-bit OS should be used instead. -QEMU API (QAPI) events ----------------------- - -``MEM_UNPLUG_ERROR`` (since 6.2) -'''''''''''''''''''''''''''''''''''''''''''''''''''''''' - -Use the more generic event ``DEVICE_UNPLUG_GUEST_ERROR`` instead. - - System emulator machines ------------------------ diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c index 9621ec1341..991645adca 100644 --- a/hw/9pfs/9p.c +++ b/hw/9pfs/9p.c @@ -738,15 +738,14 @@ static VariLenAffix affixForIndex(uint64_t index) return invertAffix(&prefix); /* convert prefix to suffix */ } -/* creative abuse of tb_hash_func7, which is based on xxhash */ static uint32_t qpp_hash(QppEntry e) { - return qemu_xxhash7(e.ino_prefix, e.dev, 0, 0, 0); + return qemu_xxhash4(e.ino_prefix, e.dev); } static uint32_t qpf_hash(QpfEntry e) { - return qemu_xxhash7(e.ino, e.dev, 0, 0, 0); + return qemu_xxhash4(e.ino, e.dev); } static bool qpd_cmp_func(const void *obj, const void *userp) diff --git a/hw/block/dataplane/xen-block.c b/hw/block/dataplane/xen-block.c index 2597f38805..3b6f2b0aa2 100644 --- a/hw/block/dataplane/xen-block.c +++ b/hw/block/dataplane/xen-block.c @@ -537,7 +537,7 @@ static bool xen_block_handle_requests(XenBlockDataPlane *dataplane) * is below us. */ if (inflight_atstart > IO_PLUG_THRESHOLD) { - blk_io_plug(dataplane->blk); + blk_io_plug(); } while (rc != rp) { /* pull request from ring */ @@ -577,12 +577,12 @@ static bool xen_block_handle_requests(XenBlockDataPlane *dataplane) if (inflight_atstart > IO_PLUG_THRESHOLD && batched >= inflight_atstart) { - blk_io_unplug(dataplane->blk); + blk_io_unplug(); } xen_block_do_aio(request); if (inflight_atstart > IO_PLUG_THRESHOLD) { if (batched >= inflight_atstart) { - blk_io_plug(dataplane->blk); + blk_io_plug(); batched = 0; } else { batched++; @@ -590,7 +590,7 @@ static bool xen_block_handle_requests(XenBlockDataPlane *dataplane) } } if (inflight_atstart > IO_PLUG_THRESHOLD) { - blk_io_unplug(dataplane->blk); + blk_io_unplug(); } return done_something; diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c index 4ca66b5860..39e7f23fab 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -1134,7 +1134,7 @@ void virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq) bool suppress_notifications = virtio_queue_get_notification(vq); aio_context_acquire(blk_get_aio_context(s->blk)); - blk_io_plug(s->blk); + blk_io_plug(); do { if (suppress_notifications) { @@ -1158,7 +1158,7 @@ void virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq) virtio_blk_submit_multireq(s, &mrb); } - blk_io_unplug(s->blk); + blk_io_unplug(); aio_context_release(blk_get_aio_context(s->blk)); } diff --git a/hw/core/cpu-common.c b/hw/core/cpu-common.c index 5ccc3837b6..f4e51c8a1b 100644 --- a/hw/core/cpu-common.c +++ b/hw/core/cpu-common.c @@ -32,7 +32,7 @@ #include "sysemu/tcg.h" #include "hw/boards.h" #include "hw/qdev-properties.h" -#include "trace/trace-root.h" +#include "trace.h" #include "qemu/plugin.h" CPUState *cpu_by_arch_id(int64_t id) @@ -113,7 +113,7 @@ void cpu_reset(CPUState *cpu) { device_cold_reset(DEVICE(cpu)); - trace_guest_cpu_reset(cpu); + trace_cpu_reset(cpu->cpu_index); } static void cpu_common_reset_hold(Object *obj) @@ -211,7 +211,6 @@ static void cpu_common_realizefn(DeviceState *dev, Error **errp) } /* NOTE: latest generic point where the cpu is fully realized */ - trace_init_vcpu(cpu); } static void cpu_common_unrealizefn(DeviceState *dev) @@ -219,7 +218,6 @@ static void cpu_common_unrealizefn(DeviceState *dev) CPUState *cpu = CPU(dev); /* NOTE: latest generic point before the cpu is fully unrealized */ - trace_fini_vcpu(cpu); cpu_exec_unrealizefn(cpu); } diff --git a/hw/core/trace-events b/hw/core/trace-events index 56da55bd71..2cf085ac66 100644 --- a/hw/core/trace-events +++ b/hw/core/trace-events @@ -29,3 +29,6 @@ clock_set(const char *clk, uint64_t old, uint64_t new) "'%s', %"PRIu64"Hz->%"PRI clock_propagate(const char *clk) "'%s'" clock_update(const char *clk, const char *src, uint64_t hz, int cb) "'%s', src='%s', val=%"PRIu64"Hz cb=%d" clock_set_mul_div(const char *clk, uint32_t oldmul, uint32_t mul, uint32_t olddiv, uint32_t div) "'%s', mul: %u -> %u, div: %u -> %u" + +# cpu-common.c +cpu_reset(int cpu_index) "%d" diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c index 4a8849cc7e..9c8ef0aaa6 100644 --- a/hw/scsi/virtio-scsi.c +++ b/hw/scsi/virtio-scsi.c @@ -799,7 +799,7 @@ static int virtio_scsi_handle_cmd_req_prepare(VirtIOSCSI *s, VirtIOSCSIReq *req) return -ENOBUFS; } scsi_req_ref(req->sreq); - blk_io_plug(d->conf.blk); + blk_io_plug(); object_unref(OBJECT(d)); return 0; } @@ -810,7 +810,7 @@ static void virtio_scsi_handle_cmd_req_submit(VirtIOSCSI *s, VirtIOSCSIReq *req) if (scsi_req_enqueue(sreq)) { scsi_req_continue(sreq); } - blk_io_unplug(sreq->dev->conf.blk); + blk_io_unplug(); scsi_req_unref(sreq); } @@ -836,7 +836,7 @@ static void virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq) while (!QTAILQ_EMPTY(&reqs)) { req = QTAILQ_FIRST(&reqs); QTAILQ_REMOVE(&reqs, req, next); - blk_io_unplug(req->sreq->dev->conf.blk); + blk_io_unplug(); scsi_req_unref(req->sreq); virtqueue_detach_element(req->vq, &req->elem, 0); virtio_scsi_free_req(req); diff --git a/include/block/block-io.h b/include/block/block-io.h index a27e471a87..43af816d75 100644 --- a/include/block/block-io.h +++ b/include/block/block-io.h @@ -259,9 +259,6 @@ void coroutine_fn bdrv_co_leave(BlockDriverState *bs, AioContext *old_ctx); AioContext *child_of_bds_get_parent_aio_context(BdrvChild *c); -void coroutine_fn GRAPH_RDLOCK bdrv_co_io_plug(BlockDriverState *bs); -void coroutine_fn GRAPH_RDLOCK bdrv_co_io_unplug(BlockDriverState *bs); - bool coroutine_fn GRAPH_RDLOCK bdrv_co_can_store_new_dirty_bitmap(BlockDriverState *bs, const char *name, uint32_t granularity, Error **errp); diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h index b1cbc1e00c..74195c3004 100644 --- a/include/block/block_int-common.h +++ b/include/block/block_int-common.h @@ -768,11 +768,6 @@ struct BlockDriver { void coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_debug_event)( BlockDriverState *bs, BlkdebugEvent event); - /* io queue for linux-aio */ - void coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_io_plug)(BlockDriverState *bs); - void coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_io_unplug)( - BlockDriverState *bs); - bool (*bdrv_supports_persistent_dirty_bitmap)(BlockDriverState *bs); bool coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_can_store_new_dirty_bitmap)( @@ -1227,12 +1222,6 @@ struct BlockDriverState { unsigned int in_flight; unsigned int serialising_in_flight; - /* - * counter for nested bdrv_io_plug. - * Accessed with atomic ops. - */ - unsigned io_plugged; - /* do we need to tell the quest if we have a volatile write cache? */ int enable_write_cache; diff --git a/include/block/raw-aio.h b/include/block/raw-aio.h index 0fe85ade77..0f63c2800c 100644 --- a/include/block/raw-aio.h +++ b/include/block/raw-aio.h @@ -62,13 +62,6 @@ int coroutine_fn laio_co_submit(int fd, uint64_t offset, QEMUIOVector *qiov, void laio_detach_aio_context(LinuxAioState *s, AioContext *old_context); void laio_attach_aio_context(LinuxAioState *s, AioContext *new_context); - -/* - * laio_io_plug/unplug work in the thread's current AioContext, therefore the - * caller must ensure that they are paired in the same IOThread. - */ -void laio_io_plug(void); -void laio_io_unplug(uint64_t dev_max_batch); #endif /* io_uring.c - Linux io_uring implementation */ #ifdef CONFIG_LINUX_IO_URING @@ -81,13 +74,6 @@ int coroutine_fn luring_co_submit(BlockDriverState *bs, int fd, uint64_t offset, QEMUIOVector *qiov, int type); void luring_detach_aio_context(LuringState *s, AioContext *old_context); void luring_attach_aio_context(LuringState *s, AioContext *new_context); - -/* - * luring_io_plug/unplug work in the thread's current AioContext, therefore the - * caller must ensure that they are paired in the same IOThread. - */ -void luring_io_plug(void); -void luring_io_unplug(void); #endif #ifdef _WIN32 diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index 4d2b151986..3b1b57f6ad 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -545,9 +545,6 @@ struct TranslationBlock { #define CF_CLUSTER_MASK 0xff000000 /* Top 8 bits are cluster ID */ #define CF_CLUSTER_SHIFT 24 - /* Per-vCPU dynamic tracing state used to generate this TB */ - uint32_t trace_vcpu_dstate; - /* * Above fields used for comparing */ diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h index 39150cf8f8..383456d1b3 100644 --- a/include/hw/core/cpu.h +++ b/include/hw/core/cpu.h @@ -266,7 +266,6 @@ typedef void (*run_on_cpu_func)(CPUState *cpu, run_on_cpu_data data); struct qemu_work_item; #define CPU_UNSET_NUMA_NODE_ID -1 -#define CPU_TRACE_DSTATE_MAX_EVENTS 32 /** * CPUState: @@ -407,10 +406,6 @@ struct CPUState { /* Use by accel-block: CPU is executing an ioctl() */ QemuLockCnt in_ioctl_lock; - /* Used for events with 'vcpu' and *without* the 'disabled' properties */ - DECLARE_BITMAP(trace_dstate_delayed, CPU_TRACE_DSTATE_MAX_EVENTS); - DECLARE_BITMAP(trace_dstate, CPU_TRACE_DSTATE_MAX_EVENTS); - DECLARE_BITMAP(plugin_mask, QEMU_PLUGIN_EV_MAX); #ifdef CONFIG_PLUGIN diff --git a/include/migration/global_state.h b/include/migration/global_state.h index 945eb35d5b..d7c2cd3216 100644 --- a/include/migration/global_state.h +++ b/include/migration/global_state.h @@ -16,7 +16,7 @@ #include "qapi/qapi-types-run-state.h" void register_global_state(void); -int global_state_store(void); +void global_state_store(void); void global_state_store_running(void); bool global_state_received(void); RunState global_state_get_runstate(void); diff --git a/include/qemu/xxhash.h b/include/qemu/xxhash.h index c2dcccadbf..0259bbef18 100644 --- a/include/qemu/xxhash.h +++ b/include/qemu/xxhash.h @@ -48,8 +48,8 @@ * xxhash32, customized for input variables that are not guaranteed to be * contiguous in memory. */ -static inline uint32_t -qemu_xxhash7(uint64_t ab, uint64_t cd, uint32_t e, uint32_t f, uint32_t g) +static inline uint32_t qemu_xxhash8(uint64_t ab, uint64_t cd, uint64_t ef, + uint32_t g, uint32_t h) { uint32_t v1 = QEMU_XXHASH_SEED + PRIME32_1 + PRIME32_2; uint32_t v2 = QEMU_XXHASH_SEED + PRIME32_2; @@ -59,6 +59,8 @@ qemu_xxhash7(uint64_t ab, uint64_t cd, uint32_t e, uint32_t f, uint32_t g) uint32_t b = ab >> 32; uint32_t c = cd; uint32_t d = cd >> 32; + uint32_t e = ef; + uint32_t f = ef >> 32; uint32_t h32; v1 += a * PRIME32_2; @@ -89,6 +91,9 @@ qemu_xxhash7(uint64_t ab, uint64_t cd, uint32_t e, uint32_t f, uint32_t g) h32 += g * PRIME32_3; h32 = rol32(h32, 17) * PRIME32_4; + h32 += h * PRIME32_3; + h32 = rol32(h32, 17) * PRIME32_4; + h32 ^= h32 >> 15; h32 *= PRIME32_2; h32 ^= h32 >> 13; @@ -100,23 +105,29 @@ qemu_xxhash7(uint64_t ab, uint64_t cd, uint32_t e, uint32_t f, uint32_t g) static inline uint32_t qemu_xxhash2(uint64_t ab) { - return qemu_xxhash7(ab, 0, 0, 0, 0); + return qemu_xxhash8(ab, 0, 0, 0, 0); } static inline uint32_t qemu_xxhash4(uint64_t ab, uint64_t cd) { - return qemu_xxhash7(ab, cd, 0, 0, 0); + return qemu_xxhash8(ab, cd, 0, 0, 0); } static inline uint32_t qemu_xxhash5(uint64_t ab, uint64_t cd, uint32_t e) { - return qemu_xxhash7(ab, cd, e, 0, 0); + return qemu_xxhash8(ab, cd, 0, e, 0); } static inline uint32_t qemu_xxhash6(uint64_t ab, uint64_t cd, uint32_t e, uint32_t f) { - return qemu_xxhash7(ab, cd, e, f, 0); + return qemu_xxhash8(ab, cd, 0, e, f); +} + +static inline uint32_t qemu_xxhash7(uint64_t ab, uint64_t cd, uint64_t ef, + uint32_t g) +{ + return qemu_xxhash8(ab, cd, ef, g, 0); } /* diff --git a/include/sysemu/block-backend-io.h b/include/sysemu/block-backend-io.h index d62a7ee773..be4dcef59d 100644 --- a/include/sysemu/block-backend-io.h +++ b/include/sysemu/block-backend-io.h @@ -100,16 +100,9 @@ void blk_iostatus_set_err(BlockBackend *blk, int error); int blk_get_max_iov(BlockBackend *blk); int blk_get_max_hw_iov(BlockBackend *blk); -/* - * blk_io_plug/unplug are thread-local operations. This means that multiple - * IOThreads can simultaneously call plug/unplug, but the caller must ensure - * that each unplug() is called in the same IOThread of the matching plug(). - */ -void coroutine_fn blk_co_io_plug(BlockBackend *blk); -void co_wrapper blk_io_plug(BlockBackend *blk); - -void coroutine_fn blk_co_io_unplug(BlockBackend *blk); -void co_wrapper blk_io_unplug(BlockBackend *blk); +void blk_io_plug(void); +void blk_io_unplug(void); +void blk_io_plug_call(void (*fn)(void *), void *opaque); AioContext *blk_get_aio_context(BlockBackend *blk); BlockAcctStats *blk_get_stats(BlockBackend *blk); diff --git a/include/sysemu/runstate.h b/include/sysemu/runstate.h index f3ed52548e..7beb29c2e2 100644 --- a/include/sysemu/runstate.h +++ b/include/sysemu/runstate.h @@ -6,9 +6,9 @@ bool runstate_check(RunState state); void runstate_set(RunState new_state); +RunState runstate_get(void); bool runstate_is_running(void); bool runstate_needs_reset(void); -bool runstate_store(char *str, size_t size); typedef void VMChangeStateHandler(void *opaque, bool running, RunState state); diff --git a/include/user/syscall-trace.h b/include/user/syscall-trace.h index 90bda7631c..557f881a79 100644 --- a/include/user/syscall-trace.h +++ b/include/user/syscall-trace.h @@ -26,9 +26,6 @@ static inline void record_syscall_start(void *cpu, int num, abi_long arg5, abi_long arg6, abi_long arg7, abi_long arg8) { - trace_guest_user_syscall(cpu, num, - arg1, arg2, arg3, arg4, - arg5, arg6, arg7, arg8); qemu_plugin_vcpu_syscall(cpu, num, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8); @@ -36,7 +33,6 @@ static inline void record_syscall_start(void *cpu, int num, static inline void record_syscall_return(void *cpu, int num, abi_long ret) { - trace_guest_user_syscall_ret(cpu, num, ret); qemu_plugin_vcpu_syscall_ret(cpu, num, ret); } diff --git a/meson.build b/meson.build index 98b328d596..375c26ae61 100644 --- a/meson.build +++ b/meson.build @@ -2106,6 +2106,10 @@ config_host_data.set('CONFIG_LZO', lzo.found()) config_host_data.set('CONFIG_MPATH', mpathpersist.found()) config_host_data.set('CONFIG_MPATH_NEW_API', mpathpersist_new_api) config_host_data.set('CONFIG_BLKIO', blkio.found()) +if blkio.found() + config_host_data.set('CONFIG_BLKIO_VHOST_VDPA_FD', + blkio.version().version_compare('>=1.3.0')) +endif config_host_data.set('CONFIG_CURL', curl.found()) config_host_data.set('CONFIG_CURSES', curses.found()) config_host_data.set('CONFIG_GBM', gbm.found()) diff --git a/migration/global_state.c b/migration/global_state.c index a33947ca32..4e2a9d8ec0 100644 --- a/migration/global_state.c +++ b/migration/global_state.c @@ -29,23 +29,22 @@ typedef struct { static GlobalState global_state; -int global_state_store(void) +static void global_state_do_store(RunState state) { - if (!runstate_store((char *)global_state.runstate, - sizeof(global_state.runstate))) { - error_report("runstate name too big: %s", global_state.runstate); - trace_migrate_state_too_big(); - return -EINVAL; - } - return 0; + const char *state_str = RunState_str(state); + assert(strlen(state_str) < sizeof(global_state.runstate)); + strpadcpy((char *)global_state.runstate, sizeof(global_state.runstate), + state_str, '\0'); +} + +void global_state_store(void) +{ + global_state_do_store(runstate_get()); } void global_state_store_running(void) { - const char *state = RunState_str(RUN_STATE_RUNNING); - assert(strlen(state) < sizeof(global_state.runstate)); - strpadcpy((char *)global_state.runstate, sizeof(global_state.runstate), - state, '\0'); + global_state_do_store(RUN_STATE_RUNNING); } bool global_state_received(void) diff --git a/migration/migration.c b/migration/migration.c index 5de7f734b9..dc05c6f6ea 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -1402,7 +1402,7 @@ void migrate_init(MigrationState *s) s->start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); s->total_time = 0; - s->vm_was_running = false; + s->vm_old_state = -1; s->iteration_initial_bytes = 0; s->threshold_size = 0; } @@ -2287,28 +2287,28 @@ static void migration_completion(MigrationState *s) qemu_mutex_lock_iothread(); s->downtime_start = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL); - s->vm_was_running = runstate_is_running(); - ret = global_state_store(); - if (!ret) { - ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE); - trace_migration_completion_vm_stop(ret); - if (ret >= 0) { - ret = migration_maybe_pause(s, ¤t_active_state, - MIGRATION_STATUS_DEVICE); - } - if (ret >= 0) { - /* - * Inactivate disks except in COLO, and track that we - * have done so in order to remember to reactivate - * them if migration fails or is cancelled. - */ - s->block_inactive = !migrate_colo(); - migration_rate_set(RATE_LIMIT_DISABLED); - ret = qemu_savevm_state_complete_precopy(s->to_dst_file, false, - s->block_inactive); - } + s->vm_old_state = runstate_get(); + global_state_store(); + + ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE); + trace_migration_completion_vm_stop(ret); + if (ret >= 0) { + ret = migration_maybe_pause(s, ¤t_active_state, + MIGRATION_STATUS_DEVICE); } + if (ret >= 0) { + /* + * Inactivate disks except in COLO, and track that we + * have done so in order to remember to reactivate + * them if migration fails or is cancelled. + */ + s->block_inactive = !migrate_colo(); + migration_rate_set(RATE_LIMIT_DISABLED); + ret = qemu_savevm_state_complete_precopy(s->to_dst_file, false, + s->block_inactive); + } + qemu_mutex_unlock_iothread(); if (ret < 0) { @@ -2400,13 +2400,6 @@ static void bg_migration_completion(MigrationState *s) { int current_active_state = s->state; - /* - * Stop tracking RAM writes - un-protect memory, un-register UFFD - * memory ranges, flush kernel wait queues and wake up threads - * waiting for write fault to be resolved. - */ - ram_write_tracking_stop(); - if (s->state == MIGRATION_STATUS_ACTIVE) { /* * By this moment we have RAM content saved into the migration stream. @@ -2761,18 +2754,18 @@ static void migration_iteration_finish(MigrationState *s) case MIGRATION_STATUS_COLO: assert(migrate_colo()); migrate_start_colo_process(s); - s->vm_was_running = true; + s->vm_old_state = RUN_STATE_RUNNING; /* Fallthrough */ case MIGRATION_STATUS_FAILED: case MIGRATION_STATUS_CANCELLED: case MIGRATION_STATUS_CANCELLING: - if (s->vm_was_running) { + if (s->vm_old_state == RUN_STATE_RUNNING) { if (!runstate_check(RUN_STATE_SHUTDOWN)) { vm_start(); } } else { if (runstate_check(RUN_STATE_FINISH_MIGRATE)) { - runstate_set(RUN_STATE_POSTMIGRATE); + runstate_set(s->vm_old_state); } } break; @@ -2788,6 +2781,13 @@ static void migration_iteration_finish(MigrationState *s) static void bg_migration_iteration_finish(MigrationState *s) { + /* + * Stop tracking RAM writes - un-protect memory, un-register UFFD + * memory ranges, flush kernel wait queues and wake up threads + * waiting for write fault to be resolved. + */ + ram_write_tracking_stop(); + qemu_mutex_lock_iothread(); switch (s->state) { case MIGRATION_STATUS_COMPLETED: @@ -3086,11 +3086,9 @@ static void *bg_migration_thread(void *opaque) * transition in vm_stop_force_state() we need to wakeup it up. */ qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL); - s->vm_was_running = runstate_is_running(); + s->vm_old_state = runstate_get(); - if (global_state_store()) { - goto fail; - } + global_state_store(); /* Forcibly stop VM before saving state of vCPUs and devices */ if (vm_stop_force_state(RUN_STATE_PAUSED)) { goto fail; diff --git a/migration/migration.h b/migration/migration.h index 48a46123a0..30c3e97635 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -25,6 +25,7 @@ #include "net/announce.h" #include "qom/object.h" #include "postcopy-ram.h" +#include "sysemu/runstate.h" struct PostcopyBlocktimeContext; @@ -317,12 +318,14 @@ struct MigrationState { int64_t expected_downtime; bool capabilities[MIGRATION_CAPABILITY__MAX]; int64_t setup_time; + /* - * Whether guest was running when we enter the completion stage. + * State before stopping the vm by vm_stop_force_state(). * If migration is interrupted by any reason, we need to continue - * running the guest on source. + * running the guest on source if it was running or restore its stopped + * state. */ - bool vm_was_running; + RunState vm_old_state; /* Flag set once the migration has been asked to enter postcopy */ bool start_postcopy; diff --git a/migration/savevm.c b/migration/savevm.c index c2b4c81cd2..05985c2a51 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -2927,11 +2927,7 @@ bool save_snapshot(const char *name, bool overwrite, const char *vmstate, saved_vm_running = runstate_is_running(); - ret = global_state_store(); - if (ret) { - error_setg(errp, "Error saving global state"); - return false; - } + global_state_store(); vm_stop(RUN_STATE_SAVE_VM); bdrv_drain_all_begin(); diff --git a/qapi/block-core.json b/qapi/block-core.json index 98d9116dae..4bf89171c6 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -3955,10 +3955,16 @@ # # @path: path to the vhost-vdpa character device. # +# Features: +# @fdset: Member @path supports the special "/dev/fdset/N" path +# (since 8.1) +# # Since: 7.2 ## { 'struct': 'BlockdevOptionsVirtioBlkVhostVdpa', 'data': { 'path': 'str' }, + 'features': [ { 'name' :'fdset', + 'if': 'CONFIG_BLKIO_VHOST_VDPA_FD' } ], 'if': 'CONFIG_BLKIO' } ## diff --git a/qapi/trace.json b/qapi/trace.json index 6bf0af0946..39b752fc88 100644 --- a/qapi/trace.json +++ b/qapi/trace.json @@ -37,13 +37,14 @@ # # @vcpu: Whether this is a per-vCPU event (since 2.7). # -# An event is per-vCPU if it has the "vcpu" property in the -# "trace-events" files. +# Features: +# @deprecated: Member @vcpu is deprecated, and always ignored. # # Since: 2.2 ## { 'struct': 'TraceEventInfo', - 'data': {'name': 'str', 'state': 'TraceEventState', 'vcpu': 'bool'} } + 'data': {'name': 'str', 'state': 'TraceEventState', + 'vcpu': { 'type': 'bool', 'features': ['deprecated'] } } } ## # @trace-event-get-state: @@ -52,19 +53,15 @@ # # @name: Event name pattern (case-sensitive glob). # -# @vcpu: The vCPU to query (any by default; since 2.7). +# @vcpu: The vCPU to query (since 2.7). +# +# Features: +# @deprecated: Member @vcpu is deprecated, and always ignored. # # Returns: a list of @TraceEventInfo for the matching events # -# An event is returned if: -# -# - its name matches the @name pattern, and -# - if @vcpu is given, the event has the "vcpu" property. -# -# Therefore, if @vcpu is given, the operation will only match per-vCPU -# events, returning their state on the specified vCPU. Special case: -# if @name is an exact match, @vcpu is given and the event does not -# have the "vcpu" property, an error is returned. +# An event is returned if its name matches the @name pattern +# (There are no longer any per-vCPU events). # # Since: 2.2 # @@ -75,7 +72,8 @@ # <- { "return": [ { "name": "qemu_memalign", "state": "disabled", "vcpu": false } ] } ## { 'command': 'trace-event-get-state', - 'data': {'name': 'str', '*vcpu': 'int'}, + 'data': {'name': 'str', + '*vcpu': {'type': 'int', 'features': ['deprecated'] } }, 'returns': ['TraceEventInfo'] } ## @@ -91,15 +89,11 @@ # # @vcpu: The vCPU to act upon (all by default; since 2.7). # -# An event's state is modified if: +# Features: +# @deprecated: Member @vcpu is deprecated, and always ignored. # -# - its name matches the @name pattern, and -# - if @vcpu is given, the event has the "vcpu" property. -# -# Therefore, if @vcpu is given, the operation will only match per-vCPU -# events, setting their state on the specified vCPU. Special case: if -# @name is an exact match, @vcpu is given and the event does not have -# the "vcpu" property, an error is returned. +# An event is enabled if its name matches the @name pattern +# (There are no longer any per-vCPU events). # # Since: 2.2 # @@ -111,4 +105,4 @@ ## { 'command': 'trace-event-set-state', 'data': {'name': 'str', 'enable': 'bool', '*ignore-unavailable': 'bool', - '*vcpu': 'int'} } + '*vcpu': {'type': 'int', 'features': ['deprecated'] } } } diff --git a/scripts/qapi/gen.py b/scripts/qapi/gen.py index 8f8f784f4a..70bc576a10 100644 --- a/scripts/qapi/gen.py +++ b/scripts/qapi/gen.py @@ -13,6 +13,7 @@ from contextlib import contextmanager import os +import sys import re from typing import ( Dict, @@ -162,7 +163,7 @@ class QAPIGenC(QAPIGenCCode): def _top(self) -> str: return mcgen(''' -/* AUTOMATICALLY GENERATED, DO NOT MODIFY */ +/* AUTOMATICALLY GENERATED by %(tool)s DO NOT MODIFY */ /* %(blurb)s @@ -174,6 +175,7 @@ class QAPIGenC(QAPIGenCCode): */ ''', + tool=os.path.basename(sys.argv[0]), blurb=self._blurb, copyright=self._copyright) def _bottom(self) -> str: @@ -195,7 +197,10 @@ class QAPIGenH(QAPIGenC): class QAPIGenTrace(QAPIGen): def _top(self) -> str: - return super()._top() + '# AUTOMATICALLY GENERATED, DO NOT MODIFY\n\n' + return (super()._top() + + '# AUTOMATICALLY GENERATED by ' + + os.path.basename(sys.argv[0]) + + ', DO NOT MODIFY\n\n') @contextmanager diff --git a/scripts/tracetool/format/c.py b/scripts/tracetool/format/c.py index c390c1844a..69edf0d588 100644 --- a/scripts/tracetool/format/c.py +++ b/scripts/tracetool/format/c.py @@ -32,19 +32,13 @@ def generate(events, backend, group): out('uint16_t %s;' % e.api(e.QEMU_DSTATE)) for e in events: - if "vcpu" in e.properties: - vcpu_id = 0 - else: - vcpu_id = "TRACE_VCPU_EVENT_NONE" out('TraceEvent %(event)s = {', ' .id = 0,', - ' .vcpu_id = %(vcpu_id)s,', ' .name = \"%(name)s\",', ' .sstate = %(sstate)s,', ' .dstate = &%(dstate)s ', '};', event = e.api(e.QEMU_EVENT), - vcpu_id = vcpu_id, name = e.name, sstate = "TRACE_%s_ENABLED" % e.name.upper(), dstate = e.api(e.QEMU_DSTATE)) diff --git a/scripts/tracetool/format/h.py b/scripts/tracetool/format/h.py index e94f0be7da..ea126b07ea 100644 --- a/scripts/tracetool/format/h.py +++ b/scripts/tracetool/format/h.py @@ -16,10 +16,7 @@ from tracetool import out def generate(events, backend, group): - if group == "root": - header = "trace/control-vcpu.h" - else: - header = "trace/control.h" + header = "trace/control.h" out('/* This file is autogenerated by tracetool, do not edit. */', '', @@ -74,16 +71,7 @@ def generate(events, backend, group): out('}') - # tracer wrapper with checks (per-vCPU tracing) - if "vcpu" in e.properties: - trace_cpu = next(iter(e.args))[1] - cond = "trace_event_get_vcpu_state(%(cpu)s,"\ - " TRACE_%(id)s)"\ - % dict( - cpu=trace_cpu, - id=e.name.upper()) - else: - cond = "true" + cond = "true" out('', 'static inline void %(api)s(%(args)s)', diff --git a/softmmu/runstate.c b/softmmu/runstate.c index 11b72fde06..f4b6c5617a 100644 --- a/softmmu/runstate.c +++ b/softmmu/runstate.c @@ -121,7 +121,13 @@ static const RunStateTransition runstate_transitions_def[] = { { RUN_STATE_FINISH_MIGRATE, RUN_STATE_PAUSED }, { RUN_STATE_FINISH_MIGRATE, RUN_STATE_POSTMIGRATE }, { RUN_STATE_FINISH_MIGRATE, RUN_STATE_PRELAUNCH }, - { RUN_STATE_FINISH_MIGRATE, RUN_STATE_COLO}, + { RUN_STATE_FINISH_MIGRATE, RUN_STATE_COLO }, + { RUN_STATE_FINISH_MIGRATE, RUN_STATE_INTERNAL_ERROR }, + { RUN_STATE_FINISH_MIGRATE, RUN_STATE_IO_ERROR }, + { RUN_STATE_FINISH_MIGRATE, RUN_STATE_SHUTDOWN }, + { RUN_STATE_FINISH_MIGRATE, RUN_STATE_SUSPENDED }, + { RUN_STATE_FINISH_MIGRATE, RUN_STATE_WATCHDOG }, + { RUN_STATE_FINISH_MIGRATE, RUN_STATE_GUEST_PANICKED }, { RUN_STATE_RESTORE_VM, RUN_STATE_RUNNING }, { RUN_STATE_RESTORE_VM, RUN_STATE_PRELAUNCH }, @@ -175,18 +181,6 @@ bool runstate_check(RunState state) return current_run_state == state; } -bool runstate_store(char *str, size_t size) -{ - const char *state = RunState_str(current_run_state); - size_t len = strlen(state) + 1; - - if (len > size) { - return false; - } - memcpy(str, state, len); - return true; -} - static void runstate_init(void) { const RunStateTransition *p; @@ -221,6 +215,11 @@ void runstate_set(RunState new_state) current_run_state = new_state; } +RunState runstate_get(void) +{ + return current_run_state; +} + bool runstate_is_running(void) { return runstate_check(RUN_STATE_RUNNING); diff --git a/stubs/trace-control.c b/stubs/trace-control.c index 7f856e5c24..b428f34c87 100644 --- a/stubs/trace-control.c +++ b/stubs/trace-control.c @@ -36,16 +36,3 @@ void trace_event_set_state_dynamic(TraceEvent *ev, bool state) } } } - -void trace_event_set_vcpu_state_dynamic(CPUState *vcpu, - TraceEvent *ev, bool state) -{ - /* should never be called on non-target binaries */ - abort(); -} - -void trace_init_vcpu(CPUState *vcpu) -{ - /* should never be called on non-target binaries */ - abort(); -} diff --git a/trace-events b/trace-events index b6b84b175e..dd318ed1af 100644 --- a/trace-events +++ b/trace-events @@ -54,53 +54,3 @@ qmp_job_resume(void *job) "job %p" qmp_job_complete(void *job) "job %p" qmp_job_finalize(void *job) "job %p" qmp_job_dismiss(void *job) "job %p" - - -### Guest events, keep at bottom - - -## vCPU - -# trace/control-target.c - -# Hot-plug a new virtual (guest) CPU -# -# Mode: user, softmmu -# Targets: all -vcpu guest_cpu_enter(void) - -# trace/control.c - -# Hot-unplug a virtual (guest) CPU -# -# Mode: user, softmmu -# Targets: all -vcpu guest_cpu_exit(void) - -# hw/core/cpu.c - -# Reset the state of a virtual (guest) CPU -# -# Mode: user, softmmu -# Targets: all -vcpu guest_cpu_reset(void) - -# include/user/syscall-trace.h - -# @num: System call number. -# @arg*: System call argument value. -# -# Start executing a guest system call in syscall emulation mode. -# -# Mode: user -# Targets: TCG(all) -vcpu guest_user_syscall(uint64_t num, uint64_t arg1, uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5, uint64_t arg6, uint64_t arg7, uint64_t arg8) "num=0x%016"PRIx64" arg1=0x%016"PRIx64" arg2=0x%016"PRIx64" arg3=0x%016"PRIx64" arg4=0x%016"PRIx64" arg5=0x%016"PRIx64" arg6=0x%016"PRIx64" arg7=0x%016"PRIx64" arg8=0x%016"PRIx64 - -# @num: System call number. -# @ret: System call result value. -# -# Finish executing a guest system call in syscall emulation mode. -# -# Mode: user -# Targets: TCG(all) -vcpu guest_user_syscall_ret(uint64_t num, uint64_t ret) "num=0x%016"PRIx64" ret=0x%016"PRIx64 diff --git a/trace/control-internal.h b/trace/control-internal.h index 8b2b50a7cf..8d818d359b 100644 --- a/trace/control-internal.h +++ b/trace/control-internal.h @@ -25,16 +25,6 @@ static inline uint32_t trace_event_get_id(TraceEvent *ev) return ev->id; } -static inline uint32_t trace_event_get_vcpu_id(TraceEvent *ev) -{ - return ev->vcpu_id; -} - -static inline bool trace_event_is_vcpu(TraceEvent *ev) -{ - return ev->vcpu_id != TRACE_VCPU_EVENT_NONE; -} - static inline const char * trace_event_get_name(TraceEvent *ev) { assert(ev != NULL); diff --git a/trace/control-target.c b/trace/control-target.c index c0c1e2310a..97f21e476d 100644 --- a/trace/control-target.c +++ b/trace/control-target.c @@ -36,113 +36,22 @@ void trace_event_set_state_dynamic_init(TraceEvent *ev, bool state) void trace_event_set_state_dynamic(TraceEvent *ev, bool state) { - CPUState *vcpu; assert(trace_event_get_state_static(ev)); - if (trace_event_is_vcpu(ev) && likely(first_cpu != NULL)) { - CPU_FOREACH(vcpu) { - trace_event_set_vcpu_state_dynamic(vcpu, ev, state); - } - } else { - /* - * Without the "vcpu" property, dstate can only be 1 or 0. With it, we - * haven't instantiated any vCPU yet, so we will set a global state - * instead, and trace_init_vcpu will reconcile it afterwards. - */ - bool state_pre = *ev->dstate; - if (state_pre != state) { - if (state) { - trace_events_enabled_count++; - *ev->dstate = 1; - } else { - trace_events_enabled_count--; - *ev->dstate = 0; - } - } - } -} -static void trace_event_synchronize_vcpu_state_dynamic( - CPUState *vcpu, run_on_cpu_data ignored) -{ - bitmap_copy(vcpu->trace_dstate, vcpu->trace_dstate_delayed, - CPU_TRACE_DSTATE_MAX_EVENTS); - tcg_flush_jmp_cache(vcpu); -} - -void trace_event_set_vcpu_state_dynamic(CPUState *vcpu, - TraceEvent *ev, bool state) -{ - uint32_t vcpu_id; - bool state_pre; - assert(trace_event_get_state_static(ev)); - assert(trace_event_is_vcpu(ev)); - vcpu_id = trace_event_get_vcpu_id(ev); - state_pre = test_bit(vcpu_id, vcpu->trace_dstate); + /* + * There is no longer a "vcpu" property, dstate can only be 1 or + * 0. With it, we haven't instantiated any vCPU yet, so we will + * set a global state instead, and trace_init_vcpu will reconcile + * it afterwards. + */ + bool state_pre = *ev->dstate; if (state_pre != state) { if (state) { trace_events_enabled_count++; - set_bit(vcpu_id, vcpu->trace_dstate_delayed); - (*ev->dstate)++; + *ev->dstate = 1; } else { trace_events_enabled_count--; - clear_bit(vcpu_id, vcpu->trace_dstate_delayed); - (*ev->dstate)--; - } - if (vcpu->created) { - /* - * Delay changes until next TB; we want all TBs to be built from a - * single set of dstate values to ensure consistency of generated - * tracing code. - */ - async_run_on_cpu(vcpu, trace_event_synchronize_vcpu_state_dynamic, - RUN_ON_CPU_NULL); - } else { - trace_event_synchronize_vcpu_state_dynamic(vcpu, RUN_ON_CPU_NULL); + *ev->dstate = 0; } } } - -static bool adding_first_cpu1(void) -{ - CPUState *cpu; - size_t count = 0; - CPU_FOREACH(cpu) { - count++; - if (count > 1) { - return false; - } - } - return true; -} - -static bool adding_first_cpu(void) -{ - QEMU_LOCK_GUARD(&qemu_cpu_list_lock); - - return adding_first_cpu1(); -} - -void trace_init_vcpu(CPUState *vcpu) -{ - TraceEventIter iter; - TraceEvent *ev; - trace_event_iter_init_all(&iter); - while ((ev = trace_event_iter_next(&iter)) != NULL) { - if (trace_event_is_vcpu(ev) && - trace_event_get_state_static(ev) && - trace_event_get_state_dynamic(ev)) { - if (adding_first_cpu()) { - /* check preconditions */ - assert(*ev->dstate == 1); - /* disable early-init state ... */ - *ev->dstate = 0; - trace_events_enabled_count--; - /* ... and properly re-enable */ - trace_event_set_vcpu_state_dynamic(vcpu, ev, true); - } else { - trace_event_set_vcpu_state_dynamic(vcpu, ev, true); - } - } - } - trace_guest_cpu_enter(vcpu); -} diff --git a/trace/control-vcpu.h b/trace/control-vcpu.h deleted file mode 100644 index 0f98ebe7b5..0000000000 --- a/trace/control-vcpu.h +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Interface for configuring and controlling the state of tracing events. - * - * Copyright (C) 2011-2016 LluĂ­s Vilanova - * - * This work is licensed under the terms of the GNU GPL, version 2 or later. - * See the COPYING file in the top-level directory. - */ - -#ifndef TRACE__CONTROL_VCPU_H -#define TRACE__CONTROL_VCPU_H - -#include "control.h" -#include "event-internal.h" -#include "hw/core/cpu.h" - -/** - * trace_event_get_vcpu_state: - * @vcpu: Target vCPU. - * @id: Event identifier name. - * - * Get the tracing state of an event (both static and dynamic) for the given - * vCPU. - * - * If the event has the disabled property, the check will have no performance - * impact. - */ -#define trace_event_get_vcpu_state(vcpu, id) \ - ((id ##_ENABLED) && \ - trace_event_get_vcpu_state_dynamic_by_vcpu_id( \ - vcpu, _ ## id ## _EVENT.vcpu_id)) - -/** - * trace_event_get_vcpu_state_dynamic: - * - * Get the dynamic tracing state of an event for the given vCPU. - */ -static bool trace_event_get_vcpu_state_dynamic(CPUState *vcpu, TraceEvent *ev); - -#include "control-internal.h" - -static inline bool -trace_event_get_vcpu_state_dynamic_by_vcpu_id(CPUState *vcpu, - uint32_t vcpu_id) -{ - /* it's on fast path, avoid consistency checks (asserts) */ - if (unlikely(trace_events_enabled_count)) { - return test_bit(vcpu_id, vcpu->trace_dstate); - } else { - return false; - } -} - -static inline bool trace_event_get_vcpu_state_dynamic(CPUState *vcpu, - TraceEvent *ev) -{ - uint32_t vcpu_id; - assert(trace_event_is_vcpu(ev)); - vcpu_id = trace_event_get_vcpu_id(ev); - return trace_event_get_vcpu_state_dynamic_by_vcpu_id(vcpu, vcpu_id); -} - -#endif diff --git a/trace/control.c b/trace/control.c index 6c77cc6318..1a48a7e266 100644 --- a/trace/control.c +++ b/trace/control.c @@ -68,16 +68,6 @@ void trace_event_register_group(TraceEvent **events) size_t i; for (i = 0; events[i] != NULL; i++) { events[i]->id = next_id++; - if (events[i]->vcpu_id == TRACE_VCPU_EVENT_NONE) { - continue; - } - - if (likely(next_vcpu_id < CPU_TRACE_DSTATE_MAX_EVENTS)) { - events[i]->vcpu_id = next_vcpu_id++; - } else { - warn_report("too many vcpu trace events; dropping '%s'", - events[i]->name); - } } event_groups = g_renew(TraceEventGroup, event_groups, nevent_groups + 1); event_groups[nevent_groups].events = events; @@ -272,24 +262,6 @@ void trace_init_file(void) #endif } -void trace_fini_vcpu(CPUState *vcpu) -{ - TraceEventIter iter; - TraceEvent *ev; - - trace_guest_cpu_exit(vcpu); - - trace_event_iter_init_all(&iter); - while ((ev = trace_event_iter_next(&iter)) != NULL) { - if (trace_event_is_vcpu(ev) && - trace_event_get_state_static(ev) && - trace_event_get_vcpu_state_dynamic(vcpu, ev)) { - /* must disable to affect the global counter */ - trace_event_set_vcpu_state_dynamic(vcpu, ev, false); - } - } -} - bool trace_init_backends(void) { #ifdef CONFIG_TRACE_SIMPLE diff --git a/trace/control.h b/trace/control.h index 23b8393b29..dfd209edd8 100644 --- a/trace/control.h +++ b/trace/control.h @@ -89,23 +89,6 @@ static bool trace_event_is_pattern(const char *str); */ static uint32_t trace_event_get_id(TraceEvent *ev); -/** - * trace_event_get_vcpu_id: - * - * Get the per-vCPU identifier of an event. - * - * Special value #TRACE_VCPU_EVENT_NONE means the event is not vCPU-specific - * (does not have the "vcpu" property). - */ -static uint32_t trace_event_get_vcpu_id(TraceEvent *ev); - -/** - * trace_event_is_vcpu: - * - * Whether this is a per-vCPU event. - */ -static bool trace_event_is_vcpu(TraceEvent *ev); - /** * trace_event_get_name: * @@ -172,21 +155,6 @@ static bool trace_event_get_state_dynamic(TraceEvent *ev); */ void trace_event_set_state_dynamic(TraceEvent *ev, bool state); -/** - * trace_event_set_vcpu_state_dynamic: - * - * Set the dynamic tracing state of an event for the given vCPU. - * - * Pre-condition: trace_event_get_vcpu_state_static(ev) == true - * - * Note: Changes for execution-time events with the 'tcg' property will not be - * propagated until the next TB is executed (iff executing in TCG mode). - */ -void trace_event_set_vcpu_state_dynamic(CPUState *vcpu, - TraceEvent *ev, bool state); - - - /** * trace_init_backends: * @@ -205,22 +173,6 @@ bool trace_init_backends(void); */ void trace_init_file(void); -/** - * trace_init_vcpu: - * @vcpu: Added vCPU. - * - * Set initial dynamic event state for a hot-plugged vCPU. - */ -void trace_init_vcpu(CPUState *vcpu); - -/** - * trace_fini_vcpu: - * @vcpu: Removed vCPU. - * - * Disable dynamic event state for a hot-unplugged vCPU. - */ -void trace_fini_vcpu(CPUState *vcpu); - /** * trace_list_events: * @f: Where to send output. diff --git a/trace/event-internal.h b/trace/event-internal.h index f63500b37e..0c24e01b52 100644 --- a/trace/event-internal.h +++ b/trace/event-internal.h @@ -19,7 +19,6 @@ /** * TraceEvent: * @id: Unique event identifier. - * @vcpu_id: Unique per-vCPU event identifier. * @name: Event name. * @sstate: Static tracing state. * @dstate: Dynamic tracing state @@ -33,7 +32,6 @@ */ typedef struct TraceEvent { uint32_t id; - uint32_t vcpu_id; const char * name; const bool sstate; uint16_t *dstate; diff --git a/trace/qmp.c b/trace/qmp.c index 3b4f4702b4..3e3971c6a8 100644 --- a/trace/qmp.c +++ b/trace/qmp.c @@ -10,23 +10,10 @@ #include "qemu/osdep.h" #include "qapi/error.h" #include "qapi/qapi-commands-trace.h" -#include "control-vcpu.h" +#include "control.h" -static CPUState *get_cpu(bool has_vcpu, int vcpu, Error **errp) -{ - if (has_vcpu) { - CPUState *cpu = qemu_get_cpu(vcpu); - if (cpu == NULL) { - error_setg(errp, "invalid vCPU index %u", vcpu); - } - return cpu; - } else { - return NULL; - } -} - -static bool check_events(bool has_vcpu, bool ignore_unavailable, bool is_pattern, +static bool check_events(bool ignore_unavailable, bool is_pattern, const char *name, Error **errp) { if (!is_pattern) { @@ -38,12 +25,6 @@ static bool check_events(bool has_vcpu, bool ignore_unavailable, bool is_pattern return false; } - /* error for non-vcpu event */ - if (has_vcpu && !trace_event_is_vcpu(ev)) { - error_setg(errp, "event \"%s\" is not vCPU-specific", name); - return false; - } - /* error for unavailable event */ if (!ignore_unavailable && !trace_event_get_state_static(ev)) { error_setg(errp, "event \"%s\" is disabled", name); @@ -70,22 +51,13 @@ TraceEventInfoList *qmp_trace_event_get_state(const char *name, bool has_vcpu, int64_t vcpu, Error **errp) { - Error *err = NULL; TraceEventInfoList *events = NULL; TraceEventIter iter; TraceEvent *ev; bool is_pattern = trace_event_is_pattern(name); - CPUState *cpu; - - /* Check provided vcpu */ - cpu = get_cpu(has_vcpu, vcpu, &err); - if (err) { - error_propagate(errp, err); - return NULL; - } /* Check events */ - if (!check_events(has_vcpu, true, is_pattern, name, errp)) { + if (!check_events(true, is_pattern, name, errp)) { return NULL; } @@ -93,33 +65,17 @@ TraceEventInfoList *qmp_trace_event_get_state(const char *name, trace_event_iter_init_pattern(&iter, name); while ((ev = trace_event_iter_next(&iter)) != NULL) { TraceEventInfo *value; - bool is_vcpu = trace_event_is_vcpu(ev); - if (has_vcpu && !is_vcpu) { - continue; - } value = g_new(TraceEventInfo, 1); - value->vcpu = is_vcpu; value->name = g_strdup(trace_event_get_name(ev)); if (!trace_event_get_state_static(ev)) { value->state = TRACE_EVENT_STATE_UNAVAILABLE; } else { - if (has_vcpu) { - if (is_vcpu) { - if (trace_event_get_vcpu_state_dynamic(cpu, ev)) { - value->state = TRACE_EVENT_STATE_ENABLED; - } else { - value->state = TRACE_EVENT_STATE_DISABLED; - } - } - /* else: already skipped above */ + if (trace_event_get_state_dynamic(ev)) { + value->state = TRACE_EVENT_STATE_ENABLED; } else { - if (trace_event_get_state_dynamic(ev)) { - value->state = TRACE_EVENT_STATE_ENABLED; - } else { - value->state = TRACE_EVENT_STATE_DISABLED; - } + value->state = TRACE_EVENT_STATE_DISABLED; } } QAPI_LIST_PREPEND(events, value); @@ -133,21 +89,12 @@ void qmp_trace_event_set_state(const char *name, bool enable, bool has_vcpu, int64_t vcpu, Error **errp) { - Error *err = NULL; TraceEventIter iter; TraceEvent *ev; bool is_pattern = trace_event_is_pattern(name); - CPUState *cpu; - - /* Check provided vcpu */ - cpu = get_cpu(has_vcpu, vcpu, &err); - if (err) { - error_propagate(errp, err); - return; - } /* Check events */ - if (!check_events(has_vcpu, has_ignore_unavailable && ignore_unavailable, + if (!check_events(has_ignore_unavailable && ignore_unavailable, is_pattern, name, errp)) { return; } @@ -155,14 +102,9 @@ void qmp_trace_event_set_state(const char *name, bool enable, /* Apply changes (all errors checked above) */ trace_event_iter_init_pattern(&iter, name); while ((ev = trace_event_iter_next(&iter)) != NULL) { - if (!trace_event_get_state_static(ev) || - (has_vcpu && !trace_event_is_vcpu(ev))) { + if (!trace_event_get_state_static(ev)) { continue; } - if (has_vcpu) { - trace_event_set_vcpu_state_dynamic(cpu, ev, enable); - } else { - trace_event_set_state_dynamic(ev, enable); - } + trace_event_set_state_dynamic(ev, enable); } } diff --git a/trace/trace-hmp-cmds.c b/trace/trace-hmp-cmds.c index 792876c34a..86211fce27 100644 --- a/trace/trace-hmp-cmds.c +++ b/trace/trace-hmp-cmds.c @@ -37,16 +37,10 @@ void hmp_trace_event(Monitor *mon, const QDict *qdict) { const char *tp_name = qdict_get_str(qdict, "name"); bool new_state = qdict_get_bool(qdict, "option"); - bool has_vcpu = qdict_haskey(qdict, "vcpu"); - int vcpu = qdict_get_try_int(qdict, "vcpu", 0); Error *local_err = NULL; - if (vcpu < 0) { - monitor_printf(mon, "argument vcpu must be positive"); - return; - } - - qmp_trace_event_set_state(tp_name, new_state, true, true, has_vcpu, vcpu, &local_err); + qmp_trace_event_set_state(tp_name, new_state, + true, true, false, 0, &local_err); if (local_err) { error_report_err(local_err); } @@ -80,8 +74,6 @@ void hmp_trace_file(Monitor *mon, const QDict *qdict) void hmp_info_trace_events(Monitor *mon, const QDict *qdict) { const char *name = qdict_get_try_str(qdict, "name"); - bool has_vcpu = qdict_haskey(qdict, "vcpu"); - int vcpu = qdict_get_try_int(qdict, "vcpu", 0); TraceEventInfoList *events; TraceEventInfoList *elem; Error *local_err = NULL; @@ -89,12 +81,8 @@ void hmp_info_trace_events(Monitor *mon, const QDict *qdict) if (name == NULL) { name = "*"; } - if (vcpu < 0) { - monitor_printf(mon, "argument vcpu must be positive"); - return; - } - events = qmp_trace_event_get_state(name, has_vcpu, vcpu, &local_err); + events = qmp_trace_event_get_state(name, false, 0, &local_err); if (local_err) { error_report_err(local_err); return; diff --git a/util/qsp.c b/util/qsp.c index 8562b14a87..2fe3764906 100644 --- a/util/qsp.c +++ b/util/qsp.c @@ -144,7 +144,7 @@ uint32_t do_qsp_callsite_hash(const QSPCallSite *callsite, uint64_t ab) uint32_t e = callsite->line; uint32_t f = callsite->type; - return qemu_xxhash6(ab, cd, e, f); + return qemu_xxhash8(ab, cd, 0, e, f); } static inline