From 14180d6221502bd4b9d96fa5f1065e7cda4bcf00 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Fri, 26 May 2023 18:00:07 +0100 Subject: [PATCH 01/53] bswap: Add the ability to store to an unaligned 24 bit field MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CXL has 24 bit unaligned fields which need to be stored to. CXL is specified as little endian. Define st24_le_p() and the supporting functions to store such a field from a 32 bit host native value. The use of b, w, l, q as the size specifier is limiting. So "24" was used for the size part of the function name. Reviewed-by: Fan Ni Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Ira Weiny Signed-off-by: Jonathan Cameron Message-Id: <20230526170010.574-2-Jonathan.Cameron@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- docs/devel/loads-stores.rst | 2 ++ include/qemu/bswap.h | 25 +++++++++++++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/docs/devel/loads-stores.rst b/docs/devel/loads-stores.rst index d2cefc77a2..dab6dfa0ac 100644 --- a/docs/devel/loads-stores.rst +++ b/docs/devel/loads-stores.rst @@ -36,6 +36,7 @@ store: ``st{size}_{endian}_p(ptr, val)`` ``size`` - ``b`` : 8 bits - ``w`` : 16 bits + - ``24`` : 24 bits - ``l`` : 32 bits - ``q`` : 64 bits @@ -65,6 +66,7 @@ of size ``sz`` bytes. Regexes for git grep - ``\`` - ``\`` + - ``\`` - ``\`` - ``\`` diff --git a/include/qemu/bswap.h b/include/qemu/bswap.h index 15a78c0db5..933a66ee87 100644 --- a/include/qemu/bswap.h +++ b/include/qemu/bswap.h @@ -8,11 +8,23 @@ #undef bswap64 #define bswap64(_x) __builtin_bswap64(_x) +static inline uint32_t bswap24(uint32_t x) +{ + return (((x & 0x000000ffU) << 16) | + ((x & 0x0000ff00U) << 0) | + ((x & 0x00ff0000U) >> 16)); +} + static inline void bswap16s(uint16_t *s) { *s = __builtin_bswap16(*s); } +static inline void bswap24s(uint32_t *s) +{ + *s = bswap24(*s & 0x00ffffffU); +} + static inline void bswap32s(uint32_t *s) { *s = __builtin_bswap32(*s); @@ -26,11 +38,13 @@ static inline void bswap64s(uint64_t *s) #if HOST_BIG_ENDIAN #define be_bswap(v, size) (v) #define le_bswap(v, size) glue(__builtin_bswap, size)(v) +#define le_bswap24(v) bswap24(v) #define be_bswaps(v, size) #define le_bswaps(p, size) \ do { *p = glue(__builtin_bswap, size)(*p); } while (0) #else #define le_bswap(v, size) (v) +#define le_bswap24(v) (v) #define be_bswap(v, size) glue(__builtin_bswap, size)(v) #define le_bswaps(v, size) #define be_bswaps(p, size) \ @@ -176,6 +190,7 @@ CPU_CONVERT(le, 64, uint64_t) * size is: * b: 8 bits * w: 16 bits + * 24: 24 bits * l: 32 bits * q: 64 bits * @@ -248,6 +263,11 @@ static inline void stw_he_p(void *ptr, uint16_t v) __builtin_memcpy(ptr, &v, sizeof(v)); } +static inline void st24_he_p(void *ptr, uint32_t v) +{ + __builtin_memcpy(ptr, &v, 3); +} + static inline int ldl_he_p(const void *ptr) { int32_t r; @@ -297,6 +317,11 @@ static inline void stw_le_p(void *ptr, uint16_t v) stw_he_p(ptr, le_bswap(v, 16)); } +static inline void st24_le_p(void *ptr, uint32_t v) +{ + st24_he_p(ptr, le_bswap24(v)); +} + static inline void stl_le_p(void *ptr, uint32_t v) { stl_he_p(ptr, le_bswap(v, 32)); From 9547754f40ee5c5e3d1dbed0fbc972caacd075e8 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Fri, 26 May 2023 18:00:08 +0100 Subject: [PATCH 02/53] hw/cxl: QMP based poison injection support Inject poison using QMP command cxl-inject-poison to add an entry to the poison list. For now, the poison is not returned CXL.mem reads, but only via the mailbox command Get Poison List. So a normal memory read to an address that is on the poison list will not yet result in a synchronous exception (and similar for partial cacheline writes). That is left for a future patch. See CXL rev 3.0, sec 8.2.9.8.4.1 Get Poison list (Opcode 4300h) Kernel patches to use this interface here: https://lore.kernel.org/linux-cxl/cover.1665606782.git.alison.schofield@intel.com/ To inject poison using QMP (telnet to the QMP port) { "execute": "qmp_capabilities" } { "execute": "cxl-inject-poison", "arguments": { "path": "/machine/peripheral/cxl-pmem0", "start": 2048, "length": 256 } } Adjusted to select a device on your machine. Note that the poison list supported is kept short enough to avoid the complexity of state machine that is needed to handle the MORE flag. Reviewed-by: Fan Ni Reviewed-by: Ira Weiny Acked-by: Markus Armbruster Signed-off-by: Jonathan Cameron Message-Id: <20230526170010.574-3-Jonathan.Cameron@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/cxl/cxl-mailbox-utils.c | 90 +++++++++++++++++++++++++++++++++++++ hw/mem/cxl_type3.c | 56 +++++++++++++++++++++++ hw/mem/cxl_type3_stubs.c | 6 +++ include/hw/cxl/cxl.h | 1 + include/hw/cxl/cxl_device.h | 20 +++++++++ qapi/cxl.json | 21 +++++++++ 6 files changed, 194 insertions(+) diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c index 702e16ca20..1f74b26ea2 100644 --- a/hw/cxl/cxl-mailbox-utils.c +++ b/hw/cxl/cxl-mailbox-utils.c @@ -62,6 +62,8 @@ enum { #define GET_PARTITION_INFO 0x0 #define GET_LSA 0x2 #define SET_LSA 0x3 + MEDIA_AND_POISON = 0x43, + #define GET_POISON_LIST 0x0 }; /* 8.2.8.4.5.1 Command Return Codes */ @@ -295,6 +297,10 @@ static CXLRetCode cmd_identify_memory_device(struct cxl_cmd *cmd, stq_le_p(&id->persistent_capacity, cxl_dstate->pmem_size / CXL_CAPACITY_MULTIPLIER); stq_le_p(&id->volatile_capacity, cxl_dstate->vmem_size / CXL_CAPACITY_MULTIPLIER); stl_le_p(&id->lsa_size, cvc->get_lsa_size(ct3d)); + /* 256 poison records */ + st24_le_p(id->poison_list_max_mer, 256); + /* No limit - so limited by main poison record limit */ + stw_le_p(&id->inject_poison_limit, 0); *len = sizeof(*id); return CXL_MBOX_SUCCESS; @@ -384,6 +390,88 @@ static CXLRetCode cmd_ccls_set_lsa(struct cxl_cmd *cmd, return CXL_MBOX_SUCCESS; } +/* + * This is very inefficient, but good enough for now! + * Also the payload will always fit, so no need to handle the MORE flag and + * make this stateful. We may want to allow longer poison lists to aid + * testing that kernel functionality. + */ +static CXLRetCode cmd_media_get_poison_list(struct cxl_cmd *cmd, + CXLDeviceState *cxl_dstate, + uint16_t *len) +{ + struct get_poison_list_pl { + uint64_t pa; + uint64_t length; + } QEMU_PACKED; + + struct get_poison_list_out_pl { + uint8_t flags; + uint8_t rsvd1; + uint64_t overflow_timestamp; + uint16_t count; + uint8_t rsvd2[0x14]; + struct { + uint64_t addr; + uint32_t length; + uint32_t resv; + } QEMU_PACKED records[]; + } QEMU_PACKED; + + struct get_poison_list_pl *in = (void *)cmd->payload; + struct get_poison_list_out_pl *out = (void *)cmd->payload; + CXLType3Dev *ct3d = container_of(cxl_dstate, CXLType3Dev, cxl_dstate); + uint16_t record_count = 0, i = 0; + uint64_t query_start, query_length; + CXLPoisonList *poison_list = &ct3d->poison_list; + CXLPoison *ent; + uint16_t out_pl_len; + + query_start = ldq_le_p(&in->pa); + /* 64 byte alignemnt required */ + if (query_start & 0x3f) { + return CXL_MBOX_INVALID_INPUT; + } + query_length = ldq_le_p(&in->length) * CXL_CACHE_LINE_SIZE; + + QLIST_FOREACH(ent, poison_list, node) { + /* Check for no overlap */ + if (ent->start >= query_start + query_length || + ent->start + ent->length <= query_start) { + continue; + } + record_count++; + } + out_pl_len = sizeof(*out) + record_count * sizeof(out->records[0]); + assert(out_pl_len <= CXL_MAILBOX_MAX_PAYLOAD_SIZE); + + memset(out, 0, out_pl_len); + QLIST_FOREACH(ent, poison_list, node) { + uint64_t start, stop; + + /* Check for no overlap */ + if (ent->start >= query_start + query_length || + ent->start + ent->length <= query_start) { + continue; + } + + /* Deal with overlap */ + start = MAX(ROUND_DOWN(ent->start, 64ull), query_start); + stop = MIN(ROUND_DOWN(ent->start, 64ull) + ent->length, + query_start + query_length); + stq_le_p(&out->records[i].addr, start | (ent->type & 0x7)); + stl_le_p(&out->records[i].length, (stop - start) / CXL_CACHE_LINE_SIZE); + i++; + } + if (ct3d->poison_list_overflowed) { + out->flags = (1 << 1); + stq_le_p(&out->overflow_timestamp, ct3d->poison_list_overflow_ts); + } + stw_le_p(&out->count, record_count); + *len = out_pl_len; + return CXL_MBOX_SUCCESS; +} + #define IMMEDIATE_CONFIG_CHANGE (1 << 1) #define IMMEDIATE_DATA_CHANGE (1 << 2) #define IMMEDIATE_POLICY_CHANGE (1 << 3) @@ -411,6 +499,8 @@ static struct cxl_cmd cxl_cmd_set[256][256] = { [CCLS][GET_LSA] = { "CCLS_GET_LSA", cmd_ccls_get_lsa, 8, 0 }, [CCLS][SET_LSA] = { "CCLS_SET_LSA", cmd_ccls_set_lsa, ~0, IMMEDIATE_CONFIG_CHANGE | IMMEDIATE_DATA_CHANGE }, + [MEDIA_AND_POISON][GET_POISON_LIST] = { "MEDIA_AND_POISON_GET_POISON_LIST", + cmd_media_get_poison_list, 16, 0 }, }; void cxl_process_mailbox(CXLDeviceState *cxl_dstate) diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c index 2adacbd01b..ab600735eb 100644 --- a/hw/mem/cxl_type3.c +++ b/hw/mem/cxl_type3.c @@ -947,6 +947,62 @@ static void set_lsa(CXLType3Dev *ct3d, const void *buf, uint64_t size, */ } +void cxl_set_poison_list_overflowed(CXLType3Dev *ct3d) +{ + ct3d->poison_list_overflowed = true; + ct3d->poison_list_overflow_ts = + cxl_device_get_timestamp(&ct3d->cxl_dstate); +} + +void qmp_cxl_inject_poison(const char *path, uint64_t start, uint64_t length, + Error **errp) +{ + Object *obj = object_resolve_path(path, NULL); + CXLType3Dev *ct3d; + CXLPoison *p; + + if (length % 64) { + error_setg(errp, "Poison injection must be in multiples of 64 bytes"); + return; + } + if (start % 64) { + error_setg(errp, "Poison start address must be 64 byte aligned"); + return; + } + if (!obj) { + error_setg(errp, "Unable to resolve path"); + return; + } + if (!object_dynamic_cast(obj, TYPE_CXL_TYPE3)) { + error_setg(errp, "Path does not point to a CXL type 3 device"); + return; + } + + ct3d = CXL_TYPE3(obj); + + QLIST_FOREACH(p, &ct3d->poison_list, node) { + if (((start >= p->start) && (start < p->start + p->length)) || + ((start + length > p->start) && + (start + length <= p->start + p->length))) { + error_setg(errp, "Overlap with existing poisoned region not supported"); + return; + } + } + + if (ct3d->poison_list_cnt == CXL_POISON_LIST_LIMIT) { + cxl_set_poison_list_overflowed(ct3d); + return; + } + + p = g_new0(CXLPoison, 1); + p->length = length; + p->start = start; + p->type = CXL_POISON_TYPE_INTERNAL; /* Different from injected via the mbox */ + + QLIST_INSERT_HEAD(&ct3d->poison_list, p, node); + ct3d->poison_list_cnt++; +} + /* For uncorrectable errors include support for multiple header recording */ void qmp_cxl_inject_uncorrectable_errors(const char *path, CXLUncorErrorRecordList *errors, diff --git a/hw/mem/cxl_type3_stubs.c b/hw/mem/cxl_type3_stubs.c index d574c58f9a..fd1166a610 100644 --- a/hw/mem/cxl_type3_stubs.c +++ b/hw/mem/cxl_type3_stubs.c @@ -3,6 +3,12 @@ #include "qapi/error.h" #include "qapi/qapi-commands-cxl.h" +void qmp_cxl_inject_poison(const char *path, uint64_t start, uint64_t length, + Error **errp) +{ + error_setg(errp, "CXL Type 3 support is not compiled in"); +} + void qmp_cxl_inject_uncorrectable_errors(const char *path, CXLUncorErrorRecordList *errors, Error **errp) diff --git a/include/hw/cxl/cxl.h b/include/hw/cxl/cxl.h index c453983e83..56c9e7676e 100644 --- a/include/hw/cxl/cxl.h +++ b/include/hw/cxl/cxl.h @@ -18,6 +18,7 @@ #include "cxl_component.h" #include "cxl_device.h" +#define CXL_CACHE_LINE_SIZE 64 #define CXL_COMPONENT_REG_BAR_IDX 0 #define CXL_DEVICE_REG_BAR_IDX 2 diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h index 02befda0f6..32c234ea91 100644 --- a/include/hw/cxl/cxl_device.h +++ b/include/hw/cxl/cxl_device.h @@ -242,6 +242,18 @@ typedef struct CXLError { typedef QTAILQ_HEAD(, CXLError) CXLErrorList; +typedef struct CXLPoison { + uint64_t start, length; + uint8_t type; +#define CXL_POISON_TYPE_EXTERNAL 0x1 +#define CXL_POISON_TYPE_INTERNAL 0x2 +#define CXL_POISON_TYPE_INJECTED 0x3 + QLIST_ENTRY(CXLPoison) node; +} CXLPoison; + +typedef QLIST_HEAD(, CXLPoison) CXLPoisonList; +#define CXL_POISON_LIST_LIMIT 256 + struct CXLType3Dev { /* Private */ PCIDevice parent_obj; @@ -264,6 +276,12 @@ struct CXLType3Dev { /* Error injection */ CXLErrorList error_list; + + /* Poison Injection - cache */ + CXLPoisonList poison_list; + unsigned int poison_list_cnt; + bool poison_list_overflowed; + uint64_t poison_list_overflow_ts; }; #define TYPE_CXL_TYPE3 "cxl-type3" @@ -289,4 +307,6 @@ MemTxResult cxl_type3_write(PCIDevice *d, hwaddr host_addr, uint64_t data, uint64_t cxl_device_get_timestamp(CXLDeviceState *cxlds); +void cxl_set_poison_list_overflowed(CXLType3Dev *ct3d); + #endif diff --git a/qapi/cxl.json b/qapi/cxl.json index b21c9b4c1c..ed1c7eea3a 100644 --- a/qapi/cxl.json +++ b/qapi/cxl.json @@ -5,6 +5,27 @@ # = CXL devices ## +## +# @cxl-inject-poison: +# +# Poison records indicate that a CXL memory device knows that a +# particular memory region may be corrupted. This may be because of +# locally detected errors (e.g. ECC failure) or poisoned writes +# received from other components in the system. This injection +# mechanism enables testing of the OS handling of poison records which +# may be queried via the CXL mailbox. +# +# @path: CXL type 3 device canonical QOM path +# +# @start: Start address; must be 64 byte aligned. +# +# @length: Length of poison to inject; must be a multiple of 64 bytes. +# +# Since: 8.1 +## +{ 'command': 'cxl-inject-poison', + 'data': { 'path': 'str', 'start': 'uint64', 'length': 'size' }} + ## # @CxlUncorErrorType: # From ff04b207a0525b3f77b6352ce3a2e610b11ea34f Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Fri, 26 May 2023 18:00:09 +0100 Subject: [PATCH 03/53] hw/cxl: Add poison injection via the mailbox. Very simple implementation to allow testing of corresponding kernel code. Note that for now we track each 64 byte section independently. Whilst a valid implementation choice, it may make sense to fuse entries so as to prove out more complex corners of the kernel code. Reviewed-by: Ira Weiny Reviewed-by: Fan Ni Signed-off-by: Jonathan Cameron Message-Id: <20230526170010.574-4-Jonathan.Cameron@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/cxl/cxl-mailbox-utils.c | 42 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c index 1f74b26ea2..6c476ad7f4 100644 --- a/hw/cxl/cxl-mailbox-utils.c +++ b/hw/cxl/cxl-mailbox-utils.c @@ -64,6 +64,7 @@ enum { #define SET_LSA 0x3 MEDIA_AND_POISON = 0x43, #define GET_POISON_LIST 0x0 + #define INJECT_POISON 0x1 }; /* 8.2.8.4.5.1 Command Return Codes */ @@ -472,6 +473,45 @@ static CXLRetCode cmd_media_get_poison_list(struct cxl_cmd *cmd, return CXL_MBOX_SUCCESS; } +static CXLRetCode cmd_media_inject_poison(struct cxl_cmd *cmd, + CXLDeviceState *cxl_dstate, + uint16_t *len_unused) +{ + CXLType3Dev *ct3d = container_of(cxl_dstate, CXLType3Dev, cxl_dstate); + CXLPoisonList *poison_list = &ct3d->poison_list; + CXLPoison *ent; + struct inject_poison_pl { + uint64_t dpa; + }; + struct inject_poison_pl *in = (void *)cmd->payload; + uint64_t dpa = ldq_le_p(&in->dpa); + CXLPoison *p; + + QLIST_FOREACH(ent, poison_list, node) { + if (dpa >= ent->start && + dpa + CXL_CACHE_LINE_SIZE <= ent->start + ent->length) { + return CXL_MBOX_SUCCESS; + } + } + + if (ct3d->poison_list_cnt == CXL_POISON_LIST_LIMIT) { + return CXL_MBOX_INJECT_POISON_LIMIT; + } + p = g_new0(CXLPoison, 1); + + p->length = CXL_CACHE_LINE_SIZE; + p->start = dpa; + p->type = CXL_POISON_TYPE_INJECTED; + + /* + * Possible todo: Merge with existing entry if next to it and if same type + */ + QLIST_INSERT_HEAD(poison_list, p, node); + ct3d->poison_list_cnt++; + + return CXL_MBOX_SUCCESS; +} + #define IMMEDIATE_CONFIG_CHANGE (1 << 1) #define IMMEDIATE_DATA_CHANGE (1 << 2) #define IMMEDIATE_POLICY_CHANGE (1 << 3) @@ -501,6 +541,8 @@ static struct cxl_cmd cxl_cmd_set[256][256] = { ~0, IMMEDIATE_CONFIG_CHANGE | IMMEDIATE_DATA_CHANGE }, [MEDIA_AND_POISON][GET_POISON_LIST] = { "MEDIA_AND_POISON_GET_POISON_LIST", cmd_media_get_poison_list, 16, 0 }, + [MEDIA_AND_POISON][INJECT_POISON] = { "MEDIA_AND_POISON_INJECT_POISON", + cmd_media_inject_poison, 8, 0 }, }; void cxl_process_mailbox(CXLDeviceState *cxl_dstate) From 6bda41a69bdcee8ff7dcf75df2f9647ce55908ab Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Fri, 26 May 2023 18:00:10 +0100 Subject: [PATCH 04/53] hw/cxl: Add clear poison mailbox command support. Current implementation is very simple so many of the corner cases do not exist (e.g. fragmenting larger poison list entries) Reviewed-by: Fan Ni Reviewed-by: Ira Weiny Signed-off-by: Jonathan Cameron Message-Id: <20230526170010.574-5-Jonathan.Cameron@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/cxl/cxl-mailbox-utils.c | 82 +++++++++++++++++++++++++++++++++++++ hw/mem/cxl_type3.c | 37 +++++++++++++++++ include/hw/cxl/cxl_device.h | 1 + 3 files changed, 120 insertions(+) diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c index 6c476ad7f4..e3401b6be8 100644 --- a/hw/cxl/cxl-mailbox-utils.c +++ b/hw/cxl/cxl-mailbox-utils.c @@ -65,6 +65,7 @@ enum { MEDIA_AND_POISON = 0x43, #define GET_POISON_LIST 0x0 #define INJECT_POISON 0x1 + #define CLEAR_POISON 0x2 }; /* 8.2.8.4.5.1 Command Return Codes */ @@ -512,6 +513,85 @@ static CXLRetCode cmd_media_inject_poison(struct cxl_cmd *cmd, return CXL_MBOX_SUCCESS; } +static CXLRetCode cmd_media_clear_poison(struct cxl_cmd *cmd, + CXLDeviceState *cxl_dstate, + uint16_t *len_unused) +{ + CXLType3Dev *ct3d = container_of(cxl_dstate, CXLType3Dev, cxl_dstate); + CXLPoisonList *poison_list = &ct3d->poison_list; + CXLType3Class *cvc = CXL_TYPE3_GET_CLASS(ct3d); + struct clear_poison_pl { + uint64_t dpa; + uint8_t data[64]; + }; + CXLPoison *ent; + uint64_t dpa; + + struct clear_poison_pl *in = (void *)cmd->payload; + + dpa = ldq_le_p(&in->dpa); + if (dpa + CXL_CACHE_LINE_SIZE > cxl_dstate->mem_size) { + return CXL_MBOX_INVALID_PA; + } + + /* Clearing a region with no poison is not an error so always do so */ + if (cvc->set_cacheline) { + if (!cvc->set_cacheline(ct3d, dpa, in->data)) { + return CXL_MBOX_INTERNAL_ERROR; + } + } + + QLIST_FOREACH(ent, poison_list, node) { + /* + * Test for contained in entry. Simpler than general case + * as clearing 64 bytes and entries 64 byte aligned + */ + if ((dpa >= ent->start) && (dpa < ent->start + ent->length)) { + break; + } + } + if (!ent) { + return CXL_MBOX_SUCCESS; + } + + QLIST_REMOVE(ent, node); + ct3d->poison_list_cnt--; + + if (dpa > ent->start) { + CXLPoison *frag; + /* Cannot overflow as replacing existing entry */ + + frag = g_new0(CXLPoison, 1); + + frag->start = ent->start; + frag->length = dpa - ent->start; + frag->type = ent->type; + + QLIST_INSERT_HEAD(poison_list, frag, node); + ct3d->poison_list_cnt++; + } + + if (dpa + CXL_CACHE_LINE_SIZE < ent->start + ent->length) { + CXLPoison *frag; + + if (ct3d->poison_list_cnt == CXL_POISON_LIST_LIMIT) { + cxl_set_poison_list_overflowed(ct3d); + } else { + frag = g_new0(CXLPoison, 1); + + frag->start = dpa + CXL_CACHE_LINE_SIZE; + frag->length = ent->start + ent->length - frag->start; + frag->type = ent->type; + QLIST_INSERT_HEAD(poison_list, frag, node); + ct3d->poison_list_cnt++; + } + } + /* Any fragments have been added, free original entry */ + g_free(ent); + + return CXL_MBOX_SUCCESS; +} + #define IMMEDIATE_CONFIG_CHANGE (1 << 1) #define IMMEDIATE_DATA_CHANGE (1 << 2) #define IMMEDIATE_POLICY_CHANGE (1 << 3) @@ -543,6 +623,8 @@ static struct cxl_cmd cxl_cmd_set[256][256] = { cmd_media_get_poison_list, 16, 0 }, [MEDIA_AND_POISON][INJECT_POISON] = { "MEDIA_AND_POISON_INJECT_POISON", cmd_media_inject_poison, 8, 0 }, + [MEDIA_AND_POISON][CLEAR_POISON] = { "MEDIA_AND_POISON_CLEAR_POISON", + cmd_media_clear_poison, 72, 0 }, }; void cxl_process_mailbox(CXLDeviceState *cxl_dstate) diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c index ab600735eb..d751803188 100644 --- a/hw/mem/cxl_type3.c +++ b/hw/mem/cxl_type3.c @@ -947,6 +947,42 @@ static void set_lsa(CXLType3Dev *ct3d, const void *buf, uint64_t size, */ } +static bool set_cacheline(CXLType3Dev *ct3d, uint64_t dpa_offset, uint8_t *data) +{ + MemoryRegion *vmr = NULL, *pmr = NULL; + AddressSpace *as; + + if (ct3d->hostvmem) { + vmr = host_memory_backend_get_memory(ct3d->hostvmem); + } + if (ct3d->hostpmem) { + pmr = host_memory_backend_get_memory(ct3d->hostpmem); + } + + if (!vmr && !pmr) { + return false; + } + + if (dpa_offset + CXL_CACHE_LINE_SIZE > ct3d->cxl_dstate.mem_size) { + return false; + } + + if (vmr) { + if (dpa_offset < memory_region_size(vmr)) { + as = &ct3d->hostvmem_as; + } else { + as = &ct3d->hostpmem_as; + dpa_offset -= memory_region_size(vmr); + } + } else { + as = &ct3d->hostpmem_as; + } + + address_space_write(as, dpa_offset, MEMTXATTRS_UNSPECIFIED, &data, + CXL_CACHE_LINE_SIZE); + return true; +} + void cxl_set_poison_list_overflowed(CXLType3Dev *ct3d) { ct3d->poison_list_overflowed = true; @@ -1168,6 +1204,7 @@ static void ct3_class_init(ObjectClass *oc, void *data) cvc->get_lsa_size = get_lsa_size; cvc->get_lsa = get_lsa; cvc->set_lsa = set_lsa; + cvc->set_cacheline = set_cacheline; } static const TypeInfo ct3d_info = { diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h index 32c234ea91..73328a52cf 100644 --- a/include/hw/cxl/cxl_device.h +++ b/include/hw/cxl/cxl_device.h @@ -298,6 +298,7 @@ struct CXLType3Class { uint64_t offset); void (*set_lsa)(CXLType3Dev *ct3d, const void *buf, uint64_t size, uint64_t offset); + bool (*set_cacheline)(CXLType3Dev *ct3d, uint64_t dpa_offset, uint8_t *data); }; MemTxResult cxl_type3_read(PCIDevice *d, hwaddr host_addr, uint64_t *data, From d7b84ddc3b99ae4dcac052a03817aeeab9d12514 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Tue, 30 May 2023 14:35:57 +0100 Subject: [PATCH 05/53] hw/cxl/events: Add event status register The device status register block was defined. However, there were no individual registers nor any data wired up. Define the event status register [CXL 3.0; 8.2.8.3.1] as part of the device status register block. Wire up the register and initialize the event status for each log. To support CXL 3.0 the version of the device status register block needs to be 2. Change the macro to allow for setting the version. Signed-off-by: Ira Weiny Reviewed-by: Fan Ni Signed-off-by: Jonathan Cameron Message-Id: <20230530133603.16934-2-Jonathan.Cameron@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/cxl/cxl-device-utils.c | 43 ++++++++++++++++++++++++++++++++----- include/hw/cxl/cxl_device.h | 23 +++++++++++++++++--- include/hw/cxl/cxl_events.h | 28 ++++++++++++++++++++++++ 3 files changed, 86 insertions(+), 8 deletions(-) create mode 100644 include/hw/cxl/cxl_events.h diff --git a/hw/cxl/cxl-device-utils.c b/hw/cxl/cxl-device-utils.c index 86e1cea8ce..517f06d869 100644 --- a/hw/cxl/cxl-device-utils.c +++ b/hw/cxl/cxl-device-utils.c @@ -41,7 +41,20 @@ static uint64_t caps_reg_read(void *opaque, hwaddr offset, unsigned size) static uint64_t dev_reg_read(void *opaque, hwaddr offset, unsigned size) { - return 0; + CXLDeviceState *cxl_dstate = opaque; + + switch (size) { + case 1: + return cxl_dstate->dev_reg_state[offset]; + case 2: + return cxl_dstate->dev_reg_state16[offset / size]; + case 4: + return cxl_dstate->dev_reg_state32[offset / size]; + case 8: + return cxl_dstate->dev_reg_state64[offset / size]; + default: + g_assert_not_reached(); + } } static uint64_t mailbox_reg_read(void *opaque, hwaddr offset, unsigned size) @@ -236,7 +249,27 @@ void cxl_device_register_block_init(Object *obj, CXLDeviceState *cxl_dstate) &cxl_dstate->memory_device); } -static void device_reg_init_common(CXLDeviceState *cxl_dstate) { } +void cxl_event_set_status(CXLDeviceState *cxl_dstate, CXLEventLogType log_type, + bool available) +{ + if (available) { + cxl_dstate->event_status |= (1 << log_type); + } else { + cxl_dstate->event_status &= ~(1 << log_type); + } + + ARRAY_FIELD_DP64(cxl_dstate->dev_reg_state64, CXL_DEV_EVENT_STATUS, + EVENT_STATUS, cxl_dstate->event_status); +} + +static void device_reg_init_common(CXLDeviceState *cxl_dstate) +{ + CXLEventLogType log; + + for (log = 0; log < CXL_EVENT_TYPE_MAX; log++) { + cxl_event_set_status(cxl_dstate, log, false); + } +} static void mailbox_reg_init_common(CXLDeviceState *cxl_dstate) { @@ -258,13 +291,13 @@ void cxl_device_register_init_common(CXLDeviceState *cxl_dstate) ARRAY_FIELD_DP64(cap_hdrs, CXL_DEV_CAP_ARRAY, CAP_VERSION, 1); ARRAY_FIELD_DP64(cap_hdrs, CXL_DEV_CAP_ARRAY, CAP_COUNT, cap_count); - cxl_device_cap_init(cxl_dstate, DEVICE_STATUS, 1); + cxl_device_cap_init(cxl_dstate, DEVICE_STATUS, 1, 2); device_reg_init_common(cxl_dstate); - cxl_device_cap_init(cxl_dstate, MAILBOX, 2); + cxl_device_cap_init(cxl_dstate, MAILBOX, 2, 1); mailbox_reg_init_common(cxl_dstate); - cxl_device_cap_init(cxl_dstate, MEMORY_DEVICE, 0x4000); + cxl_device_cap_init(cxl_dstate, MEMORY_DEVICE, 0x4000, 1); memdev_reg_init_common(cxl_dstate); cxl_initialize_mailbox(cxl_dstate); diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h index 73328a52cf..16993f7098 100644 --- a/include/hw/cxl/cxl_device.h +++ b/include/hw/cxl/cxl_device.h @@ -13,6 +13,7 @@ #include "hw/cxl/cxl_component.h" #include "hw/pci/pci_device.h" #include "hw/register.h" +#include "hw/cxl/cxl_events.h" /* * The following is how a CXL device's Memory Device registers are laid out. @@ -86,7 +87,16 @@ typedef struct cxl_device_state { MemoryRegion device_registers; /* mmio for device capabilities array - 8.2.8.2 */ - MemoryRegion device; + struct { + MemoryRegion device; + union { + uint8_t dev_reg_state[CXL_DEVICE_STATUS_REGISTERS_LENGTH]; + uint16_t dev_reg_state16[CXL_DEVICE_STATUS_REGISTERS_LENGTH / 2]; + uint32_t dev_reg_state32[CXL_DEVICE_STATUS_REGISTERS_LENGTH / 4]; + uint64_t dev_reg_state64[CXL_DEVICE_STATUS_REGISTERS_LENGTH / 8]; + }; + uint64_t event_status; + }; MemoryRegion memory_device; struct { MemoryRegion caps; @@ -141,6 +151,9 @@ REG64(CXL_DEV_CAP_ARRAY, 0) /* Documented as 128 bit register but 64 byte access FIELD(CXL_DEV_CAP_ARRAY, CAP_VERSION, 16, 8) FIELD(CXL_DEV_CAP_ARRAY, CAP_COUNT, 32, 16) +void cxl_event_set_status(CXLDeviceState *cxl_dstate, CXLEventLogType log_type, + bool available); + /* * Helper macro to initialize capability headers for CXL devices. * @@ -175,7 +188,7 @@ CXL_DEVICE_CAPABILITY_HEADER_REGISTER(MEMORY_DEVICE, void cxl_initialize_mailbox(CXLDeviceState *cxl_dstate); void cxl_process_mailbox(CXLDeviceState *cxl_dstate); -#define cxl_device_cap_init(dstate, reg, cap_id) \ +#define cxl_device_cap_init(dstate, reg, cap_id, ver) \ do { \ uint32_t *cap_hdrs = dstate->caps_reg_state32; \ int which = R_CXL_DEV_##reg##_CAP_HDR0; \ @@ -183,7 +196,7 @@ void cxl_process_mailbox(CXLDeviceState *cxl_dstate); FIELD_DP32(cap_hdrs[which], CXL_DEV_##reg##_CAP_HDR0, \ CAP_ID, cap_id); \ cap_hdrs[which] = FIELD_DP32( \ - cap_hdrs[which], CXL_DEV_##reg##_CAP_HDR0, CAP_VERSION, 1); \ + cap_hdrs[which], CXL_DEV_##reg##_CAP_HDR0, CAP_VERSION, ver); \ cap_hdrs[which + 1] = \ FIELD_DP32(cap_hdrs[which + 1], CXL_DEV_##reg##_CAP_HDR1, \ CAP_OFFSET, CXL_##reg##_REGISTERS_OFFSET); \ @@ -192,6 +205,10 @@ void cxl_process_mailbox(CXLDeviceState *cxl_dstate); CAP_LENGTH, CXL_##reg##_REGISTERS_LENGTH); \ } while (0) +/* CXL 3.0 8.2.8.3.1 Event Status Register */ +REG64(CXL_DEV_EVENT_STATUS, 0) + FIELD(CXL_DEV_EVENT_STATUS, EVENT_STATUS, 0, 32) + /* CXL 2.0 8.2.8.4.3 Mailbox Capabilities Register */ REG32(CXL_DEV_MAILBOX_CAP, 0) FIELD(CXL_DEV_MAILBOX_CAP, PAYLOAD_SIZE, 0, 5) diff --git a/include/hw/cxl/cxl_events.h b/include/hw/cxl/cxl_events.h new file mode 100644 index 0000000000..aeb3b0590e --- /dev/null +++ b/include/hw/cxl/cxl_events.h @@ -0,0 +1,28 @@ +/* + * QEMU CXL Events + * + * Copyright (c) 2022 Intel + * + * This work is licensed under the terms of the GNU GPL, version 2. See the + * COPYING file in the top-level directory. + */ + +#ifndef CXL_EVENTS_H +#define CXL_EVENTS_H + +/* + * CXL rev 3.0 section 8.2.9.2.2; Table 8-49 + * + * Define these as the bit position for the event status register for ease of + * setting the status. + */ +typedef enum CXLEventLogType { + CXL_EVENT_TYPE_INFO = 0, + CXL_EVENT_TYPE_WARN = 1, + CXL_EVENT_TYPE_FAIL = 2, + CXL_EVENT_TYPE_FATAL = 3, + CXL_EVENT_TYPE_DYNAMIC_CAP = 4, + CXL_EVENT_TYPE_MAX +} CXLEventLogType; + +#endif /* CXL_EVENTS_H */ From 2f6b8c8f420d90579b29a96f46630e241dd2c1cc Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Tue, 30 May 2023 14:35:58 +0100 Subject: [PATCH 06/53] hw/cxl: Move CXLRetCode definition to cxl_device.h Following patches will need access to the mailbox return code type so move it to the header. Reviewed-by: Ira Weiny Reviewed-by: Fan Ni Signed-off-by: Jonathan Cameron Message-Id: <20230530133603.16934-3-Jonathan.Cameron@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/cxl/cxl-mailbox-utils.c | 28 ---------------------------- include/hw/cxl/cxl_device.h | 28 ++++++++++++++++++++++++++++ 2 files changed, 28 insertions(+), 28 deletions(-) diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c index e3401b6be8..d7e114aaae 100644 --- a/hw/cxl/cxl-mailbox-utils.c +++ b/hw/cxl/cxl-mailbox-utils.c @@ -68,34 +68,6 @@ enum { #define CLEAR_POISON 0x2 }; -/* 8.2.8.4.5.1 Command Return Codes */ -typedef enum { - CXL_MBOX_SUCCESS = 0x0, - CXL_MBOX_BG_STARTED = 0x1, - CXL_MBOX_INVALID_INPUT = 0x2, - CXL_MBOX_UNSUPPORTED = 0x3, - CXL_MBOX_INTERNAL_ERROR = 0x4, - CXL_MBOX_RETRY_REQUIRED = 0x5, - CXL_MBOX_BUSY = 0x6, - CXL_MBOX_MEDIA_DISABLED = 0x7, - CXL_MBOX_FW_XFER_IN_PROGRESS = 0x8, - CXL_MBOX_FW_XFER_OUT_OF_ORDER = 0x9, - CXL_MBOX_FW_AUTH_FAILED = 0xa, - CXL_MBOX_FW_INVALID_SLOT = 0xb, - CXL_MBOX_FW_ROLLEDBACK = 0xc, - CXL_MBOX_FW_REST_REQD = 0xd, - CXL_MBOX_INVALID_HANDLE = 0xe, - CXL_MBOX_INVALID_PA = 0xf, - CXL_MBOX_INJECT_POISON_LIMIT = 0x10, - CXL_MBOX_PERMANENT_MEDIA_FAILURE = 0x11, - CXL_MBOX_ABORTED = 0x12, - CXL_MBOX_INVALID_SECURITY_STATE = 0x13, - CXL_MBOX_INCORRECT_PASSPHRASE = 0x14, - CXL_MBOX_UNSUPPORTED_MAILBOX = 0x15, - CXL_MBOX_INVALID_PAYLOAD_LENGTH = 0x16, - CXL_MBOX_MAX = 0x17 -} CXLRetCode; - struct cxl_cmd; typedef CXLRetCode (*opcode_handler)(struct cxl_cmd *cmd, CXLDeviceState *cxl_dstate, uint16_t *len); diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h index 16993f7098..9f8ee85f8a 100644 --- a/include/hw/cxl/cxl_device.h +++ b/include/hw/cxl/cxl_device.h @@ -83,6 +83,34 @@ (CXL_DEVICE_CAP_REG_SIZE + CXL_DEVICE_STATUS_REGISTERS_LENGTH + \ CXL_MAILBOX_REGISTERS_LENGTH + CXL_MEMORY_DEVICE_REGISTERS_LENGTH) +/* 8.2.8.4.5.1 Command Return Codes */ +typedef enum { + CXL_MBOX_SUCCESS = 0x0, + CXL_MBOX_BG_STARTED = 0x1, + CXL_MBOX_INVALID_INPUT = 0x2, + CXL_MBOX_UNSUPPORTED = 0x3, + CXL_MBOX_INTERNAL_ERROR = 0x4, + CXL_MBOX_RETRY_REQUIRED = 0x5, + CXL_MBOX_BUSY = 0x6, + CXL_MBOX_MEDIA_DISABLED = 0x7, + CXL_MBOX_FW_XFER_IN_PROGRESS = 0x8, + CXL_MBOX_FW_XFER_OUT_OF_ORDER = 0x9, + CXL_MBOX_FW_AUTH_FAILED = 0xa, + CXL_MBOX_FW_INVALID_SLOT = 0xb, + CXL_MBOX_FW_ROLLEDBACK = 0xc, + CXL_MBOX_FW_REST_REQD = 0xd, + CXL_MBOX_INVALID_HANDLE = 0xe, + CXL_MBOX_INVALID_PA = 0xf, + CXL_MBOX_INJECT_POISON_LIMIT = 0x10, + CXL_MBOX_PERMANENT_MEDIA_FAILURE = 0x11, + CXL_MBOX_ABORTED = 0x12, + CXL_MBOX_INVALID_SECURITY_STATE = 0x13, + CXL_MBOX_INCORRECT_PASSPHRASE = 0x14, + CXL_MBOX_UNSUPPORTED_MAILBOX = 0x15, + CXL_MBOX_INVALID_PAYLOAD_LENGTH = 0x16, + CXL_MBOX_MAX = 0x17 +} CXLRetCode; + typedef struct cxl_device_state { MemoryRegion device_registers; From 22d7e3be0714f39bae43bd0c05f6e6d149a47b13 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Tue, 30 May 2023 14:35:59 +0100 Subject: [PATCH 07/53] hw/cxl/events: Wire up get/clear event mailbox commands CXL testing is benefited from an artificial event log injection mechanism. Add an event log infrastructure to insert, get, and clear events from the various logs available on a device. Replace the stubbed out CXL Get/Clear Event mailbox commands with commands that operate on the new infrastructure. Signed-off-by: Ira Weiny Reviewed-by: Fan Ni Signed-off-by: Jonathan Cameron Message-Id: <20230530133603.16934-4-Jonathan.Cameron@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/cxl/cxl-events.c | 217 ++++++++++++++++++++++++++++++++++++ hw/cxl/cxl-mailbox-utils.c | 40 ++++++- hw/cxl/meson.build | 1 + hw/mem/cxl_type3.c | 1 + include/hw/cxl/cxl_device.h | 25 +++++ include/hw/cxl/cxl_events.h | 55 +++++++++ 6 files changed, 337 insertions(+), 2 deletions(-) create mode 100644 hw/cxl/cxl-events.c diff --git a/hw/cxl/cxl-events.c b/hw/cxl/cxl-events.c new file mode 100644 index 0000000000..5da1b76b97 --- /dev/null +++ b/hw/cxl/cxl-events.c @@ -0,0 +1,217 @@ +/* + * CXL Event processing + * + * Copyright(C) 2023 Intel Corporation. + * + * This work is licensed under the terms of the GNU GPL, version 2. See the + * COPYING file in the top-level directory. + */ + +#include + +#include "qemu/osdep.h" +#include "qemu/bswap.h" +#include "qemu/typedefs.h" +#include "qemu/error-report.h" +#include "hw/cxl/cxl.h" +#include "hw/cxl/cxl_events.h" + +/* Artificial limit on the number of events a log can hold */ +#define CXL_TEST_EVENT_OVERFLOW 8 + +static void reset_overflow(CXLEventLog *log) +{ + log->overflow_err_count = 0; + log->first_overflow_timestamp = 0; + log->last_overflow_timestamp = 0; +} + +void cxl_event_init(CXLDeviceState *cxlds) +{ + CXLEventLog *log; + int i; + + for (i = 0; i < CXL_EVENT_TYPE_MAX; i++) { + log = &cxlds->event_logs[i]; + log->next_handle = 1; + log->overflow_err_count = 0; + log->first_overflow_timestamp = 0; + log->last_overflow_timestamp = 0; + qemu_mutex_init(&log->lock); + QSIMPLEQ_INIT(&log->events); + } +} + +static CXLEvent *cxl_event_get_head(CXLEventLog *log) +{ + return QSIMPLEQ_FIRST(&log->events); +} + +static CXLEvent *cxl_event_get_next(CXLEvent *entry) +{ + return QSIMPLEQ_NEXT(entry, node); +} + +static int cxl_event_count(CXLEventLog *log) +{ + CXLEvent *event; + int rc = 0; + + QSIMPLEQ_FOREACH(event, &log->events, node) { + rc++; + } + + return rc; +} + +static bool cxl_event_empty(CXLEventLog *log) +{ + return QSIMPLEQ_EMPTY(&log->events); +} + +static void cxl_event_delete_head(CXLDeviceState *cxlds, + CXLEventLogType log_type, + CXLEventLog *log) +{ + CXLEvent *entry = cxl_event_get_head(log); + + reset_overflow(log); + QSIMPLEQ_REMOVE_HEAD(&log->events, node); + if (cxl_event_empty(log)) { + cxl_event_set_status(cxlds, log_type, false); + } + g_free(entry); +} + +/* + * return true if an interrupt should be generated as a result + * of inserting this event. + */ +bool cxl_event_insert(CXLDeviceState *cxlds, CXLEventLogType log_type, + CXLEventRecordRaw *event) +{ + uint64_t time; + CXLEventLog *log; + CXLEvent *entry; + + if (log_type >= CXL_EVENT_TYPE_MAX) { + return false; + } + + time = cxl_device_get_timestamp(cxlds); + + log = &cxlds->event_logs[log_type]; + + QEMU_LOCK_GUARD(&log->lock); + + if (cxl_event_count(log) >= CXL_TEST_EVENT_OVERFLOW) { + if (log->overflow_err_count == 0) { + log->first_overflow_timestamp = time; + } + log->overflow_err_count++; + log->last_overflow_timestamp = time; + return false; + } + + entry = g_new0(CXLEvent, 1); + + memcpy(&entry->data, event, sizeof(*event)); + + entry->data.hdr.handle = cpu_to_le16(log->next_handle); + log->next_handle++; + /* 0 handle is never valid */ + if (log->next_handle == 0) { + log->next_handle++; + } + entry->data.hdr.timestamp = cpu_to_le64(time); + + QSIMPLEQ_INSERT_TAIL(&log->events, entry, node); + cxl_event_set_status(cxlds, log_type, true); + + /* Count went from 0 to 1 */ + return cxl_event_count(log) == 1; +} + +CXLRetCode cxl_event_get_records(CXLDeviceState *cxlds, CXLGetEventPayload *pl, + uint8_t log_type, int max_recs, + uint16_t *len) +{ + CXLEventLog *log; + CXLEvent *entry; + uint16_t nr; + + if (log_type >= CXL_EVENT_TYPE_MAX) { + return CXL_MBOX_INVALID_INPUT; + } + + log = &cxlds->event_logs[log_type]; + + QEMU_LOCK_GUARD(&log->lock); + + entry = cxl_event_get_head(log); + for (nr = 0; entry && nr < max_recs; nr++) { + memcpy(&pl->records[nr], &entry->data, CXL_EVENT_RECORD_SIZE); + entry = cxl_event_get_next(entry); + } + + if (!cxl_event_empty(log)) { + pl->flags |= CXL_GET_EVENT_FLAG_MORE_RECORDS; + } + + if (log->overflow_err_count) { + pl->flags |= CXL_GET_EVENT_FLAG_OVERFLOW; + pl->overflow_err_count = cpu_to_le16(log->overflow_err_count); + pl->first_overflow_timestamp = cpu_to_le64(log->first_overflow_timestamp); + pl->last_overflow_timestamp = cpu_to_le64(log->last_overflow_timestamp); + } + + pl->record_count = cpu_to_le16(nr); + *len = CXL_EVENT_PAYLOAD_HDR_SIZE + (CXL_EVENT_RECORD_SIZE * nr); + + return CXL_MBOX_SUCCESS; +} + +CXLRetCode cxl_event_clear_records(CXLDeviceState *cxlds, CXLClearEventPayload *pl) +{ + CXLEventLog *log; + uint8_t log_type; + CXLEvent *entry; + int nr; + + log_type = pl->event_log; + + if (log_type >= CXL_EVENT_TYPE_MAX) { + return CXL_MBOX_INVALID_INPUT; + } + + log = &cxlds->event_logs[log_type]; + + QEMU_LOCK_GUARD(&log->lock); + /* + * Must itterate the queue twice. + * "The device shall verify the event record handles specified in the input + * payload are in temporal order. If the device detects an older event + * record that will not be cleared when Clear Event Records is executed, + * the device shall return the Invalid Handle return code and shall not + * clear any of the specified event records." + * -- CXL 3.0 8.2.9.2.3 + */ + entry = cxl_event_get_head(log); + for (nr = 0; entry && nr < pl->nr_recs; nr++) { + uint16_t handle = pl->handle[nr]; + + /* NOTE: Both handles are little endian. */ + if (handle == 0 || entry->data.hdr.handle != handle) { + return CXL_MBOX_INVALID_INPUT; + } + entry = cxl_event_get_next(entry); + } + + entry = cxl_event_get_head(log); + for (nr = 0; entry && nr < pl->nr_recs; nr++) { + cxl_event_delete_head(cxlds, log_type, log); + entry = cxl_event_get_head(log); + } + + return CXL_MBOX_SUCCESS; +} diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c index d7e114aaae..3f46538048 100644 --- a/hw/cxl/cxl-mailbox-utils.c +++ b/hw/cxl/cxl-mailbox-utils.c @@ -9,6 +9,7 @@ #include "qemu/osdep.h" #include "hw/cxl/cxl.h" +#include "hw/cxl/cxl_events.h" #include "hw/pci/pci.h" #include "qemu/cutils.h" #include "qemu/log.h" @@ -95,11 +96,46 @@ struct cxl_cmd { return CXL_MBOX_SUCCESS; \ } -DEFINE_MAILBOX_HANDLER_ZEROED(events_get_records, 0x20); -DEFINE_MAILBOX_HANDLER_NOP(events_clear_records); DEFINE_MAILBOX_HANDLER_ZEROED(events_get_interrupt_policy, 4); DEFINE_MAILBOX_HANDLER_NOP(events_set_interrupt_policy); +static CXLRetCode cmd_events_get_records(struct cxl_cmd *cmd, + CXLDeviceState *cxlds, + uint16_t *len) +{ + CXLGetEventPayload *pl; + uint8_t log_type; + int max_recs; + + if (cmd->in < sizeof(log_type)) { + return CXL_MBOX_INVALID_INPUT; + } + + log_type = *((uint8_t *)cmd->payload); + + pl = (CXLGetEventPayload *)cmd->payload; + memset(pl, 0, sizeof(*pl)); + + max_recs = (cxlds->payload_size - CXL_EVENT_PAYLOAD_HDR_SIZE) / + CXL_EVENT_RECORD_SIZE; + if (max_recs > 0xFFFF) { + max_recs = 0xFFFF; + } + + return cxl_event_get_records(cxlds, pl, log_type, max_recs, len); +} + +static CXLRetCode cmd_events_clear_records(struct cxl_cmd *cmd, + CXLDeviceState *cxlds, + uint16_t *len) +{ + CXLClearEventPayload *pl; + + pl = (CXLClearEventPayload *)cmd->payload; + *len = 0; + return cxl_event_clear_records(cxlds, pl); +} + /* 8.2.9.2.1 */ static CXLRetCode cmd_firmware_update_get_info(struct cxl_cmd *cmd, CXLDeviceState *cxl_dstate, diff --git a/hw/cxl/meson.build b/hw/cxl/meson.build index 1f9aa2ea1f..e261ff3881 100644 --- a/hw/cxl/meson.build +++ b/hw/cxl/meson.build @@ -5,6 +5,7 @@ system_ss.add(when: 'CONFIG_CXL', 'cxl-mailbox-utils.c', 'cxl-host.c', 'cxl-cdat.c', + 'cxl-events.c', ), if_false: files( 'cxl-host-stubs.c', diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c index d751803188..ec5a384885 100644 --- a/hw/mem/cxl_type3.c +++ b/hw/mem/cxl_type3.c @@ -724,6 +724,7 @@ static void ct3_realize(PCIDevice *pci_dev, Error **errp) goto err_release_cdat; } + cxl_event_init(&ct3d->cxl_dstate); return; err_release_cdat: diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h index 9f8ee85f8a..d3aec1bc0e 100644 --- a/include/hw/cxl/cxl_device.h +++ b/include/hw/cxl/cxl_device.h @@ -111,6 +111,20 @@ typedef enum { CXL_MBOX_MAX = 0x17 } CXLRetCode; +typedef struct CXLEvent { + CXLEventRecordRaw data; + QSIMPLEQ_ENTRY(CXLEvent) node; +} CXLEvent; + +typedef struct CXLEventLog { + uint16_t next_handle; + uint16_t overflow_err_count; + uint64_t first_overflow_timestamp; + uint64_t last_overflow_timestamp; + QemuMutex lock; + QSIMPLEQ_HEAD(, CXLEvent) events; +} CXLEventLog; + typedef struct cxl_device_state { MemoryRegion device_registers; @@ -161,6 +175,8 @@ typedef struct cxl_device_state { uint64_t mem_size; uint64_t pmem_size; uint64_t vmem_size; + + CXLEventLog event_logs[CXL_EVENT_TYPE_MAX]; } CXLDeviceState; /* Initialize the register block for a device */ @@ -353,6 +369,15 @@ MemTxResult cxl_type3_write(PCIDevice *d, hwaddr host_addr, uint64_t data, uint64_t cxl_device_get_timestamp(CXLDeviceState *cxlds); +void cxl_event_init(CXLDeviceState *cxlds); +bool cxl_event_insert(CXLDeviceState *cxlds, CXLEventLogType log_type, + CXLEventRecordRaw *event); +CXLRetCode cxl_event_get_records(CXLDeviceState *cxlds, CXLGetEventPayload *pl, + uint8_t log_type, int max_recs, + uint16_t *len); +CXLRetCode cxl_event_clear_records(CXLDeviceState *cxlds, + CXLClearEventPayload *pl); + void cxl_set_poison_list_overflowed(CXLType3Dev *ct3d); #endif diff --git a/include/hw/cxl/cxl_events.h b/include/hw/cxl/cxl_events.h index aeb3b0590e..d4aaa894f1 100644 --- a/include/hw/cxl/cxl_events.h +++ b/include/hw/cxl/cxl_events.h @@ -10,6 +10,8 @@ #ifndef CXL_EVENTS_H #define CXL_EVENTS_H +#include "qemu/uuid.h" + /* * CXL rev 3.0 section 8.2.9.2.2; Table 8-49 * @@ -25,4 +27,57 @@ typedef enum CXLEventLogType { CXL_EVENT_TYPE_MAX } CXLEventLogType; +/* + * Common Event Record Format + * CXL rev 3.0 section 8.2.9.2.1; Table 8-42 + */ +#define CXL_EVENT_REC_HDR_RES_LEN 0xf +typedef struct CXLEventRecordHdr { + QemuUUID id; + uint8_t length; + uint8_t flags[3]; + uint16_t handle; + uint16_t related_handle; + uint64_t timestamp; + uint8_t maint_op_class; + uint8_t reserved[CXL_EVENT_REC_HDR_RES_LEN]; +} QEMU_PACKED CXLEventRecordHdr; + +#define CXL_EVENT_RECORD_DATA_LENGTH 0x50 +typedef struct CXLEventRecordRaw { + CXLEventRecordHdr hdr; + uint8_t data[CXL_EVENT_RECORD_DATA_LENGTH]; +} QEMU_PACKED CXLEventRecordRaw; +#define CXL_EVENT_RECORD_SIZE (sizeof(CXLEventRecordRaw)) + +/* + * Get Event Records output payload + * CXL rev 3.0 section 8.2.9.2.2; Table 8-50 + */ +#define CXL_GET_EVENT_FLAG_OVERFLOW BIT(0) +#define CXL_GET_EVENT_FLAG_MORE_RECORDS BIT(1) +typedef struct CXLGetEventPayload { + uint8_t flags; + uint8_t reserved1; + uint16_t overflow_err_count; + uint64_t first_overflow_timestamp; + uint64_t last_overflow_timestamp; + uint16_t record_count; + uint8_t reserved2[0xa]; + CXLEventRecordRaw records[]; +} QEMU_PACKED CXLGetEventPayload; +#define CXL_EVENT_PAYLOAD_HDR_SIZE (sizeof(CXLGetEventPayload)) + +/* + * Clear Event Records input payload + * CXL rev 3.0 section 8.2.9.2.3; Table 8-51 + */ +typedef struct CXLClearEventPayload { + uint8_t event_log; /* CXLEventLogType */ + uint8_t clear_flags; + uint8_t nr_recs; + uint8_t reserved[3]; + uint16_t handle[]; +} CXLClearEventPayload; + #endif /* CXL_EVENTS_H */ From 6676bb973ba53d60886b06213ec98fbd736d66a1 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Tue, 30 May 2023 14:36:00 +0100 Subject: [PATCH 08/53] hw/cxl/events: Add event interrupt support Replace the stubbed out CXL Get/Set Event interrupt policy mailbox commands. Enable those commands to control interrupts for each of the event log types. Skip the standard input mailbox length on the Set command due to DCD being optional. Perform the checks separately. Signed-off-by: Ira Weiny Reviewed-by: Fan Ni Reviewed-by: Davidlohr Bueso Signed-off-by: Jonathan Cameron Message-Id: <20230530133603.16934-5-Jonathan.Cameron@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/cxl/cxl-events.c | 33 ++++++++++- hw/cxl/cxl-mailbox-utils.c | 106 +++++++++++++++++++++++++++++------- hw/mem/cxl_type3.c | 4 +- include/hw/cxl/cxl_device.h | 6 +- include/hw/cxl/cxl_events.h | 23 ++++++++ 5 files changed, 147 insertions(+), 25 deletions(-) diff --git a/hw/cxl/cxl-events.c b/hw/cxl/cxl-events.c index 5da1b76b97..d161d57456 100644 --- a/hw/cxl/cxl-events.c +++ b/hw/cxl/cxl-events.c @@ -13,6 +13,8 @@ #include "qemu/bswap.h" #include "qemu/typedefs.h" #include "qemu/error-report.h" +#include "hw/pci/msi.h" +#include "hw/pci/msix.h" #include "hw/cxl/cxl.h" #include "hw/cxl/cxl_events.h" @@ -26,7 +28,7 @@ static void reset_overflow(CXLEventLog *log) log->last_overflow_timestamp = 0; } -void cxl_event_init(CXLDeviceState *cxlds) +void cxl_event_init(CXLDeviceState *cxlds, int start_msg_num) { CXLEventLog *log; int i; @@ -37,9 +39,16 @@ void cxl_event_init(CXLDeviceState *cxlds) log->overflow_err_count = 0; log->first_overflow_timestamp = 0; log->last_overflow_timestamp = 0; + log->irq_enabled = false; + log->irq_vec = start_msg_num++; qemu_mutex_init(&log->lock); QSIMPLEQ_INIT(&log->events); } + + /* Override -- Dynamic Capacity uses the same vector as info */ + cxlds->event_logs[CXL_EVENT_TYPE_DYNAMIC_CAP].irq_vec = + cxlds->event_logs[CXL_EVENT_TYPE_INFO].irq_vec; + } static CXLEvent *cxl_event_get_head(CXLEventLog *log) @@ -215,3 +224,25 @@ CXLRetCode cxl_event_clear_records(CXLDeviceState *cxlds, CXLClearEventPayload * return CXL_MBOX_SUCCESS; } + +void cxl_event_irq_assert(CXLType3Dev *ct3d) +{ + CXLDeviceState *cxlds = &ct3d->cxl_dstate; + PCIDevice *pdev = &ct3d->parent_obj; + int i; + + for (i = 0; i < CXL_EVENT_TYPE_MAX; i++) { + CXLEventLog *log = &cxlds->event_logs[i]; + + if (!log->irq_enabled || cxl_event_empty(log)) { + continue; + } + + /* Notifies interrupt, legacy IRQ is not supported */ + if (msix_enabled(pdev)) { + msix_notify(pdev, log->irq_vec); + } else if (msi_enabled(pdev)) { + msi_notify(pdev, log->irq_vec); + } + } +} diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c index 3f46538048..02f9b5a870 100644 --- a/hw/cxl/cxl-mailbox-utils.c +++ b/hw/cxl/cxl-mailbox-utils.c @@ -80,25 +80,6 @@ struct cxl_cmd { uint8_t *payload; }; -#define DEFINE_MAILBOX_HANDLER_ZEROED(name, size) \ - uint16_t __zero##name = size; \ - static CXLRetCode cmd_##name(struct cxl_cmd *cmd, \ - CXLDeviceState *cxl_dstate, uint16_t *len) \ - { \ - *len = __zero##name; \ - memset(cmd->payload, 0, *len); \ - return CXL_MBOX_SUCCESS; \ - } -#define DEFINE_MAILBOX_HANDLER_NOP(name) \ - static CXLRetCode cmd_##name(struct cxl_cmd *cmd, \ - CXLDeviceState *cxl_dstate, uint16_t *len) \ - { \ - return CXL_MBOX_SUCCESS; \ - } - -DEFINE_MAILBOX_HANDLER_ZEROED(events_get_interrupt_policy, 4); -DEFINE_MAILBOX_HANDLER_NOP(events_set_interrupt_policy); - static CXLRetCode cmd_events_get_records(struct cxl_cmd *cmd, CXLDeviceState *cxlds, uint16_t *len) @@ -136,6 +117,88 @@ static CXLRetCode cmd_events_clear_records(struct cxl_cmd *cmd, return cxl_event_clear_records(cxlds, pl); } +static CXLRetCode cmd_events_get_interrupt_policy(struct cxl_cmd *cmd, + CXLDeviceState *cxlds, + uint16_t *len) +{ + CXLEventInterruptPolicy *policy; + CXLEventLog *log; + + policy = (CXLEventInterruptPolicy *)cmd->payload; + memset(policy, 0, sizeof(*policy)); + + log = &cxlds->event_logs[CXL_EVENT_TYPE_INFO]; + if (log->irq_enabled) { + policy->info_settings = CXL_EVENT_INT_SETTING(log->irq_vec); + } + + log = &cxlds->event_logs[CXL_EVENT_TYPE_WARN]; + if (log->irq_enabled) { + policy->warn_settings = CXL_EVENT_INT_SETTING(log->irq_vec); + } + + log = &cxlds->event_logs[CXL_EVENT_TYPE_FAIL]; + if (log->irq_enabled) { + policy->failure_settings = CXL_EVENT_INT_SETTING(log->irq_vec); + } + + log = &cxlds->event_logs[CXL_EVENT_TYPE_FATAL]; + if (log->irq_enabled) { + policy->fatal_settings = CXL_EVENT_INT_SETTING(log->irq_vec); + } + + log = &cxlds->event_logs[CXL_EVENT_TYPE_DYNAMIC_CAP]; + if (log->irq_enabled) { + /* Dynamic Capacity borrows the same vector as info */ + policy->dyn_cap_settings = CXL_INT_MSI_MSIX; + } + + *len = sizeof(*policy); + return CXL_MBOX_SUCCESS; +} + +static CXLRetCode cmd_events_set_interrupt_policy(struct cxl_cmd *cmd, + CXLDeviceState *cxlds, + uint16_t *len) +{ + CXLEventInterruptPolicy *policy; + CXLEventLog *log; + + if (*len < CXL_EVENT_INT_SETTING_MIN_LEN) { + return CXL_MBOX_INVALID_PAYLOAD_LENGTH; + } + + policy = (CXLEventInterruptPolicy *)cmd->payload; + + log = &cxlds->event_logs[CXL_EVENT_TYPE_INFO]; + log->irq_enabled = (policy->info_settings & CXL_EVENT_INT_MODE_MASK) == + CXL_INT_MSI_MSIX; + + log = &cxlds->event_logs[CXL_EVENT_TYPE_WARN]; + log->irq_enabled = (policy->warn_settings & CXL_EVENT_INT_MODE_MASK) == + CXL_INT_MSI_MSIX; + + log = &cxlds->event_logs[CXL_EVENT_TYPE_FAIL]; + log->irq_enabled = (policy->failure_settings & CXL_EVENT_INT_MODE_MASK) == + CXL_INT_MSI_MSIX; + + log = &cxlds->event_logs[CXL_EVENT_TYPE_FATAL]; + log->irq_enabled = (policy->fatal_settings & CXL_EVENT_INT_MODE_MASK) == + CXL_INT_MSI_MSIX; + + /* DCD is optional */ + if (*len < sizeof(*policy)) { + return CXL_MBOX_SUCCESS; + } + + log = &cxlds->event_logs[CXL_EVENT_TYPE_DYNAMIC_CAP]; + log->irq_enabled = (policy->dyn_cap_settings & CXL_EVENT_INT_MODE_MASK) == + CXL_INT_MSI_MSIX; + + *len = sizeof(*policy); + return CXL_MBOX_SUCCESS; +} + /* 8.2.9.2.1 */ static CXLRetCode cmd_firmware_update_get_info(struct cxl_cmd *cmd, CXLDeviceState *cxl_dstate, @@ -611,9 +674,10 @@ static struct cxl_cmd cxl_cmd_set[256][256] = { [EVENTS][CLEAR_RECORDS] = { "EVENTS_CLEAR_RECORDS", cmd_events_clear_records, ~0, IMMEDIATE_LOG_CHANGE }, [EVENTS][GET_INTERRUPT_POLICY] = { "EVENTS_GET_INTERRUPT_POLICY", - cmd_events_get_interrupt_policy, 0, 0 }, + cmd_events_get_interrupt_policy, 0, 0 }, [EVENTS][SET_INTERRUPT_POLICY] = { "EVENTS_SET_INTERRUPT_POLICY", - cmd_events_set_interrupt_policy, 4, IMMEDIATE_CONFIG_CHANGE }, + cmd_events_set_interrupt_policy, + ~0, IMMEDIATE_CONFIG_CHANGE }, [FIRMWARE_UPDATE][GET_INFO] = { "FIRMWARE_UPDATE_GET_INFO", cmd_firmware_update_get_info, 0, 0 }, [TIMESTAMP][GET] = { "TIMESTAMP_GET", cmd_timestamp_get, 0, 0 }, diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c index ec5a384885..c9e347f42b 100644 --- a/hw/mem/cxl_type3.c +++ b/hw/mem/cxl_type3.c @@ -659,7 +659,7 @@ static void ct3_realize(PCIDevice *pci_dev, Error **errp) ComponentRegisters *regs = &cxl_cstate->crb; MemoryRegion *mr = ®s->component_registers; uint8_t *pci_conf = pci_dev->config; - unsigned short msix_num = 1; + unsigned short msix_num = 6; int i, rc; QTAILQ_INIT(&ct3d->error_list); @@ -723,8 +723,8 @@ static void ct3_realize(PCIDevice *pci_dev, Error **errp) if (rc) { goto err_release_cdat; } + cxl_event_init(&ct3d->cxl_dstate, 2); - cxl_event_init(&ct3d->cxl_dstate); return; err_release_cdat: diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h index d3aec1bc0e..1978730fba 100644 --- a/include/hw/cxl/cxl_device.h +++ b/include/hw/cxl/cxl_device.h @@ -121,6 +121,8 @@ typedef struct CXLEventLog { uint16_t overflow_err_count; uint64_t first_overflow_timestamp; uint64_t last_overflow_timestamp; + bool irq_enabled; + int irq_vec; QemuMutex lock; QSIMPLEQ_HEAD(, CXLEvent) events; } CXLEventLog; @@ -369,7 +371,7 @@ MemTxResult cxl_type3_write(PCIDevice *d, hwaddr host_addr, uint64_t data, uint64_t cxl_device_get_timestamp(CXLDeviceState *cxlds); -void cxl_event_init(CXLDeviceState *cxlds); +void cxl_event_init(CXLDeviceState *cxlds, int start_msg_num); bool cxl_event_insert(CXLDeviceState *cxlds, CXLEventLogType log_type, CXLEventRecordRaw *event); CXLRetCode cxl_event_get_records(CXLDeviceState *cxlds, CXLGetEventPayload *pl, @@ -378,6 +380,8 @@ CXLRetCode cxl_event_get_records(CXLDeviceState *cxlds, CXLGetEventPayload *pl, CXLRetCode cxl_event_clear_records(CXLDeviceState *cxlds, CXLClearEventPayload *pl); +void cxl_event_irq_assert(CXLType3Dev *ct3d); + void cxl_set_poison_list_overflowed(CXLType3Dev *ct3d); #endif diff --git a/include/hw/cxl/cxl_events.h b/include/hw/cxl/cxl_events.h index d4aaa894f1..4bf8b7aa08 100644 --- a/include/hw/cxl/cxl_events.h +++ b/include/hw/cxl/cxl_events.h @@ -80,4 +80,27 @@ typedef struct CXLClearEventPayload { uint16_t handle[]; } CXLClearEventPayload; +/** + * Event Interrupt Policy + * + * CXL rev 3.0 section 8.2.9.2.4; Table 8-52 + */ +typedef enum CXLEventIntMode { + CXL_INT_NONE = 0x00, + CXL_INT_MSI_MSIX = 0x01, + CXL_INT_FW = 0x02, + CXL_INT_RES = 0x03, +} CXLEventIntMode; +#define CXL_EVENT_INT_MODE_MASK 0x3 +#define CXL_EVENT_INT_SETTING(vector) ((((uint8_t)vector & 0xf) << 4) | CXL_INT_MSI_MSIX) +typedef struct CXLEventInterruptPolicy { + uint8_t info_settings; + uint8_t warn_settings; + uint8_t failure_settings; + uint8_t fatal_settings; + uint8_t dyn_cap_settings; +} QEMU_PACKED CXLEventInterruptPolicy; +/* DCD is optional but other fields are not */ +#define CXL_EVENT_INT_SETTING_MIN_LEN 4 + #endif /* CXL_EVENTS_H */ From ea9b6d647f2f4708708d19ba1cb17d332d3eff06 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Tue, 30 May 2023 14:36:01 +0100 Subject: [PATCH 09/53] hw/cxl/events: Add injection of General Media Events To facilitate testing provide a QMP command to inject a general media event. The event can be added to the log specified. Signed-off-by: Ira Weiny Reviewed-by: Fan Ni Acked-by: Markus Armbruster Signed-off-by: Jonathan Cameron Message-Id: <20230530133603.16934-6-Jonathan.Cameron@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/mem/cxl_type3.c | 111 ++++++++++++++++++++++++++++++++++++ hw/mem/cxl_type3_stubs.c | 10 ++++ include/hw/cxl/cxl_events.h | 20 +++++++ qapi/cxl.json | 74 ++++++++++++++++++++++++ 4 files changed, 215 insertions(+) diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c index c9e347f42b..b1618779d2 100644 --- a/hw/mem/cxl_type3.c +++ b/hw/mem/cxl_type3.c @@ -1181,6 +1181,117 @@ void qmp_cxl_inject_correctable_error(const char *path, CxlCorErrorType type, pcie_aer_inject_error(PCI_DEVICE(obj), &err); } +static void cxl_assign_event_header(CXLEventRecordHdr *hdr, + const QemuUUID *uuid, uint32_t flags, + uint8_t length, uint64_t timestamp) +{ + st24_le_p(&hdr->flags, flags); + hdr->length = length; + memcpy(&hdr->id, uuid, sizeof(hdr->id)); + stq_le_p(&hdr->timestamp, timestamp); +} + +static const QemuUUID gen_media_uuid = { + .data = UUID(0xfbcd0a77, 0xc260, 0x417f, + 0x85, 0xa9, 0x08, 0x8b, 0x16, 0x21, 0xeb, 0xa6), +}; + +#define CXL_GMER_VALID_CHANNEL BIT(0) +#define CXL_GMER_VALID_RANK BIT(1) +#define CXL_GMER_VALID_DEVICE BIT(2) +#define CXL_GMER_VALID_COMPONENT BIT(3) + +static int ct3d_qmp_cxl_event_log_enc(CxlEventLog log) +{ + switch (log) { + case CXL_EVENT_LOG_INFORMATIONAL: + return CXL_EVENT_TYPE_INFO; + case CXL_EVENT_LOG_WARNING: + return CXL_EVENT_TYPE_WARN; + case CXL_EVENT_LOG_FAILURE: + return CXL_EVENT_TYPE_FAIL; + case CXL_EVENT_LOG_FATAL: + return CXL_EVENT_TYPE_FATAL; +/* DCD not yet supported */ + default: + return -EINVAL; + } +} +/* Component ID is device specific. Define this as a string. */ +void qmp_cxl_inject_general_media_event(const char *path, CxlEventLog log, + uint8_t flags, uint64_t dpa, + uint8_t descriptor, uint8_t type, + uint8_t transaction_type, + bool has_channel, uint8_t channel, + bool has_rank, uint8_t rank, + bool has_device, uint32_t device, + const char *component_id, + Error **errp) +{ + Object *obj = object_resolve_path(path, NULL); + CXLEventGenMedia gem; + CXLEventRecordHdr *hdr = &gem.hdr; + CXLDeviceState *cxlds; + CXLType3Dev *ct3d; + uint16_t valid_flags = 0; + uint8_t enc_log; + int rc; + + if (!obj) { + error_setg(errp, "Unable to resolve path"); + return; + } + if (!object_dynamic_cast(obj, TYPE_CXL_TYPE3)) { + error_setg(errp, "Path does not point to a CXL type 3 device"); + return; + } + ct3d = CXL_TYPE3(obj); + cxlds = &ct3d->cxl_dstate; + + rc = ct3d_qmp_cxl_event_log_enc(log); + if (rc < 0) { + error_setg(errp, "Unhandled error log type"); + return; + } + enc_log = rc; + + memset(&gem, 0, sizeof(gem)); + cxl_assign_event_header(hdr, &gen_media_uuid, flags, sizeof(gem), + cxl_device_get_timestamp(&ct3d->cxl_dstate)); + + stq_le_p(&gem.phys_addr, dpa); + gem.descriptor = descriptor; + gem.type = type; + gem.transaction_type = transaction_type; + + if (has_channel) { + gem.channel = channel; + valid_flags |= CXL_GMER_VALID_CHANNEL; + } + + if (has_rank) { + gem.rank = rank; + valid_flags |= CXL_GMER_VALID_RANK; + } + + if (has_device) { + st24_le_p(gem.device, device); + valid_flags |= CXL_GMER_VALID_DEVICE; + } + + if (component_id) { + strncpy((char *)gem.component_id, component_id, + sizeof(gem.component_id) - 1); + valid_flags |= CXL_GMER_VALID_COMPONENT; + } + + stw_le_p(&gem.validity_flags, valid_flags); + + if (cxl_event_insert(cxlds, enc_log, (CXLEventRecordRaw *)&gem)) { + cxl_event_irq_assert(ct3d); + } +} + static void ct3_class_init(ObjectClass *oc, void *data) { DeviceClass *dc = DEVICE_CLASS(oc); diff --git a/hw/mem/cxl_type3_stubs.c b/hw/mem/cxl_type3_stubs.c index fd1166a610..4dfbdf9268 100644 --- a/hw/mem/cxl_type3_stubs.c +++ b/hw/mem/cxl_type3_stubs.c @@ -3,6 +3,16 @@ #include "qapi/error.h" #include "qapi/qapi-commands-cxl.h" +void qmp_cxl_inject_general_media_event(const char *path, CxlEventLog log, + uint8_t flags, uint64_t dpa, + uint8_t descriptor, uint8_t type, + uint8_t transaction_type, + bool has_channel, uint8_t channel, + bool has_rank, uint8_t rank, + bool has_device, uint32_t device, + const char *component_id, + Error **errp) {} + void qmp_cxl_inject_poison(const char *path, uint64_t start, uint64_t length, Error **errp) { diff --git a/include/hw/cxl/cxl_events.h b/include/hw/cxl/cxl_events.h index 4bf8b7aa08..b189193f4c 100644 --- a/include/hw/cxl/cxl_events.h +++ b/include/hw/cxl/cxl_events.h @@ -103,4 +103,24 @@ typedef struct CXLEventInterruptPolicy { /* DCD is optional but other fields are not */ #define CXL_EVENT_INT_SETTING_MIN_LEN 4 +/* + * General Media Event Record + * CXL rev 3.0 Section 8.2.9.2.1.1; Table 8-43 + */ +#define CXL_EVENT_GEN_MED_COMP_ID_SIZE 0x10 +#define CXL_EVENT_GEN_MED_RES_SIZE 0x2e +typedef struct CXLEventGenMedia { + CXLEventRecordHdr hdr; + uint64_t phys_addr; + uint8_t descriptor; + uint8_t type; + uint8_t transaction_type; + uint16_t validity_flags; + uint8_t channel; + uint8_t rank; + uint8_t device[3]; + uint8_t component_id[CXL_EVENT_GEN_MED_COMP_ID_SIZE]; + uint8_t reserved[CXL_EVENT_GEN_MED_RES_SIZE]; +} QEMU_PACKED CXLEventGenMedia; + #endif /* CXL_EVENTS_H */ diff --git a/qapi/cxl.json b/qapi/cxl.json index ed1c7eea3a..d509430844 100644 --- a/qapi/cxl.json +++ b/qapi/cxl.json @@ -5,6 +5,80 @@ # = CXL devices ## +## +# @CxlEventLog: +# +# CXL has a number of separate event logs for different types of +# events. Each such event log is handled and signaled independently. +# +# @informational: Information Event Log +# +# @warning: Warning Event Log +# +# @failure: Failure Event Log +# +# @fatal: Fatal Event Log +# +# Since: 8.1 +## +{ 'enum': 'CxlEventLog', + 'data': ['informational', + 'warning', + 'failure', + 'fatal'] + } + +## +# @cxl-inject-general-media-event: +# +# Inject an event record for a General Media Event (CXL r3.0 +# 8.2.9.2.1.1). This event type is reported via one of the event logs +# specified via the log parameter. +# +# @path: CXL type 3 device canonical QOM path +# +# @log: event log to add the event to +# +# @flags: Event Record Flags. See CXL r3.0 Table 8-42 Common Event +# Record Format, Event Record Flags for subfield definitions. +# +# @dpa: Device Physical Address (relative to @path device). Note +# lower bits include some flags. See CXL r3.0 Table 8-43 General +# Media Event Record, Physical Address. +# +# @descriptor: Memory Event Descriptor with additional memory event +# information. See CXL r3.0 Table 8-43 General Media Event +# Record, Memory Event Descriptor for bit definitions. +# +# @type: Type of memory event that occurred. See CXL r3.0 Table 8-43 +# General Media Event Record, Memory Event Type for possible +# values. +# +# @transaction-type: Type of first transaction that caused the event +# to occur. See CXL r3.0 Table 8-43 General Media Event Record, +# Transaction Type for possible values. +# +# @channel: The channel of the memory event location. A channel is an +# interface that can be independently accessed for a transaction. +# +# @rank: The rank of the memory event location. A rank is a set of +# memory devices on a channel that together execute a transaction. +# +# @device: Bitmask that represents all devices in the rank associated +# with the memory event location. +# +# @component-id: Device specific component identifier for the event. +# May describe a field replaceable sub-component of the device. +# +# Since: 8.1 +## +{ 'command': 'cxl-inject-general-media-event', + 'data': { 'path': 'str', 'log': 'CxlEventLog', 'flags': 'uint8', + 'dpa': 'uint64', 'descriptor': 'uint8', + 'type': 'uint8', 'transaction-type': 'uint8', + '*channel': 'uint8', '*rank': 'uint8', + '*device': 'uint32', '*component-id': 'str' } } + ## # @cxl-inject-poison: # From b90a324eda7113b62b558aad43e2166eb52567d2 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Tue, 30 May 2023 14:36:02 +0100 Subject: [PATCH 10/53] hw/cxl/events: Add injection of DRAM events Defined in CXL r3.0 8.2.9.2.1.2 DRAM Event Record, this event provides information related to DRAM devices. Example injection command in QMP: { "execute": "cxl-inject-dram-event", "arguments": { "path": "/machine/peripheral/cxl-mem0", "log": "informational", "flags": 1, "dpa": 1000, "descriptor": 3, "type": 3, "transaction-type": 192, "channel": 3, "rank": 17, "nibble-mask": 37421234, "bank-group": 7, "bank": 11, "row": 2, "column": 77, "correction-mask": [33, 44, 55,66] }} Acked-by: Markus Armbruster Reviewed-by: Fan Ni Reviewed-by: Ira Weiny Signed-off-by: Jonathan Cameron Message-Id: <20230530133603.16934-7-Jonathan.Cameron@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/mem/cxl_type3.c | 116 ++++++++++++++++++++++++++++++++++++ hw/mem/cxl_type3_stubs.c | 13 ++++ include/hw/cxl/cxl_events.h | 23 +++++++ qapi/cxl.json | 61 +++++++++++++++++++ 4 files changed, 213 insertions(+) diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c index b1618779d2..3c07b1b7a3 100644 --- a/hw/mem/cxl_type3.c +++ b/hw/mem/cxl_type3.c @@ -1196,6 +1196,11 @@ static const QemuUUID gen_media_uuid = { 0x85, 0xa9, 0x08, 0x8b, 0x16, 0x21, 0xeb, 0xa6), }; +static const QemuUUID dram_uuid = { + .data = UUID(0x601dcbb3, 0x9c06, 0x4eab, 0xb8, 0xaf, + 0x4e, 0x9b, 0xfb, 0x5c, 0x96, 0x24), +}; + #define CXL_GMER_VALID_CHANNEL BIT(0) #define CXL_GMER_VALID_RANK BIT(1) #define CXL_GMER_VALID_DEVICE BIT(2) @@ -1292,6 +1297,117 @@ void qmp_cxl_inject_general_media_event(const char *path, CxlEventLog log, } } +#define CXL_DRAM_VALID_CHANNEL BIT(0) +#define CXL_DRAM_VALID_RANK BIT(1) +#define CXL_DRAM_VALID_NIBBLE_MASK BIT(2) +#define CXL_DRAM_VALID_BANK_GROUP BIT(3) +#define CXL_DRAM_VALID_BANK BIT(4) +#define CXL_DRAM_VALID_ROW BIT(5) +#define CXL_DRAM_VALID_COLUMN BIT(6) +#define CXL_DRAM_VALID_CORRECTION_MASK BIT(7) + +void qmp_cxl_inject_dram_event(const char *path, CxlEventLog log, uint8_t flags, + uint64_t dpa, uint8_t descriptor, + uint8_t type, uint8_t transaction_type, + bool has_channel, uint8_t channel, + bool has_rank, uint8_t rank, + bool has_nibble_mask, uint32_t nibble_mask, + bool has_bank_group, uint8_t bank_group, + bool has_bank, uint8_t bank, + bool has_row, uint32_t row, + bool has_column, uint16_t column, + bool has_correction_mask, uint64List *correction_mask, + Error **errp) +{ + Object *obj = object_resolve_path(path, NULL); + CXLEventDram dram; + CXLEventRecordHdr *hdr = &dram.hdr; + CXLDeviceState *cxlds; + CXLType3Dev *ct3d; + uint16_t valid_flags = 0; + uint8_t enc_log; + int rc; + + if (!obj) { + error_setg(errp, "Unable to resolve path"); + return; + } + if (!object_dynamic_cast(obj, TYPE_CXL_TYPE3)) { + error_setg(errp, "Path does not point to a CXL type 3 device"); + return; + } + ct3d = CXL_TYPE3(obj); + cxlds = &ct3d->cxl_dstate; + + rc = ct3d_qmp_cxl_event_log_enc(log); + if (rc < 0) { + error_setg(errp, "Unhandled error log type"); + return; + } + enc_log = rc; + + memset(&dram, 0, sizeof(dram)); + cxl_assign_event_header(hdr, &dram_uuid, flags, sizeof(dram), + cxl_device_get_timestamp(&ct3d->cxl_dstate)); + stq_le_p(&dram.phys_addr, dpa); + dram.descriptor = descriptor; + dram.type = type; + dram.transaction_type = transaction_type; + + if (has_channel) { + dram.channel = channel; + valid_flags |= CXL_DRAM_VALID_CHANNEL; + } + + if (has_rank) { + dram.rank = rank; + valid_flags |= CXL_DRAM_VALID_RANK; + } + + if (has_nibble_mask) { + st24_le_p(dram.nibble_mask, nibble_mask); + valid_flags |= CXL_DRAM_VALID_NIBBLE_MASK; + } + + if (has_bank_group) { + dram.bank_group = bank_group; + valid_flags |= CXL_DRAM_VALID_BANK_GROUP; + } + + if (has_bank) { + dram.bank = bank; + valid_flags |= CXL_DRAM_VALID_BANK; + } + + if (has_row) { + st24_le_p(dram.row, row); + valid_flags |= CXL_DRAM_VALID_ROW; + } + + if (has_column) { + stw_le_p(&dram.column, column); + valid_flags |= CXL_DRAM_VALID_COLUMN; + } + + if (has_correction_mask) { + int count = 0; + while (correction_mask && count < 4) { + stq_le_p(&dram.correction_mask[count], + correction_mask->value); + count++; + correction_mask = correction_mask->next; + } + valid_flags |= CXL_DRAM_VALID_CORRECTION_MASK; + } + + stw_le_p(&dram.validity_flags, valid_flags); + + if (cxl_event_insert(cxlds, enc_log, (CXLEventRecordRaw *)&dram)) { + cxl_event_irq_assert(ct3d); + } + return; +} + static void ct3_class_init(ObjectClass *oc, void *data) { DeviceClass *dc = DEVICE_CLASS(oc); diff --git a/hw/mem/cxl_type3_stubs.c b/hw/mem/cxl_type3_stubs.c index 4dfbdf9268..e904c5d089 100644 --- a/hw/mem/cxl_type3_stubs.c +++ b/hw/mem/cxl_type3_stubs.c @@ -13,6 +13,19 @@ void qmp_cxl_inject_general_media_event(const char *path, CxlEventLog log, const char *component_id, Error **errp) {} +void qmp_cxl_inject_dram_event(const char *path, CxlEventLog log, uint8_t flags, + uint64_t dpa, uint8_t descriptor, + uint8_t type, uint8_t transaction_type, + bool has_channel, uint8_t channel, + bool has_rank, uint8_t rank, + bool has_nibble_mask, uint32_t nibble_mask, + bool has_bank_group, uint8_t bank_group, + bool has_bank, uint8_t bank, + bool has_row, uint32_t row, + bool has_column, uint16_t column, + bool has_correction_mask, uint64List *correction_mask, + Error **errp) {} + void qmp_cxl_inject_poison(const char *path, uint64_t start, uint64_t length, Error **errp) { diff --git a/include/hw/cxl/cxl_events.h b/include/hw/cxl/cxl_events.h index b189193f4c..a39e30d973 100644 --- a/include/hw/cxl/cxl_events.h +++ b/include/hw/cxl/cxl_events.h @@ -123,4 +123,27 @@ typedef struct CXLEventGenMedia { uint8_t reserved[CXL_EVENT_GEN_MED_RES_SIZE]; } QEMU_PACKED CXLEventGenMedia; +/* + * DRAM Event Record + * CXL Rev 3.0 Section 8.2.9.2.1.2: Table 8-44 + * All fields little endian. + */ +typedef struct CXLEventDram { + CXLEventRecordHdr hdr; + uint64_t phys_addr; + uint8_t descriptor; + uint8_t type; + uint8_t transaction_type; + uint16_t validity_flags; + uint8_t channel; + uint8_t rank; + uint8_t nibble_mask[3]; + uint8_t bank_group; + uint8_t bank; + uint8_t row[3]; + uint16_t column; + uint64_t correction_mask[4]; + uint8_t reserved[0x17]; +} QEMU_PACKED CXLEventDram; + #endif /* CXL_EVENTS_H */ diff --git a/qapi/cxl.json b/qapi/cxl.json index d509430844..2ad310387c 100644 --- a/qapi/cxl.json +++ b/qapi/cxl.json @@ -79,6 +79,67 @@ '*channel': 'uint8', '*rank': 'uint8', '*device': 'uint32', '*component-id': 'str' } } +## +# @cxl-inject-dram-event: +# +# Inject an event record for a DRAM Event (CXL r3.0 8.2.9.2.1.2). +# This event type is reported via one of the event logs specified via +# the log parameter. +# +# @path: CXL type 3 device canonical QOM path +# +# @log: Event log to add the event to +# +# @flags: Event Record Flags. See CXL r3.0 Table 8-42 Common Event +# Record Format, Event Record Flags for subfield definitions. +# +# @dpa: Device Physical Address (relative to @path device). Note +# lower bits include some flags. See CXL r3.0 Table 8-44 DRAM +# Event Record, Physical Address. +# +# @descriptor: Memory Event Descriptor with additional memory event +# information. See CXL r3.0 Table 8-44 DRAM Event Record, Memory +# Event Descriptor for bit definitions. +# +# @type: Type of memory event that occurred. See CXL r3.0 Table 8-44 +# DRAM Event Record, Memory Event Type for possible values. +# +# @transaction-type: Type of first transaction that caused the event +# to occur. See CXL r3.0 Table 8-44 DRAM Event Record, +# Transaction Type for possible values. +# +# @channel: The channel of the memory event location. A channel is an +# interface that can be independently accessed for a transaction. +# +# @rank: The rank of the memory event location. A rank is a set of +# memory devices on a channel that together execute a transaction. +# +# @nibble-mask: Identifies one or more nibbles that the error affects +# +# @bank-group: Bank group of the memory event location, incorporating +# a number of Banks. +# +# @bank: Bank of the memory event location. A single bank is accessed +# per read or write of the memory. +# +# @row: Row address within the DRAM. +# +# @column: Column address within the DRAM. +# +# @correction-mask: Bits within each nibble. Used in order of bits +# set in the nibble-mask. Up to 4 nibbles may be covered. +# +# Since: 8.1 +## +{ 'command': 'cxl-inject-dram-event', + 'data': { 'path': 'str', 'log': 'CxlEventLog', 'flags': 'uint8', + 'dpa': 'uint64', 'descriptor': 'uint8', + 'type': 'uint8', 'transaction-type': 'uint8', + '*channel': 'uint8', '*rank': 'uint8', '*nibble-mask': 'uint32', + '*bank-group': 'uint8', '*bank': 'uint8', '*row': 'uint32', + '*column': 'uint16', '*correction-mask': [ 'uint64' ] + }} + ## # @cxl-inject-poison: # From bafe03083255da3a053144b77a5fbc7dbf9494f3 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Tue, 30 May 2023 14:36:03 +0100 Subject: [PATCH 11/53] hw/cxl/events: Add injection of Memory Module Events These events include a copy of the device health information at the time of the event. Actually using the emulated device health would require a lot of controls to manipulate that state. Given the aim of this injection code is to just test the flows when events occur, inject the contents of the device health state as well. Future work may add more sophisticate device health emulation including direct generation of these records when events occur (such as a temperature threshold being crossed). That does not reduce the usefulness of this more basic generation of the events. Acked-by: Markus Armbruster Reviewed-by: Fan Ni Reviewed-by: Ira Weiny Signed-off-by: Jonathan Cameron Message-Id: <20230530133603.16934-8-Jonathan.Cameron@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/mem/cxl_type3.c | 62 +++++++++++++++++++++++++++++++++++++ hw/mem/cxl_type3_stubs.c | 12 +++++++ include/hw/cxl/cxl_events.h | 19 ++++++++++++ qapi/cxl.json | 53 +++++++++++++++++++++++++++++++ 4 files changed, 146 insertions(+) diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c index 3c07b1b7a3..4e314748d3 100644 --- a/hw/mem/cxl_type3.c +++ b/hw/mem/cxl_type3.c @@ -1201,6 +1201,11 @@ static const QemuUUID dram_uuid = { 0x4e, 0x9b, 0xfb, 0x5c, 0x96, 0x24), }; +static const QemuUUID memory_module_uuid = { + .data = UUID(0xfe927475, 0xdd59, 0x4339, 0xa5, 0x86, + 0x79, 0xba, 0xb1, 0x13, 0xb7, 0x74), +}; + #define CXL_GMER_VALID_CHANNEL BIT(0) #define CXL_GMER_VALID_RANK BIT(1) #define CXL_GMER_VALID_DEVICE BIT(2) @@ -1408,6 +1413,63 @@ void qmp_cxl_inject_dram_event(const char *path, CxlEventLog log, uint8_t flags, return; } +void qmp_cxl_inject_memory_module_event(const char *path, CxlEventLog log, + uint8_t flags, uint8_t type, + uint8_t health_status, + uint8_t media_status, + uint8_t additional_status, + uint8_t life_used, + int16_t temperature, + uint32_t dirty_shutdown_count, + uint32_t corrected_volatile_error_count, + uint32_t corrected_persistent_error_count, + Error **errp) +{ + Object *obj = object_resolve_path(path, NULL); + CXLEventMemoryModule module; + CXLEventRecordHdr *hdr = &module.hdr; + CXLDeviceState *cxlds; + CXLType3Dev *ct3d; + uint8_t enc_log; + int rc; + + if (!obj) { + error_setg(errp, "Unable to resolve path"); + return; + } + if (!object_dynamic_cast(obj, TYPE_CXL_TYPE3)) { + error_setg(errp, "Path does not point to a CXL type 3 device"); + return; + } + ct3d = CXL_TYPE3(obj); + cxlds = &ct3d->cxl_dstate; + + rc = ct3d_qmp_cxl_event_log_enc(log); + if (rc < 0) { + error_setg(errp, "Unhandled error log type"); + return; + } + enc_log = rc; + + memset(&module, 0, sizeof(module)); + cxl_assign_event_header(hdr, &memory_module_uuid, flags, sizeof(module), + cxl_device_get_timestamp(&ct3d->cxl_dstate)); + + module.type = type; + module.health_status = health_status; + module.media_status = media_status; + module.additional_status = additional_status; + module.life_used = life_used; + stw_le_p(&module.temperature, temperature); + stl_le_p(&module.dirty_shutdown_count, dirty_shutdown_count); + stl_le_p(&module.corrected_volatile_error_count, corrected_volatile_error_count); + stl_le_p(&module.corrected_persistent_error_count, corrected_persistent_error_count); + + if (cxl_event_insert(cxlds, enc_log, (CXLEventRecordRaw *)&module)) { + cxl_event_irq_assert(ct3d); + } +} + static void ct3_class_init(ObjectClass *oc, void *data) { DeviceClass *dc = DEVICE_CLASS(oc); diff --git a/hw/mem/cxl_type3_stubs.c b/hw/mem/cxl_type3_stubs.c index e904c5d089..f3e4a9fa72 100644 --- a/hw/mem/cxl_type3_stubs.c +++ b/hw/mem/cxl_type3_stubs.c @@ -26,6 +26,18 @@ void qmp_cxl_inject_dram_event(const char *path, CxlEventLog log, uint8_t flags, bool has_correction_mask, uint64List *correction_mask, Error **errp) {} +void qmp_cxl_inject_memory_module_event(const char *path, CxlEventLog log, + uint8_t flags, uint8_t type, + uint8_t health_status, + uint8_t media_status, + uint8_t additional_status, + uint8_t life_used, + int16_t temperature, + uint32_t dirty_shutdown_count, + uint32_t corrected_volatile_error_count, + uint32_t corrected_persistent_error_count, + Error **errp) {} + void qmp_cxl_inject_poison(const char *path, uint64_t start, uint64_t length, Error **errp) { diff --git a/include/hw/cxl/cxl_events.h b/include/hw/cxl/cxl_events.h index a39e30d973..089ba2091f 100644 --- a/include/hw/cxl/cxl_events.h +++ b/include/hw/cxl/cxl_events.h @@ -146,4 +146,23 @@ typedef struct CXLEventDram { uint8_t reserved[0x17]; } QEMU_PACKED CXLEventDram; +/* + * Memory Module Event Record + * CXL Rev 3.0 Section 8.2.9.2.1.3: Table 8-45 + * All fields little endian. + */ +typedef struct CXLEventMemoryModule { + CXLEventRecordHdr hdr; + uint8_t type; + uint8_t health_status; + uint8_t media_status; + uint8_t additional_status; + uint8_t life_used; + int16_t temperature; + uint32_t dirty_shutdown_count; + uint32_t corrected_volatile_error_count; + uint32_t corrected_persistent_error_count; + uint8_t reserved[0x3d]; +} QEMU_PACKED CXLEventMemoryModule; + #endif /* CXL_EVENTS_H */ diff --git a/qapi/cxl.json b/qapi/cxl.json index 2ad310387c..d5b5293eb5 100644 --- a/qapi/cxl.json +++ b/qapi/cxl.json @@ -140,6 +140,59 @@ '*column': 'uint16', '*correction-mask': [ 'uint64' ] }} +## +# @cxl-inject-memory-module-event: +# +# Inject an event record for a Memory Module Event (CXL r3.0 +# 8.2.9.2.1.3). This event includes a copy of the Device Health +# info at the time of the event. +# +# @path: CXL type 3 device canonical QOM path +# +# @log: Event Log to add the event to +# +# @flags: Event Record Flags. See CXL r3.0 Table 8-42 Common Event +# Record Format, Event Record Flags for subfield definitions. +# +# @type: Device Event Type. See CXL r3.0 Table 8-45 Memory Module +# Event Record for bit definitions for bit definiions. +# +# @health-status: Overall health summary bitmap. See CXL r3.0 Table +# 8-100 Get Health Info Output Payload, Health Status for bit +# definitions. +# +# @media-status: Overall media health summary. See CXL r3.0 Table +# 8-100 Get Health Info Output Payload, Media Status for bit +# definitions. +# +# @additional-status: See CXL r3.0 Table 8-100 Get Health Info Output +# Payload, Additional Status for subfield definitions. +# +# @life-used: Percentage (0-100) of factory expected life span. +# +# @temperature: Device temperature in degrees Celsius. +# +# @dirty-shutdown-count: Number of times the device has been unable +# to determine whether data loss may have occurred. +# +# @corrected-volatile-error-count: Total number of correctable errors +# in volatile memory. +# +# @corrected-persistent-error-count: Total number of correctable +# errors in persistent memory +# +# Since: 8.1 +## +{ 'command': 'cxl-inject-memory-module-event', + 'data': { 'path': 'str', 'log': 'CxlEventLog', 'flags' : 'uint8', + 'type': 'uint8', 'health-status': 'uint8', + 'media-status': 'uint8', 'additional-status': 'uint8', + 'life-used': 'uint8', 'temperature' : 'int16', + 'dirty-shutdown-count': 'uint32', + 'corrected-volatile-error-count': 'uint32', + 'corrected-persistent-error-count': 'uint32' + }} + ## # @cxl-inject-poison: # From 5c33f9783ace0b5e077060b220978d94fecb3e81 Mon Sep 17 00:00:00 2001 From: Gowrishankar Muthukrishnan Date: Tue, 16 May 2023 14:01:39 +0530 Subject: [PATCH 12/53] cryptodev-vhost-user: add asymmetric crypto support Add asymmetric crypto support in vhost_user backend. Signed-off-by: Gowrishankar Muthukrishnan Message-Id: <20230516083139.2349744-1-gmuthukrishn@marvell.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- backends/cryptodev-vhost-user.c | 9 ++-- hw/virtio/vhost-user.c | 75 +++++++++++++++++++++++++++------ 2 files changed, 66 insertions(+), 18 deletions(-) diff --git a/backends/cryptodev-vhost-user.c b/backends/cryptodev-vhost-user.c index b1d9eb735f..c3283ba84a 100644 --- a/backends/cryptodev-vhost-user.c +++ b/backends/cryptodev-vhost-user.c @@ -232,9 +232,9 @@ static void cryptodev_vhost_user_init( backend->conf.max_auth_key_len = VHOST_USER_MAX_AUTH_KEY_LEN; } -static int64_t cryptodev_vhost_user_sym_create_session( +static int64_t cryptodev_vhost_user_crypto_create_session( CryptoDevBackend *backend, - CryptoDevBackendSymSessionInfo *sess_info, + CryptoDevBackendSessionInfo *sess_info, uint32_t queue_index, Error **errp) { CryptoDevBackendClient *cc = @@ -266,18 +266,17 @@ static int cryptodev_vhost_user_create_session( void *opaque) { uint32_t op_code = sess_info->op_code; - CryptoDevBackendSymSessionInfo *sym_sess_info; int64_t ret; Error *local_error = NULL; int status; switch (op_code) { case VIRTIO_CRYPTO_CIPHER_CREATE_SESSION: + case VIRTIO_CRYPTO_AKCIPHER_CREATE_SESSION: case VIRTIO_CRYPTO_HASH_CREATE_SESSION: case VIRTIO_CRYPTO_MAC_CREATE_SESSION: case VIRTIO_CRYPTO_AEAD_CREATE_SESSION: - sym_sess_info = &sess_info->u.sym_sess_info; - ret = cryptodev_vhost_user_sym_create_session(backend, sym_sess_info, + ret = cryptodev_vhost_user_crypto_create_session(backend, sess_info, queue_index, &local_error); break; diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index 74a2a28663..2ad75a7964 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -11,6 +11,7 @@ #include "qemu/osdep.h" #include "qapi/error.h" #include "hw/virtio/vhost.h" +#include "hw/virtio/virtio-crypto.h" #include "hw/virtio/vhost-user.h" #include "hw/virtio/vhost-backend.h" #include "hw/virtio/virtio.h" @@ -163,13 +164,24 @@ typedef struct VhostUserConfig { #define VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN 512 #define VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN 64 +#define VHOST_CRYPTO_ASYM_MAX_KEY_LEN 1024 typedef struct VhostUserCryptoSession { + uint64_t op_code; + union { + struct { + CryptoDevBackendSymSessionInfo session_setup_data; + uint8_t key[VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN]; + uint8_t auth_key[VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN]; + } sym; + struct { + CryptoDevBackendAsymSessionInfo session_setup_data; + uint8_t key[VHOST_CRYPTO_ASYM_MAX_KEY_LEN]; + } asym; + } u; + /* session id for success, -1 on errors */ int64_t session_id; - CryptoDevBackendSymSessionInfo session_setup_data; - uint8_t key[VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN]; - uint8_t auth_key[VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN]; } VhostUserCryptoSession; static VhostUserConfig c __attribute__ ((unused)); @@ -2357,7 +2369,7 @@ static int vhost_user_crypto_create_session(struct vhost_dev *dev, int ret; bool crypto_session = virtio_has_feature(dev->protocol_features, VHOST_USER_PROTOCOL_F_CRYPTO_SESSION); - CryptoDevBackendSymSessionInfo *sess_info = session_info; + CryptoDevBackendSessionInfo *backend_info = session_info; VhostUserMsg msg = { .hdr.request = VHOST_USER_CREATE_CRYPTO_SESSION, .hdr.flags = VHOST_USER_VERSION, @@ -2371,16 +2383,53 @@ static int vhost_user_crypto_create_session(struct vhost_dev *dev, return -ENOTSUP; } - memcpy(&msg.payload.session.session_setup_data, sess_info, - sizeof(CryptoDevBackendSymSessionInfo)); - if (sess_info->key_len) { - memcpy(&msg.payload.session.key, sess_info->cipher_key, - sess_info->key_len); - } - if (sess_info->auth_key_len > 0) { - memcpy(&msg.payload.session.auth_key, sess_info->auth_key, - sess_info->auth_key_len); + if (backend_info->op_code == VIRTIO_CRYPTO_AKCIPHER_CREATE_SESSION) { + CryptoDevBackendAsymSessionInfo *sess = &backend_info->u.asym_sess_info; + size_t keylen; + + memcpy(&msg.payload.session.u.asym.session_setup_data, sess, + sizeof(CryptoDevBackendAsymSessionInfo)); + if (sess->keylen) { + keylen = sizeof(msg.payload.session.u.asym.key); + if (sess->keylen > keylen) { + error_report("Unsupported asymmetric key size"); + return -ENOTSUP; + } + + memcpy(&msg.payload.session.u.asym.key, sess->key, + sess->keylen); + } + } else { + CryptoDevBackendSymSessionInfo *sess = &backend_info->u.sym_sess_info; + size_t keylen; + + memcpy(&msg.payload.session.u.sym.session_setup_data, sess, + sizeof(CryptoDevBackendSymSessionInfo)); + if (sess->key_len) { + keylen = sizeof(msg.payload.session.u.sym.key); + if (sess->key_len > keylen) { + error_report("Unsupported cipher key size"); + return -ENOTSUP; + } + + memcpy(&msg.payload.session.u.sym.key, sess->cipher_key, + sess->key_len); + } + + if (sess->auth_key_len > 0) { + keylen = sizeof(msg.payload.session.u.sym.auth_key); + if (sess->auth_key_len > keylen) { + error_report("Unsupported auth key size"); + return -ENOTSUP; + } + + memcpy(&msg.payload.session.u.sym.auth_key, sess->auth_key, + sess->auth_key_len); + } } + + msg.payload.session.op_code = backend_info->op_code; + msg.payload.session.session_id = backend_info->session_id; ret = vhost_user_write(dev, &msg, NULL, 0); if (ret < 0) { error_report("vhost_user_write() return %d, create session failed", From b3b408ffb9291e887029051a522a2c968a816d22 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Wed, 24 May 2023 11:37:35 +0200 Subject: [PATCH 13/53] softmmu: Introduce qemu_target_page_mask() helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since TARGET_PAGE_MASK is poisoned in target-agnostic code, introduce the qemu_target_page_mask() helper to get this value from target-agnostic code at runtime. Reviewed-by: Thomas Huth Signed-off-by: Philippe Mathieu-Daudé Message-Id: <20230524093744.88442-2-philmd@linaro.org> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Alex Bennée --- include/exec/target_page.h | 1 + softmmu/physmem.c | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/include/exec/target_page.h b/include/exec/target_page.h index bbf37aea17..98ffbb5c23 100644 --- a/include/exec/target_page.h +++ b/include/exec/target_page.h @@ -15,6 +15,7 @@ #define EXEC_TARGET_PAGE_H size_t qemu_target_page_size(void); +int qemu_target_page_mask(void); int qemu_target_page_bits(void); int qemu_target_page_bits_min(void); diff --git a/softmmu/physmem.c b/softmmu/physmem.c index 6bdd944fe8..bda475a719 100644 --- a/softmmu/physmem.c +++ b/softmmu/physmem.c @@ -3359,6 +3359,11 @@ size_t qemu_target_page_size(void) return TARGET_PAGE_SIZE; } +int qemu_target_page_mask(void) +{ + return TARGET_PAGE_MASK; +} + int qemu_target_page_bits(void) { return TARGET_PAGE_BITS; From 8f691f1cb76f0591874e91eb18569a150bb3cfe0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Wed, 24 May 2023 11:37:36 +0200 Subject: [PATCH 14/53] hw/scsi: Introduce VHOST_SCSI_COMMON symbol in Kconfig MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of adding 'vhost-scsi-common.c' twice (for VHOST_SCSI and VHOST_USER_SCSI), have it depend on VHOST_SCSI_COMMON, selected by both symbols. Signed-off-by: Philippe Mathieu-Daudé Reviewed-by: Thomas Huth Reviewed-by: Richard Henderson Message-Id: <20230524093744.88442-3-philmd@linaro.org> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Alex Bennée --- hw/scsi/Kconfig | 6 ++++++ hw/scsi/meson.build | 6 ++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/hw/scsi/Kconfig b/hw/scsi/Kconfig index e7b34dc8e2..1feab84c4c 100644 --- a/hw/scsi/Kconfig +++ b/hw/scsi/Kconfig @@ -48,13 +48,19 @@ config VIRTIO_SCSI depends on VIRTIO select SCSI +config VHOST_SCSI_COMMON + bool + depends on VIRTIO + config VHOST_SCSI bool default y + select VHOST_SCSI_COMMON depends on VIRTIO && VHOST_KERNEL config VHOST_USER_SCSI bool # Only PCI devices are provided for now default y if VIRTIO_PCI + select VHOST_SCSI_COMMON depends on VIRTIO && VHOST_USER && LINUX diff --git a/hw/scsi/meson.build b/hw/scsi/meson.build index 7a1e7f13f0..2a005420d2 100644 --- a/hw/scsi/meson.build +++ b/hw/scsi/meson.build @@ -17,8 +17,10 @@ specific_scsi_ss = ss.source_set() virtio_scsi_ss = ss.source_set() virtio_scsi_ss.add(files('virtio-scsi.c', 'virtio-scsi-dataplane.c')) -virtio_scsi_ss.add(when: 'CONFIG_VHOST_SCSI', if_true: files('vhost-scsi-common.c', 'vhost-scsi.c')) -virtio_scsi_ss.add(when: 'CONFIG_VHOST_USER_SCSI', if_true: files('vhost-scsi-common.c', 'vhost-user-scsi.c')) + +virtio_scsi_ss.add(when: 'CONFIG_VHOST_SCSI_COMMON', if_true: files('vhost-scsi-common.c')) +virtio_scsi_ss.add(when: 'CONFIG_VHOST_SCSI', if_true: files('vhost-scsi.c')) +virtio_scsi_ss.add(when: 'CONFIG_VHOST_USER_SCSI', if_true: files('vhost-user-scsi.c')) specific_scsi_ss.add_all(when: 'CONFIG_VIRTIO_SCSI', if_true: virtio_scsi_ss) specific_scsi_ss.add(when: 'CONFIG_SPAPR_VSCSI', if_true: files('spapr_vscsi.c')) From 04ca164ad3fc5309732e482534410a36b4713b11 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Wed, 24 May 2023 11:37:37 +0200 Subject: [PATCH 15/53] hw/scsi: Rearrange meson.build MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We will modify this file shortly. Re-arrange it slightly first, declaring source sets first. No logical change. Reviewed-by: Richard Henderson Signed-off-by: Philippe Mathieu-Daudé Message-Id: <20230524093744.88442-4-philmd@linaro.org> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Thomas Huth Reviewed-by: Alex Bennée --- hw/scsi/meson.build | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/hw/scsi/meson.build b/hw/scsi/meson.build index 2a005420d2..d9b5673c14 100644 --- a/hw/scsi/meson.build +++ b/hw/scsi/meson.build @@ -1,4 +1,7 @@ scsi_ss = ss.source_set() +specific_scsi_ss = ss.source_set() +virtio_scsi_ss = ss.source_set() + scsi_ss.add(files( 'emulation.c', 'scsi-bus.c', @@ -11,18 +14,15 @@ scsi_ss.add(when: 'CONFIG_LSI_SCSI_PCI', if_true: files('lsi53c895a.c')) scsi_ss.add(when: 'CONFIG_MEGASAS_SCSI_PCI', if_true: files('megasas.c')) scsi_ss.add(when: 'CONFIG_MPTSAS_SCSI_PCI', if_true: files('mptsas.c', 'mptconfig.c', 'mptendian.c')) scsi_ss.add(when: 'CONFIG_VMW_PVSCSI_SCSI_PCI', if_true: files('vmw_pvscsi.c')) -system_ss.add_all(when: 'CONFIG_SCSI', if_true: scsi_ss) -specific_scsi_ss = ss.source_set() - -virtio_scsi_ss = ss.source_set() virtio_scsi_ss.add(files('virtio-scsi.c', 'virtio-scsi-dataplane.c')) - virtio_scsi_ss.add(when: 'CONFIG_VHOST_SCSI_COMMON', if_true: files('vhost-scsi-common.c')) virtio_scsi_ss.add(when: 'CONFIG_VHOST_SCSI', if_true: files('vhost-scsi.c')) virtio_scsi_ss.add(when: 'CONFIG_VHOST_USER_SCSI', if_true: files('vhost-user-scsi.c')) + specific_scsi_ss.add_all(when: 'CONFIG_VIRTIO_SCSI', if_true: virtio_scsi_ss) specific_scsi_ss.add(when: 'CONFIG_SPAPR_VSCSI', if_true: files('spapr_vscsi.c')) +system_ss.add_all(when: 'CONFIG_SCSI', if_true: scsi_ss) specific_ss.add_all(when: 'CONFIG_SCSI', if_true: specific_scsi_ss) From 5268f5f5224ed67de63e0b91b05e410a97dbb976 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Wed, 24 May 2023 11:37:38 +0200 Subject: [PATCH 16/53] hw/scsi: Rename target-specific source set as 'specific_virtio_scsi_ss' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Following the SCSI variable named '[specific_]scsi_ss', rename the target-specific VirtIO/SCSI set prefixed with 'specific_'. This will help when adding target-agnostic VirtIO/SCSI set in few commits. No logical change. Signed-off-by: Philippe Mathieu-Daudé Reviewed-by: Richard Henderson Message-Id: <20230524093744.88442-5-philmd@linaro.org> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Thomas Huth --- hw/scsi/meson.build | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/hw/scsi/meson.build b/hw/scsi/meson.build index d9b5673c14..43746700be 100644 --- a/hw/scsi/meson.build +++ b/hw/scsi/meson.build @@ -1,6 +1,6 @@ scsi_ss = ss.source_set() specific_scsi_ss = ss.source_set() -virtio_scsi_ss = ss.source_set() +specific_virtio_scsi_ss = ss.source_set() scsi_ss.add(files( 'emulation.c', @@ -15,12 +15,12 @@ scsi_ss.add(when: 'CONFIG_MEGASAS_SCSI_PCI', if_true: files('megasas.c')) scsi_ss.add(when: 'CONFIG_MPTSAS_SCSI_PCI', if_true: files('mptsas.c', 'mptconfig.c', 'mptendian.c')) scsi_ss.add(when: 'CONFIG_VMW_PVSCSI_SCSI_PCI', if_true: files('vmw_pvscsi.c')) -virtio_scsi_ss.add(files('virtio-scsi.c', 'virtio-scsi-dataplane.c')) -virtio_scsi_ss.add(when: 'CONFIG_VHOST_SCSI_COMMON', if_true: files('vhost-scsi-common.c')) -virtio_scsi_ss.add(when: 'CONFIG_VHOST_SCSI', if_true: files('vhost-scsi.c')) -virtio_scsi_ss.add(when: 'CONFIG_VHOST_USER_SCSI', if_true: files('vhost-user-scsi.c')) +specific_virtio_scsi_ss.add(files('virtio-scsi.c', 'virtio-scsi-dataplane.c')) +specific_virtio_scsi_ss.add(when: 'CONFIG_VHOST_SCSI_COMMON', if_true: files('vhost-scsi-common.c')) +specific_virtio_scsi_ss.add(when: 'CONFIG_VHOST_SCSI', if_true: files('vhost-scsi.c')) +specific_virtio_scsi_ss.add(when: 'CONFIG_VHOST_USER_SCSI', if_true: files('vhost-user-scsi.c')) -specific_scsi_ss.add_all(when: 'CONFIG_VIRTIO_SCSI', if_true: virtio_scsi_ss) +specific_scsi_ss.add_all(when: 'CONFIG_VIRTIO_SCSI', if_true: specific_virtio_scsi_ss) specific_scsi_ss.add(when: 'CONFIG_SPAPR_VSCSI', if_true: files('spapr_vscsi.c')) From 6df956299a751c1eff03a8ea791a0182a688a7cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Wed, 24 May 2023 11:37:39 +0200 Subject: [PATCH 17/53] hw/virtio: Introduce VHOST_VSOCK_COMMON symbol in Kconfig MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of adding 'vhost-vsock-common.c' twice (for VHOST_VSOCK and VHOST_USER_VSOCK), have it depend on VHOST_VSOCK_COMMON, selected by both symbols. Signed-off-by: Philippe Mathieu-Daudé Reviewed-by: Richard Henderson Reviewed-by: Thomas Huth Message-Id: <20230524093744.88442-6-philmd@linaro.org> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Alex Bennée Reviewed-by: Stefano Garzarella --- hw/virtio/Kconfig | 6 ++++++ hw/virtio/meson.build | 5 +++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/hw/virtio/Kconfig b/hw/virtio/Kconfig index 89e9e426d8..de7a35429a 100644 --- a/hw/virtio/Kconfig +++ b/hw/virtio/Kconfig @@ -56,14 +56,20 @@ config VIRTIO_MEM depends on VIRTIO_MEM_SUPPORTED select MEM_DEVICE +config VHOST_VSOCK_COMMON + bool + depends on VIRTIO + config VHOST_VSOCK bool default y + select VHOST_VSOCK_COMMON depends on VIRTIO && VHOST_KERNEL config VHOST_USER_VSOCK bool default y + select VHOST_VSOCK_COMMON depends on VIRTIO && VHOST_USER config VHOST_USER_I2C diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build index e83c37fffd..a6ea5beae7 100644 --- a/hw/virtio/meson.build +++ b/hw/virtio/meson.build @@ -23,8 +23,9 @@ specific_virtio_ss.add(when: 'CONFIG_VIRTIO_BALLOON', if_true: files('virtio-bal specific_virtio_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('virtio-crypto.c')) specific_virtio_ss.add(when: 'CONFIG_VHOST_USER_FS', if_true: files('vhost-user-fs.c')) specific_virtio_ss.add(when: 'CONFIG_VIRTIO_PMEM', if_true: files('virtio-pmem.c')) -specific_virtio_ss.add(when: 'CONFIG_VHOST_VSOCK', if_true: files('vhost-vsock.c', 'vhost-vsock-common.c')) -specific_virtio_ss.add(when: 'CONFIG_VHOST_USER_VSOCK', if_true: files('vhost-user-vsock.c', 'vhost-vsock-common.c')) +specific_virtio_ss.add(when: 'CONFIG_VHOST_VSOCK_COMMON', if_true: files('vhost-vsock-common.c')) +specific_virtio_ss.add(when: 'CONFIG_VHOST_VSOCK', if_true: files('vhost-vsock.c')) +specific_virtio_ss.add(when: 'CONFIG_VHOST_USER_VSOCK', if_true: files('vhost-user-vsock.c')) specific_virtio_ss.add(when: 'CONFIG_VIRTIO_RNG', if_true: files('virtio-rng.c')) specific_virtio_ss.add(when: 'CONFIG_VIRTIO_IOMMU', if_true: files('virtio-iommu.c')) specific_virtio_ss.add(when: 'CONFIG_VIRTIO_MEM', if_true: files('virtio-mem.c')) From 21e6435066bd3818969b520b69415ba62a85cd24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Wed, 24 May 2023 11:37:40 +0200 Subject: [PATCH 18/53] hw/virtio/virtio-mem: Use qemu_ram_get_fd() helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Avoid accessing RAMBlock internals, use the provided qemu_ram_get_fd() getter to get the file descriptor. Signed-off-by: Philippe Mathieu-Daudé Reviewed-by: David Hildenbrand Reviewed-by: Richard Henderson Message-Id: <20230524093744.88442-7-philmd@linaro.org> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Alex Bennée --- hw/virtio/virtio-mem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/virtio/virtio-mem.c b/hw/virtio/virtio-mem.c index 538b695c29..74e63bd47a 100644 --- a/hw/virtio/virtio-mem.c +++ b/hw/virtio/virtio-mem.c @@ -135,7 +135,7 @@ static bool virtio_mem_has_shared_zeropage(RAMBlock *rb) * anonymous RAM. In any other case, reading unplugged *can* populate a * fresh page, consuming actual memory. */ - return !qemu_ram_is_shared(rb) && rb->fd < 0 && + return !qemu_ram_is_shared(rb) && qemu_ram_get_fd(rb) < 0 && qemu_ram_pagesize(rb) == qemu_real_host_page_size(); } #endif /* VIRTIO_MEM_HAS_LEGACY_GUESTS */ From a64da64ac671e81f3bae0fbf4e99fe7f8b65b668 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Wed, 24 May 2023 11:37:41 +0200 Subject: [PATCH 19/53] hw/virtio/vhost-vsock: Include missing 'virtio/virtio-bus.h' header MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of having "virtio/virtio-bus.h" implicitly included, explicitly include it, to avoid when rearranging headers: hw/virtio/vhost-vsock-common.c: In function ‘vhost_vsock_common_start’: hw/virtio/vhost-vsock-common.c:51:5: error: unknown type name ‘VirtioBusClass’; did you mean ‘VirtioDeviceClass’? 51 | VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); | ^~~~~~~~~~~~~~ | VirtioDeviceClass hw/virtio/vhost-vsock-common.c:51:25: error: implicit declaration of function ‘VIRTIO_BUS_GET_CLASS’; did you mean ‘VIRTIO_DEVICE_CLASS’? [-Werror=implicit-function-declaration] 51 | VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); | ^~~~~~~~~~~~~~~~~~~~ | VIRTIO_DEVICE_CLASS Reviewed-by: Richard Henderson Signed-off-by: Philippe Mathieu-Daudé Reviewed-by: Thomas Huth Message-Id: <20230524093744.88442-8-philmd@linaro.org> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Alex Bennée Reviewed-by: Stefano Garzarella --- hw/virtio/vhost-vsock-common.c | 1 + 1 file changed, 1 insertion(+) diff --git a/hw/virtio/vhost-vsock-common.c b/hw/virtio/vhost-vsock-common.c index d2b5519d5a..e89af9b329 100644 --- a/hw/virtio/vhost-vsock-common.c +++ b/hw/virtio/vhost-vsock-common.c @@ -11,6 +11,7 @@ #include "qemu/osdep.h" #include "standard-headers/linux/virtio_vsock.h" #include "qapi/error.h" +#include "hw/virtio/virtio-bus.h" #include "hw/virtio/virtio-access.h" #include "qemu/error-report.h" #include "hw/qdev-properties.h" From e414ed2c47da70381a66846cf9353f7612daa4b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Wed, 24 May 2023 11:37:42 +0200 Subject: [PATCH 20/53] hw/virtio/virtio-iommu: Use target-agnostic qemu_target_page_mask() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In order to have virtio-iommu.c become target-agnostic, we need to avoid using TARGET_PAGE_MASK. Get it with the qemu_target_page_mask() helper. Signed-off-by: Philippe Mathieu-Daudé Reviewed-by: Richard Henderson Reviewed-by: Eric Auger Message-Id: <20230524093744.88442-9-philmd@linaro.org> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Alex Bennée --- hw/virtio/virtio-iommu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c index 1cd258135d..85905a9e3d 100644 --- a/hw/virtio/virtio-iommu.c +++ b/hw/virtio/virtio-iommu.c @@ -20,6 +20,7 @@ #include "qemu/osdep.h" #include "qemu/log.h" #include "qemu/iov.h" +#include "exec/target_page.h" #include "hw/qdev-properties.h" #include "hw/virtio/virtio.h" #include "sysemu/kvm.h" @@ -1164,7 +1165,7 @@ static void virtio_iommu_device_realize(DeviceState *dev, Error **errp) * in vfio realize */ s->config.bypass = s->boot_bypass; - s->config.page_size_mask = TARGET_PAGE_MASK; + s->config.page_size_mask = qemu_target_page_mask(); s->config.input_range.end = UINT64_MAX; s->config.domain_range.end = UINT32_MAX; s->config.probe_size = VIOMMU_PROBE_SIZE; From 4ee4667ded5841ff9278d4e4a4c765a3220023bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Wed, 24 May 2023 11:37:43 +0200 Subject: [PATCH 21/53] hw/virtio: Remove unnecessary 'virtio-access.h' header MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit None of these files use the VirtIO Load/Store API declared by "hw/virtio/virtio-access.h". This header probably crept in via copy/pasting, remove it. Note, "virtio-access.h" is target-specific, so any file including it also become tainted as target-specific. Signed-off-by: Philippe Mathieu-Daudé Acked-by: Richard Henderson Tested-by: Thomas Huth Message-Id: <20230524093744.88442-10-philmd@linaro.org> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Alex Bennée --- hw/block/dataplane/virtio-blk.c | 1 - hw/s390x/virtio-ccw.c | 1 - hw/scsi/vhost-scsi.c | 1 - hw/scsi/vhost-user-scsi.c | 1 - hw/scsi/virtio-scsi-dataplane.c | 1 - hw/virtio/vdpa-dev.c | 1 - hw/virtio/vhost-vdpa.c | 1 - hw/virtio/vhost-vsock-common.c | 1 - hw/virtio/vhost.c | 1 - hw/virtio/virtio-crypto.c | 1 - hw/virtio/virtio-iommu.c | 1 - hw/virtio/virtio-mem.c | 1 - 12 files changed, 12 deletions(-) diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c index b90456c08c..c227b39408 100644 --- a/hw/block/dataplane/virtio-blk.c +++ b/hw/block/dataplane/virtio-blk.c @@ -19,7 +19,6 @@ #include "qemu/main-loop.h" #include "qemu/thread.h" #include "qemu/error-report.h" -#include "hw/virtio/virtio-access.h" #include "hw/virtio/virtio-blk.h" #include "virtio-blk.h" #include "block/aio.h" diff --git a/hw/s390x/virtio-ccw.c b/hw/s390x/virtio-ccw.c index f44de1a8c1..17c548b84f 100644 --- a/hw/s390x/virtio-ccw.c +++ b/hw/s390x/virtio-ccw.c @@ -22,7 +22,6 @@ #include "qemu/error-report.h" #include "qemu/log.h" #include "qemu/module.h" -#include "hw/virtio/virtio-access.h" #include "hw/virtio/virtio-bus.h" #include "hw/s390x/adapter.h" #include "hw/s390x/s390_flic.h" diff --git a/hw/scsi/vhost-scsi.c b/hw/scsi/vhost-scsi.c index 6a0fd0dfb1..443f67daa4 100644 --- a/hw/scsi/vhost-scsi.c +++ b/hw/scsi/vhost-scsi.c @@ -26,7 +26,6 @@ #include "hw/virtio/vhost.h" #include "hw/virtio/virtio-scsi.h" #include "hw/virtio/virtio-bus.h" -#include "hw/virtio/virtio-access.h" #include "hw/fw-path-provider.h" #include "hw/qdev-properties.h" #include "qemu/cutils.h" diff --git a/hw/scsi/vhost-user-scsi.c b/hw/scsi/vhost-user-scsi.c index b7a71a802c..ee99b19e7a 100644 --- a/hw/scsi/vhost-user-scsi.c +++ b/hw/scsi/vhost-user-scsi.c @@ -26,7 +26,6 @@ #include "hw/virtio/vhost-backend.h" #include "hw/virtio/vhost-user-scsi.h" #include "hw/virtio/virtio.h" -#include "hw/virtio/virtio-access.h" #include "chardev/char-fe.h" #include "sysemu/sysemu.h" diff --git a/hw/scsi/virtio-scsi-dataplane.c b/hw/scsi/virtio-scsi-dataplane.c index d55de4c8ca..1e684beebe 100644 --- a/hw/scsi/virtio-scsi-dataplane.c +++ b/hw/scsi/virtio-scsi-dataplane.c @@ -19,7 +19,6 @@ #include "hw/scsi/scsi.h" #include "scsi/constants.h" #include "hw/virtio/virtio-bus.h" -#include "hw/virtio/virtio-access.h" /* Context: QEMU global mutex held */ void virtio_scsi_dataplane_setup(VirtIOSCSI *s, Error **errp) diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c index 01b41eb0f1..e08e830006 100644 --- a/hw/virtio/vdpa-dev.c +++ b/hw/virtio/vdpa-dev.c @@ -25,7 +25,6 @@ #include "hw/virtio/vhost.h" #include "hw/virtio/virtio.h" #include "hw/virtio/virtio-bus.h" -#include "hw/virtio/virtio-access.h" #include "hw/virtio/vdpa-dev.h" #include "sysemu/sysemu.h" #include "sysemu/runstate.h" diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c index b3094e8a8b..3c575a9a6e 100644 --- a/hw/virtio/vhost-vdpa.c +++ b/hw/virtio/vhost-vdpa.c @@ -26,7 +26,6 @@ #include "cpu.h" #include "trace.h" #include "qapi/error.h" -#include "hw/virtio/virtio-access.h" /* * Return one past the end of the end of section. Be careful with uint64_t diff --git a/hw/virtio/vhost-vsock-common.c b/hw/virtio/vhost-vsock-common.c index e89af9b329..321262f6b3 100644 --- a/hw/virtio/vhost-vsock-common.c +++ b/hw/virtio/vhost-vsock-common.c @@ -12,7 +12,6 @@ #include "standard-headers/linux/virtio_vsock.h" #include "qapi/error.h" #include "hw/virtio/virtio-bus.h" -#include "hw/virtio/virtio-access.h" #include "qemu/error-report.h" #include "hw/qdev-properties.h" #include "hw/virtio/vhost.h" diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index 23da579ce2..7f3c727777 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -23,7 +23,6 @@ #include "qemu/log.h" #include "standard-headers/linux/vhost_types.h" #include "hw/virtio/virtio-bus.h" -#include "hw/virtio/virtio-access.h" #include "migration/blocker.h" #include "migration/qemu-file-types.h" #include "sysemu/dma.h" diff --git a/hw/virtio/virtio-crypto.c b/hw/virtio/virtio-crypto.c index c729a1f79e..a6d7e1e8ec 100644 --- a/hw/virtio/virtio-crypto.c +++ b/hw/virtio/virtio-crypto.c @@ -21,7 +21,6 @@ #include "hw/virtio/virtio.h" #include "hw/virtio/virtio-crypto.h" #include "hw/qdev-properties.h" -#include "hw/virtio/virtio-access.h" #include "standard-headers/linux/virtio_ids.h" #include "sysemu/cryptodev-vhost.h" diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c index 85905a9e3d..1bbad23f4a 100644 --- a/hw/virtio/virtio-iommu.c +++ b/hw/virtio/virtio-iommu.c @@ -32,7 +32,6 @@ #include "standard-headers/linux/virtio_ids.h" #include "hw/virtio/virtio-bus.h" -#include "hw/virtio/virtio-access.h" #include "hw/virtio/virtio-iommu.h" #include "hw/pci/pci_bus.h" #include "hw/pci/pci.h" diff --git a/hw/virtio/virtio-mem.c b/hw/virtio/virtio-mem.c index 74e63bd47a..12ea58d5ad 100644 --- a/hw/virtio/virtio-mem.c +++ b/hw/virtio/virtio-mem.c @@ -20,7 +20,6 @@ #include "sysemu/reset.h" #include "hw/virtio/virtio.h" #include "hw/virtio/virtio-bus.h" -#include "hw/virtio/virtio-access.h" #include "hw/virtio/virtio-mem.h" #include "qapi/error.h" #include "qapi/visitor.h" From 7a0903f7ea8ac7e5b3191c9a2cfd1751b153f48c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Wed, 24 May 2023 11:37:44 +0200 Subject: [PATCH 22/53] hw/virtio: Build various target-agnostic objects just once MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous commit remove the unnecessary "virtio-access.h" header. These files no longer have target-specific dependency. Move them to the generic 'softmmu_ss' source set. Signed-off-by: Philippe Mathieu-Daudé Reviewed-by: Richard Henderson Reviewed-by: Thomas Huth Message-Id: <20230524093744.88442-11-philmd@linaro.org> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/block/dataplane/meson.build | 2 +- hw/scsi/meson.build | 10 +++++++--- hw/virtio/meson.build | 11 ++++++----- 3 files changed, 14 insertions(+), 9 deletions(-) diff --git a/hw/block/dataplane/meson.build b/hw/block/dataplane/meson.build index 78d7ac1a11..025b3b061b 100644 --- a/hw/block/dataplane/meson.build +++ b/hw/block/dataplane/meson.build @@ -1,2 +1,2 @@ -specific_ss.add(when: 'CONFIG_VIRTIO_BLK', if_true: files('virtio-blk.c')) +system_ss.add(when: 'CONFIG_VIRTIO_BLK', if_true: files('virtio-blk.c')) specific_ss.add(when: 'CONFIG_XEN_BUS', if_true: files('xen-block.c')) diff --git a/hw/scsi/meson.build b/hw/scsi/meson.build index 43746700be..bb7d289aa0 100644 --- a/hw/scsi/meson.build +++ b/hw/scsi/meson.build @@ -1,5 +1,6 @@ scsi_ss = ss.source_set() specific_scsi_ss = ss.source_set() +virtio_scsi_ss = ss.source_set() specific_virtio_scsi_ss = ss.source_set() scsi_ss.add(files( @@ -15,12 +16,15 @@ scsi_ss.add(when: 'CONFIG_MEGASAS_SCSI_PCI', if_true: files('megasas.c')) scsi_ss.add(when: 'CONFIG_MPTSAS_SCSI_PCI', if_true: files('mptsas.c', 'mptconfig.c', 'mptendian.c')) scsi_ss.add(when: 'CONFIG_VMW_PVSCSI_SCSI_PCI', if_true: files('vmw_pvscsi.c')) -specific_virtio_scsi_ss.add(files('virtio-scsi.c', 'virtio-scsi-dataplane.c')) +virtio_scsi_ss.add(files('virtio-scsi-dataplane.c')) +virtio_scsi_ss.add(when: 'CONFIG_VHOST_SCSI', if_true: files('vhost-scsi.c')) +virtio_scsi_ss.add(when: 'CONFIG_VHOST_USER_SCSI', if_true: files('vhost-user-scsi.c')) + +specific_virtio_scsi_ss.add(files('virtio-scsi.c')) specific_virtio_scsi_ss.add(when: 'CONFIG_VHOST_SCSI_COMMON', if_true: files('vhost-scsi-common.c')) -specific_virtio_scsi_ss.add(when: 'CONFIG_VHOST_SCSI', if_true: files('vhost-scsi.c')) -specific_virtio_scsi_ss.add(when: 'CONFIG_VHOST_USER_SCSI', if_true: files('vhost-user-scsi.c')) specific_scsi_ss.add_all(when: 'CONFIG_VIRTIO_SCSI', if_true: specific_virtio_scsi_ss) +scsi_ss.add_all(when: 'CONFIG_VIRTIO_SCSI', if_true: virtio_scsi_ss) specific_scsi_ss.add(when: 'CONFIG_SPAPR_VSCSI', if_true: files('spapr_vscsi.c')) diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build index a6ea5beae7..f32b22f61b 100644 --- a/hw/virtio/meson.build +++ b/hw/virtio/meson.build @@ -2,13 +2,18 @@ softmmu_virtio_ss = ss.source_set() softmmu_virtio_ss.add(files('virtio-bus.c')) softmmu_virtio_ss.add(when: 'CONFIG_VIRTIO_PCI', if_true: files('virtio-pci.c')) softmmu_virtio_ss.add(when: 'CONFIG_VIRTIO_MMIO', if_true: files('virtio-mmio.c')) +softmmu_virtio_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('virtio-crypto.c')) +softmmu_virtio_ss.add(when: 'CONFIG_VHOST_VSOCK_COMMON', if_true: files('vhost-vsock-common.c')) +softmmu_virtio_ss.add(when: 'CONFIG_VIRTIO_IOMMU', if_true: files('virtio-iommu.c')) +softmmu_virtio_ss.add(when: 'CONFIG_VHOST_VDPA_DEV', if_true: files('vdpa-dev.c')) specific_virtio_ss = ss.source_set() specific_virtio_ss.add(files('virtio.c')) specific_virtio_ss.add(files('virtio-config-io.c', 'virtio-qmp.c')) if have_vhost - specific_virtio_ss.add(files('vhost.c', 'vhost-backend.c', 'vhost-iova-tree.c')) + softmmu_virtio_ss.add(files('vhost.c')) + specific_virtio_ss.add(files('vhost-backend.c', 'vhost-iova-tree.c')) if have_vhost_user specific_virtio_ss.add(files('vhost-user.c')) endif @@ -20,20 +25,16 @@ else endif specific_virtio_ss.add(when: 'CONFIG_VIRTIO_BALLOON', if_true: files('virtio-balloon.c')) -specific_virtio_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('virtio-crypto.c')) specific_virtio_ss.add(when: 'CONFIG_VHOST_USER_FS', if_true: files('vhost-user-fs.c')) specific_virtio_ss.add(when: 'CONFIG_VIRTIO_PMEM', if_true: files('virtio-pmem.c')) -specific_virtio_ss.add(when: 'CONFIG_VHOST_VSOCK_COMMON', if_true: files('vhost-vsock-common.c')) specific_virtio_ss.add(when: 'CONFIG_VHOST_VSOCK', if_true: files('vhost-vsock.c')) specific_virtio_ss.add(when: 'CONFIG_VHOST_USER_VSOCK', if_true: files('vhost-user-vsock.c')) specific_virtio_ss.add(when: 'CONFIG_VIRTIO_RNG', if_true: files('virtio-rng.c')) -specific_virtio_ss.add(when: 'CONFIG_VIRTIO_IOMMU', if_true: files('virtio-iommu.c')) specific_virtio_ss.add(when: 'CONFIG_VIRTIO_MEM', if_true: files('virtio-mem.c')) specific_virtio_ss.add(when: 'CONFIG_VHOST_USER_I2C', if_true: files('vhost-user-i2c.c')) specific_virtio_ss.add(when: 'CONFIG_VHOST_USER_RNG', if_true: files('vhost-user-rng.c')) specific_virtio_ss.add(when: 'CONFIG_VHOST_USER_GPIO', if_true: files('vhost-user-gpio.c')) specific_virtio_ss.add(when: ['CONFIG_VIRTIO_PCI', 'CONFIG_VHOST_USER_GPIO'], if_true: files('vhost-user-gpio-pci.c')) -specific_virtio_ss.add(when: 'CONFIG_VHOST_VDPA_DEV', if_true: files('vdpa-dev.c')) virtio_pci_ss = ss.source_set() virtio_pci_ss.add(when: 'CONFIG_VHOST_VSOCK', if_true: files('vhost-vsock-pci.c')) From 1e3ffb34f764f8ac4c003b2b2e6a775b2b073a16 Mon Sep 17 00:00:00 2001 From: Prasad Pandit Date: Mon, 29 May 2023 17:13:32 +0530 Subject: [PATCH 23/53] vhost: release memory_listener object in error path vhost_dev_start function does not release memory_listener object in case of an error. This may crash the guest when vhost is unable to set memory table: stack trace of thread 125653: Program terminated with signal SIGSEGV, Segmentation fault #0 memory_listener_register (qemu-kvm + 0x6cda0f) #1 vhost_dev_start (qemu-kvm + 0x699301) #2 vhost_net_start (qemu-kvm + 0x45b03f) #3 virtio_net_set_status (qemu-kvm + 0x665672) #4 qmp_set_link (qemu-kvm + 0x548fd5) #5 net_vhost_user_event (qemu-kvm + 0x552c45) #6 tcp_chr_connect (qemu-kvm + 0x88d473) #7 tcp_chr_new_client (qemu-kvm + 0x88cf83) #8 tcp_chr_accept (qemu-kvm + 0x88b429) #9 qio_net_listener_channel_func (qemu-kvm + 0x7ac07c) #10 g_main_context_dispatch (libglib-2.0.so.0 + 0x54e2f) Release memory_listener objects in the error path. Signed-off-by: Prasad Pandit Message-Id: <20230529114333.31686-2-ppandit@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Peter Xu Fixes: c471ad0e9b ("vhost_net: device IOTLB support") Cc: qemu-stable@nongnu.org Acked-by: Jason Wang --- hw/virtio/vhost.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index 7f3c727777..7e1f556994 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -2003,6 +2003,9 @@ fail_vq: } fail_mem: + if (vhost_dev_has_iommu(hdev)) { + memory_listener_unregister(&hdev->iommu_listener); + } fail_features: vdev->vhost_started = false; hdev->started = false; From 77ece20ba04582d94c345ac0107ddff2fd18d27a Mon Sep 17 00:00:00 2001 From: Prasad Pandit Date: Mon, 29 May 2023 17:13:33 +0530 Subject: [PATCH 24/53] vhost: release virtqueue objects in error path vhost_dev_start function does not release virtqueue objects when event_notifier_init() function fails. Release virtqueue objects and log a message about function failure. Signed-off-by: Prasad Pandit Message-Id: <20230529114333.31686-3-ppandit@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Fixes: f9a09ca3ea ("vhost: add support for configure interrupt") Reviewed-by: Peter Xu Cc: qemu-stable@nongnu.org Acked-by: Jason Wang --- hw/virtio/vhost.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index 7e1f556994..fb7abc9769 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -1941,7 +1941,8 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings) r = event_notifier_init( &hdev->vqs[VHOST_QUEUE_NUM_CONFIG_INR].masked_config_notifier, 0); if (r < 0) { - return r; + VHOST_OPS_DEBUG(r, "event_notifier_init failed"); + goto fail_vq; } event_notifier_test_and_clear( &hdev->vqs[VHOST_QUEUE_NUM_CONFIG_INR].masked_config_notifier); From 8eb85fb5ac60bfb6ee4c729cc087078d490670c4 Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Mon, 22 May 2023 23:17:40 +0300 Subject: [PATCH 25/53] pci: ROM preallocation for incoming migration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On incoming migration we have the following sequence to load option ROM: 1. On device realize we do normal load ROM from the file 2. Than, on incoming migration we rewrite ROM from the incoming RAM block. If sizes mismatch we fail, like this: Size mismatch: 0000:00:03.0/virtio-net-pci.rom: 0x40000 != 0x80000: Invalid argument This is not ideal when we migrate to updated distribution: we have to keep old ROM files in new distribution and be careful around romfile property to load correct ROM file. Which is loaded actually just to allocate the ROM with correct length. Note, that romsize property doesn't really help: if we try to specify it when default romfile is larger, it fails with something like: romfile "efi-virtio.rom" (160768 bytes) is too large for ROM size 65536 Let's just ignore ROM file when romsize is specified and we are in incoming migration state. In other words, we need only to preallocate ROM of specified size, local ROM file is unrelated. This way: If romsize was specified on source, we just use same commandline as on source, and migration will work independently of local ROM files on target. If romsize was not specified on source (and we have mismatching local ROM file on target host), we have to specify romsize on target to match source romsize. romfile parameter may be kept same as on source or may be dropped, the file is not loaded anyway. As a bonus we avoid extra reading from ROM file on target. Note: when we don't have romsize parameter on source command line and need it for target, it may be calculated as aligned up to power of two size of ROM file on source (if we know, which file is it) or, alternatively it may be retrieved from source QEMU by QMP qom-get command, like { "execute": "qom-get", "arguments": { "path": "/machine/peripheral/CARD_ID/virtio-net-pci.rom[0]", "property": "size" } } Note: we have extra initialization of size variable to zero in pci_add_option_rom to avoid false-positive "error: ‘size’ may be used uninitialized" Suggested-by: Michael S. Tsirkin Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: David Hildenbrand Reviewed-by: Juan Quintela Message-Id: <20230522201740.88960-2-vsementsov@yandex-team.ru> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/pci/pci.c | 79 ++++++++++++++++++++++++++++++---------------------- 1 file changed, 46 insertions(+), 33 deletions(-) diff --git a/hw/pci/pci.c b/hw/pci/pci.c index bf38905b7d..e2eb4c3b4a 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -36,6 +36,7 @@ #include "migration/vmstate.h" #include "net/net.h" #include "sysemu/numa.h" +#include "sysemu/runstate.h" #include "sysemu/sysemu.h" #include "hw/loader.h" #include "qemu/error-report.h" @@ -2308,12 +2309,18 @@ static void pci_patch_ids(PCIDevice *pdev, uint8_t *ptr, uint32_t size) static void pci_add_option_rom(PCIDevice *pdev, bool is_default_rom, Error **errp) { - int64_t size; + int64_t size = 0; g_autofree char *path = NULL; - void *ptr; char name[32]; const VMStateDescription *vmsd; + /* + * In case of incoming migration ROM will come with migration stream, no + * reason to load the file. Neither we want to fail if local ROM file + * mismatches with specified romsize. + */ + bool load_file = !runstate_check(RUN_STATE_INMIGRATE); + if (!pdev->romfile || !strlen(pdev->romfile)) { return; } @@ -2343,32 +2350,35 @@ static void pci_add_option_rom(PCIDevice *pdev, bool is_default_rom, return; } - path = qemu_find_file(QEMU_FILE_TYPE_BIOS, pdev->romfile); - if (path == NULL) { - path = g_strdup(pdev->romfile); - } + if (load_file || pdev->romsize == -1) { + path = qemu_find_file(QEMU_FILE_TYPE_BIOS, pdev->romfile); + if (path == NULL) { + path = g_strdup(pdev->romfile); + } - size = get_image_size(path); - if (size < 0) { - error_setg(errp, "failed to find romfile \"%s\"", pdev->romfile); - return; - } else if (size == 0) { - error_setg(errp, "romfile \"%s\" is empty", pdev->romfile); - return; - } else if (size > 2 * GiB) { - error_setg(errp, "romfile \"%s\" too large (size cannot exceed 2 GiB)", - pdev->romfile); - return; - } - if (pdev->romsize != -1) { - if (size > pdev->romsize) { - error_setg(errp, "romfile \"%s\" (%u bytes) " - "is too large for ROM size %u", - pdev->romfile, (uint32_t)size, pdev->romsize); + size = get_image_size(path); + if (size < 0) { + error_setg(errp, "failed to find romfile \"%s\"", pdev->romfile); + return; + } else if (size == 0) { + error_setg(errp, "romfile \"%s\" is empty", pdev->romfile); + return; + } else if (size > 2 * GiB) { + error_setg(errp, + "romfile \"%s\" too large (size cannot exceed 2 GiB)", + pdev->romfile); return; } - } else { - pdev->romsize = pow2ceil(size); + if (pdev->romsize != -1) { + if (size > pdev->romsize) { + error_setg(errp, "romfile \"%s\" (%u bytes) " + "is too large for ROM size %u", + pdev->romfile, (uint32_t)size, pdev->romsize); + return; + } + } else { + pdev->romsize = pow2ceil(size); + } } vmsd = qdev_get_vmsd(DEVICE(pdev)); @@ -2379,15 +2389,18 @@ static void pci_add_option_rom(PCIDevice *pdev, bool is_default_rom, memory_region_init_rom(&pdev->rom, OBJECT(pdev), name, pdev->romsize, &error_fatal); - ptr = memory_region_get_ram_ptr(&pdev->rom); - if (load_image_size(path, ptr, size) < 0) { - error_setg(errp, "failed to load romfile \"%s\"", pdev->romfile); - return; - } + if (load_file) { + void *ptr = memory_region_get_ram_ptr(&pdev->rom); - if (is_default_rom) { - /* Only the default rom images will be patched (if needed). */ - pci_patch_ids(pdev, ptr, size); + if (load_image_size(path, ptr, size) < 0) { + error_setg(errp, "failed to load romfile \"%s\"", pdev->romfile); + return; + } + + if (is_default_rom) { + /* Only the default rom images will be patched (if needed). */ + pci_patch_ids(pdev, ptr, size); + } } pci_register_bar(pdev, PCI_ROM_SLOT, 0, &pdev->rom); From 25c893037b89ddd4e42927a2a9b524dbbc0c34a3 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 23 May 2023 20:30:36 +0200 Subject: [PATCH 26/53] virtio-mem: Simplify bitmap handling and virtio_mem_set_block_state() Let's separate plug and unplug handling to prepare for future changes and make the code a bit easier to read -- working on block states (plugged/unplugged) instead of on a bitmap. Cc: "Michael S. Tsirkin" Cc: Gavin Shan Signed-off-by: David Hildenbrand Message-Id: <20230523183036.517957-1-david@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/virtio-mem.c | 112 +++++++++++++++++++++++------------------ 1 file changed, 64 insertions(+), 48 deletions(-) diff --git a/hw/virtio/virtio-mem.c b/hw/virtio/virtio-mem.c index 12ea58d5ad..ec0ae32589 100644 --- a/hw/virtio/virtio-mem.c +++ b/hw/virtio/virtio-mem.c @@ -398,33 +398,46 @@ static void virtio_mem_notify_unplug_all(VirtIOMEM *vmem) } } -static bool virtio_mem_test_bitmap(const VirtIOMEM *vmem, uint64_t start_gpa, - uint64_t size, bool plugged) +static bool virtio_mem_is_range_plugged(const VirtIOMEM *vmem, + uint64_t start_gpa, uint64_t size) { const unsigned long first_bit = (start_gpa - vmem->addr) / vmem->block_size; const unsigned long last_bit = first_bit + (size / vmem->block_size) - 1; unsigned long found_bit; /* We fake a shorter bitmap to avoid searching too far. */ - if (plugged) { - found_bit = find_next_zero_bit(vmem->bitmap, last_bit + 1, first_bit); - } else { - found_bit = find_next_bit(vmem->bitmap, last_bit + 1, first_bit); - } + found_bit = find_next_zero_bit(vmem->bitmap, last_bit + 1, first_bit); return found_bit > last_bit; } -static void virtio_mem_set_bitmap(VirtIOMEM *vmem, uint64_t start_gpa, - uint64_t size, bool plugged) +static bool virtio_mem_is_range_unplugged(const VirtIOMEM *vmem, + uint64_t start_gpa, uint64_t size) +{ + const unsigned long first_bit = (start_gpa - vmem->addr) / vmem->block_size; + const unsigned long last_bit = first_bit + (size / vmem->block_size) - 1; + unsigned long found_bit; + + /* We fake a shorter bitmap to avoid searching too far. */ + found_bit = find_next_bit(vmem->bitmap, last_bit + 1, first_bit); + return found_bit > last_bit; +} + +static void virtio_mem_set_range_plugged(VirtIOMEM *vmem, uint64_t start_gpa, + uint64_t size) { const unsigned long bit = (start_gpa - vmem->addr) / vmem->block_size; const unsigned long nbits = size / vmem->block_size; - if (plugged) { - bitmap_set(vmem->bitmap, bit, nbits); - } else { - bitmap_clear(vmem->bitmap, bit, nbits); - } + bitmap_set(vmem->bitmap, bit, nbits); +} + +static void virtio_mem_set_range_unplugged(VirtIOMEM *vmem, uint64_t start_gpa, + uint64_t size) +{ + const unsigned long bit = (start_gpa - vmem->addr) / vmem->block_size; + const unsigned long nbits = size / vmem->block_size; + + bitmap_clear(vmem->bitmap, bit, nbits); } static void virtio_mem_send_response(VirtIOMEM *vmem, VirtQueueElement *elem, @@ -474,6 +487,7 @@ static int virtio_mem_set_block_state(VirtIOMEM *vmem, uint64_t start_gpa, { const uint64_t offset = start_gpa - vmem->addr; RAMBlock *rb = vmem->memdev->mr.ram_block; + int ret = 0; if (virtio_mem_is_busy()) { return -EBUSY; @@ -484,42 +498,43 @@ static int virtio_mem_set_block_state(VirtIOMEM *vmem, uint64_t start_gpa, return -EBUSY; } virtio_mem_notify_unplug(vmem, offset, size); - } else { - int ret = 0; + virtio_mem_set_range_unplugged(vmem, start_gpa, size); + return 0; + } - if (vmem->prealloc) { - void *area = memory_region_get_ram_ptr(&vmem->memdev->mr) + offset; - int fd = memory_region_get_fd(&vmem->memdev->mr); - Error *local_err = NULL; + if (vmem->prealloc) { + void *area = memory_region_get_ram_ptr(&vmem->memdev->mr) + offset; + int fd = memory_region_get_fd(&vmem->memdev->mr); + Error *local_err = NULL; - qemu_prealloc_mem(fd, area, size, 1, NULL, &local_err); - if (local_err) { - static bool warned; + qemu_prealloc_mem(fd, area, size, 1, NULL, &local_err); + if (local_err) { + static bool warned; - /* - * Warn only once, we don't want to fill the log with these - * warnings. - */ - if (!warned) { - warn_report_err(local_err); - warned = true; - } else { - error_free(local_err); - } - ret = -EBUSY; + /* + * Warn only once, we don't want to fill the log with these + * warnings. + */ + if (!warned) { + warn_report_err(local_err); + warned = true; + } else { + error_free(local_err); } - } - if (!ret) { - ret = virtio_mem_notify_plug(vmem, offset, size); - } - - if (ret) { - /* Could be preallocation or a notifier populated memory. */ - ram_block_discard_range(vmem->memdev->mr.ram_block, offset, size); - return -EBUSY; + ret = -EBUSY; } } - virtio_mem_set_bitmap(vmem, start_gpa, size, plug); + + if (!ret) { + ret = virtio_mem_notify_plug(vmem, offset, size); + } + if (ret) { + /* Could be preallocation or a notifier populated memory. */ + ram_block_discard_range(vmem->memdev->mr.ram_block, offset, size); + return -EBUSY; + } + + virtio_mem_set_range_plugged(vmem, start_gpa, size); return 0; } @@ -538,7 +553,8 @@ static int virtio_mem_state_change_request(VirtIOMEM *vmem, uint64_t gpa, } /* test if really all blocks are in the opposite state */ - if (!virtio_mem_test_bitmap(vmem, gpa, size, !plug)) { + if ((plug && !virtio_mem_is_range_unplugged(vmem, gpa, size)) || + (!plug && !virtio_mem_is_range_plugged(vmem, gpa, size))) { return VIRTIO_MEM_RESP_ERROR; } @@ -651,9 +667,9 @@ static void virtio_mem_state_request(VirtIOMEM *vmem, VirtQueueElement *elem, return; } - if (virtio_mem_test_bitmap(vmem, gpa, size, true)) { + if (virtio_mem_is_range_plugged(vmem, gpa, size)) { resp.u.state.state = cpu_to_le16(VIRTIO_MEM_STATE_PLUGGED); - } else if (virtio_mem_test_bitmap(vmem, gpa, size, false)) { + } else if (virtio_mem_is_range_unplugged(vmem, gpa, size)) { resp.u.state.state = cpu_to_le16(VIRTIO_MEM_STATE_UNPLUGGED); } else { resp.u.state.state = cpu_to_le16(VIRTIO_MEM_STATE_MIXED); @@ -1372,7 +1388,7 @@ static bool virtio_mem_rdm_is_populated(const RamDiscardManager *rdm, return false; } - return virtio_mem_test_bitmap(vmem, start_gpa, end_gpa - start_gpa, true); + return virtio_mem_is_range_plugged(vmem, start_gpa, end_gpa - start_gpa); } struct VirtIOMEMReplayData { From 0f2bb0bf38b04e9dfdc32ce5c762dd317f5a4c5a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= Date: Fri, 26 May 2023 17:31:42 +0200 Subject: [PATCH 27/53] vdpa: return errno in vhost_vdpa_get_vring_group error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We need to tell in the caller, as some errors are expected in a normal workflow. In particular, parent drivers in recent kernels with VHOST_BACKEND_F_IOTLB_ASID may not support vring groups. In that case, -ENOTSUP is returned. This is the case of vp_vdpa in Linux 6.2. Next patches in this series will use that information to know if it must abort or not. Also, next patches return properly an errp instead of printing with error_report. Reviewed-by: Stefano Garzarella Acked-by: Jason Wang Signed-off-by: Eugenio Pérez Message-Id: <20230526153143.470745-2-eperezma@redhat.com> Tested-by: Lei Yang Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- net/vhost-vdpa.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c index 37cdc84562..3fb833fe76 100644 --- a/net/vhost-vdpa.c +++ b/net/vhost-vdpa.c @@ -362,6 +362,14 @@ static NetClientInfo net_vhost_vdpa_info = { .check_peer_type = vhost_vdpa_check_peer_type, }; +/** + * Get vring virtqueue group + * + * @device_fd vdpa device fd + * @vq_index Virtqueue index + * + * Return -errno in case of error, or vq group if success. + */ static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index) { struct vhost_vring_state state = { @@ -370,6 +378,7 @@ static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index) int r = ioctl(device_fd, VHOST_VDPA_GET_VRING_GROUP, &state); if (unlikely(r < 0)) { + r = -errno; error_report("Cannot get VQ %u group: %s", vq_index, g_strerror(errno)); return r; From 152128d646973ed298d41dafd7a5bccff43336c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= Date: Fri, 26 May 2023 17:31:43 +0200 Subject: [PATCH 28/53] vdpa: move CVQ isolation check to net_init_vhost_vdpa MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Evaluating it at start time instead of initialization time may make the guest capable of dynamically adding or removing migration blockers. Also, moving to initialization reduces the number of ioctls in the migration, reducing failure possibilities. As a drawback we need to check for CVQ isolation twice: one time with no MQ negotiated and another one acking it, as long as the device supports it. This is because Vring ASID / group management is based on vq indexes, but we don't know the index of CVQ before negotiating MQ. Signed-off-by: Eugenio Pérez Message-Id: <20230526153143.470745-3-eperezma@redhat.com> Tested-by: Lei Yang Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- net/vhost-vdpa.c | 157 ++++++++++++++++++++++++++++++++++------------- 1 file changed, 113 insertions(+), 44 deletions(-) diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c index 3fb833fe76..46778d5313 100644 --- a/net/vhost-vdpa.c +++ b/net/vhost-vdpa.c @@ -43,6 +43,10 @@ typedef struct VhostVDPAState { /* The device always have SVQ enabled */ bool always_svq; + + /* The device can isolate CVQ in its own ASID */ + bool cvq_isolated; + bool started; } VhostVDPAState; @@ -362,15 +366,8 @@ static NetClientInfo net_vhost_vdpa_info = { .check_peer_type = vhost_vdpa_check_peer_type, }; -/** - * Get vring virtqueue group - * - * @device_fd vdpa device fd - * @vq_index Virtqueue index - * - * Return -errno in case of error, or vq group if success. - */ -static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index) +static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index, + Error **errp) { struct vhost_vring_state state = { .index = vq_index, @@ -379,8 +376,7 @@ static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index) if (unlikely(r < 0)) { r = -errno; - error_report("Cannot get VQ %u group: %s", vq_index, - g_strerror(errno)); + error_setg_errno(errp, errno, "Cannot get VQ %u group", vq_index); return r; } @@ -480,9 +476,9 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc) { VhostVDPAState *s, *s0; struct vhost_vdpa *v; - uint64_t backend_features; int64_t cvq_group; - int cvq_index, r; + int r; + Error *err = NULL; assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA); @@ -502,40 +498,21 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc) /* * If we early return in these cases SVQ will not be enabled. The migration * will be blocked as long as vhost-vdpa backends will not offer _F_LOG. - * - * Calling VHOST_GET_BACKEND_FEATURES as they are not available in v->dev - * yet. */ - r = ioctl(v->device_fd, VHOST_GET_BACKEND_FEATURES, &backend_features); - if (unlikely(r < 0)) { - error_report("Cannot get vdpa backend_features: %s(%d)", - g_strerror(errno), errno); - return -1; - } - if (!(backend_features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID)) || - !vhost_vdpa_net_valid_svq_features(v->dev->features, NULL)) { + if (!vhost_vdpa_net_valid_svq_features(v->dev->features, NULL)) { return 0; } - /* - * Check if all the virtqueues of the virtio device are in a different vq - * than the last vq. VQ group of last group passed in cvq_group. - */ - cvq_index = v->dev->vq_index_end - 1; - cvq_group = vhost_vdpa_get_vring_group(v->device_fd, cvq_index); - if (unlikely(cvq_group < 0)) { - return cvq_group; + if (!s->cvq_isolated) { + return 0; } - for (int i = 0; i < cvq_index; ++i) { - int64_t group = vhost_vdpa_get_vring_group(v->device_fd, i); - if (unlikely(group < 0)) { - return group; - } - - if (group == cvq_group) { - return 0; - } + cvq_group = vhost_vdpa_get_vring_group(v->device_fd, + v->dev->vq_index_end - 1, + &err); + if (unlikely(cvq_group < 0)) { + error_report_err(err); + return cvq_group; } r = vhost_vdpa_set_address_space_id(v, cvq_group, VHOST_VDPA_NET_CVQ_ASID); @@ -799,6 +776,87 @@ static const VhostShadowVirtqueueOps vhost_vdpa_net_svq_ops = { .avail_handler = vhost_vdpa_net_handle_ctrl_avail, }; +/** + * Probe if CVQ is isolated + * + * @device_fd The vdpa device fd + * @features Features offered by the device. + * @cvq_index The control vq pair index + * + * Returns <0 in case of failure, 0 if false and 1 if true. + */ +static int vhost_vdpa_probe_cvq_isolation(int device_fd, uint64_t features, + int cvq_index, Error **errp) +{ + uint64_t backend_features; + int64_t cvq_group; + uint8_t status = VIRTIO_CONFIG_S_ACKNOWLEDGE | + VIRTIO_CONFIG_S_DRIVER | + VIRTIO_CONFIG_S_FEATURES_OK; + int r; + + ERRP_GUARD(); + + r = ioctl(device_fd, VHOST_GET_BACKEND_FEATURES, &backend_features); + if (unlikely(r < 0)) { + error_setg_errno(errp, errno, "Cannot get vdpa backend_features"); + return r; + } + + if (!(backend_features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID))) { + return 0; + } + + r = ioctl(device_fd, VHOST_SET_FEATURES, &features); + if (unlikely(r)) { + error_setg_errno(errp, errno, "Cannot set features"); + } + + r = ioctl(device_fd, VHOST_VDPA_SET_STATUS, &status); + if (unlikely(r)) { + error_setg_errno(errp, -r, "Cannot set device features"); + goto out; + } + + cvq_group = vhost_vdpa_get_vring_group(device_fd, cvq_index, errp); + if (unlikely(cvq_group < 0)) { + if (cvq_group != -ENOTSUP) { + r = cvq_group; + goto out; + } + + /* + * The kernel report VHOST_BACKEND_F_IOTLB_ASID if the vdpa frontend + * support ASID even if the parent driver does not. The CVQ cannot be + * isolated in this case. + */ + error_free(*errp); + *errp = NULL; + r = 0; + goto out; + } + + for (int i = 0; i < cvq_index; ++i) { + int64_t group = vhost_vdpa_get_vring_group(device_fd, i, errp); + if (unlikely(group < 0)) { + r = group; + goto out; + } + + if (group == (int64_t)cvq_group) { + r = 0; + goto out; + } + } + + r = 1; + +out: + status = 0; + ioctl(device_fd, VHOST_VDPA_SET_STATUS, &status); + return r; +} + static NetClientState *net_vhost_vdpa_init(NetClientState *peer, const char *device, const char *name, @@ -808,16 +866,26 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, bool is_datapath, bool svq, struct vhost_vdpa_iova_range iova_range, - uint64_t features) + uint64_t features, + Error **errp) { NetClientState *nc = NULL; VhostVDPAState *s; int ret = 0; assert(name); + int cvq_isolated; + if (is_datapath) { nc = qemu_new_net_client(&net_vhost_vdpa_info, peer, device, name); } else { + cvq_isolated = vhost_vdpa_probe_cvq_isolation(vdpa_device_fd, features, + queue_pair_index * 2, + errp); + if (unlikely(cvq_isolated < 0)) { + return NULL; + } + nc = qemu_new_net_control_client(&net_vhost_vdpa_cvq_info, peer, device, name); } @@ -844,6 +912,7 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops; s->vhost_vdpa.shadow_vq_ops_opaque = s; + s->cvq_isolated = cvq_isolated; /* * TODO: We cannot migrate devices with CVQ as there is no way to set @@ -972,7 +1041,7 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, for (i = 0; i < queue_pairs; i++) { ncs[i] = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name, vdpa_device_fd, i, 2, true, opts->x_svq, - iova_range, features); + iova_range, features, errp); if (!ncs[i]) goto err; } @@ -980,7 +1049,7 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, if (has_cvq) { nc = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name, vdpa_device_fd, i, 1, false, - opts->x_svq, iova_range, features); + opts->x_svq, iova_range, features, errp); if (!nc) goto err; } From a1f85cff902f3260d85c74ec11d4c4b182cead80 Mon Sep 17 00:00:00 2001 From: zhenwei pi Date: Wed, 3 May 2023 19:54:37 +0800 Subject: [PATCH 29/53] cryptodev: fix memory leak during stats query object_get_canonical_path already returns newly allocated memory, this means no additional g_strdup required. Remove g_strdup to avoid memory leak. Fixes: Coverity CID 1508074 Fixes: f2b901098 ("cryptodev: Support query-stats QMP command") Cc: Peter Maydell Signed-off-by: zhenwei pi Message-Id: <20230503115437.262469-1-pizhenwei@bytedance.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- backends/cryptodev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backends/cryptodev.c b/backends/cryptodev.c index 94ca393cee..7d29517843 100644 --- a/backends/cryptodev.c +++ b/backends/cryptodev.c @@ -522,7 +522,7 @@ static int cryptodev_backend_stats_query(Object *obj, void *data) entry = g_new0(StatsResult, 1); entry->provider = STATS_PROVIDER_CRYPTODEV; - entry->qom_path = g_strdup(object_get_canonical_path(obj)); + entry->qom_path = object_get_canonical_path(obj); entry->stats = stats_list; QAPI_LIST_PREPEND(*stats_results, entry); From 42b1b9d7db48e72b10760869de20070b4afb6c2c Mon Sep 17 00:00:00 2001 From: BALATON Zoltan Date: Wed, 7 Jun 2023 22:01:25 +0200 Subject: [PATCH 30/53] hw/acpi: Fix PM control register access On pegasos2 which has ACPI as part of VT8231 south bridge the board firmware writes PM control register by accessing the second byte so addr will be 1. This wasn't handled correctly and the write went to addr 0 instead. Remove the acpi_pm1_cnt_write() function which is used only once and does not take addr into account and handle non-zero address in acpi_pm_cnt_{read|write}. This fixes ACPI shutdown with pegasos2 firmware. The issue below is possibly related to the same memory core bug. Link: https://gitlab.com/qemu-project/qemu/-/issues/360 Reviewed-by: Igor Mammedov Signed-off-by: BALATON Zoltan Message-Id: <20230607200125.A9988746377@zero.eik.bme.hu> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/acpi/core.c | 56 +++++++++++++++++++++++++------------------------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/hw/acpi/core.c b/hw/acpi/core.c index 6da275c599..00b1e79a30 100644 --- a/hw/acpi/core.c +++ b/hw/acpi/core.c @@ -551,8 +551,35 @@ void acpi_pm_tmr_reset(ACPIREGS *ar) } /* ACPI PM1aCNT */ -static void acpi_pm1_cnt_write(ACPIREGS *ar, uint16_t val) +void acpi_pm1_cnt_update(ACPIREGS *ar, + bool sci_enable, bool sci_disable) { + /* ACPI specs 3.0, 4.7.2.5 */ + if (ar->pm1.cnt.acpi_only) { + return; + } + + if (sci_enable) { + ar->pm1.cnt.cnt |= ACPI_BITMASK_SCI_ENABLE; + } else if (sci_disable) { + ar->pm1.cnt.cnt &= ~ACPI_BITMASK_SCI_ENABLE; + } +} + +static uint64_t acpi_pm_cnt_read(void *opaque, hwaddr addr, unsigned width) +{ + ACPIREGS *ar = opaque; + return ar->pm1.cnt.cnt >> addr * 8; +} + +static void acpi_pm_cnt_write(void *opaque, hwaddr addr, uint64_t val, + unsigned width) +{ + ACPIREGS *ar = opaque; + + if (addr == 1) { + val = val << 8 | (ar->pm1.cnt.cnt & 0xff); + } ar->pm1.cnt.cnt = val & ~(ACPI_BITMASK_SLEEP_ENABLE); if (val & ACPI_BITMASK_SLEEP_ENABLE) { @@ -575,33 +602,6 @@ static void acpi_pm1_cnt_write(ACPIREGS *ar, uint16_t val) } } -void acpi_pm1_cnt_update(ACPIREGS *ar, - bool sci_enable, bool sci_disable) -{ - /* ACPI specs 3.0, 4.7.2.5 */ - if (ar->pm1.cnt.acpi_only) { - return; - } - - if (sci_enable) { - ar->pm1.cnt.cnt |= ACPI_BITMASK_SCI_ENABLE; - } else if (sci_disable) { - ar->pm1.cnt.cnt &= ~ACPI_BITMASK_SCI_ENABLE; - } -} - -static uint64_t acpi_pm_cnt_read(void *opaque, hwaddr addr, unsigned width) -{ - ACPIREGS *ar = opaque; - return ar->pm1.cnt.cnt; -} - -static void acpi_pm_cnt_write(void *opaque, hwaddr addr, uint64_t val, - unsigned width) -{ - acpi_pm1_cnt_write(opaque, val); -} - static const MemoryRegionOps acpi_pm_cnt_ops = { .read = acpi_pm_cnt_read, .write = acpi_pm_cnt_write, From bf376f3020dfd7bcb2c4158b4ffa85c04d44f56d Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Wed, 7 Jun 2023 15:57:16 -0500 Subject: [PATCH 31/53] hw/i386/pc: Default to use SMBIOS 3.0 for newer machine models MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, pc-q35 and pc-i44fx machine models are default to use SMBIOS 2.8 (32-bit entry point). Since SMBIOS 3.0 (64-bit entry point) is now fully supported since QEMU 7.0, default to use SMBIOS 3.0 for newer machine models. This is necessary to avoid the following message when launching a VM with large number of vcpus. "SMBIOS 2.1 table length 66822 exceeds 65535" Signed-off-by: Suravee Suthikulpanit Message-Id: <20230607205717.737749-2-suravee.suthikulpanit@amd.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Daniel P. Berrangé Reviewed-by: Igor Mammedov --- hw/i386/pc.c | 4 +++- hw/i386/pc_piix.c | 5 +++++ hw/i386/pc_q35.c | 5 +++++ include/hw/i386/pc.h | 1 + tests/qtest/bios-tables-test-allowed-diff.h | 1 + 5 files changed, 15 insertions(+), 1 deletion(-) diff --git a/hw/i386/pc.c b/hw/i386/pc.c index fc52772fdd..8d37567e08 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -1856,6 +1856,7 @@ static void pc_machine_set_max_fw_size(Object *obj, Visitor *v, static void pc_machine_initfn(Object *obj) { PCMachineState *pcms = PC_MACHINE(obj); + PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); #ifdef CONFIG_VMPORT pcms->vmport = ON_OFF_AUTO_AUTO; @@ -1863,7 +1864,7 @@ static void pc_machine_initfn(Object *obj) pcms->vmport = ON_OFF_AUTO_OFF; #endif /* CONFIG_VMPORT */ pcms->max_ram_below_4g = 0; /* use default */ - pcms->smbios_entry_point_type = SMBIOS_ENTRY_POINT_TYPE_32; + pcms->smbios_entry_point_type = pcmc->default_smbios_ep_type; /* acpi build is enabled by default if machine supports it */ pcms->acpi_build_enabled = PC_MACHINE_GET_CLASS(pcms)->has_acpi_build; @@ -1975,6 +1976,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) mc->nvdimm_supported = true; mc->smp_props.dies_supported = true; mc->default_ram_id = "pc.ram"; + pcmc->default_smbios_ep_type = SMBIOS_ENTRY_POINT_TYPE_64; object_class_property_add(oc, PC_MACHINE_MAX_RAM_BELOW_4G, "size", pc_machine_get_max_ram_below_4g, pc_machine_set_max_ram_below_4g, diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index 44146e6ff5..f9947fbc10 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -506,11 +506,16 @@ DEFINE_I440FX_MACHINE(v8_1, "pc-i440fx-8.1", NULL, static void pc_i440fx_8_0_machine_options(MachineClass *m) { + PCMachineClass *pcmc = PC_MACHINE_CLASS(m); + pc_i440fx_8_1_machine_options(m); m->alias = NULL; m->is_default = false; compat_props_add(m->compat_props, hw_compat_8_0, hw_compat_8_0_len); compat_props_add(m->compat_props, pc_compat_8_0, pc_compat_8_0_len); + + /* For pc-i44fx-8.0 and older, use SMBIOS 2.8 by default */ + pcmc->default_smbios_ep_type = SMBIOS_ENTRY_POINT_TYPE_32; } DEFINE_I440FX_MACHINE(v8_0, "pc-i440fx-8.0", NULL, diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index a9a59ed42b..cf68b80974 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -387,10 +387,15 @@ DEFINE_Q35_MACHINE(v8_1, "pc-q35-8.1", NULL, static void pc_q35_8_0_machine_options(MachineClass *m) { + PCMachineClass *pcmc = PC_MACHINE_CLASS(m); + pc_q35_8_1_machine_options(m); m->alias = NULL; compat_props_add(m->compat_props, hw_compat_8_0, hw_compat_8_0_len); compat_props_add(m->compat_props, pc_compat_8_0, pc_compat_8_0_len); + + /* For pc-q35-8.0 and older, use SMBIOS 2.8 by default */ + pcmc->default_smbios_ep_type = SMBIOS_ENTRY_POINT_TYPE_32; } DEFINE_Q35_MACHINE(v8_0, "pc-q35-8.0", NULL, diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index c661e9cc80..6eec0fc51d 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -110,6 +110,7 @@ struct PCMachineClass { bool smbios_defaults; bool smbios_legacy_mode; bool smbios_uuid_encoded; + SmbiosEntryPointType default_smbios_ep_type; /* RAM / address space compat: */ bool gigabyte_align; diff --git a/tests/qtest/bios-tables-test-allowed-diff.h b/tests/qtest/bios-tables-test-allowed-diff.h index dfb8523c8b..81148a604f 100644 --- a/tests/qtest/bios-tables-test-allowed-diff.h +++ b/tests/qtest/bios-tables-test-allowed-diff.h @@ -1 +1,2 @@ /* List of comma-separated changed AML files to ignore */ +"tests/data/acpi/q35/SSDT.dimmpxm", From c85cad8105e647e6aae1b0d8405c49d91994b158 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 26 Jun 2023 05:54:41 -0400 Subject: [PATCH 32/53] tests/data/acpi: update after SMBIOS 2.0 change Switching to SMBIOS3.0 by default shifts some addresses, so we get this change in tests/data/acpi/q35/SSDT.dimmpxm : @@ -389,6 +389,6 @@ } } - Name (MEMA, 0x07FFE000) + Name (MEMA, 0x07FFF000) } update the expected file to match. Signed-off-by: Michael S. Tsirkin --- tests/data/acpi/q35/SSDT.dimmpxm | Bin 1815 -> 1815 bytes tests/qtest/bios-tables-test-allowed-diff.h | 1 - 2 files changed, 1 deletion(-) diff --git a/tests/data/acpi/q35/SSDT.dimmpxm b/tests/data/acpi/q35/SSDT.dimmpxm index 9ea4e0d0ceaa8a5cbd706afb6d49de853fafe654..70f133412f5e0aa128ab210245a8de7304eeb843 100644 GIT binary patch delta 23 ecmbQvH=U0wIM^jboSlJzanD9BE_UVz|JeaVy9Ofw delta 23 ecmbQvH=U0wIM^jboSlJzam_|9E_UV*|JeaVTLvQl diff --git a/tests/qtest/bios-tables-test-allowed-diff.h b/tests/qtest/bios-tables-test-allowed-diff.h index 81148a604f..dfb8523c8b 100644 --- a/tests/qtest/bios-tables-test-allowed-diff.h +++ b/tests/qtest/bios-tables-test-allowed-diff.h @@ -1,2 +1 @@ /* List of comma-separated changed AML files to ignore */ -"tests/data/acpi/q35/SSDT.dimmpxm", From e0001297eb2f8569e950e55dbda8ad686e4155fb Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Wed, 7 Jun 2023 15:57:17 -0500 Subject: [PATCH 33/53] pc: q35: Bump max_cpus to 1024 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since KVM_MAX_VCPUS is currently defined to 1024 for x86 as shown in arch/x86/include/asm/kvm_host.h, update QEMU limits to the same number. In case KVM could not support the specified number of vcpus, QEMU would return the following error message: qemu-system-x86_64: kvm_init_vcpu: kvm_get_vcpu failed (xxx): Invalid argument Also, keep max_cpus at 288 for machine version 8.0 and older. Cc: Igor Mammedov Cc: Daniel P. Berrangé Cc: Michael S. Tsirkin Cc: Julia Suvorova Reviewed-by: Igor Mammedov Signed-off-by: Suravee Suthikulpanit Message-Id: <20230607205717.737749-3-suravee.suthikulpanit@amd.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Daniel P. Berrangé --- hw/i386/pc_q35.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index cf68b80974..11a7084ea1 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -368,12 +368,12 @@ static void pc_q35_machine_options(MachineClass *m) m->default_nic = "e1000e"; m->default_kernel_irqchip_split = false; m->no_floppy = 1; + m->max_cpus = 1024; m->no_parallel = !module_object_class_by_name(TYPE_ISA_PARALLEL); machine_class_allow_dynamic_sysbus_dev(m, TYPE_AMD_IOMMU_DEVICE); machine_class_allow_dynamic_sysbus_dev(m, TYPE_INTEL_IOMMU_DEVICE); machine_class_allow_dynamic_sysbus_dev(m, TYPE_RAMFB_DEVICE); machine_class_allow_dynamic_sysbus_dev(m, TYPE_VMBUS_BRIDGE); - m->max_cpus = 288; } static void pc_q35_8_1_machine_options(MachineClass *m) @@ -396,6 +396,7 @@ static void pc_q35_8_0_machine_options(MachineClass *m) /* For pc-q35-8.0 and older, use SMBIOS 2.8 by default */ pcmc->default_smbios_ep_type = SMBIOS_ENTRY_POINT_TYPE_32; + m->max_cpus = 288; } DEFINE_Q35_MACHINE(v8_0, "pc-q35-8.0", NULL, From 8bc0049eadafb984d305c847cedff550b58e5fc0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= Date: Fri, 2 Jun 2023 16:38:52 +0200 Subject: [PATCH 34/53] vdpa: do not block migration if device has cvq and x-svq=on MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It was a mistake to forbid in all cases, as SVQ is already able to send all the CVQ messages before start forwarding data vqs. It actually caused a regression, making impossible to migrate device previously migratable. Fixes: 36e4647247f2 ("vdpa: add vhost_vdpa_net_valid_svq_features") Signed-off-by: Eugenio Pérez Message-Id: <20230602143854.1879091-2-eperezma@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Tested-by: Lei Yang --- net/vhost-vdpa.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c index 46778d5313..4345f1e6de 100644 --- a/net/vhost-vdpa.c +++ b/net/vhost-vdpa.c @@ -915,13 +915,16 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, s->cvq_isolated = cvq_isolated; /* - * TODO: We cannot migrate devices with CVQ as there is no way to set - * the device state (MAC, MQ, etc) before starting the datapath. + * TODO: We cannot migrate devices with CVQ and no x-svq enabled as + * there is no way to set the device state (MAC, MQ, etc) before + * starting the datapath. * * Migration blocker ownership now belongs to s->vhost_vdpa. */ - error_setg(&s->vhost_vdpa.migration_blocker, - "net vdpa cannot migrate with CVQ feature"); + if (!svq) { + error_setg(&s->vhost_vdpa.migration_blocker, + "net vdpa cannot migrate with CVQ feature"); + } } ret = vhost_vdpa_add(nc, (void *)&s->vhost_vdpa, queue_pair_index, nvqs); if (ret) { From 915bf6ccd7a5c9b6cbea7a72f153597d1b98834f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= Date: Fri, 2 Jun 2023 16:38:53 +0200 Subject: [PATCH 35/53] vdpa: reorder vhost_vdpa_net_cvq_cmd_page_len function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We need to call it from resource cleanup context, as munmap needs the size of the mappings. Signed-off-by: Eugenio Pérez Reviewed-by: Philippe Mathieu-Daudé Message-Id: <20230602143854.1879091-3-eperezma@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- net/vhost-vdpa.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c index 4345f1e6de..e425fabc34 100644 --- a/net/vhost-vdpa.c +++ b/net/vhost-vdpa.c @@ -120,6 +120,22 @@ VHostNetState *vhost_vdpa_get_vhost_net(NetClientState *nc) return s->vhost_net; } +static size_t vhost_vdpa_net_cvq_cmd_len(void) +{ + /* + * MAC_TABLE_SET is the ctrl command that produces the longer out buffer. + * In buffer is always 1 byte, so it should fit here + */ + return sizeof(struct virtio_net_ctrl_hdr) + + 2 * sizeof(struct virtio_net_ctrl_mac) + + MAC_TABLE_ENTRIES * ETH_ALEN; +} + +static size_t vhost_vdpa_net_cvq_cmd_page_len(void) +{ + return ROUND_UP(vhost_vdpa_net_cvq_cmd_len(), qemu_real_host_page_size()); +} + static bool vhost_vdpa_net_valid_svq_features(uint64_t features, Error **errp) { uint64_t invalid_dev_features = @@ -427,22 +443,6 @@ static void vhost_vdpa_cvq_unmap_buf(struct vhost_vdpa *v, void *addr) vhost_iova_tree_remove(tree, *map); } -static size_t vhost_vdpa_net_cvq_cmd_len(void) -{ - /* - * MAC_TABLE_SET is the ctrl command that produces the longer out buffer. - * In buffer is always 1 byte, so it should fit here - */ - return sizeof(struct virtio_net_ctrl_hdr) + - 2 * sizeof(struct virtio_net_ctrl_mac) + - MAC_TABLE_ENTRIES * ETH_ALEN; -} - -static size_t vhost_vdpa_net_cvq_cmd_page_len(void) -{ - return ROUND_UP(vhost_vdpa_net_cvq_cmd_len(), qemu_real_host_page_size()); -} - /** Map CVQ buffer. */ static int vhost_vdpa_cvq_map_buf(struct vhost_vdpa *v, void *buf, size_t size, bool write) From babf8b87127ae809b31b3c0a117dcbc91aaf9aba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= Date: Fri, 2 Jun 2023 16:38:54 +0200 Subject: [PATCH 36/53] vdpa: map shadow vrings with MAP_SHARED MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The vdpa devices that use va addresses neeeds these maps shared. Otherwise, vhost_vdpa checks will refuse to accept the maps. The mmap call will always return a page aligned address, so removing the qemu_memalign call. Keeping the ROUND_UP for the size as we still need to DMA-map them in full. Not applying fixes tag as it never worked with va devices. Signed-off-by: Eugenio Pérez Message-Id: <20230602143854.1879091-4-eperezma@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/vhost-shadow-virtqueue.c | 18 +++++++++--------- net/vhost-vdpa.c | 16 ++++++++-------- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c index bd7c12b6d3..1b1d85306c 100644 --- a/hw/virtio/vhost-shadow-virtqueue.c +++ b/hw/virtio/vhost-shadow-virtqueue.c @@ -649,7 +649,7 @@ void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd) void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev, VirtQueue *vq, VhostIOVATree *iova_tree) { - size_t desc_size, driver_size, device_size; + size_t desc_size; event_notifier_set_handler(&svq->hdev_call, vhost_svq_handle_call); svq->next_guest_avail_elem = NULL; @@ -662,14 +662,14 @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev, svq->vring.num = virtio_queue_get_num(vdev, virtio_get_queue_index(vq)); svq->num_free = svq->vring.num; - driver_size = vhost_svq_driver_area_size(svq); - device_size = vhost_svq_device_area_size(svq); - svq->vring.desc = qemu_memalign(qemu_real_host_page_size(), driver_size); + svq->vring.desc = mmap(NULL, vhost_svq_driver_area_size(svq), + PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, + -1, 0); desc_size = sizeof(vring_desc_t) * svq->vring.num; svq->vring.avail = (void *)((char *)svq->vring.desc + desc_size); - memset(svq->vring.desc, 0, driver_size); - svq->vring.used = qemu_memalign(qemu_real_host_page_size(), device_size); - memset(svq->vring.used, 0, device_size); + svq->vring.used = mmap(NULL, vhost_svq_device_area_size(svq), + PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, + -1, 0); svq->desc_state = g_new0(SVQDescState, svq->vring.num); svq->desc_next = g_new0(uint16_t, svq->vring.num); for (unsigned i = 0; i < svq->vring.num - 1; i++) { @@ -712,8 +712,8 @@ void vhost_svq_stop(VhostShadowVirtqueue *svq) svq->vq = NULL; g_free(svq->desc_next); g_free(svq->desc_state); - qemu_vfree(svq->vring.desc); - qemu_vfree(svq->vring.used); + munmap(svq->vring.desc, vhost_svq_driver_area_size(svq)); + munmap(svq->vring.used, vhost_svq_device_area_size(svq)); event_notifier_set_handler(&svq->hdev_call, NULL); } diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c index e425fabc34..8840ca2ea4 100644 --- a/net/vhost-vdpa.c +++ b/net/vhost-vdpa.c @@ -205,8 +205,8 @@ static void vhost_vdpa_cleanup(NetClientState *nc) { VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); - qemu_vfree(s->cvq_cmd_out_buffer); - qemu_vfree(s->status); + munmap(s->cvq_cmd_out_buffer, vhost_vdpa_net_cvq_cmd_page_len()); + munmap(s->status, vhost_vdpa_net_cvq_cmd_page_len()); if (s->vhost_net) { vhost_net_cleanup(s->vhost_net); g_free(s->vhost_net); @@ -903,12 +903,12 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, vhost_vdpa_net_valid_svq_features(features, &s->vhost_vdpa.migration_blocker); } else if (!is_datapath) { - s->cvq_cmd_out_buffer = qemu_memalign(qemu_real_host_page_size(), - vhost_vdpa_net_cvq_cmd_page_len()); - memset(s->cvq_cmd_out_buffer, 0, vhost_vdpa_net_cvq_cmd_page_len()); - s->status = qemu_memalign(qemu_real_host_page_size(), - vhost_vdpa_net_cvq_cmd_page_len()); - memset(s->status, 0, vhost_vdpa_net_cvq_cmd_page_len()); + s->cvq_cmd_out_buffer = mmap(NULL, vhost_vdpa_net_cvq_cmd_page_len(), + PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, -1, 0); + s->status = mmap(NULL, vhost_vdpa_net_cvq_cmd_page_len(), + PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, + -1, 0); s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops; s->vhost_vdpa.shadow_vq_ops_opaque = s; From 0a47810b09909a432b705e89e0cb5f9c1109465f Mon Sep 17 00:00:00 2001 From: Hawkins Jiawei Date: Fri, 2 Jun 2023 19:52:13 +0800 Subject: [PATCH 37/53] include/hw/virtio: make some VirtIODevice const MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The VirtIODevice structure is not modified in virtio_vdev_has_feature(). Therefore, make it const to allow this function to accept const variables. Signed-off-by: Hawkins Jiawei Reviewed-by: Eugenio Pérez Martin Message-Id: <16c0561b921310a32c240a4fb6e8cee3ffee16fe.1685704856.git.yin31149@gmail.com> Tested-by: Lei Yang Reviewed-by: Eugenio Pérez Tested-by: Eugenio Pérez Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- include/hw/virtio/virtio.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h index af86ed7249..0492d26900 100644 --- a/include/hw/virtio/virtio.h +++ b/include/hw/virtio/virtio.h @@ -402,7 +402,7 @@ static inline bool virtio_has_feature(uint64_t features, unsigned int fbit) return !!(features & (1ULL << fbit)); } -static inline bool virtio_vdev_has_feature(VirtIODevice *vdev, +static inline bool virtio_vdev_has_feature(const VirtIODevice *vdev, unsigned int fbit) { return virtio_has_feature(vdev->guest_features, fbit); From 02d3bf099ba071f37145cfcc34d088e8f839072f Mon Sep 17 00:00:00 2001 From: Hawkins Jiawei Date: Fri, 2 Jun 2023 19:52:14 +0800 Subject: [PATCH 38/53] vdpa: reuse virtio_vdev_has_feature() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We can use virtio_vdev_has_feature() instead of manually accessing the features. Signed-off-by: Hawkins Jiawei Acked-by: Eugenio Pérez Message-Id: Tested-by: Lei Yang Reviewed-by: Eugenio Pérez Tested-by: Eugenio Pérez Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- net/vhost-vdpa.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c index 8840ca2ea4..c3ef0139a6 100644 --- a/net/vhost-vdpa.c +++ b/net/vhost-vdpa.c @@ -629,8 +629,7 @@ static ssize_t vhost_vdpa_net_load_cmd(VhostVDPAState *s, uint8_t class, static int vhost_vdpa_net_load_mac(VhostVDPAState *s, const VirtIONet *n) { - uint64_t features = n->parent_obj.guest_features; - if (features & BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR)) { + if (virtio_vdev_has_feature(&n->parent_obj, VIRTIO_NET_F_CTRL_MAC_ADDR)) { ssize_t dev_written = vhost_vdpa_net_load_cmd(s, VIRTIO_NET_CTRL_MAC, VIRTIO_NET_CTRL_MAC_ADDR_SET, n->mac, sizeof(n->mac)); @@ -648,10 +647,9 @@ static int vhost_vdpa_net_load_mq(VhostVDPAState *s, const VirtIONet *n) { struct virtio_net_ctrl_mq mq; - uint64_t features = n->parent_obj.guest_features; ssize_t dev_written; - if (!(features & BIT_ULL(VIRTIO_NET_F_MQ))) { + if (!virtio_vdev_has_feature(&n->parent_obj, VIRTIO_NET_F_MQ)) { return 0; } From 705e89cfaafc54491482742a756cf661b48608d2 Mon Sep 17 00:00:00 2001 From: Hawkins Jiawei Date: Fri, 2 Jun 2023 19:52:15 +0800 Subject: [PATCH 39/53] hw/net/virtio-net: make some VirtIONet const MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The VirtIONet structure is not modified in virtio_net_supported_guest_offloads(). Therefore, make it const to allow this function to accept const variables. Signed-off-by: Hawkins Jiawei Reviewed-by: Eugenio Pérez Message-Id: <489b09c3998ac09b9135e57a7dd8c56a4be8cdf9.1685704856.git.yin31149@gmail.com> Tested-by: Lei Yang Reviewed-by: Eugenio Pérez Tested-by: Eugenio Pérez Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/net/virtio-net.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index 6df6b7329d..7b27dad6c4 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -874,7 +874,7 @@ static uint64_t virtio_net_guest_offloads_by_features(uint32_t features) return guest_offloads_mask & features; } -static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n) +static inline uint64_t virtio_net_supported_guest_offloads(const VirtIONet *n) { VirtIODevice *vdev = VIRTIO_DEVICE(n); return virtio_net_guest_offloads_by_features(vdev->guest_features); From 0b545b1e42fc61f64071c7dd6f3ce1650f328007 Mon Sep 17 00:00:00 2001 From: Hawkins Jiawei Date: Fri, 2 Jun 2023 19:52:16 +0800 Subject: [PATCH 40/53] virtio-net: expose virtio_net_supported_guest_offloads() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To support restoring offloads state in vdpa, it is necessary to expose the function virtio_net_supported_guest_offloads(). According to VirtIO standard, "Upon feature negotiation corresponding offload gets enabled to preserve backward compatibility.". Therefore, QEMU uses this function to get the device supported offloads. This allows QEMU to know the device's defaults and skip the control message sending if these defaults align with the driver's configuration. Note that the device's defaults can mismatch the driver's configuration only at live migration. Signed-off-by: Hawkins Jiawei Message-Id: <43679506f3f039a7aa2bdd5b49785107b5dfd7d4.1685704856.git.yin31149@gmail.com> Tested-by: Lei Yang Reviewed-by: Eugenio Pérez Tested-by: Eugenio Pérez Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/net/virtio-net.c | 2 +- include/hw/virtio/virtio-net.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index 7b27dad6c4..7e8897a8bc 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -874,7 +874,7 @@ static uint64_t virtio_net_guest_offloads_by_features(uint32_t features) return guest_offloads_mask & features; } -static inline uint64_t virtio_net_supported_guest_offloads(const VirtIONet *n) +uint64_t virtio_net_supported_guest_offloads(const VirtIONet *n) { VirtIODevice *vdev = VIRTIO_DEVICE(n); return virtio_net_guest_offloads_by_features(vdev->guest_features); diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h index ef234ffe7e..5f5dcb4572 100644 --- a/include/hw/virtio/virtio-net.h +++ b/include/hw/virtio/virtio-net.h @@ -227,5 +227,6 @@ size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev, unsigned out_num); void virtio_net_set_netclient_name(VirtIONet *n, const char *name, const char *type); +uint64_t virtio_net_supported_guest_offloads(const VirtIONet *n); #endif From 0b58d3686a07f505d02edc28cfaf272a4768481c Mon Sep 17 00:00:00 2001 From: Hawkins Jiawei Date: Fri, 2 Jun 2023 19:52:17 +0800 Subject: [PATCH 41/53] vdpa: Add vhost_vdpa_net_load_offloads() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch introduces vhost_vdpa_net_load_offloads() to restore offloads state at device's startup. Signed-off-by: Hawkins Jiawei Message-Id: <7e2b5cad9c48c917df53d80dec27dbfeb513e1a3.1685704856.git.yin31149@gmail.com> Tested-by: Lei Yang Reviewed-by: Eugenio Pérez Tested-by: Eugenio Pérez Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- net/vhost-vdpa.c | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c index c3ef0139a6..99d0445d90 100644 --- a/net/vhost-vdpa.c +++ b/net/vhost-vdpa.c @@ -664,6 +664,44 @@ static int vhost_vdpa_net_load_mq(VhostVDPAState *s, return *s->status != VIRTIO_NET_OK; } +static int vhost_vdpa_net_load_offloads(VhostVDPAState *s, + const VirtIONet *n) +{ + uint64_t offloads; + ssize_t dev_written; + + if (!virtio_vdev_has_feature(&n->parent_obj, + VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) { + return 0; + } + + if (n->curr_guest_offloads == virtio_net_supported_guest_offloads(n)) { + /* + * According to VirtIO standard, "Upon feature negotiation + * corresponding offload gets enabled to preserve + * backward compatibility.". + * + * Therefore, there is no need to send this CVQ command if the + * driver also enables all supported offloads, which aligns with + * the device's defaults. + * + * Note that the device's defaults can mismatch the driver's + * configuration only at live migration. + */ + return 0; + } + + offloads = cpu_to_le64(n->curr_guest_offloads); + dev_written = vhost_vdpa_net_load_cmd(s, VIRTIO_NET_CTRL_GUEST_OFFLOADS, + VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET, + &offloads, sizeof(offloads)); + if (unlikely(dev_written < 0)) { + return dev_written; + } + + return *s->status != VIRTIO_NET_OK; +} + static int vhost_vdpa_net_load(NetClientState *nc) { VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); @@ -686,6 +724,10 @@ static int vhost_vdpa_net_load(NetClientState *nc) if (unlikely(r)) { return r; } + r = vhost_vdpa_net_load_offloads(s, n); + if (unlikely(r)) { + return r; + } return 0; } From 4b4a1378b951930628398b3c67ccb036bdf6f012 Mon Sep 17 00:00:00 2001 From: Hawkins Jiawei Date: Fri, 2 Jun 2023 19:52:18 +0800 Subject: [PATCH 42/53] vdpa: Allow VIRTIO_NET_F_CTRL_GUEST_OFFLOADS in SVQ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Enable SVQ with VIRTIO_NET_F_CTRL_GUEST_OFFLOADS feature. Signed-off-by: Hawkins Jiawei Acked-by: Jason Wang Message-Id: <778d642ecae6deed8a218b0e6232e4d7bb96b439.1685704856.git.yin31149@gmail.com> Tested-by: Lei Yang Reviewed-by: Eugenio Pérez Tested-by: Eugenio Pérez Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- net/vhost-vdpa.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c index 99d0445d90..49221937a1 100644 --- a/net/vhost-vdpa.c +++ b/net/vhost-vdpa.c @@ -89,6 +89,7 @@ const int vdpa_feature_bits[] = { static const uint64_t vdpa_svq_device_features = BIT_ULL(VIRTIO_NET_F_CSUM) | BIT_ULL(VIRTIO_NET_F_GUEST_CSUM) | + BIT_ULL(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) | BIT_ULL(VIRTIO_NET_F_MTU) | BIT_ULL(VIRTIO_NET_F_MAC) | BIT_ULL(VIRTIO_NET_F_GUEST_TSO4) | From 92099aa4e9a3bb6856c290afaf41c76f9e3dd9fd Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Fri, 2 Jun 2023 18:27:35 +0200 Subject: [PATCH 43/53] vhost: fix vhost_dev_enable_notifiers() error case MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit in vhost_dev_enable_notifiers(), if virtio_bus_set_host_notifier(true) fails, we call vhost_dev_disable_notifiers() that executes virtio_bus_set_host_notifier(false) on all queues, even on queues that have failed to be initialized. This triggers a core dump in memory_region_del_eventfd(): virtio_bus_set_host_notifier: unable to init event notifier: Too many open files (-24) vhost VQ 1 notifier binding failed: 24 .../softmmu/memory.c:2611: memory_region_del_eventfd: Assertion `i != mr->ioeventfd_nb' failed. Fix the problem by providing to vhost_dev_disable_notifiers() the number of queues to disable. Fixes: 8771589b6f81 ("vhost: simplify vhost_dev_enable_notifiers") Cc: longpeng2@huawei.com Signed-off-by: Laurent Vivier Message-Id: <20230602162735.3670785-1-lvivier@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Philippe Mathieu-Daudé --- hw/virtio/vhost.c | 65 ++++++++++++++++++++++++++--------------------- 1 file changed, 36 insertions(+), 29 deletions(-) diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index fb7abc9769..d116c2d6a1 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -1530,6 +1530,40 @@ void vhost_dev_cleanup(struct vhost_dev *hdev) memset(hdev, 0, sizeof(struct vhost_dev)); } +static void vhost_dev_disable_notifiers_nvqs(struct vhost_dev *hdev, + VirtIODevice *vdev, + unsigned int nvqs) +{ + BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); + int i, r; + + /* + * Batch all the host notifiers in a single transaction to avoid + * quadratic time complexity in address_space_update_ioeventfds(). + */ + memory_region_transaction_begin(); + + for (i = 0; i < nvqs; ++i) { + r = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i, + false); + if (r < 0) { + error_report("vhost VQ %d notifier cleanup failed: %d", i, -r); + } + assert(r >= 0); + } + + /* + * The transaction expects the ioeventfds to be open when it + * commits. Do it now, before the cleanup loop. + */ + memory_region_transaction_commit(); + + for (i = 0; i < nvqs; ++i) { + virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i); + } + virtio_device_release_ioeventfd(vdev); +} + /* Stop processing guest IO notifications in qemu. * Start processing them in vhost in kernel. */ @@ -1559,7 +1593,7 @@ int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev) if (r < 0) { error_report("vhost VQ %d notifier binding failed: %d", i, -r); memory_region_transaction_commit(); - vhost_dev_disable_notifiers(hdev, vdev); + vhost_dev_disable_notifiers_nvqs(hdev, vdev, i); return r; } } @@ -1576,34 +1610,7 @@ int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev) */ void vhost_dev_disable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev) { - BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); - int i, r; - - /* - * Batch all the host notifiers in a single transaction to avoid - * quadratic time complexity in address_space_update_ioeventfds(). - */ - memory_region_transaction_begin(); - - for (i = 0; i < hdev->nvqs; ++i) { - r = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i, - false); - if (r < 0) { - error_report("vhost VQ %d notifier cleanup failed: %d", i, -r); - } - assert (r >= 0); - } - - /* - * The transaction expects the ioeventfds to be open when it - * commits. Do it now, before the cleanup loop. - */ - memory_region_transaction_commit(); - - for (i = 0; i < hdev->nvqs; ++i) { - virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i); - } - virtio_device_release_ioeventfd(vdev); + vhost_dev_disable_notifiers_nvqs(hdev, vdev, hdev->nvqs); } /* Test and clear event pending status. From 51e84244a7799172f4239482199e9b4bdcd23172 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= Date: Fri, 2 Jun 2023 19:33:28 +0200 Subject: [PATCH 44/53] vdpa: mask _F_CTRL_GUEST_OFFLOADS for vhost vdpa devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit QEMU does not emulate it so it must be disabled as long as the backend does not support it. Signed-off-by: Eugenio Pérez Message-Id: <20230602173328.1917385-1-eperezma@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang Tested-by: Lei Yang --- net/vhost-vdpa.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c index 49221937a1..75352efa39 100644 --- a/net/vhost-vdpa.c +++ b/net/vhost-vdpa.c @@ -58,6 +58,7 @@ const int vdpa_feature_bits[] = { VIRTIO_F_VERSION_1, VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM, + VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, VIRTIO_NET_F_GSO, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, From d45243bcfc61a3c34f96a4fc34bffcb9929daba0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= Date: Fri, 2 Jun 2023 19:34:51 +0200 Subject: [PATCH 45/53] vdpa: fix not using CVQ buffer in case of error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bug introducing when refactoring. Otherway, the guest never received the used buffer. Fixes: be4278b65fc1 ("vdpa: extract vhost_vdpa_net_cvq_add from vhost_vdpa_net_handle_ctrl_avail") Signed-off-by: Eugenio Pérez Message-Id: <20230602173451.1917999-1-eperezma@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang Tested-by: Lei Yang --- net/vhost-vdpa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c index 75352efa39..9e92b3558c 100644 --- a/net/vhost-vdpa.c +++ b/net/vhost-vdpa.c @@ -794,7 +794,7 @@ static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq, } if (*s->status != VIRTIO_NET_OK) { - return VIRTIO_NET_ERR; + goto out; } status = VIRTIO_NET_ERR; From abe10037b129615f3da80f6d7c4acc3a0ec48afa Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Fri, 9 Jun 2023 11:41:07 -0500 Subject: [PATCH 46/53] hw/i386/pc: Clean up pc_machine_initfn To use the newly introduced PC machine class local variable. Suggested-by: Igor Mammedov Signed-off-by: Suravee Suthikulpanit Message-Id: <20230609164107.23404-1-suravee.suthikulpanit@amd.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/pc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 8d37567e08..f01d7de5ad 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -1867,7 +1867,7 @@ static void pc_machine_initfn(Object *obj) pcms->smbios_entry_point_type = pcmc->default_smbios_ep_type; /* acpi build is enabled by default if machine supports it */ - pcms->acpi_build_enabled = PC_MACHINE_GET_CLASS(pcms)->has_acpi_build; + pcms->acpi_build_enabled = pcmc->has_acpi_build; pcms->smbus_enabled = true; pcms->sata_enabled = true; pcms->i8042_enabled = true; From 535a3d9a32a9e37487984c16af0167bb3c3a2025 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Sun, 11 Jun 2023 15:39:24 -0400 Subject: [PATCH 47/53] virtio-scsi: avoid dangling host notifier in ->ioeventfd_stop() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit virtio_scsi_dataplane_stop() calls blk_drain_all(), which invokes ->drained_begin()/->drained_end() after we've already detached the host notifier. virtio_scsi_drained_end() currently attaches the host notifier again and leaves it dangling after dataplane has stopped. This results in the following assertion failure because virtio_scsi_defer_to_dataplane() is called from the IOThread instead of the main loop thread: qemu-system-x86_64: ../softmmu/memory.c:1111: memory_region_transaction_commit: Assertion `qemu_mutex_iothread_locked()' failed. Buglink: https://gitlab.com/qemu-project/qemu/-/issues/1680 Reported-by: Jean-Louis Dupond Signed-off-by: Stefan Hajnoczi Message-Id: <20230611193924.2444914-1-stefanha@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/scsi/virtio-scsi.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c index 9c8ef0aaa6..45b95ea070 100644 --- a/hw/scsi/virtio-scsi.c +++ b/hw/scsi/virtio-scsi.c @@ -1125,7 +1125,16 @@ static void virtio_scsi_drained_begin(SCSIBus *bus) uint32_t total_queues = VIRTIO_SCSI_VQ_NUM_FIXED + s->parent_obj.conf.num_queues; - if (!s->dataplane_started) { + /* + * Drain is called when stopping dataplane but the host notifier has + * already been detached. Detaching multiple times is a no-op if nothing + * else is using the monitoring same file descriptor, but avoid it just in + * case. + * + * Also, don't detach if dataplane has not even been started yet because + * the host notifier isn't attached. + */ + if (s->dataplane_stopping || !s->dataplane_started) { return; } @@ -1143,7 +1152,14 @@ static void virtio_scsi_drained_end(SCSIBus *bus) uint32_t total_queues = VIRTIO_SCSI_VQ_NUM_FIXED + s->parent_obj.conf.num_queues; - if (!s->dataplane_started) { + /* + * Drain is called when stopping dataplane. Keep the host notifier detached + * so it's not left dangling after dataplane is stopped. + * + * Also, don't attach if dataplane has not even been started yet. We're not + * ready. + */ + if (s->dataplane_stopping || !s->dataplane_started) { return; } From f8ed3648b5b9c0cd77397ae4404f3e9e4be8a426 Mon Sep 17 00:00:00 2001 From: Manos Pitsidianakis Date: Tue, 13 Jun 2023 11:08:48 +0300 Subject: [PATCH 48/53] vhost-user: fully use new backend/frontend naming MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Slave/master nomenclature was replaced with backend/frontend in commit 1fc19b65279a ("vhost-user: Adopt new backend naming") This patch replaces all remaining uses of master and slave in the codebase. Signed-off-by: Emmanouil Pitsidianakis Message-Id: <20230613080849.2115347-1-manos.pitsidianakis@linaro.org> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Marc-André Lureau --- block/export/vhost-user-blk-server.c | 2 +- contrib/vhost-user-blk/vhost-user-blk.c | 2 +- hw/block/vhost-user-blk.c | 2 +- hw/display/vhost-user-gpu.c | 2 +- hw/input/vhost-user-input.c | 2 +- hw/net/virtio-net.c | 4 +- hw/virtio/vdpa-dev.c | 2 +- hw/virtio/vhost-user.c | 52 +++++++++++----------- hw/virtio/virtio-qmp.c | 2 +- include/hw/virtio/vhost-backend.h | 2 +- subprojects/libvhost-user/libvhost-user.c | 54 +++++++++++------------ subprojects/libvhost-user/libvhost-user.h | 20 +++++---- 12 files changed, 74 insertions(+), 72 deletions(-) diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c index 81b59761e3..f7b5073605 100644 --- a/block/export/vhost-user-blk-server.c +++ b/block/export/vhost-user-blk-server.c @@ -167,7 +167,7 @@ vu_blk_set_config(VuDev *vu_dev, const uint8_t *data, uint8_t wce; /* don't support live migration */ - if (flags != VHOST_SET_CONFIG_TYPE_MASTER) { + if (flags != VHOST_SET_CONFIG_TYPE_FRONTEND) { return -EINVAL; } diff --git a/contrib/vhost-user-blk/vhost-user-blk.c b/contrib/vhost-user-blk/vhost-user-blk.c index 7941694e53..89e5f11a64 100644 --- a/contrib/vhost-user-blk/vhost-user-blk.c +++ b/contrib/vhost-user-blk/vhost-user-blk.c @@ -421,7 +421,7 @@ vub_set_config(VuDev *vu_dev, const uint8_t *data, int fd; /* don't support live migration */ - if (flags != VHOST_SET_CONFIG_TYPE_MASTER) { + if (flags != VHOST_SET_CONFIG_TYPE_FRONTEND) { return -1; } diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c index aff4d2b8cb..eecf3f7a81 100644 --- a/hw/block/vhost-user-blk.c +++ b/hw/block/vhost-user-blk.c @@ -81,7 +81,7 @@ static void vhost_user_blk_set_config(VirtIODevice *vdev, const uint8_t *config) ret = vhost_dev_set_config(&s->dev, &blkcfg->wce, offsetof(struct virtio_blk_config, wce), sizeof(blkcfg->wce), - VHOST_SET_CONFIG_TYPE_MASTER); + VHOST_SET_CONFIG_TYPE_FRONTEND); if (ret) { error_report("set device config space failed"); return; diff --git a/hw/display/vhost-user-gpu.c b/hw/display/vhost-user-gpu.c index 1386e869e5..15f9d99d09 100644 --- a/hw/display/vhost-user-gpu.c +++ b/hw/display/vhost-user-gpu.c @@ -452,7 +452,7 @@ vhost_user_gpu_set_config(VirtIODevice *vdev, ret = vhost_dev_set_config(&g->vhost->dev, config_data, 0, sizeof(struct virtio_gpu_config), - VHOST_SET_CONFIG_TYPE_MASTER); + VHOST_SET_CONFIG_TYPE_FRONTEND); if (ret) { error_report("vhost-user-gpu: set device config space failed"); return; diff --git a/hw/input/vhost-user-input.c b/hw/input/vhost-user-input.c index 1352e372ff..4ee3542106 100644 --- a/hw/input/vhost-user-input.c +++ b/hw/input/vhost-user-input.c @@ -69,7 +69,7 @@ static void vhost_input_set_config(VirtIODevice *vdev, ret = vhost_dev_set_config(&vhi->vhost->dev, config_data, 0, sizeof(virtio_input_config), - VHOST_SET_CONFIG_TYPE_MASTER); + VHOST_SET_CONFIG_TYPE_FRONTEND); if (ret) { error_report("vhost-user-input: set device config space failed"); return; diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index 7e8897a8bc..aa421a908a 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -211,7 +211,7 @@ static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config) if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { vhost_net_set_config(get_vhost_net(nc->peer), (uint8_t *)&netcfg, 0, n->config_size, - VHOST_SET_CONFIG_TYPE_MASTER); + VHOST_SET_CONFIG_TYPE_FRONTEND); } } @@ -3733,7 +3733,7 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp) struct virtio_net_config netcfg = {}; memcpy(&netcfg.mac, &n->nic_conf.macaddr, ETH_ALEN); vhost_net_set_config(get_vhost_net(nc->peer), - (uint8_t *)&netcfg, 0, ETH_ALEN, VHOST_SET_CONFIG_TYPE_MASTER); + (uint8_t *)&netcfg, 0, ETH_ALEN, VHOST_SET_CONFIG_TYPE_FRONTEND); } QTAILQ_INIT(&n->rsc_chains); n->qdev = dev; diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c index e08e830006..363b625243 100644 --- a/hw/virtio/vdpa-dev.c +++ b/hw/virtio/vdpa-dev.c @@ -203,7 +203,7 @@ vhost_vdpa_device_set_config(VirtIODevice *vdev, const uint8_t *config) int ret; ret = vhost_dev_set_config(&s->dev, s->config, 0, s->config_size, - VHOST_SET_CONFIG_TYPE_MASTER); + VHOST_SET_CONFIG_TYPE_FRONTEND); if (ret) { error_report("set device config space failed"); return; diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index 2ad75a7964..c4e0cbd702 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -124,13 +124,13 @@ typedef enum VhostUserRequest { VHOST_USER_MAX } VhostUserRequest; -typedef enum VhostUserSlaveRequest { +typedef enum VhostUserBackendRequest { VHOST_USER_BACKEND_NONE = 0, VHOST_USER_BACKEND_IOTLB_MSG = 1, VHOST_USER_BACKEND_CONFIG_CHANGE_MSG = 2, VHOST_USER_BACKEND_VRING_HOST_NOTIFIER_MSG = 3, VHOST_USER_BACKEND_MAX -} VhostUserSlaveRequest; +} VhostUserBackendRequest; typedef struct VhostUserMemoryRegion { uint64_t guest_phys_addr; @@ -245,8 +245,8 @@ struct vhost_user { struct vhost_dev *dev; /* Shared between vhost devs of the same virtio device */ VhostUserState *user; - QIOChannel *slave_ioc; - GSource *slave_src; + QIOChannel *backend_ioc; + GSource *backend_src; NotifierWithReturn postcopy_notifier; struct PostCopyFD postcopy_fd; uint64_t postcopy_client_bases[VHOST_USER_MAX_RAM_SLOTS]; @@ -1495,7 +1495,7 @@ static int vhost_user_reset_device(struct vhost_dev *dev) return vhost_user_write(dev, &msg, NULL, 0); } -static int vhost_user_slave_handle_config_change(struct vhost_dev *dev) +static int vhost_user_backend_handle_config_change(struct vhost_dev *dev) { if (!dev->config_ops || !dev->config_ops->vhost_dev_config_notifier) { return -ENOSYS; @@ -1532,7 +1532,7 @@ static VhostUserHostNotifier *fetch_or_create_notifier(VhostUserState *u, return n; } -static int vhost_user_slave_handle_vring_host_notifier(struct vhost_dev *dev, +static int vhost_user_backend_handle_vring_host_notifier(struct vhost_dev *dev, VhostUserVringArea *area, int fd) { @@ -1594,16 +1594,16 @@ static int vhost_user_slave_handle_vring_host_notifier(struct vhost_dev *dev, return 0; } -static void close_slave_channel(struct vhost_user *u) +static void close_backend_channel(struct vhost_user *u) { - g_source_destroy(u->slave_src); - g_source_unref(u->slave_src); - u->slave_src = NULL; - object_unref(OBJECT(u->slave_ioc)); - u->slave_ioc = NULL; + g_source_destroy(u->backend_src); + g_source_unref(u->backend_src); + u->backend_src = NULL; + object_unref(OBJECT(u->backend_ioc)); + u->backend_ioc = NULL; } -static gboolean slave_read(QIOChannel *ioc, GIOCondition condition, +static gboolean backend_read(QIOChannel *ioc, GIOCondition condition, gpointer opaque) { struct vhost_dev *dev = opaque; @@ -1645,10 +1645,10 @@ static gboolean slave_read(QIOChannel *ioc, GIOCondition condition, ret = vhost_backend_handle_iotlb_msg(dev, &payload.iotlb); break; case VHOST_USER_BACKEND_CONFIG_CHANGE_MSG: - ret = vhost_user_slave_handle_config_change(dev); + ret = vhost_user_backend_handle_config_change(dev); break; case VHOST_USER_BACKEND_VRING_HOST_NOTIFIER_MSG: - ret = vhost_user_slave_handle_vring_host_notifier(dev, &payload.area, + ret = vhost_user_backend_handle_vring_host_notifier(dev, &payload.area, fd ? fd[0] : -1); break; default: @@ -1684,7 +1684,7 @@ static gboolean slave_read(QIOChannel *ioc, GIOCondition condition, goto fdcleanup; err: - close_slave_channel(u); + close_backend_channel(u); rc = G_SOURCE_REMOVE; fdcleanup: @@ -1696,7 +1696,7 @@ fdcleanup: return rc; } -static int vhost_setup_slave_channel(struct vhost_dev *dev) +static int vhost_setup_backend_channel(struct vhost_dev *dev) { VhostUserMsg msg = { .hdr.request = VHOST_USER_SET_BACKEND_REQ_FD, @@ -1725,10 +1725,10 @@ static int vhost_setup_slave_channel(struct vhost_dev *dev) error_report_err(local_err); return -ECONNREFUSED; } - u->slave_ioc = ioc; - u->slave_src = qio_channel_add_watch_source(u->slave_ioc, + u->backend_ioc = ioc; + u->backend_src = qio_channel_add_watch_source(u->backend_ioc, G_IO_IN | G_IO_HUP, - slave_read, dev, NULL, NULL); + backend_read, dev, NULL, NULL); if (reply_supported) { msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; @@ -1746,7 +1746,7 @@ static int vhost_setup_slave_channel(struct vhost_dev *dev) out: close(sv[1]); if (ret) { - close_slave_channel(u); + close_backend_channel(u); } return ret; @@ -2072,7 +2072,7 @@ static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque, virtio_has_feature(dev->protocol_features, VHOST_USER_PROTOCOL_F_REPLY_ACK))) { error_setg(errp, "IOMMU support requires reply-ack and " - "slave-req protocol features."); + "backend-req protocol features."); return -EINVAL; } @@ -2108,7 +2108,7 @@ static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque, } if (dev->vq_index == 0) { - err = vhost_setup_slave_channel(dev); + err = vhost_setup_backend_channel(dev); if (err < 0) { error_setg_errno(errp, EPROTO, "vhost_backend_init failed"); return -EPROTO; @@ -2138,8 +2138,8 @@ static int vhost_user_backend_cleanup(struct vhost_dev *dev) close(u->postcopy_fd.fd); u->postcopy_fd.handler = NULL; } - if (u->slave_ioc) { - close_slave_channel(u); + if (u->backend_ioc) { + close_backend_channel(u); } g_free(u->region_rb); u->region_rb = NULL; @@ -2235,7 +2235,7 @@ static int vhost_user_net_set_mtu(struct vhost_dev *dev, uint16_t mtu) return ret; } - /* If reply_ack supported, slave has to ack specified MTU is valid */ + /* If reply_ack supported, backend has to ack specified MTU is valid */ if (reply_supported) { return process_message_reply(dev, &msg); } diff --git a/hw/virtio/virtio-qmp.c b/hw/virtio/virtio-qmp.c index 3528fc628d..3d32dbec8d 100644 --- a/hw/virtio/virtio-qmp.c +++ b/hw/virtio/virtio-qmp.c @@ -117,7 +117,7 @@ static const qmp_virtio_feature_map_t vhost_user_protocol_map[] = { "VHOST_USER_PROTOCOL_F_CONFIG: Vhost-user messaging for virtio " "device configuration space supported"), FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_BACKEND_SEND_FD, \ - "VHOST_USER_PROTOCOL_F_BACKEND_SEND_FD: Slave fd communication " + "VHOST_USER_PROTOCOL_F_BACKEND_SEND_FD: Backend fd communication " "channel supported"), FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_HOST_NOTIFIER, \ "VHOST_USER_PROTOCOL_F_HOST_NOTIFIER: Host notifiers for specified " diff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h index ec3fbae58d..31a251a9f5 100644 --- a/include/hw/virtio/vhost-backend.h +++ b/include/hw/virtio/vhost-backend.h @@ -22,7 +22,7 @@ typedef enum VhostBackendType { } VhostBackendType; typedef enum VhostSetConfigType { - VHOST_SET_CONFIG_TYPE_MASTER = 0, + VHOST_SET_CONFIG_TYPE_FRONTEND = 0, VHOST_SET_CONFIG_TYPE_MIGRATION = 1, } VhostSetConfigType; diff --git a/subprojects/libvhost-user/libvhost-user.c b/subprojects/libvhost-user/libvhost-user.c index 8fb61e2df2..0469a50101 100644 --- a/subprojects/libvhost-user/libvhost-user.c +++ b/subprojects/libvhost-user/libvhost-user.c @@ -421,8 +421,8 @@ vu_send_reply(VuDev *dev, int conn_fd, VhostUserMsg *vmsg) } /* - * Processes a reply on the slave channel. - * Entered with slave_mutex held and releases it before exit. + * Processes a reply on the backend channel. + * Entered with backend_mutex held and releases it before exit. * Returns true on success. */ static bool @@ -436,7 +436,7 @@ vu_process_message_reply(VuDev *dev, const VhostUserMsg *vmsg) goto out; } - if (!vu_message_read_default(dev, dev->slave_fd, &msg_reply)) { + if (!vu_message_read_default(dev, dev->backend_fd, &msg_reply)) { goto out; } @@ -449,7 +449,7 @@ vu_process_message_reply(VuDev *dev, const VhostUserMsg *vmsg) result = msg_reply.payload.u64 == 0; out: - pthread_mutex_unlock(&dev->slave_mutex); + pthread_mutex_unlock(&dev->backend_mutex); return result; } @@ -1393,13 +1393,13 @@ bool vu_set_queue_host_notifier(VuDev *dev, VuVirtq *vq, int fd, return false; } - pthread_mutex_lock(&dev->slave_mutex); - if (!vu_message_write(dev, dev->slave_fd, &vmsg)) { - pthread_mutex_unlock(&dev->slave_mutex); + pthread_mutex_lock(&dev->backend_mutex); + if (!vu_message_write(dev, dev->backend_fd, &vmsg)) { + pthread_mutex_unlock(&dev->backend_mutex); return false; } - /* Also unlocks the slave_mutex */ + /* Also unlocks the backend_mutex */ return vu_process_message_reply(dev, &vmsg); } @@ -1463,7 +1463,7 @@ vu_get_protocol_features_exec(VuDev *dev, VhostUserMsg *vmsg) * a device implementation can return it in its callback * (get_protocol_features) if it wants to use this for * simulation, but it is otherwise not desirable (if even - * implemented by the master.) + * implemented by the frontend.) */ uint64_t features = 1ULL << VHOST_USER_PROTOCOL_F_MQ | 1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD | @@ -1508,7 +1508,7 @@ vu_set_protocol_features_exec(VuDev *dev, VhostUserMsg *vmsg) * of the other features are required. * Theoretically, one could use only kick messages, or do them without * having F_REPLY_ACK, but too many (possibly pending) messages on the - * socket will eventually cause the master to hang, to avoid this in + * socket will eventually cause the frontend to hang, to avoid this in * scenarios where not desired enforce that the settings are in a way * that actually enables the simulation case. */ @@ -1550,18 +1550,18 @@ vu_set_vring_enable_exec(VuDev *dev, VhostUserMsg *vmsg) } static bool -vu_set_slave_req_fd(VuDev *dev, VhostUserMsg *vmsg) +vu_set_backend_req_fd(VuDev *dev, VhostUserMsg *vmsg) { if (vmsg->fd_num != 1) { - vu_panic(dev, "Invalid slave_req_fd message (%d fd's)", vmsg->fd_num); + vu_panic(dev, "Invalid backend_req_fd message (%d fd's)", vmsg->fd_num); return false; } - if (dev->slave_fd != -1) { - close(dev->slave_fd); + if (dev->backend_fd != -1) { + close(dev->backend_fd); } - dev->slave_fd = vmsg->fds[0]; - DPRINT("Got slave_fd: %d\n", vmsg->fds[0]); + dev->backend_fd = vmsg->fds[0]; + DPRINT("Got backend_fd: %d\n", vmsg->fds[0]); return false; } @@ -1577,7 +1577,7 @@ vu_get_config(VuDev *dev, VhostUserMsg *vmsg) } if (ret) { - /* resize to zero to indicate an error to master */ + /* resize to zero to indicate an error to frontend */ vmsg->size = 0; } @@ -1917,7 +1917,7 @@ vu_process_message(VuDev *dev, VhostUserMsg *vmsg) case VHOST_USER_SET_VRING_ENABLE: return vu_set_vring_enable_exec(dev, vmsg); case VHOST_USER_SET_BACKEND_REQ_FD: - return vu_set_slave_req_fd(dev, vmsg); + return vu_set_backend_req_fd(dev, vmsg); case VHOST_USER_GET_CONFIG: return vu_get_config(dev, vmsg); case VHOST_USER_SET_CONFIG: @@ -2038,11 +2038,11 @@ vu_deinit(VuDev *dev) } vu_close_log(dev); - if (dev->slave_fd != -1) { - close(dev->slave_fd); - dev->slave_fd = -1; + if (dev->backend_fd != -1) { + close(dev->backend_fd); + dev->backend_fd = -1; } - pthread_mutex_destroy(&dev->slave_mutex); + pthread_mutex_destroy(&dev->backend_mutex); if (dev->sock != -1) { close(dev->sock); @@ -2080,8 +2080,8 @@ vu_init(VuDev *dev, dev->remove_watch = remove_watch; dev->iface = iface; dev->log_call_fd = -1; - pthread_mutex_init(&dev->slave_mutex, NULL); - dev->slave_fd = -1; + pthread_mutex_init(&dev->backend_mutex, NULL); + dev->backend_fd = -1; dev->max_queues = max_queues; dev->vq = malloc(max_queues * sizeof(dev->vq[0])); @@ -2439,9 +2439,9 @@ static void _vu_queue_notify(VuDev *dev, VuVirtq *vq, bool sync) vmsg.flags |= VHOST_USER_NEED_REPLY_MASK; } - vu_message_write(dev, dev->slave_fd, &vmsg); + vu_message_write(dev, dev->backend_fd, &vmsg); if (ack) { - vu_message_read_default(dev, dev->slave_fd, &vmsg); + vu_message_read_default(dev, dev->backend_fd, &vmsg); } return; } @@ -2468,7 +2468,7 @@ void vu_config_change_msg(VuDev *dev) .flags = VHOST_USER_VERSION, }; - vu_message_write(dev, dev->slave_fd, &vmsg); + vu_message_write(dev, dev->backend_fd, &vmsg); } static inline void diff --git a/subprojects/libvhost-user/libvhost-user.h b/subprojects/libvhost-user/libvhost-user.h index 49208cceaa..708370c5f5 100644 --- a/subprojects/libvhost-user/libvhost-user.h +++ b/subprojects/libvhost-user/libvhost-user.h @@ -39,7 +39,7 @@ #define VHOST_USER_HDR_SIZE offsetof(VhostUserMsg, payload.u64) typedef enum VhostSetConfigType { - VHOST_SET_CONFIG_TYPE_MASTER = 0, + VHOST_SET_CONFIG_TYPE_FRONTEND = 0, VHOST_SET_CONFIG_TYPE_MIGRATION = 1, } VhostSetConfigType; @@ -112,7 +112,7 @@ typedef enum VhostUserRequest { VHOST_USER_MAX } VhostUserRequest; -typedef enum VhostUserSlaveRequest { +typedef enum VhostUserBackendRequest { VHOST_USER_BACKEND_NONE = 0, VHOST_USER_BACKEND_IOTLB_MSG = 1, VHOST_USER_BACKEND_CONFIG_CHANGE_MSG = 2, @@ -120,7 +120,7 @@ typedef enum VhostUserSlaveRequest { VHOST_USER_BACKEND_VRING_CALL = 4, VHOST_USER_BACKEND_VRING_ERR = 5, VHOST_USER_BACKEND_MAX -} VhostUserSlaveRequest; +} VhostUserBackendRequest; typedef struct VhostUserMemoryRegion { uint64_t guest_phys_addr; @@ -296,8 +296,10 @@ typedef struct VuVirtqInflight { * Zero value indicates a vm reset happened. */ uint16_t version; - /* The size of VuDescStateSplit array. It's equal to the virtqueue - * size. Slave could get it from queue size field of VhostUserInflight. */ + /* + * The size of VuDescStateSplit array. It's equal to the virtqueue size. + * Backend could get it from queue size field of VhostUserInflight. + */ uint16_t desc_num; /* The head of list that track the last batch of used descriptors. */ @@ -384,9 +386,9 @@ struct VuDev { VuVirtq *vq; VuDevInflightInfo inflight_info; int log_call_fd; - /* Must be held while using slave_fd */ - pthread_mutex_t slave_mutex; - int slave_fd; + /* Must be held while using backend_fd */ + pthread_mutex_t backend_mutex; + int backend_fd; uint64_t log_size; uint8_t *log_table; uint64_t features; @@ -445,7 +447,7 @@ typedef struct VuVirtqElement { * vu_init: * @dev: a VuDev context * @max_queues: maximum number of virtqueues - * @socket: the socket connected to vhost-user master + * @socket: the socket connected to vhost-user frontend * @panic: a panic callback * @set_watch: a set_watch callback * @remove_watch: a remove_watch callback From e80c1e4c7d057fe5c96db588e651b934757a912e Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Thu, 15 Jun 2023 11:26:24 +0800 Subject: [PATCH 49/53] intel_iommu: Fix a potential issue in VFIO dirty page sync Peter Xu found a potential issue: "The other thing is when I am looking at the new code I found that we actually extended the replay() to be used also in dirty tracking of vfio, in vfio_sync_dirty_bitmap(). For that maybe it's already broken if unmap_all() because afaiu log_sync() can be called in migration thread anytime during DMA so I think it means the device is prone to DMA with the IOMMU pgtable quickly erased and rebuilt here, which means the DMA could fail unexpectedly. Copy Alex, Kirti and Neo." Fix it by replacing the unmap_all() to only evacuate the iova tree (keeping all host mappings untouched, IOW, don't notify UNMAP), and do a full resync in page walk which will notify all existing mappings as MAP. This way we don't interrupt with any existing mapping if there is (e.g. for the dirty sync case), meanwhile we keep sync too to latest (for moving a vfio device into an existing iommu group). Suggested-by: Peter Xu Signed-off-by: Zhenzhong Duan Reviewed-by: Peter Xu Message-Id: <20230615032626.314476-2-zhenzhong.duan@intel.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/intel_iommu.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index 94d52f4205..34af12f392 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -3825,13 +3825,10 @@ static void vtd_iommu_replay(IOMMUMemoryRegion *iommu_mr, IOMMUNotifier *n) IntelIOMMUState *s = vtd_as->iommu_state; uint8_t bus_n = pci_bus_num(vtd_as->bus); VTDContextEntry ce; + DMAMap map = { .iova = 0, .size = HWADDR_MAX }; - /* - * The replay can be triggered by either a invalidation or a newly - * created entry. No matter what, we release existing mappings - * (it means flushing caches for UNMAP-only registers). - */ - vtd_address_space_unmap(vtd_as, n); + /* replay is protected by BQL, page walk will re-setup it safely */ + iova_tree_remove(vtd_as->iova_tree, map); if (vtd_dev_to_context_entry(s, bus_n, vtd_as->devfn, &ce) == 0) { trace_vtd_replay_ce_valid(s->root_scalable ? "scalable mode" : From ce735ff03349eeac9efe59c118d78f088a151ec4 Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Thu, 15 Jun 2023 11:26:25 +0800 Subject: [PATCH 50/53] intel_iommu: Fix flag check in replay Replay doesn't notify registered notifiers but the one passed to it. So it's meaningless to check the registered notifier's synthetic flag. There is no issue currently as all replay use cases have MAP flag set, but let's be robust. Signed-off-by: Zhenzhong Duan Reviewed-by: Peter Xu Message-Id: <20230615032626.314476-3-zhenzhong.duan@intel.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/intel_iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index 34af12f392..f046f85913 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -3837,7 +3837,7 @@ static void vtd_iommu_replay(IOMMUMemoryRegion *iommu_mr, IOMMUNotifier *n) PCI_FUNC(vtd_as->devfn), vtd_get_domain_id(s, &ce, vtd_as->pasid), ce.hi, ce.lo); - if (vtd_as_has_map_notifier(vtd_as)) { + if (n->notifier_flags & IOMMU_NOTIFIER_MAP) { /* This is required only for MAP typed notifiers */ vtd_page_walk_info info = { .hook_fn = vtd_replay_hook, From ebe1504e10f771f4fc5d005a6d1ed3f30e3ad428 Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Thu, 15 Jun 2023 11:26:26 +0800 Subject: [PATCH 51/53] intel_iommu: Fix address space unmap During address space unmap, corresponding IOVA tree entries are also removed. But DMAMap is set beyond notifier's scope by 1, so in theory there is possibility to remove a continuous entry above the notifier's scope but falling in adjacent notifier's scope. There is no issue currently as no use cases allocate notifiers continuously, but let's be robust. Signed-off-by: Zhenzhong Duan Reviewed-by: Peter Xu Message-Id: <20230615032626.314476-4-zhenzhong.duan@intel.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/intel_iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index f046f85913..dcc334060c 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -3791,7 +3791,7 @@ static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n) n->start, size); map.iova = n->start; - map.size = size; + map.size = size - 1; /* Inclusive */ iova_tree_remove(as->iova_tree, map); } From 0e994668d00c9ca760817a4ae802a7bed0e29596 Mon Sep 17 00:00:00 2001 From: Ani Sinha Date: Mon, 19 Jun 2023 09:45:01 +0530 Subject: [PATCH 52/53] vhost_net: add an assertion for TAP client backends An assertion was missing for tap vhost backends that enforces a non-null reference from get_vhost_net(). Both vhost-net-user and vhost-net-vdpa enforces this. Enforce the same for tap. Unit tests pass with this change. Signed-off-by: Ani Sinha Message-Id: <20230619041501.111655-1-anisinha@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Laurent Vivier --- hw/net/vhost_net.c | 1 + 1 file changed, 1 insertion(+) diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c index c4eecc6f36..6db23ca323 100644 --- a/hw/net/vhost_net.c +++ b/hw/net/vhost_net.c @@ -507,6 +507,7 @@ VHostNetState *get_vhost_net(NetClientState *nc) switch (nc->info->type) { case NET_CLIENT_DRIVER_TAP: vhost_net = tap_get_vhost_net(nc); + assert(vhost_net); break; #ifdef CONFIG_VHOST_NET_USER case NET_CLIENT_DRIVER_VHOST_USER: From a0d7215e339b61c7d7a7b3fcf754954d80d93eb8 Mon Sep 17 00:00:00 2001 From: Ani Sinha Date: Mon, 19 Jun 2023 12:22:09 +0530 Subject: [PATCH 53/53] vhost-vdpa: do not cleanup the vdpa/vhost-net structures if peer nic is present When a peer nic is still attached to the vdpa backend, it is too early to free up the vhost-net and vdpa structures. If these structures are freed here, then QEMU crashes when the guest is being shut down. The following call chain would result in an assertion failure since the pointer returned from vhost_vdpa_get_vhost_net() would be NULL: do_vm_stop() -> vm_state_notify() -> virtio_set_status() -> virtio_net_vhost_status() -> get_vhost_net(). Therefore, we defer freeing up the structures until at guest shutdown time when qemu_cleanup() calls net_cleanup() which then calls qemu_del_net_client() which would eventually call vhost_vdpa_cleanup() again to free up the structures. This time, the loop in net_cleanup() ensures that vhost_vdpa_cleanup() will be called one last time when all the peer nics are detached and freed. All unit tests pass with this change. CC: imammedo@redhat.com CC: jusual@redhat.com CC: mst@redhat.com Fixes: CVE-2023-3301 Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=2128929 Signed-off-by: Ani Sinha Message-Id: <20230619065209.442185-1-anisinha@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- net/vhost-vdpa.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c index 9e92b3558c..e19ab063fa 100644 --- a/net/vhost-vdpa.c +++ b/net/vhost-vdpa.c @@ -207,6 +207,14 @@ static void vhost_vdpa_cleanup(NetClientState *nc) { VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); + /* + * If a peer NIC is attached, do not cleanup anything. + * Cleanup will happen as a part of qemu_cleanup() -> net_cleanup() + * when the guest is shutting down. + */ + if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_NIC) { + return; + } munmap(s->cvq_cmd_out_buffer, vhost_vdpa_net_cvq_cmd_page_len()); munmap(s->status, vhost_vdpa_net_cvq_cmd_page_len()); if (s->vhost_net) {