From 6e288b00ef536f87910f76cb1940a8caced69c54 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 3 Mar 2023 13:46:03 +0100 Subject: [PATCH 01/16] rcu: remove qatomic_mb_set, expand comments Reviewed-by: Richard Henderson Signed-off-by: Paolo Bonzini --- include/qemu/rcu.h | 5 ++++- util/rcu.c | 24 +++++++++++------------- 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/include/qemu/rcu.h b/include/qemu/rcu.h index 313fc414bc..661c1a1468 100644 --- a/include/qemu/rcu.h +++ b/include/qemu/rcu.h @@ -87,7 +87,10 @@ static inline void rcu_read_lock(void) ctr = qatomic_read(&rcu_gp_ctr); qatomic_set(&p_rcu_reader->ctr, ctr); - /* Write p_rcu_reader->ctr before reading RCU-protected pointers. */ + /* + * Read rcu_gp_ctr and write p_rcu_reader->ctr before reading + * RCU-protected pointers. + */ smp_mb_placeholder(); } diff --git a/util/rcu.c b/util/rcu.c index b6d6c71cff..e5b6e52be6 100644 --- a/util/rcu.c +++ b/util/rcu.c @@ -83,12 +83,6 @@ static void wait_for_readers(void) */ qemu_event_reset(&rcu_gp_event); - /* Instead of using qatomic_mb_set for index->waiting, and - * qatomic_mb_read for index->ctr, memory barriers are placed - * manually since writes to different threads are independent. - * qemu_event_reset has acquire semantics, so no memory barrier - * is needed here. - */ QLIST_FOREACH(index, &registry, node) { qatomic_set(&index->waiting, true); } @@ -96,6 +90,10 @@ static void wait_for_readers(void) /* Here, order the stores to index->waiting before the loads of * index->ctr. Pairs with smp_mb_placeholder() in rcu_read_unlock(), * ensuring that the loads of index->ctr are sequentially consistent. + * + * If this is the last iteration, this barrier also prevents + * frees from seeping upwards, and orders the two wait phases + * on architectures with 32-bit longs; see synchronize_rcu(). 
*/ smp_mb_global(); @@ -104,7 +102,7 @@ static void wait_for_readers(void) QLIST_REMOVE(index, node); QLIST_INSERT_HEAD(&qsreaders, index, node); - /* No need for mb_set here, worst of all we + /* No need for memory barriers here, worst of all we * get some extra futex wakeups. */ qatomic_set(&index->waiting, false); @@ -149,26 +147,26 @@ void synchronize_rcu(void) /* Write RCU-protected pointers before reading p_rcu_reader->ctr. * Pairs with smp_mb_placeholder() in rcu_read_lock(). + * + * Also orders write to RCU-protected pointers before + * write to rcu_gp_ctr. */ smp_mb_global(); QEMU_LOCK_GUARD(&rcu_registry_lock); if (!QLIST_EMPTY(&registry)) { - /* In either case, the qatomic_mb_set below blocks stores that free - * old RCU-protected pointers. - */ if (sizeof(rcu_gp_ctr) < 8) { /* For architectures with 32-bit longs, a two-subphases algorithm * ensures we do not encounter overflow bugs. * * Switch parity: 0 -> 1, 1 -> 0. */ - qatomic_mb_set(&rcu_gp_ctr, rcu_gp_ctr ^ RCU_GP_CTR); + qatomic_set(&rcu_gp_ctr, rcu_gp_ctr ^ RCU_GP_CTR); wait_for_readers(); - qatomic_mb_set(&rcu_gp_ctr, rcu_gp_ctr ^ RCU_GP_CTR); + qatomic_set(&rcu_gp_ctr, rcu_gp_ctr ^ RCU_GP_CTR); } else { /* Increment current grace period. */ - qatomic_mb_set(&rcu_gp_ctr, rcu_gp_ctr + RCU_GP_CTR); + qatomic_set(&rcu_gp_ctr, rcu_gp_ctr + RCU_GP_CTR); } wait_for_readers(); From 4f7335e21d5170986e20001b9ddb906fe24413f1 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 3 Mar 2023 11:57:06 +0100 Subject: [PATCH 02/16] test-aio-multithread: do not use mb_read/mb_set for simple flags The remaining use of mb_read/mb_set is just to force a thread to exit eventually. It does not order two memory accesses and therefore can be just read/set. 
Reviewed-by: Richard Henderson Signed-off-by: Paolo Bonzini --- tests/unit/test-aio-multithread.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/unit/test-aio-multithread.c b/tests/unit/test-aio-multithread.c index a555cc8835..3c61526a0b 100644 --- a/tests/unit/test-aio-multithread.c +++ b/tests/unit/test-aio-multithread.c @@ -202,7 +202,7 @@ static CoMutex comutex; static void coroutine_fn test_multi_co_mutex_entry(void *opaque) { - while (!qatomic_mb_read(&now_stopping)) { + while (!qatomic_read(&now_stopping)) { qemu_co_mutex_lock(&comutex); counter++; qemu_co_mutex_unlock(&comutex); @@ -236,7 +236,7 @@ static void test_multi_co_mutex(int threads, int seconds) g_usleep(seconds * 1000000); - qatomic_mb_set(&now_stopping, true); + qatomic_set(&now_stopping, true); while (running > 0) { g_usleep(100000); } @@ -327,7 +327,7 @@ static void mcs_mutex_unlock(void) static void test_multi_fair_mutex_entry(void *opaque) { - while (!qatomic_mb_read(&now_stopping)) { + while (!qatomic_read(&now_stopping)) { mcs_mutex_lock(); counter++; mcs_mutex_unlock(); @@ -355,7 +355,7 @@ static void test_multi_fair_mutex(int threads, int seconds) g_usleep(seconds * 1000000); - qatomic_mb_set(&now_stopping, true); + qatomic_set(&now_stopping, true); while (running > 0) { g_usleep(100000); } @@ -383,7 +383,7 @@ static QemuMutex mutex; static void test_multi_mutex_entry(void *opaque) { - while (!qatomic_mb_read(&now_stopping)) { + while (!qatomic_read(&now_stopping)) { qemu_mutex_lock(&mutex); counter++; qemu_mutex_unlock(&mutex); @@ -411,7 +411,7 @@ static void test_multi_mutex(int threads, int seconds) g_usleep(seconds * 1000000); - qatomic_mb_set(&now_stopping, true); + qatomic_set(&now_stopping, true); while (running > 0) { g_usleep(100000); } From 355635c0187d3cc2a0cbb3381e06d61c0bf683ba Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 3 Mar 2023 11:56:07 +0100 Subject: [PATCH 03/16] test-aio-multithread: simplify test_multi_co_schedule 
Instead of using qatomic_mb_{read,set} mindlessly, just use a per-coroutine flag that requires no synchronization. Reviewed-by: Stefan Hajnoczi Signed-off-by: Paolo Bonzini --- tests/unit/test-aio-multithread.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/tests/unit/test-aio-multithread.c b/tests/unit/test-aio-multithread.c index 3c61526a0b..80c5d4e2e6 100644 --- a/tests/unit/test-aio-multithread.c +++ b/tests/unit/test-aio-multithread.c @@ -107,8 +107,7 @@ static void test_lifecycle(void) /* aio_co_schedule test. */ static Coroutine *to_schedule[NUM_CONTEXTS]; - -static bool now_stopping; +static bool stop[NUM_CONTEXTS]; static int count_retry; static int count_here; @@ -136,6 +135,7 @@ static bool schedule_next(int n) static void finish_cb(void *opaque) { + stop[id] = true; schedule_next(id); } @@ -143,13 +143,19 @@ static coroutine_fn void test_multi_co_schedule_entry(void *opaque) { g_assert(to_schedule[id] == NULL); - while (!qatomic_mb_read(&now_stopping)) { + /* + * The next iteration will set to_schedule[id] again, but once finish_cb + * is scheduled there is no guarantee that it will actually be woken up, + * so at that point it must not go to sleep. + */ + while (!stop[id]) { int n; n = g_test_rand_int_range(0, NUM_CONTEXTS); schedule_next(n); qatomic_mb_set(&to_schedule[id], qemu_coroutine_self()); + /* finish_cb can run here. */ qemu_coroutine_yield(); g_assert(to_schedule[id] == NULL); } @@ -161,7 +167,6 @@ static void test_multi_co_schedule(int seconds) int i; count_here = count_other = count_retry = 0; - now_stopping = false; create_aio_contexts(); for (i = 0; i < NUM_CONTEXTS; i++) { @@ -171,10 +176,10 @@ static void test_multi_co_schedule(int seconds) g_usleep(seconds * 1000000); - qatomic_mb_set(&now_stopping, true); + /* Guarantee that each AioContext is woken up from its last wait. 
*/ for (i = 0; i < NUM_CONTEXTS; i++) { ctx_run(i, finish_cb, NULL); - to_schedule[i] = NULL; + g_assert(to_schedule[i] == NULL); } join_aio_contexts(); @@ -199,6 +204,7 @@ static uint32_t atomic_counter; static uint32_t running; static uint32_t counter; static CoMutex comutex; +static bool now_stopping; static void coroutine_fn test_multi_co_mutex_entry(void *opaque) { From 8f593ba9c5c96b1790cc6aceb95b5b83bbac92cd Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 3 Mar 2023 11:14:15 +0100 Subject: [PATCH 04/16] call_rcu: stop using mb_set/mb_read Use a store-release when enqueuing a new call_rcu, and a load-acquire when dequeuing; and read the tail after checking that node->next is consistent, which is the standard message passing pattern and it is clearer than mb_read/mb_set. Reviewed-by: Richard Henderson Signed-off-by: Paolo Bonzini --- util/rcu.c | 45 ++++++++++++++++++++++++++++++++------------- 1 file changed, 32 insertions(+), 13 deletions(-) diff --git a/util/rcu.c b/util/rcu.c index e5b6e52be6..30a7e22026 100644 --- a/util/rcu.c +++ b/util/rcu.c @@ -189,8 +189,22 @@ static void enqueue(struct rcu_head *node) struct rcu_head **old_tail; node->next = NULL; + + /* + * Make this node the tail of the list. The node will be + * used by further enqueue operations, but it will not + * be dequeued yet... + */ old_tail = qatomic_xchg(&tail, &node->next); - qatomic_mb_set(old_tail, node); + + /* + * ... until it is pointed to from another item in the list. + * In the meantime, try_dequeue() will find a NULL next pointer + * and loop. + * + * Synchronizes with qatomic_load_acquire() in try_dequeue(). + */ + qatomic_store_release(old_tail, node); } static struct rcu_head *try_dequeue(void) @@ -198,26 +212,31 @@ static struct rcu_head *try_dequeue(void) struct rcu_head *node, *next; retry: - /* Test for an empty list, which we do not expect. Note that for + /* Head is only written by this thread, so no need for barriers. 
*/ + node = head; + + /* + * If the head node has NULL in its next pointer, the value is + * wrong and we need to wait until its enqueuer finishes the update. + */ + next = qatomic_load_acquire(&node->next); + if (!next) { + return NULL; + } + + /* + * Test for an empty list, which we do not expect. Note that for * the consumer head and tail are always consistent. The head * is consistent because only the consumer reads/writes it. * The tail, because it is the first step in the enqueuing. * It is only the next pointers that might be inconsistent. */ - if (head == &dummy && qatomic_mb_read(&tail) == &dummy.next) { + if (head == &dummy && qatomic_read(&tail) == &dummy.next) { abort(); } - /* If the head node has NULL in its next pointer, the value is - * wrong and we need to wait until its enqueuer finishes the update. - */ - node = head; - next = qatomic_mb_read(&head->next); - if (!next) { - return NULL; - } - - /* Since we are the sole consumer, and we excluded the empty case + /* + * Since we are the sole consumer, and we excluded the empty case * above, the queue will always have at least two nodes: the * dummy node, and the one being removed. So we do not need to update * the tail pointer. From 20f46806b3858b92e9d1b5cf586558d62bd5a913 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 3 Mar 2023 13:12:50 +0100 Subject: [PATCH 05/16] tb-maint: do not use mb_read/mb_set The load side can use a relaxed load, which will surely happen before the work item is run by async_safe_run_on_cpu() or before double-checking under mmap_lock. The store side can use an atomic RMW operation. 
Reviewed-by: Richard Henderson Signed-off-by: Paolo Bonzini --- accel/tcg/tb-maint.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/accel/tcg/tb-maint.c b/accel/tcg/tb-maint.c index cb1f806f00..0dd173fbf0 100644 --- a/accel/tcg/tb-maint.c +++ b/accel/tcg/tb-maint.c @@ -746,7 +746,7 @@ static void do_tb_flush(CPUState *cpu, run_on_cpu_data tb_flush_count) tcg_region_reset_all(); /* XXX: flush processor icache at this point if cache flush is expensive */ - qatomic_mb_set(&tb_ctx.tb_flush_count, tb_ctx.tb_flush_count + 1); + qatomic_inc(&tb_ctx.tb_flush_count); done: mmap_unlock(); @@ -758,7 +758,7 @@ done: void tb_flush(CPUState *cpu) { if (tcg_enabled()) { - unsigned tb_flush_count = qatomic_mb_read(&tb_ctx.tb_flush_count); + unsigned tb_flush_count = qatomic_read(&tb_ctx.tb_flush_count); if (cpu_in_exclusive_context(cpu)) { do_tb_flush(cpu, RUN_ON_CPU_HOST_INT(tb_flush_count)); From 2f38ff79abac8a0b779e73a025af0d0ec8911a7e Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 3 May 2023 19:29:17 +0200 Subject: [PATCH 06/16] MAINTAINERS: add stanza for Kconfig files Signed-off-by: Paolo Bonzini --- MAINTAINERS | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 55102f4761..f757369373 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3910,6 +3910,16 @@ F: configure F: scripts/mtest2make.py F: tests/Makefile.include +Kconfig +M: Paolo Bonzini +S: Maintained +F: scripts/minikconf.py +F: docs/devel/kconfig.rst +F: Kconfig* +F: */Kconfig* +F: hw/*/Kconfig* +F: target/*/Kconfig* + GIT submodules M: Daniel P. 
Berrange S: Odd Fixes From 8cbfc530bc10a72874ab241faaba8c56e5515532 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Thu, 4 May 2023 10:13:51 +0200 Subject: [PATCH 07/16] include/qemu/osdep.h: Bump _WIN32_WINNT to the Windows 8 API Commit cf60ccc330 ("cutils: Introduce bundle mechanism") abandoned compatibility with Windows older than 8 - we should reflect this in our _WIN32_WINNT and set it to the value that corresponds to Windows 8. Signed-off-by: Thomas Huth Message-Id: <20230504081351.125140-1-thuth@redhat.com> Signed-off-by: Paolo Bonzini --- include/qemu/osdep.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h index 9eff0be95b..cc61b00ba9 100644 --- a/include/qemu/osdep.h +++ b/include/qemu/osdep.h @@ -75,7 +75,7 @@ QEMU_EXTERN_C int daemon(int, int); #ifdef _WIN32 /* as defined in sdkddkver.h */ #ifndef _WIN32_WINNT -#define _WIN32_WINNT 0x0601 /* Windows 7 API (should be in sync with glib) */ +#define _WIN32_WINNT 0x0602 /* Windows 8 API (should be >= the one from glib) */ #endif /* reduces the number of implicitly included headers */ #ifndef WIN32_LEAN_AND_MEAN From cca0a000d06f897411a8af4402e5d0522bbe450b Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Thu, 4 May 2023 15:53:06 -0500 Subject: [PATCH 08/16] target/i386: allow versioned CPUs to specify new cache_info New EPYC CPU versions require small changes to their cache_info. Because the current QEMU x86 CPU definition does not support versioned cache_info, we would have to declare a new CPU type for each such case. To avoid the duplicated work, add "cache_info" to X86CPUVersionDefinition, to allow new cache_info pointers to be specified for a new CPU version. Co-developed-by: Wei Huang Signed-off-by: Wei Huang Signed-off-by: Michael Roth Signed-off-by: Babu Moger Acked-by: Michael S. 
Tsirkin Message-Id: <20230504205313.225073-2-babu.moger@amd.com> Signed-off-by: Paolo Bonzini --- target/i386/cpu.c | 35 ++++++++++++++++++++++++++++++++--- 1 file changed, 32 insertions(+), 3 deletions(-) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 823320fe42..855d5abc7d 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -1620,6 +1620,7 @@ typedef struct X86CPUVersionDefinition { const char *alias; const char *note; PropValue *props; + const CPUCaches *const cache_info; } X86CPUVersionDefinition; /* Base definition for a CPU model */ @@ -5225,6 +5226,31 @@ static void x86_cpu_apply_version_props(X86CPU *cpu, X86CPUModel *model) assert(vdef->version == version); } +static const CPUCaches *x86_cpu_get_versioned_cache_info(X86CPU *cpu, + X86CPUModel *model) +{ + const X86CPUVersionDefinition *vdef; + X86CPUVersion version = x86_cpu_model_resolve_version(model); + const CPUCaches *cache_info = model->cpudef->cache_info; + + if (version == CPU_VERSION_LEGACY) { + return cache_info; + } + + for (vdef = x86_cpu_def_get_versions(model->cpudef); vdef->version; vdef++) { + if (vdef->cache_info) { + cache_info = vdef->cache_info; + } + + if (vdef->version == version) { + break; + } + } + + assert(vdef->version == version); + return cache_info; +} + /* * Load data from X86CPUDefinition into a X86CPU object. * Only for builtin_x86_defs models initialized with x86_register_cpudef_types. 
@@ -5257,7 +5283,7 @@ static void x86_cpu_load_model(X86CPU *cpu, X86CPUModel *model) } /* legacy-cache defaults to 'off' if CPU model provides cache info */ - cpu->legacy_cache = !def->cache_info; + cpu->legacy_cache = !x86_cpu_get_versioned_cache_info(cpu, model); env->features[FEAT_1_ECX] |= CPUID_EXT_HYPERVISOR; @@ -6736,14 +6762,17 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) /* Cache information initialization */ if (!cpu->legacy_cache) { - if (!xcc->model || !xcc->model->cpudef->cache_info) { + const CPUCaches *cache_info = + x86_cpu_get_versioned_cache_info(cpu, xcc->model); + + if (!xcc->model || !cache_info) { g_autofree char *name = x86_cpu_class_get_model_name(xcc); error_setg(errp, "CPU model '%s' doesn't support legacy-cache=off", name); return; } env->cache_info_cpuid2 = env->cache_info_cpuid4 = env->cache_info_amd = - *xcc->model->cpudef->cache_info; + *cache_info; } else { /* Build legacy cache information */ env->cache_info_cpuid2.l1d_cache = &legacy_l1d_cache; From d7c72735f618a7ee27ee109d8b1468193734606a Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Thu, 4 May 2023 15:53:07 -0500 Subject: [PATCH 09/16] target/i386: Add new EPYC CPU versions with updated cache_info Introduce new EPYC cpu versions: EPYC-v4 and EPYC-Rome-v3. The only difference vs. older models is an updated cache_info with the 'complex_indexing' bit unset, since this bit is not currently defined for AMD and may cause problems should it be used for something else in the future. Setting this bit will also cause CPUID validation failures when running SEV-SNP guests. Signed-off-by: Michael Roth Signed-off-by: Babu Moger Acked-by: Michael S. 
Tsirkin Message-Id: <20230504205313.225073-3-babu.moger@amd.com> Signed-off-by: Paolo Bonzini --- target/i386/cpu.c | 118 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 118 insertions(+) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 855d5abc7d..90a650a836 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -1729,6 +1729,56 @@ static const CPUCaches epyc_cache_info = { }, }; +static CPUCaches epyc_v4_cache_info = { + .l1d_cache = &(CPUCacheInfo) { + .type = DATA_CACHE, + .level = 1, + .size = 32 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 64, + .lines_per_tag = 1, + .self_init = 1, + .no_invd_sharing = true, + }, + .l1i_cache = &(CPUCacheInfo) { + .type = INSTRUCTION_CACHE, + .level = 1, + .size = 64 * KiB, + .line_size = 64, + .associativity = 4, + .partitions = 1, + .sets = 256, + .lines_per_tag = 1, + .self_init = 1, + .no_invd_sharing = true, + }, + .l2_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 2, + .size = 512 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 1024, + .lines_per_tag = 1, + }, + .l3_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 3, + .size = 8 * MiB, + .line_size = 64, + .associativity = 16, + .partitions = 1, + .sets = 8192, + .lines_per_tag = 1, + .self_init = true, + .inclusive = true, + .complex_indexing = false, + }, +}; + static const CPUCaches epyc_rome_cache_info = { .l1d_cache = &(CPUCacheInfo) { .type = DATA_CACHE, @@ -1779,6 +1829,56 @@ static const CPUCaches epyc_rome_cache_info = { }, }; +static const CPUCaches epyc_rome_v3_cache_info = { + .l1d_cache = &(CPUCacheInfo) { + .type = DATA_CACHE, + .level = 1, + .size = 32 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 64, + .lines_per_tag = 1, + .self_init = 1, + .no_invd_sharing = true, + }, + .l1i_cache = &(CPUCacheInfo) { + .type = INSTRUCTION_CACHE, + .level = 1, + .size = 32 * KiB, + .line_size = 64, + .associativity = 8, + 
.partitions = 1, + .sets = 64, + .lines_per_tag = 1, + .self_init = 1, + .no_invd_sharing = true, + }, + .l2_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 2, + .size = 512 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 1024, + .lines_per_tag = 1, + }, + .l3_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 3, + .size = 16 * MiB, + .line_size = 64, + .associativity = 16, + .partitions = 1, + .sets = 16384, + .lines_per_tag = 1, + .self_init = true, + .inclusive = true, + .complex_indexing = false, + }, +}; + static const CPUCaches epyc_milan_cache_info = { .l1d_cache = &(CPUCacheInfo) { .type = DATA_CACHE, @@ -4113,6 +4213,15 @@ static const X86CPUDefinition builtin_x86_defs[] = { { /* end of list */ } } }, + { + .version = 4, + .props = (PropValue[]) { + { "model-id", + "AMD EPYC-v4 Processor" }, + { /* end of list */ } + }, + .cache_info = &epyc_v4_cache_info + }, { /* end of list */ } } }, @@ -4232,6 +4341,15 @@ static const X86CPUDefinition builtin_x86_defs[] = { { /* end of list */ } } }, + { + .version = 3, + .props = (PropValue[]) { + { "model-id", + "AMD EPYC-Rome-v3 Processor" }, + { /* end of list */ } + }, + .cache_info = &epyc_rome_v3_cache_info + }, { /* end of list */ } } }, From bb039a230e6a7920d71d21fa9afee2653a678c48 Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Thu, 4 May 2023 15:53:08 -0500 Subject: [PATCH 10/16] target/i386: Add a couple of feature bits in 8000_0008_EBX Add the following feature bits. amd-psfd : Predictive Store Forwarding Disable: PSF is a hardware-based micro-architectural optimization designed to improve the performance of code execution by predicting address dependencies between loads and stores. While SSBD (Speculative Store Bypass Disable) disables both PSF and speculative store bypass, PSFD only disables PSF. PSFD may be desirable for the software which is concerned with the speculative behavior of PSF but desires a smaller performance impact than setting SSBD. 
Depends on the following kernel commit: b73a54321ad8 ("KVM: x86: Expose Predictive Store Forwarding Disable") stibp-always-on : Single Thread Indirect Branch Prediction mode has enhanced performance and may be left always on. The documentation for the features are available in the links below. a. Processor Programming Reference (PPR) for AMD Family 19h Model 01h, Revision B1 Processors b. SECURITY ANALYSIS OF AMD PREDICTIVE STORE FORWARDING Signed-off-by: Babu Moger Acked-by: Michael S. Tsirkin Link: https://www.amd.com/system/files/documents/security-analysis-predictive-store-forwarding.pdf Link: https://www.amd.com/system/files/TechDocs/55898_B1_pub_0.50.zip Message-Id: <20230504205313.225073-4-babu.moger@amd.com> Signed-off-by: Paolo Bonzini --- target/i386/cpu.c | 4 ++-- target/i386/cpu.h | 4 ++++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 90a650a836..25ba7d0837 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -933,10 +933,10 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { NULL, NULL, NULL, NULL, NULL, "wbnoinvd", NULL, NULL, "ibpb", NULL, "ibrs", "amd-stibp", - NULL, NULL, NULL, NULL, + NULL, "stibp-always-on", NULL, NULL, NULL, NULL, NULL, NULL, "amd-ssbd", "virt-ssbd", "amd-no-ssb", NULL, - NULL, NULL, NULL, NULL, + "amd-psfd", NULL, NULL, NULL, }, .cpuid = { .eax = 0x80000008, .reg = R_EBX, }, .tcg_features = 0, diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 8504aaac68..b45a97275b 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -946,8 +946,12 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, #define CPUID_8000_0008_EBX_IBRS (1U << 14) /* Single Thread Indirect Branch Predictors */ #define CPUID_8000_0008_EBX_STIBP (1U << 15) +/* STIBP mode has enhanced performance and may be left always on */ +#define CPUID_8000_0008_EBX_STIBP_ALWAYS_ON (1U << 17) /* Speculative Store Bypass Disable */ #define CPUID_8000_0008_EBX_AMD_SSBD (1U << 24) +/* Predictive 
Store Forwarding Disable */ +#define CPUID_8000_0008_EBX_AMD_PSFD (1U << 28) #define CPUID_XSAVE_XSAVEOPT (1U << 0) #define CPUID_XSAVE_XSAVEC (1U << 1) From b70eec312b185197d639bff689007727e596afd1 Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Thu, 4 May 2023 15:53:09 -0500 Subject: [PATCH 11/16] target/i386: Add feature bits for CPUID_Fn80000021_EAX MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the following feature bits. no-nested-data-bp : Processor ignores nested data breakpoints. lfence-always-serializing : LFENCE instruction is always serializing. null-sel-clr-base : Null Selector Clears Base. When this bit is set, a null segment load clears the segment base. The documentation for the features is available in the links below. a. Processor Programming Reference (PPR) for AMD Family 19h Model 01h, Revision B1 Processors b. AMD64 Architecture Programmer’s Manual Volumes 1–5 Publication No. Revision 40332 4.05 Date October 2022 Signed-off-by: Babu Moger Acked-by: Michael S. 
Tsirkin Link: https://www.amd.com/system/files/TechDocs/55898_B1_pub_0.50.zip Link: https://www.amd.com/system/files/TechDocs/40332_4.05.pdf Message-Id: <20230504205313.225073-5-babu.moger@amd.com> Signed-off-by: Paolo Bonzini --- target/i386/cpu.c | 24 ++++++++++++++++++++++++ target/i386/cpu.h | 8 ++++++++ 2 files changed, 32 insertions(+) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 25ba7d0837..fd3909b5a3 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -942,6 +942,22 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { .tcg_features = 0, .unmigratable_flags = 0, }, + [FEAT_8000_0021_EAX] = { + .type = CPUID_FEATURE_WORD, + .feat_names = { + "no-nested-data-bp", NULL, "lfence-always-serializing", NULL, + NULL, NULL, "null-sel-clr-base", NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + }, + .cpuid = { .eax = 0x80000021, .reg = R_EAX, }, + .tcg_features = 0, + .unmigratable_flags = 0, + }, [FEAT_XSAVE] = { .type = CPUID_FEATURE_WORD, .feat_names = { @@ -6168,6 +6184,10 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, *ebx |= (sev_get_reduced_phys_bits() & 0x3f) << 6; /* EBX[11:6] */ } break; + case 0x80000021: + *eax = env->features[FEAT_8000_0021_EAX]; + *ebx = *ecx = *edx = 0; + break; default: /* reserved values: zero */ *eax = 0; @@ -6597,6 +6617,10 @@ void x86_cpu_expand_features(X86CPU *cpu, Error **errp) x86_cpu_adjust_level(cpu, &env->cpuid_min_xlevel, 0x8000001F); } + if (env->features[FEAT_8000_0021_EAX]) { + x86_cpu_adjust_level(cpu, &env->cpuid_min_xlevel, 0x80000021); + } + /* SGX requires CPUID[0x12] for EPC enumeration */ if (env->features[FEAT_7_0_EBX] & CPUID_7_0_EBX_SGX) { x86_cpu_adjust_level(cpu, &env->cpuid_min_level, 0x12); diff --git a/target/i386/cpu.h b/target/i386/cpu.h index b45a97275b..b8c56936bd 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -600,6 +600,7 @@ typedef 
enum FeatureWord { FEAT_8000_0001_ECX, /* CPUID[8000_0001].ECX */ FEAT_8000_0007_EDX, /* CPUID[8000_0007].EDX */ FEAT_8000_0008_EBX, /* CPUID[8000_0008].EBX */ + FEAT_8000_0021_EAX, /* CPUID[8000_0021].EAX */ FEAT_C000_0001_EDX, /* CPUID[C000_0001].EDX */ FEAT_KVM, /* CPUID[4000_0001].EAX (KVM_CPUID_FEATURES) */ FEAT_KVM_HINTS, /* CPUID[4000_0001].EDX */ @@ -953,6 +954,13 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, /* Predictive Store Forwarding Disable */ #define CPUID_8000_0008_EBX_AMD_PSFD (1U << 28) +/* Processor ignores nested data breakpoints */ +#define CPUID_8000_0021_EAX_No_NESTED_DATA_BP (1U << 0) +/* LFENCE is always serializing */ +#define CPUID_8000_0021_EAX_LFENCE_ALWAYS_SERIALIZING (1U << 2) +/* Null Selector Clears Base */ +#define CPUID_8000_0021_EAX_NULL_SEL_CLR_BASE (1U << 6) + #define CPUID_XSAVE_XSAVEOPT (1U << 0) #define CPUID_XSAVE_XSAVEC (1U << 1) #define CPUID_XSAVE_XGETBV1 (1U << 2) From 27f03be6f59d04bd5673ba1e1628b2b490f9a9ff Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Thu, 4 May 2023 15:53:10 -0500 Subject: [PATCH 12/16] target/i386: Add missing feature bits in EPYC-Milan model MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the following feature bits for EPYC-Milan model and bump the version. vaes : Vector VAES(ENC|DEC), VAES(ENC|DEC)LAST instruction support vpclmulqdq : Vector VPCLMULQDQ instruction support stibp-always-on : Single Thread Indirect Branch Prediction Mode has enhanced performance and may be left Always on amd-psfd : Predictive Store Forward Disable no-nested-data-bp : Processor ignores nested data breakpoints lfence-always-serializing : LFENCE instruction is always serializing null-sel-clr-base : Null Selector Clears Base. When this bit is set, a null segment load clears the segment base These new features will be added in EPYC-Milan-v2. The "-cpu help" output after the change will be. 
x86 EPYC-Milan (alias configured by machine type) x86 EPYC-Milan-v1 AMD EPYC-Milan Processor x86 EPYC-Milan-v2 AMD EPYC-Milan Processor The documentation for the features are available in the links below. a. Processor Programming Reference (PPR) for AMD Family 19h Model 01h, Revision B1 Processors b. SECURITY ANALYSIS OF AMD PREDICTIVE STORE FORWARDING c. AMD64 Architecture Programmer’s Manual Volumes 1–5 Publication No. Revision 40332 4.05 Date October 2022 Signed-off-by: Babu Moger Acked-by: Michael S. Tsirkin Link: https://www.amd.com/system/files/TechDocs/55898_B1_pub_0.50.zip Link: https://www.amd.com/system/files/documents/security-analysis-predictive-store-forwarding.pdf Link: https://www.amd.com/system/files/TechDocs/40332_4.05.pdf Message-Id: <20230504205313.225073-6-babu.moger@amd.com> Signed-off-by: Paolo Bonzini --- target/i386/cpu.c | 70 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index fd3909b5a3..3970463114 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -1945,6 +1945,56 @@ static const CPUCaches epyc_milan_cache_info = { }, }; +static const CPUCaches epyc_milan_v2_cache_info = { + .l1d_cache = &(CPUCacheInfo) { + .type = DATA_CACHE, + .level = 1, + .size = 32 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 64, + .lines_per_tag = 1, + .self_init = 1, + .no_invd_sharing = true, + }, + .l1i_cache = &(CPUCacheInfo) { + .type = INSTRUCTION_CACHE, + .level = 1, + .size = 32 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 64, + .lines_per_tag = 1, + .self_init = 1, + .no_invd_sharing = true, + }, + .l2_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 2, + .size = 512 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 1024, + .lines_per_tag = 1, + }, + .l3_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 3, + .size = 32 * MiB, + .line_size = 64, + 
.associativity = 16, + .partitions = 1, + .sets = 32768, + .lines_per_tag = 1, + .self_init = true, + .inclusive = true, + .complex_indexing = false, + }, +}; + /* The following VMX features are not supported by KVM and are left out in the * CPU definitions: * @@ -4423,6 +4473,26 @@ static const X86CPUDefinition builtin_x86_defs[] = { .xlevel = 0x8000001E, .model_id = "AMD EPYC-Milan Processor", .cache_info = &epyc_milan_cache_info, + .versions = (X86CPUVersionDefinition[]) { + { .version = 1 }, + { + .version = 2, + .props = (PropValue[]) { + { "model-id", + "AMD EPYC-Milan-v2 Processor" }, + { "vaes", "on" }, + { "vpclmulqdq", "on" }, + { "stibp-always-on", "on" }, + { "amd-psfd", "on" }, + { "no-nested-data-bp", "on" }, + { "lfence-always-serializing", "on" }, + { "null-sel-clr-base", "on" }, + { /* end of list */ } + }, + .cache_info = &epyc_milan_v2_cache_info + }, + { /* end of list */ } + } }, }; From 62a798d4bc2c3e767d94670776c77a7df274d7c5 Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Thu, 4 May 2023 15:53:11 -0500 Subject: [PATCH 13/16] target/i386: Add VNMI and automatic IBRS feature bits MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the following feature bits. vnmi: Virtual NMI (VNMI) allows the hypervisor to inject the NMI into the guest without using the Event Injection mechanism, meaning it is not required to track the guest NMI and intercept the IRET. The presence of this feature is indicated via the CPUID function 0x8000000A_EDX[25]. auto-ibrs : The AMD Zen4 core supports a new feature called Automatic IBRS. It is a "set-and-forget" feature that means that, unlike e.g., s/w-toggled SPEC_CTRL.IBRS, h/w manages its IBRS mitigation resources automatically across CPL transitions. The presence of this feature is indicated via the CPUID function 0x80000021_EAX[8]. The documentation for the features is available in the links below. a.
Processor Programming Reference (PPR) for AMD Family 19h Model 01h, Revision B1 Processors b. AMD64 Architecture Programmer’s Manual Volumes 1–5 Publication No. Revision 40332 4.05 Date October 2022 Signed-off-by: Santosh Shukla Signed-off-by: Kim Phillips Signed-off-by: Babu Moger Link: https://www.amd.com/system/files/TechDocs/55898_B1_pub_0.50.zip Link: https://www.amd.com/system/files/TechDocs/40332_4.05.pdf Message-Id: <20230504205313.225073-7-babu.moger@amd.com> Signed-off-by: Paolo Bonzini --- target/i386/cpu.c | 4 ++-- target/i386/cpu.h | 3 +++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 3970463114..6836d7fd1c 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -809,7 +809,7 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { "pfthreshold", "avic", NULL, "v-vmsave-vmload", "vgif", NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, + NULL, "vnmi", NULL, NULL, "svme-addr-chk", NULL, NULL, NULL, }, .cpuid = { .eax = 0x8000000A, .reg = R_EDX, }, @@ -947,7 +947,7 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { .feat_names = { "no-nested-data-bp", NULL, "lfence-always-serializing", NULL, NULL, NULL, "null-sel-clr-base", NULL, - NULL, NULL, NULL, NULL, + "auto-ibrs", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, diff --git a/target/i386/cpu.h b/target/i386/cpu.h index b8c56936bd..8ade71ab55 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -774,6 +774,7 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, #define CPUID_SVM_AVIC (1U << 13) #define CPUID_SVM_V_VMSAVE_VMLOAD (1U << 15) #define CPUID_SVM_VGIF (1U << 16) +#define CPUID_SVM_VNMI (1U << 25) #define CPUID_SVM_SVME_ADDR_CHK (1U << 28) /* Support RDFSBASE/RDGSBASE/WRFSBASE/WRGSBASE */ @@ -960,6 +961,8 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, #define CPUID_8000_0021_EAX_LFENCE_ALWAYS_SERIALIZING (1U << 2) /* Null Selector Clears 
Base */ #define CPUID_8000_0021_EAX_NULL_SEL_CLR_BASE (1U << 6) +/* Automatic IBRS */ +#define CPUID_8000_0021_EAX_AUTO_IBRS (1U << 8) #define CPUID_XSAVE_XSAVEOPT (1U << 0) #define CPUID_XSAVE_XSAVEC (1U << 1) From 166b1741884dd4fd7090b753cd7333868457a29b Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Thu, 4 May 2023 15:53:12 -0500 Subject: [PATCH 14/16] target/i386: Add EPYC-Genoa model to support Zen 4 processor series Adds the support for AMD EPYC Genoa generation processors. The model display for the new processor will be EPYC-Genoa. Adds the following new feature bits on top of the feature bits from the previous generation EPYC models. avx512f : AVX-512 Foundation instruction avx512dq : AVX-512 Doubleword & Quadword Instruction avx512ifma : AVX-512 Integer Fused Multiply Add instruction avx512cd : AVX-512 Conflict Detection instruction avx512bw : AVX-512 Byte and Word Instructions avx512vl : AVX-512 Vector Length Extension Instructions avx512vbmi : AVX-512 Vector Byte Manipulation Instruction avx512_vbmi2 : AVX-512 Additional Vector Byte Manipulation Instruction gfni : AVX-512 Galois Field New Instructions avx512_vnni : AVX-512 Vector Neural Network Instructions avx512_bitalg : AVX-512 Bit Algorithms, add bit algorithms Instructions avx512_vpopcntdq: AVX-512 Vector Population Count Doubleword and Quadword Instructions avx512_bf16 : AVX-512 BFLOAT16 instructions la57 : 57-bit virtual address support (5-level Page Tables) vnmi : Virtual NMI (VNMI) allows the hypervisor to inject the NMI into the guest without using the Event Injection mechanism, meaning it is not required to track the guest NMI and intercept the IRET. auto-ibrs : The AMD Zen4 core supports a new feature called Automatic IBRS. It is a "set-and-forget" feature that means that, unlike e.g., s/w-toggled SPEC_CTRL.IBRS, h/w manages its IBRS mitigation resources automatically across CPL transitions.
Signed-off-by: Babu Moger Message-Id: <20230504205313.225073-8-babu.moger@amd.com> Signed-off-by: Paolo Bonzini --- target/i386/cpu.c | 122 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 122 insertions(+) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 6836d7fd1c..4187759f10 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -1995,6 +1995,56 @@ static const CPUCaches epyc_milan_v2_cache_info = { }, }; +static const CPUCaches epyc_genoa_cache_info = { + .l1d_cache = &(CPUCacheInfo) { + .type = DATA_CACHE, + .level = 1, + .size = 32 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 64, + .lines_per_tag = 1, + .self_init = 1, + .no_invd_sharing = true, + }, + .l1i_cache = &(CPUCacheInfo) { + .type = INSTRUCTION_CACHE, + .level = 1, + .size = 32 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 64, + .lines_per_tag = 1, + .self_init = 1, + .no_invd_sharing = true, + }, + .l2_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 2, + .size = 1 * MiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 2048, + .lines_per_tag = 1, + }, + .l3_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 3, + .size = 32 * MiB, + .line_size = 64, + .associativity = 16, + .partitions = 1, + .sets = 32768, + .lines_per_tag = 1, + .self_init = true, + .inclusive = true, + .complex_indexing = false, + }, +}; + /* The following VMX features are not supported by KVM and are left out in the * CPU definitions: * @@ -4494,6 +4544,78 @@ static const X86CPUDefinition builtin_x86_defs[] = { { /* end of list */ } } }, + { + .name = "EPYC-Genoa", + .level = 0xd, + .vendor = CPUID_VENDOR_AMD, + .family = 25, + .model = 17, + .stepping = 0, + .features[FEAT_1_EDX] = + CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX | CPUID_CLFLUSH | + CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA | CPUID_PGE | + CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 | CPUID_MCE | + CPUID_PAE | 
CPUID_MSR | CPUID_TSC | CPUID_PSE | CPUID_DE | + CPUID_VME | CPUID_FP87, + .features[FEAT_1_ECX] = + CPUID_EXT_RDRAND | CPUID_EXT_F16C | CPUID_EXT_AVX | + CPUID_EXT_XSAVE | CPUID_EXT_AES | CPUID_EXT_POPCNT | + CPUID_EXT_MOVBE | CPUID_EXT_SSE42 | CPUID_EXT_SSE41 | + CPUID_EXT_PCID | CPUID_EXT_CX16 | CPUID_EXT_FMA | + CPUID_EXT_SSSE3 | CPUID_EXT_MONITOR | CPUID_EXT_PCLMULQDQ | + CPUID_EXT_SSE3, + .features[FEAT_8000_0001_EDX] = + CPUID_EXT2_LM | CPUID_EXT2_RDTSCP | CPUID_EXT2_PDPE1GB | + CPUID_EXT2_FFXSR | CPUID_EXT2_MMXEXT | CPUID_EXT2_NX | + CPUID_EXT2_SYSCALL, + .features[FEAT_8000_0001_ECX] = + CPUID_EXT3_OSVW | CPUID_EXT3_3DNOWPREFETCH | + CPUID_EXT3_MISALIGNSSE | CPUID_EXT3_SSE4A | CPUID_EXT3_ABM | + CPUID_EXT3_CR8LEG | CPUID_EXT3_SVM | CPUID_EXT3_LAHF_LM | + CPUID_EXT3_TOPOEXT | CPUID_EXT3_PERFCORE, + .features[FEAT_8000_0008_EBX] = + CPUID_8000_0008_EBX_CLZERO | CPUID_8000_0008_EBX_XSAVEERPTR | + CPUID_8000_0008_EBX_WBNOINVD | CPUID_8000_0008_EBX_IBPB | + CPUID_8000_0008_EBX_IBRS | CPUID_8000_0008_EBX_STIBP | + CPUID_8000_0008_EBX_STIBP_ALWAYS_ON | + CPUID_8000_0008_EBX_AMD_SSBD | CPUID_8000_0008_EBX_AMD_PSFD, + .features[FEAT_8000_0021_EAX] = + CPUID_8000_0021_EAX_No_NESTED_DATA_BP | + CPUID_8000_0021_EAX_LFENCE_ALWAYS_SERIALIZING | + CPUID_8000_0021_EAX_NULL_SEL_CLR_BASE | + CPUID_8000_0021_EAX_AUTO_IBRS, + .features[FEAT_7_0_EBX] = + CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_BMI1 | CPUID_7_0_EBX_AVX2 | + CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS | + CPUID_7_0_EBX_INVPCID | CPUID_7_0_EBX_AVX512F | + CPUID_7_0_EBX_AVX512DQ | CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX | + CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_AVX512IFMA | + CPUID_7_0_EBX_CLFLUSHOPT | CPUID_7_0_EBX_CLWB | + CPUID_7_0_EBX_AVX512CD | CPUID_7_0_EBX_SHA_NI | + CPUID_7_0_EBX_AVX512BW | CPUID_7_0_EBX_AVX512VL, + .features[FEAT_7_0_ECX] = + CPUID_7_0_ECX_AVX512_VBMI | CPUID_7_0_ECX_UMIP | CPUID_7_0_ECX_PKU | + CPUID_7_0_ECX_AVX512_VBMI2 | CPUID_7_0_ECX_GFNI | + CPUID_7_0_ECX_VAES | 
CPUID_7_0_ECX_VPCLMULQDQ | + CPUID_7_0_ECX_AVX512VNNI | CPUID_7_0_ECX_AVX512BITALG | + CPUID_7_0_ECX_AVX512_VPOPCNTDQ | CPUID_7_0_ECX_LA57 | + CPUID_7_0_ECX_RDPID, + .features[FEAT_7_0_EDX] = + CPUID_7_0_EDX_FSRM, + .features[FEAT_7_1_EAX] = + CPUID_7_1_EAX_AVX512_BF16, + .features[FEAT_XSAVE] = + CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC | + CPUID_XSAVE_XGETBV1 | CPUID_XSAVE_XSAVES, + .features[FEAT_6_EAX] = + CPUID_6_EAX_ARAT, + .features[FEAT_SVM] = + CPUID_SVM_NPT | CPUID_SVM_NRIPSAVE | CPUID_SVM_VNMI | + CPUID_SVM_SVME_ADDR_CHK, + .xlevel = 0x80000022, + .model_id = "AMD EPYC-Genoa Processor", + .cache_info = &epyc_genoa_cache_info, + }, }; /* From 23b2a3be999bd53cfac63325b8bc02a205f1fe5b Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 3 May 2023 19:30:40 +0200 Subject: [PATCH 15/16] docs: clarify --without-default-devices --without-default-devices is a specialized option that should only be used when configs/devices/ is changed manually. Explain the model towards which we should tend, with respect to failures to start guests and to run "make check". Signed-off-by: Paolo Bonzini --- docs/devel/kconfig.rst | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/docs/devel/kconfig.rst b/docs/devel/kconfig.rst index ac9453eba9..e3a544e463 100644 --- a/docs/devel/kconfig.rst +++ b/docs/devel/kconfig.rst @@ -282,9 +282,19 @@ want to change some lines in the first group, for example like this:: CONFIG_PCI_DEVICES=y #CONFIG_TEST_DEVICES=n -and/or pick a subset of the devices in those device groups. Right now -there is no single place that lists all the optional devices for -``CONFIG_PCI_DEVICES`` and ``CONFIG_TEST_DEVICES``. In the future, +and/or pick a subset of the devices in those device groups. Without +further modifications to ``configs/devices/``, a system emulator built +without default devices might not do much more than start an empty +machine, and even then only if ``--nodefaults`` is specified on the +command line. 
Starting a VM *without* ``--nodefaults`` is allowed to +fail, but should never abort. Failures in ``make check`` with +``--without-default-devices`` are considered bugs in the test code: +the tests should either use ``--nodefaults``, or should be skipped +if a necessary device is not present in the build. Such failures +should not be worked around with ``select`` directives. + +Right now there is no single place that lists all the optional devices +for ``CONFIG_PCI_DEVICES`` and ``CONFIG_TEST_DEVICES``. In the future, we expect that ``.mak`` files will be automatically generated, so that they will include all these symbols and some help text on what they do. From ef709860ea12ec59c4cd7373bd2fd7a4e50143ee Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 4 May 2023 10:20:46 +0200 Subject: [PATCH 16/16] meson: leave unnecessary modules out of the build meson.build files choose whether to build modules based on foo.found() expressions. If a feature is enabled (e.g. --enable-gtk), these expressions are true even if the code is not used by any emulator, and this results in an unexpected difference between modular and non-modular builds. For non-modular builds, the files are not included in any binary, and therefore the source files are never processed. For modular builds, however, all .so files are unconditionally built by default, and therefore a normal "make" tries to build them. However, the corresponding trace-*.h files are absent due to this conditional: if have_system trace_events_subdirs += [ ... 'ui', ... ] endif which was added to avoid wasting time running tracetool on unused trace-events files. This causes a compilation failure; fix it by skipping module builds entirely if (depending on the module directory) have_block or have_system is false.
Reported-by: Michael Tokarev Cc: qemu-stable@nongnu.org Signed-off-by: Paolo Bonzini --- meson.build | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/meson.build b/meson.build index 229eb585f7..27782f8f52 100644 --- a/meson.build +++ b/meson.build @@ -3217,6 +3217,10 @@ modinfo_files = [] block_mods = [] softmmu_mods = [] foreach d, list : modules + if not (d == 'block' ? have_block : have_system) + continue + endif + foreach m, module_ss : list if enable_modules and targetos != 'windows' module_ss = module_ss.apply(config_all, strict: false)