From fb802acdc8b162084e9e60d42aeba79097d14d2b Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 18 Mar 2025 15:03:48 +1000 Subject: [PATCH 01/12] ppc/spapr: Fix RTAS stopped state This change takes the CPUPPCState 'quiesced' field added for powernv hardware CPU core controls (used to stop and start cores), and extends it to spapr to model the "RTAS stopped" state. This prevents the schedulers attempting to run stopped CPUs unexpectedly, which can cause hangs and possibly other unexpected behaviour. The detail of the problematic situation is this: A KVM spapr guest boots with all secondary CPUs defined to be in the "RTAS stopped" state. In this state, the CPU is only responsive to the start-cpu RTAS call. This behaviour is modeled in QEMU with the start_powered_off feature, which sets ->halted on secondary CPUs at boot. ->halted=true looks like an idle / sleep / power-save state which typically is responsive to asynchronous interrupts, but spapr clears wake-on-interrupt bits in the LPCR SPR. This more-or-less works. Commit e8291ec16da8 ("target/ppc: fix timebase register reset state") recently caused the decrementer to expire sooner at boot, causing a decrementer exception on secondary CPUs in RTAS stopped state. This was not a problem on TCG, but KVM limits how a guest can modify LPCR, in particular it prevents the clearing of wake-on-interrupt bits, and so in the course of CPU register synchronisation, the LPCR as set by spapr to model the RTAS stopped state is overwritten with KVM's LPCR value, and that then causes QEMU's interrupt code to notice the expired decrementer exception, turn that into an interrupt, and set CPU_INTERRUPT_HARD. That causes the CPU to be kicked, and the KVM vCPU thread to loop calling kvm_cpu_exec(). kvm_cpu_exec() calls kvm_arch_process_async_events(), which on ppc just returns ->halted. This is still true, so it returns immediately with EXCP_HLT, and the vCPU never goes to sleep because qemu_wait_io_event() sees CPU_INTERRUPT_HARD is set. All this while the vCPU holds the bql. This causes the boot CPU to eventually lock up when it needs the bql. So make 'quiesced' represent the "RTAS stopped" state, and have it explicitly not respond to exceptions (interrupt conditions) rather than rely on machine register state to model that state. This matches the powernv quiesced state very well because it essentially turns off the CPU core via a side-band control unit. There are still issues with QEMU and KVM idea of LPCR diverging and that is quite ugly and fragile that should be fixed. spapr should synchronize its LPCR properly with KVM, and not try to use values that KVM does not support. Reported-by: Misbah Anjum N Tested-by: Misbah Anjum N Signed-off-by: Nicholas Piggin --- hw/ppc/pnv_core.c | 6 +++++- hw/ppc/spapr_cpu_core.c | 6 ++++++ hw/ppc/spapr_rtas.c | 5 ++++- target/ppc/cpu.h | 11 +++++++++++ target/ppc/excp_helper.c | 4 ++++ 5 files changed, 30 insertions(+), 2 deletions(-) diff --git a/hw/ppc/pnv_core.c b/hw/ppc/pnv_core.c index 99d9644ee3..a33977da18 100644 --- a/hw/ppc/pnv_core.c +++ b/hw/ppc/pnv_core.c @@ -248,21 +248,25 @@ static void pnv_core_power10_xscom_write(void *opaque, hwaddr addr, if (val & PPC_BIT(7 + 8 * i)) { /* stop */ val &= ~PPC_BIT(7 + 8 * i); - cpu_pause(cs); env->quiesced = true; + ppc_maybe_interrupt(env); + cpu_pause(cs); } if (val & PPC_BIT(6 + 8 * i)) { /* start */ val &= ~PPC_BIT(6 + 8 * i); env->quiesced = false; + ppc_maybe_interrupt(env); cpu_resume(cs); } if (val & PPC_BIT(4 + 8 * i)) { /* sreset */ val &= ~PPC_BIT(4 + 8 * i); env->quiesced = false; + ppc_maybe_interrupt(env); pnv_cpu_do_nmi_resume(cs); } if (val & PPC_BIT(3 + 8 * i)) { /* clear maint */ env->quiesced = false; + ppc_maybe_interrupt(env); /* * Hardware has very particular cases for where clear maint * must be used and where start must be used to resume a diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c index 0671d9e44b..faf9170ba6 100644 --- a/hw/ppc/spapr_cpu_core.c +++ b/hw/ppc/spapr_cpu_core.c @@ -37,6 +37,9 @@ static void spapr_reset_vcpu(PowerPCCPU *cpu) cpu_reset(cs); + env->quiesced = true; /* set "RTAS stopped" state. */ + ppc_maybe_interrupt(env); + /* * "PowerPC Processor binding to IEEE 1275" defines the initial MSR state * as 32bit (MSR_SF=0) with MSR_ME=1 and MSR_FP=1 in "8.2.1. Initial @@ -98,6 +101,9 @@ void spapr_cpu_set_entry_state(PowerPCCPU *cpu, target_ulong nip, CPU(cpu)->halted = 0; /* Enable Power-saving mode Exit Cause exceptions */ ppc_store_lpcr(cpu, env->spr[SPR_LPCR] | pcc->lpcr_pm); + + env->quiesced = false; /* clear "RTAS stopped" state. */ + ppc_maybe_interrupt(env); } /* diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c index 503d441b48..78309dbb09 100644 --- a/hw/ppc/spapr_rtas.c +++ b/hw/ppc/spapr_rtas.c @@ -110,7 +110,8 @@ static void rtas_query_cpu_stopped_state(PowerPCCPU *cpu_, id = rtas_ld(args, 0); cpu = spapr_find_cpu(id); if (cpu != NULL) { - if (CPU(cpu)->halted) { + CPUPPCState *env = &cpu->env; + if (env->quiesced) { rtas_st(rets, 1, 0); } else { rtas_st(rets, 1, 2); @@ -215,6 +216,8 @@ static void rtas_stop_self(PowerPCCPU *cpu, SpaprMachineState *spapr, * For the same reason, set PSSCR_EC. */ env->spr[SPR_PSSCR] |= PSSCR_EC; + env->quiesced = true; /* set "RTAS stopped" state. */ + ppc_maybe_interrupt(env); cs->halted = 1; ppc_store_lpcr(cpu, env->spr[SPR_LPCR] & ~pcc->lpcr_pm); kvmppc_set_reg_ppc_online(cpu, 0); diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h index efab54a068..3ee83517dc 100644 --- a/target/ppc/cpu.h +++ b/target/ppc/cpu.h @@ -1356,6 +1356,17 @@ struct CPUArchState { * special way (such as routing some resume causes to 0x100, i.e. sreset). */ bool resume_as_sreset; + + /* + * On powernv, quiesced means the CPU has been stopped using PC direct + * control xscom registers. + * + * On spapr, quiesced means it is in the "RTAS stopped" state. + * + * The core halted/stopped variables aren't sufficient for this, because + * they can be changed with various side-band operations like qmp cont, + * powersave interrupts, etc. + */ bool quiesced; #endif diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c index 44e19aacd8..c941c89806 100644 --- a/target/ppc/excp_helper.c +++ b/target/ppc/excp_helper.c @@ -1951,6 +1951,10 @@ static int ppc_next_unmasked_interrupt(CPUPPCState *env) target_ulong lpcr = env->spr[SPR_LPCR]; bool async_deliver; + if (unlikely(env->quiesced)) { + return 0; + } + #ifdef TARGET_PPC64 switch (env->excp_model) { case POWERPC_EXCP_POWER7: From 033a5649b45690d09bde5cdf15cb83453f6ac811 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Mon, 17 Mar 2025 13:20:02 +1000 Subject: [PATCH 02/12] ppc/xive: Fix typo in crowd block level calculation I introduced this bug when "tidying" the original patch, not Frederic. Paper bag for me. Fixes: 9cb7f6ebed60 ("ppc/xive2: Support group-matching when looking for target") Signed-off-by: Nicholas Piggin --- hw/intc/xive.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/intc/xive.c b/hw/intc/xive.c index c77df2c1f8..e86f274932 100644 --- a/hw/intc/xive.c +++ b/hw/intc/xive.c @@ -1686,7 +1686,7 @@ static uint8_t xive_get_group_level(bool crowd, bool ignore, * Supported crowd sizes are 2^1, 2^2, and 2^4. 2^3 is not supported. * HW will encode level 4 as the value 3. See xive2_pgofnext(). */ - switch (level) { + switch (blk) { case 1: case 2: break; From 344921309d933547974c2e85c52e2294513d9c45 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Mon, 17 Mar 2025 13:18:29 +1000 Subject: [PATCH 03/12] pnv/xive: Fix possible undefined shift error in group size calculation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Coverity discovered a potential shift overflow in group size calculation in the case of a guest error. Add checks and logs to ensure a issues are caught. Make the group and crowd error checking code more similar to one another while here. Resolves: Coverity CID 1593724 Fixes: 9cb7f6ebed60 ("ppc/xive2: Support group-matching when looking for target") Reviewed-by: Cédric Le Goater Signed-off-by: Nicholas Piggin --- hw/intc/xive.c | 27 ++++++++++++++++++++++++--- hw/intc/xive2.c | 19 ++++++++++++++----- 2 files changed, 38 insertions(+), 8 deletions(-) diff --git a/hw/intc/xive.c b/hw/intc/xive.c index e86f274932..3eb28c2265 100644 --- a/hw/intc/xive.c +++ b/hw/intc/xive.c @@ -1662,12 +1662,20 @@ uint32_t xive_get_vpgroup_size(uint32_t nvp_index) * (starting with the least significant bits) in the NVP index * gives the size of the group. */ - return 1 << (ctz32(~nvp_index) + 1); + int first_zero = cto32(nvp_index); + if (first_zero >= 31) { + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: Invalid group index 0x%08x", + nvp_index); + return 0; + } + + return 1U << (first_zero + 1); } static uint8_t xive_get_group_level(bool crowd, bool ignore, uint32_t nvp_blk, uint32_t nvp_index) { + int first_zero; uint8_t level; if (!ignore) { @@ -1675,12 +1683,25 @@ static uint8_t xive_get_group_level(bool crowd, bool ignore, return 0; } - level = (ctz32(~nvp_index) + 1) & 0b1111; + first_zero = cto32(nvp_index); + if (first_zero >= 31) { + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: Invalid group index 0x%08x", + nvp_index); + return 0; + } + + level = (first_zero + 1) & 0b1111; if (crowd) { uint32_t blk; /* crowd level is bit position of first 0 from the right in nvp_blk */ - blk = ctz32(~nvp_blk) + 1; + first_zero = cto32(nvp_blk); + if (first_zero >= 31) { + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: Invalid crowd block 0x%08x", + nvp_blk); + return 0; + } + blk = first_zero + 1; /* * Supported crowd sizes are 2^1, 2^2, and 2^4. 2^3 is not supported. diff --git a/hw/intc/xive2.c b/hw/intc/xive2.c index f8ef615487..311b42e15d 100644 --- a/hw/intc/xive2.c +++ b/hw/intc/xive2.c @@ -1153,13 +1153,15 @@ static bool xive2_vp_match_mask(uint32_t cam1, uint32_t cam2, static uint8_t xive2_get_vp_block_mask(uint32_t nvt_blk, bool crowd) { - uint8_t size, block_mask = 0b1111; + uint8_t block_mask = 0b1111; /* 3 supported crowd sizes: 2, 4, 16 */ if (crowd) { - size = xive_get_vpgroup_size(nvt_blk); - if (size == 8) { - qemu_log_mask(LOG_GUEST_ERROR, "XIVE: Invalid crowd size of 8n"); + uint32_t size = xive_get_vpgroup_size(nvt_blk); + + if (size != 2 && size != 4 && size != 16) { + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: Invalid crowd size of %d", + size); return block_mask; } block_mask &= ~(size - 1); @@ -1172,7 +1174,14 @@ static uint32_t xive2_get_vp_index_mask(uint32_t nvt_index, bool cam_ignore) uint32_t index_mask = 0xFFFFFF; /* 24 bits */ if (cam_ignore) { - index_mask &= ~(xive_get_vpgroup_size(nvt_index) - 1); + uint32_t size = xive_get_vpgroup_size(nvt_index); + + if (size < 2) { + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: Invalid group size of %d", + size); + return index_mask; + } + index_mask &= ~(size - 1); } return index_mask; } From e0b9357337e4005d7915d8c746eb3ce66c61fac0 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Mon, 17 Mar 2025 13:20:49 +1000 Subject: [PATCH 04/12] ppc/xive2: Fix logical / bitwise comparison typo MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The comparison as written is always false (perhaps confusingly, because the functions/macros are not really booleans but return 0 or the tested bit value). Change to use logical-and. Resolves: Coverity CID 1593721 Reviewed-by: Cédric Le Goater Signed-off-by: Nicholas Piggin --- hw/intc/xive2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/intc/xive2.c b/hw/intc/xive2.c index 311b42e15d..7d584dfafa 100644 --- a/hw/intc/xive2.c +++ b/hw/intc/xive2.c @@ -1344,7 +1344,7 @@ static void xive2_router_end_notify(Xive2Router *xrtr, uint8_t end_blk, return; } - if (xive2_end_is_crowd(&end) & !xive2_end_is_ignore(&end)) { + if (xive2_end_is_crowd(&end) && !xive2_end_is_ignore(&end)) { qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid END, 'crowd' bit requires 'ignore' bit\n"); return; From 965797d19a0d0b5dbe73f1afa110576589d25003 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Mon, 17 Mar 2025 13:49:36 +1000 Subject: [PATCH 05/12] ppc/spapr: Fix possible pa_features memory overflow MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Coverity reports a possible memory overflow in spapr_dt_pa_features(). This should not be a true bug since DAWR1 cap is only be true for CPU_POWERPC_LOGICAL_3_10. Add an assertion to ensure any bug there is caught. Resolves: Coverity CID 1593722 Fixes: 5f361ea187ba ("ppc: spapr: Enable 2nd DAWR on Power10 pSeries machine") Reviewed-By: Shivaprasad G Bhat Reviewed-by: Cédric Le Goater Signed-off-by: Nicholas Piggin --- hw/ppc/spapr.c | 1 + 1 file changed, 1 insertion(+) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index a415e51d07..9865d7147f 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -296,6 +296,7 @@ static void spapr_dt_pa_features(SpaprMachineState *spapr, pa_features[40 + 2] &= ~0x80; /* Radix MMU */ } if (spapr_get_cap(spapr, SPAPR_CAP_DAWR1)) { + g_assert(pa_size > 66); pa_features[66] |= 0x80; } From ce5a32d18054fe468e3536f0a63026d5b196057b Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Mon, 17 Mar 2025 14:01:25 +1000 Subject: [PATCH 06/12] ppc/pnv: Move the PNOR LPC address into struct PnvPnor MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rather than use the hardcoded define throughout the tree for the PNOR LPC address, keep it within the PnvPnor object. This should solve a dead code issue in the BMC HIOMAP checks where Coverity (correctly) reported that the sanity checks are dead code. We would like to keep the sanity checks without turning them into a compile time assert in case we would like to make them configurable in future. Fixes: 4c84a0a4a6e5 ("ppc/pnv: Add a PNOR address and size sanity checks") Resolves: Coverity CID 1593723 Reviewed-by: Cédric Le Goater Signed-off-by: Nicholas Piggin --- hw/ppc/pnv.c | 2 +- hw/ppc/pnv_bmc.c | 4 ++-- hw/ppc/pnv_pnor.c | 2 ++ include/hw/ppc/pnv_pnor.h | 1 + 4 files changed, 6 insertions(+), 3 deletions(-) diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c index 59365370c3..63f2232f32 100644 --- a/hw/ppc/pnv.c +++ b/hw/ppc/pnv.c @@ -1191,7 +1191,7 @@ static void pnv_init(MachineState *machine) * Since we can not reach the remote BMC machine with LPC memops, * map it always for now. */ - memory_region_add_subregion(pnv->chips[0]->fw_mr, PNOR_SPI_OFFSET, + memory_region_add_subregion(pnv->chips[0]->fw_mr, pnv->pnor->lpc_address, &pnv->pnor->mmio); /* diff --git a/hw/ppc/pnv_bmc.c b/hw/ppc/pnv_bmc.c index 811ba3d7a4..fb70a8c1f2 100644 --- a/hw/ppc/pnv_bmc.c +++ b/hw/ppc/pnv_bmc.c @@ -174,8 +174,8 @@ static void hiomap_cmd(IPMIBmcSim *ibs, uint8_t *cmd, unsigned int cmd_len, { PnvPnor *pnor = PNV_PNOR(object_property_get_link(OBJECT(ibs), "pnor", &error_abort)); + uint32_t pnor_addr = pnor->lpc_address; uint32_t pnor_size = pnor->size; - uint32_t pnor_addr = PNOR_SPI_OFFSET; bool readonly = false; rsp_buffer_push(rsp, cmd[2]); @@ -251,8 +251,8 @@ static const IPMINetfn hiomap_netfn = { void pnv_bmc_set_pnor(IPMIBmc *bmc, PnvPnor *pnor) { + uint32_t pnor_addr = pnor->lpc_address; uint32_t pnor_size = pnor->size; - uint32_t pnor_addr = PNOR_SPI_OFFSET; if (!pnv_bmc_is_simulator(bmc)) { return; diff --git a/hw/ppc/pnv_pnor.c b/hw/ppc/pnv_pnor.c index 863e2e70ac..9db44ca21d 100644 --- a/hw/ppc/pnv_pnor.c +++ b/hw/ppc/pnv_pnor.c @@ -108,6 +108,8 @@ static void pnv_pnor_realize(DeviceState *dev, Error **errp) memset(s->storage, 0xFF, s->size); } + s->lpc_address = PNOR_SPI_OFFSET; + memory_region_init_io(&s->mmio, OBJECT(s), &pnv_pnor_ops, s, TYPE_PNV_PNOR, s->size); } diff --git a/include/hw/ppc/pnv_pnor.h b/include/hw/ppc/pnv_pnor.h index 19c2d642e8..b44cafe918 100644 --- a/include/hw/ppc/pnv_pnor.h +++ b/include/hw/ppc/pnv_pnor.h @@ -28,6 +28,7 @@ struct PnvPnor { BlockBackend *blk; uint8_t *storage; + uint32_t lpc_address; /* Offset within LPC FW space */ int64_t size; MemoryRegion mmio; }; From d8b1c3eaed5cf11d2db702a415df082dc1754b2c Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Mon, 17 Mar 2025 14:12:45 +1000 Subject: [PATCH 07/12] ppc/pnv: Fix system symbols in HOMER structure definitions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These definitions were taken from skiboot firmware. I naively thought it would be nicer to keep the code similar by using the preprocessor, but it was pointed out that system headers might still use those symbols and cause something unexpected. Also just nicer to keep the QEMU tree clean. Cc: "Philippe Mathieu-Daudé" Cc: "Stefan Hajnoczi" Reviewed-by: Thomas Huth Fixes: 70bc5c2498f46 ("ppc/pnv: Make HOMER memory a RAM region") Signed-off-by: Nicholas Piggin --- hw/ppc/pnv_occ.c | 201 ++++++++++++++++++++++------------------------- 1 file changed, 96 insertions(+), 105 deletions(-) diff --git a/hw/ppc/pnv_occ.c b/hw/ppc/pnv_occ.c index bda6b23ad3..177c5e514b 100644 --- a/hw/ppc/pnv_occ.c +++ b/hw/ppc/pnv_occ.c @@ -364,7 +364,12 @@ static void pnv_occ_register_types(void) type_init(pnv_occ_register_types); -/* From skiboot/hw/occ.c with tab to space conversion */ +/* + * From skiboot/hw/occ.c with following changes: + * - tab to space conversion + * - Type conversions u8->uint8_t s8->int8_t __be16->uint16_t etc + * - __packed -> QEMU_PACKED + */ /* OCC Communication Area for PStates */ #define OPAL_DYNAMIC_DATA_OFFSET 0x0B80 @@ -384,20 +389,6 @@ type_init(pnv_occ_register_types); #define FREQ_MAX_IN_DOMAIN 0 #define FREQ_MOST_RECENTLY_SET 1 -#define u8 uint8_t -#define s8 int8_t -#define u16 uint16_t -#define s16 int16_t -#define u32 uint32_t -#define s32 int32_t -#define u64 uint64_t -#define s64 int64_t -#define __be16 uint16_t -#define __be32 uint32_t -#ifndef __packed -#define __packed QEMU_PACKED -#endif /* !__packed */ - /** * OCC-OPAL Shared Memory Region * @@ -434,69 +425,69 @@ type_init(pnv_occ_register_types); * @spare/reserved/pad: Unused data */ struct occ_pstate_table { - u8 valid; - u8 version; - union __packed { - struct __packed { /* Version 0x01 and 0x02 */ - u8 throttle; - s8 pstate_min; - s8 pstate_nom; - s8 pstate_turbo; - s8 pstate_ultra_turbo; - u8 spare; - u64 reserved; - struct __packed { - s8 id; - u8 flags; - u8 vdd; - u8 vcs; - __be32 freq_khz; + uint8_t valid; + uint8_t version; + union QEMU_PACKED { + struct QEMU_PACKED { /* Version 0x01 and 0x02 */ + uint8_t throttle; + int8_t pstate_min; + int8_t pstate_nom; + int8_t pstate_turbo; + int8_t pstate_ultra_turbo; + uint8_t spare; + uint64_t reserved; + struct QEMU_PACKED { + int8_t id; + uint8_t flags; + uint8_t vdd; + uint8_t vcs; + uint32_t freq_khz; } pstates[MAX_PSTATES]; - s8 core_max[MAX_P8_CORES]; - u8 pad[100]; + int8_t core_max[MAX_P8_CORES]; + uint8_t pad[100]; } v2; - struct __packed { /* Version 0x90 */ - u8 occ_role; - u8 pstate_min; - u8 pstate_nom; - u8 pstate_turbo; - u8 pstate_ultra_turbo; - u8 spare; - u64 reserved1; - u64 reserved2; - struct __packed { - u8 id; - u8 flags; - u16 reserved; - __be32 freq_khz; + struct QEMU_PACKED { /* Version 0x90 */ + uint8_t occ_role; + uint8_t pstate_min; + uint8_t pstate_nom; + uint8_t pstate_turbo; + uint8_t pstate_ultra_turbo; + uint8_t spare; + uint64_t reserved1; + uint64_t reserved2; + struct QEMU_PACKED { + uint8_t id; + uint8_t flags; + uint16_t reserved; + uint32_t freq_khz; } pstates[MAX_PSTATES]; - u8 core_max[MAX_P9_CORES]; - u8 pad[56]; + uint8_t core_max[MAX_P9_CORES]; + uint8_t pad[56]; } v9; - struct __packed { /* Version 0xA0 */ - u8 occ_role; - u8 pstate_min; - u8 pstate_fixed_freq; - u8 pstate_base; - u8 pstate_ultra_turbo; - u8 pstate_fmax; - u8 minor; - u8 pstate_bottom_throttle; - u8 spare; - u8 spare1; - u32 reserved_32; - u64 reserved_64; - struct __packed { - u8 id; - u8 valid; - u16 reserved; - __be32 freq_khz; + struct QEMU_PACKED { /* Version 0xA0 */ + uint8_t occ_role; + uint8_t pstate_min; + uint8_t pstate_fixed_freq; + uint8_t pstate_base; + uint8_t pstate_ultra_turbo; + uint8_t pstate_fmax; + uint8_t minor; + uint8_t pstate_bottom_throttle; + uint8_t spare; + uint8_t spare1; + uint32_t reserved_32; + uint64_t reserved_64; + struct QEMU_PACKED { + uint8_t id; + uint8_t valid; + uint16_t reserved; + uint32_t freq_khz; } pstates[MAX_PSTATES]; - u8 core_max[MAX_P10_CORES]; - u8 pad[48]; + uint8_t core_max[MAX_P10_CORES]; + uint8_t pad[48]; } v10; }; -} __packed; +} QEMU_PACKED; /** * OPAL-OCC Command Response Interface @@ -531,13 +522,13 @@ struct occ_pstate_table { * @spare: Unused byte */ struct opal_command_buffer { - u8 flag; - u8 request_id; - u8 cmd; - u8 spare; - __be16 data_size; - u8 data[MAX_OPAL_CMD_DATA_LENGTH]; -} __packed; + uint8_t flag; + uint8_t request_id; + uint8_t cmd; + uint8_t spare; + uint16_t data_size; + uint8_t data[MAX_OPAL_CMD_DATA_LENGTH]; +} QEMU_PACKED; /** * OPAL-OCC Response Buffer @@ -571,13 +562,13 @@ struct opal_command_buffer { * @data: Response specific data */ struct occ_response_buffer { - u8 flag; - u8 request_id; - u8 cmd; - u8 status; - __be16 data_size; - u8 data[MAX_OCC_RSP_DATA_LENGTH]; -} __packed; + uint8_t flag; + uint8_t request_id; + uint8_t cmd; + uint8_t status; + uint16_t data_size; + uint8_t data[MAX_OCC_RSP_DATA_LENGTH]; +} QEMU_PACKED; /** * OCC-OPAL Shared Memory Interface Dynamic Data Vx90 @@ -608,31 +599,31 @@ struct occ_response_buffer { * @rsp: OCC Response Buffer */ struct occ_dynamic_data { - u8 occ_state; - u8 major_version; - u8 minor_version; - u8 gpus_present; - union __packed { - struct __packed { /* Version 0x90 */ - u8 spare1; + uint8_t occ_state; + uint8_t major_version; + uint8_t minor_version; + uint8_t gpus_present; + union QEMU_PACKED { + struct QEMU_PACKED { /* Version 0x90 */ + uint8_t spare1; } v9; - struct __packed { /* Version 0xA0 */ - u8 wof_enabled; + struct QEMU_PACKED { /* Version 0xA0 */ + uint8_t wof_enabled; } v10; }; - u8 cpu_throttle; - u8 mem_throttle; - u8 quick_pwr_drop; - u8 pwr_shifting_ratio; - u8 pwr_cap_type; - __be16 hard_min_pwr_cap; - __be16 max_pwr_cap; - __be16 cur_pwr_cap; - __be16 soft_min_pwr_cap; - u8 pad[110]; + uint8_t cpu_throttle; + uint8_t mem_throttle; + uint8_t quick_pwr_drop; + uint8_t pwr_shifting_ratio; + uint8_t pwr_cap_type; + uint16_t hard_min_pwr_cap; + uint16_t max_pwr_cap; + uint16_t cur_pwr_cap; + uint16_t soft_min_pwr_cap; + uint8_t pad[110]; struct opal_command_buffer cmd; struct occ_response_buffer rsp; -} __packed; +} QEMU_PACKED; enum occ_response_status { OCC_RSP_SUCCESS = 0x00, From 0cb6498b4ce8d0e800e85a43429919c208537405 Mon Sep 17 00:00:00 2001 From: BALATON Zoltan Date: Thu, 20 Mar 2025 14:40:23 +1000 Subject: [PATCH 08/12] ppc/amigaone: Check blk_pwrite return value Coverity reported that return value of blk_pwrite() maybe should not be ignored. We can't do much if this happens other than report an error but let's do that to silence this report. Resolves: Coverity CID 1593725 Signed-off-by: BALATON Zoltan Reviewed-by: Nicholas Piggin Message-ID: <20250314200140.2DBE74E6069@zero.eik.bme.hu> Signed-off-by: Nicholas Piggin --- hw/ppc/amigaone.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/hw/ppc/amigaone.c b/hw/ppc/amigaone.c index 483512125f..5d787c3059 100644 --- a/hw/ppc/amigaone.c +++ b/hw/ppc/amigaone.c @@ -108,8 +108,8 @@ static void nvram_write(void *opaque, hwaddr addr, uint64_t val, uint8_t *p = memory_region_get_ram_ptr(&s->mr); p[addr] = val; - if (s->blk) { - blk_pwrite(s->blk, addr, 1, &val, 0); + if (s->blk && blk_pwrite(s->blk, addr, 1, &val, 0) < 0) { + error_report("%s: could not write %s", __func__, blk_name(s->blk)); } } @@ -151,15 +151,17 @@ static void nvram_realize(DeviceState *dev, Error **errp) *c = cpu_to_be32(CRC32_DEFAULT_ENV); /* Also copies terminating \0 as env is terminated by \0\0 */ memcpy(p + 4, default_env, sizeof(default_env)); - if (s->blk) { - blk_pwrite(s->blk, 0, sizeof(crc) + sizeof(default_env), p, 0); + if (s->blk && + blk_pwrite(s->blk, 0, sizeof(crc) + sizeof(default_env), p, 0) < 0 + ) { + error_report("%s: could not write %s", __func__, blk_name(s->blk)); } return; } if (*c == 0) { *c = cpu_to_be32(crc32(0, p + 4, NVRAM_SIZE - 4)); - if (s->blk) { - blk_pwrite(s->blk, 0, 4, p, 0); + if (s->blk && blk_pwrite(s->blk, 0, 4, p, 0) < 0) { + error_report("%s: could not write %s", __func__, blk_name(s->blk)); } } if (be32_to_cpu(*c) != crc) { From 667413f5bfe3c1c4f082c9534b84e70c1ef3ff3a Mon Sep 17 00:00:00 2001 From: BALATON Zoltan Date: Thu, 20 Mar 2025 14:41:31 +1000 Subject: [PATCH 09/12] ppc/amigaone: Constify default_env MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The variable holding default env is not supposed to be written. Signed-off-by: BALATON Zoltan Reviewed-by: Nicholas Piggin Reviewed-by: Philippe Mathieu-Daudé Message-ID: <20250314200145.08E0F4E6067@zero.eik.bme.hu> Signed-off-by: Nicholas Piggin --- hw/ppc/amigaone.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/ppc/amigaone.c b/hw/ppc/amigaone.c index 5d787c3059..e9407a51b5 100644 --- a/hw/ppc/amigaone.c +++ b/hw/ppc/amigaone.c @@ -63,7 +63,7 @@ static const char dummy_fw[] = { #define NVRAM_ADDR 0xfd0e0000 #define NVRAM_SIZE (4 * KiB) -static char default_env[] = +static const char default_env[] = "baudrate=115200\0" "stdout=vga\0" "stdin=ps2kbd\0" From 1490d0bcdfcb78b4503cae42353d3dd50f4e9d96 Mon Sep 17 00:00:00 2001 From: Harsh Prateek Bora Date: Thu, 20 Mar 2025 14:50:24 +1000 Subject: [PATCH 10/12] ppc/spapr: fix default cpu for pre-9.0 machines. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When POWER10 CPU was made as default, we missed keeping POWER9 as default for older pseries releases (pre-9.0) at that time. This caused breakge in default cpu evaluation for older pseries machines and hence this fix. Fixes: 51113013f3 ("ppc/spapr: change pseries machine default to POWER10 CPU") Cc: qemu-stable@nongnu.org Signed-off-by: Harsh Prateek Bora Reviewed-by: Daniel P. Berrangé Reviewed-by: Philippe Mathieu-Daudé Message-ID: <20250313094705.2361997-1-harshpb@linux.ibm.com> Signed-off-by: Nicholas Piggin --- hw/ppc/spapr.c | 1 + 1 file changed, 1 insertion(+) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 9865d7147f..b0a0f8c689 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -4816,6 +4816,7 @@ static void spapr_machine_8_2_class_options(MachineClass *mc) { spapr_machine_9_0_class_options(mc); compat_props_add(mc->compat_props, hw_compat_8_2, hw_compat_8_2_len); + mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power9_v2.2"); } DEFINE_SPAPR_MACHINE(8, 2); From 8defe9da08135d03e054f20cb8fea4389be96e18 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 20 Mar 2025 21:39:59 +1000 Subject: [PATCH 11/12] target/ppc: Fix facility interrupt checks for VSX Facility interrupt checks in general should come after the ISA version check, because the facility interrupt and facility type themselves are ISA dependent and should not appear on CPUs where the instruction does not exist at all. This resolves a QEMU crash booting NetBSD/macppc due to qemu: fatal: Raised an exception without defined vector 94 Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2741 Cc: Chinmay Rath Cc: qemu-stable@nongnu.org Debugged-by: Richard Henderson Reviewed-by: Richard Henderson Fixes: aa0f34ec3fc7 ("target/ppc: implement vrlq") Fixes: 7419dc5b2b5b ("target/ppc: Move VSX vector storage access insns to decodetree.") Signed-off-by: Nicholas Piggin --- target/ppc/translate/vmx-impl.c.inc | 2 +- target/ppc/translate/vsx-impl.c.inc | 20 ++++++++++---------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/target/ppc/translate/vmx-impl.c.inc b/target/ppc/translate/vmx-impl.c.inc index 70d0ad2e71..92d6e8c603 100644 --- a/target/ppc/translate/vmx-impl.c.inc +++ b/target/ppc/translate/vmx-impl.c.inc @@ -994,8 +994,8 @@ static bool do_vector_rotl_quad(DisasContext *ctx, arg_VX *a, bool mask, { TCGv_i64 ah, al, vrb, n, t0, t1, zero = tcg_constant_i64(0); - REQUIRE_VECTOR(ctx); REQUIRE_INSNS_FLAGS2(ctx, ISA310); + REQUIRE_VECTOR(ctx); ah = tcg_temp_new_i64(); al = tcg_temp_new_i64(); diff --git a/target/ppc/translate/vsx-impl.c.inc b/target/ppc/translate/vsx-impl.c.inc index a869f30e86..00ad57c628 100644 --- a/target/ppc/translate/vsx-impl.c.inc +++ b/target/ppc/translate/vsx-impl.c.inc @@ -61,8 +61,8 @@ static bool trans_LXVD2X(DisasContext *ctx, arg_LXVD2X *a) TCGv EA; TCGv_i64 t0; - REQUIRE_VSX(ctx); REQUIRE_INSNS_FLAGS2(ctx, VSX); + REQUIRE_VSX(ctx); t0 = tcg_temp_new_i64(); gen_set_access_type(ctx, ACCESS_INT); @@ -80,8 +80,8 @@ static bool trans_LXVW4X(DisasContext *ctx, arg_LXVW4X *a) TCGv EA; TCGv_i64 xth, xtl; - REQUIRE_VSX(ctx); REQUIRE_INSNS_FLAGS2(ctx, VSX); + REQUIRE_VSX(ctx); xth = tcg_temp_new_i64(); xtl = tcg_temp_new_i64(); @@ -113,12 +113,12 @@ static bool trans_LXVWSX(DisasContext *ctx, arg_LXVWSX *a) TCGv EA; TCGv_i32 data; + REQUIRE_INSNS_FLAGS2(ctx, ISA300); if (a->rt < 32) { REQUIRE_VSX(ctx); } else { REQUIRE_VECTOR(ctx); } - REQUIRE_INSNS_FLAGS2(ctx, ISA300); gen_set_access_type(ctx, ACCESS_INT); EA = do_ea_calc(ctx, a->ra, cpu_gpr[a->rb]); @@ -133,8 +133,8 @@ static bool trans_LXVDSX(DisasContext *ctx, arg_LXVDSX *a) TCGv EA; TCGv_i64 data; - REQUIRE_VSX(ctx); REQUIRE_INSNS_FLAGS2(ctx, VSX); + REQUIRE_VSX(ctx); gen_set_access_type(ctx, ACCESS_INT); EA = do_ea_calc(ctx, a->ra, cpu_gpr[a->rb]); @@ -185,8 +185,8 @@ static bool trans_LXVH8X(DisasContext *ctx, arg_LXVH8X *a) TCGv EA; TCGv_i64 xth, xtl; - REQUIRE_VSX(ctx); REQUIRE_INSNS_FLAGS2(ctx, ISA300); + REQUIRE_VSX(ctx); xth = tcg_temp_new_i64(); xtl = tcg_temp_new_i64(); @@ -208,8 +208,8 @@ static bool trans_LXVB16X(DisasContext *ctx, arg_LXVB16X *a) TCGv EA; TCGv_i128 data; - REQUIRE_VSX(ctx); REQUIRE_INSNS_FLAGS2(ctx, ISA300); + REQUIRE_VSX(ctx); data = tcg_temp_new_i128(); gen_set_access_type(ctx, ACCESS_INT); @@ -312,8 +312,8 @@ static bool trans_STXVD2X(DisasContext *ctx, arg_STXVD2X *a) TCGv EA; TCGv_i64 t0; - REQUIRE_VSX(ctx); REQUIRE_INSNS_FLAGS2(ctx, VSX); + REQUIRE_VSX(ctx); t0 = tcg_temp_new_i64(); gen_set_access_type(ctx, ACCESS_INT); @@ -331,8 +331,8 @@ static bool trans_STXVW4X(DisasContext *ctx, arg_STXVW4X *a) TCGv EA; TCGv_i64 xsh, xsl; - REQUIRE_VSX(ctx); REQUIRE_INSNS_FLAGS2(ctx, VSX); + REQUIRE_VSX(ctx); xsh = tcg_temp_new_i64(); xsl = tcg_temp_new_i64(); @@ -364,8 +364,8 @@ static bool trans_STXVH8X(DisasContext *ctx, arg_STXVH8X *a) TCGv EA; TCGv_i64 xsh, xsl; - REQUIRE_VSX(ctx); REQUIRE_INSNS_FLAGS2(ctx, ISA300); + REQUIRE_VSX(ctx); xsh = tcg_temp_new_i64(); xsl = tcg_temp_new_i64(); @@ -394,8 +394,8 @@ static bool trans_STXVB16X(DisasContext *ctx, arg_STXVB16X *a) TCGv EA; TCGv_i128 data; - REQUIRE_VSX(ctx); REQUIRE_INSNS_FLAGS2(ctx, ISA300); + REQUIRE_VSX(ctx); data = tcg_temp_new_i128(); gen_set_access_type(ctx, ACCESS_INT); From 73c0c904fc99e2ceecbbded84ec76d40d3f2daae Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 20 Mar 2025 22:24:40 +1000 Subject: [PATCH 12/12] target/ppc: Fix e200 duplicate SPRs DSRR0/1 registers are in the BookE ISA not e200 specific, so remove the duplicate e200 register definitions. Cc: Roman Kapl Cc: qemu-stable@nongnu.org Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2768 Fixes: 0e3bf4890906 ("ppc: add DBCR based debugging") Signed-off-by: Nicholas Piggin --- target/ppc/cpu_init.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c index 8b590e7f17..7decc09aec 100644 --- a/target/ppc/cpu_init.c +++ b/target/ppc/cpu_init.c @@ -2744,14 +2744,6 @@ static void init_proc_e200(CPUPPCState *env) SPR_NOACCESS, SPR_NOACCESS, &spr_read_generic, &spr_write_generic, 0x00000000); /* TOFIX */ - spr_register(env, SPR_BOOKE_DSRR0, "DSRR0", - SPR_NOACCESS, SPR_NOACCESS, - &spr_read_generic, &spr_write_generic, - 0x00000000); - spr_register(env, SPR_BOOKE_DSRR1, "DSRR1", - SPR_NOACCESS, SPR_NOACCESS, - &spr_read_generic, &spr_write_generic, - 0x00000000); init_tlbs_emb(env); init_excp_e200(env, 0xFFFF0000UL);