From ab544de12787035edb7ad4994a80f9cd6a6b55d7 Mon Sep 17 00:00:00 2001
From: Nicolin Chen <nicolinc@nvidia.com>
Date: Thu, 20 Feb 2025 13:38:31 -0800
Subject: [PATCH 01/43] hw/arm/smmuv3: Fill u.f_cd_fetch.addr for
 SMMU_EVT_F_CD_FETCH

When we fill in the SMMUEventInfo for SMMU_EVT_F_CD_FETCH we write
the address into the f_ste_fetch member of the union, but then when
we come to read it back in smmuv3_record_event() we will (correctly)
be using the f_cd_fetch member.

This is more of a cosmetic fix, because f_cd_fetch and f_ste_fetch are
effectively the same field: they are members of the same union and have
the same type. It is still conceptually wrong, though, so use the correct
union member.

Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Message-id: 20250220213832.80289-1-nicolinc@nvidia.com
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 hw/arm/smmuv3.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index b49a59b64c..b40acbe024 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -377,7 +377,7 @@ static int smmu_get_cd(SMMUv3State *s, STE *ste, SMMUTransCfg *cfg,
         qemu_log_mask(LOG_GUEST_ERROR,
                       "Cannot fetch pte at address=0x%"PRIx64"\n", addr);
         event->type = SMMU_EVT_F_CD_FETCH;
-        event->u.f_ste_fetch.addr = addr;
+        event->u.f_cd_fetch.addr = addr;
         return -EINVAL;
     }
     for (i = 0; i < ARRAY_SIZE(buf->word); i++) {

From f10104aeae3a17f181d5bb37b7fd7dad7fe86cba Mon Sep 17 00:00:00 2001
From: "Matthew R. Ochs" <mochs@nvidia.com>
Date: Fri, 21 Feb 2025 06:54:19 -0800
Subject: [PATCH 02/43] hw/arm/virt: Support larger highmem MMIO regions

The MMIO region size required to support virtualized environments with
large PCI BAR regions can exceed the hardcoded limit configured in QEMU.
For example, a VM with multiple NVIDIA Grace-Hopper GPUs passed through
requires more MMIO memory than the amount provided by VIRT_HIGH_PCIE_MMIO
(currently 512GB). Instead of updating VIRT_HIGH_PCIE_MMIO, introduce a
new parameter, highmem-mmio-size, that specifies the MMIO size required
to support the VM configuration.

Example usage with 1TB MMIO region size:
	-machine virt,gic-version=3,highmem-mmio-size=1T

Signed-off-by: Matthew R. Ochs <mochs@nvidia.com>
Reviewed-by: Gavin Shan <gshan@redhat.com>
Reviewed-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Nicolin Chen <nicolinc@nvidia.com>
Message-id: 20250221145419.1281890-1-mochs@nvidia.com
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 docs/system/arm/virt.rst |  4 ++++
 hw/arm/virt.c            | 52 +++++++++++++++++++++++++++++++++++++++-
 2 files changed, 55 insertions(+), 1 deletion(-)

diff --git a/docs/system/arm/virt.rst b/docs/system/arm/virt.rst
index 0c9c2ce035..adf446c0a2 100644
--- a/docs/system/arm/virt.rst
+++ b/docs/system/arm/virt.rst
@@ -144,6 +144,10 @@ highmem-mmio
   Set ``on``/``off`` to enable/disable the high memory region for PCI MMIO.
   The default is ``on``.
 
+highmem-mmio-size
+  Set the high memory region size for PCI MMIO. Must be a power of 2 and
+  greater than or equal to the default size (512G).
+
 gic-version
   Specify the version of the Generic Interrupt Controller (GIC) to provide.
   Valid values are:
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 4a5a9666e9..ee69081ef4 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -53,6 +53,7 @@
 #include "hw/loader.h"
 #include "qapi/error.h"
 #include "qemu/bitops.h"
+#include "qemu/cutils.h"
 #include "qemu/error-report.h"
 #include "qemu/module.h"
 #include "hw/pci-host/gpex.h"
@@ -192,6 +193,10 @@ static const MemMapEntry base_memmap[] = {
     [VIRT_MEM] =                { GiB, LEGACY_RAMLIMIT_BYTES },
 };
 
+/* Update the docs for highmem-mmio-size when changing this default */
+#define DEFAULT_HIGH_PCIE_MMIO_SIZE_GB 512
+#define DEFAULT_HIGH_PCIE_MMIO_SIZE (DEFAULT_HIGH_PCIE_MMIO_SIZE_GB * GiB)
+
 /*
  * Highmem IO Regions: This memory map is floating, located after the RAM.
  * Each MemMapEntry base (GPA) will be dynamically computed, depending on the
@@ -207,13 +212,16 @@ static const MemMapEntry base_memmap[] = {
  * PA space for one specific region is always reserved, even if the region
  * has been disabled or doesn't fit into the PA space. However, the PA space
  * for the region won't be reserved in these circumstances with compact layout.
+ *
+ * Note that the highmem-mmio-size property will update the high PCIE MMIO size
+ * field in this array.
  */
 static MemMapEntry extended_memmap[] = {
     /* Additional 64 MB redist region (can contain up to 512 redistributors) */
     [VIRT_HIGH_GIC_REDIST2] =   { 0x0, 64 * MiB },
     [VIRT_HIGH_PCIE_ECAM] =     { 0x0, 256 * MiB },
     /* Second PCIe window */
-    [VIRT_HIGH_PCIE_MMIO] =     { 0x0, 512 * GiB },
+    [VIRT_HIGH_PCIE_MMIO] =     { 0x0, DEFAULT_HIGH_PCIE_MMIO_SIZE },
 };
 
 static const int a15irqmap[] = {
@@ -2550,6 +2558,40 @@ static void virt_set_highmem_mmio(Object *obj, bool value, Error **errp)
     vms->highmem_mmio = value;
 }
 
+static void virt_get_highmem_mmio_size(Object *obj, Visitor *v,
+                                       const char *name, void *opaque,
+                                       Error **errp)
+{
+    uint64_t size = extended_memmap[VIRT_HIGH_PCIE_MMIO].size;
+
+    visit_type_size(v, name, &size, errp);
+}
+
+static void virt_set_highmem_mmio_size(Object *obj, Visitor *v,
+                                       const char *name, void *opaque,
+                                       Error **errp)
+{
+    uint64_t size;
+
+    if (!visit_type_size(v, name, &size, errp)) {
+        return;
+    }
+
+    if (!is_power_of_2(size)) {
+        error_setg(errp, "highmem-mmio-size is not a power of 2");
+        return;
+    }
+
+    if (size < DEFAULT_HIGH_PCIE_MMIO_SIZE) {
+        char *sz = size_to_str(DEFAULT_HIGH_PCIE_MMIO_SIZE);
+        error_setg(errp, "highmem-mmio-size cannot be set to a lower value "
+                         "than the default (%s)", sz);
+        g_free(sz);
+        return;
+    }
+
+    extended_memmap[VIRT_HIGH_PCIE_MMIO].size = size;
+}
 
 static bool virt_get_its(Object *obj, Error **errp)
 {
@@ -3207,6 +3249,14 @@ static void virt_machine_class_init(ObjectClass *oc, void *data)
                                           "Set on/off to enable/disable high "
                                           "memory region for PCI MMIO");
 
+    object_class_property_add(oc, "highmem-mmio-size", "size",
+                                   virt_get_highmem_mmio_size,
+                                   virt_set_highmem_mmio_size,
+                                   NULL, NULL);
+    object_class_property_set_description(oc, "highmem-mmio-size",
+                                          "Set the high memory region size "
+                                          "for PCI MMIO");
+
     object_class_property_add_str(oc, "gic-version", virt_get_gic_version,
                                   virt_set_gic_version);
     object_class_property_set_description(oc, "gic-version",

From cacf4cb2516aa4de94aa80fecb08be4dafa5ed44 Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Thu, 6 Feb 2025 15:12:09 +0000
Subject: [PATCH 03/43] monitor/hmp-cmds.c: Clean up hmp_dumpdtb printf
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In hmp_dumpdtb(), we print a message when the command succeeds.  This
message is missing the trailing \n, so the HMP command prompt is
printed immediately after it.  We also weren't capitalizing 'DTB', or
quoting the filename in the message.  Fix these nits.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Message-id: 20250206151214.2947842-2-peter.maydell@linaro.org
---
 monitor/hmp-cmds.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
index 3825ff40a9..7ded3378cf 100644
--- a/monitor/hmp-cmds.c
+++ b/monitor/hmp-cmds.c
@@ -431,6 +431,6 @@ void hmp_dumpdtb(Monitor *mon, const QDict *qdict)
         return;
     }
 
-    monitor_printf(mon, "dtb dumped to %s", filename);
+    monitor_printf(mon, "DTB dumped to '%s'\n", filename);
 }
 #endif

From 3c25f487bc0672bf13473f4a7235c3ef592c954c Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Thu, 6 Feb 2025 15:12:10 +0000
Subject: [PATCH 04/43] hw/openrisc: Support monitor dumpdtb command
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The openrisc machines don't set MachineState::fdt to point to their
DTB blob.  This means that although the command line '-machine
dumpdtb=file.dtb' option works, the equivalent QMP and HMP monitor
commands do not, but instead produce the error "This machine doesn't
have a FDT".

Set MachineState::fdt in openrisc_load_fdt(), when we write it to
guest memory.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Message-id: 20250206151214.2947842-3-peter.maydell@linaro.org
---
 hw/openrisc/boot.c         | 7 +++++--
 hw/openrisc/openrisc_sim.c | 2 +-
 hw/openrisc/virt.c         | 2 +-
 include/hw/openrisc/boot.h | 3 ++-
 4 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/hw/openrisc/boot.c b/hw/openrisc/boot.c
index 0f08df812d..72e2756af0 100644
--- a/hw/openrisc/boot.c
+++ b/hw/openrisc/boot.c
@@ -90,8 +90,8 @@ hwaddr openrisc_load_initrd(void *fdt, const char *filename,
     return start + size;
 }
 
-uint32_t openrisc_load_fdt(void *fdt, hwaddr load_start,
-                           uint64_t mem_size)
+uint32_t openrisc_load_fdt(MachineState *ms, void *fdt,
+                           hwaddr load_start, uint64_t mem_size)
 {
     uint32_t fdt_addr;
     int ret;
@@ -111,6 +111,9 @@ uint32_t openrisc_load_fdt(void *fdt, hwaddr load_start,
     /* copy in the device tree */
     qemu_fdt_dumpdtb(fdt, fdtsize);
 
+    /* Save FDT for dumpdtb monitor command */
+    ms->fdt = fdt;
+
     rom_add_blob_fixed_as("fdt", fdt, fdtsize, fdt_addr,
                           &address_space_memory);
     qemu_register_reset_nosnapshotload(qemu_fdt_randomize_seeds,
diff --git a/hw/openrisc/openrisc_sim.c b/hw/openrisc/openrisc_sim.c
index e0da4067ba..d9e0744922 100644
--- a/hw/openrisc/openrisc_sim.c
+++ b/hw/openrisc/openrisc_sim.c
@@ -354,7 +354,7 @@ static void openrisc_sim_init(MachineState *machine)
                                              machine->initrd_filename,
                                              load_addr, machine->ram_size);
         }
-        boot_info.fdt_addr = openrisc_load_fdt(state->fdt, load_addr,
+        boot_info.fdt_addr = openrisc_load_fdt(machine, state->fdt, load_addr,
                                                machine->ram_size);
     }
 }
diff --git a/hw/openrisc/virt.c b/hw/openrisc/virt.c
index 7b60bf8509..9afe407b00 100644
--- a/hw/openrisc/virt.c
+++ b/hw/openrisc/virt.c
@@ -540,7 +540,7 @@ static void openrisc_virt_init(MachineState *machine)
                                              machine->initrd_filename,
                                              load_addr, machine->ram_size);
         }
-        boot_info.fdt_addr = openrisc_load_fdt(state->fdt, load_addr,
+        boot_info.fdt_addr = openrisc_load_fdt(machine, state->fdt, load_addr,
                                                machine->ram_size);
     }
 }
diff --git a/include/hw/openrisc/boot.h b/include/hw/openrisc/boot.h
index 25a313d63a..9b4d88072c 100644
--- a/include/hw/openrisc/boot.h
+++ b/include/hw/openrisc/boot.h
@@ -20,6 +20,7 @@
 #define OPENRISC_BOOT_H
 
 #include "exec/cpu-defs.h"
+#include "hw/boards.h"
 
 hwaddr openrisc_load_kernel(ram_addr_t ram_size,
                             const char *kernel_filename,
@@ -28,7 +29,7 @@ hwaddr openrisc_load_kernel(ram_addr_t ram_size,
 hwaddr openrisc_load_initrd(void *fdt, const char *filename,
                             hwaddr load_start, uint64_t mem_size);
 
-uint32_t openrisc_load_fdt(void *fdt, hwaddr load_start,
+uint32_t openrisc_load_fdt(MachineState *ms, void *fdt, hwaddr load_start,
                            uint64_t mem_size);
 
 #endif /* OPENRISC_BOOT_H */

From dfd0de718662a58ef2f2ef051939ed4b1a4d5ea7 Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Thu, 6 Feb 2025 15:12:11 +0000
Subject: [PATCH 05/43] hw/mips/boston: Check for error return from
 boston_fdt_filter()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The function boston_fdt_filter() can return NULL on errors (in which
case it will print an error message).  When we call this from the
non-FIT-image codepath, we aren't checking the return value, so we
will plough on with a NULL pointer, and segfault in fdt_totalsize().
Check for errors here.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Message-id: 20250206151214.2947842-4-peter.maydell@linaro.org
---
 hw/mips/boston.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/hw/mips/boston.c b/hw/mips/boston.c
index 4690b254dd..de6ce1f163 100644
--- a/hw/mips/boston.c
+++ b/hw/mips/boston.c
@@ -810,6 +810,10 @@ static void boston_mach_init(MachineState *machine)
 
             dtb_load_data = boston_fdt_filter(s, dtb_file_data,
                                               NULL, &dtb_vaddr);
+            if (!dtb_load_data) {
+                /* boston_fdt_filter() already printed the error for us */
+                exit(1);
+            }
 
             /* Calculate real fdt size after filter */
             dt_size = fdt_totalsize(dtb_load_data);

From db0dd33559ee97a1fe84a1272258646279aca2e2 Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Thu, 6 Feb 2025 15:12:12 +0000
Subject: [PATCH 06/43] hw/mips/boston: Support dumpdtb monitor commands
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The boston machine doesn't set MachineState::fdt to the DTB blob that
it has loaded or created, which means that the QMP/HMP dumpdtb
monitor commands don't work.

Setting MachineState::fdt is easy in the non-FIT codepath: we can
simply do so immediately before loading the DTB into guest memory.
The FIT codepath is a bit more awkward as currently the FIT loader
throws away the memory that the FDT was in after it loads it into
guest memory.  So we add a void **pfdt argument to load_fit() for it
to store the FDT pointer into.

There is some readjustment required of the pointer handling in
loader-fit.c, so that it applies 'const' only where it should (e.g.
the data pointer we get back from fdt_getprop() is const, because
it points into the middle of the input FDT data, but the pointer that
fit_load_image_alloc() returns should not be const, because it's freshly
allocated memory that the caller can change if it likes).

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Message-id: 20250206151214.2947842-5-peter.maydell@linaro.org
---
 hw/core/loader-fit.c    | 38 +++++++++++++++++++++-----------------
 hw/mips/boston.c        | 11 +++++++----
 include/hw/loader-fit.h | 21 ++++++++++++++++++---
 3 files changed, 46 insertions(+), 24 deletions(-)

diff --git a/hw/core/loader-fit.c b/hw/core/loader-fit.c
index 9bdd4fa17c..6eb66406b0 100644
--- a/hw/core/loader-fit.c
+++ b/hw/core/loader-fit.c
@@ -32,8 +32,8 @@
 
 #define FIT_LOADER_MAX_PATH (128)
 
-static const void *fit_load_image_alloc(const void *itb, const char *name,
-                                        int *poff, size_t *psz, Error **errp)
+static void *fit_load_image_alloc(const void *itb, const char *name,
+                                  int *poff, size_t *psz, Error **errp)
 {
     const void *data;
     const char *comp;
@@ -80,11 +80,11 @@ static const void *fit_load_image_alloc(const void *itb, const char *name,
             return NULL;
         }
 
-        data = g_realloc(uncomp_data, uncomp_len);
+        uncomp_data = g_realloc(uncomp_data, uncomp_len);
         if (psz) {
             *psz = uncomp_len;
         }
-        return data;
+        return uncomp_data;
     }
 
     error_setg(errp, "unknown compression '%s'", comp);
@@ -177,13 +177,12 @@ out:
 
 static int fit_load_fdt(const struct fit_loader *ldr, const void *itb,
                         int cfg, void *opaque, const void *match_data,
-                        hwaddr kernel_end, Error **errp)
+                        hwaddr kernel_end, void **pfdt, Error **errp)
 {
     ERRP_GUARD();
     Error *err = NULL;
     const char *name;
-    const void *data;
-    const void *load_data;
+    void *data;
     hwaddr load_addr;
     int img_off;
     size_t sz;
@@ -194,7 +193,7 @@ static int fit_load_fdt(const struct fit_loader *ldr, const void *itb,
         return 0;
     }
 
-    load_data = data = fit_load_image_alloc(itb, name, &img_off, &sz, errp);
+    data = fit_load_image_alloc(itb, name, &img_off, &sz, errp);
     if (!data) {
         error_prepend(errp, "unable to load FDT image from FIT: ");
         return -EINVAL;
@@ -211,19 +210,23 @@ static int fit_load_fdt(const struct fit_loader *ldr, const void *itb,
     }
 
     if (ldr->fdt_filter) {
-        load_data = ldr->fdt_filter(opaque, data, match_data, &load_addr);
+        void *filtered_data;
+
+        filtered_data = ldr->fdt_filter(opaque, data, match_data, &load_addr);
+        if (filtered_data != data) {
+            g_free(data);
+            data = filtered_data;
+        }
     }
 
     load_addr = ldr->addr_to_phys(opaque, load_addr);
-    sz = fdt_totalsize(load_data);
-    rom_add_blob_fixed(name, load_data, sz, load_addr);
+    sz = fdt_totalsize(data);
+    rom_add_blob_fixed(name, data, sz, load_addr);
 
-    ret = 0;
+    *pfdt = data;
+    return 0;
 out:
     g_free((void *) data);
-    if (data != load_data) {
-        g_free((void *) load_data);
-    }
     return ret;
 }
 
@@ -259,7 +262,8 @@ out:
     return ret;
 }
 
-int load_fit(const struct fit_loader *ldr, const char *filename, void *opaque)
+int load_fit(const struct fit_loader *ldr, const char *filename,
+             void **pfdt, void *opaque)
 {
     Error *err = NULL;
     const struct fit_loader_match *match;
@@ -323,7 +327,7 @@ int load_fit(const struct fit_loader *ldr, const char *filename, void *opaque)
         goto out;
     }
 
-    ret = fit_load_fdt(ldr, itb, cfg_off, opaque, match_data, kernel_end,
+    ret = fit_load_fdt(ldr, itb, cfg_off, opaque, match_data, kernel_end, pfdt,
                        &err);
     if (ret) {
         error_report_err(err);
diff --git a/hw/mips/boston.c b/hw/mips/boston.c
index de6ce1f163..79410dabe7 100644
--- a/hw/mips/boston.c
+++ b/hw/mips/boston.c
@@ -358,8 +358,8 @@ static void gen_firmware(void *p, hwaddr kernel_entry, hwaddr fdt_addr)
                        kernel_entry);
 }
 
-static const void *boston_fdt_filter(void *opaque, const void *fdt_orig,
-                                     const void *match_data, hwaddr *load_addr)
+static void *boston_fdt_filter(void *opaque, const void *fdt_orig,
+                               const void *match_data, hwaddr *load_addr)
 {
     BostonState *s = BOSTON(opaque);
     MachineState *machine = s->mach;
@@ -797,7 +797,7 @@ static void boston_mach_init(MachineState *machine)
         if (kernel_size > 0) {
             int dt_size;
             g_autofree const void *dtb_file_data = NULL;
-            g_autofree const void *dtb_load_data = NULL;
+            void *dtb_load_data = NULL;
             hwaddr dtb_paddr = QEMU_ALIGN_UP(kernel_high, 64 * KiB);
             hwaddr dtb_vaddr = cpu_mips_phys_to_kseg0(NULL, dtb_paddr);
 
@@ -815,6 +815,8 @@ static void boston_mach_init(MachineState *machine)
                 exit(1);
             }
 
+            machine->fdt = dtb_load_data;
+
             /* Calculate real fdt size after filter */
             dt_size = fdt_totalsize(dtb_load_data);
             rom_add_blob_fixed("dtb", dtb_load_data, dt_size, dtb_paddr);
@@ -822,7 +824,8 @@ static void boston_mach_init(MachineState *machine)
                                 rom_ptr(dtb_paddr, dt_size));
         } else {
             /* Try to load file as FIT */
-            fit_err = load_fit(&boston_fit_loader, machine->kernel_filename, s);
+            fit_err = load_fit(&boston_fit_loader, machine->kernel_filename,
+                               &machine->fdt, s);
             if (fit_err) {
                 error_report("unable to load kernel image");
                 exit(1);
diff --git a/include/hw/loader-fit.h b/include/hw/loader-fit.h
index 0832e379dc..9a43490ed6 100644
--- a/include/hw/loader-fit.h
+++ b/include/hw/loader-fit.h
@@ -30,12 +30,27 @@ struct fit_loader_match {
 struct fit_loader {
     const struct fit_loader_match *matches;
     hwaddr (*addr_to_phys)(void *opaque, uint64_t addr);
-    const void *(*fdt_filter)(void *opaque, const void *fdt,
-                              const void *match_data, hwaddr *load_addr);
+    void *(*fdt_filter)(void *opaque, const void *fdt,
+                        const void *match_data, hwaddr *load_addr);
     const void *(*kernel_filter)(void *opaque, const void *kernel,
                                  hwaddr *load_addr, hwaddr *entry_addr);
 };
 
-int load_fit(const struct fit_loader *ldr, const char *filename, void *opaque);
+/**
+ * load_fit: load a FIT format image
+ * @ldr: structure defining board specific properties and hooks
+ * @filename: image to load
+ * @pfdt: pointer to update with address of FDT blob
+ * @opaque: opaque value passed back to the hook functions in @ldr
+ * Returns: 0 on success, or a negative errno on failure
+ *
+ * @pfdt is used to tell the caller about the FDT blob. On return, it
+ * has been set to point to the FDT blob, and it is now the caller's
+ * responsibility to free that memory with g_free(). Usually the caller
+ * will want to pass in &machine->fdt here, to record the FDT blob for
+ * the dumpdtb option and QMP/HMP commands.
+ */
+int load_fit(const struct fit_loader *ldr, const char *filename, void **pfdt,
+             void *opaque);
 
 #endif /* HW_LOADER_FIT_H */

From 8fd2518ef2f8d34dc9ee53d6915a2a610eb1a659 Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Thu, 6 Feb 2025 15:12:13 +0000
Subject: [PATCH 07/43] hw: Centralize handling of -machine dumpdtb option

Currently we handle the 'dumpdtb' machine sub-option ad-hoc in every
board model that has an FDT.  It's up to the board code to make sure
it calls qemu_fdt_dumpdtb() in the right place.

This means we're inconsistent and often just ignore the user's
command line argument:
 * if the board doesn't have an FDT at all
 * if the board supports FDT, but there happens not to be one
   present (usually because of a missing -fdt option)

This isn't very helpful because it gives the user no clue why their
option was ignored.

However, in order to support the QMP/HMP dumpdtb commands we require
now that every FDT machine stores a pointer to the FDT in
MachineState::fdt.  This means we can handle -machine dumpdtb
centrally by calling the qmp_dumpdtb() function, unifying its
handling with the QMP/HMP commands.  All the board code calls to
qemu_fdt_dumpdtb() can then be removed.

For this commit we retain the existing behaviour that if there
is no FDT we silently ignore the -machine dumpdtb option.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
 hw/arm/boot.c                 |  2 --
 hw/core/machine.c             | 25 +++++++++++++++++++++++++
 hw/loongarch/virt-fdt-build.c |  1 -
 hw/mips/boston.c              |  1 -
 hw/openrisc/boot.c            |  1 -
 hw/ppc/e500.c                 |  1 -
 hw/ppc/pegasos2.c             |  1 -
 hw/ppc/pnv.c                  |  1 -
 hw/ppc/spapr.c                |  1 -
 hw/riscv/boot.c               |  2 --
 include/system/device_tree.h  |  2 --
 system/device_tree.c          | 15 ---------------
 12 files changed, 25 insertions(+), 28 deletions(-)

diff --git a/hw/arm/boot.c b/hw/arm/boot.c
index 42c18355e8..e296b62fa1 100644
--- a/hw/arm/boot.c
+++ b/hw/arm/boot.c
@@ -661,8 +661,6 @@ int arm_load_dtb(hwaddr addr, const struct arm_boot_info *binfo,
         binfo->modify_dtb(binfo, fdt);
     }
 
-    qemu_fdt_dumpdtb(fdt, size);
-
     /* Put the DTB into the memory map as a ROM image: this will ensure
      * the DTB is copied again upon reset, even if addr points into RAM.
      */
diff --git a/hw/core/machine.c b/hw/core/machine.c
index 02cff735b3..61c22f723a 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -19,6 +19,7 @@
 #include "qemu/error-report.h"
 #include "qapi/error.h"
 #include "qapi/qapi-visit-machine.h"
+#include "qapi/qapi-commands-machine.h"
 #include "qemu/madvise.h"
 #include "qom/object_interfaces.h"
 #include "system/cpus.h"
@@ -1696,6 +1697,24 @@ void qemu_remove_machine_init_done_notifier(Notifier *notify)
     notifier_remove(notify);
 }
 
+static void handle_machine_dumpdtb(MachineState *ms)
+{
+    if (!ms->dumpdtb) {
+        return;
+    }
+    if (!ms->fdt) {
+        /* Silently ignore dumpdtb option if there is nothing to dump */
+        return;
+    }
+#ifdef CONFIG_FDT
+    qmp_dumpdtb(ms->dumpdtb, &error_fatal);
+    exit(0);
+#else
+    error_report("This machine doesn't have an FDT");
+    exit(1);
+#endif
+}
+
 void qdev_machine_creation_done(void)
 {
     cpu_synchronize_all_post_init();
@@ -1712,6 +1731,12 @@ void qdev_machine_creation_done(void)
     phase_advance(PHASE_MACHINE_READY);
     qdev_assert_realized_properly();
 
+    /*
+     * If the user used -machine dumpdtb=file.dtb to request that we
+     * dump the DTB to a file,  do it now, and exit.
+     */
+    handle_machine_dumpdtb(current_machine);
+
     /* TODO: once all bus devices are qdevified, this should be done
      * when bus is created by qdev.c */
     /*
diff --git a/hw/loongarch/virt-fdt-build.c b/hw/loongarch/virt-fdt-build.c
index dbc269afba..728ce46699 100644
--- a/hw/loongarch/virt-fdt-build.c
+++ b/hw/loongarch/virt-fdt-build.c
@@ -527,7 +527,6 @@ void virt_fdt_setup(LoongArchVirtMachineState *lvms)
      * Put the FDT into the memory map as a ROM image: this will ensure
      * the FDT is copied again upon reset, even if addr points into RAM.
      */
-    qemu_fdt_dumpdtb(machine->fdt, lvms->fdt_size);
     rom_add_blob_fixed_as("fdt", machine->fdt, lvms->fdt_size, FDT_BASE,
                           &address_space_memory);
     qemu_register_reset_nosnapshotload(qemu_fdt_randomize_seeds,
diff --git a/hw/mips/boston.c b/hw/mips/boston.c
index 79410dabe7..149a263bd5 100644
--- a/hw/mips/boston.c
+++ b/hw/mips/boston.c
@@ -395,7 +395,6 @@ static void *boston_fdt_filter(void *opaque, const void *fdt_orig,
                         1, ram_high_sz);
 
     fdt = g_realloc(fdt, fdt_totalsize(fdt));
-    qemu_fdt_dumpdtb(fdt, fdt_sz);
 
     s->fdt_base = *load_addr;
 
diff --git a/hw/openrisc/boot.c b/hw/openrisc/boot.c
index 72e2756af0..0a5881be31 100644
--- a/hw/openrisc/boot.c
+++ b/hw/openrisc/boot.c
@@ -109,7 +109,6 @@ uint32_t openrisc_load_fdt(MachineState *ms, void *fdt,
     /* Should only fail if we've built a corrupted tree */
     g_assert(ret == 0);
     /* copy in the device tree */
-    qemu_fdt_dumpdtb(fdt, fdtsize);
 
     /* Save FDT for dumpdtb monitor command */
     ms->fdt = fdt;
diff --git a/hw/ppc/e500.c b/hw/ppc/e500.c
index 26933e0457..fe8b9f7962 100644
--- a/hw/ppc/e500.c
+++ b/hw/ppc/e500.c
@@ -658,7 +658,6 @@ static int ppce500_load_device_tree(PPCE500MachineState *pms,
 
 done:
     if (!dry_run) {
-        qemu_fdt_dumpdtb(fdt, fdt_size);
         cpu_physical_memory_write(addr, fdt, fdt_size);
 
         /* Set machine->fdt for 'dumpdtb' QMP/HMP command */
diff --git a/hw/ppc/pegasos2.c b/hw/ppc/pegasos2.c
index b057672e82..246d6d633b 100644
--- a/hw/ppc/pegasos2.c
+++ b/hw/ppc/pegasos2.c
@@ -417,7 +417,6 @@ static void pegasos2_machine_reset(MachineState *machine, ResetType type)
     d[1] = cpu_to_be64(pm->kernel_size - (pm->kernel_entry - pm->kernel_addr));
     qemu_fdt_setprop(fdt, "/chosen", "qemu,boot-kernel", d, sizeof(d));
 
-    qemu_fdt_dumpdtb(fdt, fdt_totalsize(fdt));
     g_free(pm->fdt_blob);
     pm->fdt_blob = fdt;
 
diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
index 11fd477b71..87607508c7 100644
--- a/hw/ppc/pnv.c
+++ b/hw/ppc/pnv.c
@@ -744,7 +744,6 @@ static void pnv_reset(MachineState *machine, ResetType type)
         _FDT((fdt_pack(fdt)));
     }
 
-    qemu_fdt_dumpdtb(fdt, fdt_totalsize(fdt));
     cpu_physical_memory_write(PNV_FDT_ADDR, fdt, fdt_totalsize(fdt));
 
     /* Update machine->fdt with latest fdt */
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index f3a4b4235d..c15340a58d 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -1760,7 +1760,6 @@ static void spapr_machine_reset(MachineState *machine, ResetType type)
                                   0, fdt_addr, 0);
         cpu_physical_memory_write(fdt_addr, fdt, fdt_totalsize(fdt));
     }
-    qemu_fdt_dumpdtb(fdt, fdt_totalsize(fdt));
 
     g_free(spapr->fdt_blob);
     spapr->fdt_size = fdt_totalsize(fdt);
diff --git a/hw/riscv/boot.c b/hw/riscv/boot.c
index c309441b7d..765b9e2b1a 100644
--- a/hw/riscv/boot.c
+++ b/hw/riscv/boot.c
@@ -374,8 +374,6 @@ void riscv_load_fdt(hwaddr fdt_addr, void *fdt)
     uint32_t fdtsize = fdt_totalsize(fdt);
 
     /* copy in the device tree */
-    qemu_fdt_dumpdtb(fdt, fdtsize);
-
     rom_add_blob_fixed_as("fdt", fdt, fdtsize, fdt_addr,
                           &address_space_memory);
     qemu_register_reset_nosnapshotload(qemu_fdt_randomize_seeds,
diff --git a/include/system/device_tree.h b/include/system/device_tree.h
index eb601522f8..49d8482ed4 100644
--- a/include/system/device_tree.h
+++ b/include/system/device_tree.h
@@ -133,8 +133,6 @@ int qemu_fdt_add_path(void *fdt, const char *path);
                          sizeof(qdt_tmp));                                    \
     } while (0)
 
-void qemu_fdt_dumpdtb(void *fdt, int size);
-
 /**
  * qemu_fdt_setprop_sized_cells_from_array:
  * @fdt: device tree blob
diff --git a/system/device_tree.c b/system/device_tree.c
index 4bc2d61b93..d605ed2a21 100644
--- a/system/device_tree.c
+++ b/system/device_tree.c
@@ -594,21 +594,6 @@ int qemu_fdt_add_path(void *fdt, const char *path)
     return retval;
 }
 
-void qemu_fdt_dumpdtb(void *fdt, int size)
-{
-    const char *dumpdtb = current_machine->dumpdtb;
-
-    if (dumpdtb) {
-        /* Dump the dtb to a file and quit */
-        if (g_file_set_contents(dumpdtb, fdt, size, NULL)) {
-            info_report("dtb dumped to %s. Exiting.", dumpdtb);
-            exit(0);
-        }
-        error_report("%s: Failed dumping dtb to %s", __func__, dumpdtb);
-        exit(1);
-    }
-}
-
 int qemu_fdt_setprop_sized_cells_from_array(void *fdt,
                                             const char *node_path,
                                             const char *property,

From bb09b7bfd37024381970744c71646e0239428897 Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Thu, 6 Feb 2025 15:12:14 +0000
Subject: [PATCH 08/43] hw/core/machine.c: Make -machine dumpdtb=file.dtb with
 no DTB an error

Currently if the user requests via -machine dumpdtb=file.dtb that we
dump the DTB, but the machine doesn't have a DTB, we silently ignore
the option.  This is confusing to users, and is a legacy of the old
board-specific implementation of the option, where if the execution
codepath didn't go via a call to qemu_fdt_dumpdtb() we would never
handle the option.

Now that we handle the option in one place in machine.c, we can provide
the user with a useful message if they asked us to dump a DTB when
none exists.  qmp_dumpdtb() already produces this error; remove the
logic in handle_machine_dumpdtb() that was there specifically to
avoid hitting it.

While we're here, beef up the error message a bit with a hint, and
make it consistent about "an FDT" rather than "a FDT".  (In the
qmp_dumpdtb() case this needs an ERRP_GUARD to make
error_append_hint() work when the caller passes error_fatal.)

Note that the three places where we might report "doesn't have an
FDT" are hit in different situations:

(1) in handle_machine_dumpdtb(), if CONFIG_FDT is not set: this is
because the QEMU binary was built without libfdt at all. The
build system will not let you build with a machine type that
needs an FDT but no libfdt, so here we know both that the machine
doesn't use FDT and that QEMU doesn't have the support.

(2) in the device_tree-stub.c qmp_dumpdtb(): this is used when
we had libfdt at build time but the target architecture didn't
enable any machines which did "select DEVICE_TREE", so here we
know that the machine doesn't use FDT.

(3) in qmp_dumpdtb(), if current_machine->fdt is NULL all we know
is that this machine never set it. That might be because it doesn't
use FDT, or it might be because the user didn't pass an FDT
on the command line and the machine doesn't autogenerate an FDT.

Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2733
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20250206151214.2947842-7-peter.maydell@linaro.org
---
 hw/core/machine.c         | 6 ++----
 system/device_tree-stub.c | 5 ++++-
 system/device_tree.c      | 7 ++++++-
 3 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/hw/core/machine.c b/hw/core/machine.c
index 61c22f723a..b68b8b94a3 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -1702,15 +1702,13 @@ static void handle_machine_dumpdtb(MachineState *ms)
     if (!ms->dumpdtb) {
         return;
     }
-    if (!ms->fdt) {
-        /* Silently ignore dumpdtb option if there is nothing to dump */
-        return;
-    }
 #ifdef CONFIG_FDT
     qmp_dumpdtb(ms->dumpdtb, &error_fatal);
     exit(0);
 #else
     error_report("This machine doesn't have an FDT");
+    error_printf("(this machine type definitely doesn't use FDT, and "
+                 "this QEMU doesn't have FDT support compiled in)\n");
     exit(1);
 #endif
 }
diff --git a/system/device_tree-stub.c b/system/device_tree-stub.c
index bddda6fa37..428330b0fe 100644
--- a/system/device_tree-stub.c
+++ b/system/device_tree-stub.c
@@ -5,6 +5,9 @@
 #ifdef CONFIG_FDT
 void qmp_dumpdtb(const char *filename, Error **errp)
 {
-    error_setg(errp, "This machine doesn't have a FDT");
+    ERRP_GUARD();
+
+    error_setg(errp, "This machine doesn't have an FDT");
+    error_append_hint(errp, "(this machine type definitely doesn't use FDT)\n");
 }
 #endif
diff --git a/system/device_tree.c b/system/device_tree.c
index d605ed2a21..aa3fe9516f 100644
--- a/system/device_tree.c
+++ b/system/device_tree.c
@@ -635,11 +635,16 @@ out:
 
 void qmp_dumpdtb(const char *filename, Error **errp)
 {
+    ERRP_GUARD();
+
     g_autoptr(GError) err = NULL;
     uint32_t size;
 
     if (!current_machine->fdt) {
-        error_setg(errp, "This machine doesn't have a FDT");
+        error_setg(errp, "This machine doesn't have an FDT");
+        error_append_hint(errp,
+                          "(Perhaps it doesn't support FDT at all, or perhaps "
+                          "you need to provide an FDT with the -fdt option?)\n");
         return;
     }
 

From 9b6e986e280f43e3df3baf7aae2069d599b5056c Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Mon, 24 Feb 2025 11:15:13 +0000
Subject: [PATCH 09/43] fpu: Make targets specify floatx80 default Inf at
 runtime
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently we hardcode at compile time whether the floatx80 default
Infinity value has the explicit integer bit set or not (x86 sets it;
m68k does not).  To be able to compile softfloat once for all targets
we'd like to move this setting to runtime.

Define a new FloatX80Behaviour enum which is a set of flags that
define the target's floatx80 handling.  Initially we define just one
flag, for whether the default Infinity has the Integer bit set or
not, but we will expand this in future commits to cover the other
floatx80 target specifics that we currently make compile-time
settings.

Define a new function floatx80_default_inf() which returns the
appropriate default Infinity value of the given sign, and use it in
the code that was previously directly using the compile-time constant
floatx80_infinity_{low,high} values when packing an infinity into a
floatx80.

Since floatx80 is highly unlikely to be supported in any new
architecture, and the existing code is generally written as "default
to like x87, with an ifdef for m68k", we make the default value for
the floatx80 behaviour flags be "what x87 does".  This means we only
need to change the m68k target to specify the behaviour flags.

(Other users of floatx80 are the Arm NWFPE emulation, which is
obsolete and probably not actually doing the right thing anyway, and
the PPC xsrqpxp insn.  Making the default be "like x87" avoids our
needing to review and test for behaviour changes there.)

We will clean up the remaining uses of the floatx80_infinity global
constant in subsequent commits.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Message-id: 20250224111524.1101196-2-peter.maydell@linaro.org
Message-id: 20250217125055.160887-2-peter.maydell@linaro.org
---
 fpu/softfloat-specialize.c.inc  | 10 ++++++++++
 fpu/softfloat.c                 |  7 +++----
 include/fpu/softfloat-helpers.h | 12 ++++++++++++
 include/fpu/softfloat-types.h   | 13 +++++++++++++
 include/fpu/softfloat.h         |  1 +
 target/m68k/cpu.c               |  6 ++++++
 6 files changed, 45 insertions(+), 4 deletions(-)

diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc
index cbbbab52ba..73789e97d7 100644
--- a/fpu/softfloat-specialize.c.inc
+++ b/fpu/softfloat-specialize.c.inc
@@ -227,6 +227,16 @@ floatx80 floatx80_default_nan(float_status *status)
 | The pattern for a default generated extended double-precision inf.
 *----------------------------------------------------------------------------*/
 
+floatx80 floatx80_default_inf(bool zSign, float_status *status)
+{
+    /*
+     * Whether the Integer bit is set in the default Infinity is
+     * target dependent.
+     */
+    bool z = status->floatx80_behaviour & floatx80_default_inf_int_bit_is_zero;
+    return packFloatx80(zSign, 0x7fff, z ? 0 : (1ULL << 63));
+}
+
 #define floatx80_infinity_high 0x7FFF
 #if defined(TARGET_M68K)
 #define floatx80_infinity_low  UINT64_C(0x0000000000000000)
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index f4fed9bfda..b12ad2b42a 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -1860,7 +1860,8 @@ static floatx80 floatx80_round_pack_canonical(FloatParts128 *p,
 
     case float_class_inf:
         /* x86 and m68k differ in the setting of the integer bit. */
-        frac = floatx80_infinity_low;
+        frac = s->floatx80_behaviour & floatx80_default_inf_int_bit_is_zero ?
+            0 : (1ULL << 63);
         exp = fmt->exp_max;
         break;
 
@@ -5144,9 +5145,7 @@ floatx80 roundAndPackFloatx80(FloatX80RoundPrec roundingPrecision, bool zSign,
                ) {
                 return packFloatx80( zSign, 0x7FFE, ~ roundMask );
             }
-            return packFloatx80(zSign,
-                                floatx80_infinity_high,
-                                floatx80_infinity_low);
+            return floatx80_default_inf(zSign, status);
         }
         if ( zExp <= 0 ) {
             isTiny = status->tininess_before_rounding
diff --git a/include/fpu/softfloat-helpers.h b/include/fpu/softfloat-helpers.h
index 8983c2748e..90862f5cd2 100644
--- a/include/fpu/softfloat-helpers.h
+++ b/include/fpu/softfloat-helpers.h
@@ -75,6 +75,12 @@ static inline void set_floatx80_rounding_precision(FloatX80RoundPrec val,
     status->floatx80_rounding_precision = val;
 }
 
+static inline void set_floatx80_behaviour(FloatX80Behaviour b,
+                                          float_status *status)
+{
+    status->floatx80_behaviour = b;
+}
+
 static inline void set_float_2nan_prop_rule(Float2NaNPropRule rule,
                                             float_status *status)
 {
@@ -151,6 +157,12 @@ get_floatx80_rounding_precision(const float_status *status)
     return status->floatx80_rounding_precision;
 }
 
+static inline FloatX80Behaviour
+get_floatx80_behaviour(const float_status *status)
+{
+    return status->floatx80_behaviour;
+}
+
 static inline Float2NaNPropRule
 get_float_2nan_prop_rule(const float_status *status)
 {
diff --git a/include/fpu/softfloat-types.h b/include/fpu/softfloat-types.h
index 53d5eb8521..dd22ecdbe6 100644
--- a/include/fpu/softfloat-types.h
+++ b/include/fpu/softfloat-types.h
@@ -320,6 +320,18 @@ typedef enum __attribute__((__packed__)) {
     float_ftz_before_rounding = 1,
 } FloatFTZDetection;
 
+/*
+ * floatx80 is primarily used by x86 and m68k, and there are
+ * differences in the handling, largely related to the explicit
+ * Integer bit which floatx80 has and the other float formats do not.
+ * These flag values allow specification of the target's requirements
+ * and can be ORed together to set floatx80_behaviour.
+ */
+typedef enum __attribute__((__packed__)) {
+    /* In the default Infinity value, is the Integer bit 0 ? */
+    floatx80_default_inf_int_bit_is_zero = 1,
+} FloatX80Behaviour;
+
 /*
  * Floating Point Status. Individual architectures may maintain
  * several versions of float_status for different functions. The
@@ -331,6 +343,7 @@ typedef struct float_status {
     uint16_t float_exception_flags;
     FloatRoundMode float_rounding_mode;
     FloatX80RoundPrec floatx80_rounding_precision;
+    FloatX80Behaviour floatx80_behaviour;
     Float2NaNPropRule float_2nan_prop_rule;
     Float3NaNPropRule float_3nan_prop_rule;
     FloatInfZeroNaNRule float_infzeronan_rule;
diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h
index 09a40b4310..afae390602 100644
--- a/include/fpu/softfloat.h
+++ b/include/fpu/softfloat.h
@@ -961,6 +961,7 @@ float128 floatx80_to_float128(floatx80, float_status *status);
 | The pattern for an extended double-precision inf.
 *----------------------------------------------------------------------------*/
 extern const floatx80 floatx80_infinity;
+floatx80 floatx80_default_inf(bool zSign, float_status *status);
 
 /*----------------------------------------------------------------------------
 | Software IEC/IEEE extended double-precision operations.
diff --git a/target/m68k/cpu.c b/target/m68k/cpu.c
index 41dfdf5804..df66e8ba22 100644
--- a/target/m68k/cpu.c
+++ b/target/m68k/cpu.c
@@ -107,6 +107,12 @@ static void m68k_cpu_reset_hold(Object *obj, ResetType type)
     set_float_2nan_prop_rule(float_2nan_prop_ab, &env->fp_status);
     /* Default NaN: sign bit clear, all frac bits set */
     set_float_default_nan_pattern(0b01111111, &env->fp_status);
+    /*
+     * m68k-specific floatx80 behaviour:
+     *  * default Infinity values have a zero Integer bit
+     */
+    set_floatx80_behaviour(floatx80_default_inf_int_bit_is_zero,
+                           &env->fp_status);
 
     nan = floatx80_default_nan(&env->fp_status);
     for (i = 0; i < 8; i++) {

From e456d4465b630ab2eed08611c6193f0a880e0ea3 Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Mon, 24 Feb 2025 11:15:14 +0000
Subject: [PATCH 10/43] target/m68k: Avoid using floatx80_infinity global const
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The global const floatx80_infinity is (unlike all the other
float*_infinity values) target-specific, because whether the explicit
Integer bit is set or not varies between m68k and i386.  We want to
be able to compile softfloat once for multiple targets, so we can't
continue to use a single global whose value needs to be different
between targets.

Replace the direct uses of floatx80_infinity in target/m68k with
calls to the new floatx80_default_inf() function.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Message-id: 20250224111524.1101196-3-peter.maydell@linaro.org
Message-id: 20250217125055.160887-3-peter.maydell@linaro.org
---
 target/m68k/softfloat.c | 47 ++++++++++++++---------------------------
 1 file changed, 16 insertions(+), 31 deletions(-)

diff --git a/target/m68k/softfloat.c b/target/m68k/softfloat.c
index 02dcc03d15..d1f150e641 100644
--- a/target/m68k/softfloat.c
+++ b/target/m68k/softfloat.c
@@ -142,8 +142,7 @@ floatx80 floatx80_scale(floatx80 a, floatx80 b, float_status *status)
         if ((uint64_t) (aSig << 1)) {
             return propagateFloatx80NaN(a, b, status);
         }
-        return packFloatx80(aSign, floatx80_infinity.high,
-                            floatx80_infinity.low);
+        return floatx80_default_inf(aSign, status);
     }
     if (aExp == 0) {
         if (aSig == 0) {
@@ -245,7 +244,7 @@ floatx80 floatx80_lognp1(floatx80 a, float_status *status)
             float_raise(float_flag_invalid, status);
             return floatx80_default_nan(status);
         }
-        return packFloatx80(0, floatx80_infinity.high, floatx80_infinity.low);
+        return floatx80_default_inf(0, status);
     }
 
     if (aExp == 0 && aSig == 0) {
@@ -255,8 +254,7 @@ floatx80 floatx80_lognp1(floatx80 a, float_status *status)
     if (aSign && aExp >= one_exp) {
         if (aExp == one_exp && aSig == one_sig) {
             float_raise(float_flag_divbyzero, status);
-            return packFloatx80(aSign, floatx80_infinity.high,
-                                floatx80_infinity.low);
+            return floatx80_default_inf(aSign, status);
         }
         float_raise(float_flag_invalid, status);
         return floatx80_default_nan(status);
@@ -442,8 +440,7 @@ floatx80 floatx80_logn(floatx80 a, float_status *status)
             propagateFloatx80NaNOneArg(a, status);
         }
         if (aSign == 0) {
-            return packFloatx80(0, floatx80_infinity.high,
-                                floatx80_infinity.low);
+            return floatx80_default_inf(0, status);
         }
     }
 
@@ -452,8 +449,7 @@ floatx80 floatx80_logn(floatx80 a, float_status *status)
     if (aExp == 0) {
         if (aSig == 0) { /* zero */
             float_raise(float_flag_divbyzero, status);
-            return packFloatx80(1, floatx80_infinity.high,
-                                floatx80_infinity.low);
+            return floatx80_default_inf(1, status);
         }
         if ((aSig & one_sig) == 0) { /* denormal */
             normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
@@ -610,15 +606,13 @@ floatx80 floatx80_log10(floatx80 a, float_status *status)
             propagateFloatx80NaNOneArg(a, status);
         }
         if (aSign == 0) {
-            return packFloatx80(0, floatx80_infinity.high,
-                                floatx80_infinity.low);
+            return floatx80_default_inf(0, status);
         }
     }
 
     if (aExp == 0 && aSig == 0) {
         float_raise(float_flag_divbyzero, status);
-        return packFloatx80(1, floatx80_infinity.high,
-                            floatx80_infinity.low);
+        return floatx80_default_inf(1, status);
     }
 
     if (aSign) {
@@ -668,16 +662,14 @@ floatx80 floatx80_log2(floatx80 a, float_status *status)
             propagateFloatx80NaNOneArg(a, status);
         }
         if (aSign == 0) {
-            return packFloatx80(0, floatx80_infinity.high,
-                                floatx80_infinity.low);
+            return floatx80_default_inf(0, status);
         }
     }
 
     if (aExp == 0) {
         if (aSig == 0) {
             float_raise(float_flag_divbyzero, status);
-            return packFloatx80(1, floatx80_infinity.high,
-                                floatx80_infinity.low);
+            return floatx80_default_inf(1, status);
         }
         normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
     }
@@ -740,8 +732,7 @@ floatx80 floatx80_etox(floatx80 a, float_status *status)
         if (aSign) {
             return packFloatx80(0, 0, 0);
         }
-        return packFloatx80(0, floatx80_infinity.high,
-                            floatx80_infinity.low);
+        return floatx80_default_inf(0, status);
     }
 
     if (aExp == 0 && aSig == 0) {
@@ -924,8 +915,7 @@ floatx80 floatx80_twotox(floatx80 a, float_status *status)
         if (aSign) {
             return packFloatx80(0, 0, 0);
         }
-        return packFloatx80(0, floatx80_infinity.high,
-                            floatx80_infinity.low);
+        return floatx80_default_inf(0, status);
     }
 
     if (aExp == 0 && aSig == 0) {
@@ -1075,8 +1065,7 @@ floatx80 floatx80_tentox(floatx80 a, float_status *status)
         if (aSign) {
             return packFloatx80(0, 0, 0);
         }
-        return packFloatx80(0, floatx80_infinity.high,
-                            floatx80_infinity.low);
+        return floatx80_default_inf(0, status);
     }
 
     if (aExp == 0 && aSig == 0) {
@@ -2260,8 +2249,7 @@ floatx80 floatx80_atanh(floatx80 a, float_status *status)
     if (compact >= 0x3FFF8000) { /* |X| >= 1 */
         if (aExp == one_exp && aSig == one_sig) { /* |X| == 1 */
             float_raise(float_flag_divbyzero, status);
-            return packFloatx80(aSign, floatx80_infinity.high,
-                                floatx80_infinity.low);
+            return floatx80_default_inf(aSign, status);
         } else { /* |X| > 1 */
             float_raise(float_flag_invalid, status);
             return floatx80_default_nan(status);
@@ -2320,8 +2308,7 @@ floatx80 floatx80_etoxm1(floatx80 a, float_status *status)
         if (aSign) {
             return packFloatx80(aSign, one_exp, one_sig);
         }
-        return packFloatx80(0, floatx80_infinity.high,
-                            floatx80_infinity.low);
+        return floatx80_default_inf(0, status);
     }
 
     if (aExp == 0 && aSig == 0) {
@@ -2687,8 +2674,7 @@ floatx80 floatx80_sinh(floatx80 a, float_status *status)
         if ((uint64_t) (aSig << 1)) {
             return propagateFloatx80NaNOneArg(a, status);
         }
-        return packFloatx80(aSign, floatx80_infinity.high,
-                            floatx80_infinity.low);
+        return floatx80_default_inf(aSign, status);
     }
 
     if (aExp == 0 && aSig == 0) {
@@ -2774,8 +2760,7 @@ floatx80 floatx80_cosh(floatx80 a, float_status *status)
         if ((uint64_t) (aSig << 1)) {
             return propagateFloatx80NaNOneArg(a, status);
         }
-        return packFloatx80(0, floatx80_infinity.high,
-                            floatx80_infinity.low);
+        return floatx80_default_inf(0, status);
     }
 
     if (aExp == 0 && aSig == 0) {

From 165ce008d734bc0024dabdbfd1c41738bc5b834f Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Mon, 24 Feb 2025 11:15:15 +0000
Subject: [PATCH 11/43] target/i386: Avoid using floatx80_infinity global const
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The global const floatx80_infinity is (unlike all the other
float*_infinity values) target-specific, because whether the explicit
Integer bit is set or not varies between m68k and i386.  We want to
be able to compile softfloat once for multiple targets, so we can't
continue to use a single global whose value needs to be different
between targets.

Replace the direct uses of floatx80_infinity in target/i386 with
calls to the new floatx80_default_inf() function. Note that because
we can ask the function for either a negative or positive infinity,
we don't need to change the sign of a positive infinity via
floatx80_chs() for the negative-Inf case.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Message-id: 20250224111524.1101196-4-peter.maydell@linaro.org
Message-id: 20250217125055.160887-4-peter.maydell@linaro.org
---
 target/i386/tcg/fpu_helper.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/target/i386/tcg/fpu_helper.c b/target/i386/tcg/fpu_helper.c
index f112c6c673..741af09f90 100644
--- a/target/i386/tcg/fpu_helper.c
+++ b/target/i386/tcg/fpu_helper.c
@@ -1832,7 +1832,7 @@ void helper_fxtract(CPUX86State *env)
     } else if (floatx80_is_infinity(ST0)) {
         fpush(env);
         ST0 = ST1;
-        ST1 = floatx80_infinity;
+        ST1 = floatx80_default_inf(0, &env->fp_status);
     } else {
         int expdif;
 
@@ -2358,9 +2358,8 @@ void helper_fscale(CPUX86State *env)
                 float_raise(float_flag_invalid, &env->fp_status);
                 ST0 = floatx80_default_nan(&env->fp_status);
             } else {
-                ST0 = (floatx80_is_neg(ST0) ?
-                       floatx80_chs(floatx80_infinity) :
-                       floatx80_infinity);
+                ST0 = floatx80_default_inf(floatx80_is_neg(ST0),
+                                           &env->fp_status);
             }
         }
     } else {

From 9ea6d1f141426a7da91f1c7ba3d693472f0550a4 Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Mon, 24 Feb 2025 11:15:16 +0000
Subject: [PATCH 12/43] fpu: Pass float_status to floatx80_is_infinity()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Unlike the other float formats, whether a floatx80 value is
considered to be an Infinity is target-dependent.  (On x86 if the
explicit integer bit is clear this is a "pseudo-infinity" and not a
valid infinity; m68k does not care about the value of the integer
bit.)

Currently we select this target-specific logic at compile time with
an ifdef.  We're going to want to do this at runtime, so change the
floatx80_is_infinity() function to take a float_status.

This commit doesn't change any logic; we'll do that in the
next commit.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Message-id: 20250224111524.1101196-5-peter.maydell@linaro.org
---
 include/fpu/softfloat.h      |  2 +-
 target/i386/tcg/fpu_helper.c | 20 +++++++++++---------
 target/m68k/fpu_helper.c     |  2 +-
 3 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h
index afae390602..3c83d703ba 100644
--- a/include/fpu/softfloat.h
+++ b/include/fpu/softfloat.h
@@ -996,7 +996,7 @@ static inline floatx80 floatx80_chs(floatx80 a)
     return a;
 }
 
-static inline bool floatx80_is_infinity(floatx80 a)
+static inline bool floatx80_is_infinity(floatx80 a, float_status *status)
 {
 #if defined(TARGET_M68K)
     return (a.high & 0x7fff) == floatx80_infinity.high && !(a.low << 1);
diff --git a/target/i386/tcg/fpu_helper.c b/target/i386/tcg/fpu_helper.c
index 741af09f90..3b79bc049d 100644
--- a/target/i386/tcg/fpu_helper.c
+++ b/target/i386/tcg/fpu_helper.c
@@ -1393,7 +1393,8 @@ void helper_fpatan(CPUX86State *env)
         /* Pass this NaN through.  */
     } else if (floatx80_is_zero(ST1) && !arg0_sign) {
         /* Pass this zero through.  */
-    } else if (((floatx80_is_infinity(ST0) && !floatx80_is_infinity(ST1)) ||
+    } else if (((floatx80_is_infinity(ST0, &env->fp_status) &&
+                 !floatx80_is_infinity(ST1, &env->fp_status)) ||
                  arg0_exp - arg1_exp >= 80) &&
                !arg0_sign) {
         /*
@@ -1442,8 +1443,8 @@ void helper_fpatan(CPUX86State *env)
             rexp = pi_exp;
             rsig0 = pi_sig_high;
             rsig1 = pi_sig_low;
-        } else if (floatx80_is_infinity(ST1)) {
-            if (floatx80_is_infinity(ST0)) {
+        } else if (floatx80_is_infinity(ST1, &env->fp_status)) {
+            if (floatx80_is_infinity(ST0, &env->fp_status)) {
                 if (arg0_sign) {
                     rexp = pi_34_exp;
                     rsig0 = pi_34_sig_high;
@@ -1462,7 +1463,8 @@ void helper_fpatan(CPUX86State *env)
             rexp = pi_2_exp;
             rsig0 = pi_2_sig_high;
             rsig1 = pi_2_sig_low;
-        } else if (floatx80_is_infinity(ST0) || arg0_exp - arg1_exp >= 80) {
+        } else if (floatx80_is_infinity(ST0, &env->fp_status) ||
+                   arg0_exp - arg1_exp >= 80) {
             /* ST0 is negative.  */
             rexp = pi_exp;
             rsig0 = pi_sig_high;
@@ -1829,7 +1831,7 @@ void helper_fxtract(CPUX86State *env)
         }
         fpush(env);
         ST0 = ST1;
-    } else if (floatx80_is_infinity(ST0)) {
+    } else if (floatx80_is_infinity(ST0, &env->fp_status)) {
         fpush(env);
         ST0 = ST1;
         ST1 = floatx80_default_inf(0, &env->fp_status);
@@ -2173,7 +2175,7 @@ void helper_fyl2x(CPUX86State *env)
     } else if (arg0_sign && !floatx80_is_zero(ST0)) {
         float_raise(float_flag_invalid, &env->fp_status);
         ST1 = floatx80_default_nan(&env->fp_status);
-    } else if (floatx80_is_infinity(ST1)) {
+    } else if (floatx80_is_infinity(ST1, &env->fp_status)) {
         FloatRelation cmp = floatx80_compare(ST0, floatx80_one,
                                              &env->fp_status);
         switch (cmp) {
@@ -2188,7 +2190,7 @@ void helper_fyl2x(CPUX86State *env)
             ST1 = floatx80_default_nan(&env->fp_status);
             break;
         }
-    } else if (floatx80_is_infinity(ST0)) {
+    } else if (floatx80_is_infinity(ST0, &env->fp_status)) {
         if (floatx80_is_zero(ST1)) {
             float_raise(float_flag_invalid, &env->fp_status);
             ST1 = floatx80_default_nan(&env->fp_status);
@@ -2341,11 +2343,11 @@ void helper_fscale(CPUX86State *env)
             float_raise(float_flag_invalid, &env->fp_status);
             ST0 = floatx80_silence_nan(ST0, &env->fp_status);
         }
-    } else if (floatx80_is_infinity(ST1) &&
+    } else if (floatx80_is_infinity(ST1, &env->fp_status) &&
                !floatx80_invalid_encoding(ST0) &&
                !floatx80_is_any_nan(ST0)) {
         if (floatx80_is_neg(ST1)) {
-            if (floatx80_is_infinity(ST0)) {
+            if (floatx80_is_infinity(ST0, &env->fp_status)) {
                 float_raise(float_flag_invalid, &env->fp_status);
                 ST0 = floatx80_default_nan(&env->fp_status);
             } else {
diff --git a/target/m68k/fpu_helper.c b/target/m68k/fpu_helper.c
index 339b73ad7d..eb1cb8c687 100644
--- a/target/m68k/fpu_helper.c
+++ b/target/m68k/fpu_helper.c
@@ -455,7 +455,7 @@ void HELPER(ftst)(CPUM68KState *env, FPReg *val)
 
     if (floatx80_is_any_nan(val->d)) {
         cc |= FPSR_CC_A;
-    } else if (floatx80_is_infinity(val->d)) {
+    } else if (floatx80_is_infinity(val->d, &env->fp_status)) {
         cc |= FPSR_CC_I;
     } else if (floatx80_is_zero(val->d)) {
         cc |= FPSR_CC_Z;

From 44eb32a9835fe2feb19503e93476eee602daee0b Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Mon, 24 Feb 2025 11:15:17 +0000
Subject: [PATCH 13/43] fpu: Make targets specify whether floatx80 Inf can have
 Int bit clear
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In Intel terminology, a floatx80 Infinity with the explicit integer
bit clear is a "pseudo-infinity"; for x86 these are not valid
infinity values.  m68k is looser and does not care whether the
Integer bit is set or clear in an infinity.

Move this setting to runtime rather than using an ifdef in
floatx80_is_infinity().

Since this was the last use of the floatx80_infinity global constant,
we remove it and its definition here.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Message-id: 20250224111524.1101196-6-peter.maydell@linaro.org
Message-id: 20250217125055.160887-5-peter.maydell@linaro.org
---
 fpu/softfloat-specialize.c.inc | 10 ----------
 include/fpu/softfloat-types.h  |  5 +++++
 include/fpu/softfloat.h        | 18 +++++++++++-------
 target/m68k/cpu.c              |  4 +++-
 4 files changed, 19 insertions(+), 18 deletions(-)

diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc
index 73789e97d7..8327f72786 100644
--- a/fpu/softfloat-specialize.c.inc
+++ b/fpu/softfloat-specialize.c.inc
@@ -237,16 +237,6 @@ floatx80 floatx80_default_inf(bool zSign, float_status *status)
     return packFloatx80(zSign, 0x7fff, z ? 0 : (1ULL << 63));
 }
 
-#define floatx80_infinity_high 0x7FFF
-#if defined(TARGET_M68K)
-#define floatx80_infinity_low  UINT64_C(0x0000000000000000)
-#else
-#define floatx80_infinity_low  UINT64_C(0x8000000000000000)
-#endif
-
-const floatx80 floatx80_infinity
-    = make_floatx80_init(floatx80_infinity_high, floatx80_infinity_low);
-
 /*----------------------------------------------------------------------------
 | Returns 1 if the half-precision floating-point value `a' is a quiet
 | NaN; otherwise returns 0.
diff --git a/include/fpu/softfloat-types.h b/include/fpu/softfloat-types.h
index dd22ecdbe6..e1732beba4 100644
--- a/include/fpu/softfloat-types.h
+++ b/include/fpu/softfloat-types.h
@@ -330,6 +330,11 @@ typedef enum __attribute__((__packed__)) {
 typedef enum __attribute__((__packed__)) {
     /* In the default Infinity value, is the Integer bit 0 ? */
     floatx80_default_inf_int_bit_is_zero = 1,
+    /*
+     * Are Pseudo-infinities (Inf with the Integer bit zero) valid?
+     * If so, floatx80_is_infinity() will return true for them.
+     */
+    floatx80_pseudo_inf_valid = 2,
 } FloatX80Behaviour;
 
 /*
diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h
index 3c83d703ba..07259c5930 100644
--- a/include/fpu/softfloat.h
+++ b/include/fpu/softfloat.h
@@ -960,7 +960,6 @@ float128 floatx80_to_float128(floatx80, float_status *status);
 /*----------------------------------------------------------------------------
 | The pattern for an extended double-precision inf.
 *----------------------------------------------------------------------------*/
-extern const floatx80 floatx80_infinity;
 floatx80 floatx80_default_inf(bool zSign, float_status *status);
 
 /*----------------------------------------------------------------------------
@@ -998,12 +997,17 @@ static inline floatx80 floatx80_chs(floatx80 a)
 
 static inline bool floatx80_is_infinity(floatx80 a, float_status *status)
 {
-#if defined(TARGET_M68K)
-    return (a.high & 0x7fff) == floatx80_infinity.high && !(a.low << 1);
-#else
-    return (a.high & 0x7fff) == floatx80_infinity.high &&
-                       a.low == floatx80_infinity.low;
-#endif
+    /*
+     * It's target-specific whether the Integer bit is permitted
+     * to be 0 in a valid Infinity value. (x86 says no, m68k says yes).
+     */
+    bool intbit = a.low >> 63;
+
+    if (!intbit &&
+        !(status->floatx80_behaviour & floatx80_pseudo_inf_valid)) {
+        return false;
+    }
+    return (a.high & 0x7fff) == 0x7fff && !(a.low << 1);
 }
 
 static inline bool floatx80_is_neg(floatx80 a)
diff --git a/target/m68k/cpu.c b/target/m68k/cpu.c
index df66e8ba22..56b23de21f 100644
--- a/target/m68k/cpu.c
+++ b/target/m68k/cpu.c
@@ -110,8 +110,10 @@ static void m68k_cpu_reset_hold(Object *obj, ResetType type)
     /*
      * m68k-specific floatx80 behaviour:
      *  * default Infinity values have a zero Integer bit
+     *  * input Infinities may have the Integer bit either 0 or 1
      */
-    set_floatx80_behaviour(floatx80_default_inf_int_bit_is_zero,
+    set_floatx80_behaviour(floatx80_default_inf_int_bit_is_zero |
+                           floatx80_pseudo_inf_valid,
                            &env->fp_status);
 
     nan = floatx80_default_nan(&env->fp_status);

From 765fe845ccb953b77b1b7e0557b13a7b760067b0 Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Mon, 24 Feb 2025 11:15:18 +0000
Subject: [PATCH 14/43] fpu: Pass float_status to floatx80_invalid_encoding()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The definition of which floatx80 encodings are invalid is
target-specific.  Currently we handle this with an ifdef, but we
would like to defer this decision to runtime.  In preparation, pass a
float_status argument to floatx80_invalid_encoding().

We will change the implementation from ifdef to looking at
the status argument in the following commit.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Message-id: 20250224111524.1101196-7-peter.maydell@linaro.org
---
 fpu/softfloat.c              |  2 +-
 include/fpu/softfloat.h      |  2 +-
 target/i386/tcg/fpu_helper.c | 24 +++++++++++++-----------
 3 files changed, 15 insertions(+), 13 deletions(-)

diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index b12ad2b42a..2a20ae871e 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -1810,7 +1810,7 @@ static bool floatx80_unpack_canonical(FloatParts128 *p, floatx80 f,
         g_assert_not_reached();
     }
 
-    if (unlikely(floatx80_invalid_encoding(f))) {
+    if (unlikely(floatx80_invalid_encoding(f, s))) {
         float_raise(float_flag_invalid, s);
         return false;
     }
diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h
index 07259c5930..1c8f3cbb78 100644
--- a/include/fpu/softfloat.h
+++ b/include/fpu/softfloat.h
@@ -1081,7 +1081,7 @@ static inline bool floatx80_unordered_quiet(floatx80 a, floatx80 b,
 | pseudo-denormals, which must still be correctly handled as inputs even
 | if they are never generated as outputs.
 *----------------------------------------------------------------------------*/
-static inline bool floatx80_invalid_encoding(floatx80 a)
+static inline bool floatx80_invalid_encoding(floatx80 a, float_status *s)
 {
 #if defined(TARGET_M68K)
     /*-------------------------------------------------------------------------
diff --git a/target/i386/tcg/fpu_helper.c b/target/i386/tcg/fpu_helper.c
index 3b79bc049d..4858ae9a5f 100644
--- a/target/i386/tcg/fpu_helper.c
+++ b/target/i386/tcg/fpu_helper.c
@@ -1141,7 +1141,7 @@ void helper_f2xm1(CPUX86State *env)
     int32_t exp = extractFloatx80Exp(ST0);
     bool sign = extractFloatx80Sign(ST0);
 
-    if (floatx80_invalid_encoding(ST0)) {
+    if (floatx80_invalid_encoding(ST0, &env->fp_status)) {
         float_raise(float_flag_invalid, &env->fp_status);
         ST0 = floatx80_default_nan(&env->fp_status);
     } else if (floatx80_is_any_nan(ST0)) {
@@ -1383,8 +1383,8 @@ void helper_fpatan(CPUX86State *env)
     } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) {
         float_raise(float_flag_invalid, &env->fp_status);
         ST1 = floatx80_silence_nan(ST1, &env->fp_status);
-    } else if (floatx80_invalid_encoding(ST0) ||
-               floatx80_invalid_encoding(ST1)) {
+    } else if (floatx80_invalid_encoding(ST0, &env->fp_status) ||
+               floatx80_invalid_encoding(ST1, &env->fp_status)) {
         float_raise(float_flag_invalid, &env->fp_status);
         ST1 = floatx80_default_nan(&env->fp_status);
     } else if (floatx80_is_any_nan(ST0)) {
@@ -1819,7 +1819,7 @@ void helper_fxtract(CPUX86State *env)
                            &env->fp_status);
         fpush(env);
         ST0 = temp.d;
-    } else if (floatx80_invalid_encoding(ST0)) {
+    } else if (floatx80_invalid_encoding(ST0, &env->fp_status)) {
         float_raise(float_flag_invalid, &env->fp_status);
         ST0 = floatx80_default_nan(&env->fp_status);
         fpush(env);
@@ -1870,7 +1870,8 @@ static void helper_fprem_common(CPUX86State *env, bool mod)
     env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
     if (floatx80_is_zero(ST0) || floatx80_is_zero(ST1) ||
         exp0 == 0x7fff || exp1 == 0x7fff ||
-        floatx80_invalid_encoding(ST0) || floatx80_invalid_encoding(ST1)) {
+        floatx80_invalid_encoding(ST0, &env->fp_status) ||
+        floatx80_invalid_encoding(ST1, &env->fp_status)) {
         ST0 = floatx80_modrem(ST0, ST1, mod, &quotient, &env->fp_status);
     } else {
         if (exp0 == 0) {
@@ -2066,8 +2067,8 @@ void helper_fyl2xp1(CPUX86State *env)
     } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) {
         float_raise(float_flag_invalid, &env->fp_status);
         ST1 = floatx80_silence_nan(ST1, &env->fp_status);
-    } else if (floatx80_invalid_encoding(ST0) ||
-               floatx80_invalid_encoding(ST1)) {
+    } else if (floatx80_invalid_encoding(ST0, &env->fp_status) ||
+               floatx80_invalid_encoding(ST1, &env->fp_status)) {
         float_raise(float_flag_invalid, &env->fp_status);
         ST1 = floatx80_default_nan(&env->fp_status);
     } else if (floatx80_is_any_nan(ST0)) {
@@ -2164,8 +2165,8 @@ void helper_fyl2x(CPUX86State *env)
     } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) {
         float_raise(float_flag_invalid, &env->fp_status);
         ST1 = floatx80_silence_nan(ST1, &env->fp_status);
-    } else if (floatx80_invalid_encoding(ST0) ||
-               floatx80_invalid_encoding(ST1)) {
+    } else if (floatx80_invalid_encoding(ST0, &env->fp_status) ||
+               floatx80_invalid_encoding(ST1, &env->fp_status)) {
         float_raise(float_flag_invalid, &env->fp_status);
         ST1 = floatx80_default_nan(&env->fp_status);
     } else if (floatx80_is_any_nan(ST0)) {
@@ -2331,7 +2332,8 @@ void helper_frndint(CPUX86State *env)
 void helper_fscale(CPUX86State *env)
 {
     uint8_t old_flags = save_exception_flags(env);
-    if (floatx80_invalid_encoding(ST1) || floatx80_invalid_encoding(ST0)) {
+    if (floatx80_invalid_encoding(ST1, &env->fp_status) ||
+        floatx80_invalid_encoding(ST0, &env->fp_status)) {
         float_raise(float_flag_invalid, &env->fp_status);
         ST0 = floatx80_default_nan(&env->fp_status);
     } else if (floatx80_is_any_nan(ST1)) {
@@ -2344,7 +2346,7 @@ void helper_fscale(CPUX86State *env)
             ST0 = floatx80_silence_nan(ST0, &env->fp_status);
         }
     } else if (floatx80_is_infinity(ST1, &env->fp_status) &&
-               !floatx80_invalid_encoding(ST0) &&
+               !floatx80_invalid_encoding(ST0, &env->fp_status) &&
                !floatx80_is_any_nan(ST0)) {
         if (floatx80_is_neg(ST1)) {
             if (floatx80_is_infinity(ST0, &env->fp_status)) {

From a261d3e331ca06f4d92e689f2bee40d0a0cdee08 Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Mon, 24 Feb 2025 11:15:19 +0000
Subject: [PATCH 15/43] fpu: Make floatx80 invalid encoding settable at runtime
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Because floatx80 has an explicit integer bit, this permits some
odd encodings where the integer bit is not set correctly for the
floating point value type. In Intel terminology the
categories are:
  exp == 0, int = 0, mantissa == 0 : zeroes
  exp == 0, int = 0, mantissa != 0 : denormals
  exp == 0, int = 1 : pseudo-denormals
  0 < exp < 0x7fff, int = 0 : unnormals
  0 < exp < 0x7fff, int = 1 : normals
  exp == 0x7fff, int = 0, mantissa == 0 : pseudo-infinities
  exp == 0x7fff, int = 1, mantissa == 0 : infinities
  exp == 0x7fff, int = 0, mantissa != 0 : pseudo-NaNs
  exp == 0x7fff, int = 1, mantissa != 0 : NaNs

The usual IEEE cases of zero, denormal, normal, inf and NaN are always valid.
x87 permits as input also pseudo-denormals.
m68k permits all those and also pseudo-infinities, pseudo-NaNs and unnormals.

Currently we have an ifdef in floatx80_invalid_encoding() to select
the x86 vs m68k behaviour.  Add new floatx80_behaviour flags to
select whether pseudo-NaN and unnormal are valid, and use these
(plus the existing pseudo_inf_valid flag) to decide whether these
encodings are invalid at runtime.

We leave pseudo-denormals as always-valid, since both x86 and m68k
accept them.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Message-id: 20250224111524.1101196-8-peter.maydell@linaro.org
Message-id: 20250217125055.160887-6-peter.maydell@linaro.org
---
 include/fpu/softfloat-types.h | 14 ++++++++
 include/fpu/softfloat.h       | 68 ++++++++++++++++++-----------------
 target/m68k/cpu.c             | 28 ++++++++++++++-
 3 files changed, 77 insertions(+), 33 deletions(-)

diff --git a/include/fpu/softfloat-types.h b/include/fpu/softfloat-types.h
index e1732beba4..b1941384ae 100644
--- a/include/fpu/softfloat-types.h
+++ b/include/fpu/softfloat-types.h
@@ -333,8 +333,22 @@ typedef enum __attribute__((__packed__)) {
     /*
      * Are Pseudo-infinities (Inf with the Integer bit zero) valid?
      * If so, floatx80_is_infinity() will return true for them.
+     * If not, floatx80_invalid_encoding() will return true for them,
+     * and using them as inputs to a float op will raise Invalid.
      */
     floatx80_pseudo_inf_valid = 2,
+    /*
+     * Are Pseudo-NaNs (NaNs where the Integer bit is zero) valid?
+     * If not, floatx80_invalid_encoding() will return true for them,
+     * and using them as inputs to a float op will raise Invalid.
+     */
+    floatx80_pseudo_nan_valid = 4,
+    /*
+     * Are Unnormals (0 < exp < 0x7fff, Integer bit zero) valid?
+     * If not, floatx80_invalid_encoding() will return true for them,
+     * and using them as inputs to a float op will raise Invalid.
+     */
+    floatx80_unnormal_valid = 8,
 } FloatX80Behaviour;
 
 /*
diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h
index 1c8f3cbb78..c18ab2cb60 100644
--- a/include/fpu/softfloat.h
+++ b/include/fpu/softfloat.h
@@ -1073,41 +1073,45 @@ static inline bool floatx80_unordered_quiet(floatx80 a, floatx80 b,
 
 /*----------------------------------------------------------------------------
 | Return whether the given value is an invalid floatx80 encoding.
-| Invalid floatx80 encodings arise when the integer bit is not set, but
-| the exponent is not zero. The only times the integer bit is permitted to
-| be zero is in subnormal numbers and the value zero.
-| This includes what the Intel software developer's manual calls pseudo-NaNs,
-| pseudo-infinities and un-normal numbers. It does not include
-| pseudo-denormals, which must still be correctly handled as inputs even
-| if they are never generated as outputs.
+| Invalid floatx80 encodings may arise when the integer bit is not set
+| correctly; this is target-specific. In Intel terminology the
+| categories are:
+|  exp == 0, int = 0, mantissa == 0 : zeroes
+|  exp == 0, int = 0, mantissa != 0 : denormals
+|  exp == 0, int = 1 : pseudo-denormals
+|  0 < exp < 0x7fff, int = 0 : unnormals
+|  0 < exp < 0x7fff, int = 1 : normals
+|  exp == 0x7fff, int = 0, mantissa == 0 : pseudo-infinities
+|  exp == 0x7fff, int = 1, mantissa == 0 : infinities
+|  exp == 0x7fff, int = 0, mantissa != 0 : pseudo-NaNs
+|  exp == 0x7fff, int = 1, mantissa != 0 : NaNs
+|
+| The usual IEEE cases of zero, denormal, normal, inf and NaN are always valid.
+| x87 permits as input also pseudo-denormals.
+| m68k permits all those and also pseudo-infinities, pseudo-NaNs and unnormals.
+|
+| Since we don't have a target that handles floatx80 but prohibits
+| pseudo-denormals in input, we don't currently have a floatx80_behaviour
+| flag for that case, but instead always accept it. Conveniently this
+| means that all cases with either exponent 0 or the integer bit set are
+| valid for all targets.
 *----------------------------------------------------------------------------*/
 static inline bool floatx80_invalid_encoding(floatx80 a, float_status *s)
 {
-#if defined(TARGET_M68K)
-    /*-------------------------------------------------------------------------
-    | With m68k, the explicit integer bit can be zero in the case of:
-    | - zeros                (exp == 0, mantissa == 0)
-    | - denormalized numbers (exp == 0, mantissa != 0)
-    | - unnormalized numbers (exp != 0, exp < 0x7FFF)
-    | - infinities           (exp == 0x7FFF, mantissa == 0)
-    | - not-a-numbers        (exp == 0x7FFF, mantissa != 0)
-    |
-    | For infinities and NaNs, the explicit integer bit can be either one or
-    | zero.
-    |
-    | The IEEE 754 standard does not define a zero integer bit. Such a number
-    | is an unnormalized number. Hardware does not directly support
-    | denormalized and unnormalized numbers, but implicitly supports them by
-    | trapping them as unimplemented data types, allowing efficient conversion
-    | in software.
-    |
-    | See "M68000 FAMILY PROGRAMMER’S REFERENCE MANUAL",
-    |     "1.6 FLOATING-POINT DATA TYPES"
-    *------------------------------------------------------------------------*/
-    return false;
-#else
-    return (a.low & (1ULL << 63)) == 0 && (a.high & 0x7FFF) != 0;
-#endif
+    if ((a.low >> 63) || (a.high & 0x7fff) == 0) {
+        /* Anything with the Integer bit set or the exponent 0 is valid */
+        return false;
+    }
+
+    if ((a.high & 0x7fff) == 0x7fff) {
+        if (a.low) {
+            return !(s->floatx80_behaviour & floatx80_pseudo_nan_valid);
+        } else {
+            return !(s->floatx80_behaviour & floatx80_pseudo_inf_valid);
+        }
+    } else {
+        return !(s->floatx80_behaviour & floatx80_unnormal_valid);
+    }
 }
 
 #define floatx80_zero make_floatx80(0x0000, 0x0000000000000000LL)
diff --git a/target/m68k/cpu.c b/target/m68k/cpu.c
index 56b23de21f..505fa97a53 100644
--- a/target/m68k/cpu.c
+++ b/target/m68k/cpu.c
@@ -111,9 +111,35 @@ static void m68k_cpu_reset_hold(Object *obj, ResetType type)
      * m68k-specific floatx80 behaviour:
      *  * default Infinity values have a zero Integer bit
      *  * input Infinities may have the Integer bit either 0 or 1
+     *  * pseudo-denormals supported for input and output
+     *  * don't raise Invalid for pseudo-NaN/pseudo-Inf/Unnormal
+     *
+     * With m68k, the explicit integer bit can be zero in the case of:
+     * - zeros                (exp == 0, mantissa == 0)
+     * - denormalized numbers (exp == 0, mantissa != 0)
+     * - unnormalized numbers (exp != 0, exp < 0x7FFF)
+     * - infinities           (exp == 0x7FFF, mantissa == 0)
+     * - not-a-numbers        (exp == 0x7FFF, mantissa != 0)
+     *
+     * For infinities and NaNs, the explicit integer bit can be either one or
+     * zero.
+     *
+     * The IEEE 754 standard does not define a zero integer bit. Such a number
+     * is an unnormalized number. Hardware does not directly support
+     * denormalized and unnormalized numbers, but implicitly supports them by
+     * trapping them as unimplemented data types, allowing efficient conversion
+     * in software.
+     *
+     * See "M68000 FAMILY PROGRAMMER’S REFERENCE MANUAL",
+     *     "1.6 FLOATING-POINT DATA TYPES"
+     *
+     * Note though that QEMU's fp emulation does directly handle both
+     * denormal and unnormal values, and does not trap to guest software.
      */
     set_floatx80_behaviour(floatx80_default_inf_int_bit_is_zero |
-                           floatx80_pseudo_inf_valid,
+                           floatx80_pseudo_inf_valid |
+                           floatx80_pseudo_nan_valid |
+                           floatx80_unnormal_valid,
                            &env->fp_status);
 
     nan = floatx80_default_nan(&env->fp_status);

From 1e75d8247ff27a307781230898fbcb8fbb9a8298 Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Mon, 24 Feb 2025 11:15:20 +0000
Subject: [PATCH 16/43] fpu: Move m68k_denormal fmt flag into
 floatx80_behaviour
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently we compile-time set an 'm68k_denormal' flag in the FloatFmt
for floatx80 for m68k.  This controls our handling of what the Intel
documentation calls a "pseudo-denormal": a value where the exponent
field is zero and the explicit integer bit is set.

For x86, the x87 FPU is supposed to accept a pseudo-denormal as
input, but never generate one on output.  For m68k, these values are
permitted on input and may be produced on output.

Replace the flag in the FloatFmt with a flag indicating whether the
float format has an explicit bit (which will be true for floatx80 for
all targets, and false for every other float type).  Then we can gate
the handling of these pseudo-denormals on the setting of a
floatx80_behaviour flag.

As far as I can see from the code we don't actually handle the
x86-mandated "accept on input but don't generate" behaviour, because
the handling in partsN(canonicalize) looked at fmt->m68k_denormal.
So I have added TODO comments to that effect.

This commit doesn't change any behaviour for any target.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Message-id: 20250224111524.1101196-9-peter.maydell@linaro.org
Message-id: 20250217125055.160887-7-peter.maydell@linaro.org
---
 fpu/softfloat-parts.c.inc     | 27 ++++++++++++++++++++++++---
 fpu/softfloat.c               |  9 ++++-----
 include/fpu/softfloat-types.h | 19 +++++++++++++++++++
 target/m68k/cpu.c             |  3 ++-
 4 files changed, 49 insertions(+), 9 deletions(-)

diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc
index 1d09f066c5..171bfd06e3 100644
--- a/fpu/softfloat-parts.c.inc
+++ b/fpu/softfloat-parts.c.inc
@@ -195,6 +195,25 @@ static FloatPartsN *partsN(pick_nan_muladd)(FloatPartsN *a, FloatPartsN *b,
 static void partsN(canonicalize)(FloatPartsN *p, float_status *status,
                                  const FloatFmt *fmt)
 {
+    /*
+     * It's target-dependent how to handle the case of exponent 0
+     * and Integer bit set. Intel calls these "pseudodenormals",
+     * and treats them as if the integer bit was 0, and never
+     * produces them on output. This is the default behaviour for QEMU.
+     * For m68k, the integer bit is considered validly part of the
+     * input value when the exponent is 0, and may be 0 or 1,
+     * giving extra range. They may also be generated as outputs.
+     * (The m68k manual actually calls these values part of the
+     * normalized number range, not the denormalized number range,
+     * but that distinction is not important for us, because
+     * m68k doesn't care about the input_denormal_used status flag.)
+     * floatx80_pseudo_denormal_valid selects the m68k behaviour,
+     * which changes both how we canonicalize such a value and
+     * how we uncanonicalize results.
+     */
+    bool has_pseudo_denormals = fmt->has_explicit_bit &&
+        (status->floatx80_behaviour & floatx80_pseudo_denormal_valid);
+
     if (unlikely(p->exp == 0)) {
         if (likely(frac_eqz(p))) {
             p->cls = float_class_zero;
@@ -206,7 +225,7 @@ static void partsN(canonicalize)(FloatPartsN *p, float_status *status,
             int shift = frac_normalize(p);
             p->cls = float_class_denormal;
             p->exp = fmt->frac_shift - fmt->exp_bias
-                   - shift + !fmt->m68k_denormal;
+                   - shift + !has_pseudo_denormals;
         }
     } else if (likely(p->exp < fmt->exp_max) || fmt->arm_althp) {
         p->cls = float_class_normal;
@@ -342,13 +361,15 @@ static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s,
         frac_clear(p);
     } else {
         bool is_tiny = s->tininess_before_rounding || exp < 0;
+        bool has_pseudo_denormals = fmt->has_explicit_bit &&
+            (s->floatx80_behaviour & floatx80_pseudo_denormal_valid);
 
         if (!is_tiny) {
             FloatPartsN discard;
             is_tiny = !frac_addi(&discard, p, inc);
         }
 
-        frac_shrjam(p, !fmt->m68k_denormal - exp);
+        frac_shrjam(p, !has_pseudo_denormals - exp);
 
         if (p->frac_lo & round_mask) {
             /* Need to recompute round-to-even/round-to-odd. */
@@ -379,7 +400,7 @@ static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s,
             p->frac_lo &= ~round_mask;
         }
 
-        exp = (p->frac_hi & DECOMPOSED_IMPLICIT_BIT) && !fmt->m68k_denormal;
+        exp = (p->frac_hi & DECOMPOSED_IMPLICIT_BIT) && !has_pseudo_denormals;
         frac_shr(p, frac_shift);
 
         if (is_tiny) {
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index 2a20ae871e..b299cfaf86 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -537,7 +537,8 @@ typedef struct {
  *   round_mask: bits below lsb which must be rounded
  * The following optional modifiers are available:
  *   arm_althp: handle ARM Alternative Half Precision
- *   m68k_denormal: explicit integer bit for extended precision may be 1
+ *   has_explicit_bit: has an explicit integer bit; this affects whether
+ *   the float_status floatx80_behaviour handling applies
  */
 typedef struct {
     int exp_size;
@@ -547,7 +548,7 @@ typedef struct {
     int frac_size;
     int frac_shift;
     bool arm_althp;
-    bool m68k_denormal;
+    bool has_explicit_bit;
     uint64_t round_mask;
 } FloatFmt;
 
@@ -600,9 +601,7 @@ static const FloatFmt floatx80_params[3] = {
     [floatx80_precision_d] = { FLOATX80_PARAMS(52) },
     [floatx80_precision_x] = {
         FLOATX80_PARAMS(64),
-#ifdef TARGET_M68K
-        .m68k_denormal = true,
-#endif
+        .has_explicit_bit = true,
     },
 };
 
diff --git a/include/fpu/softfloat-types.h b/include/fpu/softfloat-types.h
index b1941384ae..1af2a0cb14 100644
--- a/include/fpu/softfloat-types.h
+++ b/include/fpu/softfloat-types.h
@@ -349,6 +349,25 @@ typedef enum __attribute__((__packed__)) {
      * and using them as inputs to a float op will raise Invalid.
      */
     floatx80_unnormal_valid = 8,
+
+    /*
+     * If the exponent is 0 and the Integer bit is set, Intel call
+     * this a "pseudo-denormal"; x86 supports that only on input
+     * (treating them as denormals by ignoring the Integer bit).
+     * For m68k, the integer bit is considered validly part of the
+     * input value when the exponent is 0, and may be 0 or 1,
+     * giving extra range. They may also be generated as outputs.
+     * (The m68k manual actually calls these values part of the
+     * normalized number range, not the denormalized number range.)
+     *
+     * By default you get the Intel behaviour where the Integer
+     * bit is ignored; if this is set then the Integer bit value
+     * is honoured, m68k-style.
+     *
+     * Either way, floatx80_invalid_encoding() will always accept
+     * pseudo-denormals.
+     */
+    floatx80_pseudo_denormal_valid = 16,
 } FloatX80Behaviour;
 
 /*
diff --git a/target/m68k/cpu.c b/target/m68k/cpu.c
index 505fa97a53..2617d8f6ed 100644
--- a/target/m68k/cpu.c
+++ b/target/m68k/cpu.c
@@ -139,7 +139,8 @@ static void m68k_cpu_reset_hold(Object *obj, ResetType type)
     set_floatx80_behaviour(floatx80_default_inf_int_bit_is_zero |
                            floatx80_pseudo_inf_valid |
                            floatx80_pseudo_nan_valid |
-                           floatx80_unnormal_valid,
+                           floatx80_unnormal_valid |
+                           floatx80_pseudo_denormal_valid,
                            &env->fp_status);
 
     nan = floatx80_default_nan(&env->fp_status);

From 2e01cfea0735889a1e0481fc783d621779439572 Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Mon, 24 Feb 2025 11:15:21 +0000
Subject: [PATCH 17/43] fpu: Always decide no_signaling_nans() at runtime
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently we have a compile-time shortcut where we
return false from no_signaling_nans() on everything except
Xtensa, because we know that's the only target that
might ever set status->no_signaling_nans.

Remove the ifdef, so we always look at the status flag;
this has no behavioural change, but will be necessary
if we want to build softfloat once for all targets.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20250224111524.1101196-10-peter.maydell@linaro.org
Message-id: 20250217125055.160887-8-peter.maydell@linaro.org
---
 fpu/softfloat-specialize.c.inc | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc
index 8327f72786..a2c6afad5d 100644
--- a/fpu/softfloat-specialize.c.inc
+++ b/fpu/softfloat-specialize.c.inc
@@ -85,11 +85,7 @@ this code that are retained.
  */
 static inline bool no_signaling_nans(float_status *status)
 {
-#if defined(TARGET_XTENSA)
     return status->no_signaling_nans;
-#else
-    return false;
-#endif
 }
 
 /* Define how the architecture discriminates signaling NaNs.

From 3abed4d0eace62910e90c206cb9d5741c6095b12 Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Mon, 24 Feb 2025 11:15:22 +0000
Subject: [PATCH 18/43] fpu: Always decide snan_bit_is_one() at runtime
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently we have a compile-time shortcut where we return a hardcoded
value from snan_bit_is_one() on everything except MIPS, because we
know that's the only target that needs to change
status->snan_bit_is_one at runtime.

Remove the ifdef, so we always look at the status flag.  This means
we must update the two targets (HPPA and SH4) that were previously
hardcoded to return true so that they set the status flag correctly.

This has no behavioural change, but will be necessary if we want to
build softfloat once for all targets.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20250224111524.1101196-11-peter.maydell@linaro.org
Message-id: 20250217125055.160887-9-peter.maydell@linaro.org
---
 fpu/softfloat-specialize.c.inc | 7 -------
 target/hppa/fpu_helper.c       | 1 +
 target/sh4/cpu.c               | 1 +
 3 files changed, 2 insertions(+), 7 deletions(-)

diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc
index a2c6afad5d..ba4fa08b7b 100644
--- a/fpu/softfloat-specialize.c.inc
+++ b/fpu/softfloat-specialize.c.inc
@@ -93,17 +93,10 @@ static inline bool no_signaling_nans(float_status *status)
  * In IEEE 754-1985 this was implementation defined, but in IEEE 754-2008
  * the msb must be zero.  MIPS is (so far) unique in supporting both the
  * 2008 revision and backward compatibility with their original choice.
- * Thus for MIPS we must make the choice at runtime.
  */
 static inline bool snan_bit_is_one(float_status *status)
 {
-#if defined(TARGET_MIPS)
     return status->snan_bit_is_one;
-#elif defined(TARGET_HPPA) || defined(TARGET_SH4)
-    return 1;
-#else
-    return 0;
-#endif
 }
 
 /*----------------------------------------------------------------------------
diff --git a/target/hppa/fpu_helper.c b/target/hppa/fpu_helper.c
index 8ff4b44804..a62d9d3083 100644
--- a/target/hppa/fpu_helper.c
+++ b/target/hppa/fpu_helper.c
@@ -67,6 +67,7 @@ void HELPER(loaded_fr0)(CPUHPPAState *env)
     set_float_infzeronan_rule(float_infzeronan_dnan_never, &env->fp_status);
     /* Default NaN: sign bit clear, msb-1 frac bit set */
     set_float_default_nan_pattern(0b00100000, &env->fp_status);
+    set_snan_bit_is_one(true, &env->fp_status);
     /*
      * "PA-RISC 2.0 Architecture" says it is IMPDEF whether the flushing
      * enabled by FPSR.D happens before or after rounding. We pick "before"
diff --git a/target/sh4/cpu.c b/target/sh4/cpu.c
index 4ac693d99b..ccfe222bdf 100644
--- a/target/sh4/cpu.c
+++ b/target/sh4/cpu.c
@@ -128,6 +128,7 @@ static void superh_cpu_reset_hold(Object *obj, ResetType type)
     set_flush_to_zero(1, &env->fp_status);
 #endif
     set_default_nan_mode(1, &env->fp_status);
+    set_snan_bit_is_one(true, &env->fp_status);
     /* sign bit clear, set all frac bits other than msb */
     set_float_default_nan_pattern(0b00111111, &env->fp_status);
     /*

From c5d4173fcf04f6de9b9bb0959d1fdfc08254381a Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Mon, 24 Feb 2025 11:15:23 +0000
Subject: [PATCH 19/43] fpu: Don't compile-time disable hardfloat for PPC
 targets

We happen to know that for the PPC target the FP status flags (and in
particular float_flag_inexact) will always be cleared before a
floating point operation, and so can_use_fpu() will always return
false.  So we speed things up a little by forcing QEMU_NO_HARDFLOAT
to true on that target.

We would like to build softfloat once for all targets; that means
removing target-specific ifdefs.  Remove the check for TARGET_PPC;
this won't change behaviour because can_use_fpu() will see that
float_flag_inexact is clear and take the softfloat path anyway.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20250224111524.1101196-12-peter.maydell@linaro.org
Message-id: 20250217125055.160887-10-peter.maydell@linaro.org
---
 fpu/softfloat.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index b299cfaf86..b38eea8d87 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -220,11 +220,9 @@ GEN_INPUT_FLUSH3(float64_input_flush3, float64)
  * the use of hardfloat, since hardfloat relies on the inexact flag being
  * already set.
  */
-#if defined(TARGET_PPC) || defined(__FAST_MATH__)
 # if defined(__FAST_MATH__)
 #  warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
     IEEE implementation
-# endif
 # define QEMU_NO_HARDFLOAT 1
 # define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
 #else

From 5d3462b4cde8bedb33362dab0a3ae94d403899b0 Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Mon, 24 Feb 2025 11:15:24 +0000
Subject: [PATCH 20/43] fpu: Build only once
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Now we have removed all the target-specifics from the softfloat code,
we can switch to building it once for the whole system rather than
once per target.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Tested-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20250224111524.1101196-13-peter.maydell@linaro.org
Message-id: 20250217125055.160887-11-peter.maydell@linaro.org
---
 fpu/meson.build | 2 +-
 fpu/softfloat.c | 3 ---
 2 files changed, 1 insertion(+), 4 deletions(-)

diff --git a/fpu/meson.build b/fpu/meson.build
index 1a9992ded5..646c76f0c6 100644
--- a/fpu/meson.build
+++ b/fpu/meson.build
@@ -1 +1 @@
-specific_ss.add(when: 'CONFIG_TCG', if_true: files('softfloat.c'))
+common_ss.add(when: 'CONFIG_TCG', if_true: files('softfloat.c'))
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index b38eea8d87..34c962d6bd 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -79,9 +79,6 @@ this code that are retained.
  * version 2 or later. See the COPYING file in the top-level directory.
  */
 
-/* softfloat (and in particular the code in softfloat-specialize.h) is
- * target-dependent and needs the TARGET_* macros.
- */
 #include "qemu/osdep.h"
 #include <math.h>
 #include "qemu/bitops.h"

From 1deb15c88ab3f1b0788b9e41b08217036eca3c91 Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Fri, 21 Feb 2025 19:09:53 +0000
Subject: [PATCH 21/43] target/arm: Move TCG-only VFP code into tcg/ subdir

Most of the target/arm/vfp_helper.c file is purely TCG helper code,
guarded by #ifdef CONFIG_TCG.  Move this into a new file in
target/arm/tcg/.

This leaves only the code relating to getting and setting the
FPCR/FPSR/FPSCR in the original file. (Some of this also is
TCG-only, but that needs more careful disentangling.)

Having two vfp_helper.c files might seem a bit confusing,
but once we've finished moving all the helper code out
of the old file we are going to rename it to vfp_fpscr.c.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20250221190957.811948-2-peter.maydell@linaro.org
---
 target/arm/tcg/meson.build  |    1 +
 target/arm/tcg/vfp_helper.c | 1130 +++++++++++++++++++++++++++++++++++
 target/arm/vfp_helper.c     | 1109 ----------------------------------
 3 files changed, 1131 insertions(+), 1109 deletions(-)
 create mode 100644 target/arm/tcg/vfp_helper.c

diff --git a/target/arm/tcg/meson.build b/target/arm/tcg/meson.build
index 1f9077c372..dd12ccedb1 100644
--- a/target/arm/tcg/meson.build
+++ b/target/arm/tcg/meson.build
@@ -41,6 +41,7 @@ arm_ss.add(files(
   'vec_helper.c',
   'tlb-insns.c',
   'arith_helper.c',
+  'vfp_helper.c',
 ))
 
 arm_ss.add(when: 'TARGET_AARCH64', if_true: files(
diff --git a/target/arm/tcg/vfp_helper.c b/target/arm/tcg/vfp_helper.c
new file mode 100644
index 0000000000..aa580ff64c
--- /dev/null
+++ b/target/arm/tcg/vfp_helper.c
@@ -0,0 +1,1130 @@
+/*
+ * ARM VFP floating-point operations
+ *
+ *  Copyright (c) 2003 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "cpu.h"
+#include "exec/helper-proto.h"
+#include "internals.h"
+#include "cpu-features.h"
+#include "fpu/softfloat.h"
+#include "qemu/log.h"
+
+/*
+ * VFP support.  We follow the convention used for VFP instructions:
+ * Single precision routines have a "s" suffix, double precision a
+ * "d" suffix.
+ */
+
+#define VFP_HELPER(name, p) HELPER(glue(glue(vfp_,name),p))
+/* Emit h/s/d helpers for a two-operand op, each calling floatNN_<name>(). */
+#define VFP_BINOP(name) \
+dh_ctype_f16 VFP_HELPER(name, h)(dh_ctype_f16 a, dh_ctype_f16 b, float_status *fpst) \
+{ \
+    return float16_ ## name(a, b, fpst); \
+} \
+float32 VFP_HELPER(name, s)(float32 a, float32 b, float_status *fpst) \
+{ \
+    return float32_ ## name(a, b, fpst); \
+} \
+float64 VFP_HELPER(name, d)(float64 a, float64 b, float_status *fpst) \
+{ \
+    return float64_ ## name(a, b, fpst); \
+}
+VFP_BINOP(add)
+VFP_BINOP(sub)
+VFP_BINOP(mul)
+VFP_BINOP(div)
+VFP_BINOP(min)
+VFP_BINOP(max)
+VFP_BINOP(minnum)
+VFP_BINOP(maxnum)
+#undef VFP_BINOP
+/* Half-precision square root: forwards to float16_sqrt() with fpst. */
+dh_ctype_f16 VFP_HELPER(sqrt, h)(dh_ctype_f16 a, float_status *fpst)
+{
+    return float16_sqrt(a, fpst);
+}
+/* Single-precision square root: forwards to float32_sqrt() with fpst. */
+float32 VFP_HELPER(sqrt, s)(float32 a, float_status *fpst)
+{
+    return float32_sqrt(a, fpst);
+}
+/* Double-precision square root: forwards to float64_sqrt() with fpst. */
+float64 VFP_HELPER(sqrt, d)(float64 a, float_status *fpst)
+{
+    return float64_sqrt(a, fpst);
+}
+/* Map a softfloat FloatRelation onto the NZCV field (bits 31:28) of fpsr. */
+static void softfloat_to_vfp_compare(CPUARMState *env, FloatRelation cmp)
+{
+    uint32_t flags;
+    switch (cmp) {
+    case float_relation_equal:
+        flags = 0x6; /* Z and C set */
+        break;
+    case float_relation_less:
+        flags = 0x8; /* N set */
+        break;
+    case float_relation_greater:
+        flags = 0x2; /* C set */
+        break;
+    case float_relation_unordered:
+        flags = 0x3; /* C and V set */
+        break;
+    default:
+        g_assert_not_reached();
+    }
+    env->vfp.fpsr = deposit64(env->vfp.fpsr, 28, 4, flags); /* NZCV */
+}
+
+/* cmp uses *_compare_quiet(), cmpe *_compare(). XXX: check quiet/signaling */
+#define DO_VFP_cmp(P, FLOATTYPE, ARGTYPE, FPST) \
+void VFP_HELPER(cmp, P)(ARGTYPE a, ARGTYPE b, CPUARMState *env)  \
+{ \
+    softfloat_to_vfp_compare(env, \
+        FLOATTYPE ## _compare_quiet(a, b, &env->vfp.fp_status[FPST])); \
+} \
+void VFP_HELPER(cmpe, P)(ARGTYPE a, ARGTYPE b, CPUARMState *env) \
+{ \
+    softfloat_to_vfp_compare(env, \
+        FLOATTYPE ## _compare(a, b, &env->vfp.fp_status[FPST])); \
+}
+DO_VFP_cmp(h, float16, dh_ctype_f16, FPST_A32_F16)
+DO_VFP_cmp(s, float32, float32, FPST_A32)
+DO_VFP_cmp(d, float64, float64, FPST_A32)
+#undef DO_VFP_cmp
+
+/* Integer to float and float to integer conversions */
+/* Int to float: x is cast to int32_t or uint32_t according to 'sign'. */
+#define CONV_ITOF(name, ftype, fsz, sign)                           \
+ftype HELPER(name)(uint32_t x, float_status *fpst)                  \
+{                                                                   \
+    return sign##int32_to_##float##fsz((sign##int32_t)x, fpst);     \
+}
+/* Float to int32: any NaN input raises Invalid and returns 0. */
+#define CONV_FTOI(name, ftype, fsz, sign, round)                \
+sign##int32_t HELPER(name)(ftype x, float_status *fpst)         \
+{                                                               \
+    if (float##fsz##_is_any_nan(x)) {                           \
+        float_raise(float_flag_invalid, fpst);                  \
+        return 0;                                               \
+    }                                                           \
+    return float##fsz##_to_##sign##int32##round(x, fpst);       \
+}
+/* Instantiate int->float, float->int and float->int round-to-zero helpers. */
+#define FLOAT_CONVS(name, p, ftype, fsz, sign)            \
+    CONV_ITOF(vfp_##name##to##p, ftype, fsz, sign)        \
+    CONV_FTOI(vfp_to##name##p, ftype, fsz, sign, )        \
+    CONV_FTOI(vfp_to##name##z##p, ftype, fsz, sign, _round_to_zero)
+
+FLOAT_CONVS(si, h, uint32_t, 16, )
+FLOAT_CONVS(si, s, float32, 32, )
+FLOAT_CONVS(si, d, float64, 64, )
+FLOAT_CONVS(ui, h, uint32_t, 16, u)
+FLOAT_CONVS(ui, s, float32, 32, u)
+FLOAT_CONVS(ui, d, float64, 64, u)
+
+#undef CONV_ITOF
+#undef CONV_FTOI
+#undef FLOAT_CONVS
+
+/* Floating point conversion: float32 to float64 via float32_to_float64(). */
+float64 VFP_HELPER(fcvtd, s)(float32 x, float_status *status)
+{
+    return float32_to_float64(x, status);
+}
+/* Floating point conversion: float64 to float32 via float64_to_float32(). */
+float32 VFP_HELPER(fcvts, d)(float64 x, float_status *status)
+{
+    return float64_to_float32(x, status);
+}
+/* Convert float32 to bfloat16; the result is returned in a uint32_t. */
+uint32_t HELPER(bfcvt)(float32 x, float_status *status)
+{
+    return float32_to_bfloat16(x, status);
+}
+/* Convert both float32 halves of pair to bfloat16, packed as hi:lo. */
+uint32_t HELPER(bfcvt_pair)(uint64_t pair, float_status *status)
+{
+    bfloat16 lo = float32_to_bfloat16(extract64(pair, 0, 32), status);
+    bfloat16 hi = float32_to_bfloat16(extract64(pair, 32, 32), status);
+    return deposit32(lo, 16, 16, hi); /* lo in bits 15:0, hi in bits 31:16 */
+}
+
+/*
+ * VFP3 fixed point conversion. The AArch32 versions of fix-to-float
+ * must always round-to-nearest; the AArch64 ones honour the FPSCR
+ * rounding mode. (For AArch32 Neon the standard-FPSCR is set to
+ * round-to-nearest so either helper will work.) AArch32 float-to-fix
+ * must round-to-zero.
+ */
+#define VFP_CONV_FIX_FLOAT(name, p, fsz, ftype, isz, itype)            \
+ftype HELPER(vfp_##name##to##p)(uint##isz##_t  x, uint32_t shift,      \
+                                float_status *fpst)                    \
+{ return itype##_to_##float##fsz##_scalbn(x, -shift, fpst); }
+/* As above, but force round-to-nearest-even for the duration of the call. */
+#define VFP_CONV_FIX_FLOAT_ROUND(name, p, fsz, ftype, isz, itype)      \
+    ftype HELPER(vfp_##name##to##p##_round_to_nearest)(uint##isz##_t  x, \
+                                                     uint32_t shift,   \
+                                                     float_status *fpst) \
+    {                                                                  \
+        ftype ret;                                                     \
+        FloatRoundMode oldmode = fpst->float_rounding_mode;            \
+        fpst->float_rounding_mode = float_round_nearest_even;          \
+        ret = itype##_to_##float##fsz##_scalbn(x, -shift, fpst);       \
+        fpst->float_rounding_mode = oldmode;                           \
+        return ret;                                                    \
+    }
+/* Float to fixed using rounding mode ROUND; NaN raises Invalid and gives 0. */
+#define VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, ftype, isz, itype, ROUND, suff) \
+uint##isz##_t HELPER(vfp_to##name##p##suff)(ftype x, uint32_t shift,      \
+                                            float_status *fpst)           \
+{                                                                         \
+    if (unlikely(float##fsz##_is_any_nan(x))) {                           \
+        float_raise(float_flag_invalid, fpst);                            \
+        return 0;                                                         \
+    }                                                                     \
+    return float##fsz##_to_##itype##_scalbn(x, ROUND, shift, fpst);       \
+}
+/* Full set: fix-to-float (plain and round-to-nearest), float-to-fix (both). */
+#define VFP_CONV_FIX(name, p, fsz, ftype, isz, itype)            \
+VFP_CONV_FIX_FLOAT(name, p, fsz, ftype, isz, itype)              \
+VFP_CONV_FIX_FLOAT_ROUND(name, p, fsz, ftype, isz, itype)        \
+VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, ftype, isz, itype,        \
+                         float_round_to_zero, _round_to_zero)    \
+VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, ftype, isz, itype,        \
+                         get_float_rounding_mode(fpst), )
+/* Reduced set: only the variants that use the rounding mode held in fpst. */
+#define VFP_CONV_FIX_A64(name, p, fsz, ftype, isz, itype)        \
+VFP_CONV_FIX_FLOAT(name, p, fsz, ftype, isz, itype)              \
+VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, ftype, isz, itype,        \
+                         get_float_rounding_mode(fpst), )
+
+VFP_CONV_FIX(sh, d, 64, float64, 64, int16)
+VFP_CONV_FIX(sl, d, 64, float64, 64, int32)
+VFP_CONV_FIX_A64(sq, d, 64, float64, 64, int64)
+VFP_CONV_FIX(uh, d, 64, float64, 64, uint16)
+VFP_CONV_FIX(ul, d, 64, float64, 64, uint32)
+VFP_CONV_FIX_A64(uq, d, 64, float64, 64, uint64)
+VFP_CONV_FIX(sh, s, 32, float32, 32, int16)
+VFP_CONV_FIX(sl, s, 32, float32, 32, int32)
+VFP_CONV_FIX_A64(sq, s, 32, float32, 64, int64)
+VFP_CONV_FIX(uh, s, 32, float32, 32, uint16)
+VFP_CONV_FIX(ul, s, 32, float32, 32, uint32)
+VFP_CONV_FIX_A64(uq, s, 32, float32, 64, uint64)
+VFP_CONV_FIX(sh, h, 16, dh_ctype_f16, 32, int16)
+VFP_CONV_FIX(sl, h, 16, dh_ctype_f16, 32, int32)
+VFP_CONV_FIX_A64(sq, h, 16, dh_ctype_f16, 64, int64)
+VFP_CONV_FIX(uh, h, 16, dh_ctype_f16, 32, uint16)
+VFP_CONV_FIX(ul, h, 16, dh_ctype_f16, 32, uint32)
+VFP_CONV_FIX_A64(uq, h, 16, dh_ctype_f16, 64, uint64)
+VFP_CONV_FLOAT_FIX_ROUND(sq, d, 64, float64, 64, int64,
+                         float_round_to_zero, _round_to_zero)
+VFP_CONV_FLOAT_FIX_ROUND(uq, d, 64, float64, 64, uint64,
+                         float_round_to_zero, _round_to_zero)
+
+#undef VFP_CONV_FIX
+#undef VFP_CONV_FIX_FLOAT
+#undef VFP_CONV_FLOAT_FIX_ROUND
+#undef VFP_CONV_FIX_A64
+
+/* Set the current fp rounding mode and return the old one.
+ * Both rmode and the returned value are softfloat float_round_ values.
+ */
+uint32_t HELPER(set_rmode)(uint32_t rmode, float_status *fp_status)
+{
+    uint32_t prev_rmode = get_float_rounding_mode(fp_status);
+    set_float_rounding_mode(rmode, fp_status);
+
+    return prev_rmode;
+}
+
+/* Half precision conversions.  */
+float32 HELPER(vfp_fcvt_f16_to_f32)(uint32_t a, float_status *fpst,
+                                    uint32_t ahp_mode)
+{
+    /* Squash FZ16 to 0 for the duration of conversion: flush-inputs-to-zero
+     * is cleared so input denormals are not flushed, then restored after.
+     */
+    bool save = get_flush_inputs_to_zero(fpst);
+    set_flush_inputs_to_zero(false, fpst);
+    float32 r = float16_to_float32(a, !ahp_mode, fpst);
+    set_flush_inputs_to_zero(save, fpst);
+    return r;
+}
+
+uint32_t HELPER(vfp_fcvt_f32_to_f16)(float32 a, float_status *fpst,
+                                     uint32_t ahp_mode)
+{
+    /* Squash FZ16 to 0 for the duration of conversion: flush-to-zero is
+     * cleared so output denormals are not flushed, then restored after.
+     */
+    bool save = get_flush_to_zero(fpst);
+    set_flush_to_zero(false, fpst);
+    float16 r = float32_to_float16(a, !ahp_mode, fpst);
+    set_flush_to_zero(save, fpst);
+    return r;
+}
+
+float64 HELPER(vfp_fcvt_f16_to_f64)(uint32_t a, float_status *fpst,
+                                    uint32_t ahp_mode)
+{
+    /* Squash FZ16 to 0 for the duration of conversion: flush-inputs-to-zero
+     * is cleared so input denormals are not flushed, then restored after.
+     */
+    bool save = get_flush_inputs_to_zero(fpst);
+    set_flush_inputs_to_zero(false, fpst);
+    float64 r = float16_to_float64(a, !ahp_mode, fpst);
+    set_flush_inputs_to_zero(save, fpst);
+    return r;
+}
+
+uint32_t HELPER(vfp_fcvt_f64_to_f16)(float64 a, float_status *fpst,
+                                     uint32_t ahp_mode)
+{
+    /* Squash FZ16 to 0 for the duration of conversion: flush-to-zero is
+     * cleared so output denormals are not flushed, then restored after.
+     */
+    bool save = get_flush_to_zero(fpst);
+    set_flush_to_zero(false, fpst);
+    float16 r = float64_to_float16(a, !ahp_mode, fpst);
+    set_flush_to_zero(save, fpst);
+    return r;
+}
+
+/* NEON helpers.  */
+
+/* Constants 256 and 512 are used in some helpers; we avoid relying on
+ * int->float conversions at run-time.  */
+#define float64_256 make_float64(0x4070000000000000LL)
+#define float64_512 make_float64(0x4080000000000000LL)
+#define float16_maxnorm make_float16(0x7bff)
+#define float32_maxnorm make_float32(0x7f7fffff)
+#define float64_maxnorm make_float64(0x7fefffffffffffffLL)
+
+/* Reciprocal functions
+ *
+ * The algorithm that must be used to calculate the estimate
+ * is specified by the ARM ARM, see FPRecipEstimate()/RecipEstimate
+ */
+
+/* See RecipEstimate()
+ *
+ * input is a 9 bit fixed point number
+ * input range 256 .. 511 for a number from 0.5 <= x < 1.0.
+ * result range 256 .. 511 for a number from 1.0 to 511/256.
+ */
+
+static int recip_estimate(int input)
+{
+    int a, b, r;
+    assert(256 <= input && input < 512);
+    a = (input * 2) + 1;        /* odd value in 513 .. 1023 */
+    b = (1 << 19) / a;          /* truncating integer division */
+    r = (b + 1) >> 1;           /* add one and halve to round */
+    assert(256 <= r && r < 512);
+    return r;
+}
+
+/*
+ * Increased precision version:
+ * input is a 13 bit fixed point number
+ * input range 2048 .. 4095 for a number from 0.5 <= x < 1.0.
+ * result range 4096 .. 8191 for a number from 1.0 to 2.0
+ */
+static int recip_estimate_incprec(int input)
+{
+    int a, b, r;
+    assert(2048 <= input && input < 4096);
+    a = (input * 2) + 1;        /* odd value in 4097 .. 8191 */
+    /*
+     * The pseudocode expresses this as an operation on infinite
+     * precision reals where it calculates 2^25 / a and then looks
+     * at the error between that and the rounded-down-to-integer
+     * value to see if it should instead round up. We instead
+     * follow the same approach as the pseudocode for the 8-bit
+     * precision version, and calculate (2 * (2^25 / a)) as an
+     * integer so we can do the "add one and halve" to round it.
+     * So the 1 << 26 here is correct.
+     */
+    b = (1 << 26) / a;          /* truncating integer division */
+    r = (b + 1) >> 1;           /* add one and halve to round */
+    assert(4096 <= r && r < 8192);
+    return r;
+}
+
+/*
+ * Common wrapper to call recip_estimate
+ *
+ * The parameters are exponent and 64 bit fraction (without implicit
+ * bit) where the binary point is nominally at bit 52. Returns the
+ * result fraction in the same layout and stores the result exponent
+ * back through exp; the caller packs both into its own format.
+ */
+
+static uint64_t call_recip_estimate(int *exp, int exp_off, uint64_t frac,
+                                    bool increasedprecision)
+{
+    uint32_t scaled, estimate;
+    uint64_t result_frac;
+    int result_exp;
+
+    /* Handle sub-normals */
+    if (*exp == 0) {
+        if (extract64(frac, 51, 1) == 0) {
+            *exp = -1;      /* top fraction bit clear: shift by two */
+            frac <<= 2;
+        } else {
+            frac <<= 1;
+        }
+    }
+
+    if (increasedprecision) {
+        /* scaled = UInt('1':fraction<51:41>) */
+        scaled = deposit32(1 << 11, 0, 11, extract64(frac, 41, 11));
+        estimate = recip_estimate_incprec(scaled);
+    } else {
+        /* scaled = UInt('1':fraction<51:44>) */
+        scaled = deposit32(1 << 8, 0, 8, extract64(frac, 44, 8));
+        estimate = recip_estimate(scaled);
+    }
+
+    result_exp = exp_off - *exp;
+    if (increasedprecision) {
+        result_frac = deposit64(0, 40, 12, estimate);
+    } else {
+        result_frac = deposit64(0, 44, 8, estimate);
+    }
+    if (result_exp == 0) {
+        result_frac = deposit64(result_frac >> 1, 51, 1, 1);
+    } else if (result_exp == -1) {
+        result_frac = deposit64(result_frac >> 2, 50, 2, 1);
+        result_exp = 0;
+    }
+
+    *exp = result_exp;      /* hand the result exponent back */
+
+    return result_frac;
+}
+/* Does an overflow of the given sign round to infinity (else to maxnorm)? */
+static bool round_to_inf(float_status *fpst, bool sign_bit)
+{
+    switch (fpst->float_rounding_mode) {
+    case float_round_nearest_even: /* Round to Nearest */
+        return true;
+    case float_round_up: /* Round to +Inf */
+        return !sign_bit;
+    case float_round_down: /* Round to -Inf */
+        return sign_bit;
+    case float_round_to_zero: /* Round to Zero */
+        return false;
+    default:
+        g_assert_not_reached();
+    }
+}
+/* Half-precision reciprocal estimate (see FPRecipEstimate in the Arm ARM). */
+uint32_t HELPER(recpe_f16)(uint32_t input, float_status *fpst)
+{
+    float16 f16 = float16_squash_input_denormal(input, fpst);
+    uint32_t f16_val = float16_val(f16);
+    uint32_t f16_sign = float16_is_neg(f16);
+    int f16_exp = extract32(f16_val, 10, 5);
+    uint32_t f16_frac = extract32(f16_val, 0, 10);
+    uint64_t f64_frac;
+
+    if (float16_is_any_nan(f16)) {
+        float16 nan = f16;
+        if (float16_is_signaling_nan(f16, fpst)) {
+            float_raise(float_flag_invalid, fpst);
+            if (!fpst->default_nan_mode) {
+                nan = float16_silence_nan(f16, fpst);
+            }
+        }
+        if (fpst->default_nan_mode) {
+            nan =  float16_default_nan(fpst);
+        }
+        return nan;
+    } else if (float16_is_infinity(f16)) {
+        return float16_set_sign(float16_zero, float16_is_neg(f16));
+    } else if (float16_is_zero(f16)) {
+        float_raise(float_flag_divbyzero, fpst);
+        return float16_set_sign(float16_infinity, float16_is_neg(f16));
+    } else if (float16_abs(f16) < (1 << 8)) {
+        /* Abs(value) < 2.0^-16: reported as overflow plus inexact */
+        float_raise(float_flag_overflow | float_flag_inexact, fpst);
+        if (round_to_inf(fpst, f16_sign)) {
+            return float16_set_sign(float16_infinity, f16_sign);
+        } else {
+            return float16_set_sign(float16_maxnorm, f16_sign);
+        }
+    } else if (f16_exp >= 29 && fpst->flush_to_zero) {
+        float_raise(float_flag_underflow, fpst); /* 29 - exp <= 0 */
+        return float16_set_sign(float16_zero, float16_is_neg(f16));
+    }
+
+    f64_frac = call_recip_estimate(&f16_exp, 29,
+                                   ((uint64_t) f16_frac) << (52 - 10), false);
+
+    /* result = sign : result_exp<4:0> : fraction<51:42> */
+    f16_val = deposit32(0, 15, 1, f16_sign);
+    f16_val = deposit32(f16_val, 10, 5, f16_exp);
+    f16_val = deposit32(f16_val, 0, 10, extract64(f64_frac, 52 - 10, 10));
+    return make_float16(f16_val);
+}
+
+/*
+ * FEAT_RPRES means the f32 FRECPE has an "increased precision" variant
+ * which is used when FPCR.AH == 1. rpres selects that variant here.
+ */
+static float32 do_recpe_f32(float32 input, float_status *fpst, bool rpres)
+{
+    float32 f32 = float32_squash_input_denormal(input, fpst);
+    uint32_t f32_val = float32_val(f32);
+    bool f32_sign = float32_is_neg(f32);
+    int f32_exp = extract32(f32_val, 23, 8);
+    uint32_t f32_frac = extract32(f32_val, 0, 23);
+    uint64_t f64_frac;
+
+    if (float32_is_any_nan(f32)) {
+        float32 nan = f32;
+        if (float32_is_signaling_nan(f32, fpst)) {
+            float_raise(float_flag_invalid, fpst);
+            if (!fpst->default_nan_mode) {
+                nan = float32_silence_nan(f32, fpst);
+            }
+        }
+        if (fpst->default_nan_mode) {
+            nan =  float32_default_nan(fpst);
+        }
+        return nan;
+    } else if (float32_is_infinity(f32)) {
+        return float32_set_sign(float32_zero, float32_is_neg(f32));
+    } else if (float32_is_zero(f32)) {
+        float_raise(float_flag_divbyzero, fpst);
+        return float32_set_sign(float32_infinity, float32_is_neg(f32));
+    } else if (float32_abs(f32) < (1ULL << 21)) {
+        /* Abs(value) < 2.0^-128: reported as overflow plus inexact */
+        float_raise(float_flag_overflow | float_flag_inexact, fpst);
+        if (round_to_inf(fpst, f32_sign)) {
+            return float32_set_sign(float32_infinity, f32_sign);
+        } else {
+            return float32_set_sign(float32_maxnorm, f32_sign);
+        }
+    } else if (f32_exp >= 253 && fpst->flush_to_zero) {
+        float_raise(float_flag_underflow, fpst); /* 253 - exp <= 0 */
+        return float32_set_sign(float32_zero, float32_is_neg(f32));
+    }
+
+    f64_frac = call_recip_estimate(&f32_exp, 253,
+                                   ((uint64_t) f32_frac) << (52 - 23), rpres);
+
+    /* result = sign : result_exp<7:0> : fraction<51:29> */
+    f32_val = deposit32(0, 31, 1, f32_sign);
+    f32_val = deposit32(f32_val, 23, 8, f32_exp);
+    f32_val = deposit32(f32_val, 0, 23, extract64(f64_frac, 52 - 23, 23));
+    return make_float32(f32_val);
+}
+/* Single-precision reciprocal estimate, standard variant (rpres false). */
+float32 HELPER(recpe_f32)(float32 input, float_status *fpst)
+{
+    return do_recpe_f32(input, fpst, false);
+}
+/* Single-precision reciprocal estimate, increased precision (rpres true). */
+float32 HELPER(recpe_rpres_f32)(float32 input, float_status *fpst)
+{
+    return do_recpe_f32(input, fpst, true);
+}
+/* Double-precision reciprocal estimate (see FPRecipEstimate in the Arm ARM). */
+float64 HELPER(recpe_f64)(float64 input, float_status *fpst)
+{
+    float64 f64 = float64_squash_input_denormal(input, fpst);
+    uint64_t f64_val = float64_val(f64);
+    bool f64_sign = float64_is_neg(f64);
+    int f64_exp = extract64(f64_val, 52, 11);
+    uint64_t f64_frac = extract64(f64_val, 0, 52);
+
+    /* Deal with any special cases */
+    if (float64_is_any_nan(f64)) {
+        float64 nan = f64;
+        if (float64_is_signaling_nan(f64, fpst)) {
+            float_raise(float_flag_invalid, fpst);
+            if (!fpst->default_nan_mode) {
+                nan = float64_silence_nan(f64, fpst);
+            }
+        }
+        if (fpst->default_nan_mode) {
+            nan =  float64_default_nan(fpst);
+        }
+        return nan;
+    } else if (float64_is_infinity(f64)) {
+        return float64_set_sign(float64_zero, float64_is_neg(f64));
+    } else if (float64_is_zero(f64)) {
+        float_raise(float_flag_divbyzero, fpst);
+        return float64_set_sign(float64_infinity, float64_is_neg(f64));
+    } else if ((f64_val & ~(1ULL << 63)) < (1ULL << 50)) {
+        /* Abs(value) < 2.0^-1024: reported as overflow plus inexact */
+        float_raise(float_flag_overflow | float_flag_inexact, fpst);
+        if (round_to_inf(fpst, f64_sign)) {
+            return float64_set_sign(float64_infinity, f64_sign);
+        } else {
+            return float64_set_sign(float64_maxnorm, f64_sign);
+        }
+    } else if (f64_exp >= 2045 && fpst->flush_to_zero) {
+        float_raise(float_flag_underflow, fpst); /* 2045 - exp <= 0 */
+        return float64_set_sign(float64_zero, float64_is_neg(f64));
+    }
+
+    f64_frac = call_recip_estimate(&f64_exp, 2045, f64_frac, false);
+
+    /* result = sign : result_exp<10:0> : fraction<51:0>; */
+    f64_val = deposit64(0, 63, 1, f64_sign);
+    f64_val = deposit64(f64_val, 52, 11, f64_exp);
+    f64_val = deposit64(f64_val, 0, 52, f64_frac);
+    return make_float64(f64_val);
+}
+
+/* The algorithm that must be used to calculate the estimate
+ * is specified by the ARM ARM. Input 128 .. 511, result 256 .. 511.
+ */
+
+static int do_recip_sqrt_estimate(int a)
+{
+    int b, estimate;
+
+    assert(128 <= a && a < 512);
+    if (a < 256) {
+        a = a * 2 + 1;
+    } else {
+        a = (a >> 1) << 1;      /* clear the low bit */
+        a = (a + 1) * 2;
+    }
+    b = 512;
+    while (a * (b + 1) * (b + 1) < (1 << 28)) {
+        b += 1;                 /* ends at largest b with a * b * b < 2^28 */
+    }
+    estimate = (b + 1) / 2;     /* add one and halve to round */
+    assert(256 <= estimate && estimate < 512);
+
+    return estimate;
+}
+
+static int do_recip_sqrt_estimate_incprec(int a)
+{
+    /*
+     * The Arm ARM describes the 12-bit precision version of RecipSqrtEstimate
+     * in terms of an infinite-precision floating point calculation of a
+     * square root. We implement this using the same kind of pure integer
+     * algorithm as the 8-bit mantissa, to get the same bit-for-bit result.
+     */
+    int64_t b, estimate;        /* 64-bit: the product below exceeds 2^31 */
+
+    assert(1024 <= a && a < 4096);
+    if (a < 2048) {
+        a = a * 2 + 1;
+    } else {
+        a = (a >> 1) << 1;      /* clear the low bit */
+        a = (a + 1) * 2;
+    }
+    b = 8192;
+    while (a * (b + 1) * (b + 1) < (1ULL << 39)) {
+        b += 1;                 /* ends at largest b with a * b * b < 2^39 */
+    }
+    estimate = (b + 1) / 2;     /* add one and halve to round */
+
+    assert(4096 <= estimate && estimate < 8192);
+
+    return estimate;
+}
+/* Shared rsqrt estimate core: returns the result fraction and updates *exp. */
+static uint64_t recip_sqrt_estimate(int *exp , int exp_off, uint64_t frac,
+                                    bool increasedprecision)
+{
+    int estimate;
+    uint32_t scaled;
+
+    if (*exp == 0) {            /* subnormal: shift until bit 51 is set */
+        while (extract64(frac, 51, 1) == 0) {
+            frac = frac << 1;
+            *exp -= 1;
+        }
+        frac = extract64(frac, 0, 51) << 1;
+    }
+
+    if (increasedprecision) {
+        if (*exp & 1) {
+            /* scaled = UInt('01':fraction<51:42>) */
+            scaled = deposit32(1 << 10, 0, 10, extract64(frac, 42, 10));
+        } else {
+            /* scaled = UInt('1':fraction<51:41>) */
+            scaled = deposit32(1 << 11, 0, 11, extract64(frac, 41, 11));
+        }
+        estimate = do_recip_sqrt_estimate_incprec(scaled);
+    } else {
+        if (*exp & 1) {
+            /* scaled = UInt('01':fraction<51:45>) */
+            scaled = deposit32(1 << 7, 0, 7, extract64(frac, 45, 7));
+        } else {
+            /* scaled = UInt('1':fraction<51:44>) */
+            scaled = deposit32(1 << 8, 0, 8, extract64(frac, 44, 8));
+        }
+        estimate = do_recip_sqrt_estimate(scaled);
+    }
+
+    *exp = (exp_off - *exp) / 2;    /* result exponent, returned via exp */
+    if (increasedprecision) {
+        return extract64(estimate, 0, 12) << 40;
+    } else {
+        return extract64(estimate, 0, 8) << 44;
+    }
+}
+/* Half-precision reciprocal square root estimate; input/result are f16 bits. */
+uint32_t HELPER(rsqrte_f16)(uint32_t input, float_status *s)
+{
+    float16 f16 = float16_squash_input_denormal(input, s);
+    uint16_t val = float16_val(f16);
+    bool f16_sign = float16_is_neg(f16);
+    int f16_exp = extract32(val, 10, 5);
+    uint16_t f16_frac = extract32(val, 0, 10);
+    uint64_t f64_frac;
+
+    if (float16_is_any_nan(f16)) {
+        float16 nan = f16;
+        if (float16_is_signaling_nan(f16, s)) {
+            float_raise(float_flag_invalid, s);
+            if (!s->default_nan_mode) {
+                nan = float16_silence_nan(f16, s);
+            }
+        }
+        if (s->default_nan_mode) {
+            nan =  float16_default_nan(s);
+        }
+        return nan;
+    } else if (float16_is_zero(f16)) {
+        float_raise(float_flag_divbyzero, s);
+        return float16_set_sign(float16_infinity, f16_sign);
+    } else if (f16_sign) {
+        float_raise(float_flag_invalid, s); /* negative, non-zero input */
+        return float16_default_nan(s);
+    } else if (float16_is_infinity(f16)) {
+        return float16_zero;
+    }
+
+    /* Scale and normalize to a double-precision value between 0.25 and 1.0,
+     * preserving the parity of the exponent.  */
+
+    f64_frac = ((uint64_t) f16_frac) << (52 - 10);
+
+    f64_frac = recip_sqrt_estimate(&f16_exp, 44, f64_frac, false);
+
+    /* result = sign : result_exp<4:0> : estimate<7:0> : Zeros(2) */
+    val = deposit32(0, 15, 1, f16_sign);
+    val = deposit32(val, 10, 5, f16_exp);
+    val = deposit32(val, 2, 8, extract64(f64_frac, 52 - 8, 8));
+    return make_float16(val);
+}
+
+/*
+ * FEAT_RPRES means the f32 FRSQRTE has an "increased precision" variant
+ * which is used when FPCR.AH == 1. rpres selects that variant here.
+ */
+static float32 do_rsqrte_f32(float32 input, float_status *s, bool rpres)
+{
+    float32 f32 = float32_squash_input_denormal(input, s);
+    uint32_t val = float32_val(f32);
+    uint32_t f32_sign = float32_is_neg(f32);
+    int f32_exp = extract32(val, 23, 8);
+    uint32_t f32_frac = extract32(val, 0, 23);
+    uint64_t f64_frac;
+
+    if (float32_is_any_nan(f32)) {
+        float32 nan = f32;
+        if (float32_is_signaling_nan(f32, s)) {
+            float_raise(float_flag_invalid, s);
+            if (!s->default_nan_mode) {
+                nan = float32_silence_nan(f32, s);
+            }
+        }
+        if (s->default_nan_mode) {
+            nan =  float32_default_nan(s);
+        }
+        return nan;
+    } else if (float32_is_zero(f32)) {
+        float_raise(float_flag_divbyzero, s);
+        return float32_set_sign(float32_infinity, float32_is_neg(f32));
+    } else if (float32_is_neg(f32)) {
+        float_raise(float_flag_invalid, s); /* negative, non-zero input */
+        return float32_default_nan(s);
+    } else if (float32_is_infinity(f32)) {
+        return float32_zero;
+    }
+
+    /* Scale and normalize to a double-precision value between 0.25 and 1.0,
+     * preserving the parity of the exponent.  */
+
+    f64_frac = ((uint64_t) f32_frac) << 29; /* 52 - 23 */
+
+    f64_frac = recip_sqrt_estimate(&f32_exp, 380, f64_frac, rpres);
+
+    /*
+     * result = sign : result_exp<7:0> : estimate<7:0> : Zeros(15)
+     * or for increased precision
+     * result = sign : result_exp<7:0> : estimate<11:0> : Zeros(11)
+     */
+    val = deposit32(0, 31, 1, f32_sign);
+    val = deposit32(val, 23, 8, f32_exp);
+    if (rpres) {
+        val = deposit32(val, 11, 12, extract64(f64_frac, 52 - 12, 12));
+    } else {
+        val = deposit32(val, 15, 8, extract64(f64_frac, 52 - 8, 8));
+    }
+    return make_float32(val);
+}
+/* Single-precision rsqrt estimate, standard variant (rpres false). */
+float32 HELPER(rsqrte_f32)(float32 input, float_status *s)
+{
+    return do_rsqrte_f32(input, s, false);
+}
+/* Single-precision rsqrt estimate, increased precision (rpres true). */
+float32 HELPER(rsqrte_rpres_f32)(float32 input, float_status *s)
+{
+    return do_rsqrte_f32(input, s, true);
+}
+/* Double-precision reciprocal square root estimate; NaN/0/negative/Inf first. */
+float64 HELPER(rsqrte_f64)(float64 input, float_status *s)
+{
+    float64 f64 = float64_squash_input_denormal(input, s);
+    uint64_t val = float64_val(f64);
+    bool f64_sign = float64_is_neg(f64);
+    int f64_exp = extract64(val, 52, 11);
+    uint64_t f64_frac = extract64(val, 0, 52);
+
+    if (float64_is_any_nan(f64)) {
+        float64 nan = f64;
+        if (float64_is_signaling_nan(f64, s)) {
+            float_raise(float_flag_invalid, s);
+            if (!s->default_nan_mode) {
+                nan = float64_silence_nan(f64, s);
+            }
+        }
+        if (s->default_nan_mode) {
+            nan =  float64_default_nan(s);
+        }
+        return nan;
+    } else if (float64_is_zero(f64)) {
+        float_raise(float_flag_divbyzero, s);
+        return float64_set_sign(float64_infinity, float64_is_neg(f64));
+    } else if (float64_is_neg(f64)) {
+        float_raise(float_flag_invalid, s); /* negative, non-zero input */
+        return float64_default_nan(s);
+    } else if (float64_is_infinity(f64)) {
+        return float64_zero;
+    }
+
+    f64_frac = recip_sqrt_estimate(&f64_exp, 3068, f64_frac, false);
+
+    /* result = sign : result_exp<10:0> : estimate<7:0> : Zeros(44) */
+    val = deposit64(0, 63, 1, f64_sign); /* sign is bit 63 (always 0 here) */
+    val = deposit64(val, 52, 11, f64_exp);
+    val = deposit64(val, 44, 8, extract64(f64_frac, 52 - 8, 8));
+    return make_float64(val);
+}
+/* Unsigned reciprocal estimate on the top 9 bits of a (bits 31:23). */
+uint32_t HELPER(recpe_u32)(uint32_t a)
+{
+    int input, estimate;
+
+    if ((a & 0x80000000) == 0) {
+        return 0xffffffff;      /* top bit clear: return all ones */
+    }
+
+    input = extract32(a, 23, 9);    /* bit 31 is set, so 256 .. 511 */
+    estimate = recip_estimate(input);
+
+    return deposit32(0, (32 - 9), 9, estimate);
+}
+/* Unsigned reciprocal square root estimate on the top 9 bits of a. */
+uint32_t HELPER(rsqrte_u32)(uint32_t a)
+{
+    int estimate;
+
+    if ((a & 0xc0000000) == 0) {
+        return 0xffffffff;      /* top two bits clear: return all ones */
+    }
+
+    estimate = do_recip_sqrt_estimate(extract32(a, 23, 9)); /* 128 .. 511 */
+
+    return deposit32(0, 23, 9, estimate);
+}
+
+/* VFPv4 fused multiply-accumulate: float16_muladd() with flags == 0 */
+dh_ctype_f16 VFP_HELPER(muladd, h)(dh_ctype_f16 a, dh_ctype_f16 b,
+                                   dh_ctype_f16 c, float_status *fpst)
+{
+    return float16_muladd(a, b, c, 0, fpst);
+}
+/* Single-precision variant: float32_muladd() with flags == 0 */
+float32 VFP_HELPER(muladd, s)(float32 a, float32 b, float32 c,
+                              float_status *fpst)
+{
+    return float32_muladd(a, b, c, 0, fpst);
+}
+/* Double-precision variant: float64_muladd() with flags == 0 */
+float64 VFP_HELPER(muladd, d)(float64 a, float64 b, float64 c,
+                              float_status *fpst)
+{
+    return float64_muladd(a, b, c, 0, fpst);
+}
+
+/* ARMv8 round to integral: float16_round_to_int(), flags left as raised */
+dh_ctype_f16 HELPER(rinth_exact)(dh_ctype_f16 x, float_status *fp_status)
+{
+    return float16_round_to_int(x, fp_status);
+}
+/* Round to integral: float32_round_to_int(), flags left as raised */
+float32 HELPER(rints_exact)(float32 x, float_status *fp_status)
+{
+    return float32_round_to_int(x, fp_status);
+}
+/* Round to integral: float64_round_to_int(), flags left as raised */
+float64 HELPER(rintd_exact)(float64 x, float_status *fp_status)
+{
+    return float64_round_to_int(x, fp_status);
+}
+/* Round to integral, dropping an Inexact flag that this call newly raised. */
+dh_ctype_f16 HELPER(rinth)(dh_ctype_f16 x, float_status *fp_status)
+{
+    int old_flags = get_float_exception_flags(fp_status), new_flags;
+    float16 ret;
+
+    ret = float16_round_to_int(x, fp_status);
+
+    /* Suppress any inexact exceptions the conversion produced */
+    if (!(old_flags & float_flag_inexact)) {
+        new_flags = get_float_exception_flags(fp_status);
+        set_float_exception_flags(new_flags & ~float_flag_inexact, fp_status);
+    }
+
+    return ret;
+}
+/* Round to integral, dropping an Inexact flag that this call newly raised. */
+float32 HELPER(rints)(float32 x, float_status *fp_status)
+{
+    int old_flags = get_float_exception_flags(fp_status), new_flags;
+    float32 ret;
+
+    ret = float32_round_to_int(x, fp_status);
+
+    /* Suppress any inexact exceptions the conversion produced */
+    if (!(old_flags & float_flag_inexact)) {
+        new_flags = get_float_exception_flags(fp_status);
+        set_float_exception_flags(new_flags & ~float_flag_inexact, fp_status);
+    }
+
+    return ret;
+}
+
+float64 HELPER(rintd)(float64 x, float_status *fp_status)
+{
+    int old_flags = get_float_exception_flags(fp_status), new_flags;
+    float64 ret;
+
+    ret = float64_round_to_int(x, fp_status);
+
+    /* Clear Inexact from the rounding, unless it was already set on entry */
+    if (!(old_flags & float_flag_inexact)) {
+        new_flags = get_float_exception_flags(fp_status);
+        set_float_exception_flags(new_flags & ~float_flag_inexact, fp_status);
+    }
+
+    return ret;
+}
+
+/* Map ARM FPROUNDING_* values (the index) to softfloat FloatRoundMode */
+const FloatRoundMode arm_rmode_to_sf_map[] = {
+    [FPROUNDING_TIEEVEN] = float_round_nearest_even,
+    [FPROUNDING_POSINF] = float_round_up,
+    [FPROUNDING_NEGINF] = float_round_down,
+    [FPROUNDING_ZERO] = float_round_to_zero,
+    [FPROUNDING_TIEAWAY] = float_round_ties_away,
+    [FPROUNDING_ODD] = float_round_to_odd,
+};
+
+/*
+ * Convert float64 to int32_t, rounding to zero, without saturation; the
+ * result is modulo 2^32.  Returns it in bits [31:0], "inexact" in bit 32.
+ */
+uint64_t HELPER(fjcvtzs)(float64 value, float_status *status)
+{
+    uint32_t frac, e_old, e_new;
+    bool inexact;
+
+    e_old = get_float_exception_flags(status);
+    set_float_exception_flags(0, status); /* see only this op's flags */
+    frac = float64_to_int32_modulo(value, float_round_to_zero, status);
+    e_new = get_float_exception_flags(status);
+    set_float_exception_flags(e_old | e_new, status); /* re-merge old flags */
+
+    /* Normal inexact, denormal with flush-to-zero, or overflow or NaN */
+    inexact = e_new & (float_flag_inexact |
+                       float_flag_input_denormal_flushed |
+                       float_flag_invalid);
+
+    /* While not inexact for IEEE FP, -0.0 is inexact for JavaScript. */
+    inexact |= value == float64_chs(float64_zero);
+
+    /* Pack the result and the env->ZF representation of Z together.  */
+    return deposit64(frac, 32, 32, inexact);
+}
+
+uint32_t HELPER(vjcvt)(float64 value, CPUARMState *env)
+{
+    uint64_t pair = HELPER(fjcvtzs)(value, &env->vfp.fp_status[FPST_A32]);
+    uint32_t result = pair; /* low half: the converted integer */
+    uint32_t z = (pair >> 32) == 0; /* Z set iff the inexact half is 0 */
+
+    /* Store Z, clear NCV, in FPSCR.NZCV.  */
+    env->vfp.fpsr = (env->vfp.fpsr & ~FPSR_NZCV_MASK) | (z * FPSR_Z);
+
+    return result;
+}
+
+/* Round f to an integral float32; Invalid if it won't fit int<intsize>_t */
+static float32 frint_s(float32 f, float_status *fpst, int intsize)
+{
+    int old_flags = get_float_exception_flags(fpst);
+    uint32_t exp = extract32(f, 23, 8);
+
+    if (unlikely(exp == 0xff)) {
+        /* NaN or Inf.  */
+        goto overflow;
+    }
+
+    /* Round and re-extract the exponent.  */
+    f = float32_round_to_int(f, fpst);
+    exp = extract32(f, 23, 8);
+
+    /* Validate the range of the result.  */
+    if (exp < 126 + intsize) {
+        /* abs(F) <= INT{N}_MAX */
+        return f;
+    }
+    if (exp == 126 + intsize) {
+        uint32_t sign = extract32(f, 31, 1);
+        uint32_t frac = extract32(f, 0, 23);
+        if (sign && frac == 0) {
+            /* F == INT{N}_MIN */
+            return f;
+        }
+    }
+
+ overflow:
+    /*
+     * Raise Invalid and return INT{N}_MIN as a float.  Revert any
+     * inexact exception float32_round_to_int may have raised.
+     */
+    set_float_exception_flags(old_flags | float_flag_invalid, fpst);
+    return (0x100u + 126u + intsize) << 23; /* sign | exp, zero fraction */
+}
+
+float32 HELPER(frint32_s)(float32 f, float_status *fpst)
+{
+    return frint_s(f, fpst, 32); /* result must fit in int32_t */
+}
+
+float32 HELPER(frint64_s)(float32 f, float_status *fpst)
+{
+    return frint_s(f, fpst, 64); /* result must fit in int64_t */
+}
+
+/* Round f to an integral float64; Invalid if it won't fit int<intsize>_t */
+static float64 frint_d(float64 f, float_status *fpst, int intsize)
+{
+    int old_flags = get_float_exception_flags(fpst);
+    uint32_t exp = extract64(f, 52, 11);
+
+    if (unlikely(exp == 0x7ff)) {
+        /* NaN or Inf.  */
+        goto overflow;
+    }
+
+    /* Round and re-extract the exponent.  */
+    f = float64_round_to_int(f, fpst);
+    exp = extract64(f, 52, 11);
+
+    /* Validate the range of the result.  */
+    if (exp < 1022 + intsize) {
+        /* abs(F) <= INT{N}_MAX */
+        return f;
+    }
+    if (exp == 1022 + intsize) {
+        uint64_t sign = extract64(f, 63, 1);
+        uint64_t frac = extract64(f, 0, 52);
+        if (sign && frac == 0) {
+            /* F == INT{N}_MIN */
+            return f;
+        }
+    }
+
+ overflow:
+    /*
+     * Raise Invalid and return INT{N}_MIN as a float.  Revert any
+     * inexact exception float64_round_to_int may have raised.
+     */
+    set_float_exception_flags(old_flags | float_flag_invalid, fpst);
+    return (uint64_t)(0x800 + 1022 + intsize) << 52; /* sign | exp, frac 0 */
+}
+
+float64 HELPER(frint32_d)(float64 f, float_status *fpst)
+{
+    return frint_d(f, fpst, 32); /* result must fit in int32_t */
+}
+
+float64 HELPER(frint64_d)(float64 f, float_status *fpst)
+{
+    return frint_d(f, fpst, 64); /* result must fit in int64_t */
+}
+
+void HELPER(check_hcr_el2_trap)(CPUARMState *env, uint32_t rt, uint32_t reg)
+{
+    uint32_t syndrome; /* built only when the access actually traps */
+
+    switch (reg) {
+    case ARM_VFP_MVFR0:
+    case ARM_VFP_MVFR1:
+    case ARM_VFP_MVFR2: /* MVFR0-2 are gated by HCR_TID3 */
+        if (!(arm_hcr_el2_eff(env) & HCR_TID3)) {
+            return; /* bit clear: no trap */
+        }
+        break;
+    case ARM_VFP_FPSID: /* FPSID is gated by HCR_TID0 */
+        if (!(arm_hcr_el2_eff(env) & HCR_TID0)) {
+            return; /* bit clear: no trap */
+        }
+        break;
+    default:
+        g_assert_not_reached(); /* only the registers above are expected */
+    }
+
+    syndrome = ((EC_FPIDTRAP << ARM_EL_EC_SHIFT)
+                | ARM_EL_IL
+                | (1 << 24) | (0xe << 20) | (7 << 14)
+                | (reg << 10) | (rt << 5) | 1); /* encodes reg and rt */
+
+    raise_exception(env, EXCP_HYP_TRAP, syndrome, 2);
+}
diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c
index 5d424477a2..0e849d8d4d 100644
--- a/target/arm/vfp_helper.c
+++ b/target/arm/vfp_helper.c
@@ -23,13 +23,6 @@
 #include "internals.h"
 #include "cpu-features.h"
 #include "fpu/softfloat.h"
-#ifdef CONFIG_TCG
-#include "qemu/log.h"
-#endif
-
-/* VFP support.  We follow the convention used for VFP instructions:
-   Single precision routines have a "s" suffix, double precision a
-   "d" suffix.  */
 
 /*
  * Set the float_status behaviour to match the Arm defaults:
@@ -419,1105 +412,3 @@ void vfp_set_fpscr(CPUARMState *env, uint32_t val)
 {
     HELPER(vfp_set_fpscr)(env, val);
 }
-
-#ifdef CONFIG_TCG
-
-#define VFP_HELPER(name, p) HELPER(glue(glue(vfp_,name),p))
-
-#define VFP_BINOP(name) \
-dh_ctype_f16 VFP_HELPER(name, h)(dh_ctype_f16 a, dh_ctype_f16 b, float_status *fpst) \
-{ \
-    return float16_ ## name(a, b, fpst); \
-} \
-float32 VFP_HELPER(name, s)(float32 a, float32 b, float_status *fpst) \
-{ \
-    return float32_ ## name(a, b, fpst); \
-} \
-float64 VFP_HELPER(name, d)(float64 a, float64 b, float_status *fpst) \
-{ \
-    return float64_ ## name(a, b, fpst); \
-}
-VFP_BINOP(add)
-VFP_BINOP(sub)
-VFP_BINOP(mul)
-VFP_BINOP(div)
-VFP_BINOP(min)
-VFP_BINOP(max)
-VFP_BINOP(minnum)
-VFP_BINOP(maxnum)
-#undef VFP_BINOP
-
-dh_ctype_f16 VFP_HELPER(sqrt, h)(dh_ctype_f16 a, float_status *fpst)
-{
-    return float16_sqrt(a, fpst);
-}
-
-float32 VFP_HELPER(sqrt, s)(float32 a, float_status *fpst)
-{
-    return float32_sqrt(a, fpst);
-}
-
-float64 VFP_HELPER(sqrt, d)(float64 a, float_status *fpst)
-{
-    return float64_sqrt(a, fpst);
-}
-
-static void softfloat_to_vfp_compare(CPUARMState *env, FloatRelation cmp)
-{
-    uint32_t flags;
-    switch (cmp) {
-    case float_relation_equal:
-        flags = 0x6;
-        break;
-    case float_relation_less:
-        flags = 0x8;
-        break;
-    case float_relation_greater:
-        flags = 0x2;
-        break;
-    case float_relation_unordered:
-        flags = 0x3;
-        break;
-    default:
-        g_assert_not_reached();
-    }
-    env->vfp.fpsr = deposit64(env->vfp.fpsr, 28, 4, flags); /* NZCV */
-}
-
-/* XXX: check quiet/signaling case */
-#define DO_VFP_cmp(P, FLOATTYPE, ARGTYPE, FPST) \
-void VFP_HELPER(cmp, P)(ARGTYPE a, ARGTYPE b, CPUARMState *env)  \
-{ \
-    softfloat_to_vfp_compare(env, \
-        FLOATTYPE ## _compare_quiet(a, b, &env->vfp.fp_status[FPST])); \
-} \
-void VFP_HELPER(cmpe, P)(ARGTYPE a, ARGTYPE b, CPUARMState *env) \
-{ \
-    softfloat_to_vfp_compare(env, \
-        FLOATTYPE ## _compare(a, b, &env->vfp.fp_status[FPST])); \
-}
-DO_VFP_cmp(h, float16, dh_ctype_f16, FPST_A32_F16)
-DO_VFP_cmp(s, float32, float32, FPST_A32)
-DO_VFP_cmp(d, float64, float64, FPST_A32)
-#undef DO_VFP_cmp
-
-/* Integer to float and float to integer conversions */
-
-#define CONV_ITOF(name, ftype, fsz, sign)                           \
-ftype HELPER(name)(uint32_t x, float_status *fpst)                  \
-{                                                                   \
-    return sign##int32_to_##float##fsz((sign##int32_t)x, fpst);     \
-}
-
-#define CONV_FTOI(name, ftype, fsz, sign, round)                \
-sign##int32_t HELPER(name)(ftype x, float_status *fpst)         \
-{                                                               \
-    if (float##fsz##_is_any_nan(x)) {                           \
-        float_raise(float_flag_invalid, fpst);                  \
-        return 0;                                               \
-    }                                                           \
-    return float##fsz##_to_##sign##int32##round(x, fpst);       \
-}
-
-#define FLOAT_CONVS(name, p, ftype, fsz, sign)            \
-    CONV_ITOF(vfp_##name##to##p, ftype, fsz, sign)        \
-    CONV_FTOI(vfp_to##name##p, ftype, fsz, sign, )        \
-    CONV_FTOI(vfp_to##name##z##p, ftype, fsz, sign, _round_to_zero)
-
-FLOAT_CONVS(si, h, uint32_t, 16, )
-FLOAT_CONVS(si, s, float32, 32, )
-FLOAT_CONVS(si, d, float64, 64, )
-FLOAT_CONVS(ui, h, uint32_t, 16, u)
-FLOAT_CONVS(ui, s, float32, 32, u)
-FLOAT_CONVS(ui, d, float64, 64, u)
-
-#undef CONV_ITOF
-#undef CONV_FTOI
-#undef FLOAT_CONVS
-
-/* floating point conversion */
-float64 VFP_HELPER(fcvtd, s)(float32 x, float_status *status)
-{
-    return float32_to_float64(x, status);
-}
-
-float32 VFP_HELPER(fcvts, d)(float64 x, float_status *status)
-{
-    return float64_to_float32(x, status);
-}
-
-uint32_t HELPER(bfcvt)(float32 x, float_status *status)
-{
-    return float32_to_bfloat16(x, status);
-}
-
-uint32_t HELPER(bfcvt_pair)(uint64_t pair, float_status *status)
-{
-    bfloat16 lo = float32_to_bfloat16(extract64(pair, 0, 32), status);
-    bfloat16 hi = float32_to_bfloat16(extract64(pair, 32, 32), status);
-    return deposit32(lo, 16, 16, hi);
-}
-
-/*
- * VFP3 fixed point conversion. The AArch32 versions of fix-to-float
- * must always round-to-nearest; the AArch64 ones honour the FPSCR
- * rounding mode. (For AArch32 Neon the standard-FPSCR is set to
- * round-to-nearest so either helper will work.) AArch32 float-to-fix
- * must round-to-zero.
- */
-#define VFP_CONV_FIX_FLOAT(name, p, fsz, ftype, isz, itype)            \
-ftype HELPER(vfp_##name##to##p)(uint##isz##_t  x, uint32_t shift,      \
-                                float_status *fpst)                    \
-{ return itype##_to_##float##fsz##_scalbn(x, -shift, fpst); }
-
-#define VFP_CONV_FIX_FLOAT_ROUND(name, p, fsz, ftype, isz, itype)      \
-    ftype HELPER(vfp_##name##to##p##_round_to_nearest)(uint##isz##_t  x, \
-                                                     uint32_t shift,   \
-                                                     float_status *fpst) \
-    {                                                                  \
-        ftype ret;                                                     \
-        FloatRoundMode oldmode = fpst->float_rounding_mode;            \
-        fpst->float_rounding_mode = float_round_nearest_even;          \
-        ret = itype##_to_##float##fsz##_scalbn(x, -shift, fpst);       \
-        fpst->float_rounding_mode = oldmode;                           \
-        return ret;                                                    \
-    }
-
-#define VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, ftype, isz, itype, ROUND, suff) \
-uint##isz##_t HELPER(vfp_to##name##p##suff)(ftype x, uint32_t shift,      \
-                                            float_status *fpst)           \
-{                                                                         \
-    if (unlikely(float##fsz##_is_any_nan(x))) {                           \
-        float_raise(float_flag_invalid, fpst);                            \
-        return 0;                                                         \
-    }                                                                     \
-    return float##fsz##_to_##itype##_scalbn(x, ROUND, shift, fpst);       \
-}
-
-#define VFP_CONV_FIX(name, p, fsz, ftype, isz, itype)            \
-VFP_CONV_FIX_FLOAT(name, p, fsz, ftype, isz, itype)              \
-VFP_CONV_FIX_FLOAT_ROUND(name, p, fsz, ftype, isz, itype)        \
-VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, ftype, isz, itype,        \
-                         float_round_to_zero, _round_to_zero)    \
-VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, ftype, isz, itype,        \
-                         get_float_rounding_mode(fpst), )
-
-#define VFP_CONV_FIX_A64(name, p, fsz, ftype, isz, itype)        \
-VFP_CONV_FIX_FLOAT(name, p, fsz, ftype, isz, itype)              \
-VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, ftype, isz, itype,        \
-                         get_float_rounding_mode(fpst), )
-
-VFP_CONV_FIX(sh, d, 64, float64, 64, int16)
-VFP_CONV_FIX(sl, d, 64, float64, 64, int32)
-VFP_CONV_FIX_A64(sq, d, 64, float64, 64, int64)
-VFP_CONV_FIX(uh, d, 64, float64, 64, uint16)
-VFP_CONV_FIX(ul, d, 64, float64, 64, uint32)
-VFP_CONV_FIX_A64(uq, d, 64, float64, 64, uint64)
-VFP_CONV_FIX(sh, s, 32, float32, 32, int16)
-VFP_CONV_FIX(sl, s, 32, float32, 32, int32)
-VFP_CONV_FIX_A64(sq, s, 32, float32, 64, int64)
-VFP_CONV_FIX(uh, s, 32, float32, 32, uint16)
-VFP_CONV_FIX(ul, s, 32, float32, 32, uint32)
-VFP_CONV_FIX_A64(uq, s, 32, float32, 64, uint64)
-VFP_CONV_FIX(sh, h, 16, dh_ctype_f16, 32, int16)
-VFP_CONV_FIX(sl, h, 16, dh_ctype_f16, 32, int32)
-VFP_CONV_FIX_A64(sq, h, 16, dh_ctype_f16, 64, int64)
-VFP_CONV_FIX(uh, h, 16, dh_ctype_f16, 32, uint16)
-VFP_CONV_FIX(ul, h, 16, dh_ctype_f16, 32, uint32)
-VFP_CONV_FIX_A64(uq, h, 16, dh_ctype_f16, 64, uint64)
-VFP_CONV_FLOAT_FIX_ROUND(sq, d, 64, float64, 64, int64,
-                         float_round_to_zero, _round_to_zero)
-VFP_CONV_FLOAT_FIX_ROUND(uq, d, 64, float64, 64, uint64,
-                         float_round_to_zero, _round_to_zero)
-
-#undef VFP_CONV_FIX
-#undef VFP_CONV_FIX_FLOAT
-#undef VFP_CONV_FLOAT_FIX_ROUND
-#undef VFP_CONV_FIX_A64
-
-/* Set the current fp rounding mode and return the old one.
- * The argument is a softfloat float_round_ value.
- */
-uint32_t HELPER(set_rmode)(uint32_t rmode, float_status *fp_status)
-{
-    uint32_t prev_rmode = get_float_rounding_mode(fp_status);
-    set_float_rounding_mode(rmode, fp_status);
-
-    return prev_rmode;
-}
-
-/* Half precision conversions.  */
-float32 HELPER(vfp_fcvt_f16_to_f32)(uint32_t a, float_status *fpst,
-                                    uint32_t ahp_mode)
-{
-    /* Squash FZ16 to 0 for the duration of conversion.  In this case,
-     * it would affect flushing input denormals.
-     */
-    bool save = get_flush_inputs_to_zero(fpst);
-    set_flush_inputs_to_zero(false, fpst);
-    float32 r = float16_to_float32(a, !ahp_mode, fpst);
-    set_flush_inputs_to_zero(save, fpst);
-    return r;
-}
-
-uint32_t HELPER(vfp_fcvt_f32_to_f16)(float32 a, float_status *fpst,
-                                     uint32_t ahp_mode)
-{
-    /* Squash FZ16 to 0 for the duration of conversion.  In this case,
-     * it would affect flushing output denormals.
-     */
-    bool save = get_flush_to_zero(fpst);
-    set_flush_to_zero(false, fpst);
-    float16 r = float32_to_float16(a, !ahp_mode, fpst);
-    set_flush_to_zero(save, fpst);
-    return r;
-}
-
-float64 HELPER(vfp_fcvt_f16_to_f64)(uint32_t a, float_status *fpst,
-                                    uint32_t ahp_mode)
-{
-    /* Squash FZ16 to 0 for the duration of conversion.  In this case,
-     * it would affect flushing input denormals.
-     */
-    bool save = get_flush_inputs_to_zero(fpst);
-    set_flush_inputs_to_zero(false, fpst);
-    float64 r = float16_to_float64(a, !ahp_mode, fpst);
-    set_flush_inputs_to_zero(save, fpst);
-    return r;
-}
-
-uint32_t HELPER(vfp_fcvt_f64_to_f16)(float64 a, float_status *fpst,
-                                     uint32_t ahp_mode)
-{
-    /* Squash FZ16 to 0 for the duration of conversion.  In this case,
-     * it would affect flushing output denormals.
-     */
-    bool save = get_flush_to_zero(fpst);
-    set_flush_to_zero(false, fpst);
-    float16 r = float64_to_float16(a, !ahp_mode, fpst);
-    set_flush_to_zero(save, fpst);
-    return r;
-}
-
-/* NEON helpers.  */
-
-/* Constants 256 and 512 are used in some helpers; we avoid relying on
- * int->float conversions at run-time.  */
-#define float64_256 make_float64(0x4070000000000000LL)
-#define float64_512 make_float64(0x4080000000000000LL)
-#define float16_maxnorm make_float16(0x7bff)
-#define float32_maxnorm make_float32(0x7f7fffff)
-#define float64_maxnorm make_float64(0x7fefffffffffffffLL)
-
-/* Reciprocal functions
- *
- * The algorithm that must be used to calculate the estimate
- * is specified by the ARM ARM, see FPRecipEstimate()/RecipEstimate
- */
-
-/* See RecipEstimate()
- *
- * input is a 9 bit fixed point number
- * input range 256 .. 511 for a number from 0.5 <= x < 1.0.
- * result range 256 .. 511 for a number from 1.0 to 511/256.
- */
-
-static int recip_estimate(int input)
-{
-    int a, b, r;
-    assert(256 <= input && input < 512);
-    a = (input * 2) + 1;
-    b = (1 << 19) / a;
-    r = (b + 1) >> 1;
-    assert(256 <= r && r < 512);
-    return r;
-}
-
-/*
- * Increased precision version:
- * input is a 13 bit fixed point number
- * input range 2048 .. 4095 for a number from 0.5 <= x < 1.0.
- * result range 4096 .. 8191 for a number from 1.0 to 2.0
- */
-static int recip_estimate_incprec(int input)
-{
-    int a, b, r;
-    assert(2048 <= input && input < 4096);
-    a = (input * 2) + 1;
-    /*
-     * The pseudocode expresses this as an operation on infinite
-     * precision reals where it calculates 2^25 / a and then looks
-     * at the error between that and the rounded-down-to-integer
-     * value to see if it should instead round up. We instead
-     * follow the same approach as the pseudocode for the 8-bit
-     * precision version, and calculate (2 * (2^25 / a)) as an
-     * integer so we can do the "add one and halve" to round it.
-     * So the 1 << 26 here is correct.
-     */
-    b = (1 << 26) / a;
-    r = (b + 1) >> 1;
-    assert(4096 <= r && r < 8192);
-    return r;
-}
-
-/*
- * Common wrapper to call recip_estimate
- *
- * The parameters are exponent and 64 bit fraction (without implicit
- * bit) where the binary point is nominally at bit 52. Returns a
- * float64 which can then be rounded to the appropriate size by the
- * callee.
- */
-
-static uint64_t call_recip_estimate(int *exp, int exp_off, uint64_t frac,
-                                    bool increasedprecision)
-{
-    uint32_t scaled, estimate;
-    uint64_t result_frac;
-    int result_exp;
-
-    /* Handle sub-normals */
-    if (*exp == 0) {
-        if (extract64(frac, 51, 1) == 0) {
-            *exp = -1;
-            frac <<= 2;
-        } else {
-            frac <<= 1;
-        }
-    }
-
-    if (increasedprecision) {
-        /* scaled = UInt('1':fraction<51:41>) */
-        scaled = deposit32(1 << 11, 0, 11, extract64(frac, 41, 11));
-        estimate = recip_estimate_incprec(scaled);
-    } else {
-        /* scaled = UInt('1':fraction<51:44>) */
-        scaled = deposit32(1 << 8, 0, 8, extract64(frac, 44, 8));
-        estimate = recip_estimate(scaled);
-    }
-
-    result_exp = exp_off - *exp;
-    if (increasedprecision) {
-        result_frac = deposit64(0, 40, 12, estimate);
-    } else {
-        result_frac = deposit64(0, 44, 8, estimate);
-    }
-    if (result_exp == 0) {
-        result_frac = deposit64(result_frac >> 1, 51, 1, 1);
-    } else if (result_exp == -1) {
-        result_frac = deposit64(result_frac >> 2, 50, 2, 1);
-        result_exp = 0;
-    }
-
-    *exp = result_exp;
-
-    return result_frac;
-}
-
-static bool round_to_inf(float_status *fpst, bool sign_bit)
-{
-    switch (fpst->float_rounding_mode) {
-    case float_round_nearest_even: /* Round to Nearest */
-        return true;
-    case float_round_up: /* Round to +Inf */
-        return !sign_bit;
-    case float_round_down: /* Round to -Inf */
-        return sign_bit;
-    case float_round_to_zero: /* Round to Zero */
-        return false;
-    default:
-        g_assert_not_reached();
-    }
-}
-
-uint32_t HELPER(recpe_f16)(uint32_t input, float_status *fpst)
-{
-    float16 f16 = float16_squash_input_denormal(input, fpst);
-    uint32_t f16_val = float16_val(f16);
-    uint32_t f16_sign = float16_is_neg(f16);
-    int f16_exp = extract32(f16_val, 10, 5);
-    uint32_t f16_frac = extract32(f16_val, 0, 10);
-    uint64_t f64_frac;
-
-    if (float16_is_any_nan(f16)) {
-        float16 nan = f16;
-        if (float16_is_signaling_nan(f16, fpst)) {
-            float_raise(float_flag_invalid, fpst);
-            if (!fpst->default_nan_mode) {
-                nan = float16_silence_nan(f16, fpst);
-            }
-        }
-        if (fpst->default_nan_mode) {
-            nan =  float16_default_nan(fpst);
-        }
-        return nan;
-    } else if (float16_is_infinity(f16)) {
-        return float16_set_sign(float16_zero, float16_is_neg(f16));
-    } else if (float16_is_zero(f16)) {
-        float_raise(float_flag_divbyzero, fpst);
-        return float16_set_sign(float16_infinity, float16_is_neg(f16));
-    } else if (float16_abs(f16) < (1 << 8)) {
-        /* Abs(value) < 2.0^-16 */
-        float_raise(float_flag_overflow | float_flag_inexact, fpst);
-        if (round_to_inf(fpst, f16_sign)) {
-            return float16_set_sign(float16_infinity, f16_sign);
-        } else {
-            return float16_set_sign(float16_maxnorm, f16_sign);
-        }
-    } else if (f16_exp >= 29 && fpst->flush_to_zero) {
-        float_raise(float_flag_underflow, fpst);
-        return float16_set_sign(float16_zero, float16_is_neg(f16));
-    }
-
-    f64_frac = call_recip_estimate(&f16_exp, 29,
-                                   ((uint64_t) f16_frac) << (52 - 10), false);
-
-    /* result = sign : result_exp<4:0> : fraction<51:42> */
-    f16_val = deposit32(0, 15, 1, f16_sign);
-    f16_val = deposit32(f16_val, 10, 5, f16_exp);
-    f16_val = deposit32(f16_val, 0, 10, extract64(f64_frac, 52 - 10, 10));
-    return make_float16(f16_val);
-}
-
-/*
- * FEAT_RPRES means the f32 FRECPE has an "increased precision" variant
- * which is used when FPCR.AH == 1.
- */
-static float32 do_recpe_f32(float32 input, float_status *fpst, bool rpres)
-{
-    float32 f32 = float32_squash_input_denormal(input, fpst);
-    uint32_t f32_val = float32_val(f32);
-    bool f32_sign = float32_is_neg(f32);
-    int f32_exp = extract32(f32_val, 23, 8);
-    uint32_t f32_frac = extract32(f32_val, 0, 23);
-    uint64_t f64_frac;
-
-    if (float32_is_any_nan(f32)) {
-        float32 nan = f32;
-        if (float32_is_signaling_nan(f32, fpst)) {
-            float_raise(float_flag_invalid, fpst);
-            if (!fpst->default_nan_mode) {
-                nan = float32_silence_nan(f32, fpst);
-            }
-        }
-        if (fpst->default_nan_mode) {
-            nan =  float32_default_nan(fpst);
-        }
-        return nan;
-    } else if (float32_is_infinity(f32)) {
-        return float32_set_sign(float32_zero, float32_is_neg(f32));
-    } else if (float32_is_zero(f32)) {
-        float_raise(float_flag_divbyzero, fpst);
-        return float32_set_sign(float32_infinity, float32_is_neg(f32));
-    } else if (float32_abs(f32) < (1ULL << 21)) {
-        /* Abs(value) < 2.0^-128 */
-        float_raise(float_flag_overflow | float_flag_inexact, fpst);
-        if (round_to_inf(fpst, f32_sign)) {
-            return float32_set_sign(float32_infinity, f32_sign);
-        } else {
-            return float32_set_sign(float32_maxnorm, f32_sign);
-        }
-    } else if (f32_exp >= 253 && fpst->flush_to_zero) {
-        float_raise(float_flag_underflow, fpst);
-        return float32_set_sign(float32_zero, float32_is_neg(f32));
-    }
-
-    f64_frac = call_recip_estimate(&f32_exp, 253,
-                                   ((uint64_t) f32_frac) << (52 - 23), rpres);
-
-    /* result = sign : result_exp<7:0> : fraction<51:29> */
-    f32_val = deposit32(0, 31, 1, f32_sign);
-    f32_val = deposit32(f32_val, 23, 8, f32_exp);
-    f32_val = deposit32(f32_val, 0, 23, extract64(f64_frac, 52 - 23, 23));
-    return make_float32(f32_val);
-}
-
-float32 HELPER(recpe_f32)(float32 input, float_status *fpst)
-{
-    return do_recpe_f32(input, fpst, false);
-}
-
-float32 HELPER(recpe_rpres_f32)(float32 input, float_status *fpst)
-{
-    return do_recpe_f32(input, fpst, true);
-}
-
-float64 HELPER(recpe_f64)(float64 input, float_status *fpst)
-{
-    float64 f64 = float64_squash_input_denormal(input, fpst);
-    uint64_t f64_val = float64_val(f64);
-    bool f64_sign = float64_is_neg(f64);
-    int f64_exp = extract64(f64_val, 52, 11);
-    uint64_t f64_frac = extract64(f64_val, 0, 52);
-
-    /* Deal with any special cases */
-    if (float64_is_any_nan(f64)) {
-        float64 nan = f64;
-        if (float64_is_signaling_nan(f64, fpst)) {
-            float_raise(float_flag_invalid, fpst);
-            if (!fpst->default_nan_mode) {
-                nan = float64_silence_nan(f64, fpst);
-            }
-        }
-        if (fpst->default_nan_mode) {
-            nan =  float64_default_nan(fpst);
-        }
-        return nan;
-    } else if (float64_is_infinity(f64)) {
-        return float64_set_sign(float64_zero, float64_is_neg(f64));
-    } else if (float64_is_zero(f64)) {
-        float_raise(float_flag_divbyzero, fpst);
-        return float64_set_sign(float64_infinity, float64_is_neg(f64));
-    } else if ((f64_val & ~(1ULL << 63)) < (1ULL << 50)) {
-        /* Abs(value) < 2.0^-1024 */
-        float_raise(float_flag_overflow | float_flag_inexact, fpst);
-        if (round_to_inf(fpst, f64_sign)) {
-            return float64_set_sign(float64_infinity, f64_sign);
-        } else {
-            return float64_set_sign(float64_maxnorm, f64_sign);
-        }
-    } else if (f64_exp >= 2045 && fpst->flush_to_zero) {
-        float_raise(float_flag_underflow, fpst);
-        return float64_set_sign(float64_zero, float64_is_neg(f64));
-    }
-
-    f64_frac = call_recip_estimate(&f64_exp, 2045, f64_frac, false);
-
-    /* result = sign : result_exp<10:0> : fraction<51:0>; */
-    f64_val = deposit64(0, 63, 1, f64_sign);
-    f64_val = deposit64(f64_val, 52, 11, f64_exp);
-    f64_val = deposit64(f64_val, 0, 52, f64_frac);
-    return make_float64(f64_val);
-}
-
-/* The algorithm that must be used to calculate the estimate
- * is specified by the ARM ARM.
- */
-
-static int do_recip_sqrt_estimate(int a)
-{
-    int b, estimate;
-
-    assert(128 <= a && a < 512);
-    if (a < 256) {
-        a = a * 2 + 1;
-    } else {
-        a = (a >> 1) << 1;
-        a = (a + 1) * 2;
-    }
-    b = 512;
-    while (a * (b + 1) * (b + 1) < (1 << 28)) {
-        b += 1;
-    }
-    estimate = (b + 1) / 2;
-    assert(256 <= estimate && estimate < 512);
-
-    return estimate;
-}
-
-static int do_recip_sqrt_estimate_incprec(int a)
-{
-    /*
-     * The Arm ARM describes the 12-bit precision version of RecipSqrtEstimate
-     * in terms of an infinite-precision floating point calculation of a
-     * square root. We implement this using the same kind of pure integer
-     * algorithm as the 8-bit mantissa, to get the same bit-for-bit result.
-     */
-    int64_t b, estimate;
-
-    assert(1024 <= a && a < 4096);
-    if (a < 2048) {
-        a = a * 2 + 1;
-    } else {
-        a = (a >> 1) << 1;
-        a = (a + 1) * 2;
-    }
-    b = 8192;
-    while (a * (b + 1) * (b + 1) < (1ULL << 39)) {
-        b += 1;
-    }
-    estimate = (b + 1) / 2;
-
-    assert(4096 <= estimate && estimate < 8192);
-
-    return estimate;
-}
-
-static uint64_t recip_sqrt_estimate(int *exp , int exp_off, uint64_t frac,
-                                    bool increasedprecision)
-{
-    int estimate;
-    uint32_t scaled;
-
-    if (*exp == 0) {
-        while (extract64(frac, 51, 1) == 0) {
-            frac = frac << 1;
-            *exp -= 1;
-        }
-        frac = extract64(frac, 0, 51) << 1;
-    }
-
-    if (increasedprecision) {
-        if (*exp & 1) {
-            /* scaled = UInt('01':fraction<51:42>) */
-            scaled = deposit32(1 << 10, 0, 10, extract64(frac, 42, 10));
-        } else {
-            /* scaled = UInt('1':fraction<51:41>) */
-            scaled = deposit32(1 << 11, 0, 11, extract64(frac, 41, 11));
-        }
-        estimate = do_recip_sqrt_estimate_incprec(scaled);
-    } else {
-        if (*exp & 1) {
-            /* scaled = UInt('01':fraction<51:45>) */
-            scaled = deposit32(1 << 7, 0, 7, extract64(frac, 45, 7));
-        } else {
-            /* scaled = UInt('1':fraction<51:44>) */
-            scaled = deposit32(1 << 8, 0, 8, extract64(frac, 44, 8));
-        }
-        estimate = do_recip_sqrt_estimate(scaled);
-    }
-
-    *exp = (exp_off - *exp) / 2;
-    if (increasedprecision) {
-        return extract64(estimate, 0, 12) << 40;
-    } else {
-        return extract64(estimate, 0, 8) << 44;
-    }
-}
-
-uint32_t HELPER(rsqrte_f16)(uint32_t input, float_status *s)
-{
-    float16 f16 = float16_squash_input_denormal(input, s);
-    uint16_t val = float16_val(f16);
-    bool f16_sign = float16_is_neg(f16);
-    int f16_exp = extract32(val, 10, 5);
-    uint16_t f16_frac = extract32(val, 0, 10);
-    uint64_t f64_frac;
-
-    if (float16_is_any_nan(f16)) {
-        float16 nan = f16;
-        if (float16_is_signaling_nan(f16, s)) {
-            float_raise(float_flag_invalid, s);
-            if (!s->default_nan_mode) {
-                nan = float16_silence_nan(f16, s);
-            }
-        }
-        if (s->default_nan_mode) {
-            nan =  float16_default_nan(s);
-        }
-        return nan;
-    } else if (float16_is_zero(f16)) {
-        float_raise(float_flag_divbyzero, s);
-        return float16_set_sign(float16_infinity, f16_sign);
-    } else if (f16_sign) {
-        float_raise(float_flag_invalid, s);
-        return float16_default_nan(s);
-    } else if (float16_is_infinity(f16)) {
-        return float16_zero;
-    }
-
-    /* Scale and normalize to a double-precision value between 0.25 and 1.0,
-     * preserving the parity of the exponent.  */
-
-    f64_frac = ((uint64_t) f16_frac) << (52 - 10);
-
-    f64_frac = recip_sqrt_estimate(&f16_exp, 44, f64_frac, false);
-
-    /* result = sign : result_exp<4:0> : estimate<7:0> : Zeros(2) */
-    val = deposit32(0, 15, 1, f16_sign);
-    val = deposit32(val, 10, 5, f16_exp);
-    val = deposit32(val, 2, 8, extract64(f64_frac, 52 - 8, 8));
-    return make_float16(val);
-}
-
-/*
- * FEAT_RPRES means the f32 FRSQRTE has an "increased precision" variant
- * which is used when FPCR.AH == 1.
- */
-static float32 do_rsqrte_f32(float32 input, float_status *s, bool rpres)
-{
-    float32 f32 = float32_squash_input_denormal(input, s);
-    uint32_t val = float32_val(f32);
-    uint32_t f32_sign = float32_is_neg(f32);
-    int f32_exp = extract32(val, 23, 8);
-    uint32_t f32_frac = extract32(val, 0, 23);
-    uint64_t f64_frac;
-
-    if (float32_is_any_nan(f32)) {
-        float32 nan = f32;
-        if (float32_is_signaling_nan(f32, s)) {
-            float_raise(float_flag_invalid, s);
-            if (!s->default_nan_mode) {
-                nan = float32_silence_nan(f32, s);
-            }
-        }
-        if (s->default_nan_mode) {
-            nan =  float32_default_nan(s);
-        }
-        return nan;
-    } else if (float32_is_zero(f32)) {
-        float_raise(float_flag_divbyzero, s);
-        return float32_set_sign(float32_infinity, float32_is_neg(f32));
-    } else if (float32_is_neg(f32)) {
-        float_raise(float_flag_invalid, s);
-        return float32_default_nan(s);
-    } else if (float32_is_infinity(f32)) {
-        return float32_zero;
-    }
-
-    /* Scale and normalize to a double-precision value between 0.25 and 1.0,
-     * preserving the parity of the exponent.  */
-
-    f64_frac = ((uint64_t) f32_frac) << 29;
-
-    f64_frac = recip_sqrt_estimate(&f32_exp, 380, f64_frac, rpres);
-
-    /*
-     * result = sign : result_exp<7:0> : estimate<7:0> : Zeros(15)
-     * or for increased precision
-     * result = sign : result_exp<7:0> : estimate<11:0> : Zeros(11)
-     */
-    val = deposit32(0, 31, 1, f32_sign);
-    val = deposit32(val, 23, 8, f32_exp);
-    if (rpres) {
-        val = deposit32(val, 11, 12, extract64(f64_frac, 52 - 12, 12));
-    } else {
-        val = deposit32(val, 15, 8, extract64(f64_frac, 52 - 8, 8));
-    }
-    return make_float32(val);
-}
-
-float32 HELPER(rsqrte_f32)(float32 input, float_status *s)
-{
-    return do_rsqrte_f32(input, s, false);
-}
-
-float32 HELPER(rsqrte_rpres_f32)(float32 input, float_status *s)
-{
-    return do_rsqrte_f32(input, s, true);
-}
-
-float64 HELPER(rsqrte_f64)(float64 input, float_status *s)
-{
-    float64 f64 = float64_squash_input_denormal(input, s);
-    uint64_t val = float64_val(f64);
-    bool f64_sign = float64_is_neg(f64);
-    int f64_exp = extract64(val, 52, 11);
-    uint64_t f64_frac = extract64(val, 0, 52);
-
-    if (float64_is_any_nan(f64)) {
-        float64 nan = f64;
-        if (float64_is_signaling_nan(f64, s)) {
-            float_raise(float_flag_invalid, s);
-            if (!s->default_nan_mode) {
-                nan = float64_silence_nan(f64, s);
-            }
-        }
-        if (s->default_nan_mode) {
-            nan =  float64_default_nan(s);
-        }
-        return nan;
-    } else if (float64_is_zero(f64)) {
-        float_raise(float_flag_divbyzero, s);
-        return float64_set_sign(float64_infinity, float64_is_neg(f64));
-    } else if (float64_is_neg(f64)) {
-        float_raise(float_flag_invalid, s);
-        return float64_default_nan(s);
-    } else if (float64_is_infinity(f64)) {
-        return float64_zero;
-    }
-
-    f64_frac = recip_sqrt_estimate(&f64_exp, 3068, f64_frac, false);
-
-    /* result = sign : result_exp<4:0> : estimate<7:0> : Zeros(44) */
-    val = deposit64(0, 61, 1, f64_sign);
-    val = deposit64(val, 52, 11, f64_exp);
-    val = deposit64(val, 44, 8, extract64(f64_frac, 52 - 8, 8));
-    return make_float64(val);
-}
-
-uint32_t HELPER(recpe_u32)(uint32_t a)
-{
-    int input, estimate;
-
-    if ((a & 0x80000000) == 0) {
-        return 0xffffffff;
-    }
-
-    input = extract32(a, 23, 9);
-    estimate = recip_estimate(input);
-
-    return deposit32(0, (32 - 9), 9, estimate);
-}
-
-uint32_t HELPER(rsqrte_u32)(uint32_t a)
-{
-    int estimate;
-
-    if ((a & 0xc0000000) == 0) {
-        return 0xffffffff;
-    }
-
-    estimate = do_recip_sqrt_estimate(extract32(a, 23, 9));
-
-    return deposit32(0, 23, 9, estimate);
-}
-
-/* VFPv4 fused multiply-accumulate */
-dh_ctype_f16 VFP_HELPER(muladd, h)(dh_ctype_f16 a, dh_ctype_f16 b,
-                                   dh_ctype_f16 c, float_status *fpst)
-{
-    return float16_muladd(a, b, c, 0, fpst);
-}
-
-float32 VFP_HELPER(muladd, s)(float32 a, float32 b, float32 c,
-                              float_status *fpst)
-{
-    return float32_muladd(a, b, c, 0, fpst);
-}
-
-float64 VFP_HELPER(muladd, d)(float64 a, float64 b, float64 c,
-                              float_status *fpst)
-{
-    return float64_muladd(a, b, c, 0, fpst);
-}
-
-/* ARMv8 round to integral */
-dh_ctype_f16 HELPER(rinth_exact)(dh_ctype_f16 x, float_status *fp_status)
-{
-    return float16_round_to_int(x, fp_status);
-}
-
-float32 HELPER(rints_exact)(float32 x, float_status *fp_status)
-{
-    return float32_round_to_int(x, fp_status);
-}
-
-float64 HELPER(rintd_exact)(float64 x, float_status *fp_status)
-{
-    return float64_round_to_int(x, fp_status);
-}
-
-dh_ctype_f16 HELPER(rinth)(dh_ctype_f16 x, float_status *fp_status)
-{
-    int old_flags = get_float_exception_flags(fp_status), new_flags;
-    float16 ret;
-
-    ret = float16_round_to_int(x, fp_status);
-
-    /* Suppress any inexact exceptions the conversion produced */
-    if (!(old_flags & float_flag_inexact)) {
-        new_flags = get_float_exception_flags(fp_status);
-        set_float_exception_flags(new_flags & ~float_flag_inexact, fp_status);
-    }
-
-    return ret;
-}
-
-float32 HELPER(rints)(float32 x, float_status *fp_status)
-{
-    int old_flags = get_float_exception_flags(fp_status), new_flags;
-    float32 ret;
-
-    ret = float32_round_to_int(x, fp_status);
-
-    /* Suppress any inexact exceptions the conversion produced */
-    if (!(old_flags & float_flag_inexact)) {
-        new_flags = get_float_exception_flags(fp_status);
-        set_float_exception_flags(new_flags & ~float_flag_inexact, fp_status);
-    }
-
-    return ret;
-}
-
-float64 HELPER(rintd)(float64 x, float_status *fp_status)
-{
-    int old_flags = get_float_exception_flags(fp_status), new_flags;
-    float64 ret;
-
-    ret = float64_round_to_int(x, fp_status);
-
-    /* Suppress any inexact exceptions the conversion produced */
-    if (!(old_flags & float_flag_inexact)) {
-        new_flags = get_float_exception_flags(fp_status);
-        set_float_exception_flags(new_flags & ~float_flag_inexact, fp_status);
-    }
-
-    return ret;
-}
-
-/* Convert ARM rounding mode to softfloat */
-const FloatRoundMode arm_rmode_to_sf_map[] = {
-    [FPROUNDING_TIEEVEN] = float_round_nearest_even,
-    [FPROUNDING_POSINF] = float_round_up,
-    [FPROUNDING_NEGINF] = float_round_down,
-    [FPROUNDING_ZERO] = float_round_to_zero,
-    [FPROUNDING_TIEAWAY] = float_round_ties_away,
-    [FPROUNDING_ODD] = float_round_to_odd,
-};
-
-/*
- * Implement float64 to int32_t conversion without saturation;
- * the result is supplied modulo 2^32.
- */
-uint64_t HELPER(fjcvtzs)(float64 value, float_status *status)
-{
-    uint32_t frac, e_old, e_new;
-    bool inexact;
-
-    e_old = get_float_exception_flags(status);
-    set_float_exception_flags(0, status);
-    frac = float64_to_int32_modulo(value, float_round_to_zero, status);
-    e_new = get_float_exception_flags(status);
-    set_float_exception_flags(e_old | e_new, status);
-
-    /* Normal inexact, denormal with flush-to-zero, or overflow or NaN */
-    inexact = e_new & (float_flag_inexact |
-                       float_flag_input_denormal_flushed |
-                       float_flag_invalid);
-
-    /* While not inexact for IEEE FP, -0.0 is inexact for JavaScript. */
-    inexact |= value == float64_chs(float64_zero);
-
-    /* Pack the result and the env->ZF representation of Z together.  */
-    return deposit64(frac, 32, 32, inexact);
-}
-
-uint32_t HELPER(vjcvt)(float64 value, CPUARMState *env)
-{
-    uint64_t pair = HELPER(fjcvtzs)(value, &env->vfp.fp_status[FPST_A32]);
-    uint32_t result = pair;
-    uint32_t z = (pair >> 32) == 0;
-
-    /* Store Z, clear NCV, in FPSCR.NZCV.  */
-    env->vfp.fpsr = (env->vfp.fpsr & ~FPSR_NZCV_MASK) | (z * FPSR_Z);
-
-    return result;
-}
-
-/* Round a float32 to an integer that fits in int32_t or int64_t.  */
-static float32 frint_s(float32 f, float_status *fpst, int intsize)
-{
-    int old_flags = get_float_exception_flags(fpst);
-    uint32_t exp = extract32(f, 23, 8);
-
-    if (unlikely(exp == 0xff)) {
-        /* NaN or Inf.  */
-        goto overflow;
-    }
-
-    /* Round and re-extract the exponent.  */
-    f = float32_round_to_int(f, fpst);
-    exp = extract32(f, 23, 8);
-
-    /* Validate the range of the result.  */
-    if (exp < 126 + intsize) {
-        /* abs(F) <= INT{N}_MAX */
-        return f;
-    }
-    if (exp == 126 + intsize) {
-        uint32_t sign = extract32(f, 31, 1);
-        uint32_t frac = extract32(f, 0, 23);
-        if (sign && frac == 0) {
-            /* F == INT{N}_MIN */
-            return f;
-        }
-    }
-
- overflow:
-    /*
-     * Raise Invalid and return INT{N}_MIN as a float.  Revert any
-     * inexact exception float32_round_to_int may have raised.
-     */
-    set_float_exception_flags(old_flags | float_flag_invalid, fpst);
-    return (0x100u + 126u + intsize) << 23;
-}
-
-float32 HELPER(frint32_s)(float32 f, float_status *fpst)
-{
-    return frint_s(f, fpst, 32);
-}
-
-float32 HELPER(frint64_s)(float32 f, float_status *fpst)
-{
-    return frint_s(f, fpst, 64);
-}
-
-/* Round a float64 to an integer that fits in int32_t or int64_t.  */
-static float64 frint_d(float64 f, float_status *fpst, int intsize)
-{
-    int old_flags = get_float_exception_flags(fpst);
-    uint32_t exp = extract64(f, 52, 11);
-
-    if (unlikely(exp == 0x7ff)) {
-        /* NaN or Inf.  */
-        goto overflow;
-    }
-
-    /* Round and re-extract the exponent.  */
-    f = float64_round_to_int(f, fpst);
-    exp = extract64(f, 52, 11);
-
-    /* Validate the range of the result.  */
-    if (exp < 1022 + intsize) {
-        /* abs(F) <= INT{N}_MAX */
-        return f;
-    }
-    if (exp == 1022 + intsize) {
-        uint64_t sign = extract64(f, 63, 1);
-        uint64_t frac = extract64(f, 0, 52);
-        if (sign && frac == 0) {
-            /* F == INT{N}_MIN */
-            return f;
-        }
-    }
-
- overflow:
-    /*
-     * Raise Invalid and return INT{N}_MIN as a float.  Revert any
-     * inexact exception float64_round_to_int may have raised.
-     */
-    set_float_exception_flags(old_flags | float_flag_invalid, fpst);
-    return (uint64_t)(0x800 + 1022 + intsize) << 52;
-}
-
-float64 HELPER(frint32_d)(float64 f, float_status *fpst)
-{
-    return frint_d(f, fpst, 32);
-}
-
-float64 HELPER(frint64_d)(float64 f, float_status *fpst)
-{
-    return frint_d(f, fpst, 64);
-}
-
-void HELPER(check_hcr_el2_trap)(CPUARMState *env, uint32_t rt, uint32_t reg)
-{
-    uint32_t syndrome;
-
-    switch (reg) {
-    case ARM_VFP_MVFR0:
-    case ARM_VFP_MVFR1:
-    case ARM_VFP_MVFR2:
-        if (!(arm_hcr_el2_eff(env) & HCR_TID3)) {
-            return;
-        }
-        break;
-    case ARM_VFP_FPSID:
-        if (!(arm_hcr_el2_eff(env) & HCR_TID0)) {
-            return;
-        }
-        break;
-    default:
-        g_assert_not_reached();
-    }
-
-    syndrome = ((EC_FPIDTRAP << ARM_EL_EC_SHIFT)
-                | ARM_EL_IL
-                | (1 << 24) | (0xe << 20) | (7 << 14)
-                | (reg << 10) | (rt << 5) | 1);
-
-    raise_exception(env, EXCP_HYP_TRAP, syndrome, 2);
-}
-
-#endif

From e34cfba5e8d7bd631398a09d658dee40b1aef085 Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Fri, 21 Feb 2025 19:09:54 +0000
Subject: [PATCH 22/43] target/arm: Move FPSCR get/set helpers to
 tcg/vfp_helper.c

Currently the helper_vfp_get_fpscr() and helper_vfp_set_fpscr()
functions do the actual work of updating the FPSCR, and we have
wrappers vfp_get_fpscr() and vfp_set_fpscr() which we use for calls
from other QEMU C code.

Flip these around so that it is vfp_get_fpscr() and vfp_set_fpscr()
which do the actual work, and helper_vfp_get_fpscr() and
helper_vfp_set_fpscr() which are the wrappers; this allows us to move
them to tcg/vfp_helper.c.

Since this is the last HELPER() we had in arm/vfp_helper.c, we can
drop the include of helper-proto.h.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20250221190957.811948-3-peter.maydell@linaro.org
---
 target/arm/tcg/vfp_helper.c | 10 ++++++++++
 target/arm/vfp_helper.c     | 15 ++-------------
 2 files changed, 12 insertions(+), 13 deletions(-)

diff --git a/target/arm/tcg/vfp_helper.c b/target/arm/tcg/vfp_helper.c
index aa580ff64c..cd6e0d0eda 100644
--- a/target/arm/tcg/vfp_helper.c
+++ b/target/arm/tcg/vfp_helper.c
@@ -1128,3 +1128,13 @@ void HELPER(check_hcr_el2_trap)(CPUARMState *env, uint32_t rt, uint32_t reg)
 
     raise_exception(env, EXCP_HYP_TRAP, syndrome, 2);
 }
+
+uint32_t HELPER(vfp_get_fpscr)(CPUARMState *env)
+{
+    return vfp_get_fpscr(env);
+}
+
+void HELPER(vfp_set_fpscr)(CPUARMState *env, uint32_t val)
+{
+    vfp_set_fpscr(env, val);
+}
diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c
index 0e849d8d4d..0919acb7b8 100644
--- a/target/arm/vfp_helper.c
+++ b/target/arm/vfp_helper.c
@@ -19,7 +19,6 @@
 
 #include "qemu/osdep.h"
 #include "cpu.h"
-#include "exec/helper-proto.h"
 #include "internals.h"
 #include "cpu-features.h"
 #include "fpu/softfloat.h"
@@ -298,17 +297,12 @@ uint32_t vfp_get_fpsr(CPUARMState *env)
     return fpsr;
 }
 
-uint32_t HELPER(vfp_get_fpscr)(CPUARMState *env)
+uint32_t vfp_get_fpscr(CPUARMState *env)
 {
     return (vfp_get_fpcr(env) & FPSCR_FPCR_MASK) |
         (vfp_get_fpsr(env) & FPSCR_FPSR_MASK);
 }
 
-uint32_t vfp_get_fpscr(CPUARMState *env)
-{
-    return HELPER(vfp_get_fpscr)(env);
-}
-
 void vfp_set_fpsr(CPUARMState *env, uint32_t val)
 {
     ARMCPU *cpu = env_archcpu(env);
@@ -402,13 +396,8 @@ void vfp_set_fpcr(CPUARMState *env, uint32_t val)
     vfp_set_fpcr_masked(env, val, MAKE_64BIT_MASK(0, 32));
 }
 
-void HELPER(vfp_set_fpscr)(CPUARMState *env, uint32_t val)
+void vfp_set_fpscr(CPUARMState *env, uint32_t val)
 {
     vfp_set_fpcr_masked(env, val, FPSCR_FPCR_MASK);
     vfp_set_fpsr(env, val & FPSCR_FPSR_MASK);
 }
-
-void vfp_set_fpscr(CPUARMState *env, uint32_t val)
-{
-    HELPER(vfp_set_fpscr)(env, val);
-}

From b9d3dc45532e696f5ee566edd227a4f46bad0f35 Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Fri, 21 Feb 2025 19:09:55 +0000
Subject: [PATCH 23/43] target/arm: Move softfloat specific FPCR/FPSR handling
 to tcg/

The softfloat (i.e. TCG) specific handling for the FPCR
and FPSR is abstracted behind five functions:
 arm_set_default_fp_behaviours
 arm_set_ah_fp_behaviours
 vfp_get_fpsr_from_host
 vfp_clear_float_status_exc_flags
 vfp_set_fpcr_to_host

Currently we rely on the first two calling softfloat functions that
work even in a KVM-only compile because they're defined as inline in
the softfloat header file, and we provide stub versions of the last
three in arm/vfp_helper.c if CONFIG_TCG isn't defined.

Move the softfloat-specific versions of these functions to
tcg/vfp_helper.c, and provide the non-TCG stub versions in
tcg-stubs.c.

This lets us drop the softfloat header include and the last
set of CONFIG_TCG ifdefs from arm/vfp_helper.c.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20250221190957.811948-4-peter.maydell@linaro.org
---
 target/arm/internals.h      |   9 ++
 target/arm/tcg-stubs.c      |  22 ++++
 target/arm/tcg/vfp_helper.c | 228 +++++++++++++++++++++++++++++++++
 target/arm/vfp_helper.c     | 248 ------------------------------------
 4 files changed, 259 insertions(+), 248 deletions(-)

diff --git a/target/arm/internals.h b/target/arm/internals.h
index b318734145..a6ff228f9f 100644
--- a/target/arm/internals.h
+++ b/target/arm/internals.h
@@ -1833,5 +1833,14 @@ int alle1_tlbmask(CPUARMState *env);
 void arm_set_default_fp_behaviours(float_status *s);
 /* Set the float_status behaviour to match Arm FPCR.AH=1 behaviour */
 void arm_set_ah_fp_behaviours(float_status *s);
+/* Read the float_status info and return the appropriate FPSR value */
+uint32_t vfp_get_fpsr_from_host(CPUARMState *env);
+/* Clear the exception status flags from all float_status fields */
+void vfp_clear_float_status_exc_flags(CPUARMState *env);
+/*
+ * Update float_status fields to handle the bits of the FPCR
+ * specified by mask changing to the values in val.
+ */
+void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask);
 
 #endif
diff --git a/target/arm/tcg-stubs.c b/target/arm/tcg-stubs.c
index f3f45d54f2..93a15cad61 100644
--- a/target/arm/tcg-stubs.c
+++ b/target/arm/tcg-stubs.c
@@ -30,3 +30,25 @@ void assert_hflags_rebuild_correctly(CPUARMState *env)
 void define_tlb_insn_regs(ARMCPU *cpu)
 {
 }
+
+/* With KVM, we never use float_status, so these can be no-ops */
+void arm_set_default_fp_behaviours(float_status *s)
+{
+}
+
+void arm_set_ah_fp_behaviours(float_status *s)
+{
+}
+
+uint32_t vfp_get_fpsr_from_host(CPUARMState *env)
+{
+    return 0;
+}
+
+void vfp_clear_float_status_exc_flags(CPUARMState *env)
+{
+}
+
+void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
+{
+}
diff --git a/target/arm/tcg/vfp_helper.c b/target/arm/tcg/vfp_helper.c
index cd6e0d0eda..b32e2f4e27 100644
--- a/target/arm/tcg/vfp_helper.c
+++ b/target/arm/tcg/vfp_helper.c
@@ -25,6 +25,234 @@
 #include "fpu/softfloat.h"
 #include "qemu/log.h"
 
+/*
+ * Set the float_status behaviour to match the Arm defaults:
+ *  * tininess-before-rounding
+ *  * 2-input NaN propagation prefers SNaN over QNaN, and then
+ *    operand A over operand B (see FPProcessNaNs() pseudocode)
+ *  * 3-input NaN propagation prefers SNaN over QNaN, and then
+ *    operand C over A over B (see FPProcessNaNs3() pseudocode,
+ *    but note that for QEMU muladd is a * b + c, whereas for
+ *    the pseudocode function the arguments are in the order c, a, b.
+ *  * 0 * Inf + NaN returns the default NaN if the input NaN is quiet,
+ *    and the input NaN if it is signalling
+ *  * Default NaN has sign bit clear, msb frac bit set
+ */
+void arm_set_default_fp_behaviours(float_status *s)
+{
+    set_float_detect_tininess(float_tininess_before_rounding, s);
+    set_float_ftz_detection(float_ftz_before_rounding, s);
+    set_float_2nan_prop_rule(float_2nan_prop_s_ab, s);
+    set_float_3nan_prop_rule(float_3nan_prop_s_cab, s);
+    set_float_infzeronan_rule(float_infzeronan_dnan_if_qnan, s);
+    set_float_default_nan_pattern(0b01000000, s);
+}
+
+/*
+ * Set the float_status behaviour to match the FEAT_AFP
+ * FPCR.AH=1 requirements:
+ *  * tininess-after-rounding
+ *  * 2-input NaN propagation prefers the first NaN
+ *  * 3-input NaN propagation prefers a over b over c
+ *  * 0 * Inf + NaN always returns the input NaN and doesn't
+ *    set Invalid for a QNaN
+ *  * default NaN has sign bit set, msb frac bit set
+ */
+void arm_set_ah_fp_behaviours(float_status *s)
+{
+    set_float_detect_tininess(float_tininess_after_rounding, s);
+    set_float_ftz_detection(float_ftz_after_rounding, s);
+    set_float_2nan_prop_rule(float_2nan_prop_ab, s);
+    set_float_3nan_prop_rule(float_3nan_prop_abc, s);
+    set_float_infzeronan_rule(float_infzeronan_dnan_never |
+                              float_infzeronan_suppress_invalid, s);
+    set_float_default_nan_pattern(0b11000000, s);
+}
+
+/* Convert host exception flags to vfp form.  */
+static inline uint32_t vfp_exceptbits_from_host(int host_bits, bool ah)
+{
+    uint32_t target_bits = 0;
+
+    if (host_bits & float_flag_invalid) {
+        target_bits |= FPSR_IOC;
+    }
+    if (host_bits & float_flag_divbyzero) {
+        target_bits |= FPSR_DZC;
+    }
+    if (host_bits & float_flag_overflow) {
+        target_bits |= FPSR_OFC;
+    }
+    if (host_bits & (float_flag_underflow | float_flag_output_denormal_flushed)) {
+        target_bits |= FPSR_UFC;
+    }
+    if (host_bits & float_flag_inexact) {
+        target_bits |= FPSR_IXC;
+    }
+    if (host_bits & float_flag_input_denormal_flushed) {
+        target_bits |= FPSR_IDC;
+    }
+    /*
+     * With FPCR.AH, IDC is set when an input denormal is used,
+     * and flushing an output denormal to zero sets both IXC and UFC.
+     */
+    if (ah && (host_bits & float_flag_input_denormal_used)) {
+        target_bits |= FPSR_IDC;
+    }
+    if (ah && (host_bits & float_flag_output_denormal_flushed)) {
+        target_bits |= FPSR_IXC;
+    }
+    return target_bits;
+}
+
+uint32_t vfp_get_fpsr_from_host(CPUARMState *env)
+{
+    uint32_t a32_flags = 0, a64_flags = 0;
+
+    a32_flags |= get_float_exception_flags(&env->vfp.fp_status[FPST_A32]);
+    a32_flags |= get_float_exception_flags(&env->vfp.fp_status[FPST_STD]);
+    /* FZ16 does not generate an input denormal exception.  */
+    a32_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_A32_F16])
+          & ~float_flag_input_denormal_flushed);
+    a32_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_STD_F16])
+          & ~float_flag_input_denormal_flushed);
+
+    a64_flags |= get_float_exception_flags(&env->vfp.fp_status[FPST_A64]);
+    a64_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_A64_F16])
+          & ~(float_flag_input_denormal_flushed | float_flag_input_denormal_used));
+    /*
+     * We do not merge in flags from FPST_AH or FPST_AH_F16, because
+     * they are used for insns that must not set the cumulative exception bits.
+     */
+
+    /*
+     * Flushing an input denormal *only* because FPCR.FIZ == 1 does
+     * not set FPSR.IDC; if FPCR.FZ is also set then this takes
+     * precedence and IDC is set (see the FPUnpackBase pseudocode).
+     * So squash it unless (FPCR.AH == 0 && FPCR.FZ == 1).
+     * We only do this for the a64 flags because FIZ has no effect
+     * on AArch32 even if it is set.
+     */
+    if ((env->vfp.fpcr & (FPCR_FZ | FPCR_AH)) != FPCR_FZ) {
+        a64_flags &= ~float_flag_input_denormal_flushed;
+    }
+    return vfp_exceptbits_from_host(a64_flags, env->vfp.fpcr & FPCR_AH) |
+        vfp_exceptbits_from_host(a32_flags, false);
+}
+
+void vfp_clear_float_status_exc_flags(CPUARMState *env)
+{
+    /*
+     * Clear out all the exception-flag information in the float_status
+     * values. The caller should have arranged for env->vfp.fpsr to
+     * be the architecturally up-to-date exception flag information first.
+     */
+    set_float_exception_flags(0, &env->vfp.fp_status[FPST_A32]);
+    set_float_exception_flags(0, &env->vfp.fp_status[FPST_A64]);
+    set_float_exception_flags(0, &env->vfp.fp_status[FPST_A32_F16]);
+    set_float_exception_flags(0, &env->vfp.fp_status[FPST_A64_F16]);
+    set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD]);
+    set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD_F16]);
+    set_float_exception_flags(0, &env->vfp.fp_status[FPST_AH]);
+    set_float_exception_flags(0, &env->vfp.fp_status[FPST_AH_F16]);
+}
+
+static void vfp_sync_and_clear_float_status_exc_flags(CPUARMState *env)
+{
+    /*
+     * Synchronize any pending exception-flag information in the
+     * float_status values into env->vfp.fpsr, and then clear out
+     * the float_status data.
+     */
+    env->vfp.fpsr |= vfp_get_fpsr_from_host(env);
+    vfp_clear_float_status_exc_flags(env);
+}
+
+void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
+{
+    uint64_t changed = env->vfp.fpcr;
+
+    changed ^= val;
+    changed &= mask;
+    if (changed & (3 << 22)) {
+        int i = (val >> 22) & 3;
+        switch (i) {
+        case FPROUNDING_TIEEVEN:
+            i = float_round_nearest_even;
+            break;
+        case FPROUNDING_POSINF:
+            i = float_round_up;
+            break;
+        case FPROUNDING_NEGINF:
+            i = float_round_down;
+            break;
+        case FPROUNDING_ZERO:
+            i = float_round_to_zero;
+            break;
+        }
+        set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A32]);
+        set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A64]);
+        set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A32_F16]);
+        set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A64_F16]);
+    }
+    if (changed & FPCR_FZ16) {
+        bool ftz_enabled = val & FPCR_FZ16;
+        set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A32_F16]);
+        set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A64_F16]);
+        set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]);
+        set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_AH_F16]);
+        set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A32_F16]);
+        set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A64_F16]);
+        set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]);
+        set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_AH_F16]);
+    }
+    if (changed & FPCR_FZ) {
+        bool ftz_enabled = val & FPCR_FZ;
+        set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A32]);
+        set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A64]);
+        /* FIZ is A64 only so FZ always makes A32 code flush inputs to zero */
+        set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A32]);
+    }
+    if (changed & (FPCR_FZ | FPCR_AH | FPCR_FIZ)) {
+        /*
+         * A64: Flush denormalized inputs to zero if FPCR.FIZ = 1, or
+         * both FPCR.AH = 0 and FPCR.FZ = 1.
+         */
+        bool fitz_enabled = (val & FPCR_FIZ) ||
+            (val & (FPCR_FZ | FPCR_AH)) == FPCR_FZ;
+        set_flush_inputs_to_zero(fitz_enabled, &env->vfp.fp_status[FPST_A64]);
+    }
+    if (changed & FPCR_DN) {
+        bool dnan_enabled = val & FPCR_DN;
+        set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A32]);
+        set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A64]);
+        set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A32_F16]);
+        set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A64_F16]);
+        set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH]);
+        set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH_F16]);
+    }
+    if (changed & FPCR_AH) {
+        bool ah_enabled = val & FPCR_AH;
+
+        if (ah_enabled) {
+            /* Change behaviours for A64 FP operations */
+            arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_A64]);
+            arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]);
+        } else {
+            arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64]);
+            arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]);
+        }
+    }
+    /*
+     * If any bits changed that we look at in vfp_get_fpsr_from_host(),
+     * we must sync the float_status flags into vfp.fpsr now (under the
+     * old regime) before we update vfp.fpcr.
+     */
+    if (changed & (FPCR_FZ | FPCR_AH | FPCR_FIZ)) {
+        vfp_sync_and_clear_float_status_exc_flags(env);
+    }
+}
+
 /*
  * VFP support.  We follow the convention used for VFP instructions:
  * Single precision routines have a "s" suffix, double precision a
diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c
index 0919acb7b8..cc0f055ef0 100644
--- a/target/arm/vfp_helper.c
+++ b/target/arm/vfp_helper.c
@@ -21,254 +21,6 @@
 #include "cpu.h"
 #include "internals.h"
 #include "cpu-features.h"
-#include "fpu/softfloat.h"
-
-/*
- * Set the float_status behaviour to match the Arm defaults:
- *  * tininess-before-rounding
- *  * 2-input NaN propagation prefers SNaN over QNaN, and then
- *    operand A over operand B (see FPProcessNaNs() pseudocode)
- *  * 3-input NaN propagation prefers SNaN over QNaN, and then
- *    operand C over A over B (see FPProcessNaNs3() pseudocode,
- *    but note that for QEMU muladd is a * b + c, whereas for
- *    the pseudocode function the arguments are in the order c, a, b.
- *  * 0 * Inf + NaN returns the default NaN if the input NaN is quiet,
- *    and the input NaN if it is signalling
- *  * Default NaN has sign bit clear, msb frac bit set
- */
-void arm_set_default_fp_behaviours(float_status *s)
-{
-    set_float_detect_tininess(float_tininess_before_rounding, s);
-    set_float_ftz_detection(float_ftz_before_rounding, s);
-    set_float_2nan_prop_rule(float_2nan_prop_s_ab, s);
-    set_float_3nan_prop_rule(float_3nan_prop_s_cab, s);
-    set_float_infzeronan_rule(float_infzeronan_dnan_if_qnan, s);
-    set_float_default_nan_pattern(0b01000000, s);
-}
-
-/*
- * Set the float_status behaviour to match the FEAT_AFP
- * FPCR.AH=1 requirements:
- *  * tininess-after-rounding
- *  * 2-input NaN propagation prefers the first NaN
- *  * 3-input NaN propagation prefers a over b over c
- *  * 0 * Inf + NaN always returns the input NaN and doesn't
- *    set Invalid for a QNaN
- *  * default NaN has sign bit set, msb frac bit set
- */
-void arm_set_ah_fp_behaviours(float_status *s)
-{
-    set_float_detect_tininess(float_tininess_after_rounding, s);
-    set_float_ftz_detection(float_ftz_after_rounding, s);
-    set_float_2nan_prop_rule(float_2nan_prop_ab, s);
-    set_float_3nan_prop_rule(float_3nan_prop_abc, s);
-    set_float_infzeronan_rule(float_infzeronan_dnan_never |
-                              float_infzeronan_suppress_invalid, s);
-    set_float_default_nan_pattern(0b11000000, s);
-}
-
-#ifdef CONFIG_TCG
-
-/* Convert host exception flags to vfp form.  */
-static inline uint32_t vfp_exceptbits_from_host(int host_bits, bool ah)
-{
-    uint32_t target_bits = 0;
-
-    if (host_bits & float_flag_invalid) {
-        target_bits |= FPSR_IOC;
-    }
-    if (host_bits & float_flag_divbyzero) {
-        target_bits |= FPSR_DZC;
-    }
-    if (host_bits & float_flag_overflow) {
-        target_bits |= FPSR_OFC;
-    }
-    if (host_bits & (float_flag_underflow | float_flag_output_denormal_flushed)) {
-        target_bits |= FPSR_UFC;
-    }
-    if (host_bits & float_flag_inexact) {
-        target_bits |= FPSR_IXC;
-    }
-    if (host_bits & float_flag_input_denormal_flushed) {
-        target_bits |= FPSR_IDC;
-    }
-    /*
-     * With FPCR.AH, IDC is set when an input denormal is used,
-     * and flushing an output denormal to zero sets both IXC and UFC.
-     */
-    if (ah && (host_bits & float_flag_input_denormal_used)) {
-        target_bits |= FPSR_IDC;
-    }
-    if (ah && (host_bits & float_flag_output_denormal_flushed)) {
-        target_bits |= FPSR_IXC;
-    }
-    return target_bits;
-}
-
-static uint32_t vfp_get_fpsr_from_host(CPUARMState *env)
-{
-    uint32_t a32_flags = 0, a64_flags = 0;
-
-    a32_flags |= get_float_exception_flags(&env->vfp.fp_status[FPST_A32]);
-    a32_flags |= get_float_exception_flags(&env->vfp.fp_status[FPST_STD]);
-    /* FZ16 does not generate an input denormal exception.  */
-    a32_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_A32_F16])
-          & ~float_flag_input_denormal_flushed);
-    a32_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_STD_F16])
-          & ~float_flag_input_denormal_flushed);
-
-    a64_flags |= get_float_exception_flags(&env->vfp.fp_status[FPST_A64]);
-    a64_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_A64_F16])
-          & ~(float_flag_input_denormal_flushed | float_flag_input_denormal_used));
-    /*
-     * We do not merge in flags from FPST_AH or FPST_AH_F16, because
-     * they are used for insns that must not set the cumulative exception bits.
-     */
-
-    /*
-     * Flushing an input denormal *only* because FPCR.FIZ == 1 does
-     * not set FPSR.IDC; if FPCR.FZ is also set then this takes
-     * precedence and IDC is set (see the FPUnpackBase pseudocode).
-     * So squash it unless (FPCR.AH == 0 && FPCR.FZ == 1).
-     * We only do this for the a64 flags because FIZ has no effect
-     * on AArch32 even if it is set.
-     */
-    if ((env->vfp.fpcr & (FPCR_FZ | FPCR_AH)) != FPCR_FZ) {
-        a64_flags &= ~float_flag_input_denormal_flushed;
-    }
-    return vfp_exceptbits_from_host(a64_flags, env->vfp.fpcr & FPCR_AH) |
-        vfp_exceptbits_from_host(a32_flags, false);
-}
-
-static void vfp_clear_float_status_exc_flags(CPUARMState *env)
-{
-    /*
-     * Clear out all the exception-flag information in the float_status
-     * values. The caller should have arranged for env->vfp.fpsr to
-     * be the architecturally up-to-date exception flag information first.
-     */
-    set_float_exception_flags(0, &env->vfp.fp_status[FPST_A32]);
-    set_float_exception_flags(0, &env->vfp.fp_status[FPST_A64]);
-    set_float_exception_flags(0, &env->vfp.fp_status[FPST_A32_F16]);
-    set_float_exception_flags(0, &env->vfp.fp_status[FPST_A64_F16]);
-    set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD]);
-    set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD_F16]);
-    set_float_exception_flags(0, &env->vfp.fp_status[FPST_AH]);
-    set_float_exception_flags(0, &env->vfp.fp_status[FPST_AH_F16]);
-}
-
-static void vfp_sync_and_clear_float_status_exc_flags(CPUARMState *env)
-{
-    /*
-     * Synchronize any pending exception-flag information in the
-     * float_status values into env->vfp.fpsr, and then clear out
-     * the float_status data.
-     */
-    env->vfp.fpsr |= vfp_get_fpsr_from_host(env);
-    vfp_clear_float_status_exc_flags(env);
-}
-
-static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
-{
-    uint64_t changed = env->vfp.fpcr;
-
-    changed ^= val;
-    changed &= mask;
-    if (changed & (3 << 22)) {
-        int i = (val >> 22) & 3;
-        switch (i) {
-        case FPROUNDING_TIEEVEN:
-            i = float_round_nearest_even;
-            break;
-        case FPROUNDING_POSINF:
-            i = float_round_up;
-            break;
-        case FPROUNDING_NEGINF:
-            i = float_round_down;
-            break;
-        case FPROUNDING_ZERO:
-            i = float_round_to_zero;
-            break;
-        }
-        set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A32]);
-        set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A64]);
-        set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A32_F16]);
-        set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A64_F16]);
-    }
-    if (changed & FPCR_FZ16) {
-        bool ftz_enabled = val & FPCR_FZ16;
-        set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A32_F16]);
-        set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A64_F16]);
-        set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]);
-        set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_AH_F16]);
-        set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A32_F16]);
-        set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A64_F16]);
-        set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]);
-        set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_AH_F16]);
-    }
-    if (changed & FPCR_FZ) {
-        bool ftz_enabled = val & FPCR_FZ;
-        set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A32]);
-        set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A64]);
-        /* FIZ is A64 only so FZ always makes A32 code flush inputs to zero */
-        set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A32]);
-    }
-    if (changed & (FPCR_FZ | FPCR_AH | FPCR_FIZ)) {
-        /*
-         * A64: Flush denormalized inputs to zero if FPCR.FIZ = 1, or
-         * both FPCR.AH = 0 and FPCR.FZ = 1.
-         */
-        bool fitz_enabled = (val & FPCR_FIZ) ||
-            (val & (FPCR_FZ | FPCR_AH)) == FPCR_FZ;
-        set_flush_inputs_to_zero(fitz_enabled, &env->vfp.fp_status[FPST_A64]);
-    }
-    if (changed & FPCR_DN) {
-        bool dnan_enabled = val & FPCR_DN;
-        set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A32]);
-        set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A64]);
-        set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A32_F16]);
-        set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A64_F16]);
-        set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH]);
-        set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH_F16]);
-    }
-    if (changed & FPCR_AH) {
-        bool ah_enabled = val & FPCR_AH;
-
-        if (ah_enabled) {
-            /* Change behaviours for A64 FP operations */
-            arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_A64]);
-            arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]);
-        } else {
-            arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64]);
-            arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]);
-        }
-    }
-    /*
-     * If any bits changed that we look at in vfp_get_fpsr_from_host(),
-     * we must sync the float_status flags into vfp.fpsr now (under the
-     * old regime) before we update vfp.fpcr.
-     */
-    if (changed & (FPCR_FZ | FPCR_AH | FPCR_FIZ)) {
-        vfp_sync_and_clear_float_status_exc_flags(env);
-    }
-}
-
-#else
-
-static uint32_t vfp_get_fpsr_from_host(CPUARMState *env)
-{
-    return 0;
-}
-
-static void vfp_clear_float_status_exc_flags(CPUARMState *env)
-{
-}
-
-static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
-{
-}
-
-#endif
 
 uint32_t vfp_get_fpcr(CPUARMState *env)
 {

From cb8bb8472ef83d8c8c6beac37d6db47ab3b68e18 Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Fri, 21 Feb 2025 19:09:56 +0000
Subject: [PATCH 24/43] target/arm: Rename vfp_helper.c to vfp_fpscr.c

The vfp_helper.c in the target/arm directory now only has
code for handling FPSCR/FPCR/FPSR in it, and no helper
functions. Rename it to vfp_fpscr.c; this helps keep it
distinct from tcg/vfp_helper.c.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20250221190957.811948-5-peter.maydell@linaro.org
---
 target/arm/meson.build                   | 2 +-
 target/arm/{vfp_helper.c => vfp_fpscr.c} | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)
 rename target/arm/{vfp_helper.c => vfp_fpscr.c} (98%)

diff --git a/target/arm/meson.build b/target/arm/meson.build
index 2e10464dbb..3065081d24 100644
--- a/target/arm/meson.build
+++ b/target/arm/meson.build
@@ -4,7 +4,7 @@ arm_ss.add(files(
   'debug_helper.c',
   'gdbstub.c',
   'helper.c',
-  'vfp_helper.c',
+  'vfp_fpscr.c',
 ))
 arm_ss.add(zlib)
 
diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_fpscr.c
similarity index 98%
rename from target/arm/vfp_helper.c
rename to target/arm/vfp_fpscr.c
index cc0f055ef0..92ea60ebbf 100644
--- a/target/arm/vfp_helper.c
+++ b/target/arm/vfp_fpscr.c
@@ -1,5 +1,5 @@
 /*
- * ARM VFP floating-point operations
+ * ARM VFP floating-point: handling of FPSCR/FPCR/FPSR
  *
  *  Copyright (c) 2003 Fabrice Bellard
  *

From fd207677a83087454b8afef31651985a1df0d2dd Mon Sep 17 00:00:00 2001
From: Joelle van Dyne <j@getutm.app>
Date: Mon, 24 Feb 2025 08:57:34 -0800
Subject: [PATCH 25/43] target/arm/hvf: Disable SME feature

macOS 15.2's Hypervisor.framework exposes SME feature on M4 Macs.
However, QEMU's hvf accelerator code does not properly support it
yet, causing QEMU to fail to start when hvf accelerator is used on
these systems, with the error message:

  qemu-aarch64-softmmu: cannot disable sme4224
  All SME vector lengths are disabled.
  With SME enabled, at least one vector length must be enabled.

Ideally we would have SME support on these hosts; however, until that
point, we must suppress the SME feature in the ID registers, so that
users can at least run non-SME guests.

Cc: qemu-stable@nongnu.org
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2665
Signed-off-by: Joelle van Dyne <j@getutm.app>
Message-id: 20250224165735.36792-1-j@getutm.app
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
[PMM: expanded commit message, comment]
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target/arm/hvf/hvf.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c
index 0afd96018e..872a25be86 100644
--- a/target/arm/hvf/hvf.c
+++ b/target/arm/hvf/hvf.c
@@ -899,6 +899,18 @@ static bool hvf_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf)
 
     clamp_id_aa64mmfr0_parange_to_ipa_size(&host_isar.id_aa64mmfr0);
 
+    /*
+     * Disable SME, which is not properly handled by QEMU hvf yet.
+     * To allow this through we would need to:
+     * - make sure that the SME state is correctly handled in the
+     *   get_registers/put_registers functions
+     * - get the SME-specific CPU properties to work with accelerators
+     *   other than TCG
+     * - fix any assumptions we made that SME implies SVE (since
+     *   on the M4 there is SME but not SVE)
+     */
+    host_isar.id_aa64pfr1 &= ~R_ID_AA64PFR1_SME_MASK;
+
     ahcf->isar = host_isar;
 
     /*

From 12c365315ab25d364cff24dfeea8d7ff1e752b9f Mon Sep 17 00:00:00 2001
From: Joelle van Dyne <j@getutm.app>
Date: Mon, 24 Feb 2025 10:41:23 -0800
Subject: [PATCH 26/43] target/arm/hvf: sign extend the data for a load
 operation when SSE=1

In the syndrome value for a data abort, bit 21 is SSE, which is
set to indicate that the abort was on a sign-extending load. When
we handle the data abort from the guest via address_space_read(),
we forgot to handle this and so would return the wrong value if
the guest did a sign-extending load to an MMIO region. Add the
sign-extension of the returned data.

Cc: qemu-stable@nongnu.org
Signed-off-by: Joelle van Dyne <j@getutm.app>
Message-id: 20250224184123.50780-1-j@getutm.app
[PMM: Drop an unnecessary check on 'len'; expand commit message]
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target/arm/hvf/hvf.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c
index 872a25be86..2439af63a0 100644
--- a/target/arm/hvf/hvf.c
+++ b/target/arm/hvf/hvf.c
@@ -1983,6 +1983,7 @@ int hvf_vcpu_exec(CPUState *cpu)
         bool isv = syndrome & ARM_EL_ISV;
         bool iswrite = (syndrome >> 6) & 1;
         bool s1ptw = (syndrome >> 7) & 1;
+        bool sse = (syndrome >> 21) & 1;
         uint32_t sas = (syndrome >> 22) & 3;
         uint32_t len = 1 << sas;
         uint32_t srt = (syndrome >> 16) & 0x1f;
@@ -2010,6 +2011,9 @@ int hvf_vcpu_exec(CPUState *cpu)
             address_space_read(&address_space_memory,
                                hvf_exit->exception.physical_address,
                                MEMTXATTRS_UNSPECIFIED, &val, len);
+            if (sse) {
+                val = sextract64(val, 0, len * 8);
+            }
             hvf_set_reg(cpu, srt, val);
         }
 

From 2cac20cbf7b01e9a4e404db2ff9bee09ee26f315 Mon Sep 17 00:00:00 2001
From: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Date: Mon, 24 Feb 2025 12:50:53 -0800
Subject: [PATCH 27/43] hw/misc/npcm_clk: fix buffer-overflow

Regression introduced by cf76c4
(hw/misc: Add nr_regs and cold_reset_values to NPCM CLK)

cold_reset_values has a different size, depending on the device used
(NPCM7xx vs NPCM8xx). However, s->regs has a fixed size, which matches
NPCM8xx. Thus, when resetting an NPCM7xx, the memcpy reads past the end
of cold_reset_values.

Report by asan:
==2066==ERROR: AddressSanitizer: global-buffer-overflow on address 0x55d68a3e97f0 at pc 0x7fcaf2b2d14b bp 0x7ffff0cc3890 sp 0x7ffff0cc3040
READ of size 196 at 0x55d68a3e97f0 thread T0
    #0 0x7fcaf2b2d14a in __interceptor_memcpy ../../../../src/libsanitizer/sanitizer_common/sanitizer_common_interceptors.inc:827
    #1 0x55d688447e0d in memcpy /usr/include/x86_64-linux-gnu/bits/string_fortified.h:29
    #2 0x55d688447e0d in npcm_clk_enter_reset ../hw/misc/npcm_clk.c:968
    #3 0x55d6899b7213 in resettable_phase_enter ../hw/core/resettable.c:136
    #4 0x55d6899a1ef7 in bus_reset_child_foreach ../hw/core/bus.c:97
    #5 0x55d6899b717d in resettable_child_foreach ../hw/core/resettable.c:92
    #6 0x55d6899b717d in resettable_phase_enter ../hw/core/resettable.c:129
    #7 0x55d6899b4ead in resettable_container_child_foreach ../hw/core/resetcontainer.c:54
    #8 0x55d6899b717d in resettable_child_foreach ../hw/core/resettable.c:92
    #9 0x55d6899b717d in resettable_phase_enter ../hw/core/resettable.c:129
    #10 0x55d6899b7bfa in resettable_assert_reset ../hw/core/resettable.c:55
    #11 0x55d6899b8666 in resettable_reset ../hw/core/resettable.c:45
    #12 0x55d688d15cd2 in qemu_system_reset ../system/runstate.c:527
    #13 0x55d687fc5edd in qdev_machine_creation_done ../hw/core/machine.c:1738
    #14 0x55d688d209bd in qemu_machine_creation_done ../system/vl.c:2779
    #15 0x55d688d209bd in qmp_x_exit_preconfig ../system/vl.c:2807
    #16 0x55d688d281fb in qemu_init ../system/vl.c:3838
    #17 0x55d687ceab12 in main ../system/main.c:68
    #18 0x7fcaef006249  (/lib/x86_64-linux-gnu/libc.so.6+0x27249)
    #19 0x7fcaef006304 in __libc_start_main (/lib/x86_64-linux-gnu/libc.so.6+0x27304)
    #20 0x55d687cf0010 in _start (/home/runner/work/qemu-ci/qemu-ci/build/qemu-system-arm+0x371c010)

0x55d68a3e97f0 is located 0 bytes to the right of global variable 'npcm7xx_cold_reset_values' defined in '../hw/misc/npcm_clk.c:134:23' (0x55d68a3e9780) of size 112

Impacted tests:
Summary of Failures:

check:
  2/747 qemu:qtest+qtest-aarch64 / qtest-aarch64/qom-test                         ERROR             9.28s   killed by signal 6 SIGABRT
  4/747 qemu:qtest+qtest-arm / qtest-arm/qom-test                                 ERROR             7.82s   killed by signal 6 SIGABRT
 32/747 qemu:qtest+qtest-aarch64 / qtest-aarch64/device-introspect-test           ERROR            10.91s   killed by signal 6 SIGABRT
 35/747 qemu:qtest+qtest-arm / qtest-arm/device-introspect-test                   ERROR            11.33s   killed by signal 6 SIGABRT
114/747 qemu:qtest+qtest-arm / qtest-arm/npcm7xx_pwm-test                         ERROR             0.98s   killed by signal 6 SIGABRT
115/747 qemu:qtest+qtest-aarch64 / qtest-aarch64/test-hmp                         ERROR             2.95s   killed by signal 6 SIGABRT
117/747 qemu:qtest+qtest-arm / qtest-arm/test-hmp                                 ERROR             2.54s   killed by signal 6 SIGABRT
151/747 qemu:qtest+qtest-arm / qtest-arm/npcm7xx_watchdog_timer-test              ERROR             0.96s   killed by signal 6 SIGABRT
247/747 qemu:qtest+qtest-arm / qtest-arm/npcm7xx_adc-test                         ERROR             0.96s   killed by signal 6 SIGABRT
248/747 qemu:qtest+qtest-arm / qtest-arm/npcm7xx_gpio-test                        ERROR             1.05s   killed by signal 6 SIGABRT
249/747 qemu:qtest+qtest-arm / qtest-arm/npcm7xx_rng-test                         ERROR             0.97s   killed by signal 6 SIGABRT
250/747 qemu:qtest+qtest-arm / qtest-arm/npcm7xx_sdhci-test                       ERROR             0.97s   killed by signal 6 SIGABRT
251/747 qemu:qtest+qtest-arm / qtest-arm/npcm7xx_smbus-test                       ERROR             0.89s   killed by signal 6 SIGABRT
252/747 qemu:qtest+qtest-arm / qtest-arm/npcm7xx_timer-test                       ERROR             1.09s   killed by signal 6 SIGABRT
253/747 qemu:qtest+qtest-arm / qtest-arm/npcm_gmac-test                           ERROR             1.12s   killed by signal 6 SIGABRT
255/747 qemu:qtest+qtest-arm / qtest-arm/npcm7xx_emc-test                         ERROR             1.05s   killed by signal 6 SIGABRT

check-functional:
 22/203 qemu:func-thorough+func-arm-thorough+thorough / func-arm-arm_quanta_gsj                      ERROR             0.79s   exit status 1
 38/203 qemu:func-quick+func-aarch64 / func-aarch64-migration                                        ERROR             1.97s   exit status 1
 45/203 qemu:func-quick+func-arm / func-arm-migration                                                ERROR             1.90s   exit status 1

Fixes: cf76c4e174e1 ("hw/misc: Add nr_regs and cold_reset_values to NPCM CLK")
Signed-off-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Reviewed-by: Hao Wu <wuhaotsh@google.com>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 hw/misc/npcm_clk.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/hw/misc/npcm_clk.c b/hw/misc/npcm_clk.c
index d1f29759d5..0e85974cf9 100644
--- a/hw/misc/npcm_clk.c
+++ b/hw/misc/npcm_clk.c
@@ -964,8 +964,9 @@ static void npcm_clk_enter_reset(Object *obj, ResetType type)
     NPCMCLKState *s = NPCM_CLK(obj);
     NPCMCLKClass *c = NPCM_CLK_GET_CLASS(s);
 
-    g_assert(sizeof(s->regs) >= c->nr_regs * sizeof(uint32_t));
-    memcpy(s->regs, c->cold_reset_values, sizeof(s->regs));
+    size_t sizeof_regs = c->nr_regs * sizeof(uint32_t);
+    g_assert(sizeof(s->regs) >= sizeof_regs);
+    memcpy(s->regs, c->cold_reset_values, sizeof_regs);
     s->ref_ns = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
     npcm7xx_clk_update_all_clocks(s);
     /*

From b513766ee968dbfca31034b185f0a0fcf99f4269 Mon Sep 17 00:00:00 2001
From: Bernhard Beschow <shentey@gmail.com>
Date: Sun, 23 Feb 2025 12:46:51 +0100
Subject: [PATCH 28/43] hw/usb/hcd-dwc3: Align global registers size with Linux

While at it, add the missing GUSB2RHBCTL register as found in the i.MX 8M Plus
reference manual.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Bernhard Beschow <shentey@gmail.com>
Message-id: 20250223114708.1780-2-shentey@gmail.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 hw/usb/hcd-dwc3.c         | 5 +++++
 include/hw/usb/hcd-dwc3.h | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/hw/usb/hcd-dwc3.c b/hw/usb/hcd-dwc3.c
index 9ce9ba0b04..0bceee2712 100644
--- a/hw/usb/hcd-dwc3.c
+++ b/hw/usb/hcd-dwc3.c
@@ -343,6 +343,8 @@ REG32(GFLADJ, 0x530)
     FIELD(GFLADJ, GFLADJ_REFCLK_FLADJ, 8, 14)
     FIELD(GFLADJ, GFLADJ_30MHZ_SDBND_SEL, 7, 1)
     FIELD(GFLADJ, GFLADJ_30MHZ, 0, 6)
+REG32(GUSB2RHBCTL, 0x540)
+    FIELD(GUSB2RHBCTL, OVRD_L1TIMEOUT, 0, 4)
 
 #define DWC3_GLOBAL_OFFSET 0xC100
 static void reset_csr(USBDWC3 * s)
@@ -560,6 +562,9 @@ static const RegisterAccessInfo usb_dwc3_regs_info[] = {
         .rsvd = 0x40,
         .ro = 0x400040,
         .unimp = 0xffffffff,
+    },{ .name = "GUSB2RHBCTL",  .addr = A_GUSB2RHBCTL,
+        .rsvd = 0xfffffff0,
+        .unimp = 0xffffffff,
     }
 };
 
diff --git a/include/hw/usb/hcd-dwc3.h b/include/hw/usb/hcd-dwc3.h
index f752a27e94..dbdf12b21d 100644
--- a/include/hw/usb/hcd-dwc3.h
+++ b/include/hw/usb/hcd-dwc3.h
@@ -35,7 +35,7 @@
 #define USB_DWC3(obj) \
      OBJECT_CHECK(USBDWC3, (obj), TYPE_USB_DWC3)
 
-#define USB_DWC3_R_MAX ((0x530 / 4) + 1)
+#define USB_DWC3_R_MAX (0x600 / 4)
 #define DWC3_SIZE 0x10000
 
 typedef struct USBDWC3 {

From faa2150a527b1919646316dba268b71ced8762a6 Mon Sep 17 00:00:00 2001
From: Bernhard Beschow <shentey@gmail.com>
Date: Sun, 23 Feb 2025 12:46:52 +0100
Subject: [PATCH 29/43] hw/pci-host/designware: Prevent device attachment on
 internal PCIe root bus

On the real device, the PCIe root bus is only connected to a PCIe bridge and
does not allow for direct attachment of devices. Doing so in QEMU results in no
PCI devices being detected by Linux. Instead, PCI devices should plug into the
secondary PCIe bus spawned by the internal PCIe bridge.

Unfortunately, QEMU defaults to plugging devices into the PCIe root bus. To work
around this, every PCI device created on the command line needs an extra
`bus=dw-pcie` option which is error prone. Fix that by marking the PCIe root bus
as full which makes QEMU descend into the child PCIe bus.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Bernhard Beschow <shentey@gmail.com>
Message-id: 20250223114708.1780-3-shentey@gmail.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 hw/pci-host/designware.c         | 18 +++++++++++++++++-
 include/hw/pci-host/designware.h |  7 +++++++
 2 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/hw/pci-host/designware.c b/hw/pci-host/designware.c
index 3e8c36e6a7..c07740bfaa 100644
--- a/hw/pci-host/designware.c
+++ b/hw/pci-host/designware.c
@@ -55,6 +55,17 @@
 #define DESIGNWARE_PCIE_ATU_DEVFN(x)               (((x) >> 16) & 0xff)
 #define DESIGNWARE_PCIE_ATU_UPPER_TARGET           0x91C
 
+static void designware_pcie_root_bus_class_init(ObjectClass *klass, void *data)
+{
+    BusClass *k = BUS_CLASS(klass);
+
+    /*
+     * Designware has only a single root complex. Enforce the limit on the
+     * parent bus
+     */
+    k->max_dev = 1;
+}
+
 static DesignwarePCIEHost *
 designware_pcie_root_to_host(DesignwarePCIERoot *root)
 {
@@ -699,7 +710,7 @@ static void designware_pcie_host_realize(DeviceState *dev, Error **errp)
                                      &s->pci.memory,
                                      &s->pci.io,
                                      0, 4,
-                                     TYPE_PCIE_BUS);
+                                     TYPE_DESIGNWARE_PCIE_ROOT_BUS);
     pci->bus->flags |= PCI_BUS_EXTENDED_CONFIG_SPACE;
 
     memory_region_init(&s->pci.address_space_root,
@@ -754,6 +765,11 @@ static void designware_pcie_host_init(Object *obj)
 
 static const TypeInfo designware_pcie_types[] = {
     {
+        .name           = TYPE_DESIGNWARE_PCIE_ROOT_BUS,
+        .parent         = TYPE_PCIE_BUS,
+        .instance_size  = sizeof(DesignwarePCIERootBus),
+        .class_init     = designware_pcie_root_bus_class_init,
+    }, {
         .name           = TYPE_DESIGNWARE_PCIE_HOST,
         .parent         = TYPE_PCI_HOST_BRIDGE,
         .instance_size  = sizeof(DesignwarePCIEHost),
diff --git a/include/hw/pci-host/designware.h b/include/hw/pci-host/designware.h
index bf8b278978..a35a3bd06c 100644
--- a/include/hw/pci-host/designware.h
+++ b/include/hw/pci-host/designware.h
@@ -25,12 +25,19 @@
 #include "hw/pci/pci_bridge.h"
 #include "qom/object.h"
 
+#define TYPE_DESIGNWARE_PCIE_ROOT_BUS "designware-pcie-root-BUS"
+OBJECT_DECLARE_SIMPLE_TYPE(DesignwarePCIERootBus, DESIGNWARE_PCIE_ROOT_BUS)
+
 #define TYPE_DESIGNWARE_PCIE_HOST "designware-pcie-host"
 OBJECT_DECLARE_SIMPLE_TYPE(DesignwarePCIEHost, DESIGNWARE_PCIE_HOST)
 
 #define TYPE_DESIGNWARE_PCIE_ROOT "designware-pcie-root"
 OBJECT_DECLARE_SIMPLE_TYPE(DesignwarePCIERoot, DESIGNWARE_PCIE_ROOT)
 
+struct DesignwarePCIERootBus {
+    PCIBus parent;
+};
+
 typedef struct DesignwarePCIEViewport {
     DesignwarePCIERoot *root;
 

From 0f520f0a9d9516fb3563a9b69c820ac73d2017aa Mon Sep 17 00:00:00 2001
From: Bernhard Beschow <shentey@gmail.com>
Date: Sun, 23 Feb 2025 12:46:53 +0100
Subject: [PATCH 30/43] hw/gpio/pca955*: Move Kconfig switches next to
 implementations

The move of the Kconfig bits to hw/gpio is fixing a bug in 6328d8ffa6cb9d
("misc/pca955*: Move models under hw/gpio"), which moved the code but forgot to
move the Kconfig sections.

Fixes: 6328d8ffa6cb9d "misc/pca955*: Move models under hw/gpio"
Signed-off-by: Bernhard Beschow <shentey@gmail.com>
Message-id: 20250223114708.1780-4-shentey@gmail.com
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 hw/gpio/Kconfig | 8 ++++++++
 hw/misc/Kconfig | 8 --------
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/hw/gpio/Kconfig b/hw/gpio/Kconfig
index c423e10f59..a209294c20 100644
--- a/hw/gpio/Kconfig
+++ b/hw/gpio/Kconfig
@@ -16,6 +16,14 @@ config SIFIVE_GPIO
 config STM32L4X5_GPIO
     bool
 
+config PCA9552
+    bool
+    depends on I2C
+
+config PCA9554
+    bool
+    depends on I2C
+
 config PCF8574
     bool
     depends on I2C
diff --git a/hw/misc/Kconfig b/hw/misc/Kconfig
index 8f9ce2f68c..4271e2f4ac 100644
--- a/hw/misc/Kconfig
+++ b/hw/misc/Kconfig
@@ -30,14 +30,6 @@ config EDU
     default y if TEST_DEVICES
     depends on PCI && MSI_NONBROKEN
 
-config PCA9552
-    bool
-    depends on I2C
-
-config PCA9554
-    bool
-    depends on I2C
-
 config I2C_ECHO
     bool
     default y if TEST_DEVICES

From a4eefc69b23713c4e5981d9d91a6e15dfd4496fe Mon Sep 17 00:00:00 2001
From: Bernhard Beschow <shentey@gmail.com>
Date: Sun, 23 Feb 2025 12:46:54 +0100
Subject: [PATCH 31/43] hw/arm: Add i.MX 8M Plus EVK board

As a first step, implement the bare minimum: CPUs, RAM, interrupt controller,
serial. All other devices of the A53 memory map are represented as
TYPE_UNIMPLEMENTED_DEVICE, i.e. the whole memory map is provided. This allows
for running Linux without it crashing due to invalid memory accesses.

Signed-off-by: Bernhard Beschow <shentey@gmail.com>
Message-id: 20250223114708.1780-5-shentey@gmail.com
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
[PMM: drop 'static const' from serial_table[] definition to avoid
 compile failure on GCC 7.5]
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 MAINTAINERS                    |   9 +
 docs/system/arm/imx8mp-evk.rst |  54 +++++
 docs/system/target-arm.rst     |   1 +
 hw/arm/Kconfig                 |  12 ++
 hw/arm/fsl-imx8mp.c            | 367 +++++++++++++++++++++++++++++++++
 hw/arm/imx8mp-evk.c            |  55 +++++
 hw/arm/meson.build             |   2 +
 include/hw/arm/fsl-imx8mp.h    | 189 +++++++++++++++++
 8 files changed, 689 insertions(+)
 create mode 100644 docs/system/arm/imx8mp-evk.rst
 create mode 100644 hw/arm/fsl-imx8mp.c
 create mode 100644 hw/arm/imx8mp-evk.c
 create mode 100644 include/hw/arm/fsl-imx8mp.h

diff --git a/MAINTAINERS b/MAINTAINERS
index 1911949526..374fe98724 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -820,6 +820,15 @@ F: hw/pci-host/designware.c
 F: include/hw/pci-host/designware.h
 F: docs/system/arm/mcimx7d-sabre.rst
 
+MCIMX8MP-EVK / i.MX8MP
+M: Bernhard Beschow <shentey@gmail.com>
+L: qemu-arm@nongnu.org
+S: Maintained
+F: hw/arm/imx8mp-evk.c
+F: hw/arm/fsl-imx8mp.c
+F: include/hw/arm/fsl-imx8mp.h
+F: docs/system/arm/imx8mp-evk.rst
+
 MPS2 / MPS3
 M: Peter Maydell <peter.maydell@linaro.org>
 L: qemu-arm@nongnu.org
diff --git a/docs/system/arm/imx8mp-evk.rst b/docs/system/arm/imx8mp-evk.rst
new file mode 100644
index 0000000000..b23fdcc743
--- /dev/null
+++ b/docs/system/arm/imx8mp-evk.rst
@@ -0,0 +1,54 @@
+NXP i.MX 8M Plus Evaluation Kit (``imx8mp-evk``)
+================================================
+
+The ``imx8mp-evk`` machine models the i.MX 8M Plus Evaluation Kit, based on an
+i.MX 8M Plus SoC.
+
+Supported devices
+-----------------
+
+The ``imx8mp-evk`` machine implements the following devices:
+
+ * Up to 4 Cortex-A53 cores
+ * Generic Interrupt Controller (GICv3)
+ * 4 UARTs
+
+Boot options
+------------
+
+The ``imx8mp-evk`` machine can start a Linux kernel directly using the standard
+``-kernel`` functionality.
+
+Direct Linux Kernel Boot
+''''''''''''''''''''''''
+
+Probably the easiest way to get started with a whole Linux system on the machine
+is to generate an image with Buildroot. Version 2024.11.1 is tested at the time
+of writing and involves two steps. First run the following commands in the
+toplevel directory of the Buildroot source tree:
+
+.. code-block:: bash
+
+  $ echo "BR2_TARGET_ROOTFS_CPIO=y" >> configs/freescale_imx8mpevk_defconfig
+  $ make freescale_imx8mpevk_defconfig
+  $ make
+
+Once finished successfully there is an ``output/images`` subfolder. Navigate into
+it and patch the device tree with the following commands which will remove the
+``cpu-idle-states`` properties from CPU nodes:
+
+.. code-block:: bash
+
+  $ dtc imx8mp-evk.dtb | sed '/cpu-idle-states/d' > imx8mp-evk-patched.dts
+  $ dtc imx8mp-evk-patched.dts -o imx8mp-evk-patched.dtb
+
+Now that everything is prepared the machine can be started as follows:
+
+.. code-block:: bash
+
+  $ qemu-system-aarch64 -M imx8mp-evk -smp 4 -m 3G \
+      -display none -serial null -serial stdio \
+      -kernel Image \
+      -dtb imx8mp-evk-patched.dtb \
+      -initrd rootfs.cpio \
+      -append "root=/dev/ram"
diff --git a/docs/system/target-arm.rst b/docs/system/target-arm.rst
index 9aaa9c414c..a43ec8f10e 100644
--- a/docs/system/target-arm.rst
+++ b/docs/system/target-arm.rst
@@ -95,6 +95,7 @@ Board-specific documentation
    arm/imx25-pdk
    arm/mcimx6ul-evk
    arm/mcimx7d-sabre
+   arm/imx8mp-evk
    arm/orangepi
    arm/raspi
    arm/collie
diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig
index 504841ccab..0a7de40861 100644
--- a/hw/arm/Kconfig
+++ b/hw/arm/Kconfig
@@ -593,6 +593,18 @@ config FSL_IMX7
     select UNIMP
     select USB_CHIPIDEA
 
+config FSL_IMX8MP
+    bool
+    select ARM_GIC
+    select IMX
+    select UNIMP
+
+config FSL_IMX8MP_EVK
+    bool
+    default y
+    depends on TCG && AARCH64
+    select FSL_IMX8MP
+
 config ARM_SMMUV3
     bool
 
diff --git a/hw/arm/fsl-imx8mp.c b/hw/arm/fsl-imx8mp.c
new file mode 100644
index 0000000000..084b1d3bb1
--- /dev/null
+++ b/hw/arm/fsl-imx8mp.c
@@ -0,0 +1,367 @@
+/*
+ * i.MX 8M Plus SoC Implementation
+ *
+ * Based on hw/arm/fsl-imx6.c
+ *
+ * Copyright (c) 2024, Bernhard Beschow <shentey@gmail.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "exec/address-spaces.h"
+#include "hw/arm/bsa.h"
+#include "hw/arm/fsl-imx8mp.h"
+#include "hw/intc/arm_gicv3.h"
+#include "hw/misc/unimp.h"
+#include "hw/boards.h"
+#include "system/system.h"
+#include "target/arm/cpu-qom.h"
+#include "qapi/error.h"
+#include "qobject/qlist.h"
+
+static const struct {
+    hwaddr addr; /* base address of the region */
+    uint64_t size; /* bytes; 8 GiB RAM entry overflows a 32-bit size_t */
+    const char *name; /* label handed to create_unimplemented_device() */
+} fsl_imx8mp_memmap[] = {
+    [FSL_IMX8MP_RAM] = { FSL_IMX8MP_RAM_START, FSL_IMX8MP_RAM_SIZE_MAX, "ram" },
+    [FSL_IMX8MP_DDR_PHY_BROADCAST] = { 0x3dc00000, 4 * MiB, "ddr_phy_broadcast" },
+    [FSL_IMX8MP_DDR_PERF_MON] = { 0x3d800000, 4 * MiB, "ddr_perf_mon" },
+    [FSL_IMX8MP_DDR_CTL] = { 0x3d400000, 4 * MiB, "ddr_ctl" },
+    [FSL_IMX8MP_DDR_BLK_CTRL] = { 0x3d000000, 1 * MiB, "ddr_blk_ctrl" },
+    [FSL_IMX8MP_DDR_PHY] = { 0x3c000000, 16 * MiB, "ddr_phy" },
+    [FSL_IMX8MP_AUDIO_DSP] = { 0x3b000000, 16 * MiB, "audio_dsp" },
+    [FSL_IMX8MP_GIC_DIST] = { 0x38800000, 512 * KiB, "gic_dist" },
+    [FSL_IMX8MP_GIC_REDIST] = { 0x38880000, 512 * KiB, "gic_redist" },
+    [FSL_IMX8MP_NPU] = { 0x38500000, 2 * MiB, "npu" },
+    [FSL_IMX8MP_VPU] = { 0x38340000, 2 * MiB, "vpu" },
+    [FSL_IMX8MP_VPU_BLK_CTRL] = { 0x38330000, 2 * MiB, "vpu_blk_ctrl" },
+    [FSL_IMX8MP_VPU_VC8000E_ENCODER] = { 0x38320000, 2 * MiB, "vpu_vc8000e_encoder" },
+    [FSL_IMX8MP_VPU_G2_DECODER] = { 0x38310000, 2 * MiB, "vpu_g2_decoder" },
+    [FSL_IMX8MP_VPU_G1_DECODER] = { 0x38300000, 2 * MiB, "vpu_g1_decoder" },
+    [FSL_IMX8MP_USB2] = { 0x38200000, 1 * MiB, "usb2" },
+    [FSL_IMX8MP_USB1] = { 0x38100000, 1 * MiB, "usb1" },
+    [FSL_IMX8MP_GPU2D] = { 0x38008000, 32 * KiB, "gpu2d" },
+    [FSL_IMX8MP_GPU3D] = { 0x38000000, 32 * KiB, "gpu3d" },
+    [FSL_IMX8MP_QSPI1_RX_BUFFER] = { 0x34000000, 32 * MiB, "qspi1_rx_buffer" },
+    [FSL_IMX8MP_PCIE1] = { 0x33800000, 4 * MiB, "pcie1" },
+    [FSL_IMX8MP_QSPI1_TX_BUFFER] = { 0x33008000, 32 * KiB, "qspi1_tx_buffer" },
+    [FSL_IMX8MP_APBH_DMA] = { 0x33000000, 32 * KiB, "apbh_dma" },
+
+    /* AIPS-5 Begin */
+    [FSL_IMX8MP_MU_3_B] = { 0x30e90000, 64 * KiB, "mu_3_b" },
+    [FSL_IMX8MP_MU_3_A] = { 0x30e80000, 64 * KiB, "mu_3_a" },
+    [FSL_IMX8MP_MU_2_B] = { 0x30e70000, 64 * KiB, "mu_2_b" },
+    [FSL_IMX8MP_MU_2_A] = { 0x30e60000, 64 * KiB, "mu_2_a" },
+    [FSL_IMX8MP_EDMA_CHANNELS] = { 0x30e40000, 128 * KiB, "edma_channels" },
+    [FSL_IMX8MP_EDMA_MANAGEMENT_PAGE] = { 0x30e30000, 64 * KiB, "edma_management_page" },
+    [FSL_IMX8MP_AUDIO_BLK_CTRL] = { 0x30e20000, 64 * KiB, "audio_blk_ctrl" },
+    [FSL_IMX8MP_SDMA2] = { 0x30e10000, 64 * KiB, "sdma2" },
+    [FSL_IMX8MP_SDMA3] = { 0x30e00000, 64 * KiB, "sdma3" },
+    [FSL_IMX8MP_AIPS5_CONFIGURATION] = { 0x30df0000, 64 * KiB, "aips5_configuration" },
+    [FSL_IMX8MP_SPBA2] = { 0x30cf0000, 64 * KiB, "spba2" },
+    [FSL_IMX8MP_AUDIO_XCVR_RX] = { 0x30cc0000, 64 * KiB, "audio_xcvr_rx" },
+    [FSL_IMX8MP_HDMI_TX_AUDLNK_MSTR] = { 0x30cb0000, 64 * KiB, "hdmi_tx_audlnk_mstr" },
+    [FSL_IMX8MP_PDM] = { 0x30ca0000, 64 * KiB, "pdm" },
+    [FSL_IMX8MP_ASRC] = { 0x30c90000, 64 * KiB, "asrc" },
+    [FSL_IMX8MP_SAI7] = { 0x30c80000, 64 * KiB, "sai7" },
+    [FSL_IMX8MP_SAI6] = { 0x30c60000, 64 * KiB, "sai6" },
+    [FSL_IMX8MP_SAI5] = { 0x30c50000, 64 * KiB, "sai5" },
+    [FSL_IMX8MP_SAI3] = { 0x30c30000, 64 * KiB, "sai3" },
+    [FSL_IMX8MP_SAI2] = { 0x30c20000, 64 * KiB, "sai2" },
+    [FSL_IMX8MP_SAI1] = { 0x30c10000, 64 * KiB, "sai1" },
+    /* AIPS-5 End */
+
+    /* AIPS-4 Begin */
+    [FSL_IMX8MP_HDMI_TX] = { 0x32fc0000, 128 * KiB, "hdmi_tx" },
+    [FSL_IMX8MP_TZASC] = { 0x32f80000, 64 * KiB, "tzasc" },
+    [FSL_IMX8MP_HSIO_BLK_CTL] = { 0x32f10000, 64 * KiB, "hsio_blk_ctl" },
+    [FSL_IMX8MP_PCIE_PHY1] = { 0x32f00000, 64 * KiB, "pcie_phy1" },
+    [FSL_IMX8MP_MEDIA_BLK_CTL] = { 0x32ec0000, 64 * KiB, "media_blk_ctl" },
+    [FSL_IMX8MP_LCDIF2] = { 0x32e90000, 64 * KiB, "lcdif2" },
+    [FSL_IMX8MP_LCDIF1] = { 0x32e80000, 64 * KiB, "lcdif1" },
+    [FSL_IMX8MP_MIPI_DSI1] = { 0x32e60000, 64 * KiB, "mipi_dsi1" },
+    [FSL_IMX8MP_MIPI_CSI2] = { 0x32e50000, 64 * KiB, "mipi_csi2" },
+    [FSL_IMX8MP_MIPI_CSI1] = { 0x32e40000, 64 * KiB, "mipi_csi1" },
+    [FSL_IMX8MP_IPS_DEWARP] = { 0x32e30000, 64 * KiB, "ips_dewarp" },
+    [FSL_IMX8MP_ISP2] = { 0x32e20000, 64 * KiB, "isp2" },
+    [FSL_IMX8MP_ISP1] = { 0x32e10000, 64 * KiB, "isp1" },
+    [FSL_IMX8MP_ISI] = { 0x32e00000, 64 * KiB, "isi" },
+    [FSL_IMX8MP_AIPS4_CONFIGURATION] = { 0x32df0000, 64 * KiB, "aips4_configuration" },
+    /* AIPS-4 End */
+
+    [FSL_IMX8MP_INTERCONNECT] = { 0x32700000, 1 * MiB, "interconnect" },
+
+    /* AIPS-3 Begin */
+    [FSL_IMX8MP_ENET2_TSN] = { 0x30bf0000, 64 * KiB, "enet2_tsn" },
+    [FSL_IMX8MP_ENET1] = { 0x30be0000, 64 * KiB, "enet1" },
+    [FSL_IMX8MP_SDMA1] = { 0x30bd0000, 64 * KiB, "sdma1" },
+    [FSL_IMX8MP_QSPI] = { 0x30bb0000, 64 * KiB, "qspi" },
+    [FSL_IMX8MP_USDHC3] = { 0x30b60000, 64 * KiB, "usdhc3" },
+    [FSL_IMX8MP_USDHC2] = { 0x30b50000, 64 * KiB, "usdhc2" },
+    [FSL_IMX8MP_USDHC1] = { 0x30b40000, 64 * KiB, "usdhc1" },
+    [FSL_IMX8MP_I2C6] = { 0x30ae0000, 64 * KiB, "i2c6" },
+    [FSL_IMX8MP_I2C5] = { 0x30ad0000, 64 * KiB, "i2c5" },
+    [FSL_IMX8MP_SEMAPHORE_HS] = { 0x30ac0000, 64 * KiB, "semaphore_hs" },
+    [FSL_IMX8MP_MU_1_B] = { 0x30ab0000, 64 * KiB, "mu_1_b" },
+    [FSL_IMX8MP_MU_1_A] = { 0x30aa0000, 64 * KiB, "mu_1_a" },
+    [FSL_IMX8MP_AUD_IRQ_STEER] = { 0x30a80000, 64 * KiB, "aud_irq_steer" },
+    [FSL_IMX8MP_UART4] = { 0x30a60000, 64 * KiB, "uart4" },
+    [FSL_IMX8MP_I2C4] = { 0x30a50000, 64 * KiB, "i2c4" },
+    [FSL_IMX8MP_I2C3] = { 0x30a40000, 64 * KiB, "i2c3" },
+    [FSL_IMX8MP_I2C2] = { 0x30a30000, 64 * KiB, "i2c2" },
+    [FSL_IMX8MP_I2C1] = { 0x30a20000, 64 * KiB, "i2c1" },
+    [FSL_IMX8MP_AIPS3_CONFIGURATION] = { 0x309f0000, 64 * KiB, "aips3_configuration" },
+    [FSL_IMX8MP_CAAM] = { 0x30900000, 256 * KiB, "caam" },
+    [FSL_IMX8MP_SPBA1] = { 0x308f0000, 64 * KiB, "spba1" },
+    [FSL_IMX8MP_FLEXCAN2] = { 0x308d0000, 64 * KiB, "flexcan2" },
+    [FSL_IMX8MP_FLEXCAN1] = { 0x308c0000, 64 * KiB, "flexcan1" },
+    [FSL_IMX8MP_UART2] = { 0x30890000, 64 * KiB, "uart2" },
+    [FSL_IMX8MP_UART3] = { 0x30880000, 64 * KiB, "uart3" },
+    [FSL_IMX8MP_UART1] = { 0x30860000, 64 * KiB, "uart1" },
+    [FSL_IMX8MP_ECSPI3] = { 0x30840000, 64 * KiB, "ecspi3" },
+    [FSL_IMX8MP_ECSPI2] = { 0x30830000, 64 * KiB, "ecspi2" },
+    [FSL_IMX8MP_ECSPI1] = { 0x30820000, 64 * KiB, "ecspi1" },
+    /* AIPS-3 End */
+
+    /* AIPS-2 Begin */
+    [FSL_IMX8MP_QOSC] = { 0x307f0000, 64 * KiB, "qosc" },
+    [FSL_IMX8MP_PERFMON2] = { 0x307d0000, 64 * KiB, "perfmon2" },
+    [FSL_IMX8MP_PERFMON1] = { 0x307c0000, 64 * KiB, "perfmon1" },
+    [FSL_IMX8MP_GPT4] = { 0x30700000, 64 * KiB, "gpt4" },
+    [FSL_IMX8MP_GPT5] = { 0x306f0000, 64 * KiB, "gpt5" },
+    [FSL_IMX8MP_GPT6] = { 0x306e0000, 64 * KiB, "gpt6" },
+    [FSL_IMX8MP_SYSCNT_CTRL] = { 0x306c0000, 64 * KiB, "syscnt_ctrl" },
+    [FSL_IMX8MP_SYSCNT_CMP] = { 0x306b0000, 64 * KiB, "syscnt_cmp" },
+    [FSL_IMX8MP_SYSCNT_RD] = { 0x306a0000, 64 * KiB, "syscnt_rd" },
+    [FSL_IMX8MP_PWM4] = { 0x30690000, 64 * KiB, "pwm4" },
+    [FSL_IMX8MP_PWM3] = { 0x30680000, 64 * KiB, "pwm3" },
+    [FSL_IMX8MP_PWM2] = { 0x30670000, 64 * KiB, "pwm2" },
+    [FSL_IMX8MP_PWM1] = { 0x30660000, 64 * KiB, "pwm1" },
+    [FSL_IMX8MP_AIPS2_CONFIGURATION] = { 0x305f0000, 64 * KiB, "aips2_configuration" },
+    /* AIPS-2 End */
+
+    /* AIPS-1 Begin */
+    [FSL_IMX8MP_CSU] = { 0x303e0000, 64 * KiB, "csu" },
+    [FSL_IMX8MP_RDC] = { 0x303d0000, 64 * KiB, "rdc" },
+    [FSL_IMX8MP_SEMAPHORE2] = { 0x303c0000, 64 * KiB, "semaphore2" },
+    [FSL_IMX8MP_SEMAPHORE1] = { 0x303b0000, 64 * KiB, "semaphore1" },
+    [FSL_IMX8MP_GPC] = { 0x303a0000, 64 * KiB, "gpc" },
+    [FSL_IMX8MP_SRC] = { 0x30390000, 64 * KiB, "src" },
+    [FSL_IMX8MP_CCM] = { 0x30380000, 64 * KiB, "ccm" },
+    [FSL_IMX8MP_SNVS_HP] = { 0x30370000, 64 * KiB, "snvs_hp" },
+    [FSL_IMX8MP_ANA_PLL] = { 0x30360000, 64 * KiB, "ana_pll" },
+    [FSL_IMX8MP_OCOTP_CTRL] = { 0x30350000, 64 * KiB, "ocotp_ctrl" },
+    [FSL_IMX8MP_IOMUXC_GPR] = { 0x30340000, 64 * KiB, "iomuxc_gpr" },
+    [FSL_IMX8MP_IOMUXC] = { 0x30330000, 64 * KiB, "iomuxc" },
+    [FSL_IMX8MP_GPT3] = { 0x302f0000, 64 * KiB, "gpt3" },
+    [FSL_IMX8MP_GPT2] = { 0x302e0000, 64 * KiB, "gpt2" },
+    [FSL_IMX8MP_GPT1] = { 0x302d0000, 64 * KiB, "gpt1" },
+    [FSL_IMX8MP_WDOG3] = { 0x302a0000, 64 * KiB, "wdog3" },
+    [FSL_IMX8MP_WDOG2] = { 0x30290000, 64 * KiB, "wdog2" },
+    [FSL_IMX8MP_WDOG1] = { 0x30280000, 64 * KiB, "wdog1" },
+    [FSL_IMX8MP_ANA_OSC] = { 0x30270000, 64 * KiB, "ana_osc" },
+    [FSL_IMX8MP_ANA_TSENSOR] = { 0x30260000, 64 * KiB, "ana_tsensor" },
+    [FSL_IMX8MP_GPIO5] = { 0x30240000, 64 * KiB, "gpio5" },
+    [FSL_IMX8MP_GPIO4] = { 0x30230000, 64 * KiB, "gpio4" },
+    [FSL_IMX8MP_GPIO3] = { 0x30220000, 64 * KiB, "gpio3" },
+    [FSL_IMX8MP_GPIO2] = { 0x30210000, 64 * KiB, "gpio2" },
+    [FSL_IMX8MP_GPIO1] = { 0x30200000, 64 * KiB, "gpio1" },
+    [FSL_IMX8MP_AIPS1_CONFIGURATION] = { 0x301f0000, 64 * KiB, "aips1_configuration" },
+    /* AIPS-1 End */
+
+    [FSL_IMX8MP_A53_DAP] = { 0x28000000, 16 * MiB, "a53_dap" },
+    [FSL_IMX8MP_PCIE1_MEM] = { 0x18000000, 128 * MiB, "pcie1_mem" },
+    [FSL_IMX8MP_QSPI_MEM] = { 0x08000000, 256 * MiB, "qspi_mem" },
+    [FSL_IMX8MP_OCRAM] = { 0x00900000, 576 * KiB, "ocram" },
+    [FSL_IMX8MP_TCM_DTCM] = { 0x00800000, 128 * KiB, "tcm_dtcm" },
+    [FSL_IMX8MP_TCM_ITCM] = { 0x007e0000, 128 * KiB, "tcm_itcm" },
+    [FSL_IMX8MP_OCRAM_S] = { 0x00180000, 36 * KiB, "ocram_s" },
+    [FSL_IMX8MP_CAAM_MEM] = { 0x00100000, 32 * KiB, "caam_mem" },
+    [FSL_IMX8MP_BOOT_ROM_PROTECTED] = { 0x0003f000, 4 * KiB, "boot_rom_protected" },
+    [FSL_IMX8MP_BOOT_ROM] = { 0x00000000, 252 * KiB, "boot_rom" },
+};
+
+static void fsl_imx8mp_init(Object *obj)
+{
+    MachineState *ms = MACHINE(qdev_get_machine());
+    FslImx8mpState *s = FSL_IMX8MP(obj);
+    int i;
+
+    for (i = 0; i < MIN(ms->smp.cpus, FSL_IMX8MP_NUM_CPUS); i++) {
+        g_autofree char *name = g_strdup_printf("cpu%d", i);
+        object_initialize_child(obj, name, &s->cpu[i],
+                                ARM_CPU_TYPE_NAME("cortex-a53"));
+    }
+
+    object_initialize_child(obj, "gic", &s->gic, TYPE_ARM_GICV3);
+
+    for (i = 0; i < FSL_IMX8MP_NUM_UARTS; i++) {
+        g_autofree char *name = g_strdup_printf("uart%d", i + 1);
+        object_initialize_child(obj, name, &s->uart[i], TYPE_IMX_SERIAL);
+    }
+}
+
+static void fsl_imx8mp_realize(DeviceState *dev, Error **errp)
+{
+    MachineState *ms = MACHINE(qdev_get_machine());
+    FslImx8mpState *s = FSL_IMX8MP(dev);
+    DeviceState *gicdev = DEVICE(&s->gic);
+    int i;
+
+    if (ms->smp.cpus > FSL_IMX8MP_NUM_CPUS) {
+        error_setg(errp, "%s: Only %d CPUs are supported (%d requested)",
+                   TYPE_FSL_IMX8MP, FSL_IMX8MP_NUM_CPUS, ms->smp.cpus);
+        return;
+    }
+
+    /* CPUs */
+    for (i = 0; i < ms->smp.cpus; i++) {
+        /* On uniprocessor, the CBAR is set to 0 */
+        if (ms->smp.cpus > 1) {
+            object_property_set_int(OBJECT(&s->cpu[i]), "reset-cbar",
+                                    fsl_imx8mp_memmap[FSL_IMX8MP_GIC_DIST].addr,
+                                    &error_abort);
+        }
+
+        /*
+         * CNTFID0 base frequency in Hz of system counter
+         */
+        object_property_set_int(OBJECT(&s->cpu[i]), "cntfrq", 8000000,
+                                &error_abort);
+
+        if (i) {
+            /*
+             * Secondary CPUs start in powered-down state (and can be
+             * powered up via the SRC system reset controller)
+             */
+            object_property_set_bool(OBJECT(&s->cpu[i]), "start-powered-off",
+                                     true, &error_abort);
+        }
+
+        if (!qdev_realize(DEVICE(&s->cpu[i]), NULL, errp)) {
+            return;
+        }
+    }
+
+    /* GIC */
+    {
+        SysBusDevice *gicsbd = SYS_BUS_DEVICE(&s->gic);
+        QList *redist_region_count;
+
+        qdev_prop_set_uint32(gicdev, "num-cpu", ms->smp.cpus);
+        qdev_prop_set_uint32(gicdev, "num-irq",
+                             FSL_IMX8MP_NUM_IRQS + GIC_INTERNAL);
+        redist_region_count = qlist_new();
+        qlist_append_int(redist_region_count, ms->smp.cpus);
+        qdev_prop_set_array(gicdev, "redist-region-count", redist_region_count);
+        object_property_set_link(OBJECT(&s->gic), "sysmem",
+                                 OBJECT(get_system_memory()), &error_fatal);
+        if (!sysbus_realize(gicsbd, errp)) {
+            return;
+        }
+        sysbus_mmio_map(gicsbd, 0, fsl_imx8mp_memmap[FSL_IMX8MP_GIC_DIST].addr);
+        sysbus_mmio_map(gicsbd, 1, fsl_imx8mp_memmap[FSL_IMX8MP_GIC_REDIST].addr);
+
+        /*
+         * Wire the outputs from each CPU's generic timer and the GICv3
+         * maintenance interrupt signal to the appropriate GIC PPI inputs, and
+         * the GIC's IRQ/FIQ interrupt outputs to the CPU's inputs.
+         */
+        for (i = 0; i < ms->smp.cpus; i++) {
+            DeviceState *cpudev = DEVICE(&s->cpu[i]);
+            int intidbase = FSL_IMX8MP_NUM_IRQS + i * GIC_INTERNAL;
+            qemu_irq irq;
+
+            /*
+             * Mapping from the output timer irq lines from the CPU to the
+             * GIC PPI inputs.
+             */
+            static const int timer_irqs[] = {
+                [GTIMER_PHYS] = ARCH_TIMER_NS_EL1_IRQ,
+                [GTIMER_VIRT] = ARCH_TIMER_VIRT_IRQ,
+                [GTIMER_HYP]  = ARCH_TIMER_NS_EL2_IRQ,
+                [GTIMER_SEC]  = ARCH_TIMER_S_EL1_IRQ,
+            };
+
+            for (int j = 0; j < ARRAY_SIZE(timer_irqs); j++) {
+                irq = qdev_get_gpio_in(gicdev, intidbase + timer_irqs[j]);
+                qdev_connect_gpio_out(cpudev, j, irq);
+            }
+
+            irq = qdev_get_gpio_in(gicdev, intidbase + ARCH_GIC_MAINT_IRQ);
+            qdev_connect_gpio_out_named(cpudev, "gicv3-maintenance-interrupt",
+                                        0, irq);
+
+            irq = qdev_get_gpio_in(gicdev, intidbase + VIRTUAL_PMU_IRQ);
+            qdev_connect_gpio_out_named(cpudev, "pmu-interrupt", 0, irq);
+
+            sysbus_connect_irq(gicsbd, i,
+                               qdev_get_gpio_in(cpudev, ARM_CPU_IRQ));
+            sysbus_connect_irq(gicsbd, i + ms->smp.cpus,
+                               qdev_get_gpio_in(cpudev, ARM_CPU_FIQ));
+        }
+    }
+
+    /* UARTs */
+    for (i = 0; i < FSL_IMX8MP_NUM_UARTS; i++) {
+        struct {
+            hwaddr addr;
+            unsigned int irq;
+        } serial_table[FSL_IMX8MP_NUM_UARTS] = {
+            { fsl_imx8mp_memmap[FSL_IMX8MP_UART1].addr, FSL_IMX8MP_UART1_IRQ },
+            { fsl_imx8mp_memmap[FSL_IMX8MP_UART2].addr, FSL_IMX8MP_UART2_IRQ },
+            { fsl_imx8mp_memmap[FSL_IMX8MP_UART3].addr, FSL_IMX8MP_UART3_IRQ },
+            { fsl_imx8mp_memmap[FSL_IMX8MP_UART4].addr, FSL_IMX8MP_UART4_IRQ },
+        };
+
+        qdev_prop_set_chr(DEVICE(&s->uart[i]), "chardev", serial_hd(i));
+        if (!sysbus_realize(SYS_BUS_DEVICE(&s->uart[i]), errp)) {
+            return;
+        }
+
+        sysbus_mmio_map(SYS_BUS_DEVICE(&s->uart[i]), 0, serial_table[i].addr);
+        sysbus_connect_irq(SYS_BUS_DEVICE(&s->uart[i]), 0,
+                           qdev_get_gpio_in(gicdev, serial_table[i].irq));
+    }
+
+    /* Unimplemented devices */
+    for (i = 0; i < ARRAY_SIZE(fsl_imx8mp_memmap); i++) {
+        switch (i) {
+        case FSL_IMX8MP_GIC_DIST:
+        case FSL_IMX8MP_GIC_REDIST:
+        case FSL_IMX8MP_RAM:
+        case FSL_IMX8MP_UART1 ... FSL_IMX8MP_UART4:
+            /* device implemented and treated above */
+            break;
+
+        default:
+            create_unimplemented_device(fsl_imx8mp_memmap[i].name,
+                                        fsl_imx8mp_memmap[i].addr,
+                                        fsl_imx8mp_memmap[i].size);
+            break;
+        }
+    }
+}
+
+static void fsl_imx8mp_class_init(ObjectClass *oc, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(oc);
+
+    dc->realize = fsl_imx8mp_realize;
+    dc->user_creatable = false; /* uses qdev_get_machine() and serial_hd() */
+    dc->desc = "i.MX 8M Plus SoC";
+}
+
+static const TypeInfo fsl_imx8mp_types[] = {
+    {
+        .name = TYPE_FSL_IMX8MP,
+        .parent = TYPE_DEVICE,
+        .instance_size = sizeof(FslImx8mpState),
+        .instance_init = fsl_imx8mp_init,
+        .class_init = fsl_imx8mp_class_init,
+    },
+};
+
+DEFINE_TYPES(fsl_imx8mp_types)
diff --git a/hw/arm/imx8mp-evk.c b/hw/arm/imx8mp-evk.c
new file mode 100644
index 0000000000..2756d4c21c
--- /dev/null
+++ b/hw/arm/imx8mp-evk.c
@@ -0,0 +1,55 @@
+/*
+ * NXP i.MX 8M Plus Evaluation Kit System Emulation
+ *
+ * Copyright (c) 2024, Bernhard Beschow <shentey@gmail.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "exec/address-spaces.h"
+#include "hw/arm/boot.h"
+#include "hw/arm/fsl-imx8mp.h"
+#include "hw/boards.h"
+#include "system/qtest.h"
+#include "qemu/error-report.h"
+#include "qapi/error.h"
+
+static void imx8mp_evk_init(MachineState *machine)
+{
+    static struct arm_boot_info boot_info;
+    FslImx8mpState *s;
+    /* Validate RAM size, instantiate the SoC, map RAM and load the kernel. */
+    if (machine->ram_size > FSL_IMX8MP_RAM_SIZE_MAX) {
+        error_report("RAM size " RAM_ADDR_FMT " above max supported (%08" PRIx64 ")",
+                     machine->ram_size, FSL_IMX8MP_RAM_SIZE_MAX);
+        exit(1);
+    }
+
+    boot_info = (struct arm_boot_info) {
+        .loader_start = FSL_IMX8MP_RAM_START,
+        .board_id = -1,
+        .ram_size = machine->ram_size,
+        .psci_conduit = QEMU_PSCI_CONDUIT_SMC,
+    };
+    /* The "soc" child property owns the SoC; drop object_new()'s ref. */
+    s = FSL_IMX8MP(object_new(TYPE_FSL_IMX8MP));
+    object_property_add_child(OBJECT(machine), "soc", OBJECT(s));
+    qdev_realize_and_unref(DEVICE(s), NULL, &error_fatal);
+
+    memory_region_add_subregion(get_system_memory(), FSL_IMX8MP_RAM_START,
+                                machine->ram);
+
+    if (!qtest_enabled()) {
+        arm_load_kernel(&s->cpu[0], machine, &boot_info);
+    }
+}
+
+static void imx8mp_evk_machine_init(MachineClass *mc)
+{
+    mc->desc = "NXP i.MX 8M Plus EVK Board";
+    mc->init = imx8mp_evk_init;
+    mc->max_cpus = FSL_IMX8MP_NUM_CPUS;
+    mc->default_ram_id = "imx8mp-evk.ram";
+}
+DEFINE_MACHINE("imx8mp-evk", imx8mp_evk_machine_init)
diff --git a/hw/arm/meson.build b/hw/arm/meson.build
index 465c757f97..ac473ce7cd 100644
--- a/hw/arm/meson.build
+++ b/hw/arm/meson.build
@@ -54,6 +54,8 @@ arm_ss.add(when: 'CONFIG_MSF2', if_true: files('msf2-soc.c'))
 arm_ss.add(when: 'CONFIG_MUSCA', if_true: files('musca.c'))
 arm_ss.add(when: 'CONFIG_ARMSSE', if_true: files('armsse.c'))
 arm_ss.add(when: 'CONFIG_FSL_IMX7', if_true: files('fsl-imx7.c', 'mcimx7d-sabre.c'))
+arm_ss.add(when: 'CONFIG_FSL_IMX8MP', if_true: files('fsl-imx8mp.c'))
+arm_ss.add(when: 'CONFIG_FSL_IMX8MP_EVK', if_true: files('imx8mp-evk.c'))
 arm_ss.add(when: 'CONFIG_ARM_SMMUV3', if_true: files('smmuv3.c'))
 arm_ss.add(when: 'CONFIG_FSL_IMX6UL', if_true: files('fsl-imx6ul.c', 'mcimx6ul-evk.c'))
 arm_ss.add(when: 'CONFIG_NRF51_SOC', if_true: files('nrf51_soc.c'))
diff --git a/include/hw/arm/fsl-imx8mp.h b/include/hw/arm/fsl-imx8mp.h
new file mode 100644
index 0000000000..57e23d1b69
--- /dev/null
+++ b/include/hw/arm/fsl-imx8mp.h
@@ -0,0 +1,189 @@
+/*
+ * i.MX 8M Plus SoC Definitions
+ *
+ * Copyright (c) 2024, Bernhard Beschow <shentey@gmail.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef FSL_IMX8MP_H
+#define FSL_IMX8MP_H
+
+#include "cpu.h"
+#include "hw/char/imx_serial.h"
+#include "hw/intc/arm_gicv3_common.h"
+#include "qom/object.h"
+#include "qemu/units.h"
+
+#define TYPE_FSL_IMX8MP "fsl-imx8mp"
+OBJECT_DECLARE_SIMPLE_TYPE(FslImx8mpState, FSL_IMX8MP)
+
+#define FSL_IMX8MP_RAM_START        0x40000000
+#define FSL_IMX8MP_RAM_SIZE_MAX     (8 * GiB)
+
+enum FslImx8mpConfiguration {
+    FSL_IMX8MP_NUM_CPUS         = 4,
+    FSL_IMX8MP_NUM_IRQS         = 160,
+    FSL_IMX8MP_NUM_UARTS        = 4,
+};
+
+struct FslImx8mpState {
+    DeviceState    parent_obj;
+
+    ARMCPU             cpu[FSL_IMX8MP_NUM_CPUS];
+    GICv3State         gic;
+    IMXSerialState     uart[FSL_IMX8MP_NUM_UARTS];
+};
+
+enum FslImx8mpMemoryRegions {
+    FSL_IMX8MP_A53_DAP,
+    FSL_IMX8MP_AIPS1_CONFIGURATION,
+    FSL_IMX8MP_AIPS2_CONFIGURATION,
+    FSL_IMX8MP_AIPS3_CONFIGURATION,
+    FSL_IMX8MP_AIPS4_CONFIGURATION,
+    FSL_IMX8MP_AIPS5_CONFIGURATION,
+    FSL_IMX8MP_ANA_OSC,
+    FSL_IMX8MP_ANA_PLL,
+    FSL_IMX8MP_ANA_TSENSOR,
+    FSL_IMX8MP_APBH_DMA,
+    FSL_IMX8MP_ASRC,
+    FSL_IMX8MP_AUDIO_BLK_CTRL,
+    FSL_IMX8MP_AUDIO_DSP,
+    FSL_IMX8MP_AUDIO_XCVR_RX,
+    FSL_IMX8MP_AUD_IRQ_STEER,
+    FSL_IMX8MP_BOOT_ROM,
+    FSL_IMX8MP_BOOT_ROM_PROTECTED,
+    FSL_IMX8MP_CAAM,
+    FSL_IMX8MP_CAAM_MEM,
+    FSL_IMX8MP_CCM,
+    FSL_IMX8MP_CSU,
+    FSL_IMX8MP_DDR_BLK_CTRL,
+    FSL_IMX8MP_DDR_CTL,
+    FSL_IMX8MP_DDR_PERF_MON,
+    FSL_IMX8MP_DDR_PHY,
+    FSL_IMX8MP_DDR_PHY_BROADCAST,
+    FSL_IMX8MP_ECSPI1,
+    FSL_IMX8MP_ECSPI2,
+    FSL_IMX8MP_ECSPI3,
+    FSL_IMX8MP_EDMA_CHANNELS,
+    FSL_IMX8MP_EDMA_MANAGEMENT_PAGE,
+    FSL_IMX8MP_ENET1,
+    FSL_IMX8MP_ENET2_TSN,
+    FSL_IMX8MP_FLEXCAN1,
+    FSL_IMX8MP_FLEXCAN2,
+    FSL_IMX8MP_GIC_DIST,
+    FSL_IMX8MP_GIC_REDIST,
+    FSL_IMX8MP_GPC,
+    FSL_IMX8MP_GPIO1,
+    FSL_IMX8MP_GPIO2,
+    FSL_IMX8MP_GPIO3,
+    FSL_IMX8MP_GPIO4,
+    FSL_IMX8MP_GPIO5,
+    FSL_IMX8MP_GPT1,
+    FSL_IMX8MP_GPT2,
+    FSL_IMX8MP_GPT3,
+    FSL_IMX8MP_GPT4,
+    FSL_IMX8MP_GPT5,
+    FSL_IMX8MP_GPT6,
+    FSL_IMX8MP_GPU2D,
+    FSL_IMX8MP_GPU3D,
+    FSL_IMX8MP_HDMI_TX,
+    FSL_IMX8MP_HDMI_TX_AUDLNK_MSTR,
+    FSL_IMX8MP_HSIO_BLK_CTL,
+    FSL_IMX8MP_I2C1,
+    FSL_IMX8MP_I2C2,
+    FSL_IMX8MP_I2C3,
+    FSL_IMX8MP_I2C4,
+    FSL_IMX8MP_I2C5,
+    FSL_IMX8MP_I2C6,
+    FSL_IMX8MP_INTERCONNECT,
+    FSL_IMX8MP_IOMUXC,
+    FSL_IMX8MP_IOMUXC_GPR,
+    FSL_IMX8MP_IPS_DEWARP,
+    FSL_IMX8MP_ISI,
+    FSL_IMX8MP_ISP1,
+    FSL_IMX8MP_ISP2,
+    FSL_IMX8MP_LCDIF1,
+    FSL_IMX8MP_LCDIF2,
+    FSL_IMX8MP_MEDIA_BLK_CTL,
+    FSL_IMX8MP_MIPI_CSI1,
+    FSL_IMX8MP_MIPI_CSI2,
+    FSL_IMX8MP_MIPI_DSI1,
+    FSL_IMX8MP_MU_1_A,
+    FSL_IMX8MP_MU_1_B,
+    FSL_IMX8MP_MU_2_A,
+    FSL_IMX8MP_MU_2_B,
+    FSL_IMX8MP_MU_3_A,
+    FSL_IMX8MP_MU_3_B,
+    FSL_IMX8MP_NPU,
+    FSL_IMX8MP_OCOTP_CTRL,
+    FSL_IMX8MP_OCRAM,
+    FSL_IMX8MP_OCRAM_S,
+    FSL_IMX8MP_PCIE1,
+    FSL_IMX8MP_PCIE1_MEM,
+    FSL_IMX8MP_PCIE_PHY1,
+    FSL_IMX8MP_PDM,
+    FSL_IMX8MP_PERFMON1,
+    FSL_IMX8MP_PERFMON2,
+    FSL_IMX8MP_PWM1,
+    FSL_IMX8MP_PWM2,
+    FSL_IMX8MP_PWM3,
+    FSL_IMX8MP_PWM4,
+    FSL_IMX8MP_QOSC,
+    FSL_IMX8MP_QSPI,
+    FSL_IMX8MP_QSPI1_RX_BUFFER,
+    FSL_IMX8MP_QSPI1_TX_BUFFER,
+    FSL_IMX8MP_QSPI_MEM,
+    FSL_IMX8MP_RAM,
+    FSL_IMX8MP_RDC,
+    FSL_IMX8MP_SAI1,
+    FSL_IMX8MP_SAI2,
+    FSL_IMX8MP_SAI3,
+    FSL_IMX8MP_SAI5,
+    FSL_IMX8MP_SAI6,
+    FSL_IMX8MP_SAI7,
+    FSL_IMX8MP_SDMA1,
+    FSL_IMX8MP_SDMA2,
+    FSL_IMX8MP_SDMA3,
+    FSL_IMX8MP_SEMAPHORE1,
+    FSL_IMX8MP_SEMAPHORE2,
+    FSL_IMX8MP_SEMAPHORE_HS,
+    FSL_IMX8MP_SNVS_HP,
+    FSL_IMX8MP_SPBA1,
+    FSL_IMX8MP_SPBA2,
+    FSL_IMX8MP_SRC,
+    FSL_IMX8MP_SYSCNT_CMP,
+    FSL_IMX8MP_SYSCNT_CTRL,
+    FSL_IMX8MP_SYSCNT_RD,
+    FSL_IMX8MP_TCM_DTCM,
+    FSL_IMX8MP_TCM_ITCM,
+    FSL_IMX8MP_TZASC,
+    FSL_IMX8MP_UART1,
+    FSL_IMX8MP_UART2,
+    FSL_IMX8MP_UART3,
+    FSL_IMX8MP_UART4,
+    FSL_IMX8MP_USB1,
+    FSL_IMX8MP_USB2,
+    FSL_IMX8MP_USDHC1,
+    FSL_IMX8MP_USDHC2,
+    FSL_IMX8MP_USDHC3,
+    FSL_IMX8MP_VPU,
+    FSL_IMX8MP_VPU_BLK_CTRL,
+    FSL_IMX8MP_VPU_G1_DECODER,
+    FSL_IMX8MP_VPU_G2_DECODER,
+    FSL_IMX8MP_VPU_VC8000E_ENCODER,
+    FSL_IMX8MP_WDOG1,
+    FSL_IMX8MP_WDOG2,
+    FSL_IMX8MP_WDOG3,
+};
+
+enum FslImx8mpIrqs {
+    FSL_IMX8MP_UART1_IRQ    = 26,
+    FSL_IMX8MP_UART2_IRQ    = 27,
+    FSL_IMX8MP_UART3_IRQ    = 28,
+    FSL_IMX8MP_UART4_IRQ    = 29,
+    FSL_IMX8MP_UART5_IRQ    = 30,
+    FSL_IMX8MP_UART6_IRQ    = 16,
+};
+
+#endif /* FSL_IMX8MP_H */

From 86c2dff9552ad5a9b2febf329a2dbd2620bc2145 Mon Sep 17 00:00:00 2001
From: Bernhard Beschow <shentey@gmail.com>
Date: Sun, 23 Feb 2025 12:46:55 +0100
Subject: [PATCH 32/43] hw/arm/fsl-imx8mp: Implement clock tree

Fixes quite a few stack traces during the Linux boot process. Also provides the
clocks for devices added later, e.g. enet1.

Signed-off-by: Bernhard Beschow <shentey@gmail.com>
Message-id: 20250223114708.1780-6-shentey@gmail.com
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 MAINTAINERS                     |   2 +
 docs/system/arm/imx8mp-evk.rst  |   1 +
 hw/arm/Kconfig                  |   2 +
 hw/arm/fsl-imx8mp.c             |  20 ++++
 hw/misc/Kconfig                 |   6 ++
 hw/misc/imx8mp_analog.c         | 160 +++++++++++++++++++++++++++++
 hw/misc/imx8mp_ccm.c            | 175 ++++++++++++++++++++++++++++++++
 hw/misc/meson.build             |   2 +
 include/hw/arm/fsl-imx8mp.h     |   4 +
 include/hw/misc/imx8mp_analog.h |  81 +++++++++++++++
 include/hw/misc/imx8mp_ccm.h    |  30 ++++++
 11 files changed, 483 insertions(+)
 create mode 100644 hw/misc/imx8mp_analog.c
 create mode 100644 hw/misc/imx8mp_ccm.c
 create mode 100644 include/hw/misc/imx8mp_analog.h
 create mode 100644 include/hw/misc/imx8mp_ccm.h

diff --git a/MAINTAINERS b/MAINTAINERS
index 374fe98724..8ea7fb4c7a 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -826,7 +826,9 @@ L: qemu-arm@nongnu.org
 S: Maintained
 F: hw/arm/imx8mp-evk.c
 F: hw/arm/fsl-imx8mp.c
+F: hw/misc/imx8mp_*.c
 F: include/hw/arm/fsl-imx8mp.h
+F: include/hw/misc/imx8mp_*.h
 F: docs/system/arm/imx8mp-evk.rst
 
 MPS2 / MPS3
diff --git a/docs/system/arm/imx8mp-evk.rst b/docs/system/arm/imx8mp-evk.rst
index b23fdcc743..f0df346113 100644
--- a/docs/system/arm/imx8mp-evk.rst
+++ b/docs/system/arm/imx8mp-evk.rst
@@ -12,6 +12,7 @@ The ``imx8mp-evk`` machine implements the following devices:
  * Up to 4 Cortex-A53 cores
  * Generic Interrupt Controller (GICv3)
  * 4 UARTs
+ * Clock Tree
 
 Boot options
 ------------
diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig
index 0a7de40861..f77c451ba3 100644
--- a/hw/arm/Kconfig
+++ b/hw/arm/Kconfig
@@ -596,6 +596,8 @@ config FSL_IMX7
 config FSL_IMX8MP
     bool
     select ARM_GIC
+    select FSL_IMX8MP_ANALOG
+    select FSL_IMX8MP_CCM
     select IMX
     select UNIMP
 
diff --git a/hw/arm/fsl-imx8mp.c b/hw/arm/fsl-imx8mp.c
index 084b1d3bb1..bc15b25ca1 100644
--- a/hw/arm/fsl-imx8mp.c
+++ b/hw/arm/fsl-imx8mp.c
@@ -197,6 +197,10 @@ static void fsl_imx8mp_init(Object *obj)
 
     object_initialize_child(obj, "gic", &s->gic, TYPE_ARM_GICV3);
 
+    object_initialize_child(obj, "ccm", &s->ccm, TYPE_IMX8MP_CCM);
+
+    object_initialize_child(obj, "analog", &s->analog, TYPE_IMX8MP_ANALOG);
+
     for (i = 0; i < FSL_IMX8MP_NUM_UARTS; i++) {
         g_autofree char *name = g_strdup_printf("uart%d", i + 1);
         object_initialize_child(obj, name, &s->uart[i], TYPE_IMX_SERIAL);
@@ -304,6 +308,20 @@ static void fsl_imx8mp_realize(DeviceState *dev, Error **errp)
         }
     }
 
+    /* CCM */
+    if (!sysbus_realize(SYS_BUS_DEVICE(&s->ccm), errp)) {
+        return;
+    }
+    sysbus_mmio_map(SYS_BUS_DEVICE(&s->ccm), 0,
+                    fsl_imx8mp_memmap[FSL_IMX8MP_CCM].addr);
+
+    /* Analog */
+    if (!sysbus_realize(SYS_BUS_DEVICE(&s->analog), errp)) {
+        return;
+    }
+    sysbus_mmio_map(SYS_BUS_DEVICE(&s->analog), 0,
+                    fsl_imx8mp_memmap[FSL_IMX8MP_ANA_PLL].addr);
+
     /* UARTs */
     for (i = 0; i < FSL_IMX8MP_NUM_UARTS; i++) {
         struct {
@@ -329,6 +347,8 @@ static void fsl_imx8mp_realize(DeviceState *dev, Error **errp)
     /* Unimplemented devices */
     for (i = 0; i < ARRAY_SIZE(fsl_imx8mp_memmap); i++) {
         switch (i) {
+        case FSL_IMX8MP_ANA_PLL:
+        case FSL_IMX8MP_CCM:
         case FSL_IMX8MP_GIC_DIST:
         case FSL_IMX8MP_GIC_REDIST:
         case FSL_IMX8MP_RAM:
diff --git a/hw/misc/Kconfig b/hw/misc/Kconfig
index 4271e2f4ac..82bd68b4bb 100644
--- a/hw/misc/Kconfig
+++ b/hw/misc/Kconfig
@@ -78,6 +78,12 @@ config IMX
     select SSI
     select USB_EHCI_SYSBUS
 
+config FSL_IMX8MP_ANALOG
+    bool
+
+config FSL_IMX8MP_CCM
+    bool
+
 config STM32_RCC
     bool
 
diff --git a/hw/misc/imx8mp_analog.c b/hw/misc/imx8mp_analog.c
new file mode 100644
index 0000000000..f7e7c83cc4
--- /dev/null
+++ b/hw/misc/imx8mp_analog.c
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2025 Bernhard Beschow <shentey@gmail.com>
+ *
+ * i.MX 8M Plus ANALOG IP block emulation code
+ *
+ * Based on hw/misc/imx7_ccm.c
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/log.h"
+
+#include "hw/misc/imx8mp_analog.h"
+#include "migration/vmstate.h"
+
+#define ANALOG_PLL_LOCK BIT(31)
+
+static void imx8mp_analog_reset(DeviceState *dev)
+{
+    IMX8MPAnalogState *s = IMX8MP_ANALOG(dev);
+
+    memset(s->analog, 0, sizeof(s->analog));
+    /* Per-register reset values; anything not listed below stays zero */
+    s->analog[ANALOG_AUDIO_PLL1_GEN_CTRL] = 0x00002010;
+    s->analog[ANALOG_AUDIO_PLL1_FDIV_CTL0] = 0x00145032;
+    s->analog[ANALOG_AUDIO_PLL1_FDIV_CTL1] = 0x00000000;
+    s->analog[ANALOG_AUDIO_PLL1_SSCG_CTRL] = 0x00000000;
+    s->analog[ANALOG_AUDIO_PLL1_MNIT_CTRL] = 0x00100103;
+    s->analog[ANALOG_AUDIO_PLL2_GEN_CTRL] = 0x00002010;
+    s->analog[ANALOG_AUDIO_PLL2_FDIV_CTL0] = 0x00145032;
+    s->analog[ANALOG_AUDIO_PLL2_FDIV_CTL1] = 0x00000000;
+    s->analog[ANALOG_AUDIO_PLL2_SSCG_CTRL] = 0x00000000;
+    s->analog[ANALOG_AUDIO_PLL2_MNIT_CTRL] = 0x00100103;
+    s->analog[ANALOG_VIDEO_PLL1_GEN_CTRL] = 0x00002010;
+    s->analog[ANALOG_VIDEO_PLL1_FDIV_CTL0] = 0x00145032;
+    s->analog[ANALOG_VIDEO_PLL1_FDIV_CTL1] = 0x00000000;
+    s->analog[ANALOG_VIDEO_PLL1_SSCG_CTRL] = 0x00000000;
+    s->analog[ANALOG_VIDEO_PLL1_MNIT_CTRL] = 0x00100103;
+    s->analog[ANALOG_DRAM_PLL_GEN_CTRL] = 0x00002010;
+    s->analog[ANALOG_DRAM_PLL_FDIV_CTL0] = 0x0012c032;
+    s->analog[ANALOG_DRAM_PLL_FDIV_CTL1] = 0x00000000;
+    s->analog[ANALOG_DRAM_PLL_SSCG_CTRL] = 0x00000000;
+    s->analog[ANALOG_DRAM_PLL_MNIT_CTRL] = 0x00100103;
+    s->analog[ANALOG_GPU_PLL_GEN_CTRL] = 0x00000810;
+    s->analog[ANALOG_GPU_PLL_FDIV_CTL0] = 0x000c8031;
+    s->analog[ANALOG_GPU_PLL_LOCKD_CTRL] = 0x0010003f;
+    s->analog[ANALOG_GPU_PLL_MNIT_CTRL] = 0x00280081;
+    s->analog[ANALOG_VPU_PLL_GEN_CTRL] = 0x00000810;
+    s->analog[ANALOG_VPU_PLL_FDIV_CTL0] = 0x0012c032;
+    s->analog[ANALOG_VPU_PLL_LOCKD_CTRL] = 0x0010003f;
+    s->analog[ANALOG_VPU_PLL_MNIT_CTRL] = 0x00280081;
+    s->analog[ANALOG_ARM_PLL_GEN_CTRL] = 0x00000810;
+    s->analog[ANALOG_ARM_PLL_FDIV_CTL0] = 0x000fa031;
+    s->analog[ANALOG_ARM_PLL_LOCKD_CTRL] = 0x0010003f;
+    s->analog[ANALOG_ARM_PLL_MNIT_CTRL] = 0x00280081;
+    s->analog[ANALOG_SYS_PLL1_GEN_CTRL] = 0x0aaaa810;
+    s->analog[ANALOG_SYS_PLL1_FDIV_CTL0] = 0x00190032;
+    s->analog[ANALOG_SYS_PLL1_LOCKD_CTRL] = 0x0010003f;
+    s->analog[ANALOG_SYS_PLL1_MNIT_CTRL] = 0x00280081;
+    s->analog[ANALOG_SYS_PLL2_GEN_CTRL] = 0x0aaaa810;
+    s->analog[ANALOG_SYS_PLL2_FDIV_CTL0] = 0x000fa031;
+    s->analog[ANALOG_SYS_PLL2_LOCKD_CTRL] = 0x0010003f;
+    s->analog[ANALOG_SYS_PLL2_MNIT_CTRL] = 0x00280081;
+    s->analog[ANALOG_SYS_PLL3_GEN_CTRL] = 0x00000810;
+    s->analog[ANALOG_SYS_PLL3_FDIV_CTL0] = 0x000fa031;
+    s->analog[ANALOG_SYS_PLL3_LOCKD_CTRL] = 0x0010003f;
+    s->analog[ANALOG_SYS_PLL3_MNIT_CTRL] = 0x00280081;
+    s->analog[ANALOG_OSC_MISC_CFG] = 0x00000000;
+    s->analog[ANALOG_ANAMIX_PLL_MNIT_CTL] = 0x00000000;
+    s->analog[ANALOG_DIGPROG] = 0x00824010;
+
+    /* All PLLs must read as locked: set bit 31 of every *_GEN_CTRL */
+    s->analog[ANALOG_AUDIO_PLL1_GEN_CTRL] |= ANALOG_PLL_LOCK;
+    s->analog[ANALOG_AUDIO_PLL2_GEN_CTRL] |= ANALOG_PLL_LOCK;
+    s->analog[ANALOG_VIDEO_PLL1_GEN_CTRL] |= ANALOG_PLL_LOCK;
+    s->analog[ANALOG_DRAM_PLL_GEN_CTRL] |= ANALOG_PLL_LOCK;
+    s->analog[ANALOG_GPU_PLL_GEN_CTRL] |= ANALOG_PLL_LOCK;
+    s->analog[ANALOG_VPU_PLL_GEN_CTRL] |= ANALOG_PLL_LOCK;
+    s->analog[ANALOG_ARM_PLL_GEN_CTRL] |= ANALOG_PLL_LOCK;
+    s->analog[ANALOG_SYS_PLL1_GEN_CTRL] |= ANALOG_PLL_LOCK;
+    s->analog[ANALOG_SYS_PLL2_GEN_CTRL] |= ANALOG_PLL_LOCK;
+    s->analog[ANALOG_SYS_PLL3_GEN_CTRL] |= ANALOG_PLL_LOCK;
+}
+
+static uint64_t imx8mp_analog_read(void *opaque, hwaddr offset, unsigned size)
+{
+    const IMX8MPAnalogState *regs = opaque;
+    /* offset is a byte offset; the register file holds 32-bit words */
+    return regs->analog[offset / sizeof(regs->analog[0])];
+}
+
+static void imx8mp_analog_write(void *opaque, hwaddr offset,
+                                uint64_t value, unsigned size)
+{
+    IMX8MPAnalogState *regs = opaque;
+
+    if (offset >> 2 != ANALOG_DIGPROG) {
+        regs->analog[offset >> 2] = value;
+    } else { /* DIGPROG is read-only: log the attempt, drop the write */
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "Guest write to read-only ANALOG_DIGPROG register\n");
+    }
+}
+
+static const struct MemoryRegionOps imx8mp_analog_ops = {
+    .endianness = DEVICE_NATIVE_ENDIAN,
+    .read = imx8mp_analog_read,
+    .write = imx8mp_analog_write,
+    .impl = { /* callbacks implement 4-byte, aligned accesses */
+        .unaligned = false,
+        .min_access_size = 4,
+        .max_access_size = 4,
+    },
+};
+
+static void imx8mp_analog_init(Object *obj)
+{
+    IMX8MPAnalogState *analog = IMX8MP_ANALOG(obj);
+    MemoryRegion *container = &analog->mmio.container;
+    MemoryRegion *regs = &analog->mmio.analog;
+
+    /* 0x10000-byte container; the register bank is mapped at its offset 0 */
+    memory_region_init(container, obj, TYPE_IMX8MP_ANALOG, 0x10000);
+    memory_region_init_io(regs, obj, &imx8mp_analog_ops, analog,
+                          TYPE_IMX8MP_ANALOG, sizeof(analog->analog));
+    memory_region_add_subregion(container, 0, regs);
+    sysbus_init_mmio(SYS_BUS_DEVICE(obj), container);
+}
+
+static const VMStateDescription imx8mp_analog_vmstate = {
+    .name = TYPE_IMX8MP_ANALOG,
+    .minimum_version_id = 1,
+    .version_id = 1,
+    .fields = (const VMStateField[]) { /* the whole analog[] array */
+        VMSTATE_UINT32_ARRAY(analog, IMX8MPAnalogState, ANALOG_MAX),
+        VMSTATE_END_OF_LIST()
+    },
+};
+
+static void imx8mp_analog_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *device_class = DEVICE_CLASS(klass);
+    /* Set the description, the migration state and the reset handler */
+    device_class->desc = "i.MX 8M Plus Analog Module";
+    device_class->vmsd = &imx8mp_analog_vmstate;
+    device_class_set_legacy_reset(device_class, imx8mp_analog_reset);
+}
+
+static const TypeInfo imx8mp_analog_types[] = {
+    { /* QOM type of the ANALOG block, a child of TYPE_SYS_BUS_DEVICE */
+        .name          = TYPE_IMX8MP_ANALOG,
+        .parent        = TYPE_SYS_BUS_DEVICE,
+        .class_init    = imx8mp_analog_class_init,
+        .instance_init = imx8mp_analog_init,
+        .instance_size = sizeof(IMX8MPAnalogState),
+    },
+};
+
+DEFINE_TYPES(imx8mp_analog_types);
diff --git a/hw/misc/imx8mp_ccm.c b/hw/misc/imx8mp_ccm.c
new file mode 100644
index 0000000000..1a1c932427
--- /dev/null
+++ b/hw/misc/imx8mp_ccm.c
@@ -0,0 +1,175 @@
+/*
+ * Copyright (c) 2025 Bernhard Beschow <shentey@gmail.com>
+ *
+ * i.MX 8M Plus CCM IP block emulation code
+ *
+ * Based on hw/misc/imx7_ccm.c
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/log.h"
+
+#include "hw/misc/imx8mp_ccm.h"
+#include "migration/vmstate.h"
+
+#include "trace.h"
+
+#define CKIH_FREQ 16000000 /* 16MHz crystal input */
+
+static void imx8mp_ccm_reset(DeviceState *dev)
+{
+    IMX8MPCCMState *ccm_state = IMX8MP_CCM(dev);
+    /* every CCM register of this model resets to zero */
+    memset(ccm_state->ccm, 0, sizeof(ccm_state->ccm));
+}
+
+#define CCM_INDEX(offset)   (((offset) & ~(hwaddr)0xF) / sizeof(uint32_t))
+#define CCM_BITOP(offset)   ((offset) & (hwaddr)0xF)
+/* Low nibble of the offset: operation applied to the word at CCM_INDEX() */
+enum {
+    CCM_BITOP_NONE = 0x00, /* store the value as is */
+    CCM_BITOP_SET  = 0x04, /* OR the value in */
+    CCM_BITOP_CLR  = 0x08, /* clear the bits set in the value */
+    CCM_BITOP_TOG  = 0x0C, /* XOR with the value */
+};
+
+static uint64_t imx8mp_set_clr_tog_read(void *opaque, hwaddr offset,
+                                        unsigned size)
+{
+    const uint32_t *regs = opaque;
+    /* the low nibble is masked off, so all four aliases read the same word */
+    return regs[CCM_INDEX(offset)];
+}
+
+static void imx8mp_set_clr_tog_write(void *opaque, hwaddr offset,
+                                     uint64_t value, unsigned size)
+{
+    const uint8_t  bitop = CCM_BITOP(offset);
+    const uint32_t index = CCM_INDEX(offset);
+    uint32_t *mmio = opaque;
+    /* The low nibble of the offset selects store, set, clear or toggle */
+    switch (bitop) {
+    case CCM_BITOP_NONE:
+        mmio[index]  = value;
+        break;
+    case CCM_BITOP_SET:
+        mmio[index] |= value;
+        break;
+    case CCM_BITOP_CLR:
+        mmio[index] &= ~value;
+        break;
+    case CCM_BITOP_TOG:
+        mmio[index] ^= value;
+        break;
+    } /* any other low-nibble value leaves the register untouched */
+}
+
+static const struct MemoryRegionOps imx8mp_set_clr_tog_ops = {
+    .endianness = DEVICE_NATIVE_ENDIAN,
+    .read = imx8mp_set_clr_tog_read,
+    .write = imx8mp_set_clr_tog_write,
+    .impl = {
+        /*
+         * The model cannot cope with unaligned guest accesses. Real
+         * hardware may be more lenient, but guests have no practical
+         * reason to access this device unaligned, so only aligned
+         * 32-bit accesses are implemented.
+         */
+        .unaligned = false,
+        .min_access_size = 4,
+        .max_access_size = 4,
+    },
+};
+
+static void imx8mp_ccm_init(Object *obj)
+{
+    IMX8MPCCMState *ccm_state = IMX8MP_CCM(obj);
+    MemoryRegion *iomem = &ccm_state->iomem;
+
+    /*
+     * The register array itself is the opaque pointer handed to the
+     * set/clr/tog accessors; the region spans exactly that array.
+     */
+    memory_region_init_io(iomem, obj, &imx8mp_set_clr_tog_ops,
+                          ccm_state->ccm, TYPE_IMX8MP_CCM ".ccm",
+                          sizeof(ccm_state->ccm));
+    sysbus_init_mmio(SYS_BUS_DEVICE(obj), iomem);
+}
+
+static const VMStateDescription imx8mp_ccm_vmstate = {
+    .name = TYPE_IMX8MP_CCM,
+    .minimum_version_id = 1,
+    .version_id = 1,
+    .fields = (const VMStateField[]) { /* the whole ccm[] register array */
+        VMSTATE_UINT32_ARRAY(ccm, IMX8MPCCMState, CCM_MAX),
+        VMSTATE_END_OF_LIST()
+    },
+};
+
+static uint32_t imx8mp_ccm_get_clock_frequency(IMXCCMState *dev, IMXClk clock)
+{
+    /*
+     * This function is "consumed" by GPT emulation code. Some clocks
+     * have fixed frequencies and we can provide requested frequency
+     * easily. However for CCM provided clocks (like IPG) each GPT
+     * timer can have its own clock root.
+     * This means we need additional information when calling this
+     * function to know the requester's identity.
+     */
+    uint32_t freq = 0;
+
+    switch (clock) {
+    case CLK_NONE:
+        break;
+    case CLK_32k:
+        freq = CKIL_FREQ;
+        break;
+    case CLK_HIGH:
+        freq = CKIH_FREQ;
+        break;
+    case CLK_IPG:
+    case CLK_IPG_HIGH:
+        /*
+         * We cannot tell which device this is called for, so the IPG
+         * derived clocks are unimplemented. That is a limitation of the
+         * model, not a guest bug, hence LOG_UNIMP. Frequency stays 0.
+         */
+        qemu_log_mask(LOG_UNIMP, "[%s]%s: Clock %d Not implemented\n",
+                      TYPE_IMX8MP_CCM, __func__, clock);
+        break;
+    default:
+        qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: unsupported clock %d\n",
+                      TYPE_IMX8MP_CCM, __func__, clock);
+        break;
+    }
+
+    trace_ccm_clock_freq(clock, freq);
+
+    return freq;
+}
+
+static void imx8mp_ccm_class_init(ObjectClass *klass, void *data)
+{
+    IMXCCMClass *ccm_class = IMX_CCM_CLASS(klass);
+    DeviceClass *device_class = DEVICE_CLASS(klass);
+
+    /* CCM class hook first, then the generic device class fields */
+    ccm_class->get_clock_frequency = imx8mp_ccm_get_clock_frequency;
+    device_class->desc = "i.MX 8M Plus Clock Control Module";
+    device_class->vmsd = &imx8mp_ccm_vmstate;
+    device_class_set_legacy_reset(device_class, imx8mp_ccm_reset);
+}
+
+static const TypeInfo imx8mp_ccm_types[] = {
+    { /* QOM type of the i.MX 8M Plus CCM, a child of TYPE_IMX_CCM */
+        .name          = TYPE_IMX8MP_CCM,
+        .parent        = TYPE_IMX_CCM,
+        .class_init    = imx8mp_ccm_class_init,
+        .instance_init = imx8mp_ccm_init,
+        .instance_size = sizeof(IMX8MPCCMState),
+    },
+};
+
+DEFINE_TYPES(imx8mp_ccm_types);
diff --git a/hw/misc/meson.build b/hw/misc/meson.build
index edd36a334d..0b5187a2f7 100644
--- a/hw/misc/meson.build
+++ b/hw/misc/meson.build
@@ -55,6 +55,8 @@ system_ss.add(when: 'CONFIG_AXP2XX_PMU', if_true: files('axp2xx.c'))
 system_ss.add(when: 'CONFIG_REALVIEW', if_true: files('arm_sysctl.c'))
 system_ss.add(when: 'CONFIG_ECCMEMCTL', if_true: files('eccmemctl.c'))
 system_ss.add(when: 'CONFIG_EXYNOS4', if_true: files('exynos4210_pmu.c', 'exynos4210_clk.c', 'exynos4210_rng.c'))
+system_ss.add(when: 'CONFIG_FSL_IMX8MP_ANALOG', if_true: files('imx8mp_analog.c'))
+system_ss.add(when: 'CONFIG_FSL_IMX8MP_CCM', if_true: files('imx8mp_ccm.c'))
 system_ss.add(when: 'CONFIG_IMX', if_true: files(
   'imx25_ccm.c',
   'imx31_ccm.c',
diff --git a/include/hw/arm/fsl-imx8mp.h b/include/hw/arm/fsl-imx8mp.h
index 57e23d1b69..ce5188e7f2 100644
--- a/include/hw/arm/fsl-imx8mp.h
+++ b/include/hw/arm/fsl-imx8mp.h
@@ -12,6 +12,8 @@
 #include "cpu.h"
 #include "hw/char/imx_serial.h"
 #include "hw/intc/arm_gicv3_common.h"
+#include "hw/misc/imx8mp_analog.h"
+#include "hw/misc/imx8mp_ccm.h"
 #include "qom/object.h"
 #include "qemu/units.h"
 
@@ -32,6 +34,8 @@ struct FslImx8mpState {
 
     ARMCPU             cpu[FSL_IMX8MP_NUM_CPUS];
     GICv3State         gic;
+    IMX8MPCCMState     ccm;
+    IMX8MPAnalogState  analog;
     IMXSerialState     uart[FSL_IMX8MP_NUM_UARTS];
 };
 
diff --git a/include/hw/misc/imx8mp_analog.h b/include/hw/misc/imx8mp_analog.h
new file mode 100644
index 0000000000..955f03215a
--- /dev/null
+++ b/include/hw/misc/imx8mp_analog.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2025 Bernhard Beschow <shentey@gmail.com>
+ *
+ * i.MX8MP ANALOG IP block emulation code
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef IMX8MP_ANALOG_H
+#define IMX8MP_ANALOG_H
+
+#include "qom/object.h"
+#include "hw/sysbus.h"
+
+enum IMX8MPAnalogRegisters { /* 32-bit word indices: byte offset / 4 */
+    ANALOG_AUDIO_PLL1_GEN_CTRL = 0x000 / 4,
+    ANALOG_AUDIO_PLL1_FDIV_CTL0 = 0x004 / 4,
+    ANALOG_AUDIO_PLL1_FDIV_CTL1 = 0x008 / 4,
+    ANALOG_AUDIO_PLL1_SSCG_CTRL = 0x00c / 4,
+    ANALOG_AUDIO_PLL1_MNIT_CTRL = 0x010 / 4,
+    ANALOG_AUDIO_PLL2_GEN_CTRL = 0x014 / 4,
+    ANALOG_AUDIO_PLL2_FDIV_CTL0 = 0x018 / 4,
+    ANALOG_AUDIO_PLL2_FDIV_CTL1 = 0x01c / 4,
+    ANALOG_AUDIO_PLL2_SSCG_CTRL = 0x020 / 4,
+    ANALOG_AUDIO_PLL2_MNIT_CTRL = 0x024 / 4,
+    ANALOG_VIDEO_PLL1_GEN_CTRL = 0x028 / 4,
+    ANALOG_VIDEO_PLL1_FDIV_CTL0 = 0x02c / 4,
+    ANALOG_VIDEO_PLL1_FDIV_CTL1 = 0x030 / 4,
+    ANALOG_VIDEO_PLL1_SSCG_CTRL = 0x034 / 4,
+    ANALOG_VIDEO_PLL1_MNIT_CTRL = 0x038 / 4,
+    ANALOG_DRAM_PLL_GEN_CTRL = 0x050 / 4,
+    ANALOG_DRAM_PLL_FDIV_CTL0 = 0x054 / 4,
+    ANALOG_DRAM_PLL_FDIV_CTL1 = 0x058 / 4,
+    ANALOG_DRAM_PLL_SSCG_CTRL = 0x05c / 4,
+    ANALOG_DRAM_PLL_MNIT_CTRL = 0x060 / 4,
+    ANALOG_GPU_PLL_GEN_CTRL = 0x064 / 4,
+    ANALOG_GPU_PLL_FDIV_CTL0 = 0x068 / 4,
+    ANALOG_GPU_PLL_LOCKD_CTRL = 0x06c / 4,
+    ANALOG_GPU_PLL_MNIT_CTRL = 0x070 / 4,
+    ANALOG_VPU_PLL_GEN_CTRL = 0x074 / 4,
+    ANALOG_VPU_PLL_FDIV_CTL0 = 0x078 / 4,
+    ANALOG_VPU_PLL_LOCKD_CTRL = 0x07c / 4,
+    ANALOG_VPU_PLL_MNIT_CTRL = 0x080 / 4,
+    ANALOG_ARM_PLL_GEN_CTRL = 0x084 / 4,
+    ANALOG_ARM_PLL_FDIV_CTL0 = 0x088 / 4,
+    ANALOG_ARM_PLL_LOCKD_CTRL = 0x08c / 4,
+    ANALOG_ARM_PLL_MNIT_CTRL = 0x090 / 4,
+    ANALOG_SYS_PLL1_GEN_CTRL = 0x094 / 4,
+    ANALOG_SYS_PLL1_FDIV_CTL0 = 0x098 / 4,
+    ANALOG_SYS_PLL1_LOCKD_CTRL = 0x09c / 4,
+    ANALOG_SYS_PLL1_MNIT_CTRL = 0x100 / 4,
+    ANALOG_SYS_PLL2_GEN_CTRL = 0x104 / 4,
+    ANALOG_SYS_PLL2_FDIV_CTL0 = 0x108 / 4,
+    ANALOG_SYS_PLL2_LOCKD_CTRL = 0x10c / 4,
+    ANALOG_SYS_PLL2_MNIT_CTRL = 0x110 / 4,
+    ANALOG_SYS_PLL3_GEN_CTRL = 0x114 / 4,
+    ANALOG_SYS_PLL3_FDIV_CTL0 = 0x118 / 4,
+    ANALOG_SYS_PLL3_LOCKD_CTRL = 0x11c / 4,
+    ANALOG_SYS_PLL3_MNIT_CTRL = 0x120 / 4,
+    ANALOG_OSC_MISC_CFG = 0x124 / 4,
+    ANALOG_ANAMIX_PLL_MNIT_CTL = 0x128 / 4,
+
+    ANALOG_DIGPROG = 0x800 / 4,
+    ANALOG_MAX, /* one past DIGPROG; used as the register array size */
+};
+
+#define TYPE_IMX8MP_ANALOG "imx8mp.analog"
+OBJECT_DECLARE_SIMPLE_TYPE(IMX8MPAnalogState, IMX8MP_ANALOG)
+
+struct IMX8MPAnalogState {
+    SysBusDevice parent_obj;
+
+    struct {
+        MemoryRegion container; /* 0x10000-byte region exported via sysbus */
+        MemoryRegion analog;    /* register I/O region at container offset 0 */
+    } mmio;
+
+    uint32_t analog[ANALOG_MAX]; /* registers, index = byte offset / 4 */
+};
+
+#endif /* IMX8MP_ANALOG_H */
diff --git a/include/hw/misc/imx8mp_ccm.h b/include/hw/misc/imx8mp_ccm.h
new file mode 100644
index 0000000000..685c8582ff
--- /dev/null
+++ b/include/hw/misc/imx8mp_ccm.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2025 Bernhard Beschow <shentey@gmail.com>
+ *
+ * i.MX 8M Plus CCM IP block emulation code
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef IMX8MP_CCM_H
+#define IMX8MP_CCM_H
+
+#include "hw/misc/imx_ccm.h"
+#include "qom/object.h"
+
+enum IMX8MPCCMRegisters { /* sized in 32-bit words, up to offset 0xc6fc */
+    CCM_MAX = 0xc6fc / sizeof(uint32_t) + 1,
+};
+
+#define TYPE_IMX8MP_CCM "imx8mp.ccm"
+OBJECT_DECLARE_SIMPLE_TYPE(IMX8MPCCMState, IMX8MP_CCM)
+
+struct IMX8MPCCMState {
+    IMXCCMState parent_obj;
+
+    MemoryRegion iomem; /* MMIO region backed by ccm[] */
+
+    uint32_t ccm[CCM_MAX]; /* register storage; all zero after reset */
+};
+
+#endif /* IMX8MP_CCM_H */

From 487967bed65083db33561edc1255ced422bfbff5 Mon Sep 17 00:00:00 2001
From: Bernhard Beschow <shentey@gmail.com>
Date: Sun, 23 Feb 2025 12:46:56 +0100
Subject: [PATCH 33/43] hw/arm/fsl-imx8mp: Add SNVS

SNVS contains an RTC which allows Linux to deal correctly with time. This is
particularly useful when handling persistent storage which will be done in the
next patch.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Bernhard Beschow <shentey@gmail.com>
Message-id: 20250223114708.1780-7-shentey@gmail.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 docs/system/arm/imx8mp-evk.rst |  1 +
 hw/arm/fsl-imx8mp.c            | 10 ++++++++++
 include/hw/arm/fsl-imx8mp.h    |  2 ++
 3 files changed, 13 insertions(+)

diff --git a/docs/system/arm/imx8mp-evk.rst b/docs/system/arm/imx8mp-evk.rst
index f0df346113..22541c5442 100644
--- a/docs/system/arm/imx8mp-evk.rst
+++ b/docs/system/arm/imx8mp-evk.rst
@@ -12,6 +12,7 @@ The ``imx8mp-evk`` machine implements the following devices:
  * Up to 4 Cortex-A53 cores
  * Generic Interrupt Controller (GICv3)
  * 4 UARTs
+ * Secure Non-Volatile Storage (SNVS) including an RTC
  * Clock Tree
 
 Boot options
diff --git a/hw/arm/fsl-imx8mp.c b/hw/arm/fsl-imx8mp.c
index bc15b25ca1..18c9c54ddc 100644
--- a/hw/arm/fsl-imx8mp.c
+++ b/hw/arm/fsl-imx8mp.c
@@ -201,6 +201,8 @@ static void fsl_imx8mp_init(Object *obj)
 
     object_initialize_child(obj, "analog", &s->analog, TYPE_IMX8MP_ANALOG);
 
+    object_initialize_child(obj, "snvs", &s->snvs, TYPE_IMX7_SNVS);
+
     for (i = 0; i < FSL_IMX8MP_NUM_UARTS; i++) {
         g_autofree char *name = g_strdup_printf("uart%d", i + 1);
         object_initialize_child(obj, name, &s->uart[i], TYPE_IMX_SERIAL);
@@ -344,6 +346,13 @@ static void fsl_imx8mp_realize(DeviceState *dev, Error **errp)
                            qdev_get_gpio_in(gicdev, serial_table[i].irq));
     }
 
+    /* SNVS */
+    if (!sysbus_realize(SYS_BUS_DEVICE(&s->snvs), errp)) {
+        return;
+    }
+    sysbus_mmio_map(SYS_BUS_DEVICE(&s->snvs), 0,
+                    fsl_imx8mp_memmap[FSL_IMX8MP_SNVS_HP].addr);
+
     /* Unimplemented devices */
     for (i = 0; i < ARRAY_SIZE(fsl_imx8mp_memmap); i++) {
         switch (i) {
@@ -352,6 +361,7 @@ static void fsl_imx8mp_realize(DeviceState *dev, Error **errp)
         case FSL_IMX8MP_GIC_DIST:
         case FSL_IMX8MP_GIC_REDIST:
         case FSL_IMX8MP_RAM:
+        case FSL_IMX8MP_SNVS_HP:
         case FSL_IMX8MP_UART1 ... FSL_IMX8MP_UART4:
             /* device implemented and treated above */
             break;
diff --git a/include/hw/arm/fsl-imx8mp.h b/include/hw/arm/fsl-imx8mp.h
index ce5188e7f2..26e24e99a1 100644
--- a/include/hw/arm/fsl-imx8mp.h
+++ b/include/hw/arm/fsl-imx8mp.h
@@ -12,6 +12,7 @@
 #include "cpu.h"
 #include "hw/char/imx_serial.h"
 #include "hw/intc/arm_gicv3_common.h"
+#include "hw/misc/imx7_snvs.h"
 #include "hw/misc/imx8mp_analog.h"
 #include "hw/misc/imx8mp_ccm.h"
 #include "qom/object.h"
@@ -36,6 +37,7 @@ struct FslImx8mpState {
     GICv3State         gic;
     IMX8MPCCMState     ccm;
     IMX8MPAnalogState  analog;
+    IMX7SNVSState      snvs;
     IMXSerialState     uart[FSL_IMX8MP_NUM_UARTS];
 };
 

From a81193c3e9a8220862120d8d4114191f3899f4b3 Mon Sep 17 00:00:00 2001
From: Bernhard Beschow <shentey@gmail.com>
Date: Sun, 23 Feb 2025 12:46:57 +0100
Subject: [PATCH 34/43] hw/arm/fsl-imx8mp: Add USDHC storage controllers

The USDHC emulation allows for running real-world images such as those generated
by Buildroot. Convert the board documentation accordingly instead of running a
Linux kernel with ephemeral storage.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Bernhard Beschow <shentey@gmail.com>
Message-id: 20250223114708.1780-8-shentey@gmail.com
[PMM: drop 'static const' from usdhc_table[] for GCC 7.5]
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 docs/system/arm/imx8mp-evk.rst | 18 ++++++++++++------
 hw/arm/Kconfig                 |  1 +
 hw/arm/fsl-imx8mp.c            | 28 ++++++++++++++++++++++++++++
 hw/arm/imx8mp-evk.c            | 18 ++++++++++++++++++
 include/hw/arm/fsl-imx8mp.h    |  7 +++++++
 5 files changed, 66 insertions(+), 6 deletions(-)

diff --git a/docs/system/arm/imx8mp-evk.rst b/docs/system/arm/imx8mp-evk.rst
index 22541c5442..879c822356 100644
--- a/docs/system/arm/imx8mp-evk.rst
+++ b/docs/system/arm/imx8mp-evk.rst
@@ -12,6 +12,7 @@ The ``imx8mp-evk`` machine implements the following devices:
  * Up to 4 Cortex-A53 cores
  * Generic Interrupt Controller (GICv3)
  * 4 UARTs
+ * 3 USDHC Storage Controllers
  * Secure Non-Volatile Storage (SNVS) including an RTC
  * Clock Tree
 
@@ -26,18 +27,23 @@ Direct Linux Kernel Boot
 
 Probably the easiest way to get started with a whole Linux system on the machine
 is to generate an image with Buildroot. Version 2024.11.1 is tested at the time
-of writing and involves two steps. First run the following commands in the
+of writing and involves three steps. First run the following commands in the
 toplevel directory of the Buildroot source tree:
 
 .. code-block:: bash
 
-  $ echo "BR2_TARGET_ROOTFS_CPIO=y" >> configs/freescale_imx8mpevk_defconfig
   $ make freescale_imx8mpevk_defconfig
   $ make
 
 Once finished successfully there is an ``output/image`` subfolder. Navigate into
-it and patch the device tree with the following commands which will remove the
-``cpu-idle-states`` properties from CPU nodes:
+it and resize the SD card image to a power of two:
+
+.. code-block:: bash
+
+  $ qemu-img resize sdcard.img 256M
+
+Finally, the device tree needs to be patched with the following commands which
+will remove the ``cpu-idle-states`` properties from CPU nodes:
 
 .. code-block:: bash
 
@@ -52,5 +58,5 @@ Now that everything is prepared the machine can be started as follows:
       -display none -serial null -serial stdio \
       -kernel Image \
       -dtb imx8mp-evk-patched.dtb \
-      -initrd rootfs.cpio \
-      -append "root=/dev/ram"
+      -append "root=/dev/mmcblk2p2" \
+      -drive file=sdcard.img,if=sd,bus=2,format=raw,id=mmcblk2
diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig
index f77c451ba3..d2dda3213d 100644
--- a/hw/arm/Kconfig
+++ b/hw/arm/Kconfig
@@ -599,6 +599,7 @@ config FSL_IMX8MP
     select FSL_IMX8MP_ANALOG
     select FSL_IMX8MP_CCM
     select IMX
+    select SDHCI
     select UNIMP
 
 config FSL_IMX8MP_EVK
diff --git a/hw/arm/fsl-imx8mp.c b/hw/arm/fsl-imx8mp.c
index 18c9c54ddc..da9eaeb6ff 100644
--- a/hw/arm/fsl-imx8mp.c
+++ b/hw/arm/fsl-imx8mp.c
@@ -207,6 +207,11 @@ static void fsl_imx8mp_init(Object *obj)
         g_autofree char *name = g_strdup_printf("uart%d", i + 1);
         object_initialize_child(obj, name, &s->uart[i], TYPE_IMX_SERIAL);
     }
+
+    for (i = 0; i < FSL_IMX8MP_NUM_USDHCS; i++) {
+        g_autofree char *name = g_strdup_printf("usdhc%d", i + 1);
+        object_initialize_child(obj, name, &s->usdhc[i], TYPE_IMX_USDHC);
+    }
 }
 
 static void fsl_imx8mp_realize(DeviceState *dev, Error **errp)
@@ -346,6 +351,28 @@ static void fsl_imx8mp_realize(DeviceState *dev, Error **errp)
                            qdev_get_gpio_in(gicdev, serial_table[i].irq));
     }
 
+    /* USDHCs */
+    for (i = 0; i < FSL_IMX8MP_NUM_USDHCS; i++) {
+        struct {
+            hwaddr addr;
+            unsigned int irq;
+        } usdhc_table[FSL_IMX8MP_NUM_USDHCS] = {
+            { fsl_imx8mp_memmap[FSL_IMX8MP_USDHC1].addr, FSL_IMX8MP_USDHC1_IRQ },
+            { fsl_imx8mp_memmap[FSL_IMX8MP_USDHC2].addr, FSL_IMX8MP_USDHC2_IRQ },
+            { fsl_imx8mp_memmap[FSL_IMX8MP_USDHC3].addr, FSL_IMX8MP_USDHC3_IRQ },
+        };
+
+        object_property_set_uint(OBJECT(&s->usdhc[i]), "vendor",
+                                 SDHCI_VENDOR_IMX, &error_abort);
+        if (!sysbus_realize(SYS_BUS_DEVICE(&s->usdhc[i]), errp)) {
+            return;
+        }
+
+        sysbus_mmio_map(SYS_BUS_DEVICE(&s->usdhc[i]), 0, usdhc_table[i].addr);
+        sysbus_connect_irq(SYS_BUS_DEVICE(&s->usdhc[i]), 0,
+                           qdev_get_gpio_in(gicdev, usdhc_table[i].irq));
+    }
+
     /* SNVS */
     if (!sysbus_realize(SYS_BUS_DEVICE(&s->snvs), errp)) {
         return;
@@ -363,6 +390,7 @@ static void fsl_imx8mp_realize(DeviceState *dev, Error **errp)
         case FSL_IMX8MP_RAM:
         case FSL_IMX8MP_SNVS_HP:
         case FSL_IMX8MP_UART1 ... FSL_IMX8MP_UART4:
+        case FSL_IMX8MP_USDHC1 ... FSL_IMX8MP_USDHC3:
             /* device implemented and treated above */
             break;
 
diff --git a/hw/arm/imx8mp-evk.c b/hw/arm/imx8mp-evk.c
index 2756d4c21c..27d9e9e8ee 100644
--- a/hw/arm/imx8mp-evk.c
+++ b/hw/arm/imx8mp-evk.c
@@ -11,6 +11,7 @@
 #include "hw/arm/boot.h"
 #include "hw/arm/fsl-imx8mp.h"
 #include "hw/boards.h"
+#include "hw/qdev-properties.h"
 #include "system/qtest.h"
 #include "qemu/error-report.h"
 #include "qapi/error.h"
@@ -40,6 +41,23 @@ static void imx8mp_evk_init(MachineState *machine)
     memory_region_add_subregion(get_system_memory(), FSL_IMX8MP_RAM_START,
                                 machine->ram);
 
+    for (int i = 0; i < FSL_IMX8MP_NUM_USDHCS; i++) {
+        BusState *bus;
+        DeviceState *carddev;
+        BlockBackend *blk;
+        DriveInfo *di = drive_get(IF_SD, i, 0);
+
+        if (!di) {
+            continue;
+        }
+
+        blk = blk_by_legacy_dinfo(di);
+        bus = qdev_get_child_bus(DEVICE(&s->usdhc[i]), "sd-bus");
+        carddev = qdev_new(TYPE_SD_CARD);
+        qdev_prop_set_drive_err(carddev, "drive", blk, &error_fatal);
+        qdev_realize_and_unref(carddev, bus, &error_fatal);
+    }
+
     if (!qtest_enabled()) {
         arm_load_kernel(&s->cpu[0], machine, &boot_info);
     }
diff --git a/include/hw/arm/fsl-imx8mp.h b/include/hw/arm/fsl-imx8mp.h
index 26e24e99a1..349d55ca88 100644
--- a/include/hw/arm/fsl-imx8mp.h
+++ b/include/hw/arm/fsl-imx8mp.h
@@ -15,6 +15,7 @@
 #include "hw/misc/imx7_snvs.h"
 #include "hw/misc/imx8mp_analog.h"
 #include "hw/misc/imx8mp_ccm.h"
+#include "hw/sd/sdhci.h"
 #include "qom/object.h"
 #include "qemu/units.h"
 
@@ -28,6 +29,7 @@ enum FslImx8mpConfiguration {
     FSL_IMX8MP_NUM_CPUS         = 4,
     FSL_IMX8MP_NUM_IRQS         = 160,
     FSL_IMX8MP_NUM_UARTS        = 4,
+    FSL_IMX8MP_NUM_USDHCS       = 3,
 };
 
 struct FslImx8mpState {
@@ -39,6 +41,7 @@ struct FslImx8mpState {
     IMX8MPAnalogState  analog;
     IMX7SNVSState      snvs;
     IMXSerialState     uart[FSL_IMX8MP_NUM_UARTS];
+    SDHCIState         usdhc[FSL_IMX8MP_NUM_USDHCS];
 };
 
 enum FslImx8mpMemoryRegions {
@@ -184,6 +187,10 @@ enum FslImx8mpMemoryRegions {
 };
 
 enum FslImx8mpIrqs {
+    FSL_IMX8MP_USDHC1_IRQ   = 22,
+    FSL_IMX8MP_USDHC2_IRQ   = 23,
+    FSL_IMX8MP_USDHC3_IRQ   = 24,
+
     FSL_IMX8MP_UART1_IRQ    = 26,
     FSL_IMX8MP_UART2_IRQ    = 27,
     FSL_IMX8MP_UART3_IRQ    = 28,

From fd1deb5301f89eb86c0eecadb670beb98aa74ac5 Mon Sep 17 00:00:00 2001
From: Bernhard Beschow <shentey@gmail.com>
Date: Sun, 23 Feb 2025 12:46:58 +0100
Subject: [PATCH 35/43] hw/arm/fsl-imx8mp: Add PCIe support

Linux checks for the PLLs in the PHY to be locked, so implement a model
emulating that.

Signed-off-by: Bernhard Beschow <shentey@gmail.com>
Message-id: 20250223114708.1780-9-shentey@gmail.com
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 MAINTAINERS                         |  2 +
 docs/system/arm/imx8mp-evk.rst      |  1 +
 hw/arm/Kconfig                      |  3 +
 hw/arm/fsl-imx8mp.c                 | 30 +++++++++
 hw/pci-host/Kconfig                 |  3 +
 hw/pci-host/fsl_imx8m_phy.c         | 98 +++++++++++++++++++++++++++++
 hw/pci-host/meson.build             |  1 +
 include/hw/arm/fsl-imx8mp.h         | 10 +++
 include/hw/pci-host/fsl_imx8m_phy.h | 28 +++++++++
 9 files changed, 176 insertions(+)
 create mode 100644 hw/pci-host/fsl_imx8m_phy.c
 create mode 100644 include/hw/pci-host/fsl_imx8m_phy.h

diff --git a/MAINTAINERS b/MAINTAINERS
index 8ea7fb4c7a..2e7fc6fa91 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -827,8 +827,10 @@ S: Maintained
 F: hw/arm/imx8mp-evk.c
 F: hw/arm/fsl-imx8mp.c
 F: hw/misc/imx8mp_*.c
+F: hw/pci-host/fsl_imx8m_phy.c
 F: include/hw/arm/fsl-imx8mp.h
 F: include/hw/misc/imx8mp_*.h
+F: include/hw/pci-host/fsl_imx8m_phy.h
 F: docs/system/arm/imx8mp-evk.rst
 
 MPS2 / MPS3
diff --git a/docs/system/arm/imx8mp-evk.rst b/docs/system/arm/imx8mp-evk.rst
index 879c822356..18a8fdd278 100644
--- a/docs/system/arm/imx8mp-evk.rst
+++ b/docs/system/arm/imx8mp-evk.rst
@@ -13,6 +13,7 @@ The ``imx8mp-evk`` machine implements the following devices:
  * Generic Interrupt Controller (GICv3)
  * 4 UARTs
  * 3 USDHC Storage Controllers
+ * 1 Designware PCI Express Controller
  * Secure Non-Volatile Storage (SNVS) including an RTC
  * Clock Tree
 
diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig
index d2dda3213d..be5a2c02b7 100644
--- a/hw/arm/Kconfig
+++ b/hw/arm/Kconfig
@@ -595,10 +595,13 @@ config FSL_IMX7
 
 config FSL_IMX8MP
     bool
+    imply PCI_DEVICES
     select ARM_GIC
     select FSL_IMX8MP_ANALOG
     select FSL_IMX8MP_CCM
     select IMX
+    select PCI_EXPRESS_DESIGNWARE
+    select PCI_EXPRESS_FSL_IMX8M_PHY
     select SDHCI
     select UNIMP
 
diff --git a/hw/arm/fsl-imx8mp.c b/hw/arm/fsl-imx8mp.c
index da9eaeb6ff..1ee681ac1d 100644
--- a/hw/arm/fsl-imx8mp.c
+++ b/hw/arm/fsl-imx8mp.c
@@ -212,6 +212,10 @@ static void fsl_imx8mp_init(Object *obj)
         g_autofree char *name = g_strdup_printf("usdhc%d", i + 1);
         object_initialize_child(obj, name, &s->usdhc[i], TYPE_IMX_USDHC);
     }
+
+    object_initialize_child(obj, "pcie", &s->pcie, TYPE_DESIGNWARE_PCIE_HOST);
+    object_initialize_child(obj, "pcie_phy", &s->pcie_phy,
+                            TYPE_FSL_IMX8M_PCIE_PHY);
 }
 
 static void fsl_imx8mp_realize(DeviceState *dev, Error **errp)
@@ -380,6 +384,30 @@ static void fsl_imx8mp_realize(DeviceState *dev, Error **errp)
     sysbus_mmio_map(SYS_BUS_DEVICE(&s->snvs), 0,
                     fsl_imx8mp_memmap[FSL_IMX8MP_SNVS_HP].addr);
 
+    /* PCIe */
+    if (!sysbus_realize(SYS_BUS_DEVICE(&s->pcie), errp)) {
+        return;
+    }
+    sysbus_mmio_map(SYS_BUS_DEVICE(&s->pcie), 0,
+                    fsl_imx8mp_memmap[FSL_IMX8MP_PCIE1].addr);
+
+    sysbus_connect_irq(SYS_BUS_DEVICE(&s->pcie), 0,
+                       qdev_get_gpio_in(gicdev, FSL_IMX8MP_PCI_INTA_IRQ));
+    sysbus_connect_irq(SYS_BUS_DEVICE(&s->pcie), 1,
+                       qdev_get_gpio_in(gicdev, FSL_IMX8MP_PCI_INTB_IRQ));
+    sysbus_connect_irq(SYS_BUS_DEVICE(&s->pcie), 2,
+                       qdev_get_gpio_in(gicdev, FSL_IMX8MP_PCI_INTC_IRQ));
+    sysbus_connect_irq(SYS_BUS_DEVICE(&s->pcie), 3,
+                       qdev_get_gpio_in(gicdev, FSL_IMX8MP_PCI_INTD_IRQ));
+    sysbus_connect_irq(SYS_BUS_DEVICE(&s->pcie), 4,
+                       qdev_get_gpio_in(gicdev, FSL_IMX8MP_PCI_MSI_IRQ));
+
+    if (!sysbus_realize(SYS_BUS_DEVICE(&s->pcie_phy), errp)) {
+        return;
+    }
+    sysbus_mmio_map(SYS_BUS_DEVICE(&s->pcie_phy), 0,
+                    fsl_imx8mp_memmap[FSL_IMX8MP_PCIE_PHY1].addr);
+
     /* Unimplemented devices */
     for (i = 0; i < ARRAY_SIZE(fsl_imx8mp_memmap); i++) {
         switch (i) {
@@ -387,6 +415,8 @@ static void fsl_imx8mp_realize(DeviceState *dev, Error **errp)
         case FSL_IMX8MP_CCM:
         case FSL_IMX8MP_GIC_DIST:
         case FSL_IMX8MP_GIC_REDIST:
+        case FSL_IMX8MP_PCIE1:
+        case FSL_IMX8MP_PCIE_PHY1:
         case FSL_IMX8MP_RAM:
         case FSL_IMX8MP_SNVS_HP:
         case FSL_IMX8MP_UART1 ... FSL_IMX8MP_UART4:
diff --git a/hw/pci-host/Kconfig b/hw/pci-host/Kconfig
index c91880b237..35c0415242 100644
--- a/hw/pci-host/Kconfig
+++ b/hw/pci-host/Kconfig
@@ -99,6 +99,9 @@ config ASTRO
     bool
     select PCI
 
+config PCI_EXPRESS_FSL_IMX8M_PHY
+    bool
+
 config GT64120
     bool
     select PCI
diff --git a/hw/pci-host/fsl_imx8m_phy.c b/hw/pci-host/fsl_imx8m_phy.c
new file mode 100644
index 0000000000..aa304b102b
--- /dev/null
+++ b/hw/pci-host/fsl_imx8m_phy.c
@@ -0,0 +1,98 @@
+/*
+ * i.MX8 PCIe PHY emulation
+ *
+ * Copyright (c) 2025 Bernhard Beschow <shentey@gmail.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "hw/pci-host/fsl_imx8m_phy.h"
+#include "hw/resettable.h"
+#include "migration/vmstate.h"
+
+#define CMN_REG075 0x1d4 /* register offset special-cased on read */
+#define ANA_PLL_LOCK_DONE BIT(1) /* ORed into CMN_REG075 reads */
+#define ANA_PLL_AFC_DONE BIT(0) /* ORed into CMN_REG075 reads */
+
+static uint64_t fsl_imx8m_pcie_phy_read(void *opaque, hwaddr offset,
+                                        unsigned size)
+{
+    FslImx8mPciePhyState *s = opaque;
+    /* CMN_REG075 always reads with both PLL done flags set; size unused. */
+    if (offset == CMN_REG075) {
+        return s->data[offset] | ANA_PLL_LOCK_DONE | ANA_PLL_AFC_DONE;
+    }
+    /* Every other offset returns the stored byte unchanged. */
+    return s->data[offset];
+}
+
+static void fsl_imx8m_pcie_phy_write(void *opaque, hwaddr offset,
+                                     uint64_t value, unsigned size)
+{
+    FslImx8mPciePhyState *s = opaque;
+    /* data[] holds uint8_t, so only the low 8 bits of value are kept. */
+    s->data[offset] = value;
+}
+
+static const MemoryRegionOps fsl_imx8m_pcie_phy_ops = {
+    .read = fsl_imx8m_pcie_phy_read,
+    .write = fsl_imx8m_pcie_phy_write,
+    .impl = { /* callbacks handle exactly one byte per call */
+        .min_access_size = 1,
+        .max_access_size = 1,
+    },
+    .valid = { /* 1- to 8-byte accesses are declared valid */
+        .min_access_size = 1,
+        .max_access_size = 8,
+    },
+    .endianness = DEVICE_LITTLE_ENDIAN,
+};
+
+static void fsl_imx8m_pcie_phy_realize(DeviceState *dev, Error **errp)
+{
+    FslImx8mPciePhyState *s = FSL_IMX8M_PCIE_PHY(dev);
+    /* One I/O region sized to data[], exported as the sysbus MMIO region. */
+    memory_region_init_io(&s->iomem, OBJECT(s), &fsl_imx8m_pcie_phy_ops, s,
+                          TYPE_FSL_IMX8M_PCIE_PHY, ARRAY_SIZE(s->data));
+    sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->iomem);
+}
+
+static void fsl_imx8m_pcie_phy_reset_hold(Object *obj, ResetType type)
+{
+    FslImx8mPciePhyState *s = FSL_IMX8M_PCIE_PHY(obj);
+    /* Reset clears every register byte; the reset type is not examined. */
+    memset(s->data, 0, sizeof(s->data));
+}
+
+static const VMStateDescription fsl_imx8m_pcie_phy_vmstate = {
+    .name = "fsl-imx8m-pcie-phy",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (const VMStateField[]) { /* only data[] is listed */
+        VMSTATE_UINT8_ARRAY(data, FslImx8mPciePhyState,
+                            FSL_IMX8M_PCIE_PHY_DATA_SIZE),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static void fsl_imx8m_pcie_phy_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    ResettableClass *rc = RESETTABLE_CLASS(klass);
+    /* Install the realize, vmstate and reset-hold hooks; data is unused. */
+    dc->realize = fsl_imx8m_pcie_phy_realize;
+    dc->vmsd = &fsl_imx8m_pcie_phy_vmstate;
+    rc->phases.hold = fsl_imx8m_pcie_phy_reset_hold;
+}
+
+static const TypeInfo fsl_imx8m_pcie_phy_types[] = {
+    { /* sysbus device; no instance_init, setup happens in realize */
+        .name = TYPE_FSL_IMX8M_PCIE_PHY,
+        .parent = TYPE_SYS_BUS_DEVICE,
+        .instance_size = sizeof(FslImx8mPciePhyState),
+        .class_init = fsl_imx8m_pcie_phy_class_init,
+    }
+};
+
+DEFINE_TYPES(fsl_imx8m_pcie_phy_types)
diff --git a/hw/pci-host/meson.build b/hw/pci-host/meson.build
index 3001e93a43..937a0f72ac 100644
--- a/hw/pci-host/meson.build
+++ b/hw/pci-host/meson.build
@@ -28,6 +28,7 @@ pci_ss.add(when: 'CONFIG_ARTICIA', if_true: files('articia.c'))
 pci_ss.add(when: 'CONFIG_MV64361', if_true: files('mv64361.c'))
 
 # ARM devices
+pci_ss.add(when: 'CONFIG_PCI_EXPRESS_FSL_IMX8M_PHY', if_true: files('fsl_imx8m_phy.c'))
 pci_ss.add(when: 'CONFIG_VERSATILE_PCI', if_true: files('versatile.c'))
 
 # HPPA devices
diff --git a/include/hw/arm/fsl-imx8mp.h b/include/hw/arm/fsl-imx8mp.h
index 349d55ca88..4c70c887a8 100644
--- a/include/hw/arm/fsl-imx8mp.h
+++ b/include/hw/arm/fsl-imx8mp.h
@@ -15,6 +15,8 @@
 #include "hw/misc/imx7_snvs.h"
 #include "hw/misc/imx8mp_analog.h"
 #include "hw/misc/imx8mp_ccm.h"
+#include "hw/pci-host/designware.h"
+#include "hw/pci-host/fsl_imx8m_phy.h"
 #include "hw/sd/sdhci.h"
 #include "qom/object.h"
 #include "qemu/units.h"
@@ -42,6 +44,8 @@ struct FslImx8mpState {
     IMX7SNVSState      snvs;
     IMXSerialState     uart[FSL_IMX8MP_NUM_UARTS];
     SDHCIState         usdhc[FSL_IMX8MP_NUM_USDHCS];
+    DesignwarePCIEHost pcie;
+    FslImx8mPciePhyState   pcie_phy;
 };
 
 enum FslImx8mpMemoryRegions {
@@ -197,6 +201,12 @@ enum FslImx8mpIrqs {
     FSL_IMX8MP_UART4_IRQ    = 29,
     FSL_IMX8MP_UART5_IRQ    = 30,
     FSL_IMX8MP_UART6_IRQ    = 16,
+
+    FSL_IMX8MP_PCI_INTA_IRQ = 126,
+    FSL_IMX8MP_PCI_INTB_IRQ = 125,
+    FSL_IMX8MP_PCI_INTC_IRQ = 124,
+    FSL_IMX8MP_PCI_INTD_IRQ = 123,
+    FSL_IMX8MP_PCI_MSI_IRQ  = 140,
 };
 
 #endif /* FSL_IMX8MP_H */
diff --git a/include/hw/pci-host/fsl_imx8m_phy.h b/include/hw/pci-host/fsl_imx8m_phy.h
new file mode 100644
index 0000000000..4f4875b37d
--- /dev/null
+++ b/include/hw/pci-host/fsl_imx8m_phy.h
@@ -0,0 +1,28 @@
+/*
+ * i.MX8 PCIe PHY emulation
+ *
+ * Copyright (c) 2025 Bernhard Beschow <shentey@gmail.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef HW_PCIHOST_FSLIMX8MPCIEPHY_H
+#define HW_PCIHOST_FSLIMX8MPCIEPHY_H
+
+#include "hw/sysbus.h"
+#include "qom/object.h"
+#include "exec/memory.h"
+
+#define TYPE_FSL_IMX8M_PCIE_PHY "fsl-imx8m-pcie-phy"
+OBJECT_DECLARE_SIMPLE_TYPE(FslImx8mPciePhyState, FSL_IMX8M_PCIE_PHY)
+
+#define FSL_IMX8M_PCIE_PHY_DATA_SIZE 0x800
+
+struct FslImx8mPciePhyState {
+    SysBusDevice parent_obj;
+    /* iomem: MMIO window; data[]: its byte-wide register storage. */
+    MemoryRegion iomem;
+    uint8_t data[FSL_IMX8M_PCIE_PHY_DATA_SIZE];
+};
+
+#endif

From a17c1d932ec6ae1a3364eaf34c0660f01f806267 Mon Sep 17 00:00:00 2001
From: Bernhard Beschow <shentey@gmail.com>
Date: Sun, 23 Feb 2025 12:46:59 +0100
Subject: [PATCH 36/43] hw/arm/fsl-imx8mp: Add GPIO controllers

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Bernhard Beschow <shentey@gmail.com>
Message-id: 20250223114708.1780-10-shentey@gmail.com
[PMM: drop static const from gpio_table for GCC 7.5]
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 docs/system/arm/imx8mp-evk.rst |  1 +
 hw/arm/fsl-imx8mp.c            | 55 ++++++++++++++++++++++++++++++++++
 include/hw/arm/fsl-imx8mp.h    | 14 +++++++++
 3 files changed, 70 insertions(+)

diff --git a/docs/system/arm/imx8mp-evk.rst b/docs/system/arm/imx8mp-evk.rst
index 18a8fdd278..37d3630d09 100644
--- a/docs/system/arm/imx8mp-evk.rst
+++ b/docs/system/arm/imx8mp-evk.rst
@@ -14,6 +14,7 @@ The ``imx8mp-evk`` machine implements the following devices:
  * 4 UARTs
  * 3 USDHC Storage Controllers
  * 1 Designware PCI Express Controller
+ * 5 GPIO Controllers
  * Secure Non-Volatile Storage (SNVS) including an RTC
  * Clock Tree
 
diff --git a/hw/arm/fsl-imx8mp.c b/hw/arm/fsl-imx8mp.c
index 1ee681ac1d..541e4ab5b6 100644
--- a/hw/arm/fsl-imx8mp.c
+++ b/hw/arm/fsl-imx8mp.c
@@ -208,6 +208,11 @@ static void fsl_imx8mp_init(Object *obj)
         object_initialize_child(obj, name, &s->uart[i], TYPE_IMX_SERIAL);
     }
 
+    for (i = 0; i < FSL_IMX8MP_NUM_GPIOS; i++) {
+        g_autofree char *name = g_strdup_printf("gpio%d", i + 1);
+        object_initialize_child(obj, name, &s->gpio[i], TYPE_IMX_GPIO);
+    }
+
     for (i = 0; i < FSL_IMX8MP_NUM_USDHCS; i++) {
         g_autofree char *name = g_strdup_printf("usdhc%d", i + 1);
         object_initialize_child(obj, name, &s->usdhc[i], TYPE_IMX_USDHC);
@@ -355,6 +360,55 @@ static void fsl_imx8mp_realize(DeviceState *dev, Error **errp)
                            qdev_get_gpio_in(gicdev, serial_table[i].irq));
     }
 
+    /* GPIOs */
+    for (i = 0; i < FSL_IMX8MP_NUM_GPIOS; i++) {
+        struct {
+            hwaddr addr;
+            unsigned int irq_low;
+            unsigned int irq_high;
+        } gpio_table[FSL_IMX8MP_NUM_GPIOS] = {
+            {
+                fsl_imx8mp_memmap[FSL_IMX8MP_GPIO1].addr,
+                FSL_IMX8MP_GPIO1_LOW_IRQ,
+                FSL_IMX8MP_GPIO1_HIGH_IRQ
+            },
+            {
+                fsl_imx8mp_memmap[FSL_IMX8MP_GPIO2].addr,
+                FSL_IMX8MP_GPIO2_LOW_IRQ,
+                FSL_IMX8MP_GPIO2_HIGH_IRQ
+            },
+            {
+                fsl_imx8mp_memmap[FSL_IMX8MP_GPIO3].addr,
+                FSL_IMX8MP_GPIO3_LOW_IRQ,
+                FSL_IMX8MP_GPIO3_HIGH_IRQ
+            },
+            {
+                fsl_imx8mp_memmap[FSL_IMX8MP_GPIO4].addr,
+                FSL_IMX8MP_GPIO4_LOW_IRQ,
+                FSL_IMX8MP_GPIO4_HIGH_IRQ
+            },
+            {
+                fsl_imx8mp_memmap[FSL_IMX8MP_GPIO5].addr,
+                FSL_IMX8MP_GPIO5_LOW_IRQ,
+                FSL_IMX8MP_GPIO5_HIGH_IRQ
+            },
+        };
+
+        object_property_set_bool(OBJECT(&s->gpio[i]), "has-edge-sel", true,
+                                 &error_abort);
+        object_property_set_bool(OBJECT(&s->gpio[i]), "has-upper-pin-irq",
+                                 true, &error_abort);
+        if (!sysbus_realize(SYS_BUS_DEVICE(&s->gpio[i]), errp)) {
+            return;
+        }
+
+        sysbus_mmio_map(SYS_BUS_DEVICE(&s->gpio[i]), 0, gpio_table[i].addr);
+        sysbus_connect_irq(SYS_BUS_DEVICE(&s->gpio[i]), 0,
+                           qdev_get_gpio_in(gicdev, gpio_table[i].irq_low));
+        sysbus_connect_irq(SYS_BUS_DEVICE(&s->gpio[i]), 1,
+                           qdev_get_gpio_in(gicdev, gpio_table[i].irq_high));
+    }
+
     /* USDHCs */
     for (i = 0; i < FSL_IMX8MP_NUM_USDHCS; i++) {
         struct {
@@ -415,6 +469,7 @@ static void fsl_imx8mp_realize(DeviceState *dev, Error **errp)
         case FSL_IMX8MP_CCM:
         case FSL_IMX8MP_GIC_DIST:
         case FSL_IMX8MP_GIC_REDIST:
+        case FSL_IMX8MP_GPIO1 ... FSL_IMX8MP_GPIO5:
         case FSL_IMX8MP_PCIE1:
         case FSL_IMX8MP_PCIE_PHY1:
         case FSL_IMX8MP_RAM:
diff --git a/include/hw/arm/fsl-imx8mp.h b/include/hw/arm/fsl-imx8mp.h
index 4c70c887a8..18ea52d083 100644
--- a/include/hw/arm/fsl-imx8mp.h
+++ b/include/hw/arm/fsl-imx8mp.h
@@ -11,6 +11,7 @@
 
 #include "cpu.h"
 #include "hw/char/imx_serial.h"
+#include "hw/gpio/imx_gpio.h"
 #include "hw/intc/arm_gicv3_common.h"
 #include "hw/misc/imx7_snvs.h"
 #include "hw/misc/imx8mp_analog.h"
@@ -29,6 +30,7 @@ OBJECT_DECLARE_SIMPLE_TYPE(FslImx8mpState, FSL_IMX8MP)
 
 enum FslImx8mpConfiguration {
     FSL_IMX8MP_NUM_CPUS         = 4,
+    FSL_IMX8MP_NUM_GPIOS        = 5,
     FSL_IMX8MP_NUM_IRQS         = 160,
     FSL_IMX8MP_NUM_UARTS        = 4,
     FSL_IMX8MP_NUM_USDHCS       = 3,
@@ -39,6 +41,7 @@ struct FslImx8mpState {
 
     ARMCPU             cpu[FSL_IMX8MP_NUM_CPUS];
     GICv3State         gic;
+    IMXGPIOState       gpio[FSL_IMX8MP_NUM_GPIOS];
     IMX8MPCCMState     ccm;
     IMX8MPAnalogState  analog;
     IMX7SNVSState      snvs;
@@ -202,6 +205,17 @@ enum FslImx8mpIrqs {
     FSL_IMX8MP_UART5_IRQ    = 30,
     FSL_IMX8MP_UART6_IRQ    = 16,
 
+    FSL_IMX8MP_GPIO1_LOW_IRQ  = 64,
+    FSL_IMX8MP_GPIO1_HIGH_IRQ = 65,
+    FSL_IMX8MP_GPIO2_LOW_IRQ  = 66,
+    FSL_IMX8MP_GPIO2_HIGH_IRQ = 67,
+    FSL_IMX8MP_GPIO3_LOW_IRQ  = 68,
+    FSL_IMX8MP_GPIO3_HIGH_IRQ = 69,
+    FSL_IMX8MP_GPIO4_LOW_IRQ  = 70,
+    FSL_IMX8MP_GPIO4_HIGH_IRQ = 71,
+    FSL_IMX8MP_GPIO5_LOW_IRQ  = 72,
+    FSL_IMX8MP_GPIO5_HIGH_IRQ = 73,
+
     FSL_IMX8MP_PCI_INTA_IRQ = 126,
     FSL_IMX8MP_PCI_INTB_IRQ = 125,
     FSL_IMX8MP_PCI_INTC_IRQ = 124,

From 764f18afb2b749a9dcfd37bac5709e7a7bcd2589 Mon Sep 17 00:00:00 2001
From: Bernhard Beschow <shentey@gmail.com>
Date: Sun, 23 Feb 2025 12:47:00 +0100
Subject: [PATCH 37/43] hw/arm/fsl-imx8mp: Add I2C controllers

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Bernhard Beschow <shentey@gmail.com>
Message-id: 20250223114708.1780-11-shentey@gmail.com
[PMM: drop static const from i2c_table for GCC 7.5]
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 docs/system/arm/imx8mp-evk.rst |  1 +
 hw/arm/Kconfig                 |  2 ++
 hw/arm/fsl-imx8mp.c            | 29 +++++++++++++++++++++++++++++
 include/hw/arm/fsl-imx8mp.h    | 11 +++++++++++
 4 files changed, 43 insertions(+)

diff --git a/docs/system/arm/imx8mp-evk.rst b/docs/system/arm/imx8mp-evk.rst
index 37d3630d09..ef0d997250 100644
--- a/docs/system/arm/imx8mp-evk.rst
+++ b/docs/system/arm/imx8mp-evk.rst
@@ -15,6 +15,7 @@ The ``imx8mp-evk`` machine implements the following devices:
  * 3 USDHC Storage Controllers
  * 1 Designware PCI Express Controller
  * 5 GPIO Controllers
+ * 6 I2C Controllers
  * Secure Non-Volatile Storage (SNVS) including an RTC
  * Clock Tree
 
diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig
index be5a2c02b7..28ae409c85 100644
--- a/hw/arm/Kconfig
+++ b/hw/arm/Kconfig
@@ -595,11 +595,13 @@ config FSL_IMX7
 
 config FSL_IMX8MP
     bool
+    imply I2C_DEVICES
     imply PCI_DEVICES
     select ARM_GIC
     select FSL_IMX8MP_ANALOG
     select FSL_IMX8MP_CCM
     select IMX
+    select IMX_I2C
     select PCI_EXPRESS_DESIGNWARE
     select PCI_EXPRESS_FSL_IMX8M_PHY
     select SDHCI
diff --git a/hw/arm/fsl-imx8mp.c b/hw/arm/fsl-imx8mp.c
index 541e4ab5b6..750dbf9eab 100644
--- a/hw/arm/fsl-imx8mp.c
+++ b/hw/arm/fsl-imx8mp.c
@@ -208,6 +208,11 @@ static void fsl_imx8mp_init(Object *obj)
         object_initialize_child(obj, name, &s->uart[i], TYPE_IMX_SERIAL);
     }
 
+    for (i = 0; i < FSL_IMX8MP_NUM_I2CS; i++) {
+        g_autofree char *name = g_strdup_printf("i2c%d", i + 1);
+        object_initialize_child(obj, name, &s->i2c[i], TYPE_IMX_I2C);
+    }
+
     for (i = 0; i < FSL_IMX8MP_NUM_GPIOS; i++) {
         g_autofree char *name = g_strdup_printf("gpio%d", i + 1);
         object_initialize_child(obj, name, &s->gpio[i], TYPE_IMX_GPIO);
@@ -360,6 +365,29 @@ static void fsl_imx8mp_realize(DeviceState *dev, Error **errp)
                            qdev_get_gpio_in(gicdev, serial_table[i].irq));
     }
 
+    /* I2Cs */
+    for (i = 0; i < FSL_IMX8MP_NUM_I2CS; i++) {
+        struct {
+            hwaddr addr;
+            unsigned int irq;
+        } i2c_table[FSL_IMX8MP_NUM_I2CS] = {
+            { fsl_imx8mp_memmap[FSL_IMX8MP_I2C1].addr, FSL_IMX8MP_I2C1_IRQ },
+            { fsl_imx8mp_memmap[FSL_IMX8MP_I2C2].addr, FSL_IMX8MP_I2C2_IRQ },
+            { fsl_imx8mp_memmap[FSL_IMX8MP_I2C3].addr, FSL_IMX8MP_I2C3_IRQ },
+            { fsl_imx8mp_memmap[FSL_IMX8MP_I2C4].addr, FSL_IMX8MP_I2C4_IRQ },
+            { fsl_imx8mp_memmap[FSL_IMX8MP_I2C5].addr, FSL_IMX8MP_I2C5_IRQ },
+            { fsl_imx8mp_memmap[FSL_IMX8MP_I2C6].addr, FSL_IMX8MP_I2C6_IRQ },
+        };
+
+        if (!sysbus_realize(SYS_BUS_DEVICE(&s->i2c[i]), errp)) {
+            return;
+        }
+
+        sysbus_mmio_map(SYS_BUS_DEVICE(&s->i2c[i]), 0, i2c_table[i].addr);
+        sysbus_connect_irq(SYS_BUS_DEVICE(&s->i2c[i]), 0,
+                           qdev_get_gpio_in(gicdev, i2c_table[i].irq));
+    }
+
     /* GPIOs */
     for (i = 0; i < FSL_IMX8MP_NUM_GPIOS; i++) {
         struct {
@@ -470,6 +498,7 @@ static void fsl_imx8mp_realize(DeviceState *dev, Error **errp)
         case FSL_IMX8MP_GIC_DIST:
         case FSL_IMX8MP_GIC_REDIST:
         case FSL_IMX8MP_GPIO1 ... FSL_IMX8MP_GPIO5:
+        case FSL_IMX8MP_I2C1 ... FSL_IMX8MP_I2C6:
         case FSL_IMX8MP_PCIE1:
         case FSL_IMX8MP_PCIE_PHY1:
         case FSL_IMX8MP_RAM:
diff --git a/include/hw/arm/fsl-imx8mp.h b/include/hw/arm/fsl-imx8mp.h
index 18ea52d083..2590056627 100644
--- a/include/hw/arm/fsl-imx8mp.h
+++ b/include/hw/arm/fsl-imx8mp.h
@@ -12,6 +12,7 @@
 #include "cpu.h"
 #include "hw/char/imx_serial.h"
 #include "hw/gpio/imx_gpio.h"
+#include "hw/i2c/imx_i2c.h"
 #include "hw/intc/arm_gicv3_common.h"
 #include "hw/misc/imx7_snvs.h"
 #include "hw/misc/imx8mp_analog.h"
@@ -31,6 +32,7 @@ OBJECT_DECLARE_SIMPLE_TYPE(FslImx8mpState, FSL_IMX8MP)
 enum FslImx8mpConfiguration {
     FSL_IMX8MP_NUM_CPUS         = 4,
     FSL_IMX8MP_NUM_GPIOS        = 5,
+    FSL_IMX8MP_NUM_I2CS         = 6,
     FSL_IMX8MP_NUM_IRQS         = 160,
     FSL_IMX8MP_NUM_UARTS        = 4,
     FSL_IMX8MP_NUM_USDHCS       = 3,
@@ -45,6 +47,7 @@ struct FslImx8mpState {
     IMX8MPCCMState     ccm;
     IMX8MPAnalogState  analog;
     IMX7SNVSState      snvs;
+    IMXI2CState        i2c[FSL_IMX8MP_NUM_I2CS];
     IMXSerialState     uart[FSL_IMX8MP_NUM_UARTS];
     SDHCIState         usdhc[FSL_IMX8MP_NUM_USDHCS];
     DesignwarePCIEHost pcie;
@@ -205,6 +208,11 @@ enum FslImx8mpIrqs {
     FSL_IMX8MP_UART5_IRQ    = 30,
     FSL_IMX8MP_UART6_IRQ    = 16,
 
+    FSL_IMX8MP_I2C1_IRQ     = 35,
+    FSL_IMX8MP_I2C2_IRQ     = 36,
+    FSL_IMX8MP_I2C3_IRQ     = 37,
+    FSL_IMX8MP_I2C4_IRQ     = 38,
+
     FSL_IMX8MP_GPIO1_LOW_IRQ  = 64,
     FSL_IMX8MP_GPIO1_HIGH_IRQ = 65,
     FSL_IMX8MP_GPIO2_LOW_IRQ  = 66,
@@ -216,6 +224,9 @@ enum FslImx8mpIrqs {
     FSL_IMX8MP_GPIO5_LOW_IRQ  = 72,
     FSL_IMX8MP_GPIO5_HIGH_IRQ = 73,
 
+    FSL_IMX8MP_I2C5_IRQ     = 76,
+    FSL_IMX8MP_I2C6_IRQ     = 77,
+
     FSL_IMX8MP_PCI_INTA_IRQ = 126,
     FSL_IMX8MP_PCI_INTB_IRQ = 125,
     FSL_IMX8MP_PCI_INTC_IRQ = 124,

From 06908a84f036d7cefb834f8d67cf8b80a1791838 Mon Sep 17 00:00:00 2001
From: Bernhard Beschow <shentey@gmail.com>
Date: Sun, 23 Feb 2025 12:47:01 +0100
Subject: [PATCH 38/43] hw/arm/fsl-imx8mp: Add SPI controllers

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Bernhard Beschow <shentey@gmail.com>
Message-id: 20250223114708.1780-12-shentey@gmail.com
[PMM: drop static const from spi_table for GCC 7.5]
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 docs/system/arm/imx8mp-evk.rst |  1 +
 hw/arm/fsl-imx8mp.c            | 26 ++++++++++++++++++++++++++
 include/hw/arm/fsl-imx8mp.h    |  8 ++++++++
 3 files changed, 35 insertions(+)

diff --git a/docs/system/arm/imx8mp-evk.rst b/docs/system/arm/imx8mp-evk.rst
index ef0d997250..66e5865107 100644
--- a/docs/system/arm/imx8mp-evk.rst
+++ b/docs/system/arm/imx8mp-evk.rst
@@ -16,6 +16,7 @@ The ``imx8mp-evk`` machine implements the following devices:
  * 1 Designware PCI Express Controller
  * 5 GPIO Controllers
  * 6 I2C Controllers
+ * 3 SPI Controllers
  * Secure Non-Volatile Storage (SNVS) including an RTC
  * Clock Tree
 
diff --git a/hw/arm/fsl-imx8mp.c b/hw/arm/fsl-imx8mp.c
index 750dbf9eab..63f07eca8a 100644
--- a/hw/arm/fsl-imx8mp.c
+++ b/hw/arm/fsl-imx8mp.c
@@ -223,6 +223,11 @@ static void fsl_imx8mp_init(Object *obj)
         object_initialize_child(obj, name, &s->usdhc[i], TYPE_IMX_USDHC);
     }
 
+    for (i = 0; i < FSL_IMX8MP_NUM_ECSPIS; i++) {
+        g_autofree char *name = g_strdup_printf("spi%d", i + 1);
+        object_initialize_child(obj, name, &s->spi[i], TYPE_IMX_SPI);
+    }
+
     object_initialize_child(obj, "pcie", &s->pcie, TYPE_DESIGNWARE_PCIE_HOST);
     object_initialize_child(obj, "pcie_phy", &s->pcie_phy,
                             TYPE_FSL_IMX8M_PCIE_PHY);
@@ -459,6 +464,26 @@ static void fsl_imx8mp_realize(DeviceState *dev, Error **errp)
                            qdev_get_gpio_in(gicdev, usdhc_table[i].irq));
     }
 
+    /* ECSPIs */
+    for (i = 0; i < FSL_IMX8MP_NUM_ECSPIS; i++) {
+        struct {
+            hwaddr addr;
+            unsigned int irq;
+        } spi_table[FSL_IMX8MP_NUM_ECSPIS] = {
+            { fsl_imx8mp_memmap[FSL_IMX8MP_ECSPI1].addr, FSL_IMX8MP_ECSPI1_IRQ },
+            { fsl_imx8mp_memmap[FSL_IMX8MP_ECSPI2].addr, FSL_IMX8MP_ECSPI2_IRQ },
+            { fsl_imx8mp_memmap[FSL_IMX8MP_ECSPI3].addr, FSL_IMX8MP_ECSPI3_IRQ },
+        };
+
+        if (!sysbus_realize(SYS_BUS_DEVICE(&s->spi[i]), errp)) {
+            return;
+        }
+
+        sysbus_mmio_map(SYS_BUS_DEVICE(&s->spi[i]), 0, spi_table[i].addr);
+        sysbus_connect_irq(SYS_BUS_DEVICE(&s->spi[i]), 0,
+                           qdev_get_gpio_in(gicdev, spi_table[i].irq));
+    }
+
     /* SNVS */
     if (!sysbus_realize(SYS_BUS_DEVICE(&s->snvs), errp)) {
         return;
@@ -498,6 +523,7 @@ static void fsl_imx8mp_realize(DeviceState *dev, Error **errp)
         case FSL_IMX8MP_GIC_DIST:
         case FSL_IMX8MP_GIC_REDIST:
         case FSL_IMX8MP_GPIO1 ... FSL_IMX8MP_GPIO5:
+        case FSL_IMX8MP_ECSPI1 ... FSL_IMX8MP_ECSPI3:
         case FSL_IMX8MP_I2C1 ... FSL_IMX8MP_I2C6:
         case FSL_IMX8MP_PCIE1:
         case FSL_IMX8MP_PCIE_PHY1:
diff --git a/include/hw/arm/fsl-imx8mp.h b/include/hw/arm/fsl-imx8mp.h
index 2590056627..296a87eb50 100644
--- a/include/hw/arm/fsl-imx8mp.h
+++ b/include/hw/arm/fsl-imx8mp.h
@@ -20,6 +20,7 @@
 #include "hw/pci-host/designware.h"
 #include "hw/pci-host/fsl_imx8m_phy.h"
 #include "hw/sd/sdhci.h"
+#include "hw/ssi/imx_spi.h"
 #include "qom/object.h"
 #include "qemu/units.h"
 
@@ -31,6 +32,7 @@ OBJECT_DECLARE_SIMPLE_TYPE(FslImx8mpState, FSL_IMX8MP)
 
 enum FslImx8mpConfiguration {
     FSL_IMX8MP_NUM_CPUS         = 4,
+    FSL_IMX8MP_NUM_ECSPIS       = 3,
     FSL_IMX8MP_NUM_GPIOS        = 5,
     FSL_IMX8MP_NUM_I2CS         = 6,
     FSL_IMX8MP_NUM_IRQS         = 160,
@@ -47,6 +49,7 @@ struct FslImx8mpState {
     IMX8MPCCMState     ccm;
     IMX8MPAnalogState  analog;
     IMX7SNVSState      snvs;
+    IMXSPIState        spi[FSL_IMX8MP_NUM_ECSPIS];
     IMXI2CState        i2c[FSL_IMX8MP_NUM_I2CS];
     IMXSerialState     uart[FSL_IMX8MP_NUM_UARTS];
     SDHCIState         usdhc[FSL_IMX8MP_NUM_USDHCS];
@@ -208,6 +211,11 @@ enum FslImx8mpIrqs {
     FSL_IMX8MP_UART5_IRQ    = 30,
     FSL_IMX8MP_UART6_IRQ    = 16,
 
+    FSL_IMX8MP_ECSPI1_IRQ   = 31,
+    FSL_IMX8MP_ECSPI2_IRQ   = 32,
+    FSL_IMX8MP_ECSPI3_IRQ   = 33,
+    FSL_IMX8MP_ECSPI4_IRQ   = 34,
+
     FSL_IMX8MP_I2C1_IRQ     = 35,
     FSL_IMX8MP_I2C2_IRQ     = 36,
     FSL_IMX8MP_I2C3_IRQ     = 37,

From 1ac21eb8fbb0297716a6c525e91196a247302b2b Mon Sep 17 00:00:00 2001
From: Bernhard Beschow <shentey@gmail.com>
Date: Sun, 23 Feb 2025 12:47:02 +0100
Subject: [PATCH 39/43] hw/arm/fsl-imx8mp: Add watchdog support

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Bernhard Beschow <shentey@gmail.com>
Message-id: 20250223114708.1780-13-shentey@gmail.com
[PMM: drop static const from wdog_table for GCC 7.5]
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 docs/system/arm/imx8mp-evk.rst |  1 +
 hw/arm/Kconfig                 |  1 +
 hw/arm/fsl-imx8mp.c            | 28 ++++++++++++++++++++++++++++
 include/hw/arm/fsl-imx8mp.h    |  7 +++++++
 4 files changed, 37 insertions(+)

diff --git a/docs/system/arm/imx8mp-evk.rst b/docs/system/arm/imx8mp-evk.rst
index 66e5865107..904de9aa7d 100644
--- a/docs/system/arm/imx8mp-evk.rst
+++ b/docs/system/arm/imx8mp-evk.rst
@@ -17,6 +17,7 @@ The ``imx8mp-evk`` machine implements the following devices:
  * 5 GPIO Controllers
  * 6 I2C Controllers
  * 3 SPI Controllers
+ * 3 Watchdogs
  * Secure Non-Volatile Storage (SNVS) including an RTC
  * Clock Tree
 
diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig
index 28ae409c85..98ac93a23f 100644
--- a/hw/arm/Kconfig
+++ b/hw/arm/Kconfig
@@ -606,6 +606,7 @@ config FSL_IMX8MP
     select PCI_EXPRESS_FSL_IMX8M_PHY
     select SDHCI
     select UNIMP
+    select WDT_IMX2
 
 config FSL_IMX8MP_EVK
     bool
diff --git a/hw/arm/fsl-imx8mp.c b/hw/arm/fsl-imx8mp.c
index 63f07eca8a..762f2a52d8 100644
--- a/hw/arm/fsl-imx8mp.c
+++ b/hw/arm/fsl-imx8mp.c
@@ -228,6 +228,11 @@ static void fsl_imx8mp_init(Object *obj)
         object_initialize_child(obj, name, &s->spi[i], TYPE_IMX_SPI);
     }
 
+    for (i = 0; i < FSL_IMX8MP_NUM_WDTS; i++) {
+        g_autofree char *name = g_strdup_printf("wdt%d", i);
+        object_initialize_child(obj, name, &s->wdt[i], TYPE_IMX2_WDT);
+    }
+
     object_initialize_child(obj, "pcie", &s->pcie, TYPE_DESIGNWARE_PCIE_HOST);
     object_initialize_child(obj, "pcie_phy", &s->pcie_phy,
                             TYPE_FSL_IMX8M_PCIE_PHY);
@@ -491,6 +496,28 @@ static void fsl_imx8mp_realize(DeviceState *dev, Error **errp)
     sysbus_mmio_map(SYS_BUS_DEVICE(&s->snvs), 0,
                     fsl_imx8mp_memmap[FSL_IMX8MP_SNVS_HP].addr);
 
+    /* Watchdogs */
+    for (i = 0; i < FSL_IMX8MP_NUM_WDTS; i++) {
+        struct {
+            hwaddr addr;
+            unsigned int irq;
+        } wdog_table[FSL_IMX8MP_NUM_WDTS] = {
+            { fsl_imx8mp_memmap[FSL_IMX8MP_WDOG1].addr, FSL_IMX8MP_WDOG1_IRQ },
+            { fsl_imx8mp_memmap[FSL_IMX8MP_WDOG2].addr, FSL_IMX8MP_WDOG2_IRQ },
+            { fsl_imx8mp_memmap[FSL_IMX8MP_WDOG3].addr, FSL_IMX8MP_WDOG3_IRQ },
+        };
+
+        object_property_set_bool(OBJECT(&s->wdt[i]), "pretimeout-support",
+                                 true, &error_abort);
+        if (!sysbus_realize(SYS_BUS_DEVICE(&s->wdt[i]), errp)) {
+            return;
+        }
+
+        sysbus_mmio_map(SYS_BUS_DEVICE(&s->wdt[i]), 0, wdog_table[i].addr);
+        sysbus_connect_irq(SYS_BUS_DEVICE(&s->wdt[i]), 0,
+                           qdev_get_gpio_in(gicdev, wdog_table[i].irq));
+    }
+
     /* PCIe */
     if (!sysbus_realize(SYS_BUS_DEVICE(&s->pcie), errp)) {
         return;
@@ -531,6 +558,7 @@ static void fsl_imx8mp_realize(DeviceState *dev, Error **errp)
         case FSL_IMX8MP_SNVS_HP:
         case FSL_IMX8MP_UART1 ... FSL_IMX8MP_UART4:
         case FSL_IMX8MP_USDHC1 ... FSL_IMX8MP_USDHC3:
+        case FSL_IMX8MP_WDOG1 ... FSL_IMX8MP_WDOG3:
             /* device implemented and treated above */
             break;
 
diff --git a/include/hw/arm/fsl-imx8mp.h b/include/hw/arm/fsl-imx8mp.h
index 296a87eb50..dfbdc6ac7f 100644
--- a/include/hw/arm/fsl-imx8mp.h
+++ b/include/hw/arm/fsl-imx8mp.h
@@ -21,6 +21,7 @@
 #include "hw/pci-host/fsl_imx8m_phy.h"
 #include "hw/sd/sdhci.h"
 #include "hw/ssi/imx_spi.h"
+#include "hw/watchdog/wdt_imx2.h"
 #include "qom/object.h"
 #include "qemu/units.h"
 
@@ -38,6 +39,7 @@ enum FslImx8mpConfiguration {
     FSL_IMX8MP_NUM_IRQS         = 160,
     FSL_IMX8MP_NUM_UARTS        = 4,
     FSL_IMX8MP_NUM_USDHCS       = 3,
+    FSL_IMX8MP_NUM_WDTS         = 3,
 };
 
 struct FslImx8mpState {
@@ -53,6 +55,7 @@ struct FslImx8mpState {
     IMXI2CState        i2c[FSL_IMX8MP_NUM_I2CS];
     IMXSerialState     uart[FSL_IMX8MP_NUM_UARTS];
     SDHCIState         usdhc[FSL_IMX8MP_NUM_USDHCS];
+    IMX2WdtState       wdt[FSL_IMX8MP_NUM_WDTS];
     DesignwarePCIEHost pcie;
     FslImx8mPciePhyState   pcie_phy;
 };
@@ -235,6 +238,10 @@ enum FslImx8mpIrqs {
     FSL_IMX8MP_I2C5_IRQ     = 76,
     FSL_IMX8MP_I2C6_IRQ     = 77,
 
+    FSL_IMX8MP_WDOG1_IRQ    = 78,
+    FSL_IMX8MP_WDOG2_IRQ    = 79,
+    FSL_IMX8MP_WDOG3_IRQ    = 10,
+
     FSL_IMX8MP_PCI_INTA_IRQ = 126,
     FSL_IMX8MP_PCI_INTB_IRQ = 125,
     FSL_IMX8MP_PCI_INTC_IRQ = 124,

From f8b26121762c17af9869b0ec7ccbda6df4ea37f8 Mon Sep 17 00:00:00 2001
From: Bernhard Beschow <shentey@gmail.com>
Date: Sun, 23 Feb 2025 12:47:03 +0100
Subject: [PATCH 40/43] hw/arm/fsl-imx8mp: Implement general purpose timers

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Bernhard Beschow <shentey@gmail.com>
Message-id: 20250223114708.1780-14-shentey@gmail.com
[PMM: drop static const from gpt_addrs for GCC 7.5]
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 docs/system/arm/imx8mp-evk.rst |  1 +
 hw/arm/Kconfig                 |  1 +
 hw/arm/fsl-imx8mp.c            | 53 ++++++++++++++++++++++++++++++++++
 hw/timer/imx_gpt.c             | 25 ++++++++++++++++
 include/hw/arm/fsl-imx8mp.h    | 11 +++++++
 include/hw/timer/imx_gpt.h     |  1 +
 6 files changed, 92 insertions(+)

diff --git a/docs/system/arm/imx8mp-evk.rst b/docs/system/arm/imx8mp-evk.rst
index 904de9aa7d..4b195c917f 100644
--- a/docs/system/arm/imx8mp-evk.rst
+++ b/docs/system/arm/imx8mp-evk.rst
@@ -18,6 +18,7 @@ The ``imx8mp-evk`` machine implements the following devices:
  * 6 I2C Controllers
  * 3 SPI Controllers
  * 3 Watchdogs
+ * 6 General Purpose Timers
  * Secure Non-Volatile Storage (SNVS) including an RTC
  * Clock Tree
 
diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig
index 98ac93a23f..4e83895b91 100644
--- a/hw/arm/Kconfig
+++ b/hw/arm/Kconfig
@@ -602,6 +602,7 @@ config FSL_IMX8MP
     select FSL_IMX8MP_CCM
     select IMX
     select IMX_I2C
+    select OR_IRQ
     select PCI_EXPRESS_DESIGNWARE
     select PCI_EXPRESS_FSL_IMX8M_PHY
     select SDHCI
diff --git a/hw/arm/fsl-imx8mp.c b/hw/arm/fsl-imx8mp.c
index 762f2a52d8..185c32ee58 100644
--- a/hw/arm/fsl-imx8mp.c
+++ b/hw/arm/fsl-imx8mp.c
@@ -208,6 +208,13 @@ static void fsl_imx8mp_init(Object *obj)
         object_initialize_child(obj, name, &s->uart[i], TYPE_IMX_SERIAL);
     }
 
+    for (i = 0; i < FSL_IMX8MP_NUM_GPTS; i++) {
+        g_autofree char *name = g_strdup_printf("gpt%d", i + 1);
+        object_initialize_child(obj, name, &s->gpt[i], TYPE_IMX8MP_GPT);
+    }
+    object_initialize_child(obj, "gpt5-gpt6-irq", &s->gpt5_gpt6_irq,
+                            TYPE_OR_IRQ);
+
     for (i = 0; i < FSL_IMX8MP_NUM_I2CS; i++) {
         g_autofree char *name = g_strdup_printf("i2c%d", i + 1);
         object_initialize_child(obj, name, &s->i2c[i], TYPE_IMX_I2C);
@@ -375,6 +382,52 @@ static void fsl_imx8mp_realize(DeviceState *dev, Error **errp)
                            qdev_get_gpio_in(gicdev, serial_table[i].irq));
     }
 
+    /* GPTs */
+    object_property_set_int(OBJECT(&s->gpt5_gpt6_irq), "num-lines", 2,
+                            &error_abort);
+    if (!qdev_realize(DEVICE(&s->gpt5_gpt6_irq), NULL, errp)) {
+        return;
+    }
+
+    qdev_connect_gpio_out(DEVICE(&s->gpt5_gpt6_irq), 0,
+                          qdev_get_gpio_in(gicdev, FSL_IMX8MP_GPT5_GPT6_IRQ));
+
+    for (i = 0; i < FSL_IMX8MP_NUM_GPTS; i++) {
+        hwaddr gpt_addrs[FSL_IMX8MP_NUM_GPTS] = {
+            fsl_imx8mp_memmap[FSL_IMX8MP_GPT1].addr,
+            fsl_imx8mp_memmap[FSL_IMX8MP_GPT2].addr,
+            fsl_imx8mp_memmap[FSL_IMX8MP_GPT3].addr,
+            fsl_imx8mp_memmap[FSL_IMX8MP_GPT4].addr,
+            fsl_imx8mp_memmap[FSL_IMX8MP_GPT5].addr,
+            fsl_imx8mp_memmap[FSL_IMX8MP_GPT6].addr,
+        };
+
+        s->gpt[i].ccm = IMX_CCM(&s->ccm);
+
+        if (!sysbus_realize(SYS_BUS_DEVICE(&s->gpt[i]), errp)) {
+            return;
+        }
+
+        sysbus_mmio_map(SYS_BUS_DEVICE(&s->gpt[i]), 0, gpt_addrs[i]);
+
+        if (i < FSL_IMX8MP_NUM_GPTS - 2) {
+            static const unsigned int gpt_irqs[FSL_IMX8MP_NUM_GPTS - 2] = {
+                FSL_IMX8MP_GPT1_IRQ,
+                FSL_IMX8MP_GPT2_IRQ,
+                FSL_IMX8MP_GPT3_IRQ,
+                FSL_IMX8MP_GPT4_IRQ,
+            };
+
+            sysbus_connect_irq(SYS_BUS_DEVICE(&s->gpt[i]), 0,
+                               qdev_get_gpio_in(gicdev, gpt_irqs[i]));
+        } else {
+            int irq = i - FSL_IMX8MP_NUM_GPTS + 2;
+
+            sysbus_connect_irq(SYS_BUS_DEVICE(&s->gpt[i]), 0,
+                               qdev_get_gpio_in(DEVICE(&s->gpt5_gpt6_irq), irq));
+        }
+    }
+
     /* I2Cs */
     for (i = 0; i < FSL_IMX8MP_NUM_I2CS; i++) {
         struct {
diff --git a/hw/timer/imx_gpt.c b/hw/timer/imx_gpt.c
index 11eca9fa4d..200a89225b 100644
--- a/hw/timer/imx_gpt.c
+++ b/hw/timer/imx_gpt.c
@@ -126,6 +126,17 @@ static const IMXClk imx7_gpt_clocks[] = {
     CLK_NONE,      /* 111 not defined */
 };
 
+static const IMXClk imx8mp_gpt_clocks[] = { /* one entry per 3-bit code */
+    CLK_NONE,      /* 000 No clock source */
+    CLK_IPG,       /* 001 ipg_clk, 532MHz */
+    CLK_IPG_HIGH,  /* 010 ipg_clk_highfreq */
+    CLK_EXT,       /* 011 External clock */
+    CLK_32k,       /* 100 ipg_clk_32k */
+    CLK_HIGH,      /* 101 ipg_clk_16M */
+    CLK_NONE,      /* 110 not defined */
+    CLK_NONE,      /* 111 not defined; NOTE(review): confirm index source */
+};
+
 /* Must be called from within ptimer_transaction_begin/commit block */
 static void imx_gpt_set_freq(IMXGPTState *s)
 {
@@ -552,6 +563,13 @@ static void imx7_gpt_init(Object *obj)
     s->clocks = imx7_gpt_clocks;
 }
 
+static void imx8mp_gpt_init(Object *obj)
+{
+    IMXGPTState *s = IMX_GPT(obj);
+    /* Point this instance at the i.MX8MP clock-source table. */
+    s->clocks = imx8mp_gpt_clocks;
+}
+
 static const TypeInfo imx25_gpt_info = {
     .name = TYPE_IMX25_GPT,
     .parent = TYPE_SYS_BUS_DEVICE,
@@ -584,6 +602,12 @@ static const TypeInfo imx7_gpt_info = {
     .instance_init = imx7_gpt_init,
 };
 
+static const TypeInfo imx8mp_gpt_info = {
+    .name = TYPE_IMX8MP_GPT,
+    .parent = TYPE_IMX25_GPT,
+    .instance_init = imx8mp_gpt_init, /* installs imx8mp_gpt_clocks */
+};
+
 static void imx_gpt_register_types(void)
 {
     type_register_static(&imx25_gpt_info);
@@ -591,6 +615,7 @@ static void imx_gpt_register_types(void)
     type_register_static(&imx6_gpt_info);
     type_register_static(&imx6ul_gpt_info);
     type_register_static(&imx7_gpt_info);
+    type_register_static(&imx8mp_gpt_info);
 }
 
 type_init(imx_gpt_register_types)
diff --git a/include/hw/arm/fsl-imx8mp.h b/include/hw/arm/fsl-imx8mp.h
index dfbdc6ac7f..975887751b 100644
--- a/include/hw/arm/fsl-imx8mp.h
+++ b/include/hw/arm/fsl-imx8mp.h
@@ -17,10 +17,12 @@
 #include "hw/misc/imx7_snvs.h"
 #include "hw/misc/imx8mp_analog.h"
 #include "hw/misc/imx8mp_ccm.h"
+#include "hw/or-irq.h"
 #include "hw/pci-host/designware.h"
 #include "hw/pci-host/fsl_imx8m_phy.h"
 #include "hw/sd/sdhci.h"
 #include "hw/ssi/imx_spi.h"
+#include "hw/timer/imx_gpt.h"
 #include "hw/watchdog/wdt_imx2.h"
 #include "qom/object.h"
 #include "qemu/units.h"
@@ -35,6 +37,7 @@ enum FslImx8mpConfiguration {
     FSL_IMX8MP_NUM_CPUS         = 4,
     FSL_IMX8MP_NUM_ECSPIS       = 3,
     FSL_IMX8MP_NUM_GPIOS        = 5,
+    FSL_IMX8MP_NUM_GPTS         = 6,
     FSL_IMX8MP_NUM_I2CS         = 6,
     FSL_IMX8MP_NUM_IRQS         = 160,
     FSL_IMX8MP_NUM_UARTS        = 4,
@@ -47,6 +50,7 @@ struct FslImx8mpState {
 
     ARMCPU             cpu[FSL_IMX8MP_NUM_CPUS];
     GICv3State         gic;
+    IMXGPTState        gpt[FSL_IMX8MP_NUM_GPTS];
     IMXGPIOState       gpio[FSL_IMX8MP_NUM_GPIOS];
     IMX8MPCCMState     ccm;
     IMX8MPAnalogState  analog;
@@ -58,6 +62,7 @@ struct FslImx8mpState {
     IMX2WdtState       wdt[FSL_IMX8MP_NUM_WDTS];
     DesignwarePCIEHost pcie;
     FslImx8mPciePhyState   pcie_phy;
+    OrIRQState         gpt5_gpt6_irq;
 };
 
 enum FslImx8mpMemoryRegions {
@@ -224,6 +229,12 @@ enum FslImx8mpIrqs {
     FSL_IMX8MP_I2C3_IRQ     = 37,
     FSL_IMX8MP_I2C4_IRQ     = 38,
 
+    FSL_IMX8MP_GPT1_IRQ      = 55,
+    FSL_IMX8MP_GPT2_IRQ      = 54,
+    FSL_IMX8MP_GPT3_IRQ      = 53,
+    FSL_IMX8MP_GPT4_IRQ      = 52,
+    FSL_IMX8MP_GPT5_GPT6_IRQ = 51,
+
     FSL_IMX8MP_GPIO1_LOW_IRQ  = 64,
     FSL_IMX8MP_GPIO1_HIGH_IRQ = 65,
     FSL_IMX8MP_GPIO2_LOW_IRQ  = 66,
diff --git a/include/hw/timer/imx_gpt.h b/include/hw/timer/imx_gpt.h
index 5a1230da35..5488f7e4df 100644
--- a/include/hw/timer/imx_gpt.h
+++ b/include/hw/timer/imx_gpt.h
@@ -80,6 +80,7 @@
 #define TYPE_IMX6_GPT "imx6.gpt"
 #define TYPE_IMX6UL_GPT "imx6ul.gpt"
 #define TYPE_IMX7_GPT "imx7.gpt"
+#define TYPE_IMX8MP_GPT "imx8mp.gpt"
 
 #define TYPE_IMX_GPT TYPE_IMX25_GPT
 

From 0c105b261551a9b9fed086a7b9ecd0b6d6063bc4 Mon Sep 17 00:00:00 2001
From: Bernhard Beschow <shentey@gmail.com>
Date: Sun, 23 Feb 2025 12:47:04 +0100
Subject: [PATCH 41/43] hw/arm/fsl-imx8mp: Add Ethernet controller

The i.MX 8M Plus SoC actually has two ethernet controllers, the usual ENET one
and a Designware one. There is no device model for the latter, so only add the
ENET one.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Bernhard Beschow <shentey@gmail.com>
Message-id: 20250223114708.1780-15-shentey@gmail.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 docs/system/arm/imx8mp-evk.rst |  1 +
 hw/arm/Kconfig                 |  1 +
 hw/arm/fsl-imx8mp.c            | 24 ++++++++++++++++++++++++
 hw/arm/imx8mp-evk.c            |  1 +
 include/hw/arm/fsl-imx8mp.h    |  8 ++++++++
 5 files changed, 35 insertions(+)

diff --git a/docs/system/arm/imx8mp-evk.rst b/docs/system/arm/imx8mp-evk.rst
index 4b195c917f..917c1d5176 100644
--- a/docs/system/arm/imx8mp-evk.rst
+++ b/docs/system/arm/imx8mp-evk.rst
@@ -14,6 +14,7 @@ The ``imx8mp-evk`` machine implements the following devices:
  * 4 UARTs
  * 3 USDHC Storage Controllers
  * 1 Designware PCI Express Controller
+ * 1 Ethernet Controller
  * 5 GPIO Controllers
  * 6 I2C Controllers
  * 3 SPI Controllers
diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig
index 4e83895b91..4d642db970 100644
--- a/hw/arm/Kconfig
+++ b/hw/arm/Kconfig
@@ -601,6 +601,7 @@ config FSL_IMX8MP
     select FSL_IMX8MP_ANALOG
     select FSL_IMX8MP_CCM
     select IMX
+    select IMX_FEC
     select IMX_I2C
     select OR_IRQ
     select PCI_EXPRESS_DESIGNWARE
diff --git a/hw/arm/fsl-imx8mp.c b/hw/arm/fsl-imx8mp.c
index 185c32ee58..2dd3c97a02 100644
--- a/hw/arm/fsl-imx8mp.c
+++ b/hw/arm/fsl-imx8mp.c
@@ -240,6 +240,8 @@ static void fsl_imx8mp_init(Object *obj)
         object_initialize_child(obj, name, &s->wdt[i], TYPE_IMX2_WDT);
     }
 
+    object_initialize_child(obj, "eth0", &s->enet, TYPE_IMX_ENET);
+
     object_initialize_child(obj, "pcie", &s->pcie, TYPE_DESIGNWARE_PCIE_HOST);
     object_initialize_child(obj, "pcie_phy", &s->pcie_phy,
                             TYPE_FSL_IMX8M_PCIE_PHY);
@@ -542,6 +544,21 @@ static void fsl_imx8mp_realize(DeviceState *dev, Error **errp)
                            qdev_get_gpio_in(gicdev, spi_table[i].irq));
     }
 
+    /* ENET1 */
+    object_property_set_uint(OBJECT(&s->enet), "phy-num", s->phy_num,
+                             &error_abort);
+    object_property_set_uint(OBJECT(&s->enet), "tx-ring-num", 3, &error_abort);
+    qemu_configure_nic_device(DEVICE(&s->enet), true, NULL);
+    if (!sysbus_realize(SYS_BUS_DEVICE(&s->enet), errp)) {
+        return;
+    }
+    sysbus_mmio_map(SYS_BUS_DEVICE(&s->enet), 0,
+                    fsl_imx8mp_memmap[FSL_IMX8MP_ENET1].addr);
+    sysbus_connect_irq(SYS_BUS_DEVICE(&s->enet), 0,
+                       qdev_get_gpio_in(gicdev, FSL_IMX8MP_ENET1_MAC_IRQ));
+    sysbus_connect_irq(SYS_BUS_DEVICE(&s->enet), 1,
+                       qdev_get_gpio_in(gicdev, FSL_IMX8MP_ENET1_MAC_1588_IRQ));
+
     /* SNVS */
     if (!sysbus_realize(SYS_BUS_DEVICE(&s->snvs), errp)) {
         return;
@@ -604,6 +621,7 @@ static void fsl_imx8mp_realize(DeviceState *dev, Error **errp)
         case FSL_IMX8MP_GIC_REDIST:
         case FSL_IMX8MP_GPIO1 ... FSL_IMX8MP_GPIO5:
         case FSL_IMX8MP_ECSPI1 ... FSL_IMX8MP_ECSPI3:
+        case FSL_IMX8MP_ENET1:
         case FSL_IMX8MP_I2C1 ... FSL_IMX8MP_I2C6:
         case FSL_IMX8MP_PCIE1:
         case FSL_IMX8MP_PCIE_PHY1:
@@ -624,10 +642,16 @@ static void fsl_imx8mp_realize(DeviceState *dev, Error **errp)
     }
 }
 
+static const Property fsl_imx8mp_properties[] = {
+    DEFINE_PROP_UINT32("fec1-phy-num", FslImx8mpState, phy_num, 0),
+    DEFINE_PROP_BOOL("fec1-phy-connected", FslImx8mpState, phy_connected, true),
+};
+
 static void fsl_imx8mp_class_init(ObjectClass *oc, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(oc);
 
+    device_class_set_props(dc, fsl_imx8mp_properties);
     dc->realize = fsl_imx8mp_realize;
 
     dc->desc = "i.MX 8M Plus SoC";
diff --git a/hw/arm/imx8mp-evk.c b/hw/arm/imx8mp-evk.c
index 27d9e9e8ee..e1a7892fd7 100644
--- a/hw/arm/imx8mp-evk.c
+++ b/hw/arm/imx8mp-evk.c
@@ -36,6 +36,7 @@ static void imx8mp_evk_init(MachineState *machine)
 
     s = FSL_IMX8MP(object_new(TYPE_FSL_IMX8MP));
     object_property_add_child(OBJECT(machine), "soc", OBJECT(s));
+    object_property_set_uint(OBJECT(s), "fec1-phy-num", 1, &error_fatal);
     qdev_realize(DEVICE(s), NULL, &error_fatal);
 
     memory_region_add_subregion(get_system_memory(), FSL_IMX8MP_RAM_START,
diff --git a/include/hw/arm/fsl-imx8mp.h b/include/hw/arm/fsl-imx8mp.h
index 975887751b..e292c31a3d 100644
--- a/include/hw/arm/fsl-imx8mp.h
+++ b/include/hw/arm/fsl-imx8mp.h
@@ -17,6 +17,7 @@
 #include "hw/misc/imx7_snvs.h"
 #include "hw/misc/imx8mp_analog.h"
 #include "hw/misc/imx8mp_ccm.h"
+#include "hw/net/imx_fec.h"
 #include "hw/or-irq.h"
 #include "hw/pci-host/designware.h"
 #include "hw/pci-host/fsl_imx8m_phy.h"
@@ -58,11 +59,15 @@ struct FslImx8mpState {
     IMXSPIState        spi[FSL_IMX8MP_NUM_ECSPIS];
     IMXI2CState        i2c[FSL_IMX8MP_NUM_I2CS];
     IMXSerialState     uart[FSL_IMX8MP_NUM_UARTS];
+    IMXFECState        enet;
     SDHCIState         usdhc[FSL_IMX8MP_NUM_USDHCS];
     IMX2WdtState       wdt[FSL_IMX8MP_NUM_WDTS];
     DesignwarePCIEHost pcie;
     FslImx8mPciePhyState   pcie_phy;
     OrIRQState         gpt5_gpt6_irq;
+
+    uint32_t           phy_num;
+    bool               phy_connected;
 };
 
 enum FslImx8mpMemoryRegions {
@@ -253,6 +258,9 @@ enum FslImx8mpIrqs {
     FSL_IMX8MP_WDOG2_IRQ    = 79,
     FSL_IMX8MP_WDOG3_IRQ    = 10,
 
+    FSL_IMX8MP_ENET1_MAC_IRQ      = 118,
+    FSL_IMX8MP_ENET1_MAC_1588_IRQ = 121,
+
     FSL_IMX8MP_PCI_INTA_IRQ = 126,
     FSL_IMX8MP_PCI_INTB_IRQ = 125,
     FSL_IMX8MP_PCI_INTC_IRQ = 124,

From 4226c39fea1490060163339ae45500bda1b1be05 Mon Sep 17 00:00:00 2001
From: Bernhard Beschow <shentey@gmail.com>
Date: Sun, 23 Feb 2025 12:47:05 +0100
Subject: [PATCH 42/43] hw/arm/fsl-imx8mp: Add USB support

Split the USB MMIO regions to better keep track of the implemented vs.
unimplemented regions.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Bernhard Beschow <shentey@gmail.com>
Message-id: 20250223114708.1780-16-shentey@gmail.com
[PMM: drop "static const" from usb_table for GCC 7.5]
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 docs/system/arm/imx8mp-evk.rst |  1 +
 hw/arm/Kconfig                 |  1 +
 hw/arm/fsl-imx8mp.c            | 37 ++++++++++++++++++++++++++++++++--
 include/hw/arm/fsl-imx8mp.h    | 12 +++++++++++
 4 files changed, 49 insertions(+), 2 deletions(-)

diff --git a/docs/system/arm/imx8mp-evk.rst b/docs/system/arm/imx8mp-evk.rst
index 917c1d5176..00527b0cbe 100644
--- a/docs/system/arm/imx8mp-evk.rst
+++ b/docs/system/arm/imx8mp-evk.rst
@@ -15,6 +15,7 @@ The ``imx8mp-evk`` machine implements the following devices:
  * 3 USDHC Storage Controllers
  * 1 Designware PCI Express Controller
  * 1 Ethernet Controller
+ * 2 Designware USB 3 Controllers
  * 5 GPIO Controllers
  * 6 I2C Controllers
  * 3 SPI Controllers
diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig
index 4d642db970..faa00d1db3 100644
--- a/hw/arm/Kconfig
+++ b/hw/arm/Kconfig
@@ -608,6 +608,7 @@ config FSL_IMX8MP
     select PCI_EXPRESS_FSL_IMX8M_PHY
     select SDHCI
     select UNIMP
+    select USB_DWC3
     select WDT_IMX2
 
 config FSL_IMX8MP_EVK
diff --git a/hw/arm/fsl-imx8mp.c b/hw/arm/fsl-imx8mp.c
index 2dd3c97a02..0880f0c724 100644
--- a/hw/arm/fsl-imx8mp.c
+++ b/hw/arm/fsl-imx8mp.c
@@ -40,8 +40,14 @@ static const struct {
     [FSL_IMX8MP_VPU_VC8000E_ENCODER] = { 0x38320000, 2 * MiB, "vpu_vc8000e_encoder" },
     [FSL_IMX8MP_VPU_G2_DECODER] = { 0x38310000, 2 * MiB, "vpu_g2_decoder" },
     [FSL_IMX8MP_VPU_G1_DECODER] = { 0x38300000, 2 * MiB, "vpu_g1_decoder" },
-    [FSL_IMX8MP_USB2] = { 0x38200000, 1 * MiB, "usb2" },
-    [FSL_IMX8MP_USB1] = { 0x38100000, 1 * MiB, "usb1" },
+    [FSL_IMX8MP_USB2_GLUE] = { 0x382f0000, 0x100, "usb2_glue" },
+    [FSL_IMX8MP_USB2_OTG] = { 0x3820cc00, 0x100, "usb2_otg" },
+    [FSL_IMX8MP_USB2_DEV] = { 0x3820c700, 0x500, "usb2_dev" },
+    [FSL_IMX8MP_USB2] = { 0x38200000, 0xc700, "usb2" },
+    [FSL_IMX8MP_USB1_GLUE] = { 0x381f0000, 0x100, "usb1_glue" },
+    [FSL_IMX8MP_USB1_OTG] = { 0x3810cc00, 0x100, "usb1_otg" },
+    [FSL_IMX8MP_USB1_DEV] = { 0x3810c700, 0x500, "usb1_dev" },
+    [FSL_IMX8MP_USB1] = { 0x38100000, 0xc700, "usb1" },
     [FSL_IMX8MP_GPU2D] = { 0x38008000, 32 * KiB, "gpu2d" },
     [FSL_IMX8MP_GPU3D] = { 0x38000000, 32 * KiB, "gpu3d" },
     [FSL_IMX8MP_QSPI1_RX_BUFFER] = { 0x34000000, 32 * MiB, "qspi1_rx_buffer" },
@@ -230,6 +236,11 @@ static void fsl_imx8mp_init(Object *obj)
         object_initialize_child(obj, name, &s->usdhc[i], TYPE_IMX_USDHC);
     }
 
+    for (i = 0; i < FSL_IMX8MP_NUM_USBS; i++) {
+        g_autofree char *name = g_strdup_printf("usb%d", i);
+        object_initialize_child(obj, name, &s->usb[i], TYPE_USB_DWC3);
+    }
+
     for (i = 0; i < FSL_IMX8MP_NUM_ECSPIS; i++) {
         g_autofree char *name = g_strdup_printf("spi%d", i + 1);
         object_initialize_child(obj, name, &s->spi[i], TYPE_IMX_SPI);
@@ -524,6 +535,27 @@ static void fsl_imx8mp_realize(DeviceState *dev, Error **errp)
                            qdev_get_gpio_in(gicdev, usdhc_table[i].irq));
     }
 
+    /* USBs */
+    for (i = 0; i < FSL_IMX8MP_NUM_USBS; i++) {
+        struct {
+            hwaddr addr;
+            unsigned int irq;
+        } usb_table[FSL_IMX8MP_NUM_USBS] = {
+            { fsl_imx8mp_memmap[FSL_IMX8MP_USB1].addr, FSL_IMX8MP_USB1_IRQ },
+            { fsl_imx8mp_memmap[FSL_IMX8MP_USB2].addr, FSL_IMX8MP_USB2_IRQ },
+        };
+
+        qdev_prop_set_uint32(DEVICE(&s->usb[i].sysbus_xhci), "p2", 1);
+        qdev_prop_set_uint32(DEVICE(&s->usb[i].sysbus_xhci), "p3", 1);
+        qdev_prop_set_uint32(DEVICE(&s->usb[i].sysbus_xhci), "slots", 2);
+        if (!sysbus_realize(SYS_BUS_DEVICE(&s->usb[i]), errp)) {
+            return;
+        }
+        sysbus_mmio_map(SYS_BUS_DEVICE(&s->usb[i]), 0, usb_table[i].addr);
+        sysbus_connect_irq(SYS_BUS_DEVICE(&s->usb[i].sysbus_xhci), 0,
+                           qdev_get_gpio_in(gicdev, usb_table[i].irq));
+    }
+
     /* ECSPIs */
     for (i = 0; i < FSL_IMX8MP_NUM_ECSPIS; i++) {
         struct {
@@ -628,6 +660,7 @@ static void fsl_imx8mp_realize(DeviceState *dev, Error **errp)
         case FSL_IMX8MP_RAM:
         case FSL_IMX8MP_SNVS_HP:
         case FSL_IMX8MP_UART1 ... FSL_IMX8MP_UART4:
+        case FSL_IMX8MP_USB1 ... FSL_IMX8MP_USB2:
         case FSL_IMX8MP_USDHC1 ... FSL_IMX8MP_USDHC3:
         case FSL_IMX8MP_WDOG1 ... FSL_IMX8MP_WDOG3:
             /* device implemented and treated above */
diff --git a/include/hw/arm/fsl-imx8mp.h b/include/hw/arm/fsl-imx8mp.h
index e292c31a3d..5247e972b8 100644
--- a/include/hw/arm/fsl-imx8mp.h
+++ b/include/hw/arm/fsl-imx8mp.h
@@ -24,6 +24,7 @@
 #include "hw/sd/sdhci.h"
 #include "hw/ssi/imx_spi.h"
 #include "hw/timer/imx_gpt.h"
+#include "hw/usb/hcd-dwc3.h"
 #include "hw/watchdog/wdt_imx2.h"
 #include "qom/object.h"
 #include "qemu/units.h"
@@ -42,6 +43,7 @@ enum FslImx8mpConfiguration {
     FSL_IMX8MP_NUM_I2CS         = 6,
     FSL_IMX8MP_NUM_IRQS         = 160,
     FSL_IMX8MP_NUM_UARTS        = 4,
+    FSL_IMX8MP_NUM_USBS         = 2,
     FSL_IMX8MP_NUM_USDHCS       = 3,
     FSL_IMX8MP_NUM_WDTS         = 3,
 };
@@ -62,6 +64,7 @@ struct FslImx8mpState {
     IMXFECState        enet;
     SDHCIState         usdhc[FSL_IMX8MP_NUM_USDHCS];
     IMX2WdtState       wdt[FSL_IMX8MP_NUM_WDTS];
+    USBDWC3            usb[FSL_IMX8MP_NUM_USBS];
     DesignwarePCIEHost pcie;
     FslImx8mPciePhyState   pcie_phy;
     OrIRQState         gpt5_gpt6_irq;
@@ -199,6 +202,12 @@ enum FslImx8mpMemoryRegions {
     FSL_IMX8MP_UART4,
     FSL_IMX8MP_USB1,
     FSL_IMX8MP_USB2,
+    FSL_IMX8MP_USB1_DEV,
+    FSL_IMX8MP_USB2_DEV,
+    FSL_IMX8MP_USB1_OTG,
+    FSL_IMX8MP_USB2_OTG,
+    FSL_IMX8MP_USB1_GLUE,
+    FSL_IMX8MP_USB2_GLUE,
     FSL_IMX8MP_USDHC1,
     FSL_IMX8MP_USDHC2,
     FSL_IMX8MP_USDHC3,
@@ -234,6 +243,9 @@ enum FslImx8mpIrqs {
     FSL_IMX8MP_I2C3_IRQ     = 37,
     FSL_IMX8MP_I2C4_IRQ     = 38,
 
+    FSL_IMX8MP_USB1_IRQ     = 40,
+    FSL_IMX8MP_USB2_IRQ     = 41,
+
     FSL_IMX8MP_GPT1_IRQ      = 55,
     FSL_IMX8MP_GPT2_IRQ      = 54,
     FSL_IMX8MP_GPT3_IRQ      = 53,

From 1aaf3478684ff1cd02d1b36c32a00bfac9a5dbd5 Mon Sep 17 00:00:00 2001
From: Bernhard Beschow <shentey@gmail.com>
Date: Sun, 23 Feb 2025 12:47:07 +0100
Subject: [PATCH 43/43] hw/arm/fsl-imx8mp: Add on-chip RAM

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Bernhard Beschow <shentey@gmail.com>
Message-id: 20250223114708.1780-18-shentey@gmail.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 hw/arm/fsl-imx8mp.c         | 11 +++++++++++
 include/hw/arm/fsl-imx8mp.h |  1 +
 2 files changed, 12 insertions(+)

diff --git a/hw/arm/fsl-imx8mp.c b/hw/arm/fsl-imx8mp.c
index 0880f0c724..1ea98e1463 100644
--- a/hw/arm/fsl-imx8mp.c
+++ b/hw/arm/fsl-imx8mp.c
@@ -644,6 +644,16 @@ static void fsl_imx8mp_realize(DeviceState *dev, Error **errp)
     sysbus_mmio_map(SYS_BUS_DEVICE(&s->pcie_phy), 0,
                     fsl_imx8mp_memmap[FSL_IMX8MP_PCIE_PHY1].addr);
 
+    /* On-Chip RAM */
+    if (!memory_region_init_ram(&s->ocram, NULL, "imx8mp.ocram",
+                                fsl_imx8mp_memmap[FSL_IMX8MP_OCRAM].size,
+                                errp)) {
+        return;
+    }
+    memory_region_add_subregion(get_system_memory(),
+                                fsl_imx8mp_memmap[FSL_IMX8MP_OCRAM].addr,
+                                &s->ocram);
+
     /* Unimplemented devices */
     for (i = 0; i < ARRAY_SIZE(fsl_imx8mp_memmap); i++) {
         switch (i) {
@@ -655,6 +665,7 @@ static void fsl_imx8mp_realize(DeviceState *dev, Error **errp)
         case FSL_IMX8MP_ECSPI1 ... FSL_IMX8MP_ECSPI3:
         case FSL_IMX8MP_ENET1:
         case FSL_IMX8MP_I2C1 ... FSL_IMX8MP_I2C6:
+        case FSL_IMX8MP_OCRAM:
         case FSL_IMX8MP_PCIE1:
         case FSL_IMX8MP_PCIE_PHY1:
         case FSL_IMX8MP_RAM:
diff --git a/include/hw/arm/fsl-imx8mp.h b/include/hw/arm/fsl-imx8mp.h
index 5247e972b8..bc97fc416e 100644
--- a/include/hw/arm/fsl-imx8mp.h
+++ b/include/hw/arm/fsl-imx8mp.h
@@ -68,6 +68,7 @@ struct FslImx8mpState {
     DesignwarePCIEHost pcie;
     FslImx8mPciePhyState   pcie_phy;
     OrIRQState         gpt5_gpt6_irq;
+    MemoryRegion       ocram;
 
     uint32_t           phy_num;
     bool               phy_connected;