diff --git a/.gitlab-ci.d/base.yml b/.gitlab-ci.d/base.yml
index 188a770799..ef173a34e6 100644
--- a/.gitlab-ci.d/base.yml
+++ b/.gitlab-ci.d/base.yml
@@ -68,7 +68,7 @@ variables:
 
     #############################################################
     # Stage 2: fine tune execution of jobs in specific scenarios
-    # where the catch all logic is inapprorpaite
+    # where the catch all logic is inappropriate
     #############################################################
 
     # Optional jobs should not be run unless manually triggered
diff --git a/.gitlab-ci.d/cirrus.yml b/.gitlab-ci.d/cirrus.yml
index 41d64d6680..e7f1f83c2c 100644
--- a/.gitlab-ci.d/cirrus.yml
+++ b/.gitlab-ci.d/cirrus.yml
@@ -15,8 +15,10 @@
   stage: build
   image: registry.gitlab.com/libvirt/libvirt-ci/cirrus-run:master
   needs: []
+  # 20 mins larger than "timeout_in" in cirrus/build.yml
+  # as there's often a 5-10 minute delay before Cirrus CI
+  # actually starts the task
   timeout: 80m
-  allow_failure: true
   script:
     - source .gitlab-ci.d/cirrus/$NAME.vars
     - sed -e "s|[@]CI_REPOSITORY_URL@|$CI_REPOSITORY_URL|g"
diff --git a/.gitlab-ci.d/cirrus/build.yml b/.gitlab-ci.d/cirrus/build.yml
index a9444902ec..29d55c4aa3 100644
--- a/.gitlab-ci.d/cirrus/build.yml
+++ b/.gitlab-ci.d/cirrus/build.yml
@@ -16,6 +16,8 @@ env:
   TEST_TARGETS: "@TEST_TARGETS@"
 
 build_task:
+  # A little shorter than GitLab timeout in ../cirrus.yml
+  timeout_in: 60m
   install_script:
     - @UPDATE_COMMAND@
     - @INSTALL_COMMAND@ @PKGS@
diff --git a/MAINTAINERS b/MAINTAINERS
index 6111b6b4d9..355b1960ce 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -298,11 +298,9 @@ F: hw/openrisc/
 F: tests/tcg/openrisc/
 
 PowerPC TCG CPUs
+M: Nicholas Piggin <npiggin@gmail.com>
 M: Daniel Henrique Barboza <danielhb413@gmail.com>
 R: Cédric Le Goater <clg@kaod.org>
-R: David Gibson <david@gibson.dropbear.id.au>
-R: Greg Kurz <groug@kaod.org>
-R: Nicholas Piggin <npiggin@gmail.com>
 L: qemu-ppc@nongnu.org
 S: Odd Fixes
 F: target/ppc/
@@ -438,10 +436,9 @@ F: target/mips/kvm*
 F: target/mips/sysemu/
 
 PPC KVM CPUs
-M: Daniel Henrique Barboza <danielhb413@gmail.com>
+M: Nicholas Piggin <npiggin@gmail.com>
+R: Daniel Henrique Barboza <danielhb413@gmail.com>
 R: Cédric Le Goater <clg@kaod.org>
-R: David Gibson <david@gibson.dropbear.id.au>
-R: Greg Kurz <groug@kaod.org>
 S: Odd Fixes
 F: target/ppc/kvm.c
 
@@ -543,14 +540,6 @@ F: include/sysemu/xen.h
 F: include/sysemu/xen-mapcache.h
 F: stubs/xen-hw-stub.c
 
-Guest CPU Cores (HAXM)
----------------------
-X86 HAXM CPUs
-S: Orphan
-F: accel/stubs/hax-stub.c
-F: include/sysemu/hax.h
-F: target/i386/hax/
-
 Guest CPU Cores (NVMM)
 ----------------------
 NetBSD Virtual Machine Monitor (NVMM) CPU support
@@ -1034,6 +1023,16 @@ S: Maintained
 F: hw/ssi/xlnx-versal-ospi.c
 F: include/hw/ssi/xlnx-versal-ospi.h
 
+Xilinx Versal CFI
+M: Francisco Iglesias <francisco.iglesias@amd.com>
+S: Maintained
+F: hw/misc/xlnx-cfi-if.c
+F: include/hw/misc/xlnx-cfi-if.h
+F: hw/misc/xlnx-versal-cfu.c
+F: include/hw/misc/xlnx-versal-cfu.h
+F: hw/misc/xlnx-versal-cframe-reg.c
+F: include/hw/misc/xlnx-versal-cframe-reg.h
+
 STM32F100
 M: Alexandre Iooss <erdnaxe@crans.org>
 L: qemu-arm@nongnu.org
@@ -1428,10 +1427,10 @@ F: include/hw/rtc/m48t59.h
 F: tests/avocado/ppc_prep_40p.py
 
 sPAPR (pseries)
-M: Daniel Henrique Barboza <danielhb413@gmail.com>
+M: Nicholas Piggin <npiggin@gmail.com>
+R: Daniel Henrique Barboza <danielhb413@gmail.com>
 R: Cédric Le Goater <clg@kaod.org>
 R: David Gibson <david@gibson.dropbear.id.au>
-R: Greg Kurz <groug@kaod.org>
 R: Harsh Prateek Bora <harshpb@linux.ibm.com>
 L: qemu-ppc@nongnu.org
 S: Odd Fixes
@@ -1450,8 +1449,8 @@ F: tests/avocado/ppc_pseries.py
 
 PowerNV (Non-Virtualized)
 M: Cédric Le Goater <clg@kaod.org>
+M: Nicholas Piggin <npiggin@gmail.com>
 R: Frédéric Barrat <fbarrat@linux.ibm.com>
-R: Nicholas Piggin <npiggin@gmail.com>
 L: qemu-ppc@nongnu.org
 S: Odd Fixes
 F: docs/system/ppc/powernv.rst
@@ -1495,12 +1494,9 @@ F: include/hw/pci-host/mv64361.h
 
 Virtual Open Firmware (VOF)
 M: Alexey Kardashevskiy <aik@ozlabs.ru>
-R: Cédric Le Goater <clg@kaod.org>
-R: Daniel Henrique Barboza <danielhb413@gmail.com>
 R: David Gibson <david@gibson.dropbear.id.au>
-R: Greg Kurz <groug@kaod.org>
 L: qemu-ppc@nongnu.org
-S: Maintained
+S: Odd Fixes
 F: hw/ppc/spapr_vof*
 F: hw/ppc/vof*
 F: include/hw/ppc/vof*
@@ -2256,6 +2252,13 @@ F: tests/qtest/nvme-test.c
 F: docs/system/devices/nvme.rst
 T: git git://git.infradead.org/qemu-nvme.git nvme-next
 
+ufs
+M: Jeuk Kim <jeuk20.kim@samsung.com>
+S: Supported
+F: hw/ufs/*
+F: include/block/ufs.h
+F: tests/qtest/ufs-test.c
+
 megasas
 M: Hannes Reinecke <hare@suse.com>
 L: qemu-block@nongnu.org
@@ -2948,12 +2951,17 @@ W: http://info.iet.unipi.it/~luigi/netmap/
 S: Maintained
 F: net/netmap.c
 
+AF_XDP network backend
+R: Ilya Maximets <i.maximets@ovn.org>
+F: net/af-xdp.c
+
 Host Memory Backends
 M: David Hildenbrand <david@redhat.com>
 M: Igor Mammedov <imammedo@redhat.com>
 S: Maintained
 F: backends/hostmem*.c
 F: include/sysemu/hostmem.h
+F: docs/system/vm-templating.rst
 T: git https://gitlab.com/ehabkost/qemu.git machine-next
 
 Cryptodev Backends
@@ -3701,6 +3709,7 @@ S: Supported
 F: block/parallels.c
 F: block/parallels-ext.c
 F: docs/interop/parallels.txt
+T: git https://src.openvz.org/scm/~den/qemu.git parallels
 
 qed
 M: Stefan Hajnoczi <stefanha@redhat.com>
diff --git a/Makefile b/Makefile
index 5d48dfac18..bfc4b2c8e9 100644
--- a/Makefile
+++ b/Makefile
@@ -164,14 +164,6 @@ ifneq ($(filter $(ninja-targets), $(ninja-cmd-goals)),)
 endif
 endif
 
-ifeq ($(CONFIG_PLUGIN),y)
-.PHONY: plugins
-plugins:
-	$(call quiet-command,\
-		$(MAKE) $(SUBDIR_MAKEFLAGS) -C contrib/plugins V="$(V)", \
-		"BUILD", "example plugins")
-endif # $(CONFIG_PLUGIN)
-
 else # config-host.mak does not exist
 ifneq ($(filter-out $(UNCHECKED_GOALS),$(MAKECMDGOALS)),$(if $(MAKECMDGOALS),,fail))
 $(error Please call configure before running make)
@@ -184,15 +176,20 @@ include $(SRC_PATH)/tests/Makefile.include
 
 all: recurse-all
 
-ROMS_RULES=$(foreach t, all clean distclean, $(addsuffix /$(t), $(ROMS)))
-.PHONY: $(ROMS_RULES)
-$(ROMS_RULES):
+SUBDIR_RULES=$(foreach t, all clean distclean, $(addsuffix /$(t), $(SUBDIRS)))
+.PHONY: $(SUBDIR_RULES)
+$(SUBDIR_RULES):
 	$(call quiet-command,$(MAKE) $(SUBDIR_MAKEFLAGS) -C $(dir $@) V="$(V)" TARGET_DIR="$(dir $@)" $(notdir $@),)
 
+ifneq ($(filter contrib/plugins, $(SUBDIRS)),)
+.PHONY: plugins
+plugins: contrib/plugins/all
+endif
+
 .PHONY: recurse-all recurse-clean
-recurse-all: $(addsuffix /all, $(ROMS))
-recurse-clean: $(addsuffix /clean, $(ROMS))
-recurse-distclean: $(addsuffix /distclean, $(ROMS))
+recurse-all: $(addsuffix /all, $(SUBDIRS))
+recurse-clean: $(addsuffix /clean, $(SUBDIRS))
+recurse-distclean: $(addsuffix /distclean, $(SUBDIRS))
 
 ######################################################################
 
@@ -296,7 +293,7 @@ help:
 	$(call print-help,cscope,Generate cscope index)
 	$(call print-help,sparse,Run sparse on the QEMU source)
 	@echo  ''
-ifeq ($(CONFIG_PLUGIN),y)
+ifneq ($(filter contrib/plugins, $(SUBDIRS)),)
 	@echo  'Plugin targets:'
 	$(call print-help,plugins,Build the example TCG plugins)
 	@echo  ''
@@ -316,7 +313,7 @@ endif
 	@echo  'Documentation targets:'
 	$(call print-help,html man,Build documentation in specified format)
 	@echo  ''
-ifdef CONFIG_WIN32
+ifneq ($(filter msi, $(ninja-targets)),)
 	@echo  'Windows targets:'
 	$(call print-help,installer,Build NSIS-based installer for QEMU)
 	$(call print-help,msi,Build MSI-based installer for qemu-ga)
diff --git a/accel/Kconfig b/accel/Kconfig
index 8bdedb7d15..a30cf2eb48 100644
--- a/accel/Kconfig
+++ b/accel/Kconfig
@@ -4,9 +4,6 @@ config WHPX
 config NVMM
     bool
 
-config HAX
-    bool
-
 config HVF
     bool
 
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index 2ba7521695..ff1578bb32 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -3763,6 +3763,7 @@ static void kvm_accel_instance_init(Object *obj)
     /* KVM dirty ring is by default off */
     s->kvm_dirty_ring_size = 0;
     s->kvm_dirty_ring_with_bitmap = false;
+    s->kvm_eager_split_size = 0;
     s->notify_vmexit = NOTIFY_VMEXIT_OPTION_RUN;
     s->notify_window = 0;
     s->xen_version = 0;
diff --git a/accel/stubs/hax-stub.c b/accel/stubs/hax-stub.c
deleted file mode 100644
index 2fe31aaa9a..0000000000
--- a/accel/stubs/hax-stub.c
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * QEMU HAXM support
- *
- * Copyright (c) 2015, Intel Corporation
- *
- * Copyright 2016 Google, Inc.
- *
- * This software is licensed under the terms of the GNU General Public
- * License version 2, as published by the Free Software Foundation, and
- * may be copied, distributed, and modified under those terms.
- *
- * See the COPYING file in the top-level directory.
- *
- */
-
-#include "qemu/osdep.h"
-#include "sysemu/hax.h"
-
-bool hax_allowed;
-
-int hax_sync_vcpus(void)
-{
-    return 0;
-}
diff --git a/accel/stubs/meson.build b/accel/stubs/meson.build
index f7a9486e06..6b0f200efe 100644
--- a/accel/stubs/meson.build
+++ b/accel/stubs/meson.build
@@ -1,5 +1,4 @@
 sysemu_stubs_ss = ss.source_set()
-sysemu_stubs_ss.add(when: 'CONFIG_HAX', if_false: files('hax-stub.c'))
 sysemu_stubs_ss.add(when: 'CONFIG_XEN', if_false: files('xen-stub.c'))
 sysemu_stubs_ss.add(when: 'CONFIG_KVM', if_false: files('kvm-stub.c'))
 sysemu_stubs_ss.add(when: 'CONFIG_TCG', if_false: files('tcg-stub.c'))
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index 5052e9fc42..f748d36331 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -1200,6 +1200,7 @@ void tlb_set_page_full(CPUState *cpu, int mmu_idx,
     write_flags = read_flags;
     if (is_ram) {
         iotlb = memory_region_get_ram_addr(section->mr) + xlat;
+        assert(!(iotlb & ~TARGET_PAGE_MASK));
         /*
          * Computing is_clean is expensive; avoid all that unless
          * the page is actually writable.
@@ -1262,16 +1263,18 @@ void tlb_set_page_full(CPUState *cpu, int mmu_idx,
 
     /* refill the tlb */
     /*
-     * At this point iotlb contains a physical section number in the lower
-     * TARGET_PAGE_BITS, and either
-     *  + the ram_addr_t of the page base of the target RAM (RAM)
-     *  + the offset within section->mr of the page base (I/O, ROMD)
+     * When memory region is ram, iotlb contains a TARGET_PAGE_BITS
+     * aligned ram_addr_t of the page base of the target RAM.
+     * Otherwise, iotlb contains
+     *  - a physical section number in the lower TARGET_PAGE_BITS
+     *  - the offset within section->mr of the page base (I/O, ROMD) with the
+     *    TARGET_PAGE_BITS masked off.
      * We subtract addr_page (which is page aligned and thus won't
      * disturb the low bits) to give an offset which can be added to the
      * (non-page-aligned) vaddr of the eventual memory access to get
      * the MemoryRegion offset for the access. Note that the vaddr we
      * subtract here is that of the page base, and not the same as the
-     * vaddr we add back in io_readx()/io_writex()/get_page_addr_code().
+     * vaddr we add back in io_prepare()/get_page_addr_code().
      */
     desc->fulltlb[index] = *full;
     full = &desc->fulltlb[index];
@@ -1354,116 +1357,41 @@ static inline void cpu_unaligned_access(CPUState *cpu, vaddr addr,
                                           mmu_idx, retaddr);
 }
 
-static inline void cpu_transaction_failed(CPUState *cpu, hwaddr physaddr,
-                                          vaddr addr, unsigned size,
-                                          MMUAccessType access_type,
-                                          int mmu_idx, MemTxAttrs attrs,
-                                          MemTxResult response,
-                                          uintptr_t retaddr)
-{
-    CPUClass *cc = CPU_GET_CLASS(cpu);
-
-    if (!cpu->ignore_memory_transaction_failures &&
-        cc->tcg_ops->do_transaction_failed) {
-        cc->tcg_ops->do_transaction_failed(cpu, physaddr, addr, size,
-                                           access_type, mmu_idx, attrs,
-                                           response, retaddr);
-    }
-}
-
-/*
- * Save a potentially trashed CPUTLBEntryFull for later lookup by plugin.
- * This is read by tlb_plugin_lookup if the fulltlb entry doesn't match
- * because of the side effect of io_writex changing memory layout.
- */
-static void save_iotlb_data(CPUState *cs, MemoryRegionSection *section,
-                            hwaddr mr_offset)
-{
-#ifdef CONFIG_PLUGIN
-    SavedIOTLB *saved = &cs->saved_iotlb;
-    saved->section = section;
-    saved->mr_offset = mr_offset;
-#endif
-}
-
-static uint64_t io_readx(CPUArchState *env, CPUTLBEntryFull *full,
-                         int mmu_idx, vaddr addr, uintptr_t retaddr,
-                         MMUAccessType access_type, MemOp op)
+static MemoryRegionSection *
+io_prepare(hwaddr *out_offset, CPUArchState *env, hwaddr xlat,
+           MemTxAttrs attrs, vaddr addr, uintptr_t retaddr)
 {
     CPUState *cpu = env_cpu(env);
-    hwaddr mr_offset;
     MemoryRegionSection *section;
-    MemoryRegion *mr;
-    uint64_t val;
-    MemTxResult r;
+    hwaddr mr_offset;
 
-    section = iotlb_to_section(cpu, full->xlat_section, full->attrs);
-    mr = section->mr;
-    mr_offset = (full->xlat_section & TARGET_PAGE_MASK) + addr;
+    section = iotlb_to_section(cpu, xlat, attrs);
+    mr_offset = (xlat & TARGET_PAGE_MASK) + addr;
     cpu->mem_io_pc = retaddr;
     if (!cpu->can_do_io) {
         cpu_io_recompile(cpu, retaddr);
     }
 
-    /*
-     * The memory_region_dispatch may trigger a flush/resize
-     * so for plugins we save the iotlb_data just in case.
-     */
-    save_iotlb_data(cpu, section, mr_offset);
-
-    {
-        QEMU_IOTHREAD_LOCK_GUARD();
-        r = memory_region_dispatch_read(mr, mr_offset, &val, op, full->attrs);
-    }
-
-    if (r != MEMTX_OK) {
-        hwaddr physaddr = mr_offset +
-            section->offset_within_address_space -
-            section->offset_within_region;
-
-        cpu_transaction_failed(cpu, physaddr, addr, memop_size(op), access_type,
-                               mmu_idx, full->attrs, r, retaddr);
-    }
-    return val;
+    *out_offset = mr_offset;
+    return section;
 }
 
-static void io_writex(CPUArchState *env, CPUTLBEntryFull *full,
-                      int mmu_idx, uint64_t val, vaddr addr,
-                      uintptr_t retaddr, MemOp op)
+static void io_failed(CPUArchState *env, CPUTLBEntryFull *full, vaddr addr,
+                      unsigned size, MMUAccessType access_type, int mmu_idx,
+                      MemTxResult response, uintptr_t retaddr)
 {
     CPUState *cpu = env_cpu(env);
-    hwaddr mr_offset;
-    MemoryRegionSection *section;
-    MemoryRegion *mr;
-    MemTxResult r;
 
-    section = iotlb_to_section(cpu, full->xlat_section, full->attrs);
-    mr = section->mr;
-    mr_offset = (full->xlat_section & TARGET_PAGE_MASK) + addr;
-    if (!cpu->can_do_io) {
-        cpu_io_recompile(cpu, retaddr);
-    }
-    cpu->mem_io_pc = retaddr;
+    if (!cpu->ignore_memory_transaction_failures) {
+        CPUClass *cc = CPU_GET_CLASS(cpu);
 
-    /*
-     * The memory_region_dispatch may trigger a flush/resize
-     * so for plugins we save the iotlb_data just in case.
-     */
-    save_iotlb_data(cpu, section, mr_offset);
+        if (cc->tcg_ops->do_transaction_failed) {
+            hwaddr physaddr = full->phys_addr | (addr & ~TARGET_PAGE_MASK);
 
-    {
-        QEMU_IOTHREAD_LOCK_GUARD();
-        r = memory_region_dispatch_write(mr, mr_offset, val, op, full->attrs);
-    }
-
-    if (r != MEMTX_OK) {
-        hwaddr physaddr = mr_offset +
-            section->offset_within_address_space -
-            section->offset_within_region;
-
-        cpu_transaction_failed(cpu, physaddr, addr, memop_size(op),
-                               MMU_DATA_STORE, mmu_idx, full->attrs, r,
-                               retaddr);
+            cc->tcg_ops->do_transaction_failed(cpu, physaddr, addr, size,
+                                               access_type, mmu_idx,
+                                               full->attrs, response, retaddr);
+        }
     }
 }
 
@@ -1733,45 +1661,41 @@ tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, vaddr addr,
  * in the softmmu lookup code (or helper). We don't handle re-fills or
  * checking the victim table. This is purely informational.
  *
- * This almost never fails as the memory access being instrumented
- * should have just filled the TLB. The one corner case is io_writex
- * which can cause TLB flushes and potential resizing of the TLBs
- * losing the information we need. In those cases we need to recover
- * data from a copy of the CPUTLBEntryFull. As long as this always occurs
- * from the same thread (which a mem callback will be) this is safe.
+ * The one corner case is i/o write, which can cause changes to the
+ * address space.  Those changes, and the corresponding tlb flush,
+ * should be delayed until the next TB, so even then this ought not fail.
+ * But check, Just in Case.
  */
-
 bool tlb_plugin_lookup(CPUState *cpu, vaddr addr, int mmu_idx,
                        bool is_store, struct qemu_plugin_hwaddr *data)
 {
     CPUArchState *env = cpu->env_ptr;
     CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr);
     uintptr_t index = tlb_index(env, mmu_idx, addr);
-    uint64_t tlb_addr = is_store ? tlb_addr_write(tlbe) : tlbe->addr_read;
+    MMUAccessType access_type = is_store ? MMU_DATA_STORE : MMU_DATA_LOAD;
+    uint64_t tlb_addr = tlb_read_idx(tlbe, access_type);
+    CPUTLBEntryFull *full;
 
-    if (likely(tlb_hit(tlb_addr, addr))) {
-        /* We must have an iotlb entry for MMIO */
-        if (tlb_addr & TLB_MMIO) {
-            CPUTLBEntryFull *full;
-            full = &env_tlb(env)->d[mmu_idx].fulltlb[index];
-            data->is_io = true;
-            data->v.io.section =
-                iotlb_to_section(cpu, full->xlat_section, full->attrs);
-            data->v.io.offset = (full->xlat_section & TARGET_PAGE_MASK) + addr;
-        } else {
-            data->is_io = false;
-            data->v.ram.hostaddr = (void *)((uintptr_t)addr + tlbe->addend);
-        }
-        return true;
-    } else {
-        SavedIOTLB *saved = &cpu->saved_iotlb;
-        data->is_io = true;
-        data->v.io.section = saved->section;
-        data->v.io.offset = saved->mr_offset;
-        return true;
+    if (unlikely(!tlb_hit(tlb_addr, addr))) {
+        return false;
     }
-}
 
+    full = &env_tlb(env)->d[mmu_idx].fulltlb[index];
+    data->phys_addr = full->phys_addr | (addr & ~TARGET_PAGE_MASK);
+
+    /* We must have an iotlb entry for MMIO */
+    if (tlb_addr & TLB_MMIO) {
+        MemoryRegionSection *section =
+            iotlb_to_section(cpu, full->xlat_section & ~TARGET_PAGE_MASK,
+                             full->attrs);
+        data->is_io = true;
+        data->mr = section->mr;
+    } else {
+        data->is_io = false;
+        data->mr = NULL;
+    }
+    return true;
+}
 #endif
 
 /*
@@ -2115,45 +2039,88 @@ static void *atomic_mmu_lookup(CPUArchState *env, vaddr addr, MemOpIdx oi,
  * Load @size bytes from @addr, which is memory-mapped i/o.
  * The bytes are concatenated in big-endian order with @ret_be.
  */
+static uint64_t int_ld_mmio_beN(CPUArchState *env, CPUTLBEntryFull *full,
+                                uint64_t ret_be, vaddr addr, int size,
+                                int mmu_idx, MMUAccessType type, uintptr_t ra,
+                                MemoryRegion *mr, hwaddr mr_offset)
+{
+    do {
+        MemOp this_mop;
+        unsigned this_size;
+        uint64_t val;
+        MemTxResult r;
+
+        /* Read aligned pieces up to 8 bytes. */
+        this_mop = ctz32(size | (int)addr | 8);
+        this_size = 1 << this_mop;
+        this_mop |= MO_BE;
+
+        r = memory_region_dispatch_read(mr, mr_offset, &val,
+                                        this_mop, full->attrs);
+        if (unlikely(r != MEMTX_OK)) {
+            io_failed(env, full, addr, this_size, type, mmu_idx, r, ra);
+        }
+        if (this_size == 8) {
+            return val;
+        }
+
+        ret_be = (ret_be << (this_size * 8)) | val;
+        addr += this_size;
+        mr_offset += this_size;
+        size -= this_size;
+    } while (size);
+
+    return ret_be;
+}
+
 static uint64_t do_ld_mmio_beN(CPUArchState *env, CPUTLBEntryFull *full,
                                uint64_t ret_be, vaddr addr, int size,
                                int mmu_idx, MMUAccessType type, uintptr_t ra)
 {
-    uint64_t t;
+    MemoryRegionSection *section;
+    MemoryRegion *mr;
+    hwaddr mr_offset;
+    MemTxAttrs attrs;
+    uint64_t ret;
 
     tcg_debug_assert(size > 0 && size <= 8);
-    do {
-        /* Read aligned pieces up to 8 bytes. */
-        switch ((size | (int)addr) & 7) {
-        case 1:
-        case 3:
-        case 5:
-        case 7:
-            t = io_readx(env, full, mmu_idx, addr, ra, type, MO_UB);
-            ret_be = (ret_be << 8) | t;
-            size -= 1;
-            addr += 1;
-            break;
-        case 2:
-        case 6:
-            t = io_readx(env, full, mmu_idx, addr, ra, type, MO_BEUW);
-            ret_be = (ret_be << 16) | t;
-            size -= 2;
-            addr += 2;
-            break;
-        case 4:
-            t = io_readx(env, full, mmu_idx, addr, ra, type, MO_BEUL);
-            ret_be = (ret_be << 32) | t;
-            size -= 4;
-            addr += 4;
-            break;
-        case 0:
-            return io_readx(env, full, mmu_idx, addr, ra, type, MO_BEUQ);
-        default:
-            qemu_build_not_reached();
-        }
-    } while (size);
-    return ret_be;
+
+    attrs = full->attrs;
+    section = io_prepare(&mr_offset, env, full->xlat_section, attrs, addr, ra);
+    mr = section->mr;
+
+    qemu_mutex_lock_iothread();
+    ret = int_ld_mmio_beN(env, full, ret_be, addr, size, mmu_idx,
+                          type, ra, mr, mr_offset);
+    qemu_mutex_unlock_iothread();
+
+    return ret;
+}
+
+static Int128 do_ld16_mmio_beN(CPUArchState *env, CPUTLBEntryFull *full,
+                               uint64_t ret_be, vaddr addr, int size,
+                               int mmu_idx, uintptr_t ra)
+{
+    MemoryRegionSection *section;
+    MemoryRegion *mr;
+    hwaddr mr_offset;
+    MemTxAttrs attrs;
+    uint64_t a, b;
+
+    tcg_debug_assert(size > 8 && size <= 16);
+
+    attrs = full->attrs;
+    section = io_prepare(&mr_offset, env, full->xlat_section, attrs, addr, ra);
+    mr = section->mr;
+
+    qemu_mutex_lock_iothread();
+    a = int_ld_mmio_beN(env, full, ret_be, addr, size - 8, mmu_idx,
+                        MMU_DATA_LOAD, ra, mr, mr_offset);
+    b = int_ld_mmio_beN(env, full, ret_be, addr + size - 8, 8, mmu_idx,
+                        MMU_DATA_LOAD, ra, mr, mr_offset + size - 8);
+    qemu_mutex_unlock_iothread();
+
+    return int128_make128(b, a);
 }
 
 /**
@@ -2298,7 +2265,6 @@ static uint64_t do_ld_beN(CPUArchState *env, MMULookupPageData *p,
     unsigned tmp, half_size;
 
     if (unlikely(p->flags & TLB_MMIO)) {
-        QEMU_IOTHREAD_LOCK_GUARD();
         return do_ld_mmio_beN(env, p->full, ret_be, p->addr, p->size,
                               mmu_idx, type, ra);
     }
@@ -2349,12 +2315,7 @@ static Int128 do_ld16_beN(CPUArchState *env, MMULookupPageData *p,
     MemOp atom;
 
     if (unlikely(p->flags & TLB_MMIO)) {
-        QEMU_IOTHREAD_LOCK_GUARD();
-        a = do_ld_mmio_beN(env, p->full, a, p->addr, size - 8,
-                           mmu_idx, MMU_DATA_LOAD, ra);
-        b = do_ld_mmio_beN(env, p->full, 0, p->addr + 8, 8,
-                           mmu_idx, MMU_DATA_LOAD, ra);
-        return int128_make128(b, a);
+        return do_ld16_mmio_beN(env, p->full, a, p->addr, size, mmu_idx, ra);
     }
 
     /*
@@ -2399,7 +2360,7 @@ static uint8_t do_ld_1(CPUArchState *env, MMULookupPageData *p, int mmu_idx,
                        MMUAccessType type, uintptr_t ra)
 {
     if (unlikely(p->flags & TLB_MMIO)) {
-        return io_readx(env, p->full, mmu_idx, p->addr, ra, type, MO_UB);
+        return do_ld_mmio_beN(env, p->full, 0, p->addr, 1, mmu_idx, type, ra);
     } else {
         return *(uint8_t *)p->haddr;
     }
@@ -2411,7 +2372,6 @@ static uint16_t do_ld_2(CPUArchState *env, MMULookupPageData *p, int mmu_idx,
     uint16_t ret;
 
     if (unlikely(p->flags & TLB_MMIO)) {
-        QEMU_IOTHREAD_LOCK_GUARD();
         ret = do_ld_mmio_beN(env, p->full, 0, p->addr, 2, mmu_idx, type, ra);
         if ((memop & MO_BSWAP) == MO_LE) {
             ret = bswap16(ret);
@@ -2432,7 +2392,6 @@ static uint32_t do_ld_4(CPUArchState *env, MMULookupPageData *p, int mmu_idx,
     uint32_t ret;
 
     if (unlikely(p->flags & TLB_MMIO)) {
-        QEMU_IOTHREAD_LOCK_GUARD();
         ret = do_ld_mmio_beN(env, p->full, 0, p->addr, 4, mmu_idx, type, ra);
         if ((memop & MO_BSWAP) == MO_LE) {
             ret = bswap32(ret);
@@ -2453,7 +2412,6 @@ static uint64_t do_ld_8(CPUArchState *env, MMULookupPageData *p, int mmu_idx,
     uint64_t ret;
 
     if (unlikely(p->flags & TLB_MMIO)) {
-        QEMU_IOTHREAD_LOCK_GUARD();
         ret = do_ld_mmio_beN(env, p->full, 0, p->addr, 8, mmu_idx, type, ra);
         if ((memop & MO_BSWAP) == MO_LE) {
             ret = bswap64(ret);
@@ -2612,12 +2570,8 @@ static Int128 do_ld16_mmu(CPUArchState *env, vaddr addr,
     crosspage = mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD, &l);
     if (likely(!crosspage)) {
         if (unlikely(l.page[0].flags & TLB_MMIO)) {
-            QEMU_IOTHREAD_LOCK_GUARD();
-            a = do_ld_mmio_beN(env, l.page[0].full, 0, addr, 8,
-                               l.mmu_idx, MMU_DATA_LOAD, ra);
-            b = do_ld_mmio_beN(env, l.page[0].full, 0, addr + 8, 8,
-                               l.mmu_idx, MMU_DATA_LOAD, ra);
-            ret = int128_make128(b, a);
+            ret = do_ld16_mmio_beN(env, l.page[0].full, 0, addr, 16,
+                                   l.mmu_idx, ra);
             if ((l.memop & MO_BSWAP) == MO_LE) {
                 ret = bswap128(ret);
             }
@@ -2759,46 +2713,88 @@ Int128 cpu_ld16_mmu(CPUArchState *env, abi_ptr addr,
  * The bytes to store are extracted in little-endian order from @val_le;
  * return the bytes of @val_le beyond @p->size that have not been stored.
  */
+static uint64_t int_st_mmio_leN(CPUArchState *env, CPUTLBEntryFull *full,
+                                uint64_t val_le, vaddr addr, int size,
+                                int mmu_idx, uintptr_t ra,
+                                MemoryRegion *mr, hwaddr mr_offset)
+{
+    do {
+        MemOp this_mop;
+        unsigned this_size;
+        MemTxResult r;
+
+        /* Store aligned pieces up to 8 bytes. */
+        this_mop = ctz32(size | (int)addr | 8);
+        this_size = 1 << this_mop;
+        this_mop |= MO_LE;
+
+        r = memory_region_dispatch_write(mr, mr_offset, val_le,
+                                         this_mop, full->attrs);
+        if (unlikely(r != MEMTX_OK)) {
+            io_failed(env, full, addr, this_size, MMU_DATA_STORE,
+                      mmu_idx, r, ra);
+        }
+        if (this_size == 8) {
+            return 0;
+        }
+
+        val_le >>= this_size * 8;
+        addr += this_size;
+        mr_offset += this_size;
+        size -= this_size;
+    } while (size);
+
+    return val_le;
+}
+
 static uint64_t do_st_mmio_leN(CPUArchState *env, CPUTLBEntryFull *full,
                                uint64_t val_le, vaddr addr, int size,
                                int mmu_idx, uintptr_t ra)
 {
+    MemoryRegionSection *section;
+    hwaddr mr_offset;
+    MemoryRegion *mr;
+    MemTxAttrs attrs;
+    uint64_t ret;
+
     tcg_debug_assert(size > 0 && size <= 8);
 
-    do {
-        /* Store aligned pieces up to 8 bytes. */
-        switch ((size | (int)addr) & 7) {
-        case 1:
-        case 3:
-        case 5:
-        case 7:
-            io_writex(env, full, mmu_idx, val_le, addr, ra, MO_UB);
-            val_le >>= 8;
-            size -= 1;
-            addr += 1;
-            break;
-        case 2:
-        case 6:
-            io_writex(env, full, mmu_idx, val_le, addr, ra, MO_LEUW);
-            val_le >>= 16;
-            size -= 2;
-            addr += 2;
-            break;
-        case 4:
-            io_writex(env, full, mmu_idx, val_le, addr, ra, MO_LEUL);
-            val_le >>= 32;
-            size -= 4;
-            addr += 4;
-            break;
-        case 0:
-            io_writex(env, full, mmu_idx, val_le, addr, ra, MO_LEUQ);
-            return 0;
-        default:
-            qemu_build_not_reached();
-        }
-    } while (size);
+    attrs = full->attrs;
+    section = io_prepare(&mr_offset, env, full->xlat_section, attrs, addr, ra);
+    mr = section->mr;
 
-    return val_le;
+    qemu_mutex_lock_iothread();
+    ret = int_st_mmio_leN(env, full, val_le, addr, size, mmu_idx,
+                          ra, mr, mr_offset);
+    qemu_mutex_unlock_iothread();
+
+    return ret;
+}
+
+static uint64_t do_st16_mmio_leN(CPUArchState *env, CPUTLBEntryFull *full,
+                                 Int128 val_le, vaddr addr, int size,
+                                 int mmu_idx, uintptr_t ra)
+{
+    MemoryRegionSection *section;
+    MemoryRegion *mr;
+    hwaddr mr_offset;
+    MemTxAttrs attrs;
+    uint64_t ret;
+
+    tcg_debug_assert(size > 8 && size <= 16);
+
+    attrs = full->attrs;
+    section = io_prepare(&mr_offset, env, full->xlat_section, attrs, addr, ra);
+    mr = section->mr;
+
+    qemu_mutex_lock_iothread();
+    int_st_mmio_leN(env, full, int128_getlo(val_le), addr, 8,
+                    mmu_idx, ra, mr, mr_offset);
+    ret = int_st_mmio_leN(env, full, int128_gethi(val_le), addr + 8,
+                          size - 8, mmu_idx, ra, mr, mr_offset + 8);
+    qemu_mutex_unlock_iothread();
+
+    return ret;
 }
 
 /*
@@ -2812,7 +2808,6 @@ static uint64_t do_st_leN(CPUArchState *env, MMULookupPageData *p,
     unsigned tmp, half_size;
 
     if (unlikely(p->flags & TLB_MMIO)) {
-        QEMU_IOTHREAD_LOCK_GUARD();
         return do_st_mmio_leN(env, p->full, val_le, p->addr,
                               p->size, mmu_idx, ra);
     } else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
@@ -2867,11 +2862,8 @@ static uint64_t do_st16_leN(CPUArchState *env, MMULookupPageData *p,
     MemOp atom;
 
     if (unlikely(p->flags & TLB_MMIO)) {
-        QEMU_IOTHREAD_LOCK_GUARD();
-        do_st_mmio_leN(env, p->full, int128_getlo(val_le),
-                       p->addr, 8, mmu_idx, ra);
-        return do_st_mmio_leN(env, p->full, int128_gethi(val_le),
-                              p->addr + 8, size - 8, mmu_idx, ra);
+        return do_st16_mmio_leN(env, p->full, val_le, p->addr,
+                                size, mmu_idx, ra);
     } else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
         return int128_gethi(val_le) >> ((size - 8) * 8);
     }
@@ -2915,7 +2907,7 @@ static void do_st_1(CPUArchState *env, MMULookupPageData *p, uint8_t val,
                     int mmu_idx, uintptr_t ra)
 {
     if (unlikely(p->flags & TLB_MMIO)) {
-        io_writex(env, p->full, mmu_idx, val, p->addr, ra, MO_UB);
+        do_st_mmio_leN(env, p->full, val, p->addr, 1, mmu_idx, ra);
     } else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
         /* nothing */
     } else {
@@ -2930,7 +2922,6 @@ static void do_st_2(CPUArchState *env, MMULookupPageData *p, uint16_t val,
         if ((memop & MO_BSWAP) != MO_LE) {
             val = bswap16(val);
         }
-        QEMU_IOTHREAD_LOCK_GUARD();
         do_st_mmio_leN(env, p->full, val, p->addr, 2, mmu_idx, ra);
     } else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
         /* nothing */
@@ -2950,7 +2941,6 @@ static void do_st_4(CPUArchState *env, MMULookupPageData *p, uint32_t val,
         if ((memop & MO_BSWAP) != MO_LE) {
             val = bswap32(val);
         }
-        QEMU_IOTHREAD_LOCK_GUARD();
         do_st_mmio_leN(env, p->full, val, p->addr, 4, mmu_idx, ra);
     } else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
         /* nothing */
@@ -2970,7 +2960,6 @@ static void do_st_8(CPUArchState *env, MMULookupPageData *p, uint64_t val,
         if ((memop & MO_BSWAP) != MO_LE) {
             val = bswap64(val);
         }
-        QEMU_IOTHREAD_LOCK_GUARD();
         do_st_mmio_leN(env, p->full, val, p->addr, 8, mmu_idx, ra);
     } else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
         /* nothing */
@@ -3098,11 +3087,7 @@ static void do_st16_mmu(CPUArchState *env, vaddr addr, Int128 val,
             if ((l.memop & MO_BSWAP) != MO_LE) {
                 val = bswap128(val);
             }
-            a = int128_getlo(val);
-            b = int128_gethi(val);
-            QEMU_IOTHREAD_LOCK_GUARD();
-            do_st_mmio_leN(env, l.page[0].full, a, addr, 8, l.mmu_idx, ra);
-            do_st_mmio_leN(env, l.page[0].full, b, addr + 8, 8, l.mmu_idx, ra);
+            do_st16_mmio_leN(env, l.page[0].full, val, addr, 16, l.mmu_idx, ra);
         } else if (unlikely(l.page[0].flags & TLB_DISCARD_WRITE)) {
             /* nothing */
         } else {
diff --git a/accel/tcg/meson.build b/accel/tcg/meson.build
index 166bef173b..8ace783707 100644
--- a/accel/tcg/meson.build
+++ b/accel/tcg/meson.build
@@ -11,7 +11,9 @@ tcg_ss.add(files(
 ))
 tcg_ss.add(when: 'CONFIG_USER_ONLY', if_true: files('user-exec.c'))
 tcg_ss.add(when: 'CONFIG_SYSTEM_ONLY', if_false: files('user-exec-stub.c'))
-tcg_ss.add(when: 'CONFIG_PLUGIN', if_true: [files('plugin-gen.c')])
+if get_option('plugins')
+  tcg_ss.add(files('plugin-gen.c'))
+endif
 tcg_ss.add(when: libdw, if_true: files('debuginfo.c'))
 tcg_ss.add(when: 'CONFIG_LINUX', if_true: files('perf.c'))
 specific_ss.add_all(when: 'CONFIG_TCG', if_true: tcg_ss)
diff --git a/accel/tcg/tb-maint.c b/accel/tcg/tb-maint.c
index c406b2f7b7..32ae8af61c 100644
--- a/accel/tcg/tb-maint.c
+++ b/accel/tcg/tb-maint.c
@@ -1,5 +1,5 @@
 /*
- * Translation Block Maintaince
+ * Translation Block Maintenance
  *
  *  Copyright (c) 2003 Fabrice Bellard
  *
diff --git a/accel/tcg/tcg-accel-ops-mttcg.c b/accel/tcg/tcg-accel-ops-mttcg.c
index b276262007..4b0dfb4be7 100644
--- a/accel/tcg/tcg-accel-ops-mttcg.c
+++ b/accel/tcg/tcg-accel-ops-mttcg.c
@@ -100,14 +100,9 @@ static void *mttcg_cpu_thread_fn(void *arg)
                 break;
             case EXCP_HALTED:
                 /*
-                 * during start-up the vCPU is reset and the thread is
-                 * kicked several times. If we don't ensure we go back
-                 * to sleep in the halted state we won't cleanly
-                 * start-up when the vCPU is enabled.
-                 *
-                 * cpu->halted should ensure we sleep in wait_io_event
+                 * Usually cpu->halted is set, but may have already been
+                 * reset by another thread by the time we arrive here.
                  */
-                g_assert(cpu->halted);
                 break;
             case EXCP_ATOMIC:
                 qemu_mutex_unlock_iothread();
diff --git a/accel/tcg/tcg-runtime-gvec.c b/accel/tcg/tcg-runtime-gvec.c
index 6c99f952ca..afca89baa1 100644
--- a/accel/tcg/tcg-runtime-gvec.c
+++ b/accel/tcg/tcg-runtime-gvec.c
@@ -1042,6 +1042,32 @@ DO_CMP2(64)
 #undef DO_CMP1
 #undef DO_CMP2
 
+#define DO_CMP1(NAME, TYPE, OP)                                            \
+void HELPER(NAME)(void *d, void *a, uint64_t b64, uint32_t desc)           \
+{                                                                          \
+    intptr_t oprsz = simd_oprsz(desc);                                     \
+    TYPE inv = simd_data(desc), b = b64;                                   \
+    for (intptr_t i = 0; i < oprsz; i += sizeof(TYPE)) {                   \
+        *(TYPE *)(d + i) = -((*(TYPE *)(a + i) OP b) ^ inv);               \
+    }                                                                      \
+    clear_high(d, oprsz, desc);                                            \
+}
+
+#define DO_CMP2(SZ) \
+    DO_CMP1(gvec_eqs##SZ, uint##SZ##_t, ==)    \
+    DO_CMP1(gvec_lts##SZ, int##SZ##_t, <)      \
+    DO_CMP1(gvec_les##SZ, int##SZ##_t, <=)     \
+    DO_CMP1(gvec_ltus##SZ, uint##SZ##_t, <)    \
+    DO_CMP1(gvec_leus##SZ, uint##SZ##_t, <=)
+
+DO_CMP2(8)
+DO_CMP2(16)
+DO_CMP2(32)
+DO_CMP2(64)
+
+#undef DO_CMP1
+#undef DO_CMP2
+
 void HELPER(gvec_ssadd8)(void *d, void *a, void *b, uint32_t desc)
 {
     intptr_t oprsz = simd_oprsz(desc);
diff --git a/accel/tcg/tcg-runtime.h b/accel/tcg/tcg-runtime.h
index a6f7daf05e..18fbaa82ed 100644
--- a/accel/tcg/tcg-runtime.h
+++ b/accel/tcg/tcg-runtime.h
@@ -297,6 +297,31 @@ DEF_HELPER_FLAGS_4(gvec_leu16, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(gvec_leu32, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(gvec_leu64, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 
+DEF_HELPER_FLAGS_4(gvec_eqs8, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_eqs16, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_eqs32, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_eqs64, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+
+DEF_HELPER_FLAGS_4(gvec_lts8, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_lts16, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_lts32, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_lts64, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+
+DEF_HELPER_FLAGS_4(gvec_les8, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_les16, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_les32, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_les64, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+
+DEF_HELPER_FLAGS_4(gvec_ltus8, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_ltus16, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_ltus32, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_ltus64, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+
+DEF_HELPER_FLAGS_4(gvec_leus8, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_leus16, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_leus32, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_leus64, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+
 DEF_HELPER_FLAGS_5(gvec_bitsel, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
 
 //// --- Begin LibAFL code ---
diff --git a/audio/mixeng.h b/audio/mixeng.h
index f9de7cffeb..a5f56d2c26 100644
--- a/audio/mixeng.h
+++ b/audio/mixeng.h
@@ -38,7 +38,7 @@ typedef struct st_sample st_sample;
 typedef void (t_sample) (struct st_sample *dst, const void *src, int samples);
 typedef void (f_sample) (void *dst, const struct st_sample *src, int samples);
 
-/* indices: [stereo][signed][swap endiannes][8, 16 or 32-bits] */
+/* indices: [stereo][signed][swap endianness][8, 16 or 32-bits] */
 extern t_sample *mixeng_conv[2][2][2][3];
 extern f_sample *mixeng_clip[2][2][2][3];
 
diff --git a/backends/cryptodev.c b/backends/cryptodev.c
index 4d183f7237..e5006bd215 100644
--- a/backends/cryptodev.c
+++ b/backends/cryptodev.c
@@ -252,10 +252,11 @@ static void cryptodev_backend_throttle_timer_cb(void *opaque)
             continue;
         }
 
-        throttle_account(&backend->ts, true, ret);
+        throttle_account(&backend->ts, THROTTLE_WRITE, ret);
         cryptodev_backend_operation(backend, op_info);
         if (throttle_enabled(&backend->tc) &&
-            throttle_schedule_timer(&backend->ts, &backend->tt, true)) {
+            throttle_schedule_timer(&backend->ts, &backend->tt,
+                                    THROTTLE_WRITE)) {
             break;
         }
     }
@@ -271,7 +272,7 @@ int cryptodev_backend_crypto_operation(
         goto do_account;
     }
 
-    if (throttle_schedule_timer(&backend->ts, &backend->tt, true) ||
+    if (throttle_schedule_timer(&backend->ts, &backend->tt, THROTTLE_WRITE) ||
         !QTAILQ_EMPTY(&backend->opinfos)) {
         QTAILQ_INSERT_TAIL(&backend->opinfos, op_info, next);
         return 0;
@@ -283,7 +284,7 @@ do_account:
         return ret;
     }
 
-    throttle_account(&backend->ts, true, ret);
+    throttle_account(&backend->ts, THROTTLE_WRITE, ret);
 
     return cryptodev_backend_operation(backend, op_info);
 }
@@ -341,8 +342,7 @@ static void cryptodev_backend_set_throttle(CryptoDevBackend *backend, int field,
     if (!enabled) {
         throttle_init(&backend->ts);
         throttle_timers_init(&backend->tt, qemu_get_aio_context(),
-                             QEMU_CLOCK_REALTIME,
-                             cryptodev_backend_throttle_timer_cb, /* FIXME */
+                             QEMU_CLOCK_REALTIME, NULL,
                              cryptodev_backend_throttle_timer_cb, backend);
     }
 
diff --git a/backends/hostmem-file.c b/backends/hostmem-file.c
index b4335a80e6..361d4a8103 100644
--- a/backends/hostmem-file.c
+++ b/backends/hostmem-file.c
@@ -18,6 +18,8 @@
 #include "sysemu/hostmem.h"
 #include "qom/object_interfaces.h"
 #include "qom/object.h"
+#include "qapi/visitor.h"
+#include "qapi/qapi-visit-common.h"
 
 OBJECT_DECLARE_SIMPLE_TYPE(HostMemoryBackendFile, MEMORY_BACKEND_FILE)
 
@@ -31,6 +33,7 @@ struct HostMemoryBackendFile {
     bool discard_data;
     bool is_pmem;
     bool readonly;
+    OnOffAuto rom;
 };
 
 static void
@@ -53,15 +56,39 @@ file_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
         return;
     }
 
+    switch (fb->rom) {
+    case ON_OFF_AUTO_AUTO:
+        /* Traditionally, opening the file readonly always resulted in ROM. */
+        fb->rom = fb->readonly ? ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF;
+        break;
+    case ON_OFF_AUTO_ON:
+        if (!fb->readonly) {
+            error_setg(errp, "property 'rom' = 'on' is not supported with"
+                       " 'readonly' = 'off'");
+            return;
+        }
+        break;
+    case ON_OFF_AUTO_OFF:
+        if (fb->readonly && backend->share) {
+            error_setg(errp, "property 'rom' = 'off' is incompatible with"
+                       " 'readonly' = 'on' and 'share' = 'on'");
+            return;
+        }
+        break;
+    default:
+        assert(false);
+    }
+
     name = host_memory_backend_get_name(backend);
     ram_flags = backend->share ? RAM_SHARED : 0;
+    ram_flags |= fb->readonly ? RAM_READONLY_FD : 0;
+    ram_flags |= fb->rom == ON_OFF_AUTO_ON ? RAM_READONLY : 0;
     ram_flags |= backend->reserve ? 0 : RAM_NORESERVE;
     ram_flags |= fb->is_pmem ? RAM_PMEM : 0;
     ram_flags |= RAM_NAMED_FILE;
     memory_region_init_ram_from_file(&backend->mr, OBJECT(backend), name,
                                      backend->size, fb->align, ram_flags,
-                                     fb->mem_path, fb->offset, fb->readonly,
-                                     errp);
+                                     fb->mem_path, fb->offset, errp);
     g_free(name);
 #endif
 }
@@ -201,6 +228,32 @@ static void file_memory_backend_set_readonly(Object *obj, bool value,
     fb->readonly = value;
 }
 
+static void file_memory_backend_get_rom(Object *obj, Visitor *v,
+                                        const char *name, void *opaque,
+                                        Error **errp)
+{
+    HostMemoryBackendFile *fb = MEMORY_BACKEND_FILE(obj);
+    OnOffAuto rom = fb->rom;
+
+    visit_type_OnOffAuto(v, name, &rom, errp);
+}
+
+static void file_memory_backend_set_rom(Object *obj, Visitor *v,
+                                        const char *name, void *opaque,
+                                        Error **errp)
+{
+    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
+    HostMemoryBackendFile *fb = MEMORY_BACKEND_FILE(obj);
+
+    if (host_memory_backend_mr_inited(backend)) {
+        error_setg(errp, "cannot change property '%s' of %s.", name,
+                   object_get_typename(obj));
+        return;
+    }
+
+    visit_type_OnOffAuto(v, name, &fb->rom, errp);
+}
+
 static void file_backend_unparent(Object *obj)
 {
     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
@@ -243,6 +296,10 @@ file_backend_class_init(ObjectClass *oc, void *data)
     object_class_property_add_bool(oc, "readonly",
         file_memory_backend_get_readonly,
         file_memory_backend_set_readonly);
+    object_class_property_add(oc, "rom", "OnOffAuto",
+        file_memory_backend_get_rom, file_memory_backend_set_rom, NULL, NULL);
+    object_class_property_set_description(oc, "rom",
+        "Whether to create Read Only Memory (ROM)");
 }
 
 static void file_backend_instance_finalize(Object *o)
diff --git a/backends/tpm/tpm_ioctl.h b/backends/tpm/tpm_ioctl.h
index b1d31768a6..1933ab6855 100644
--- a/backends/tpm/tpm_ioctl.h
+++ b/backends/tpm/tpm_ioctl.h
@@ -238,7 +238,7 @@ struct ptm_lockstorage {
         } req; /* request */
         struct {
             ptm_res tpm_result;
-        } resp; /* reponse */
+        } resp; /* response */
     } u;
 };
 
diff --git a/backends/tpm/tpm_util.c b/backends/tpm/tpm_util.c
index a6e6d3e72f..1856589c3b 100644
--- a/backends/tpm/tpm_util.c
+++ b/backends/tpm/tpm_util.c
@@ -112,12 +112,8 @@ static int tpm_util_request(int fd,
                             void *response,
                             size_t responselen)
 {
-    fd_set readfds;
+    GPollFD fds[1] = { {.fd = fd, .events = G_IO_IN } };
     int n;
-    struct timeval tv = {
-        .tv_sec = 1,
-        .tv_usec = 0,
-    };
 
     n = write(fd, request, requestlen);
     if (n < 0) {
@@ -127,11 +123,8 @@ static int tpm_util_request(int fd,
         return -EFAULT;
     }
 
-    FD_ZERO(&readfds);
-    FD_SET(fd, &readfds);
-
     /* wait for a second */
-    n = select(fd + 1, &readfds, NULL, NULL, &tv);
+    n = RETRY_ON_EINTR(g_poll(fds, 1, 1000));
     if (n != 1) {
         return -errno;
     }
diff --git a/block.c b/block.c
index 0af890f647..e7f349b25c 100644
--- a/block.c
+++ b/block.c
@@ -91,9 +91,11 @@ static BlockDriverState *bdrv_open_inherit(const char *filename,
 static bool bdrv_recurse_has_child(BlockDriverState *bs,
                                    BlockDriverState *child);
 
-static void bdrv_replace_child_noperm(BdrvChild *child,
-                                      BlockDriverState *new_bs);
-static void bdrv_remove_child(BdrvChild *child, Transaction *tran);
+static void GRAPH_WRLOCK
+bdrv_replace_child_noperm(BdrvChild *child, BlockDriverState *new_bs);
+
+static void GRAPH_WRLOCK
+bdrv_remove_child(BdrvChild *child, Transaction *tran);
 
 static int bdrv_reopen_prepare(BDRVReopenState *reopen_state,
                                BlockReopenQueue *queue,
@@ -415,7 +417,7 @@ BlockDriverState *bdrv_new(void)
     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
         QLIST_INIT(&bs->op_blockers[i]);
     }
-    qemu_co_mutex_init(&bs->reqs_lock);
+    qemu_mutex_init(&bs->reqs_lock);
     qemu_mutex_init(&bs->dirty_bitmap_mutex);
     bs->refcnt = 1;
     bs->aio_context = qemu_get_aio_context();
@@ -661,8 +663,10 @@ int coroutine_fn bdrv_co_create_opts_simple(BlockDriver *drv,
     blk = blk_co_new_open(filename, NULL, options,
                           BDRV_O_RDWR | BDRV_O_RESIZE, errp);
     if (!blk) {
-        error_prepend(errp, "Protocol driver '%s' does not support image "
-                      "creation, and opening the image failed: ",
+        error_prepend(errp, "Protocol driver '%s' does not support creating "
+                      "new images, so an existing image must be selected as "
+                      "the target; however, opening the given target as an "
+                      "existing image failed: ",
                       drv->format_name);
         return -EINVAL;
     }
@@ -1697,7 +1701,9 @@ bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv, const char *node_name,
 open_failed:
     bs->drv = NULL;
     if (bs->file != NULL) {
+        bdrv_graph_wrlock(NULL);
         bdrv_unref_child(bs, bs->file);
+        bdrv_graph_wrunlock();
         assert(!bs->file);
     }
     g_free(bs->opaque);
@@ -2113,7 +2119,6 @@ static int bdrv_fill_options(QDict **options, const char *filename,
 
 typedef struct BlockReopenQueueEntry {
      bool prepared;
-     bool perms_checked;
      BDRVReopenState state;
      QTAILQ_ENTRY(BlockReopenQueueEntry) entry;
 } BlockReopenQueueEntry;
@@ -2199,7 +2204,8 @@ static bool bdrv_a_allow_b(BdrvChild *a, BdrvChild *b, Error **errp)
     return false;
 }
 
-static bool bdrv_parent_perms_conflict(BlockDriverState *bs, Error **errp)
+static bool GRAPH_RDLOCK
+bdrv_parent_perms_conflict(BlockDriverState *bs, Error **errp)
 {
     BdrvChild *a, *b;
     GLOBAL_STATE_CODE();
@@ -2224,11 +2230,12 @@ static bool bdrv_parent_perms_conflict(BlockDriverState *bs, Error **errp)
     return false;
 }
 
-static void bdrv_child_perm(BlockDriverState *bs, BlockDriverState *child_bs,
-                            BdrvChild *c, BdrvChildRole role,
-                            BlockReopenQueue *reopen_queue,
-                            uint64_t parent_perm, uint64_t parent_shared,
-                            uint64_t *nperm, uint64_t *nshared)
+static void GRAPH_RDLOCK
+bdrv_child_perm(BlockDriverState *bs, BlockDriverState *child_bs,
+                BdrvChild *c, BdrvChildRole role,
+                BlockReopenQueue *reopen_queue,
+                uint64_t parent_perm, uint64_t parent_shared,
+                uint64_t *nperm, uint64_t *nshared)
 {
     assert(bs->drv && bs->drv->bdrv_child_perm);
     GLOBAL_STATE_CODE();
@@ -2252,8 +2259,8 @@ static void bdrv_child_perm(BlockDriverState *bs, BlockDriverState *child_bs,
  * simplest way to satisfy this criteria: use only result of
  * bdrv_topological_dfs() or NULL as @list parameter.
  */
-static GSList *bdrv_topological_dfs(GSList *list, GHashTable *found,
-                                    BlockDriverState *bs)
+static GSList * GRAPH_RDLOCK
+bdrv_topological_dfs(GSList *list, GHashTable *found, BlockDriverState *bs)
 {
     BdrvChild *child;
     g_autoptr(GHashTable) local_found = NULL;
@@ -2316,7 +2323,7 @@ static void bdrv_child_set_perm(BdrvChild *c, uint64_t perm,
     tran_add(tran, &bdrv_child_set_pem_drv, s);
 }
 
-static void bdrv_drv_set_perm_commit(void *opaque)
+static void GRAPH_RDLOCK bdrv_drv_set_perm_commit(void *opaque)
 {
     BlockDriverState *bs = opaque;
     uint64_t cumulative_perms, cumulative_shared_perms;
@@ -2329,7 +2336,7 @@ static void bdrv_drv_set_perm_commit(void *opaque)
     }
 }
 
-static void bdrv_drv_set_perm_abort(void *opaque)
+static void GRAPH_RDLOCK bdrv_drv_set_perm_abort(void *opaque)
 {
     BlockDriverState *bs = opaque;
     GLOBAL_STATE_CODE();
@@ -2344,9 +2351,13 @@ TransactionActionDrv bdrv_drv_set_perm_drv = {
     .commit = bdrv_drv_set_perm_commit,
 };
 
-static int bdrv_drv_set_perm(BlockDriverState *bs, uint64_t perm,
-                             uint64_t shared_perm, Transaction *tran,
-                             Error **errp)
+/*
+ * After calling this function, the transaction @tran may only be completed
+ * while holding a reader lock for the graph.
+ */
+static int GRAPH_RDLOCK
+bdrv_drv_set_perm(BlockDriverState *bs, uint64_t perm, uint64_t shared_perm,
+                  Transaction *tran, Error **errp)
 {
     GLOBAL_STATE_CODE();
     if (!bs->drv) {
@@ -2372,20 +2383,22 @@ typedef struct BdrvReplaceChildState {
     BlockDriverState *old_bs;
 } BdrvReplaceChildState;
 
-static void bdrv_replace_child_commit(void *opaque)
+static void GRAPH_WRLOCK bdrv_replace_child_commit(void *opaque)
 {
     BdrvReplaceChildState *s = opaque;
     GLOBAL_STATE_CODE();
 
-    bdrv_unref(s->old_bs);
+    bdrv_schedule_unref(s->old_bs);
 }
 
-static void bdrv_replace_child_abort(void *opaque)
+static void GRAPH_WRLOCK bdrv_replace_child_abort(void *opaque)
 {
     BdrvReplaceChildState *s = opaque;
     BlockDriverState *new_bs = s->child->bs;
 
     GLOBAL_STATE_CODE();
+    assert_bdrv_graph_writable();
+
     /* old_bs reference is transparently moved from @s to @s->child */
     if (!s->child->bs) {
         /*
@@ -2402,6 +2415,7 @@ static void bdrv_replace_child_abort(void *opaque)
     }
     assert(s->child->quiesced_parent);
     bdrv_replace_child_noperm(s->child, s->old_bs);
+
     bdrv_unref(new_bs);
 }
 
@@ -2419,10 +2433,14 @@ static TransactionActionDrv bdrv_replace_child_drv = {
  * Both @child->bs and @new_bs (if non-NULL) must be drained. @new_bs must be
  * kept drained until the transaction is completed.
  *
+ * After calling this function, the transaction @tran may only be completed
+ * while holding a writer lock for the graph.
+ *
  * The function doesn't update permissions, caller is responsible for this.
  */
-static void bdrv_replace_child_tran(BdrvChild *child, BlockDriverState *new_bs,
-                                    Transaction *tran)
+static void GRAPH_WRLOCK
+bdrv_replace_child_tran(BdrvChild *child, BlockDriverState *new_bs,
+                        Transaction *tran)
 {
     BdrvReplaceChildState *s = g_new(BdrvReplaceChildState, 1);
 
@@ -2438,6 +2456,7 @@ static void bdrv_replace_child_tran(BdrvChild *child, BlockDriverState *new_bs,
     if (new_bs) {
         bdrv_ref(new_bs);
     }
+
     bdrv_replace_child_noperm(child, new_bs);
     /* old_bs reference is transparently moved from @child to @s */
 }
@@ -2445,9 +2464,13 @@ static void bdrv_replace_child_tran(BdrvChild *child, BlockDriverState *new_bs,
 /*
  * Refresh permissions in @bs subtree. The function is intended to be called
  * after some graph modification that was done without permission update.
+ *
+ * After calling this function, the transaction @tran may only be completed
+ * while holding a reader lock for the graph.
  */
-static int bdrv_node_refresh_perm(BlockDriverState *bs, BlockReopenQueue *q,
-                                  Transaction *tran, Error **errp)
+static int GRAPH_RDLOCK
+bdrv_node_refresh_perm(BlockDriverState *bs, BlockReopenQueue *q,
+                       Transaction *tran, Error **errp)
 {
     BlockDriver *drv = bs->drv;
     BdrvChild *c;
@@ -2520,9 +2543,13 @@ static int bdrv_node_refresh_perm(BlockDriverState *bs, BlockReopenQueue *q,
 /*
  * @list is a product of bdrv_topological_dfs() (may be called several times) -
  * a topologically sorted subgraph.
+ *
+ * After calling this function, the transaction @tran may only be completed
+ * while holding a reader lock for the graph.
  */
-static int bdrv_do_refresh_perms(GSList *list, BlockReopenQueue *q,
-                                 Transaction *tran, Error **errp)
+static int GRAPH_RDLOCK
+bdrv_do_refresh_perms(GSList *list, BlockReopenQueue *q, Transaction *tran,
+                      Error **errp)
 {
     int ret;
     BlockDriverState *bs;
@@ -2548,9 +2575,13 @@ static int bdrv_do_refresh_perms(GSList *list, BlockReopenQueue *q,
  * @list is any list of nodes. List is completed by all subtrees and
  * topologically sorted. It's not a problem if some node occurs in the @list
  * several times.
+ *
+ * After calling this function, the transaction @tran may only be completed
+ * while holding a reader lock for the graph.
  */
-static int bdrv_list_refresh_perms(GSList *list, BlockReopenQueue *q,
-                                   Transaction *tran, Error **errp)
+static int GRAPH_RDLOCK
+bdrv_list_refresh_perms(GSList *list, BlockReopenQueue *q, Transaction *tran,
+                        Error **errp)
 {
     g_autoptr(GHashTable) found = g_hash_table_new(NULL, NULL);
     g_autoptr(GSList) refresh_list = NULL;
@@ -2609,9 +2640,14 @@ char *bdrv_perm_names(uint64_t perm)
 }
 
 
-/* @tran is allowed to be NULL. In this case no rollback is possible */
-static int bdrv_refresh_perms(BlockDriverState *bs, Transaction *tran,
-                              Error **errp)
+/*
+ * @tran is allowed to be NULL. In this case no rollback is possible.
+ *
+ * After calling this function, the transaction @tran may only be completed
+ * while holding a reader lock for the graph.
+ */
+static int GRAPH_RDLOCK
+bdrv_refresh_perms(BlockDriverState *bs, Transaction *tran, Error **errp)
 {
     int ret;
     Transaction *local_tran = NULL;
@@ -2857,8 +2893,8 @@ uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission qapi_perm)
  * If @new_bs is non-NULL, the parent of @child must already be drained through
  * @child and the caller must hold the AioContext lock for @new_bs.
  */
-static void bdrv_replace_child_noperm(BdrvChild *child,
-                                      BlockDriverState *new_bs)
+static void GRAPH_WRLOCK
+bdrv_replace_child_noperm(BdrvChild *child, BlockDriverState *new_bs)
 {
     BlockDriverState *old_bs = child->bs;
     int new_bs_quiesce_counter;
@@ -2893,8 +2929,6 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
         assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs));
     }
 
-    /* TODO Pull this up into the callers to avoid polling here */
-    bdrv_graph_wrlock(new_bs);
     if (old_bs) {
         if (child->klass->detach) {
             child->klass->detach(child);
@@ -2910,7 +2944,6 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
             child->klass->attach(child);
         }
     }
-    bdrv_graph_wrunlock();
 
     /*
      * If the parent was drained through this BdrvChild previously, but new_bs
@@ -2945,12 +2978,14 @@ typedef struct BdrvAttachChildCommonState {
     AioContext *old_child_ctx;
 } BdrvAttachChildCommonState;
 
-static void bdrv_attach_child_common_abort(void *opaque)
+static void GRAPH_WRLOCK bdrv_attach_child_common_abort(void *opaque)
 {
     BdrvAttachChildCommonState *s = opaque;
     BlockDriverState *bs = s->child->bs;
 
     GLOBAL_STATE_CODE();
+    assert_bdrv_graph_writable();
+
     bdrv_replace_child_noperm(s->child, NULL);
 
     if (bdrv_get_aio_context(bs) != s->old_child_ctx) {
@@ -2975,7 +3010,7 @@ static void bdrv_attach_child_common_abort(void *opaque)
         tran_commit(tran);
     }
 
-    bdrv_unref(bs);
+    bdrv_schedule_unref(bs);
     bdrv_child_free(s->child);
 }
 
@@ -2989,19 +3024,23 @@ static TransactionActionDrv bdrv_attach_child_common_drv = {
  *
  * Function doesn't update permissions, caller is responsible for this.
  *
+ * After calling this function, the transaction @tran may only be completed
+ * while holding a writer lock for the graph.
+ *
  * Returns new created child.
  *
  * The caller must hold the AioContext lock for @child_bs. Both @parent_bs and
  * @child_bs can move to a different AioContext in this function. Callers must
  * make sure that their AioContext locking is still correct after this.
  */
-static BdrvChild *bdrv_attach_child_common(BlockDriverState *child_bs,
-                                           const char *child_name,
-                                           const BdrvChildClass *child_class,
-                                           BdrvChildRole child_role,
-                                           uint64_t perm, uint64_t shared_perm,
-                                           void *opaque,
-                                           Transaction *tran, Error **errp)
+static BdrvChild * GRAPH_WRLOCK
+bdrv_attach_child_common(BlockDriverState *child_bs,
+                         const char *child_name,
+                         const BdrvChildClass *child_class,
+                         BdrvChildRole child_role,
+                         uint64_t perm, uint64_t shared_perm,
+                         void *opaque,
+                         Transaction *tran, Error **errp)
 {
     BdrvChild *new_child;
     AioContext *parent_ctx, *new_child_ctx;
@@ -3104,14 +3143,18 @@ static BdrvChild *bdrv_attach_child_common(BlockDriverState *child_bs,
  * The caller must hold the AioContext lock for @child_bs. Both @parent_bs and
  * @child_bs can move to a different AioContext in this function. Callers must
  * make sure that their AioContext locking is still correct after this.
+ *
+ * After calling this function, the transaction @tran may only be completed
+ * while holding a writer lock for the graph.
  */
-static BdrvChild *bdrv_attach_child_noperm(BlockDriverState *parent_bs,
-                                           BlockDriverState *child_bs,
-                                           const char *child_name,
-                                           const BdrvChildClass *child_class,
-                                           BdrvChildRole child_role,
-                                           Transaction *tran,
-                                           Error **errp)
+static BdrvChild * GRAPH_WRLOCK
+bdrv_attach_child_noperm(BlockDriverState *parent_bs,
+                         BlockDriverState *child_bs,
+                         const char *child_name,
+                         const BdrvChildClass *child_class,
+                         BdrvChildRole child_role,
+                         Transaction *tran,
+                         Error **errp)
 {
     uint64_t perm, shared_perm;
 
@@ -3156,6 +3199,8 @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
 
     GLOBAL_STATE_CODE();
 
+    bdrv_graph_wrlock(child_bs);
+
     child = bdrv_attach_child_common(child_bs, child_name, child_class,
                                    child_role, perm, shared_perm, opaque,
                                    tran, errp);
@@ -3168,6 +3213,7 @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
 
 out:
     tran_finalize(tran, ret);
+    bdrv_graph_wrunlock();
 
     bdrv_unref(child_bs);
 
@@ -3213,7 +3259,7 @@ BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
 out:
     tran_finalize(tran, ret);
 
-    bdrv_unref(child_bs);
+    bdrv_schedule_unref(child_bs);
 
     return ret < 0 ? NULL : child;
 }
@@ -3243,7 +3289,7 @@ void bdrv_root_unref_child(BdrvChild *child)
                                     NULL);
     }
 
-    bdrv_unref(child_bs);
+    bdrv_schedule_unref(child_bs);
 }
 
 typedef struct BdrvSetInheritsFrom {
@@ -3287,8 +3333,9 @@ static void bdrv_set_inherits_from(BlockDriverState *bs,
  * @root that point to @root, where necessary.
  * @tran is allowed to be NULL. In this case no rollback is possible
  */
-static void bdrv_unset_inherits_from(BlockDriverState *root, BdrvChild *child,
-                                     Transaction *tran)
+static void GRAPH_WRLOCK
+bdrv_unset_inherits_from(BlockDriverState *root, BdrvChild *child,
+                         Transaction *tran)
 {
     BdrvChild *c;
 
@@ -3325,7 +3372,8 @@ void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child)
 }
 
 
-static void bdrv_parent_cb_change_media(BlockDriverState *bs, bool load)
+static void GRAPH_RDLOCK
+bdrv_parent_cb_change_media(BlockDriverState *bs, bool load)
 {
     BdrvChild *c;
     GLOBAL_STATE_CODE();
@@ -3366,16 +3414,23 @@ static BdrvChildRole bdrv_backing_role(BlockDriverState *bs)
  * Sets the bs->backing or bs->file link of a BDS. A new reference is created;
  * callers which don't need their own reference any more must call bdrv_unref().
  *
+ * If the respective child is already present (i.e. we're detaching a node),
+ * that child node must be drained.
+ *
  * Function doesn't update permissions, caller is responsible for this.
  *
  * The caller must hold the AioContext lock for @child_bs. Both @parent_bs and
  * @child_bs can move to a different AioContext in this function. Callers must
  * make sure that their AioContext locking is still correct after this.
+ *
+ * After calling this function, the transaction @tran may only be completed
+ * while holding a writer lock for the graph.
  */
-static int bdrv_set_file_or_backing_noperm(BlockDriverState *parent_bs,
-                                           BlockDriverState *child_bs,
-                                           bool is_backing,
-                                           Transaction *tran, Error **errp)
+static int GRAPH_WRLOCK
+bdrv_set_file_or_backing_noperm(BlockDriverState *parent_bs,
+                                BlockDriverState *child_bs,
+                                bool is_backing,
+                                Transaction *tran, Error **errp)
 {
     bool update_inherits_from =
         bdrv_inherits_from_recursive(child_bs, parent_bs);
@@ -3426,6 +3481,7 @@ static int bdrv_set_file_or_backing_noperm(BlockDriverState *parent_bs,
     }
 
     if (child) {
+        assert(child->bs->quiesce_counter);
         bdrv_unset_inherits_from(parent_bs, child, tran);
         bdrv_remove_child(child, tran);
     }
@@ -3452,9 +3508,7 @@ static int bdrv_set_file_or_backing_noperm(BlockDriverState *parent_bs,
     }
 
 out:
-    bdrv_graph_rdlock_main_loop();
     bdrv_refresh_limits(parent_bs, tran, NULL);
-    bdrv_graph_rdunlock_main_loop();
 
     return 0;
 }
@@ -3463,10 +3517,17 @@ out:
  * The caller must hold the AioContext lock for @backing_hd. Both @bs and
  * @backing_hd can move to a different AioContext in this function. Callers must
  * make sure that their AioContext locking is still correct after this.
+ *
+ * If a backing child is already present (i.e. we're detaching a node), that
+ * child node must be drained.
+ *
+ * After calling this function, the transaction @tran may only be completed
+ * while holding a writer lock for the graph.
  */
-static int bdrv_set_backing_noperm(BlockDriverState *bs,
-                                   BlockDriverState *backing_hd,
-                                   Transaction *tran, Error **errp)
+static int GRAPH_WRLOCK
+bdrv_set_backing_noperm(BlockDriverState *bs,
+                        BlockDriverState *backing_hd,
+                        Transaction *tran, Error **errp)
 {
     GLOBAL_STATE_CODE();
     return bdrv_set_file_or_backing_noperm(bs, backing_hd, true, tran, errp);
@@ -3481,6 +3542,10 @@ int bdrv_set_backing_hd_drained(BlockDriverState *bs,
 
     GLOBAL_STATE_CODE();
     assert(bs->quiesce_counter > 0);
+    if (bs->backing) {
+        assert(bs->backing->bs->quiesce_counter > 0);
+    }
+    bdrv_graph_wrlock(backing_hd);
 
     ret = bdrv_set_backing_noperm(bs, backing_hd, tran, errp);
     if (ret < 0) {
@@ -3490,18 +3555,22 @@ int bdrv_set_backing_hd_drained(BlockDriverState *bs,
     ret = bdrv_refresh_perms(bs, tran, errp);
 out:
     tran_finalize(tran, ret);
+    bdrv_graph_wrunlock();
     return ret;
 }
 
 int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
                         Error **errp)
 {
+    BlockDriverState *drain_bs = bs->backing ? bs->backing->bs : bs;
     int ret;
     GLOBAL_STATE_CODE();
 
-    bdrv_drained_begin(bs);
+    bdrv_ref(drain_bs);
+    bdrv_drained_begin(drain_bs);
     ret = bdrv_set_backing_hd_drained(bs, backing_hd, errp);
-    bdrv_drained_end(bs);
+    bdrv_drained_end(drain_bs);
+    bdrv_unref(drain_bs);
 
     return ret;
 }
@@ -3711,11 +3780,13 @@ BdrvChild *bdrv_open_child(const char *filename,
         return NULL;
     }
 
+    bdrv_graph_wrlock(NULL);
     ctx = bdrv_get_aio_context(bs);
     aio_context_acquire(ctx);
     child = bdrv_attach_child(parent, bs, bdref_key, child_class, child_role,
                               errp);
     aio_context_release(ctx);
+    bdrv_graph_wrunlock();
 
     return child;
 }
@@ -3900,6 +3971,9 @@ bdrv_open_inherit(const char *filename, const char *reference, QDict *options,
     GLOBAL_STATE_CODE();
     assert(!qemu_in_coroutine());
 
+    /* TODO We'll eventually have to take a writer lock in this function */
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
     if (reference) {
         bool options_non_empty = options ? qdict_size(options) : false;
         qobject_unref(options);
@@ -4539,7 +4613,10 @@ int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
      * reconfiguring the fd and that's why it does it in raw_check_perm(), not
      * in raw_reopen_prepare() which is called with "old" permissions.
      */
+    bdrv_graph_rdlock_main_loop();
     ret = bdrv_list_refresh_perms(refresh_list, bs_queue, tran, errp);
+    bdrv_graph_rdunlock_main_loop();
+
     if (ret < 0) {
         goto abort;
     }
@@ -4560,7 +4637,9 @@ int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
         aio_context_release(ctx);
     }
 
+    bdrv_graph_wrlock(NULL);
     tran_commit(tran);
+    bdrv_graph_wrunlock();
 
     QTAILQ_FOREACH_REVERSE(bs_entry, bs_queue, entry) {
         BlockDriverState *bs = bs_entry->state.bs;
@@ -4577,7 +4656,10 @@ int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
     goto cleanup;
 
 abort:
+    bdrv_graph_wrlock(NULL);
     tran_abort(tran);
+    bdrv_graph_wrunlock();
+
     QTAILQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
         if (bs_entry->prepared) {
             ctx = bdrv_get_aio_context(bs_entry->state.bs);
@@ -4643,6 +4725,9 @@ int bdrv_reopen_set_read_only(BlockDriverState *bs, bool read_only,
  * true and reopen_state->new_backing_bs contains a pointer to the new
  * backing BlockDriverState (or NULL).
  *
+ * After calling this function, the transaction @tran may only be completed
+ * while holding a writer lock for the graph.
+ *
  * Return 0 on success, otherwise return < 0 and set @errp.
  *
  * The caller must hold the AioContext lock of @reopen_state->bs.
@@ -4727,6 +4812,11 @@ static int bdrv_reopen_parse_file_or_backing(BDRVReopenState *reopen_state,
         reopen_state->old_file_bs = old_child_bs;
     }
 
+    if (old_child_bs) {
+        bdrv_ref(old_child_bs);
+        bdrv_drained_begin(old_child_bs);
+    }
+
     old_ctx = bdrv_get_aio_context(bs);
     ctx = bdrv_get_aio_context(new_child_bs);
     if (old_ctx != ctx) {
@@ -4734,14 +4824,23 @@ static int bdrv_reopen_parse_file_or_backing(BDRVReopenState *reopen_state,
         aio_context_acquire(ctx);
     }
 
+    bdrv_graph_wrlock(new_child_bs);
+
     ret = bdrv_set_file_or_backing_noperm(bs, new_child_bs, is_backing,
                                           tran, errp);
 
+    bdrv_graph_wrunlock();
+
     if (old_ctx != ctx) {
         aio_context_release(ctx);
         aio_context_acquire(old_ctx);
     }
 
+    if (old_child_bs) {
+        bdrv_drained_end(old_child_bs);
+        bdrv_unref(old_child_bs);
+    }
+
     return ret;
 }
 
@@ -4762,6 +4861,9 @@ static int bdrv_reopen_parse_file_or_backing(BDRVReopenState *reopen_state,
  * commit() for any other BDS that have been left in a prepare() state
  *
  * The caller must hold the AioContext lock of @reopen_state->bs.
+ *
+ * After calling this function, the transaction @change_child_tran may only be
+ * completed while holding a writer lock for the graph.
  */
 static int bdrv_reopen_prepare(BDRVReopenState *reopen_state,
                                BlockReopenQueue *queue,
@@ -5063,9 +5165,11 @@ static void bdrv_close(BlockDriverState *bs)
         bs->drv = NULL;
     }
 
+    bdrv_graph_wrlock(NULL);
     QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
         bdrv_unref_child(bs, child);
     }
+    bdrv_graph_wrunlock();
 
     assert(!bs->backing);
     assert(!bs->file);
@@ -5120,7 +5224,7 @@ void bdrv_close_all(void)
     assert(QTAILQ_EMPTY(&all_bdrv_states));
 }
 
-static bool should_update_child(BdrvChild *c, BlockDriverState *to)
+static bool GRAPH_RDLOCK should_update_child(BdrvChild *c, BlockDriverState *to)
 {
     GQueue *queue;
     GHashTable *found;
@@ -5209,45 +5313,47 @@ static TransactionActionDrv bdrv_remove_child_drv = {
     .commit = bdrv_remove_child_commit,
 };
 
-/* Function doesn't update permissions, caller is responsible for this. */
-static void bdrv_remove_child(BdrvChild *child, Transaction *tran)
+/*
+ * Function doesn't update permissions, caller is responsible for this.
+ *
+ * @child->bs (if non-NULL) must be drained.
+ *
+ * After calling this function, the transaction @tran may only be completed
+ * while holding a writer lock for the graph.
+ */
+static void GRAPH_WRLOCK bdrv_remove_child(BdrvChild *child, Transaction *tran)
 {
     if (!child) {
         return;
     }
 
     if (child->bs) {
-        BlockDriverState *bs = child->bs;
-        bdrv_drained_begin(bs);
+        assert(child->quiesced_parent);
         bdrv_replace_child_tran(child, NULL, tran);
-        bdrv_drained_end(bs);
     }
 
     tran_add(tran, &bdrv_remove_child_drv, child);
 }
 
-static void undrain_on_clean_cb(void *opaque)
-{
-    bdrv_drained_end(opaque);
-}
-
-static TransactionActionDrv undrain_on_clean = {
-    .clean = undrain_on_clean_cb,
-};
-
-static int bdrv_replace_node_noperm(BlockDriverState *from,
-                                    BlockDriverState *to,
-                                    bool auto_skip, Transaction *tran,
-                                    Error **errp)
+/*
+ * Both @from and @to (if non-NULL) must be drained. @to must be kept drained
+ * until the transaction is completed.
+ *
+ * After calling this function, the transaction @tran may only be completed
+ * while holding a writer lock for the graph.
+ */
+static int GRAPH_WRLOCK
+bdrv_replace_node_noperm(BlockDriverState *from,
+                         BlockDriverState *to,
+                         bool auto_skip, Transaction *tran,
+                         Error **errp)
 {
     BdrvChild *c, *next;
 
     GLOBAL_STATE_CODE();
 
-    bdrv_drained_begin(from);
-    bdrv_drained_begin(to);
-    tran_add(tran, &undrain_on_clean, from);
-    tran_add(tran, &undrain_on_clean, to);
+    assert(from->quiesce_counter);
+    assert(to->quiesce_counter);
 
     QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) {
         assert(c->bs == from);
@@ -5310,6 +5416,9 @@ static int bdrv_replace_node_common(BlockDriverState *from,
     assert(qemu_get_current_aio_context() == qemu_get_aio_context());
     assert(bdrv_get_aio_context(from) == bdrv_get_aio_context(to));
     bdrv_drained_begin(from);
+    bdrv_drained_begin(to);
+
+    bdrv_graph_wrlock(to);
 
     /*
      * Do the replacement without permission update.
@@ -5323,6 +5432,7 @@ static int bdrv_replace_node_common(BlockDriverState *from,
     }
 
     if (detach_subchain) {
+        /* to_cow_parent is already drained because from is drained */
         bdrv_remove_child(bdrv_filter_or_cow_child(to_cow_parent), tran);
     }
 
@@ -5338,7 +5448,9 @@ static int bdrv_replace_node_common(BlockDriverState *from,
 
 out:
     tran_finalize(tran, ret);
+    bdrv_graph_wrunlock();
 
+    bdrv_drained_end(to);
     bdrv_drained_end(from);
     bdrv_unref(from);
 
@@ -5388,6 +5500,22 @@ int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top,
     assert(!bs_new->backing);
 
     old_context = bdrv_get_aio_context(bs_top);
+    bdrv_drained_begin(bs_top);
+
+    /*
+     * bdrv_drained_begin() requires that only the AioContext of the drained
+     * node is locked, and at this point it can still differ from the AioContext
+     * of bs_top.
+     */
+    new_context = bdrv_get_aio_context(bs_new);
+    aio_context_release(old_context);
+    aio_context_acquire(new_context);
+    bdrv_drained_begin(bs_new);
+    aio_context_release(new_context);
+    aio_context_acquire(old_context);
+    new_context = NULL;
+
+    bdrv_graph_wrlock(bs_top);
 
     child = bdrv_attach_child_noperm(bs_new, bs_top, "backing",
                                      &child_of_bds, bdrv_backing_role(bs_new),
@@ -5398,10 +5526,9 @@ int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top,
     }
 
     /*
-     * bdrv_attach_child_noperm could change the AioContext of bs_top.
-     * bdrv_replace_node_noperm calls bdrv_drained_begin, so let's temporarily
-     * hold the new AioContext, since bdrv_drained_begin calls BDRV_POLL_WHILE
-     * that assumes the new lock is taken.
+     * bdrv_attach_child_noperm could change the AioContext of bs_top and
+     * bs_new, but at least they are in the same AioContext now. This is the
+     * AioContext that we need to lock for the rest of the function.
      */
     new_context = bdrv_get_aio_context(bs_top);
 
@@ -5419,9 +5546,11 @@ int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top,
 out:
     tran_finalize(tran, ret);
 
-    bdrv_graph_rdlock_main_loop();
     bdrv_refresh_limits(bs_top, NULL, NULL);
-    bdrv_graph_rdunlock_main_loop();
+    bdrv_graph_wrunlock();
+
+    bdrv_drained_end(bs_top);
+    bdrv_drained_end(bs_new);
 
     if (new_context && old_context != new_context) {
         aio_context_release(new_context);
@@ -5445,6 +5574,7 @@ int bdrv_replace_child_bs(BdrvChild *child, BlockDriverState *new_bs,
     bdrv_ref(old_bs);
     bdrv_drained_begin(old_bs);
     bdrv_drained_begin(new_bs);
+    bdrv_graph_wrlock(new_bs);
 
     bdrv_replace_child_tran(child, new_bs, tran);
 
@@ -5455,6 +5585,7 @@ int bdrv_replace_child_bs(BdrvChild *child, BlockDriverState *new_bs,
 
     tran_finalize(tran, ret);
 
+    bdrv_graph_wrunlock();
     bdrv_drained_end(old_bs);
     bdrv_drained_end(new_bs);
     bdrv_unref(old_bs);
@@ -5476,6 +5607,8 @@ static void bdrv_delete(BlockDriverState *bs)
 
     bdrv_close(bs);
 
+    qemu_mutex_destroy(&bs->reqs_lock);
+
     g_free(bs);
 }
 
@@ -5808,9 +5941,11 @@ int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base,
         backing_file_str = base->filename;
     }
 
+    bdrv_graph_rdlock_main_loop();
     QLIST_FOREACH(c, &top->parents, next_parent) {
         updated_children = g_slist_prepend(updated_children, c);
     }
+    bdrv_graph_rdunlock_main_loop();
 
     /*
      * It seems correct to pass detach_subchain=true here, but it triggers
@@ -6733,6 +6868,7 @@ int bdrv_activate(BlockDriverState *bs, Error **errp)
     BdrvDirtyBitmap *bm;
 
     GLOBAL_STATE_CODE();
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
 
     if (!bs->drv)  {
         return -ENOMEDIUM;
@@ -6863,6 +6999,7 @@ static int bdrv_inactivate_recurse(BlockDriverState *bs)
     uint64_t cumulative_perms, cumulative_shared_perms;
 
     GLOBAL_STATE_CODE();
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
 
     if (!bs->drv) {
         return -ENOMEDIUM;
@@ -7041,6 +7178,23 @@ void bdrv_unref(BlockDriverState *bs)
     }
 }
 
+/*
+ * Release a BlockDriverState reference while holding the graph write lock.
+ *
+ * Calling bdrv_unref() directly is forbidden while holding the graph lock
+ * because bdrv_close() both involves polling and taking the graph lock
+ * internally. bdrv_schedule_unref() instead delays decreasing the refcount and
+ * possibly closing @bs until the graph lock is released.
+ */
+void bdrv_schedule_unref(BlockDriverState *bs)
+{
+    if (!bs) {
+        return;
+    }
+    aio_bh_schedule_oneshot(qemu_get_aio_context(),
+                            (QEMUBHFunc *) bdrv_unref, bs);
+}
+
 struct BdrvOpBlocker {
     Error *reason;
     QLIST_ENTRY(BdrvOpBlocker) list;
@@ -7537,17 +7691,21 @@ static bool bdrv_change_aio_context(BlockDriverState *bs, AioContext *ctx,
         return true;
     }
 
+    bdrv_graph_rdlock_main_loop();
     QLIST_FOREACH(c, &bs->parents, next_parent) {
         if (!bdrv_parent_change_aio_context(c, ctx, visited, tran, errp)) {
+            bdrv_graph_rdunlock_main_loop();
             return false;
         }
     }
 
     QLIST_FOREACH(c, &bs->children, next) {
         if (!bdrv_child_change_aio_context(c, ctx, visited, tran, errp)) {
+            bdrv_graph_rdunlock_main_loop();
             return false;
         }
     }
+    bdrv_graph_rdunlock_main_loop();
 
     state = g_new(BdrvStateSetAioContext, 1);
     *state = (BdrvStateSetAioContext) {
@@ -7589,7 +7747,7 @@ int bdrv_try_change_aio_context(BlockDriverState *bs, AioContext *ctx,
     /*
      * Recursion phase: go through all nodes of the graph.
      * Take care of checking that all nodes support changing AioContext
-     * and drain them, builing a linear list of callbacks to run if everything
+     * and drain them, building a linear list of callbacks to run if everything
      * is successful (the transaction itself).
      */
     tran = tran_new();
diff --git a/block/blklogwrites.c b/block/blklogwrites.c
index 3ea7141cb5..a0d70729bb 100644
--- a/block/blklogwrites.c
+++ b/block/blklogwrites.c
@@ -251,7 +251,9 @@ static int blk_log_writes_open(BlockDriverState *bs, QDict *options, int flags,
     ret = 0;
 fail_log:
     if (ret < 0) {
+        bdrv_graph_wrlock(NULL);
         bdrv_unref_child(bs, s->log_file);
+        bdrv_graph_wrunlock();
         s->log_file = NULL;
     }
 fail:
@@ -263,8 +265,10 @@ static void blk_log_writes_close(BlockDriverState *bs)
 {
     BDRVBlkLogWritesState *s = bs->opaque;
 
+    bdrv_graph_wrlock(NULL);
     bdrv_unref_child(bs, s->log_file);
     s->log_file = NULL;
+    bdrv_graph_wrunlock();
 }
 
 static int64_t coroutine_fn GRAPH_RDLOCK
diff --git a/block/blkverify.c b/block/blkverify.c
index 7326461f30..dae9716a26 100644
--- a/block/blkverify.c
+++ b/block/blkverify.c
@@ -151,8 +151,10 @@ static void blkverify_close(BlockDriverState *bs)
 {
     BDRVBlkverifyState *s = bs->opaque;
 
+    bdrv_graph_wrlock(NULL);
     bdrv_unref_child(bs, s->test_file);
     s->test_file = NULL;
+    bdrv_graph_wrunlock();
 }
 
 static int64_t coroutine_fn GRAPH_RDLOCK
diff --git a/block/block-backend.c b/block/block-backend.c
index 4009ed5fed..efe2e7cbf8 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -33,8 +33,6 @@
 
 #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
 
-static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb);
-
 typedef struct BlockBackendAioNotifier {
     void (*attached_aio_context)(AioContext *new_context, void *opaque);
     void (*detach_aio_context)(void *opaque);
@@ -103,7 +101,6 @@ typedef struct BlockBackendAIOCB {
 } BlockBackendAIOCB;
 
 static const AIOCBInfo block_backend_aiocb_info = {
-    .get_aio_context = blk_aiocb_get_aio_context,
     .aiocb_size = sizeof(BlockBackendAIOCB),
 };
 
@@ -121,6 +118,10 @@ static QTAILQ_HEAD(, BlockBackend) block_backends =
 static QTAILQ_HEAD(, BlockBackend) monitor_block_backends =
     QTAILQ_HEAD_INITIALIZER(monitor_block_backends);
 
+static int coroutine_mixed_fn GRAPH_RDLOCK
+blk_set_perm_locked(BlockBackend *blk, uint64_t perm, uint64_t shared_perm,
+                    Error **errp);
+
 static void blk_root_inherit_options(BdrvChildRole role, bool parent_is_format,
                                      int *child_flags, QDict *child_options,
                                      int parent_flags, QDict *parent_options)
@@ -186,7 +187,7 @@ static void blk_vm_state_changed(void *opaque, bool running, RunState state)
  *
  * If an error is returned, the VM cannot be allowed to be resumed.
  */
-static void blk_root_activate(BdrvChild *child, Error **errp)
+static void GRAPH_RDLOCK blk_root_activate(BdrvChild *child, Error **errp)
 {
     BlockBackend *blk = child->opaque;
     Error *local_err = NULL;
@@ -207,7 +208,7 @@ static void blk_root_activate(BdrvChild *child, Error **errp)
      */
     saved_shared_perm = blk->shared_perm;
 
-    blk_set_perm(blk, blk->perm, BLK_PERM_ALL, &local_err);
+    blk_set_perm_locked(blk, blk->perm, BLK_PERM_ALL, &local_err);
     if (local_err) {
         error_propagate(errp, local_err);
         blk->disable_perm = true;
@@ -226,7 +227,7 @@ static void blk_root_activate(BdrvChild *child, Error **errp)
         return;
     }
 
-    blk_set_perm(blk, blk->perm, blk->shared_perm, &local_err);
+    blk_set_perm_locked(blk, blk->perm, blk->shared_perm, &local_err);
     if (local_err) {
         error_propagate(errp, local_err);
         blk->disable_perm = true;
@@ -259,7 +260,7 @@ static bool blk_can_inactivate(BlockBackend *blk)
     return blk->force_allow_inactivate;
 }
 
-static int blk_root_inactivate(BdrvChild *child)
+static int GRAPH_RDLOCK blk_root_inactivate(BdrvChild *child)
 {
     BlockBackend *blk = child->opaque;
 
@@ -911,7 +912,10 @@ void blk_remove_bs(BlockBackend *blk)
     blk_drain(blk);
     root = blk->root;
     blk->root = NULL;
+
+    bdrv_graph_wrlock(NULL);
     bdrv_root_unref_child(root);
+    bdrv_graph_wrunlock();
 }
 
 /*
@@ -953,8 +957,9 @@ int blk_replace_bs(BlockBackend *blk, BlockDriverState *new_bs, Error **errp)
 /*
  * Sets the permission bitmasks that the user of the BlockBackend needs.
  */
-int blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm,
-                 Error **errp)
+static int coroutine_mixed_fn GRAPH_RDLOCK
+blk_set_perm_locked(BlockBackend *blk, uint64_t perm, uint64_t shared_perm,
+                    Error **errp)
 {
     int ret;
     GLOBAL_STATE_CODE();
@@ -972,6 +977,15 @@ int blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm,
     return 0;
 }
 
+int blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm,
+                 Error **errp)
+{
+    GLOBAL_STATE_CODE();
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
+    return blk_set_perm_locked(blk, perm, shared_perm, errp);
+}
+
 void blk_get_perm(BlockBackend *blk, uint64_t *perm, uint64_t *shared_perm)
 {
     GLOBAL_STATE_CODE();
@@ -1341,7 +1355,7 @@ blk_co_do_preadv_part(BlockBackend *blk, int64_t offset, int64_t bytes,
     /* throttling disk I/O */
     if (blk->public.throttle_group_member.throttle_state) {
         throttle_group_co_io_limits_intercept(&blk->public.throttle_group_member,
-                bytes, false);
+                bytes, THROTTLE_READ);
     }
 
     ret = bdrv_co_preadv_part(blk->root, offset, bytes, qiov, qiov_offset,
@@ -1415,7 +1429,7 @@ blk_co_do_pwritev_part(BlockBackend *blk, int64_t offset, int64_t bytes,
     /* throttling disk I/O */
     if (blk->public.throttle_group_member.throttle_state) {
         throttle_group_co_io_limits_intercept(&blk->public.throttle_group_member,
-                bytes, true);
+                bytes, THROTTLE_WRITE);
     }
 
     if (!blk->enable_write_cache) {
@@ -1533,7 +1547,7 @@ BlockAIOCB *blk_abort_aio_request(BlockBackend *blk,
     acb->blk = blk;
     acb->ret = ret;
 
-    replay_bh_schedule_oneshot_event(blk_get_aio_context(blk),
+    replay_bh_schedule_oneshot_event(qemu_get_current_aio_context(),
                                      error_callback_bh, acb);
     return &acb->common;
 }
@@ -1545,16 +1559,8 @@ typedef struct BlkAioEmAIOCB {
     bool has_returned;
 } BlkAioEmAIOCB;
 
-static AioContext *blk_aio_em_aiocb_get_aio_context(BlockAIOCB *acb_)
-{
-    BlkAioEmAIOCB *acb = container_of(acb_, BlkAioEmAIOCB, common);
-
-    return blk_get_aio_context(acb->rwco.blk);
-}
-
 static const AIOCBInfo blk_aio_em_aiocb_info = {
     .aiocb_size         = sizeof(BlkAioEmAIOCB),
-    .get_aio_context    = blk_aio_em_aiocb_get_aio_context,
 };
 
 static void blk_aio_complete(BlkAioEmAIOCB *acb)
@@ -1595,11 +1601,11 @@ static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset,
     acb->has_returned = false;
 
     co = qemu_coroutine_create(co_entry, acb);
-    aio_co_enter(blk_get_aio_context(blk), co);
+    aio_co_enter(qemu_get_current_aio_context(), co);
 
     acb->has_returned = true;
     if (acb->rwco.ret != NOT_DONE) {
-        replay_bh_schedule_oneshot_event(blk_get_aio_context(blk),
+        replay_bh_schedule_oneshot_event(qemu_get_current_aio_context(),
                                          blk_aio_complete_bh, acb);
     }
 
@@ -1901,11 +1907,11 @@ BlockAIOCB *blk_aio_zone_report(BlockBackend *blk, int64_t offset,
     acb->has_returned = false;
 
     co = qemu_coroutine_create(blk_aio_zone_report_entry, acb);
-    aio_co_enter(blk_get_aio_context(blk), co);
+    aio_co_enter(qemu_get_current_aio_context(), co);
 
     acb->has_returned = true;
     if (acb->rwco.ret != NOT_DONE) {
-        replay_bh_schedule_oneshot_event(blk_get_aio_context(blk),
+        replay_bh_schedule_oneshot_event(qemu_get_current_aio_context(),
                                          blk_aio_complete_bh, acb);
     }
 
@@ -1942,11 +1948,11 @@ BlockAIOCB *blk_aio_zone_mgmt(BlockBackend *blk, BlockZoneOp op,
     acb->has_returned = false;
 
     co = qemu_coroutine_create(blk_aio_zone_mgmt_entry, acb);
-    aio_co_enter(blk_get_aio_context(blk), co);
+    aio_co_enter(qemu_get_current_aio_context(), co);
 
     acb->has_returned = true;
     if (acb->rwco.ret != NOT_DONE) {
-        replay_bh_schedule_oneshot_event(blk_get_aio_context(blk),
+        replay_bh_schedule_oneshot_event(qemu_get_current_aio_context(),
                                          blk_aio_complete_bh, acb);
     }
 
@@ -1982,10 +1988,10 @@ BlockAIOCB *blk_aio_zone_append(BlockBackend *blk, int64_t *offset,
     acb->has_returned = false;
 
     co = qemu_coroutine_create(blk_aio_zone_append_entry, acb);
-    aio_co_enter(blk_get_aio_context(blk), co);
+    aio_co_enter(qemu_get_current_aio_context(), co);
     acb->has_returned = true;
     if (acb->rwco.ret != NOT_DONE) {
-        replay_bh_schedule_oneshot_event(blk_get_aio_context(blk),
+        replay_bh_schedule_oneshot_event(qemu_get_current_aio_context(),
                                          blk_aio_complete_bh, acb);
     }
 
@@ -2434,12 +2440,6 @@ AioContext *blk_get_aio_context(BlockBackend *blk)
     return blk->ctx;
 }
 
-static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb)
-{
-    BlockBackendAIOCB *blk_acb = DO_UPCAST(BlockBackendAIOCB, common, acb);
-    return blk_get_aio_context(blk_acb->blk);
-}
-
 int blk_set_aio_context(BlockBackend *blk, AioContext *new_context,
                         Error **errp)
 {
diff --git a/block/block-copy.c b/block/block-copy.c
index e13d7bc6b6..1c60368d72 100644
--- a/block/block-copy.c
+++ b/block/block-copy.c
@@ -67,7 +67,7 @@ typedef struct BlockCopyCallState {
     QLIST_ENTRY(BlockCopyCallState) list;
 
     /*
-     * Fields that report information about return values and erros.
+     * Fields that report information about return values and errors.
      * Protected by lock in BlockCopyState.
      */
     bool error_is_read;
@@ -462,7 +462,7 @@ static coroutine_fn int block_copy_task_run(AioTaskPool *pool,
  * Do copy of cluster-aligned chunk. Requested region is allowed to exceed
  * s->len only to cover last cluster when s->len is not aligned to clusters.
  *
- * No sync here: nor bitmap neighter intersecting requests handling, only copy.
+ * No sync here: neither bitmap nor intersecting requests handling, only copy.
  *
  * @method is an in-out argument, so that copy_range can be either extended to
  * a full-size buffer or disabled if the copy_range attempt fails.  The output
diff --git a/block/copy-before-write.c b/block/copy-before-write.c
index b866e42271..aeaff3bb82 100644
--- a/block/copy-before-write.c
+++ b/block/copy-before-write.c
@@ -341,11 +341,11 @@ static void cbw_refresh_filename(BlockDriverState *bs)
             bs->file->bs->filename);
 }
 
-static void cbw_child_perm(BlockDriverState *bs, BdrvChild *c,
-                           BdrvChildRole role,
-                           BlockReopenQueue *reopen_queue,
-                           uint64_t perm, uint64_t shared,
-                           uint64_t *nperm, uint64_t *nshared)
+static void GRAPH_RDLOCK
+cbw_child_perm(BlockDriverState *bs, BdrvChild *c, BdrvChildRole role,
+               BlockReopenQueue *reopen_queue,
+               uint64_t perm, uint64_t shared,
+               uint64_t *nperm, uint64_t *nshared)
 {
     if (!(role & BDRV_CHILD_FILTERED)) {
         /*
@@ -503,7 +503,7 @@ static void cbw_close(BlockDriverState *bs)
     s->bcs = NULL;
 }
 
-BlockDriver bdrv_cbw_filter = {
+static BlockDriver bdrv_cbw_filter = {
     .format_name = "copy-before-write",
     .instance_size = sizeof(BDRVCopyBeforeWriteState),
 
diff --git a/block/crypto.c b/block/crypto.c
index 6ee8d46d30..c9c9a39fa3 100644
--- a/block/crypto.c
+++ b/block/crypto.c
@@ -777,7 +777,7 @@ block_crypto_get_specific_info_luks(BlockDriverState *bs, Error **errp)
     return spec_info;
 }
 
-static int
+static int GRAPH_RDLOCK
 block_crypto_amend_prepare(BlockDriverState *bs, Error **errp)
 {
     BlockCrypto *crypto = bs->opaque;
@@ -793,7 +793,7 @@ block_crypto_amend_prepare(BlockDriverState *bs, Error **errp)
     return ret;
 }
 
-static void
+static void GRAPH_RDLOCK
 block_crypto_amend_cleanup(BlockDriverState *bs)
 {
     BlockCrypto *crypto = bs->opaque;
@@ -841,6 +841,8 @@ block_crypto_amend_options_luks(BlockDriverState *bs,
     QCryptoBlockAmendOptions *amend_options = NULL;
     int ret = -EINVAL;
 
+    assume_graph_lock(); /* FIXME */
+
     assert(crypto);
     assert(crypto->block);
 
diff --git a/block/export/vduse-blk.c b/block/export/vduse-blk.c
index 83b05548e7..172f73cef4 100644
--- a/block/export/vduse-blk.c
+++ b/block/export/vduse-blk.c
@@ -138,7 +138,7 @@ static void vduse_blk_enable_queue(VduseDev *dev, VduseVirtq *vq)
 
     aio_set_fd_handler(vblk_exp->export.ctx, vduse_queue_get_fd(vq),
                        on_vduse_vq_kick, NULL, NULL, NULL, vq);
-    /* Make sure we don't miss any kick afer reconnecting */
+    /* Make sure we don't miss any kick after reconnecting */
     eventfd_write(vduse_queue_get_fd(vq), 1);
 }
 
diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c
index f7b5073605..fe2cee3a78 100644
--- a/block/export/vhost-user-blk-server.c
+++ b/block/export/vhost-user-blk-server.c
@@ -1,5 +1,5 @@
 /*
- * Sharing QEMU block devices via vhost-user protocal
+ * Sharing QEMU block devices via vhost-user protocol
  *
  * Parts of the code based on nbd/server.c.
  *
diff --git a/block/export/vhost-user-blk-server.h b/block/export/vhost-user-blk-server.h
index fcf46fc8a5..77fb5c0131 100644
--- a/block/export/vhost-user-blk-server.h
+++ b/block/export/vhost-user-blk-server.h
@@ -1,5 +1,5 @@
 /*
- * Sharing QEMU block devices via vhost-user protocal
+ * Sharing QEMU block devices via vhost-user protocol
  *
  * Copyright (c) Coiby Xu <coiby.xu@gmail.com>.
  * Copyright (c) 2020 Red Hat, Inc.
diff --git a/block/file-posix.c b/block/file-posix.c
index b16e9c21a1..50e2b20d5c 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -1159,9 +1159,9 @@ static int raw_reopen_prepare(BDRVReopenState *state,
      * As part of reopen prepare we also want to create new fd by
      * raw_reconfigure_getfd(). But it wants updated "perm", when in
      * bdrv_reopen_multiple() .bdrv_reopen_prepare() callback called prior to
-     * permission update. Happily, permission update is always a part (a seprate
-     * stage) of bdrv_reopen_multiple() so we can rely on this fact and
-     * reconfigure fd in raw_check_perm().
+     * permission update. Happily, permission update is always a part
+     * (a separate stage) of bdrv_reopen_multiple() so we can rely on this
+     * fact and reconfigure fd in raw_check_perm().
      */
 
     s->reopen_state = state;
@@ -1412,11 +1412,9 @@ static void raw_refresh_zoned_limits(BlockDriverState *bs, struct stat *st,
     BlockZoneModel zoned;
     int ret;
 
-    bs->bl.zoned = BLK_Z_NONE;
-
     ret = get_sysfs_zoned_model(st, &zoned);
     if (ret < 0 || zoned == BLK_Z_NONE) {
-        return;
+        goto no_zoned;
     }
     bs->bl.zoned = zoned;
 
@@ -1437,10 +1435,10 @@ static void raw_refresh_zoned_limits(BlockDriverState *bs, struct stat *st,
     if (ret < 0) {
         error_setg_errno(errp, -ret, "Unable to read chunk_sectors "
                                      "sysfs attribute");
-        return;
+        goto no_zoned;
     } else if (!ret) {
         error_setg(errp, "Read 0 from chunk_sectors sysfs attribute");
-        return;
+        goto no_zoned;
     }
     bs->bl.zone_size = ret << BDRV_SECTOR_BITS;
 
@@ -1448,10 +1446,10 @@ static void raw_refresh_zoned_limits(BlockDriverState *bs, struct stat *st,
     if (ret < 0) {
         error_setg_errno(errp, -ret, "Unable to read nr_zones "
                                      "sysfs attribute");
-        return;
+        goto no_zoned;
     } else if (!ret) {
         error_setg(errp, "Read 0 from nr_zones sysfs attribute");
-        return;
+        goto no_zoned;
     }
     bs->bl.nr_zones = ret;
 
@@ -1472,10 +1470,15 @@ static void raw_refresh_zoned_limits(BlockDriverState *bs, struct stat *st,
     ret = get_zones_wp(bs, s->fd, 0, bs->bl.nr_zones, 0);
     if (ret < 0) {
         error_setg_errno(errp, -ret, "report wps failed");
-        bs->wps = NULL;
-        return;
+        goto no_zoned;
     }
     qemu_co_mutex_init(&bs->wps->colock);
+    return;
+
+no_zoned:
+    bs->bl.zoned = BLK_Z_NONE;
+    g_free(bs->wps);
+    bs->wps = NULL;
 }
 #else /* !defined(CONFIG_BLKZONED) */
 static void raw_refresh_zoned_limits(BlockDriverState *bs, struct stat *st,
@@ -2452,9 +2455,10 @@ static int coroutine_fn raw_co_prw(BlockDriverState *bs, uint64_t offset,
     if (fd_open(bs) < 0)
         return -EIO;
 #if defined(CONFIG_BLKZONED)
-    if ((type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND)) && bs->wps) {
+    if ((type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND)) &&
+        bs->bl.zoned != BLK_Z_NONE) {
         qemu_co_mutex_lock(&bs->wps->colock);
-        if (type & QEMU_AIO_ZONE_APPEND && bs->bl.zone_size) {
+        if (type & QEMU_AIO_ZONE_APPEND) {
             int index = offset / bs->bl.zone_size;
             offset = bs->wps->wp[index];
         }
@@ -2502,11 +2506,10 @@ static int coroutine_fn raw_co_prw(BlockDriverState *bs, uint64_t offset,
 
 out:
 #if defined(CONFIG_BLKZONED)
-{
-    BlockZoneWps *wps = bs->wps;
-    if (ret == 0) {
-        if ((type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND))
-            && wps && bs->bl.zone_size) {
+    if ((type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND)) &&
+        bs->bl.zoned != BLK_Z_NONE) {
+        BlockZoneWps *wps = bs->wps;
+        if (ret == 0) {
             uint64_t *wp = &wps->wp[offset / bs->bl.zone_size];
             if (!BDRV_ZT_IS_CONV(*wp)) {
                 if (type & QEMU_AIO_ZONE_APPEND) {
@@ -2519,17 +2522,12 @@ out:
                     *wp = offset + bytes;
                 }
             }
-        }
-    } else {
-        if (type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND)) {
+        } else {
             update_zones_wp(bs, s->fd, 0, 1);
         }
-    }
 
-    if ((type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND)) && wps) {
         qemu_co_mutex_unlock(&wps->colock);
     }
-}
 #endif
     return ret;
 }
@@ -3374,7 +3372,7 @@ static void raw_account_discard(BDRVRawState *s, uint64_t nbytes, int ret)
  * of an array of zone descriptors.
  * zones is an array of zone descriptors to hold zone information on reply;
  * offset can be any byte within the entire size of the device;
- * nr_zones is the maxium number of sectors the command should operate on.
+ * nr_zones is the maximum number of sectors the command should operate on.
  */
 #if defined(CONFIG_BLKZONED)
 static int coroutine_fn raw_co_zone_report(BlockDriverState *bs, int64_t offset,
diff --git a/block/graph-lock.c b/block/graph-lock.c
index 5e66f01ae8..58a799065f 100644
--- a/block/graph-lock.c
+++ b/block/graph-lock.c
@@ -95,7 +95,7 @@ static uint32_t reader_count(void)
 
     QEMU_LOCK_GUARD(&aio_context_list_lock);
 
-    /* rd can temporarly be negative, but the total will *always* be >= 0 */
+    /* rd can temporarily be negative, but the total will *always* be >= 0 */
     rd = orphaned_reader_count;
     QTAILQ_FOREACH(brdv_graph, &aio_context_list, next_aio) {
         rd += qatomic_read(&brdv_graph->reader_count);
@@ -163,17 +163,29 @@ void bdrv_graph_wrlock(BlockDriverState *bs)
 void bdrv_graph_wrunlock(void)
 {
     GLOBAL_STATE_CODE();
-    QEMU_LOCK_GUARD(&aio_context_list_lock);
     assert(qatomic_read(&has_writer));
 
-    /*
-     * No need for memory barriers, this works in pair with
-     * the slow path of rdlock() and both take the lock.
-     */
-    qatomic_store_release(&has_writer, 0);
+    WITH_QEMU_LOCK_GUARD(&aio_context_list_lock) {
+        /*
+         * No need for memory barriers, this works in pair with
+         * the slow path of rdlock() and both take the lock.
+         */
+        qatomic_store_release(&has_writer, 0);
 
-    /* Wake up all coroutine that are waiting to read the graph */
-    qemu_co_enter_all(&reader_queue, &aio_context_list_lock);
+        /* Wake up all coroutines that are waiting to read the graph */
+        qemu_co_enter_all(&reader_queue, &aio_context_list_lock);
+    }
+
+    /*
+     * Run any BHs that were scheduled during the wrlock section and that
+     * callers might expect to have finished (in particular, this is important
+     * for bdrv_schedule_unref()).
+     *
+     * Do this only after restarting coroutines so that nested event loops in
+     * BHs don't deadlock if their condition relies on the coroutine making
+     * progress.
+     */
+    aio_bh_poll(qemu_get_aio_context());
 }
 
 void coroutine_fn bdrv_graph_co_rdlock(void)
diff --git a/block/io.c b/block/io.c
index 76e7df18d8..209a6da0c8 100644
--- a/block/io.c
+++ b/block/io.c
@@ -342,7 +342,7 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
      * timer callback), it is a bug in the caller that should be fixed. */
     assert(data.done);
 
-    /* Reaquire the AioContext of bs if we dropped it */
+    /* Reacquire the AioContext of bs if we dropped it */
     if (ctx != co_ctx) {
         aio_context_acquire(ctx);
     }
@@ -591,10 +591,16 @@ static void coroutine_fn tracked_request_end(BdrvTrackedRequest *req)
         qatomic_dec(&req->bs->serialising_in_flight);
     }
 
-    qemu_co_mutex_lock(&req->bs->reqs_lock);
+    qemu_mutex_lock(&req->bs->reqs_lock);
     QLIST_REMOVE(req, list);
+    qemu_mutex_unlock(&req->bs->reqs_lock);
+
+    /*
+     * At this point qemu_co_queue_wait(&req->wait_queue, ...) won't be called
+     * anymore because the request has been removed from the list, so it's safe
+     * to restart the queue outside reqs_lock to minimize the critical section.
+     */
     qemu_co_queue_restart_all(&req->wait_queue);
-    qemu_co_mutex_unlock(&req->bs->reqs_lock);
 }
 
 /**
@@ -621,9 +627,9 @@ static void coroutine_fn tracked_request_begin(BdrvTrackedRequest *req,
 
     qemu_co_queue_init(&req->wait_queue);
 
-    qemu_co_mutex_lock(&bs->reqs_lock);
+    qemu_mutex_lock(&bs->reqs_lock);
     QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
-    qemu_co_mutex_unlock(&bs->reqs_lock);
+    qemu_mutex_unlock(&bs->reqs_lock);
 }
 
 static bool tracked_request_overlaps(BdrvTrackedRequest *req,
@@ -787,9 +793,9 @@ bdrv_wait_serialising_requests(BdrvTrackedRequest *self)
         return;
     }
 
-    qemu_co_mutex_lock(&bs->reqs_lock);
+    qemu_mutex_lock(&bs->reqs_lock);
     bdrv_wait_serialising_requests_locked(self);
-    qemu_co_mutex_unlock(&bs->reqs_lock);
+    qemu_mutex_unlock(&bs->reqs_lock);
 }
 
 void coroutine_fn bdrv_make_request_serialising(BdrvTrackedRequest *req,
@@ -797,12 +803,12 @@ void coroutine_fn bdrv_make_request_serialising(BdrvTrackedRequest *req,
 {
     IO_CODE();
 
-    qemu_co_mutex_lock(&req->bs->reqs_lock);
+    qemu_mutex_lock(&req->bs->reqs_lock);
 
     tracked_request_set_serialising(req, align);
     bdrv_wait_serialising_requests_locked(req);
 
-    qemu_co_mutex_unlock(&req->bs->reqs_lock);
+    qemu_mutex_unlock(&req->bs->reqs_lock);
 }
 
 int bdrv_check_qiov_request(int64_t offset, int64_t bytes,
@@ -2944,25 +2950,18 @@ int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
 /**************************************************************/
 /* async I/Os */
 
+/**
+ * Synchronously cancels an acb. Must be called with the BQL held and the acb
+ * must be processed with the BQL held too (IOThreads are not allowed).
+ *
+ * Use bdrv_aio_cancel_async() instead when possible.
+ */
 void bdrv_aio_cancel(BlockAIOCB *acb)
 {
-    IO_CODE();
+    GLOBAL_STATE_CODE();
     qemu_aio_ref(acb);
     bdrv_aio_cancel_async(acb);
-    while (acb->refcnt > 1) {
-        if (acb->aiocb_info->get_aio_context) {
-            aio_poll(acb->aiocb_info->get_aio_context(acb), true);
-        } else if (acb->bs) {
-            /* qemu_aio_ref and qemu_aio_unref are not thread-safe, so
-             * assert that we're not using an I/O thread.  Thread-safe
-             * code should use bdrv_aio_cancel_async exclusively.
-             */
-            assert(bdrv_get_aio_context(acb->bs) == qemu_get_aio_context());
-            aio_poll(bdrv_get_aio_context(acb->bs), true);
-        } else {
-            abort();
-        }
-    }
+    AIO_WAIT_WHILE_UNLOCKED(NULL, acb->refcnt > 1);
     qemu_aio_unref(acb);
 }
 
@@ -2996,7 +2995,7 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
         goto early_exit;
     }
 
-    qemu_co_mutex_lock(&bs->reqs_lock);
+    qemu_mutex_lock(&bs->reqs_lock);
     current_gen = qatomic_read(&bs->write_gen);
 
     /* Wait until any previous flushes are completed */
@@ -3006,7 +3005,7 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
 
     /* Flushes reach this point in nondecreasing current_gen order.  */
     bs->active_flush_req = true;
-    qemu_co_mutex_unlock(&bs->reqs_lock);
+    qemu_mutex_unlock(&bs->reqs_lock);
 
     /* Write back all layers by calling one driver function */
     if (bs->drv->bdrv_co_flush) {
@@ -3094,11 +3093,11 @@ out:
         bs->flushed_gen = current_gen;
     }
 
-    qemu_co_mutex_lock(&bs->reqs_lock);
+    qemu_mutex_lock(&bs->reqs_lock);
     bs->active_flush_req = false;
     /* Return value is ignored - it's ok if wait queue is empty */
     qemu_co_queue_next(&bs->flush_queue);
-    qemu_co_mutex_unlock(&bs->reqs_lock);
+    qemu_mutex_unlock(&bs->reqs_lock);
 
 early_exit:
     bdrv_dec_in_flight(bs);
diff --git a/block/iscsi.c b/block/iscsi.c
index 34f97ab646..5640c8b565 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -1058,6 +1058,7 @@ static BlockAIOCB *iscsi_aio_ioctl(BlockDriverState *bs,
         return NULL;
     }
 
+    /* Must use malloc(): this is freed via scsi_free_scsi_task() */
     acb->task = malloc(sizeof(struct scsi_task));
     if (acb->task == NULL) {
         error_report("iSCSI: Failed to allocate task for scsi command. %s",
diff --git a/block/linux-aio.c b/block/linux-aio.c
index 561c71a9ae..1a51503271 100644
--- a/block/linux-aio.c
+++ b/block/linux-aio.c
@@ -227,7 +227,7 @@ static void qemu_laio_process_completions(LinuxAioState *s)
 
     /* If we are nested we have to notify the level above that we are done
      * by setting event_max to zero, upper level will then jump out of it's
-     * own `for` loop.  If we are the last all counters droped to zero. */
+     * own `for` loop.  If we are the last all counters dropped to zero. */
     s->event_max = 0;
     s->event_idx = 0;
 }
diff --git a/block/meson.build b/block/meson.build
index 529fc172c6..f351b9d0d3 100644
--- a/block/meson.build
+++ b/block/meson.build
@@ -4,41 +4,41 @@ block_ss.add(files(
   'aio_task.c',
   'amend.c',
   'backup.c',
-  'copy-before-write.c',
   'blkdebug.c',
   'blklogwrites.c',
   'blkverify.c',
   'block-backend.c',
   'block-copy.c',
-  'graph-lock.c',
   'commit.c',
+  'copy-before-write.c',
   'copy-on-read.c',
-  'preallocate.c',
-  'progress_meter.c',
   'create.c',
   'crypto.c',
   'dirty-bitmap.c',
   'filter-compress.c',
+  'graph-lock.c',
   'io.c',
   'mirror.c',
   'nbd.c',
   'null.c',
   'plug.c',
+  'preallocate.c',
+  'progress_meter.c',
   'qapi.c',
+  'qcow2.c',
   'qcow2-bitmap.c',
   'qcow2-cache.c',
   'qcow2-cluster.c',
   'qcow2-refcount.c',
   'qcow2-snapshot.c',
   'qcow2-threads.c',
-  'qcow2.c',
   'quorum.c',
   'raw-format.c',
   'reqlist.c',
   'snapshot.c',
   'snapshot-access.c',
-  'throttle-groups.c',
   'throttle.c',
+  'throttle-groups.c',
   'write-threshold.c',
 ), zstd, zlib, gnutls)
 
diff --git a/block/mirror.c b/block/mirror.c
index e213a892db..3cc0757a03 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -502,7 +502,7 @@ static void coroutine_fn mirror_iteration(MirrorBlockJob *s)
 
     job_pause_point(&s->common.job);
 
-    /* Find the number of consective dirty chunks following the first dirty
+    /* Find the number of consecutive dirty chunks following the first dirty
      * one, and wait for in flight requests in them. */
     bdrv_dirty_bitmap_lock(s->dirty_bitmap);
     while (nb_chunks * s->granularity < s->buf_size) {
@@ -702,8 +702,12 @@ static int mirror_exit_common(Job *job)
      * mirror_top_bs from now on, so keep it drained. */
     bdrv_drained_begin(mirror_top_bs);
     bs_opaque->stop = true;
+
+    bdrv_graph_rdlock_main_loop();
     bdrv_child_refresh_perms(mirror_top_bs, mirror_top_bs->backing,
                              &error_abort);
+    bdrv_graph_rdunlock_main_loop();
+
     if (!abort && s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) {
         BlockDriverState *backing = s->is_none_mode ? src : s->base;
         BlockDriverState *unfiltered_target = bdrv_skip_filters(target_bs);
@@ -1670,6 +1674,8 @@ static BlockJob *mirror_start_job(
     uint64_t target_perms, target_shared_perms;
     int ret;
 
+    GLOBAL_STATE_CODE();
+
     if (granularity == 0) {
         granularity = bdrv_get_default_bitmap_granularity(target);
     }
@@ -1906,8 +1912,10 @@ fail:
     }
 
     bs_opaque->stop = true;
+    bdrv_graph_rdlock_main_loop();
     bdrv_child_refresh_perms(mirror_top_bs, mirror_top_bs->backing,
                              &error_abort);
+    bdrv_graph_rdunlock_main_loop();
     bdrv_replace_node(mirror_top_bs, mirror_top_bs->backing->bs, &error_abort);
 
     bdrv_unref(mirror_top_bs);
diff --git a/block/nbd.c b/block/nbd.c
index 5322e66166..cc48580df7 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -352,7 +352,7 @@ int coroutine_fn nbd_co_do_establish_connection(BlockDriverState *bs,
     }
 
     qio_channel_set_blocking(s->ioc, false, NULL);
-    qio_channel_attach_aio_context(s->ioc, bdrv_get_aio_context(bs));
+    qio_channel_set_follow_coroutine_ctx(s->ioc, true);
 
     /* successfully connected */
     WITH_QEMU_LOCK_GUARD(&s->requests_lock) {
@@ -397,7 +397,6 @@ static void coroutine_fn GRAPH_RDLOCK nbd_reconnect_attempt(BDRVNBDState *s)
 
     /* Finalize previous connection if any */
     if (s->ioc) {
-        qio_channel_detach_aio_context(s->ioc);
         yank_unregister_function(BLOCKDEV_YANK_INSTANCE(s->bs->node_name),
                                  nbd_yank, s->bs);
         object_unref(OBJECT(s->ioc));
@@ -2089,10 +2088,6 @@ static void nbd_attach_aio_context(BlockDriverState *bs,
      * the reconnect_delay_timer cannot be active here.
      */
     assert(!s->reconnect_delay_timer);
-
-    if (s->ioc) {
-        qio_channel_attach_aio_context(s->ioc, new_context);
-    }
 }
 
 static void nbd_detach_aio_context(BlockDriverState *bs)
@@ -2101,10 +2096,6 @@ static void nbd_detach_aio_context(BlockDriverState *bs)
 
     assert(!s->open_timer);
     assert(!s->reconnect_delay_timer);
-
-    if (s->ioc) {
-        qio_channel_detach_aio_context(s->ioc);
-    }
 }
 
 static BlockDriver bdrv_nbd = {
diff --git a/block/parallels.c b/block/parallels.c
index 18e34aef28..d026ce9e2f 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -136,6 +136,12 @@ static int cluster_remainder(BDRVParallelsState *s, int64_t sector_num,
     return MIN(nb_sectors, ret);
 }
 
+static uint32_t host_cluster_index(BDRVParallelsState *s, int64_t off)
+{
+    off -= s->data_start << BDRV_SECTOR_BITS;
+    return off / s->cluster_size;
+}
+
 static int64_t block_status(BDRVParallelsState *s, int64_t sector_num,
                             int nb_sectors, int *pnum)
 {
@@ -172,13 +178,82 @@ static void parallels_set_bat_entry(BDRVParallelsState *s,
     bitmap_set(s->bat_dirty_bmap, bat_entry_off(index) / s->bat_dirty_block, 1);
 }
 
+static int mark_used(BlockDriverState *bs, unsigned long *bitmap,
+                     uint32_t bitmap_size, int64_t off, uint32_t count)
+{
+    BDRVParallelsState *s = bs->opaque;
+    uint32_t cluster_index = host_cluster_index(s, off);
+    unsigned long next_used;
+    if (cluster_index + count > bitmap_size) {
+        return -E2BIG;
+    }
+    next_used = find_next_bit(bitmap, bitmap_size, cluster_index);
+    if (next_used < cluster_index + count) {
+        return -EBUSY;
+    }
+    bitmap_set(bitmap, cluster_index, count);
+    return 0;
+}
+
+/*
+ * Collect used bitmap. The image can contain errors, we should fill the
+ * bitmap anyway, as much as we can. This information will be used for
+ * error resolution.
+ */
+static int parallels_fill_used_bitmap(BlockDriverState *bs)
+{
+    BDRVParallelsState *s = bs->opaque;
+    int64_t payload_bytes;
+    uint32_t i;
+    int err = 0;
+
+    payload_bytes = bdrv_getlength(bs->file->bs);
+    if (payload_bytes < 0) {
+        return payload_bytes;
+    }
+    payload_bytes -= s->data_start * BDRV_SECTOR_SIZE;
+    if (payload_bytes < 0) {
+        return -EINVAL;
+    }
+
+    s->used_bmap_size = DIV_ROUND_UP(payload_bytes, s->cluster_size);
+    if (s->used_bmap_size == 0) {
+        return 0;
+    }
+    s->used_bmap = bitmap_try_new(s->used_bmap_size);
+    if (s->used_bmap == NULL) {
+        return -ENOMEM;
+    }
+
+    for (i = 0; i < s->bat_size; i++) {
+        int err2;
+        int64_t host_off = bat2sect(s, i) << BDRV_SECTOR_BITS;
+        if (host_off == 0) {
+            continue;
+        }
+
+        err2 = mark_used(bs, s->used_bmap, s->used_bmap_size, host_off, 1);
+        if (err2 < 0 && err == 0) {
+            err = err2;
+        }
+    }
+    return err;
+}
+
+static void parallels_free_used_bitmap(BlockDriverState *bs)
+{
+    BDRVParallelsState *s = bs->opaque;
+    s->used_bmap_size = 0;
+    g_free(s->used_bmap);
+}
+
 static int64_t coroutine_fn GRAPH_RDLOCK
 allocate_clusters(BlockDriverState *bs, int64_t sector_num,
                   int nb_sectors, int *pnum)
 {
     int ret = 0;
     BDRVParallelsState *s = bs->opaque;
-    int64_t pos, space, idx, to_allocate, i, len;
+    int64_t i, pos, idx, to_allocate, first_free, host_off;
 
     pos = block_status(s, sector_num, nb_sectors, pnum);
     if (pos > 0) {
@@ -188,7 +263,8 @@ allocate_clusters(BlockDriverState *bs, int64_t sector_num,
     idx = sector_num / s->tracks;
     to_allocate = DIV_ROUND_UP(sector_num + *pnum, s->tracks) - idx;
 
-    /* This function is called only by parallels_co_writev(), which will never
+    /*
+     * This function is called only by parallels_co_writev(), which will never
      * pass a sector_num at or beyond the end of the image (because the block
      * layer never passes such a sector_num to that function). Therefore, idx
      * is always below s->bat_size.
@@ -196,24 +272,25 @@ allocate_clusters(BlockDriverState *bs, int64_t sector_num,
      * exceed the image end. Therefore, idx + to_allocate cannot exceed
      * s->bat_size.
      * Note that s->bat_size is an unsigned int, therefore idx + to_allocate
-     * will always fit into a uint32_t. */
+     * will always fit into a uint32_t.
+     */
     assert(idx < s->bat_size && idx + to_allocate <= s->bat_size);
 
-    space = to_allocate * s->tracks;
-    len = bdrv_co_getlength(bs->file->bs);
-    if (len < 0) {
-        return len;
-    }
-    if (s->data_end + space > (len >> BDRV_SECTOR_BITS)) {
-        space += s->prealloc_size;
+    first_free = find_first_zero_bit(s->used_bmap, s->used_bmap_size);
+    if (first_free == s->used_bmap_size) {
+        uint32_t new_usedsize;
+        int64_t bytes = to_allocate * s->cluster_size;
+        bytes += s->prealloc_size * BDRV_SECTOR_SIZE;
+
+        host_off = s->data_end * BDRV_SECTOR_SIZE;
+
         /*
          * We require the expanded size to read back as zero. If the
          * user permitted truncation, we try that; but if it fails, we
          * force the safer-but-slower fallocate.
          */
         if (s->prealloc_mode == PRL_PREALLOC_MODE_TRUNCATE) {
-            ret = bdrv_co_truncate(bs->file,
-                                   (s->data_end + space) << BDRV_SECTOR_BITS,
+            ret = bdrv_co_truncate(bs->file, host_off + bytes,
                                    false, PREALLOC_MODE_OFF,
                                    BDRV_REQ_ZERO_WRITE, NULL);
             if (ret == -ENOTSUP) {
@@ -221,22 +298,53 @@ allocate_clusters(BlockDriverState *bs, int64_t sector_num,
             }
         }
         if (s->prealloc_mode == PRL_PREALLOC_MODE_FALLOCATE) {
-            ret = bdrv_co_pwrite_zeroes(bs->file,
-                                        s->data_end << BDRV_SECTOR_BITS,
-                                        space << BDRV_SECTOR_BITS, 0);
+            ret = bdrv_co_pwrite_zeroes(bs->file, host_off, bytes, 0);
         }
         if (ret < 0) {
             return ret;
         }
+
+        new_usedsize = s->used_bmap_size + bytes / s->cluster_size;
+        s->used_bmap = bitmap_zero_extend(s->used_bmap, s->used_bmap_size,
+                                          new_usedsize);
+        s->used_bmap_size = new_usedsize;
+    } else {
+        int64_t next_used;
+        next_used = find_next_bit(s->used_bmap, s->used_bmap_size, first_free);
+
+        /* Not enough continuous clusters in the middle, adjust the size */
+        if (next_used - first_free < to_allocate) {
+            to_allocate = next_used - first_free;
+            *pnum = (idx + to_allocate) * s->tracks - sector_num;
+        }
+
+        host_off = s->data_start * BDRV_SECTOR_SIZE;
+        host_off += first_free * s->cluster_size;
+
+        /*
+         * No need to preallocate if we are using tail area from the above
+         * branch. In the other case we are likely re-using hole. Preallocate
+         * the space if required by the prealloc_mode.
+         */
+        if (s->prealloc_mode == PRL_PREALLOC_MODE_FALLOCATE &&
+                host_off < s->data_end * BDRV_SECTOR_SIZE) {
+            ret = bdrv_co_pwrite_zeroes(bs->file, host_off,
+                                        s->cluster_size * to_allocate, 0);
+            if (ret < 0) {
+                return ret;
+            }
+        }
     }
 
-    /* Try to read from backing to fill empty clusters
+    /*
+     * Try to read from backing to fill empty clusters
      * FIXME: 1. previous write_zeroes may be redundant
      *        2. most of data we read from backing will be rewritten by
      *           parallels_co_writev. On aligned-to-cluster write we do not need
      *           this read at all.
      *        3. it would be good to combine write of data from backing and new
-     *           data into one write call */
+     *           data into one write call.
+     */
     if (bs->backing) {
         int64_t nb_cow_sectors = to_allocate * s->tracks;
         int64_t nb_cow_bytes = nb_cow_sectors << BDRV_SECTOR_BITS;
@@ -257,9 +365,18 @@ allocate_clusters(BlockDriverState *bs, int64_t sector_num,
         }
     }
 
+    ret = mark_used(bs, s->used_bmap, s->used_bmap_size, host_off, to_allocate);
+    if (ret < 0) {
+        /* Image consistency is broken. Alarm! */
+        return ret;
+    }
     for (i = 0; i < to_allocate; i++) {
-        parallels_set_bat_entry(s, idx + i, s->data_end / s->off_multiplier);
-        s->data_end += s->tracks;
+        parallels_set_bat_entry(s, idx + i,
+                host_off / BDRV_SECTOR_SIZE / s->off_multiplier);
+        host_off += s->cluster_size;
+    }
+    if (host_off > s->data_end * BDRV_SECTOR_SIZE) {
+        s->data_end = host_off / BDRV_SECTOR_SIZE;
     }
 
     return bat2sect(s, idx) + sector_num % s->tracks;
@@ -420,6 +537,64 @@ parallels_co_readv(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
     return ret;
 }
 
+
+static int coroutine_fn GRAPH_RDLOCK
+parallels_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes)
+{
+    int ret = 0;
+    uint32_t cluster, count;
+    BDRVParallelsState *s = bs->opaque;
+
+    /*
+     * The image does not support ZERO mark inside the BAT, which means that
+     * stale data could be exposed from the backing file.
+     */
+    if (bs->backing) {
+        return -ENOTSUP;
+    }
+
+    if (!QEMU_IS_ALIGNED(offset, s->cluster_size)) {
+        return -ENOTSUP;
+    } else if (!QEMU_IS_ALIGNED(bytes, s->cluster_size)) {
+        return -ENOTSUP;
+    }
+
+    cluster = offset / s->cluster_size;
+    count = bytes / s->cluster_size;
+
+    qemu_co_mutex_lock(&s->lock);
+    for (; count > 0; cluster++, count--) {
+        int64_t host_off = bat2sect(s, cluster) << BDRV_SECTOR_BITS;
+        if (host_off == 0) {
+            continue;
+        }
+
+        ret = bdrv_co_pdiscard(bs->file, host_off, s->cluster_size);
+        if (ret < 0) {
+            goto done;
+        }
+
+        parallels_set_bat_entry(s, cluster, 0);
+        bitmap_clear(s->used_bmap, host_cluster_index(s, host_off), 1);
+    }
+done:
+    qemu_co_mutex_unlock(&s->lock);
+    return ret;
+}
+
+static int coroutine_fn GRAPH_RDLOCK
+parallels_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                           BdrvRequestFlags flags)
+{
+    /*
+     * The zero flag is missed in the Parallels format specification. We can
+     * resort to discard if we have no backing file (this condition is checked
+     * inside parallels_co_pdiscard().
+     */
+    return parallels_co_pdiscard(bs, offset, bytes);
+}
+
+
 static void parallels_check_unclean(BlockDriverState *bs,
                                     BdrvCheckResult *res,
                                     BdrvCheckMode fix)
@@ -440,6 +615,91 @@ static void parallels_check_unclean(BlockDriverState *bs,
     }
 }
 
+/*
+ * Returns true if data_off is correct, otherwise false. In both cases
+ * correct_offset is set to the proper value.
+ */
+static bool parallels_test_data_off(BDRVParallelsState *s,
+                                    int64_t file_nb_sectors,
+                                    uint32_t *correct_offset)
+{
+    uint32_t data_off, min_off;
+    bool old_magic;
+
+    /*
+     * There are two slightly different image formats: with "WithoutFreeSpace"
+     * or "WithouFreSpacExt" magic words. Call the first one as "old magic".
+     * In such images data_off field can be zero. In this case the offset is
+     * calculated as the end of BAT table plus some padding to ensure sector
+     * size alignment.
+     */
+    old_magic = !memcmp(s->header->magic, HEADER_MAGIC, 16);
+
+    min_off = DIV_ROUND_UP(bat_entry_off(s->bat_size), BDRV_SECTOR_SIZE);
+    if (!old_magic) {
+        min_off = ROUND_UP(min_off, s->cluster_size / BDRV_SECTOR_SIZE);
+    }
+
+    if (correct_offset) {
+        *correct_offset = min_off;
+    }
+
+    data_off = le32_to_cpu(s->header->data_off);
+    if (data_off == 0 && old_magic) {
+        return true;
+    }
+
+    if (data_off < min_off || data_off > file_nb_sectors) {
+        return false;
+    }
+
+    if (correct_offset) {
+        *correct_offset = data_off;
+    }
+
+    return true;
+}
+
+static int coroutine_fn GRAPH_RDLOCK
+parallels_check_data_off(BlockDriverState *bs, BdrvCheckResult *res,
+                         BdrvCheckMode fix)
+{
+    BDRVParallelsState *s = bs->opaque;
+    int64_t file_size;
+    uint32_t data_off;
+
+    file_size = bdrv_co_nb_sectors(bs->file->bs);
+    if (file_size < 0) {
+        res->check_errors++;
+        return file_size;
+    }
+
+    if (parallels_test_data_off(s, file_size, &data_off)) {
+        return 0;
+    }
+
+    res->corruptions++;
+    if (fix & BDRV_FIX_ERRORS) {
+        int err;
+        s->header->data_off = cpu_to_le32(data_off);
+        s->data_start = data_off;
+
+        parallels_free_used_bitmap(bs);
+        err = parallels_fill_used_bitmap(bs);
+        if (err == -ENOMEM) {
+            res->check_errors++;
+            return err;
+        }
+
+        res->corruptions_fixed++;
+    }
+
+    fprintf(stderr, "%s data_off field has incorrect value\n",
+            fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR");
+
+    return 0;
+}
+
 static int coroutine_fn GRAPH_RDLOCK
 parallels_check_outside_image(BlockDriverState *bs, BdrvCheckResult *res,
                               BdrvCheckMode fix)
@@ -484,13 +744,13 @@ parallels_check_outside_image(BlockDriverState *bs, BdrvCheckResult *res,
 
 static int coroutine_fn GRAPH_RDLOCK
 parallels_check_leak(BlockDriverState *bs, BdrvCheckResult *res,
-                     BdrvCheckMode fix)
+                     BdrvCheckMode fix, bool explicit)
 {
     BDRVParallelsState *s = bs->opaque;
     int64_t size;
     int ret;
 
-    size = bdrv_getlength(bs->file->bs);
+    size = bdrv_co_getlength(bs->file->bs);
     if (size < 0) {
         res->check_errors++;
         return size;
@@ -499,10 +759,13 @@ parallels_check_leak(BlockDriverState *bs, BdrvCheckResult *res,
     if (size > res->image_end_offset) {
         int64_t count;
         count = DIV_ROUND_UP(size - res->image_end_offset, s->cluster_size);
-        fprintf(stderr, "%s space leaked at the end of the image %" PRId64 "\n",
-                fix & BDRV_FIX_LEAKS ? "Repairing" : "ERROR",
-                size - res->image_end_offset);
-        res->leaks += count;
+        if (explicit) {
+            fprintf(stderr,
+                    "%s space leaked at the end of the image %" PRId64 "\n",
+                    fix & BDRV_FIX_LEAKS ? "Repairing" : "ERROR",
+                    size - res->image_end_offset);
+            res->leaks += count;
+        }
         if (fix & BDRV_FIX_LEAKS) {
             Error *local_err = NULL;
 
@@ -517,13 +780,149 @@ parallels_check_leak(BlockDriverState *bs, BdrvCheckResult *res,
                 res->check_errors++;
                 return ret;
             }
-            res->leaks_fixed += count;
+            if (explicit) {
+                res->leaks_fixed += count;
+            }
         }
     }
 
     return 0;
 }
 
+static int coroutine_fn GRAPH_RDLOCK
+parallels_check_duplicate(BlockDriverState *bs, BdrvCheckResult *res,
+                          BdrvCheckMode fix)
+{
+    BDRVParallelsState *s = bs->opaque;
+    int64_t host_off, host_sector, guest_sector;
+    unsigned long *bitmap;
+    uint32_t i, bitmap_size, bat_entry;
+    int n, ret = 0;
+    uint64_t *buf = NULL;
+    bool fixed = false;
+
+    /*
+     * Create a bitmap of used clusters.
+     * If a bit is set, there is a BAT entry pointing to this cluster.
+     * Loop through the BAT entries, check bits relevant to an entry offset.
+     * If bit is set, this entry is duplicated. Otherwise set the bit.
+     *
+     * We shouldn't worry about newly allocated clusters outside the image
+     * because they are created higher then any existing cluster pointed by
+     * a BAT entry.
+     */
+    bitmap_size = host_cluster_index(s, res->image_end_offset);
+    if (bitmap_size == 0) {
+        return 0;
+    }
+    if (res->image_end_offset % s->cluster_size) {
+        /* A not aligned image end leads to a bitmap shorter by 1 */
+        bitmap_size++;
+    }
+
+    bitmap = bitmap_new(bitmap_size);
+
+    buf = qemu_blockalign(bs, s->cluster_size);
+
+    for (i = 0; i < s->bat_size; i++) {
+        host_off = bat2sect(s, i) << BDRV_SECTOR_BITS;
+        if (host_off == 0) {
+            continue;
+        }
+
+        ret = mark_used(bs, bitmap, bitmap_size, host_off, 1);
+        assert(ret != -E2BIG);
+        if (ret == 0) {
+            continue;
+        }
+
+        /* this cluster duplicates another one */
+        fprintf(stderr, "%s duplicate offset in BAT entry %u\n",
+                fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i);
+
+        res->corruptions++;
+
+        if (!(fix & BDRV_FIX_ERRORS)) {
+            continue;
+        }
+
+        /*
+         * Reset the entry and allocate a new cluster
+         * for the relevant guest offset. In this way we let
+         * the lower layer to place the new cluster properly.
+         * Copy the original cluster to the allocated one.
+         * But before save the old offset value for repairing
+         * if we have an error.
+         */
+        bat_entry = s->bat_bitmap[i];
+        parallels_set_bat_entry(s, i, 0);
+
+        ret = bdrv_co_pread(bs->file, host_off, s->cluster_size, buf, 0);
+        if (ret < 0) {
+            res->check_errors++;
+            goto out_repair_bat;
+        }
+
+        guest_sector = (i * (int64_t)s->cluster_size) >> BDRV_SECTOR_BITS;
+        host_sector = allocate_clusters(bs, guest_sector, s->tracks, &n);
+        if (host_sector < 0) {
+            res->check_errors++;
+            goto out_repair_bat;
+        }
+        host_off = host_sector << BDRV_SECTOR_BITS;
+
+        ret = bdrv_co_pwrite(bs->file, host_off, s->cluster_size, buf, 0);
+        if (ret < 0) {
+            res->check_errors++;
+            goto out_repair_bat;
+        }
+
+        if (host_off + s->cluster_size > res->image_end_offset) {
+            res->image_end_offset = host_off + s->cluster_size;
+        }
+
+        /*
+         * In the future allocate_cluster() will reuse holed offsets
+         * inside the image. Keep the used clusters bitmap content
+         * consistent for the new allocated clusters too.
+         *
+         * Note, clusters allocated outside the current image are not
+         * considered, and the bitmap size doesn't change. This specifically
+         * means that -E2BIG is OK.
+         */
+        ret = mark_used(bs, bitmap, bitmap_size, host_off, 1);
+        if (ret == -EBUSY) {
+            res->check_errors++;
+            goto out_repair_bat;
+        }
+
+        fixed = true;
+        res->corruptions_fixed++;
+
+    }
+
+    if (fixed) {
+        /*
+         * When new clusters are allocated, the file size increases by
+         * 128 Mb. We need to truncate the file to the right size. Let
+         * the leak fix code make its job without res changing.
+         */
+        ret = parallels_check_leak(bs, res, fix, false);
+    }
+
+out_free:
+    g_free(buf);
+    g_free(bitmap);
+    return ret;
+/*
+ * We can get here only from places where index and old_offset have
+ * meaningful values.
+ */
+out_repair_bat:
+    s->bat_bitmap[i] = bat_entry;
+    goto out_free;
+}
+
 static void parallels_collect_statistics(BlockDriverState *bs,
                                          BdrvCheckResult *res,
                                          BdrvCheckMode fix)
@@ -565,12 +964,22 @@ parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res,
     WITH_QEMU_LOCK_GUARD(&s->lock) {
         parallels_check_unclean(bs, res, fix);
 
+        ret = parallels_check_data_off(bs, res, fix);
+        if (ret < 0) {
+            return ret;
+        }
+
         ret = parallels_check_outside_image(bs, res, fix);
         if (ret < 0) {
             return ret;
         }
 
-        ret = parallels_check_leak(bs, res, fix);
+        ret = parallels_check_leak(bs, res, fix, true);
+        if (ret < 0) {
+            return ret;
+        }
+
+        ret = parallels_check_duplicate(bs, res, fix);
         if (ret < 0) {
             return ret;
         }
@@ -792,16 +1201,58 @@ static int parallels_update_header(BlockDriverState *bs)
     return bdrv_pwrite_sync(bs->file, 0, size, s->header, 0);
 }
 
+
+static int parallels_opts_prealloc(BlockDriverState *bs, QDict *options,
+                                   Error **errp)
+{
+    int err;
+    char *buf;
+    int64_t bytes;
+    BDRVParallelsState *s = bs->opaque;
+    Error *local_err = NULL;
+    QemuOpts *opts = qemu_opts_create(&parallels_runtime_opts, NULL, 0, errp);
+    if (!opts) {
+        return -ENOMEM;
+    }
+
+    err = -EINVAL;
+    if (!qemu_opts_absorb_qdict(opts, options, errp)) {
+        goto done;
+    }
+
+    bytes = qemu_opt_get_size_del(opts, PARALLELS_OPT_PREALLOC_SIZE, 0);
+    s->prealloc_size = bytes >> BDRV_SECTOR_BITS;
+    buf = qemu_opt_get_del(opts, PARALLELS_OPT_PREALLOC_MODE);
+    /* prealloc_mode can be downgraded later during allocate_clusters */
+    s->prealloc_mode = qapi_enum_parse(&prealloc_mode_lookup, buf,
+                                       PRL_PREALLOC_MODE_FALLOCATE,
+                                       &local_err);
+    g_free(buf);
+    if (local_err != NULL) {
+        error_propagate(errp, local_err);
+        goto done;
+    }
+    err = 0;
+
+done:
+    qemu_opts_del(opts);
+    return err;
+}
+
 static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
                           Error **errp)
 {
     BDRVParallelsState *s = bs->opaque;
     ParallelsHeader ph;
     int ret, size, i;
-    int64_t file_nb_sectors;
-    QemuOpts *opts = NULL;
-    Error *local_err = NULL;
-    char *buf;
+    int64_t file_nb_sectors, sector;
+    uint32_t data_start;
+    bool need_check = false;
+
+    ret = parallels_opts_prealloc(bs, options, errp);
+    if (ret < 0) {
+        return ret;
+    }
 
     ret = bdrv_open_file_child(NULL, options, "file", bs, errp);
     if (ret < 0) {
@@ -815,7 +1266,7 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
 
     ret = bdrv_pread(bs->file, 0, sizeof(ph), &ph, 0);
     if (ret < 0) {
-        goto fail;
+        return ret;
     }
 
     bs->total_sectors = le64_to_cpu(ph.nb_sectors);
@@ -835,38 +1286,26 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
     s->tracks = le32_to_cpu(ph.tracks);
     if (s->tracks == 0) {
         error_setg(errp, "Invalid image: Zero sectors per track");
-        ret = -EINVAL;
-        goto fail;
+        return -EINVAL;
     }
     if (s->tracks > INT32_MAX/513) {
         error_setg(errp, "Invalid image: Too big cluster");
-        ret = -EFBIG;
-        goto fail;
+        return -EFBIG;
     }
+    s->prealloc_size = MAX(s->tracks, s->prealloc_size);
     s->cluster_size = s->tracks << BDRV_SECTOR_BITS;
 
     s->bat_size = le32_to_cpu(ph.bat_entries);
     if (s->bat_size > INT_MAX / sizeof(uint32_t)) {
         error_setg(errp, "Catalog too large");
-        ret = -EFBIG;
-        goto fail;
+        return -EFBIG;
     }
 
     size = bat_entry_off(s->bat_size);
     s->header_size = ROUND_UP(size, bdrv_opt_mem_align(bs->file->bs));
     s->header = qemu_try_blockalign(bs->file->bs, s->header_size);
     if (s->header == NULL) {
-        ret = -ENOMEM;
-        goto fail;
-    }
-    s->data_end = le32_to_cpu(ph.data_off);
-    if (s->data_end == 0) {
-        s->data_end = ROUND_UP(bat_entry_off(s->bat_size), BDRV_SECTOR_SIZE);
-    }
-    if (s->data_end < s->header_size) {
-        /* there is not enough unused space to fit to block align between BAT
-           and actual data. We can't avoid read-modify-write... */
-        s->header_size = size;
+        return -ENOMEM;
     }
 
     ret = bdrv_pread(bs->file, 0, s->header_size, s->header, 0);
@@ -875,56 +1314,23 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
     }
     s->bat_bitmap = (uint32_t *)(s->header + 1);
 
-    for (i = 0; i < s->bat_size; i++) {
-        int64_t off = bat2sect(s, i);
-        if (off >= file_nb_sectors) {
-            if (flags & BDRV_O_CHECK) {
-                continue;
-            }
-            error_setg(errp, "parallels: Offset %" PRIi64 " in BAT[%d] entry "
-                       "is larger than file size (%" PRIi64 ")",
-                       off << BDRV_SECTOR_BITS, i,
-                       file_nb_sectors << BDRV_SECTOR_BITS);
-            ret = -EINVAL;
-            goto fail;
-        }
-        if (off >= s->data_end) {
-            s->data_end = off + s->tracks;
-        }
-    }
-
     if (le32_to_cpu(ph.inuse) == HEADER_INUSE_MAGIC) {
-        /* Image was not closed correctly. The check is mandatory */
-        s->header_unclean = true;
-        if ((flags & BDRV_O_RDWR) && !(flags & BDRV_O_CHECK)) {
-            error_setg(errp, "parallels: Image was not closed correctly; "
-                       "cannot be opened read/write");
-            ret = -EACCES;
-            goto fail;
-        }
+        need_check = s->header_unclean = true;
     }
 
-    opts = qemu_opts_create(&parallels_runtime_opts, NULL, 0, errp);
-    if (!opts) {
-        goto fail_options;
+    {
+        bool ok = parallels_test_data_off(s, file_nb_sectors, &data_start);
+        need_check = need_check || !ok;
     }
 
-    if (!qemu_opts_absorb_qdict(opts, options, errp)) {
-        goto fail_options;
-    }
-
-    s->prealloc_size =
-        qemu_opt_get_size_del(opts, PARALLELS_OPT_PREALLOC_SIZE, 0);
-    s->prealloc_size = MAX(s->tracks, s->prealloc_size >> BDRV_SECTOR_BITS);
-    buf = qemu_opt_get_del(opts, PARALLELS_OPT_PREALLOC_MODE);
-    /* prealloc_mode can be downgraded later during allocate_clusters */
-    s->prealloc_mode = qapi_enum_parse(&prealloc_mode_lookup, buf,
-                                       PRL_PREALLOC_MODE_FALLOCATE,
-                                       &local_err);
-    g_free(buf);
-    if (local_err != NULL) {
-        error_propagate(errp, local_err);
-        goto fail_options;
+    s->data_start = data_start;
+    s->data_end = s->data_start;
+    if (s->data_end < (s->header_size >> BDRV_SECTOR_BITS)) {
+        /*
+         * There is not enough unused space to fit to block align between BAT
+         * and actual data. We can't avoid read-modify-write...
+         */
+        s->header_size = size;
     }
 
     if (ph.ext_off) {
@@ -962,17 +1368,60 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
                bdrv_get_device_or_node_name(bs));
     ret = migrate_add_blocker(s->migration_blocker, errp);
     if (ret < 0) {
-        error_free(s->migration_blocker);
+        error_setg(errp, "Migration blocker error");
         goto fail;
     }
     qemu_co_mutex_init(&s->lock);
+
+    for (i = 0; i < s->bat_size; i++) {
+        sector = bat2sect(s, i);
+        if (sector + s->tracks > s->data_end) {
+            s->data_end = sector + s->tracks;
+        }
+    }
+    need_check = need_check || s->data_end > file_nb_sectors;
+
+    if (!need_check) {
+        ret = parallels_fill_used_bitmap(bs);
+        if (ret == -ENOMEM) {
+            goto fail;
+        }
+        need_check = need_check || ret < 0; /* These are correctable errors */
+    }
+
+    /*
+     * We don't repair the image here if it's opened for checks. Also we don't
+     * want to change inactive images and can't change readonly images.
+     */
+    if ((flags & (BDRV_O_CHECK | BDRV_O_INACTIVE)) || !(flags & BDRV_O_RDWR)) {
+        return 0;
+    }
+
+    /* Repair the image if corruption was detected. */
+    if (need_check) {
+        BdrvCheckResult res;
+        ret = bdrv_check(bs, &res, BDRV_FIX_ERRORS | BDRV_FIX_LEAKS);
+        if (ret < 0) {
+            error_setg_errno(errp, -ret, "Could not repair corrupted image");
+            migrate_del_blocker(s->migration_blocker);
+            goto fail;
+        }
+    }
     return 0;
 
 fail_format:
     error_setg(errp, "Image not in Parallels format");
-fail_options:
-    ret = -EINVAL;
+    return -EINVAL;
+
 fail:
+    /*
+     * "s" object was allocated by g_malloc0 so we can safely
+     * try to free its fields even they were not allocated.
+     */
+    parallels_free_used_bitmap(bs);
+
+    error_free(s->migration_blocker);
+    g_free(s->bat_dirty_bmap);
     qemu_vfree(s->header);
     return ret;
 }
@@ -991,6 +1440,8 @@ static void parallels_close(BlockDriverState *bs)
                       PREALLOC_MODE_OFF, 0, NULL);
     }
 
+    parallels_free_used_bitmap(bs);
+
     g_free(s->bat_dirty_bmap);
     qemu_vfree(s->header);
 
@@ -998,24 +1449,34 @@ static void parallels_close(BlockDriverState *bs)
     error_free(s->migration_blocker);
 }
 
+static bool parallels_is_support_dirty_bitmaps(BlockDriverState *bs)
+{
+    return 1;
+}
+
 static BlockDriver bdrv_parallels = {
-    .format_name	= "parallels",
-    .instance_size	= sizeof(BDRVParallelsState),
-    .bdrv_probe		= parallels_probe,
-    .bdrv_open		= parallels_open,
-    .bdrv_close		= parallels_close,
-    .bdrv_child_perm          = bdrv_default_perms,
-    .bdrv_co_block_status     = parallels_co_block_status,
-    .bdrv_has_zero_init       = bdrv_has_zero_init_1,
-    .bdrv_co_flush_to_os      = parallels_co_flush_to_os,
-    .bdrv_co_readv  = parallels_co_readv,
-    .bdrv_co_writev = parallels_co_writev,
-    .is_format      = true,
-    .supports_backing = true,
-    .bdrv_co_create      = parallels_co_create,
-    .bdrv_co_create_opts = parallels_co_create_opts,
-    .bdrv_co_check  = parallels_co_check,
-    .create_opts    = &parallels_create_opts,
+    .format_name                = "parallels",
+    .instance_size              = sizeof(BDRVParallelsState),
+    .create_opts                = &parallels_create_opts,
+    .is_format                  = true,
+    .supports_backing           = true,
+
+    .bdrv_has_zero_init         = bdrv_has_zero_init_1,
+    .bdrv_supports_persistent_dirty_bitmap = parallels_is_support_dirty_bitmaps,
+
+    .bdrv_probe                 = parallels_probe,
+    .bdrv_open                  = parallels_open,
+    .bdrv_close                 = parallels_close,
+    .bdrv_child_perm            = bdrv_default_perms,
+    .bdrv_co_block_status       = parallels_co_block_status,
+    .bdrv_co_flush_to_os        = parallels_co_flush_to_os,
+    .bdrv_co_readv              = parallels_co_readv,
+    .bdrv_co_writev             = parallels_co_writev,
+    .bdrv_co_create             = parallels_co_create,
+    .bdrv_co_create_opts        = parallels_co_create_opts,
+    .bdrv_co_check              = parallels_co_check,
+    .bdrv_co_pdiscard           = parallels_co_pdiscard,
+    .bdrv_co_pwrite_zeroes      = parallels_co_pwrite_zeroes,
 };
 
 static void bdrv_parallels_init(void)
diff --git a/block/parallels.h b/block/parallels.h
index f22f43f988..6b199443cf 100644
--- a/block/parallels.h
+++ b/block/parallels.h
@@ -72,9 +72,13 @@ typedef struct BDRVParallelsState {
     unsigned long *bat_dirty_bmap;
     unsigned int  bat_dirty_block;
 
+    unsigned long *used_bmap;
+    unsigned long used_bmap_size;
+
     uint32_t *bat_bitmap;
     unsigned int bat_size;
 
+    int64_t  data_start;
     int64_t  data_end;
     uint64_t prealloc_size;
     ParallelsPreallocMode prealloc_mode;
diff --git a/block/preallocate.c b/block/preallocate.c
index 4d82125036..bfb638d8b1 100644
--- a/block/preallocate.c
+++ b/block/preallocate.c
@@ -75,8 +75,14 @@ typedef struct BDRVPreallocateState {
      * be invalid (< 0) when we don't have both exclusive BLK_PERM_RESIZE and
      * BLK_PERM_WRITE permissions on file child.
      */
+
+    /* Gives up the resize permission on children when parents don't need it */
+    QEMUBH *drop_resize_bh;
 } BDRVPreallocateState;
 
+static int preallocate_drop_resize(BlockDriverState *bs, Error **errp);
+static void preallocate_drop_resize_bh(void *opaque);
+
 #define PREALLOCATE_OPT_PREALLOC_ALIGN "prealloc-align"
 #define PREALLOCATE_OPT_PREALLOC_SIZE "prealloc-size"
 static QemuOptsList runtime_opts = {
@@ -142,6 +148,7 @@ static int preallocate_open(BlockDriverState *bs, QDict *options, int flags,
      * For this to work, mark them invalid.
      */
     s->file_end = s->zero_start = s->data_end = -EINVAL;
+    s->drop_resize_bh = qemu_bh_new(preallocate_drop_resize_bh, bs);
 
     ret = bdrv_open_file_child(NULL, options, "file", bs, errp);
     if (ret < 0) {
@@ -162,26 +169,42 @@ static int preallocate_open(BlockDriverState *bs, QDict *options, int flags,
     return 0;
 }
 
-static void preallocate_close(BlockDriverState *bs)
+static int preallocate_truncate_to_real_size(BlockDriverState *bs, Error **errp)
 {
-    int ret;
     BDRVPreallocateState *s = bs->opaque;
-
-    if (s->data_end < 0) {
-        return;
-    }
+    int ret;
 
     if (s->file_end < 0) {
         s->file_end = bdrv_getlength(bs->file->bs);
         if (s->file_end < 0) {
-            return;
+            error_setg_errno(errp, -s->file_end, "Failed to get file length");
+            return s->file_end;
         }
     }
 
     if (s->data_end < s->file_end) {
         ret = bdrv_truncate(bs->file, s->data_end, true, PREALLOC_MODE_OFF, 0,
                             NULL);
-        s->file_end = ret < 0 ? ret : s->data_end;
+        if (ret < 0) {
+            error_setg_errno(errp, -ret, "Failed to drop preallocation");
+            s->file_end = ret;
+            return ret;
+        }
+        s->file_end = s->data_end;
+    }
+
+    return 0;
+}
+
+static void preallocate_close(BlockDriverState *bs)
+{
+    BDRVPreallocateState *s = bs->opaque;
+
+    qemu_bh_cancel(s->drop_resize_bh);
+    qemu_bh_delete(s->drop_resize_bh);
+
+    if (s->data_end >= 0) {
+        preallocate_truncate_to_real_size(bs, NULL);
     }
 }
 
@@ -198,6 +221,7 @@ static int preallocate_reopen_prepare(BDRVReopenState *reopen_state,
                                       BlockReopenQueue *queue, Error **errp)
 {
     PreallocateOpts *opts = g_new0(PreallocateOpts, 1);
+    int ret;
 
     if (!preallocate_absorb_opts(opts, reopen_state->options,
                                  reopen_state->bs->file->bs, errp)) {
@@ -205,6 +229,19 @@ static int preallocate_reopen_prepare(BDRVReopenState *reopen_state,
         return -EINVAL;
     }
 
+    /*
+     * Drop the preallocation already here if reopening read-only. The child
+     * might also be reopened read-only and then scheduling a BH during the
+     * permission update is too late.
+     */
+    if ((reopen_state->flags & BDRV_O_RDWR) == 0) {
+        ret = preallocate_drop_resize(reopen_state->bs, errp);
+        if (ret < 0) {
+            g_free(opts);
+            return ret;
+        }
+    }
+
     reopen_state->opaque = opts;
 
     return 0;
@@ -462,58 +499,61 @@ preallocate_co_getlength(BlockDriverState *bs)
     return ret;
 }
 
-static int preallocate_check_perm(BlockDriverState *bs,
-                                  uint64_t perm, uint64_t shared, Error **errp)
+static int preallocate_drop_resize(BlockDriverState *bs, Error **errp)
 {
     BDRVPreallocateState *s = bs->opaque;
+    int ret;
 
-    if (s->data_end >= 0 && !can_write_resize(perm)) {
-        /*
-         * Lose permissions.
-         * We should truncate in check_perm, as in set_perm bs->file->perm will
-         * be already changed, and we should not violate it.
-         */
-        if (s->file_end < 0) {
-            s->file_end = bdrv_getlength(bs->file->bs);
-            if (s->file_end < 0) {
-                error_setg(errp, "Failed to get file length");
-                return s->file_end;
-            }
-        }
-
-        if (s->data_end < s->file_end) {
-            int ret = bdrv_truncate(bs->file, s->data_end, true,
-                                    PREALLOC_MODE_OFF, 0, NULL);
-            if (ret < 0) {
-                error_setg(errp, "Failed to drop preallocation");
-                s->file_end = ret;
-                return ret;
-            }
-            s->file_end = s->data_end;
-        }
+    if (s->data_end < 0) {
+        return 0;
     }
 
+    /*
+     * Before switching children to be read-only, truncate them to remove
+     * the preallocation and let them have the real size.
+     */
+    ret = preallocate_truncate_to_real_size(bs, errp);
+    if (ret < 0) {
+        return ret;
+    }
+
+    /*
+     * We'll drop our permissions and will allow other users to take write and
+     * resize permissions (see preallocate_child_perm). Anyone will be able to
+     * change the child, so mark all states invalid. We'll regain control if a
+     * parent requests write access again.
+     */
+    s->data_end = s->file_end = s->zero_start = -EINVAL;
+
+    bdrv_graph_rdlock_main_loop();
+    bdrv_child_refresh_perms(bs, bs->file, NULL);
+    bdrv_graph_rdunlock_main_loop();
+
     return 0;
 }
 
+static void preallocate_drop_resize_bh(void *opaque)
+{
+    /*
+     * In case of errors, we'll simply keep the exclusive lock on the image
+     * indefinitely.
+     */
+    preallocate_drop_resize(opaque, NULL);
+}
+
 static void preallocate_set_perm(BlockDriverState *bs,
                                  uint64_t perm, uint64_t shared)
 {
     BDRVPreallocateState *s = bs->opaque;
 
     if (can_write_resize(perm)) {
+        qemu_bh_cancel(s->drop_resize_bh);
         if (s->data_end < 0) {
             s->data_end = s->file_end = s->zero_start =
-                bdrv_getlength(bs->file->bs);
+                bs->file->bs->total_sectors * BDRV_SECTOR_SIZE;
         }
     } else {
-        /*
-         * We drop our permissions, as well as allow shared
-         * permissions (see preallocate_child_perm), anyone will be able to
-         * change the child, so mark all states invalid. We'll regain control if
-         * get good permissions back.
-         */
-        s->data_end = s->file_end = s->zero_start = -EINVAL;
+        qemu_bh_schedule(s->drop_resize_bh);
     }
 }
 
@@ -521,10 +561,16 @@ static void preallocate_child_perm(BlockDriverState *bs, BdrvChild *c,
     BdrvChildRole role, BlockReopenQueue *reopen_queue,
     uint64_t perm, uint64_t shared, uint64_t *nperm, uint64_t *nshared)
 {
+    BDRVPreallocateState *s = bs->opaque;
+
     bdrv_default_perms(bs, c, role, reopen_queue, perm, shared, nperm, nshared);
 
-    if (can_write_resize(perm)) {
-        /* This should come by default, but let's enforce: */
+    /*
+     * We need exclusive write and resize permissions on the child not only when
+     * the parent can write to it, but also after the parent gave up write
+     * permissions until preallocate_drop_resize() has completed.
+     */
+    if (can_write_resize(perm) || s->data_end != -EINVAL) {
         *nperm |= BLK_PERM_WRITE | BLK_PERM_RESIZE;
 
         /*
@@ -535,7 +581,7 @@ static void preallocate_child_perm(BlockDriverState *bs, BdrvChild *c,
     }
 }
 
-BlockDriver bdrv_preallocate_filter = {
+static BlockDriver bdrv_preallocate_filter = {
     .format_name = "preallocate",
     .instance_size = sizeof(BDRVPreallocateState),
 
@@ -554,7 +600,6 @@ BlockDriver bdrv_preallocate_filter = {
     .bdrv_co_flush = preallocate_co_flush,
     .bdrv_co_truncate = preallocate_co_truncate,
 
-    .bdrv_check_perm = preallocate_check_perm,
     .bdrv_set_perm = preallocate_set_perm,
     .bdrv_child_perm = preallocate_child_perm,
 
diff --git a/block/qapi.c b/block/qapi.c
index f34f95e0ef..1cbb0935ff 100644
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -48,7 +48,7 @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk,
 {
     ImageInfo **p_image_info;
     ImageInfo *backing_info;
-    BlockDriverState *bs0, *backing;
+    BlockDriverState *backing;
     BlockDeviceInfo *info;
     ERRP_GUARD();
 
@@ -145,7 +145,6 @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk,
 
     info->write_threshold = bdrv_write_threshold_get(bs);
 
-    bs0 = bs;
     p_image_info = &info->image;
     info->backing_file_depth = 0;
 
@@ -153,7 +152,7 @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk,
      * Skip automatically inserted nodes that the user isn't aware of for
      * query-block (blk != NULL), but not for query-named-block-nodes
      */
-    bdrv_query_image_info(bs0, p_image_info, flat, blk != NULL, errp);
+    bdrv_query_image_info(bs, p_image_info, flat, blk != NULL, errp);
     if (*errp) {
         qapi_free_BlockDeviceInfo(info);
         return NULL;
@@ -309,33 +308,6 @@ out:
     aio_context_release(bdrv_get_aio_context(bs));
 }
 
-/**
- * bdrv_query_block_node_info:
- * @bs: block node to examine
- * @p_info: location to store node information
- * @errp: location to store error information
- *
- * Store image information about @bs in @p_info.
- *
- * @p_info will be set only on success. On error, store error in @errp.
- */
-void bdrv_query_block_node_info(BlockDriverState *bs,
-                                BlockNodeInfo **p_info,
-                                Error **errp)
-{
-    BlockNodeInfo *info;
-    ERRP_GUARD();
-
-    info = g_new0(BlockNodeInfo, 1);
-    bdrv_do_query_node_info(bs, info, errp);
-    if (*errp) {
-        qapi_free_BlockNodeInfo(info);
-        return;
-    }
-
-    *p_info = info;
-}
-
 /**
  * bdrv_query_image_info:
  * @bs: block node to examine
diff --git a/block/qcow.c b/block/qcow.c
index 577bd70324..d56d24ab6d 100644
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -549,7 +549,10 @@ qcow_co_block_status(BlockDriverState *bs, bool want_zero,
     if (!cluster_offset) {
         return 0;
     }
-    if ((cluster_offset & QCOW_OFLAG_COMPRESSED) || s->crypto) {
+    if (cluster_offset & QCOW_OFLAG_COMPRESSED) {
+        return BDRV_BLOCK_DATA | BDRV_BLOCK_COMPRESSED;
+    }
+    if (s->crypto) {
         return BDRV_BLOCK_DATA;
     }
     *map = cluster_offset | index_in_cluster;
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index 5095e99a37..996d1217d0 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -2645,7 +2645,7 @@ rebuild_refcount_structure(BlockDriverState *bs, BdrvCheckResult *res,
      * repeat all this until the reftable stops growing.
      *
      * (This loop will terminate, because with every cluster the
-     * reftable grows, it can accomodate a multitude of more refcounts,
+     * reftable grows, it can accommodate a multitude of more refcounts,
      * so that at some point this must be able to cover the reftable
      * and all refblocks describing it.)
      *
diff --git a/block/qcow2.c b/block/qcow2.c
index b48cd9ce63..af43d59d76 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -1880,7 +1880,7 @@ qcow2_do_open(BlockDriverState *bs, QDict *options, int flags,
     g_free(s->image_data_file);
     if (open_data_file && has_data_file(bs)) {
         bdrv_graph_co_rdunlock();
-        bdrv_unref_child(bs, s->data_file);
+        bdrv_co_unref_child(bs, s->data_file);
         bdrv_graph_co_rdlock();
         s->data_file = NULL;
     }
@@ -2162,6 +2162,9 @@ qcow2_co_block_status(BlockDriverState *bs, bool want_zero, int64_t offset,
     {
         status |= BDRV_BLOCK_RECURSE;
     }
+    if (type == QCOW2_SUBCLUSTER_COMPRESSED) {
+        status |= BDRV_BLOCK_COMPRESSED;
+    }
     return status;
 }
 
@@ -2790,7 +2793,9 @@ static void qcow2_do_close(BlockDriverState *bs, bool close_data_file)
     g_free(s->image_backing_format);
 
     if (close_data_file && has_data_file(bs)) {
+        bdrv_graph_wrlock(NULL);
         bdrv_unref_child(bs, s->data_file);
+        bdrv_graph_wrunlock();
         s->data_file = NULL;
     }
 
diff --git a/block/quorum.c b/block/quorum.c
index f28758cf2b..05220cab7f 100644
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -1037,12 +1037,14 @@ static int quorum_open(BlockDriverState *bs, QDict *options, int flags,
 
 close_exit:
     /* cleanup on error */
+    bdrv_graph_wrlock(NULL);
     for (i = 0; i < s->num_children; i++) {
         if (!opened[i]) {
             continue;
         }
         bdrv_unref_child(bs, s->children[i]);
     }
+    bdrv_graph_wrunlock();
     g_free(s->children);
     g_free(opened);
 exit:
@@ -1055,15 +1057,17 @@ static void quorum_close(BlockDriverState *bs)
     BDRVQuorumState *s = bs->opaque;
     int i;
 
+    bdrv_graph_wrlock(NULL);
     for (i = 0; i < s->num_children; i++) {
         bdrv_unref_child(bs, s->children[i]);
     }
+    bdrv_graph_wrunlock();
 
     g_free(s->children);
 }
 
-static void quorum_add_child(BlockDriverState *bs, BlockDriverState *child_bs,
-                             Error **errp)
+static void GRAPH_WRLOCK
+quorum_add_child(BlockDriverState *bs, BlockDriverState *child_bs, Error **errp)
 {
     BDRVQuorumState *s = bs->opaque;
     BdrvChild *child;
@@ -1089,8 +1093,6 @@ static void quorum_add_child(BlockDriverState *bs, BlockDriverState *child_bs,
     }
     s->next_child_index++;
 
-    bdrv_drained_begin(bs);
-
     /* We can safely add the child now */
     bdrv_ref(child_bs);
 
@@ -1098,18 +1100,15 @@ static void quorum_add_child(BlockDriverState *bs, BlockDriverState *child_bs,
                               BDRV_CHILD_DATA, errp);
     if (child == NULL) {
         s->next_child_index--;
-        goto out;
+        return;
     }
     s->children = g_renew(BdrvChild *, s->children, s->num_children + 1);
     s->children[s->num_children++] = child;
     quorum_refresh_flags(bs);
-
-out:
-    bdrv_drained_end(bs);
 }
 
-static void quorum_del_child(BlockDriverState *bs, BdrvChild *child,
-                             Error **errp)
+static void GRAPH_WRLOCK
+quorum_del_child(BlockDriverState *bs, BdrvChild *child, Error **errp)
 {
     BDRVQuorumState *s = bs->opaque;
     char indexstr[INDEXSTR_LEN];
@@ -1139,16 +1138,14 @@ static void quorum_del_child(BlockDriverState *bs, BdrvChild *child,
         s->next_child_index--;
     }
 
-    bdrv_drained_begin(bs);
-
     /* We can safely remove this child now */
     memmove(&s->children[i], &s->children[i + 1],
             (s->num_children - i - 1) * sizeof(BdrvChild *));
     s->children = g_renew(BdrvChild *, s->children, --s->num_children);
+
     bdrv_unref_child(bs, child);
 
     quorum_refresh_flags(bs);
-    bdrv_drained_end(bs);
 }
 
 static void quorum_gather_child_options(BlockDriverState *bs, QDict *target,
diff --git a/block/replication.c b/block/replication.c
index ea4bf1aa80..dd166d2d82 100644
--- a/block/replication.c
+++ b/block/replication.c
@@ -542,12 +542,15 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,
             return;
         }
 
+        bdrv_graph_wrlock(bs);
+
         bdrv_ref(hidden_disk->bs);
         s->hidden_disk = bdrv_attach_child(bs, hidden_disk->bs, "hidden disk",
                                            &child_of_bds, BDRV_CHILD_DATA,
                                            &local_err);
         if (local_err) {
             error_propagate(errp, local_err);
+            bdrv_graph_wrunlock();
             aio_context_release(aio_context);
             return;
         }
@@ -558,10 +561,13 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,
                                               BDRV_CHILD_DATA, &local_err);
         if (local_err) {
             error_propagate(errp, local_err);
+            bdrv_graph_wrunlock();
             aio_context_release(aio_context);
             return;
         }
 
+        bdrv_graph_wrunlock();
+
         /* start backup job now */
         error_setg(&s->blocker,
                    "Block device is in use by internal backup job");
@@ -666,10 +672,13 @@ static void replication_done(void *opaque, int ret)
     if (ret == 0) {
         s->stage = BLOCK_REPLICATION_DONE;
 
+        bdrv_graph_wrlock(NULL);
         bdrv_unref_child(bs, s->secondary_disk);
         s->secondary_disk = NULL;
         bdrv_unref_child(bs, s->hidden_disk);
         s->hidden_disk = NULL;
+        bdrv_graph_wrunlock();
+
         s->error = 0;
     } else {
         s->stage = BLOCK_REPLICATION_FAILOVER_FAILED;
diff --git a/block/snapshot-access.c b/block/snapshot-access.c
index 67ea339da9..8d4e8932b8 100644
--- a/block/snapshot-access.c
+++ b/block/snapshot-access.c
@@ -108,7 +108,7 @@ static void snapshot_access_child_perm(BlockDriverState *bs, BdrvChild *c,
     *nshared = BLK_PERM_ALL;
 }
 
-BlockDriver bdrv_snapshot_access_drv = {
+static BlockDriver bdrv_snapshot_access_drv = {
     .format_name = "snapshot-access",
 
     .bdrv_open                  = snapshot_access_open,
diff --git a/block/snapshot.c b/block/snapshot.c
index e22ac3eac6..b86b5b24ad 100644
--- a/block/snapshot.c
+++ b/block/snapshot.c
@@ -281,7 +281,9 @@ int bdrv_snapshot_goto(BlockDriverState *bs,
         }
 
         /* .bdrv_open() will re-attach it */
+        bdrv_graph_wrlock(NULL);
         bdrv_unref_child(bs, fallback);
+        bdrv_graph_wrunlock();
 
         ret = bdrv_snapshot_goto(fallback_bs, snapshot_id, errp);
         open_ret = drv->bdrv_open(bs, options, bs->open_flags, &local_err);
diff --git a/block/stream.c b/block/stream.c
index e522bbdec5..e4da214f1f 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -54,6 +54,7 @@ static int stream_prepare(Job *job)
 {
     StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
     BlockDriverState *unfiltered_bs = bdrv_skip_filters(s->target_bs);
+    BlockDriverState *unfiltered_bs_cow = bdrv_cow_bs(unfiltered_bs);
     BlockDriverState *base;
     BlockDriverState *unfiltered_base;
     Error *local_err = NULL;
@@ -64,13 +65,18 @@ static int stream_prepare(Job *job)
     s->cor_filter_bs = NULL;
 
     /*
-     * bdrv_set_backing_hd() requires that unfiltered_bs is drained. Drain
-     * already here and use bdrv_set_backing_hd_drained() instead because
-     * the polling during drained_begin() might change the graph, and if we do
-     * this only later, we may end up working with the wrong base node (or it
-     * might even have gone away by the time we want to use it).
+     * bdrv_set_backing_hd() requires that the unfiltered_bs and the COW child
+     * of unfiltered_bs is drained. Drain already here and use
+     * bdrv_set_backing_hd_drained() instead because the polling during
+     * drained_begin() might change the graph, and if we do this only later, we
+     * may end up working with the wrong base node (or it might even have gone
+     * away by the time we want to use it).
      */
     bdrv_drained_begin(unfiltered_bs);
+    if (unfiltered_bs_cow) {
+        bdrv_ref(unfiltered_bs_cow);
+        bdrv_drained_begin(unfiltered_bs_cow);
+    }
 
     base = bdrv_filter_or_cow_bs(s->above_base);
     unfiltered_base = bdrv_skip_filters(base);
@@ -100,6 +106,10 @@ static int stream_prepare(Job *job)
     }
 
 out:
+    if (unfiltered_bs_cow) {
+        bdrv_drained_end(unfiltered_bs_cow);
+        bdrv_unref(unfiltered_bs_cow);
+    }
     bdrv_drained_end(unfiltered_bs);
     return ret;
 }
diff --git a/block/throttle-groups.c b/block/throttle-groups.c
index fb203c3ced..3eda4c4e3d 100644
--- a/block/throttle-groups.c
+++ b/block/throttle-groups.c
@@ -37,7 +37,7 @@
 
 static void throttle_group_obj_init(Object *obj);
 static void throttle_group_obj_complete(UserCreatable *obj, Error **errp);
-static void timer_cb(ThrottleGroupMember *tgm, bool is_write);
+static void timer_cb(ThrottleGroupMember *tgm, ThrottleDirection direction);
 
 /* The ThrottleGroup structure (with its ThrottleState) is shared
  * among different ThrottleGroupMembers and it's independent from
@@ -73,8 +73,8 @@ struct ThrottleGroup {
     QemuMutex lock; /* This lock protects the following four fields */
     ThrottleState ts;
     QLIST_HEAD(, ThrottleGroupMember) head;
-    ThrottleGroupMember *tokens[2];
-    bool any_timer_armed[2];
+    ThrottleGroupMember *tokens[THROTTLE_MAX];
+    bool any_timer_armed[THROTTLE_MAX];
     QEMUClockType clock_type;
 
     /* This field is protected by the global QEMU mutex */
@@ -197,13 +197,13 @@ static ThrottleGroupMember *throttle_group_next_tgm(ThrottleGroupMember *tgm)
  * This assumes that tg->lock is held.
  *
  * @tgm:        the ThrottleGroupMember
- * @is_write:   the type of operation (read/write)
+ * @direction:  the ThrottleDirection
  * @ret:        whether the ThrottleGroupMember has pending requests.
  */
 static inline bool tgm_has_pending_reqs(ThrottleGroupMember *tgm,
-                                        bool is_write)
+                                        ThrottleDirection direction)
 {
-    return tgm->pending_reqs[is_write];
+    return tgm->pending_reqs[direction];
 }
 
 /* Return the next ThrottleGroupMember in the round-robin sequence with pending
@@ -212,12 +212,12 @@ static inline bool tgm_has_pending_reqs(ThrottleGroupMember *tgm,
  * This assumes that tg->lock is held.
  *
  * @tgm:       the current ThrottleGroupMember
- * @is_write:  the type of operation (read/write)
+ * @direction: the ThrottleDirection
  * @ret:       the next ThrottleGroupMember with pending requests, or tgm if
  *             there is none.
  */
 static ThrottleGroupMember *next_throttle_token(ThrottleGroupMember *tgm,
-                                                bool is_write)
+                                                ThrottleDirection direction)
 {
     ThrottleState *ts = tgm->throttle_state;
     ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);
@@ -227,16 +227,16 @@ static ThrottleGroupMember *next_throttle_token(ThrottleGroupMember *tgm,
      * it's being drained. Skip the round-robin search and return tgm
      * immediately if it has pending requests. Otherwise we could be
      * forcing it to wait for other member's throttled requests. */
-    if (tgm_has_pending_reqs(tgm, is_write) &&
+    if (tgm_has_pending_reqs(tgm, direction) &&
         qatomic_read(&tgm->io_limits_disabled)) {
         return tgm;
     }
 
-    start = token = tg->tokens[is_write];
+    start = token = tg->tokens[direction];
 
     /* get next bs round in round robin style */
     token = throttle_group_next_tgm(token);
-    while (token != start && !tgm_has_pending_reqs(token, is_write)) {
+    while (token != start && !tgm_has_pending_reqs(token, direction)) {
         token = throttle_group_next_tgm(token);
     }
 
@@ -244,12 +244,12 @@ static ThrottleGroupMember *next_throttle_token(ThrottleGroupMember *tgm,
      * then decide the token is the current tgm because chances are
      * the current tgm got the current request queued.
      */
-    if (token == start && !tgm_has_pending_reqs(token, is_write)) {
+    if (token == start && !tgm_has_pending_reqs(token, direction)) {
         token = tgm;
     }
 
     /* Either we return the original TGM, or one with pending requests */
-    assert(token == tgm || tgm_has_pending_reqs(token, is_write));
+    assert(token == tgm || tgm_has_pending_reqs(token, direction));
 
     return token;
 }
@@ -261,11 +261,11 @@ static ThrottleGroupMember *next_throttle_token(ThrottleGroupMember *tgm,
  * This assumes that tg->lock is held.
  *
  * @tgm:        the current ThrottleGroupMember
- * @is_write:   the type of operation (read/write)
+ * @direction:  the ThrottleDirection
  * @ret:        whether the I/O request needs to be throttled or not
  */
 static bool throttle_group_schedule_timer(ThrottleGroupMember *tgm,
-                                          bool is_write)
+                                          ThrottleDirection direction)
 {
     ThrottleState *ts = tgm->throttle_state;
     ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);
@@ -277,16 +277,16 @@ static bool throttle_group_schedule_timer(ThrottleGroupMember *tgm,
     }
 
     /* Check if any of the timers in this group is already armed */
-    if (tg->any_timer_armed[is_write]) {
+    if (tg->any_timer_armed[direction]) {
         return true;
     }
 
-    must_wait = throttle_schedule_timer(ts, tt, is_write);
+    must_wait = throttle_schedule_timer(ts, tt, direction);
 
     /* If a timer just got armed, set tgm as the current token */
     if (must_wait) {
-        tg->tokens[is_write] = tgm;
-        tg->any_timer_armed[is_write] = true;
+        tg->tokens[direction] = tgm;
+        tg->any_timer_armed[direction] = true;
     }
 
     return must_wait;
@@ -296,15 +296,15 @@ static bool throttle_group_schedule_timer(ThrottleGroupMember *tgm,
  * any request was actually pending.
  *
  * @tgm:       the current ThrottleGroupMember
- * @is_write:  the type of operation (read/write)
+ * @direction: the ThrottleDirection
  */
 static bool coroutine_fn throttle_group_co_restart_queue(ThrottleGroupMember *tgm,
-                                                         bool is_write)
+                                                         ThrottleDirection direction)
 {
     bool ret;
 
     qemu_co_mutex_lock(&tgm->throttled_reqs_lock);
-    ret = qemu_co_queue_next(&tgm->throttled_reqs[is_write]);
+    ret = qemu_co_queue_next(&tgm->throttled_reqs[direction]);
     qemu_co_mutex_unlock(&tgm->throttled_reqs_lock);
 
     return ret;
@@ -315,9 +315,10 @@ static bool coroutine_fn throttle_group_co_restart_queue(ThrottleGroupMember *tg
  * This assumes that tg->lock is held.
  *
  * @tgm:       the current ThrottleGroupMember
- * @is_write:  the type of operation (read/write)
+ * @direction: the ThrottleDirection
  */
-static void schedule_next_request(ThrottleGroupMember *tgm, bool is_write)
+static void schedule_next_request(ThrottleGroupMember *tgm,
+                                  ThrottleDirection direction)
 {
     ThrottleState *ts = tgm->throttle_state;
     ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);
@@ -325,27 +326,27 @@ static void schedule_next_request(ThrottleGroupMember *tgm, bool is_write)
     ThrottleGroupMember *token;
 
     /* Check if there's any pending request to schedule next */
-    token = next_throttle_token(tgm, is_write);
-    if (!tgm_has_pending_reqs(token, is_write)) {
+    token = next_throttle_token(tgm, direction);
+    if (!tgm_has_pending_reqs(token, direction)) {
         return;
     }
 
     /* Set a timer for the request if it needs to be throttled */
-    must_wait = throttle_group_schedule_timer(token, is_write);
+    must_wait = throttle_group_schedule_timer(token, direction);
 
     /* If it doesn't have to wait, queue it for immediate execution */
     if (!must_wait) {
         /* Give preference to requests from the current tgm */
         if (qemu_in_coroutine() &&
-            throttle_group_co_restart_queue(tgm, is_write)) {
+            throttle_group_co_restart_queue(tgm, direction)) {
             token = tgm;
         } else {
             ThrottleTimers *tt = &token->throttle_timers;
             int64_t now = qemu_clock_get_ns(tg->clock_type);
-            timer_mod(tt->timers[is_write], now);
-            tg->any_timer_armed[is_write] = true;
+            timer_mod(tt->timers[direction], now);
+            tg->any_timer_armed[direction] = true;
         }
-        tg->tokens[is_write] = token;
+        tg->tokens[direction] = token;
     }
 }
 
@@ -355,48 +356,49 @@ static void schedule_next_request(ThrottleGroupMember *tgm, bool is_write)
  *
  * @tgm:       the current ThrottleGroupMember
  * @bytes:     the number of bytes for this I/O
- * @is_write:  the type of operation (read/write)
+ * @direction: the ThrottleDirection
  */
 void coroutine_fn throttle_group_co_io_limits_intercept(ThrottleGroupMember *tgm,
                                                         int64_t bytes,
-                                                        bool is_write)
+                                                        ThrottleDirection direction)
 {
     bool must_wait;
     ThrottleGroupMember *token;
     ThrottleGroup *tg = container_of(tgm->throttle_state, ThrottleGroup, ts);
 
     assert(bytes >= 0);
+    assert(direction < THROTTLE_MAX);
 
     qemu_mutex_lock(&tg->lock);
 
     /* First we check if this I/O has to be throttled. */
-    token = next_throttle_token(tgm, is_write);
-    must_wait = throttle_group_schedule_timer(token, is_write);
+    token = next_throttle_token(tgm, direction);
+    must_wait = throttle_group_schedule_timer(token, direction);
 
     /* Wait if there's a timer set or queued requests of this type */
-    if (must_wait || tgm->pending_reqs[is_write]) {
-        tgm->pending_reqs[is_write]++;
+    if (must_wait || tgm->pending_reqs[direction]) {
+        tgm->pending_reqs[direction]++;
         qemu_mutex_unlock(&tg->lock);
         qemu_co_mutex_lock(&tgm->throttled_reqs_lock);
-        qemu_co_queue_wait(&tgm->throttled_reqs[is_write],
+        qemu_co_queue_wait(&tgm->throttled_reqs[direction],
                            &tgm->throttled_reqs_lock);
         qemu_co_mutex_unlock(&tgm->throttled_reqs_lock);
         qemu_mutex_lock(&tg->lock);
-        tgm->pending_reqs[is_write]--;
+        tgm->pending_reqs[direction]--;
     }
 
     /* The I/O will be executed, so do the accounting */
-    throttle_account(tgm->throttle_state, is_write, bytes);
+    throttle_account(tgm->throttle_state, direction, bytes);
 
     /* Schedule the next request */
-    schedule_next_request(tgm, is_write);
+    schedule_next_request(tgm, direction);
 
     qemu_mutex_unlock(&tg->lock);
 }
 
 typedef struct {
     ThrottleGroupMember *tgm;
-    bool is_write;
+    ThrottleDirection direction;
 } RestartData;
 
 static void coroutine_fn throttle_group_restart_queue_entry(void *opaque)
@@ -405,16 +407,16 @@ static void coroutine_fn throttle_group_restart_queue_entry(void *opaque)
     ThrottleGroupMember *tgm = data->tgm;
     ThrottleState *ts = tgm->throttle_state;
     ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);
-    bool is_write = data->is_write;
+    ThrottleDirection direction = data->direction;
     bool empty_queue;
 
-    empty_queue = !throttle_group_co_restart_queue(tgm, is_write);
+    empty_queue = !throttle_group_co_restart_queue(tgm, direction);
 
     /* If the request queue was empty then we have to take care of
      * scheduling the next one */
     if (empty_queue) {
         qemu_mutex_lock(&tg->lock);
-        schedule_next_request(tgm, is_write);
+        schedule_next_request(tgm, direction);
         qemu_mutex_unlock(&tg->lock);
     }
 
@@ -424,18 +426,19 @@ static void coroutine_fn throttle_group_restart_queue_entry(void *opaque)
     aio_wait_kick();
 }
 
-static void throttle_group_restart_queue(ThrottleGroupMember *tgm, bool is_write)
+static void throttle_group_restart_queue(ThrottleGroupMember *tgm,
+                                        ThrottleDirection direction)
 {
     Coroutine *co;
     RestartData *rd = g_new0(RestartData, 1);
 
     rd->tgm = tgm;
-    rd->is_write = is_write;
+    rd->direction = direction;
 
     /* This function is called when a timer is fired or when
      * throttle_group_restart_tgm() is called. Either way, there can
      * be no timer pending on this tgm at this point */
-    assert(!timer_pending(tgm->throttle_timers.timers[is_write]));
+    assert(!timer_pending(tgm->throttle_timers.timers[direction]));
 
     qatomic_inc(&tgm->restart_pending);
 
@@ -445,18 +448,18 @@ static void throttle_group_restart_queue(ThrottleGroupMember *tgm, bool is_write
 
 void throttle_group_restart_tgm(ThrottleGroupMember *tgm)
 {
-    int i;
+    ThrottleDirection dir;
 
     if (tgm->throttle_state) {
-        for (i = 0; i < 2; i++) {
-            QEMUTimer *t = tgm->throttle_timers.timers[i];
+        for (dir = THROTTLE_READ; dir < THROTTLE_MAX; dir++) {
+            QEMUTimer *t = tgm->throttle_timers.timers[dir];
             if (timer_pending(t)) {
                 /* If there's a pending timer on this tgm, fire it now */
                 timer_del(t);
-                timer_cb(tgm, i);
+                timer_cb(tgm, dir);
             } else {
                 /* Else run the next request from the queue manually */
-                throttle_group_restart_queue(tgm, i);
+                throttle_group_restart_queue(tgm, dir);
             }
         }
     }
@@ -500,30 +503,30 @@ void throttle_group_get_config(ThrottleGroupMember *tgm, ThrottleConfig *cfg)
  * because it had been throttled.
  *
  * @tgm:       the ThrottleGroupMember whose request had been throttled
- * @is_write:  the type of operation (read/write)
+ * @direction: the ThrottleDirection
  */
-static void timer_cb(ThrottleGroupMember *tgm, bool is_write)
+static void timer_cb(ThrottleGroupMember *tgm, ThrottleDirection direction)
 {
     ThrottleState *ts = tgm->throttle_state;
     ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);
 
     /* The timer has just been fired, so we can update the flag */
     qemu_mutex_lock(&tg->lock);
-    tg->any_timer_armed[is_write] = false;
+    tg->any_timer_armed[direction] = false;
     qemu_mutex_unlock(&tg->lock);
 
     /* Run the request that was waiting for this timer */
-    throttle_group_restart_queue(tgm, is_write);
+    throttle_group_restart_queue(tgm, direction);
 }
 
 static void read_timer_cb(void *opaque)
 {
-    timer_cb(opaque, false);
+    timer_cb(opaque, THROTTLE_READ);
 }
 
 static void write_timer_cb(void *opaque)
 {
-    timer_cb(opaque, true);
+    timer_cb(opaque, THROTTLE_WRITE);
 }
 
 /* Register a ThrottleGroupMember from the throttling group, also initializing
@@ -541,7 +544,7 @@ void throttle_group_register_tgm(ThrottleGroupMember *tgm,
                                  const char *groupname,
                                  AioContext *ctx)
 {
-    int i;
+    ThrottleDirection dir;
     ThrottleState *ts = throttle_group_incref(groupname);
     ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);
 
@@ -551,10 +554,11 @@ void throttle_group_register_tgm(ThrottleGroupMember *tgm,
 
     QEMU_LOCK_GUARD(&tg->lock);
     /* If the ThrottleGroup is new set this ThrottleGroupMember as the token */
-    for (i = 0; i < 2; i++) {
-        if (!tg->tokens[i]) {
-            tg->tokens[i] = tgm;
+    for (dir = THROTTLE_READ; dir < THROTTLE_MAX; dir++) {
+        if (!tg->tokens[dir]) {
+            tg->tokens[dir] = tgm;
         }
+        qemu_co_queue_init(&tgm->throttled_reqs[dir]);
     }
 
     QLIST_INSERT_HEAD(&tg->head, tgm, round_robin);
@@ -566,8 +570,6 @@ void throttle_group_register_tgm(ThrottleGroupMember *tgm,
                          write_timer_cb,
                          tgm);
     qemu_co_mutex_init(&tgm->throttled_reqs_lock);
-    qemu_co_queue_init(&tgm->throttled_reqs[0]);
-    qemu_co_queue_init(&tgm->throttled_reqs[1]);
 }
 
 /* Unregister a ThrottleGroupMember from its group, removing it from the list,
@@ -585,7 +587,7 @@ void throttle_group_unregister_tgm(ThrottleGroupMember *tgm)
     ThrottleState *ts = tgm->throttle_state;
     ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);
     ThrottleGroupMember *token;
-    int i;
+    ThrottleDirection dir;
 
     if (!ts) {
         /* Discard already unregistered tgm */
@@ -596,17 +598,17 @@ void throttle_group_unregister_tgm(ThrottleGroupMember *tgm)
     AIO_WAIT_WHILE(tgm->aio_context, qatomic_read(&tgm->restart_pending) > 0);
 
     WITH_QEMU_LOCK_GUARD(&tg->lock) {
-        for (i = 0; i < 2; i++) {
-            assert(tgm->pending_reqs[i] == 0);
-            assert(qemu_co_queue_empty(&tgm->throttled_reqs[i]));
-            assert(!timer_pending(tgm->throttle_timers.timers[i]));
-            if (tg->tokens[i] == tgm) {
+        for (dir = THROTTLE_READ; dir < THROTTLE_MAX; dir++) {
+            assert(tgm->pending_reqs[dir] == 0);
+            assert(qemu_co_queue_empty(&tgm->throttled_reqs[dir]));
+            assert(!timer_pending(tgm->throttle_timers.timers[dir]));
+            if (tg->tokens[dir] == tgm) {
                 token = throttle_group_next_tgm(tgm);
                 /* Take care of the case where this is the last tgm in the group */
                 if (token == tgm) {
                     token = NULL;
                 }
-                tg->tokens[i] = token;
+                tg->tokens[dir] = token;
             }
         }
 
@@ -631,19 +633,20 @@ void throttle_group_detach_aio_context(ThrottleGroupMember *tgm)
 {
     ThrottleGroup *tg = container_of(tgm->throttle_state, ThrottleGroup, ts);
     ThrottleTimers *tt = &tgm->throttle_timers;
-    int i;
+    ThrottleDirection dir;
 
     /* Requests must have been drained */
-    assert(tgm->pending_reqs[0] == 0 && tgm->pending_reqs[1] == 0);
-    assert(qemu_co_queue_empty(&tgm->throttled_reqs[0]));
-    assert(qemu_co_queue_empty(&tgm->throttled_reqs[1]));
+    for (dir = THROTTLE_READ; dir < THROTTLE_MAX; dir++) {
+        assert(tgm->pending_reqs[dir] == 0);
+        assert(qemu_co_queue_empty(&tgm->throttled_reqs[dir]));
+    }
 
     /* Kick off next ThrottleGroupMember, if necessary */
     WITH_QEMU_LOCK_GUARD(&tg->lock) {
-        for (i = 0; i < 2; i++) {
-            if (timer_pending(tt->timers[i])) {
-                tg->any_timer_armed[i] = false;
-                schedule_next_request(tgm, i);
+        for (dir = THROTTLE_READ; dir < THROTTLE_MAX; dir++) {
+            if (timer_pending(tt->timers[dir])) {
+                tg->any_timer_armed[dir] = false;
+                schedule_next_request(tgm, dir);
             }
         }
     }
diff --git a/block/throttle.c b/block/throttle.c
index 3aaef18d4e..1098a4ae9a 100644
--- a/block/throttle.c
+++ b/block/throttle.c
@@ -118,7 +118,7 @@ throttle_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
 {
 
     ThrottleGroupMember *tgm = bs->opaque;
-    throttle_group_co_io_limits_intercept(tgm, bytes, false);
+    throttle_group_co_io_limits_intercept(tgm, bytes, THROTTLE_READ);
 
     return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
 }
@@ -128,7 +128,7 @@ throttle_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
                     QEMUIOVector *qiov, BdrvRequestFlags flags)
 {
     ThrottleGroupMember *tgm = bs->opaque;
-    throttle_group_co_io_limits_intercept(tgm, bytes, true);
+    throttle_group_co_io_limits_intercept(tgm, bytes, THROTTLE_WRITE);
 
     return bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
 }
@@ -138,7 +138,7 @@ throttle_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int64_t bytes,
                           BdrvRequestFlags flags)
 {
     ThrottleGroupMember *tgm = bs->opaque;
-    throttle_group_co_io_limits_intercept(tgm, bytes, true);
+    throttle_group_co_io_limits_intercept(tgm, bytes, THROTTLE_WRITE);
 
     return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags);
 }
@@ -147,7 +147,7 @@ static int coroutine_fn GRAPH_RDLOCK
 throttle_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes)
 {
     ThrottleGroupMember *tgm = bs->opaque;
-    throttle_group_co_io_limits_intercept(tgm, bytes, true);
+    throttle_group_co_io_limits_intercept(tgm, bytes, THROTTLE_WRITE);
 
     return bdrv_co_pdiscard(bs->file, offset, bytes);
 }
diff --git a/block/vhdx.c b/block/vhdx.c
index f2c3a80190..a67edcc03e 100644
--- a/block/vhdx.c
+++ b/block/vhdx.c
@@ -1077,7 +1077,7 @@ static int vhdx_open(BlockDriverState *bs, QDict *options, int flags,
         goto fail;
     }
 
-    /* endian convert populated BAT field entires */
+    /* endian convert populated BAT field entries */
     for (i = 0; i < s->bat_entries; i++) {
         s->bat[i] = le64_to_cpu(s->bat[i]);
     }
diff --git a/block/vhdx.h b/block/vhdx.h
index 7db746cd18..455a627a46 100644
--- a/block/vhdx.h
+++ b/block/vhdx.h
@@ -212,7 +212,7 @@ typedef struct QEMU_PACKED VHDXLogDataSector {
     uint32_t    sequence_high;          /* 4 MSB of 8 byte sequence_number */
     uint8_t     data[4084];             /* raw data, bytes 8-4091 (inclusive).
                                            see the data descriptor field for the
-                                           other mising bytes */
+                                           other missing bytes */
     uint32_t    sequence_low;           /* 4 LSB of 8 byte sequence_number */
 } VHDXLogDataSector;
 
@@ -257,7 +257,7 @@ typedef struct QEMU_PACKED VHDXMetadataTableHeader {
 
 #define VHDX_META_FLAGS_IS_USER         0x01    /* max 1024 entries */
 #define VHDX_META_FLAGS_IS_VIRTUAL_DISK 0x02    /* virtual disk metadata if set,
-                                                   otherwise file metdata */
+                                                   otherwise file metadata */
 #define VHDX_META_FLAGS_IS_REQUIRED     0x04    /* parse must understand this
                                                    entry to open the file */
 typedef struct QEMU_PACKED VHDXMetadataTableEntry {
diff --git a/block/vmdk.c b/block/vmdk.c
index 70066c2b01..e90649c8bf 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -272,6 +272,7 @@ static void vmdk_free_extents(BlockDriverState *bs)
     BDRVVmdkState *s = bs->opaque;
     VmdkExtent *e;
 
+    bdrv_graph_wrlock(NULL);
     for (i = 0; i < s->num_extents; i++) {
         e = &s->extents[i];
         g_free(e->l1_table);
@@ -282,6 +283,8 @@ static void vmdk_free_extents(BlockDriverState *bs)
             bdrv_unref_child(bs, e->file);
         }
     }
+    bdrv_graph_wrunlock();
+
     g_free(s->extents);
 }
 
@@ -1207,7 +1210,7 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
                                       bs, &child_of_bds, extent_role, false,
                                       &local_err);
         g_free(extent_path);
-        if (local_err) {
+        if (!extent_file) {
             error_propagate(errp, local_err);
             ret = -EINVAL;
             goto out;
@@ -1220,7 +1223,9 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
             ret = vmdk_add_extent(bs, extent_file, true, sectors,
                             0, 0, 0, 0, 0, &extent, errp);
             if (ret < 0) {
+                bdrv_graph_wrlock(NULL);
                 bdrv_unref_child(bs, extent_file);
+                bdrv_graph_wrunlock();
                 goto out;
             }
             extent->flat_start_offset = flat_offset << 9;
@@ -1235,20 +1240,26 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
             }
             g_free(buf);
             if (ret) {
+                bdrv_graph_wrlock(NULL);
                 bdrv_unref_child(bs, extent_file);
+                bdrv_graph_wrunlock();
                 goto out;
             }
             extent = &s->extents[s->num_extents - 1];
         } else if (!strcmp(type, "SESPARSE")) {
             ret = vmdk_open_se_sparse(bs, extent_file, bs->open_flags, errp);
             if (ret) {
+                bdrv_graph_wrlock(NULL);
                 bdrv_unref_child(bs, extent_file);
+                bdrv_graph_wrunlock();
                 goto out;
             }
             extent = &s->extents[s->num_extents - 1];
         } else {
             error_setg(errp, "Unsupported extent type '%s'", type);
+            bdrv_graph_wrlock(NULL);
             bdrv_unref_child(bs, extent_file);
+            bdrv_graph_wrunlock();
             ret = -ENOTSUP;
             goto out;
         }
@@ -1309,6 +1320,8 @@ static int vmdk_open(BlockDriverState *bs, QDict *options, int flags,
     BDRVVmdkState *s = bs->opaque;
     uint32_t magic;
 
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
     ret = bdrv_open_file_child(NULL, options, "file", bs, errp);
     if (ret < 0) {
         return ret;
@@ -1770,6 +1783,8 @@ vmdk_co_block_status(BlockDriverState *bs, bool want_zero,
             if (extent->flat) {
                 ret |= BDRV_BLOCK_RECURSE;
             }
+        } else {
+            ret |= BDRV_BLOCK_COMPRESSED;
         }
         *file = extent->file->bs;
         break;
diff --git a/block/vpc.c b/block/vpc.c
index 3810a601a3..ceb87dd3d8 100644
--- a/block/vpc.c
+++ b/block/vpc.c
@@ -510,7 +510,7 @@ get_image_offset(BlockDriverState *bs, uint64_t offset, bool write, int *err)
        miss sparse read optimization, but it's not a problem in terms of
        correctness. */
     if (write && (s->last_bitmap_offset != bitmap_offset)) {
-        uint8_t bitmap[s->bitmap_size];
+        g_autofree uint8_t *bitmap = g_malloc(s->bitmap_size);
         int r;
 
         s->last_bitmap_offset = bitmap_offset;
@@ -558,7 +558,7 @@ alloc_block(BlockDriverState *bs, int64_t offset)
     int64_t bat_offset;
     uint32_t index, bat_value;
     int ret;
-    uint8_t bitmap[s->bitmap_size];
+    g_autofree uint8_t *bitmap = g_malloc(s->bitmap_size);
 
     /* Check if sector_num is valid */
     if ((offset < 0) || (offset > bs->total_sectors * BDRV_SECTOR_SIZE)) {
diff --git a/blockdev.c b/blockdev.c
index e6eba61484..325b7a3bef 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1378,6 +1378,9 @@ static void external_snapshot_action(TransactionAction *action,
     AioContext *aio_context;
     uint64_t perm, shared;
 
+    /* TODO We'll eventually have to take a writer lock in this function */
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
     tran_add(tran, &external_snapshot_drv, state);
 
     /* 'blockdev-snapshot' and 'blockdev-snapshot-sync' have similar
@@ -2521,6 +2524,9 @@ void qmp_block_commit(const char *job_id, const char *device,
     int job_flags = JOB_DEFAULT;
     uint64_t top_perm, top_shared;
 
+    /* TODO We'll eventually have to take a writer lock in this function */
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
     if (!has_speed) {
         speed = 0;
     }
@@ -3539,8 +3545,8 @@ out:
     aio_context_release(aio_context);
 }
 
-static BdrvChild *bdrv_find_child(BlockDriverState *parent_bs,
-                                  const char *child_name)
+static BdrvChild * GRAPH_RDLOCK
+bdrv_find_child(BlockDriverState *parent_bs, const char *child_name)
 {
     BdrvChild *child;
 
@@ -3559,9 +3565,11 @@ void qmp_x_blockdev_change(const char *parent, const char *child,
     BlockDriverState *parent_bs, *new_bs = NULL;
     BdrvChild *p_child;
 
+    bdrv_graph_wrlock(NULL);
+
     parent_bs = bdrv_lookup_bs(parent, parent, errp);
     if (!parent_bs) {
-        return;
+        goto out;
     }
 
     if (!child == !node) {
@@ -3570,7 +3578,7 @@ void qmp_x_blockdev_change(const char *parent, const char *child,
         } else {
             error_setg(errp, "Either child or node must be specified");
         }
-        return;
+        goto out;
     }
 
     if (child) {
@@ -3578,7 +3586,7 @@ void qmp_x_blockdev_change(const char *parent, const char *child,
         if (!p_child) {
             error_setg(errp, "Node '%s' does not have child '%s'",
                        parent, child);
-            return;
+            goto out;
         }
         bdrv_del_child(parent_bs, p_child, errp);
     }
@@ -3587,10 +3595,13 @@ void qmp_x_blockdev_change(const char *parent, const char *child,
         new_bs = bdrv_find_node(node);
         if (!new_bs) {
             error_setg(errp, "Node '%s' not found", node);
-            return;
+            goto out;
         }
         bdrv_add_child(parent_bs, new_bs, errp);
     }
+
+out:
+    bdrv_graph_wrunlock();
 }
 
 BlockJobInfoList *qmp_query_block_jobs(Error **errp)
diff --git a/blockjob.c b/blockjob.c
index 25fe8e625d..58c5d64539 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -198,6 +198,7 @@ void block_job_remove_all_bdrv(BlockJob *job)
      * one to make sure that such a concurrent access does not attempt
      * to process an already freed BdrvChild.
      */
+    bdrv_graph_wrlock(NULL);
     while (job->nodes) {
         GSList *l = job->nodes;
         BdrvChild *c = l->data;
@@ -209,6 +210,7 @@ void block_job_remove_all_bdrv(BlockJob *job)
 
         g_slist_free_1(l);
     }
+    bdrv_graph_wrunlock();
 }
 
 bool block_job_has_bdrv(BlockJob *job, BlockDriverState *bs)
diff --git a/bsd-user/bsd-file.h b/bsd-user/bsd-file.h
index 588e0c50d4..3c00dc0056 100644
--- a/bsd-user/bsd-file.h
+++ b/bsd-user/bsd-file.h
@@ -51,10 +51,8 @@ do {                                        \
     unlock_user(p1, arg1, 0);               \
 } while (0)
 
-extern struct iovec *lock_iovec(int type, abi_ulong target_addr, int count,
-        int copy);
-extern void unlock_iovec(struct iovec *vec, abi_ulong target_addr, int count,
-        int copy);
+struct iovec *lock_iovec(int type, abi_ulong target_addr, int count, int copy);
+void unlock_iovec(struct iovec *vec, abi_ulong target_addr, int count, int copy);
 
 int safe_open(const char *path, int flags, mode_t mode);
 int safe_openat(int fd, const char *path, int flags, mode_t mode);
diff --git a/bsd-user/errno_defs.h b/bsd-user/errno_defs.h
index f3e8ac3488..abe70119d9 100644
--- a/bsd-user/errno_defs.h
+++ b/bsd-user/errno_defs.h
@@ -149,7 +149,7 @@
 #define TARGET_ELAST            90              /* Must be equal largest errno */
 
 /* Internal errors: */
-#define TARGET_EJUSTRETURN      254             /* Just return without modifing regs */
+#define TARGET_EJUSTRETURN      254             /* Just return without modifying regs */
 #define TARGET_ERESTART         255             /* Restart syscall */
 
 #include "special-errno.h"
diff --git a/bsd-user/freebsd/target_os_siginfo.h b/bsd-user/freebsd/target_os_siginfo.h
index 4573738752..6c282d8502 100644
--- a/bsd-user/freebsd/target_os_siginfo.h
+++ b/bsd-user/freebsd/target_os_siginfo.h
@@ -72,7 +72,7 @@ typedef struct target_siginfo {
             int32_t _mqd;
         } _mesgp;
 
-        /* SIGPOLL -- Not really genreated in FreeBSD ??? */
+        /* SIGPOLL -- Not really generated in FreeBSD ??? */
         struct {
             int _band;  /* POLL_IN, POLL_OUT, POLL_MSG */
         } _poll;
diff --git a/bsd-user/freebsd/target_os_stack.h b/bsd-user/freebsd/target_os_stack.h
index 0590133291..d15fc3263f 100644
--- a/bsd-user/freebsd/target_os_stack.h
+++ b/bsd-user/freebsd/target_os_stack.h
@@ -25,7 +25,7 @@
 #include "qemu/guest-random.h"
 
 /*
- * The inital FreeBSD stack is as follows:
+ * The initial FreeBSD stack is as follows:
  * (see kern/kern_exec.c exec_copyout_strings() )
  *
  *  Hi Address -> char **ps_argvstr  (struct ps_strings for ps, w, etc.)
@@ -59,7 +59,7 @@ static inline int setup_initial_stack(struct bsd_binprm *bprm,
     /* Save some space for ps_strings. */
     p -= sizeof(struct target_ps_strings);
 
-    /* Add machine depedent sigcode. */
+    /* Add machine dependent sigcode. */
     p -= TARGET_SZSIGCODE;
     if (setup_sigtramp(p, (unsigned)offsetof(struct target_sigframe, sf_uc),
             TARGET_FREEBSD_NR_sigreturn)) {
diff --git a/bsd-user/freebsd/target_os_user.h b/bsd-user/freebsd/target_os_user.h
index f036a32343..1ca7b5ab17 100644
--- a/bsd-user/freebsd/target_os_user.h
+++ b/bsd-user/freebsd/target_os_user.h
@@ -26,7 +26,7 @@
 struct target_priority {
     uint8_t     pri_class;      /* Scheduling class. */
     uint8_t     pri_level;      /* Normal priority level. */
-    uint8_t     pri_native;     /* Priority before propogation. */
+    uint8_t     pri_native;     /* Priority before propagation. */
     uint8_t     pri_user;       /* User priority based on p_cpu and p_nice. */
 };
 
diff --git a/bsd-user/qemu.h b/bsd-user/qemu.h
index d3158bc2ed..d9507137cc 100644
--- a/bsd-user/qemu.h
+++ b/bsd-user/qemu.h
@@ -116,7 +116,7 @@ extern const char *qemu_uname_release;
 /*
  * TARGET_ARG_MAX defines the number of bytes allocated for arguments
  * and envelope for the new program. 256k should suffice for a reasonable
- * maxiumum env+arg in 32-bit environments, bump it up to 512k for !ILP32
+ * maximum env+arg in 32-bit environments, bump it up to 512k for !ILP32
  * platforms.
  */
 #if TARGET_ABI_BITS > 32
diff --git a/bsd-user/signal-common.h b/bsd-user/signal-common.h
index 6f90345bb2..c044e81165 100644
--- a/bsd-user/signal-common.h
+++ b/bsd-user/signal-common.h
@@ -49,11 +49,11 @@ void target_to_host_sigset(sigset_t *d, const target_sigset_t *s);
  * union in target_siginfo is valid. This only applies between
  * host_to_target_siginfo_noswap() and tswap_siginfo(); it does not appear
  * either within host siginfo_t or in target_siginfo structures which we get
- * from the guest userspace program. Linux kenrels use this internally, but BSD
+ * from the guest userspace program. Linux kernels use this internally, but BSD
  * kernels don't do this, but its a useful abstraction.
  *
  * The linux-user version of this uses the top 16 bits, but FreeBSD's SI_USER
- * and other signal indepenent SI_ codes have bit 16 set, so we only use the top
+ * and other signal independent SI_ codes have bit 16 set, so we only use the top
  * byte instead.
  *
  * For FreeBSD, we have si_pid, si_uid, si_status, and si_addr always. Linux and
diff --git a/bsd-user/signal.c b/bsd-user/signal.c
index 4db85a3485..b6beab659e 100644
--- a/bsd-user/signal.c
+++ b/bsd-user/signal.c
@@ -44,7 +44,7 @@ static inline int sas_ss_flags(TaskState *ts, unsigned long sp)
 }
 
 /*
- * The BSD ABIs use the same singal numbers across all the CPU architectures, so
+ * The BSD ABIs use the same signal numbers across all the CPU architectures, so
  * (unlike Linux) these functions are just the identity mapping. This might not
  * be true for XyzBSD running on AbcBSD, which doesn't currently work.
  */
@@ -241,7 +241,7 @@ static inline void host_to_target_siginfo_noswap(target_siginfo_t *tinfo,
 #endif
         /*
          * Unsure that this can actually be generated, and our support for
-         * capsicum is somewhere between weak and non-existant, but if we get
+         * capsicum is somewhere between weak and non-existent, but if we get
          * one, then we know what to save.
          */
 #ifdef QEMU_SI_CAPSICUM
@@ -319,7 +319,7 @@ int block_signals(void)
     /*
      * It's OK to block everything including SIGSEGV, because we won't run any
      * further guest code before unblocking signals in
-     * process_pending_signals(). We depend on the FreeBSD behaivor here where
+     * process_pending_signals(). We depend on the FreeBSD behavior here where
      * this will only affect this thread's signal mask. We don't use
      * pthread_sigmask which might seem more correct because that routine also
      * does odd things with SIGCANCEL to implement pthread_cancel().
diff --git a/bsd-user/trace-events b/bsd-user/trace-events
index 843896f627..2c1cb66726 100644
--- a/bsd-user/trace-events
+++ b/bsd-user/trace-events
@@ -1,4 +1,4 @@
-# See docs/tracing.txt for syntax documentation.
+# See docs/devel/tracing.rst for syntax documentation.
 
 # bsd-user/signal.c
 user_setup_frame(void *env, uint64_t frame_addr) "env=%p frame_addr=0x%"PRIx64
diff --git a/chardev/char-socket.c b/chardev/char-socket.c
index e8e3a743d5..73947da188 100644
--- a/chardev/char-socket.c
+++ b/chardev/char-socket.c
@@ -710,7 +710,7 @@ static void tcp_chr_telnet_init(Chardev *chr)
 
     if (!s->is_tn3270) {
         init->buflen = 12;
-        /* Prep the telnet negotion to put telnet in binary,
+        /* Prep the telnet negotiation to put telnet in binary,
          * no echo, single char mode */
         IACSET(init->buf, 0xff, 0xfb, 0x01);  /* IAC WILL ECHO */
         IACSET(init->buf, 0xff, 0xfb, 0x03);  /* IAC WILL Suppress go ahead */
@@ -718,7 +718,7 @@ static void tcp_chr_telnet_init(Chardev *chr)
         IACSET(init->buf, 0xff, 0xfd, 0x00);  /* IAC DO Binary */
     } else {
         init->buflen = 21;
-        /* Prep the TN3270 negotion based on RFC1576 */
+        /* Prep the TN3270 negotiation based on RFC1576 */
         IACSET(init->buf, 0xff, 0xfd, 0x19);  /* IAC DO EOR */
         IACSET(init->buf, 0xff, 0xfb, 0x19);  /* IAC WILL EOR */
         IACSET(init->buf, 0xff, 0xfd, 0x00);  /* IAC DO BINARY */
@@ -1298,7 +1298,7 @@ static bool qmp_chardev_validate_socket(ChardevSocket *sock,
         return false;
     }
 
-    /* Validate any options which have a dependancy on client vs server */
+    /* Validate any options which have a dependency on client vs server */
     if (!sock->has_server || sock->server) {
         if (sock->has_reconnect) {
             error_setg(errp,
diff --git a/chardev/char.c b/chardev/char.c
index 661ad8176a..996a024c7a 100644
--- a/chardev/char.c
+++ b/chardev/char.c
@@ -1115,7 +1115,7 @@ ChardevReturn *qmp_chardev_change(const char *id, ChardevBackend *backend,
         return NULL;
     }
 
-    /* change successfull, clean up */
+    /* change successful, clean up */
     chr_new->handover_yank_instance = false;
 
     /*
diff --git a/chardev/meson.build b/chardev/meson.build
index fb630b429e..6d56ad32fd 100644
--- a/chardev/meson.build
+++ b/chardev/meson.build
@@ -26,7 +26,7 @@ chardev_ss.add(when: 'CONFIG_WIN32', if_true: files(
   'char-win.c',
 ))
 
-chardev_ss = chardev_ss.apply(config_host, strict: false)
+chardev_ss = chardev_ss.apply(config_targetos, strict: false)
 
 system_ss.add(files(
     'char-hmp-cmds.c',
diff --git a/configure b/configure
index 6bc0d7d3b2..df6611ff18 100755
--- a/configure
+++ b/configure
@@ -245,10 +245,9 @@ for opt do
   esac
 done
 
-
+default_cflags='-O2 -g'
 git_submodules_action="update"
 git="git"
-debug_tcg="no"
 docs="auto"
 EXESUF=""
 prefix="/usr/local"
@@ -257,6 +256,7 @@ softmmu="yes"
 linux_user=""
 bsd_user=""
 plugins="$default_feature"
+subdirs=""
 ninja=""
 python=
 download="enabled"
@@ -288,7 +288,7 @@ static="no"
 #  ${cross_prefix}gcc (if cross-prefix specified)
 #  system compiler
 if test -z "${CC}${cross_prefix}"; then
-  cc="$host_cc"
+  cc="cc"
 else
   cc="${CC-${cross_prefix}gcc}"
 fi
@@ -374,45 +374,14 @@ fi
 
 # OS specific
 
-mingw32="no"
-bsd="no"
-linux="no"
-solaris="no"
 case $targetos in
 windows)
-  mingw32="yes"
   plugins="no"
   pie="no"
 ;;
-gnu/kfreebsd)
-  bsd="yes"
-;;
-freebsd)
-  bsd="yes"
-  # needed for kinfo_getvmmap(3) in libutil.h
-;;
-dragonfly)
-  bsd="yes"
-;;
-netbsd)
-  bsd="yes"
-;;
-openbsd)
-  bsd="yes"
-;;
-darwin)
-  bsd="yes"
-  darwin="yes"
-;;
-sunos)
-  solaris="yes"
-;;
 haiku)
   pie="no"
 ;;
-linux)
-  linux="yes"
-;;
 esac
 
 if test ! -z "$cpu" ; then
@@ -583,16 +552,16 @@ if test -n "$linux_arch" && ! test -d "$source_path/linux-headers/asm-$linux_arc
 fi
 
 check_py_version() {
-    # We require python >= 3.7.
+    # We require python >= 3.8.
     # NB: a True python conditional creates a non-zero return code (Failure)
-    "$1" -c 'import sys; sys.exit(sys.version_info < (3,7))'
+    "$1" -c 'import sys; sys.exit(sys.version_info < (3,8))'
 }
 
 first_python=
 if test -z "${PYTHON}"; then
     # A bare 'python' is traditionally python 2.x, but some distros
     # have it as python 3.x, so check in both places.
-    for binary in python3 python python3.11 python3.10 python3.9 python3.8 python3.7; do
+    for binary in python3 python python3.11 python3.10 python3.9 python3.8; do
         if has "$binary"; then
             python=$(command -v "$binary")
             if check_py_version "$python"; then
@@ -627,7 +596,7 @@ do
     fi
 done
 
-if test "$mingw32" = "yes" ; then
+if test "$targetos" = "windows" ; then
   EXESUF=".exe"
   prefix="/qemu"
   bindir=""
@@ -761,16 +730,13 @@ for opt do
     # configure to be used by RPM and similar macros that set
     # lots of directory switches by default.
   ;;
-  --enable-debug-tcg) debug_tcg="yes"
-  ;;
-  --disable-debug-tcg) debug_tcg="no"
-  ;;
   --enable-debug)
       # Enable debugging options that aren't excessively noisy
-      debug_tcg="yes"
+      meson_option_parse --enable-debug-tcg ""
       meson_option_parse --enable-debug-graph-lock ""
       meson_option_parse --enable-debug-mutex ""
       meson_option_add -Doptimization=0
+      default_cflags='-O0 -g'
   ;;
   --disable-tcg) tcg="disabled"
                  plugins="no"
@@ -812,7 +778,7 @@ for opt do
   ;;
   --enable-download) download="enabled"; git_submodules_action=update;
   ;;
-  --enable-plugins) if test "$mingw32" = "yes"; then
+  --enable-plugins) if test "$targetos" = "windows"; then
                         error_exit "TCG plugins not currently supported on Windows platforms"
                     else
                         plugins="yes"
@@ -884,30 +850,36 @@ fi
 default_target_list=""
 mak_wilds=""
 
-if [ "$linux_user" != no ]; then
-    if [ "$targetos" = linux ] && [ -n "$host_arch" ]; then
-        linux_user=yes
-    elif [ "$linux_user" = yes ]; then
-        error_exit "linux-user not supported on this architecture"
+if [ -n "$host_arch" ] && [ -d "$source_path/common-user/host/$host_arch" ]; then
+    if [ "$linux_user" != no ]; then
+        if [ "$targetos" = linux ]; then
+            linux_user=yes
+        elif [ "$linux_user" = yes ]; then
+            error_exit "linux-user not supported on this architecture"
+        fi
+        if [ "$linux_user" = "yes" ]; then
+            mak_wilds="${mak_wilds} $source_path/configs/targets/*-linux-user.mak"
+        fi
     fi
-fi
-if [ "$bsd_user" != no ]; then
-    if [ "$bsd_user" = "" ]; then
-        test $targetos = freebsd && bsd_user=yes
+    if [ "$bsd_user" != no ]; then
+        if [ "$bsd_user" = "" ]; then
+            test $targetos = freebsd && bsd_user=yes
+        fi
+        if [ "$bsd_user" = yes ] && ! [ -d "$source_path/bsd-user/$targetos" ]; then
+            error_exit "bsd-user not supported on this host OS"
+        fi
+        if [ "$bsd_user" = "yes" ]; then
+            mak_wilds="${mak_wilds} $source_path/configs/targets/*-bsd-user.mak"
+        fi
     fi
-    if [ "$bsd_user" = yes ] && ! [ -d "$source_path/bsd-user/$targetos" ]; then
-        error_exit "bsd-user not supported on this host OS"
+else
+    if [ "$linux_user" = yes ] || [ "$bsd_user" = yes ]; then
+        error_exit "user mode emulation not supported on this architecture"
     fi
 fi
 if [ "$softmmu" = "yes" ]; then
     mak_wilds="${mak_wilds} $source_path/configs/targets/*-softmmu.mak"
 fi
-if [ "$linux_user" = "yes" ]; then
-    mak_wilds="${mak_wilds} $source_path/configs/targets/*-linux-user.mak"
-fi
-if [ "$bsd_user" = "yes" ]; then
-    mak_wilds="${mak_wilds} $source_path/configs/targets/*-bsd-user.mak"
-fi
 
 for config in $mak_wilds; do
     target="$(basename "$config" .mak)"
@@ -934,8 +906,8 @@ Advanced options (experts only):
   -Dmesonoptname=val       passthrough option to meson unmodified
   --cross-prefix=PREFIX    use PREFIX for compile tools, PREFIX can be blank [$cross_prefix]
   --cc=CC                  use C compiler CC [$cc]
-  --host-cc=CC             use C compiler CC [$host_cc] for code run at
-                           build time
+  --host-cc=CC             when cross compiling, use C compiler CC for code run
+                           at build time [$host_cc]
   --cxx=CXX                use C++ compiler CXX [$cxx]
   --objcc=OBJCC            use Objective-C compiler OBJCC [$objcc]
   --extra-cflags=CFLAGS    append extra C compiler flags CFLAGS
@@ -971,7 +943,6 @@ cat << EOF
   linux-user      all linux usermode emulation targets
   bsd-user        all BSD usermode emulation targets
   pie             Position Independent Executables
-  debug-tcg       TCG debugging (default is disabled)
 
 NOTE: The object files are built at the place where configure is launched
 EOF
@@ -994,7 +965,7 @@ then
 fi
 
 if ! check_py_version "$python"; then
-  error_exit "Cannot use '$python', Python >= 3.7 is required." \
+  error_exit "Cannot use '$python', Python >= 3.8 is required." \
              "Use --python=/path/to/python to specify a supported Python." \
              "Maybe try:" \
              "  openSUSE Leap 15.3+: zypper install python39" \
@@ -1087,7 +1058,7 @@ fi
 # by default.  Only enable by default for git builds
 if test -z "$werror" ; then
     if test -e "$source_path/.git" && \
-        { test "$linux" = "yes" || test "$mingw32" = "yes"; }; then
+        { test "$targetos" = linux || test "$targetos" = "windows"; }; then
         werror="yes"
     else
         werror="no"
@@ -1110,6 +1081,9 @@ if test "$static" = "yes" ; then
   fi
 fi
 test "$plugins" = "" && plugins=yes
+if test "$plugins" = "yes"; then
+  subdirs="$subdirs contrib/plugins"
+fi
 
 cat > $TMPC << EOF
 
@@ -1165,14 +1139,6 @@ else
     done
 fi
 
-# see if system emulation was really requested
-case " $target_list " in
-  *"-softmmu "*) softmmu=yes
-  ;;
-  *) softmmu=no
-  ;;
-esac
-
 if test "$tcg" = "auto"; then
   if test -z "$target_list"; then
     tcg="disabled"
@@ -1670,12 +1636,11 @@ done
 echo "# Automatically generated by configure - do not modify" > Makefile.prereqs
 
 # Mac OS X ships with a broken assembler
-roms=
 if have_target i386-softmmu x86_64-softmmu && \
         test "$targetos" != "darwin" && test "$targetos" != "sunos" && \
         test "$targetos" != "haiku" && \
         probe_target_compiler i386-softmmu; then
-    roms="pc-bios/optionrom"
+    subdirs="$subdirs pc-bios/optionrom"
     config_mak=pc-bios/optionrom/config.mak
     echo "# Automatically generated by configure - do not modify" > $config_mak
     echo "TOPSRC_DIR=$source_path" >> $config_mak
@@ -1684,7 +1649,7 @@ fi
 
 if have_target ppc-softmmu ppc64-softmmu && \
         probe_target_compiler ppc-softmmu; then
-    roms="$roms pc-bios/vof"
+    subdirs="$subdirs pc-bios/vof"
     config_mak=pc-bios/vof/config.mak
     echo "# Automatically generated by configure - do not modify" > $config_mak
     echo "SRC_DIR=$source_path/pc-bios/vof" >> $config_mak
@@ -1703,7 +1668,7 @@ if have_target s390x-softmmu && probe_target_compiler s390x-softmmu && \
       echo "WARNING: Your compiler does not support the z900!"
       echo "         The s390-ccw bios will only work with guest CPUs >= z10."
     fi
-    roms="$roms pc-bios/s390-ccw"
+    subdirs="$subdirs pc-bios/s390-ccw"
     config_mak=pc-bios/s390-ccw/config-host.mak
     echo "# Automatically generated by configure - do not modify" > $config_mak
     echo "SRC_PATH=$source_path/pc-bios/s390-ccw" >> $config_mak
@@ -1722,41 +1687,15 @@ echo >> $config_host_mak
 
 echo all: >> $config_host_mak
 
-if test "$debug_tcg" = "yes" ; then
-  echo "CONFIG_DEBUG_TCG=y" >> $config_host_mak
-fi
-if test "$mingw32" = "yes" ; then
-  echo "CONFIG_WIN32=y" >> $config_host_mak
+if test "$targetos" = "windows"; then
   echo "QEMU_GA_MANUFACTURER=${QEMU_GA_MANUFACTURER-QEMU}" >> $config_host_mak
   echo "QEMU_GA_DISTRO=${QEMU_GA_DISTRO-Linux}" >> $config_host_mak
   echo "QEMU_GA_VERSION=${QEMU_GA_VERSION-$(cat "$source_path"/VERSION)}" >> $config_host_mak
-else
-  echo "CONFIG_POSIX=y" >> $config_host_mak
 fi
 
-if test "$linux" = "yes" ; then
-  echo "CONFIG_LINUX=y" >> $config_host_mak
-fi
-
-if test "$darwin" = "yes" ; then
-  echo "CONFIG_DARWIN=y" >> $config_host_mak
-fi
-
-if test "$solaris" = "yes" ; then
-  echo "CONFIG_SOLARIS=y" >> $config_host_mak
-fi
 echo "SRC_PATH=$source_path" >> $config_host_mak
 echo "TARGET_DIRS=$target_list" >> $config_host_mak
 
-# XXX: suppress that
-if [ "$bsd" = "yes" ] ; then
-  echo "CONFIG_BSD=y" >> $config_host_mak
-fi
-
-if test "$plugins" = "yes" ; then
-    echo "CONFIG_PLUGIN=y" >> $config_host_mak
-fi
-
 if test -n "$gdb_bin"; then
     gdb_version=$($gdb_bin --version | head -n 1)
     if version_ge ${gdb_version##* } 9.1; then
@@ -1772,6 +1711,13 @@ if test "$container" != no; then
     echo "RUNC=$runc" >> $config_host_mak
 fi
 
+echo "SUBDIRS=$subdirs" >> $config_host_mak
+echo "PYTHON=$python" >> $config_host_mak
+echo "GENISOIMAGE=$genisoimage" >> $config_host_mak
+echo "MESON=$meson" >> $config_host_mak
+echo "NINJA=$ninja" >> $config_host_mak
+echo "EXESUF=$EXESUF" >> $config_host_mak
+
 if test "$as_shared_lib" = "yes" ; then
   echo "AS_SHARED_LIB=y" >> $config_host_mak
 fi
@@ -1779,17 +1725,8 @@ if test "$as_static_lib" = "yes" ; then
   echo "AS_STATIC_LIB=y" >> $config_host_mak
 fi
 
-echo "ROMS=$roms" >> $config_host_mak
-echo "PYTHON=$python" >> $config_host_mak
-echo "GENISOIMAGE=$genisoimage" >> $config_host_mak
-echo "MESON=$meson" >> $config_host_mak
-echo "NINJA=$ninja" >> $config_host_mak
-echo "PKG_CONFIG=${pkg_config}" >> $config_host_mak
-echo "CC=$cc" >> $config_host_mak
-echo "EXESUF=$EXESUF" >> $config_host_mak
-
 # use included Linux headers for KVM architectures
-if test "$linux" = "yes" && test -n "$linux_arch"; then
+if test "$targetos" = "linux" && test -n "$linux_arch"; then
   symlink "$source_path/linux-headers/asm-$linux_arch" linux-headers/asm
 fi
 
@@ -1810,12 +1747,21 @@ if test "$ccache_cpp2" = "yes"; then
   echo "export CCACHE_CPP2=y" >> $config_host_mak
 fi
 
+# contrib/plugins configuration
+echo "# Automatically generated by configure - do not modify" > contrib/plugins/$config_host_mak
+echo "SRC_PATH=$source_path/contrib/plugins" >> contrib/plugins/$config_host_mak
+echo "PKG_CONFIG=${pkg_config}" >> contrib/plugins/$config_host_mak
+echo "CC=$cc $CPU_CFLAGS" >> contrib/plugins/$config_host_mak
+echo "CFLAGS=${CFLAGS-$default_cflags} $EXTRA_CFLAGS" >> contrib/plugins/$config_host_mak
+if test "$targetos" = darwin; then
+  echo "CONFIG_DARWIN=y" >> contrib/plugins/$config_host_mak
+fi
+
 # tests/tcg configuration
 (config_host_mak=tests/tcg/config-host.mak
 mkdir -p tests/tcg
 echo "# Automatically generated by configure - do not modify" > $config_host_mak
 echo "SRC_PATH=$source_path" >> $config_host_mak
-echo "HOST_CC=$host_cc" >> $config_host_mak
 
 # versioned checked in the main config_host.mak above
 if test -n "$gdb_bin"; then
@@ -1908,7 +1854,6 @@ if test "$skip_meson" = no; then
   echo "windres = [$(meson_quote $windres)]" >> $cross
   echo "windmc = [$(meson_quote $windmc)]" >> $cross
   if test "$cross_compile" = "yes"; then
-    cross_arg="--cross-file config-meson.cross"
     echo "[host_machine]" >> $cross
     echo "system = '$targetos'" >> $cross
     case "$cpu" in
@@ -1925,6 +1870,14 @@ if test "$skip_meson" = no; then
     else
         echo "endian = 'little'" >> $cross
     fi
+    cross_arg="--cross-file config-meson.cross"
+
+    native="config-meson.native.new"
+    echo "# Automatically generated by configure - do not modify" > $native
+    echo "[binaries]" >> $native
+    echo "c = [$(meson_quote $host_cc)]" >> $native
+    mv $native config-meson.native
+    cross_arg="$cross_arg --native-file config-meson.native"
   else
     cross_arg="--native-file config-meson.cross"
   fi
@@ -1944,6 +1897,7 @@ if test "$skip_meson" = no; then
   test "$cfi" != false && meson_option_add "-Dcfi=$cfi"
   test "$docs" != auto && meson_option_add "-Ddocs=$docs"
   test -n "${LIB_FUZZING_ENGINE+xxx}" && meson_option_add "-Dfuzzing_engine=$LIB_FUZZING_ENGINE"
+  test "$plugins" = yes && meson_option_add "-Dplugins=true"
   test "$qemu_suffix" != qemu && meson_option_add "-Dqemu_suffix=$qemu_suffix"
   test "$smbd" != '' && meson_option_add "-Dsmbd=$smbd"
   test "$tcg" != enabled && meson_option_add "-Dtcg=$tcg"
diff --git a/contrib/plugins/Makefile b/contrib/plugins/Makefile
index b2b9db9f51..8ba78c7a32 100644
--- a/contrib/plugins/Makefile
+++ b/contrib/plugins/Makefile
@@ -6,11 +6,11 @@
 # programs that the main configure has already done for us.
 #
 
-BUILD_DIR := $(CURDIR)/../..
+include config-host.mak
 
-include $(BUILD_DIR)/config-host.mak
+TOP_SRC_PATH = $(SRC_PATH)/../..
 
-VPATH += $(SRC_PATH)/contrib/plugins
+VPATH += $(SRC_PATH)
 
 NAMES :=
 NAMES += execlog
@@ -26,21 +26,25 @@ SONAMES := $(addsuffix .so,$(addprefix lib,$(NAMES)))
 
 # The main QEMU uses Glib extensively so it's perfectly fine to use it
 # in plugins (which many example do).
-CFLAGS := $(shell $(PKG_CONFIG) --cflags glib-2.0)
-CFLAGS += -fPIC -Wall
-CFLAGS += $(if $(CONFIG_DEBUG_TCG), -ggdb -O0)
-CFLAGS += -I$(SRC_PATH)/include/qemu
+PLUGIN_CFLAGS := $(shell $(PKG_CONFIG) --cflags glib-2.0)
+PLUGIN_CFLAGS += -fPIC -Wall
+PLUGIN_CFLAGS += -I$(TOP_SRC_PATH)/include/qemu
 
 all: $(SONAMES)
 
 %.o: %.c
-	$(CC) $(CFLAGS) -c -o $@ $<
+	$(CC) $(CFLAGS) $(PLUGIN_CFLAGS) -c -o $@ $<
 
 lib%.so: %.o
-	$(CC) -shared -Wl,-soname,$@ -o $@ $^ $(LDLIBS)
+ifeq ($(CONFIG_DARWIN),y)
+	$(CC) -bundle -Wl,-undefined,dynamic_lookup -o $@ $^ $(LDLIBS)
+else
+	$(CC) -shared -o $@ $^ $(LDLIBS)
+endif
 
 clean:
 	rm -f *.o *.so *.d
 	rm -Rf .libs
 
 .PHONY: all clean
+.SECONDARY:
diff --git a/contrib/plugins/cache.c b/contrib/plugins/cache.c
index 5036213f1b..4fca3edd07 100644
--- a/contrib/plugins/cache.c
+++ b/contrib/plugins/cache.c
@@ -350,7 +350,7 @@ static int in_cache(Cache *cache, uint64_t addr)
  * @cache: The cache under simulation
  * @addr: The address of the requested memory location
  *
- * Returns true if the requsted data is hit in the cache and false when missed.
+ * Returns true if the requested data is hit in the cache and false when missed.
  * The cache is updated on miss for the next access.
  */
 static bool access_cache(Cache *cache, uint64_t addr)
@@ -545,8 +545,8 @@ static void append_stats_line(GString *line, uint64_t l1_daccess,
     l1_dmiss_rate = ((double) l1_dmisses) / (l1_daccess) * 100.0;
     l1_imiss_rate = ((double) l1_imisses) / (l1_iaccess) * 100.0;
 
-    g_string_append_printf(line, "%-14lu %-12lu %9.4lf%%  %-14lu %-12lu"
-                           " %9.4lf%%",
+    g_string_append_printf(line, "%-14" PRIu64 " %-12" PRIu64 " %9.4lf%%"
+                           "  %-14" PRIu64 " %-12" PRIu64 " %9.4lf%%",
                            l1_daccess,
                            l1_dmisses,
                            l1_daccess ? l1_dmiss_rate : 0.0,
@@ -556,7 +556,8 @@ static void append_stats_line(GString *line, uint64_t l1_daccess,
 
     if (use_l2) {
         l2_miss_rate =  ((double) l2_misses) / (l2_access) * 100.0;
-        g_string_append_printf(line, "  %-12lu %-11lu %10.4lf%%",
+        g_string_append_printf(line,
+                               "  %-12" PRIu64 " %-11" PRIu64 " %10.4lf%%",
                                l2_access,
                                l2_misses,
                                l2_access ? l2_miss_rate : 0.0);
@@ -662,8 +663,8 @@ static void log_top_insns(void)
         if (insn->symbol) {
             g_string_append_printf(rep, " (%s)", insn->symbol);
         }
-        g_string_append_printf(rep, ", %ld, %s\n", insn->l1_dmisses,
-                               insn->disas_str);
+        g_string_append_printf(rep, ", %" PRId64 ", %s\n",
+                               insn->l1_dmisses, insn->disas_str);
     }
 
     miss_insns = g_list_sort(miss_insns, icmp);
@@ -675,8 +676,8 @@ static void log_top_insns(void)
         if (insn->symbol) {
             g_string_append_printf(rep, " (%s)", insn->symbol);
         }
-        g_string_append_printf(rep, ", %ld, %s\n", insn->l1_imisses,
-                               insn->disas_str);
+        g_string_append_printf(rep, ", %" PRId64 ", %s\n",
+                               insn->l1_imisses, insn->disas_str);
     }
 
     if (!use_l2) {
@@ -692,8 +693,8 @@ static void log_top_insns(void)
         if (insn->symbol) {
             g_string_append_printf(rep, " (%s)", insn->symbol);
         }
-        g_string_append_printf(rep, ", %ld, %s\n", insn->l2_misses,
-                               insn->disas_str);
+        g_string_append_printf(rep, ", %" PRId64 ", %s\n",
+                               insn->l2_misses, insn->disas_str);
     }
 
 finish:
diff --git a/contrib/plugins/drcov.c b/contrib/plugins/drcov.c
index 686ae0a537..5edc94dcaf 100644
--- a/contrib/plugins/drcov.c
+++ b/contrib/plugins/drcov.c
@@ -48,7 +48,7 @@ static void printf_header(unsigned long count)
     uint64_t start_code = qemu_plugin_start_code();
     uint64_t end_code = qemu_plugin_end_code();
     uint64_t entry = qemu_plugin_entry_code();
-    fprintf(fp, "0, 0x%lx, 0x%lx, 0x%lx, %s\n",
+    fprintf(fp, "0, 0x%" PRIx64 ", 0x%" PRIx64 ", 0x%" PRIx64 ", %s\n",
             start_code, end_code, entry, path);
     fprintf(fp, "BB Table: %ld bbs\n", count);
 }
diff --git a/contrib/plugins/howvec.c b/contrib/plugins/howvec.c
index 0ed01ea931..644a7856bb 100644
--- a/contrib/plugins/howvec.c
+++ b/contrib/plugins/howvec.c
@@ -181,7 +181,8 @@ static void plugin_exit(qemu_plugin_id_t id, void *p)
         switch (class->what) {
         case COUNT_CLASS:
             if (class->count || verbose) {
-                g_string_append_printf(report, "Class: %-24s\t(%ld hits)\n",
+                g_string_append_printf(report,
+                                       "Class: %-24s\t(%" PRId64 " hits)\n",
                                        class->class,
                                        class->count);
             }
@@ -208,7 +209,8 @@ static void plugin_exit(qemu_plugin_id_t id, void *p)
              i++, counts = g_list_next(counts)) {
             InsnExecCount *rec = (InsnExecCount *) counts->data;
             g_string_append_printf(report,
-                                   "Instr: %-24s\t(%ld hits)\t(op=0x%08x/%s)\n",
+                                   "Instr: %-24s\t(%" PRId64 " hits)"
+                                   "\t(op=0x%08x/%s)\n",
                                    rec->insn,
                                    rec->count,
                                    rec->opcode,
diff --git a/contrib/plugins/lockstep.c b/contrib/plugins/lockstep.c
index 3614c3564c..682b11feb2 100644
--- a/contrib/plugins/lockstep.c
+++ b/contrib/plugins/lockstep.c
@@ -108,7 +108,7 @@ static void report_divergance(ExecState *us, ExecState *them)
 
     /*
      * If we have diverged before did we get back on track or are we
-     * totally loosing it?
+     * totally losing it?
      */
     if (divergence_log) {
         DivergeState *last = (DivergeState *) divergence_log->data;
@@ -134,7 +134,9 @@ static void report_divergance(ExecState *us, ExecState *them)
 
     /* Output short log entry of going out of sync... */
     if (verbose || divrec.distance == 1 || diverged) {
-        g_string_printf(out, "@ 0x%016lx vs 0x%016lx (%d/%d since last)\n",
+        g_string_printf(out,
+                        "@ 0x%016" PRIx64 " vs 0x%016" PRIx64
+                        " (%d/%d since last)\n",
                         us->pc, them->pc, g_slist_length(divergence_log),
                         divrec.distance);
         qemu_plugin_outs(out->str);
@@ -144,7 +146,9 @@ static void report_divergance(ExecState *us, ExecState *them)
         int i;
         GSList *entry;
 
-        g_string_printf(out, "Δ insn_count @ 0x%016lx (%ld) vs 0x%016lx (%ld)\n",
+        g_string_printf(out,
+                        "Δ insn_count @ 0x%016" PRIx64
+                        " (%ld) vs 0x%016" PRIx64 " (%ld)\n",
                         us->pc, us->insn_count, them->pc, them->insn_count);
 
         for (entry = log, i = 0;
@@ -152,7 +156,8 @@ static void report_divergance(ExecState *us, ExecState *them)
              entry = g_slist_next(entry), i++) {
             ExecInfo *prev = (ExecInfo *) entry->data;
             g_string_append_printf(out,
-                                   "  previously @ 0x%016lx/%ld (%ld insns)\n",
+                                   "  previously @ 0x%016" PRIx64 "/%" PRId64
+                                   " (%ld insns)\n",
                                    prev->block->pc, prev->block->insns,
                                    prev->insn_count);
         }
diff --git a/contrib/vhost-user-gpu/vhost-user-gpu.c b/contrib/vhost-user-gpu/vhost-user-gpu.c
index 2e7815a7a3..aa304475a0 100644
--- a/contrib/vhost-user-gpu/vhost-user-gpu.c
+++ b/contrib/vhost-user-gpu/vhost-user-gpu.c
@@ -1071,6 +1071,7 @@ static gboolean
 protocol_features_cb(gint fd, GIOCondition condition, gpointer user_data)
 {
     const uint64_t protocol_edid = (1 << VHOST_USER_GPU_PROTOCOL_F_EDID);
+    const uint64_t protocol_dmabuf2 = (1 << VHOST_USER_GPU_PROTOCOL_F_DMABUF2);
     VuGpu *g = user_data;
     uint64_t protocol_features;
     VhostUserGpuMsg msg = {
@@ -1082,7 +1083,7 @@ protocol_features_cb(gint fd, GIOCondition condition, gpointer user_data)
         return G_SOURCE_CONTINUE;
     }
 
-    protocol_features &= protocol_edid;
+    protocol_features &= (protocol_edid | protocol_dmabuf2);
 
     msg = (VhostUserGpuMsg) {
         .request = VHOST_USER_GPU_SET_PROTOCOL_FEATURES,
@@ -1100,6 +1101,8 @@ protocol_features_cb(gint fd, GIOCondition condition, gpointer user_data)
         exit(EXIT_FAILURE);
     }
 
+    g->use_modifiers = !!(protocol_features & protocol_dmabuf2);
+
     return G_SOURCE_REMOVE;
 }
 
diff --git a/contrib/vhost-user-gpu/virgl.c b/contrib/vhost-user-gpu/virgl.c
index 211aa110a9..1da6cc1588 100644
--- a/contrib/vhost-user-gpu/virgl.c
+++ b/contrib/vhost-user-gpu/virgl.c
@@ -318,6 +318,37 @@ virgl_resource_detach_backing(VuGpu *g,
     vg_cleanup_mapping_iov(g, res_iovs, num_iovs);
 }
 
+static int
+virgl_get_resource_info_modifiers(uint32_t resource_id,
+                                  struct virgl_renderer_resource_info *info,
+                                  uint64_t *modifiers)
+{
+    int ret;
+#ifdef VIRGL_RENDERER_RESOURCE_INFO_EXT_VERSION
+    struct virgl_renderer_resource_info_ext info_ext;
+    ret = virgl_renderer_resource_get_info_ext(resource_id, &info_ext);
+    if (ret < 0) {
+        return ret;
+    }
+
+    *info = info_ext.base;
+    *modifiers = info_ext.modifiers;
+#else
+    ret = virgl_renderer_resource_get_info(resource_id, info);
+    if (ret < 0) {
+        return ret;
+    }
+
+    /*
+     * Before virgl_renderer_resource_get_info_ext,
+     * getting the modifiers was not possible.
+     */
+    *modifiers = 0;
+#endif
+
+    return 0;
+}
+
 static void
 virgl_cmd_set_scanout(VuGpu *g,
                       struct virtio_gpu_ctrl_command *cmd)
@@ -338,7 +369,9 @@ virgl_cmd_set_scanout(VuGpu *g,
     memset(&info, 0, sizeof(info));
 
     if (ss.resource_id && ss.r.width && ss.r.height) {
-        ret = virgl_renderer_resource_get_info(ss.resource_id, &info);
+        uint64_t modifiers = 0;
+        ret = virgl_get_resource_info_modifiers(ss.resource_id, &info,
+                                                &modifiers);
         if (ret == -1) {
             g_critical("%s: illegal resource specified %d\n",
                        __func__, ss.resource_id);
@@ -354,8 +387,6 @@ virgl_cmd_set_scanout(VuGpu *g,
         }
         assert(fd >= 0);
         VhostUserGpuMsg msg = {
-            .request = VHOST_USER_GPU_DMABUF_SCANOUT,
-            .size = sizeof(VhostUserGpuDMABUFScanout),
             .payload.dmabuf_scanout.scanout_id = ss.scanout_id,
             .payload.dmabuf_scanout.x =  ss.r.x,
             .payload.dmabuf_scanout.y =  ss.r.y,
@@ -367,6 +398,20 @@ virgl_cmd_set_scanout(VuGpu *g,
             .payload.dmabuf_scanout.fd_flags = info.flags,
             .payload.dmabuf_scanout.fd_drm_fourcc = info.drm_fourcc
         };
+
+        if (g->use_modifiers) {
+            /*
+             * The mesage uses all the fields set in dmabuf_scanout plus
+             * modifiers which is appended after VhostUserGpuDMABUFScanout.
+             */
+            msg.request = VHOST_USER_GPU_DMABUF_SCANOUT2;
+            msg.size = sizeof(VhostUserGpuDMABUFScanout2);
+            msg.payload.dmabuf_scanout2.modifier = modifiers;
+        } else {
+            msg.request = VHOST_USER_GPU_DMABUF_SCANOUT;
+            msg.size = sizeof(VhostUserGpuDMABUFScanout);
+        }
+
         vg_send_msg(g, &msg, fd);
         close(fd);
     } else {
diff --git a/contrib/vhost-user-gpu/vugpu.h b/contrib/vhost-user-gpu/vugpu.h
index f0f2069c47..509b679f03 100644
--- a/contrib/vhost-user-gpu/vugpu.h
+++ b/contrib/vhost-user-gpu/vugpu.h
@@ -37,6 +37,7 @@ typedef enum VhostUserGpuRequest {
     VHOST_USER_GPU_DMABUF_SCANOUT,
     VHOST_USER_GPU_DMABUF_UPDATE,
     VHOST_USER_GPU_GET_EDID,
+    VHOST_USER_GPU_DMABUF_SCANOUT2,
 } VhostUserGpuRequest;
 
 typedef struct VhostUserGpuDisplayInfoReply {
@@ -84,6 +85,11 @@ typedef struct VhostUserGpuDMABUFScanout {
     int fd_drm_fourcc;
 } QEMU_PACKED VhostUserGpuDMABUFScanout;
 
+typedef struct VhostUserGpuDMABUFScanout2 {
+    struct VhostUserGpuDMABUFScanout dmabuf_scanout;
+    uint64_t modifier;
+} QEMU_PACKED VhostUserGpuDMABUFScanout2;
+
 typedef struct VhostUserGpuEdidRequest {
     uint32_t scanout_id;
 } QEMU_PACKED VhostUserGpuEdidRequest;
@@ -98,6 +104,7 @@ typedef struct VhostUserGpuMsg {
         VhostUserGpuScanout scanout;
         VhostUserGpuUpdate update;
         VhostUserGpuDMABUFScanout dmabuf_scanout;
+        VhostUserGpuDMABUFScanout2 dmabuf_scanout2;
         VhostUserGpuEdidRequest edid_req;
         struct virtio_gpu_resp_edid resp_edid;
         struct virtio_gpu_resp_display_info display_info;
@@ -112,6 +119,7 @@ static VhostUserGpuMsg m __attribute__ ((unused));
 #define VHOST_USER_GPU_MSG_FLAG_REPLY 0x4
 
 #define VHOST_USER_GPU_PROTOCOL_F_EDID 0
+#define VHOST_USER_GPU_PROTOCOL_F_DMABUF2 1
 
 struct virtio_gpu_scanout {
     uint32_t width, height;
@@ -132,6 +140,7 @@ typedef struct VuGpu {
     bool virgl;
     bool virgl_inited;
     bool edid_inited;
+    bool use_modifiers;
     uint32_t inflight;
 
     struct virtio_gpu_scanout scanout[VIRTIO_GPU_MAX_SCANOUTS];
diff --git a/cpu.c b/cpu.c
index 6789e5c67b..885eeeb5ea 100644
--- a/cpu.c
+++ b/cpu.c
@@ -734,11 +734,7 @@ int cpu_memory_rw_debug(CPUState *cpu, vaddr addr,
 
 bool target_words_bigendian(void)
 {
-#if TARGET_BIG_ENDIAN
-    return true;
-#else
-    return false;
-#endif
+    return TARGET_BIG_ENDIAN;
 }
 
 const char *target_name(void)
diff --git a/crypto/aes.c b/crypto/aes.c
index 836d7d5c0b..df4362ac60 100644
--- a/crypto/aes.c
+++ b/crypto/aes.c
@@ -272,7 +272,7 @@ AES_Td3[x] = Si[x].[09, 0d, 0b, 0e];
 AES_Td4[x] = Si[x].[01, 01, 01, 01];
 */
 
-static const uint32_t AES_Te0[256] = {
+const uint32_t AES_Te0[256] = {
     0xc66363a5U, 0xf87c7c84U, 0xee777799U, 0xf67b7b8dU,
     0xfff2f20dU, 0xd66b6bbdU, 0xde6f6fb1U, 0x91c5c554U,
     0x60303050U, 0x02010103U, 0xce6767a9U, 0x562b2b7dU,
@@ -607,7 +607,7 @@ static const uint32_t AES_Te4[256] = {
     0xb0b0b0b0U, 0x54545454U, 0xbbbbbbbbU, 0x16161616U,
 };
 
-static const uint32_t AES_Td0[256] = {
+const uint32_t AES_Td0[256] = {
     0x51f4a750U, 0x7e416553U, 0x1a17a4c3U, 0x3a275e96U,
     0x3bab6bcbU, 0x1f9d45f1U, 0xacfa58abU, 0x4be30393U,
     0x2030fa55U, 0xad766df6U, 0x88cc7691U, 0xf5024c25U,
diff --git a/crypto/afalg.c b/crypto/afalg.c
index 348301e703..52a491dbb5 100644
--- a/crypto/afalg.c
+++ b/crypto/afalg.c
@@ -73,7 +73,7 @@ qcrypto_afalg_comm_alloc(const char *type, const char *name,
     QCryptoAFAlg *afalg;
 
     afalg = g_new0(QCryptoAFAlg, 1);
-    /* initilize crypto API socket */
+    /* initialize crypto API socket */
     afalg->opfd = -1;
     afalg->tfmfd = qcrypto_afalg_socket_bind(type, name, errp);
     if (afalg->tfmfd == -1) {
diff --git a/crypto/block-luks.c b/crypto/block-luks.c
index 2f59c3a625..fb01ec38bb 100644
--- a/crypto/block-luks.c
+++ b/crypto/block-luks.c
@@ -244,7 +244,7 @@ qcrypto_block_luks_has_format(const uint8_t *buf,
  *
  * When calculating ESSIV IVs, the cipher length used by ESSIV
  * may be different from the cipher length used for the block
- * encryption, becauses dm-crypt uses the hash digest length
+ * encryption, because dm-crypt uses the hash digest length
  * as the key size. ie, if you have AES 128 as the block cipher
  * and SHA 256 as ESSIV hash, then ESSIV will use AES 256 as
  * the cipher since that gets a key length matching the digest
@@ -393,7 +393,7 @@ qcrypto_block_luks_from_disk_endian(QCryptoBlockLUKSHeader *hdr)
 }
 
 /*
- * Stores the main LUKS header, taking care of endianess
+ * Stores the main LUKS header, taking care of endianness
  */
 static int
 qcrypto_block_luks_store_header(QCryptoBlock *block,
@@ -423,7 +423,7 @@ qcrypto_block_luks_store_header(QCryptoBlock *block,
 }
 
 /*
- * Loads the main LUKS header,and byteswaps it to native endianess
+ * Loads the main LUKS header, and byteswaps it to native endianness
  * And run basic sanity checks on it
  */
 static int
diff --git a/crypto/clmul.c b/crypto/clmul.c
new file mode 100644
index 0000000000..9e3e61a77d
--- /dev/null
+++ b/crypto/clmul.c
@@ -0,0 +1,111 @@
+/*
+ * Carry-less multiply operations.
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * Copyright (C) 2023 Linaro, Ltd.
+ */
+
+#include "qemu/osdep.h"
+#include "crypto/clmul.h"
+
+uint64_t clmul_8x8_low(uint64_t n, uint64_t m)
+{
+    uint64_t r = 0;
+
+    for (int i = 0; i < 8; ++i) {
+        uint64_t mask = (n & 0x0101010101010101ull) * 0xff;
+        r ^= m & mask;
+        m = (m << 1) & 0xfefefefefefefefeull;
+        n >>= 1;
+    }
+    return r;
+}
+
+static uint64_t clmul_8x4_even_int(uint64_t n, uint64_t m)
+{
+    uint64_t r = 0;
+
+    for (int i = 0; i < 8; ++i) {
+        uint64_t mask = (n & 0x0001000100010001ull) * 0xffff;
+        r ^= m & mask;
+        n >>= 1;
+        m <<= 1;
+    }
+    return r;
+}
+
+uint64_t clmul_8x4_even(uint64_t n, uint64_t m)
+{
+    n &= 0x00ff00ff00ff00ffull;
+    m &= 0x00ff00ff00ff00ffull;
+    return clmul_8x4_even_int(n, m);
+}
+
+uint64_t clmul_8x4_odd(uint64_t n, uint64_t m)
+{
+    return clmul_8x4_even(n >> 8, m >> 8);
+}
+
+static uint64_t unpack_8_to_16(uint64_t x)
+{
+    return  (x & 0x000000ff)
+         | ((x & 0x0000ff00) << 8)
+         | ((x & 0x00ff0000) << 16)
+         | ((x & 0xff000000) << 24);
+}
+
+uint64_t clmul_8x4_packed(uint32_t n, uint32_t m)
+{
+    return clmul_8x4_even_int(unpack_8_to_16(n), unpack_8_to_16(m));
+}
+
+uint64_t clmul_16x2_even(uint64_t n, uint64_t m)
+{
+    uint64_t r = 0;
+
+    n &= 0x0000ffff0000ffffull;
+    m &= 0x0000ffff0000ffffull;
+
+    for (int i = 0; i < 16; ++i) {
+        uint64_t mask = (n & 0x0000000100000001ull) * 0xffffffffull;
+        r ^= m & mask;
+        n >>= 1;
+        m <<= 1;
+    }
+    return r;
+}
+
+uint64_t clmul_16x2_odd(uint64_t n, uint64_t m)
+{
+    return clmul_16x2_even(n >> 16, m >> 16);
+}
+
+uint64_t clmul_32(uint32_t n, uint32_t m32)
+{
+    uint64_t r = 0;
+    uint64_t m = m32;
+
+    for (int i = 0; i < 32; ++i) {
+        r ^= n & 1 ? m : 0;
+        n >>= 1;
+        m <<= 1;
+    }
+    return r;
+}
+
+Int128 clmul_64_gen(uint64_t n, uint64_t m)
+{
+    uint64_t rl = 0, rh = 0;
+
+    /* Bit 0 can only influence the low 64-bit result.  */
+    if (n & 1) {
+        rl = m;
+    }
+
+    for (int i = 1; i < 64; ++i) {
+        uint64_t mask = -((n >> i) & 1);
+        rl ^= (m << i) & mask;
+        rh ^= (m >> (64 - i)) & mask;
+    }
+    return int128_make128(rl, rh);
+}
diff --git a/crypto/der.c b/crypto/der.c
index dab3fe4f24..ebbecfc3fe 100644
--- a/crypto/der.c
+++ b/crypto/der.c
@@ -76,7 +76,7 @@ enum QCryptoDERTagEnc {
 /**
  * qcrypto_der_encode_length:
  * @src_len: the length of source data
- * @dst: distination to save the encoded 'length', if dst is NULL, only compute
+ * @dst: destination to save the encoded 'length', if dst is NULL, only compute
  * the expected buffer size in bytes.
  * @dst_len: output parameter, indicates how many bytes wrote.
  *
diff --git a/crypto/der.h b/crypto/der.h
index 0e895bbeec..f4ba6da28a 100644
--- a/crypto/der.h
+++ b/crypto/der.h
@@ -249,7 +249,7 @@ void qcrypto_der_encode_octet_str(QCryptoEncodeContext *ctx,
  * Start encoding a octet string, All fields between
  * qcrypto_der_encode_octet_str_begin and qcrypto_der_encode_octet_str_end
  * are encoded as an octet string. This is useful when we need to encode a
- * encoded SEQUNCE as OCTET STRING.
+ * encoded SEQUENCE as OCTET STRING.
  */
 void qcrypto_der_encode_octet_str_begin(QCryptoEncodeContext *ctx);
 
@@ -260,7 +260,7 @@ void qcrypto_der_encode_octet_str_begin(QCryptoEncodeContext *ctx);
  * Finish encoding a octet string, All fields between
  * qcrypto_der_encode_octet_str_begin and qcrypto_der_encode_octet_str_end
  * are encoded as an octet string. This is useful when we need to encode a
- * encoded SEQUNCE as OCTET STRING.
+ * encoded SEQUENCE as OCTET STRING.
  */
 void qcrypto_der_encode_octet_str_end(QCryptoEncodeContext *ctx);
 
@@ -275,7 +275,7 @@ size_t qcrypto_der_encode_ctx_buffer_len(QCryptoEncodeContext *ctx);
 /**
  * qcrypto_der_encode_ctx_flush_and_free:
  * @ctx: the encode context.
- * @dst: the distination to save the encoded data, the length of dst should
+ * @dst: the destination to save the encoded data, the length of dst should
  * not less than qcrypto_der_encode_cxt_buffer_len
  *
  * Flush all encoded data into dst, then free ctx.
diff --git a/crypto/hmacpriv.h b/crypto/hmacpriv.h
index 4387ca2587..62dfe8257a 100644
--- a/crypto/hmacpriv.h
+++ b/crypto/hmacpriv.h
@@ -28,19 +28,18 @@ struct QCryptoHmacDriver {
     void (*hmac_free)(QCryptoHmac *hmac);
 };
 
-extern void *qcrypto_hmac_ctx_new(QCryptoHashAlgorithm alg,
-                                  const uint8_t *key, size_t nkey,
-                                  Error **errp);
+void *qcrypto_hmac_ctx_new(QCryptoHashAlgorithm alg,
+                           const uint8_t *key, size_t nkey,
+                           Error **errp);
 extern QCryptoHmacDriver qcrypto_hmac_lib_driver;
 
 #ifdef CONFIG_AF_ALG
 
 #include "afalgpriv.h"
 
-extern QCryptoAFAlg *
-qcrypto_afalg_hmac_ctx_new(QCryptoHashAlgorithm alg,
-                           const uint8_t *key, size_t nkey,
-                           Error **errp);
+QCryptoAFAlg *qcrypto_afalg_hmac_ctx_new(QCryptoHashAlgorithm alg,
+                                         const uint8_t *key, size_t nkey,
+                                         Error **errp);
 extern QCryptoHmacDriver qcrypto_hmac_afalg_driver;
 
 #endif
diff --git a/crypto/meson.build b/crypto/meson.build
index 5f03a30d34..9ac1a89802 100644
--- a/crypto/meson.build
+++ b/crypto/meson.build
@@ -48,9 +48,12 @@ if have_afalg
 endif
 crypto_ss.add(when: gnutls, if_true: files('tls-cipher-suites.c'))
 
-util_ss.add(files('sm4.c'))
-util_ss.add(files('aes.c'))
-util_ss.add(files('init.c'))
+util_ss.add(files(
+  'aes.c',
+  'clmul.c',
+  'init.c',
+  'sm4.c',
+))
 if gnutls.found()
   util_ss.add(gnutls)
 endif
diff --git a/crypto/sm4.c b/crypto/sm4.c
index 9f0cd452c7..2987306cf7 100644
--- a/crypto/sm4.c
+++ b/crypto/sm4.c
@@ -47,3 +47,13 @@ uint8_t const sm4_sbox[] = {
     0x79, 0xee, 0x5f, 0x3e, 0xd7, 0xcb, 0x39, 0x48,
 };
 
+uint32_t const sm4_ck[] = {
+    0x00070e15, 0x1c232a31, 0x383f464d, 0x545b6269,
+    0x70777e85, 0x8c939aa1, 0xa8afb6bd, 0xc4cbd2d9,
+    0xe0e7eef5, 0xfc030a11, 0x181f262d, 0x343b4249,
+    0x50575e65, 0x6c737a81, 0x888f969d, 0xa4abb2b9,
+    0xc0c7ced5, 0xdce3eaf1, 0xf8ff060d, 0x141b2229,
+    0x30373e45, 0x4c535a61, 0x686f767d, 0x848b9299,
+    0xa0a7aeb5, 0xbcc3cad1, 0xd8dfe6ed, 0xf4fb0209,
+    0x10171e25, 0x2c333a41, 0x484f565d, 0x646b7279
+};
diff --git a/docs/about/build-platforms.rst b/docs/about/build-platforms.rst
index 0e2cb9e770..f2a7aec56f 100644
--- a/docs/about/build-platforms.rst
+++ b/docs/about/build-platforms.rst
@@ -52,7 +52,7 @@ Those hosts are officially supported, with various accelerators:
    * - SPARC
      - tcg
    * - x86
-     - hax, hvf (64 bit only), kvm, nvmm, tcg, whpx (64 bit only), xen
+     - hvf (64 bit only), kvm, nvmm, tcg, whpx (64 bit only), xen
 
 Other host architectures are not supported. It is possible to build QEMU system
 emulation on an unsupported host architecture using the configure
diff --git a/docs/about/deprecated.rst b/docs/about/deprecated.rst
index 92a2bafd2b..dc4da95329 100644
--- a/docs/about/deprecated.rst
+++ b/docs/about/deprecated.rst
@@ -105,12 +105,6 @@ Use ``-machine hpet=off`` instead.
 The ``-no-acpi`` setting has been turned into a machine property.
 Use ``-machine acpi=off`` instead.
 
-``-accel hax`` (since 8.0)
-''''''''''''''''''''''''''
-
-The HAXM project has been retired (see https://github.com/intel/haxm#status).
-Use "whpx" (on Windows) or "hvf" (on macOS) instead.
-
 ``-async-teardown`` (since 8.1)
 '''''''''''''''''''''''''''''''
 
diff --git a/docs/about/index.rst b/docs/about/index.rst
index b00b584b31..4f96ab5d91 100644
--- a/docs/about/index.rst
+++ b/docs/about/index.rst
@@ -8,7 +8,7 @@ QEMU can be used in several different ways. The most common is for
 :ref:`System Emulation`, where it provides a virtual model of an
 entire machine (CPU, memory and emulated devices) to run a guest OS.
 In this mode the CPU may be fully emulated, or it may work with a
-hypervisor such as KVM, Xen, Hax or Hypervisor.Framework to allow the
+hypervisor such as KVM, Xen or Hypervisor.Framework to allow the
 guest to run directly on the host CPU.
 
 The second supported way to use QEMU is :ref:`User Mode Emulation`,
diff --git a/docs/about/removed-features.rst b/docs/about/removed-features.rst
index 5b258b446b..c2043fd415 100644
--- a/docs/about/removed-features.rst
+++ b/docs/about/removed-features.rst
@@ -659,15 +659,18 @@ Use ``Icelake-Server`` instead.
 System accelerators
 -------------------
 
-Userspace local APIC with KVM (x86, removed 8.0)
-''''''''''''''''''''''''''''''''''''''''''''''''
+Userspace local APIC with KVM (x86, removed in 8.0)
+'''''''''''''''''''''''''''''''''''''''''''''''''''
 
 ``-M kernel-irqchip=off`` cannot be used on KVM if the CPU model includes
 a local APIC.  The ``split`` setting is supported, as is using ``-M
 kernel-irqchip=off`` when the CPU does not have a local APIC.
 
-System accelerators
--------------------
+HAXM (``-accel hax``) (removed in 8.2)
+''''''''''''''''''''''''''''''''''''''
+
+The HAXM project has been retired (see https://github.com/intel/haxm#status).
+Use "whpx" (on Windows) or "hvf" (on macOS) instead.
 
 MIPS "Trap-and-Emulate" KVM support (removed in 8.0)
 ''''''''''''''''''''''''''''''''''''''''''''''''''''
diff --git a/docs/devel/build-system.rst b/docs/devel/build-system.rst
index 64efa26b90..0f990bb3e9 100644
--- a/docs/devel/build-system.rst
+++ b/docs/devel/build-system.rst
@@ -460,17 +460,13 @@ Built by configure:
 
 ``config-host.mak``
   When configure has determined the characteristics of the build host it
-  will write them to this file for use in ``Makefile`` and to a smaller
-  extent ``meson.build``. These include the paths to various tools and a
-  variety of ``CONFIG_*`` variables related to optionally enabled features.
+  will write the paths to various tools to this file, for use in ``Makefile``
+  and to a smaller extent ``meson.build``.
 
   ``config-host.mak`` is also used as a dependency checking mechanism. If make
   sees that the modification timestamp on configure is newer than that on
   ``config-host.mak``, then configure will be re-run.
 
-  The variables defined here apply to all QEMU
-  build outputs.
-
 ``config-meson.cross``
 
   A Meson "cross file" (or native file) used to communicate the paths to
diff --git a/docs/devel/kconfig.rst b/docs/devel/kconfig.rst
index e3a544e463..73f52de106 100644
--- a/docs/devel/kconfig.rst
+++ b/docs/devel/kconfig.rst
@@ -316,6 +316,6 @@ variable::
 
     host_kconfig = \
       (have_tpm ? ['CONFIG_TPM=y'] : []) + \
-      ('CONFIG_LINUX' in config_host ? ['CONFIG_LINUX=y'] : []) + \
+      (targetos == 'linux' ? ['CONFIG_LINUX=y'] : []) + \
       (have_ivshmem ? ['CONFIG_IVSHMEM=y'] : []) + \
       ...
diff --git a/docs/devel/multi-process.rst b/docs/devel/multi-process.rst
index e4801751f2..4ef539c0b0 100644
--- a/docs/devel/multi-process.rst
+++ b/docs/devel/multi-process.rst
@@ -409,8 +409,9 @@ the initial messages sent to the emulation process is a guest memory
 table. Each entry in this table consists of a file descriptor and size
 that the emulation process can ``mmap()`` to directly access guest
 memory, similar to ``vhost_user_set_mem_table()``. Note guest memory
-must be backed by file descriptors, such as when QEMU is given the
-*-mem-path* command line option.
+must be backed by shared file-backed memory, for example, using
+*-object memory-backend-file,share=on* and setting that memory backend
+as RAM for the machine.
 
 IOMMU operations
 ^^^^^^^^^^^^^^^^
diff --git a/docs/devel/reset.rst b/docs/devel/reset.rst
index 7cc6a6b314..38ed1790f7 100644
--- a/docs/devel/reset.rst
+++ b/docs/devel/reset.rst
@@ -184,21 +184,20 @@ in reset.
     {
         MyDevClass *myclass = MYDEV_CLASS(class);
         ResettableClass *rc = RESETTABLE_CLASS(class);
-        resettable_class_set_parent_reset_phases(rc,
-                                                 mydev_reset_enter,
-                                                 mydev_reset_hold,
-                                                 mydev_reset_exit,
-                                                 &myclass->parent_phases);
+        resettable_class_set_parent_phases(rc,
+                                           mydev_reset_enter,
+                                           mydev_reset_hold,
+                                           mydev_reset_exit,
+                                           &myclass->parent_phases);
     }
 
 In the above example, we override all three phases. It is possible to override
 only some of them by passing NULL instead of a function pointer to
-``resettable_class_set_parent_reset_phases()``. For example, the following will
+``resettable_class_set_parent_phases()``. For example, the following will
 only override the *enter* phase and leave *hold* and *exit* untouched::
 
-    resettable_class_set_parent_reset_phases(rc, mydev_reset_enter,
-                                             NULL, NULL,
-                                             &myclass->parent_phases);
+    resettable_class_set_parent_phases(rc, mydev_reset_enter, NULL, NULL,
+                                       &myclass->parent_phases);
 
 This is equivalent to providing a trivial implementation of the hold and exit
 phases which does nothing but call the parent class's implementation of the
diff --git a/docs/devel/vfio-migration.rst b/docs/devel/vfio-migration.rst
index b433cb5bb2..605fe60e96 100644
--- a/docs/devel/vfio-migration.rst
+++ b/docs/devel/vfio-migration.rst
@@ -23,9 +23,21 @@ and recommends that the initial bytes are sent and loaded in the destination
 before stopping the source VM. Enabling this migration capability will
 guarantee that and thus, can potentially reduce downtime even further.
 
-Note that currently VFIO migration is supported only for a single device. This
-is due to VFIO migration's lack of P2P support. However, P2P support is planned
-to be added later on.
+To support migration of multiple devices that might do P2P transactions between
+themselves, VFIO migration uAPI defines an intermediate P2P quiescent state.
+While in the P2P quiescent state, P2P DMA transactions cannot be initiated by
+the device, but the device can respond to incoming ones. Additionally, all
+outstanding P2P transactions are guaranteed to have been completed by the time
+the device enters this state.
+
+All the devices that support P2P migration are first transitioned to the P2P
+quiescent state and only then are they stopped or started. This makes migration
+safe P2P-wise, since starting and stopping the devices is not done atomically
+for all the devices together.
+
+Thus, multiple VFIO devices migration is allowed only if all the devices
+support P2P migration. Single VFIO device migration is allowed regardless of
+P2P migration support.
 
 A detailed description of the UAPI for VFIO device migration can be found in
 the comment for the ``vfio_device_mig_state`` structure in the header file
@@ -132,54 +144,63 @@ will be blocked.
 Flow of state changes during Live migration
 ===========================================
 
-Below is the flow of state change during live migration.
+Below is the state change flow during live migration for a VFIO device that
+supports both precopy and P2P migration. The flow for devices that don't
+support it is similar, except that the relevant states for precopy and P2P are
+skipped.
 The values in the parentheses represent the VM state, the migration state, and
 the VFIO device state, respectively.
-The text in the square brackets represents the flow if the VFIO device supports
-pre-copy.
 
 Live migration save path
 ------------------------
 
 ::
 
-                        QEMU normal running state
-                        (RUNNING, _NONE, _RUNNING)
-                                  |
+                           QEMU normal running state
+                           (RUNNING, _NONE, _RUNNING)
+                                      |
                      migrate_init spawns migration_thread
-                Migration thread then calls each device's .save_setup()
-                  (RUNNING, _SETUP, _RUNNING [_PRE_COPY])
-                                  |
-                  (RUNNING, _ACTIVE, _RUNNING [_PRE_COPY])
-      If device is active, get pending_bytes by .state_pending_{estimate,exact}()
-          If total pending_bytes >= threshold_size, call .save_live_iterate()
-                  [Data of VFIO device for pre-copy phase is copied]
-        Iterate till total pending bytes converge and are less than threshold
-                                  |
-  On migration completion, vCPU stops and calls .save_live_complete_precopy for
-  each active device. The VFIO device is then transitioned into _STOP_COPY state
-                  (FINISH_MIGRATE, _DEVICE, _STOP_COPY)
-                                  |
-     For the VFIO device, iterate in .save_live_complete_precopy until
-                         pending data is 0
-                   (FINISH_MIGRATE, _DEVICE, _STOP)
-                                  |
-                 (FINISH_MIGRATE, _COMPLETED, _STOP)
-             Migraton thread schedules cleanup bottom half and exits
+            Migration thread then calls each device's .save_setup()
+                          (RUNNING, _SETUP, _PRE_COPY)
+                                      |
+                         (RUNNING, _ACTIVE, _PRE_COPY)
+  If device is active, get pending_bytes by .state_pending_{estimate,exact}()
+       If total pending_bytes >= threshold_size, call .save_live_iterate()
+                Data of VFIO device for pre-copy phase is copied
+      Iterate till total pending bytes converge and are less than threshold
+                                      |
+       On migration completion, the vCPUs and the VFIO device are stopped
+              The VFIO device is first put in P2P quiescent state
+                    (FINISH_MIGRATE, _ACTIVE, _PRE_COPY_P2P)
+                                      |
+                Then the VFIO device is put in _STOP_COPY state
+                     (FINISH_MIGRATE, _ACTIVE, _STOP_COPY)
+         .save_live_complete_precopy() is called for each active device
+      For the VFIO device, iterate in .save_live_complete_precopy() until
+                               pending data is 0
+                                      |
+                     (POSTMIGRATE, _COMPLETED, _STOP_COPY)
+            Migraton thread schedules cleanup bottom half and exits
+                                      |
+                           .save_cleanup() is called
+                        (POSTMIGRATE, _COMPLETED, _STOP)
 
 Live migration resume path
 --------------------------
 
 ::
 
-              Incoming migration calls .load_setup for each device
-                       (RESTORE_VM, _ACTIVE, _STOP)
-                                 |
-       For each device, .load_state is called for that device section data
-                       (RESTORE_VM, _ACTIVE, _RESUMING)
-                                 |
-    At the end, .load_cleanup is called for each device and vCPUs are started
-                       (RUNNING, _NONE, _RUNNING)
+             Incoming migration calls .load_setup() for each device
+                          (RESTORE_VM, _ACTIVE, _STOP)
+                                      |
+     For each device, .load_state() is called for that device section data
+                        (RESTORE_VM, _ACTIVE, _RESUMING)
+                                      |
+  At the end, .load_cleanup() is called for each device and vCPUs are started
+              The VFIO device is first put in P2P quiescent state
+                        (RUNNING, _ACTIVE, _RUNNING_P2P)
+                                      |
+                           (RUNNING, _NONE, _RUNNING)
 
 Postcopy
 ========
diff --git a/docs/interop/vhost-user-gpu.rst b/docs/interop/vhost-user-gpu.rst
index b78806892d..3035822d05 100644
--- a/docs/interop/vhost-user-gpu.rst
+++ b/docs/interop/vhost-user-gpu.rst
@@ -134,6 +134,19 @@ VhostUserGpuEdidRequest
 :scanout-id: ``u32``, the scanout to get edid from
 
 
+VhostUserGpuDMABUFScanout2
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
++----------------+----------+
+| dmabuf_scanout | modifier |
++----------------+----------+
+
+:dmabuf_scanout: ``VhostUserGpuDMABUFScanout``, filled as described in the
+                 VhostUserGpuDMABUFScanout structure.
+
+:modifier: ``u64``, the DMABUF modifiers
+
+
 C structure
 -----------
 
@@ -163,7 +176,8 @@ Protocol features
 
 .. code:: c
 
-  #define VHOST_USER_GPU_PROTOCOL_F_EDID 0
+  #define VHOST_USER_GPU_PROTOCOL_F_EDID    0
+  #define VHOST_USER_GPU_PROTOCOL_F_DMABUF2 1
 
 New messages and communication changes are negotiated thanks to the
 ``VHOST_USER_GPU_GET_PROTOCOL_FEATURES`` and
@@ -263,3 +277,13 @@ Message types
   Retrieve the EDID data for a given scanout.
   This message requires the ``VHOST_USER_GPU_PROTOCOL_F_EDID`` protocol
   feature to be supported.
+
+``VHOST_USER_GPU_DMABUF_SCANOUT2``
+  :id: 12
+  :request payload: ``VhostUserGpuDMABUFScanout2``
+  :reply payload: N/A
+
+  Same as VHOST_USER_GPU_DMABUF_SCANOUT, but also sends the dmabuf modifiers
+  appended to the message, which were not provided in the other message.
+  This message requires the ``VHOST_USER_GPU_PROTOCOL_F_DMABUF2`` protocol
+  feature to be supported.
diff --git a/docs/multi-thread-compression.txt b/docs/multi-thread-compression.txt
index bb88c6bdf1..95b1556f67 100644
--- a/docs/multi-thread-compression.txt
+++ b/docs/multi-thread-compression.txt
@@ -117,13 +117,13 @@ to support the multiple thread compression migration:
     {qemu} migrate_set_capability compress on
 
 3. Set the compression thread count on source:
-    {qemu} migrate_set_parameter compress_threads 12
+    {qemu} migrate_set_parameter compress-threads 12
 
 4. Set the compression level on the source:
-    {qemu} migrate_set_parameter compress_level 1
+    {qemu} migrate_set_parameter compress-level 1
 
 5. Set the decompression thread count on destination:
-    {qemu} migrate_set_parameter decompress_threads 3
+    {qemu} migrate_set_parameter decompress-threads 3
 
 6. Start outgoing migration:
     {qemu} migrate -d tcp:destination.host:4444
@@ -133,9 +133,9 @@ to support the multiple thread compression migration:
 
 The following are the default settings:
     compress: off
-    compress_threads: 8
-    decompress_threads: 2
-    compress_level: 1 (which means best speed)
+    compress-threads: 8
+    decompress-threads: 2
+    compress-level: 1 (which means best speed)
 
 So, only the first two steps are required to use the multiple
 thread compression in migration. You can do more if the default
diff --git a/docs/rdma.txt b/docs/rdma.txt
index 2b4cdea1d8..bd8dd799a9 100644
--- a/docs/rdma.txt
+++ b/docs/rdma.txt
@@ -89,7 +89,7 @@ RUNNING:
 First, set the migration speed to match your hardware's capabilities:
 
 QEMU Monitor Command:
-$ migrate_set_parameter max_bandwidth 40g # or whatever is the MAX of your RDMA device
+$ migrate_set_parameter max-bandwidth 40g # or whatever is the MAX of your RDMA device
 
 Next, on the destination machine, add the following to the QEMU command line:
 
diff --git a/docs/specs/pci-ids.rst b/docs/specs/pci-ids.rst
index e302bea484..d6707fa069 100644
--- a/docs/specs/pci-ids.rst
+++ b/docs/specs/pci-ids.rst
@@ -92,6 +92,8 @@ PCI devices (other than virtio):
   PCI PVPanic device (``-device pvpanic-pci``)
 1b36:0012
   PCI ACPI ERST device (``-device acpi-erst``)
+1b36:0013
+  PCI UFS device (``-device ufs``)
 
 All these devices are documented in :doc:`index`.
 
diff --git a/docs/system/arm/aspeed.rst b/docs/system/arm/aspeed.rst
index 80538422a1..b2dea54eed 100644
--- a/docs/system/arm/aspeed.rst
+++ b/docs/system/arm/aspeed.rst
@@ -104,7 +104,7 @@ To boot a kernel directly from a Linux build tree:
         -dtb arch/arm/boot/dts/aspeed-ast2600-evb.dtb \
         -initrd rootfs.cpio
 
-The image should be attached as an MTD drive. Run :
+To boot the machine from the flash image, use an MTD drive :
 
 .. code-block:: bash
 
@@ -117,23 +117,46 @@ Options specific to Aspeed machines are :
    device by using the FMC controller to load the instructions, and
    not simply from RAM. This takes a little longer.
 
- * ``fmc-model`` to change the FMC Flash model. FW needs support for
-   the chip model to boot.
+ * ``fmc-model`` to change the default FMC Flash model. FW needs
+   support for the chip model to boot.
 
- * ``spi-model`` to change the SPI Flash model.
+ * ``spi-model`` to change the default SPI Flash model.
 
  * ``bmc-console`` to change the default console device. Most of the
    machines use the ``UART5`` device for a boot console, which is
    mapped on ``/dev/ttyS4`` under Linux, but it is not always the
    case.
 
-For instance, to start the ``ast2500-evb`` machine with a different
-FMC chip and a bigger (64M) SPI chip, use :
+To use other flash models, for instance a different FMC chip and a
+bigger (64M) SPI for the ``ast2500-evb`` machine, run :
 
 .. code-block:: bash
 
   -M ast2500-evb,fmc-model=mx25l25635e,spi-model=mx66u51235f
 
+When more flexibility is needed to define the flash devices, to use
+different flash models or define all flash devices (up to 8), the
+``-nodefaults`` QEMU option can be used to avoid creating the default
+flash devices.
+
+Flash devices should then be created from the command line and attached
+to a block device :
+
+.. code-block:: bash
+
+  $ qemu-system-arm -M ast2600-evb \
+        -blockdev node-name=fmc0,driver=file,filename=/path/to/fmc0.img \
+	-device mx66u51235f,bus=ssi.0,cs=0x0,drive=fmc0 \
+	-blockdev node-name=fmc1,driver=file,filename=/path/to/fmc1.img \
+	-device mx66u51235f,bus=ssi.0,cs=0x1,drive=fmc1 \
+	-blockdev node-name=spi1,driver=file,filename=/path/to/spi1.img \
+	-device mx66u51235f,cs=0x0,bus=ssi.1,drive=spi1 \
+	-nographic -nodefaults
+
+In that case, the machine boots fetching instructions from the FMC0
+device. It is slower to start but closer to what HW does. Using the
+machine option ``execute-in-place`` has a similar effect.
+
 To change the boot console and use device ``UART3`` (``/dev/ttyS2``
 under Linux), use :
 
diff --git a/docs/system/arm/cpu-features.rst b/docs/system/arm/cpu-features.rst
index 6bb88a40c7..a5fb929243 100644
--- a/docs/system/arm/cpu-features.rst
+++ b/docs/system/arm/cpu-features.rst
@@ -210,15 +210,20 @@ TCG VCPU Features
 TCG VCPU features are CPU features that are specific to TCG.
 Below is the list of TCG VCPU features and their descriptions.
 
-``pauth-impdef``
-  When ``FEAT_Pauth`` is enabled, either the *impdef* (Implementation
-  Defined) algorithm is enabled or the *architected* QARMA algorithm
-  is enabled.  By default the impdef algorithm is disabled, and QARMA
-  is enabled.
+``pauth``
+  Enable or disable ``FEAT_Pauth`` entirely.
 
-  The architected QARMA algorithm has good cryptographic properties,
-  but can be quite slow to emulate.  The impdef algorithm used by QEMU
-  is non-cryptographic but significantly faster.
+``pauth-impdef``
+  When ``pauth`` is enabled, select the QEMU implementation defined algorithm.
+
+``pauth-qarma3``
+  When ``pauth`` is enabled, select the architected QARMA3 algorithm.
+
+Without either ``pauth-impdef`` or ``pauth-qarma3`` enabled,
+the architected QARMA5 algorithm is used.  The architected QARMA5
+and QARMA3 algorithms have good cryptographic properties, but can
+be quite slow to emulate.  The impdef algorithm used by QEMU is
+non-cryptographic but significantly faster.
 
 SVE CPU Properties
 ==================
diff --git a/docs/system/arm/emulation.rst b/docs/system/arm/emulation.rst
index 2e6a7c8961..3df936fc35 100644
--- a/docs/system/arm/emulation.rst
+++ b/docs/system/arm/emulation.rst
@@ -28,12 +28,15 @@ the following architecture extensions:
 - FEAT_DotProd (Advanced SIMD dot product instructions)
 - FEAT_DoubleFault (Double Fault Extension)
 - FEAT_E0PD (Preventing EL0 access to halves of address maps)
+- FEAT_EPAC (Enhanced pointer authentication)
 - FEAT_ETS (Enhanced Translation Synchronization)
 - FEAT_EVT (Enhanced Virtualization Traps)
 - FEAT_FCMA (Floating-point complex number instructions)
 - FEAT_FGT (Fine-Grained Traps)
 - FEAT_FHM (Floating-point half-precision multiplication instructions)
 - FEAT_FP16 (Half-precision floating-point data processing)
+- FEAT_FPAC (Faulting on AUT* instructions)
+- FEAT_FPACCOMBINE (Faulting on combined pointer authentication instructions)
 - FEAT_FRINTTS (Floating-point to integer instructions)
 - FEAT_FlagM (Flag manipulation instructions v2)
 - FEAT_FlagM2 (Enhancements to flag manipulation instructions)
@@ -57,10 +60,14 @@ the following architecture extensions:
 - FEAT_MTE (Memory Tagging Extension)
 - FEAT_MTE2 (Memory Tagging Extension)
 - FEAT_MTE3 (MTE Asymmetric Fault Handling)
+- FEAT_PACIMP (Pointer authentication - IMPLEMENTATION DEFINED algorithm)
+- FEAT_PACQARMA3 (Pointer authentication - QARMA3 algorithm)
+- FEAT_PACQARMA5 (Pointer authentication - QARMA5 algorithm)
 - FEAT_PAN (Privileged access never)
 - FEAT_PAN2 (AT S1E1R and AT S1E1W instruction variants affected by PSTATE.PAN)
 - FEAT_PAN3 (Support for SCTLR_ELx.EPAN)
 - FEAT_PAuth (Pointer authentication)
+- FEAT_PAuth2 (Enhacements to pointer authentication)
 - FEAT_PMULL (PMULL, PMULL2 instructions)
 - FEAT_PMUv3p1 (PMU Extensions v3.1)
 - FEAT_PMUv3p4 (PMU Extensions v3.4)
@@ -85,6 +92,7 @@ the following architecture extensions:
 - FEAT_SME_I16I64 (16-bit to 64-bit integer widening outer product instructions)
 - FEAT_SPECRES (Speculation restriction instructions)
 - FEAT_SSBS (Speculative Store Bypass Safe)
+- FEAT_TIDCP1 (EL0 use of IMPLEMENTATION DEFINED functionality)
 - FEAT_TLBIOS (TLB invalidate instructions in Outer Shareable domain)
 - FEAT_TLBIRANGE (TLB invalidate range instructions)
 - FEAT_TTCNP (Translation table Common not private translations)
diff --git a/docs/system/arm/virt.rst b/docs/system/arm/virt.rst
index 51cdac6841..e1697ac8f4 100644
--- a/docs/system/arm/virt.rst
+++ b/docs/system/arm/virt.rst
@@ -58,6 +58,7 @@ Supported guest CPU types:
 - ``cortex-a57`` (64-bit)
 - ``cortex-a72`` (64-bit)
 - ``cortex-a76`` (64-bit)
+- ``cortex-a710`` (64-bit)
 - ``a64fx`` (64-bit)
 - ``host`` (with KVM only)
 - ``neoverse-n1`` (64-bit)
diff --git a/docs/system/devices/cxl.rst b/docs/system/devices/cxl.rst
index f12011e230..6ab5f72473 100644
--- a/docs/system/devices/cxl.rst
+++ b/docs/system/devices/cxl.rst
@@ -157,7 +157,7 @@ responsible for allocating appropriate ranges from within the CFMWs
 and exposing those via normal memory configurations as would be done
 for system RAM.
 
-Example system Topology. x marks the match in each decoder level::
+Example system topology. x marks the match in each decoder level::
 
   |<------------------SYSTEM PHYSICAL ADDRESS MAP (1)----------------->|
   |    __________   __________________________________   __________    |
@@ -187,8 +187,8 @@ Example system Topology. x marks the match in each decoder level::
        ___________|___   __________|__   __|_________   ___|_________
    (3)|  Root Port 0  | | Root Port 1 | | Root Port 2| | Root Port 3 |
       |  Appears in   | | Appears in  | | Appears in | | Appear in   |
-      |  PCI topology | | PCI Topology| | PCI Topo   | | PCI Topo    |
-      |  As 0c:00.0   | | as 0c:01.0  | | as de:00.0 | | as de:01.0  |
+      |  PCI topology | | PCI topology| | PCI topo   | | PCI topo    |
+      |  as 0c:00.0   | | as 0c:01.0  | | as de:00.0 | | as de:01.0  |
       |_______________| |_____________| |____________| |_____________|
             |                  |               |              |
             |                  |               |              |
@@ -272,7 +272,7 @@ Example topology involving a switch::
       |  Root Port 0  |
       |  Appears in   |
       |  PCI topology |
-      |  As 0c:00.0   |
+      |  as 0c:00.0   |
       |___________x___|
                   |
                   |
@@ -313,7 +313,7 @@ A very simple setup with just one directly attached CXL Type 3 Persistent Memory
 
 A very simple setup with just one directly attached CXL Type 3 Volatile Memory device::
 
-  qemu-system-aarch64 -M virt,gic-version=3,cxl=on -m 4g,maxmem=8G,slots=8 -cpu max \
+  qemu-system-x86_64 -M q35,cxl=on -m 4G,maxmem=8G,slots=8 -smp 4 \
   ...
   -object memory-backend-ram,id=vmem0,share=on,size=256M \
   -device pxb-cxl,bus_nr=12,bus=pcie.0,id=cxl.1 \
@@ -323,7 +323,7 @@ A very simple setup with just one directly attached CXL Type 3 Volatile Memory d
 
 The same volatile setup may optionally include an LSA region::
 
-  qemu-system-aarch64 -M virt,gic-version=3,cxl=on -m 4g,maxmem=8G,slots=8 -cpu max \
+  qemu-system-x86_64 -M q35,cxl=on -m 4G,maxmem=8G,slots=8 -smp 4 \
   ...
   -object memory-backend-ram,id=vmem0,share=on,size=256M \
   -object memory-backend-file,id=cxl-lsa0,share=on,mem-path=/tmp/lsa.raw,size=256M \
diff --git a/docs/system/index.rst b/docs/system/index.rst
index 3605bbe1ce..c21065e519 100644
--- a/docs/system/index.rst
+++ b/docs/system/index.rst
@@ -6,7 +6,7 @@ System Emulation
 
 This section of the manual is the overall guide for users using QEMU
 for full system emulation (as opposed to user-mode emulation).
-This includes working with hypervisors such as KVM, Xen, Hax
+This includes working with hypervisors such as KVM, Xen
 or Hypervisor.Framework.
 
 .. toctree::
@@ -38,3 +38,4 @@ or Hypervisor.Framework.
    security
    multi-process
    confidential-guest-support
+   vm-templating
diff --git a/docs/system/introduction.rst b/docs/system/introduction.rst
index 3e256f8326..51ac132d6c 100644
--- a/docs/system/introduction.rst
+++ b/docs/system/introduction.rst
@@ -21,9 +21,6 @@ Tiny Code Generator (TCG) capable of emulating many CPUs.
   * - Xen
     - Linux (as dom0)
     - Arm, x86
-  * - Intel HAXM (hax)
-    - Linux, Windows
-    - x86
   * - Hypervisor Framework (hvf)
     - MacOS
     - x86 (64 bit only), Arm (64 bit only)
diff --git a/docs/system/replay.rst b/docs/system/replay.rst
index 3105327423..ca7c17c63d 100644
--- a/docs/system/replay.rst
+++ b/docs/system/replay.rst
@@ -181,7 +181,7 @@ Audio data is recorded and replay automatically. The command line for recording
 and replaying must contain identical specifications of audio hardware, e.g.:
 
 .. parsed-literal::
-    -soundhw ac97
+    -audio pa,model=ac97
 
 Serial ports
 ------------
diff --git a/docs/system/vm-templating.rst b/docs/system/vm-templating.rst
new file mode 100644
index 0000000000..28905a1eeb
--- /dev/null
+++ b/docs/system/vm-templating.rst
@@ -0,0 +1,125 @@
+QEMU VM templating
+==================
+
+This document explains how to use VM templating in QEMU.
+
+For now, the focus is on VM memory aspects, and not about how to save and
+restore other VM state (i.e., migrate-to-file with ``x-ignore-shared``).
+
+Overview
+--------
+
+With VM templating, a single template VM serves as the starting point for
+new VMs. This allows for fast and efficient replication of VMs, resulting
+in fast startup times and reduced memory consumption.
+
+Conceptually, the VM state is frozen, to then be used as a basis for new
+VMs. The Copy-On-Write mechanism in the operating systems makes sure that
+new VMs are able to read template VM memory; however, any modifications
+stay private and don't modify the original template VM or any other
+created VM.
+
+!!! Security Alert !!!
+----------------------
+
+When effectively cloning VMs by VM templating, hardware identifiers
+(such as UUIDs and NIC MAC addresses), and similar data in the guest OS
+(such as machine IDs, SSH keys, certificates) that are supposed to be
+*unique* are no longer unique, which can be a security concern.
+
+Please be aware of these implications and how to mitigate them for your
+use case, which might involve vmgenid, hot(un)plug of NIC, etc..
+
+Memory configuration
+--------------------
+
+In order to create the template VM, we have to make sure that VM memory
+ends up in a file, from where it can be reused for the new VMs:
+
+Supply VM RAM via memory-backend-file, with ``share=on`` (modifications go
+to the file) and ``readonly=off`` (open the file writable). Note that
+``readonly=off`` is implicit.
+
+In the following command-line example, a 2GB VM is created, whereby VM RAM
+is to be stored in the ``template`` file.
+
+.. parsed-literal::
+
+    |qemu_system| [...] -m 2g \\
+        -object memory-backend-file,id=pc.ram,mem-path=template,size=2g,share=on,... \\
+        -machine q35,memory-backend=pc.ram
+
+If multiple memory backends are used (vNUMA, DIMMs), configure all
+memory backends accordingly.
+
+Once the VM is in the desired state, stop the VM and save other VM state,
+leaving the current state of VM RAM reside in the file.
+
+In order to have a new VM be based on a template VM, we have to
+configure VM RAM to be based on a template VM RAM file; however, the VM
+should not be able to modify file content.
+
+Supply VM RAM via memory-backend-file, with ``share=off`` (modifications
+stay private), ``readonly=on`` (open the file readonly) and ``rom=off``
+(don't make the memory readonly for the VM). Note that ``share=off`` is
+implicit and that other VM state has to be restored separately.
+
+In the following command-line example, a 2GB VM is created based on the
+existing 2GB file ``template``.
+
+.. parsed-literal::
+
+    |qemu_system| [...] -m 2g \\
+        -object memory-backend-file,id=pc.ram,mem-path=template,size=2g,readonly=on,rom=off,... \\
+        -machine q35,memory-backend=pc.ram
+
+If multiple memory backends are used (vNUMA, DIMMs), configure all
+memory backends accordingly.
+
+Note that ``-mem-path`` cannot be used for VM templating when creating the
+template VM or when starting new VMs based on a template VM.
+
+Incompatible features
+---------------------
+
+Some features are incompatible with VM templating, as the underlying file
+cannot be modified to discard VM RAM, or to actually share memory with
+another process.
+
+vhost-user and multi-process QEMU
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+vhost-user and multi-process QEMU are incompatible with VM templating.
+These technologies rely on shared memory, however, the template VMs
+don't actually share memory (``share=off``), even though they are
+file-based.
+
+virtio-balloon
+~~~~~~~~~~~~~~
+
+virtio-balloon inflation and "free page reporting" cannot discard VM RAM
+and will repeatedly report errors. While virtio-balloon can be used
+for template VMs (e.g., report VM RAM stats), "free page reporting"
+should be disabled and the balloon should not be inflated.
+
+virtio-mem
+~~~~~~~~~~
+
+virtio-mem cannot discard VM RAM that is managed by the virtio-mem
+device. virtio-mem will fail early when realizing the device. To use
+VM templating with virtio-mem, either hotplug virtio-mem devices to the
+new VM, or don't supply any memory to the template VM using virtio-mem
+(requested-size=0), not using a template VM file as memory backend for the
+virtio-mem device.
+
+VM migration
+~~~~~~~~~~~~
+
+For VM migration, "x-release-ram" similarly relies on discarding of VM
+RAM on the migration source to free up migrated RAM, and will
+repeatedly report errors.
+
+Postcopy live migration fails discarding VM RAM on the migration
+destination early and refuses to activate postcopy live migration. Note
+that postcopy live migration usually only works on selected filesystems
+(shmem/tmpfs, hugetlbfs) either way.
diff --git a/docs/tools/qemu-img.rst b/docs/tools/qemu-img.rst
index 15aeddc6d8..ca5a2773cf 100644
--- a/docs/tools/qemu-img.rst
+++ b/docs/tools/qemu-img.rst
@@ -106,7 +106,11 @@ by the used format or see the format descriptions below for details.
 
 .. option:: -c
 
-  Indicates that target image must be compressed (qcow format only).
+  Indicates that target image must be compressed (qcow/qcow2 and vmdk with
+  streamOptimized subformat only).
+
+  For qcow2, the compression algorithm can be specified with the ``-o
+  compression_type=...`` option (see below).
 
 .. option:: -h
 
@@ -776,7 +780,7 @@ Supported image file formats:
 
   QEMU image format, the most versatile format. Use it to have smaller
   images (useful if your filesystem does not supports holes, for example
-  on Windows), optional AES encryption, zlib based compression and
+  on Windows), optional AES encryption, zlib or zstd based compression and
   support of multiple VM snapshots.
 
   Supported options:
@@ -794,6 +798,17 @@ Supported image file formats:
   ``backing_fmt``
     Image format of the base image
 
+  ``compression_type``
+    This option configures which compression algorithm will be used for
+    compressed clusters on the image. Note that setting this option doesn't yet
+    cause the image to actually receive compressed writes. It is most commonly
+    used with the ``-c`` option of ``qemu-img convert``, but can also be used
+    with the ``compress`` filter driver or backup block jobs with compression
+    enabled.
+
+    Valid values are ``zlib`` and ``zstd``. For images that use
+    ``compat=0.10``, only ``zlib`` compression is available.
+
   ``encryption``
     If this option is set to ``on``, the image is encrypted with
     128-bit AES-CBC.
diff --git a/docs/tools/qemu-nbd.rst b/docs/tools/qemu-nbd.rst
index faf6349ea5..329f44d989 100644
--- a/docs/tools/qemu-nbd.rst
+++ b/docs/tools/qemu-nbd.rst
@@ -197,7 +197,9 @@ driver options if :option:`--image-opts` is specified.
 
 .. option:: -v, --verbose
 
-  Display extra debugging information.
+  Display extra debugging information. This option also keeps the original
+  *STDERR* stream open if the ``qemu-nbd`` process is daemonized due to
+  other options like :option:`--fork` or :option:`-c`.
 
 .. option:: -h, --help
 
diff --git a/ebpf/trace-events b/ebpf/trace-events
index 411b1e2be3..b3ad1a35f2 100644
--- a/ebpf/trace-events
+++ b/ebpf/trace-events
@@ -1,4 +1,4 @@
-# See docs/devel/tracing.txt for syntax documentation.
+# See docs/devel/tracing.rst for syntax documentation.
 
 # ebpf-rss.c
 ebpf_error(const char *s1, const char *s2) "error in %s: %s"
diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc
index 527e15e6ab..a44649f4f4 100644
--- a/fpu/softfloat-parts.c.inc
+++ b/fpu/softfloat-parts.c.inc
@@ -118,7 +118,8 @@ static void partsN(canonicalize)(FloatPartsN *p, float_status *status,
         } else {
             int shift = frac_normalize(p);
             p->cls = float_class_normal;
-            p->exp = fmt->frac_shift - fmt->exp_bias - shift + 1;
+            p->exp = fmt->frac_shift - fmt->exp_bias
+                   - shift + !fmt->m68k_denormal;
         }
     } else if (likely(p->exp < fmt->exp_max) || fmt->arm_althp) {
         p->cls = float_class_normal;
@@ -256,7 +257,7 @@ static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s,
             is_tiny = !frac_addi(&discard, p, inc);
         }
 
-        frac_shrjam(p, 1 - exp);
+        frac_shrjam(p, !fmt->m68k_denormal - exp);
 
         if (p->frac_lo & round_mask) {
             /* Need to recompute round-to-even/round-to-odd. */
@@ -287,7 +288,7 @@ static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s,
             p->frac_lo &= ~round_mask;
         }
 
-        exp = (p->frac_hi & DECOMPOSED_IMPLICIT_BIT) != 0;
+        exp = (p->frac_hi & DECOMPOSED_IMPLICIT_BIT) && !fmt->m68k_denormal;
         frac_shr(p, frac_shift);
 
         if (is_tiny && (flags & float_flag_inexact)) {
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index 0cc130ae9b..027a8e576d 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -517,6 +517,7 @@ typedef struct {
  *   round_mask: bits below lsb which must be rounded
  * The following optional modifiers are available:
  *   arm_althp: handle ARM Alternative Half Precision
+ *   m68k_denormal: explicit integer bit for extended precision may be 1
  */
 typedef struct {
     int exp_size;
@@ -526,6 +527,7 @@ typedef struct {
     int frac_size;
     int frac_shift;
     bool arm_althp;
+    bool m68k_denormal;
     uint64_t round_mask;
 } FloatFmt;
 
@@ -576,7 +578,12 @@ static const FloatFmt float128_params = {
 static const FloatFmt floatx80_params[3] = {
     [floatx80_precision_s] = { FLOATX80_PARAMS(23) },
     [floatx80_precision_d] = { FLOATX80_PARAMS(52) },
-    [floatx80_precision_x] = { FLOATX80_PARAMS(64) },
+    [floatx80_precision_x] = {
+        FLOATX80_PARAMS(64),
+#ifdef TARGET_M68K
+        .m68k_denormal = true,
+#endif
+    },
 };
 
 /* Unpack a float to parts, but do not canonicalize.  */
@@ -3126,6 +3133,15 @@ int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale,
     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
 }
 
+int8_t bfloat16_to_int8_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
+                               float_status *s)
+{
+    FloatParts64 p;
+
+    bfloat16_unpack_canonical(&p, a, s);
+    return parts_float_to_sint(&p, rmode, scale, INT8_MIN, INT8_MAX, s);
+}
+
 int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
 {
@@ -3392,6 +3408,11 @@ int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *s)
     return floatx80_to_int64_scalbn(a, float_round_to_zero, 0, s);
 }
 
+int8_t bfloat16_to_int8(bfloat16 a, float_status *s)
+{
+    return bfloat16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
+}
+
 int16_t bfloat16_to_int16(bfloat16 a, float_status *s)
 {
     return bfloat16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
@@ -3407,6 +3428,11 @@ int64_t bfloat16_to_int64(bfloat16 a, float_status *s)
     return bfloat16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
 }
 
+int8_t bfloat16_to_int8_round_to_zero(bfloat16 a, float_status *s)
+{
+    return bfloat16_to_int8_scalbn(a, float_round_to_zero, 0, s);
+}
+
 int16_t bfloat16_to_int16_round_to_zero(bfloat16 a, float_status *s)
 {
     return bfloat16_to_int16_scalbn(a, float_round_to_zero, 0, s);
@@ -3534,6 +3560,15 @@ uint64_t float64_to_uint64_scalbn(float64 a, FloatRoundMode rmode, int scale,
     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
 }
 
+uint8_t bfloat16_to_uint8_scalbn(bfloat16 a, FloatRoundMode rmode,
+                                 int scale, float_status *s)
+{
+    FloatParts64 p;
+
+    bfloat16_unpack_canonical(&p, a, s);
+    return parts_float_to_uint(&p, rmode, scale, UINT8_MAX, s);
+}
+
 uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode rmode,
                                    int scale, float_status *s)
 {
@@ -3759,6 +3794,11 @@ Int128 float128_to_uint128_round_to_zero(float128 a, float_status *s)
     return float128_to_uint128_scalbn(a, float_round_to_zero, 0, s);
 }
 
+uint8_t bfloat16_to_uint8(bfloat16 a, float_status *s)
+{
+    return bfloat16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s);
+}
+
 uint16_t bfloat16_to_uint16(bfloat16 a, float_status *s)
 {
     return bfloat16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
@@ -3774,6 +3814,11 @@ uint64_t bfloat16_to_uint64(bfloat16 a, float_status *s)
     return bfloat16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
 }
 
+uint8_t bfloat16_to_uint8_round_to_zero(bfloat16 a, float_status *s)
+{
+    return bfloat16_to_uint8_scalbn(a, float_round_to_zero, 0, s);
+}
+
 uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *s)
 {
     return bfloat16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
@@ -3929,6 +3974,11 @@ bfloat16 int16_to_bfloat16_scalbn(int16_t a, int scale, float_status *status)
     return int64_to_bfloat16_scalbn(a, scale, status);
 }
 
+bfloat16 int8_to_bfloat16_scalbn(int8_t a, int scale, float_status *status)
+{
+    return int64_to_bfloat16_scalbn(a, scale, status);
+}
+
 bfloat16 int64_to_bfloat16(int64_t a, float_status *status)
 {
     return int64_to_bfloat16_scalbn(a, 0, status);
@@ -3944,6 +3994,11 @@ bfloat16 int16_to_bfloat16(int16_t a, float_status *status)
     return int64_to_bfloat16_scalbn(a, 0, status);
 }
 
+bfloat16 int8_to_bfloat16(int8_t a, float_status *status)
+{
+    return int64_to_bfloat16_scalbn(a, 0, status);
+}
+
 float128 int128_to_float128(Int128 a, float_status *status)
 {
     FloatParts128 p = { };
@@ -4139,6 +4194,11 @@ bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int scale, float_status *status)
     return uint64_to_bfloat16_scalbn(a, scale, status);
 }
 
+bfloat16 uint8_to_bfloat16_scalbn(uint8_t a, int scale, float_status *status)
+{
+    return uint64_to_bfloat16_scalbn(a, scale, status);
+}
+
 bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status)
 {
     return uint64_to_bfloat16_scalbn(a, 0, status);
@@ -4154,6 +4214,11 @@ bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status)
     return uint64_to_bfloat16_scalbn(a, 0, status);
 }
 
+bfloat16 uint8_to_bfloat16(uint8_t a, float_status *status)
+{
+    return uint64_to_bfloat16_scalbn(a, 0, status);
+}
+
 float128 uint64_to_float128(uint64_t a, float_status *status)
 {
     FloatParts128 p;
diff --git a/fsdev/qemu-fsdev-throttle.c b/fsdev/qemu-fsdev-throttle.c
index 5c83a1cc09..d912da906d 100644
--- a/fsdev/qemu-fsdev-throttle.c
+++ b/fsdev/qemu-fsdev-throttle.c
@@ -94,20 +94,22 @@ void fsdev_throttle_init(FsThrottle *fst)
     }
 }
 
-void coroutine_fn fsdev_co_throttle_request(FsThrottle *fst, bool is_write,
+void coroutine_fn fsdev_co_throttle_request(FsThrottle *fst,
+                                            ThrottleDirection direction,
                                             struct iovec *iov, int iovcnt)
 {
+    assert(direction < THROTTLE_MAX);
     if (throttle_enabled(&fst->cfg)) {
-        if (throttle_schedule_timer(&fst->ts, &fst->tt, is_write) ||
-            !qemu_co_queue_empty(&fst->throttled_reqs[is_write])) {
-            qemu_co_queue_wait(&fst->throttled_reqs[is_write], NULL);
+        if (throttle_schedule_timer(&fst->ts, &fst->tt, direction) ||
+            !qemu_co_queue_empty(&fst->throttled_reqs[direction])) {
+            qemu_co_queue_wait(&fst->throttled_reqs[direction], NULL);
         }
 
-        throttle_account(&fst->ts, is_write, iov_size(iov, iovcnt));
+        throttle_account(&fst->ts, direction, iov_size(iov, iovcnt));
 
-        if (!qemu_co_queue_empty(&fst->throttled_reqs[is_write]) &&
-            !throttle_schedule_timer(&fst->ts, &fst->tt, is_write)) {
-            qemu_co_queue_next(&fst->throttled_reqs[is_write]);
+        if (!qemu_co_queue_empty(&fst->throttled_reqs[direction]) &&
+            !throttle_schedule_timer(&fst->ts, &fst->tt, direction)) {
+            qemu_co_queue_next(&fst->throttled_reqs[direction]);
         }
     }
 }
diff --git a/fsdev/qemu-fsdev-throttle.h b/fsdev/qemu-fsdev-throttle.h
index a21aecddc7..daa8ca2494 100644
--- a/fsdev/qemu-fsdev-throttle.h
+++ b/fsdev/qemu-fsdev-throttle.h
@@ -23,14 +23,14 @@ typedef struct FsThrottle {
     ThrottleState ts;
     ThrottleTimers tt;
     ThrottleConfig cfg;
-    CoQueue      throttled_reqs[2];
+    CoQueue      throttled_reqs[THROTTLE_MAX];
 } FsThrottle;
 
 int fsdev_throttle_parse_opts(QemuOpts *, FsThrottle *, Error **);
 
 void fsdev_throttle_init(FsThrottle *);
 
-void coroutine_fn fsdev_co_throttle_request(FsThrottle *, bool ,
+void coroutine_fn fsdev_co_throttle_request(FsThrottle *, ThrottleDirection ,
                                             struct iovec *, int);
 
 void fsdev_throttle_cleanup(FsThrottle *);
diff --git a/gdbstub/meson.build b/gdbstub/meson.build
index 77762e0b3e..9500b9dc4e 100644
--- a/gdbstub/meson.build
+++ b/gdbstub/meson.build
@@ -14,8 +14,8 @@ gdb_system_ss = ss.source_set()
 gdb_user_ss.add(files('gdbstub.c', 'user.c'))
 gdb_system_ss.add(files('gdbstub.c', 'softmmu.c'))
 
-gdb_user_ss = gdb_user_ss.apply(config_host, strict: false)
-gdb_system_ss = gdb_system_ss.apply(config_host, strict: false)
+gdb_user_ss = gdb_user_ss.apply(config_targetos, strict: false)
+gdb_system_ss = gdb_system_ss.apply(config_targetos, strict: false)
 
 libgdb_user = static_library('gdb_user',
                              gdb_user_ss.sources() + genh,
diff --git a/hmp-commands.hx b/hmp-commands.hx
index 2cbd0f77a0..63eac22734 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -1296,6 +1296,9 @@ ERST
         .name       = "netdev_add",
         .args_type  = "netdev:O",
         .params     = "[user|tap|socket|stream|dgram|vde|bridge|hubport|netmap|vhost-user"
+#ifdef CONFIG_AF_XDP
+                      "|af-xdp"
+#endif
 #ifdef CONFIG_VMNET
                       "|vmnet-host|vmnet-shared|vmnet-bridged"
 #endif
diff --git a/host/include/aarch64/host/cpuinfo.h b/host/include/aarch64/host/cpuinfo.h
index 769626b098..fe671534e4 100644
--- a/host/include/aarch64/host/cpuinfo.h
+++ b/host/include/aarch64/host/cpuinfo.h
@@ -10,6 +10,8 @@
 #define CPUINFO_LSE             (1u << 1)
 #define CPUINFO_LSE2            (1u << 2)
 #define CPUINFO_AES             (1u << 3)
+#define CPUINFO_PMULL           (1u << 4)
+#define CPUINFO_BTI             (1u << 5)
 
 /* Initialized with a constructor. */
 extern unsigned cpuinfo;
diff --git a/host/include/aarch64/host/crypto/clmul.h b/host/include/aarch64/host/crypto/clmul.h
new file mode 100644
index 0000000000..bb516d8b2f
--- /dev/null
+++ b/host/include/aarch64/host/crypto/clmul.h
@@ -0,0 +1,41 @@
+/*
+ * AArch64 specific clmul acceleration.
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef AARCH64_HOST_CRYPTO_CLMUL_H
+#define AARCH64_HOST_CRYPTO_CLMUL_H
+
+#include "host/cpuinfo.h"
+#include <arm_neon.h>
+
+/*
+ * 64x64->128 pmull is available with FEAT_PMULL.
+ * Both FEAT_AES and FEAT_PMULL are covered under the same macro.
+ */
+#ifdef __ARM_FEATURE_AES
+# define HAVE_CLMUL_ACCEL  true
+#else
+# define HAVE_CLMUL_ACCEL  likely(cpuinfo & CPUINFO_PMULL)
+#endif
+#if !defined(__ARM_FEATURE_AES) && defined(CONFIG_ARM_AES_BUILTIN)
+# define ATTR_CLMUL_ACCEL  __attribute__((target("+crypto")))
+#else
+# define ATTR_CLMUL_ACCEL
+#endif
+
+static inline Int128 ATTR_CLMUL_ACCEL
+clmul_64_accel(uint64_t n, uint64_t m)
+{
+    union { poly128_t v; Int128 s; } u;
+
+#ifdef CONFIG_ARM_AES_BUILTIN
+    u.v = vmull_p64((poly64_t)n, (poly64_t)m);
+#else
+    asm(".arch_extension aes\n\t"
+        "pmull %0.1q, %1.1d, %2.1d" : "=w"(u.v) : "w"(n), "w"(m));
+#endif
+    return u.s;
+}
+
+#endif /* AARCH64_HOST_CRYPTO_CLMUL_H */
diff --git a/host/include/generic/host/crypto/clmul.h b/host/include/generic/host/crypto/clmul.h
new file mode 100644
index 0000000000..915bfb88d3
--- /dev/null
+++ b/host/include/generic/host/crypto/clmul.h
@@ -0,0 +1,15 @@
+/*
+ * No host specific carry-less multiply acceleration.
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef GENERIC_HOST_CRYPTO_CLMUL_H
+#define GENERIC_HOST_CRYPTO_CLMUL_H
+
+#define HAVE_CLMUL_ACCEL  false
+#define ATTR_CLMUL_ACCEL
+
+Int128 clmul_64_accel(uint64_t, uint64_t)
+    QEMU_ERROR("unsupported accel");
+
+#endif /* GENERIC_HOST_CRYPTO_CLMUL_H */
diff --git a/host/include/i386/host/cpuinfo.h b/host/include/i386/host/cpuinfo.h
index 073d0a426f..b89e6d2e55 100644
--- a/host/include/i386/host/cpuinfo.h
+++ b/host/include/i386/host/cpuinfo.h
@@ -1,6 +1,6 @@
 /*
  * SPDX-License-Identifier: GPL-2.0-or-later
- * Host specific cpu indentification for x86.
+ * Host specific cpu identification for x86.
  */
 
 #ifndef HOST_CPUINFO_H
@@ -27,6 +27,7 @@
 #define CPUINFO_ATOMIC_VMOVDQA  (1u << 16)
 #define CPUINFO_ATOMIC_VMOVDQU  (1u << 17)
 #define CPUINFO_AES             (1u << 18)
+#define CPUINFO_PCLMUL          (1u << 19)
 
 /* Initialized with a constructor. */
 extern unsigned cpuinfo;
diff --git a/host/include/i386/host/crypto/clmul.h b/host/include/i386/host/crypto/clmul.h
new file mode 100644
index 0000000000..dc3c814797
--- /dev/null
+++ b/host/include/i386/host/crypto/clmul.h
@@ -0,0 +1,29 @@
+/*
+ * x86 specific clmul acceleration.
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef X86_HOST_CRYPTO_CLMUL_H
+#define X86_HOST_CRYPTO_CLMUL_H
+
+#include "host/cpuinfo.h"
+#include <immintrin.h>
+
+#if defined(__PCLMUL__)
+# define HAVE_CLMUL_ACCEL  true
+# define ATTR_CLMUL_ACCEL
+#else
+# define HAVE_CLMUL_ACCEL  likely(cpuinfo & CPUINFO_PCLMUL)
+# define ATTR_CLMUL_ACCEL  __attribute__((target("pclmul")))
+#endif
+
+static inline Int128 ATTR_CLMUL_ACCEL
+clmul_64_accel(uint64_t n, uint64_t m)
+{
+    union { __m128i v; Int128 s; } u;
+
+    u.v = _mm_clmulepi64_si128(_mm_set_epi64x(0, n), _mm_set_epi64x(0, m), 0);
+    return u.s;
+}
+
+#endif /* X86_HOST_CRYPTO_CLMUL_H */
diff --git a/host/include/ppc/host/cpuinfo.h b/host/include/ppc/host/cpuinfo.h
index 29ee7f9ef8..38b8eabe2a 100644
--- a/host/include/ppc/host/cpuinfo.h
+++ b/host/include/ppc/host/cpuinfo.h
@@ -1,6 +1,6 @@
 /*
  * SPDX-License-Identifier: GPL-2.0-or-later
- * Host specific cpu indentification for ppc.
+ * Host specific cpu identification for ppc.
  */
 
 #ifndef HOST_CPUINFO_H
diff --git a/host/include/x86_64/host/crypto/clmul.h b/host/include/x86_64/host/crypto/clmul.h
new file mode 100644
index 0000000000..f25eced416
--- /dev/null
+++ b/host/include/x86_64/host/crypto/clmul.h
@@ -0,0 +1 @@
+#include "host/include/i386/host/crypto/clmul.h"
diff --git a/hw/9pfs/cofile.c b/hw/9pfs/cofile.c
index 9c5344039e..71174c3e4a 100644
--- a/hw/9pfs/cofile.c
+++ b/hw/9pfs/cofile.c
@@ -252,7 +252,7 @@ int coroutine_fn v9fs_co_pwritev(V9fsPDU *pdu, V9fsFidState *fidp,
     if (v9fs_request_cancelled(pdu)) {
         return -EINTR;
     }
-    fsdev_co_throttle_request(s->ctx.fst, true, iov, iovcnt);
+    fsdev_co_throttle_request(s->ctx.fst, THROTTLE_WRITE, iov, iovcnt);
     v9fs_co_run_in_worker(
         {
             err = s->ops->pwritev(&s->ctx, &fidp->fs, iov, iovcnt, offset);
@@ -272,7 +272,7 @@ int coroutine_fn v9fs_co_preadv(V9fsPDU *pdu, V9fsFidState *fidp,
     if (v9fs_request_cancelled(pdu)) {
         return -EINTR;
     }
-    fsdev_co_throttle_request(s->ctx.fst, false, iov, iovcnt);
+    fsdev_co_throttle_request(s->ctx.fst, THROTTLE_READ, iov, iovcnt);
     v9fs_co_run_in_worker(
         {
             err = s->ops->preadv(&s->ctx, &fidp->fs, iov, iovcnt, offset);
diff --git a/hw/Kconfig b/hw/Kconfig
index ba62ff6417..9ca7b38c31 100644
--- a/hw/Kconfig
+++ b/hw/Kconfig
@@ -38,6 +38,7 @@ source smbios/Kconfig
 source ssi/Kconfig
 source timer/Kconfig
 source tpm/Kconfig
+source ufs/Kconfig
 source usb/Kconfig
 source virtio/Kconfig
 source vfio/Kconfig
diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c
index ea331a20d1..af66bde0f5 100644
--- a/hw/acpi/aml-build.c
+++ b/hw/acpi/aml-build.c
@@ -312,7 +312,7 @@ build_prepend_package_length(GArray *package, unsigned length, bool incl_self)
         /*
          * PkgLength is the length of the inclusive length of the data
          * and PkgLength's length itself when used for terms with
-         * explitit length.
+         * explicit length.
          */
         length += length_bytes;
     }
@@ -680,7 +680,7 @@ Aml *aml_store(Aml *val, Aml *target)
  *   "Op Operand Operand Target"
  * pattern.
  *
- * Returns: The newly allocated and composed according to patter Aml object.
+ * Returns: The newly allocated and composed according to pattern Aml object.
  */
 static Aml *
 build_opcode_2arg_dst(uint8_t op, Aml *arg1, Aml *arg2, Aml *dst)
@@ -2159,7 +2159,7 @@ void build_fadt(GArray *tbl, BIOSLinker *linker, const AcpiFadtData *f,
         /* FADT Minor Version */
         build_append_int_noprefix(tbl, f->minor_ver, 1);
     } else {
-        build_append_int_noprefix(tbl, 0, 3); /* Reserved upto ACPI 5.0 */
+        build_append_int_noprefix(tbl, 0, 3); /* Reserved up to ACPI 5.0 */
     }
     build_append_int_noprefix(tbl, 0, 8); /* X_FIRMWARE_CTRL */
 
diff --git a/hw/acpi/hmat.c b/hw/acpi/hmat.c
index 3a6d51282a..2d5e199ba9 100644
--- a/hw/acpi/hmat.c
+++ b/hw/acpi/hmat.c
@@ -82,7 +82,7 @@ static void build_hmat_lb(GArray *table_data, HMAT_LB_Info *hmat_lb,
     uint32_t base;
     /* Length in bytes for entire structure */
     uint32_t lb_length
-        = 32 /* Table length upto and including Entry Base Unit */
+        = 32 /* Table length up to and including Entry Base Unit */
         + 4 * num_initiator /* Initiator Proximity Domain List */
         + 4 * num_target /* Target Proximity Domain List */
         + 2 * num_initiator * num_target; /* Latency or Bandwidth Entries */
diff --git a/hw/acpi/nvdimm.c b/hw/acpi/nvdimm.c
index a3b25a92f3..9ba90806f2 100644
--- a/hw/acpi/nvdimm.c
+++ b/hw/acpi/nvdimm.c
@@ -670,7 +670,8 @@ static void nvdimm_dsm_label_size(NVDIMMDevice *nvdimm, hwaddr dsm_mem_addr)
 }
 
 static uint32_t nvdimm_rw_label_data_check(NVDIMMDevice *nvdimm,
-                                           uint32_t offset, uint32_t length)
+                                           uint32_t offset, uint32_t length,
+                                           bool is_write)
 {
     uint32_t ret = NVDIMM_DSM_RET_STATUS_INVALID;
 
@@ -690,6 +691,10 @@ static uint32_t nvdimm_rw_label_data_check(NVDIMMDevice *nvdimm,
         return ret;
     }
 
+    if (is_write && nvdimm->readonly) {
+        return NVDIMM_DSM_RET_STATUS_UNSUPPORT;
+    }
+
     return NVDIMM_DSM_RET_STATUS_SUCCESS;
 }
 
@@ -713,7 +718,7 @@ static void nvdimm_dsm_get_label_data(NVDIMMDevice *nvdimm, NvdimmDsmIn *in,
                                  get_label_data->length);
 
     status = nvdimm_rw_label_data_check(nvdimm, get_label_data->offset,
-                                        get_label_data->length);
+                                        get_label_data->length, false);
     if (status != NVDIMM_DSM_RET_STATUS_SUCCESS) {
         nvdimm_dsm_no_payload(status, dsm_mem_addr);
         return;
@@ -752,7 +757,7 @@ static void nvdimm_dsm_set_label_data(NVDIMMDevice *nvdimm, NvdimmDsmIn *in,
                                   set_label_data->length);
 
     status = nvdimm_rw_label_data_check(nvdimm, set_label_data->offset,
-                                        set_label_data->length);
+                                        set_label_data->length, true);
     if (status != NVDIMM_DSM_RET_STATUS_SUCCESS) {
         nvdimm_dsm_no_payload(status, dsm_mem_addr);
         return;
@@ -1097,7 +1102,7 @@ static void nvdimm_build_common_dsm(Aml *dev,
      * be treated as an integer. Moreover, the integer size depends on
      * DSDT tables revision number. If revision number is < 2, integer
      * size is 32 bits, otherwise it is 64 bits.
-     * Because of this CreateField() canot be used if RLEN < Integer Size.
+     * Because of this CreateField() cannot be used if RLEN < Integer Size.
      *
      * Also please note that APCI ASL operator SizeOf() doesn't support
      * Integer and there isn't any other way to figure out the Integer
diff --git a/hw/arm/aspeed.c b/hw/arm/aspeed.c
index 263626abea..f8ba67531a 100644
--- a/hw/arm/aspeed.c
+++ b/hw/arm/aspeed.c
@@ -15,6 +15,7 @@
 #include "hw/arm/aspeed.h"
 #include "hw/arm/aspeed_soc.h"
 #include "hw/arm/aspeed_eeprom.h"
+#include "hw/block/flash.h"
 #include "hw/i2c/i2c_mux_pca954x.h"
 #include "hw/i2c/smbus_eeprom.h"
 #include "hw/misc/pca9552.h"
@@ -47,6 +48,13 @@ struct AspeedMachineState {
     char *spi_model;
 };
 
+/* On 32-bit hosts, lower RAM to 1G because of the 2047 MB limit */
+#if HOST_LONG_BITS == 32
+#define ASPEED_RAM_SIZE(sz) MIN((sz), 1 * GiB)
+#else
+#define ASPEED_RAM_SIZE(sz) (sz)
+#endif
+
 /* Palmetto hardware value: 0x120CE416 */
 #define PALMETTO_BMC_HW_STRAP1 (                                        \
         SCU_AST2400_HW_STRAP_DRAM_SIZE(DRAM_SIZE_256MB) |               \
@@ -300,17 +308,14 @@ void aspeed_board_init_flashes(AspeedSMCState *s, const char *flashtype,
 
     for (i = 0; i < count; ++i) {
         DriveInfo *dinfo = drive_get(IF_MTD, 0, unit0 + i);
-        qemu_irq cs_line;
         DeviceState *dev;
 
         dev = qdev_new(flashtype);
         if (dinfo) {
             qdev_prop_set_drive(dev, "drive", blk_by_legacy_dinfo(dinfo));
         }
+        qdev_prop_set_uint8(dev, "cs", i);
         qdev_realize_and_unref(dev, BUS(s->spi), &error_fatal);
-
-        cs_line = qdev_get_gpio_in_named(dev, SSI_GPIO_CS, 0);
-        qdev_connect_gpio_out_named(DEVICE(s), "cs", i, cs_line);
     }
 }
 
@@ -392,12 +397,14 @@ static void aspeed_machine_init(MachineState *machine)
     connect_serial_hds_to_uarts(bmc);
     qdev_realize(DEVICE(&bmc->soc), NULL, &error_abort);
 
-    aspeed_board_init_flashes(&bmc->soc.fmc,
+    if (defaults_enabled()) {
+        aspeed_board_init_flashes(&bmc->soc.fmc,
                               bmc->fmc_model ? bmc->fmc_model : amc->fmc_model,
                               amc->num_cs, 0);
-    aspeed_board_init_flashes(&bmc->soc.spi[0],
+        aspeed_board_init_flashes(&bmc->soc.spi[0],
                               bmc->spi_model ? bmc->spi_model : amc->spi_model,
                               1, amc->num_cs);
+    }
 
     if (machine->kernel_filename && sc->num_cpus > 1) {
         /* With no u-boot we must set up a boot stub for the secondary CPU */
@@ -430,11 +437,12 @@ static void aspeed_machine_init(MachineState *machine)
     }
 
     if (!bmc->mmio_exec) {
-        DriveInfo *mtd0 = drive_get(IF_MTD, 0, 0);
+        DeviceState *dev = ssi_get_cs(bmc->soc.fmc.spi, 0);
+        BlockBackend *fmc0 = dev ? m25p80_get_blk(dev) : NULL;
 
-        if (mtd0) {
+        if (fmc0) {
             uint64_t rom_size = memory_region_size(&bmc->soc.spi_boot);
-            aspeed_install_boot_rom(bmc, blk_by_legacy_dinfo(mtd0), rom_size);
+            aspeed_install_boot_rom(bmc, fmc0, rom_size);
         }
     }
 
@@ -1423,12 +1431,7 @@ static void aspeed_machine_rainier_class_init(ObjectClass *oc, void *data)
         aspeed_soc_num_cpus(amc->soc_name);
 };
 
-/* On 32-bit hosts, lower RAM to 1G because of the 2047 MB limit */
-#if HOST_LONG_BITS == 32
-#define FUJI_BMC_RAM_SIZE (1 * GiB)
-#else
-#define FUJI_BMC_RAM_SIZE (2 * GiB)
-#endif
+#define FUJI_BMC_RAM_SIZE ASPEED_RAM_SIZE(2 * GiB)
 
 static void aspeed_machine_fuji_class_init(ObjectClass *oc, void *data)
 {
@@ -1450,12 +1453,7 @@ static void aspeed_machine_fuji_class_init(ObjectClass *oc, void *data)
         aspeed_soc_num_cpus(amc->soc_name);
 };
 
-/* On 32-bit hosts, lower RAM to 1G because of the 2047 MB limit */
-#if HOST_LONG_BITS == 32
-#define BLETCHLEY_BMC_RAM_SIZE (1 * GiB)
-#else
-#define BLETCHLEY_BMC_RAM_SIZE (2 * GiB)
-#endif
+#define BLETCHLEY_BMC_RAM_SIZE ASPEED_RAM_SIZE(2 * GiB)
 
 static void aspeed_machine_bletchley_class_init(ObjectClass *oc, void *data)
 {
diff --git a/hw/arm/stellaris.c b/hw/arm/stellaris.c
index f7e99baf62..aa5b0ddfaa 100644
--- a/hw/arm/stellaris.c
+++ b/hw/arm/stellaris.c
@@ -1235,14 +1235,15 @@ static void stellaris_init(MachineState *ms, stellaris_board_info *board)
 
             dinfo = drive_get(IF_SD, 0, 0);
             blk = dinfo ? blk_by_legacy_dinfo(dinfo) : NULL;
-            carddev = qdev_new(TYPE_SD_CARD);
+            carddev = qdev_new(TYPE_SD_CARD_SPI);
             qdev_prop_set_drive_err(carddev, "drive", blk, &error_fatal);
-            qdev_prop_set_bit(carddev, "spi", true);
             qdev_realize_and_unref(carddev,
                                    qdev_get_child_bus(sddev, "sd-bus"),
                                    &error_fatal);
 
-            ssddev = ssi_create_peripheral(bus, "ssd0323");
+            ssddev = qdev_new("ssd0323");
+            qdev_prop_set_uint8(ssddev, "cs", 1);
+            qdev_realize_and_unref(ssddev, bus, &error_fatal);
 
             gpio_d_splitter = qdev_new(TYPE_SPLIT_IRQ);
             qdev_prop_set_uint32(gpio_d_splitter, "num-lines", 2);
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index a13c658bbf..8ad78b23c2 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -211,6 +211,7 @@ static const char *valid_cpus[] = {
     ARM_CPU_TYPE_NAME("cortex-a55"),
     ARM_CPU_TYPE_NAME("cortex-a72"),
     ARM_CPU_TYPE_NAME("cortex-a76"),
+    ARM_CPU_TYPE_NAME("cortex-a710"),
     ARM_CPU_TYPE_NAME("a64fx"),
     ARM_CPU_TYPE_NAME("neoverse-n1"),
     ARM_CPU_TYPE_NAME("neoverse-v1"),
diff --git a/hw/arm/xilinx_zynq.c b/hw/arm/xilinx_zynq.c
index 3190cc0b8d..8dc2ea83a9 100644
--- a/hw/arm/xilinx_zynq.c
+++ b/hw/arm/xilinx_zynq.c
@@ -164,6 +164,7 @@ static inline int zynq_init_spi_flashes(uint32_t base_addr, qemu_irq irq,
                                         blk_by_legacy_dinfo(dinfo),
                                         &error_fatal);
             }
+            qdev_prop_set_uint8(flash_dev, "cs", j);
             qdev_realize_and_unref(flash_dev, BUS(spi), &error_fatal);
 
             cs_line = qdev_get_gpio_in_named(flash_dev, SSI_GPIO_CS, 0);
diff --git a/hw/arm/xlnx-versal-virt.c b/hw/arm/xlnx-versal-virt.c
index 1ee2b8697f..88c561ff63 100644
--- a/hw/arm/xlnx-versal-virt.c
+++ b/hw/arm/xlnx-versal-virt.c
@@ -740,6 +740,7 @@ static void versal_virt_init(MachineState *machine)
             qdev_prop_set_drive_err(flash_dev, "drive",
                                     blk_by_legacy_dinfo(dinfo), &error_fatal);
         }
+        qdev_prop_set_uint8(flash_dev, "cs", i);
         qdev_realize_and_unref(flash_dev, spi_bus, &error_fatal);
 
         cs_line = qdev_get_gpio_in_named(flash_dev, SSI_GPIO_CS, 0);
diff --git a/hw/arm/xlnx-versal.c b/hw/arm/xlnx-versal.c
index 60bf5fe657..fa556d8764 100644
--- a/hw/arm/xlnx-versal.c
+++ b/hw/arm/xlnx-versal.c
@@ -27,7 +27,7 @@
 #define XLNX_VERSAL_RCPU_TYPE ARM_CPU_TYPE_NAME("cortex-r5f")
 #define GEM_REVISION        0x40070106
 
-#define VERSAL_NUM_PMC_APB_IRQS 3
+#define VERSAL_NUM_PMC_APB_IRQS 18
 #define NUM_OSPI_IRQ_LINES 3
 
 static void versal_create_apu_cpus(Versal *s)
@@ -341,6 +341,7 @@ static void versal_create_pmc_apb_irq_orgate(Versal *s, qemu_irq *pic)
      *  - RTC
      *  - BBRAM
      *  - PMC SLCR
+     *  - CFRAME regs (input 3 - 17 to the orgate)
      */
     object_initialize_child(OBJECT(s), "pmc-apb-irq-orgate",
                             &s->pmc.apb_irq_orgate, TYPE_OR_IRQ);
@@ -570,6 +571,157 @@ static void versal_create_ospi(Versal *s, qemu_irq *pic)
     qdev_connect_gpio_out(orgate, 0, pic[VERSAL_OSPI_IRQ]);
 }
 
+static void versal_create_cfu(Versal *s, qemu_irq *pic)
+{
+    SysBusDevice *sbd;
+    DeviceState *dev;
+    int i;
+    const struct {
+        uint64_t reg_base;
+        uint64_t fdri_base;
+    } cframe_addr[] = {
+        { MM_PMC_CFRAME0_REG, MM_PMC_CFRAME0_FDRI },
+        { MM_PMC_CFRAME1_REG, MM_PMC_CFRAME1_FDRI },
+        { MM_PMC_CFRAME2_REG, MM_PMC_CFRAME2_FDRI },
+        { MM_PMC_CFRAME3_REG, MM_PMC_CFRAME3_FDRI },
+        { MM_PMC_CFRAME4_REG, MM_PMC_CFRAME4_FDRI },
+        { MM_PMC_CFRAME5_REG, MM_PMC_CFRAME5_FDRI },
+        { MM_PMC_CFRAME6_REG, MM_PMC_CFRAME6_FDRI },
+        { MM_PMC_CFRAME7_REG, MM_PMC_CFRAME7_FDRI },
+        { MM_PMC_CFRAME8_REG, MM_PMC_CFRAME8_FDRI },
+        { MM_PMC_CFRAME9_REG, MM_PMC_CFRAME9_FDRI },
+        { MM_PMC_CFRAME10_REG, MM_PMC_CFRAME10_FDRI },
+        { MM_PMC_CFRAME11_REG, MM_PMC_CFRAME11_FDRI },
+        { MM_PMC_CFRAME12_REG, MM_PMC_CFRAME12_FDRI },
+        { MM_PMC_CFRAME13_REG, MM_PMC_CFRAME13_FDRI },
+        { MM_PMC_CFRAME14_REG, MM_PMC_CFRAME14_FDRI },
+    };
+    const struct {
+        uint32_t blktype0_frames;
+        uint32_t blktype1_frames;
+        uint32_t blktype2_frames;
+        uint32_t blktype3_frames;
+        uint32_t blktype4_frames;
+        uint32_t blktype5_frames;
+        uint32_t blktype6_frames;
+    } cframe_cfg[] = {
+        [0] = { 34111, 3528, 12800, 11, 5, 1, 1 },
+        [1] = { 38498, 3841, 15361, 13, 7, 3, 1 },
+        [2] = { 38498, 3841, 15361, 13, 7, 3, 1 },
+        [3] = { 38498, 3841, 15361, 13, 7, 3, 1 },
+    };
+
+    /* CFU FDRO */
+    object_initialize_child(OBJECT(s), "cfu-fdro", &s->pmc.cfu_fdro,
+                            TYPE_XLNX_VERSAL_CFU_FDRO);
+    sbd = SYS_BUS_DEVICE(&s->pmc.cfu_fdro);
+
+    sysbus_realize(sbd, &error_fatal);
+    memory_region_add_subregion(&s->mr_ps, MM_PMC_CFU_FDRO,
+                                sysbus_mmio_get_region(sbd, 0));
+
+    /* CFRAME REG */
+    for (i = 0; i < ARRAY_SIZE(s->pmc.cframe); i++) {
+        g_autofree char *name = g_strdup_printf("cframe%d", i);
+
+        object_initialize_child(OBJECT(s), name, &s->pmc.cframe[i],
+                                TYPE_XLNX_VERSAL_CFRAME_REG);
+
+        sbd = SYS_BUS_DEVICE(&s->pmc.cframe[i]);
+        dev = DEVICE(&s->pmc.cframe[i]);
+
+        if (i < ARRAY_SIZE(cframe_cfg)) {
+            object_property_set_int(OBJECT(dev), "blktype0-frames",
+                                    cframe_cfg[i].blktype0_frames,
+                                    &error_abort);
+            object_property_set_int(OBJECT(dev), "blktype1-frames",
+                                    cframe_cfg[i].blktype1_frames,
+                                    &error_abort);
+            object_property_set_int(OBJECT(dev), "blktype2-frames",
+                                    cframe_cfg[i].blktype2_frames,
+                                    &error_abort);
+            object_property_set_int(OBJECT(dev), "blktype3-frames",
+                                    cframe_cfg[i].blktype3_frames,
+                                    &error_abort);
+            object_property_set_int(OBJECT(dev), "blktype4-frames",
+                                    cframe_cfg[i].blktype4_frames,
+                                    &error_abort);
+            object_property_set_int(OBJECT(dev), "blktype5-frames",
+                                    cframe_cfg[i].blktype5_frames,
+                                    &error_abort);
+            object_property_set_int(OBJECT(dev), "blktype6-frames",
+                                    cframe_cfg[i].blktype6_frames,
+                                    &error_abort);
+        }
+        object_property_set_link(OBJECT(dev), "cfu-fdro",
+                                 OBJECT(&s->pmc.cfu_fdro), &error_fatal);
+
+        sysbus_realize(SYS_BUS_DEVICE(dev), &error_fatal);
+
+        memory_region_add_subregion(&s->mr_ps, cframe_addr[i].reg_base,
+                                    sysbus_mmio_get_region(sbd, 0));
+        memory_region_add_subregion(&s->mr_ps, cframe_addr[i].fdri_base,
+                                    sysbus_mmio_get_region(sbd, 1));
+        sysbus_connect_irq(sbd, 0,
+                           qdev_get_gpio_in(DEVICE(&s->pmc.apb_irq_orgate),
+                                            3 + i));
+    }
+
+    /* CFRAME BCAST */
+    object_initialize_child(OBJECT(s), "cframe_bcast", &s->pmc.cframe_bcast,
+                            TYPE_XLNX_VERSAL_CFRAME_BCAST_REG);
+
+    sbd = SYS_BUS_DEVICE(&s->pmc.cframe_bcast);
+    dev = DEVICE(&s->pmc.cframe_bcast);
+
+    for (i = 0; i < ARRAY_SIZE(s->pmc.cframe); i++) {
+        g_autofree char *propname = g_strdup_printf("cframe%d", i);
+        object_property_set_link(OBJECT(dev), propname,
+                                 OBJECT(&s->pmc.cframe[i]), &error_fatal);
+    }
+
+    sysbus_realize(sbd, &error_fatal);
+
+    memory_region_add_subregion(&s->mr_ps, MM_PMC_CFRAME_BCAST_REG,
+                                sysbus_mmio_get_region(sbd, 0));
+    memory_region_add_subregion(&s->mr_ps, MM_PMC_CFRAME_BCAST_FDRI,
+                                sysbus_mmio_get_region(sbd, 1));
+
+    /* CFU APB */
+    object_initialize_child(OBJECT(s), "cfu-apb", &s->pmc.cfu_apb,
+                            TYPE_XLNX_VERSAL_CFU_APB);
+    sbd = SYS_BUS_DEVICE(&s->pmc.cfu_apb);
+    dev = DEVICE(&s->pmc.cfu_apb);
+
+    for (i = 0; i < ARRAY_SIZE(s->pmc.cframe); i++) {
+        g_autofree char *propname = g_strdup_printf("cframe%d", i);
+        object_property_set_link(OBJECT(dev), propname,
+                                 OBJECT(&s->pmc.cframe[i]), &error_fatal);
+    }
+
+    sysbus_realize(sbd, &error_fatal);
+    memory_region_add_subregion(&s->mr_ps, MM_PMC_CFU_APB,
+                                sysbus_mmio_get_region(sbd, 0));
+    memory_region_add_subregion(&s->mr_ps, MM_PMC_CFU_STREAM,
+                                sysbus_mmio_get_region(sbd, 1));
+    memory_region_add_subregion(&s->mr_ps, MM_PMC_CFU_STREAM_2,
+                                sysbus_mmio_get_region(sbd, 2));
+    sysbus_connect_irq(sbd, 0, pic[VERSAL_CFU_IRQ_0]);
+
+    /* CFU SFR */
+    object_initialize_child(OBJECT(s), "cfu-sfr", &s->pmc.cfu_sfr,
+                            TYPE_XLNX_VERSAL_CFU_SFR);
+
+    sbd = SYS_BUS_DEVICE(&s->pmc.cfu_sfr);
+
+    object_property_set_link(OBJECT(&s->pmc.cfu_sfr),
+                            "cfu", OBJECT(&s->pmc.cfu_apb), &error_abort);
+
+    sysbus_realize(sbd, &error_fatal);
+    memory_region_add_subregion(&s->mr_ps, MM_PMC_CFU_SFR,
+                                sysbus_mmio_get_region(sbd, 0));
+}
+
 static void versal_create_crl(Versal *s, qemu_irq *pic)
 {
     SysBusDevice *sbd;
@@ -763,6 +915,7 @@ static void versal_realize(DeviceState *dev, Error **errp)
     versal_create_pmc_iou_slcr(s, pic);
     versal_create_ospi(s, pic);
     versal_create_crl(s, pic);
+    versal_create_cfu(s, pic);
     versal_map_ddr(s);
     versal_unimp(s);
 
diff --git a/hw/arm/xlnx-zcu102.c b/hw/arm/xlnx-zcu102.c
index 4c84bb932a..21483f75fd 100644
--- a/hw/arm/xlnx-zcu102.c
+++ b/hw/arm/xlnx-zcu102.c
@@ -201,6 +201,7 @@ static void xlnx_zcu102_init(MachineState *machine)
             qdev_prop_set_drive_err(flash_dev, "drive",
                                     blk_by_legacy_dinfo(dinfo), &error_fatal);
         }
+        qdev_prop_set_uint8(flash_dev, "cs", i);
         qdev_realize_and_unref(flash_dev, spi_bus, &error_fatal);
 
         cs_line = qdev_get_gpio_in_named(flash_dev, SSI_GPIO_CS, 0);
@@ -224,6 +225,7 @@ static void xlnx_zcu102_init(MachineState *machine)
             qdev_prop_set_drive_err(flash_dev, "drive",
                                     blk_by_legacy_dinfo(dinfo), &error_fatal);
         }
+        qdev_prop_set_uint8(flash_dev, "cs", i);
         qdev_realize_and_unref(flash_dev, spi_bus, &error_fatal);
 
         cs_line = qdev_get_gpio_in_named(flash_dev, SSI_GPIO_CS, 0);
diff --git a/hw/audio/fmopl.c b/hw/audio/fmopl.c
index 8a71a569fa..a63ad0f04d 100644
--- a/hw/audio/fmopl.c
+++ b/hw/audio/fmopl.c
@@ -355,7 +355,7 @@ static void set_algorithm( OPL_CH *CH)
 	CH->connect2 = carrier;
 }
 
-/* ---------- frequency counter for operater update ---------- */
+/* ---------- frequency counter for operator update ---------- */
 static inline void CALC_FCSLOT(OPL_CH *CH,OPL_SLOT *SLOT)
 {
 	int ksr;
@@ -640,7 +640,7 @@ static int OPLOpenTable( void )
 		TL_TABLE[t] = TL_TABLE[TL_MAX+t] = 0;
 	}
 
-	/* make sinwave table (total level offet) */
+	/* make sinwave table (total level offset) */
 	/* degree 0 = degree 180                   = off */
 	SIN_TABLE[0] = SIN_TABLE[SIN_ENT/2]         = &TL_TABLE[EG_ENT-1];
 	for (s = 1;s <= SIN_ENT/4;s++){
@@ -1075,7 +1075,7 @@ FM_OPL *OPLCreate(int clock, int rate)
 	char *ptr;
 	FM_OPL *OPL;
 	int state_size;
-	int max_ch = 9; /* normaly 9 channels */
+	int max_ch = 9; /* normally 9 channels */
 
 	if( OPL_LockTable() ==-1) return NULL;
 	/* allocate OPL state space */
@@ -1092,7 +1092,7 @@ FM_OPL *OPLCreate(int clock, int rate)
 	OPL->clock = clock;
 	OPL->rate  = rate;
 	OPL->max_ch = max_ch;
-	/* init grobal tables */
+	/* init global tables */
 	OPL_initialize(OPL);
 	/* reset chip */
 	OPLResetChip(OPL);
diff --git a/hw/audio/fmopl.h b/hw/audio/fmopl.h
index e008e72d7a..89086b93f4 100644
--- a/hw/audio/fmopl.h
+++ b/hw/audio/fmopl.h
@@ -69,7 +69,7 @@ typedef struct fm_opl_f {
 	/* FM channel slots */
 	OPL_CH *P_CH;		/* pointer of CH                     */
 	int	max_ch;			/* maximum channel                   */
-	/* Rhythm sention */
+	/* Rhythm section */
 	uint8_t rhythm;		/* Rhythm mode , key flag */
 	/* time tables */
 	int32_t AR_TABLE[76];	/* attack rate tables  */
diff --git a/hw/audio/gusemu_hal.c b/hw/audio/gusemu_hal.c
index 5b9a14ee21..f159978b49 100644
--- a/hw/audio/gusemu_hal.c
+++ b/hw/audio/gusemu_hal.c
@@ -154,7 +154,7 @@ unsigned int gus_read(GUSEmuState * state, int port, int size)
         case 0x8d:
             {
                 int             offset = 2 * (GUSregb(FunkSelReg3x3) & 0x0f);
-                offset += ((int) GUSregb(VoiceSelReg3x2) & 0x1f) << 5; /* = Voice*32 + Funktion*2 */
+                offset += ((int) GUSregb(VoiceSelReg3x2) & 0x1f) << 5; /* = Voice*32 + Function*2 */
                 value_read = GUSregw(offset);
             }
             break;
@@ -353,7 +353,7 @@ void gus_write(GUSEmuState * state, int port, int size, unsigned int data)
                     if (!(GUSregb(GUS4cReset) & 0x01))
                         break;  /* reset flag active? */
                     offset = 2 * (GUSregb(FunkSelReg3x3) & 0x0f);
-                    offset += (GUSregb(VoiceSelReg3x2) & 0x1f) << 5; /*  = Voice*32 + Funktion*2 */
+                    offset += (GUSregb(VoiceSelReg3x2) & 0x1f) << 5; /*  = Voice*32 + Function*2 */
                     GUSregw(offset) = (uint16_t) ((GUSregw(offset) & readmask) | writedata);
                 }
                 break;
diff --git a/hw/audio/intel-hda-defs.h b/hw/audio/intel-hda-defs.h
index 2e37e5b874..261bdb48ff 100644
--- a/hw/audio/intel-hda-defs.h
+++ b/hw/audio/intel-hda-defs.h
@@ -418,7 +418,7 @@ enum {
 #define AC_UNSOL_RES_CP_STATE		(1<<1)	/* content protection */
 #define AC_UNSOL_RES_CP_READY		(1<<0)	/* content protection */
 
-/* Pin widget capabilies */
+/* Pin widget capabilities */
 #define AC_PINCAP_IMP_SENSE		(1<<0)	/* impedance sense capable */
 #define AC_PINCAP_TRIG_REQ		(1<<1)	/* trigger required */
 #define AC_PINCAP_PRES_DETECT		(1<<2)	/* presence detect capable */
@@ -483,7 +483,7 @@ enum {
 #define AC_PWRST_D2			0x02
 #define AC_PWRST_D3			0x03
 
-/* Processing capabilies */
+/* Processing capabilities */
 #define AC_PCAP_BENIGN			(1<<0)
 #define AC_PCAP_NUM_COEF		(0xff<<8)
 #define AC_PCAP_NUM_COEF_SHIFT		8
diff --git a/hw/block/hd-geometry.c b/hw/block/hd-geometry.c
index dae13ab14d..2b0af4430f 100644
--- a/hw/block/hd-geometry.c
+++ b/hw/block/hd-geometry.c
@@ -50,7 +50,7 @@ struct partition {
         uint32_t nr_sects;          /* nr of sectors in partition */
 } QEMU_PACKED;
 
-/* try to guess the disk logical geometry from the MSDOS partition table.
+/* try to guess the disk logical geometry from the MS-DOS partition table.
    Return 0 if OK, -1 if could not guess */
 static int guess_disk_lchs(BlockBackend *blk,
                            int *pcylinders, int *pheads, int *psectors)
@@ -66,7 +66,7 @@ static int guess_disk_lchs(BlockBackend *blk,
     if (blk_pread(blk, 0, BDRV_SECTOR_SIZE, buf, 0) < 0) {
         return -1;
     }
-    /* test msdos magic */
+    /* test MS-DOS magic */
     if (buf[510] != 0x55 || buf[511] != 0xaa) {
         return -1;
     }
diff --git a/hw/block/m25p80.c b/hw/block/m25p80.c
index dc5ffbc4ff..afc3fdf4d6 100644
--- a/hw/block/m25p80.c
+++ b/hw/block/m25p80.c
@@ -25,6 +25,7 @@
 #include "qemu/units.h"
 #include "sysemu/block-backend.h"
 #include "hw/block/block.h"
+#include "hw/block/flash.h"
 #include "hw/qdev-properties.h"
 #include "hw/qdev-properties-system.h"
 #include "hw/ssi/ssi.h"
@@ -1830,3 +1831,8 @@ static void m25p80_register_types(void)
 }
 
 type_init(m25p80_register_types)
+
+BlockBackend *m25p80_get_blk(DeviceState *dev)
+{
+    return M25P80(dev)->blk;
+}
diff --git a/hw/block/pflash_cfi01.c b/hw/block/pflash_cfi01.c
index 3c066e3405..62056b1d74 100644
--- a/hw/block/pflash_cfi01.c
+++ b/hw/block/pflash_cfi01.c
@@ -891,7 +891,7 @@ static Property pflash_cfi01_properties[] = {
     /* num-blocks is the number of blocks actually visible to the guest,
      * ie the total size of the device divided by the sector length.
      * If we're emulating flash devices wired in parallel the actual
-     * number of blocks per indvidual device will differ.
+     * number of blocks per individual device will differ.
      */
     DEFINE_PROP_UINT32("num-blocks", PFlashCFI01, nb_blocs, 0),
     DEFINE_PROP_UINT64("sector-length", PFlashCFI01, sector_len, 0),
diff --git a/hw/char/cadence_uart.c b/hw/char/cadence_uart.c
index 807e398541..a2ac062b1e 100644
--- a/hw/char/cadence_uart.c
+++ b/hw/char/cadence_uart.c
@@ -307,11 +307,11 @@ static gboolean cadence_uart_xmit(void *do_not_use, GIOCondition cond,
     /* instant drain the fifo when there's no back-end */
     if (!qemu_chr_fe_backend_connected(&s->chr)) {
         s->tx_count = 0;
-        return FALSE;
+        return G_SOURCE_REMOVE;
     }
 
     if (!s->tx_count) {
-        return FALSE;
+        return G_SOURCE_REMOVE;
     }
 
     ret = qemu_chr_fe_write(&s->chr, s->tx_fifo, s->tx_count);
@@ -326,12 +326,12 @@ static gboolean cadence_uart_xmit(void *do_not_use, GIOCondition cond,
                                         cadence_uart_xmit, s);
         if (!r) {
             s->tx_count = 0;
-            return FALSE;
+            return G_SOURCE_REMOVE;
         }
     }
 
     uart_update_status(s);
-    return FALSE;
+    return G_SOURCE_REMOVE;
 }
 
 static void uart_write_tx_fifo(CadenceUARTState *s, const uint8_t *buf,
@@ -575,7 +575,7 @@ static int cadence_uart_pre_load(void *opaque)
 {
     CadenceUARTState *s = opaque;
 
-    /* the frequency will be overriden if the refclk field is present */
+    /* the frequency will be overridden if the refclk field is present */
     clock_set_hz(s->refclk, UART_DEFAULT_REF_CLK);
     return 0;
 }
diff --git a/hw/char/cmsdk-apb-uart.c b/hw/char/cmsdk-apb-uart.c
index f8dc89ee3d..d466cd93de 100644
--- a/hw/char/cmsdk-apb-uart.c
+++ b/hw/char/cmsdk-apb-uart.c
@@ -199,7 +199,7 @@ static gboolean uart_transmit(void *do_not_use, GIOCondition cond, void *opaque)
     s->watch_tag = 0;
 
     if (!(s->ctrl & R_CTRL_TX_EN_MASK) || !(s->state & R_STATE_TXFULL_MASK)) {
-        return FALSE;
+        return G_SOURCE_REMOVE;
     }
 
     ret = qemu_chr_fe_write(&s->chr, &s->txbuf, 1);
@@ -215,7 +215,7 @@ static gboolean uart_transmit(void *do_not_use, GIOCondition cond, void *opaque)
         }
         /* Transmit pending */
         trace_cmsdk_apb_uart_tx_pending();
-        return FALSE;
+        return G_SOURCE_REMOVE;
     }
 
 buffer_drained:
@@ -227,7 +227,7 @@ buffer_drained:
         s->intstatus |= R_INTSTATUS_TX_MASK;
     }
     cmsdk_apb_uart_update(s);
-    return FALSE;
+    return G_SOURCE_REMOVE;
 }
 
 static void uart_cancel_transmit(CMSDKAPBUART *s)
diff --git a/hw/char/ibex_uart.c b/hw/char/ibex_uart.c
index f70adb5308..51708c0836 100644
--- a/hw/char/ibex_uart.c
+++ b/hw/char/ibex_uart.c
@@ -147,7 +147,7 @@ static gboolean ibex_uart_xmit(void *do_not_use, GIOCondition cond,
     /* instant drain the fifo when there's no back-end */
     if (!qemu_chr_fe_backend_connected(&s->chr)) {
         s->tx_level = 0;
-        return FALSE;
+        return G_SOURCE_REMOVE;
     }
 
     if (!s->tx_level) {
@@ -156,7 +156,7 @@ static gboolean ibex_uart_xmit(void *do_not_use, GIOCondition cond,
         s->uart_intr_state |= R_INTR_STATE_TX_EMPTY_MASK;
         s->uart_intr_state &= ~R_INTR_STATE_TX_WATERMARK_MASK;
         ibex_uart_update_irqs(s);
-        return FALSE;
+        return G_SOURCE_REMOVE;
     }
 
     ret = qemu_chr_fe_write(&s->chr, s->tx_fifo, s->tx_level);
@@ -171,7 +171,7 @@ static gboolean ibex_uart_xmit(void *do_not_use, GIOCondition cond,
                                         ibex_uart_xmit, s);
         if (!r) {
             s->tx_level = 0;
-            return FALSE;
+            return G_SOURCE_REMOVE;
         }
     }
 
@@ -192,7 +192,7 @@ static gboolean ibex_uart_xmit(void *do_not_use, GIOCondition cond,
     }
 
     ibex_uart_update_irqs(s);
-    return FALSE;
+    return G_SOURCE_REMOVE;
 }
 
 static void uart_write_tx_fifo(IbexUartState *s, const uint8_t *buf,
diff --git a/hw/char/imx_serial.c b/hw/char/imx_serial.c
index 1b75a89588..377d1d9773 100644
--- a/hw/char/imx_serial.c
+++ b/hw/char/imx_serial.c
@@ -112,7 +112,7 @@ static void imx_serial_reset_at_boot(DeviceState *dev)
     imx_serial_reset(s);
 
     /*
-     * enable the uart on boot, so messages from the linux decompresser
+     * enable the uart on boot, so messages from the linux decompressor
      * are visible.  On real hardware this is done by the boot rom
      * before anything else is loaded.
      */
diff --git a/hw/char/nrf51_uart.c b/hw/char/nrf51_uart.c
index 3c6f982de9..dfe2276d71 100644
--- a/hw/char/nrf51_uart.c
+++ b/hw/char/nrf51_uart.c
@@ -93,13 +93,13 @@ static gboolean uart_transmit(void *do_not_use, GIOCondition cond, void *opaque)
              */
             goto buffer_drained;
         }
-        return FALSE;
+        return G_SOURCE_REMOVE;
     }
 
 buffer_drained:
     s->reg[R_UART_TXDRDY] = 1;
     s->pending_tx_byte = false;
-    return FALSE;
+    return G_SOURCE_REMOVE;
 }
 
 static void uart_cancel_transmit(NRF51UARTState *s)
diff --git a/hw/char/pl011.c b/hw/char/pl011.c
index 77bbc2a982..58edeb9ddb 100644
--- a/hw/char/pl011.c
+++ b/hw/char/pl011.c
@@ -48,14 +48,15 @@ DeviceState *pl011_create(hwaddr addr, qemu_irq irq, Chardev *chr)
     return dev;
 }
 
-#define PL011_INT_TX 0x20
-#define PL011_INT_RX 0x10
-
+/* Flag Register, UARTFR */
 #define PL011_FLAG_TXFE 0x80
 #define PL011_FLAG_RXFF 0x40
 #define PL011_FLAG_TXFF 0x20
 #define PL011_FLAG_RXFE 0x10
 
+/* Data Register, UARTDR */
+#define DR_BE   (1 << 10)
+
 /* Interrupt status bits in UARTRIS, UARTMIS, UARTIMSC */
 #define INT_OE (1 << 10)
 #define INT_BE (1 << 9)
@@ -71,11 +72,33 @@ DeviceState *pl011_create(hwaddr addr, qemu_irq irq, Chardev *chr)
 #define INT_E (INT_OE | INT_BE | INT_PE | INT_FE)
 #define INT_MS (INT_RI | INT_DSR | INT_DCD | INT_CTS)
 
+/* Line Control Register, UARTLCR_H */
+#define LCR_FEN     (1 << 4)
+#define LCR_BRK     (1 << 0)
+
 static const unsigned char pl011_id_arm[8] =
   { 0x11, 0x10, 0x14, 0x00, 0x0d, 0xf0, 0x05, 0xb1 };
 static const unsigned char pl011_id_luminary[8] =
   { 0x11, 0x00, 0x18, 0x01, 0x0d, 0xf0, 0x05, 0xb1 };
 
+static const char *pl011_regname(hwaddr offset)
+{
+    static const char *const rname[] = {
+        [0] = "DR", [1] = "RSR", [6] = "FR", [8] = "ILPR", [9] = "IBRD",
+        [10] = "FBRD", [11] = "LCRH", [12] = "CR", [13] = "IFLS", [14] = "IMSC",
+        [15] = "RIS", [16] = "MIS", [17] = "ICR", [18] = "DMACR",
+    };
+    unsigned idx = offset >> 2;
+
+    if (idx < ARRAY_SIZE(rname) && rname[idx]) {
+        return rname[idx];
+    }
+    if (idx >= 0x3f8 && idx <= 0x400) {
+        return "ID";
+    }
+    return "UNKN";
+}
+
 /* Which bits in the interrupt status matter for each outbound IRQ line ? */
 static const uint32_t irqmask[] = {
     INT_E | INT_MS | INT_RT | INT_TX | INT_RX, /* combined IRQ */
@@ -100,7 +123,7 @@ static void pl011_update(PL011State *s)
 
 static bool pl011_is_fifo_enabled(PL011State *s)
 {
-    return (s->lcr & 0x10) != 0;
+    return (s->lcr & LCR_FEN) != 0;
 }
 
 static inline unsigned pl011_get_fifo_depth(PL011State *s)
@@ -138,7 +161,7 @@ static uint64_t pl011_read(void *opaque, hwaddr offset,
             s->flags |= PL011_FLAG_RXFE;
         }
         if (s->read_count == s->read_trigger - 1)
-            s->int_level &= ~ PL011_INT_RX;
+            s->int_level &= ~ INT_RX;
         trace_pl011_read_fifo(s->read_count);
         s->rsr = c >> 8;
         pl011_update(s);
@@ -191,7 +214,7 @@ static uint64_t pl011_read(void *opaque, hwaddr offset,
         break;
     }
 
-    trace_pl011_read(offset, r);
+    trace_pl011_read(offset, r, pl011_regname(offset));
     return r;
 }
 
@@ -202,7 +225,7 @@ static void pl011_set_read_trigger(PL011State *s)
        the threshold.  However linux only reads the FIFO in response to an
        interrupt.  Triggering the interrupt when the FIFO is non-empty seems
        to make things work.  */
-    if (s->lcr & 0x10)
+    if (s->lcr & LCR_FEN)
         s->read_trigger = (s->ifl >> 1) & 0x1c;
     else
 #endif
@@ -234,7 +257,7 @@ static void pl011_write(void *opaque, hwaddr offset,
     PL011State *s = (PL011State *)opaque;
     unsigned char ch;
 
-    trace_pl011_write(offset, value);
+    trace_pl011_write(offset, value, pl011_regname(offset));
 
     switch (offset >> 2) {
     case 0: /* UARTDR */
@@ -243,7 +266,7 @@ static void pl011_write(void *opaque, hwaddr offset,
         /* XXX this blocks entire thread. Rewrite to use
          * qemu_chr_fe_write and background I/O callbacks */
         qemu_chr_fe_write_all(&s->chr, &ch, 1);
-        s->int_level |= PL011_INT_TX;
+        s->int_level |= INT_TX;
         pl011_update(s);
         break;
     case 1: /* UARTRSR/UARTECR */
@@ -252,7 +275,7 @@ static void pl011_write(void *opaque, hwaddr offset,
     case 6: /* UARTFR */
         /* Writes to Flag register are ignored.  */
         break;
-    case 8: /* UARTUARTILPR */
+    case 8: /* UARTILPR */
         s->ilpr = value;
         break;
     case 9: /* UARTIBRD */
@@ -265,11 +288,11 @@ static void pl011_write(void *opaque, hwaddr offset,
         break;
     case 11: /* UARTLCR_H */
         /* Reset the FIFO state on FIFO enable or disable */
-        if ((s->lcr ^ value) & 0x10) {
+        if ((s->lcr ^ value) & LCR_FEN) {
             pl011_reset_fifo(s);
         }
-        if ((s->lcr ^ value) & 0x1) {
-            int break_enable = value & 0x1;
+        if ((s->lcr ^ value) & LCR_BRK) {
+            int break_enable = value & LCR_BRK;
             qemu_chr_fe_ioctl(&s->chr, CHR_IOCTL_SERIAL_SET_BREAK,
                               &break_enable);
         }
@@ -331,7 +354,7 @@ static void pl011_put_fifo(void *opaque, uint32_t value)
         s->flags |= PL011_FLAG_RXFF;
     }
     if (s->read_count == s->read_trigger) {
-        s->int_level |= PL011_INT_RX;
+        s->int_level |= INT_RX;
         pl011_update(s);
     }
 }
@@ -343,8 +366,9 @@ static void pl011_receive(void *opaque, const uint8_t *buf, int size)
 
 static void pl011_event(void *opaque, QEMUChrEvent event)
 {
-    if (event == CHR_EVENT_BREAK)
-        pl011_put_fifo(opaque, 0x400);
+    if (event == CHR_EVENT_BREAK) {
+        pl011_put_fifo(opaque, DR_BE);
+    }
 }
 
 static void pl011_clock_update(void *opaque, ClockEvent event)
@@ -358,6 +382,8 @@ static const MemoryRegionOps pl011_ops = {
     .read = pl011_read,
     .write = pl011_write,
     .endianness = DEVICE_NATIVE_ENDIAN,
+    .impl.min_access_size = 4,
+    .impl.max_access_size = 4,
 };
 
 static bool pl011_clock_needed(void *opaque)
diff --git a/hw/char/riscv_htif.c b/hw/char/riscv_htif.c
index 37d3ccc76b..40de6b8b77 100644
--- a/hw/char/riscv_htif.c
+++ b/hw/char/riscv_htif.c
@@ -30,6 +30,7 @@
 #include "qemu/timer.h"
 #include "qemu/error-report.h"
 #include "exec/address-spaces.h"
+#include "exec/tswap.h"
 #include "sysemu/dma.h"
 
 #define RISCV_DEBUG_HTIF 0
@@ -209,11 +210,11 @@ static void htif_handle_tohost_write(HTIFState *s, uint64_t val_written)
             } else {
                 uint64_t syscall[8];
                 cpu_physical_memory_read(payload, syscall, sizeof(syscall));
-                if (syscall[0] == PK_SYS_WRITE &&
-                    syscall[1] == HTIF_DEV_CONSOLE &&
-                    syscall[3] == HTIF_CONSOLE_CMD_PUTC) {
+                if (tswap64(syscall[0]) == PK_SYS_WRITE &&
+                    tswap64(syscall[1]) == HTIF_DEV_CONSOLE &&
+                    tswap64(syscall[3]) == HTIF_CONSOLE_CMD_PUTC) {
                     uint8_t ch;
-                    cpu_physical_memory_read(syscall[2], &ch, 1);
+                    cpu_physical_memory_read(tswap64(syscall[2]), &ch, 1);
                     qemu_chr_fe_write(&s->chr, &ch, 1);
                     resp = 0x100 | (uint8_t)payload;
                 } else {
@@ -232,7 +233,8 @@ static void htif_handle_tohost_write(HTIFState *s, uint64_t val_written)
             s->tohost = 0; /* clear to indicate we read */
             return;
         } else if (cmd == HTIF_CONSOLE_CMD_PUTC) {
-            qemu_chr_fe_write(&s->chr, (uint8_t *)&payload, 1);
+            uint8_t ch = (uint8_t)payload;
+            qemu_chr_fe_write(&s->chr, &ch, 1);
             resp = 0x100 | (uint8_t)payload;
         } else {
             qemu_log("HTIF device %d: unknown command\n", device);
diff --git a/hw/char/serial.c b/hw/char/serial.c
index 270e1b1094..a32eb25f58 100644
--- a/hw/char/serial.c
+++ b/hw/char/serial.c
@@ -54,7 +54,7 @@
 #define UART_IIR_RLSI   0x06    /* Receiver line status interrupt */
 #define UART_IIR_CTI    0x0C    /* Character Timeout Indication */
 
-#define UART_IIR_FENF   0x80    /* Fifo enabled, but not functionning */
+#define UART_IIR_FENF   0x80    /* Fifo enabled, but not functioning */
 #define UART_IIR_FE     0xC0    /* Fifo enabled */
 
 /*
@@ -226,7 +226,7 @@ static gboolean serial_watch_cb(void *do_not_use, GIOCondition cond,
     SerialState *s = opaque;
     s->watch_tag = 0;
     serial_xmit(s);
-    return FALSE;
+    return G_SOURCE_REMOVE;
 }
 
 static void serial_xmit(SerialState *s)
diff --git a/hw/char/trace-events b/hw/char/trace-events
index 2ecb36232e..babf4d35ea 100644
--- a/hw/char/trace-events
+++ b/hw/char/trace-events
@@ -54,9 +54,9 @@ escc_sunmouse_event(int dx, int dy, int buttons_state) "dx=%d dy=%d buttons=0x%0
 
 # pl011.c
 pl011_irq_state(int level) "irq state %d"
-pl011_read(uint32_t addr, uint32_t value) "addr 0x%08x value 0x%08x"
+pl011_read(uint32_t addr, uint32_t value, const char *regname) "addr 0x%03x value 0x%08x reg %s"
 pl011_read_fifo(int read_count) "FIFO read, read_count now %d"
-pl011_write(uint32_t addr, uint32_t value) "addr 0x%08x value 0x%08x"
+pl011_write(uint32_t addr, uint32_t value, const char *regname) "addr 0x%03x value 0x%08x reg %s"
 pl011_can_receive(uint32_t lcr, int read_count, int r) "LCR 0x%08x read_count %d returning %d"
 pl011_put_fifo(uint32_t c, int read_count) "new char 0x%x read_count now %d"
 pl011_put_fifo_full(void) "FIFO now full, RXFF set"
diff --git a/hw/char/virtio-console.c b/hw/char/virtio-console.c
index dd5a02e339..dbe0b28e60 100644
--- a/hw/char/virtio-console.c
+++ b/hw/char/virtio-console.c
@@ -45,7 +45,7 @@ static gboolean chr_write_unblocked(void *do_not_use, GIOCondition cond,
 
     vcon->watch = 0;
     virtio_serial_throttle_port(VIRTIO_SERIAL_PORT(vcon), false);
-    return FALSE;
+    return G_SOURCE_REMOVE;
 }
 
 /* Callback function that's called when the guest sends us data */
diff --git a/hw/core/generic-loader.c b/hw/core/generic-loader.c
index 4f4d77908d..d4b5c501d8 100644
--- a/hw/core/generic-loader.c
+++ b/hw/core/generic-loader.c
@@ -24,7 +24,7 @@
  * callback that does the memory operations.
 
  * This device allows the user to monkey patch memory. To be able to do
- * this it needs a backend to manage the datas, the same as other
+ * this it needs a backend to manage the data, the same as other
  * memory-related devices. In this case as the backend is so trivial we
  * have merged it with the frontend instead of creating and maintaining a
  * separate backend.
@@ -166,7 +166,7 @@ static void generic_loader_realize(DeviceState *dev, Error **errp)
         }
     }
 
-    /* Convert the data endiannes */
+    /* Convert the data endianness */
     if (s->data_be) {
         s->data = cpu_to_be64(s->data);
     } else {
diff --git a/hw/core/machine.c b/hw/core/machine.c
index da699cf4e1..cb38b8cf4c 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -38,6 +38,7 @@
 #include "exec/confidential-guest-support.h"
 #include "hw/virtio/virtio.h"
 #include "hw/virtio/virtio-pci.h"
+#include "hw/virtio/virtio-net.h"
 
 GlobalProperty hw_compat_8_1[] = {};
 const size_t hw_compat_8_1_len = G_N_ELEMENTS(hw_compat_8_1);
@@ -45,6 +46,9 @@ const size_t hw_compat_8_1_len = G_N_ELEMENTS(hw_compat_8_1);
 GlobalProperty hw_compat_8_0[] = {
     { "migration", "multifd-flush-after-each-section", "on"},
     { TYPE_PCI_DEVICE, "x-pcie-ari-nextfn-1", "on" },
+    { TYPE_VIRTIO_NET, "host_uso", "off"},
+    { TYPE_VIRTIO_NET, "guest_uso4", "off"},
+    { TYPE_VIRTIO_NET, "guest_uso6", "off"},
 };
 const size_t hw_compat_8_0_len = G_N_ELEMENTS(hw_compat_8_0);
 
@@ -1355,6 +1359,7 @@ out:
 
 void machine_run_board_init(MachineState *machine, const char *mem_path, Error **errp)
 {
+    ERRP_GUARD();
     MachineClass *machine_class = MACHINE_GET_CLASS(machine);
     ObjectClass *oc = object_class_by_name(machine->cpu_type);
     CPUClass *cc;
@@ -1383,9 +1388,13 @@ void machine_run_board_init(MachineState *machine, const char *mem_path, Error *
                numa_uses_legacy_mem()) {
         if (object_property_find(object_get_objects_root(),
                                  machine_class->default_ram_id)) {
-            error_setg(errp, "object name '%s' is reserved for the default"
-                " RAM backend, it can't be used for any other purposes."
-                " Change the object's 'id' to something else",
+            error_setg(errp, "object's id '%s' is reserved for the default"
+                " RAM backend, it can't be used for any other purposes",
+                machine_class->default_ram_id);
+            error_append_hint(errp,
+                "Change the object's 'id' to something else or disable"
+                " automatic creation of the default RAM backend by setting"
+                " 'memory-backend=%s' with '-machine'.\n",
                 machine_class->default_ram_id);
             return;
         }
@@ -1417,7 +1426,7 @@ void machine_run_board_init(MachineState *machine, const char *mem_path, Error *
         for (i = 0; machine_class->valid_cpu_types[i]; i++) {
             if (object_class_dynamic_cast(oc,
                                           machine_class->valid_cpu_types[i])) {
-                /* The user specificed CPU is in the valid field, we are
+                /* The user specified CPU is in the valid field, we are
                  * good to go.
                  */
                 break;
diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c
index 6d5d43eda2..41b7e682c7 100644
--- a/hw/core/qdev-properties-system.c
+++ b/hw/core/qdev-properties-system.c
@@ -107,7 +107,7 @@ static void set_drive_helper(Object *obj, Visitor *v, const char *name,
     }
 
     if (*ptr) {
-        /* BlockBackend alread exists. So, we want to change attached node */
+        /* BlockBackend already exists. So, we want to change attached node */
         blk = *ptr;
         ctx = blk_get_aio_context(blk);
         bs = bdrv_lookup_bs(NULL, str, errp);
diff --git a/hw/core/vm-change-state-handler.c b/hw/core/vm-change-state-handler.c
index 1f3630986d..8e2639224e 100644
--- a/hw/core/vm-change-state-handler.c
+++ b/hw/core/vm-change-state-handler.c
@@ -55,8 +55,20 @@ static int qdev_get_dev_tree_depth(DeviceState *dev)
 VMChangeStateEntry *qdev_add_vm_change_state_handler(DeviceState *dev,
                                                      VMChangeStateHandler *cb,
                                                      void *opaque)
+{
+    return qdev_add_vm_change_state_handler_full(dev, cb, NULL, opaque);
+}
+
+/*
+ * Exactly like qdev_add_vm_change_state_handler() but passes a prepare_cb
+ * argument too.
+ */
+VMChangeStateEntry *qdev_add_vm_change_state_handler_full(
+    DeviceState *dev, VMChangeStateHandler *cb,
+    VMChangeStateHandler *prepare_cb, void *opaque)
 {
     int depth = qdev_get_dev_tree_depth(dev);
 
-    return qemu_add_vm_change_state_handler_prio(cb, opaque, depth);
+    return qemu_add_vm_change_state_handler_prio_full(cb, prepare_cb, opaque,
+                                                      depth);
 }
diff --git a/hw/cpu/a15mpcore.c b/hw/cpu/a15mpcore.c
index 774ca9987a..bfd8aa5644 100644
--- a/hw/cpu/a15mpcore.c
+++ b/hw/cpu/a15mpcore.c
@@ -161,7 +161,7 @@ static void a15mp_priv_class_init(ObjectClass *klass, void *data)
 
     dc->realize = a15mp_priv_realize;
     device_class_set_props(dc, a15mp_priv_properties);
-    /* We currently have no savable state */
+    /* We currently have no saveable state */
 }
 
 static const TypeInfo a15mp_priv_info = {
diff --git a/hw/cxl/cxl-events.c b/hw/cxl/cxl-events.c
index d161d57456..3ddd6369ad 100644
--- a/hw/cxl/cxl-events.c
+++ b/hw/cxl/cxl-events.c
@@ -197,7 +197,7 @@ CXLRetCode cxl_event_clear_records(CXLDeviceState *cxlds, CXLClearEventPayload *
 
     QEMU_LOCK_GUARD(&log->lock);
     /*
-     * Must itterate the queue twice.
+     * Must iterate the queue twice.
      * "The device shall verify the event record handles specified in the input
      * payload are in temporal order. If the device detects an older event
      * record that will not be cleared when Clear Event Records is executed,
diff --git a/hw/cxl/cxl-host.c b/hw/cxl/cxl-host.c
index 034c7805b3..f0920da956 100644
--- a/hw/cxl/cxl-host.c
+++ b/hw/cxl/cxl-host.c
@@ -39,12 +39,6 @@ static void cxl_fixed_memory_window_config(CXLState *cxl_state,
         return;
     }
 
-    fw->targets = g_malloc0_n(fw->num_targets, sizeof(*fw->targets));
-    for (i = 0, target = object->targets; target; i++, target = target->next) {
-        /* This link cannot be resolved yet, so stash the name for now */
-        fw->targets[i] = g_strdup(target->value);
-    }
-
     if (object->size % (256 * MiB)) {
         error_setg(errp,
                    "Size of a CXL fixed memory window must be a multiple of 256MiB");
@@ -64,6 +58,12 @@ static void cxl_fixed_memory_window_config(CXLState *cxl_state,
         fw->enc_int_gran = 0;
     }
 
+    fw->targets = g_malloc0_n(fw->num_targets, sizeof(*fw->targets));
+    for (i = 0, target = object->targets; target; i++, target = target->next) {
+        /* This link cannot be resolved yet, so stash the name for now */
+        fw->targets[i] = g_strdup(target->value);
+    }
+
     cxl_state->fixed_windows = g_list_append(cxl_state->fixed_windows,
                                              g_steal_pointer(&fw));
 
diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
index 02f9b5a870..434ccc5f6e 100644
--- a/hw/cxl/cxl-mailbox-utils.c
+++ b/hw/cxl/cxl-mailbox-utils.c
@@ -39,7 +39,7 @@
  *    fill the output data into cmd->payload (overwriting what was there),
  *    setting the length, and returning a valid return code.
  *
- *  XXX: The handler need not worry about endianess. The payload is read out of
+ *  XXX: The handler need not worry about endianness. The payload is read out of
  *  a register interface that already deals with it.
  */
 
@@ -501,7 +501,7 @@ static CXLRetCode cmd_media_get_poison_list(struct cxl_cmd *cmd,
     uint16_t out_pl_len;
 
     query_start = ldq_le_p(&in->pa);
-    /* 64 byte alignemnt required */
+    /* 64 byte alignment required */
     if (query_start & 0x3f) {
         return CXL_MBOX_INVALID_INPUT;
     }
diff --git a/hw/display/bochs-display.c b/hw/display/bochs-display.c
index e7ec268184..9138e98c3b 100644
--- a/hw/display/bochs-display.c
+++ b/hw/display/bochs-display.c
@@ -164,7 +164,7 @@ static int bochs_display_get_mode(BochsDisplayState *s,
     memset(mode, 0, sizeof(*mode));
     switch (vbe[VBE_DISPI_INDEX_BPP]) {
     case 16:
-        /* best effort: support native endianess only */
+        /* best effort: support native endianness only */
         mode->format = PIXMAN_r5g6b5;
         mode->bytepp = 2;
         break;
diff --git a/hw/display/qxl.c b/hw/display/qxl.c
index f1c0eb7dfc..7bb00d68f5 100644
--- a/hw/display/qxl.c
+++ b/hw/display/qxl.c
@@ -1544,7 +1544,7 @@ static void qxl_create_guest_primary(PCIQXLDevice *qxl, int loadvm,
     }
 }
 
-/* return 1 if surface destoy was initiated (in QXL_ASYNC case) or
+/* return 1 if surface destroy was initiated (in QXL_ASYNC case) or
  * done (in QXL_SYNC case), 0 otherwise. */
 static int qxl_destroy_primary(PCIQXLDevice *d, qxl_async_io async)
 {
@@ -1591,7 +1591,10 @@ static void qxl_set_mode(PCIQXLDevice *d, unsigned int modenr, int loadvm)
     }
 
     d->guest_slots[0].slot = slot;
-    assert(qxl_add_memslot(d, 0, devmem, QXL_SYNC) == 0);
+    if (qxl_add_memslot(d, 0, devmem, QXL_SYNC) != 0) {
+        qxl_set_guest_bug(d, "device isn't initialized yet");
+        return;
+    }
 
     d->guest_primary.surface = surface;
     qxl_create_guest_primary(d, 0, QXL_SYNC);
diff --git a/hw/display/ssd0303.c b/hw/display/ssd0303.c
index d67b0ad7b5..32b32a3044 100644
--- a/hw/display/ssd0303.c
+++ b/hw/display/ssd0303.c
@@ -8,7 +8,7 @@
  */
 
 /* The controller can support a variety of different displays, but we only
-   implement one.  Most of the commends relating to brightness and geometry
+   implement one.  Most of the commands relating to brightness and geometry
    setup are ignored. */
 
 #include "qemu/osdep.h"
diff --git a/hw/display/ssd0323.c b/hw/display/ssd0323.c
index ab229d32b7..09b1bbed0a 100644
--- a/hw/display/ssd0323.c
+++ b/hw/display/ssd0323.c
@@ -8,7 +8,7 @@
  */
 
 /* The controller can support a variety of different displays, but we only
-   implement one.  Most of the commends relating to brightness and geometry
+   implement one.  Most of the commands relating to brightness and geometry
    setup are ignored. */
 
 #include "qemu/osdep.h"
diff --git a/hw/display/vhost-user-gpu.c b/hw/display/vhost-user-gpu.c
index e8ee03094e..1150521d9d 100644
--- a/hw/display/vhost-user-gpu.c
+++ b/hw/display/vhost-user-gpu.c
@@ -32,6 +32,7 @@ typedef enum VhostUserGpuRequest {
     VHOST_USER_GPU_DMABUF_SCANOUT,
     VHOST_USER_GPU_DMABUF_UPDATE,
     VHOST_USER_GPU_GET_EDID,
+    VHOST_USER_GPU_DMABUF_SCANOUT2,
 } VhostUserGpuRequest;
 
 typedef struct VhostUserGpuDisplayInfoReply {
@@ -79,6 +80,11 @@ typedef struct VhostUserGpuDMABUFScanout {
     int fd_drm_fourcc;
 } QEMU_PACKED VhostUserGpuDMABUFScanout;
 
+typedef struct VhostUserGpuDMABUFScanout2 {
+    struct VhostUserGpuDMABUFScanout dmabuf_scanout;
+    uint64_t modifier;
+} QEMU_PACKED VhostUserGpuDMABUFScanout2;
+
 typedef struct VhostUserGpuEdidRequest {
     uint32_t scanout_id;
 } QEMU_PACKED VhostUserGpuEdidRequest;
@@ -93,6 +99,7 @@ typedef struct VhostUserGpuMsg {
         VhostUserGpuScanout scanout;
         VhostUserGpuUpdate update;
         VhostUserGpuDMABUFScanout dmabuf_scanout;
+        VhostUserGpuDMABUFScanout2 dmabuf_scanout2;
         VhostUserGpuEdidRequest edid_req;
         struct virtio_gpu_resp_edid resp_edid;
         struct virtio_gpu_resp_display_info display_info;
@@ -107,6 +114,7 @@ static VhostUserGpuMsg m __attribute__ ((unused));
 #define VHOST_USER_GPU_MSG_FLAG_REPLY 0x4
 
 #define VHOST_USER_GPU_PROTOCOL_F_EDID 0
+#define VHOST_USER_GPU_PROTOCOL_F_DMABUF2 1
 
 static void vhost_user_gpu_update_blocked(VhostUserGPU *g, bool blocked);
 
@@ -171,7 +179,8 @@ vhost_user_gpu_handle_display(VhostUserGPU *g, VhostUserGpuMsg *msg)
             .flags = VHOST_USER_GPU_MSG_FLAG_REPLY,
             .size = sizeof(uint64_t),
             .payload = {
-                .u64 = (1 << VHOST_USER_GPU_PROTOCOL_F_EDID)
+                .u64 = (1 << VHOST_USER_GPU_PROTOCOL_F_EDID) |
+                       (1 << VHOST_USER_GPU_PROTOCOL_F_DMABUF2)
             }
         };
 
@@ -236,6 +245,7 @@ vhost_user_gpu_handle_display(VhostUserGPU *g, VhostUserGpuMsg *msg)
 
         break;
     }
+    case VHOST_USER_GPU_DMABUF_SCANOUT2:
     case VHOST_USER_GPU_DMABUF_SCANOUT: {
         VhostUserGpuDMABUFScanout *m = &msg->payload.dmabuf_scanout;
         int fd = qemu_chr_fe_get_msgfd(&g->vhost_chr);
@@ -269,6 +279,11 @@ vhost_user_gpu_handle_display(VhostUserGPU *g, VhostUserGpuMsg *msg)
             .fourcc = m->fd_drm_fourcc,
             .y0_top = m->fd_flags & VIRTIO_GPU_RESOURCE_FLAG_Y_0_TOP,
         };
+        if (msg->request == VHOST_USER_GPU_DMABUF_SCANOUT2) {
+            VhostUserGpuDMABUFScanout2 *m2 = &msg->payload.dmabuf_scanout2;
+            dmabuf->modifier = m2->modifier;
+        }
+
         dpy_gl_scanout_dmabuf(con, dmabuf);
         break;
     }
diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c
index bbd5c6561a..93857ad523 100644
--- a/hw/display/virtio-gpu.c
+++ b/hw/display/virtio-gpu.c
@@ -1283,7 +1283,9 @@ static int virtio_gpu_load(QEMUFile *f, void *opaque, size_t size,
             g_free(res);
             return -EINVAL;
         }
-
+#ifdef WIN32
+        pixman_image_set_destroy_function(res->image, win32_pixman_image_destroy, res->handle);
+#endif
 
         res->addrs = g_new(uint64_t, res->iov_cnt);
         res->iov = g_new(struct iovec, res->iov_cnt);
diff --git a/hw/display/xlnx_dp.c b/hw/display/xlnx_dp.c
index b0828d65aa..43c7dd8e9c 100644
--- a/hw/display/xlnx_dp.c
+++ b/hw/display/xlnx_dp.c
@@ -380,13 +380,16 @@ static inline void xlnx_dp_audio_mix_buffer(XlnxDPState *s)
 static void xlnx_dp_audio_callback(void *opaque, int avail)
 {
     /*
-     * Get some data from the DPDMA and compute these datas.
-     * Then wait for QEMU's audio subsystem to call this callback.
+     * Get the individual left and right audio streams from the DPDMA,
+     * and fill the output buffer with the combined stereo audio data
+     * adjusted by the volume controls.
+     * QEMU's audio subsystem will call this callback repeatedly;
+     * we return the data from the output buffer until it is emptied,
+     * and then we will read data from the DPDMA again.
      */
     XlnxDPState *s = XLNX_DP(opaque);
     size_t written = 0;
 
-    /* If there are already some data don't get more data. */
     if (s->byte_left == 0) {
         s->audio_data_available[0] = xlnx_dpdma_start_operation(s->dpdma, 4,
                                                                   true);
diff --git a/hw/dma/etraxfs_dma.c b/hw/dma/etraxfs_dma.c
index a1068b19ea..9c0003de51 100644
--- a/hw/dma/etraxfs_dma.c
+++ b/hw/dma/etraxfs_dma.c
@@ -28,6 +28,7 @@
 #include "qemu/main-loop.h"
 #include "sysemu/runstate.h"
 #include "exec/address-spaces.h"
+#include "exec/memory.h"
 
 #include "hw/cris/etraxfs_dma.h"
 
diff --git a/hw/dma/omap_dma.c b/hw/dma/omap_dma.c
index c6e35ba4b8..77797a67b5 100644
--- a/hw/dma/omap_dma.c
+++ b/hw/dma/omap_dma.c
@@ -247,7 +247,7 @@ static void omap_dma_deactivate_channel(struct omap_dma_s *s,
         return;
     }
 
-    /* Don't deactive the channel if it is synchronized and the DMA request is
+    /* Don't deactivate the channel if it is synchronized and the DMA request is
        active */
     if (ch->sync && ch->enable && (s->dma->drqbmp & (1ULL << ch->sync)))
         return;
@@ -422,7 +422,7 @@ static void omap_dma_transfer_generic(struct soc_dma_ch_s *dma)
 
         if (ch->fs && ch->bs) {
             a->pck_element ++;
-            /* Check if a full packet has beed transferred.  */
+            /* Check if a full packet has been transferred.  */
             if (a->pck_element == a->pck_elements) {
                 a->pck_element = 0;
 
diff --git a/hw/hppa/machine.c b/hw/hppa/machine.c
index 866e11d208..cf28cb9586 100644
--- a/hw/hppa/machine.c
+++ b/hw/hppa/machine.c
@@ -133,14 +133,10 @@ static FWCfgState *create_fw_cfg(MachineState *ms)
     fw_cfg_add_file(fw_cfg, "/etc/firmware-min-version",
                     g_memdup(&val, sizeof(val)), sizeof(val));
 
-    val = cpu_to_le64(HPPA_TLB_ENTRIES);
+    val = cpu_to_le64(HPPA_TLB_ENTRIES - HPPA_BTLB_ENTRIES);
     fw_cfg_add_file(fw_cfg, "/etc/cpu/tlb_entries",
                     g_memdup(&val, sizeof(val)), sizeof(val));
 
-    val = cpu_to_le64(HPPA_BTLB_ENTRIES);
-    fw_cfg_add_file(fw_cfg, "/etc/cpu/btlb_entries",
-                    g_memdup(&val, sizeof(val)), sizeof(val));
-
     val = cpu_to_le64(HPA_POWER_BUTTON);
     fw_cfg_add_file(fw_cfg, "/etc/power-button-addr",
                     g_memdup(&val, sizeof(val)), sizeof(val));
@@ -433,6 +429,10 @@ static void hppa_machine_reset(MachineState *ms, ShutdownCause reason)
 
         cs->exception_index = -1;
         cs->halted = 0;
+
+        /* clear any existing TLB and BTLB entries */
+        memset(cpu[i]->env.tlb, 0, sizeof(cpu[i]->env.tlb));
+        cpu[i]->env.tlb_last = HPPA_BTLB_ENTRIES;
     }
 
     /* already initialized by machine_hppa_init()? */
diff --git a/hw/i2c/aspeed_i2c.c b/hw/i2c/aspeed_i2c.c
index 1f071a3811..7275d40749 100644
--- a/hw/i2c/aspeed_i2c.c
+++ b/hw/i2c/aspeed_i2c.c
@@ -226,7 +226,7 @@ static int aspeed_i2c_dma_read(AspeedI2CBus *bus, uint8_t *data)
     return 0;
 }
 
-static int aspeed_i2c_bus_send(AspeedI2CBus *bus, uint8_t pool_start)
+static int aspeed_i2c_bus_send(AspeedI2CBus *bus)
 {
     AspeedI2CClass *aic = ASPEED_I2C_GET_CLASS(bus->controller);
     int ret = -1;
@@ -236,10 +236,10 @@ static int aspeed_i2c_bus_send(AspeedI2CBus *bus, uint8_t pool_start)
     uint32_t reg_byte_buf = aspeed_i2c_bus_byte_buf_offset(bus);
     uint32_t reg_dma_len = aspeed_i2c_bus_dma_len_offset(bus);
     int pool_tx_count = SHARED_ARRAY_FIELD_EX32(bus->regs, reg_pool_ctrl,
-                                                TX_COUNT);
+                                                TX_COUNT) + 1;
 
     if (SHARED_ARRAY_FIELD_EX32(bus->regs, reg_cmd, TX_BUFF_EN)) {
-        for (i = pool_start; i < pool_tx_count; i++) {
+        for (i = 0; i < pool_tx_count; i++) {
             uint8_t *pool_base = aic->bus_pool_base(bus);
 
             trace_aspeed_i2c_bus_send("BUF", i + 1, pool_tx_count,
@@ -273,7 +273,7 @@ static int aspeed_i2c_bus_send(AspeedI2CBus *bus, uint8_t pool_start)
         }
         SHARED_ARRAY_FIELD_DP32(bus->regs, reg_cmd, TX_DMA_EN, 0);
     } else {
-        trace_aspeed_i2c_bus_send("BYTE", pool_start, 1,
+        trace_aspeed_i2c_bus_send("BYTE", 0, 1,
                                   bus->regs[reg_byte_buf]);
         ret = i2c_send(bus->bus, bus->regs[reg_byte_buf]);
     }
@@ -293,10 +293,14 @@ static void aspeed_i2c_bus_recv(AspeedI2CBus *bus)
     uint32_t reg_dma_len = aspeed_i2c_bus_dma_len_offset(bus);
     uint32_t reg_dma_addr = aspeed_i2c_bus_dma_addr_offset(bus);
     int pool_rx_count = SHARED_ARRAY_FIELD_EX32(bus->regs, reg_pool_ctrl,
-                                                RX_COUNT);
+                                                RX_SIZE) + 1;
 
     if (SHARED_ARRAY_FIELD_EX32(bus->regs, reg_cmd, RX_BUFF_EN)) {
         uint8_t *pool_base = aic->bus_pool_base(bus);
+        if (SHARED_ARRAY_FIELD_EX32(bus->regs, reg_pool_ctrl,
+                                    BUF_ORGANIZATION)) {
+            pool_base += 16;
+        }
 
         for (i = 0; i < pool_rx_count; i++) {
             pool_base[i] = i2c_recv(bus->bus);
@@ -418,7 +422,7 @@ static void aspeed_i2c_bus_cmd_dump(AspeedI2CBus *bus)
     uint32_t reg_intr_sts = aspeed_i2c_bus_intr_sts_offset(bus);
     uint32_t reg_dma_len = aspeed_i2c_bus_dma_len_offset(bus);
     if (SHARED_ARRAY_FIELD_EX32(bus->regs, reg_cmd, RX_BUFF_EN)) {
-        count = SHARED_ARRAY_FIELD_EX32(bus->regs, reg_pool_ctrl, TX_COUNT);
+        count = SHARED_ARRAY_FIELD_EX32(bus->regs, reg_pool_ctrl, TX_COUNT) + 1;
     } else if (SHARED_ARRAY_FIELD_EX32(bus->regs, reg_cmd, RX_DMA_EN)) {
         count = bus->regs[reg_dma_len];
     } else { /* BYTE mode */
@@ -446,10 +450,8 @@ static void aspeed_i2c_bus_cmd_dump(AspeedI2CBus *bus)
  */
 static void aspeed_i2c_bus_handle_cmd(AspeedI2CBus *bus, uint64_t value)
 {
-    uint8_t pool_start = 0;
     uint32_t reg_intr_sts = aspeed_i2c_bus_intr_sts_offset(bus);
     uint32_t reg_cmd = aspeed_i2c_bus_cmd_offset(bus);
-    uint32_t reg_pool_ctrl = aspeed_i2c_bus_pool_ctrl_offset(bus);
     uint32_t reg_dma_len = aspeed_i2c_bus_dma_len_offset(bus);
 
     if (!aspeed_i2c_check_sram(bus)) {
@@ -483,27 +485,11 @@ static void aspeed_i2c_bus_handle_cmd(AspeedI2CBus *bus, uint64_t value)
 
         SHARED_ARRAY_FIELD_DP32(bus->regs, reg_cmd, M_START_CMD, 0);
 
-        /*
-         * The START command is also a TX command, as the slave
-         * address is sent on the bus. Drop the TX flag if nothing
-         * else needs to be sent in this sequence.
-         */
-        if (SHARED_ARRAY_FIELD_EX32(bus->regs, reg_cmd, TX_BUFF_EN)) {
-            if (SHARED_ARRAY_FIELD_EX32(bus->regs, reg_pool_ctrl, TX_COUNT)
-                == 1) {
-                SHARED_ARRAY_FIELD_DP32(bus->regs, reg_cmd, M_TX_CMD, 0);
-            } else {
-                /*
-                 * Increase the start index in the TX pool buffer to
-                 * skip the address byte.
-                 */
-                pool_start++;
-            }
-        } else if (SHARED_ARRAY_FIELD_EX32(bus->regs, reg_cmd, TX_DMA_EN)) {
+        if (SHARED_ARRAY_FIELD_EX32(bus->regs, reg_cmd, TX_DMA_EN)) {
             if (bus->regs[reg_dma_len] == 0) {
                 SHARED_ARRAY_FIELD_DP32(bus->regs, reg_cmd, M_TX_CMD, 0);
             }
-        } else {
+        } else if (!SHARED_ARRAY_FIELD_EX32(bus->regs, reg_cmd, TX_BUFF_EN)) {
             SHARED_ARRAY_FIELD_DP32(bus->regs, reg_cmd, M_TX_CMD, 0);
         }
 
@@ -520,7 +506,7 @@ static void aspeed_i2c_bus_handle_cmd(AspeedI2CBus *bus, uint64_t value)
 
     if (SHARED_ARRAY_FIELD_EX32(bus->regs, reg_cmd, M_TX_CMD)) {
         aspeed_i2c_set_state(bus, I2CD_MTXD);
-        if (aspeed_i2c_bus_send(bus, pool_start)) {
+        if (aspeed_i2c_bus_send(bus)) {
             SHARED_ARRAY_FIELD_DP32(bus->regs, reg_intr_sts, TX_NAK, 1);
             i2c_end_transfer(bus->bus);
         } else {
diff --git a/hw/i2c/pm_smbus.c b/hw/i2c/pm_smbus.c
index d7eae548cb..9ad6a47739 100644
--- a/hw/i2c/pm_smbus.c
+++ b/hw/i2c/pm_smbus.c
@@ -1,6 +1,6 @@
 /*
  * PC SMBus implementation
- * splitted from acpi.c
+ * split from acpi.c
  *
  * Copyright (c) 2006 Fabrice Bellard
  *
diff --git a/hw/i2c/pmbus_device.c b/hw/i2c/pmbus_device.c
index 44fe4eddbb..cef51663d0 100644
--- a/hw/i2c/pmbus_device.c
+++ b/hw/i2c/pmbus_device.c
@@ -190,15 +190,18 @@ static void pmbus_quick_cmd(SMBusDevice *smd, uint8_t read)
     }
 }
 
+static uint8_t pmbus_pages_num(PMBusDevice *pmdev)
+{
+    const PMBusDeviceClass *k = PMBUS_DEVICE_GET_CLASS(pmdev);
+
+    /* some PMBus devices don't use the PAGE command, so they get 1 page */
+    return k->device_num_pages ? : 1;
+}
+
 static void pmbus_pages_alloc(PMBusDevice *pmdev)
 {
-    /* some PMBus devices don't use the PAGE command, so they get 1 page */
-    PMBusDeviceClass *k = PMBUS_DEVICE_GET_CLASS(pmdev);
-    if (k->device_num_pages == 0) {
-        k->device_num_pages = 1;
-    }
-    pmdev->num_pages = k->device_num_pages;
-    pmdev->pages = g_new0(PMBusPage, k->device_num_pages);
+    pmdev->num_pages = pmbus_pages_num(pmdev);
+    pmdev->pages = g_new0(PMBusPage, pmdev->num_pages);
 }
 
 void pmbus_check_limits(PMBusDevice *pmdev)
@@ -1623,7 +1626,7 @@ static int pmbus_write_data(SMBusDevice *smd, uint8_t *buf, uint8_t len)
         break;
 
 passthrough:
-    /* Unimplimented registers get passed to the device */
+    /* Unimplemented registers get passed to the device */
     default:
         if (pmdc->write_data) {
             ret = pmdc->write_data(pmdev, buf, len);
diff --git a/hw/i2c/smbus_slave.c b/hw/i2c/smbus_slave.c
index feb3ec6333..2ef2c7c5f6 100644
--- a/hw/i2c/smbus_slave.c
+++ b/hw/i2c/smbus_slave.c
@@ -2,7 +2,7 @@
  * QEMU SMBus device emulation.
  *
  * This code is a helper for SMBus device emulation.  It implements an
- * I2C device inteface and runs the SMBus protocol from the device
+ * I2C device interface and runs the SMBus protocol from the device
  * point of view and maps those to simple calls to emulate.
  *
  * Copyright (c) 2007 CodeSourcery.
diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index bb12b0ad43..4d2d40bab5 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -779,7 +779,7 @@ static Aml *initialize_route(Aml *route, const char *link_name,
  *
  * Returns an array of 128 routes, one for each device,
  * based on device location.
- * The main goal is to equaly distribute the interrupts
+ * The main goal is to equally distribute the interrupts
  * over the 4 existing ACPI links (works only for i440fx).
  * The hash function is  (slot + pin) & 3 -> "LNK[D|A|B|C]".
  *
@@ -2079,7 +2079,7 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine)
 }
 
 /*
- * Insert DMAR scope for PCI bridges and endpoint devcie
+ * Insert DMAR scope for PCI bridges and endpoint devices
  */
 static void
 insert_scope(PCIBus *bus, PCIDevice *dev, void *opaque)
diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c
index 9c77304438..c98a3c6e11 100644
--- a/hw/i386/amd_iommu.c
+++ b/hw/i386/amd_iommu.c
@@ -259,7 +259,7 @@ static void amdvi_log_command_error(AMDVIState *s, hwaddr addr)
     pci_word_test_and_set_mask(s->pci.dev.config + PCI_STATUS,
             PCI_STATUS_SIG_TARGET_ABORT);
 }
-/* log an illegal comand event
+/* log an illegal command event
  *   @addr : address of illegal command
  */
 static void amdvi_log_illegalcom_error(AMDVIState *s, uint16_t info,
@@ -767,7 +767,7 @@ static void amdvi_mmio_write(void *opaque, hwaddr addr, uint64_t val,
         break;
     case AMDVI_MMIO_COMMAND_BASE:
         amdvi_mmio_reg_write(s, size, val, addr);
-        /* FIXME - make sure System Software has finished writing incase
+        /* FIXME - make sure System Software has finished writing in case
          * it writes in chucks less than 8 bytes in a robust way.As for
          * now, this hacks works for the linux driver
          */
diff --git a/hw/i386/fw_cfg.c b/hw/i386/fw_cfg.c
index 72a42f3c66..7362daa45a 100644
--- a/hw/i386/fw_cfg.c
+++ b/hw/i386/fw_cfg.c
@@ -24,6 +24,7 @@
 #include "kvm/kvm_i386.h"
 #include "qapi/error.h"
 #include CONFIG_DEVICES
+#include "target/i386/cpu.h"
 
 struct hpet_fw_config hpet_cfg = {.count = UINT8_MAX};
 
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 3ca71df369..c0ce896668 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -52,7 +52,7 @@
 
 /*
  * PCI bus number (or SID) is not reliable since the device is usaully
- * initalized before guest can configure the PCI bridge
+ * initialized before guest can configure the PCI bridge
  * (SECONDARY_BUS_NUMBER).
  */
 struct vtd_as_key {
@@ -1694,7 +1694,7 @@ static bool vtd_switch_address_space(VTDAddressSpace *as)
      * """
      *
      * We enable per as memory region (iommu_ir_fault) for catching
-     * the tranlsation for interrupt range through PASID + PT.
+     * the translation for interrupt range through PASID + PT.
      */
     if (pt && as->pasid != PCI_NO_PASID) {
         memory_region_set_enabled(&as->iommu_ir_fault, true);
@@ -4053,7 +4053,7 @@ static bool vtd_decide_config(IntelIOMMUState *s, Error **errp)
             error_setg(errp, "eim=on requires accel=kvm,kernel-irqchip=split");
             return false;
         }
-        if (!kvm_enable_x2apic()) {
+        if (kvm_enabled() && !kvm_enable_x2apic()) {
             error_setg(errp, "eim=on requires support on the KVM side"
                              "(X2APIC_API, first shipped in v4.7)");
             return false;
diff --git a/hw/i386/kvm/clock.c b/hw/i386/kvm/clock.c
index df70b4a033..34348a3324 100644
--- a/hw/i386/kvm/clock.c
+++ b/hw/i386/kvm/clock.c
@@ -22,7 +22,7 @@
 #include "kvm/kvm_i386.h"
 #include "migration/vmstate.h"
 #include "hw/sysbus.h"
-#include "hw/kvm/clock.h"
+#include "hw/i386/kvm/clock.h"
 #include "hw/qdev-properties.h"
 #include "qapi/error.h"
 
@@ -332,8 +332,10 @@ void kvmclock_create(bool create_always)
 {
     X86CPU *cpu = X86_CPU(first_cpu);
 
-    if (!kvm_enabled() || !kvm_has_adjust_clock())
+    assert(kvm_enabled());
+    if (!kvm_has_adjust_clock()) {
         return;
+    }
 
     if (create_always ||
         cpu->env.features[FEAT_KVM] & ((1ULL << KVM_FEATURE_CLOCKSOURCE) |
diff --git a/include/hw/kvm/clock.h b/hw/i386/kvm/clock.h
similarity index 65%
rename from include/hw/kvm/clock.h
rename to hw/i386/kvm/clock.h
index 7994071c4f..401c7e445b 100644
--- a/include/hw/kvm/clock.h
+++ b/hw/i386/kvm/clock.h
@@ -10,19 +10,9 @@
  * See the COPYING file in the top-level directory.
  */
 
-#ifndef HW_KVM_CLOCK_H
-#define HW_KVM_CLOCK_H
-
-#ifdef CONFIG_KVM
+#ifndef HW_I386_KVM_CLOCK_H
+#define HW_I386_KVM_CLOCK_H
 
 void kvmclock_create(bool create_always);
 
-#else /* CONFIG_KVM */
-
-static inline void kvmclock_create(bool create_always)
-{
-}
-
-#endif /* !CONFIG_KVM */
-
 #endif
diff --git a/hw/i386/kvm/i8254.c b/hw/i386/kvm/i8254.c
index 6a7383d877..a649b2b7ca 100644
--- a/hw/i386/kvm/i8254.c
+++ b/hw/i386/kvm/i8254.c
@@ -34,6 +34,7 @@
 #include "hw/timer/i8254_internal.h"
 #include "hw/qdev-properties-system.h"
 #include "sysemu/kvm.h"
+#include "target/i386/kvm/kvm_i386.h"
 #include "qom/object.h"
 
 #define KVM_PIT_REINJECT_BIT 0
diff --git a/hw/i386/kvm/ioapic.c b/hw/i386/kvm/ioapic.c
index cd5ea5d60b..409d0c8c76 100644
--- a/hw/i386/kvm/ioapic.c
+++ b/hw/i386/kvm/ioapic.c
@@ -16,6 +16,7 @@
 #include "hw/intc/ioapic_internal.h"
 #include "hw/intc/kvm_irqcount.h"
 #include "sysemu/kvm.h"
+#include "kvm/kvm_i386.h"
 
 /* PC Utility function */
 void kvm_pc_setup_irq_routing(bool pci_enabled)
diff --git a/hw/i386/kvm/xen_xenstore.c b/hw/i386/kvm/xen_xenstore.c
index 133d89e953..660d0b72f9 100644
--- a/hw/i386/kvm/xen_xenstore.c
+++ b/hw/i386/kvm/xen_xenstore.c
@@ -1156,7 +1156,7 @@ static unsigned int copy_to_ring(XenXenstoreState *s, uint8_t *ptr,
 
     /*
      * This matches the barrier in copy_to_ring() (or the guest's
-     * equivalent) betweem writing the data to the ring and updating
+     * equivalent) between writing the data to the ring and updating
      * rsp_prod. It protects against the pathological case (which
      * again I think never happened except on Alpha) where our
      * subsequent writes to the ring could *cross* the read of
diff --git a/hw/i386/kvm/xenstore_impl.c b/hw/i386/kvm/xenstore_impl.c
index d9732b567e..1d134a6866 100644
--- a/hw/i386/kvm/xenstore_impl.c
+++ b/hw/i386/kvm/xenstore_impl.c
@@ -1436,7 +1436,7 @@ static void save_node(gpointer key, gpointer value, gpointer opaque)
     /*
      * If we already wrote this node, refer to the previous copy.
      * There's no rename/move in XenStore, so all we need to find
-     * it is the tx_id of the transation in which it exists. Which
+     * it is the tx_id of the transaction in which it exists. Which
      * may be the root tx.
      */
     if (n->serialized_tx != XBT_NULL) {
diff --git a/hw/i386/microvm.c b/hw/i386/microvm.c
index 7227a2156c..8deeb62774 100644
--- a/hw/i386/microvm.c
+++ b/hw/i386/microvm.c
@@ -32,7 +32,7 @@
 
 #include "hw/loader.h"
 #include "hw/irq.h"
-#include "hw/kvm/clock.h"
+#include "hw/i386/kvm/clock.h"
 #include "hw/i386/microvm.h"
 #include "hw/i386/x86.h"
 #include "target/i386/cpu.h"
@@ -180,7 +180,9 @@ static void microvm_devices_init(MicrovmMachineState *mms)
         x86ms->ioapic2 = ioapic_init_secondary(gsi_state);
     }
 
-    kvmclock_create(true);
+    if (kvm_enabled()) {
+        kvmclock_create(true);
+    }
 
     mms->virtio_irq_base = 5;
     mms->virtio_num_transports = 8;
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 54838c0c41..3db0743f31 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -436,7 +436,7 @@ static uint64_t ioport80_read(void *opaque, hwaddr addr, unsigned size)
     return 0xffffffffffffffffULL;
 }
 
-/* MSDOS compatibility mode FPU exception support */
+/* MS-DOS compatibility mode FPU exception support */
 static void ioportF0_write(void *opaque, hwaddr addr, uint64_t data,
                            unsigned size)
 {
@@ -1746,16 +1746,16 @@ static void pc_machine_set_max_fw_size(Object *obj, Visitor *v,
     }
 
     /*
-    * We don't have a theoretically justifiable exact lower bound on the base
-    * address of any flash mapping. In practice, the IO-APIC MMIO range is
-    * [0xFEE00000..0xFEE01000] -- see IO_APIC_DEFAULT_ADDRESS --, leaving free
-    * only 18MB-4KB below 4G. For now, restrict the cumulative mapping to 8MB in
-    * size.
-    */
+     * We don't have a theoretically justifiable exact lower bound on the base
+     * address of any flash mapping. In practice, the IO-APIC MMIO range is
+     * [0xFEE00000..0xFEE01000] -- see IO_APIC_DEFAULT_ADDRESS --, leaving free
+     * only 18MiB-4KiB below 4GiB. For now, restrict the cumulative mapping to
+     * 16MiB in size.
+     */
     if (value > 16 * MiB) {
         error_setg(errp,
                    "User specified max allowed firmware size %" PRIu64 " is "
-                   "greater than 16MiB. If combined firwmare size exceeds "
+                   "greater than 16MiB. If combined firmware size exceeds "
                    "16MiB the system may not boot, or experience intermittent"
                    "stability issues.",
                    value);
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index ce1ac95274..8321f36f97 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -46,7 +46,7 @@
 #include "hw/ide/piix.h"
 #include "hw/irq.h"
 #include "sysemu/kvm.h"
-#include "hw/kvm/clock.h"
+#include "hw/i386/kvm/clock.h"
 #include "hw/sysbus.h"
 #include "hw/i2c/smbus_eeprom.h"
 #include "exec/memory.h"
@@ -69,6 +69,7 @@
 #include "hw/mem/nvdimm.h"
 #include "hw/i386/acpi-build.h"
 #include "kvm/kvm-cpu.h"
+#include "target/i386/cpu.h"
 
 #define MAX_IDE_BUS 2
 #define XEN_IOAPIC_NUM_PIRQS 128ULL
@@ -192,7 +193,7 @@ static void pc_init1(MachineState *machine,
     pc_machine_init_sgx_epc(pcms);
     x86_cpus_init(x86ms, pcmc->default_cpu_version);
 
-    if (pcmc->kvmclock_enabled) {
+    if (kvm_enabled() && pcmc->kvmclock_enabled) {
         kvmclock_create(pcmc->kvmclock_create_always);
     }
 
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index 37c4814bed..2dd1158b70 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -34,8 +34,9 @@
 #include "hw/loader.h"
 #include "hw/i2c/smbus_eeprom.h"
 #include "hw/rtc/mc146818rtc.h"
+#include "sysemu/tcg.h"
 #include "sysemu/kvm.h"
-#include "hw/kvm/clock.h"
+#include "hw/i386/kvm/clock.h"
 #include "hw/pci-host/q35.h"
 #include "hw/pci/pcie_port.h"
 #include "hw/qdev-properties.h"
@@ -57,6 +58,7 @@
 #include "hw/hyperv/vmbus-bridge.h"
 #include "hw/mem/nvdimm.h"
 #include "hw/i386/acpi-build.h"
+#include "target/i386/cpu.h"
 
 /* ICH9 AHCI has 6 ports */
 #define MAX_SATA_PORTS     6
@@ -183,7 +185,9 @@ static void pc_q35_init(MachineState *machine)
     pc_machine_init_sgx_epc(pcms);
     x86_cpus_init(x86ms, pcmc->default_cpu_version);
 
-    kvmclock_create(pcmc->kvmclock_create_always);
+    if (kvm_enabled()) {
+        kvmclock_create(pcmc->kvmclock_create_always);
+    }
 
     /* pci enabled */
     if (pcmc->pci_enabled) {
diff --git a/hw/i386/trace-events b/hw/i386/trace-events
index 04fd71bfc4..53c02d7ac8 100644
--- a/hw/i386/trace-events
+++ b/hw/i386/trace-events
@@ -121,3 +121,13 @@ x86_pic_interrupt(int irqn, int level) "PIC interrupt #%d level:%d"
 # port92.c
 port92_read(uint8_t val) "port92: read 0x%02x"
 port92_write(uint8_t val) "port92: write 0x%02x"
+
+# vmmouse.c
+vmmouse_get_status(void) ""
+vmmouse_mouse_event(int x, int y, int dz, int buttons_state) "event: x=%d y=%d dz=%d state=%d"
+vmmouse_init(void) ""
+vmmouse_read_id(void) ""
+vmmouse_request_relative(void) ""
+vmmouse_request_absolute(void) ""
+vmmouse_disable(void) ""
+vmmouse_data(uint32_t size) "data: size=%" PRIu32
diff --git a/hw/i386/vmmouse.c b/hw/i386/vmmouse.c
index 6cd624bd09..91320afa2f 100644
--- a/hw/i386/vmmouse.c
+++ b/hw/i386/vmmouse.c
@@ -32,6 +32,8 @@
 #include "cpu.h"
 #include "qom/object.h"
 
+#include "trace.h"
+
 /* debug only vmmouse */
 //#define DEBUG_VMMOUSE
 
@@ -50,11 +52,10 @@
 #define VMMOUSE_RIGHT_BUTTON       0x10
 #define VMMOUSE_MIDDLE_BUTTON      0x08
 
-#ifdef DEBUG_VMMOUSE
-#define DPRINTF(fmt, ...) printf(fmt, ## __VA_ARGS__)
-#else
-#define DPRINTF(fmt, ...) do { } while (0)
-#endif
+#define VMMOUSE_MIN_X 0
+#define VMMOUSE_MIN_Y 0
+#define VMMOUSE_MAX_X 0xFFFF
+#define VMMOUSE_MAX_Y 0xFFFF
 
 #define TYPE_VMMOUSE "vmmouse"
 OBJECT_DECLARE_SIMPLE_TYPE(VMMouseState, VMMOUSE)
@@ -93,7 +94,8 @@ static void vmmouse_set_data(const uint32_t *data)
 
 static uint32_t vmmouse_get_status(VMMouseState *s)
 {
-    DPRINTF("vmmouse_get_status()\n");
+    trace_vmmouse_get_status();
+
     return (s->status << 16) | s->nb_queue;
 }
 
@@ -105,8 +107,7 @@ static void vmmouse_mouse_event(void *opaque, int x, int y, int dz, int buttons_
     if (s->nb_queue > (VMMOUSE_QUEUE_SIZE - 4))
         return;
 
-    DPRINTF("vmmouse_mouse_event(%d, %d, %d, %d)\n",
-            x, y, dz, buttons_state);
+    trace_vmmouse_mouse_event(x, y, dz, buttons_state);
 
     if ((buttons_state & MOUSE_EVENT_LBUTTON))
         buttons |= VMMOUSE_LEFT_BUTTON;
@@ -116,8 +117,12 @@ static void vmmouse_mouse_event(void *opaque, int x, int y, int dz, int buttons_
         buttons |= VMMOUSE_MIDDLE_BUTTON;
 
     if (s->absolute) {
-        x <<= 1;
-        y <<= 1;
+        x = qemu_input_scale_axis(x,
+                                  INPUT_EVENT_ABS_MIN, INPUT_EVENT_ABS_MAX,
+                                  VMMOUSE_MIN_X, VMMOUSE_MAX_X);
+        y = qemu_input_scale_axis(y,
+                                  INPUT_EVENT_ABS_MIN, INPUT_EVENT_ABS_MAX,
+                                  VMMOUSE_MIN_Y, VMMOUSE_MAX_Y);
     } else{
         /* add for guest vmmouse driver to judge this is a relative packet. */
         buttons |= VMMOUSE_RELATIVE_PACKET;
@@ -160,7 +165,7 @@ static void vmmouse_update_handler(VMMouseState *s, int absolute)
 
 static void vmmouse_read_id(VMMouseState *s)
 {
-    DPRINTF("vmmouse_read_id()\n");
+    trace_vmmouse_read_id();
 
     if (s->nb_queue == VMMOUSE_QUEUE_SIZE)
         return;
@@ -172,19 +177,22 @@ static void vmmouse_read_id(VMMouseState *s)
 
 static void vmmouse_request_relative(VMMouseState *s)
 {
-    DPRINTF("vmmouse_request_relative()\n");
+    trace_vmmouse_request_relative();
+
     vmmouse_update_handler(s, 0);
 }
 
 static void vmmouse_request_absolute(VMMouseState *s)
 {
-    DPRINTF("vmmouse_request_absolute()\n");
+    trace_vmmouse_request_absolute();
+
     vmmouse_update_handler(s, 1);
 }
 
 static void vmmouse_disable(VMMouseState *s)
 {
-    DPRINTF("vmmouse_disable()\n");
+    trace_vmmouse_disable();
+
     s->status = 0xffff;
     vmmouse_remove_handler(s);
 }
@@ -193,7 +201,7 @@ static void vmmouse_data(VMMouseState *s, uint32_t *data, uint32_t size)
 {
     int i;
 
-    DPRINTF("vmmouse_data(%d)\n", size);
+    trace_vmmouse_data(size);
 
     if (size == 0 || size > 6 || size > s->nb_queue) {
         printf("vmmouse: driver requested too much data %d\n", size);
@@ -293,7 +301,7 @@ static void vmmouse_realizefn(DeviceState *dev, Error **errp)
 {
     VMMouseState *s = VMMOUSE(dev);
 
-    DPRINTF("vmmouse_init\n");
+    trace_vmmouse_init();
 
     if (!s->i8042) {
         error_setg(errp, "'i8042' link is not set");
diff --git a/hw/i386/x86.c b/hw/i386/x86.c
index a88a126123..f034df8bf6 100644
--- a/hw/i386/x86.c
+++ b/hw/i386/x86.c
@@ -129,13 +129,10 @@ void x86_cpus_init(X86MachineState *x86ms, int default_cpu_version)
                                                       ms->smp.max_cpus - 1) + 1;
 
     /*
-     * Can we support APIC ID 255 or higher?
-     *
-     * Under Xen: yes.
-     * With userspace emulated lapic: no
-     * With KVM's in-kernel lapic: only if X2APIC API is enabled.
+     * Can we support APIC ID 255 or higher?  With KVM, that requires
+     * both in-kernel lapic and X2APIC userspace API.
      */
-    if (x86ms->apic_id_limit > 255 && !xen_enabled() &&
+    if (x86ms->apic_id_limit > 255 && kvm_enabled() &&
         (!kvm_irqchip_in_kernel() || !kvm_enable_x2apic())) {
         error_report("current -smp configuration requires kernel "
                      "irqchip and X2APIC API support.");
@@ -424,7 +421,7 @@ void x86_cpu_pre_plug(HotplugHandler *hotplug_dev,
     cpu->thread_id = topo_ids.smt_id;
 
     if (hyperv_feat_enabled(cpu, HYPERV_FEAT_VPINDEX) &&
-        !kvm_hv_vpindex_settable()) {
+        kvm_enabled() && !kvm_hv_vpindex_settable()) {
         error_setg(errp, "kernel doesn't allow setting HyperV VP_INDEX");
         return;
     }
diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c
index 48d550f633..d0a774bc17 100644
--- a/hw/ide/ahci.c
+++ b/hw/ide/ahci.c
@@ -41,9 +41,10 @@
 #include "trace.h"
 
 static void check_cmd(AHCIState *s, int port);
-static int handle_cmd(AHCIState *s, int port, uint8_t slot);
+static void handle_cmd(AHCIState *s, int port, uint8_t slot);
 static void ahci_reset_port(AHCIState *s, int port);
-static bool ahci_write_fis_d2h(AHCIDevice *ad);
+static bool ahci_write_fis_d2h(AHCIDevice *ad, bool d2h_fis_i);
+static void ahci_clear_cmd_issue(AHCIDevice *ad, uint8_t slot);
 static void ahci_init_d2h(AHCIDevice *ad);
 static int ahci_dma_prepare_buf(const IDEDMA *dma, int32_t limit);
 static bool ahci_map_clb_address(AHCIDevice *ad);
@@ -328,6 +329,11 @@ static void ahci_port_write(AHCIState *s, int port, int offset, uint32_t val)
         ahci_check_irq(s);
         break;
     case AHCI_PORT_REG_CMD:
+        if ((pr->cmd & PORT_CMD_START) && !(val & PORT_CMD_START)) {
+            pr->scr_act = 0;
+            pr->cmd_issue = 0;
+        }
+
         /* Block any Read-only fields from being set;
          * including LIST_ON and FIS_ON.
          * The spec requires to set ICC bits to zero after the ICC change
@@ -591,9 +597,8 @@ static void check_cmd(AHCIState *s, int port)
 
     if ((pr->cmd & PORT_CMD_START) && pr->cmd_issue) {
         for (slot = 0; (slot < 32) && pr->cmd_issue; slot++) {
-            if ((pr->cmd_issue & (1U << slot)) &&
-                !handle_cmd(s, port, slot)) {
-                pr->cmd_issue &= ~(1U << slot);
+            if (pr->cmd_issue & (1U << slot)) {
+                handle_cmd(s, port, slot);
             }
         }
     }
@@ -618,7 +623,7 @@ static void ahci_init_d2h(AHCIDevice *ad)
         return;
     }
 
-    if (ahci_write_fis_d2h(ad)) {
+    if (ahci_write_fis_d2h(ad, true)) {
         ad->init_d2h_sent = true;
         /* We're emulating receiving the first Reg H2D Fis from the device;
          * Update the SIG register, but otherwise proceed as normal. */
@@ -801,8 +806,14 @@ static void ahci_write_fis_sdb(AHCIState *s, NCQTransferState *ncq_tfs)
     pr->scr_act &= ~ad->finished;
     ad->finished = 0;
 
-    /* Trigger IRQ if interrupt bit is set (which currently, it always is) */
-    if (sdb_fis->flags & 0x40) {
+    /*
+     * TFES IRQ is always raised if ERR_STAT is set, regardless of I bit.
+     * If ERR_STAT is not set, trigger SDBS IRQ if interrupt bit is set
+     * (which currently, it always is).
+     */
+    if (sdb_fis->status & ERR_STAT) {
+        ahci_trigger_irq(s, ad, AHCI_PORT_IRQ_BIT_TFES);
+    } else if (sdb_fis->flags & 0x40) {
         ahci_trigger_irq(s, ad, AHCI_PORT_IRQ_BIT_SDBS);
     }
 }
@@ -850,7 +861,7 @@ static void ahci_write_fis_pio(AHCIDevice *ad, uint16_t len, bool pio_fis_i)
     }
 }
 
-static bool ahci_write_fis_d2h(AHCIDevice *ad)
+static bool ahci_write_fis_d2h(AHCIDevice *ad, bool d2h_fis_i)
 {
     AHCIPortRegs *pr = &ad->port_regs;
     uint8_t *d2h_fis;
@@ -864,7 +875,7 @@ static bool ahci_write_fis_d2h(AHCIDevice *ad)
     d2h_fis = &ad->res_fis[RES_FIS_RFIS];
 
     d2h_fis[0] = SATA_FIS_TYPE_REGISTER_D2H;
-    d2h_fis[1] = (1 << 6); /* interrupt bit */
+    d2h_fis[1] = d2h_fis_i ? (1 << 6) : 0; /* interrupt bit */
     d2h_fis[2] = s->status;
     d2h_fis[3] = s->error;
 
@@ -890,7 +901,10 @@ static bool ahci_write_fis_d2h(AHCIDevice *ad)
         ahci_trigger_irq(ad->hba, ad, AHCI_PORT_IRQ_BIT_TFES);
     }
 
-    ahci_trigger_irq(ad->hba, ad, AHCI_PORT_IRQ_BIT_DHRS);
+    if (d2h_fis_i) {
+        ahci_trigger_irq(ad->hba, ad, AHCI_PORT_IRQ_BIT_DHRS);
+    }
+
     return true;
 }
 
@@ -998,7 +1012,6 @@ static void ncq_err(NCQTransferState *ncq_tfs)
 
     ide_state->error = ABRT_ERR;
     ide_state->status = READY_STAT | ERR_STAT;
-    ncq_tfs->drive->port_regs.scr_err |= (1 << ncq_tfs->tag);
     qemu_sglist_destroy(&ncq_tfs->sglist);
     ncq_tfs->used = 0;
 }
@@ -1008,7 +1021,7 @@ static void ncq_finish(NCQTransferState *ncq_tfs)
     /* If we didn't error out, set our finished bit. Errored commands
      * do not get a bit set for the SDB FIS ACT register, nor do they
      * clear the outstanding bit in scr_act (PxSACT). */
-    if (!(ncq_tfs->drive->port_regs.scr_err & (1 << ncq_tfs->tag))) {
+    if (ncq_tfs->used) {
         ncq_tfs->drive->finished |= (1 << ncq_tfs->tag);
     }
 
@@ -1120,6 +1133,24 @@ static void process_ncq_command(AHCIState *s, int port, const uint8_t *cmd_fis,
         return;
     }
 
+    /*
+     * A NCQ command clears the bit in PxCI after the command has been QUEUED
+     * successfully (ERROR not set, BUSY and DRQ cleared).
+     *
+     * For NCQ commands, PxCI will always be cleared here.
+     *
+     * (Once the NCQ command is COMPLETED, the device will send a SDB FIS with
+     * the interrupt bit set, which will clear PxSACT and raise an interrupt.)
+     */
+    ahci_clear_cmd_issue(ad, slot);
+
+    /*
+     * In reality, for NCQ commands, PxCI is cleared after receiving a D2H FIS
+     * without the interrupt bit set, but since ahci_write_fis_d2h() can raise
+     * an IRQ on error, we need to call them in reverse order.
+     */
+    ahci_write_fis_d2h(ad, false);
+
     ncq_tfs->used = 1;
     ncq_tfs->drive = ad;
     ncq_tfs->slot = slot;
@@ -1192,6 +1223,7 @@ static void handle_reg_h2d_fis(AHCIState *s, int port,
 {
     IDEState *ide_state = &s->dev[port].port.ifs[0];
     AHCICmdHdr *cmd = get_cmd_header(s, port, slot);
+    AHCIDevice *ad = &s->dev[port];
     uint16_t opts = le16_to_cpu(cmd->opts);
 
     if (cmd_fis[1] & 0x0F) {
@@ -1268,11 +1300,19 @@ static void handle_reg_h2d_fis(AHCIState *s, int port,
     /* Reset transferred byte counter */
     cmd->status = 0;
 
+    /*
+     * A non-NCQ command clears the bit in PxCI after the command has COMPLETED
+     * successfully (ERROR not set, BUSY and DRQ cleared).
+     *
+     * For non-NCQ commands, PxCI will always be cleared by ahci_cmd_done().
+     */
+    ad->busy_slot = slot;
+
     /* We're ready to process the command in FIS byte 2. */
     ide_bus_exec_cmd(&s->dev[port].port, cmd_fis[2]);
 }
 
-static int handle_cmd(AHCIState *s, int port, uint8_t slot)
+static void handle_cmd(AHCIState *s, int port, uint8_t slot)
 {
     IDEState *ide_state;
     uint64_t tbl_addr;
@@ -1283,12 +1323,12 @@ static int handle_cmd(AHCIState *s, int port, uint8_t slot)
     if (s->dev[port].port.ifs[0].status & (BUSY_STAT|DRQ_STAT)) {
         /* Engine currently busy, try again later */
         trace_handle_cmd_busy(s, port);
-        return -1;
+        return;
     }
 
     if (!s->dev[port].lst) {
         trace_handle_cmd_nolist(s, port);
-        return -1;
+        return;
     }
     cmd = get_cmd_header(s, port, slot);
     /* remember current slot handle for later */
@@ -1298,7 +1338,7 @@ static int handle_cmd(AHCIState *s, int port, uint8_t slot)
     ide_state = &s->dev[port].port.ifs[0];
     if (!ide_state->blk) {
         trace_handle_cmd_badport(s, port);
-        return -1;
+        return;
     }
 
     tbl_addr = le64_to_cpu(cmd->tbl_addr);
@@ -1307,7 +1347,7 @@ static int handle_cmd(AHCIState *s, int port, uint8_t slot)
                              DMA_DIRECTION_TO_DEVICE, MEMTXATTRS_UNSPECIFIED);
     if (!cmd_fis) {
         trace_handle_cmd_badfis(s, port);
-        return -1;
+        return;
     } else if (cmd_len != 0x80) {
         ahci_trigger_irq(s, &s->dev[port], AHCI_PORT_IRQ_BIT_HBFS);
         trace_handle_cmd_badmap(s, port, cmd_len);
@@ -1331,15 +1371,6 @@ static int handle_cmd(AHCIState *s, int port, uint8_t slot)
 out:
     dma_memory_unmap(s->as, cmd_fis, cmd_len, DMA_DIRECTION_TO_DEVICE,
                      cmd_len);
-
-    if (s->dev[port].port.ifs[0].status & (BUSY_STAT|DRQ_STAT)) {
-        /* async command, complete later */
-        s->dev[port].busy_slot = slot;
-        return -1;
-    }
-
-    /* done handling the command */
-    return 0;
 }
 
 /* Transfer PIO data between RAM and device */
@@ -1493,22 +1524,39 @@ static int ahci_dma_rw_buf(const IDEDMA *dma, bool is_write)
     return 1;
 }
 
+static void ahci_clear_cmd_issue(AHCIDevice *ad, uint8_t slot)
+{
+    IDEState *ide_state = &ad->port.ifs[0];
+
+    if (!(ide_state->status & ERR_STAT) &&
+        !(ide_state->status & (BUSY_STAT | DRQ_STAT))) {
+        ad->port_regs.cmd_issue &= ~(1 << slot);
+    }
+}
+
+/* Non-NCQ command is done - This function is never called for NCQ commands. */
 static void ahci_cmd_done(const IDEDMA *dma)
 {
     AHCIDevice *ad = DO_UPCAST(AHCIDevice, dma, dma);
+    IDEState *ide_state = &ad->port.ifs[0];
 
     trace_ahci_cmd_done(ad->hba, ad->port_no);
 
     /* no longer busy */
     if (ad->busy_slot != -1) {
-        ad->port_regs.cmd_issue &= ~(1 << ad->busy_slot);
+        ahci_clear_cmd_issue(ad, ad->busy_slot);
         ad->busy_slot = -1;
     }
 
-    /* update d2h status */
-    ahci_write_fis_d2h(ad);
+    /*
+     * In reality, for non-NCQ commands, PxCI is cleared after receiving a D2H
+     * FIS with the interrupt bit set, but since ahci_write_fis_d2h() will raise
+     * an IRQ, we need to call them in reverse order.
+     */
+    ahci_write_fis_d2h(ad, true);
 
-    if (ad->port_regs.cmd_issue && !ad->check_bh) {
+    if (!(ide_state->status & ERR_STAT) &&
+        ad->port_regs.cmd_issue && !ad->check_bh) {
         ad->check_bh = qemu_bh_new_guarded(ahci_check_cmd_bh, ad,
                                            &ad->mem_reentrancy_guard);
         qemu_bh_schedule(ad->check_bh);
diff --git a/hw/ide/ahci_internal.h b/hw/ide/ahci_internal.h
index 2480455372..c244bbd8be 100644
--- a/hw/ide/ahci_internal.h
+++ b/hw/ide/ahci_internal.h
@@ -61,13 +61,13 @@ enum AHCIHostReg {
     AHCI_HOST_REG_CTL        = 1,  /* GHC: global host control */
     AHCI_HOST_REG_IRQ_STAT   = 2,  /* IS: interrupt status */
     AHCI_HOST_REG_PORTS_IMPL = 3,  /* PI: bitmap of implemented ports */
-    AHCI_HOST_REG_VERSION    = 4,  /* VS: AHCI spec. version compliancy */
+    AHCI_HOST_REG_VERSION    = 4,  /* VS: AHCI spec. version compliance */
     AHCI_HOST_REG_CCC_CTL    = 5,  /* CCC_CTL: CCC Control */
     AHCI_HOST_REG_CCC_PORTS  = 6,  /* CCC_PORTS: CCC Ports */
     AHCI_HOST_REG_EM_LOC     = 7,  /* EM_LOC: Enclosure Mgmt Location */
     AHCI_HOST_REG_EM_CTL     = 8,  /* EM_CTL: Enclosure Mgmt Control */
     AHCI_HOST_REG_CAP2       = 9,  /* CAP2: host capabilities, extended */
-    AHCI_HOST_REG_BOHC       = 10, /* BOHC: firmare/os handoff ctrl & status */
+    AHCI_HOST_REG_BOHC       = 10, /* BOHC: firmware/os handoff ctrl & status */
     AHCI_HOST_REG__COUNT     = 11
 };
 
diff --git a/hw/ide/cmd646.c b/hw/ide/cmd646.c
index cabe9048b1..c0bcfa4414 100644
--- a/hw/ide/cmd646.c
+++ b/hw/ide/cmd646.c
@@ -257,7 +257,7 @@ static void pci_cmd646_ide_realize(PCIDevice *dev, Error **errp)
 
     pci_conf[CNTRL] = CNTRL_EN_CH0; // enable IDE0
     if (d->secondary) {
-        /* XXX: if not enabled, really disable the seconday IDE controller */
+        /* XXX: if not enabled, really disable the secondary IDE controller */
         pci_conf[CNTRL] |= CNTRL_EN_CH1; /* enable IDE1 */
     }
 
diff --git a/hw/ide/core.c b/hw/ide/core.c
index de48ff9f86..b5e0dcd29b 100644
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -533,9 +533,9 @@ BlockAIOCB *ide_issue_trim(
 
 void ide_abort_command(IDEState *s)
 {
-    ide_transfer_stop(s);
     s->status = READY_STAT | ERR_STAT;
     s->error = ABRT_ERR;
+    ide_transfer_stop(s);
 }
 
 static void ide_set_retry(IDEState *s)
@@ -1698,7 +1698,7 @@ static bool cmd_set_features(IDEState *s, uint8_t cmd)
                 put_le16(identify_data + 63, 0x07);
                 put_le16(identify_data + 88, 0x3f);
                 break;
-            case 0x02: /* sigle word dma mode*/
+            case 0x02: /* single word dma mode */
                 put_le16(identify_data + 62, 0x07 | (1 << (val + 8)));
                 put_le16(identify_data + 63, 0x07);
                 put_le16(identify_data + 88, 0x3f);
diff --git a/hw/input/hid.c b/hw/input/hid.c
index e7ecebdf8f..a9c7dd1ce1 100644
--- a/hw/input/hid.c
+++ b/hw/input/hid.c
@@ -209,7 +209,7 @@ static void hid_pointer_sync(DeviceState *dev)
         prev->dz += curr->dz;
         curr->dz = 0;
     } else {
-        /* prepate next (clear rel, copy abs + btns) */
+        /* prepare next (clear rel, copy abs + btns) */
         if (hs->kind == HID_MOUSE) {
             next->xdx = 0;
             next->ydy = 0;
diff --git a/hw/input/tsc2005.c b/hw/input/tsc2005.c
index 555b677173..db2b80e35f 100644
--- a/hw/input/tsc2005.c
+++ b/hw/input/tsc2005.c
@@ -157,14 +157,14 @@ static uint16_t tsc2005_read(TSC2005State *s, int reg)
         s->reset = true;
         return ret;
 
-    case 0x8:	/* AUX high treshold */
+    case 0x8:   /* AUX high threshold */
         return s->aux_thr[1];
-    case 0x9:	/* AUX low treshold */
+    case 0x9:   /* AUX low threshold */
         return s->aux_thr[0];
 
-    case 0xa:	/* TEMP high treshold */
+    case 0xa:   /* TEMP high threshold */
         return s->temp_thr[1];
-    case 0xb:	/* TEMP low treshold */
+    case 0xb:   /* TEMP low threshold */
         return s->temp_thr[0];
 
     case 0xc:	/* CFR0 */
@@ -186,17 +186,17 @@ static uint16_t tsc2005_read(TSC2005State *s, int reg)
 static void tsc2005_write(TSC2005State *s, int reg, uint16_t data)
 {
     switch (reg) {
-    case 0x8:	/* AUX high treshold */
+    case 0x8:   /* AUX high threshold */
         s->aux_thr[1] = data;
         break;
-    case 0x9:	/* AUX low treshold */
+    case 0x9:   /* AUX low threshold */
         s->aux_thr[0] = data;
         break;
 
-    case 0xa:	/* TEMP high treshold */
+    case 0xa:   /* TEMP high threshold */
         s->temp_thr[1] = data;
         break;
-    case 0xb:	/* TEMP low treshold */
+    case 0xb:   /* TEMP low threshold */
         s->temp_thr[0] = data;
         break;
 
diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c
index 4a34f03047..68ad30e2f5 100644
--- a/hw/intc/apic_common.c
+++ b/hw/intc/apic_common.c
@@ -28,7 +28,6 @@
 #include "hw/intc/kvm_irqcount.h"
 #include "trace.h"
 #include "hw/boards.h"
-#include "sysemu/hax.h"
 #include "sysemu/kvm.h"
 #include "hw/qdev-properties.h"
 #include "hw/sysbus.h"
@@ -271,7 +270,7 @@ static void apic_common_realize(DeviceState *dev, Error **errp)
 
     /* Note: We need at least 1M to map the VAPIC option ROM */
     if (!vapic && s->vapic_control & VAPIC_ENABLE_MASK &&
-        !hax_enabled() && current_machine->ram_size >= 1024 * 1024) {
+            current_machine->ram_size >= 1024 * 1024) {
         vapic = sysbus_create_simple("kvmvapic", -1, NULL);
     }
     s->vapic = vapic;
diff --git a/hw/intc/arm_gicv3_its.c b/hw/intc/arm_gicv3_its.c
index 43dfd7a35c..5f552b4d37 100644
--- a/hw/intc/arm_gicv3_its.c
+++ b/hw/intc/arm_gicv3_its.c
@@ -330,23 +330,20 @@ static MemTxResult get_vte(GICv3ITSState *s, uint32_t vpeid, VTEntry *vte)
     if (entry_addr == -1) {
         /* No L2 table entry, i.e. no valid VTE, or a memory error */
         vte->valid = false;
-        goto out;
+        trace_gicv3_its_vte_read_fault(vpeid);
+        return MEMTX_OK;
     }
     vteval = address_space_ldq_le(as, entry_addr, MEMTXATTRS_UNSPECIFIED, &res);
     if (res != MEMTX_OK) {
-        goto out;
+        trace_gicv3_its_vte_read_fault(vpeid);
+        return res;
     }
     vte->valid = FIELD_EX64(vteval, VTE, VALID);
     vte->vptsize = FIELD_EX64(vteval, VTE, VPTSIZE);
     vte->vptaddr = FIELD_EX64(vteval, VTE, VPTADDR);
     vte->rdbase = FIELD_EX64(vteval, VTE, RDBASE);
-out:
-    if (res != MEMTX_OK) {
-        trace_gicv3_its_vte_read_fault(vpeid);
-    } else {
-        trace_gicv3_its_vte_read(vpeid, vte->valid, vte->vptsize,
-                                 vte->vptaddr, vte->rdbase);
-    }
+    trace_gicv3_its_vte_read(vpeid, vte->valid, vte->vptsize,
+                             vte->vptaddr, vte->rdbase);
     return res;
 }
 
diff --git a/hw/intc/loongarch_extioi.c b/hw/intc/loongarch_extioi.c
index af75460643..24fb3af8cc 100644
--- a/hw/intc/loongarch_extioi.c
+++ b/hw/intc/loongarch_extioi.c
@@ -191,7 +191,7 @@ static MemTxResult extioi_writew(void *opaque, hwaddr addr,
         cpu = attrs.requester_id;
         old_data = s->coreisr[cpu][index];
         s->coreisr[cpu][index] = old_data & ~val;
-        /* write 1 to clear interrrupt */
+        /* write 1 to clear interrupt */
         old_data &= val;
         irq = ctz32(old_data);
         while (irq != 32) {
diff --git a/hw/intc/loongson_liointc.c b/hw/intc/loongson_liointc.c
index cc11b544cb..c10fb97a06 100644
--- a/hw/intc/loongson_liointc.c
+++ b/hw/intc/loongson_liointc.c
@@ -1,5 +1,5 @@
 /*
- * QEMU Loongson Local I/O interrupt controler.
+ * QEMU Loongson Local I/O interrupt controller.
  *
  * Copyright (c) 2020 Huacai Chen <chenhc@lemote.com>
  * Copyright (c) 2020 Jiaxun Yang <jiaxun.yang@flygoat.com>
diff --git a/hw/intc/omap_intc.c b/hw/intc/omap_intc.c
index 647bf324a8..435c47600f 100644
--- a/hw/intc/omap_intc.c
+++ b/hw/intc/omap_intc.c
@@ -68,7 +68,7 @@ static void omap_inth_sir_update(OMAPIntcState *s, int is_fiq)
     p_intr = 255;
 
     /* Find the interrupt line with the highest dynamic priority.
-     * Note: 0 denotes the hightest priority.
+     * Note: 0 denotes the highest priority.
      * If all interrupts have the same priority, the default order is IRQ_N,
      * IRQ_N-1,...,IRQ_0. */
     for (j = 0; j < s->nbanks; ++j) {
diff --git a/hw/intc/pnv_xive.c b/hw/intc/pnv_xive.c
index e536b3ec26..da10deceb8 100644
--- a/hw/intc/pnv_xive.c
+++ b/hw/intc/pnv_xive.c
@@ -84,28 +84,6 @@ static uint8_t pnv_xive_block_id(PnvXive *xive)
     return blk;
 }
 
-/*
- * Remote access to controllers. HW uses MMIOs. For now, a simple scan
- * of the chips is good enough.
- *
- * TODO: Block scope support
- */
-static PnvXive *pnv_xive_get_remote(uint8_t blk)
-{
-    PnvMachineState *pnv = PNV_MACHINE(qdev_get_machine());
-    int i;
-
-    for (i = 0; i < pnv->num_chips; i++) {
-        Pnv9Chip *chip9 = PNV9_CHIP(pnv->chips[i]);
-        PnvXive *xive = &chip9->xive;
-
-        if (pnv_xive_block_id(xive) == blk) {
-            return xive;
-        }
-    }
-    return NULL;
-}
-
 /*
  * VST accessors for SBE, EAT, ENDT, NVT
  *
@@ -209,6 +187,42 @@ static uint64_t pnv_xive_vst_addr_indirect(PnvXive *xive, uint32_t type,
     return pnv_xive_vst_addr_direct(xive, type, vsd, (idx % vst_per_page));
 }
 
+/*
+ * This is a simplified model of operation forwarding on a remote IC.
+ *
+ * A PC MMIO address is built to identify the NVT structure. The load
+ * on the remote IC will return the address of the structure in RAM,
+ * which will then be used by pnv_xive_vst_write/read to perform the
+ * RAM operation.
+ */
+static uint64_t pnv_xive_vst_addr_remote(PnvXive *xive, uint32_t type,
+                                         uint64_t vsd, uint8_t blk,
+                                         uint32_t idx)
+{
+    const XiveVstInfo *info = &vst_infos[type];
+    uint64_t remote_addr = vsd & VSD_ADDRESS_MASK;
+    uint64_t vst_addr;
+    MemTxResult result;
+
+    if (type != VST_TSEL_VPDT) {
+        xive_error(xive, "VST: invalid access on remote VST %s %x/%x !?",
+                   info->name, blk, idx);
+        return 0;
+    }
+
+    remote_addr |= ((uint64_t)idx) << xive->pc_shift;
+
+    vst_addr = address_space_ldq_be(&address_space_memory, remote_addr,
+                                    MEMTXATTRS_UNSPECIFIED, &result);
+    if (result != MEMTX_OK) {
+        xive_error(xive, "VST: read failed at @0x%"  HWADDR_PRIx
+                   " for NVT %x/%x\n", remote_addr, blk, idx);
+        return 0;
+    }
+
+    return vst_addr;
+}
+
 static uint64_t pnv_xive_vst_addr(PnvXive *xive, uint32_t type, uint8_t blk,
                                   uint32_t idx)
 {
@@ -225,9 +239,7 @@ static uint64_t pnv_xive_vst_addr(PnvXive *xive, uint32_t type, uint8_t blk,
 
     /* Remote VST access */
     if (GETFIELD(VSD_MODE, vsd) == VSD_MODE_FORWARD) {
-        xive = pnv_xive_get_remote(blk);
-
-        return xive ? pnv_xive_vst_addr(xive, type, blk, idx) : 0;
+        return pnv_xive_vst_addr_remote(xive, type, vsd, blk, idx);
     }
 
     if (VSD_INDIRECT & vsd) {
@@ -242,12 +254,20 @@ static int pnv_xive_vst_read(PnvXive *xive, uint32_t type, uint8_t blk,
 {
     const XiveVstInfo *info = &vst_infos[type];
     uint64_t addr = pnv_xive_vst_addr(xive, type, blk, idx);
+    MemTxResult result;
 
     if (!addr) {
         return -1;
     }
 
-    cpu_physical_memory_read(addr, data, info->size);
+    result = address_space_read(&address_space_memory, addr,
+                                MEMTXATTRS_UNSPECIFIED, data,
+                                info->size);
+    if (result != MEMTX_OK) {
+        xive_error(xive, "VST: read failed at @0x%" HWADDR_PRIx
+                   " for VST %s %x/%x\n", addr, info->name, blk, idx);
+        return -1;
+    }
     return 0;
 }
 
@@ -258,16 +278,27 @@ static int pnv_xive_vst_write(PnvXive *xive, uint32_t type, uint8_t blk,
 {
     const XiveVstInfo *info = &vst_infos[type];
     uint64_t addr = pnv_xive_vst_addr(xive, type, blk, idx);
+    MemTxResult result;
 
     if (!addr) {
         return -1;
     }
 
     if (word_number == XIVE_VST_WORD_ALL) {
-        cpu_physical_memory_write(addr, data, info->size);
+        result = address_space_write(&address_space_memory, addr,
+                                     MEMTXATTRS_UNSPECIFIED, data,
+                                     info->size);
     } else {
-        cpu_physical_memory_write(addr + word_number * 4,
-                                  data + word_number * 4, 4);
+        result = address_space_write(&address_space_memory,
+                                     addr + word_number * 4,
+                                     MEMTXATTRS_UNSPECIFIED,
+                                     data + word_number * 4, 4);
+    }
+
+    if (result != MEMTX_OK) {
+        xive_error(xive, "VST: write failed at @0x%" HWADDR_PRIx
+                    "for VST %s %x/%x\n", addr, info->name, blk, idx);
+        return -1;
     }
     return 0;
 }
@@ -275,12 +306,26 @@ static int pnv_xive_vst_write(PnvXive *xive, uint32_t type, uint8_t blk,
 static int pnv_xive_get_end(XiveRouter *xrtr, uint8_t blk, uint32_t idx,
                             XiveEND *end)
 {
+    PnvXive *xive = PNV_XIVE(xrtr);
+
+    if (pnv_xive_block_id(xive) != blk) {
+        xive_error(xive, "VST: END %x/%x is remote !?", blk, idx);
+        return -1;
+    }
+
     return pnv_xive_vst_read(PNV_XIVE(xrtr), VST_TSEL_EQDT, blk, idx, end);
 }
 
 static int pnv_xive_write_end(XiveRouter *xrtr, uint8_t blk, uint32_t idx,
                               XiveEND *end, uint8_t word_number)
 {
+    PnvXive *xive = PNV_XIVE(xrtr);
+
+    if (pnv_xive_block_id(xive) != blk) {
+        xive_error(xive, "VST: END %x/%x is remote !?", blk, idx);
+        return -1;
+    }
+
     return pnv_xive_vst_write(PNV_XIVE(xrtr), VST_TSEL_EQDT, blk, idx, end,
                               word_number);
 }
@@ -943,7 +988,7 @@ static void pnv_xive_ic_reg_write(void *opaque, hwaddr offset,
      */
     case VC_SBC_CONFIG: /* Store EOI configuration */
         /*
-         * Configure store EOI if required by firwmare (skiboot has removed
+         * Configure store EOI if required by firmware (skiboot has removed
          * support recently though)
          */
         if (val & (VC_SBC_CONF_CPLX_CIST | VC_SBC_CONF_CIST_BOTH)) {
@@ -1349,6 +1394,50 @@ static const MemoryRegionOps pnv_xive_ic_reg_ops = {
 #define PNV_XIVE_SYNC_PUSH          0xf00 /* Sync push context */
 #define PNV_XIVE_SYNC_VPC           0xf80 /* Sync remove VPC store */
 
+static void pnv_xive_end_notify(XiveRouter *xrtr, XiveEAS *eas)
+{
+    PnvXive *xive = PNV_XIVE(xrtr);
+    uint8_t end_blk = xive_get_field64(EAS_END_BLOCK, eas->w);
+    uint32_t end_idx = xive_get_field64(EAS_END_INDEX, eas->w);
+    uint32_t end_data = xive_get_field64(EAS_END_DATA, eas->w);
+    uint64_t end_vsd = xive->vsds[VST_TSEL_EQDT][end_blk];
+
+    switch (GETFIELD(VSD_MODE, end_vsd)) {
+    case VSD_MODE_EXCLUSIVE:
+        /* Perform the END notification on the local IC. */
+        xive_router_end_notify(xrtr, eas);
+        break;
+
+    case VSD_MODE_FORWARD: {
+        MemTxResult result;
+        uint64_t notif_port = end_vsd & VSD_ADDRESS_MASK;
+        uint64_t data = XIVE_TRIGGER_END | XIVE_TRIGGER_PQ |
+            be64_to_cpu(eas->w);
+
+        /* Forward the store on the remote IC notify page. */
+        address_space_stq_be(&address_space_memory, notif_port, data,
+                             MEMTXATTRS_UNSPECIFIED, &result);
+        if (result != MEMTX_OK) {
+            xive_error(xive, "IC: Forward notif END %x/%x [%x] failed @%"
+                       HWADDR_PRIx, end_blk, end_idx, end_data, notif_port);
+            return;
+        }
+        break;
+    }
+
+    case VSD_MODE_INVALID:
+    default:
+        /* Set FIR */
+        xive_error(xive, "IC: Invalid END VSD for block %x", end_blk);
+        return;
+    }
+}
+
+/*
+ * The notify page can either be used to receive trigger events from
+ * the HW controllers (PHB, PSI) or to reroute interrupts between
+ * Interrupt controllers.
+ */
 static void pnv_xive_ic_hw_trigger(PnvXive *xive, hwaddr addr, uint64_t val)
 {
     uint8_t blk;
@@ -1357,8 +1446,8 @@ static void pnv_xive_ic_hw_trigger(PnvXive *xive, hwaddr addr, uint64_t val)
     trace_pnv_xive_ic_hw_trigger(addr, val);
 
     if (val & XIVE_TRIGGER_END) {
-        xive_error(xive, "IC: END trigger at @0x%"HWADDR_PRIx" data 0x%"PRIx64,
-                   addr, val);
+        val = cpu_to_be64(val);
+        pnv_xive_end_notify(XIVE_ROUTER(xive), (XiveEAS *) &val);
         return;
     }
 
@@ -1703,16 +1792,20 @@ static const MemoryRegionOps pnv_xive_vc_ops = {
 };
 
 /*
- * Presenter Controller MMIO region. The Virtualization Controller
- * updates the IPB in the NVT table when required. Not modeled.
+ * Presenter Controller MMIO region. Points to the NVT sets.
+ *
+ * HW implements all possible mem ops to the underlying NVT structure
+ * but QEMU does not need to be so precise. The model implementation
+ * simply returns the RAM address of the NVT structure which is then
+ * used by pnv_xive_vst_write/read to perform the RAM operation.
  */
-static uint64_t pnv_xive_pc_read(void *opaque, hwaddr addr,
-                                 unsigned size)
+static uint64_t pnv_xive_pc_read(void *opaque, hwaddr offset, unsigned size)
 {
     PnvXive *xive = PNV_XIVE(opaque);
+    uint32_t nvt_idx = offset >> xive->pc_shift;
+    uint8_t blk = pnv_xive_block_id(xive); /* TODO: VDT -> block xlate */
 
-    xive_error(xive, "PC: invalid read @%"HWADDR_PRIx, addr);
-    return -1;
+    return pnv_xive_vst_addr(xive, VST_TSEL_VPDT, blk, nvt_idx);
 }
 
 static void pnv_xive_pc_write(void *opaque, hwaddr addr,
@@ -1898,6 +1991,7 @@ static void pnv_xive_realize(DeviceState *dev, Error **errp)
     memory_region_init_io(&xive->ic_notify_mmio, OBJECT(dev),
                           &pnv_xive_ic_notify_ops,
                           xive, "xive-ic-notify", 1 << xive->ic_shift);
+    xive->ic_notify_mmio.disable_reentrancy_guard = true;
 
     /* The Pervasive LSI trigger and EOI pages (not modeled) */
     memory_region_init_io(&xive->ic_lsi_mmio, OBJECT(dev), &pnv_xive_ic_lsi_ops,
@@ -1933,6 +2027,7 @@ static void pnv_xive_realize(DeviceState *dev, Error **errp)
     /* Presenter Controller MMIO region (not modeled) */
     memory_region_init_io(&xive->pc_mmio, OBJECT(xive), &pnv_xive_pc_ops, xive,
                           "xive-pc", PNV9_XIVE_PC_SIZE);
+    xive->pc_mmio.disable_reentrancy_guard = true;
 
     /* Thread Interrupt Management Area (Direct) */
     memory_region_init_io(&xive->tm_mmio, OBJECT(xive), &pnv_xive_tm_ops,
@@ -1998,6 +2093,7 @@ static void pnv_xive_class_init(ObjectClass *klass, void *data)
     xrc->get_nvt = pnv_xive_get_nvt;
     xrc->write_nvt = pnv_xive_write_nvt;
     xrc->get_block_id = pnv_xive_get_block_id;
+    xrc->end_notify = pnv_xive_end_notify;
 
     xnc->notify = pnv_xive_notify;
     xpc->match_nvt  = pnv_xive_match_nvt;
diff --git a/hw/intc/pnv_xive2.c b/hw/intc/pnv_xive2.c
index bbb44a533c..4b8d0a5d81 100644
--- a/hw/intc/pnv_xive2.c
+++ b/hw/intc/pnv_xive2.c
@@ -240,12 +240,20 @@ static int pnv_xive2_vst_read(PnvXive2 *xive, uint32_t type, uint8_t blk,
 {
     const XiveVstInfo *info = &vst_infos[type];
     uint64_t addr = pnv_xive2_vst_addr(xive, type, blk, idx);
+    MemTxResult result;
 
     if (!addr) {
         return -1;
     }
 
-    cpu_physical_memory_read(addr, data, info->size);
+    result = address_space_read(&address_space_memory, addr,
+                                MEMTXATTRS_UNSPECIFIED, data,
+                                info->size);
+    if (result != MEMTX_OK) {
+        xive2_error(xive, "VST: read failed at @0x%" HWADDR_PRIx
+                   " for VST %s %x/%x\n", addr, info->name, blk, idx);
+        return -1;
+    }
     return 0;
 }
 
@@ -256,16 +264,27 @@ static int pnv_xive2_vst_write(PnvXive2 *xive, uint32_t type, uint8_t blk,
 {
     const XiveVstInfo *info = &vst_infos[type];
     uint64_t addr = pnv_xive2_vst_addr(xive, type, blk, idx);
+    MemTxResult result;
 
     if (!addr) {
         return -1;
     }
 
     if (word_number == XIVE_VST_WORD_ALL) {
-        cpu_physical_memory_write(addr, data, info->size);
+        result = address_space_write(&address_space_memory, addr,
+                                     MEMTXATTRS_UNSPECIFIED, data,
+                                     info->size);
     } else {
-        cpu_physical_memory_write(addr + word_number * 4,
-                                  data + word_number * 4, 4);
+        result = address_space_write(&address_space_memory,
+                                     addr + word_number * 4,
+                                     MEMTXATTRS_UNSPECIFIED,
+                                     data + word_number * 4, 4);
+    }
+
+    if (result != MEMTX_OK) {
+        xive2_error(xive, "VST: write failed at @0x%" HWADDR_PRIx
+                   "for VST %s %x/%x\n", addr, info->name, blk, idx);
+        return -1;
     }
     return 0;
 }
diff --git a/hw/intc/pnv_xive_regs.h b/hw/intc/pnv_xive_regs.h
index c78f030c02..793847638b 100644
--- a/hw/intc/pnv_xive_regs.h
+++ b/hw/intc/pnv_xive_regs.h
@@ -228,6 +228,7 @@
  *       VSD and is only meant to be used in indirect mode !
  */
 #define VSD_MODE                PPC_BITMASK(0, 1)
+#define  VSD_MODE_INVALID       0
 #define  VSD_MODE_SHARED        1
 #define  VSD_MODE_EXCLUSIVE     2
 #define  VSD_MODE_FORWARD       3
diff --git a/hw/intc/riscv_aclint.c b/hw/intc/riscv_aclint.c
index b466a6abaf..25cf7a5d9d 100644
--- a/hw/intc/riscv_aclint.c
+++ b/hw/intc/riscv_aclint.c
@@ -64,13 +64,13 @@ static void riscv_aclint_mtimer_write_timecmp(RISCVAclintMTimerState *mtimer,
     uint64_t next;
     uint64_t diff;
 
-    uint64_t rtc_r = cpu_riscv_read_rtc(mtimer);
+    uint64_t rtc = cpu_riscv_read_rtc(mtimer);
 
     /* Compute the relative hartid w.r.t the socket */
     hartid = hartid - mtimer->hartid_base;
 
     mtimer->timecmp[hartid] = value;
-    if (mtimer->timecmp[hartid] <= rtc_r) {
+    if (mtimer->timecmp[hartid] <= rtc) {
         /*
          * If we're setting an MTIMECMP value in the "past",
          * immediately raise the timer interrupt
@@ -81,7 +81,7 @@ static void riscv_aclint_mtimer_write_timecmp(RISCVAclintMTimerState *mtimer,
 
     /* otherwise, set up the future timer interrupt */
     qemu_irq_lower(mtimer->timer_irqs[hartid]);
-    diff = mtimer->timecmp[hartid] - rtc_r;
+    diff = mtimer->timecmp[hartid] - rtc;
     /* back to ns (note args switched in muldiv64) */
     uint64_t ns_diff = muldiv64(diff, NANOSECONDS_PER_SECOND, timebase_freq);
 
@@ -208,11 +208,12 @@ static void riscv_aclint_mtimer_write(void *opaque, hwaddr addr,
         return;
     } else if (addr == mtimer->time_base || addr == mtimer->time_base + 4) {
         uint64_t rtc_r = cpu_riscv_read_rtc_raw(mtimer->timebase_freq);
+        uint64_t rtc = cpu_riscv_read_rtc(mtimer);
 
         if (addr == mtimer->time_base) {
             if (size == 4) {
                 /* time_lo for RV32/RV64 */
-                mtimer->time_delta = ((rtc_r & ~0xFFFFFFFFULL) | value) - rtc_r;
+                mtimer->time_delta = ((rtc & ~0xFFFFFFFFULL) | value) - rtc_r;
             } else {
                 /* time for RV64 */
                 mtimer->time_delta = value - rtc_r;
@@ -220,7 +221,7 @@ static void riscv_aclint_mtimer_write(void *opaque, hwaddr addr,
         } else {
             if (size == 4) {
                 /* time_hi for RV32/RV64 */
-                mtimer->time_delta = (value << 32 | (rtc_r & 0xFFFFFFFF)) - rtc_r;
+                mtimer->time_delta = (value << 32 | (rtc & 0xFFFFFFFF)) - rtc_r;
             } else {
                 qemu_log_mask(LOG_GUEST_ERROR,
                               "aclint-mtimer: invalid time_hi write: %08x",
diff --git a/hw/intc/riscv_aplic.c b/hw/intc/riscv_aplic.c
index 4bdc6a5d1a..99aae8ccbe 100644
--- a/hw/intc/riscv_aplic.c
+++ b/hw/intc/riscv_aplic.c
@@ -31,6 +31,8 @@
 #include "hw/irq.h"
 #include "target/riscv/cpu.h"
 #include "sysemu/sysemu.h"
+#include "sysemu/kvm.h"
+#include "kvm_riscv.h"
 #include "migration/vmstate.h"
 
 #define APLIC_MAX_IDC                  (1UL << 14)
@@ -148,6 +150,15 @@
 
 #define APLIC_IDC_CLAIMI               0x1c
 
+/*
+ * KVM AIA only supports APLIC MSI, fallback to QEMU emulation if we want to use
+ * APLIC Wired.
+ */
+static bool is_kvm_aia(bool msimode)
+{
+    return kvm_irqchip_in_kernel() && msimode;
+}
+
 static uint32_t riscv_aplic_read_input_word(RISCVAPLICState *aplic,
                                             uint32_t word)
 {
@@ -801,29 +812,35 @@ static void riscv_aplic_realize(DeviceState *dev, Error **errp)
     uint32_t i;
     RISCVAPLICState *aplic = RISCV_APLIC(dev);
 
-    aplic->bitfield_words = (aplic->num_irqs + 31) >> 5;
-    aplic->sourcecfg = g_new0(uint32_t, aplic->num_irqs);
-    aplic->state = g_new0(uint32_t, aplic->num_irqs);
-    aplic->target = g_new0(uint32_t, aplic->num_irqs);
-    if (!aplic->msimode) {
-        for (i = 0; i < aplic->num_irqs; i++) {
-            aplic->target[i] = 1;
+    if (!is_kvm_aia(aplic->msimode)) {
+        aplic->bitfield_words = (aplic->num_irqs + 31) >> 5;
+        aplic->sourcecfg = g_new0(uint32_t, aplic->num_irqs);
+        aplic->state = g_new0(uint32_t, aplic->num_irqs);
+        aplic->target = g_new0(uint32_t, aplic->num_irqs);
+        if (!aplic->msimode) {
+            for (i = 0; i < aplic->num_irqs; i++) {
+                aplic->target[i] = 1;
+            }
         }
-    }
-    aplic->idelivery = g_new0(uint32_t, aplic->num_harts);
-    aplic->iforce = g_new0(uint32_t, aplic->num_harts);
-    aplic->ithreshold = g_new0(uint32_t, aplic->num_harts);
+        aplic->idelivery = g_new0(uint32_t, aplic->num_harts);
+        aplic->iforce = g_new0(uint32_t, aplic->num_harts);
+        aplic->ithreshold = g_new0(uint32_t, aplic->num_harts);
 
-    memory_region_init_io(&aplic->mmio, OBJECT(dev), &riscv_aplic_ops, aplic,
-                          TYPE_RISCV_APLIC, aplic->aperture_size);
-    sysbus_init_mmio(SYS_BUS_DEVICE(dev), &aplic->mmio);
+        memory_region_init_io(&aplic->mmio, OBJECT(dev), &riscv_aplic_ops,
+                              aplic, TYPE_RISCV_APLIC, aplic->aperture_size);
+        sysbus_init_mmio(SYS_BUS_DEVICE(dev), &aplic->mmio);
+    }
 
     /*
      * Only root APLICs have hardware IRQ lines. All non-root APLICs
      * have IRQ lines delegated by their parent APLIC.
      */
     if (!aplic->parent) {
-        qdev_init_gpio_in(dev, riscv_aplic_request, aplic->num_irqs);
+        if (kvm_enabled() && is_kvm_aia(aplic->msimode)) {
+            qdev_init_gpio_in(dev, riscv_kvm_aplic_request, aplic->num_irqs);
+        } else {
+            qdev_init_gpio_in(dev, riscv_aplic_request, aplic->num_irqs);
+        }
     }
 
     /* Create output IRQ lines for non-MSI mode */
@@ -958,7 +975,10 @@ DeviceState *riscv_aplic_create(hwaddr addr, hwaddr size,
     qdev_prop_set_bit(dev, "mmode", mmode);
 
     sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
-    sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, addr);
+
+    if (!is_kvm_aia(msimode)) {
+        sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, addr);
+    }
 
     if (parent) {
         riscv_aplic_add_child(parent, dev);
diff --git a/hw/intc/riscv_imsic.c b/hw/intc/riscv_imsic.c
index fea3385b51..760dbddcf7 100644
--- a/hw/intc/riscv_imsic.c
+++ b/hw/intc/riscv_imsic.c
@@ -32,6 +32,7 @@
 #include "target/riscv/cpu.h"
 #include "target/riscv/cpu_bits.h"
 #include "sysemu/sysemu.h"
+#include "sysemu/kvm.h"
 #include "migration/vmstate.h"
 
 #define IMSIC_MMIO_PAGE_LE             0x00
@@ -283,6 +284,20 @@ static void riscv_imsic_write(void *opaque, hwaddr addr, uint64_t value,
         goto err;
     }
 
+#if defined(CONFIG_KVM)
+    if (kvm_irqchip_in_kernel()) {
+        struct kvm_msi msi;
+
+        msi.address_lo = extract64(imsic->mmio.addr + addr, 0, 32);
+        msi.address_hi = extract64(imsic->mmio.addr + addr, 32, 32);
+        msi.data = le32_to_cpu(value);
+
+        kvm_vm_ioctl(kvm_state, KVM_SIGNAL_MSI, &msi);
+
+        return;
+    }
+#endif
+
     /* Writes only supported for MSI little-endian registers */
     page = addr >> IMSIC_MMIO_PAGE_SHIFT;
     if ((addr & (IMSIC_MMIO_PAGE_SZ - 1)) == IMSIC_MMIO_PAGE_LE) {
@@ -320,10 +335,12 @@ static void riscv_imsic_realize(DeviceState *dev, Error **errp)
     CPUState *cpu = cpu_by_arch_id(imsic->hartid);
     CPURISCVState *env = cpu ? cpu->env_ptr : NULL;
 
-    imsic->num_eistate = imsic->num_pages * imsic->num_irqs;
-    imsic->eidelivery = g_new0(uint32_t, imsic->num_pages);
-    imsic->eithreshold = g_new0(uint32_t, imsic->num_pages);
-    imsic->eistate = g_new0(uint32_t, imsic->num_eistate);
+    if (!kvm_irqchip_in_kernel()) {
+        imsic->num_eistate = imsic->num_pages * imsic->num_irqs;
+        imsic->eidelivery = g_new0(uint32_t, imsic->num_pages);
+        imsic->eithreshold = g_new0(uint32_t, imsic->num_pages);
+        imsic->eistate = g_new0(uint32_t, imsic->num_eistate);
+    }
 
     memory_region_init_io(&imsic->mmio, OBJECT(dev), &riscv_imsic_ops,
                           imsic, TYPE_RISCV_IMSIC,
diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c
index 8bcab2846c..7f701d414b 100644
--- a/hw/intc/spapr_xive.c
+++ b/hw/intc/spapr_xive.c
@@ -27,7 +27,7 @@
 #include "trace.h"
 
 /*
- * XIVE Virtualization Controller BAR and Thread Managment BAR that we
+ * XIVE Virtualization Controller BAR and Thread Management BAR that we
  * use for the ESB pages and the TIMA pages
  */
 #define SPAPR_XIVE_VC_BASE   0x0006010000000000ull
diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c
index 61fe7bd2d3..5789062379 100644
--- a/hw/intc/spapr_xive_kvm.c
+++ b/hw/intc/spapr_xive_kvm.c
@@ -485,7 +485,7 @@ static int kvmppc_xive_get_queues(SpaprXive *xive, Error **errp)
  *
  * Whenever the VM is stopped, the VM change handler sets the source
  * PQs to PENDING to stop the flow of events and to possibly catch a
- * triggered interrupt occuring while the VM is stopped. The previous
+ * triggered interrupt occurring while the VM is stopped. The previous
  * state is saved in anticipation of a migration. The XIVE controller
  * is then synced through KVM to flush any in-flight event
  * notification and stabilize the EQs.
@@ -551,7 +551,7 @@ static void kvmppc_xive_change_state_handler(void *opaque, bool running,
 
         /*
          * PQ is set to PENDING to possibly catch a triggered
-         * interrupt occuring while the VM is stopped (hotplug event
+         * interrupt occurring while the VM is stopped (hotplug event
          * for instance) .
          */
         if (pq != XIVE_ESB_OFF) {
@@ -633,7 +633,7 @@ int kvmppc_xive_post_load(SpaprXive *xive, int version_id)
     /* The KVM XIVE device should be in use */
     assert(xive->fd != -1);
 
-    /* Restore the ENDT first. The targetting depends on it. */
+    /* Restore the ENDT first. The targeting depends on it. */
     for (i = 0; i < xive->nr_ends; i++) {
         if (!xive_end_is_valid(&xive->endt[i])) {
             continue;
diff --git a/hw/intc/xive.c b/hw/intc/xive.c
index 56670b2cac..a3585593d8 100644
--- a/hw/intc/xive.c
+++ b/hw/intc/xive.c
@@ -1518,6 +1518,13 @@ static void xive_router_realize(DeviceState *dev, Error **errp)
     assert(xrtr->xfb);
 }
 
+static void xive_router_end_notify_handler(XiveRouter *xrtr, XiveEAS *eas)
+{
+    XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
+
+    return xrc->end_notify(xrtr, eas);
+}
+
 /*
  * Encode the HW CAM line in the block group mode format :
  *
@@ -1601,7 +1608,7 @@ int xive_presenter_tctx_match(XivePresenter *xptr, XiveTCTX *tctx,
  *
  * It receives notification requests sent by the IVRE to find one
  * matching NVT (or more) dispatched on the processor threads. In case
- * of a single NVT notification, the process is abreviated and the
+ * of a single NVT notification, the process is abbreviated and the
  * thread is signaled if a match is found. In case of a logical server
  * notification (bits ignored at the end of the NVT identifier), the
  * IVPE and IVRE select a winning thread using different filters. This
@@ -1664,8 +1671,7 @@ static bool xive_router_end_es_notify(XiveRouter *xrtr, uint8_t end_blk,
  * another chip. We don't model the PowerBus but the END trigger
  * message has the same parameters than in the function below.
  */
-static void xive_router_end_notify(XiveRouter *xrtr, uint8_t end_blk,
-                                   uint32_t end_idx, uint32_t end_data)
+void xive_router_end_notify(XiveRouter *xrtr, XiveEAS *eas)
 {
     XiveEND end;
     uint8_t priority;
@@ -1675,6 +1681,10 @@ static void xive_router_end_notify(XiveRouter *xrtr, uint8_t end_blk,
     XiveNVT nvt;
     bool found;
 
+    uint8_t end_blk = xive_get_field64(EAS_END_BLOCK, eas->w);
+    uint32_t end_idx = xive_get_field64(EAS_END_INDEX, eas->w);
+    uint32_t end_data = xive_get_field64(EAS_END_DATA,  eas->w);
+
     /* END cache lookup */
     if (xive_router_get_end(xrtr, end_blk, end_idx, &end)) {
         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: No END %x/%x\n", end_blk,
@@ -1817,10 +1827,7 @@ do_escalation:
     /*
      * The END trigger becomes an Escalation trigger
      */
-    xive_router_end_notify(xrtr,
-                           xive_get_field32(END_W4_ESC_END_BLOCK, end.w4),
-                           xive_get_field32(END_W4_ESC_END_INDEX, end.w4),
-                           xive_get_field32(END_W5_ESC_END_DATA,  end.w5));
+    xive_router_end_notify_handler(xrtr, (XiveEAS *) &end.w4);
 }
 
 void xive_router_notify(XiveNotifier *xn, uint32_t lisn, bool pq_checked)
@@ -1871,10 +1878,7 @@ void xive_router_notify(XiveNotifier *xn, uint32_t lisn, bool pq_checked)
     /*
      * The event trigger becomes an END trigger
      */
-    xive_router_end_notify(xrtr,
-                           xive_get_field64(EAS_END_BLOCK, eas.w),
-                           xive_get_field64(EAS_END_INDEX, eas.w),
-                           xive_get_field64(EAS_END_DATA,  eas.w));
+    xive_router_end_notify_handler(xrtr, &eas);
 }
 
 static Property xive_router_properties[] = {
@@ -1887,12 +1891,16 @@ static void xive_router_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
     XiveNotifierClass *xnc = XIVE_NOTIFIER_CLASS(klass);
+    XiveRouterClass *xrc = XIVE_ROUTER_CLASS(klass);
 
     dc->desc    = "XIVE Router Engine";
     device_class_set_props(dc, xive_router_properties);
     /* Parent is SysBusDeviceClass. No need to call its realize hook */
     dc->realize = xive_router_realize;
     xnc->notify = xive_router_notify;
+
+    /* By default, the router handles END triggers locally */
+    xrc->end_notify = xive_router_end_notify;
 }
 
 static const TypeInfo xive_router_info = {
diff --git a/hw/intc/xive2.c b/hw/intc/xive2.c
index c37ef25d44..98c0d8ba44 100644
--- a/hw/intc/xive2.c
+++ b/hw/intc/xive2.c
@@ -542,7 +542,7 @@ static void xive2_router_realize(DeviceState *dev, Error **errp)
 
 /*
  * Notification using the END ESe/ESn bit (Event State Buffer for
- * escalation and notification). Profide futher coalescing in the
+ * escalation and notification). Profide further coalescing in the
  * Router.
  */
 static bool xive2_router_end_es_notify(Xive2Router *xrtr, uint8_t end_blk,
@@ -621,7 +621,7 @@ static void xive2_router_end_notify(Xive2Router *xrtr, uint8_t end_blk,
 
     /*
      * Check the END ESn (Event State Buffer for notification) for
-     * even futher coalescing in the Router
+     * even further coalescing in the Router
      */
     if (!xive2_end_is_notify(&end)) {
         /* ESn[Q]=1 : end of notification */
@@ -702,7 +702,7 @@ do_escalation:
 
     /*
      * Check the END ESe (Event State Buffer for escalation) for even
-     * futher coalescing in the Router
+     * further coalescing in the Router
      */
     if (!xive2_end_is_uncond_escalation(&end)) {
         /* ESe[Q]=1 : end of escalation notification */
diff --git a/hw/ipmi/ipmi_bmc_extern.c b/hw/ipmi/ipmi_bmc_extern.c
index acf2bab35f..e232d35ba2 100644
--- a/hw/ipmi/ipmi_bmc_extern.c
+++ b/hw/ipmi/ipmi_bmc_extern.c
@@ -301,7 +301,7 @@ static void handle_msg(IPMIBmcExtern *ibe)
         ipmi_debug("msg checksum failure\n");
         return;
     } else {
-        ibe->inpos--; /* Remove checkum */
+        ibe->inpos--; /* Remove checksum */
     }
 
     timer_del(ibe->extern_timer);
diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index 4e314748d3..4cdcb3f7e7 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -1,3 +1,14 @@
+/*
+ * CXL Type 3 (memory expander) device
+ *
+ * Copyright(C) 2020 Intel Corporation.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See the
+ * COPYING file in the top-level directory.
+ *
+ * SPDX-License-Identifier: GPL-v2-only
+ */
+
 #include "qemu/osdep.h"
 #include "qemu/units.h"
 #include "qemu/error-report.h"
@@ -538,7 +549,7 @@ static void ct3d_reg_write(void *opaque, hwaddr offset, uint64_t value,
                                      FIRST_ERROR_POINTER, cxl_err->type);
             } else {
                 /*
-                 * If no more errors, then follow recomendation of PCI spec
+                 * If no more errors, then follow recommendation of PCI spec
                  * r6.0 6.2.4.2 to set the first error pointer to a status
                  * bit that will never be used.
                  */
@@ -697,7 +708,7 @@ static void ct3_realize(PCIDevice *pci_dev, Error **errp)
                          PCI_BASE_ADDRESS_MEM_TYPE_64,
                      &ct3d->cxl_dstate.device_registers);
 
-    /* MSI(-X) Initailization */
+    /* MSI(-X) Initialization */
     rc = msix_init_exclusive_bar(pci_dev, msix_num, 4, NULL);
     if (rc) {
         goto err_address_space_free;
@@ -706,7 +717,7 @@ static void ct3_realize(PCIDevice *pci_dev, Error **errp)
         msix_vector_use(pci_dev, i);
     }
 
-    /* DOE Initailization */
+    /* DOE Initialization */
     pcie_doe_init(pci_dev, &ct3d->doe_cdat, 0x190, doe_cdat_prot, true, 0);
 
     cxl_cstate->cdat.build_cdat_table = ct3_build_cdat_table;
diff --git a/hw/mem/cxl_type3_stubs.c b/hw/mem/cxl_type3_stubs.c
index f3e4a9fa72..8ba5d3d1f7 100644
--- a/hw/mem/cxl_type3_stubs.c
+++ b/hw/mem/cxl_type3_stubs.c
@@ -1,3 +1,13 @@
+/*
+ * CXL Type 3 (memory expander) device QMP stubs
+ *
+ * Copyright(C) 2020 Intel Corporation.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See the
+ * COPYING file in the top-level directory.
+ *
+ * SPDX-License-Identifier: GPL-v2-only
+ */
 
 #include "qemu/osdep.h"
 #include "qapi/error.h"
diff --git a/hw/mem/nvdimm.c b/hw/mem/nvdimm.c
index 31080c22c9..1631a7d13f 100644
--- a/hw/mem/nvdimm.c
+++ b/hw/mem/nvdimm.c
@@ -154,6 +154,9 @@ static void nvdimm_prepare_memory_region(NVDIMMDevice *nvdimm, Error **errp)
                    object_get_canonical_path_component(OBJECT(hostmem)));
         return;
     }
+    if (memory_region_is_rom(mr)) {
+        nvdimm->readonly = true;
+    }
 
     nvdimm->nvdimm_mr = g_new(MemoryRegion, 1);
     memory_region_init_alias(nvdimm->nvdimm_mr, OBJECT(dimm),
@@ -207,15 +210,16 @@ static void nvdimm_unrealize(PCDIMMDevice *dimm)
  * label read/write functions.
  */
 static void nvdimm_validate_rw_label_data(NVDIMMDevice *nvdimm, uint64_t size,
-                                        uint64_t offset)
+                                        uint64_t offset, bool is_write)
 {
     assert((nvdimm->label_size >= size + offset) && (offset + size > offset));
+    assert(!is_write || !nvdimm->readonly);
 }
 
 static void nvdimm_read_label_data(NVDIMMDevice *nvdimm, void *buf,
                                    uint64_t size, uint64_t offset)
 {
-    nvdimm_validate_rw_label_data(nvdimm, size, offset);
+    nvdimm_validate_rw_label_data(nvdimm, size, offset, false);
 
     memcpy(buf, nvdimm->label_data + offset, size);
 }
@@ -229,7 +233,7 @@ static void nvdimm_write_label_data(NVDIMMDevice *nvdimm, const void *buf,
                                             "pmem", NULL);
     uint64_t backend_offset;
 
-    nvdimm_validate_rw_label_data(nvdimm, size, offset);
+    nvdimm_validate_rw_label_data(nvdimm, size, offset, true);
 
     if (!is_pmem) {
         memcpy(nvdimm->label_data + offset, buf, size);
diff --git a/hw/meson.build b/hw/meson.build
index c7ac7d3d75..f01fac4617 100644
--- a/hw/meson.build
+++ b/hw/meson.build
@@ -37,6 +37,7 @@ subdir('smbios')
 subdir('ssi')
 subdir('timer')
 subdir('tpm')
+subdir('ufs')
 subdir('usb')
 subdir('vfio')
 subdir('virtio')
diff --git a/hw/microblaze/boot.c b/hw/microblaze/boot.c
index 25ad54754e..ed61e483ee 100644
--- a/hw/microblaze/boot.c
+++ b/hw/microblaze/boot.c
@@ -140,22 +140,17 @@ void microblaze_load_kernel(MicroBlazeCPU *cpu, hwaddr ddr_base,
         int kernel_size;
         uint64_t entry, high;
         uint32_t base32;
-        int big_endian = 0;
-
-#if TARGET_BIG_ENDIAN
-        big_endian = 1;
-#endif
 
         /* Boots a kernel elf binary.  */
         kernel_size = load_elf(kernel_filename, NULL, NULL, NULL,
                                &entry, NULL, &high, NULL,
-                               big_endian, EM_MICROBLAZE, 0, 0);
+                               TARGET_BIG_ENDIAN, EM_MICROBLAZE, 0, 0);
         base32 = entry;
         if (base32 == 0xc0000000) {
             kernel_size = load_elf(kernel_filename, NULL,
                                    translate_kernel_address, NULL,
                                    &entry, NULL, NULL, NULL,
-                                   big_endian, EM_MICROBLAZE, 0, 0);
+                                   TARGET_BIG_ENDIAN, EM_MICROBLAZE, 0, 0);
         }
         /* Always boot into physical ram.  */
         boot_info.bootstrap_pc = (uint32_t)entry;
diff --git a/hw/microblaze/petalogix_ml605_mmu.c b/hw/microblaze/petalogix_ml605_mmu.c
index babb053035..ea0fb68cf0 100644
--- a/hw/microblaze/petalogix_ml605_mmu.c
+++ b/hw/microblaze/petalogix_ml605_mmu.c
@@ -192,6 +192,7 @@ petalogix_ml605_init(MachineState *machine)
                                         blk_by_legacy_dinfo(dinfo),
                                         &error_fatal);
             }
+            qdev_prop_set_uint8(dev, "cs", i);
             qdev_realize_and_unref(dev, BUS(spi), &error_fatal);
 
             cs_line = qdev_get_gpio_in_named(dev, SSI_GPIO_CS, 0);
diff --git a/hw/mips/jazz.c b/hw/mips/jazz.c
index ca4426a92c..0081dcf921 100644
--- a/hw/mips/jazz.c
+++ b/hw/mips/jazz.c
@@ -125,7 +125,7 @@ static void mips_jazz_init(MachineState *machine,
 {
     MemoryRegion *address_space = get_system_memory();
     char *filename;
-    int bios_size, n, big_endian;
+    int bios_size, n;
     Clock *cpuclk;
     MIPSCPU *cpu;
     MIPSCPUClass *mcc;
@@ -157,12 +157,6 @@ static void mips_jazz_init(MachineState *machine,
         [JAZZ_PICA61] = {33333333, 4},
     };
 
-#if TARGET_BIG_ENDIAN
-    big_endian = 1;
-#else
-    big_endian = 0;
-#endif
-
     if (machine->ram_size > 256 * MiB) {
         error_report("RAM size more than 256Mb is not supported");
         exit(EXIT_FAILURE);
@@ -301,7 +295,7 @@ static void mips_jazz_init(MachineState *machine,
             dev = qdev_new("dp8393x");
             qdev_set_nic_properties(dev, nd);
             qdev_prop_set_uint8(dev, "it_shift", 2);
-            qdev_prop_set_bit(dev, "big_endian", big_endian > 0);
+            qdev_prop_set_bit(dev, "big_endian", TARGET_BIG_ENDIAN);
             object_property_set_link(OBJECT(dev), "dma_mr",
                                      OBJECT(rc4030_dma_mr), &error_abort);
             sysbus = SYS_BUS_DEVICE(dev);
diff --git a/hw/mips/malta.c b/hw/mips/malta.c
index f9618fa5f5..dac27fad9d 100644
--- a/hw/mips/malta.c
+++ b/hw/mips/malta.c
@@ -627,7 +627,7 @@ static void bl_setup_gt64120_jump_kernel(void **p, uint64_t run_addr,
         10, 10, 11, 11 /* PIIX IRQRC[A:D] */
     };
 
-    /* Bus endianess is always reversed */
+    /* Bus endianness is always reversed */
 #if TARGET_BIG_ENDIAN
 #define cpu_to_gt32(x) (x)
 #else
@@ -870,7 +870,6 @@ static uint64_t load_kernel(void)
     uint64_t kernel_entry, kernel_high, initrd_size;
     long kernel_size;
     ram_addr_t initrd_offset;
-    int big_endian;
     uint32_t *prom_buf;
     long prom_size;
     int prom_index = 0;
@@ -878,16 +877,10 @@ static uint64_t load_kernel(void)
     char rng_seed_hex[sizeof(rng_seed) * 2 + 1];
     size_t rng_seed_prom_offset;
 
-#if TARGET_BIG_ENDIAN
-    big_endian = 1;
-#else
-    big_endian = 0;
-#endif
-
     kernel_size = load_elf(loaderparams.kernel_filename, NULL,
                            cpu_mips_kseg0_to_phys, NULL,
                            &kernel_entry, NULL,
-                           &kernel_high, NULL, big_endian, EM_MIPS,
+                           &kernel_high, NULL, TARGET_BIG_ENDIAN, EM_MIPS,
                            1, 0);
     if (kernel_size < 0) {
         error_report("could not load kernel '%s': %s",
@@ -1107,7 +1100,6 @@ void mips_malta_init(MachineState *machine)
     I2CBus *smbus;
     DriveInfo *dinfo;
     int fl_idx = 0;
-    int be;
     MaltaState *s;
     PCIDevice *piix4;
     DeviceState *dev;
@@ -1144,12 +1136,6 @@ void mips_malta_init(MachineState *machine)
                                     ram_low_postio);
     }
 
-#if TARGET_BIG_ENDIAN
-    be = 1;
-#else
-    be = 0;
-#endif
-
     /* FPGA */
 
     /* The CBUS UART is attached to the MIPS CPU INT2 pin, ie interrupt 4 */
@@ -1161,7 +1147,8 @@ void mips_malta_init(MachineState *machine)
                                FLASH_SIZE,
                                dinfo ? blk_by_legacy_dinfo(dinfo) : NULL,
                                65536,
-                               4, 0x0000, 0x0000, 0x0000, 0x0000, be);
+                               4, 0x0000, 0x0000, 0x0000, 0x0000,
+                               TARGET_BIG_ENDIAN);
     bios = pflash_cfi01_get_memory(fl);
     fl_idx++;
     if (kernel_filename) {
@@ -1245,7 +1232,7 @@ void mips_malta_init(MachineState *machine)
 
     /* Northbridge */
     dev = qdev_new("gt64120");
-    qdev_prop_set_bit(dev, "cpu-little-endian", !be);
+    qdev_prop_set_bit(dev, "cpu-little-endian", !TARGET_BIG_ENDIAN);
     sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
     pci_bus = PCI_BUS(qdev_get_child_bus(dev, "pci"));
     pci_bus_map_irqs(pci_bus, malta_pci_slot_get_pirq);
diff --git a/hw/mips/mipssim.c b/hw/mips/mipssim.c
index 39f64448f2..2f951f7fc6 100644
--- a/hw/mips/mipssim.c
+++ b/hw/mips/mipssim.c
@@ -62,18 +62,11 @@ static uint64_t load_kernel(void)
     uint64_t entry, kernel_high, initrd_size;
     long kernel_size;
     ram_addr_t initrd_offset;
-    int big_endian;
-
-#if TARGET_BIG_ENDIAN
-    big_endian = 1;
-#else
-    big_endian = 0;
-#endif
 
     kernel_size = load_elf(loaderparams.kernel_filename, NULL,
                            cpu_mips_kseg0_to_phys, NULL,
                            &entry, NULL,
-                           &kernel_high, NULL, big_endian,
+                           &kernel_high, NULL, TARGET_BIG_ENDIAN,
                            EM_MIPS, 1, 0);
     if (kernel_size < 0) {
         error_report("could not load kernel '%s': %s",
diff --git a/hw/misc/imx7_ccm.c b/hw/misc/imx7_ccm.c
index f135ec7b7e..7539f7fb45 100644
--- a/hw/misc/imx7_ccm.c
+++ b/hw/misc/imx7_ccm.c
@@ -227,7 +227,7 @@ static uint32_t imx7_ccm_get_clock_frequency(IMXCCMState *dev, IMXClk clock)
      * have fixed frequencies and we can provide requested frequency
      * easily. However for CCM provided clocks (like IPG) each GPT
      * timer can have its own clock root.
-     * This means we need additionnal information when calling this
+     * This means we need additional information when calling this
      * function to know the requester's identity.
      */
     uint32_t freq = 0;
diff --git a/hw/misc/mac_via.c b/hw/misc/mac_via.c
index 0787a0268d..f84cc68849 100644
--- a/hw/misc/mac_via.c
+++ b/hw/misc/mac_via.c
@@ -246,7 +246,7 @@
 #define vT2CL    0x1000  /* [VIA only] Timer two counter low. */
 #define vT2CH    0x1200  /* [VIA only] Timer two counter high. */
 #define vSR      0x1400  /* [VIA only] Shift register. */
-#define vACR     0x1600  /* [VIA only] Auxilary control register. */
+#define vACR     0x1600  /* [VIA only] Auxiliary control register. */
 #define vPCR     0x1800  /* [VIA only] Peripheral control register. */
                          /*
                           *           CHRP sez never ever to *write* this.
diff --git a/hw/misc/meson.build b/hw/misc/meson.build
index d9a370c1de..88ecab8392 100644
--- a/hw/misc/meson.build
+++ b/hw/misc/meson.build
@@ -98,6 +98,9 @@ specific_ss.add(when: 'CONFIG_XLNX_VERSAL', if_true: files('xlnx-versal-crl.c'))
 system_ss.add(when: 'CONFIG_XLNX_VERSAL', if_true: files(
   'xlnx-versal-xramc.c',
   'xlnx-versal-pmc-iou-slcr.c',
+  'xlnx-versal-cfu.c',
+  'xlnx-cfi-if.c',
+  'xlnx-versal-cframe-reg.c',
 ))
 system_ss.add(when: 'CONFIG_STM32F2XX_SYSCFG', if_true: files('stm32f2xx_syscfg.c'))
 system_ss.add(when: 'CONFIG_STM32F4XX_SYSCFG', if_true: files('stm32f4xx_syscfg.c'))
diff --git a/hw/misc/stm32f2xx_syscfg.c b/hw/misc/stm32f2xx_syscfg.c
index 04c22c2850..19c1e86424 100644
--- a/hw/misc/stm32f2xx_syscfg.c
+++ b/hw/misc/stm32f2xx_syscfg.c
@@ -94,12 +94,12 @@ static void stm32f2xx_syscfg_write(void *opaque, hwaddr addr,
     switch (addr) {
     case SYSCFG_MEMRMP:
         qemu_log_mask(LOG_UNIMP,
-                      "%s: Changeing the memory mapping isn't supported " \
+                      "%s: Changing the memory mapping isn't supported " \
                       "in QEMU\n", __func__);
         return;
     case SYSCFG_PMC:
         qemu_log_mask(LOG_UNIMP,
-                      "%s: Changeing the memory mapping isn't supported " \
+                      "%s: Changing the memory mapping isn't supported " \
                       "in QEMU\n", __func__);
         return;
     case SYSCFG_EXTICR1:
diff --git a/hw/misc/trace-events b/hw/misc/trace-events
index e8b2be14c0..bc87cd3670 100644
--- a/hw/misc/trace-events
+++ b/hw/misc/trace-events
@@ -155,7 +155,7 @@ stm32f4xx_syscfg_read(uint64_t addr) "reg read: addr: 0x%" PRIx64 " "
 stm32f4xx_syscfg_write(uint64_t addr, uint64_t data) "reg write: addr: 0x%" PRIx64 " val: 0x%" PRIx64 ""
 
 # stm32f4xx_exti.c
-stm32f4xx_exti_set_irq(int irq, int leve) "Set EXTI: %d to %d"
+stm32f4xx_exti_set_irq(int irq, int level) "Set EXTI: %d to %d"
 stm32f4xx_exti_read(uint64_t addr) "reg read: addr: 0x%" PRIx64 " "
 stm32f4xx_exti_write(uint64_t addr, uint64_t data) "reg write: addr: 0x%" PRIx64 " val: 0x%" PRIx64 ""
 
diff --git a/hw/misc/xlnx-cfi-if.c b/hw/misc/xlnx-cfi-if.c
new file mode 100644
index 0000000000..c45f05c4aa
--- /dev/null
+++ b/hw/misc/xlnx-cfi-if.c
@@ -0,0 +1,34 @@
+/*
+ * Xilinx CFI interface
+ *
+ * Copyright (C) 2023, Advanced Micro Devices, Inc.
+ *
+ * Written by Francisco Iglesias <francisco.iglesias@amd.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+#include "qemu/osdep.h"
+#include "hw/misc/xlnx-cfi-if.h"
+
+void xlnx_cfi_transfer_packet(XlnxCfiIf *cfi_if, XlnxCfiPacket *pkt)
+{
+    XlnxCfiIfClass *xcic = XLNX_CFI_IF_GET_CLASS(cfi_if);
+
+    if (xcic->cfi_transfer_packet) {
+        xcic->cfi_transfer_packet(cfi_if, pkt);
+    }
+}
+
+static const TypeInfo xlnx_cfi_if_info = {
+    .name          = TYPE_XLNX_CFI_IF,
+    .parent        = TYPE_INTERFACE,
+    .class_size = sizeof(XlnxCfiIfClass),
+};
+
+static void xlnx_cfi_if_register_types(void)
+{
+    type_register_static(&xlnx_cfi_if_info);
+}
+
+type_init(xlnx_cfi_if_register_types)
+
diff --git a/hw/misc/xlnx-versal-cframe-reg.c b/hw/misc/xlnx-versal-cframe-reg.c
new file mode 100644
index 0000000000..8e8ec0715a
--- /dev/null
+++ b/hw/misc/xlnx-versal-cframe-reg.c
@@ -0,0 +1,858 @@
+/*
+ * QEMU model of the Configuration Frame Control module
+ *
+ * Copyright (C) 2023, Advanced Micro Devices, Inc.
+ *
+ * Written by Francisco Iglesias <francisco.iglesias@amd.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "hw/sysbus.h"
+#include "hw/register.h"
+#include "hw/registerfields.h"
+#include "qemu/bitops.h"
+#include "qemu/log.h"
+#include "qemu/units.h"
+#include "qapi/error.h"
+#include "hw/qdev-properties.h"
+#include "migration/vmstate.h"
+#include "hw/irq.h"
+#include "hw/misc/xlnx-versal-cframe-reg.h"
+
+#ifndef XLNX_VERSAL_CFRAME_REG_ERR_DEBUG
+#define XLNX_VERSAL_CFRAME_REG_ERR_DEBUG 0
+#endif
+
+#define KEYHOLE_STREAM_4K (4 * KiB)
+#define N_WORDS_128BIT 4
+
+#define MAX_BLOCKTYPE 6
+#define MAX_BLOCKTYPE_FRAMES 0xFFFFF
+
+enum {
+    CFRAME_CMD_WCFG = 1,
+    CFRAME_CMD_ROWON = 2,
+    CFRAME_CMD_ROWOFF = 3,
+    CFRAME_CMD_RCFG = 4,
+    CFRAME_CMD_DLPARK = 5,
+};
+
+static gint int_cmp(gconstpointer a, gconstpointer b, gpointer user_data)
+{
+    guint ua = GPOINTER_TO_UINT(a);
+    guint ub = GPOINTER_TO_UINT(b);
+    return (ua > ub) - (ua < ub);
+}
+
+static void cfrm_imr_update_irq(XlnxVersalCFrameReg *s)
+{
+    bool pending = s->regs[R_CFRM_ISR0] & ~s->regs[R_CFRM_IMR0];
+    qemu_set_irq(s->irq_cfrm_imr, pending);
+}
+
+static void cfrm_isr_postw(RegisterInfo *reg, uint64_t val64)
+{
+    XlnxVersalCFrameReg *s = XLNX_VERSAL_CFRAME_REG(reg->opaque);
+    cfrm_imr_update_irq(s);
+}
+
+static uint64_t cfrm_ier_prew(RegisterInfo *reg, uint64_t val64)
+{
+    XlnxVersalCFrameReg *s = XLNX_VERSAL_CFRAME_REG(reg->opaque);
+
+    s->regs[R_CFRM_IMR0] &= ~s->regs[R_CFRM_IER0];
+    s->regs[R_CFRM_IER0] = 0;
+    cfrm_imr_update_irq(s);
+    return 0;
+}
+
+static uint64_t cfrm_idr_prew(RegisterInfo *reg, uint64_t val64)
+{
+    XlnxVersalCFrameReg *s = XLNX_VERSAL_CFRAME_REG(reg->opaque);
+
+    s->regs[R_CFRM_IMR0] |= s->regs[R_CFRM_IDR0];
+    s->regs[R_CFRM_IDR0] = 0;
+    cfrm_imr_update_irq(s);
+    return 0;
+}
+
+static uint64_t cfrm_itr_prew(RegisterInfo *reg, uint64_t val64)
+{
+    XlnxVersalCFrameReg *s = XLNX_VERSAL_CFRAME_REG(reg->opaque);
+
+    s->regs[R_CFRM_ISR0] |= s->regs[R_CFRM_ITR0];
+    s->regs[R_CFRM_ITR0] = 0;
+    cfrm_imr_update_irq(s);
+    return 0;
+}
+
+static void cframe_incr_far(XlnxVersalCFrameReg *s)
+{
+    uint32_t faddr = ARRAY_FIELD_EX32(s->regs, FAR0, FRAME_ADDR);
+    uint32_t blktype = ARRAY_FIELD_EX32(s->regs, FAR0, BLOCKTYPE);
+
+    assert(blktype <= MAX_BLOCKTYPE);
+
+    faddr++;
+    if (faddr > s->cfg.blktype_num_frames[blktype]) {
+        /* Restart from 0 and increment block type */
+        faddr = 0;
+        blktype++;
+
+        assert(blktype <= MAX_BLOCKTYPE);
+
+        ARRAY_FIELD_DP32(s->regs, FAR0, BLOCKTYPE, blktype);
+    }
+
+    ARRAY_FIELD_DP32(s->regs, FAR0, FRAME_ADDR, faddr);
+}
+
+static void cfrm_fdri_post_write(RegisterInfo *reg, uint64_t val)
+{
+    XlnxVersalCFrameReg *s = XLNX_VERSAL_CFRAME_REG(reg->opaque);
+
+    if (s->row_configured && s->rowon && s->wcfg) {
+
+        if (fifo32_num_free(&s->new_f_data) >= N_WORDS_128BIT) {
+            fifo32_push(&s->new_f_data, s->regs[R_FDRI0]);
+            fifo32_push(&s->new_f_data, s->regs[R_FDRI1]);
+            fifo32_push(&s->new_f_data, s->regs[R_FDRI2]);
+            fifo32_push(&s->new_f_data, s->regs[R_FDRI3]);
+        }
+
+        if (fifo32_is_full(&s->new_f_data)) {
+            uint32_t addr = extract32(s->regs[R_FAR0], 0, 23);
+            XlnxCFrame *f = g_new(XlnxCFrame, 1);
+
+            for (int i = 0; i < FRAME_NUM_WORDS; i++) {
+                f->data[i] = fifo32_pop(&s->new_f_data);
+            }
+
+            g_tree_replace(s->cframes, GUINT_TO_POINTER(addr), f);
+
+            cframe_incr_far(s);
+
+            fifo32_reset(&s->new_f_data);
+        }
+    }
+}
+
+static void cfrm_readout_frames(XlnxVersalCFrameReg *s, uint32_t start_addr,
+                                uint32_t end_addr)
+{
+    /*
+     * NB: when our minimum glib version is at least 2.68 we can improve the
+     * performance of the cframe traversal by using g_tree_lookup_node and
+     * g_tree_node_next (instead of calling g_tree_lookup for finding each
+     * cframe).
+     */
+    for (uint32_t addr = start_addr; addr < end_addr; addr++) {
+        XlnxCFrame *f = g_tree_lookup(s->cframes, GUINT_TO_POINTER(addr));
+
+        /* Transmit the data if a frame was found */
+        if (f) {
+            for (int i = 0; i < FRAME_NUM_WORDS; i += 4) {
+                XlnxCfiPacket pkt = {};
+
+                pkt.data[0] = f->data[i];
+                pkt.data[1] = f->data[i + 1];
+                pkt.data[2] = f->data[i + 2];
+                pkt.data[3] = f->data[i + 3];
+
+                if (s->cfg.cfu_fdro) {
+                    xlnx_cfi_transfer_packet(s->cfg.cfu_fdro, &pkt);
+                }
+            }
+        }
+    }
+}
+
+static void cfrm_frcnt_post_write(RegisterInfo *reg, uint64_t val)
+{
+    XlnxVersalCFrameReg *s = XLNX_VERSAL_CFRAME_REG(reg->opaque);
+
+    if (s->row_configured && s->rowon && s->rcfg) {
+        uint32_t start_addr = extract32(s->regs[R_FAR0], 0, 23);
+        uint32_t end_addr = start_addr + s->regs[R_FRCNT0] / FRAME_NUM_QWORDS;
+
+        cfrm_readout_frames(s, start_addr, end_addr);
+    }
+}
+
+static void cfrm_cmd_post_write(RegisterInfo *reg, uint64_t val)
+{
+    XlnxVersalCFrameReg *s = XLNX_VERSAL_CFRAME_REG(reg->opaque);
+
+    if (s->row_configured) {
+        uint8_t cmd = ARRAY_FIELD_EX32(s->regs, CMD0, CMD);
+
+        switch (cmd) {
+        case CFRAME_CMD_WCFG:
+            s->wcfg = true;
+            break;
+        case CFRAME_CMD_ROWON:
+            s->rowon = true;
+            break;
+        case CFRAME_CMD_ROWOFF:
+            s->rowon = false;
+            break;
+        case CFRAME_CMD_RCFG:
+            s->rcfg = true;
+            break;
+        case CFRAME_CMD_DLPARK:
+            s->wcfg = false;
+            s->rcfg = false;
+            break;
+        default:
+            break;
+        };
+    }
+}
+
+static uint64_t cfrm_last_frame_bot_post_read(RegisterInfo *reg,
+                                              uint64_t val64)
+{
+    XlnxVersalCFrameReg *s = XLNX_VERSAL_CFRAME_REG(reg->opaque);
+    uint64_t val = 0;
+
+    switch (reg->access->addr) {
+    case A_LAST_FRAME_BOT0:
+        val = FIELD_DP32(val, LAST_FRAME_BOT0, BLOCKTYPE1_LAST_FRAME_LSB,
+                         s->cfg.blktype_num_frames[1]);
+        val = FIELD_DP32(val, LAST_FRAME_BOT0, BLOCKTYPE0_LAST_FRAME,
+                         s->cfg.blktype_num_frames[0]);
+        break;
+    case A_LAST_FRAME_BOT1:
+        val = FIELD_DP32(val, LAST_FRAME_BOT1, BLOCKTYPE3_LAST_FRAME_LSB,
+                         s->cfg.blktype_num_frames[3]);
+        val = FIELD_DP32(val, LAST_FRAME_BOT1, BLOCKTYPE2_LAST_FRAME,
+                         s->cfg.blktype_num_frames[2]);
+        val = FIELD_DP32(val, LAST_FRAME_BOT1, BLOCKTYPE1_LAST_FRAME_MSB,
+                         (s->cfg.blktype_num_frames[1] >> 12));
+        break;
+    case A_LAST_FRAME_BOT2:
+        val = FIELD_DP32(val, LAST_FRAME_BOT2, BLOCKTYPE3_LAST_FRAME_MSB,
+                         (s->cfg.blktype_num_frames[3] >> 4));
+        break;
+    case A_LAST_FRAME_BOT3:
+    default:
+        break;
+    }
+
+    return val;
+}
+
+static uint64_t cfrm_last_frame_top_post_read(RegisterInfo *reg,
+                                              uint64_t val64)
+{
+    XlnxVersalCFrameReg *s = XLNX_VERSAL_CFRAME_REG(reg->opaque);
+    uint64_t val = 0;
+
+    switch (reg->access->addr) {
+    case A_LAST_FRAME_TOP0:
+        val = FIELD_DP32(val, LAST_FRAME_TOP0, BLOCKTYPE5_LAST_FRAME_LSB,
+                         s->cfg.blktype_num_frames[5]);
+        val = FIELD_DP32(val, LAST_FRAME_TOP0, BLOCKTYPE4_LAST_FRAME,
+                         s->cfg.blktype_num_frames[4]);
+        break;
+    case A_LAST_FRAME_TOP1:
+        val = FIELD_DP32(val, LAST_FRAME_TOP1, BLOCKTYPE6_LAST_FRAME,
+                         s->cfg.blktype_num_frames[6]);
+        val = FIELD_DP32(val, LAST_FRAME_TOP1, BLOCKTYPE5_LAST_FRAME_MSB,
+                         (s->cfg.blktype_num_frames[5] >> 12));
+        break;
+    case A_LAST_FRAME_TOP2:
+    case A_LAST_FRAME_BOT3:
+    default:
+        break;
+    }
+
+    return val;
+}
+
+static void cfrm_far_sfr_post_write(RegisterInfo *reg, uint64_t val)
+{
+    XlnxVersalCFrameReg *s = XLNX_VERSAL_CFRAME_REG(reg->opaque);
+
+    if (s->row_configured && s->rowon && s->rcfg) {
+        uint32_t start_addr = extract32(s->regs[R_FAR_SFR0], 0, 23);
+
+        /* Readback 1 frame */
+        cfrm_readout_frames(s, start_addr, start_addr + 1);
+    }
+}
+
+static const RegisterAccessInfo cframe_reg_regs_info[] = {
+    {   .name = "CRC0",  .addr = A_CRC0,
+        .rsvd = 0x00000000,
+    },{ .name = "CRC1",  .addr = A_CRC0,
+        .rsvd = 0xffffffff,
+    },{ .name = "CRC2",  .addr = A_CRC0,
+        .rsvd = 0xffffffff,
+    },{ .name = "CRC3",  .addr = A_CRC0,
+        .rsvd = 0xffffffff,
+    },{ .name = "FAR0",  .addr = A_FAR0,
+        .rsvd = 0xfe000000,
+    },{ .name = "FAR1",  .addr = A_FAR1,
+        .rsvd = 0xffffffff,
+    },{ .name = "FAR2",  .addr = A_FAR2,
+        .rsvd = 0xffffffff,
+    },{ .name = "FAR3",  .addr = A_FAR3,
+        .rsvd = 0xffffffff,
+    },{ .name = "FAR_SFR0",  .addr = A_FAR_SFR0,
+        .rsvd = 0xff800000,
+    },{ .name = "FAR_SFR1",  .addr = A_FAR_SFR1,
+        .rsvd = 0xffffffff,
+    },{ .name = "FAR_SFR2",  .addr = A_FAR_SFR2,
+        .rsvd = 0xffffffff,
+    },{ .name = "FAR_SFR3",  .addr = A_FAR_SFR3,
+        .rsvd = 0xffffffff,
+        .post_write = cfrm_far_sfr_post_write,
+    },{ .name = "FDRI0",  .addr = A_FDRI0,
+    },{ .name = "FDRI1",  .addr = A_FDRI1,
+    },{ .name = "FDRI2",  .addr = A_FDRI2,
+    },{ .name = "FDRI3",  .addr = A_FDRI3,
+        .post_write = cfrm_fdri_post_write,
+    },{ .name = "FRCNT0",  .addr = A_FRCNT0,
+        .rsvd = 0x00000000,
+    },{ .name = "FRCNT1",  .addr = A_FRCNT1,
+        .rsvd = 0xffffffff,
+    },{ .name = "FRCNT2",  .addr = A_FRCNT2,
+        .rsvd = 0xffffffff,
+    },{ .name = "FRCNT3",  .addr = A_FRCNT3,
+        .rsvd = 0xffffffff,
+        .post_write = cfrm_frcnt_post_write
+    },{ .name = "CMD0",  .addr = A_CMD0,
+        .rsvd = 0xffffffe0,
+    },{ .name = "CMD1",  .addr = A_CMD1,
+        .rsvd = 0xffffffff,
+    },{ .name = "CMD2",  .addr = A_CMD2,
+        .rsvd = 0xffffffff,
+    },{ .name = "CMD3",  .addr = A_CMD3,
+        .rsvd = 0xffffffff,
+        .post_write = cfrm_cmd_post_write
+    },{ .name = "CR_MASK0",  .addr = A_CR_MASK0,
+        .rsvd = 0x00000000,
+    },{ .name = "CR_MASK1",  .addr = A_CR_MASK1,
+        .rsvd = 0x00000000,
+    },{ .name = "CR_MASK2",  .addr = A_CR_MASK2,
+        .rsvd = 0x00000000,
+    },{ .name = "CR_MASK3",  .addr = A_CR_MASK3,
+        .rsvd = 0xffffffff,
+    },{ .name = "CTL0",  .addr = A_CTL0,
+        .rsvd = 0xfffffff8,
+    },{ .name = "CTL1",  .addr = A_CTL1,
+        .rsvd = 0xffffffff,
+    },{ .name = "CTL2",  .addr = A_CTL2,
+        .rsvd = 0xffffffff,
+    },{ .name = "CTL3",  .addr = A_CTL3,
+        .rsvd = 0xffffffff,
+    },{ .name = "CFRM_ISR0",  .addr = A_CFRM_ISR0,
+        .rsvd = 0xffc04000,
+        .w1c = 0x3bfff,
+    },{ .name = "CFRM_ISR1",  .addr = A_CFRM_ISR1,
+        .rsvd = 0xffffffff,
+    },{ .name = "CFRM_ISR2",  .addr = A_CFRM_ISR2,
+        .rsvd = 0xffffffff,
+    },{ .name = "CFRM_ISR3",  .addr = A_CFRM_ISR3,
+        .rsvd = 0xffffffff,
+        .post_write = cfrm_isr_postw,
+    },{ .name = "CFRM_IMR0",  .addr = A_CFRM_IMR0,
+        .rsvd = 0xffc04000,
+        .ro = 0xfffff,
+        .reset = 0x3bfff,
+    },{ .name = "CFRM_IMR1",  .addr = A_CFRM_IMR1,
+        .rsvd = 0xffffffff,
+    },{ .name = "CFRM_IMR2",  .addr = A_CFRM_IMR2,
+        .rsvd = 0xffffffff,
+    },{ .name = "CFRM_IMR3",  .addr = A_CFRM_IMR3,
+        .rsvd = 0xffffffff,
+    },{ .name = "CFRM_IER0",  .addr = A_CFRM_IER0,
+        .rsvd = 0xffc04000,
+    },{ .name = "CFRM_IER1",  .addr = A_CFRM_IER1,
+        .rsvd = 0xffffffff,
+    },{ .name = "CFRM_IER2",  .addr = A_CFRM_IER2,
+        .rsvd = 0xffffffff,
+    },{ .name = "CFRM_IER3",  .addr = A_CFRM_IER3,
+        .rsvd = 0xffffffff,
+        .pre_write = cfrm_ier_prew,
+    },{ .name = "CFRM_IDR0",  .addr = A_CFRM_IDR0,
+        .rsvd = 0xffc04000,
+    },{ .name = "CFRM_IDR1",  .addr = A_CFRM_IDR1,
+        .rsvd = 0xffffffff,
+    },{ .name = "CFRM_IDR2",  .addr = A_CFRM_IDR2,
+        .rsvd = 0xffffffff,
+    },{ .name = "CFRM_IDR3",  .addr = A_CFRM_IDR3,
+        .rsvd = 0xffffffff,
+        .pre_write = cfrm_idr_prew,
+    },{ .name = "CFRM_ITR0",  .addr = A_CFRM_ITR0,
+        .rsvd = 0xffc04000,
+    },{ .name = "CFRM_ITR1",  .addr = A_CFRM_ITR1,
+        .rsvd = 0xffffffff,
+    },{ .name = "CFRM_ITR2",  .addr = A_CFRM_ITR2,
+        .rsvd = 0xffffffff,
+    },{ .name = "CFRM_ITR3",  .addr = A_CFRM_ITR3,
+        .rsvd = 0xffffffff,
+        .pre_write = cfrm_itr_prew,
+    },{ .name = "SEU_SYNDRM00",  .addr = A_SEU_SYNDRM00,
+    },{ .name = "SEU_SYNDRM01",  .addr = A_SEU_SYNDRM01,
+    },{ .name = "SEU_SYNDRM02",  .addr = A_SEU_SYNDRM02,
+    },{ .name = "SEU_SYNDRM03",  .addr = A_SEU_SYNDRM03,
+    },{ .name = "SEU_SYNDRM10",  .addr = A_SEU_SYNDRM10,
+    },{ .name = "SEU_SYNDRM11",  .addr = A_SEU_SYNDRM11,
+    },{ .name = "SEU_SYNDRM12",  .addr = A_SEU_SYNDRM12,
+    },{ .name = "SEU_SYNDRM13",  .addr = A_SEU_SYNDRM13,
+    },{ .name = "SEU_SYNDRM20",  .addr = A_SEU_SYNDRM20,
+    },{ .name = "SEU_SYNDRM21",  .addr = A_SEU_SYNDRM21,
+    },{ .name = "SEU_SYNDRM22",  .addr = A_SEU_SYNDRM22,
+    },{ .name = "SEU_SYNDRM23",  .addr = A_SEU_SYNDRM23,
+    },{ .name = "SEU_SYNDRM30",  .addr = A_SEU_SYNDRM30,
+    },{ .name = "SEU_SYNDRM31",  .addr = A_SEU_SYNDRM31,
+    },{ .name = "SEU_SYNDRM32",  .addr = A_SEU_SYNDRM32,
+    },{ .name = "SEU_SYNDRM33",  .addr = A_SEU_SYNDRM33,
+    },{ .name = "SEU_VIRTUAL_SYNDRM0",  .addr = A_SEU_VIRTUAL_SYNDRM0,
+    },{ .name = "SEU_VIRTUAL_SYNDRM1",  .addr = A_SEU_VIRTUAL_SYNDRM1,
+    },{ .name = "SEU_VIRTUAL_SYNDRM2",  .addr = A_SEU_VIRTUAL_SYNDRM2,
+    },{ .name = "SEU_VIRTUAL_SYNDRM3",  .addr = A_SEU_VIRTUAL_SYNDRM3,
+    },{ .name = "SEU_CRC0",  .addr = A_SEU_CRC0,
+    },{ .name = "SEU_CRC1",  .addr = A_SEU_CRC1,
+    },{ .name = "SEU_CRC2",  .addr = A_SEU_CRC2,
+    },{ .name = "SEU_CRC3",  .addr = A_SEU_CRC3,
+    },{ .name = "CFRAME_FAR_BOT0",  .addr = A_CFRAME_FAR_BOT0,
+    },{ .name = "CFRAME_FAR_BOT1",  .addr = A_CFRAME_FAR_BOT1,
+    },{ .name = "CFRAME_FAR_BOT2",  .addr = A_CFRAME_FAR_BOT2,
+    },{ .name = "CFRAME_FAR_BOT3",  .addr = A_CFRAME_FAR_BOT3,
+    },{ .name = "CFRAME_FAR_TOP0",  .addr = A_CFRAME_FAR_TOP0,
+    },{ .name = "CFRAME_FAR_TOP1",  .addr = A_CFRAME_FAR_TOP1,
+    },{ .name = "CFRAME_FAR_TOP2",  .addr = A_CFRAME_FAR_TOP2,
+    },{ .name = "CFRAME_FAR_TOP3",  .addr = A_CFRAME_FAR_TOP3,
+    },{ .name = "LAST_FRAME_BOT0",  .addr = A_LAST_FRAME_BOT0,
+        .ro = 0xffffffff,
+        .post_read = cfrm_last_frame_bot_post_read,
+    },{ .name = "LAST_FRAME_BOT1",  .addr = A_LAST_FRAME_BOT1,
+        .ro = 0xffffffff,
+        .post_read = cfrm_last_frame_bot_post_read,
+    },{ .name = "LAST_FRAME_BOT2",  .addr = A_LAST_FRAME_BOT2,
+        .ro = 0xffffffff,
+        .post_read = cfrm_last_frame_bot_post_read,
+    },{ .name = "LAST_FRAME_BOT3",  .addr = A_LAST_FRAME_BOT3,
+        .ro = 0xffffffff,
+        .post_read = cfrm_last_frame_bot_post_read,
+    },{ .name = "LAST_FRAME_TOP0",  .addr = A_LAST_FRAME_TOP0,
+        .ro = 0xffffffff,
+        .post_read = cfrm_last_frame_top_post_read,
+    },{ .name = "LAST_FRAME_TOP1",  .addr = A_LAST_FRAME_TOP1,
+        .ro = 0xffffffff,
+        .post_read = cfrm_last_frame_top_post_read,
+    },{ .name = "LAST_FRAME_TOP2",  .addr = A_LAST_FRAME_TOP2,
+        .ro = 0xffffffff,
+        .post_read = cfrm_last_frame_top_post_read,
+    },{ .name = "LAST_FRAME_TOP3",  .addr = A_LAST_FRAME_TOP3,
+        .ro = 0xffffffff,
+        .post_read = cfrm_last_frame_top_post_read,
+    }
+};
+
+static void cframe_reg_cfi_transfer_packet(XlnxCfiIf *cfi_if,
+                                           XlnxCfiPacket *pkt)
+{
+    XlnxVersalCFrameReg *s = XLNX_VERSAL_CFRAME_REG(cfi_if);
+    uint64_t we = MAKE_64BIT_MASK(0, 4 * 8);
+
+    if (!s->row_configured) {
+        return;
+    }
+
+    switch (pkt->reg_addr) {
+    case CFRAME_FAR:
+        s->regs[R_FAR0] = pkt->data[0];
+        break;
+    case CFRAME_SFR:
+        s->regs[R_FAR_SFR0] = pkt->data[0];
+        register_write(&s->regs_info[R_FAR_SFR3], 0,
+                       we, object_get_typename(OBJECT(s)),
+                       XLNX_VERSAL_CFRAME_REG_ERR_DEBUG);
+        break;
+    case CFRAME_FDRI:
+        s->regs[R_FDRI0] = pkt->data[0];
+        s->regs[R_FDRI1] = pkt->data[1];
+        s->regs[R_FDRI2] = pkt->data[2];
+        register_write(&s->regs_info[R_FDRI3], pkt->data[3],
+                       we, object_get_typename(OBJECT(s)),
+                       XLNX_VERSAL_CFRAME_REG_ERR_DEBUG);
+        break;
+    case CFRAME_CMD:
+        ARRAY_FIELD_DP32(s->regs, CMD0, CMD, pkt->data[0]);
+
+        register_write(&s->regs_info[R_CMD3], 0,
+                       we, object_get_typename(OBJECT(s)),
+                       XLNX_VERSAL_CFRAME_REG_ERR_DEBUG);
+        break;
+    default:
+        break;
+    }
+}
+
+static uint64_t cframe_reg_fdri_read(void *opaque, hwaddr addr, unsigned size)
+{
+    qemu_log_mask(LOG_GUEST_ERROR, "%s: Unsupported read from addr=%"
+                  HWADDR_PRIx "\n", __func__, addr);
+    return 0;
+}
+
+static void cframe_reg_fdri_write(void *opaque, hwaddr addr, uint64_t value,
+                      unsigned size)
+{
+    XlnxVersalCFrameReg *s = XLNX_VERSAL_CFRAME_REG(opaque);
+    uint32_t wfifo[WFIFO_SZ];
+
+    if (update_wfifo(addr, value, s->wfifo, wfifo)) {
+        uint64_t we = MAKE_64BIT_MASK(0, 4 * 8);
+
+        s->regs[R_FDRI0] = wfifo[0];
+        s->regs[R_FDRI1] = wfifo[1];
+        s->regs[R_FDRI2] = wfifo[2];
+        register_write(&s->regs_info[R_FDRI3], wfifo[3],
+                       we, object_get_typename(OBJECT(s)),
+                       XLNX_VERSAL_CFRAME_REG_ERR_DEBUG);
+    }
+}
+
+static void cframe_reg_reset_enter(Object *obj, ResetType type)
+{
+    XlnxVersalCFrameReg *s = XLNX_VERSAL_CFRAME_REG(obj);
+    unsigned int i;
+
+    for (i = 0; i < ARRAY_SIZE(s->regs_info); ++i) {
+        register_reset(&s->regs_info[i]);
+    }
+    memset(s->wfifo, 0, WFIFO_SZ * sizeof(uint32_t));
+    fifo32_reset(&s->new_f_data);
+
+    if (g_tree_nnodes(s->cframes)) {
+        /*
+         * Take a reference so when g_tree_destroy() unrefs it we keep the
+         * GTree and only destroy its contents. NB: when our minimum
+         * glib version is at least 2.70 we could use g_tree_remove_all().
+         */
+        g_tree_ref(s->cframes);
+        g_tree_destroy(s->cframes);
+    }
+}
+
+static void cframe_reg_reset_hold(Object *obj)
+{
+    XlnxVersalCFrameReg *s = XLNX_VERSAL_CFRAME_REG(obj);
+
+    cfrm_imr_update_irq(s);
+}
+
+static const MemoryRegionOps cframe_reg_ops = {
+    .read = register_read_memory,
+    .write = register_write_memory,
+    .endianness = DEVICE_LITTLE_ENDIAN,
+    .valid = {
+        .min_access_size = 4,
+        .max_access_size = 4,
+    },
+};
+
+static const MemoryRegionOps cframe_reg_fdri_ops = {
+    .read = cframe_reg_fdri_read,
+    .write = cframe_reg_fdri_write,
+    .endianness = DEVICE_LITTLE_ENDIAN,
+    .valid = {
+        .min_access_size = 4,
+        .max_access_size = 4,
+    },
+};
+
+static uint64_t cframes_bcast_reg_read(void *opaque, hwaddr addr, unsigned size)
+{
+    qemu_log_mask(LOG_GUEST_ERROR, "%s: Unsupported read from addr=%"
+                  HWADDR_PRIx "\n", __func__, addr);
+    return 0;
+}
+
+static void cframes_bcast_write(XlnxVersalCFrameBcastReg *s, uint8_t reg_addr,
+                                uint32_t *wfifo)
+{
+    XlnxCfiPacket pkt = {
+        .reg_addr = reg_addr,
+        .data[0] = wfifo[0],
+        .data[1] = wfifo[1],
+        .data[2] = wfifo[2],
+        .data[3] = wfifo[3]
+    };
+
+    for (int i = 0; i < ARRAY_SIZE(s->cfg.cframe); i++) {
+        if (s->cfg.cframe[i]) {
+            xlnx_cfi_transfer_packet(s->cfg.cframe[i], &pkt);
+        }
+    }
+}
+
+static void cframes_bcast_reg_write(void *opaque, hwaddr addr, uint64_t value,
+                      unsigned size)
+{
+    XlnxVersalCFrameBcastReg *s = XLNX_VERSAL_CFRAME_BCAST_REG(opaque);
+    uint32_t wfifo[WFIFO_SZ];
+
+    if (update_wfifo(addr, value, s->wfifo, wfifo)) {
+        uint8_t reg_addr = extract32(addr, 4, 6);
+
+        cframes_bcast_write(s, reg_addr, wfifo);
+    }
+}
+
+static uint64_t cframes_bcast_fdri_read(void *opaque, hwaddr addr,
+                                        unsigned size)
+{
+    qemu_log_mask(LOG_GUEST_ERROR, "%s: Unsupported read from addr=%"
+                  HWADDR_PRIx "\n", __func__, addr);
+    return 0;
+}
+
+static void cframes_bcast_fdri_write(void *opaque, hwaddr addr, uint64_t value,
+                      unsigned size)
+{
+    XlnxVersalCFrameBcastReg *s = XLNX_VERSAL_CFRAME_BCAST_REG(opaque);
+    uint32_t wfifo[WFIFO_SZ];
+
+    if (update_wfifo(addr, value, s->wfifo, wfifo)) {
+        cframes_bcast_write(s, CFRAME_FDRI, wfifo);
+    }
+}
+
+static const MemoryRegionOps cframes_bcast_reg_reg_ops = {
+    .read = cframes_bcast_reg_read,
+    .write = cframes_bcast_reg_write,
+    .endianness = DEVICE_LITTLE_ENDIAN,
+    .valid = {
+        .min_access_size = 4,
+        .max_access_size = 4,
+    },
+};
+
+static const MemoryRegionOps cframes_bcast_reg_fdri_ops = {
+    .read = cframes_bcast_fdri_read,
+    .write = cframes_bcast_fdri_write,
+    .endianness = DEVICE_LITTLE_ENDIAN,
+    .valid = {
+        .min_access_size = 4,
+        .max_access_size = 4,
+    },
+};
+
+static void cframe_reg_realize(DeviceState *dev, Error **errp)
+{
+    XlnxVersalCFrameReg *s = XLNX_VERSAL_CFRAME_REG(dev);
+
+    for (int i = 0; i < ARRAY_SIZE(s->cfg.blktype_num_frames); i++) {
+        if (s->cfg.blktype_num_frames[i] > MAX_BLOCKTYPE_FRAMES) {
+            error_setg(errp,
+                       "blktype-frames%d > 0xFFFFF (max frame per block)",
+                       i);
+            return;
+        }
+        if (s->cfg.blktype_num_frames[i]) {
+            s->row_configured = true;
+        }
+    }
+}
+
+static void cframe_reg_init(Object *obj)
+{
+    XlnxVersalCFrameReg *s = XLNX_VERSAL_CFRAME_REG(obj);
+    SysBusDevice *sbd = SYS_BUS_DEVICE(obj);
+    RegisterInfoArray *reg_array;
+
+    memory_region_init(&s->iomem, obj, TYPE_XLNX_VERSAL_CFRAME_REG,
+                       CFRAME_REG_R_MAX * 4);
+    reg_array =
+        register_init_block32(DEVICE(obj), cframe_reg_regs_info,
+                              ARRAY_SIZE(cframe_reg_regs_info),
+                              s->regs_info, s->regs,
+                              &cframe_reg_ops,
+                              XLNX_VERSAL_CFRAME_REG_ERR_DEBUG,
+                              CFRAME_REG_R_MAX * 4);
+    memory_region_add_subregion(&s->iomem,
+                                0x0,
+                                &reg_array->mem);
+    sysbus_init_mmio(sbd, &s->iomem);
+    memory_region_init_io(&s->iomem_fdri, obj, &cframe_reg_fdri_ops, s,
+                          TYPE_XLNX_VERSAL_CFRAME_REG "-fdri",
+                          KEYHOLE_STREAM_4K);
+    sysbus_init_mmio(sbd, &s->iomem_fdri);
+    sysbus_init_irq(sbd, &s->irq_cfrm_imr);
+
+    s->cframes = g_tree_new_full((GCompareDataFunc)int_cmp, NULL,
+                                  NULL, (GDestroyNotify)g_free);
+    fifo32_create(&s->new_f_data, FRAME_NUM_WORDS);
+}
+
+static const VMStateDescription vmstate_cframe = {
+    .name = "cframe",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT32_ARRAY(data, XlnxCFrame, FRAME_NUM_WORDS),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static const VMStateDescription vmstate_cframe_reg = {
+    .name = TYPE_XLNX_VERSAL_CFRAME_REG,
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT32_ARRAY(wfifo, XlnxVersalCFrameReg, 4),
+        VMSTATE_UINT32_ARRAY(regs, XlnxVersalCFrameReg, CFRAME_REG_R_MAX),
+        VMSTATE_BOOL(rowon, XlnxVersalCFrameReg),
+        VMSTATE_BOOL(wcfg, XlnxVersalCFrameReg),
+        VMSTATE_BOOL(rcfg, XlnxVersalCFrameReg),
+        VMSTATE_GTREE_DIRECT_KEY_V(cframes, XlnxVersalCFrameReg, 1,
+                                   &vmstate_cframe, XlnxCFrame),
+        VMSTATE_FIFO32(new_f_data, XlnxVersalCFrameReg),
+        VMSTATE_END_OF_LIST(),
+    }
+};
+
+static Property cframe_regs_props[] = {
+    DEFINE_PROP_LINK("cfu-fdro", XlnxVersalCFrameReg, cfg.cfu_fdro,
+                     TYPE_XLNX_CFI_IF, XlnxCfiIf *),
+    DEFINE_PROP_UINT32("blktype0-frames", XlnxVersalCFrameReg,
+                       cfg.blktype_num_frames[0], 0),
+    DEFINE_PROP_UINT32("blktype1-frames", XlnxVersalCFrameReg,
+                       cfg.blktype_num_frames[1], 0),
+    DEFINE_PROP_UINT32("blktype2-frames", XlnxVersalCFrameReg,
+                       cfg.blktype_num_frames[2], 0),
+    DEFINE_PROP_UINT32("blktype3-frames", XlnxVersalCFrameReg,
+                       cfg.blktype_num_frames[3], 0),
+    DEFINE_PROP_UINT32("blktype4-frames", XlnxVersalCFrameReg,
+                       cfg.blktype_num_frames[4], 0),
+    DEFINE_PROP_UINT32("blktype5-frames", XlnxVersalCFrameReg,
+                       cfg.blktype_num_frames[5], 0),
+    DEFINE_PROP_UINT32("blktype6-frames", XlnxVersalCFrameReg,
+                       cfg.blktype_num_frames[6], 0),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static void cframe_bcast_reg_init(Object *obj)
+{
+    XlnxVersalCFrameBcastReg *s = XLNX_VERSAL_CFRAME_BCAST_REG(obj);
+    SysBusDevice *sbd = SYS_BUS_DEVICE(obj);
+
+    memory_region_init_io(&s->iomem_reg, obj, &cframes_bcast_reg_reg_ops, s,
+                          TYPE_XLNX_VERSAL_CFRAME_BCAST_REG, KEYHOLE_STREAM_4K);
+    memory_region_init_io(&s->iomem_fdri, obj, &cframes_bcast_reg_fdri_ops, s,
+                          TYPE_XLNX_VERSAL_CFRAME_BCAST_REG "-fdri",
+                          KEYHOLE_STREAM_4K);
+    sysbus_init_mmio(sbd, &s->iomem_reg);
+    sysbus_init_mmio(sbd, &s->iomem_fdri);
+}
+
+static void cframe_bcast_reg_reset_enter(Object *obj, ResetType type)
+{
+    XlnxVersalCFrameBcastReg *s = XLNX_VERSAL_CFRAME_BCAST_REG(obj);
+
+    memset(s->wfifo, 0, WFIFO_SZ * sizeof(uint32_t));
+}
+
+static const VMStateDescription vmstate_cframe_bcast_reg = {
+    .name = TYPE_XLNX_VERSAL_CFRAME_BCAST_REG,
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT32_ARRAY(wfifo, XlnxVersalCFrameBcastReg, 4),
+        VMSTATE_END_OF_LIST(),
+    }
+};
+
+static Property cframe_bcast_regs_props[] = {
+    DEFINE_PROP_LINK("cframe0", XlnxVersalCFrameBcastReg, cfg.cframe[0],
+                     TYPE_XLNX_CFI_IF, XlnxCfiIf *),
+    DEFINE_PROP_LINK("cframe1", XlnxVersalCFrameBcastReg, cfg.cframe[1],
+                     TYPE_XLNX_CFI_IF, XlnxCfiIf *),
+    DEFINE_PROP_LINK("cframe2", XlnxVersalCFrameBcastReg, cfg.cframe[2],
+                     TYPE_XLNX_CFI_IF, XlnxCfiIf *),
+    DEFINE_PROP_LINK("cframe3", XlnxVersalCFrameBcastReg, cfg.cframe[3],
+                     TYPE_XLNX_CFI_IF, XlnxCfiIf *),
+    DEFINE_PROP_LINK("cframe4", XlnxVersalCFrameBcastReg, cfg.cframe[4],
+                     TYPE_XLNX_CFI_IF, XlnxCfiIf *),
+    DEFINE_PROP_LINK("cframe5", XlnxVersalCFrameBcastReg, cfg.cframe[5],
+                     TYPE_XLNX_CFI_IF, XlnxCfiIf *),
+    DEFINE_PROP_LINK("cframe6", XlnxVersalCFrameBcastReg, cfg.cframe[6],
+                     TYPE_XLNX_CFI_IF, XlnxCfiIf *),
+    DEFINE_PROP_LINK("cframe7", XlnxVersalCFrameBcastReg, cfg.cframe[7],
+                     TYPE_XLNX_CFI_IF, XlnxCfiIf *),
+    DEFINE_PROP_LINK("cframe8", XlnxVersalCFrameBcastReg, cfg.cframe[8],
+                     TYPE_XLNX_CFI_IF, XlnxCfiIf *),
+    DEFINE_PROP_LINK("cframe9", XlnxVersalCFrameBcastReg, cfg.cframe[9],
+                     TYPE_XLNX_CFI_IF, XlnxCfiIf *),
+    DEFINE_PROP_LINK("cframe10", XlnxVersalCFrameBcastReg, cfg.cframe[10],
+                     TYPE_XLNX_CFI_IF, XlnxCfiIf *),
+    DEFINE_PROP_LINK("cframe11", XlnxVersalCFrameBcastReg, cfg.cframe[11],
+                     TYPE_XLNX_CFI_IF, XlnxCfiIf *),
+    DEFINE_PROP_LINK("cframe12", XlnxVersalCFrameBcastReg, cfg.cframe[12],
+                     TYPE_XLNX_CFI_IF, XlnxCfiIf *),
+    DEFINE_PROP_LINK("cframe13", XlnxVersalCFrameBcastReg, cfg.cframe[13],
+                     TYPE_XLNX_CFI_IF, XlnxCfiIf *),
+    DEFINE_PROP_LINK("cframe14", XlnxVersalCFrameBcastReg, cfg.cframe[14],
+                     TYPE_XLNX_CFI_IF, XlnxCfiIf *),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static void cframe_reg_class_init(ObjectClass *klass, void *data)
+{
+    ResettableClass *rc = RESETTABLE_CLASS(klass);
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    XlnxCfiIfClass *xcic = XLNX_CFI_IF_CLASS(klass);
+
+    dc->vmsd = &vmstate_cframe_reg;
+    dc->realize = cframe_reg_realize;
+    rc->phases.enter = cframe_reg_reset_enter;
+    rc->phases.hold = cframe_reg_reset_hold;
+    device_class_set_props(dc, cframe_regs_props);
+    xcic->cfi_transfer_packet = cframe_reg_cfi_transfer_packet;
+}
+
+static void cframe_bcast_reg_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    ResettableClass *rc = RESETTABLE_CLASS(klass);
+
+    dc->vmsd = &vmstate_cframe_bcast_reg;
+    device_class_set_props(dc, cframe_bcast_regs_props);
+    rc->phases.enter = cframe_bcast_reg_reset_enter;
+}
+
+static const TypeInfo cframe_reg_info = {
+    .name          = TYPE_XLNX_VERSAL_CFRAME_REG,
+    .parent        = TYPE_SYS_BUS_DEVICE,
+    .instance_size = sizeof(XlnxVersalCFrameReg),
+    .class_init    = cframe_reg_class_init,
+    .instance_init = cframe_reg_init,
+    .interfaces = (InterfaceInfo[]) {
+        { TYPE_XLNX_CFI_IF },
+        { }
+    }
+};
+
+static const TypeInfo cframe_bcast_reg_info = {
+    .name          = TYPE_XLNX_VERSAL_CFRAME_BCAST_REG,
+    .parent        = TYPE_SYS_BUS_DEVICE,
+    .instance_size = sizeof(XlnxVersalCFrameBcastReg),
+    .class_init    = cframe_bcast_reg_class_init,
+    .instance_init = cframe_bcast_reg_init,
+};
+
+static void cframe_reg_register_types(void)
+{
+    type_register_static(&cframe_reg_info);
+    type_register_static(&cframe_bcast_reg_info);
+}
+
+type_init(cframe_reg_register_types)
diff --git a/hw/misc/xlnx-versal-cfu.c b/hw/misc/xlnx-versal-cfu.c
new file mode 100644
index 0000000000..8e588ac1d8
--- /dev/null
+++ b/hw/misc/xlnx-versal-cfu.c
@@ -0,0 +1,563 @@
+/*
+ * QEMU model of the CFU Configuration Unit.
+ *
+ * Copyright (C) 2023, Advanced Micro Devices, Inc.
+ *
+ * Written by Edgar E. Iglesias <edgar.iglesias@gmail.com>,
+ *            Sai Pavan Boddu <sai.pavan.boddu@amd.com>,
+ *            Francisco Iglesias <francisco.iglesias@amd.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "hw/sysbus.h"
+#include "hw/register.h"
+#include "hw/irq.h"
+#include "qemu/bitops.h"
+#include "qemu/log.h"
+#include "qemu/units.h"
+#include "migration/vmstate.h"
+#include "hw/qdev-properties.h"
+#include "hw/qdev-properties-system.h"
+#include "hw/misc/xlnx-versal-cfu.h"
+
+#ifndef XLNX_VERSAL_CFU_APB_ERR_DEBUG
+#define XLNX_VERSAL_CFU_APB_ERR_DEBUG 0
+#endif
+
+#define KEYHOLE_STREAM_4K (4 * KiB)
+#define KEYHOLE_STREAM_256K (256 * KiB)
+#define CFRAME_BROADCAST_ROW 0x1F
+
+bool update_wfifo(hwaddr addr, uint64_t value,
+                  uint32_t *wfifo, uint32_t *wfifo_ret)
+{
+    unsigned int idx = extract32(addr, 2, 2);
+
+    wfifo[idx] = value;
+
+    if (idx == 3) {
+        memcpy(wfifo_ret, wfifo, WFIFO_SZ * sizeof(uint32_t));
+        memset(wfifo, 0, WFIFO_SZ * sizeof(uint32_t));
+        return true;
+    }
+
+    return false;
+}
+
+static void cfu_imr_update_irq(XlnxVersalCFUAPB *s)
+{
+    bool pending = s->regs[R_CFU_ISR] & ~s->regs[R_CFU_IMR];
+    qemu_set_irq(s->irq_cfu_imr, pending);
+}
+
+static void cfu_isr_postw(RegisterInfo *reg, uint64_t val64)
+{
+    XlnxVersalCFUAPB *s = XLNX_VERSAL_CFU_APB(reg->opaque);
+    cfu_imr_update_irq(s);
+}
+
+static uint64_t cfu_ier_prew(RegisterInfo *reg, uint64_t val64)
+{
+    XlnxVersalCFUAPB *s = XLNX_VERSAL_CFU_APB(reg->opaque);
+    uint32_t val = val64;
+
+    s->regs[R_CFU_IMR] &= ~val;
+    cfu_imr_update_irq(s);
+    return 0;
+}
+
+static uint64_t cfu_idr_prew(RegisterInfo *reg, uint64_t val64)
+{
+    XlnxVersalCFUAPB *s = XLNX_VERSAL_CFU_APB(reg->opaque);
+    uint32_t val = val64;
+
+    s->regs[R_CFU_IMR] |= val;
+    cfu_imr_update_irq(s);
+    return 0;
+}
+
+static uint64_t cfu_itr_prew(RegisterInfo *reg, uint64_t val64)
+{
+    XlnxVersalCFUAPB *s = XLNX_VERSAL_CFU_APB(reg->opaque);
+    uint32_t val = val64;
+
+    s->regs[R_CFU_ISR] |= val;
+    cfu_imr_update_irq(s);
+    return 0;
+}
+
+static void cfu_fgcr_postw(RegisterInfo *reg, uint64_t val64)
+{
+    XlnxVersalCFUAPB *s = XLNX_VERSAL_CFU_APB(reg->opaque);
+    uint32_t val = (uint32_t)val64;
+
+    /* Do a scan. It always looks good. */
+    if (FIELD_EX32(val, CFU_FGCR, SC_HBC_TRIGGER)) {
+        ARRAY_FIELD_DP32(s->regs, CFU_STATUS, SCAN_CLEAR_PASS, 1);
+        ARRAY_FIELD_DP32(s->regs, CFU_STATUS, SCAN_CLEAR_DONE, 1);
+    }
+}
+
+static const RegisterAccessInfo cfu_apb_regs_info[] = {
+    {   .name = "CFU_ISR",  .addr = A_CFU_ISR,
+        .rsvd = 0xfffffc00,
+        .w1c = 0x3ff,
+        .post_write = cfu_isr_postw,
+    },{ .name = "CFU_IMR",  .addr = A_CFU_IMR,
+        .reset = 0x3ff,
+        .rsvd = 0xfffffc00,
+        .ro = 0x3ff,
+    },{ .name = "CFU_IER",  .addr = A_CFU_IER,
+        .rsvd = 0xfffffc00,
+        .pre_write = cfu_ier_prew,
+    },{ .name = "CFU_IDR",  .addr = A_CFU_IDR,
+        .rsvd = 0xfffffc00,
+        .pre_write = cfu_idr_prew,
+    },{ .name = "CFU_ITR",  .addr = A_CFU_ITR,
+        .rsvd = 0xfffffc00,
+        .pre_write = cfu_itr_prew,
+    },{ .name = "CFU_PROTECT",  .addr = A_CFU_PROTECT,
+        .reset = 0x1,
+    },{ .name = "CFU_FGCR",  .addr = A_CFU_FGCR,
+        .rsvd = 0xffff8000,
+        .post_write = cfu_fgcr_postw,
+    },{ .name = "CFU_CTL",  .addr = A_CFU_CTL,
+        .rsvd = 0xffff0000,
+    },{ .name = "CFU_CRAM_RW",  .addr = A_CFU_CRAM_RW,
+        .reset = 0x401f7d9,
+        .rsvd = 0xf8000000,
+    },{ .name = "CFU_MASK",  .addr = A_CFU_MASK,
+    },{ .name = "CFU_CRC_EXPECT",  .addr = A_CFU_CRC_EXPECT,
+    },{ .name = "CFU_CFRAME_LEFT_T0",  .addr = A_CFU_CFRAME_LEFT_T0,
+        .rsvd = 0xfff00000,
+    },{ .name = "CFU_CFRAME_LEFT_T1",  .addr = A_CFU_CFRAME_LEFT_T1,
+        .rsvd = 0xfff00000,
+    },{ .name = "CFU_CFRAME_LEFT_T2",  .addr = A_CFU_CFRAME_LEFT_T2,
+        .rsvd = 0xfff00000,
+    },{ .name = "CFU_ROW_RANGE",  .addr = A_CFU_ROW_RANGE,
+        .rsvd = 0xffffffc0,
+        .ro = 0x3f,
+    },{ .name = "CFU_STATUS",  .addr = A_CFU_STATUS,
+        .rsvd = 0x80000000,
+        .ro = 0x7fffffff,
+    },{ .name = "CFU_INTERNAL_STATUS",  .addr = A_CFU_INTERNAL_STATUS,
+        .rsvd = 0xff800000,
+        .ro = 0x7fffff,
+    },{ .name = "CFU_QWORD_CNT",  .addr = A_CFU_QWORD_CNT,
+        .ro = 0xffffffff,
+    },{ .name = "CFU_CRC_LIVE",  .addr = A_CFU_CRC_LIVE,
+        .ro = 0xffffffff,
+    },{ .name = "CFU_PENDING_READ_CNT",  .addr = A_CFU_PENDING_READ_CNT,
+        .rsvd = 0xfe000000,
+        .ro = 0x1ffffff,
+    },{ .name = "CFU_FDRI_CNT",  .addr = A_CFU_FDRI_CNT,
+        .ro = 0xffffffff,
+    },{ .name = "CFU_ECO1",  .addr = A_CFU_ECO1,
+    },{ .name = "CFU_ECO2",  .addr = A_CFU_ECO2,
+    }
+};
+
+static void cfu_apb_reset(DeviceState *dev)
+{
+    XlnxVersalCFUAPB *s = XLNX_VERSAL_CFU_APB(dev);
+    unsigned int i;
+
+    for (i = 0; i < ARRAY_SIZE(s->regs_info); ++i) {
+        register_reset(&s->regs_info[i]);
+    }
+    memset(s->wfifo, 0, WFIFO_SZ * sizeof(uint32_t));
+
+    s->regs[R_CFU_STATUS] |= R_CFU_STATUS_HC_COMPLETE_MASK;
+    cfu_imr_update_irq(s);
+}
+
+static const MemoryRegionOps cfu_apb_ops = {
+    .read = register_read_memory,
+    .write = register_write_memory,
+    .endianness = DEVICE_LITTLE_ENDIAN,
+    .valid = {
+        .min_access_size = 4,
+        .max_access_size = 4,
+    },
+};
+
+static void cfu_transfer_cfi_packet(XlnxVersalCFUAPB *s, uint8_t row_addr,
+                                    XlnxCfiPacket *pkt)
+{
+    if (row_addr == CFRAME_BROADCAST_ROW) {
+        for (int i = 0; i < ARRAY_SIZE(s->cfg.cframe); i++) {
+            if (s->cfg.cframe[i]) {
+                xlnx_cfi_transfer_packet(s->cfg.cframe[i], pkt);
+            }
+        }
+    } else {
+            assert(row_addr < ARRAY_SIZE(s->cfg.cframe));
+
+            if (s->cfg.cframe[row_addr]) {
+                xlnx_cfi_transfer_packet(s->cfg.cframe[row_addr], pkt);
+            }
+    }
+}
+
+static uint64_t cfu_stream_read(void *opaque, hwaddr addr, unsigned size)
+{
+    qemu_log_mask(LOG_GUEST_ERROR, "%s: Unsupported read from addr=%"
+                  HWADDR_PRIx "\n", __func__, addr);
+    return 0;
+}
+
+static void cfu_stream_write(void *opaque, hwaddr addr, uint64_t value,
+                      unsigned size)
+{
+    XlnxVersalCFUAPB *s = XLNX_VERSAL_CFU_APB(opaque);
+    uint32_t wfifo[WFIFO_SZ];
+
+    if (update_wfifo(addr, value, s->wfifo, wfifo)) {
+        uint8_t packet_type, row_addr, reg_addr;
+
+        packet_type = extract32(wfifo[0], 24, 8);
+        row_addr = extract32(wfifo[0], 16, 5);
+        reg_addr = extract32(wfifo[0], 8, 6);
+
+        /* Compressed bitstreams are not supported yet. */
+        if (ARRAY_FIELD_EX32(s->regs, CFU_CTL, DECOMPRESS) == 0) {
+            if (s->regs[R_CFU_FDRI_CNT]) {
+                XlnxCfiPacket pkt = {
+                    .reg_addr = CFRAME_FDRI,
+                    .data[0] = wfifo[0],
+                    .data[1] = wfifo[1],
+                    .data[2] = wfifo[2],
+                    .data[3] = wfifo[3]
+                };
+
+                cfu_transfer_cfi_packet(s, s->fdri_row_addr, &pkt);
+
+                s->regs[R_CFU_FDRI_CNT]--;
+
+            } else if (packet_type == PACKET_TYPE_CFU &&
+                       reg_addr == CFRAME_FDRI) {
+
+                /* Load R_CFU_FDRI_CNT, must be multiple of 25 */
+                s->regs[R_CFU_FDRI_CNT] = wfifo[1];
+
+                /* Store target row_addr */
+                s->fdri_row_addr = row_addr;
+
+                if (wfifo[1] % 25 != 0) {
+                    qemu_log_mask(LOG_GUEST_ERROR,
+                                  "CFU FDRI_CNT is not loaded with "
+                                  "a multiple of 25 value\n");
+                }
+
+            } else if (packet_type == PACKET_TYPE_CFRAME) {
+                XlnxCfiPacket pkt = {
+                    .reg_addr = reg_addr,
+                    .data[0] = wfifo[1],
+                    .data[1] = wfifo[2],
+                    .data[2] = wfifo[3],
+                };
+                cfu_transfer_cfi_packet(s, row_addr, &pkt);
+            }
+        }
+    }
+}
+
+static uint64_t cfu_sfr_read(void *opaque, hwaddr addr, unsigned size)
+{
+    qemu_log_mask(LOG_GUEST_ERROR, "%s: Unsupported read from addr=%"
+                  HWADDR_PRIx "\n", __func__, addr);
+    return 0;
+}
+
+static void cfu_sfr_write(void *opaque, hwaddr addr, uint64_t value,
+                      unsigned size)
+{
+    XlnxVersalCFUSFR *s = XLNX_VERSAL_CFU_SFR(opaque);
+    uint32_t wfifo[WFIFO_SZ];
+
+    if (update_wfifo(addr, value, s->wfifo, wfifo)) {
+        uint8_t row_addr = extract32(wfifo[0], 23, 5);
+        uint32_t frame_addr = extract32(wfifo[0], 0, 23);
+        XlnxCfiPacket pkt = { .reg_addr = CFRAME_SFR,
+                              .data[0] = frame_addr };
+
+        if (s->cfg.cfu) {
+            cfu_transfer_cfi_packet(s->cfg.cfu, row_addr, &pkt);
+        }
+    }
+}
+
+static uint64_t cfu_fdro_read(void *opaque, hwaddr addr, unsigned size)
+{
+    XlnxVersalCFUFDRO *s = XLNX_VERSAL_CFU_FDRO(opaque);
+    uint64_t ret = 0;
+
+    if (!fifo32_is_empty(&s->fdro_data)) {
+        ret = fifo32_pop(&s->fdro_data);
+    }
+
+    return ret;
+}
+
+static void cfu_fdro_write(void *opaque, hwaddr addr, uint64_t value,
+                           unsigned size)
+{
+    qemu_log_mask(LOG_GUEST_ERROR, "%s: Unsupported write from addr=%"
+                  HWADDR_PRIx "\n", __func__, addr);
+}
+
+static const MemoryRegionOps cfu_stream_ops = {
+    .read = cfu_stream_read,
+    .write = cfu_stream_write,
+    .endianness = DEVICE_LITTLE_ENDIAN,
+    .valid = {
+        .min_access_size = 4,
+        .max_access_size = 8,
+    },
+};
+
+static const MemoryRegionOps cfu_sfr_ops = {
+    .read = cfu_sfr_read,
+    .write = cfu_sfr_write,
+    .endianness = DEVICE_LITTLE_ENDIAN,
+    .valid = {
+        .min_access_size = 4,
+        .max_access_size = 4,
+    },
+};
+
+static const MemoryRegionOps cfu_fdro_ops = {
+    .read = cfu_fdro_read,
+    .write = cfu_fdro_write,
+    .endianness = DEVICE_LITTLE_ENDIAN,
+    .valid = {
+        .min_access_size = 4,
+        .max_access_size = 4,
+    },
+};
+
+static void cfu_apb_init(Object *obj)
+{
+    XlnxVersalCFUAPB *s = XLNX_VERSAL_CFU_APB(obj);
+    SysBusDevice *sbd = SYS_BUS_DEVICE(obj);
+    RegisterInfoArray *reg_array;
+    unsigned int i;
+    char *name;
+
+    memory_region_init(&s->iomem, obj, TYPE_XLNX_VERSAL_CFU_APB, R_MAX * 4);
+    reg_array =
+        register_init_block32(DEVICE(obj), cfu_apb_regs_info,
+                              ARRAY_SIZE(cfu_apb_regs_info),
+                              s->regs_info, s->regs,
+                              &cfu_apb_ops,
+                              XLNX_VERSAL_CFU_APB_ERR_DEBUG,
+                              R_MAX * 4);
+    memory_region_add_subregion(&s->iomem,
+                                0x0,
+                                &reg_array->mem);
+    sysbus_init_mmio(sbd, &s->iomem);
+    for (i = 0; i < NUM_STREAM; i++) {
+        name = g_strdup_printf(TYPE_XLNX_VERSAL_CFU_APB "-stream%d", i);
+        memory_region_init_io(&s->iomem_stream[i], obj, &cfu_stream_ops, s,
+                          name, i == 0 ? KEYHOLE_STREAM_4K :
+                                         KEYHOLE_STREAM_256K);
+        sysbus_init_mmio(sbd, &s->iomem_stream[i]);
+        g_free(name);
+    }
+    sysbus_init_irq(sbd, &s->irq_cfu_imr);
+}
+
+static void cfu_sfr_init(Object *obj)
+{
+    XlnxVersalCFUSFR *s = XLNX_VERSAL_CFU_SFR(obj);
+    SysBusDevice *sbd = SYS_BUS_DEVICE(obj);
+
+    memory_region_init_io(&s->iomem_sfr, obj, &cfu_sfr_ops, s,
+                          TYPE_XLNX_VERSAL_CFU_SFR, KEYHOLE_STREAM_4K);
+    sysbus_init_mmio(sbd, &s->iomem_sfr);
+}
+
+static void cfu_sfr_reset_enter(Object *obj, ResetType type)
+{
+    XlnxVersalCFUSFR *s = XLNX_VERSAL_CFU_SFR(obj);
+
+    memset(s->wfifo, 0, WFIFO_SZ * sizeof(uint32_t));
+}
+
+static void cfu_fdro_init(Object *obj)
+{
+    XlnxVersalCFUFDRO *s = XLNX_VERSAL_CFU_FDRO(obj);
+    SysBusDevice *sbd = SYS_BUS_DEVICE(obj);
+
+    memory_region_init_io(&s->iomem_fdro, obj, &cfu_fdro_ops, s,
+                          TYPE_XLNX_VERSAL_CFU_FDRO, KEYHOLE_STREAM_4K);
+    sysbus_init_mmio(sbd, &s->iomem_fdro);
+    fifo32_create(&s->fdro_data, 8 * KiB / sizeof(uint32_t));
+}
+
+static void cfu_fdro_reset_enter(Object *obj, ResetType type)
+{
+    XlnxVersalCFUFDRO *s = XLNX_VERSAL_CFU_FDRO(obj);
+
+    fifo32_reset(&s->fdro_data);
+}
+
+static void cfu_fdro_cfi_transfer_packet(XlnxCfiIf *cfi_if, XlnxCfiPacket *pkt)
+{
+    XlnxVersalCFUFDRO *s = XLNX_VERSAL_CFU_FDRO(cfi_if);
+
+    if (fifo32_num_free(&s->fdro_data) >= ARRAY_SIZE(pkt->data)) {
+        for (int i = 0; i < ARRAY_SIZE(pkt->data); i++) {
+            fifo32_push(&s->fdro_data, pkt->data[i]);
+        }
+    } else {
+        /* It is a programming error to fill the fifo. */
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "CFU_FDRO: CFI data dropped due to full read fifo\n");
+    }
+}
+
+static Property cfu_props[] = {
+        DEFINE_PROP_LINK("cframe0", XlnxVersalCFUAPB, cfg.cframe[0],
+                         TYPE_XLNX_CFI_IF, XlnxCfiIf *),
+        DEFINE_PROP_LINK("cframe1", XlnxVersalCFUAPB, cfg.cframe[1],
+                         TYPE_XLNX_CFI_IF, XlnxCfiIf *),
+        DEFINE_PROP_LINK("cframe2", XlnxVersalCFUAPB, cfg.cframe[2],
+                         TYPE_XLNX_CFI_IF, XlnxCfiIf *),
+        DEFINE_PROP_LINK("cframe3", XlnxVersalCFUAPB, cfg.cframe[3],
+                         TYPE_XLNX_CFI_IF, XlnxCfiIf *),
+        DEFINE_PROP_LINK("cframe4", XlnxVersalCFUAPB, cfg.cframe[4],
+                         TYPE_XLNX_CFI_IF, XlnxCfiIf *),
+        DEFINE_PROP_LINK("cframe5", XlnxVersalCFUAPB, cfg.cframe[5],
+                         TYPE_XLNX_CFI_IF, XlnxCfiIf *),
+        DEFINE_PROP_LINK("cframe6", XlnxVersalCFUAPB, cfg.cframe[6],
+                         TYPE_XLNX_CFI_IF, XlnxCfiIf *),
+        DEFINE_PROP_LINK("cframe7", XlnxVersalCFUAPB, cfg.cframe[7],
+                         TYPE_XLNX_CFI_IF, XlnxCfiIf *),
+        DEFINE_PROP_LINK("cframe8", XlnxVersalCFUAPB, cfg.cframe[8],
+                         TYPE_XLNX_CFI_IF, XlnxCfiIf *),
+        DEFINE_PROP_LINK("cframe9", XlnxVersalCFUAPB, cfg.cframe[9],
+                         TYPE_XLNX_CFI_IF, XlnxCfiIf *),
+        DEFINE_PROP_LINK("cframe10", XlnxVersalCFUAPB, cfg.cframe[10],
+                         TYPE_XLNX_CFI_IF, XlnxCfiIf *),
+        DEFINE_PROP_LINK("cframe11", XlnxVersalCFUAPB, cfg.cframe[11],
+                         TYPE_XLNX_CFI_IF, XlnxCfiIf *),
+        DEFINE_PROP_LINK("cframe12", XlnxVersalCFUAPB, cfg.cframe[12],
+                         TYPE_XLNX_CFI_IF, XlnxCfiIf *),
+        DEFINE_PROP_LINK("cframe13", XlnxVersalCFUAPB, cfg.cframe[13],
+                         TYPE_XLNX_CFI_IF, XlnxCfiIf *),
+        DEFINE_PROP_LINK("cframe14", XlnxVersalCFUAPB, cfg.cframe[14],
+                         TYPE_XLNX_CFI_IF, XlnxCfiIf *),
+        DEFINE_PROP_END_OF_LIST(),
+};
+
+static Property cfu_sfr_props[] = {
+        DEFINE_PROP_LINK("cfu", XlnxVersalCFUSFR, cfg.cfu,
+                         TYPE_XLNX_VERSAL_CFU_APB, XlnxVersalCFUAPB *),
+        DEFINE_PROP_END_OF_LIST(),
+};
+
+static const VMStateDescription vmstate_cfu_apb = {
+    .name = TYPE_XLNX_VERSAL_CFU_APB,
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT32_ARRAY(wfifo, XlnxVersalCFUAPB, 4),
+        VMSTATE_UINT32_ARRAY(regs, XlnxVersalCFUAPB, R_MAX),
+        VMSTATE_UINT8(fdri_row_addr, XlnxVersalCFUAPB),
+        VMSTATE_END_OF_LIST(),
+    }
+};
+
+static const VMStateDescription vmstate_cfu_fdro = {
+    .name = TYPE_XLNX_VERSAL_CFU_FDRO,
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_FIFO32(fdro_data, XlnxVersalCFUFDRO),
+        VMSTATE_END_OF_LIST(),
+    }
+};
+
+static const VMStateDescription vmstate_cfu_sfr = {
+    .name = TYPE_XLNX_VERSAL_CFU_SFR,
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT32_ARRAY(wfifo, XlnxVersalCFUSFR, 4),
+        VMSTATE_END_OF_LIST(),
+    }
+};
+
+static void cfu_apb_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+
+    dc->reset = cfu_apb_reset;
+    dc->vmsd = &vmstate_cfu_apb;
+    device_class_set_props(dc, cfu_props);
+}
+
+static void cfu_fdro_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    ResettableClass *rc = RESETTABLE_CLASS(klass);
+    XlnxCfiIfClass *xcic = XLNX_CFI_IF_CLASS(klass);
+
+    dc->vmsd = &vmstate_cfu_fdro;
+    xcic->cfi_transfer_packet = cfu_fdro_cfi_transfer_packet;
+    rc->phases.enter = cfu_fdro_reset_enter;
+}
+
+static void cfu_sfr_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    ResettableClass *rc = RESETTABLE_CLASS(klass);
+
+    device_class_set_props(dc, cfu_sfr_props);
+    dc->vmsd = &vmstate_cfu_sfr;
+    rc->phases.enter = cfu_sfr_reset_enter;
+}
+
+static const TypeInfo cfu_apb_info = {
+    .name          = TYPE_XLNX_VERSAL_CFU_APB,
+    .parent        = TYPE_SYS_BUS_DEVICE,
+    .instance_size = sizeof(XlnxVersalCFUAPB),
+    .class_init    = cfu_apb_class_init,
+    .instance_init = cfu_apb_init,
+    .interfaces = (InterfaceInfo[]) {
+        { TYPE_XLNX_CFI_IF },
+        { }
+    }
+};
+
+static const TypeInfo cfu_fdro_info = {
+    .name          = TYPE_XLNX_VERSAL_CFU_FDRO,
+    .parent        = TYPE_SYS_BUS_DEVICE,
+    .instance_size = sizeof(XlnxVersalCFUFDRO),
+    .class_init    = cfu_fdro_class_init,
+    .instance_init = cfu_fdro_init,
+    .interfaces = (InterfaceInfo[]) {
+        { TYPE_XLNX_CFI_IF },
+        { }
+    }
+};
+
+static const TypeInfo cfu_sfr_info = {
+    .name          = TYPE_XLNX_VERSAL_CFU_SFR,
+    .parent        = TYPE_SYS_BUS_DEVICE,
+    .instance_size = sizeof(XlnxVersalCFUSFR),
+    .class_init    = cfu_sfr_class_init,
+    .instance_init = cfu_sfr_init,
+};
+
+static void cfu_apb_register_types(void)
+{
+    type_register_static(&cfu_apb_info);
+    type_register_static(&cfu_fdro_info);
+    type_register_static(&cfu_sfr_info);
+}
+
+type_init(cfu_apb_register_types)
diff --git a/hw/misc/zynq_slcr.c b/hw/misc/zynq_slcr.c
index 8b70285961..41f38a98e9 100644
--- a/hw/misc/zynq_slcr.c
+++ b/hw/misc/zynq_slcr.c
@@ -285,7 +285,7 @@ static void zynq_slcr_compute_clocks_internal(ZynqSLCRState *s, uint64_t ps_clk)
 }
 
 /**
- * Compute and set the ouputs clocks periods.
+ * Compute and set the outputs clocks periods.
  * But do not propagate them further. Connected clocks
  * will not receive any updates (See zynq_slcr_compute_clocks())
  */
diff --git a/hw/net/cadence_gem.c b/hw/net/cadence_gem.c
index 42ea2411a2..f445d8bb5e 100644
--- a/hw/net/cadence_gem.c
+++ b/hw/net/cadence_gem.c
@@ -81,8 +81,8 @@
 #define GEM_IPGSTRETCH    (0x000000BC / 4) /* IPG Stretch reg */
 #define GEM_SVLAN         (0x000000C0 / 4) /* Stacked VLAN reg */
 #define GEM_MODID         (0x000000FC / 4) /* Module ID reg */
-#define GEM_OCTTXLO       (0x00000100 / 4) /* Octects transmitted Low reg */
-#define GEM_OCTTXHI       (0x00000104 / 4) /* Octects transmitted High reg */
+#define GEM_OCTTXLO       (0x00000100 / 4) /* Octets transmitted Low reg */
+#define GEM_OCTTXHI       (0x00000104 / 4) /* Octets transmitted High reg */
 #define GEM_TXCNT         (0x00000108 / 4) /* Error-free Frames transmitted */
 #define GEM_TXBCNT        (0x0000010C / 4) /* Error-free Broadcast Frames */
 #define GEM_TXMCNT        (0x00000110 / 4) /* Error-free Multicast Frame */
@@ -101,8 +101,8 @@
 #define GEM_LATECOLLCNT   (0x00000144 / 4) /* Late Collision Frames */
 #define GEM_DEFERTXCNT    (0x00000148 / 4) /* Deferred Transmission Frames */
 #define GEM_CSENSECNT     (0x0000014C / 4) /* Carrier Sense Error Counter */
-#define GEM_OCTRXLO       (0x00000150 / 4) /* Octects Received register Low */
-#define GEM_OCTRXHI       (0x00000154 / 4) /* Octects Received register High */
+#define GEM_OCTRXLO       (0x00000150 / 4) /* Octets Received register Low */
+#define GEM_OCTRXHI       (0x00000154 / 4) /* Octets Received register High */
 #define GEM_RXCNT         (0x00000158 / 4) /* Error-free Frames Received */
 #define GEM_RXBROADCNT    (0x0000015C / 4) /* Error-free Broadcast Frames RX */
 #define GEM_RXMULTICNT    (0x00000160 / 4) /* Error-free Multicast Frames RX */
@@ -954,7 +954,7 @@ static ssize_t gem_receive(NetClientState *nc, const uint8_t *buf, size_t size)
     /* Is this destination MAC address "for us" ? */
     maf = gem_mac_address_filter(s, buf);
     if (maf == GEM_RX_REJECT) {
-        return size;  /* no, drop siliently b/c it's not an error */
+        return size;  /* no, drop silently b/c it's not an error */
     }
 
     /* Discard packets with receive length error enabled ? */
diff --git a/hw/net/dp8393x.c b/hw/net/dp8393x.c
index a596f7fbc6..c6f5fb7dce 100644
--- a/hw/net/dp8393x.c
+++ b/hw/net/dp8393x.c
@@ -551,7 +551,7 @@ static uint64_t dp8393x_read(void *opaque, hwaddr addr, unsigned int size)
             val = s->cam[s->regs[SONIC_CEP] & 0xf][SONIC_CAP0 - reg];
         }
         break;
-    /* All other registers have no special contraints */
+    /* All other registers have no special constraints */
     default:
         val = s->regs[reg];
     }
diff --git a/hw/net/e1000_regs.h b/hw/net/e1000_regs.h
index 8a4ce82034..39f4882510 100644
--- a/hw/net/e1000_regs.h
+++ b/hw/net/e1000_regs.h
@@ -130,7 +130,7 @@
 
 #define E1000_GCR2      0x05B64 /* 3GIO Control Register 2 */
 #define E1000_FFLT_DBG  0x05F04 /* Debug Register */
-#define E1000_HICR      0x08F00 /* Host Inteface Control */
+#define E1000_HICR      0x08F00 /* Host Interface Control */
 
 #define E1000_RXMTRL     0x0B634 /* Time sync Rx EtherType and Msg Type - RW */
 #define E1000_RXUDP      0x0B638 /* Time Sync Rx UDP Port - RW */
diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c
index f8aeafa16b..e324c02dd5 100644
--- a/hw/net/e1000e_core.c
+++ b/hw/net/e1000e_core.c
@@ -810,24 +810,24 @@ e1000e_txdesc_writeback(E1000ECore *core, dma_addr_t base,
     return e1000e_tx_wb_interrupt_cause(core, queue_idx);
 }
 
-typedef struct E1000E_RingInfo_st {
+typedef struct E1000ERingInfo {
     int dbah;
     int dbal;
     int dlen;
     int dh;
     int dt;
     int idx;
-} E1000E_RingInfo;
+} E1000ERingInfo;
 
 static inline bool
-e1000e_ring_empty(E1000ECore *core, const E1000E_RingInfo *r)
+e1000e_ring_empty(E1000ECore *core, const E1000ERingInfo *r)
 {
     return core->mac[r->dh] == core->mac[r->dt] ||
                 core->mac[r->dt] >= core->mac[r->dlen] / E1000_RING_DESC_LEN;
 }
 
 static inline uint64_t
-e1000e_ring_base(E1000ECore *core, const E1000E_RingInfo *r)
+e1000e_ring_base(E1000ECore *core, const E1000ERingInfo *r)
 {
     uint64_t bah = core->mac[r->dbah];
     uint64_t bal = core->mac[r->dbal];
@@ -836,13 +836,13 @@ e1000e_ring_base(E1000ECore *core, const E1000E_RingInfo *r)
 }
 
 static inline uint64_t
-e1000e_ring_head_descr(E1000ECore *core, const E1000E_RingInfo *r)
+e1000e_ring_head_descr(E1000ECore *core, const E1000ERingInfo *r)
 {
     return e1000e_ring_base(core, r) + E1000_RING_DESC_LEN * core->mac[r->dh];
 }
 
 static inline void
-e1000e_ring_advance(E1000ECore *core, const E1000E_RingInfo *r, uint32_t count)
+e1000e_ring_advance(E1000ECore *core, const E1000ERingInfo *r, uint32_t count)
 {
     core->mac[r->dh] += count;
 
@@ -852,7 +852,7 @@ e1000e_ring_advance(E1000ECore *core, const E1000E_RingInfo *r, uint32_t count)
 }
 
 static inline uint32_t
-e1000e_ring_free_descr_num(E1000ECore *core, const E1000E_RingInfo *r)
+e1000e_ring_free_descr_num(E1000ECore *core, const E1000ERingInfo *r)
 {
     trace_e1000e_ring_free_space(r->idx, core->mac[r->dlen],
                                  core->mac[r->dh],  core->mac[r->dt]);
@@ -871,19 +871,19 @@ e1000e_ring_free_descr_num(E1000ECore *core, const E1000E_RingInfo *r)
 }
 
 static inline bool
-e1000e_ring_enabled(E1000ECore *core, const E1000E_RingInfo *r)
+e1000e_ring_enabled(E1000ECore *core, const E1000ERingInfo *r)
 {
     return core->mac[r->dlen] > 0;
 }
 
 static inline uint32_t
-e1000e_ring_len(E1000ECore *core, const E1000E_RingInfo *r)
+e1000e_ring_len(E1000ECore *core, const E1000ERingInfo *r)
 {
     return core->mac[r->dlen];
 }
 
 typedef struct E1000E_TxRing_st {
-    const E1000E_RingInfo *i;
+    const E1000ERingInfo *i;
     struct e1000e_tx *tx;
 } E1000E_TxRing;
 
@@ -896,7 +896,7 @@ e1000e_mq_queue_idx(int base_reg_idx, int reg_idx)
 static inline void
 e1000e_tx_ring_init(E1000ECore *core, E1000E_TxRing *txr, int idx)
 {
-    static const E1000E_RingInfo i[E1000E_NUM_QUEUES] = {
+    static const E1000ERingInfo i[E1000E_NUM_QUEUES] = {
         { TDBAH,  TDBAL,  TDLEN,  TDH,  TDT, 0 },
         { TDBAH1, TDBAL1, TDLEN1, TDH1, TDT1, 1 }
     };
@@ -908,13 +908,13 @@ e1000e_tx_ring_init(E1000ECore *core, E1000E_TxRing *txr, int idx)
 }
 
 typedef struct E1000E_RxRing_st {
-    const E1000E_RingInfo *i;
+    const E1000ERingInfo *i;
 } E1000E_RxRing;
 
 static inline void
 e1000e_rx_ring_init(E1000ECore *core, E1000E_RxRing *rxr, int idx)
 {
-    static const E1000E_RingInfo i[E1000E_NUM_QUEUES] = {
+    static const E1000ERingInfo i[E1000E_NUM_QUEUES] = {
         { RDBAH0, RDBAL0, RDLEN0, RDH0, RDT0, 0 },
         { RDBAH1, RDBAL1, RDLEN1, RDH1, RDT1, 1 }
     };
@@ -930,7 +930,7 @@ e1000e_start_xmit(E1000ECore *core, const E1000E_TxRing *txr)
     dma_addr_t base;
     struct e1000_tx_desc desc;
     bool ide = false;
-    const E1000E_RingInfo *txi = txr->i;
+    const E1000ERingInfo *txi = txr->i;
     uint32_t cause = E1000_ICS_TXQE;
 
     if (!(core->mac[TCTL] & E1000_TCTL_EN)) {
@@ -960,7 +960,7 @@ e1000e_start_xmit(E1000ECore *core, const E1000E_TxRing *txr)
 }
 
 static bool
-e1000e_has_rxbufs(E1000ECore *core, const E1000E_RingInfo *r,
+e1000e_has_rxbufs(E1000ECore *core, const E1000ERingInfo *r,
                   size_t total_size)
 {
     uint32_t bufs = e1000e_ring_free_descr_num(core, r);
@@ -1397,17 +1397,17 @@ e1000e_pci_dma_write_rx_desc(E1000ECore *core, dma_addr_t addr,
     }
 }
 
-typedef struct e1000e_ba_state_st {
+typedef struct E1000EBAState {
     uint16_t written[MAX_PS_BUFFERS];
     uint8_t cur_idx;
-} e1000e_ba_state;
+} E1000EBAState;
 
 static inline void
-e1000e_write_hdr_to_rx_buffers(E1000ECore *core,
-                               hwaddr ba[MAX_PS_BUFFERS],
-                               e1000e_ba_state *bastate,
-                               const char *data,
-                               dma_addr_t data_len)
+e1000e_write_hdr_frag_to_rx_buffers(E1000ECore *core,
+                                    hwaddr ba[MAX_PS_BUFFERS],
+                                    E1000EBAState *bastate,
+                                    const char *data,
+                                    dma_addr_t data_len)
 {
     assert(data_len <= core->rxbuf_sizes[0] - bastate->written[0]);
 
@@ -1418,11 +1418,11 @@ e1000e_write_hdr_to_rx_buffers(E1000ECore *core,
 }
 
 static void
-e1000e_write_to_rx_buffers(E1000ECore *core,
-                           hwaddr ba[MAX_PS_BUFFERS],
-                           e1000e_ba_state *bastate,
-                           const char *data,
-                           dma_addr_t data_len)
+e1000e_write_payload_frag_to_rx_buffers(E1000ECore *core,
+                                        hwaddr ba[MAX_PS_BUFFERS],
+                                        E1000EBAState *bastate,
+                                        const char *data,
+                                        dma_addr_t data_len)
 {
     while (data_len > 0) {
         uint32_t cur_buf_len = core->rxbuf_sizes[bastate->cur_idx];
@@ -1460,7 +1460,7 @@ e1000e_update_rx_stats(E1000ECore *core, size_t pkt_size, size_t pkt_fcs_size)
 }
 
 static inline bool
-e1000e_rx_descr_threshold_hit(E1000ECore *core, const E1000E_RingInfo *rxi)
+e1000e_rx_descr_threshold_hit(E1000ECore *core, const E1000ERingInfo *rxi)
 {
     return e1000e_ring_free_descr_num(core, rxi) ==
            e1000e_ring_len(core, rxi) >> core->rxbuf_min_shift;
@@ -1521,7 +1521,7 @@ e1000e_write_packet_to_guest(E1000ECore *core, struct NetRxPkt *pkt,
     struct iovec *iov = net_rx_pkt_get_iovec(pkt);
     size_t size = net_rx_pkt_get_total_len(pkt);
     size_t total_size = size + e1000x_fcs_len(core->mac);
-    const E1000E_RingInfo *rxi;
+    const E1000ERingInfo *rxi;
     size_t ps_hdr_len = 0;
     bool do_ps = e1000e_do_ps(core, pkt, &ps_hdr_len);
     bool is_first = true;
@@ -1530,7 +1530,7 @@ e1000e_write_packet_to_guest(E1000ECore *core, struct NetRxPkt *pkt,
 
     do {
         hwaddr ba[MAX_PS_BUFFERS];
-        e1000e_ba_state bastate = { { 0 } };
+        E1000EBAState bastate = { { 0 } };
         bool is_last = false;
 
         desc_size = total_size - desc_offset;
@@ -1568,8 +1568,10 @@ e1000e_write_packet_to_guest(E1000ECore *core, struct NetRxPkt *pkt,
                             iov_copy = MIN(ps_hdr_len - ps_hdr_copied,
                                            iov->iov_len - iov_ofs);
 
-                            e1000e_write_hdr_to_rx_buffers(core, ba, &bastate,
-                                                      iov->iov_base, iov_copy);
+                            e1000e_write_hdr_frag_to_rx_buffers(core, ba,
+                                                                &bastate,
+                                                                iov->iov_base,
+                                                                iov_copy);
 
                             copy_size -= iov_copy;
                             ps_hdr_copied += iov_copy;
@@ -1585,8 +1587,8 @@ e1000e_write_packet_to_guest(E1000ECore *core, struct NetRxPkt *pkt,
                     } else {
                         /* Leave buffer 0 of each descriptor except first */
                         /* empty as per spec 7.1.5.1                      */
-                        e1000e_write_hdr_to_rx_buffers(core, ba, &bastate,
-                                                       NULL, 0);
+                        e1000e_write_hdr_frag_to_rx_buffers(core, ba, &bastate,
+                                                            NULL, 0);
                     }
                 }
 
@@ -1594,8 +1596,10 @@ e1000e_write_packet_to_guest(E1000ECore *core, struct NetRxPkt *pkt,
                 while (copy_size) {
                     iov_copy = MIN(copy_size, iov->iov_len - iov_ofs);
 
-                    e1000e_write_to_rx_buffers(core, ba, &bastate,
-                                            iov->iov_base + iov_ofs, iov_copy);
+                    e1000e_write_payload_frag_to_rx_buffers(core, ba, &bastate,
+                                                            iov->iov_base +
+                                                            iov_ofs,
+                                                            iov_copy);
 
                     copy_size -= iov_copy;
                     iov_ofs += iov_copy;
@@ -1607,7 +1611,7 @@ e1000e_write_packet_to_guest(E1000ECore *core, struct NetRxPkt *pkt,
 
                 if (desc_offset + desc_size >= total_size) {
                     /* Simulate FCS checksum presence in the last descriptor */
-                    e1000e_write_to_rx_buffers(core, ba, &bastate,
+                    e1000e_write_payload_frag_to_rx_buffers(core, ba, &bastate,
                           (const char *) &fcs_pad, e1000x_fcs_len(core->mac));
                 }
             }
@@ -2852,7 +2856,7 @@ e1000e_update_rx_offloads(E1000ECore *core)
 
     if (core->has_vnet) {
         qemu_set_offload(qemu_get_queue(core->owner_nic)->peer,
-                         cso_state, 0, 0, 0, 0);
+                         cso_state, 0, 0, 0, 0, 0, 0);
     }
 }
 
diff --git a/hw/net/e1000x_regs.h b/hw/net/e1000x_regs.h
index 13760c66d3..cd896fc0ca 100644
--- a/hw/net/e1000x_regs.h
+++ b/hw/net/e1000x_regs.h
@@ -839,7 +839,7 @@ union e1000_rx_desc_packet_split {
 #define E1000_RXD_STAT_EOP      0x02    /* End of Packet */
 #define E1000_RXD_STAT_IXSM     0x04    /* Ignore checksum */
 #define E1000_RXD_STAT_VP       0x08    /* IEEE VLAN Packet */
-#define E1000_RXD_STAT_UDPCS    0x10    /* UDP xsum caculated */
+#define E1000_RXD_STAT_UDPCS    0x10    /* UDP xsum calculated */
 #define E1000_RXD_STAT_TCPCS    0x20    /* TCP xsum calculated */
 #define E1000_RXD_STAT_IPCS     0x40    /* IP xsum calculated */
 #define E1000_RXD_STAT_PIF      0x80    /* passed in-exact filter */
diff --git a/hw/net/fsl_etsec/rings.c b/hw/net/fsl_etsec/rings.c
index 788463f1b6..42216de6c9 100644
--- a/hw/net/fsl_etsec/rings.c
+++ b/hw/net/fsl_etsec/rings.c
@@ -365,13 +365,19 @@ void etsec_walk_tx_ring(eTSEC *etsec, int ring_nbr)
     } while (TRUE);
 
     /* Save the Buffer Descriptor Pointers to last bd that was not
-     * succesfully closed */
+     * successfully closed */
     etsec->regs[TBPTR0 + ring_nbr].value = bd_addr;
 
     /* Set transmit halt THLTx */
     etsec->regs[TSTAT].value |= 1 << (31 - ring_nbr);
 }
 
+/*
+ * rx_init_frame() ensures we never do more padding than this
+ * (checksum plus minimum data packet size)
+ */
+#define MAX_RX_PADDING 64
+
 static void fill_rx_bd(eTSEC          *etsec,
                        eTSEC_rxtx_bd  *bd,
                        const uint8_t **buf,
@@ -380,9 +386,11 @@ static void fill_rx_bd(eTSEC          *etsec,
     uint16_t to_write;
     hwaddr   bufptr = bd->bufptr +
         ((hwaddr)(etsec->regs[TBDBPH].value & 0xF) << 32);
-    uint8_t  padd[etsec->rx_padding];
+    uint8_t  padd[MAX_RX_PADDING];
     uint8_t  rem;
 
+    assert(etsec->rx_padding <= MAX_RX_PADDING);
+
     RING_DEBUG("eTSEC fill Rx buffer @ 0x%016" HWADDR_PRIx
                " size:%zu(padding + crc:%u) + fcb:%u\n",
                bufptr, *size, etsec->rx_padding, etsec->rx_fcb_size);
@@ -426,7 +434,7 @@ static void fill_rx_bd(eTSEC          *etsec,
         rem = MIN(etsec->regs[MRBLR].value - bd->length, etsec->rx_padding);
 
         if (rem > 0) {
-            memset(padd, 0x0, sizeof(padd));
+            memset(padd, 0x0, rem);
             etsec->rx_padding -= rem;
             *size             -= rem;
             bd->length        += rem;
diff --git a/hw/net/i82596.c b/hw/net/i82596.c
index ab26f8bea1..6defa9d3a1 100644
--- a/hw/net/i82596.c
+++ b/hw/net/i82596.c
@@ -15,6 +15,7 @@
 #include "hw/irq.h"
 #include "hw/qdev-properties.h"
 #include "migration/vmstate.h"
+#include "exec/address-spaces.h"
 #include "qemu/module.h"
 #include "trace.h"
 #include "i82596.h"
diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
index 8b6b75c522..f6a5e2327b 100644
--- a/hw/net/igb_core.c
+++ b/hw/net/igb_core.c
@@ -267,6 +267,29 @@ igb_rx_use_legacy_descriptor(IGBCore *core)
     return false;
 }
 
+typedef struct E1000ERingInfo {
+    int dbah;
+    int dbal;
+    int dlen;
+    int dh;
+    int dt;
+    int idx;
+} E1000ERingInfo;
+
+static uint32_t
+igb_rx_queue_desctyp_get(IGBCore *core, const E1000ERingInfo *r)
+{
+    return core->mac[E1000_SRRCTL(r->idx) >> 2] & E1000_SRRCTL_DESCTYPE_MASK;
+}
+
+static bool
+igb_rx_use_ps_descriptor(IGBCore *core, const E1000ERingInfo *r)
+{
+    uint32_t desctyp = igb_rx_queue_desctyp_get(core, r);
+    return desctyp == E1000_SRRCTL_DESCTYPE_HDR_SPLIT ||
+           desctyp == E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
+}
+
 static inline bool
 igb_rss_enabled(IGBCore *core)
 {
@@ -694,24 +717,15 @@ static uint32_t igb_rx_wb_eic(IGBCore *core, int queue_idx)
     return (ent & E1000_IVAR_VALID) ? BIT(ent & 0x1f) : 0;
 }
 
-typedef struct E1000E_RingInfo_st {
-    int dbah;
-    int dbal;
-    int dlen;
-    int dh;
-    int dt;
-    int idx;
-} E1000E_RingInfo;
-
 static inline bool
-igb_ring_empty(IGBCore *core, const E1000E_RingInfo *r)
+igb_ring_empty(IGBCore *core, const E1000ERingInfo *r)
 {
     return core->mac[r->dh] == core->mac[r->dt] ||
                 core->mac[r->dt] >= core->mac[r->dlen] / E1000_RING_DESC_LEN;
 }
 
 static inline uint64_t
-igb_ring_base(IGBCore *core, const E1000E_RingInfo *r)
+igb_ring_base(IGBCore *core, const E1000ERingInfo *r)
 {
     uint64_t bah = core->mac[r->dbah];
     uint64_t bal = core->mac[r->dbal];
@@ -720,13 +734,13 @@ igb_ring_base(IGBCore *core, const E1000E_RingInfo *r)
 }
 
 static inline uint64_t
-igb_ring_head_descr(IGBCore *core, const E1000E_RingInfo *r)
+igb_ring_head_descr(IGBCore *core, const E1000ERingInfo *r)
 {
     return igb_ring_base(core, r) + E1000_RING_DESC_LEN * core->mac[r->dh];
 }
 
 static inline void
-igb_ring_advance(IGBCore *core, const E1000E_RingInfo *r, uint32_t count)
+igb_ring_advance(IGBCore *core, const E1000ERingInfo *r, uint32_t count)
 {
     core->mac[r->dh] += count;
 
@@ -736,7 +750,7 @@ igb_ring_advance(IGBCore *core, const E1000E_RingInfo *r, uint32_t count)
 }
 
 static inline uint32_t
-igb_ring_free_descr_num(IGBCore *core, const E1000E_RingInfo *r)
+igb_ring_free_descr_num(IGBCore *core, const E1000ERingInfo *r)
 {
     trace_e1000e_ring_free_space(r->idx, core->mac[r->dlen],
                                  core->mac[r->dh],  core->mac[r->dt]);
@@ -755,13 +769,13 @@ igb_ring_free_descr_num(IGBCore *core, const E1000E_RingInfo *r)
 }
 
 static inline bool
-igb_ring_enabled(IGBCore *core, const E1000E_RingInfo *r)
+igb_ring_enabled(IGBCore *core, const E1000ERingInfo *r)
 {
     return core->mac[r->dlen] > 0;
 }
 
 typedef struct IGB_TxRing_st {
-    const E1000E_RingInfo *i;
+    const E1000ERingInfo *i;
     struct igb_tx *tx;
 } IGB_TxRing;
 
@@ -774,7 +788,7 @@ igb_mq_queue_idx(int base_reg_idx, int reg_idx)
 static inline void
 igb_tx_ring_init(IGBCore *core, IGB_TxRing *txr, int idx)
 {
-    static const E1000E_RingInfo i[IGB_NUM_QUEUES] = {
+    static const E1000ERingInfo i[IGB_NUM_QUEUES] = {
         { TDBAH0, TDBAL0, TDLEN0, TDH0, TDT0, 0 },
         { TDBAH1, TDBAL1, TDLEN1, TDH1, TDT1, 1 },
         { TDBAH2, TDBAL2, TDLEN2, TDH2, TDT2, 2 },
@@ -800,13 +814,13 @@ igb_tx_ring_init(IGBCore *core, IGB_TxRing *txr, int idx)
 }
 
 typedef struct E1000E_RxRing_st {
-    const E1000E_RingInfo *i;
+    const E1000ERingInfo *i;
 } E1000E_RxRing;
 
 static inline void
 igb_rx_ring_init(IGBCore *core, E1000E_RxRing *rxr, int idx)
 {
-    static const E1000E_RingInfo i[IGB_NUM_QUEUES] = {
+    static const E1000ERingInfo i[IGB_NUM_QUEUES] = {
         { RDBAH0, RDBAL0, RDLEN0, RDH0, RDT0, 0 },
         { RDBAH1, RDBAL1, RDLEN1, RDH1, RDT1, 1 },
         { RDBAH2, RDBAL2, RDLEN2, RDH2, RDT2, 2 },
@@ -833,7 +847,7 @@ igb_rx_ring_init(IGBCore *core, E1000E_RxRing *rxr, int idx)
 static uint32_t
 igb_txdesc_writeback(IGBCore *core, dma_addr_t base,
                      union e1000_adv_tx_desc *tx_desc,
-                     const E1000E_RingInfo *txi)
+                     const E1000ERingInfo *txi)
 {
     PCIDevice *d;
     uint32_t cmd_type_len = le32_to_cpu(tx_desc->read.cmd_type_len);
@@ -866,7 +880,7 @@ igb_txdesc_writeback(IGBCore *core, dma_addr_t base,
 }
 
 static inline bool
-igb_tx_enabled(IGBCore *core, const E1000E_RingInfo *txi)
+igb_tx_enabled(IGBCore *core, const E1000ERingInfo *txi)
 {
     bool vmdq = core->mac[MRQC] & 1;
     uint16_t qn = txi->idx;
@@ -883,7 +897,7 @@ igb_start_xmit(IGBCore *core, const IGB_TxRing *txr)
     PCIDevice *d;
     dma_addr_t base;
     union e1000_adv_tx_desc desc;
-    const E1000E_RingInfo *txi = txr->i;
+    const E1000ERingInfo *txi = txr->i;
     uint32_t eic = 0;
 
     if (!igb_tx_enabled(core, txi)) {
@@ -918,7 +932,7 @@ igb_start_xmit(IGBCore *core, const IGB_TxRing *txr)
 }
 
 static uint32_t
-igb_rxbufsize(IGBCore *core, const E1000E_RingInfo *r)
+igb_rxbufsize(IGBCore *core, const E1000ERingInfo *r)
 {
     uint32_t srrctl = core->mac[E1000_SRRCTL(r->idx) >> 2];
     uint32_t bsizepkt = srrctl & E1000_SRRCTL_BSIZEPKT_MASK;
@@ -930,7 +944,7 @@ igb_rxbufsize(IGBCore *core, const E1000E_RingInfo *r)
 }
 
 static bool
-igb_has_rxbufs(IGBCore *core, const E1000E_RingInfo *r, size_t total_size)
+igb_has_rxbufs(IGBCore *core, const E1000ERingInfo *r, size_t total_size)
 {
     uint32_t bufs = igb_ring_free_descr_num(core, r);
     uint32_t bufsize = igb_rxbufsize(core, r);
@@ -941,6 +955,14 @@ igb_has_rxbufs(IGBCore *core, const E1000E_RingInfo *r, size_t total_size)
                          bufsize;
 }
 
+static uint32_t
+igb_rxhdrbufsize(IGBCore *core, const E1000ERingInfo *r)
+{
+    uint32_t srrctl = core->mac[E1000_SRRCTL(r->idx) >> 2];
+    return (srrctl & E1000_SRRCTL_BSIZEHDRSIZE_MASK) >>
+           E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
+}
+
 void
 igb_start_recv(IGBCore *core)
 {
@@ -1225,21 +1247,77 @@ igb_read_lgcy_rx_descr(IGBCore *core, struct e1000_rx_desc *desc,
 }
 
 static inline void
-igb_read_adv_rx_descr(IGBCore *core, union e1000_adv_rx_desc *desc,
-                      hwaddr *buff_addr)
+igb_read_adv_rx_single_buf_descr(IGBCore *core, union e1000_adv_rx_desc *desc,
+                                 hwaddr *buff_addr)
 {
     *buff_addr = le64_to_cpu(desc->read.pkt_addr);
 }
 
 static inline void
-igb_read_rx_descr(IGBCore *core, union e1000_rx_desc_union *desc,
-                  hwaddr *buff_addr)
+igb_read_adv_rx_split_buf_descr(IGBCore *core, union e1000_adv_rx_desc *desc,
+                                hwaddr *buff_addr)
 {
+    buff_addr[0] = le64_to_cpu(desc->read.hdr_addr);
+    buff_addr[1] = le64_to_cpu(desc->read.pkt_addr);
+}
+
+typedef struct IGBBAState {
+    uint16_t written[IGB_MAX_PS_BUFFERS];
+    uint8_t cur_idx;
+} IGBBAState;
+
+typedef struct IGBSplitDescriptorData {
+    bool sph;
+    bool hbo;
+    size_t hdr_len;
+} IGBSplitDescriptorData;
+
+typedef struct IGBPacketRxDMAState {
+    size_t size;
+    size_t total_size;
+    size_t ps_hdr_len;
+    size_t desc_size;
+    size_t desc_offset;
+    uint32_t rx_desc_packet_buf_size;
+    uint32_t rx_desc_header_buf_size;
+    struct iovec *iov;
+    size_t iov_ofs;
+    bool do_ps;
+    bool is_first;
+    IGBBAState bastate;
+    hwaddr ba[IGB_MAX_PS_BUFFERS];
+    IGBSplitDescriptorData ps_desc_data;
+} IGBPacketRxDMAState;
+
+static inline void
+igb_read_rx_descr(IGBCore *core,
+                  union e1000_rx_desc_union *desc,
+                  IGBPacketRxDMAState *pdma_st,
+                  const E1000ERingInfo *r)
+{
+    uint32_t desc_type;
+
     if (igb_rx_use_legacy_descriptor(core)) {
-        igb_read_lgcy_rx_descr(core, &desc->legacy, buff_addr);
-    } else {
-        igb_read_adv_rx_descr(core, &desc->adv, buff_addr);
+        igb_read_lgcy_rx_descr(core, &desc->legacy, &pdma_st->ba[1]);
+        pdma_st->ba[0] = 0;
+        return;
     }
+
+    /* advanced header split descriptor */
+    if (igb_rx_use_ps_descriptor(core, r)) {
+        igb_read_adv_rx_split_buf_descr(core, &desc->adv, &pdma_st->ba[0]);
+        return;
+    }
+
+    /* descriptor replication modes not supported */
+    desc_type = igb_rx_queue_desctyp_get(core, r);
+    if (desc_type != E1000_SRRCTL_DESCTYPE_ADV_ONEBUF) {
+        trace_igb_wrn_rx_desc_modes_not_supp(desc_type);
+    }
+
+    /* advanced single buffer descriptor */
+    igb_read_adv_rx_single_buf_descr(core, &desc->adv, &pdma_st->ba[1]);
+    pdma_st->ba[0] = 0;
 }
 
 static void
@@ -1281,15 +1359,11 @@ igb_verify_csum_in_sw(IGBCore *core,
 }
 
 static void
-igb_build_rx_metadata(IGBCore *core,
-                      struct NetRxPkt *pkt,
-                      bool is_eop,
-                      const E1000E_RSSInfo *rss_info, uint16_t etqf, bool ts,
-                      uint16_t *pkt_info, uint16_t *hdr_info,
-                      uint32_t *rss,
-                      uint32_t *status_flags,
-                      uint16_t *ip_id,
-                      uint16_t *vlan_tag)
+igb_build_rx_metadata_common(IGBCore *core,
+                             struct NetRxPkt *pkt,
+                             bool is_eop,
+                             uint32_t *status_flags,
+                             uint16_t *vlan_tag)
 {
     struct virtio_net_hdr *vhdr;
     bool hasip4, hasip6, csum_valid;
@@ -1298,7 +1372,6 @@ igb_build_rx_metadata(IGBCore *core,
     *status_flags = E1000_RXD_STAT_DD;
 
     /* No additional metadata needed for non-EOP descriptors */
-    /* TODO: EOP apply only to status so don't skip whole function. */
     if (!is_eop) {
         goto func_exit;
     }
@@ -1315,64 +1388,6 @@ igb_build_rx_metadata(IGBCore *core,
         trace_e1000e_rx_metadata_vlan(*vlan_tag);
     }
 
-    /* Packet parsing results */
-    if ((core->mac[RXCSUM] & E1000_RXCSUM_PCSD) != 0) {
-        if (rss_info->enabled) {
-            *rss = cpu_to_le32(rss_info->hash);
-            trace_igb_rx_metadata_rss(*rss);
-        }
-    } else if (hasip4) {
-            *status_flags |= E1000_RXD_STAT_IPIDV;
-            *ip_id = cpu_to_le16(net_rx_pkt_get_ip_id(pkt));
-            trace_e1000e_rx_metadata_ip_id(*ip_id);
-    }
-
-    if (l4hdr_proto == ETH_L4_HDR_PROTO_TCP && net_rx_pkt_is_tcp_ack(pkt)) {
-        *status_flags |= E1000_RXD_STAT_ACK;
-        trace_e1000e_rx_metadata_ack();
-    }
-
-    if (pkt_info) {
-        *pkt_info = rss_info->enabled ? rss_info->type : 0;
-
-        if (etqf < 8) {
-            *pkt_info |= (BIT(11) | etqf) << 4;
-        } else {
-            if (hasip4) {
-                *pkt_info |= E1000_ADVRXD_PKT_IP4;
-            }
-
-            if (hasip6) {
-                *pkt_info |= E1000_ADVRXD_PKT_IP6;
-            }
-
-            switch (l4hdr_proto) {
-            case ETH_L4_HDR_PROTO_TCP:
-                *pkt_info |= E1000_ADVRXD_PKT_TCP;
-                break;
-
-            case ETH_L4_HDR_PROTO_UDP:
-                *pkt_info |= E1000_ADVRXD_PKT_UDP;
-                break;
-
-            case ETH_L4_HDR_PROTO_SCTP:
-                *pkt_info |= E1000_ADVRXD_PKT_SCTP;
-                break;
-
-            default:
-                break;
-            }
-        }
-    }
-
-    if (hdr_info) {
-        *hdr_info = 0;
-    }
-
-    if (ts) {
-        *status_flags |= BIT(16);
-    }
-
     /* RX CSO information */
     if (hasip6 && (core->mac[RFCTL] & E1000_RFCTL_IPV6_XSUM_DIS)) {
         trace_e1000e_rx_metadata_ipv6_sum_disabled();
@@ -1428,56 +1443,168 @@ func_exit:
 static inline void
 igb_write_lgcy_rx_descr(IGBCore *core, struct e1000_rx_desc *desc,
                         struct NetRxPkt *pkt,
-                        const E1000E_RSSInfo *rss_info, uint16_t etqf, bool ts,
+                        const E1000E_RSSInfo *rss_info,
                         uint16_t length)
 {
-    uint32_t status_flags, rss;
-    uint16_t ip_id;
+    uint32_t status_flags;
 
     assert(!rss_info->enabled);
-    desc->length = cpu_to_le16(length);
-    desc->csum = 0;
 
-    igb_build_rx_metadata(core, pkt, pkt != NULL,
-                          rss_info, etqf, ts,
-                          NULL, NULL, &rss,
-                          &status_flags, &ip_id,
-                          &desc->special);
+    memset(desc, 0, sizeof(*desc));
+    desc->length = cpu_to_le16(length);
+    igb_build_rx_metadata_common(core, pkt, pkt != NULL,
+                                 &status_flags,
+                                 &desc->special);
+
     desc->errors = (uint8_t) (le32_to_cpu(status_flags) >> 24);
     desc->status = (uint8_t) le32_to_cpu(status_flags);
 }
 
+static bool
+igb_rx_ps_descriptor_split_always(IGBCore *core, const E1000ERingInfo *r)
+{
+    uint32_t desctyp = igb_rx_queue_desctyp_get(core, r);
+    return desctyp == E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
+}
+
+static uint16_t
+igb_rx_desc_get_packet_type(IGBCore *core, struct NetRxPkt *pkt, uint16_t etqf)
+{
+    uint16_t pkt_type;
+    bool hasip4, hasip6;
+    EthL4HdrProto l4hdr_proto;
+
+    if (etqf < 8) {
+        pkt_type = BIT(11) | etqf;
+        return pkt_type;
+    }
+
+    net_rx_pkt_get_protocols(pkt, &hasip4, &hasip6, &l4hdr_proto);
+
+    if (hasip6 && !(core->mac[RFCTL] & E1000_RFCTL_IPV6_DIS)) {
+        eth_ip6_hdr_info *ip6hdr_info = net_rx_pkt_get_ip6_info(pkt);
+        pkt_type = ip6hdr_info->has_ext_hdrs ? E1000_ADVRXD_PKT_IP6E :
+                                               E1000_ADVRXD_PKT_IP6;
+    } else if (hasip4) {
+        pkt_type = E1000_ADVRXD_PKT_IP4;
+    } else {
+        pkt_type = 0;
+    }
+
+    switch (l4hdr_proto) {
+    case ETH_L4_HDR_PROTO_TCP:
+        pkt_type |= E1000_ADVRXD_PKT_TCP;
+        break;
+    case ETH_L4_HDR_PROTO_UDP:
+        pkt_type |= E1000_ADVRXD_PKT_UDP;
+        break;
+    case ETH_L4_HDR_PROTO_SCTP:
+        pkt_type |= E1000_ADVRXD_PKT_SCTP;
+        break;
+    default:
+        break;
+    }
+
+    return pkt_type;
+}
+
 static inline void
 igb_write_adv_rx_descr(IGBCore *core, union e1000_adv_rx_desc *desc,
                        struct NetRxPkt *pkt,
                        const E1000E_RSSInfo *rss_info, uint16_t etqf, bool ts,
                        uint16_t length)
 {
+    bool hasip4, hasip6;
+    EthL4HdrProto l4hdr_proto;
+    uint16_t rss_type = 0, pkt_type;
+    bool eop = (pkt != NULL);
+    uint32_t adv_desc_status_error = 0;
     memset(&desc->wb, 0, sizeof(desc->wb));
 
     desc->wb.upper.length = cpu_to_le16(length);
+    igb_build_rx_metadata_common(core, pkt, eop,
+                                 &desc->wb.upper.status_error,
+                                 &desc->wb.upper.vlan);
 
-    igb_build_rx_metadata(core, pkt, pkt != NULL,
-                          rss_info, etqf, ts,
-                          &desc->wb.lower.lo_dword.pkt_info,
-                          &desc->wb.lower.lo_dword.hdr_info,
-                          &desc->wb.lower.hi_dword.rss,
-                          &desc->wb.upper.status_error,
-                          &desc->wb.lower.hi_dword.csum_ip.ip_id,
-                          &desc->wb.upper.vlan);
+    if (!eop) {
+        return;
+    }
+
+    net_rx_pkt_get_protocols(pkt, &hasip4, &hasip6, &l4hdr_proto);
+
+    if ((core->mac[RXCSUM] & E1000_RXCSUM_PCSD) != 0) {
+        if (rss_info->enabled) {
+            desc->wb.lower.hi_dword.rss = cpu_to_le32(rss_info->hash);
+            rss_type = rss_info->type;
+            trace_igb_rx_metadata_rss(desc->wb.lower.hi_dword.rss, rss_type);
+        }
+    } else if (hasip4) {
+            adv_desc_status_error |= E1000_RXD_STAT_IPIDV;
+            desc->wb.lower.hi_dword.csum_ip.ip_id =
+                cpu_to_le16(net_rx_pkt_get_ip_id(pkt));
+            trace_e1000e_rx_metadata_ip_id(
+                desc->wb.lower.hi_dword.csum_ip.ip_id);
+    }
+
+    if (ts) {
+        adv_desc_status_error |= BIT(16);
+    }
+
+    pkt_type = igb_rx_desc_get_packet_type(core, pkt, etqf);
+    trace_e1000e_rx_metadata_pkt_type(pkt_type);
+    desc->wb.lower.lo_dword.pkt_info = cpu_to_le16(rss_type | (pkt_type << 4));
+    desc->wb.upper.status_error |= cpu_to_le32(adv_desc_status_error);
 }
 
 static inline void
-igb_write_rx_descr(IGBCore *core, union e1000_rx_desc_union *desc,
-                   struct NetRxPkt *pkt, const E1000E_RSSInfo *rss_info,
-                   uint16_t etqf, bool ts, uint16_t length)
+igb_write_adv_ps_rx_descr(IGBCore *core,
+                          union e1000_adv_rx_desc *desc,
+                          struct NetRxPkt *pkt,
+                          const E1000E_RSSInfo *rss_info,
+                          const E1000ERingInfo *r,
+                          uint16_t etqf,
+                          bool ts,
+                          IGBPacketRxDMAState *pdma_st)
+{
+    size_t pkt_len;
+    uint16_t hdr_info = 0;
+
+    if (pdma_st->do_ps) {
+        pkt_len = pdma_st->bastate.written[1];
+    } else {
+        pkt_len = pdma_st->bastate.written[0] + pdma_st->bastate.written[1];
+    }
+
+    igb_write_adv_rx_descr(core, desc, pkt, rss_info, etqf, ts, pkt_len);
+
+    hdr_info = (pdma_st->ps_desc_data.hdr_len << E1000_ADVRXD_HDR_LEN_OFFSET) &
+               E1000_ADVRXD_ADV_HDR_LEN_MASK;
+    hdr_info |= pdma_st->ps_desc_data.sph ? E1000_ADVRXD_HDR_SPH : 0;
+    desc->wb.lower.lo_dword.hdr_info = cpu_to_le16(hdr_info);
+
+    desc->wb.upper.status_error |= cpu_to_le32(
+        pdma_st->ps_desc_data.hbo ? E1000_ADVRXD_ST_ERR_HBO_OFFSET : 0);
+}
+
+static inline void
+igb_write_rx_descr(IGBCore *core,
+                   union e1000_rx_desc_union *desc,
+                   struct NetRxPkt *pkt,
+                   const E1000E_RSSInfo *rss_info,
+                   uint16_t etqf,
+                   bool ts,
+                   IGBPacketRxDMAState *pdma_st,
+                   const E1000ERingInfo *r)
 {
     if (igb_rx_use_legacy_descriptor(core)) {
         igb_write_lgcy_rx_descr(core, &desc->legacy, pkt, rss_info,
-                                etqf, ts, length);
+                                pdma_st->bastate.written[1]);
+    } else if (igb_rx_use_ps_descriptor(core, r)) {
+        igb_write_adv_ps_rx_descr(core, &desc->adv, pkt, rss_info, r, etqf, ts,
+                                  pdma_st);
     } else {
         igb_write_adv_rx_descr(core, &desc->adv, pkt, rss_info,
-                               etqf, ts, length);
+                               etqf, ts, pdma_st->bastate.written[1]);
     }
 }
 
@@ -1514,20 +1641,7 @@ igb_pci_dma_write_rx_desc(IGBCore *core, PCIDevice *dev, dma_addr_t addr,
 }
 
 static void
-igb_write_to_rx_buffers(IGBCore *core,
-                        PCIDevice *d,
-                        hwaddr ba,
-                        uint16_t *written,
-                        const char *data,
-                        dma_addr_t data_len)
-{
-    trace_igb_rx_desc_buff_write(ba, *written, data, data_len);
-    pci_dma_write(d, ba + *written, data, data_len);
-    *written += data_len;
-}
-
-static void
-igb_update_rx_stats(IGBCore *core, const E1000E_RingInfo *rxi,
+igb_update_rx_stats(IGBCore *core, const E1000ERingInfo *rxi,
                     size_t pkt_size, size_t pkt_fcs_size)
 {
     eth_pkt_types_e pkt_type = net_rx_pkt_get_packet_type(core->rx_pkt);
@@ -1545,12 +1659,256 @@ igb_update_rx_stats(IGBCore *core, const E1000E_RingInfo *rxi,
 }
 
 static inline bool
-igb_rx_descr_threshold_hit(IGBCore *core, const E1000E_RingInfo *rxi)
+igb_rx_descr_threshold_hit(IGBCore *core, const E1000ERingInfo *rxi)
 {
     return igb_ring_free_descr_num(core, rxi) ==
            ((core->mac[E1000_SRRCTL(rxi->idx) >> 2] >> 20) & 31) * 16;
 }
 
+static bool
+igb_do_ps(IGBCore *core,
+          const E1000ERingInfo *r,
+          struct NetRxPkt *pkt,
+          IGBPacketRxDMAState *pdma_st)
+{
+    bool hasip4, hasip6;
+    EthL4HdrProto l4hdr_proto;
+    bool fragment;
+    bool split_always;
+    size_t bheader_size;
+    size_t total_pkt_len;
+
+    if (!igb_rx_use_ps_descriptor(core, r)) {
+        return false;
+    }
+
+    total_pkt_len = net_rx_pkt_get_total_len(pkt);
+    bheader_size = igb_rxhdrbufsize(core, r);
+    split_always = igb_rx_ps_descriptor_split_always(core, r);
+    if (split_always && total_pkt_len <= bheader_size) {
+        pdma_st->ps_hdr_len = total_pkt_len;
+        pdma_st->ps_desc_data.hdr_len = total_pkt_len;
+        return true;
+    }
+
+    net_rx_pkt_get_protocols(pkt, &hasip4, &hasip6, &l4hdr_proto);
+
+    if (hasip4) {
+        fragment = net_rx_pkt_get_ip4_info(pkt)->fragment;
+    } else if (hasip6) {
+        fragment = net_rx_pkt_get_ip6_info(pkt)->fragment;
+    } else {
+        pdma_st->ps_desc_data.hdr_len = bheader_size;
+        goto header_not_handled;
+    }
+
+    if (fragment && (core->mac[RFCTL] & E1000_RFCTL_IPFRSP_DIS)) {
+        pdma_st->ps_desc_data.hdr_len = bheader_size;
+        goto header_not_handled;
+    }
+
+    /* no header splitting for SCTP */
+    if (!fragment && (l4hdr_proto == ETH_L4_HDR_PROTO_UDP ||
+                      l4hdr_proto == ETH_L4_HDR_PROTO_TCP)) {
+        pdma_st->ps_hdr_len = net_rx_pkt_get_l5_hdr_offset(pkt);
+    } else {
+        pdma_st->ps_hdr_len = net_rx_pkt_get_l4_hdr_offset(pkt);
+    }
+
+    pdma_st->ps_desc_data.sph = true;
+    pdma_st->ps_desc_data.hdr_len = pdma_st->ps_hdr_len;
+
+    if (pdma_st->ps_hdr_len > bheader_size) {
+        pdma_st->ps_desc_data.hbo = true;
+        goto header_not_handled;
+    }
+
+    return true;
+
+header_not_handled:
+    if (split_always) {
+        pdma_st->ps_hdr_len = bheader_size;
+        return true;
+    }
+
+    return false;
+}
+
+static void
+igb_truncate_to_descriptor_size(IGBPacketRxDMAState *pdma_st, size_t *size)
+{
+    if (pdma_st->do_ps && pdma_st->is_first) {
+        if (*size > pdma_st->rx_desc_packet_buf_size + pdma_st->ps_hdr_len) {
+            *size = pdma_st->rx_desc_packet_buf_size + pdma_st->ps_hdr_len;
+        }
+    } else {
+        if (*size > pdma_st->rx_desc_packet_buf_size) {
+            *size = pdma_st->rx_desc_packet_buf_size;
+        }
+    }
+}
+
+static inline void
+igb_write_hdr_frag_to_rx_buffers(IGBCore *core,
+                                 PCIDevice *d,
+                                 IGBPacketRxDMAState *pdma_st,
+                                 const char *data,
+                                 dma_addr_t data_len)
+{
+    assert(data_len <= pdma_st->rx_desc_header_buf_size -
+                       pdma_st->bastate.written[0]);
+    pci_dma_write(d,
+                  pdma_st->ba[0] + pdma_st->bastate.written[0],
+                  data, data_len);
+    pdma_st->bastate.written[0] += data_len;
+    pdma_st->bastate.cur_idx = 1;
+}
+
+static void
+igb_write_header_to_rx_buffers(IGBCore *core,
+                               struct NetRxPkt *pkt,
+                               PCIDevice *d,
+                               IGBPacketRxDMAState *pdma_st,
+                               size_t *copy_size)
+{
+    size_t iov_copy;
+    size_t ps_hdr_copied = 0;
+
+    if (!pdma_st->is_first) {
+        /* Leave buffer 0 of each descriptor except first */
+        /* empty                                          */
+        pdma_st->bastate.cur_idx = 1;
+        return;
+    }
+
+    do {
+        iov_copy = MIN(pdma_st->ps_hdr_len - ps_hdr_copied,
+                       pdma_st->iov->iov_len - pdma_st->iov_ofs);
+
+        igb_write_hdr_frag_to_rx_buffers(core, d, pdma_st,
+                                         pdma_st->iov->iov_base,
+                                         iov_copy);
+
+        *copy_size -= iov_copy;
+        ps_hdr_copied += iov_copy;
+
+        pdma_st->iov_ofs += iov_copy;
+        if (pdma_st->iov_ofs == pdma_st->iov->iov_len) {
+            pdma_st->iov++;
+            pdma_st->iov_ofs = 0;
+        }
+    } while (ps_hdr_copied < pdma_st->ps_hdr_len);
+
+    pdma_st->is_first = false;
+}
+
+static void
+igb_write_payload_frag_to_rx_buffers(IGBCore *core,
+                                     PCIDevice *d,
+                                     IGBPacketRxDMAState *pdma_st,
+                                     const char *data,
+                                     dma_addr_t data_len)
+{
+    while (data_len > 0) {
+        assert(pdma_st->bastate.cur_idx < IGB_MAX_PS_BUFFERS);
+
+        uint32_t cur_buf_bytes_left =
+            pdma_st->rx_desc_packet_buf_size -
+            pdma_st->bastate.written[pdma_st->bastate.cur_idx];
+        uint32_t bytes_to_write = MIN(data_len, cur_buf_bytes_left);
+
+        trace_igb_rx_desc_buff_write(
+            pdma_st->bastate.cur_idx,
+            pdma_st->ba[pdma_st->bastate.cur_idx],
+            pdma_st->bastate.written[pdma_st->bastate.cur_idx],
+            data,
+            bytes_to_write);
+
+        pci_dma_write(d,
+                      pdma_st->ba[pdma_st->bastate.cur_idx] +
+                      pdma_st->bastate.written[pdma_st->bastate.cur_idx],
+                      data, bytes_to_write);
+
+        pdma_st->bastate.written[pdma_st->bastate.cur_idx] += bytes_to_write;
+        data += bytes_to_write;
+        data_len -= bytes_to_write;
+
+        if (pdma_st->bastate.written[pdma_st->bastate.cur_idx] ==
+            pdma_st->rx_desc_packet_buf_size) {
+            pdma_st->bastate.cur_idx++;
+        }
+    }
+}
+
+static void
+igb_write_payload_to_rx_buffers(IGBCore *core,
+                                struct NetRxPkt *pkt,
+                                PCIDevice *d,
+                                IGBPacketRxDMAState *pdma_st,
+                                size_t *copy_size)
+{
+    static const uint32_t fcs_pad;
+    size_t iov_copy;
+
+    /* Copy packet payload */
+    while (*copy_size) {
+        iov_copy = MIN(*copy_size, pdma_st->iov->iov_len - pdma_st->iov_ofs);
+        igb_write_payload_frag_to_rx_buffers(core, d,
+                                             pdma_st,
+                                             pdma_st->iov->iov_base +
+                                             pdma_st->iov_ofs,
+                                             iov_copy);
+
+        *copy_size -= iov_copy;
+        pdma_st->iov_ofs += iov_copy;
+        if (pdma_st->iov_ofs == pdma_st->iov->iov_len) {
+            pdma_st->iov++;
+            pdma_st->iov_ofs = 0;
+        }
+    }
+
+    if (pdma_st->desc_offset + pdma_st->desc_size >= pdma_st->total_size) {
+        /* Simulate FCS checksum presence in the last descriptor */
+        igb_write_payload_frag_to_rx_buffers(core, d,
+                                             pdma_st,
+                                             (const char *) &fcs_pad,
+                                             e1000x_fcs_len(core->mac));
+    }
+}
+
+static void
+igb_write_to_rx_buffers(IGBCore *core,
+                        struct NetRxPkt *pkt,
+                        PCIDevice *d,
+                        IGBPacketRxDMAState *pdma_st)
+{
+    size_t copy_size;
+
+    if (!(pdma_st->ba)[1] || (pdma_st->do_ps && !(pdma_st->ba[0]))) {
+        /* as per intel docs; skip descriptors with null buf addr */
+        trace_e1000e_rx_null_descriptor();
+        return;
+    }
+
+    if (pdma_st->desc_offset >= pdma_st->size) {
+        return;
+    }
+
+    pdma_st->desc_size = pdma_st->total_size - pdma_st->desc_offset;
+    igb_truncate_to_descriptor_size(pdma_st, &pdma_st->desc_size);
+    copy_size = pdma_st->size - pdma_st->desc_offset;
+    igb_truncate_to_descriptor_size(pdma_st, &copy_size);
+
+    /* For PS mode copy the packet header first */
+    if (pdma_st->do_ps) {
+        igb_write_header_to_rx_buffers(core, pkt, d, pdma_st, &copy_size);
+    } else {
+        pdma_st->bastate.cur_idx = 1;
+    }
+
+    igb_write_payload_to_rx_buffers(core, pkt, d, pdma_st, &copy_size);
+}
+
 static void
 igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt *pkt,
                           const E1000E_RxRing *rxr,
@@ -1560,95 +1918,61 @@ igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt *pkt,
     PCIDevice *d;
     dma_addr_t base;
     union e1000_rx_desc_union desc;
-    size_t desc_size;
-    size_t desc_offset = 0;
-    size_t iov_ofs = 0;
+    const E1000ERingInfo *rxi;
+    size_t rx_desc_len;
 
-    struct iovec *iov = net_rx_pkt_get_iovec(pkt);
-    size_t size = net_rx_pkt_get_total_len(pkt);
-    size_t total_size = size + e1000x_fcs_len(core->mac);
-    const E1000E_RingInfo *rxi = rxr->i;
-    size_t bufsize = igb_rxbufsize(core, rxi);
+    IGBPacketRxDMAState pdma_st = {0};
+    pdma_st.is_first = true;
+    pdma_st.size = net_rx_pkt_get_total_len(pkt);
+    pdma_st.total_size = pdma_st.size + e1000x_fcs_len(core->mac);
 
+    rxi = rxr->i;
+    rx_desc_len = core->rx_desc_len;
+    pdma_st.rx_desc_packet_buf_size = igb_rxbufsize(core, rxi);
+    pdma_st.rx_desc_header_buf_size = igb_rxhdrbufsize(core, rxi);
+    pdma_st.iov = net_rx_pkt_get_iovec(pkt);
     d = pcie_sriov_get_vf_at_index(core->owner, rxi->idx % 8);
     if (!d) {
         d = core->owner;
     }
 
+    pdma_st.do_ps = igb_do_ps(core, rxi, pkt, &pdma_st);
+
     do {
-        hwaddr ba;
-        uint16_t written = 0;
+        memset(&pdma_st.bastate, 0, sizeof(IGBBAState));
         bool is_last = false;
 
-        desc_size = total_size - desc_offset;
-
-        if (desc_size > bufsize) {
-            desc_size = bufsize;
-        }
-
         if (igb_ring_empty(core, rxi)) {
             return;
         }
 
         base = igb_ring_head_descr(core, rxi);
+        pci_dma_read(d, base, &desc, rx_desc_len);
+        trace_e1000e_rx_descr(rxi->idx, base, rx_desc_len);
 
-        pci_dma_read(d, base, &desc, core->rx_desc_len);
+        igb_read_rx_descr(core, &desc, &pdma_st, rxi);
 
-        trace_e1000e_rx_descr(rxi->idx, base, core->rx_desc_len);
-
-        igb_read_rx_descr(core, &desc, &ba);
-
-        if (ba) {
-            if (desc_offset < size) {
-                static const uint32_t fcs_pad;
-                size_t iov_copy;
-                size_t copy_size = size - desc_offset;
-                if (copy_size > bufsize) {
-                    copy_size = bufsize;
-                }
-
-                /* Copy packet payload */
-                while (copy_size) {
-                    iov_copy = MIN(copy_size, iov->iov_len - iov_ofs);
-
-                    igb_write_to_rx_buffers(core, d, ba, &written,
-                                            iov->iov_base + iov_ofs, iov_copy);
-
-                    copy_size -= iov_copy;
-                    iov_ofs += iov_copy;
-                    if (iov_ofs == iov->iov_len) {
-                        iov++;
-                        iov_ofs = 0;
-                    }
-                }
-
-                if (desc_offset + desc_size >= total_size) {
-                    /* Simulate FCS checksum presence in the last descriptor */
-                    igb_write_to_rx_buffers(core, d, ba, &written,
-                          (const char *) &fcs_pad, e1000x_fcs_len(core->mac));
-                }
-            }
-        } else { /* as per intel docs; skip descriptors with null buf addr */
-            trace_e1000e_rx_null_descriptor();
-        }
-        desc_offset += desc_size;
-        if (desc_offset >= total_size) {
+        igb_write_to_rx_buffers(core, pkt, d, &pdma_st);
+        pdma_st.desc_offset += pdma_st.desc_size;
+        if (pdma_st.desc_offset >= pdma_st.total_size) {
             is_last = true;
         }
 
-        igb_write_rx_descr(core, &desc, is_last ? core->rx_pkt : NULL,
-                           rss_info, etqf, ts, written);
-        igb_pci_dma_write_rx_desc(core, d, base, &desc, core->rx_desc_len);
+        igb_write_rx_descr(core, &desc,
+                           is_last ? pkt : NULL,
+                           rss_info,
+                           etqf, ts,
+                           &pdma_st,
+                           rxi);
+        igb_pci_dma_write_rx_desc(core, d, base, &desc, rx_desc_len);
+        igb_ring_advance(core, rxi, rx_desc_len / E1000_MIN_RX_DESC_LEN);
+    } while (pdma_st.desc_offset < pdma_st.total_size);
 
-        igb_ring_advance(core, rxi, core->rx_desc_len / E1000_MIN_RX_DESC_LEN);
-
-    } while (desc_offset < total_size);
-
-    igb_update_rx_stats(core, rxi, size, total_size);
+    igb_update_rx_stats(core, rxi, pdma_st.size, pdma_st.total_size);
 }
 
 static bool
-igb_rx_strip_vlan(IGBCore *core, const E1000E_RingInfo *rxi)
+igb_rx_strip_vlan(IGBCore *core, const E1000ERingInfo *rxi)
 {
     if (core->mac[MRQC] & 1) {
         uint16_t pool = rxi->idx % IGB_NUM_VM_POOLS;
@@ -2753,7 +3077,7 @@ igb_update_rx_offloads(IGBCore *core)
 
     if (core->has_vnet) {
         qemu_set_offload(qemu_get_queue(core->owner_nic)->peer,
-                         cso_state, 0, 0, 0, 0);
+                         cso_state, 0, 0, 0, 0, 0, 0);
     }
 }
 
diff --git a/hw/net/igb_regs.h b/hw/net/igb_regs.h
index 82ff195dfc..e5a47eab64 100644
--- a/hw/net/igb_regs.h
+++ b/hw/net/igb_regs.h
@@ -364,7 +364,7 @@ union e1000_adv_rx_desc {
 /* Indicates that VF is still clear to send requests */
 #define E1000_VT_MSGTYPE_CTS 0x20000000
 #define E1000_VT_MSGINFO_SHIFT 16
-/* bits 23:16 are used for exra info for certain messages */
+/* bits 23:16 are used for extra info for certain messages */
 #define E1000_VT_MSGINFO_MASK (0xFF << E1000_VT_MSGINFO_SHIFT)
 
 #define E1000_VF_RESET                 0x01 /* VF requests reset */
@@ -452,6 +452,7 @@ union e1000_adv_rx_desc {
 #define E1000_SRRCTL_BSIZEHDRSIZE_MASK         0x00000F00
 #define E1000_SRRCTL_BSIZEHDRSIZE_SHIFT        2  /* Shift _left_ */
 #define E1000_SRRCTL_DESCTYPE_ADV_ONEBUF       0x02000000
+#define E1000_SRRCTL_DESCTYPE_HDR_SPLIT        0x04000000
 #define E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS 0x0A000000
 #define E1000_SRRCTL_DESCTYPE_MASK             0x0E000000
 #define E1000_SRRCTL_DROP_EN                   0x80000000
@@ -490,7 +491,7 @@ union e1000_adv_rx_desc {
 #define E1000_VF_MBX_INIT_DELAY   500  /* usec delay between retries */
 
 #define E1000_VT_MSGINFO_SHIFT    16
-/* bits 23:16 are used for exra info for certain messages */
+/* bits 23:16 are used for extra info for certain messages */
 #define E1000_VT_MSGINFO_MASK     (0xFF << E1000_VT_MSGINFO_SHIFT)
 
 #define E1000_VF_RESET            0x01 /* VF requests reset */
@@ -692,11 +693,20 @@ union e1000_adv_rx_desc {
 
 #define E1000_STATUS_NUM_VFS_SHIFT 14
 
-#define E1000_ADVRXD_PKT_IP4 BIT(4)
-#define E1000_ADVRXD_PKT_IP6 BIT(6)
-#define E1000_ADVRXD_PKT_TCP BIT(8)
-#define E1000_ADVRXD_PKT_UDP BIT(9)
-#define E1000_ADVRXD_PKT_SCTP BIT(10)
+#define E1000_ADVRXD_PKT_IP4  BIT(0)
+#define E1000_ADVRXD_PKT_IP6  BIT(2)
+#define E1000_ADVRXD_PKT_IP6E BIT(3)
+#define E1000_ADVRXD_PKT_TCP  BIT(4)
+#define E1000_ADVRXD_PKT_UDP  BIT(5)
+#define E1000_ADVRXD_PKT_SCTP BIT(6)
+
+#define IGB_MAX_PS_BUFFERS 2
+
+#define E1000_ADVRXD_HDR_LEN_OFFSET    (21 - 16)
+#define E1000_ADVRXD_ADV_HDR_LEN_MASK  ((BIT(10) - 1) << \
+                                        E1000_ADVRXD_HDR_LEN_OFFSET)
+#define E1000_ADVRXD_HDR_SPH           BIT(15)
+#define E1000_ADVRXD_ST_ERR_HBO_OFFSET BIT(3 + 20)
 
 static inline uint8_t igb_ivar_entry_rx(uint8_t i)
 {
diff --git a/hw/net/mcf_fec.c b/hw/net/mcf_fec.c
index 8aa27bd322..ec3ddf520a 100644
--- a/hw/net/mcf_fec.c
+++ b/hw/net/mcf_fec.c
@@ -571,7 +571,7 @@ static ssize_t mcf_fec_receive(NetClientState *nc, const uint8_t *buf, size_t si
     size += 4;
     crc = cpu_to_be32(crc32(~0, buf, size));
     crc_ptr = (uint8_t *)&crc;
-    /* Huge frames are truncted.  */
+    /* Huge frames are truncated.  */
     if (size > FEC_MAX_FRAME_SIZE) {
         size = FEC_MAX_FRAME_SIZE;
         flags |= FEC_BD_TR | FEC_BD_LG;
diff --git a/hw/net/rocker/rocker_fp.c b/hw/net/rocker/rocker_fp.c
index cbeed65bd5..9afd0c5e3f 100644
--- a/hw/net/rocker/rocker_fp.c
+++ b/hw/net/rocker/rocker_fp.c
@@ -134,7 +134,7 @@ static ssize_t fp_port_receive_iov(NetClientState *nc, const struct iovec *iov,
     FpPort *port = qemu_get_nic_opaque(nc);
 
     /* If the port is disabled, we want to drop this pkt
-     * now rather than queing it for later.  We don't want
+     * now rather than queueing it for later.  We don't want
      * any stale pkts getting into the device when the port
      * transitions to enabled.
      */
diff --git a/hw/net/rocker/rocker_of_dpa.c b/hw/net/rocker/rocker_of_dpa.c
index dfe4754469..5e16056be6 100644
--- a/hw/net/rocker/rocker_of_dpa.c
+++ b/hw/net/rocker/rocker_of_dpa.c
@@ -1043,7 +1043,7 @@ static void of_dpa_flow_ig_tbl(OfDpaFlowContext *fc, uint32_t tbl_id)
 static ssize_t of_dpa_ig(World *world, uint32_t pport,
                          const struct iovec *iov, int iovcnt)
 {
-    struct iovec iov_copy[iovcnt + 2];
+    g_autofree struct iovec *iov_copy = g_new(struct iovec, iovcnt + 2);
     OfDpaFlowContext fc = {
         .of_dpa = world_private(world),
         .in_pport = pport,
diff --git a/hw/net/rtl8139.c b/hw/net/rtl8139.c
index b4df75b2c9..4525fda383 100644
--- a/hw/net/rtl8139.c
+++ b/hw/net/rtl8139.c
@@ -100,7 +100,7 @@ enum RTL8139_registers {
     MAC0 = 0,        /* Ethernet hardware address. */
     MAR0 = 8,        /* Multicast filter. */
     TxStatus0 = 0x10,/* Transmit status (Four 32bit registers). C mode only */
-                     /* Dump Tally Conter control register(64bit). C+ mode only */
+                     /* Dump Tally Counter control register(64bit). C+ mode only */
     TxAddr0 = 0x20,  /* Tx descriptors (also four 32bit). */
     RxBuf = 0x30,
     ChipCmd = 0x37,
diff --git a/hw/net/smc91c111.c b/hw/net/smc91c111.c
index ad778cd8fc..ddbceda967 100644
--- a/hw/net/smc91c111.c
+++ b/hw/net/smc91c111.c
@@ -361,7 +361,7 @@ static void smc91c111_writeb(void *opaque, hwaddr offset,
         case 4: case 5: case 6: case 7: case 8: case 9: /* IA */
             /* Not implemented.  */
             return;
-        case 10: /* Genral Purpose */
+        case 10: /* General Purpose */
             SET_LOW(gpr, value);
             return;
         case 11:
diff --git a/hw/net/sungem.c b/hw/net/sungem.c
index 510b370e5f..c2e2c90668 100644
--- a/hw/net/sungem.c
+++ b/hw/net/sungem.c
@@ -1228,7 +1228,7 @@ static void sungem_mmio_mif_write(void *opaque, hwaddr addr, uint64_t val,
     case MIF_SMACHINE:
         return; /* No actual write */
     case MIF_CFG:
-        /* Maintain the RO MDI bits to advertize an MDIO PHY on MDI0 */
+        /* Maintain the RO MDI bits to advertise an MDIO PHY on MDI0 */
         val &= ~MIF_CFG_MDI1;
         val |= MIF_CFG_MDI0;
         break;
diff --git a/hw/net/sunhme.c b/hw/net/sunhme.c
index 391d26fb82..64d4ea5850 100644
--- a/hw/net/sunhme.c
+++ b/hw/net/sunhme.c
@@ -901,7 +901,7 @@ static void sunhme_reset(DeviceState *ds)
     /* Configure internal transceiver */
     s->mifregs[HME_MIFI_CFG >> 2] |= HME_MIF_CFG_MDI0;
 
-    /* Advetise auto, 100Mbps FD */
+    /* Advertise auto, 100Mbps FD */
     s->miiregs[MII_ANAR] = MII_ANAR_TXFD;
     s->miiregs[MII_BMSR] = MII_BMSR_AUTONEG | MII_BMSR_100TX_FD |
                            MII_BMSR_AN_COMP;
diff --git a/hw/net/trace-events b/hw/net/trace-events
index 6b5ba669a2..3abfd65e5b 100644
--- a/hw/net/trace-events
+++ b/hw/net/trace-events
@@ -278,9 +278,9 @@ igb_core_mdic_write_unhandled(uint32_t addr) "MDIC WRITE: PHY[%u] UNHANDLED"
 igb_link_set_ext_params(bool asd_check, bool speed_select_bypass, bool pfrstd) "Set extended link params: ASD check: %d, Speed select bypass: %d, PF reset done: %d"
 
 igb_rx_desc_buff_size(uint32_t b) "buffer size: %u"
-igb_rx_desc_buff_write(uint64_t addr, uint16_t offset, const void* source, uint32_t len) "addr: 0x%"PRIx64", offset: %u, from: %p, length: %u"
+igb_rx_desc_buff_write(uint8_t idx, uint64_t addr, uint16_t offset, const void* source, uint32_t len) "buffer %u, addr: 0x%"PRIx64", offset: %u, from: %p, length: %u"
 
-igb_rx_metadata_rss(uint32_t rss) "RSS data: 0x%X"
+igb_rx_metadata_rss(uint32_t rss, uint16_t rss_pkt_type) "RSS data: rss: 0x%X, rss_pkt_type: 0x%X"
 
 igb_irq_icr_clear_gpie_nsicr(void) "Clearing ICR on read due to GPIE.NSICR enabled"
 igb_irq_set_iam(uint32_t icr) "Update IAM: 0x%x"
@@ -295,6 +295,8 @@ igb_irq_eitr_set(uint32_t eitr_num, uint32_t val) "EITR[%u] = 0x%x"
 igb_set_pfmailbox(uint32_t vf_num, uint32_t val) "PFMailbox[%d]: 0x%x"
 igb_set_vfmailbox(uint32_t vf_num, uint32_t val) "VFMailbox[%d]: 0x%x"
 
+igb_wrn_rx_desc_modes_not_supp(int desc_type) "Not supported descriptor type: %d"
+
 # igbvf.c
 igbvf_wrn_io_addr_unknown(uint64_t addr) "IO unknown register 0x%"PRIx64
 
diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
index 6b958d6363..57427a3997 100644
--- a/hw/net/vhost_net.c
+++ b/hw/net/vhost_net.c
@@ -78,6 +78,9 @@ static const int user_feature_bits[] = {
     VIRTIO_F_RING_RESET,
     VIRTIO_NET_F_RSS,
     VIRTIO_NET_F_HASH_REPORT,
+    VIRTIO_NET_F_GUEST_USO4,
+    VIRTIO_NET_F_GUEST_USO6,
+    VIRTIO_NET_F_HOST_USO,
 
     /* This bit implies RARP isn't sent by QEMU out of band */
     VIRTIO_NET_F_GUEST_ANNOUNCE,
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 7102ec4817..5a0201c423 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -659,6 +659,15 @@ static int peer_has_ufo(VirtIONet *n)
     return n->has_ufo;
 }
 
+static int peer_has_uso(VirtIONet *n)
+{
+    if (!peer_has_vnet_hdr(n)) {
+        return 0;
+    }
+
+    return qemu_has_uso(qemu_get_queue(n->nic)->peer);
+}
+
 static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
                                        int version_1, int hash_report)
 {
@@ -796,6 +805,10 @@ static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);
 
+        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_USO);
+        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO4);
+        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO6);
+
         virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
     }
 
@@ -804,6 +817,12 @@ static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
     }
 
+    if (!peer_has_uso(n)) {
+        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_USO);
+        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO4);
+        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO6);
+    }
+
     if (!get_vhost_net(nc->peer)) {
         return features;
     }
@@ -859,17 +878,21 @@ static void virtio_net_apply_guest_offloads(VirtIONet *n)
             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
-            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)));
+            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)),
+            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_USO4)),
+            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_USO6)));
 }
 
-static uint64_t virtio_net_guest_offloads_by_features(uint32_t features)
+static uint64_t virtio_net_guest_offloads_by_features(uint64_t features)
 {
     static const uint64_t guest_offloads_mask =
         (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
         (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
         (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
         (1ULL << VIRTIO_NET_F_GUEST_ECN)  |
-        (1ULL << VIRTIO_NET_F_GUEST_UFO);
+        (1ULL << VIRTIO_NET_F_GUEST_UFO)  |
+        (1ULL << VIRTIO_NET_F_GUEST_USO4) |
+        (1ULL << VIRTIO_NET_F_GUEST_USO6);
 
     return guest_offloads_mask & features;
 }
@@ -1307,7 +1330,7 @@ static void virtio_net_detach_epbf_rss(VirtIONet *n)
 static bool virtio_net_load_ebpf(VirtIONet *n)
 {
     if (!virtio_net_attach_ebpf_to_backend(n->nic, -1)) {
-        /* backend does't support steering ebpf */
+        /* backend doesn't support steering ebpf */
         return false;
     }
 
@@ -2046,7 +2069,7 @@ static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain,
                                         + sizeof(struct ip6_header));
     unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
 
-    /* There is a difference between payload lenght in ipv4 and v6,
+    /* There is a difference between payload length in ipv4 and v6,
        ip header is excluded in ipv6 */
     unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen;
 }
@@ -3795,7 +3818,7 @@ static void virtio_net_instance_init(Object *obj)
 
     /*
      * The default config_size is sizeof(struct virtio_net_config).
-     * Can be overriden with virtio_net_set_config_size.
+     * Can be overridden with virtio_net_set_config_size.
      */
     n->config_size = sizeof(struct virtio_net_config);
     device_add_bootindex_property(obj, &n->nic_conf.bootindex,
@@ -3922,6 +3945,12 @@ static Property virtio_net_properties[] = {
     DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN),
     DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str),
     DEFINE_PROP_BOOL("failover", VirtIONet, failover, false),
+    DEFINE_PROP_BIT64("guest_uso4", VirtIONet, host_features,
+                      VIRTIO_NET_F_GUEST_USO4, true),
+    DEFINE_PROP_BIT64("guest_uso6", VirtIONet, host_features,
+                      VIRTIO_NET_F_GUEST_USO6, true),
+    DEFINE_PROP_BIT64("host_uso", VirtIONet, host_features,
+                      VIRTIO_NET_F_HOST_USO, true),
     DEFINE_PROP_END_OF_LIST(),
 };
 
diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c
index 5dfacb1098..1b48d7743e 100644
--- a/hw/net/vmxnet3.c
+++ b/hw/net/vmxnet3.c
@@ -1341,6 +1341,8 @@ static void vmxnet3_update_features(VMXNET3State *s)
                          s->lro_supported,
                          s->lro_supported,
                          0,
+                         0,
+                         0,
                          0);
     }
 }
@@ -1439,7 +1441,10 @@ static void vmxnet3_activate_device(VMXNET3State *s)
     vmxnet3_setup_rx_filtering(s);
     /* Cache fields from shared memory */
     s->mtu = VMXNET3_READ_DRV_SHARED32(d, s->drv_shmem, devRead.misc.mtu);
-    assert(VMXNET3_MIN_MTU <= s->mtu && s->mtu <= VMXNET3_MAX_MTU);
+    if (s->mtu < VMXNET3_MIN_MTU || s->mtu > VMXNET3_MAX_MTU) {
+        qemu_log_mask(LOG_GUEST_ERROR, "vmxnet3: Bad MTU size: %u\n", s->mtu);
+        return;
+    }
     VMW_CFPRN("MTU is %u", s->mtu);
 
     s->max_rx_frags =
@@ -1884,7 +1889,7 @@ vmxnet3_io_bar1_read(void *opaque, hwaddr addr, unsigned size)
             break;
 
         default:
-            VMW_CBPRN("Unknow read BAR1[%" PRIx64 "], %d bytes", addr, size);
+            VMW_CBPRN("Unknown read BAR1[%" PRIx64 "], %d bytes", addr, size);
             break;
         }
 
diff --git a/hw/net/vmxnet3.h b/hw/net/vmxnet3.h
index bf4f6de74a..f9283f9e7b 100644
--- a/hw/net/vmxnet3.h
+++ b/hw/net/vmxnet3.h
@@ -733,7 +733,7 @@ struct Vmxnet3_TxQueueDesc {
 struct Vmxnet3_RxQueueDesc {
     struct Vmxnet3_RxQueueCtrl        ctrl;
     struct Vmxnet3_RxQueueConf        conf;
-    /* Driver read after a GET commad */
+    /* Driver read after a GET command */
     struct Vmxnet3_QueueStatus        status;
     struct UPT1_RxStats            stats;
     u8                      __pad[88]; /* 128 aligned */
diff --git a/hw/nios2/boot.c b/hw/nios2/boot.c
index b30a7b1efb..cd75803fc2 100644
--- a/hw/nios2/boot.c
+++ b/hw/nios2/boot.c
@@ -148,16 +148,11 @@ void nios2_load_kernel(Nios2CPU *cpu, hwaddr ddr_base,
     if (kernel_filename) {
         int kernel_size, fdt_size;
         uint64_t entry, high;
-        int big_endian = 0;
-
-#if TARGET_BIG_ENDIAN
-        big_endian = 1;
-#endif
 
         /* Boots a kernel elf binary. */
         kernel_size = load_elf(kernel_filename, NULL, NULL, NULL,
                                &entry, NULL, &high, NULL,
-                               big_endian, EM_ALTERA_NIOS2, 0, 0);
+                               TARGET_BIG_ENDIAN, EM_ALTERA_NIOS2, 0, 0);
         if ((uint32_t)entry == 0xc0000000) {
             /*
              * The Nios II processor reference guide documents that the
@@ -168,7 +163,7 @@ void nios2_load_kernel(Nios2CPU *cpu, hwaddr ddr_base,
             kernel_size = load_elf(kernel_filename, NULL,
                                    translate_kernel_address, NULL,
                                    &entry, NULL, NULL, NULL,
-                                   big_endian, EM_ALTERA_NIOS2, 0, 0);
+                                   TARGET_BIG_ENDIAN, EM_ALTERA_NIOS2, 0, 0);
             boot_info.bootstrap_pc = ddr_base + 0xc0000000 +
                 (entry & 0x07ffffff);
         } else {
diff --git a/hw/nubus/trace-events b/hw/nubus/trace-events
index e31833d694..9259d66725 100644
--- a/hw/nubus/trace-events
+++ b/hw/nubus/trace-events
@@ -1,4 +1,4 @@
-# See docs/devel/tracing.txt for syntax documentation.
+# See docs/devel/tracing.rst for syntax documentation.
 
 # nubus-bus.c
 nubus_slot_read(uint64_t addr, int size) "reading unassigned addr 0x%"PRIx64 " size %d"
diff --git a/hw/nvme/Kconfig b/hw/nvme/Kconfig
index 8ac90942e5..cfa2ab0f9d 100644
--- a/hw/nvme/Kconfig
+++ b/hw/nvme/Kconfig
@@ -1,4 +1,4 @@
 config NVME_PCI
     bool
-    default y if PCI_DEVICES
+    default y if PCI_DEVICES || PCIE_DEVICES
     depends on PCI
diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 539d273553..f026245d1e 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -17,7 +17,7 @@
  * Notes on coding style
  * ---------------------
  * While QEMU coding style prefers lowercase hexadecimals in constants, the
- * NVMe subsystem use thes format from the NVMe specifications in the comments
+ * NVMe subsystem use this format from the NVMe specifications in the comments
  * (i.e. 'h' suffix instead of '0x' prefix).
  *
  * Usage
@@ -730,7 +730,7 @@ static inline void nvme_sg_unmap(NvmeSg *sg)
 }
 
 /*
- * When metadata is transfered as extended LBAs, the DPTR mapped into `sg`
+ * When metadata is transferred as extended LBAs, the DPTR mapped into `sg`
  * holds both data and metadata. This function splits the data and metadata
  * into two separate QSG/IOVs.
  */
@@ -894,7 +894,7 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, NvmeSg *sg, uint64_t prp1,
     len -= trans_len;
     if (len) {
         if (len > n->page_size) {
-            uint64_t prp_list[n->max_prp_ents];
+            g_autofree uint64_t *prp_list = g_new(uint64_t, n->max_prp_ents);
             uint32_t nents, prp_trans;
             int i = 0;
 
@@ -1045,7 +1045,7 @@ static uint16_t nvme_map_sgl(NvmeCtrl *n, NvmeSg *sg, NvmeSglDescriptor sgl,
      * descriptors and segment chain) than the command transfer size, so it is
      * not bounded by MDTS.
      */
-    const int SEG_CHUNK_SIZE = 256;
+#define SEG_CHUNK_SIZE 256
 
     NvmeSglDescriptor segment[SEG_CHUNK_SIZE], *sgld, *last_sgld;
     uint64_t nsgld;
@@ -2130,11 +2130,6 @@ static inline bool nvme_is_write(NvmeRequest *req)
            rw->opcode == NVME_CMD_WRITE_ZEROES;
 }
 
-static AioContext *nvme_get_aio_context(BlockAIOCB *acb)
-{
-    return qemu_get_aio_context();
-}
-
 static void nvme_misc_cb(void *opaque, int ret)
 {
     NvmeRequest *req = opaque;
@@ -3302,7 +3297,6 @@ static void nvme_flush_cancel(BlockAIOCB *acb)
 static const AIOCBInfo nvme_flush_aiocb_info = {
     .aiocb_size = sizeof(NvmeFlushAIOCB),
     .cancel_async = nvme_flush_cancel,
-    .get_aio_context = nvme_get_aio_context,
 };
 
 static void nvme_do_flush(NvmeFlushAIOCB *iocb);
@@ -6478,7 +6472,6 @@ static void nvme_format_cancel(BlockAIOCB *aiocb)
 static const AIOCBInfo nvme_format_aiocb_info = {
     .aiocb_size = sizeof(NvmeFormatAIOCB),
     .cancel_async = nvme_format_cancel,
-    .get_aio_context = nvme_get_aio_context,
 };
 
 static void nvme_format_set(NvmeNamespace *ns, uint8_t lbaf, uint8_t mset,
@@ -7594,7 +7587,7 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val)
             /*
              * NVM Express v1.3d, Section 4.1 state: "If host software writes
              * an invalid value to the Submission Queue Tail Doorbell or
-             * Completion Queue Head Doorbell regiter and an Asynchronous Event
+             * Completion Queue Head Doorbell register and an Asynchronous Event
              * Request command is outstanding, then an asynchronous event is
              * posted to the Admin Completion Queue with a status code of
              * Invalid Doorbell Write Value."
diff --git a/hw/nvram/eeprom_at24c.c b/hw/nvram/eeprom_at24c.c
index 613c4929e3..3272068663 100644
--- a/hw/nvram/eeprom_at24c.c
+++ b/hw/nvram/eeprom_at24c.c
@@ -51,7 +51,7 @@ struct EEPROMState {
     bool writable;
     /* cells changed since last START? */
     bool changed;
-    /* during WRITE, # of address bytes transfered */
+    /* during WRITE, # of address bytes transferred */
     uint8_t haveaddr;
 
     uint8_t *mem;
diff --git a/hw/nvram/fw_cfg.c b/hw/nvram/fw_cfg.c
index 29a5bef1d5..4e4524673a 100644
--- a/hw/nvram/fw_cfg.c
+++ b/hw/nvram/fw_cfg.c
@@ -877,7 +877,7 @@ static struct {
 /*
  * Any sub-page size update to these table MRs will be lost during migration,
  * as we use aligned size in ram_load_precopy() -> qemu_ram_resize() path.
- * In order to avoid the inconsistency in sizes save them seperately and
+ * In order to avoid the inconsistency in sizes save them separately and
  * migrate over in vmstate post_load().
  */
 static void fw_cfg_acpi_mr_save(FWCfgState *s, const char *filename, size_t len)
diff --git a/hw/pci-bridge/cxl_downstream.c b/hw/pci-bridge/cxl_downstream.c
index 54f507318f..5a2b749c8e 100644
--- a/hw/pci-bridge/cxl_downstream.c
+++ b/hw/pci-bridge/cxl_downstream.c
@@ -42,7 +42,7 @@ static void latch_registers(CXLDownstreamPort *dsp)
                                        CXL2_DOWNSTREAM_PORT);
 }
 
-/* TODO: Look at sharing this code acorss all CXL port types */
+/* TODO: Look at sharing this code across all CXL port types */
 static void cxl_dsp_dvsec_write_config(PCIDevice *dev, uint32_t addr,
                                       uint32_t val, int len)
 {
diff --git a/hw/pci-bridge/cxl_upstream.c b/hw/pci-bridge/cxl_upstream.c
index 9159f48a8c..2b9cf0cc97 100644
--- a/hw/pci-bridge/cxl_upstream.c
+++ b/hw/pci-bridge/cxl_upstream.c
@@ -262,7 +262,7 @@ static int build_cdat_table(CDATSubHeader ***cdat_table, void *priv)
                 .length = sslbis_size,
             },
             .data_type = HMATLB_DATA_TYPE_ACCESS_BANDWIDTH,
-            .entry_base_unit = 1000,
+            .entry_base_unit = 1024,
         },
     };
 
diff --git a/hw/pci-bridge/pci_expander_bridge.c b/hw/pci-bridge/pci_expander_bridge.c
index 613857b601..535889f7c2 100644
--- a/hw/pci-bridge/pci_expander_bridge.c
+++ b/hw/pci-bridge/pci_expander_bridge.c
@@ -263,7 +263,7 @@ static int pxb_map_irq_fn(PCIDevice *pci_dev, int pin)
 
     /*
      * First carry out normal swizzle to handle
-     * multple root ports on a pxb instance.
+     * multiple root ports on a pxb instance.
      */
     pin = pci_swizzle_map_irq_fn(pci_dev, pin);
 
diff --git a/hw/pci-host/bonito.c b/hw/pci-host/bonito.c
index 4701481b9b..ee6cb85e97 100644
--- a/hw/pci-host/bonito.c
+++ b/hw/pci-host/bonito.c
@@ -62,7 +62,7 @@
 #define DPRINTF(fmt, ...)
 #endif
 
-/* from linux soure code. include/asm-mips/mips-boards/bonito64.h*/
+/* from linux source code. include/asm-mips/mips-boards/bonito64.h*/
 #define BONITO_BOOT_BASE        0x1fc00000
 #define BONITO_BOOT_SIZE        0x00100000
 #define BONITO_BOOT_TOP         (BONITO_BOOT_BASE + BONITO_BOOT_SIZE - 1)
diff --git a/hw/pci-host/designware.c b/hw/pci-host/designware.c
index 388d252ee2..6f5442f108 100644
--- a/hw/pci-host/designware.c
+++ b/hw/pci-host/designware.c
@@ -488,7 +488,7 @@ static void designware_pcie_root_realize(PCIDevice *dev, Error **errp)
 
     /*
      * If no inbound iATU windows are configured, HW defaults to
-     * letting inbound TLPs to pass in. We emulate that by exlicitly
+     * letting inbound TLPs to pass in. We emulate that by explicitly
      * configuring first inbound window to cover all of target's
      * address space.
      *
@@ -503,7 +503,7 @@ static void designware_pcie_root_realize(PCIDevice *dev, Error **errp)
                           &designware_pci_host_msi_ops,
                           root, "pcie-msi", 0x4);
     /*
-     * We initially place MSI interrupt I/O region a adress 0 and
+     * We initially place MSI interrupt I/O region at address 0 and
      * disable it. It'll be later moved to correct offset and enabled
      * in designware_pcie_root_update_msi_mapping() as a part of
      * initialization done by guest OS
diff --git a/hw/pci-host/dino.c b/hw/pci-host/dino.c
index e8eaebca54..82503229fa 100644
--- a/hw/pci-host/dino.c
+++ b/hw/pci-host/dino.c
@@ -1,5 +1,5 @@
 /*
- * HP-PARISC Dino PCI chipset emulation, as in B160L and similiar machines
+ * HP-PARISC Dino PCI chipset emulation, as in B160L and similar machines
  *
  * (C) 2017-2019 by Helge Deller <deller@gmx.de>
  *
diff --git a/hw/pci-host/gpex-acpi.c b/hw/pci-host/gpex-acpi.c
index 7c7316bc96..1092dc3b70 100644
--- a/hw/pci-host/gpex-acpi.c
+++ b/hw/pci-host/gpex-acpi.c
@@ -177,7 +177,7 @@ void acpi_dsdt_add_gpex(Aml *scope, struct GPEXConfig *cfg)
             acpi_dsdt_add_pci_route_table(dev, cfg->irq);
 
             /*
-             * Resources defined for PXBs are composed by the folling parts:
+             * Resources defined for PXBs are composed of the following parts:
              * 1. The resources the pci-brige/pcie-root-port need.
              * 2. The resources the devices behind pxb need.
              */
diff --git a/hw/pci-host/gt64120.c b/hw/pci-host/gt64120.c
index 82c15edb46..143bf053d7 100644
--- a/hw/pci-host/gt64120.c
+++ b/hw/pci-host/gt64120.c
@@ -331,9 +331,9 @@ static void gt64120_update_pci_cfgdata_mapping(GT64120State *s)
     /*
      * The setting of the MByteSwap bit and MWordSwap bit in the PCI Internal
      * Command Register determines how data transactions from the CPU to/from
-     * PCI are handled along with the setting of the Endianess bit in the CPU
+     * PCI are handled along with the setting of the Endianness bit in the CPU
      * Configuration Register. See:
-     * - Table 16: 32-bit PCI Transaction Endianess
+     * - Table 16: 32-bit PCI Transaction Endianness
      * - Table 158: PCI_0 Command, Offset: 0xc00
      */
 
diff --git a/hw/pci-host/pnv_phb.c b/hw/pci-host/pnv_phb.c
index 82332d7a05..157c00782c 100644
--- a/hw/pci-host/pnv_phb.c
+++ b/hw/pci-host/pnv_phb.c
@@ -25,7 +25,7 @@
  * state associated with the child has an id, use it as QOM id.
  * Otherwise use object_typename[index] as QOM id.
  *
- * This helper does both operations at the same time because seting
+ * This helper does both operations at the same time because setting
  * a new QOM child will erase the bus parent of the device. This happens
  * because object_unparent() will call object_property_del_child(),
  * which in turn calls the property release callback prop->release if
diff --git a/hw/pci-host/pnv_phb3.c b/hw/pci-host/pnv_phb3.c
index 7a21497cf8..c5e58f4086 100644
--- a/hw/pci-host/pnv_phb3.c
+++ b/hw/pci-host/pnv_phb3.c
@@ -757,7 +757,7 @@ static void pnv_phb3_translate_tve(PnvPhb3DMASpace *ds, hwaddr addr,
          * We only support non-translate in top window.
          *
          * TODO: Venice/Murano support it on bottom window above 4G and
-         * Naples suports it on everything
+         * Naples supports it on everything
          */
         if (!(tve & PPC_BIT(51))) {
             phb3_error(phb, "xlate for invalid non-translate TVE");
diff --git a/hw/pci-host/pnv_phb3_msi.c b/hw/pci-host/pnv_phb3_msi.c
index 41e63b066f..dc8d8637f2 100644
--- a/hw/pci-host/pnv_phb3_msi.c
+++ b/hw/pci-host/pnv_phb3_msi.c
@@ -281,7 +281,7 @@ static void phb3_msi_instance_init(Object *obj)
                              object_property_allow_set_link,
                              OBJ_PROP_LINK_STRONG);
 
-    /* Will be overriden later */
+    /* Will be overridden later */
     ics->offset = 0;
 }
 
diff --git a/hw/pci-host/pnv_phb4.c b/hw/pci-host/pnv_phb4.c
index 6232cbeee1..29cb11a5d9 100644
--- a/hw/pci-host/pnv_phb4.c
+++ b/hw/pci-host/pnv_phb4.c
@@ -207,7 +207,7 @@ static void pnv_phb4_check_mbt(PnvPHB4 *phb, uint32_t index)
         start = base | (phb->regs[PHB_M64_UPPER_BITS >> 3]);
     }
 
-    /* TODO: Figure out how to implemet/decode AOMASK */
+    /* TODO: Figure out how to implement/decode AOMASK */
 
     /* Check if it matches an enabled MMIO region in the PEC stack */
     if (memory_region_is_mapped(&phb->mmbar0) &&
@@ -391,7 +391,7 @@ static void pnv_phb4_ioda_write(PnvPHB4 *phb, uint64_t val)
     case IODA3_TBL_MBT:
         *tptr = val;
 
-        /* Copy accross the valid bit to the other half */
+        /* Copy across the valid bit to the other half */
         phb->ioda_MBT[idx ^ 1] &= 0x7fffffffffffffffull;
         phb->ioda_MBT[idx ^ 1] |= 0x8000000000000000ull & val;
 
@@ -1408,7 +1408,7 @@ static void pnv_phb4_msi_write(void *opaque, hwaddr addr,
         return;
     }
 
-    /* TODO: check PE/MSI assignement */
+    /* TODO: check PE/MSI assignment */
 
     qemu_irq_pulse(phb->qirqs[src]);
 }
diff --git a/hw/pci/pcie_aer.c b/hw/pci/pcie_aer.c
index 374d593ead..b68c7ecb49 100644
--- a/hw/pci/pcie_aer.c
+++ b/hw/pci/pcie_aer.c
@@ -324,7 +324,7 @@ static void pcie_aer_msg_root_port(PCIDevice *dev, const PCIEAERMsg *msg)
          * it isn't implemented in qemu right now.
          * So just discard the error for now.
          * OS which cares of aer would receive errors via
-         * native aer mechanims, so this wouldn't matter.
+         * native aer mechanisms, so this wouldn't matter.
          */
     }
 
diff --git a/hw/pci/shpc.c b/hw/pci/shpc.c
index e7bc7192f1..df7f370111 100644
--- a/hw/pci/shpc.c
+++ b/hw/pci/shpc.c
@@ -615,7 +615,7 @@ int shpc_init(PCIDevice *d, PCIBus *sec_bus, MemoryRegion *bar,
     }
     if (nslots > SHPC_MAX_SLOTS ||
         SHPC_IDX_TO_PCI(nslots) > PCI_SLOT_MAX) {
-        /* TODO: report an error mesage that makes sense. */
+        /* TODO: report an error message that makes sense. */
         return -EINVAL;
     }
     shpc->nslots = nslots;
diff --git a/hw/ppc/e500.c b/hw/ppc/e500.c
index 67793a86f1..d5b6820d1d 100644
--- a/hw/ppc/e500.c
+++ b/hw/ppc/e500.c
@@ -712,7 +712,7 @@ static int ppce500_prep_device_tree(PPCE500MachineState *machine,
     p->kernel_base = kernel_base;
     p->kernel_size = kernel_size;
 
-    qemu_register_reset(ppce500_reset_device_tree, p);
+    qemu_register_reset_nosnapshotload(ppce500_reset_device_tree, p);
     p->notifier.notify = ppce500_init_notify;
     qemu_add_machine_init_done_notifier(&p->notifier);
 
diff --git a/hw/ppc/mac_oldworld.c b/hw/ppc/mac_oldworld.c
index 510ff0eaaf..9acc7adfc9 100644
--- a/hw/ppc/mac_oldworld.c
+++ b/hw/ppc/mac_oldworld.c
@@ -81,6 +81,7 @@ static void ppc_heathrow_reset(void *opaque)
 {
     PowerPCCPU *cpu = opaque;
 
+    cpu_ppc_tb_reset(&cpu->env);
     cpu_reset(CPU(cpu));
 }
 
diff --git a/hw/ppc/meson.build b/hw/ppc/meson.build
index a313d4b964..7c2c52434a 100644
--- a/hw/ppc/meson.build
+++ b/hw/ppc/meson.build
@@ -36,7 +36,6 @@ ppc_ss.add(when: ['CONFIG_PSERIES', 'CONFIG_TCG'], if_true: files(
 ppc_ss.add(when: 'CONFIG_SPAPR_RNG', if_true: files('spapr_rng.c'))
 ppc_ss.add(when: ['CONFIG_PSERIES', 'CONFIG_LINUX'], if_true: files(
   'spapr_pci_vfio.c',
-  'spapr_pci_nvlink2.c'
 ))
 
 # IBM PowerNV
diff --git a/hw/ppc/pegasos2.c b/hw/ppc/pegasos2.c
index 075367d94d..bd397cf2b5 100644
--- a/hw/ppc/pegasos2.c
+++ b/hw/ppc/pegasos2.c
@@ -99,6 +99,7 @@ static void pegasos2_cpu_reset(void *opaque)
         cpu->env.gpr[1] = 2 * VOF_STACK_SIZE - 0x20;
         cpu->env.nip = 0x100;
     }
+    cpu_ppc_tb_reset(&cpu->env);
 }
 
 static void pegasos2_pci_irq(void *opaque, int n, int level)
diff --git a/hw/ppc/pnv_core.c b/hw/ppc/pnv_core.c
index 9b39d527de..8c7afe037f 100644
--- a/hw/ppc/pnv_core.c
+++ b/hw/ppc/pnv_core.c
@@ -61,6 +61,8 @@ static void pnv_core_cpu_reset(PnvCore *pc, PowerPCCPU *cpu)
     hreg_compute_hflags(env);
     ppc_maybe_interrupt(env);
 
+    cpu_ppc_tb_reset(env);
+
     pcc->intc_reset(pc->chip, cpu);
 }
 
diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c
index 0e0a3d93c3..be167710a3 100644
--- a/hw/ppc/ppc.c
+++ b/hw/ppc/ppc.c
@@ -32,6 +32,7 @@
 #include "qemu/main-loop.h"
 #include "qemu/error-report.h"
 #include "sysemu/kvm.h"
+#include "sysemu/replay.h"
 #include "sysemu/runstate.h"
 #include "kvm_ppc.h"
 #include "migration/vmstate.h"
@@ -58,7 +59,9 @@ void ppc_set_irq(PowerPCCPU *cpu, int irq, int level)
 
     if (old_pending != env->pending_interrupts) {
         ppc_maybe_interrupt(env);
-        kvmppc_set_interrupt(cpu, irq, level);
+        if (kvm_enabled()) {
+            kvmppc_set_interrupt(cpu, irq, level);
+        }
     }
 
     trace_ppc_irq_set_exit(env, irq, level, env->pending_interrupts,
@@ -482,10 +485,32 @@ void ppce500_set_mpic_proxy(bool enabled)
 /*****************************************************************************/
 /* PowerPC time base and decrementer emulation */
 
+/*
+ * Conversion between QEMU_CLOCK_VIRTUAL ns and timebase (TB) ticks:
+ * TB ticks are arrived at by multiplying tb_freq then dividing by
+ * ns per second, and rounding down. TB ticks drive all clocks and
+ * timers in the target machine.
+ *
+ * Converting TB intervals to ns for the purpose of setting a
+ * QEMU_CLOCK_VIRTUAL timer should go the other way, but rounding
+ * up. Rounding down could cause the timer to fire before the TB
+ * value has been reached.
+ */
+static uint64_t ns_to_tb(uint32_t freq, int64_t clock)
+{
+    return muldiv64(clock, freq, NANOSECONDS_PER_SECOND);
+}
+
+/* virtual clock in TB ticks, not adjusted by TB offset */
+static int64_t tb_to_ns_round_up(uint32_t freq, uint64_t tb)
+{
+    return muldiv64_round_up(tb, NANOSECONDS_PER_SECOND, freq);
+}
+
 uint64_t cpu_ppc_get_tb(ppc_tb_t *tb_env, uint64_t vmclk, int64_t tb_offset)
 {
     /* TB time in tb periods */
-    return muldiv64(vmclk, tb_env->tb_freq, NANOSECONDS_PER_SECOND) + tb_offset;
+    return ns_to_tb(tb_env->tb_freq, vmclk) + tb_offset;
 }
 
 uint64_t cpu_ppc_load_tbl (CPUPPCState *env)
@@ -497,7 +522,8 @@ uint64_t cpu_ppc_load_tbl (CPUPPCState *env)
         return env->spr[SPR_TBL];
     }
 
-    tb = cpu_ppc_get_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), tb_env->tb_offset);
+    tb = cpu_ppc_get_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL),
+                        tb_env->tb_offset);
     trace_ppc_tb_load(tb);
 
     return tb;
@@ -508,7 +534,8 @@ static inline uint32_t _cpu_ppc_load_tbu(CPUPPCState *env)
     ppc_tb_t *tb_env = env->tb_env;
     uint64_t tb;
 
-    tb = cpu_ppc_get_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), tb_env->tb_offset);
+    tb = cpu_ppc_get_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL),
+                        tb_env->tb_offset);
     trace_ppc_tb_load(tb);
 
     return tb >> 32;
@@ -526,8 +553,7 @@ uint32_t cpu_ppc_load_tbu (CPUPPCState *env)
 static inline void cpu_ppc_store_tb(ppc_tb_t *tb_env, uint64_t vmclk,
                                     int64_t *tb_offsetp, uint64_t value)
 {
-    *tb_offsetp = value -
-        muldiv64(vmclk, tb_env->tb_freq, NANOSECONDS_PER_SECOND);
+    *tb_offsetp = value - ns_to_tb(tb_env->tb_freq, vmclk);
 
     trace_ppc_tb_store(value, *tb_offsetp);
 }
@@ -565,7 +591,8 @@ uint64_t cpu_ppc_load_atbl (CPUPPCState *env)
     ppc_tb_t *tb_env = env->tb_env;
     uint64_t tb;
 
-    tb = cpu_ppc_get_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), tb_env->atb_offset);
+    tb = cpu_ppc_get_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL),
+                        tb_env->atb_offset);
     trace_ppc_tb_load(tb);
 
     return tb;
@@ -576,7 +603,8 @@ uint32_t cpu_ppc_load_atbu (CPUPPCState *env)
     ppc_tb_t *tb_env = env->tb_env;
     uint64_t tb;
 
-    tb = cpu_ppc_get_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), tb_env->atb_offset);
+    tb = cpu_ppc_get_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL),
+                        tb_env->atb_offset);
     trace_ppc_tb_load(tb);
 
     return tb >> 32;
@@ -683,64 +711,77 @@ bool ppc_decr_clear_on_delivery(CPUPPCState *env)
     return ((tb_env->flags & flags) == PPC_DECR_UNDERFLOW_TRIGGERED);
 }
 
-static inline int64_t _cpu_ppc_load_decr(CPUPPCState *env, uint64_t next)
+static inline int64_t __cpu_ppc_load_decr(CPUPPCState *env, int64_t now,
+                                          uint64_t next)
 {
     ppc_tb_t *tb_env = env->tb_env;
-    int64_t decr, diff;
+    uint64_t n;
+    int64_t decr;
 
-    diff = next - qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
-    if (diff >= 0) {
-        decr = muldiv64(diff, tb_env->decr_freq, NANOSECONDS_PER_SECOND);
-    } else if (tb_env->flags & PPC_TIMER_BOOKE) {
+    n = ns_to_tb(tb_env->decr_freq, now);
+    if (next > n && tb_env->flags & PPC_TIMER_BOOKE) {
         decr = 0;
-    }  else {
-        decr = -muldiv64(-diff, tb_env->decr_freq, NANOSECONDS_PER_SECOND);
+    } else {
+        decr = next - n;
     }
+
     trace_ppc_decr_load(decr);
 
     return decr;
 }
 
-target_ulong cpu_ppc_load_decr(CPUPPCState *env)
+static target_ulong _cpu_ppc_load_decr(CPUPPCState *env, int64_t now)
 {
     ppc_tb_t *tb_env = env->tb_env;
     uint64_t decr;
 
-    if (kvm_enabled()) {
-        return env->spr[SPR_DECR];
-    }
-
-    decr = _cpu_ppc_load_decr(env, tb_env->decr_next);
+    decr = __cpu_ppc_load_decr(env, now, tb_env->decr_next);
 
     /*
-     * If large decrementer is enabled then the decrementer is signed extened
+     * If large decrementer is enabled then the decrementer is signed extended
      * to 64 bits, otherwise it is a 32 bit value.
      */
     if (env->spr[SPR_LPCR] & LPCR_LD) {
-        return decr;
+        PowerPCCPU *cpu = env_archcpu(env);
+        PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
+        return sextract64(decr, 0, pcc->lrg_decr_bits);
     }
     return (uint32_t) decr;
 }
 
-target_ulong cpu_ppc_load_hdecr(CPUPPCState *env)
+target_ulong cpu_ppc_load_decr(CPUPPCState *env)
+{
+    if (kvm_enabled()) {
+        return env->spr[SPR_DECR];
+    } else {
+        return _cpu_ppc_load_decr(env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL));
+    }
+}
+
+static target_ulong _cpu_ppc_load_hdecr(CPUPPCState *env, int64_t now)
 {
     PowerPCCPU *cpu = env_archcpu(env);
     PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
     ppc_tb_t *tb_env = env->tb_env;
     uint64_t hdecr;
 
-    hdecr =  _cpu_ppc_load_decr(env, tb_env->hdecr_next);
+    hdecr =  __cpu_ppc_load_decr(env, now, tb_env->hdecr_next);
 
     /*
      * If we have a large decrementer (POWER9 or later) then hdecr is sign
      * extended to 64 bits, otherwise it is 32 bits.
      */
     if (pcc->lrg_decr_bits > 32) {
-        return hdecr;
+        return sextract64(hdecr, 0, pcc->lrg_decr_bits);
     }
     return (uint32_t) hdecr;
 }
 
+target_ulong cpu_ppc_load_hdecr(CPUPPCState *env)
+{
+    return _cpu_ppc_load_hdecr(env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL));
+}
+
 uint64_t cpu_ppc_load_purr (CPUPPCState *env)
 {
     ppc_tb_t *tb_env = env->tb_env;
@@ -785,7 +826,7 @@ static inline void cpu_ppc_hdecr_lower(PowerPCCPU *cpu)
     ppc_set_irq(cpu, PPC_INTERRUPT_HDECR, 0);
 }
 
-static void __cpu_ppc_store_decr(PowerPCCPU *cpu, uint64_t *nextp,
+static void __cpu_ppc_store_decr(PowerPCCPU *cpu, int64_t now, uint64_t *nextp,
                                  QEMUTimer *timer,
                                  void (*raise_excp)(void *),
                                  void (*lower_excp)(PowerPCCPU *),
@@ -794,7 +835,7 @@ static void __cpu_ppc_store_decr(PowerPCCPU *cpu, uint64_t *nextp,
 {
     CPUPPCState *env = &cpu->env;
     ppc_tb_t *tb_env = env->tb_env;
-    uint64_t now, next;
+    uint64_t next;
     int64_t signed_value;
     int64_t signed_decr;
 
@@ -806,10 +847,14 @@ static void __cpu_ppc_store_decr(PowerPCCPU *cpu, uint64_t *nextp,
 
     trace_ppc_decr_store(nr_bits, decr, value);
 
-    if (kvm_enabled()) {
-        /* KVM handles decrementer exceptions, we don't need our own timer */
-        return;
-    }
+    /*
+     * Calculate the next decrementer event and set a timer.
+     * decr_next is in timebase units to keep rounding simple. Note it is
+     * not adjusted by tb_offset because if TB changes via tb_offset changing,
+     * decrementer does not change, so not directly comparable with TB.
+     */
+    next = ns_to_tb(tb_env->decr_freq, now) + value;
+    *nextp = next; /* nextp is in timebase units */
 
     /*
      * Going from 1 -> 0 or 0 -> -1 is the event to generate a DEC interrupt.
@@ -832,21 +877,17 @@ static void __cpu_ppc_store_decr(PowerPCCPU *cpu, uint64_t *nextp,
         (*lower_excp)(cpu);
     }
 
-    /* Calculate the next timer event */
-    now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
-    next = now + muldiv64(value, NANOSECONDS_PER_SECOND, tb_env->decr_freq);
-    *nextp = next;
-
     /* Adjust timer */
-    timer_mod(timer, next);
+    timer_mod(timer, tb_to_ns_round_up(tb_env->decr_freq, next));
 }
 
-static inline void _cpu_ppc_store_decr(PowerPCCPU *cpu, target_ulong decr,
-                                       target_ulong value, int nr_bits)
+static inline void _cpu_ppc_store_decr(PowerPCCPU *cpu, int64_t now,
+                                       target_ulong decr, target_ulong value,
+                                       int nr_bits)
 {
     ppc_tb_t *tb_env = cpu->env.tb_env;
 
-    __cpu_ppc_store_decr(cpu, &tb_env->decr_next, tb_env->decr_timer,
+    __cpu_ppc_store_decr(cpu, now, &tb_env->decr_next, tb_env->decr_timer,
                          tb_env->decr_timer->cb, &cpu_ppc_decr_lower,
                          tb_env->flags, decr, value, nr_bits);
 }
@@ -855,13 +896,22 @@ void cpu_ppc_store_decr(CPUPPCState *env, target_ulong value)
 {
     PowerPCCPU *cpu = env_archcpu(env);
     PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
+    int64_t now;
+    target_ulong decr;
     int nr_bits = 32;
 
+    if (kvm_enabled()) {
+        /* KVM handles decrementer exceptions, we don't need our own timer */
+        return;
+    }
+
     if (env->spr[SPR_LPCR] & LPCR_LD) {
         nr_bits = pcc->lrg_decr_bits;
     }
 
-    _cpu_ppc_store_decr(cpu, cpu_ppc_load_decr(env), value, nr_bits);
+    now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+    decr = _cpu_ppc_load_decr(env, now);
+    _cpu_ppc_store_decr(cpu, now, decr, value, nr_bits);
 }
 
 static void cpu_ppc_decr_cb(void *opaque)
@@ -871,14 +921,15 @@ static void cpu_ppc_decr_cb(void *opaque)
     cpu_ppc_decr_excp(cpu);
 }
 
-static inline void _cpu_ppc_store_hdecr(PowerPCCPU *cpu, target_ulong hdecr,
-                                        target_ulong value, int nr_bits)
+static inline void _cpu_ppc_store_hdecr(PowerPCCPU *cpu, int64_t now,
+                                        target_ulong hdecr, target_ulong value,
+                                        int nr_bits)
 {
     ppc_tb_t *tb_env = cpu->env.tb_env;
 
     if (tb_env->hdecr_timer != NULL) {
         /* HDECR (Book3S 64bit) is edge-based, not level like DECR */
-        __cpu_ppc_store_decr(cpu, &tb_env->hdecr_next, tb_env->hdecr_timer,
+        __cpu_ppc_store_decr(cpu, now, &tb_env->hdecr_next, tb_env->hdecr_timer,
                              tb_env->hdecr_timer->cb, &cpu_ppc_hdecr_lower,
                              PPC_DECR_UNDERFLOW_TRIGGERED,
                              hdecr, value, nr_bits);
@@ -889,9 +940,12 @@ void cpu_ppc_store_hdecr(CPUPPCState *env, target_ulong value)
 {
     PowerPCCPU *cpu = env_archcpu(env);
     PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
+    int64_t now;
+    target_ulong hdecr;
 
-    _cpu_ppc_store_hdecr(cpu, cpu_ppc_load_hdecr(env), value,
-                         pcc->lrg_decr_bits);
+    now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+    hdecr = _cpu_ppc_load_hdecr(env, now);
+    _cpu_ppc_store_hdecr(cpu, now, hdecr, value, pcc->lrg_decr_bits);
 }
 
 static void cpu_ppc_hdecr_cb(void *opaque)
@@ -901,29 +955,16 @@ static void cpu_ppc_hdecr_cb(void *opaque)
     cpu_ppc_hdecr_excp(cpu);
 }
 
-void cpu_ppc_store_purr(CPUPPCState *env, uint64_t value)
+static void _cpu_ppc_store_purr(CPUPPCState *env, int64_t now, uint64_t value)
 {
     ppc_tb_t *tb_env = env->tb_env;
 
-    cpu_ppc_store_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL),
-                     &tb_env->purr_offset, value);
+    cpu_ppc_store_tb(tb_env, now, &tb_env->purr_offset, value);
 }
 
-static void cpu_ppc_set_tb_clk (void *opaque, uint32_t freq)
+void cpu_ppc_store_purr(CPUPPCState *env, uint64_t value)
 {
-    CPUPPCState *env = opaque;
-    PowerPCCPU *cpu = env_archcpu(env);
-    ppc_tb_t *tb_env = env->tb_env;
-
-    tb_env->tb_freq = freq;
-    tb_env->decr_freq = freq;
-    /* There is a bug in Linux 2.4 kernels:
-     * if a decrementer exception is pending when it enables msr_ee at startup,
-     * it's not ready to handle it...
-     */
-    _cpu_ppc_store_decr(cpu, 0xFFFFFFFF, 0xFFFFFFFF, 32);
-    _cpu_ppc_store_hdecr(cpu, 0xFFFFFFFF, 0xFFFFFFFF, 32);
-    cpu_ppc_store_purr(env, 0x0000000000000000ULL);
+    _cpu_ppc_store_purr(env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), value);
 }
 
 static void timebase_save(PPCTimebase *tb)
@@ -936,8 +977,14 @@ static void timebase_save(PPCTimebase *tb)
         return;
     }
 
-    /* not used anymore, we keep it for compatibility */
-    tb->time_of_the_day_ns = qemu_clock_get_ns(QEMU_CLOCK_HOST);
+    if (replay_mode == REPLAY_MODE_NONE) {
+        /* not used anymore, we keep it for compatibility */
+        tb->time_of_the_day_ns = qemu_clock_get_ns(QEMU_CLOCK_HOST);
+    } else {
+        /* simpler for record-replay to avoid this event, compat not needed */
+        tb->time_of_the_day_ns = 0;
+    }
+
     /*
      * tb_offset is only expected to be changed by QEMU so
      * there is no need to update it from KVM here
@@ -1027,7 +1074,7 @@ const VMStateDescription vmstate_ppc_timebase = {
 };
 
 /* Set up (once) timebase frequency (in Hz) */
-clk_setup_cb cpu_ppc_tb_init (CPUPPCState *env, uint32_t freq)
+void cpu_ppc_tb_init(CPUPPCState *env, uint32_t freq)
 {
     PowerPCCPU *cpu = env_archcpu(env);
     ppc_tb_t *tb_env;
@@ -1040,16 +1087,41 @@ clk_setup_cb cpu_ppc_tb_init (CPUPPCState *env, uint32_t freq)
         tb_env->flags |= PPC_DECR_UNDERFLOW_LEVEL;
     }
     /* Create new timer */
-    tb_env->decr_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, &cpu_ppc_decr_cb, cpu);
+    tb_env->decr_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
+                                      &cpu_ppc_decr_cb, cpu);
     if (env->has_hv_mode && !cpu->vhyp) {
-        tb_env->hdecr_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, &cpu_ppc_hdecr_cb,
-                                                cpu);
+        tb_env->hdecr_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
+                                           &cpu_ppc_hdecr_cb, cpu);
     } else {
         tb_env->hdecr_timer = NULL;
     }
-    cpu_ppc_set_tb_clk(env, freq);
 
-    return &cpu_ppc_set_tb_clk;
+    tb_env->tb_freq = freq;
+    tb_env->decr_freq = freq;
+}
+
+void cpu_ppc_tb_reset(CPUPPCState *env)
+{
+    PowerPCCPU *cpu = env_archcpu(env);
+    ppc_tb_t *tb_env = env->tb_env;
+
+    timer_del(tb_env->decr_timer);
+    ppc_set_irq(cpu, PPC_INTERRUPT_DECR, 0);
+    tb_env->decr_next = 0;
+    if (tb_env->hdecr_timer != NULL) {
+        timer_del(tb_env->hdecr_timer);
+        ppc_set_irq(cpu, PPC_INTERRUPT_HDECR, 0);
+        tb_env->hdecr_next = 0;
+    }
+
+    /*
+     * There is a bug in Linux 2.4 kernels:
+     * if a decrementer exception is pending when it enables msr_ee at startup,
+     * it's not ready to handle it...
+     */
+    cpu_ppc_store_decr(env, -1);
+    cpu_ppc_store_hdecr(env, -1);
+    cpu_ppc_store_purr(env, 0x0000000000000000ULL);
 }
 
 void cpu_ppc_tb_free(CPUPPCState *env)
@@ -1125,9 +1197,7 @@ static void cpu_4xx_fit_cb (void *opaque)
         /* Cannot occur, but makes gcc happy */
         return;
     }
-    next = now + muldiv64(next, NANOSECONDS_PER_SECOND, tb_env->tb_freq);
-    if (next == now)
-        next++;
+    next = now + tb_to_ns_round_up(tb_env->tb_freq, next);
     timer_mod(ppc40x_timer->fit_timer, next);
     env->spr[SPR_40x_TSR] |= 1 << 26;
     if ((env->spr[SPR_40x_TCR] >> 23) & 0x1) {
@@ -1153,14 +1223,15 @@ static void start_stop_pit (CPUPPCState *env, ppc_tb_t *tb_env, int is_excp)
     } else {
         trace_ppc4xx_pit_start(ppc40x_timer->pit_reload);
         now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
-        next = now + muldiv64(ppc40x_timer->pit_reload,
-                              NANOSECONDS_PER_SECOND, tb_env->decr_freq);
-        if (is_excp)
-            next += tb_env->decr_next - now;
-        if (next == now)
-            next++;
+
+        if (is_excp) {
+            tb_env->decr_next += ppc40x_timer->pit_reload;
+        } else {
+            tb_env->decr_next = ns_to_tb(tb_env->decr_freq, now)
+                                + ppc40x_timer->pit_reload;
+        }
+        next = tb_to_ns_round_up(tb_env->decr_freq, tb_env->decr_next);
         timer_mod(tb_env->decr_timer, next);
-        tb_env->decr_next = next;
     }
 }
 
@@ -1213,9 +1284,7 @@ static void cpu_4xx_wdt_cb (void *opaque)
         /* Cannot occur, but makes gcc happy */
         return;
     }
-    next = now + muldiv64(next, NANOSECONDS_PER_SECOND, tb_env->decr_freq);
-    if (next == now)
-        next++;
+    next = now + tb_to_ns_round_up(tb_env->decr_freq, next);
     trace_ppc4xx_wdt(env->spr[SPR_40x_TCR], env->spr[SPR_40x_TSR]);
     switch ((env->spr[SPR_40x_TSR] >> 30) & 0x3) {
     case 0x0:
@@ -1465,5 +1534,7 @@ void ppc_irq_reset(PowerPCCPU *cpu)
     CPUPPCState *env = &cpu->env;
 
     env->irq_input_state = 0;
-    kvmppc_set_interrupt(cpu, PPC_INTERRUPT_EXT, 0);
+    if (kvm_enabled()) {
+        kvmppc_set_interrupt(cpu, PPC_INTERRUPT_EXT, 0);
+    }
 }
diff --git a/hw/ppc/prep.c b/hw/ppc/prep.c
index d9231c7317..f6fd35fcb9 100644
--- a/hw/ppc/prep.c
+++ b/hw/ppc/prep.c
@@ -67,6 +67,7 @@ static void ppc_prep_reset(void *opaque)
     PowerPCCPU *cpu = opaque;
 
     cpu_reset(CPU(cpu));
+    cpu_ppc_tb_reset(&cpu->env);
 }
 
 
diff --git a/hw/ppc/prep_systemio.c b/hw/ppc/prep_systemio.c
index 5a56f155f5..c96cefb13d 100644
--- a/hw/ppc/prep_systemio.c
+++ b/hw/ppc/prep_systemio.c
@@ -39,7 +39,7 @@
 #define TYPE_PREP_SYSTEMIO "prep-systemio"
 OBJECT_DECLARE_SIMPLE_TYPE(PrepSystemIoState, PREP_SYSTEMIO)
 
-/* Bit as defined in PowerPC Reference Plaform v1.1, sect. 6.1.5, p. 132 */
+/* Bit as defined in PowerPC Reference Platform v1.1, sect. 6.1.5, p. 132 */
 #define PREP_BIT(n) (1 << (7 - (n)))
 
 struct PrepSystemIoState {
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 07e91e3800..1f1aa2a6d4 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -1022,7 +1022,6 @@ static void spapr_dt_chosen(SpaprMachineState *spapr, void *fdt, bool reset)
 {
     MachineState *machine = MACHINE(spapr);
     SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(machine);
-    uint8_t rng_seed[32];
     int chosen;
 
     _FDT(chosen = fdt_add_subnode(fdt, 0, "chosen"));
@@ -1100,8 +1099,7 @@ static void spapr_dt_chosen(SpaprMachineState *spapr, void *fdt, bool reset)
         spapr_dt_ov5_platform_support(spapr, fdt, chosen);
     }
 
-    qemu_guest_getrandom_nofail(rng_seed, sizeof(rng_seed));
-    _FDT(fdt_setprop(fdt, chosen, "rng-seed", rng_seed, sizeof(rng_seed)));
+    _FDT(fdt_setprop(fdt, chosen, "rng-seed", spapr->fdt_rng_seed, 32));
 
     _FDT(spapr_dt_ovec(fdt, chosen, spapr->ov5_cas, "ibm,architecture-vec-5"));
 }
@@ -1322,6 +1320,22 @@ void spapr_set_all_lpcrs(target_ulong value, target_ulong mask)
     }
 }
 
+/* May be used when the machine is not running */
+void spapr_init_all_lpcrs(target_ulong value, target_ulong mask)
+{
+    CPUState *cs;
+    CPU_FOREACH(cs) {
+        PowerPCCPU *cpu = POWERPC_CPU(cs);
+        CPUPPCState *env = &cpu->env;
+        target_ulong lpcr;
+
+        lpcr = env->spr[SPR_LPCR];
+        lpcr &= ~(LPCR_HR | LPCR_UPRT);
+        ppc_store_lpcr(cpu, lpcr);
+    }
+}
+
+
 static bool spapr_get_pate(PPCVirtualHypervisor *vhyp, PowerPCCPU *cpu,
                            target_ulong lpid, ppc_v3_pate_t *entry)
 {
@@ -1583,7 +1597,7 @@ int spapr_reallocate_hpt(SpaprMachineState *spapr, int shift, Error **errp)
     }
     /* We're setting up a hash table, so that means we're not radix */
     spapr->patb_entry = 0;
-    spapr_set_all_lpcrs(0, LPCR_HR | LPCR_UPRT);
+    spapr_init_all_lpcrs(0, LPCR_HR | LPCR_UPRT);
     return 0;
 }
 
@@ -1638,6 +1652,14 @@ static void spapr_machine_reset(MachineState *machine, ShutdownCause reason)
     void *fdt;
     int rc;
 
+    if (reason != SHUTDOWN_CAUSE_SNAPSHOT_LOAD) {
+        /*
+         * Record-replay snapshot load must not consume random, this was
+         * already replayed from initial machine reset.
+         */
+        qemu_guest_getrandom_nofail(spapr->fdt_rng_seed, 32);
+    }
+
     pef_kvm_reset(machine->cgs, &error_fatal);
     spapr_caps_apply(spapr);
 
@@ -1661,7 +1683,7 @@ static void spapr_machine_reset(MachineState *machine, ShutdownCause reason)
     spapr_ovec_cleanup(spapr->ov5_cas);
     spapr->ov5_cas = spapr_ovec_new();
 
-    ppc_set_compat_all(spapr->max_compat_pvr, &error_fatal);
+    ppc_init_compat_all(spapr->max_compat_pvr, &error_fatal);
 
     /*
      * This is fixing some of the default configuration of the XIVE
@@ -2551,7 +2573,7 @@ static void spapr_set_vsmt_mode(SpaprMachineState *spapr, Error **errp)
         return;
     }
 
-    /* Detemine the VSMT mode to use: */
+    /* Determine the VSMT mode to use: */
     if (vsmt_user) {
         if (spapr->vsmt < smp_threads) {
             error_setg(errp, "Cannot support VSMT mode %d"
@@ -2853,8 +2875,6 @@ static void spapr_machine_init(MachineState *machine)
     /* init CPUs */
     spapr_init_cpus(spapr);
 
-    spapr->gpu_numa_id = spapr_numa_initial_nvgpu_numa_id(machine);
-
     /* Init numa_assoc_array */
     spapr_numa_associativity_init(spapr, machine);
 
@@ -3087,7 +3107,7 @@ static int spapr_kvm_type(MachineState *machine, const char *vm_type)
 {
     /*
      * The use of g_ascii_strcasecmp() for 'hv' and 'pr' is to
-     * accomodate the 'HV' and 'PV' formats that exists in the
+     * accommodate the 'HV' and 'PV' formats that exists in the
      * wild. The 'auto' mode is being introduced already as
      * lower-case, thus we don't need to bother checking for
      * "AUTO".
@@ -4112,7 +4132,6 @@ static bool spapr_phb_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
                            &sphb->buid, &sphb->io_win_addr,
                            &sphb->mem_win_addr, &sphb->mem64_win_addr,
                            windows_supported, sphb->dma_liobn,
-                           &sphb->nv2_gpa_win_addr, &sphb->nv2_atsd_win_addr,
                            errp);
 }
 
@@ -4321,7 +4340,7 @@ spapr_cpu_index_to_props(MachineState *machine, unsigned cpu_index)
     CPUArchId *core_slot;
     MachineClass *mc = MACHINE_GET_CLASS(machine);
 
-    /* make sure possible_cpu are intialized */
+    /* make sure possible_cpu are initialized */
     mc->possible_cpu_arch_ids(machine);
     /* get CPU core slot containing thread that matches cpu_index */
     core_slot = spapr_find_cpu_slot(machine, cpu_index, NULL);
@@ -4375,8 +4394,7 @@ static const CPUArchIdList *spapr_possible_cpu_arch_ids(MachineState *machine)
 static bool spapr_phb_placement(SpaprMachineState *spapr, uint32_t index,
                                 uint64_t *buid, hwaddr *pio,
                                 hwaddr *mmio32, hwaddr *mmio64,
-                                unsigned n_dma, uint32_t *liobns,
-                                hwaddr *nv2gpa, hwaddr *nv2atsd, Error **errp)
+                                unsigned n_dma, uint32_t *liobns, Error **errp)
 {
     /*
      * New-style PHB window placement.
@@ -4421,9 +4439,6 @@ static bool spapr_phb_placement(SpaprMachineState *spapr, uint32_t index,
     *pio = SPAPR_PCI_BASE + index * SPAPR_PCI_IO_WIN_SIZE;
     *mmio32 = SPAPR_PCI_BASE + (index + 1) * SPAPR_PCI_MEM32_WIN_SIZE;
     *mmio64 = SPAPR_PCI_BASE + (index + 1) * SPAPR_PCI_MEM64_WIN_SIZE;
-
-    *nv2gpa = SPAPR_PCI_NV2RAM64_WIN_BASE + index * SPAPR_PCI_NV2RAM64_WIN_SIZE;
-    *nv2atsd = SPAPR_PCI_NV2ATSD_WIN_BASE + index * SPAPR_PCI_NV2ATSD_WIN_SIZE;
     return true;
 }
 
@@ -4941,16 +4956,12 @@ DEFINE_SPAPR_MACHINE(4_1, "4.1", false);
 static bool phb_placement_4_0(SpaprMachineState *spapr, uint32_t index,
                               uint64_t *buid, hwaddr *pio,
                               hwaddr *mmio32, hwaddr *mmio64,
-                              unsigned n_dma, uint32_t *liobns,
-                              hwaddr *nv2gpa, hwaddr *nv2atsd, Error **errp)
+                              unsigned n_dma, uint32_t *liobns, Error **errp)
 {
     if (!spapr_phb_placement(spapr, index, buid, pio, mmio32, mmio64, n_dma,
-                             liobns, nv2gpa, nv2atsd, errp)) {
+                             liobns, errp)) {
         return false;
     }
-
-    *nv2gpa = 0;
-    *nv2atsd = 0;
     return true;
 }
 static void spapr_machine_4_0_class_options(MachineClass *mc)
@@ -5023,7 +5034,7 @@ static void spapr_machine_2_12_class_options(MachineClass *mc)
 
     /* We depend on kvm_enabled() to choose a default value for the
      * hpt-max-page-size capability. Of course we can't do it here
-     * because this is too early and the HW accelerator isn't initialzed
+     * because this is too early and the HW accelerator isn't initialized
      * yet. Postpone this to machine init (see default_caps_with_cpu()).
      */
     smc->default_caps.caps[SPAPR_CAP_HPT_MAXPAGESIZE] = 0;
@@ -5115,8 +5126,7 @@ DEFINE_SPAPR_MACHINE(2_8, "2.8", false);
 static bool phb_placement_2_7(SpaprMachineState *spapr, uint32_t index,
                               uint64_t *buid, hwaddr *pio,
                               hwaddr *mmio32, hwaddr *mmio64,
-                              unsigned n_dma, uint32_t *liobns,
-                              hwaddr *nv2gpa, hwaddr *nv2atsd, Error **errp)
+                              unsigned n_dma, uint32_t *liobns, Error **errp)
 {
     /* Legacy PHB placement for pseries-2.7 and earlier machine types */
     const uint64_t base_buid = 0x800000020000000ULL;
@@ -5161,8 +5171,6 @@ static bool phb_placement_2_7(SpaprMachineState *spapr, uint32_t index,
      * window into contiguous 32-bit and 64-bit windows
      */
 
-    *nv2gpa = 0;
-    *nv2atsd = 0;
     return true;
 }
 
diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c
index b482d9754a..91fae56573 100644
--- a/hw/ppc/spapr_cpu_core.c
+++ b/hw/ppc/spapr_cpu_core.c
@@ -74,6 +74,8 @@ static void spapr_reset_vcpu(PowerPCCPU *cpu)
 
     kvm_check_mmu(cpu, &error_fatal);
 
+    cpu_ppc_tb_reset(env);
+
     spapr_irq_cpu_intc_reset(spapr, cpu);
 }
 
diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
index 9b1f225d4a..522a2396c7 100644
--- a/hw/ppc/spapr_hcall.c
+++ b/hw/ppc/spapr_hcall.c
@@ -3,6 +3,7 @@
 #include "qapi/error.h"
 #include "sysemu/hw_accel.h"
 #include "sysemu/runstate.h"
+#include "sysemu/tcg.h"
 #include "qemu/log.h"
 #include "qemu/main-loop.h"
 #include "qemu/module.h"
@@ -789,6 +790,54 @@ static target_ulong h_logical_dcbf(PowerPCCPU *cpu, SpaprMachineState *spapr,
     return H_SUCCESS;
 }
 
+static target_ulong h_set_mode_resource_set_ciabr(PowerPCCPU *cpu,
+                                                  SpaprMachineState *spapr,
+                                                  target_ulong mflags,
+                                                  target_ulong value1,
+                                                  target_ulong value2)
+{
+    CPUPPCState *env = &cpu->env;
+
+    assert(tcg_enabled()); /* KVM will have handled this */
+
+    if (mflags) {
+        return H_UNSUPPORTED_FLAG;
+    }
+    if (value2) {
+        return H_P4;
+    }
+    if ((value1 & PPC_BITMASK(62, 63)) == 0x3) {
+        return H_P3;
+    }
+
+    ppc_store_ciabr(env, value1);
+
+    return H_SUCCESS;
+}
+
+static target_ulong h_set_mode_resource_set_dawr0(PowerPCCPU *cpu,
+                                                  SpaprMachineState *spapr,
+                                                  target_ulong mflags,
+                                                  target_ulong value1,
+                                                  target_ulong value2)
+{
+    CPUPPCState *env = &cpu->env;
+
+    assert(tcg_enabled()); /* KVM will have handled this */
+
+    if (mflags) {
+        return H_UNSUPPORTED_FLAG;
+    }
+    if (value2 & PPC_BIT(61)) {
+        return H_P4;
+    }
+
+    ppc_store_dawr0(env, value1);
+    ppc_store_dawrx0(env, value2);
+
+    return H_SUCCESS;
+}
+
 static target_ulong h_set_mode_resource_le(PowerPCCPU *cpu,
                                            SpaprMachineState *spapr,
                                            target_ulong mflags,
@@ -858,6 +907,14 @@ static target_ulong h_set_mode(PowerPCCPU *cpu, SpaprMachineState *spapr,
     target_ulong ret = H_P2;
 
     switch (resource) {
+    case H_SET_MODE_RESOURCE_SET_CIABR:
+        ret = h_set_mode_resource_set_ciabr(cpu, spapr, args[0], args[2],
+                                            args[3]);
+        break;
+    case H_SET_MODE_RESOURCE_SET_DAWR0:
+        ret = h_set_mode_resource_set_dawr0(cpu, spapr, args[0], args[2],
+                                            args[3]);
+        break;
     case H_SET_MODE_RESOURCE_LE:
         ret = h_set_mode_resource_le(cpu, spapr, args[0], args[2], args[3]);
         break;
@@ -1558,7 +1615,7 @@ static void hypercall_register_types(void)
     spapr_register_hypercall(H_GET_CPU_CHARACTERISTICS,
                              h_get_cpu_characteristics);
 
-    /* "debugger" hcalls (also used by SLOF). Note: We do -not- differenciate
+    /* "debugger" hcalls (also used by SLOF). Note: We do -not- differentiate
      * here between the "CI" and the "CACHE" variants, they will use whatever
      * mapping attributes qemu is using. When using KVM, the kernel will
      * enforce the attributes more strongly
diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
index 63e34d457a..5e3973fc5f 100644
--- a/hw/ppc/spapr_iommu.c
+++ b/hw/ppc/spapr_iommu.c
@@ -248,7 +248,7 @@ static int spapr_tce_table_post_load(void *opaque, int version_id)
         memcpy(tcet->table, tcet->mig_table,
                tcet->nb_table * sizeof(tcet->table[0]));
 
-        free(tcet->mig_table);
+        g_free(tcet->mig_table);
         tcet->mig_table = NULL;
     }
 
diff --git a/hw/ppc/spapr_numa.c b/hw/ppc/spapr_numa.c
index a64098c375..ea6762d3d2 100644
--- a/hw/ppc/spapr_numa.c
+++ b/hw/ppc/spapr_numa.c
@@ -108,20 +108,6 @@ static bool spapr_numa_is_symmetrical(MachineState *ms)
     return true;
 }
 
-/*
- * NVLink2-connected GPU RAM needs to be placed on a separate NUMA node.
- * We assign a new numa ID per GPU in spapr_pci_collect_nvgpu() which is
- * called from vPHB reset handler so we initialize the counter here.
- * If no NUMA is configured from the QEMU side, we start from 1 as GPU RAM
- * must be equally distant from any other node.
- * The final value of spapr->gpu_numa_id is going to be written to
- * max-associativity-domains in spapr_build_fdt().
- */
-unsigned int spapr_numa_initial_nvgpu_numa_id(MachineState *machine)
-{
-    return MAX(1, machine->numa_state->num_nodes);
-}
-
 /*
  * This function will translate the user distances into
  * what the kernel understand as possible values: 10
@@ -277,7 +263,7 @@ static void spapr_numa_FORM1_affinity_init(SpaprMachineState *spapr,
 {
     SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
     int nb_numa_nodes = machine->numa_state->num_nodes;
-    int i, j, max_nodes_with_gpus;
+    int i, j;
 
     /*
      * For all associativity arrays: first position is the size,
@@ -293,17 +279,7 @@ static void spapr_numa_FORM1_affinity_init(SpaprMachineState *spapr,
         spapr->FORM1_assoc_array[i][FORM1_DIST_REF_POINTS] = cpu_to_be32(i);
     }
 
-    /*
-     * Initialize NVLink GPU associativity arrays. We know that
-     * the first GPU will take the first available NUMA id, and
-     * we'll have a maximum of NVGPU_MAX_NUM GPUs in the machine.
-     * At this point we're not sure if there are GPUs or not, but
-     * let's initialize the associativity arrays and allow NVLink
-     * GPUs to be handled like regular NUMA nodes later on.
-     */
-    max_nodes_with_gpus = nb_numa_nodes + NVGPU_MAX_NUM;
-
-    for (i = nb_numa_nodes; i < max_nodes_with_gpus; i++) {
+    for (i = nb_numa_nodes; i < nb_numa_nodes; i++) {
         spapr->FORM1_assoc_array[i][0] = cpu_to_be32(FORM1_DIST_REF_POINTS);
 
         for (j = 1; j < FORM1_DIST_REF_POINTS; j++) {
@@ -345,10 +321,6 @@ static void spapr_numa_FORM2_affinity_init(SpaprMachineState *spapr)
      * CPUs will write an additional 'vcpu_id' on top of the arrays
      * being initialized here. 'numa_id' is represented by the
      * index 'i' of the loop.
-     *
-     * Given that this initialization is also valid for GPU associativity
-     * arrays, handle everything in one single step by populating the
-     * arrays up to NUMA_NODES_MAX_NUM.
      */
     for (i = 0; i < NUMA_NODES_MAX_NUM; i++) {
         spapr->FORM2_assoc_array[i][0] = cpu_to_be32(1);
@@ -461,8 +433,6 @@ static void spapr_numa_FORM1_write_rtas_dt(SpaprMachineState *spapr,
 {
     MachineState *ms = MACHINE(spapr);
     SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
-    uint32_t number_nvgpus_nodes = spapr->gpu_numa_id -
-                                   spapr_numa_initial_nvgpu_numa_id(ms);
     uint32_t refpoints[] = {
         cpu_to_be32(0x4),
         cpu_to_be32(0x3),
@@ -470,7 +440,7 @@ static void spapr_numa_FORM1_write_rtas_dt(SpaprMachineState *spapr,
         cpu_to_be32(0x1),
     };
     uint32_t nr_refpoints = ARRAY_SIZE(refpoints);
-    uint32_t maxdomain = ms->numa_state->num_nodes + number_nvgpus_nodes;
+    uint32_t maxdomain = ms->numa_state->num_nodes;
     uint32_t maxdomains[] = {
         cpu_to_be32(4),
         cpu_to_be32(maxdomain),
@@ -486,13 +456,12 @@ static void spapr_numa_FORM1_write_rtas_dt(SpaprMachineState *spapr,
             cpu_to_be32(0x4),
             cpu_to_be32(0x2),
         };
-        uint32_t legacy_maxdomain = spapr->gpu_numa_id > 1 ? 1 : 0;
         uint32_t legacy_maxdomains[] = {
             cpu_to_be32(4),
-            cpu_to_be32(legacy_maxdomain),
-            cpu_to_be32(legacy_maxdomain),
-            cpu_to_be32(legacy_maxdomain),
-            cpu_to_be32(spapr->gpu_numa_id),
+            cpu_to_be32(0),
+            cpu_to_be32(0),
+            cpu_to_be32(0),
+            cpu_to_be32(maxdomain ? maxdomain : 1),
         };
 
         G_STATIC_ASSERT(sizeof(legacy_refpoints) <= sizeof(refpoints));
@@ -581,8 +550,6 @@ static void spapr_numa_FORM2_write_rtas_dt(SpaprMachineState *spapr,
                                            void *fdt, int rtas)
 {
     MachineState *ms = MACHINE(spapr);
-    uint32_t number_nvgpus_nodes = spapr->gpu_numa_id -
-                                   spapr_numa_initial_nvgpu_numa_id(ms);
 
     /*
      * In FORM2, ibm,associativity-reference-points will point to
@@ -596,7 +563,7 @@ static void spapr_numa_FORM2_write_rtas_dt(SpaprMachineState *spapr,
      */
     uint32_t refpoints[] = { cpu_to_be32(1) };
 
-    uint32_t maxdomain = ms->numa_state->num_nodes + number_nvgpus_nodes;
+    uint32_t maxdomain = ms->numa_state->num_nodes;
     uint32_t maxdomains[] = { cpu_to_be32(1), cpu_to_be32(maxdomain) };
 
     _FDT(fdt_setprop(fdt, rtas, "ibm,associativity-reference-points",
diff --git a/hw/ppc/spapr_nvdimm.c b/hw/ppc/spapr_nvdimm.c
index a8688243a6..b2f009c816 100644
--- a/hw/ppc/spapr_nvdimm.c
+++ b/hw/ppc/spapr_nvdimm.c
@@ -320,7 +320,8 @@ static target_ulong h_scm_write_metadata(PowerPCCPU *cpu,
 
     nvdimm = NVDIMM(drc->dev);
     if ((offset + len < offset) ||
-        (nvdimm->label_size < len + offset)) {
+        (nvdimm->label_size < len + offset) ||
+        nvdimm->readonly) {
         return H_P2;
     }
 
@@ -377,7 +378,7 @@ static target_ulong h_scm_bind_mem(PowerPCCPU *cpu, SpaprMachineState *spapr,
 
     /*
      * Currently continue token should be zero qemu has already bound
-     * everything and this hcall doesnt return H_BUSY.
+     * everything and this hcall doesn't return H_BUSY.
      */
     if (continue_token > 0) {
         return H_P5;
@@ -588,7 +589,7 @@ void spapr_nvdimm_finish_flushes(void)
      * Called on reset path, the main loop thread which calls
      * the pending BHs has gotten out running in the reset path,
      * finally reaching here. Other code path being guest
-     * h_client_architecture_support, thats early boot up.
+     * h_client_architecture_support, that's early boot up.
      */
     nvdimms = nvdimm_get_device_list();
     for (list = nvdimms; list; list = list->next) {
diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index 75aacda65a..ce14959317 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -1443,8 +1443,6 @@ static int spapr_dt_pci_device(SpaprPhbState *sphb, PCIDevice *dev,
         _FDT(fdt_setprop_cell(fdt, offset, "ibm,pci-config-space-type", 0x1));
     }
 
-    spapr_phb_nvgpu_populate_pcidev_dt(dev, fdt, offset, sphb);
-
     if (!IS_PCI_BRIDGE(dev)) {
         /* Properties only for non-bridges */
         uint32_t min_grant = pci_default_read_config(dev, PCI_MIN_GNT, 1);
@@ -1757,8 +1755,6 @@ static void spapr_phb_unrealize(DeviceState *dev)
     int i;
     const unsigned windows_supported = spapr_phb_windows_supported(sphb);
 
-    spapr_phb_nvgpu_free(sphb);
-
     if (sphb->msi) {
         g_hash_table_unref(sphb->msi);
         sphb->msi = NULL;
@@ -2069,14 +2065,8 @@ void spapr_phb_dma_reset(SpaprPhbState *sphb)
 static void spapr_phb_reset(DeviceState *qdev)
 {
     SpaprPhbState *sphb = SPAPR_PCI_HOST_BRIDGE(qdev);
-    Error *err = NULL;
 
     spapr_phb_dma_reset(sphb);
-    spapr_phb_nvgpu_free(sphb);
-    spapr_phb_nvgpu_setup(sphb, &err);
-    if (err) {
-        error_report_err(err);
-    }
 
     /* Reset the IOMMU state */
     object_child_foreach(OBJECT(qdev), spapr_phb_children_reset, NULL);
@@ -2112,8 +2102,6 @@ static Property spapr_phb_properties[] = {
                      pre_2_8_migration, false),
     DEFINE_PROP_BOOL("pcie-extended-configuration-space", SpaprPhbState,
                      pcie_ecs, true),
-    DEFINE_PROP_UINT64("gpa", SpaprPhbState, nv2_gpa_win_addr, 0),
-    DEFINE_PROP_UINT64("atsd", SpaprPhbState, nv2_atsd_win_addr, 0),
     DEFINE_PROP_BOOL("pre-5.1-associativity", SpaprPhbState,
                      pre_5_1_assoc, false),
     DEFINE_PROP_END_OF_LIST(),
@@ -2362,7 +2350,6 @@ int spapr_dt_phb(SpaprMachineState *spapr, SpaprPhbState *phb,
     };
     SpaprTceTable *tcet;
     SpaprDrc *drc;
-    Error *err = NULL;
 
     /* Start populating the FDT */
     _FDT(bus_off = fdt_add_subnode(fdt, 0, phb->dtbusname));
@@ -2443,12 +2430,6 @@ int spapr_dt_phb(SpaprMachineState *spapr, SpaprPhbState *phb,
         return ret;
     }
 
-    spapr_phb_nvgpu_populate_dt(phb, fdt, bus_off, &err);
-    if (err) {
-        error_report_err(err);
-    }
-    spapr_phb_nvgpu_ram_populate_dt(phb, fdt);
-
     return 0;
 }
 
diff --git a/hw/ppc/spapr_pci_nvlink2.c b/hw/ppc/spapr_pci_nvlink2.c
deleted file mode 100644
index 2a8a11be1d..0000000000
--- a/hw/ppc/spapr_pci_nvlink2.c
+++ /dev/null
@@ -1,442 +0,0 @@
-/*
- * QEMU sPAPR PCI for NVLink2 pass through
- *
- * Copyright (c) 2019 Alexey Kardashevskiy, IBM Corporation.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-#include "qemu/osdep.h"
-#include "qapi/error.h"
-#include "hw/pci/pci.h"
-#include "hw/pci-host/spapr.h"
-#include "hw/ppc/spapr_numa.h"
-#include "qemu/error-report.h"
-#include "hw/ppc/fdt.h"
-#include "hw/pci/pci_bridge.h"
-
-#define PHANDLE_PCIDEV(phb, pdev)    (0x12000000 | \
-                                     (((phb)->index) << 16) | ((pdev)->devfn))
-#define PHANDLE_GPURAM(phb, n)       (0x110000FF | ((n) << 8) | \
-                                     (((phb)->index) << 16))
-#define PHANDLE_NVLINK(phb, gn, nn)  (0x00130000 | (((phb)->index) << 8) | \
-                                     ((gn) << 4) | (nn))
-
-typedef struct SpaprPhbPciNvGpuSlot {
-        uint64_t tgt;
-        uint64_t gpa;
-        unsigned numa_id;
-        PCIDevice *gpdev;
-        int linknum;
-        struct {
-            uint64_t atsd_gpa;
-            PCIDevice *npdev;
-            uint32_t link_speed;
-        } links[NVGPU_MAX_LINKS];
-} SpaprPhbPciNvGpuSlot;
-
-struct SpaprPhbPciNvGpuConfig {
-    uint64_t nv2_ram_current;
-    uint64_t nv2_atsd_current;
-    int num; /* number of non empty (i.e. tgt!=0) entries in slots[] */
-    SpaprPhbPciNvGpuSlot slots[NVGPU_MAX_NUM];
-    Error *err;
-};
-
-static SpaprPhbPciNvGpuSlot *
-spapr_nvgpu_get_slot(SpaprPhbPciNvGpuConfig *nvgpus, uint64_t tgt)
-{
-    int i;
-
-    /* Search for partially collected "slot" */
-    for (i = 0; i < nvgpus->num; ++i) {
-        if (nvgpus->slots[i].tgt == tgt) {
-            return &nvgpus->slots[i];
-        }
-    }
-
-    if (nvgpus->num == ARRAY_SIZE(nvgpus->slots)) {
-        return NULL;
-    }
-
-    i = nvgpus->num;
-    nvgpus->slots[i].tgt = tgt;
-    ++nvgpus->num;
-
-    return &nvgpus->slots[i];
-}
-
-static void spapr_pci_collect_nvgpu(SpaprPhbPciNvGpuConfig *nvgpus,
-                                    PCIDevice *pdev, uint64_t tgt,
-                                    MemoryRegion *mr, Error **errp)
-{
-    MachineState *machine = MACHINE(qdev_get_machine());
-    SpaprMachineState *spapr = SPAPR_MACHINE(machine);
-    SpaprPhbPciNvGpuSlot *nvslot = spapr_nvgpu_get_slot(nvgpus, tgt);
-
-    if (!nvslot) {
-        error_setg(errp, "Found too many GPUs per vPHB");
-        return;
-    }
-    g_assert(!nvslot->gpdev);
-    nvslot->gpdev = pdev;
-
-    nvslot->gpa = nvgpus->nv2_ram_current;
-    nvgpus->nv2_ram_current += memory_region_size(mr);
-    nvslot->numa_id = spapr->gpu_numa_id;
-    ++spapr->gpu_numa_id;
-}
-
-static void spapr_pci_collect_nvnpu(SpaprPhbPciNvGpuConfig *nvgpus,
-                                    PCIDevice *pdev, uint64_t tgt,
-                                    MemoryRegion *mr, Error **errp)
-{
-    SpaprPhbPciNvGpuSlot *nvslot = spapr_nvgpu_get_slot(nvgpus, tgt);
-    int j;
-
-    if (!nvslot) {
-        error_setg(errp, "Found too many NVLink bridges per vPHB");
-        return;
-    }
-
-    j = nvslot->linknum;
-    if (j == ARRAY_SIZE(nvslot->links)) {
-        error_setg(errp, "Found too many NVLink bridges per GPU");
-        return;
-    }
-    ++nvslot->linknum;
-
-    g_assert(!nvslot->links[j].npdev);
-    nvslot->links[j].npdev = pdev;
-    nvslot->links[j].atsd_gpa = nvgpus->nv2_atsd_current;
-    nvgpus->nv2_atsd_current += memory_region_size(mr);
-    nvslot->links[j].link_speed =
-        object_property_get_uint(OBJECT(pdev), "nvlink2-link-speed", NULL);
-}
-
-static void spapr_phb_pci_collect_nvgpu(PCIBus *bus, PCIDevice *pdev,
-                                        void *opaque)
-{
-    PCIBus *sec_bus;
-    Object *po = OBJECT(pdev);
-    uint64_t tgt = object_property_get_uint(po, "nvlink2-tgt", NULL);
-
-    if (tgt) {
-        Error *local_err = NULL;
-        SpaprPhbPciNvGpuConfig *nvgpus = opaque;
-        Object *mr_gpu = object_property_get_link(po, "nvlink2-mr[0]", NULL);
-        Object *mr_npu = object_property_get_link(po, "nvlink2-atsd-mr[0]",
-                                                  NULL);
-
-        g_assert(mr_gpu || mr_npu);
-        if (mr_gpu) {
-            spapr_pci_collect_nvgpu(nvgpus, pdev, tgt, MEMORY_REGION(mr_gpu),
-                                    &local_err);
-        } else {
-            spapr_pci_collect_nvnpu(nvgpus, pdev, tgt, MEMORY_REGION(mr_npu),
-                                    &local_err);
-        }
-        error_propagate(&nvgpus->err, local_err);
-    }
-    if ((pci_default_read_config(pdev, PCI_HEADER_TYPE, 1) !=
-         PCI_HEADER_TYPE_BRIDGE)) {
-        return;
-    }
-
-    sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(pdev));
-    if (!sec_bus) {
-        return;
-    }
-
-    pci_for_each_device_under_bus(sec_bus, spapr_phb_pci_collect_nvgpu, opaque);
-}
-
-void spapr_phb_nvgpu_setup(SpaprPhbState *sphb, Error **errp)
-{
-    int i, j, valid_gpu_num;
-    PCIBus *bus;
-
-    /* Search for GPUs and NPUs */
-    if (!sphb->nv2_gpa_win_addr || !sphb->nv2_atsd_win_addr) {
-        return;
-    }
-
-    sphb->nvgpus = g_new0(SpaprPhbPciNvGpuConfig, 1);
-    sphb->nvgpus->nv2_ram_current = sphb->nv2_gpa_win_addr;
-    sphb->nvgpus->nv2_atsd_current = sphb->nv2_atsd_win_addr;
-
-    bus = PCI_HOST_BRIDGE(sphb)->bus;
-    pci_for_each_device_under_bus(bus, spapr_phb_pci_collect_nvgpu,
-                                  sphb->nvgpus);
-
-    if (sphb->nvgpus->err) {
-        error_propagate(errp, sphb->nvgpus->err);
-        sphb->nvgpus->err = NULL;
-        goto cleanup_exit;
-    }
-
-    /* Add found GPU RAM and ATSD MRs if found */
-    for (i = 0, valid_gpu_num = 0; i < sphb->nvgpus->num; ++i) {
-        Object *nvmrobj;
-        SpaprPhbPciNvGpuSlot *nvslot = &sphb->nvgpus->slots[i];
-
-        if (!nvslot->gpdev) {
-            continue;
-        }
-        nvmrobj = object_property_get_link(OBJECT(nvslot->gpdev),
-                                           "nvlink2-mr[0]", NULL);
-        /* ATSD is pointless without GPU RAM MR so skip those */
-        if (!nvmrobj) {
-            continue;
-        }
-
-        ++valid_gpu_num;
-        memory_region_add_subregion(get_system_memory(), nvslot->gpa,
-                                    MEMORY_REGION(nvmrobj));
-
-        for (j = 0; j < nvslot->linknum; ++j) {
-            Object *atsdmrobj;
-
-            atsdmrobj = object_property_get_link(OBJECT(nvslot->links[j].npdev),
-                                                 "nvlink2-atsd-mr[0]", NULL);
-            if (!atsdmrobj) {
-                continue;
-            }
-            memory_region_add_subregion(get_system_memory(),
-                                        nvslot->links[j].atsd_gpa,
-                                        MEMORY_REGION(atsdmrobj));
-        }
-    }
-
-    if (valid_gpu_num) {
-        return;
-    }
-    /* We did not find any interesting GPU */
-cleanup_exit:
-    g_free(sphb->nvgpus);
-    sphb->nvgpus = NULL;
-}
-
-void spapr_phb_nvgpu_free(SpaprPhbState *sphb)
-{
-    int i, j;
-
-    if (!sphb->nvgpus) {
-        return;
-    }
-
-    for (i = 0; i < sphb->nvgpus->num; ++i) {
-        SpaprPhbPciNvGpuSlot *nvslot = &sphb->nvgpus->slots[i];
-        Object *nv_mrobj = object_property_get_link(OBJECT(nvslot->gpdev),
-                                                    "nvlink2-mr[0]", NULL);
-
-        if (nv_mrobj) {
-            memory_region_del_subregion(get_system_memory(),
-                                        MEMORY_REGION(nv_mrobj));
-        }
-        for (j = 0; j < nvslot->linknum; ++j) {
-            PCIDevice *npdev = nvslot->links[j].npdev;
-            Object *atsd_mrobj;
-            atsd_mrobj = object_property_get_link(OBJECT(npdev),
-                                                  "nvlink2-atsd-mr[0]", NULL);
-            if (atsd_mrobj) {
-                memory_region_del_subregion(get_system_memory(),
-                                            MEMORY_REGION(atsd_mrobj));
-            }
-        }
-    }
-    g_free(sphb->nvgpus);
-    sphb->nvgpus = NULL;
-}
-
-void spapr_phb_nvgpu_populate_dt(SpaprPhbState *sphb, void *fdt, int bus_off,
-                                 Error **errp)
-{
-    int i, j, atsdnum = 0;
-    uint64_t atsd[8]; /* The existing limitation of known guests */
-
-    if (!sphb->nvgpus) {
-        return;
-    }
-
-    for (i = 0; (i < sphb->nvgpus->num) && (atsdnum < ARRAY_SIZE(atsd)); ++i) {
-        SpaprPhbPciNvGpuSlot *nvslot = &sphb->nvgpus->slots[i];
-
-        if (!nvslot->gpdev) {
-            continue;
-        }
-        for (j = 0; j < nvslot->linknum; ++j) {
-            if (!nvslot->links[j].atsd_gpa) {
-                continue;
-            }
-
-            if (atsdnum == ARRAY_SIZE(atsd)) {
-                error_report("Only %"PRIuPTR" ATSD registers supported",
-                             ARRAY_SIZE(atsd));
-                break;
-            }
-            atsd[atsdnum] = cpu_to_be64(nvslot->links[j].atsd_gpa);
-            ++atsdnum;
-        }
-    }
-
-    if (!atsdnum) {
-        error_setg(errp, "No ATSD registers found");
-        return;
-    }
-
-    if (!spapr_phb_eeh_available(sphb)) {
-        /*
-         * ibm,mmio-atsd contains ATSD registers; these belong to an NPU PHB
-         * which we do not emulate as a separate device. Instead we put
-         * ibm,mmio-atsd to the vPHB with GPU and make sure that we do not
-         * put GPUs from different IOMMU groups to the same vPHB to ensure
-         * that the guest will use ATSDs from the corresponding NPU.
-         */
-        error_setg(errp, "ATSD requires separate vPHB per GPU IOMMU group");
-        return;
-    }
-
-    _FDT((fdt_setprop(fdt, bus_off, "ibm,mmio-atsd", atsd,
-                      atsdnum * sizeof(atsd[0]))));
-}
-
-void spapr_phb_nvgpu_ram_populate_dt(SpaprPhbState *sphb, void *fdt)
-{
-    int i, j, linkidx, npuoff;
-    g_autofree char *npuname = NULL;
-
-    if (!sphb->nvgpus) {
-        return;
-    }
-
-    npuname = g_strdup_printf("npuphb%d", sphb->index);
-    npuoff = fdt_add_subnode(fdt, 0, npuname);
-    _FDT(npuoff);
-    _FDT(fdt_setprop_cell(fdt, npuoff, "#address-cells", 1));
-    _FDT(fdt_setprop_cell(fdt, npuoff, "#size-cells", 0));
-    /* Advertise NPU as POWER9 so the guest can enable NPU2 contexts */
-    _FDT((fdt_setprop_string(fdt, npuoff, "compatible", "ibm,power9-npu")));
-
-    for (i = 0, linkidx = 0; i < sphb->nvgpus->num; ++i) {
-        for (j = 0; j < sphb->nvgpus->slots[i].linknum; ++j) {
-            g_autofree char *linkname = g_strdup_printf("link@%d", linkidx);
-            int off = fdt_add_subnode(fdt, npuoff, linkname);
-
-            _FDT(off);
-            /* _FDT((fdt_setprop_cell(fdt, off, "reg", linkidx))); */
-            _FDT((fdt_setprop_string(fdt, off, "compatible",
-                                     "ibm,npu-link")));
-            _FDT((fdt_setprop_cell(fdt, off, "phandle",
-                                   PHANDLE_NVLINK(sphb, i, j))));
-            _FDT((fdt_setprop_cell(fdt, off, "ibm,npu-link-index", linkidx)));
-            ++linkidx;
-        }
-    }
-
-    /* Add memory nodes for GPU RAM and mark them unusable */
-    for (i = 0; i < sphb->nvgpus->num; ++i) {
-        SpaprPhbPciNvGpuSlot *nvslot = &sphb->nvgpus->slots[i];
-        Object *nv_mrobj = object_property_get_link(OBJECT(nvslot->gpdev),
-                                                    "nvlink2-mr[0]",
-                                                    &error_abort);
-        uint64_t size = object_property_get_uint(nv_mrobj, "size", NULL);
-        uint64_t mem_reg[2] = { cpu_to_be64(nvslot->gpa), cpu_to_be64(size) };
-        g_autofree char *mem_name = g_strdup_printf("memory@%"PRIx64,
-                                                    nvslot->gpa);
-        int off = fdt_add_subnode(fdt, 0, mem_name);
-
-        _FDT(off);
-        _FDT((fdt_setprop_string(fdt, off, "device_type", "memory")));
-        _FDT((fdt_setprop(fdt, off, "reg", mem_reg, sizeof(mem_reg))));
-
-        spapr_numa_write_associativity_dt(SPAPR_MACHINE(qdev_get_machine()),
-                                          fdt, off, nvslot->numa_id);
-
-        _FDT((fdt_setprop_string(fdt, off, "compatible",
-                                 "ibm,coherent-device-memory")));
-
-        mem_reg[1] = cpu_to_be64(0);
-        _FDT((fdt_setprop(fdt, off, "linux,usable-memory", mem_reg,
-                          sizeof(mem_reg))));
-        _FDT((fdt_setprop_cell(fdt, off, "phandle",
-                               PHANDLE_GPURAM(sphb, i))));
-    }
-
-}
-
-void spapr_phb_nvgpu_populate_pcidev_dt(PCIDevice *dev, void *fdt, int offset,
-                                        SpaprPhbState *sphb)
-{
-    int i, j;
-
-    if (!sphb->nvgpus) {
-        return;
-    }
-
-    for (i = 0; i < sphb->nvgpus->num; ++i) {
-        SpaprPhbPciNvGpuSlot *nvslot = &sphb->nvgpus->slots[i];
-
-        /* Skip "slot" without attached GPU */
-        if (!nvslot->gpdev) {
-            continue;
-        }
-        if (dev == nvslot->gpdev) {
-            g_autofree uint32_t *npus = g_new(uint32_t, nvslot->linknum);
-
-            for (j = 0; j < nvslot->linknum; ++j) {
-                PCIDevice *npdev = nvslot->links[j].npdev;
-
-                npus[j] = cpu_to_be32(PHANDLE_PCIDEV(sphb, npdev));
-            }
-            _FDT(fdt_setprop(fdt, offset, "ibm,npu", npus,
-                             j * sizeof(npus[0])));
-            _FDT((fdt_setprop_cell(fdt, offset, "phandle",
-                                   PHANDLE_PCIDEV(sphb, dev))));
-            continue;
-        }
-
-        for (j = 0; j < nvslot->linknum; ++j) {
-            if (dev != nvslot->links[j].npdev) {
-                continue;
-            }
-
-            _FDT((fdt_setprop_cell(fdt, offset, "phandle",
-                                   PHANDLE_PCIDEV(sphb, dev))));
-            _FDT(fdt_setprop_cell(fdt, offset, "ibm,gpu",
-                                  PHANDLE_PCIDEV(sphb, nvslot->gpdev)));
-            _FDT((fdt_setprop_cell(fdt, offset, "ibm,nvlink",
-                                   PHANDLE_NVLINK(sphb, i, j))));
-            /*
-             * If we ever want to emulate GPU RAM at the same location as on
-             * the host - here is the encoding GPA->TGT:
-             *
-             * gta  = ((sphb->nv2_gpa >> 42) & 0x1) << 42;
-             * gta |= ((sphb->nv2_gpa >> 45) & 0x3) << 43;
-             * gta |= ((sphb->nv2_gpa >> 49) & 0x3) << 45;
-             * gta |= sphb->nv2_gpa & ((1UL << 43) - 1);
-             */
-            _FDT(fdt_setprop_cell(fdt, offset, "memory-region",
-                                  PHANDLE_GPURAM(sphb, i)));
-            _FDT(fdt_setprop_u64(fdt, offset, "ibm,device-tgt-addr",
-                                 nvslot->tgt));
-            _FDT(fdt_setprop_cell(fdt, offset, "ibm,nvlink-speed",
-                                  nvslot->links[j].link_speed));
-        }
-    }
-}
diff --git a/hw/ppc/spapr_pci_vfio.c b/hw/ppc/spapr_pci_vfio.c
index d8aeee0b7e..9016720547 100644
--- a/hw/ppc/spapr_pci_vfio.c
+++ b/hw/ppc/spapr_pci_vfio.c
@@ -78,7 +78,7 @@ int spapr_phb_vfio_eeh_set_option(SpaprPhbState *sphb,
          * call. Now we just need to check the validity of the PCI
          * pass-through devices (vfio-pci) under this sphb bus.
          * We have already validated that all the devices under this sphb
-         * are from same iommu group (within same PE) before comming here.
+         * are from same iommu group (within same PE) before coming here.
          *
          * Prior to linux commit 98ba956f6a389 ("powerpc/pseries/eeh:
          * Rework device EEH PE determination") kernel would call
diff --git a/hw/ppc/vof.c b/hw/ppc/vof.c
index 18c3f92317..e3b430a81f 100644
--- a/hw/ppc/vof.c
+++ b/hw/ppc/vof.c
@@ -1024,6 +1024,8 @@ void vof_cleanup(Vof *vof)
     }
     vof->claimed = NULL;
     vof->of_instances = NULL;
+    vof->of_instance_last = 0;
+    vof->claimed_base = 0;
 }
 
 void vof_build_dt(void *fdt, Vof *vof)
diff --git a/hw/riscv/microchip_pfsoc.c b/hw/riscv/microchip_pfsoc.c
index e81bbd12df..b775aa8946 100644
--- a/hw/riscv/microchip_pfsoc.c
+++ b/hw/riscv/microchip_pfsoc.c
@@ -659,7 +659,7 @@ static void microchip_icicle_kit_machine_class_init(ObjectClass *oc, void *data)
     mc->default_ram_id = "microchip.icicle.kit.ram";
 
     /*
-     * Map 513 MiB high memory, the mimimum required high memory size, because
+     * Map 513 MiB high memory, the minimum required high memory size, because
      * HSS will do memory test against the high memory address range regardless
      * of physical memory installed.
      *
diff --git a/hw/riscv/sifive_u.c b/hw/riscv/sifive_u.c
index 35a335b8d0..ec76dce6c9 100644
--- a/hw/riscv/sifive_u.c
+++ b/hw/riscv/sifive_u.c
@@ -674,9 +674,8 @@ static void sifive_u_machine_init(MachineState *machine)
 
     dinfo = drive_get(IF_SD, 0, 0);
     blk = dinfo ? blk_by_legacy_dinfo(dinfo) : NULL;
-    card_dev = qdev_new(TYPE_SD_CARD);
+    card_dev = qdev_new(TYPE_SD_CARD_SPI);
     qdev_prop_set_drive_err(card_dev, "drive", blk, &error_fatal);
-    qdev_prop_set_bit(card_dev, "spi", true);
     qdev_realize_and_unref(card_dev,
                            qdev_get_child_bus(sd_dev, "sd-bus"),
                            &error_fatal);
diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
index 99c4e6314b..5edc1d98d2 100644
--- a/hw/riscv/virt.c
+++ b/hw/riscv/virt.c
@@ -35,6 +35,7 @@
 #include "hw/riscv/virt.h"
 #include "hw/riscv/boot.h"
 #include "hw/riscv/numa.h"
+#include "kvm_riscv.h"
 #include "hw/intc/riscv_aclint.h"
 #include "hw/intc/riscv_aplic.h"
 #include "hw/intc/riscv_imsic.h"
@@ -66,15 +67,21 @@
 #define VIRT_IMSIC_GROUP_MAX_SIZE      (1U << IMSIC_MMIO_GROUP_MIN_SHIFT)
 #if VIRT_IMSIC_GROUP_MAX_SIZE < \
     IMSIC_GROUP_SIZE(VIRT_CPUS_MAX_BITS, VIRT_IRQCHIP_MAX_GUESTS_BITS)
-#error "Can't accomodate single IMSIC group in address space"
+#error "Can't accommodate single IMSIC group in address space"
 #endif
 
 #define VIRT_IMSIC_MAX_SIZE            (VIRT_SOCKETS_MAX * \
                                         VIRT_IMSIC_GROUP_MAX_SIZE)
 #if 0x4000000 < VIRT_IMSIC_MAX_SIZE
-#error "Can't accomodate all IMSIC groups in address space"
+#error "Can't accommodate all IMSIC groups in address space"
 #endif
 
+/* KVM AIA only supports APLIC MSI. APLIC Wired is always emulated by QEMU. */
+static bool virt_use_kvm_aia(RISCVVirtState *s)
+{
+    return kvm_irqchip_in_kernel() && s->aia_type == VIRT_AIA_TYPE_APLIC_IMSIC;
+}
+
 static const MemMapEntry virt_memmap[] = {
     [VIRT_DEBUG] =        {        0x0,         0x100 },
     [VIRT_MROM] =         {     0x1000,        0xf000 },
@@ -516,79 +523,28 @@ static uint32_t imsic_num_bits(uint32_t count)
     return ret;
 }
 
-static void create_fdt_imsic(RISCVVirtState *s, const MemMapEntry *memmap,
-                             uint32_t *phandle, uint32_t *intc_phandles,
-                             uint32_t *msi_m_phandle, uint32_t *msi_s_phandle)
+static void create_fdt_one_imsic(RISCVVirtState *s, hwaddr base_addr,
+                                 uint32_t *intc_phandles, uint32_t msi_phandle,
+                                 bool m_mode, uint32_t imsic_guest_bits)
 {
     int cpu, socket;
     char *imsic_name;
     MachineState *ms = MACHINE(s);
     int socket_count = riscv_socket_count(ms);
-    uint32_t imsic_max_hart_per_socket, imsic_guest_bits;
+    uint32_t imsic_max_hart_per_socket;
     uint32_t *imsic_cells, *imsic_regs, imsic_addr, imsic_size;
 
-    *msi_m_phandle = (*phandle)++;
-    *msi_s_phandle = (*phandle)++;
     imsic_cells = g_new0(uint32_t, ms->smp.cpus * 2);
     imsic_regs = g_new0(uint32_t, socket_count * 4);
 
-    /* M-level IMSIC node */
     for (cpu = 0; cpu < ms->smp.cpus; cpu++) {
         imsic_cells[cpu * 2 + 0] = cpu_to_be32(intc_phandles[cpu]);
-        imsic_cells[cpu * 2 + 1] = cpu_to_be32(IRQ_M_EXT);
+        imsic_cells[cpu * 2 + 1] = cpu_to_be32(m_mode ? IRQ_M_EXT : IRQ_S_EXT);
     }
+
     imsic_max_hart_per_socket = 0;
     for (socket = 0; socket < socket_count; socket++) {
-        imsic_addr = memmap[VIRT_IMSIC_M].base +
-                     socket * VIRT_IMSIC_GROUP_MAX_SIZE;
-        imsic_size = IMSIC_HART_SIZE(0) * s->soc[socket].num_harts;
-        imsic_regs[socket * 4 + 0] = 0;
-        imsic_regs[socket * 4 + 1] = cpu_to_be32(imsic_addr);
-        imsic_regs[socket * 4 + 2] = 0;
-        imsic_regs[socket * 4 + 3] = cpu_to_be32(imsic_size);
-        if (imsic_max_hart_per_socket < s->soc[socket].num_harts) {
-            imsic_max_hart_per_socket = s->soc[socket].num_harts;
-        }
-    }
-    imsic_name = g_strdup_printf("/soc/imsics@%lx",
-        (unsigned long)memmap[VIRT_IMSIC_M].base);
-    qemu_fdt_add_subnode(ms->fdt, imsic_name);
-    qemu_fdt_setprop_string(ms->fdt, imsic_name, "compatible",
-        "riscv,imsics");
-    qemu_fdt_setprop_cell(ms->fdt, imsic_name, "#interrupt-cells",
-        FDT_IMSIC_INT_CELLS);
-    qemu_fdt_setprop(ms->fdt, imsic_name, "interrupt-controller",
-        NULL, 0);
-    qemu_fdt_setprop(ms->fdt, imsic_name, "msi-controller",
-        NULL, 0);
-    qemu_fdt_setprop(ms->fdt, imsic_name, "interrupts-extended",
-        imsic_cells, ms->smp.cpus * sizeof(uint32_t) * 2);
-    qemu_fdt_setprop(ms->fdt, imsic_name, "reg", imsic_regs,
-        socket_count * sizeof(uint32_t) * 4);
-    qemu_fdt_setprop_cell(ms->fdt, imsic_name, "riscv,num-ids",
-        VIRT_IRQCHIP_NUM_MSIS);
-    if (socket_count > 1) {
-        qemu_fdt_setprop_cell(ms->fdt, imsic_name, "riscv,hart-index-bits",
-            imsic_num_bits(imsic_max_hart_per_socket));
-        qemu_fdt_setprop_cell(ms->fdt, imsic_name, "riscv,group-index-bits",
-            imsic_num_bits(socket_count));
-        qemu_fdt_setprop_cell(ms->fdt, imsic_name, "riscv,group-index-shift",
-            IMSIC_MMIO_GROUP_MIN_SHIFT);
-    }
-    qemu_fdt_setprop_cell(ms->fdt, imsic_name, "phandle", *msi_m_phandle);
-
-    g_free(imsic_name);
-
-    /* S-level IMSIC node */
-    for (cpu = 0; cpu < ms->smp.cpus; cpu++) {
-        imsic_cells[cpu * 2 + 0] = cpu_to_be32(intc_phandles[cpu]);
-        imsic_cells[cpu * 2 + 1] = cpu_to_be32(IRQ_S_EXT);
-    }
-    imsic_guest_bits = imsic_num_bits(s->aia_guests + 1);
-    imsic_max_hart_per_socket = 0;
-    for (socket = 0; socket < socket_count; socket++) {
-        imsic_addr = memmap[VIRT_IMSIC_S].base +
-                     socket * VIRT_IMSIC_GROUP_MAX_SIZE;
+        imsic_addr = base_addr + socket * VIRT_IMSIC_GROUP_MAX_SIZE;
         imsic_size = IMSIC_HART_SIZE(imsic_guest_bits) *
                      s->soc[socket].num_harts;
         imsic_regs[socket * 4 + 0] = 0;
@@ -599,119 +555,151 @@ static void create_fdt_imsic(RISCVVirtState *s, const MemMapEntry *memmap,
             imsic_max_hart_per_socket = s->soc[socket].num_harts;
         }
     }
-    imsic_name = g_strdup_printf("/soc/imsics@%lx",
-        (unsigned long)memmap[VIRT_IMSIC_S].base);
+
+    imsic_name = g_strdup_printf("/soc/imsics@%lx", (unsigned long)base_addr);
     qemu_fdt_add_subnode(ms->fdt, imsic_name);
-    qemu_fdt_setprop_string(ms->fdt, imsic_name, "compatible",
-        "riscv,imsics");
+    qemu_fdt_setprop_string(ms->fdt, imsic_name, "compatible", "riscv,imsics");
     qemu_fdt_setprop_cell(ms->fdt, imsic_name, "#interrupt-cells",
-        FDT_IMSIC_INT_CELLS);
-    qemu_fdt_setprop(ms->fdt, imsic_name, "interrupt-controller",
-        NULL, 0);
-    qemu_fdt_setprop(ms->fdt, imsic_name, "msi-controller",
-        NULL, 0);
+                          FDT_IMSIC_INT_CELLS);
+    qemu_fdt_setprop(ms->fdt, imsic_name, "interrupt-controller", NULL, 0);
+    qemu_fdt_setprop(ms->fdt, imsic_name, "msi-controller", NULL, 0);
     qemu_fdt_setprop(ms->fdt, imsic_name, "interrupts-extended",
-        imsic_cells, ms->smp.cpus * sizeof(uint32_t) * 2);
+                     imsic_cells, ms->smp.cpus * sizeof(uint32_t) * 2);
     qemu_fdt_setprop(ms->fdt, imsic_name, "reg", imsic_regs,
-        socket_count * sizeof(uint32_t) * 4);
+                     socket_count * sizeof(uint32_t) * 4);
     qemu_fdt_setprop_cell(ms->fdt, imsic_name, "riscv,num-ids",
-        VIRT_IRQCHIP_NUM_MSIS);
+                     VIRT_IRQCHIP_NUM_MSIS);
+
     if (imsic_guest_bits) {
         qemu_fdt_setprop_cell(ms->fdt, imsic_name, "riscv,guest-index-bits",
-            imsic_guest_bits);
+                              imsic_guest_bits);
     }
+
     if (socket_count > 1) {
         qemu_fdt_setprop_cell(ms->fdt, imsic_name, "riscv,hart-index-bits",
-            imsic_num_bits(imsic_max_hart_per_socket));
+                              imsic_num_bits(imsic_max_hart_per_socket));
         qemu_fdt_setprop_cell(ms->fdt, imsic_name, "riscv,group-index-bits",
-            imsic_num_bits(socket_count));
+                              imsic_num_bits(socket_count));
         qemu_fdt_setprop_cell(ms->fdt, imsic_name, "riscv,group-index-shift",
-            IMSIC_MMIO_GROUP_MIN_SHIFT);
+                              IMSIC_MMIO_GROUP_MIN_SHIFT);
     }
-    qemu_fdt_setprop_cell(ms->fdt, imsic_name, "phandle", *msi_s_phandle);
-    g_free(imsic_name);
+    qemu_fdt_setprop_cell(ms->fdt, imsic_name, "phandle", msi_phandle);
 
+    g_free(imsic_name);
     g_free(imsic_regs);
     g_free(imsic_cells);
 }
 
+static void create_fdt_imsic(RISCVVirtState *s, const MemMapEntry *memmap,
+                             uint32_t *phandle, uint32_t *intc_phandles,
+                             uint32_t *msi_m_phandle, uint32_t *msi_s_phandle)
+{
+    *msi_m_phandle = (*phandle)++;
+    *msi_s_phandle = (*phandle)++;
+
+    if (!kvm_enabled()) {
+        /* M-level IMSIC node */
+        create_fdt_one_imsic(s, memmap[VIRT_IMSIC_M].base, intc_phandles,
+                             *msi_m_phandle, true, 0);
+    }
+
+    /* S-level IMSIC node */
+    create_fdt_one_imsic(s, memmap[VIRT_IMSIC_S].base, intc_phandles,
+                         *msi_s_phandle, false,
+                         imsic_num_bits(s->aia_guests + 1));
+
+}
+
+static void create_fdt_one_aplic(RISCVVirtState *s, int socket,
+                                 unsigned long aplic_addr, uint32_t aplic_size,
+                                 uint32_t msi_phandle,
+                                 uint32_t *intc_phandles,
+                                 uint32_t aplic_phandle,
+                                 uint32_t aplic_child_phandle,
+                                 bool m_mode, int num_harts)
+{
+    int cpu;
+    char *aplic_name;
+    uint32_t *aplic_cells;
+    MachineState *ms = MACHINE(s);
+
+    aplic_cells = g_new0(uint32_t, num_harts * 2);
+
+    for (cpu = 0; cpu < num_harts; cpu++) {
+        aplic_cells[cpu * 2 + 0] = cpu_to_be32(intc_phandles[cpu]);
+        aplic_cells[cpu * 2 + 1] = cpu_to_be32(m_mode ? IRQ_M_EXT : IRQ_S_EXT);
+    }
+
+    aplic_name = g_strdup_printf("/soc/aplic@%lx", aplic_addr);
+    qemu_fdt_add_subnode(ms->fdt, aplic_name);
+    qemu_fdt_setprop_string(ms->fdt, aplic_name, "compatible", "riscv,aplic");
+    qemu_fdt_setprop_cell(ms->fdt, aplic_name,
+                          "#interrupt-cells", FDT_APLIC_INT_CELLS);
+    qemu_fdt_setprop(ms->fdt, aplic_name, "interrupt-controller", NULL, 0);
+
+    if (s->aia_type == VIRT_AIA_TYPE_APLIC) {
+        qemu_fdt_setprop(ms->fdt, aplic_name, "interrupts-extended",
+                         aplic_cells, num_harts * sizeof(uint32_t) * 2);
+    } else {
+        qemu_fdt_setprop_cell(ms->fdt, aplic_name, "msi-parent", msi_phandle);
+    }
+
+    qemu_fdt_setprop_cells(ms->fdt, aplic_name, "reg",
+                           0x0, aplic_addr, 0x0, aplic_size);
+    qemu_fdt_setprop_cell(ms->fdt, aplic_name, "riscv,num-sources",
+                          VIRT_IRQCHIP_NUM_SOURCES);
+
+    if (aplic_child_phandle) {
+        qemu_fdt_setprop_cell(ms->fdt, aplic_name, "riscv,children",
+                              aplic_child_phandle);
+        qemu_fdt_setprop_cells(ms->fdt, aplic_name, "riscv,delegate",
+                               aplic_child_phandle, 0x1,
+                               VIRT_IRQCHIP_NUM_SOURCES);
+    }
+
+    riscv_socket_fdt_write_id(ms, aplic_name, socket);
+    qemu_fdt_setprop_cell(ms->fdt, aplic_name, "phandle", aplic_phandle);
+
+    g_free(aplic_name);
+    g_free(aplic_cells);
+}
+
 static void create_fdt_socket_aplic(RISCVVirtState *s,
                                     const MemMapEntry *memmap, int socket,
                                     uint32_t msi_m_phandle,
                                     uint32_t msi_s_phandle,
                                     uint32_t *phandle,
                                     uint32_t *intc_phandles,
-                                    uint32_t *aplic_phandles)
+                                    uint32_t *aplic_phandles,
+                                    int num_harts)
 {
-    int cpu;
     char *aplic_name;
-    uint32_t *aplic_cells;
     unsigned long aplic_addr;
     MachineState *ms = MACHINE(s);
     uint32_t aplic_m_phandle, aplic_s_phandle;
 
     aplic_m_phandle = (*phandle)++;
     aplic_s_phandle = (*phandle)++;
-    aplic_cells = g_new0(uint32_t, s->soc[socket].num_harts * 2);
 
-    /* M-level APLIC node */
-    for (cpu = 0; cpu < s->soc[socket].num_harts; cpu++) {
-        aplic_cells[cpu * 2 + 0] = cpu_to_be32(intc_phandles[cpu]);
-        aplic_cells[cpu * 2 + 1] = cpu_to_be32(IRQ_M_EXT);
+    if (!kvm_enabled()) {
+        /* M-level APLIC node */
+        aplic_addr = memmap[VIRT_APLIC_M].base +
+                     (memmap[VIRT_APLIC_M].size * socket);
+        create_fdt_one_aplic(s, socket, aplic_addr, memmap[VIRT_APLIC_M].size,
+                             msi_m_phandle, intc_phandles,
+                             aplic_m_phandle, aplic_s_phandle,
+                             true, num_harts);
     }
-    aplic_addr = memmap[VIRT_APLIC_M].base +
-                 (memmap[VIRT_APLIC_M].size * socket);
-    aplic_name = g_strdup_printf("/soc/aplic@%lx", aplic_addr);
-    qemu_fdt_add_subnode(ms->fdt, aplic_name);
-    qemu_fdt_setprop_string(ms->fdt, aplic_name, "compatible", "riscv,aplic");
-    qemu_fdt_setprop_cell(ms->fdt, aplic_name,
-        "#interrupt-cells", FDT_APLIC_INT_CELLS);
-    qemu_fdt_setprop(ms->fdt, aplic_name, "interrupt-controller", NULL, 0);
-    if (s->aia_type == VIRT_AIA_TYPE_APLIC) {
-        qemu_fdt_setprop(ms->fdt, aplic_name, "interrupts-extended",
-            aplic_cells, s->soc[socket].num_harts * sizeof(uint32_t) * 2);
-    } else {
-        qemu_fdt_setprop_cell(ms->fdt, aplic_name, "msi-parent",
-            msi_m_phandle);
-    }
-    qemu_fdt_setprop_cells(ms->fdt, aplic_name, "reg",
-        0x0, aplic_addr, 0x0, memmap[VIRT_APLIC_M].size);
-    qemu_fdt_setprop_cell(ms->fdt, aplic_name, "riscv,num-sources",
-        VIRT_IRQCHIP_NUM_SOURCES);
-    qemu_fdt_setprop_cell(ms->fdt, aplic_name, "riscv,children",
-        aplic_s_phandle);
-    qemu_fdt_setprop_cells(ms->fdt, aplic_name, "riscv,delegate",
-        aplic_s_phandle, 0x1, VIRT_IRQCHIP_NUM_SOURCES);
-    riscv_socket_fdt_write_id(ms, aplic_name, socket);
-    qemu_fdt_setprop_cell(ms->fdt, aplic_name, "phandle", aplic_m_phandle);
-    g_free(aplic_name);
 
     /* S-level APLIC node */
-    for (cpu = 0; cpu < s->soc[socket].num_harts; cpu++) {
-        aplic_cells[cpu * 2 + 0] = cpu_to_be32(intc_phandles[cpu]);
-        aplic_cells[cpu * 2 + 1] = cpu_to_be32(IRQ_S_EXT);
-    }
     aplic_addr = memmap[VIRT_APLIC_S].base +
                  (memmap[VIRT_APLIC_S].size * socket);
+    create_fdt_one_aplic(s, socket, aplic_addr, memmap[VIRT_APLIC_S].size,
+                         msi_s_phandle, intc_phandles,
+                         aplic_s_phandle, 0,
+                         false, num_harts);
+
     aplic_name = g_strdup_printf("/soc/aplic@%lx", aplic_addr);
-    qemu_fdt_add_subnode(ms->fdt, aplic_name);
-    qemu_fdt_setprop_string(ms->fdt, aplic_name, "compatible", "riscv,aplic");
-    qemu_fdt_setprop_cell(ms->fdt, aplic_name,
-        "#interrupt-cells", FDT_APLIC_INT_CELLS);
-    qemu_fdt_setprop(ms->fdt, aplic_name, "interrupt-controller", NULL, 0);
-    if (s->aia_type == VIRT_AIA_TYPE_APLIC) {
-        qemu_fdt_setprop(ms->fdt, aplic_name, "interrupts-extended",
-            aplic_cells, s->soc[socket].num_harts * sizeof(uint32_t) * 2);
-    } else {
-        qemu_fdt_setprop_cell(ms->fdt, aplic_name, "msi-parent",
-            msi_s_phandle);
-    }
-    qemu_fdt_setprop_cells(ms->fdt, aplic_name, "reg",
-        0x0, aplic_addr, 0x0, memmap[VIRT_APLIC_S].size);
-    qemu_fdt_setprop_cell(ms->fdt, aplic_name, "riscv,num-sources",
-        VIRT_IRQCHIP_NUM_SOURCES);
-    riscv_socket_fdt_write_id(ms, aplic_name, socket);
-    qemu_fdt_setprop_cell(ms->fdt, aplic_name, "phandle", aplic_s_phandle);
 
     if (!socket) {
         platform_bus_add_all_fdt_nodes(ms->fdt, aplic_name,
@@ -722,7 +710,6 @@ static void create_fdt_socket_aplic(RISCVVirtState *s,
 
     g_free(aplic_name);
 
-    g_free(aplic_cells);
     aplic_phandles[socket] = aplic_s_phandle;
 }
 
@@ -732,7 +719,7 @@ static void create_fdt_pmu(RISCVVirtState *s)
     MachineState *ms = MACHINE(s);
     RISCVCPU hart = s->soc[0].harts[0];
 
-    pmu_name = g_strdup_printf("/soc/pmu");
+    pmu_name = g_strdup_printf("/pmu");
     qemu_fdt_add_subnode(ms->fdt, pmu_name);
     qemu_fdt_setprop_string(ms->fdt, pmu_name, "compatible", "riscv,pmu");
     riscv_pmu_generate_fdt_node(ms->fdt, hart.cfg.pmu_num, pmu_name);
@@ -794,34 +781,51 @@ static void create_fdt_sockets(RISCVVirtState *s, const MemMapEntry *memmap,
         *msi_pcie_phandle = msi_s_phandle;
     }
 
-    phandle_pos = ms->smp.cpus;
-    for (socket = (socket_count - 1); socket >= 0; socket--) {
-        phandle_pos -= s->soc[socket].num_harts;
+    /* KVM AIA only has one APLIC instance */
+    if (kvm_enabled() && virt_use_kvm_aia(s)) {
+        create_fdt_socket_aplic(s, memmap, 0,
+                                msi_m_phandle, msi_s_phandle, phandle,
+                                &intc_phandles[0], xplic_phandles,
+                                ms->smp.cpus);
+    } else {
+        phandle_pos = ms->smp.cpus;
+        for (socket = (socket_count - 1); socket >= 0; socket--) {
+            phandle_pos -= s->soc[socket].num_harts;
 
-        if (s->aia_type == VIRT_AIA_TYPE_NONE) {
-            create_fdt_socket_plic(s, memmap, socket, phandle,
-                &intc_phandles[phandle_pos], xplic_phandles);
-        } else {
-            create_fdt_socket_aplic(s, memmap, socket,
-                msi_m_phandle, msi_s_phandle, phandle,
-                &intc_phandles[phandle_pos], xplic_phandles);
+            if (s->aia_type == VIRT_AIA_TYPE_NONE) {
+                create_fdt_socket_plic(s, memmap, socket, phandle,
+                                       &intc_phandles[phandle_pos],
+                                       xplic_phandles);
+            } else {
+                create_fdt_socket_aplic(s, memmap, socket,
+                                        msi_m_phandle, msi_s_phandle, phandle,
+                                        &intc_phandles[phandle_pos],
+                                        xplic_phandles,
+                                        s->soc[socket].num_harts);
+            }
         }
     }
 
     g_free(intc_phandles);
 
-    for (socket = 0; socket < socket_count; socket++) {
-        if (socket == 0) {
-            *irq_mmio_phandle = xplic_phandles[socket];
-            *irq_virtio_phandle = xplic_phandles[socket];
-            *irq_pcie_phandle = xplic_phandles[socket];
-        }
-        if (socket == 1) {
-            *irq_virtio_phandle = xplic_phandles[socket];
-            *irq_pcie_phandle = xplic_phandles[socket];
-        }
-        if (socket == 2) {
-            *irq_pcie_phandle = xplic_phandles[socket];
+    if (kvm_enabled() && virt_use_kvm_aia(s)) {
+        *irq_mmio_phandle = xplic_phandles[0];
+        *irq_virtio_phandle = xplic_phandles[0];
+        *irq_pcie_phandle = xplic_phandles[0];
+    } else {
+        for (socket = 0; socket < socket_count; socket++) {
+            if (socket == 0) {
+                *irq_mmio_phandle = xplic_phandles[socket];
+                *irq_virtio_phandle = xplic_phandles[socket];
+                *irq_pcie_phandle = xplic_phandles[socket];
+            }
+            if (socket == 1) {
+                *irq_virtio_phandle = xplic_phandles[socket];
+                *irq_pcie_phandle = xplic_phandles[socket];
+            }
+            if (socket == 2) {
+                *irq_pcie_phandle = xplic_phandles[socket];
+            }
         }
     }
 
@@ -1163,16 +1167,20 @@ static DeviceState *virt_create_aia(RISCVVirtAIAType aia_type, int aia_guests,
     int i;
     hwaddr addr;
     uint32_t guest_bits;
-    DeviceState *aplic_m;
-    bool msimode = (aia_type == VIRT_AIA_TYPE_APLIC_IMSIC) ? true : false;
+    DeviceState *aplic_s = NULL;
+    DeviceState *aplic_m = NULL;
+    bool msimode = aia_type == VIRT_AIA_TYPE_APLIC_IMSIC;
 
     if (msimode) {
-        /* Per-socket M-level IMSICs */
-        addr = memmap[VIRT_IMSIC_M].base + socket * VIRT_IMSIC_GROUP_MAX_SIZE;
-        for (i = 0; i < hart_count; i++) {
-            riscv_imsic_create(addr + i * IMSIC_HART_SIZE(0),
-                               base_hartid + i, true, 1,
-                               VIRT_IRQCHIP_NUM_MSIS);
+        if (!kvm_enabled()) {
+            /* Per-socket M-level IMSICs */
+            addr = memmap[VIRT_IMSIC_M].base +
+                   socket * VIRT_IMSIC_GROUP_MAX_SIZE;
+            for (i = 0; i < hart_count; i++) {
+                riscv_imsic_create(addr + i * IMSIC_HART_SIZE(0),
+                                   base_hartid + i, true, 1,
+                                   VIRT_IRQCHIP_NUM_MSIS);
+            }
         }
 
         /* Per-socket S-level IMSICs */
@@ -1185,29 +1193,29 @@ static DeviceState *virt_create_aia(RISCVVirtAIAType aia_type, int aia_guests,
         }
     }
 
-    /* Per-socket M-level APLIC */
-    aplic_m = riscv_aplic_create(
-        memmap[VIRT_APLIC_M].base + socket * memmap[VIRT_APLIC_M].size,
-        memmap[VIRT_APLIC_M].size,
-        (msimode) ? 0 : base_hartid,
-        (msimode) ? 0 : hart_count,
-        VIRT_IRQCHIP_NUM_SOURCES,
-        VIRT_IRQCHIP_NUM_PRIO_BITS,
-        msimode, true, NULL);
-
-    if (aplic_m) {
-        /* Per-socket S-level APLIC */
-        riscv_aplic_create(
-            memmap[VIRT_APLIC_S].base + socket * memmap[VIRT_APLIC_S].size,
-            memmap[VIRT_APLIC_S].size,
-            (msimode) ? 0 : base_hartid,
-            (msimode) ? 0 : hart_count,
-            VIRT_IRQCHIP_NUM_SOURCES,
-            VIRT_IRQCHIP_NUM_PRIO_BITS,
-            msimode, false, aplic_m);
+    if (!kvm_enabled()) {
+        /* Per-socket M-level APLIC */
+        aplic_m = riscv_aplic_create(memmap[VIRT_APLIC_M].base +
+                                     socket * memmap[VIRT_APLIC_M].size,
+                                     memmap[VIRT_APLIC_M].size,
+                                     (msimode) ? 0 : base_hartid,
+                                     (msimode) ? 0 : hart_count,
+                                     VIRT_IRQCHIP_NUM_SOURCES,
+                                     VIRT_IRQCHIP_NUM_PRIO_BITS,
+                                     msimode, true, NULL);
     }
 
-    return aplic_m;
+    /* Per-socket S-level APLIC */
+    aplic_s = riscv_aplic_create(memmap[VIRT_APLIC_S].base +
+                                 socket * memmap[VIRT_APLIC_S].size,
+                                 memmap[VIRT_APLIC_S].size,
+                                 (msimode) ? 0 : base_hartid,
+                                 (msimode) ? 0 : hart_count,
+                                 VIRT_IRQCHIP_NUM_SOURCES,
+                                 VIRT_IRQCHIP_NUM_PRIO_BITS,
+                                 msimode, false, aplic_m);
+
+    return kvm_enabled() ? aplic_s : aplic_m;
 }
 
 static void create_platform_bus(RISCVVirtState *s, DeviceState *irqchip)
@@ -1453,6 +1461,14 @@ static void virt_machine_init(MachineState *machine)
         }
     }
 
+    if (kvm_enabled() && virt_use_kvm_aia(s)) {
+        kvm_riscv_aia_create(machine, IMSIC_MMIO_GROUP_MIN_SHIFT,
+                             VIRT_IRQCHIP_NUM_SOURCES, VIRT_IRQCHIP_NUM_MSIS,
+                             memmap[VIRT_APLIC_S].base,
+                             memmap[VIRT_IMSIC_S].base,
+                             s->aia_guests);
+    }
+
     if (riscv_is_32bit(&s->soc[0])) {
 #if HOST_LONG_BITS == 64
         /* limit RAM size in a 32-bit system */
diff --git a/hw/rtc/exynos4210_rtc.c b/hw/rtc/exynos4210_rtc.c
index 2b8a38a296..cc7101c530 100644
--- a/hw/rtc/exynos4210_rtc.c
+++ b/hw/rtc/exynos4210_rtc.c
@@ -202,7 +202,7 @@ static void exynos4210_rtc_update_freq(Exynos4210RTCState *s,
     uint32_t freq;
 
     freq = s->freq;
-    /* set frequncy for time generator */
+    /* set frequency for time generator */
     s->freq = RTC_BASE_FREQ / (1 << TICCKSEL(reg_value));
 
     if (freq != s->freq) {
diff --git a/hw/rx/rx62n.c b/hw/rx/rx62n.c
index 3e887a0fc7..d00fcb0ef0 100644
--- a/hw/rx/rx62n.c
+++ b/hw/rx/rx62n.c
@@ -114,7 +114,7 @@ static const uint8_t ipr_table[NR_IRQS] = {
 };
 
 /*
- * Level triggerd IRQ list
+ * Level triggered IRQ list
  * Not listed IRQ is Edge trigger.
  * See "11.3.1 Interrupt Vector Table" in hardware manual.
  */
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index bfcf64d007..2d75f2131f 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -109,6 +109,7 @@ static const char *const reset_dev_types[] = {
     "s390-flic",
     "diag288",
     TYPE_S390_PCI_HOST_BRIDGE,
+    TYPE_AP_BRIDGE,
 };
 
 static void subsystem_reset(void)
@@ -437,10 +438,20 @@ static void s390_machine_reset(MachineState *machine, ShutdownCause reason)
     switch (reset_type) {
     case S390_RESET_EXTERNAL:
     case S390_RESET_REIPL:
+        /*
+         * Reset the subsystem which includes a AP reset. If a PV
+         * guest had APQNs attached the AP reset is a prerequisite to
+         * unprotecting since the UV checks if all APQNs are reset.
+         */
+        subsystem_reset();
         if (s390_is_pv()) {
             s390_machine_unprotect(ms);
         }
 
+        /*
+         * Device reset includes CPU clear resets so this has to be
+         * done AFTER the unprotect call above.
+         */
         qemu_devices_reset(reason);
         s390_crypto_reset();
 
diff --git a/hw/scsi/lsi53c895a.c b/hw/scsi/lsi53c895a.c
index f7d45b0b20..634ed49c2e 100644
--- a/hw/scsi/lsi53c895a.c
+++ b/hw/scsi/lsi53c895a.c
@@ -1321,7 +1321,7 @@ again:
                 }
                 trace_lsi_execute_script_io_selected(id,
                                              insn & (1 << 3) ? " ATN" : "");
-                /* ??? Linux drivers compain when this is set.  Maybe
+                /* ??? Linux drivers complain when this is set.  Maybe
                    it only applies in low-level mode (unimplemented).
                 lsi_script_scsi_interrupt(s, LSI_SIST0_CMP, 0); */
                 s->select_tag = id << 8;
diff --git a/hw/scsi/mfi.h b/hw/scsi/mfi.h
index 0b4ee53dfc..cf7a2d775b 100644
--- a/hw/scsi/mfi.h
+++ b/hw/scsi/mfi.h
@@ -65,7 +65,7 @@
 #define MFI_IQPH        0xc4            /* Inbound queue port (high bytes)  */
 #define MFI_DIAG        0xf8            /* Host diag */
 #define MFI_SEQ         0xfc            /* Sequencer offset */
-#define MFI_1078_EIM    0x80000004      /* 1078 enable intrrupt mask  */
+#define MFI_1078_EIM    0x80000004      /* 1078 enable interrupt mask  */
 #define MFI_RMI         0x2             /* reply message interrupt  */
 #define MFI_1078_RM     0x80000000      /* reply 1078 message interrupt  */
 #define MFI_ODC         0x4             /* outbound doorbell change interrupt */
diff --git a/hw/sd/sd.c b/hw/sd/sd.c
index 77a717d355..4823befdef 100644
--- a/hw/sd/sd.c
+++ b/hw/sd/sd.c
@@ -87,6 +87,14 @@ enum SDCardStates {
     sd_disconnect_state,
 };
 
+typedef sd_rsp_type_t (*sd_cmd_handler)(SDState *sd, SDRequest req);
+
+typedef struct SDProto {
+    const char *name;
+    sd_cmd_handler cmd[SDMMC_CMD_MAX];
+    sd_cmd_handler acmd[SDMMC_CMD_MAX];
+} SDProto;
+
 struct SDState {
     DeviceState parent_obj;
 
@@ -107,7 +115,6 @@ struct SDState {
 
     uint8_t spec_version;
     BlockBackend *blk;
-    bool spi;
 
     /* Runtime changeables */
 
@@ -137,7 +144,6 @@ struct SDState {
     qemu_irq readonly_cb;
     qemu_irq inserted_cb;
     QEMUTimer *ocr_power_timer;
-    const char *proto_name;
     bool enable;
     uint8_t dat_lines;
     bool cmd_line;
@@ -145,6 +151,33 @@ struct SDState {
 
 static void sd_realize(DeviceState *dev, Error **errp);
 
+static const struct SDProto *sd_proto(SDState *sd)
+{
+    SDCardClass *sc = SD_CARD_GET_CLASS(sd);
+
+    return sc->proto;
+}
+
+static const SDProto sd_proto_spi;
+
+static bool sd_is_spi(SDState *sd)
+{
+    return sd_proto(sd) == &sd_proto_spi;
+}
+
+static const char *sd_version_str(enum SDPhySpecificationVersion version)
+{
+    static const char *sdphy_version[] = {
+        [SD_PHY_SPECv1_10_VERS]     = "v1.10",
+        [SD_PHY_SPECv2_00_VERS]     = "v2.00",
+        [SD_PHY_SPECv3_01_VERS]     = "v3.01",
+    };
+    if (version >= ARRAY_SIZE(sdphy_version)) {
+        return "unsupported version";
+    }
+    return sdphy_version[version];
+}
+
 static const char *sd_state_name(enum SDCardStates state)
 {
     static const char *state_name[] = {
@@ -309,7 +342,7 @@ static void sd_set_ocr(SDState *sd)
     /* All voltages OK */
     sd->ocr = R_OCR_VDD_VOLTAGE_WIN_HI_MASK;
 
-    if (sd->spi) {
+    if (sd_is_spi(sd)) {
         /*
          * We don't need to emulate power up sequence in SPI-mode.
          * Thus, the card's power up status bit should be set to 1 when reset.
@@ -714,13 +747,12 @@ SDState *sd_init(BlockBackend *blk, bool is_spi)
     SDState *sd;
     Error *err = NULL;
 
-    obj = object_new(TYPE_SD_CARD);
+    obj = object_new(is_spi ? TYPE_SD_CARD_SPI : TYPE_SD_CARD);
     dev = DEVICE(obj);
     if (!qdev_prop_set_drive_err(dev, "drive", blk, &err)) {
         error_reportf_err(err, "sd_init failed: ");
         return NULL;
     }
-    qdev_prop_set_bit(dev, "spi", is_spi);
 
     /*
      * Realizing the device properly would put it into the QOM
@@ -966,6 +998,106 @@ static bool address_in_range(SDState *sd, const char *desc,
     return true;
 }
 
+static sd_rsp_type_t sd_invalid_state_for_cmd(SDState *sd, SDRequest req)
+{
+    qemu_log_mask(LOG_GUEST_ERROR, "%s: CMD%i in a wrong state: %s (spec %s)\n",
+                  sd_proto(sd)->name, req.cmd, sd_state_name(sd->state),
+                  sd_version_str(sd->spec_version));
+
+    return sd_illegal;
+}
+
+static sd_rsp_type_t sd_cmd_illegal(SDState *sd, SDRequest req)
+{
+    qemu_log_mask(LOG_GUEST_ERROR, "%s: Unknown CMD%i for spec %s\n",
+                  sd_proto(sd)->name, req.cmd,
+                  sd_version_str(sd->spec_version));
+
+    return sd_illegal;
+}
+
+/* Commands that are recognised but not yet implemented. */
+static sd_rsp_type_t sd_cmd_unimplemented(SDState *sd, SDRequest req)
+{
+    qemu_log_mask(LOG_UNIMP, "%s: CMD%i not implemented\n",
+                  sd_proto(sd)->name, req.cmd);
+
+    return sd_illegal;
+}
+
+static sd_rsp_type_t sd_cmd_GO_IDLE_STATE(SDState *sd, SDRequest req)
+{
+    if (sd->state != sd_inactive_state) {
+        sd->state = sd_idle_state;
+        sd_reset(DEVICE(sd));
+    }
+
+    return sd_is_spi(sd) ? sd_r1 : sd_r0;
+}
+
+static sd_rsp_type_t sd_cmd_SEND_OP_CMD(SDState *sd, SDRequest req)
+{
+    sd->state = sd_transfer_state;
+
+    return sd_r1;
+}
+
+static sd_rsp_type_t sd_cmd_ALL_SEND_CID(SDState *sd, SDRequest req)
+{
+    if (sd->state != sd_ready_state) {
+        return sd_invalid_state_for_cmd(sd, req);
+    }
+
+    sd->state = sd_identification_state;
+
+    return sd_r2_i;
+}
+
+static sd_rsp_type_t sd_cmd_SEND_RELATIVE_ADDR(SDState *sd, SDRequest req)
+{
+    switch (sd->state) {
+    case sd_identification_state:
+    case sd_standby_state:
+        sd->state = sd_standby_state;
+        sd_set_rca(sd);
+        return sd_r6;
+
+    default:
+        return sd_invalid_state_for_cmd(sd, req);
+    }
+}
+
+static sd_rsp_type_t sd_cmd_SEND_TUNING_BLOCK(SDState *sd, SDRequest req)
+{
+        if (sd->spec_version < SD_PHY_SPECv3_01_VERS) {
+            return sd_cmd_illegal(sd, req);
+        }
+
+        if (sd->state != sd_transfer_state) {
+            return sd_invalid_state_for_cmd(sd, req);
+        }
+
+        sd->state = sd_sendingdata_state;
+        sd->data_offset = 0;
+
+        return sd_r1;
+}
+
+static sd_rsp_type_t sd_cmd_SET_BLOCK_COUNT(SDState *sd, SDRequest req)
+{
+        if (sd->spec_version < SD_PHY_SPECv3_01_VERS) {
+            return sd_cmd_illegal(sd, req);
+        }
+
+        if (sd->state != sd_transfer_state) {
+            return sd_invalid_state_for_cmd(sd, req);
+        }
+
+        sd->multi_blk_cnt = req.arg;
+
+        return sd_r1;
+}
+
 static sd_rsp_type_t sd_normal_command(SDState *sd, SDRequest req)
 {
     uint32_t rca = 0x0000;
@@ -975,7 +1107,7 @@ static sd_rsp_type_t sd_normal_command(SDState *sd, SDRequest req)
      * However there is no ACMD55, so we want to trace this particular case.
      */
     if (req.cmd != 55 || sd->expecting_acmd) {
-        trace_sdcard_normal_command(sd->proto_name,
+        trace_sdcard_normal_command(sd_proto(sd)->name,
                                     sd_cmd_name(req.cmd), req.cmd,
                                     req.arg, sd_state_name(sd->state));
     }
@@ -999,58 +1131,13 @@ static sd_rsp_type_t sd_normal_command(SDState *sd, SDRequest req)
         return sd_illegal;
     }
 
+    if (sd_proto(sd)->cmd[req.cmd]) {
+        return sd_proto(sd)->cmd[req.cmd](sd, req);
+    }
+
     switch (req.cmd) {
     /* Basic commands (Class 0 and Class 1) */
-    case 0:  /* CMD0:   GO_IDLE_STATE */
-        switch (sd->state) {
-        case sd_inactive_state:
-            return sd->spi ? sd_r1 : sd_r0;
-
-        default:
-            sd->state = sd_idle_state;
-            sd_reset(DEVICE(sd));
-            return sd->spi ? sd_r1 : sd_r0;
-        }
-        break;
-
-    case 1:  /* CMD1:   SEND_OP_CMD */
-        if (!sd->spi)
-            goto bad_cmd;
-
-        sd->state = sd_transfer_state;
-        return sd_r1;
-
-    case 2:  /* CMD2:   ALL_SEND_CID */
-        if (sd->spi)
-            goto bad_cmd;
-        switch (sd->state) {
-        case sd_ready_state:
-            sd->state = sd_identification_state;
-            return sd_r2_i;
-
-        default:
-            break;
-        }
-        break;
-
-    case 3:  /* CMD3:   SEND_RELATIVE_ADDR */
-        if (sd->spi)
-            goto bad_cmd;
-        switch (sd->state) {
-        case sd_identification_state:
-        case sd_standby_state:
-            sd->state = sd_standby_state;
-            sd_set_rca(sd);
-            return sd_r6;
-
-        default:
-            break;
-        }
-        break;
-
     case 4:  /* CMD4:   SEND_DSR */
-        if (sd->spi)
-            goto bad_cmd;
         switch (sd->state) {
         case sd_standby_state:
             break;
@@ -1060,9 +1147,6 @@ static sd_rsp_type_t sd_normal_command(SDState *sd, SDRequest req)
         }
         break;
 
-    case 5: /* CMD5: reserved for SDIO cards */
-        return sd_illegal;
-
     case 6:  /* CMD6:   SWITCH_FUNCTION */
         switch (sd->mode) {
         case sd_data_transfer_mode:
@@ -1078,8 +1162,6 @@ static sd_rsp_type_t sd_normal_command(SDState *sd, SDRequest req)
         break;
 
     case 7:  /* CMD7:   SELECT/DESELECT_CARD */
-        if (sd->spi)
-            goto bad_cmd;
         switch (sd->state) {
         case sd_standby_state:
             if (sd->rca != rca)
@@ -1126,7 +1208,7 @@ static sd_rsp_type_t sd_normal_command(SDState *sd, SDRequest req)
 
         /* No response if not exactly one VHS bit is set.  */
         if (!(req.arg >> 8) || (req.arg >> (ctz32(req.arg & ~0xff) + 1))) {
-            return sd->spi ? sd_r7 : sd_r0;
+            return sd_is_spi(sd) ? sd_r7 : sd_r0;
         }
 
         /* Accept.  */
@@ -1142,8 +1224,9 @@ static sd_rsp_type_t sd_normal_command(SDState *sd, SDRequest req)
             return sd_r2_s;
 
         case sd_transfer_state:
-            if (!sd->spi)
+            if (!sd_is_spi(sd)) {
                 break;
+            }
             sd->state = sd_sendingdata_state;
             memcpy(sd->data, sd->csd, 16);
             sd->data_start = addr;
@@ -1164,8 +1247,9 @@ static sd_rsp_type_t sd_normal_command(SDState *sd, SDRequest req)
             return sd_r2_i;
 
         case sd_transfer_state:
-            if (!sd->spi)
+            if (!sd_is_spi(sd)) {
                 break;
+            }
             sd->state = sd_sendingdata_state;
             memcpy(sd->data, sd->cid, 16);
             sd->data_start = addr;
@@ -1197,7 +1281,7 @@ static sd_rsp_type_t sd_normal_command(SDState *sd, SDRequest req)
     case 13:  /* CMD13:  SEND_STATUS */
         switch (sd->mode) {
         case sd_data_transfer_mode:
-            if (!sd->spi && sd->rca != rca) {
+            if (!sd_is_spi(sd) && sd->rca != rca) {
                 return sd_r0;
             }
 
@@ -1209,8 +1293,6 @@ static sd_rsp_type_t sd_normal_command(SDState *sd, SDRequest req)
         break;
 
     case 15:  /* CMD15:  GO_INACTIVE_STATE */
-        if (sd->spi)
-            goto bad_cmd;
         switch (sd->mode) {
         case sd_data_transfer_mode:
             if (sd->rca != rca)
@@ -1224,7 +1306,7 @@ static sd_rsp_type_t sd_normal_command(SDState *sd, SDRequest req)
         }
         break;
 
-    /* Block read commands (Classs 2) */
+    /* Block read commands (Class 2) */
     case 16:  /* CMD16:  SET_BLOCKLEN */
         switch (sd->state) {
         case sd_transfer_state:
@@ -1261,31 +1343,6 @@ static sd_rsp_type_t sd_normal_command(SDState *sd, SDRequest req)
         }
         break;
 
-    case 19:    /* CMD19: SEND_TUNING_BLOCK (SD) */
-        if (sd->spec_version < SD_PHY_SPECv3_01_VERS) {
-            break;
-        }
-        if (sd->state == sd_transfer_state) {
-            sd->state = sd_sendingdata_state;
-            sd->data_offset = 0;
-            return sd_r1;
-        }
-        break;
-
-    case 23:    /* CMD23: SET_BLOCK_COUNT */
-        if (sd->spec_version < SD_PHY_SPECv3_01_VERS) {
-            break;
-        }
-        switch (sd->state) {
-        case sd_transfer_state:
-            sd->multi_blk_cnt = req.arg;
-            return sd_r1;
-
-        default:
-            break;
-        }
-        break;
-
     /* Block write commands (Class 4) */
     case 24:  /* CMD24:  WRITE_SINGLE_BLOCK */
     case 25:  /* CMD25:  WRITE_MULTIPLE_BLOCK */
@@ -1317,8 +1374,6 @@ static sd_rsp_type_t sd_normal_command(SDState *sd, SDRequest req)
         break;
 
     case 26:  /* CMD26:  PROGRAM_CID */
-        if (sd->spi)
-            goto bad_cmd;
         switch (sd->state) {
         case sd_transfer_state:
             sd->state = sd_receivingdata_state;
@@ -1468,15 +1523,6 @@ static sd_rsp_type_t sd_normal_command(SDState *sd, SDRequest req)
         }
         break;
 
-    case 52 ... 54:
-        /* CMD52, CMD53, CMD54: reserved for SDIO cards
-         * (see the SDIO Simplified Specification V2.0)
-         * Handle as illegal command but do not complain
-         * on stderr, as some OSes may use these in their
-         * probing for presence of an SDIO card.
-         */
-        return sd_illegal;
-
     /* Application specific commands (Class 8) */
     case 55:  /* CMD55:  APP_CMD */
         switch (sd->state) {
@@ -1492,7 +1538,7 @@ static sd_rsp_type_t sd_normal_command(SDState *sd, SDRequest req)
         default:
             break;
         }
-        if (!sd->spi) {
+        if (!sd_is_spi(sd)) {
             if (sd->rca != rca) {
                 return sd_r0;
             }
@@ -1517,39 +1563,32 @@ static sd_rsp_type_t sd_normal_command(SDState *sd, SDRequest req)
         break;
 
     case 58:    /* CMD58:   READ_OCR (SPI) */
-        if (!sd->spi) {
-            goto bad_cmd;
-        }
         return sd_r3;
 
     case 59:    /* CMD59:   CRC_ON_OFF (SPI) */
-        if (!sd->spi) {
-            goto bad_cmd;
-        }
         return sd_r1;
 
     default:
-    bad_cmd:
         qemu_log_mask(LOG_GUEST_ERROR, "SD: Unknown CMD%i\n", req.cmd);
         return sd_illegal;
     }
 
-    qemu_log_mask(LOG_GUEST_ERROR, "SD: CMD%i in a wrong state: %s\n",
-                  req.cmd, sd_state_name(sd->state));
-    return sd_illegal;
+    return sd_invalid_state_for_cmd(sd, req);
 }
 
 static sd_rsp_type_t sd_app_command(SDState *sd,
                                     SDRequest req)
 {
-    trace_sdcard_app_command(sd->proto_name, sd_acmd_name(req.cmd),
+    trace_sdcard_app_command(sd_proto(sd)->name, sd_acmd_name(req.cmd),
                              req.cmd, req.arg, sd_state_name(sd->state));
     sd->card_status |= APP_CMD;
+
+    if (sd_proto(sd)->acmd[req.cmd]) {
+        return sd_proto(sd)->acmd[req.cmd](sd, req);
+    }
+
     switch (req.cmd) {
     case 6:  /* ACMD6:  SET_BUS_WIDTH */
-        if (sd->spi) {
-            goto unimplemented_spi_cmd;
-        }
         switch (sd->state) {
         case sd_transfer_state:
             sd->sd_status[0] &= 0x3f;
@@ -1600,11 +1639,6 @@ static sd_rsp_type_t sd_app_command(SDState *sd,
         break;
 
     case 41:  /* ACMD41: SD_APP_OP_COND */
-        if (sd->spi) {
-            /* SEND_OP_CMD */
-            sd->state = sd_transfer_state;
-            return sd_r1;
-        }
         if (sd->state != sd_idle_state) {
             break;
         }
@@ -1680,12 +1714,6 @@ static sd_rsp_type_t sd_app_command(SDState *sd,
     default:
         /* Fall back to standard commands.  */
         return sd_normal_command(sd, req);
-
-    unimplemented_spi_cmd:
-        /* Commands that are recognised but not yet implemented in SPI mode.  */
-        qemu_log_mask(LOG_UNIMP, "SD: CMD%i not implemented in SPI mode\n",
-                      req.cmd);
-        return sd_illegal;
     }
 
     qemu_log_mask(LOG_GUEST_ERROR, "SD: ACMD%i in a wrong state\n", req.cmd);
@@ -1836,7 +1864,7 @@ void sd_write_byte(SDState *sd, uint8_t value)
     if (sd->card_status & (ADDRESS_ERROR | WP_VIOLATION))
         return;
 
-    trace_sdcard_write_data(sd->proto_name,
+    trace_sdcard_write_data(sd_proto(sd)->name,
                             sd_acmd_name(sd->current_cmd),
                             sd->current_cmd, value);
     switch (sd->current_cmd) {
@@ -1992,7 +2020,7 @@ uint8_t sd_read_byte(SDState *sd)
 
     io_len = (sd->ocr & (1 << 30)) ? 512 : sd->blk_len;
 
-    trace_sdcard_read_data(sd->proto_name,
+    trace_sdcard_read_data(sd_proto(sd)->name,
                            sd_acmd_name(sd->current_cmd),
                            sd->current_cmd, io_len);
     switch (sd->current_cmd) {
@@ -2111,6 +2139,40 @@ void sd_enable(SDState *sd, bool enable)
     sd->enable = enable;
 }
 
+static const SDProto sd_proto_spi = {
+    .name = "SPI",
+    .cmd = {
+        [0]         = sd_cmd_GO_IDLE_STATE,
+        [1]         = sd_cmd_SEND_OP_CMD,
+        [2 ... 4]   = sd_cmd_illegal,
+        [5]         = sd_cmd_illegal,
+        [7]         = sd_cmd_illegal,
+        [15]        = sd_cmd_illegal,
+        [26]        = sd_cmd_illegal,
+        [52 ... 54] = sd_cmd_illegal,
+    },
+    .acmd = {
+        [6]         = sd_cmd_unimplemented,
+        [41]        = sd_cmd_SEND_OP_CMD,
+    },
+};
+
+static const SDProto sd_proto_sd = {
+    .name = "SD",
+    .cmd = {
+        [0]         = sd_cmd_GO_IDLE_STATE,
+        [1]         = sd_cmd_illegal,
+        [2]         = sd_cmd_ALL_SEND_CID,
+        [3]         = sd_cmd_SEND_RELATIVE_ADDR,
+        [5]         = sd_cmd_illegal,
+        [19]        = sd_cmd_SEND_TUNING_BLOCK,
+        [23]        = sd_cmd_SET_BLOCK_COUNT,
+        [52 ... 54] = sd_cmd_illegal,
+        [58]        = sd_cmd_illegal,
+        [59]        = sd_cmd_illegal,
+    },
+};
+
 static void sd_instance_init(Object *obj)
 {
     SDState *sd = SD_CARD(obj);
@@ -2131,8 +2193,6 @@ static void sd_realize(DeviceState *dev, Error **errp)
     SDState *sd = SD_CARD(dev);
     int ret;
 
-    sd->proto_name = sd->spi ? "SPI" : "SD";
-
     switch (sd->spec_version) {
     case SD_PHY_SPECv1_10_VERS
      ... SD_PHY_SPECv3_01_VERS:
@@ -2189,7 +2249,6 @@ static Property sd_properties[] = {
      * whether card should be in SSI or MMC/SD mode.  It is also up to the
      * board to ensure that ssi transfers only occur when the chip select
      * is asserted.  */
-    DEFINE_PROP_BOOL("spi", SDState, spi, false),
     DEFINE_PROP_END_OF_LIST()
 };
 
@@ -2216,6 +2275,7 @@ static void sd_class_init(ObjectClass *klass, void *data)
     sc->enable = sd_enable;
     sc->get_inserted = sd_get_inserted;
     sc->get_readonly = sd_get_readonly;
+    sc->proto = &sd_proto_sd;
 }
 
 static const TypeInfo sd_info = {
@@ -2228,9 +2288,31 @@ static const TypeInfo sd_info = {
     .instance_finalize = sd_instance_finalize,
 };
 
+/*
+ * We do not model the chip select pin, so allow the board to select
+ * whether card should be in SSI or MMC/SD mode.  It is also up to the
+ * board to ensure that ssi transfers only occur when the chip select
+ * is asserted.
+ */
+static void sd_spi_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    SDCardClass *sc = SD_CARD_CLASS(klass);
+
+    dc->desc = "SD SPI";
+    sc->proto = &sd_proto_spi;
+}
+
+static const TypeInfo sd_spi_info = {
+    .name = TYPE_SD_CARD_SPI,
+    .parent = TYPE_SD_CARD,
+    .class_init = sd_spi_class_init,
+};
+
 static void sd_register_types(void)
 {
     type_register_static(&sd_info);
+    type_register_static(&sd_spi_info);
 }
 
 type_init(sd_register_types)
diff --git a/hw/sd/sdhci.c b/hw/sd/sdhci.c
index 362c2c86aa..5564765a9b 100644
--- a/hw/sd/sdhci.c
+++ b/hw/sd/sdhci.c
@@ -1811,7 +1811,7 @@ usdhc_write(void *opaque, hwaddr offset, uint64_t val, unsigned size)
          * on i.MX, but since it is not used by QEMU we do not care.
          *
          * We don't want to call sdhci_write(.., SDHC_TRNMOD, ...)
-         * here becuase it will result in a call to
+         * here because it will result in a call to
          * sdhci_send_command(s) which we don't want.
          *
          */
diff --git a/hw/sd/sdmmc-internal.c b/hw/sd/sdmmc-internal.c
index 2053def3f1..8648a7808d 100644
--- a/hw/sd/sdmmc-internal.c
+++ b/hw/sd/sdmmc-internal.c
@@ -14,7 +14,7 @@
 const char *sd_cmd_name(uint8_t cmd)
 {
     static const char *cmd_abbrev[SDMMC_CMD_MAX] = {
-         [0]    = "GO_IDLE_STATE",
+         [0]    = "GO_IDLE_STATE",           [1]    = "SEND_OP_CMD",
          [2]    = "ALL_SEND_CID",            [3]    = "SEND_RELATIVE_ADDR",
          [4]    = "SET_DSR",                 [5]    = "IO_SEND_OP_COND",
          [6]    = "SWITCH_FUNC",             [7]    = "SELECT/DESELECT_CARD",
diff --git a/hw/sensor/isl_pmbus_vr.c b/hw/sensor/isl_pmbus_vr.c
index eb344dd5a9..e51269f6b8 100644
--- a/hw/sensor/isl_pmbus_vr.c
+++ b/hw/sensor/isl_pmbus_vr.c
@@ -101,7 +101,7 @@ static void isl_pmbus_vr_exit_reset(Object *obj)
     }
 }
 
-/* The raa228000 uses different direct mode coefficents from most isl devices */
+/* The raa228000 uses different direct mode coefficients from most isl devices */
 static void raa228000_exit_reset(Object *obj)
 {
     PMBusDevice *pmdev = PMBUS_DEVICE(obj);
diff --git a/hw/sensor/max34451.c b/hw/sensor/max34451.c
index a91d8bd487..9db52ef677 100644
--- a/hw/sensor/max34451.c
+++ b/hw/sensor/max34451.c
@@ -734,7 +734,7 @@ static void max34451_init(Object *obj)
 
     /*
      * get and set the temperature of the internal temperature sensor in
-     * centidegrees Celcius i.e.: 2500 -> 25.00 C, max is 327.67 C
+     * centidegrees Celsius i.e.: 2500 -> 25.00 C, max is 327.67 C
      */
     for (int i = 0; i < MAX34451_NUM_TEMP_DEVICES; i++) {
         object_property_add(obj, "temperature[*]", "uint16",
diff --git a/hw/sh4/sh7750_regs.h b/hw/sh4/sh7750_regs.h
index 94043431e6..edb5d18f00 100644
--- a/hw/sh4/sh7750_regs.h
+++ b/hw/sh4/sh7750_regs.h
@@ -113,7 +113,7 @@
 #define SH7750_TTB            SH7750_P4_REG32(SH7750_TTB_REGOFS)
 #define SH7750_TTB_A7         SH7750_A7_REG32(SH7750_TTB_REGOFS)
 
-/* TLB exeption address register - TEA */
+/* TLB exception address register - TEA */
 #define SH7750_TEA_REGOFS     0x00000c /* offset */
 #define SH7750_TEA            SH7750_P4_REG32(SH7750_TEA_REGOFS)
 #define SH7750_TEA_A7         SH7750_A7_REG32(SH7750_TEA_REGOFS)
@@ -183,19 +183,19 @@
 #define SH7750_TRA_IMM      0x000003fd /* Immediate data operand */
 #define SH7750_TRA_IMM_S    2
 
-/* Exeption event register - EXPEVT */
+/* Exception event register - EXPEVT */
 #define SH7750_EXPEVT_REGOFS  0x000024
 #define SH7750_EXPEVT         SH7750_P4_REG32(SH7750_EXPEVT_REGOFS)
 #define SH7750_EXPEVT_A7      SH7750_A7_REG32(SH7750_EXPEVT_REGOFS)
 
-#define SH7750_EXPEVT_EX      0x00000fff /* Exeption code */
+#define SH7750_EXPEVT_EX      0x00000fff /* Exception code */
 #define SH7750_EXPEVT_EX_S    0
 
 /* Interrupt event register */
 #define SH7750_INTEVT_REGOFS  0x000028
 #define SH7750_INTEVT         SH7750_P4_REG32(SH7750_INTEVT_REGOFS)
 #define SH7750_INTEVT_A7      SH7750_A7_REG32(SH7750_INTEVT_REGOFS)
-#define SH7750_INTEVT_EX    0x00000fff /* Exeption code */
+#define SH7750_INTEVT_EX    0x00000fff /* Exception code */
 #define SH7750_INTEVT_EX_S  0
 
 /*
@@ -1274,15 +1274,15 @@
 /*
  * User Break Controller registers
  */
-#define SH7750_BARA           0x200000 /* Break address regiser A */
-#define SH7750_BAMRA          0x200004 /* Break address mask regiser A */
-#define SH7750_BBRA           0x200008 /* Break bus cycle regiser A */
-#define SH7750_BARB           0x20000c /* Break address regiser B */
-#define SH7750_BAMRB          0x200010 /* Break address mask regiser B */
-#define SH7750_BBRB           0x200014 /* Break bus cycle regiser B */
-#define SH7750_BASRB          0x000018 /* Break ASID regiser B */
-#define SH7750_BDRB           0x200018 /* Break data regiser B */
-#define SH7750_BDMRB          0x20001c /* Break data mask regiser B */
+#define SH7750_BARA           0x200000 /* Break address register A */
+#define SH7750_BAMRA          0x200004 /* Break address mask register A */
+#define SH7750_BBRA           0x200008 /* Break bus cycle register A */
+#define SH7750_BARB           0x20000c /* Break address register B */
+#define SH7750_BAMRB          0x200010 /* Break address mask register B */
+#define SH7750_BBRB           0x200014 /* Break bus cycle register B */
+#define SH7750_BASRB          0x000018 /* Break ASID register B */
+#define SH7750_BDRB           0x200018 /* Break data register B */
+#define SH7750_BDMRB          0x20001c /* Break data mask register B */
 #define SH7750_BRCR           0x200020 /* Break control register */
 
 #define SH7750_BRCR_UDBE        0x0001 /* User break debug enable bit */
diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c
index 10cd22f610..b753705856 100644
--- a/hw/smbios/smbios.c
+++ b/hw/smbios/smbios.c
@@ -1110,7 +1110,7 @@ void smbios_get_tables(MachineState *ms,
         dimm_cnt = QEMU_ALIGN_UP(current_machine->ram_size, MAX_DIMM_SZ) / MAX_DIMM_SZ;
 
         /*
-         * The offset determines if we need to keep additional space betweeen
+         * The offset determines if we need to keep additional space between
          * table 17 and table 19 header handle numbers so that they do
          * not overlap. For example, for a VM with larger than 8 TB guest
          * memory and DIMM like chunks of 16 GiB, the default space between
diff --git a/hw/ssi/aspeed_smc.c b/hw/ssi/aspeed_smc.c
index 7281169322..2a4001b774 100644
--- a/hw/ssi/aspeed_smc.c
+++ b/hw/ssi/aspeed_smc.c
@@ -692,6 +692,14 @@ static void aspeed_smc_reset(DeviceState *d)
         memset(s->regs, 0, sizeof s->regs);
     }
 
+    for (i = 0; i < asc->cs_num_max; i++) {
+        DeviceState *dev = ssi_get_cs(s->spi, i);
+        if (dev) {
+            qemu_irq cs_line = qdev_get_gpio_in_named(dev, SSI_GPIO_CS, 0);
+            qdev_connect_gpio_out_named(DEVICE(s), "cs", i, cs_line);
+        }
+    }
+
     /* Unselect all peripherals */
     for (i = 0; i < asc->cs_num_max; ++i) {
         s->regs[s->r_ctrl0 + i] |= CTRL_CE_STOP_ACTIVE;
diff --git a/hw/ssi/ssi.c b/hw/ssi/ssi.c
index d54a109bee..1f3e540ab8 100644
--- a/hw/ssi/ssi.c
+++ b/hw/ssi/ssi.c
@@ -13,6 +13,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "hw/qdev-properties.h"
 #include "hw/ssi/ssi.h"
 #include "migration/vmstate.h"
 #include "qemu/module.h"
@@ -26,10 +27,46 @@ struct SSIBus {
 #define TYPE_SSI_BUS "SSI"
 OBJECT_DECLARE_SIMPLE_TYPE(SSIBus, SSI_BUS)
 
+DeviceState *ssi_get_cs(SSIBus *bus, uint8_t cs_index)
+{
+    BusState *b = BUS(bus);
+    BusChild *kid;
+
+    QTAILQ_FOREACH(kid, &b->children, sibling) {
+        SSIPeripheral *kid_ssi = SSI_PERIPHERAL(kid->child);
+        if (kid_ssi->cs_index == cs_index) {
+            return kid->child;
+        }
+    }
+
+    return NULL;
+}
+
+static bool ssi_bus_check_address(BusState *b, DeviceState *dev, Error **errp)
+{
+    SSIPeripheral *s = SSI_PERIPHERAL(dev);
+
+    if (ssi_get_cs(SSI_BUS(b), s->cs_index)) {
+        error_setg(errp, "CS index '0x%x' in use by a %s device", s->cs_index,
+                   object_get_typename(OBJECT(dev)));
+        return false;
+    }
+
+    return true;
+}
+
+static void ssi_bus_class_init(ObjectClass *klass, void *data)
+{
+    BusClass *k = BUS_CLASS(klass);
+
+    k->check_address = ssi_bus_check_address;
+}
+
 static const TypeInfo ssi_bus_info = {
     .name = TYPE_SSI_BUS,
     .parent = TYPE_BUS,
     .instance_size = sizeof(SSIBus),
+    .class_init = ssi_bus_class_init,
 };
 
 static void ssi_cs_default(void *opaque, int n, int level)
@@ -71,6 +108,11 @@ static void ssi_peripheral_realize(DeviceState *dev, Error **errp)
     ssc->realize(s, errp);
 }
 
+static Property ssi_peripheral_properties[] = {
+    DEFINE_PROP_UINT8("cs", SSIPeripheral, cs_index, 0),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
 static void ssi_peripheral_class_init(ObjectClass *klass, void *data)
 {
     SSIPeripheralClass *ssc = SSI_PERIPHERAL_CLASS(klass);
@@ -81,6 +123,7 @@ static void ssi_peripheral_class_init(ObjectClass *klass, void *data)
     if (!ssc->transfer_raw) {
         ssc->transfer_raw = ssi_transfer_raw_default;
     }
+    device_class_set_props(dc, ssi_peripheral_properties);
 }
 
 static const TypeInfo ssi_peripheral_info = {
diff --git a/hw/ssi/xilinx_spips.c b/hw/ssi/xilinx_spips.c
index 97009d3a5d..a3955c6c50 100644
--- a/hw/ssi/xilinx_spips.c
+++ b/hw/ssi/xilinx_spips.c
@@ -163,7 +163,7 @@
     FIELD(GQSPI_CNFG, ENDIAN, 26, 1)
     /* Poll timeout not implemented */
     FIELD(GQSPI_CNFG, EN_POLL_TIMEOUT, 20, 1)
-    /* QEMU doesnt care about any of these last three */
+    /* QEMU doesn't care about any of these last three */
     FIELD(GQSPI_CNFG, BR, 3, 3)
     FIELD(GQSPI_CNFG, CPH, 2, 1)
     FIELD(GQSPI_CNFG, CPL, 1, 1)
@@ -469,7 +469,7 @@ static void xlnx_zynqmp_qspips_flush_fifo_g(XlnxZynqMPQSPIPS *s)
 
             imm = ARRAY_FIELD_EX32(s->regs, GQSPI_GF_SNAPSHOT, IMMEDIATE_DATA);
             if (!ARRAY_FIELD_EX32(s->regs, GQSPI_GF_SNAPSHOT, DATA_XFER)) {
-                /* immedate transfer */
+                /* immediate transfer */
                 if (ARRAY_FIELD_EX32(s->regs, GQSPI_GF_SNAPSHOT, TRANSMIT) ||
                     ARRAY_FIELD_EX32(s->regs, GQSPI_GF_SNAPSHOT, RECIEVE)) {
                     s->regs[R_GQSPI_DATA_STS] = 1;
@@ -768,7 +768,7 @@ static void xilinx_spips_check_zero_pump(XilinxSPIPS *s)
      */
     while (s->regs[R_TRANSFER_SIZE] &&
            s->rx_fifo.num + s->tx_fifo.num < RXFF_A_Q - 3) {
-        /* endianess just doesn't matter when zero pumping */
+        /* endianness just doesn't matter when zero pumping */
         tx_data_bytes(&s->tx_fifo, 0, 4, false);
         s->regs[R_TRANSFER_SIZE] &= ~0x03ull;
         s->regs[R_TRANSFER_SIZE] -= 4;
diff --git a/hw/ssi/xlnx-versal-ospi.c b/hw/ssi/xlnx-versal-ospi.c
index c762e0b367..1a61679c2f 100644
--- a/hw/ssi/xlnx-versal-ospi.c
+++ b/hw/ssi/xlnx-versal-ospi.c
@@ -837,7 +837,7 @@ static void ospi_do_ind_read(XlnxVersalOspi *s)
     /* Continue to read flash until we run out of space in sram */
     while (!ospi_ind_op_completed(op) &&
            !fifo8_is_full(&s->rx_sram)) {
-        /* Read reqested number of bytes, max bytes limited to size of sram */
+        /* Read requested number of bytes, max bytes limited to size of sram */
         next_b = ind_op_next_byte(op);
         end_b = next_b + fifo8_num_free(&s->rx_sram);
         end_b = MIN(end_b, ind_op_end_byte(op));
diff --git a/hw/timer/etraxfs_timer.c b/hw/timer/etraxfs_timer.c
index 2d6d92ef93..f035b74560 100644
--- a/hw/timer/etraxfs_timer.c
+++ b/hw/timer/etraxfs_timer.c
@@ -236,7 +236,7 @@ static void watchdog_hit(void *opaque)
 {
     ETRAXTimerState *t = opaque;
     if (t->wd_hits == 0) {
-        /* real hw gives a single tick before reseting but we are
+        /* real hw gives a single tick before resetting but we are
            a bit friendlier to compensate for our slower execution.  */
         ptimer_set_count(t->ptimer_wd, 10);
         ptimer_run(t->ptimer_wd, 1);
diff --git a/hw/timer/renesas_tmr.c b/hw/timer/renesas_tmr.c
index c15f654738..43b31213bc 100644
--- a/hw/timer/renesas_tmr.c
+++ b/hw/timer/renesas_tmr.c
@@ -115,7 +115,7 @@ static int elapsed_time(RTMRState *tmr, int ch, int64_t delta)
         et = tmr->div_round[ch] / divrate;
         tmr->div_round[ch] %= divrate;
     } else {
-        /* disble clock. so no update */
+        /* disable clock. so no update */
         et = 0;
     }
     return et;
diff --git a/hw/tpm/tpm_tis.h b/hw/tpm/tpm_tis.h
index 6f29a508dd..6f14896b97 100644
--- a/hw/tpm/tpm_tis.h
+++ b/hw/tpm/tpm_tis.h
@@ -19,7 +19,7 @@
  * specification.
  *
  * TPM TIS for TPM 2 implementation following TCG PC Client Platform
- * TPM Profile (PTP) Specification, Familiy 2.0, Revision 00.43
+ * TPM Profile (PTP) Specification, Family 2.0, Revision 00.43
  */
 #ifndef TPM_TPM_TIS_H
 #define TPM_TPM_TIS_H
diff --git a/hw/tpm/tpm_tis_common.c b/hw/tpm/tpm_tis_common.c
index c07c179dbc..279ce436b5 100644
--- a/hw/tpm/tpm_tis_common.c
+++ b/hw/tpm/tpm_tis_common.c
@@ -20,7 +20,7 @@
  * specification.
  *
  * TPM TIS for TPM 2 implementation following TCG PC Client Platform
- * TPM Profile (PTP) Specification, Familiy 2.0, Revision 00.43
+ * TPM Profile (PTP) Specification, Family 2.0, Revision 00.43
  */
 #include "qemu/osdep.h"
 #include "hw/irq.h"
diff --git a/hw/tpm/tpm_tis_i2c.c b/hw/tpm/tpm_tis_i2c.c
index b695fd3a46..4ecea7fa3e 100644
--- a/hw/tpm/tpm_tis_i2c.c
+++ b/hw/tpm/tpm_tis_i2c.c
@@ -13,7 +13,7 @@
  * Family 2.0, Level 00, Revision 1.00
  *
  * TPM TIS for TPM 2 implementation following TCG PC Client Platform
- * TPM Profile (PTP) Specification, Familiy 2.0, Revision 00.43
+ * TPM Profile (PTP) Specification, Family 2.0, Revision 00.43
  *
  */
 
@@ -507,7 +507,7 @@ static void tpm_tis_i2c_realizefn(DeviceState *dev, Error **errp)
     }
 
     /*
-     * Get the backend pointer. It is not initialized propery during
+     * Get the backend pointer. It is not initialized properly during
      * device_class_set_props
      */
     s->be_driver = qemu_find_tpm_be("tpm0");
diff --git a/hw/tpm/tpm_tis_isa.c b/hw/tpm/tpm_tis_isa.c
index 91e3792248..0367401586 100644
--- a/hw/tpm/tpm_tis_isa.c
+++ b/hw/tpm/tpm_tis_isa.c
@@ -19,7 +19,7 @@
  * specification.
  *
  * TPM TIS for TPM 2 implementation following TCG PC Client Platform
- * TPM Profile (PTP) Specification, Familiy 2.0, Revision 00.43
+ * TPM Profile (PTP) Specification, Family 2.0, Revision 00.43
  */
 
 #include "qemu/osdep.h"
diff --git a/hw/tpm/tpm_tis_sysbus.c b/hw/tpm/tpm_tis_sysbus.c
index 6724b3d4f6..2fc550f119 100644
--- a/hw/tpm/tpm_tis_sysbus.c
+++ b/hw/tpm/tpm_tis_sysbus.c
@@ -19,7 +19,7 @@
  * specification.
  *
  * TPM TIS for TPM 2 implementation following TCG PC Client Platform
- * TPM Profile (PTP) Specification, Familiy 2.0, Revision 00.43
+ * TPM Profile (PTP) Specification, Family 2.0, Revision 00.43
  */
 
 #include "qemu/osdep.h"
diff --git a/hw/ufs/Kconfig b/hw/ufs/Kconfig
new file mode 100644
index 0000000000..b7b3392e85
--- /dev/null
+++ b/hw/ufs/Kconfig
@@ -0,0 +1,4 @@
+config UFS_PCI
+    bool
+    default y if PCI_DEVICES
+    depends on PCI
diff --git a/hw/ufs/lu.c b/hw/ufs/lu.c
new file mode 100644
index 0000000000..e1c46bddb1
--- /dev/null
+++ b/hw/ufs/lu.c
@@ -0,0 +1,1445 @@
+/*
+ * QEMU UFS Logical Unit
+ *
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All rights reserved.
+ *
+ * Written by Jeuk Kim <jeuk20.kim@samsung.com>
+ *
+ * This code is licensed under the GNU GPL v2 or later.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/units.h"
+#include "qapi/error.h"
+#include "qemu/memalign.h"
+#include "hw/scsi/scsi.h"
+#include "scsi/constants.h"
+#include "sysemu/block-backend.h"
+#include "qemu/cutils.h"
+#include "trace.h"
+#include "ufs.h"
+
+/*
+ * The code below handling SCSI commands is copied from hw/scsi/scsi-disk.c,
+ * with minor adjustments to make it work for UFS.
+ */
+
+#define SCSI_DMA_BUF_SIZE (128 * KiB)
+#define SCSI_MAX_INQUIRY_LEN 256
+#define SCSI_INQUIRY_DATA_SIZE 36
+#define SCSI_MAX_MODE_LEN 256
+
+typedef struct UfsSCSIReq {
+    SCSIRequest req;
+    /* Both sector and sector_count are in terms of BDRV_SECTOR_SIZE bytes.  */
+    uint64_t sector;
+    uint32_t sector_count;
+    uint32_t buflen;
+    bool started;
+    bool need_fua_emulation;
+    struct iovec iov;
+    QEMUIOVector qiov;
+    BlockAcctCookie acct;
+} UfsSCSIReq;
+
+static void ufs_scsi_free_request(SCSIRequest *req)
+{
+    UfsSCSIReq *r = DO_UPCAST(UfsSCSIReq, req, req);
+
+    qemu_vfree(r->iov.iov_base);
+}
+
+static void scsi_check_condition(UfsSCSIReq *r, SCSISense sense)
+{
+    trace_ufs_scsi_check_condition(r->req.tag, sense.key, sense.asc,
+                                   sense.ascq);
+    scsi_req_build_sense(&r->req, sense);
+    scsi_req_complete(&r->req, CHECK_CONDITION);
+}
+
+static int ufs_scsi_emulate_vpd_page(SCSIRequest *req, uint8_t *outbuf,
+                                     uint32_t outbuf_len)
+{
+    UfsHc *u = UFS(req->bus->qbus.parent);
+    UfsLu *lu = DO_UPCAST(UfsLu, qdev, req->dev);
+    uint8_t page_code = req->cmd.buf[2];
+    int start, buflen = 0;
+
+    if (outbuf_len < SCSI_INQUIRY_DATA_SIZE) {
+        return -1;
+    }
+
+    outbuf[buflen++] = lu->qdev.type & 0x1f;
+    outbuf[buflen++] = page_code;
+    outbuf[buflen++] = 0x00;
+    outbuf[buflen++] = 0x00;
+    start = buflen;
+
+    switch (page_code) {
+    case 0x00: /* Supported page codes, mandatory */
+    {
+        trace_ufs_scsi_emulate_vpd_page_00(req->cmd.xfer);
+        outbuf[buflen++] = 0x00; /* list of supported pages (this page) */
+        if (u->params.serial) {
+            outbuf[buflen++] = 0x80; /* unit serial number */
+        }
+        outbuf[buflen++] = 0x87; /* mode page policy */
+        break;
+    }
+    case 0x80: /* Device serial number, optional */
+    {
+        int l;
+
+        if (!u->params.serial) {
+            trace_ufs_scsi_emulate_vpd_page_80_not_supported();
+            return -1;
+        }
+
+        l = strlen(u->params.serial);
+        if (l > SCSI_INQUIRY_DATA_SIZE) {
+            l = SCSI_INQUIRY_DATA_SIZE;
+        }
+
+        trace_ufs_scsi_emulate_vpd_page_80(req->cmd.xfer);
+        memcpy(outbuf + buflen, u->params.serial, l);
+        buflen += l;
+        break;
+    }
+    case 0x87: /* Mode Page Policy, mandatory */
+    {
+        trace_ufs_scsi_emulate_vpd_page_87(req->cmd.xfer);
+        outbuf[buflen++] = 0x3f; /* apply to all mode pages and subpages */
+        outbuf[buflen++] = 0xff;
+        outbuf[buflen++] = 0; /* shared */
+        outbuf[buflen++] = 0;
+        break;
+    }
+    default:
+        return -1;
+    }
+    /* done with EVPD */
+    assert(buflen - start <= 255);
+    outbuf[start - 1] = buflen - start;
+    return buflen;
+}
+
+static int ufs_scsi_emulate_inquiry(SCSIRequest *req, uint8_t *outbuf,
+                                    uint32_t outbuf_len)
+{
+    int buflen = 0;
+
+    if (outbuf_len < SCSI_INQUIRY_DATA_SIZE) {
+        return -1;
+    }
+
+    if (req->cmd.buf[1] & 0x1) {
+        /* Vital product data */
+        return ufs_scsi_emulate_vpd_page(req, outbuf, outbuf_len);
+    }
+
+    /* Standard INQUIRY data */
+    if (req->cmd.buf[2] != 0) {
+        return -1;
+    }
+
+    /* PAGE CODE == 0 */
+    buflen = req->cmd.xfer;
+    if (buflen > SCSI_MAX_INQUIRY_LEN) {
+        buflen = SCSI_MAX_INQUIRY_LEN;
+    }
+
+    if (is_wlun(req->lun)) {
+        outbuf[0] = TYPE_WLUN;
+    } else {
+        outbuf[0] = 0;
+    }
+    outbuf[1] = 0;
+
+    strpadcpy((char *)&outbuf[16], 16, "QEMU UFS", ' ');
+    strpadcpy((char *)&outbuf[8], 8, "QEMU", ' ');
+
+    memset(&outbuf[32], 0, 4);
+
+    outbuf[2] = 0x06; /* SPC-4 */
+    outbuf[3] = 0x2;
+
+    if (buflen > SCSI_INQUIRY_DATA_SIZE) {
+        outbuf[4] = buflen - 5; /* Additional Length = (Len - 1) - 4 */
+    } else {
+        /*
+         * If the allocation length of CDB is too small, the additional
+         * length is not adjusted
+         */
+        outbuf[4] = SCSI_INQUIRY_DATA_SIZE - 5;
+    }
+
+    /* Support TCQ.  */
+    outbuf[7] = req->bus->info->tcq ? 0x02 : 0;
+    return buflen;
+}
+
+static int mode_sense_page(UfsLu *lu, int page, uint8_t **p_outbuf,
+                           int page_control)
+{
+    static const int mode_sense_valid[0x3f] = {
+        [MODE_PAGE_CACHING] = 1,
+        [MODE_PAGE_R_W_ERROR] = 1,
+        [MODE_PAGE_CONTROL] = 1,
+    };
+
+    uint8_t *p = *p_outbuf + 2;
+    int length;
+
+    assert(page < ARRAY_SIZE(mode_sense_valid));
+    if ((mode_sense_valid[page]) == 0) {
+        return -1;
+    }
+
+    /*
+     * If Changeable Values are requested, a mask denoting those mode parameters
+     * that are changeable shall be returned. As we currently don't support
+     * parameter changes via MODE_SELECT all bits are returned set to zero.
+     * The buffer was already memset to zero by the caller of this function.
+     */
+    switch (page) {
+    case MODE_PAGE_CACHING:
+        length = 0x12;
+        if (page_control == 1 || /* Changeable Values */
+            blk_enable_write_cache(lu->qdev.conf.blk)) {
+            p[0] = 4; /* WCE */
+        }
+        break;
+
+    case MODE_PAGE_R_W_ERROR:
+        length = 10;
+        if (page_control == 1) { /* Changeable Values */
+            break;
+        }
+        p[0] = 0x80; /* Automatic Write Reallocation Enabled */
+        break;
+
+    case MODE_PAGE_CONTROL:
+        length = 10;
+        if (page_control == 1) { /* Changeable Values */
+            break;
+        }
+        p[1] = 0x10; /* Queue Algorithm modifier */
+        p[8] = 0xff; /* Busy Timeout Period */
+        p[9] = 0xff;
+        break;
+
+    default:
+        return -1;
+    }
+
+    assert(length < 256);
+    (*p_outbuf)[0] = page;
+    (*p_outbuf)[1] = length;
+    *p_outbuf += length + 2;
+    return length + 2;
+}
+
+static int ufs_scsi_emulate_mode_sense(UfsSCSIReq *r, uint8_t *outbuf)
+{
+    UfsLu *lu = DO_UPCAST(UfsLu, qdev, r->req.dev);
+    bool dbd;
+    int page, buflen, ret, page_control;
+    uint8_t *p;
+    uint8_t dev_specific_param = 0;
+
+    dbd = (r->req.cmd.buf[1] & 0x8) != 0;
+    if (!dbd) {
+        return -1;
+    }
+
+    page = r->req.cmd.buf[2] & 0x3f;
+    page_control = (r->req.cmd.buf[2] & 0xc0) >> 6;
+
+    trace_ufs_scsi_emulate_mode_sense((r->req.cmd.buf[0] == MODE_SENSE) ? 6 :
+                                                                          10,
+                                      page, r->req.cmd.xfer, page_control);
+    memset(outbuf, 0, r->req.cmd.xfer);
+    p = outbuf;
+
+    if (!blk_is_writable(lu->qdev.conf.blk)) {
+        dev_specific_param |= 0x80; /* Readonly.  */
+    }
+
+    p[2] = 0; /* Medium type.  */
+    p[3] = dev_specific_param;
+    p[6] = p[7] = 0; /* Block descriptor length.  */
+    p += 8;
+
+    if (page_control == 3) {
+        /* Saved Values */
+        scsi_check_condition(r, SENSE_CODE(SAVING_PARAMS_NOT_SUPPORTED));
+        return -1;
+    }
+
+    if (page == 0x3f) {
+        for (page = 0; page <= 0x3e; page++) {
+            mode_sense_page(lu, page, &p, page_control);
+        }
+    } else {
+        ret = mode_sense_page(lu, page, &p, page_control);
+        if (ret == -1) {
+            return -1;
+        }
+    }
+
+    buflen = p - outbuf;
+    /*
+     * The mode data length field specifies the length in bytes of the
+     * following data that is available to be transferred. The mode data
+     * length does not include itself.
+     */
+    outbuf[0] = ((buflen - 2) >> 8) & 0xff;
+    outbuf[1] = (buflen - 2) & 0xff;
+    return buflen;
+}
+
+/*
+ * scsi_handle_rw_error has two return values.  False means that the error
+ * must be ignored, true means that the error has been processed and the
+ * caller should not do anything else for this request.  Note that
+ * scsi_handle_rw_error always manages its reference counts, independent
+ * of the return value.
+ */
+static bool scsi_handle_rw_error(UfsSCSIReq *r, int ret, bool acct_failed)
+{
+    bool is_read = (r->req.cmd.mode == SCSI_XFER_FROM_DEV);
+    UfsLu *lu = DO_UPCAST(UfsLu, qdev, r->req.dev);
+    SCSISense sense = SENSE_CODE(NO_SENSE);
+    int error = 0;
+    bool req_has_sense = false;
+    BlockErrorAction action;
+    int status;
+
+    if (ret < 0) {
+        status = scsi_sense_from_errno(-ret, &sense);
+        error = -ret;
+    } else {
+        /* A passthrough command has completed with nonzero status.  */
+        status = ret;
+        if (status == CHECK_CONDITION) {
+            req_has_sense = true;
+            error = scsi_sense_buf_to_errno(r->req.sense, sizeof(r->req.sense));
+        } else {
+            error = EINVAL;
+        }
+    }
+
+    /*
+     * Check whether the error has to be handled by the guest or should
+     * rather follow the rerror=/werror= settings.  Guest-handled errors
+     * are usually retried immediately, so do not post them to QMP and
+     * do not account them as failed I/O.
+     */
+    if (req_has_sense && scsi_sense_buf_is_guest_recoverable(
+                             r->req.sense, sizeof(r->req.sense))) {
+        action = BLOCK_ERROR_ACTION_REPORT;
+        acct_failed = false;
+    } else {
+        action = blk_get_error_action(lu->qdev.conf.blk, is_read, error);
+        blk_error_action(lu->qdev.conf.blk, action, is_read, error);
+    }
+
+    switch (action) {
+    case BLOCK_ERROR_ACTION_REPORT:
+        if (acct_failed) {
+            block_acct_failed(blk_get_stats(lu->qdev.conf.blk), &r->acct);
+        }
+        if (!req_has_sense && status == CHECK_CONDITION) {
+            scsi_req_build_sense(&r->req, sense);
+        }
+        scsi_req_complete(&r->req, status);
+        return true;
+
+    case BLOCK_ERROR_ACTION_IGNORE:
+        return false;
+
+    case BLOCK_ERROR_ACTION_STOP:
+        scsi_req_retry(&r->req);
+        return true;
+
+    default:
+        g_assert_not_reached();
+    }
+}
+
+static bool ufs_scsi_req_check_error(UfsSCSIReq *r, int ret, bool acct_failed)
+{
+    if (r->req.io_canceled) {
+        scsi_req_cancel_complete(&r->req);
+        return true;
+    }
+
+    if (ret < 0) {
+        return scsi_handle_rw_error(r, ret, acct_failed);
+    }
+
+    return false;
+}
+
+static void scsi_aio_complete(void *opaque, int ret)
+{
+    UfsSCSIReq *r = (UfsSCSIReq *)opaque;
+    UfsLu *lu = DO_UPCAST(UfsLu, qdev, r->req.dev);
+
+    assert(r->req.aiocb != NULL);
+    r->req.aiocb = NULL;
+    aio_context_acquire(blk_get_aio_context(lu->qdev.conf.blk));
+    if (ufs_scsi_req_check_error(r, ret, true)) {
+        goto done;
+    }
+
+    block_acct_done(blk_get_stats(lu->qdev.conf.blk), &r->acct);
+    scsi_req_complete(&r->req, GOOD);
+
+done:
+    aio_context_release(blk_get_aio_context(lu->qdev.conf.blk));
+    scsi_req_unref(&r->req);
+}
+
+static int32_t ufs_scsi_emulate_command(SCSIRequest *req, uint8_t *buf)
+{
+    UfsSCSIReq *r = DO_UPCAST(UfsSCSIReq, req, req);
+    UfsLu *lu = DO_UPCAST(UfsLu, qdev, req->dev);
+    uint32_t last_block = 0;
+    uint8_t *outbuf;
+    int buflen;
+
+    switch (req->cmd.buf[0]) {
+    case INQUIRY:
+    case MODE_SENSE_10:
+    case START_STOP:
+    case REQUEST_SENSE:
+        break;
+
+    default:
+        if (!blk_is_available(lu->qdev.conf.blk)) {
+            scsi_check_condition(r, SENSE_CODE(NO_MEDIUM));
+            return 0;
+        }
+        break;
+    }
+
+    /*
+     * FIXME: we shouldn't return anything bigger than 4k, but the code
+     * requires the buffer to be as big as req->cmd.xfer in several
+     * places.  So, do not allow CDBs with a very large ALLOCATION
+     * LENGTH.  The real fix would be to modify scsi_read_data and
+     * dma_buf_read, so that they return data beyond the buflen
+     * as all zeros.
+     */
+    if (req->cmd.xfer > 65536) {
+        goto illegal_request;
+    }
+    r->buflen = MAX(4096, req->cmd.xfer);
+
+    if (!r->iov.iov_base) {
+        r->iov.iov_base = blk_blockalign(lu->qdev.conf.blk, r->buflen);
+    }
+
+    outbuf = r->iov.iov_base;
+    memset(outbuf, 0, r->buflen);
+    switch (req->cmd.buf[0]) {
+    case TEST_UNIT_READY:
+        assert(blk_is_available(lu->qdev.conf.blk));
+        break;
+    case INQUIRY:
+        buflen = ufs_scsi_emulate_inquiry(req, outbuf, r->buflen);
+        if (buflen < 0) {
+            goto illegal_request;
+        }
+        break;
+    case MODE_SENSE_10:
+        buflen = ufs_scsi_emulate_mode_sense(r, outbuf);
+        if (buflen < 0) {
+            goto illegal_request;
+        }
+        break;
+    case READ_CAPACITY_10:
+        /* The normal LEN field for this command is zero.  */
+        memset(outbuf, 0, 8);
+        if (lu->qdev.max_lba > 0) {
+            last_block = lu->qdev.max_lba - 1;
+        };
+        outbuf[0] = (last_block >> 24) & 0xff;
+        outbuf[1] = (last_block >> 16) & 0xff;
+        outbuf[2] = (last_block >> 8) & 0xff;
+        outbuf[3] = last_block & 0xff;
+        outbuf[4] = (lu->qdev.blocksize >> 24) & 0xff;
+        outbuf[5] = (lu->qdev.blocksize >> 16) & 0xff;
+        outbuf[6] = (lu->qdev.blocksize >> 8) & 0xff;
+        outbuf[7] = lu->qdev.blocksize & 0xff;
+        break;
+    case REQUEST_SENSE:
+        /* Just return "NO SENSE".  */
+        buflen = scsi_convert_sense(NULL, 0, outbuf, r->buflen,
+                                    (req->cmd.buf[1] & 1) == 0);
+        if (buflen < 0) {
+            goto illegal_request;
+        }
+        break;
+    case SYNCHRONIZE_CACHE:
+        /* The request is used as the AIO opaque value, so add a ref.  */
+        scsi_req_ref(&r->req);
+        block_acct_start(blk_get_stats(lu->qdev.conf.blk), &r->acct, 0,
+                         BLOCK_ACCT_FLUSH);
+        r->req.aiocb = blk_aio_flush(lu->qdev.conf.blk, scsi_aio_complete, r);
+        return 0;
+    case VERIFY_10:
+        trace_ufs_scsi_emulate_command_VERIFY((req->cmd.buf[1] >> 1) & 3);
+        if (req->cmd.buf[1] & 6) {
+            goto illegal_request;
+        }
+        break;
+    case SERVICE_ACTION_IN_16:
+        /* Service Action In subcommands. */
+        if ((req->cmd.buf[1] & 31) == SAI_READ_CAPACITY_16) {
+            trace_ufs_scsi_emulate_command_SAI_16();
+            memset(outbuf, 0, req->cmd.xfer);
+
+            if (lu->qdev.max_lba > 0) {
+                last_block = lu->qdev.max_lba - 1;
+            };
+            outbuf[0] = 0;
+            outbuf[1] = 0;
+            outbuf[2] = 0;
+            outbuf[3] = 0;
+            outbuf[4] = (last_block >> 24) & 0xff;
+            outbuf[5] = (last_block >> 16) & 0xff;
+            outbuf[6] = (last_block >> 8) & 0xff;
+            outbuf[7] = last_block & 0xff;
+            outbuf[8] = (lu->qdev.blocksize >> 24) & 0xff;
+            outbuf[9] = (lu->qdev.blocksize >> 16) & 0xff;
+            outbuf[10] = (lu->qdev.blocksize >> 8) & 0xff;
+            outbuf[11] = lu->qdev.blocksize & 0xff;
+            outbuf[12] = 0;
+            outbuf[13] = get_physical_block_exp(&lu->qdev.conf);
+
+            if (lu->unit_desc.provisioning_type == 2 ||
+                lu->unit_desc.provisioning_type == 3) {
+                outbuf[14] = 0x80;
+            }
+            /* Protection, exponent and lowest lba field left blank. */
+            break;
+        }
+        trace_ufs_scsi_emulate_command_SAI_unsupported();
+        goto illegal_request;
+    case MODE_SELECT_10:
+        trace_ufs_scsi_emulate_command_MODE_SELECT_10(r->req.cmd.xfer);
+        break;
+    case START_STOP:
+        /*
+         * TODO: START_STOP is not yet implemented. It always returns success.
+         * Revisit it when ufs power management is implemented.
+         */
+        trace_ufs_scsi_emulate_command_START_STOP();
+        break;
+    case FORMAT_UNIT:
+        trace_ufs_scsi_emulate_command_FORMAT_UNIT();
+        break;
+    case SEND_DIAGNOSTIC:
+        trace_ufs_scsi_emulate_command_SEND_DIAGNOSTIC();
+        break;
+    default:
+        trace_ufs_scsi_emulate_command_UNKNOWN(buf[0],
+                                               scsi_command_name(buf[0]));
+        scsi_check_condition(r, SENSE_CODE(INVALID_OPCODE));
+        return 0;
+    }
+    assert(!r->req.aiocb);
+    r->iov.iov_len = MIN(r->buflen, req->cmd.xfer);
+    if (r->iov.iov_len == 0) {
+        scsi_req_complete(&r->req, GOOD);
+    }
+    if (r->req.cmd.mode == SCSI_XFER_TO_DEV) {
+        assert(r->iov.iov_len == req->cmd.xfer);
+        return -r->iov.iov_len;
+    } else {
+        return r->iov.iov_len;
+    }
+
+illegal_request:
+    if (r->req.status == -1) {
+        scsi_check_condition(r, SENSE_CODE(INVALID_FIELD));
+    }
+    return 0;
+}
+
+static void ufs_scsi_emulate_read_data(SCSIRequest *req)
+{
+    UfsSCSIReq *r = DO_UPCAST(UfsSCSIReq, req, req);
+    int buflen = r->iov.iov_len;
+
+    if (buflen) {
+        trace_ufs_scsi_emulate_read_data(buflen);
+        r->iov.iov_len = 0;
+        r->started = true;
+        scsi_req_data(&r->req, buflen);
+        return;
+    }
+
+    /* This also clears the sense buffer for REQUEST SENSE.  */
+    scsi_req_complete(&r->req, GOOD);
+}
+
+static int ufs_scsi_check_mode_select(UfsLu *lu, int page, uint8_t *inbuf,
+                                      int inlen)
+{
+    uint8_t mode_current[SCSI_MAX_MODE_LEN];
+    uint8_t mode_changeable[SCSI_MAX_MODE_LEN];
+    uint8_t *p;
+    int len, expected_len, changeable_len, i;
+
+    /*
+     * The input buffer does not include the page header, so it is
+     * off by 2 bytes.
+     */
+    expected_len = inlen + 2;
+    if (expected_len > SCSI_MAX_MODE_LEN) {
+        return -1;
+    }
+
+    /* MODE_PAGE_ALLS is only valid for MODE SENSE commands */
+    if (page == MODE_PAGE_ALLS) {
+        return -1;
+    }
+
+    p = mode_current;
+    memset(mode_current, 0, inlen + 2);
+    len = mode_sense_page(lu, page, &p, 0);
+    if (len < 0 || len != expected_len) {
+        return -1;
+    }
+
+    p = mode_changeable;
+    memset(mode_changeable, 0, inlen + 2);
+    changeable_len = mode_sense_page(lu, page, &p, 1);
+    assert(changeable_len == len);
+
+    /*
+     * Check that unchangeable bits are the same as what MODE SENSE
+     * would return.
+     */
+    for (i = 2; i < len; i++) {
+        if (((mode_current[i] ^ inbuf[i - 2]) & ~mode_changeable[i]) != 0) {
+            return -1;
+        }
+    }
+    return 0;
+}
+
+static void ufs_scsi_apply_mode_select(UfsLu *lu, int page, uint8_t *p)
+{
+    switch (page) {
+    case MODE_PAGE_CACHING:
+        blk_set_enable_write_cache(lu->qdev.conf.blk, (p[0] & 4) != 0);
+        break;
+
+    default:
+        break;
+    }
+}
+
+static int mode_select_pages(UfsSCSIReq *r, uint8_t *p, int len, bool change)
+{
+    UfsLu *lu = DO_UPCAST(UfsLu, qdev, r->req.dev);
+
+    while (len > 0) {
+        int page, page_len;
+
+        page = p[0] & 0x3f;
+        if (p[0] & 0x40) {
+            goto invalid_param;
+        } else {
+            if (len < 2) {
+                goto invalid_param_len;
+            }
+            page_len = p[1];
+            p += 2;
+            len -= 2;
+        }
+
+        if (page_len > len) {
+            goto invalid_param_len;
+        }
+
+        if (!change) {
+            if (ufs_scsi_check_mode_select(lu, page, p, page_len) < 0) {
+                goto invalid_param;
+            }
+        } else {
+            ufs_scsi_apply_mode_select(lu, page, p);
+        }
+
+        p += page_len;
+        len -= page_len;
+    }
+    return 0;
+
+invalid_param:
+    scsi_check_condition(r, SENSE_CODE(INVALID_PARAM));
+    return -1;
+
+invalid_param_len:
+    scsi_check_condition(r, SENSE_CODE(INVALID_PARAM_LEN));
+    return -1;
+}
+
+static void ufs_scsi_emulate_mode_select(UfsSCSIReq *r, uint8_t *inbuf)
+{
+    UfsLu *lu = DO_UPCAST(UfsLu, qdev, r->req.dev);
+    uint8_t *p = inbuf;
+    int len = r->req.cmd.xfer;
+    int hdr_len = 8;
+    int bd_len;
+    int pass;
+
+    /* We only support PF=1, SP=0.  */
+    if ((r->req.cmd.buf[1] & 0x11) != 0x10) {
+        goto invalid_field;
+    }
+
+    if (len < hdr_len) {
+        goto invalid_param_len;
+    }
+
+    bd_len = lduw_be_p(&p[6]);
+    if (bd_len != 0) {
+        goto invalid_param;
+    }
+
+    len -= hdr_len;
+    p += hdr_len;
+
+    /* Ensure no change is made if there is an error!  */
+    for (pass = 0; pass < 2; pass++) {
+        if (mode_select_pages(r, p, len, pass == 1) < 0) {
+            assert(pass == 0);
+            return;
+        }
+    }
+
+    if (!blk_enable_write_cache(lu->qdev.conf.blk)) {
+        /* The request is used as the AIO opaque value, so add a ref.  */
+        scsi_req_ref(&r->req);
+        block_acct_start(blk_get_stats(lu->qdev.conf.blk), &r->acct, 0,
+                         BLOCK_ACCT_FLUSH);
+        r->req.aiocb = blk_aio_flush(lu->qdev.conf.blk, scsi_aio_complete, r);
+        return;
+    }
+
+    scsi_req_complete(&r->req, GOOD);
+    return;
+
+invalid_param:
+    scsi_check_condition(r, SENSE_CODE(INVALID_PARAM));
+    return;
+
+invalid_param_len:
+    scsi_check_condition(r, SENSE_CODE(INVALID_PARAM_LEN));
+    return;
+
+invalid_field:
+    scsi_check_condition(r, SENSE_CODE(INVALID_FIELD));
+}
+
+/* block_num and nb_blocks expected to be in qdev blocksize */
+static inline bool check_lba_range(UfsLu *lu, uint64_t block_num,
+                                   uint32_t nb_blocks)
+{
+    /*
+     * The first line tests that no overflow happens when computing the last
+     * block.  The second line tests that the last accessed block is in
+     * range.
+     *
+     * Careful, the computations should not underflow for nb_blocks == 0,
+     * and a 0-block read to the first LBA beyond the end of device is
+     * valid.
+     */
+    return (block_num <= block_num + nb_blocks &&
+            block_num + nb_blocks <= lu->qdev.max_lba + 1);
+}
+
+static void ufs_scsi_emulate_write_data(SCSIRequest *req)
+{
+    UfsSCSIReq *r = DO_UPCAST(UfsSCSIReq, req, req);
+
+    if (r->iov.iov_len) {
+        int buflen = r->iov.iov_len;
+        trace_ufs_scsi_emulate_write_data(buflen);
+        r->iov.iov_len = 0;
+        scsi_req_data(&r->req, buflen);
+        return;
+    }
+
+    switch (req->cmd.buf[0]) {
+    case MODE_SELECT_10:
+        /* This also clears the sense buffer for REQUEST SENSE.  */
+        ufs_scsi_emulate_mode_select(r, r->iov.iov_base);
+        break;
+    default:
+        abort();
+    }
+}
+
+/* Return a pointer to the data buffer.  */
+static uint8_t *ufs_scsi_get_buf(SCSIRequest *req)
+{
+    UfsSCSIReq *r = DO_UPCAST(UfsSCSIReq, req, req);
+
+    return (uint8_t *)r->iov.iov_base;
+}
+
+static int32_t ufs_scsi_dma_command(SCSIRequest *req, uint8_t *buf)
+{
+    UfsSCSIReq *r = DO_UPCAST(UfsSCSIReq, req, req);
+    UfsLu *lu = DO_UPCAST(UfsLu, qdev, req->dev);
+    uint32_t len;
+    uint8_t command;
+
+    command = buf[0];
+
+    if (!blk_is_available(lu->qdev.conf.blk)) {
+        scsi_check_condition(r, SENSE_CODE(NO_MEDIUM));
+        return 0;
+    }
+
+    len = scsi_data_cdb_xfer(r->req.cmd.buf);
+    switch (command) {
+    case READ_6:
+    case READ_10:
+        trace_ufs_scsi_dma_command_READ(r->req.cmd.lba, len);
+        if (r->req.cmd.buf[1] & 0xe0) {
+            goto illegal_request;
+        }
+        if (!check_lba_range(lu, r->req.cmd.lba, len)) {
+            goto illegal_lba;
+        }
+        r->sector = r->req.cmd.lba * (lu->qdev.blocksize / BDRV_SECTOR_SIZE);
+        r->sector_count = len * (lu->qdev.blocksize / BDRV_SECTOR_SIZE);
+        break;
+    case WRITE_6:
+    case WRITE_10:
+        trace_ufs_scsi_dma_command_WRITE(r->req.cmd.lba, len);
+        if (!blk_is_writable(lu->qdev.conf.blk)) {
+            scsi_check_condition(r, SENSE_CODE(WRITE_PROTECTED));
+            return 0;
+        }
+        if (r->req.cmd.buf[1] & 0xe0) {
+            goto illegal_request;
+        }
+        if (!check_lba_range(lu, r->req.cmd.lba, len)) {
+            goto illegal_lba;
+        }
+        r->sector = r->req.cmd.lba * (lu->qdev.blocksize / BDRV_SECTOR_SIZE);
+        r->sector_count = len * (lu->qdev.blocksize / BDRV_SECTOR_SIZE);
+        break;
+    default:
+        abort();
+    illegal_request:
+        scsi_check_condition(r, SENSE_CODE(INVALID_FIELD));
+        return 0;
+    illegal_lba:
+        scsi_check_condition(r, SENSE_CODE(LBA_OUT_OF_RANGE));
+        return 0;
+    }
+    r->need_fua_emulation = ((r->req.cmd.buf[1] & 8) != 0);
+    if (r->sector_count == 0) {
+        scsi_req_complete(&r->req, GOOD);
+    }
+    assert(r->iov.iov_len == 0);
+    if (r->req.cmd.mode == SCSI_XFER_TO_DEV) {
+        return -r->sector_count * BDRV_SECTOR_SIZE;
+    } else {
+        return r->sector_count * BDRV_SECTOR_SIZE;
+    }
+}
+
+static void scsi_write_do_fua(UfsSCSIReq *r)
+{
+    UfsLu *lu = DO_UPCAST(UfsLu, qdev, r->req.dev);
+
+    assert(r->req.aiocb == NULL);
+    assert(!r->req.io_canceled);
+
+    if (r->need_fua_emulation) {
+        block_acct_start(blk_get_stats(lu->qdev.conf.blk), &r->acct, 0,
+                         BLOCK_ACCT_FLUSH);
+        r->req.aiocb = blk_aio_flush(lu->qdev.conf.blk, scsi_aio_complete, r);
+        return;
+    }
+
+    scsi_req_complete(&r->req, GOOD);
+    scsi_req_unref(&r->req);
+}
+
+static void scsi_dma_complete_noio(UfsSCSIReq *r, int ret)
+{
+    assert(r->req.aiocb == NULL);
+    if (ufs_scsi_req_check_error(r, ret, false)) {
+        goto done;
+    }
+
+    r->sector += r->sector_count;
+    r->sector_count = 0;
+    if (r->req.cmd.mode == SCSI_XFER_TO_DEV) {
+        scsi_write_do_fua(r);
+        return;
+    } else {
+        scsi_req_complete(&r->req, GOOD);
+    }
+
+done:
+    scsi_req_unref(&r->req);
+}
+
+static void scsi_dma_complete(void *opaque, int ret)
+{
+    UfsSCSIReq *r = (UfsSCSIReq *)opaque;
+    UfsLu *lu = DO_UPCAST(UfsLu, qdev, r->req.dev);
+
+    assert(r->req.aiocb != NULL);
+    r->req.aiocb = NULL;
+
+    aio_context_acquire(blk_get_aio_context(lu->qdev.conf.blk));
+    if (ret < 0) {
+        block_acct_failed(blk_get_stats(lu->qdev.conf.blk), &r->acct);
+    } else {
+        block_acct_done(blk_get_stats(lu->qdev.conf.blk), &r->acct);
+    }
+    scsi_dma_complete_noio(r, ret);
+    aio_context_release(blk_get_aio_context(lu->qdev.conf.blk));
+}
+
+static BlockAIOCB *scsi_dma_readv(int64_t offset, QEMUIOVector *iov,
+                                  BlockCompletionFunc *cb, void *cb_opaque,
+                                  void *opaque)
+{
+    UfsSCSIReq *r = opaque;
+    UfsLu *lu = DO_UPCAST(UfsLu, qdev, r->req.dev);
+    return blk_aio_preadv(lu->qdev.conf.blk, offset, iov, 0, cb, cb_opaque);
+}
+
+static void scsi_init_iovec(UfsSCSIReq *r, size_t size)
+{
+    UfsLu *lu = DO_UPCAST(UfsLu, qdev, r->req.dev);
+
+    if (!r->iov.iov_base) {
+        r->buflen = size;
+        r->iov.iov_base = blk_blockalign(lu->qdev.conf.blk, r->buflen);
+    }
+    r->iov.iov_len = MIN(r->sector_count * BDRV_SECTOR_SIZE, r->buflen);
+    qemu_iovec_init_external(&r->qiov, &r->iov, 1);
+}
+
+static void scsi_read_complete_noio(UfsSCSIReq *r, int ret)
+{
+    uint32_t n;
+
+    assert(r->req.aiocb == NULL);
+    if (ufs_scsi_req_check_error(r, ret, false)) {
+        goto done;
+    }
+
+    n = r->qiov.size / BDRV_SECTOR_SIZE;
+    r->sector += n;
+    r->sector_count -= n;
+    scsi_req_data(&r->req, r->qiov.size);
+
+done:
+    scsi_req_unref(&r->req);
+}
+
+static void scsi_read_complete(void *opaque, int ret)
+{
+    UfsSCSIReq *r = (UfsSCSIReq *)opaque;
+    UfsLu *lu = DO_UPCAST(UfsLu, qdev, r->req.dev);
+
+    assert(r->req.aiocb != NULL);
+    r->req.aiocb = NULL;
+    trace_ufs_scsi_read_data_count(r->sector_count);
+    aio_context_acquire(blk_get_aio_context(lu->qdev.conf.blk));
+    if (ret < 0) {
+        block_acct_failed(blk_get_stats(lu->qdev.conf.blk), &r->acct);
+    } else {
+        block_acct_done(blk_get_stats(lu->qdev.conf.blk), &r->acct);
+        trace_ufs_scsi_read_complete(r->req.tag, r->qiov.size);
+    }
+    scsi_read_complete_noio(r, ret);
+    aio_context_release(blk_get_aio_context(lu->qdev.conf.blk));
+}
+
+/* Actually issue a read to the block device.  */
+static void scsi_do_read(UfsSCSIReq *r, int ret)
+{
+    UfsLu *lu = DO_UPCAST(UfsLu, qdev, r->req.dev);
+
+    assert(r->req.aiocb == NULL);
+    if (ufs_scsi_req_check_error(r, ret, false)) {
+        goto done;
+    }
+
+    /* The request is used as the AIO opaque value, so add a ref.  */
+    scsi_req_ref(&r->req);
+
+    if (r->req.sg) {
+        dma_acct_start(lu->qdev.conf.blk, &r->acct, r->req.sg, BLOCK_ACCT_READ);
+        r->req.residual -= r->req.sg->size;
+        r->req.aiocb = dma_blk_io(
+            blk_get_aio_context(lu->qdev.conf.blk), r->req.sg,
+            r->sector << BDRV_SECTOR_BITS, BDRV_SECTOR_SIZE, scsi_dma_readv, r,
+            scsi_dma_complete, r, DMA_DIRECTION_FROM_DEVICE);
+    } else {
+        scsi_init_iovec(r, SCSI_DMA_BUF_SIZE);
+        block_acct_start(blk_get_stats(lu->qdev.conf.blk), &r->acct,
+                         r->qiov.size, BLOCK_ACCT_READ);
+        r->req.aiocb = scsi_dma_readv(r->sector << BDRV_SECTOR_BITS, &r->qiov,
+                                      scsi_read_complete, r, r);
+    }
+
+done:
+    scsi_req_unref(&r->req);
+}
+
+static void scsi_do_read_cb(void *opaque, int ret)
+{
+    UfsSCSIReq *r = (UfsSCSIReq *)opaque;
+    UfsLu *lu = DO_UPCAST(UfsLu, qdev, r->req.dev);
+
+    assert(r->req.aiocb != NULL);
+    r->req.aiocb = NULL;
+
+    aio_context_acquire(blk_get_aio_context(lu->qdev.conf.blk));
+    if (ret < 0) {
+        block_acct_failed(blk_get_stats(lu->qdev.conf.blk), &r->acct);
+    } else {
+        block_acct_done(blk_get_stats(lu->qdev.conf.blk), &r->acct);
+    }
+    scsi_do_read(opaque, ret);
+    aio_context_release(blk_get_aio_context(lu->qdev.conf.blk));
+}
+
+/* Read more data from scsi device into buffer.  */
+static void scsi_read_data(SCSIRequest *req)
+{
+    UfsSCSIReq *r = DO_UPCAST(UfsSCSIReq, req, req);
+    UfsLu *lu = DO_UPCAST(UfsLu, qdev, r->req.dev);
+    bool first;
+
+    trace_ufs_scsi_read_data_count(r->sector_count);
+    if (r->sector_count == 0) {
+        /* This also clears the sense buffer for REQUEST SENSE.  */
+        scsi_req_complete(&r->req, GOOD);
+        return;
+    }
+
+    /* No data transfer may already be in progress */
+    assert(r->req.aiocb == NULL);
+
+    /* The request is used as the AIO opaque value, so add a ref.  */
+    scsi_req_ref(&r->req);
+    if (r->req.cmd.mode == SCSI_XFER_TO_DEV) {
+        trace_ufs_scsi_read_data_invalid();
+        scsi_read_complete_noio(r, -EINVAL);
+        return;
+    }
+
+    if (!blk_is_available(req->dev->conf.blk)) {
+        scsi_read_complete_noio(r, -ENOMEDIUM);
+        return;
+    }
+
+    first = !r->started;
+    r->started = true;
+    if (first && r->need_fua_emulation) {
+        block_acct_start(blk_get_stats(lu->qdev.conf.blk), &r->acct, 0,
+                         BLOCK_ACCT_FLUSH);
+        r->req.aiocb = blk_aio_flush(lu->qdev.conf.blk, scsi_do_read_cb, r);
+    } else {
+        scsi_do_read(r, 0);
+    }
+}
+
+static void scsi_write_complete_noio(UfsSCSIReq *r, int ret)
+{
+    uint32_t n;
+
+    assert(r->req.aiocb == NULL);
+    if (ufs_scsi_req_check_error(r, ret, false)) {
+        goto done;
+    }
+
+    n = r->qiov.size / BDRV_SECTOR_SIZE;
+    r->sector += n;
+    r->sector_count -= n;
+    if (r->sector_count == 0) {
+        scsi_write_do_fua(r);
+        return;
+    } else {
+        scsi_init_iovec(r, SCSI_DMA_BUF_SIZE);
+        trace_ufs_scsi_write_complete_noio(r->req.tag, r->qiov.size);
+        scsi_req_data(&r->req, r->qiov.size);
+    }
+
+done:
+    scsi_req_unref(&r->req);
+}
+
+static void scsi_write_complete(void *opaque, int ret)
+{
+    UfsSCSIReq *r = (UfsSCSIReq *)opaque;
+    UfsLu *lu = DO_UPCAST(UfsLu, qdev, r->req.dev);
+
+    assert(r->req.aiocb != NULL);
+    r->req.aiocb = NULL;
+
+    aio_context_acquire(blk_get_aio_context(lu->qdev.conf.blk));
+    if (ret < 0) {
+        block_acct_failed(blk_get_stats(lu->qdev.conf.blk), &r->acct);
+    } else {
+        block_acct_done(blk_get_stats(lu->qdev.conf.blk), &r->acct);
+    }
+    scsi_write_complete_noio(r, ret);
+    aio_context_release(blk_get_aio_context(lu->qdev.conf.blk));
+}
+
+static BlockAIOCB *scsi_dma_writev(int64_t offset, QEMUIOVector *iov,
+                                   BlockCompletionFunc *cb, void *cb_opaque,
+                                   void *opaque)
+{
+    UfsSCSIReq *r = opaque;
+    UfsLu *lu = DO_UPCAST(UfsLu, qdev, r->req.dev);
+    return blk_aio_pwritev(lu->qdev.conf.blk, offset, iov, 0, cb, cb_opaque);
+}
+
+static void scsi_write_data(SCSIRequest *req)
+{
+    UfsSCSIReq *r = DO_UPCAST(UfsSCSIReq, req, req);
+    UfsLu *lu = DO_UPCAST(UfsLu, qdev, r->req.dev);
+
+    /* No data transfer may already be in progress */
+    assert(r->req.aiocb == NULL);
+
+    /* The request is used as the AIO opaque value, so add a ref.  */
+    scsi_req_ref(&r->req);
+    if (r->req.cmd.mode != SCSI_XFER_TO_DEV) {
+        trace_ufs_scsi_write_data_invalid();
+        scsi_write_complete_noio(r, -EINVAL);
+        return;
+    }
+
+    if (!r->req.sg && !r->qiov.size) {
+        /* Called for the first time.  Ask the driver to send us more data.  */
+        r->started = true;
+        scsi_write_complete_noio(r, 0);
+        return;
+    }
+    if (!blk_is_available(req->dev->conf.blk)) {
+        scsi_write_complete_noio(r, -ENOMEDIUM);
+        return;
+    }
+
+    if (r->req.sg) {
+        dma_acct_start(lu->qdev.conf.blk, &r->acct, r->req.sg,
+                       BLOCK_ACCT_WRITE);
+        r->req.residual -= r->req.sg->size;
+        r->req.aiocb = dma_blk_io(
+            blk_get_aio_context(lu->qdev.conf.blk), r->req.sg,
+            r->sector << BDRV_SECTOR_BITS, BDRV_SECTOR_SIZE, scsi_dma_writev, r,
+            scsi_dma_complete, r, DMA_DIRECTION_TO_DEVICE);
+    } else {
+        block_acct_start(blk_get_stats(lu->qdev.conf.blk), &r->acct,
+                         r->qiov.size, BLOCK_ACCT_WRITE);
+        r->req.aiocb = scsi_dma_writev(r->sector << BDRV_SECTOR_BITS, &r->qiov,
+                                       scsi_write_complete, r, r);
+    }
+}
+
+static const SCSIReqOps ufs_scsi_emulate_reqops = {
+    .size = sizeof(UfsSCSIReq),
+    .free_req = ufs_scsi_free_request,
+    .send_command = ufs_scsi_emulate_command,
+    .read_data = ufs_scsi_emulate_read_data,
+    .write_data = ufs_scsi_emulate_write_data,
+    .get_buf = ufs_scsi_get_buf,
+};
+
+static const SCSIReqOps ufs_scsi_dma_reqops = {
+    .size = sizeof(UfsSCSIReq),
+    .free_req = ufs_scsi_free_request,
+    .send_command = ufs_scsi_dma_command,
+    .read_data = scsi_read_data,
+    .write_data = scsi_write_data,
+    .get_buf = ufs_scsi_get_buf,
+};
+
+/*
+ * Following commands are not yet supported
+ * PRE_FETCH(10),
+ * UNMAP,
+ * WRITE_BUFFER, READ_BUFFER,
+ * SECURITY_PROTOCOL_IN, SECURITY_PROTOCOL_OUT
+ */
+static const SCSIReqOps *const ufs_scsi_reqops_dispatch[256] = {
+    [TEST_UNIT_READY] = &ufs_scsi_emulate_reqops,
+    [INQUIRY] = &ufs_scsi_emulate_reqops,
+    [MODE_SENSE_10] = &ufs_scsi_emulate_reqops,
+    [START_STOP] = &ufs_scsi_emulate_reqops,
+    [READ_CAPACITY_10] = &ufs_scsi_emulate_reqops,
+    [REQUEST_SENSE] = &ufs_scsi_emulate_reqops,
+    [SYNCHRONIZE_CACHE] = &ufs_scsi_emulate_reqops,
+    [MODE_SELECT_10] = &ufs_scsi_emulate_reqops,
+    [VERIFY_10] = &ufs_scsi_emulate_reqops,
+    [FORMAT_UNIT] = &ufs_scsi_emulate_reqops,
+    [SERVICE_ACTION_IN_16] = &ufs_scsi_emulate_reqops,
+    [SEND_DIAGNOSTIC] = &ufs_scsi_emulate_reqops,
+
+    [READ_6] = &ufs_scsi_dma_reqops,
+    [READ_10] = &ufs_scsi_dma_reqops,
+    [WRITE_6] = &ufs_scsi_dma_reqops,
+    [WRITE_10] = &ufs_scsi_dma_reqops,
+};
+
+static SCSIRequest *scsi_new_request(SCSIDevice *dev, uint32_t tag,
+                                     uint32_t lun, uint8_t *buf,
+                                     void *hba_private)
+{
+    UfsLu *lu = DO_UPCAST(UfsLu, qdev, dev);
+    SCSIRequest *req;
+    const SCSIReqOps *ops;
+    uint8_t command;
+
+    command = buf[0];
+    ops = ufs_scsi_reqops_dispatch[command];
+    if (!ops) {
+        ops = &ufs_scsi_emulate_reqops;
+    }
+    req = scsi_req_alloc(ops, &lu->qdev, tag, lun, hba_private);
+
+    return req;
+}
+
+static Property ufs_lu_props[] = {
+    DEFINE_PROP_DRIVE("drive", UfsLu, qdev.conf.blk),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static bool ufs_lu_brdv_init(UfsLu *lu, Error **errp)
+{
+    SCSIDevice *dev = &lu->qdev;
+    bool read_only;
+
+    if (!lu->qdev.conf.blk) {
+        error_setg(errp, "drive property not set");
+        return false;
+    }
+
+    if (!blkconf_blocksizes(&lu->qdev.conf, errp)) {
+        return false;
+    }
+
+    if (blk_get_aio_context(lu->qdev.conf.blk) != qemu_get_aio_context() &&
+        !lu->qdev.hba_supports_iothread) {
+        error_setg(errp, "HBA does not support iothreads");
+        return false;
+    }
+
+    read_only = !blk_supports_write_perm(lu->qdev.conf.blk);
+
+    if (!blkconf_apply_backend_options(&dev->conf, read_only,
+                                       dev->type == TYPE_DISK, errp)) {
+        return false;
+    }
+
+    if (blk_is_sg(lu->qdev.conf.blk)) {
+        error_setg(errp, "unwanted /dev/sg*");
+        return false;
+    }
+
+    blk_iostatus_enable(lu->qdev.conf.blk);
+    return true;
+}
+
+static bool ufs_add_lu(UfsHc *u, UfsLu *lu, Error **errp)
+{
+    BlockBackend *blk = lu->qdev.conf.blk;
+    int64_t brdv_len = blk_getlength(blk);
+    uint64_t raw_dev_cap =
+        be64_to_cpu(u->geometry_desc.total_raw_device_capacity);
+
+    if (u->device_desc.number_lu >= UFS_MAX_LUS) {
+        error_setg(errp, "ufs host controller has too many logical units.");
+        return false;
+    }
+
+    if (u->lus[lu->lun] != NULL) {
+        error_setg(errp, "ufs logical unit %d already exists.", lu->lun);
+        return false;
+    }
+
+    u->lus[lu->lun] = lu;
+    u->device_desc.number_lu++;
+    raw_dev_cap += (brdv_len >> UFS_GEOMETRY_CAPACITY_SHIFT);
+    u->geometry_desc.total_raw_device_capacity = cpu_to_be64(raw_dev_cap);
+    return true;
+}
+
+static inline uint8_t ufs_log2(uint64_t input)
+{
+    int log = 0;
+    while (input >>= 1) {
+        log++;
+    }
+    return log;
+}
+
+static void ufs_init_lu(UfsLu *lu)
+{
+    BlockBackend *blk = lu->qdev.conf.blk;
+    int64_t brdv_len = blk_getlength(blk);
+
+    lu->lun = lu->qdev.lun;
+    memset(&lu->unit_desc, 0, sizeof(lu->unit_desc));
+    lu->unit_desc.length = sizeof(UnitDescriptor);
+    lu->unit_desc.descriptor_idn = UFS_QUERY_DESC_IDN_UNIT;
+    lu->unit_desc.lu_enable = 0x01;
+    lu->unit_desc.logical_block_size = ufs_log2(lu->qdev.blocksize);
+    lu->unit_desc.unit_index = lu->qdev.lun;
+    lu->unit_desc.logical_block_count =
+        cpu_to_be64(brdv_len / (1 << lu->unit_desc.logical_block_size));
+}
+
+static bool ufs_lu_check_constraints(UfsLu *lu, Error **errp)
+{
+    if (!lu->qdev.conf.blk) {
+        error_setg(errp, "drive property not set");
+        return false;
+    }
+
+    if (lu->qdev.channel != 0) {
+        error_setg(errp, "ufs logical unit does not support channel");
+        return false;
+    }
+
+    if (lu->qdev.lun >= UFS_MAX_LUS) {
+        error_setg(errp, "lun must be between 1 and %d", UFS_MAX_LUS - 1);
+        return false;
+    }
+
+    return true;
+}
+
+static void ufs_lu_realize(SCSIDevice *dev, Error **errp)
+{
+    UfsLu *lu = DO_UPCAST(UfsLu, qdev, dev);
+    BusState *s = qdev_get_parent_bus(&dev->qdev);
+    UfsHc *u = UFS(s->parent);
+    AioContext *ctx = NULL;
+    uint64_t nb_sectors, nb_blocks;
+
+    if (!ufs_lu_check_constraints(lu, errp)) {
+        return;
+    }
+
+    if (lu->qdev.conf.blk) {
+        ctx = blk_get_aio_context(lu->qdev.conf.blk);
+        aio_context_acquire(ctx);
+        if (!blkconf_blocksizes(&lu->qdev.conf, errp)) {
+            goto out;
+        }
+    }
+    lu->qdev.blocksize = UFS_BLOCK_SIZE;
+    blk_get_geometry(lu->qdev.conf.blk, &nb_sectors);
+    nb_blocks = nb_sectors / (lu->qdev.blocksize / BDRV_SECTOR_SIZE);
+    if (nb_blocks > UINT32_MAX) {
+        nb_blocks = UINT32_MAX;
+    }
+    lu->qdev.max_lba = nb_blocks;
+    lu->qdev.type = TYPE_DISK;
+
+    ufs_init_lu(lu);
+    if (!ufs_add_lu(u, lu, errp)) {
+        goto out;
+    }
+
+    ufs_lu_brdv_init(lu, errp);
+out:
+    if (ctx) {
+        aio_context_release(ctx);
+    }
+}
+
+static void ufs_lu_unrealize(SCSIDevice *dev)
+{
+    UfsLu *lu = DO_UPCAST(UfsLu, qdev, dev);
+
+    blk_drain(lu->qdev.conf.blk);
+}
+
+static void ufs_wlu_realize(DeviceState *qdev, Error **errp)
+{
+    UfsWLu *wlu = UFSWLU(qdev);
+    SCSIDevice *dev = &wlu->qdev;
+
+    if (!is_wlun(dev->lun)) {
+        error_setg(errp, "not well-known logical unit number");
+        return;
+    }
+
+    QTAILQ_INIT(&dev->requests);
+}
+
+static void ufs_lu_class_init(ObjectClass *oc, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(oc);
+    SCSIDeviceClass *sc = SCSI_DEVICE_CLASS(oc);
+
+    sc->realize = ufs_lu_realize;
+    sc->unrealize = ufs_lu_unrealize;
+    sc->alloc_req = scsi_new_request;
+    dc->bus_type = TYPE_UFS_BUS;
+    device_class_set_props(dc, ufs_lu_props);
+    dc->desc = "Virtual UFS logical unit";
+}
+
+static void ufs_wlu_class_init(ObjectClass *oc, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(oc);
+    SCSIDeviceClass *sc = SCSI_DEVICE_CLASS(oc);
+
+    /*
+     * The realize() function of TYPE_SCSI_DEVICE causes a segmentation fault
+     * if a block drive does not exist. Define a new realize function for
+     * well-known LUs that do not have a block drive.
+     */
+    dc->realize = ufs_wlu_realize;
+    sc->alloc_req = scsi_new_request;
+    dc->bus_type = TYPE_UFS_BUS;
+    dc->desc = "Virtual UFS well-known logical unit";
+}
+
+static const TypeInfo ufs_lu_info = {
+    .name = TYPE_UFS_LU,
+    .parent = TYPE_SCSI_DEVICE,
+    .class_init = ufs_lu_class_init,
+    .instance_size = sizeof(UfsLu),
+};
+
+static const TypeInfo ufs_wlu_info = {
+    .name = TYPE_UFS_WLU,
+    .parent = TYPE_SCSI_DEVICE,
+    .class_init = ufs_wlu_class_init,
+    .instance_size = sizeof(UfsWLu),
+};
+
+static void ufs_lu_register_types(void)
+{
+    type_register_static(&ufs_lu_info);
+    type_register_static(&ufs_wlu_info);
+}
+
+type_init(ufs_lu_register_types)
diff --git a/hw/ufs/meson.build b/hw/ufs/meson.build
new file mode 100644
index 0000000000..6e68328b93
--- /dev/null
+++ b/hw/ufs/meson.build
@@ -0,0 +1 @@
+system_ss.add(when: 'CONFIG_UFS_PCI', if_true: files('ufs.c', 'lu.c'))
diff --git a/hw/ufs/trace-events b/hw/ufs/trace-events
new file mode 100644
index 0000000000..1e55fb0d08
--- /dev/null
+++ b/hw/ufs/trace-events
@@ -0,0 +1,58 @@
+# ufs.c
+ufs_irq_raise(void) "INTx"
+ufs_irq_lower(void) "INTx"
+ufs_mmio_read(uint64_t addr, uint64_t data, unsigned size) "addr 0x%"PRIx64" data 0x%"PRIx64" size %d"
+ufs_mmio_write(uint64_t addr, uint64_t data, unsigned size) "addr 0x%"PRIx64" data 0x%"PRIx64" size %d"
+ufs_process_db(uint32_t slot) "UTRLDBR slot %"PRIu32""
+ufs_process_req(uint32_t slot) "UTRLDBR slot %"PRIu32""
+ufs_complete_req(uint32_t slot) "UTRLDBR slot %"PRIu32""
+ufs_sendback_req(uint32_t slot) "UTRLDBR slot %"PRIu32""
+ufs_exec_nop_cmd(uint32_t slot) "UTRLDBR slot %"PRIu32""
+ufs_exec_scsi_cmd(uint32_t slot, uint8_t lun, uint8_t opcode) "slot %"PRIu32", lun 0x%"PRIx8", opcode 0x%"PRIx8""
+ufs_exec_query_cmd(uint32_t slot, uint8_t opcode) "slot %"PRIu32", opcode 0x%"PRIx8""
+ufs_process_uiccmd(uint32_t uiccmd, uint32_t ucmdarg1, uint32_t ucmdarg2, uint32_t ucmdarg3) "uiccmd 0x%"PRIx32", ucmdarg1 0x%"PRIx32", ucmdarg2 0x%"PRIx32", ucmdarg3 0x%"PRIx32""
+
+# lu.c
+ufs_scsi_check_condition(uint32_t tag, uint8_t key, uint8_t asc, uint8_t ascq) "Command complete tag=0x%x sense=%d/%d/%d"
+ufs_scsi_read_complete(uint32_t tag, size_t size) "Data ready tag=0x%x len=%zd"
+ufs_scsi_read_data_count(uint32_t sector_count) "Read sector_count=%d"
+ufs_scsi_read_data_invalid(void) "Data transfer direction invalid"
+ufs_scsi_write_complete_noio(uint32_t tag, size_t size) "Write complete tag=0x%x more=%zd"
+ufs_scsi_write_data_invalid(void) "Data transfer direction invalid"
+ufs_scsi_emulate_vpd_page_00(size_t xfer) "Inquiry EVPD[Supported pages] buffer size %zd"
+ufs_scsi_emulate_vpd_page_80_not_supported(void) "Inquiry EVPD[Serial number] not supported"
+ufs_scsi_emulate_vpd_page_80(size_t xfer) "Inquiry EVPD[Serial number] buffer size %zd"
+ufs_scsi_emulate_vpd_page_87(size_t xfer) "Inquiry EVPD[Mode Page Policy] buffer size %zd"
+ufs_scsi_emulate_mode_sense(int cmd, int page, size_t xfer, int control) "Mode Sense(%d) (page %d, xfer %zd, page_control %d)"
+ufs_scsi_emulate_read_data(int buflen) "Read buf_len=%d"
+ufs_scsi_emulate_write_data(int buflen) "Write buf_len=%d"
+ufs_scsi_emulate_command_START_STOP(void) "START STOP UNIT"
+ufs_scsi_emulate_command_FORMAT_UNIT(void) "FORMAT UNIT"
+ufs_scsi_emulate_command_SEND_DIAGNOSTIC(void) "SEND DIAGNOSTIC"
+ufs_scsi_emulate_command_SAI_16(void) "SAI READ CAPACITY(16)"
+ufs_scsi_emulate_command_SAI_unsupported(void) "Unsupported Service Action In"
+ufs_scsi_emulate_command_MODE_SELECT_10(size_t xfer) "Mode Select(10) (len %zd)"
+ufs_scsi_emulate_command_VERIFY(int bytchk) "Verify (bytchk %d)"
+ufs_scsi_emulate_command_UNKNOWN(int cmd, const char *name) "Unknown SCSI command (0x%2.2x=%s)"
+ufs_scsi_dma_command_READ(uint64_t lba, uint32_t len) "Read (block %" PRIu64 ", count %u)"
+ufs_scsi_dma_command_WRITE(uint64_t lba, int len) "Write (block %" PRIu64 ", count %u)"
+
+# error condition
+ufs_err_dma_read_utrd(uint32_t slot, uint64_t addr) "failed to read utrd. UTRLDBR slot %"PRIu32", UTRD dma addr %"PRIu64""
+ufs_err_dma_read_req_upiu(uint32_t slot, uint64_t addr) "failed to read req upiu. UTRLDBR slot %"PRIu32", request upiu addr %"PRIu64""
+ufs_err_dma_read_prdt(uint32_t slot, uint64_t addr) "failed to read prdt. UTRLDBR slot %"PRIu32", prdt addr %"PRIu64""
+ufs_err_dma_write_utrd(uint32_t slot, uint64_t addr) "failed to write utrd. UTRLDBR slot %"PRIu32", UTRD dma addr %"PRIu64""
+ufs_err_dma_write_rsp_upiu(uint32_t slot, uint64_t addr) "failed to write rsp upiu. UTRLDBR slot %"PRIu32", response upiu addr %"PRIu64""
+ufs_err_utrl_slot_error(uint32_t slot) "UTRLDBR slot %"PRIu32" is in error"
+ufs_err_utrl_slot_busy(uint32_t slot) "UTRLDBR slot %"PRIu32" is busy"
+ufs_err_unsupport_register_offset(uint32_t offset) "Register offset 0x%"PRIx32" is not yet supported"
+ufs_err_invalid_register_offset(uint32_t offset) "Register offset 0x%"PRIx32" is invalid"
+ufs_err_scsi_cmd_invalid_lun(uint8_t lun) "scsi command has invalid lun: 0x%"PRIx8""
+ufs_err_query_flag_not_readable(uint8_t idn) "query flag idn 0x%"PRIx8" is denied to read"
+ufs_err_query_flag_not_writable(uint8_t idn) "query flag idn 0x%"PRIx8" is denied to write"
+ufs_err_query_attr_not_readable(uint8_t idn) "query attribute idn 0x%"PRIx8" is denied to read"
+ufs_err_query_attr_not_writable(uint8_t idn) "query attribute idn 0x%"PRIx8" is denied to write"
+ufs_err_query_invalid_opcode(uint8_t opcode) "query request has invalid opcode. opcode: 0x%"PRIx8""
+ufs_err_query_invalid_idn(uint8_t opcode, uint8_t idn) "query request has invalid idn. opcode: 0x%"PRIx8", idn 0x%"PRIx8""
+ufs_err_query_invalid_index(uint8_t opcode, uint8_t index) "query request has invalid index. opcode: 0x%"PRIx8", index 0x%"PRIx8""
+ufs_err_invalid_trans_code(uint32_t slot, uint8_t trans_code) "request upiu has invalid transaction code. slot: %"PRIu32", trans_code: 0x%"PRIx8""
diff --git a/hw/ufs/trace.h b/hw/ufs/trace.h
new file mode 100644
index 0000000000..2dbd6397c3
--- /dev/null
+++ b/hw/ufs/trace.h
@@ -0,0 +1 @@
+#include "trace/trace-hw_ufs.h"
diff --git a/hw/ufs/ufs.c b/hw/ufs/ufs.c
new file mode 100644
index 0000000000..0ecedb9aed
--- /dev/null
+++ b/hw/ufs/ufs.c
@@ -0,0 +1,1502 @@
+/*
+ * QEMU Universal Flash Storage (UFS) Controller
+ *
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All rights reserved.
+ *
+ * Written by Jeuk Kim <jeuk20.kim@samsung.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+/**
+ * Reference Specs: https://www.jedec.org/, 3.1
+ *
+ * Usage
+ * -----
+ *
+ * Add options:
+ *      -drive file=<file>,if=none,id=<drive_id>
+ *      -device ufs,serial=<serial>,id=<bus_name>, \
+ *              nutrs=<N[optional]>,nutmrs=<N[optional]>
+ *      -device ufs-lu,drive=<drive_id>,bus=<bus_name>
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "migration/vmstate.h"
+#include "trace.h"
+#include "ufs.h"
+
+/* The QEMU-UFS device follows spec version 3.1 */
+#define UFS_SPEC_VER 0x0310
+#define UFS_MAX_NUTRS 32
+#define UFS_MAX_NUTMRS 8
+
+static MemTxResult ufs_addr_read(UfsHc *u, hwaddr addr, void *buf, int size)
+{
+    hwaddr hi = addr + size - 1;
+
+    if (hi < addr) {
+        return MEMTX_DECODE_ERROR;
+    }
+
+    if (!FIELD_EX32(u->reg.cap, CAP, 64AS) && (hi >> 32)) {
+        return MEMTX_DECODE_ERROR;
+    }
+
+    return pci_dma_read(PCI_DEVICE(u), addr, buf, size);
+}
+
+static MemTxResult ufs_addr_write(UfsHc *u, hwaddr addr, const void *buf,
+                                  int size)
+{
+    hwaddr hi = addr + size - 1;
+    if (hi < addr) {
+        return MEMTX_DECODE_ERROR;
+    }
+
+    if (!FIELD_EX32(u->reg.cap, CAP, 64AS) && (hi >> 32)) {
+        return MEMTX_DECODE_ERROR;
+    }
+
+    return pci_dma_write(PCI_DEVICE(u), addr, buf, size);
+}
+
+static void ufs_complete_req(UfsRequest *req, UfsReqResult req_result);
+
+static inline hwaddr ufs_get_utrd_addr(UfsHc *u, uint32_t slot)
+{
+    hwaddr utrl_base_addr = (((hwaddr)u->reg.utrlbau) << 32) + u->reg.utrlba;
+    hwaddr utrd_addr = utrl_base_addr + slot * sizeof(UtpTransferReqDesc);
+
+    return utrd_addr;
+}
+
+static inline hwaddr ufs_get_req_upiu_base_addr(const UtpTransferReqDesc *utrd)
+{
+    uint32_t cmd_desc_base_addr_lo =
+        le32_to_cpu(utrd->command_desc_base_addr_lo);
+    uint32_t cmd_desc_base_addr_hi =
+        le32_to_cpu(utrd->command_desc_base_addr_hi);
+
+    return (((hwaddr)cmd_desc_base_addr_hi) << 32) + cmd_desc_base_addr_lo;
+}
+
+static inline hwaddr ufs_get_rsp_upiu_base_addr(const UtpTransferReqDesc *utrd)
+{
+    hwaddr req_upiu_base_addr = ufs_get_req_upiu_base_addr(utrd);
+    uint32_t rsp_upiu_byte_off =
+        le16_to_cpu(utrd->response_upiu_offset) * sizeof(uint32_t);
+    return req_upiu_base_addr + rsp_upiu_byte_off;
+}
+
+static MemTxResult ufs_dma_read_utrd(UfsRequest *req)
+{
+    UfsHc *u = req->hc;
+    hwaddr utrd_addr = ufs_get_utrd_addr(u, req->slot);
+    MemTxResult ret;
+
+    ret = ufs_addr_read(u, utrd_addr, &req->utrd, sizeof(req->utrd));
+    if (ret) {
+        trace_ufs_err_dma_read_utrd(req->slot, utrd_addr);
+    }
+    return ret;
+}
+
+static MemTxResult ufs_dma_read_req_upiu(UfsRequest *req)
+{
+    UfsHc *u = req->hc;
+    hwaddr req_upiu_base_addr = ufs_get_req_upiu_base_addr(&req->utrd);
+    UtpUpiuReq *req_upiu = &req->req_upiu;
+    uint32_t copy_size;
+    uint16_t data_segment_length;
+    MemTxResult ret;
+
+    /*
+     * To know the size of the req_upiu, we need to read the
+     * data_segment_length in the header first.
+     */
+    ret = ufs_addr_read(u, req_upiu_base_addr, &req_upiu->header,
+                        sizeof(UtpUpiuHeader));
+    if (ret) {
+        trace_ufs_err_dma_read_req_upiu(req->slot, req_upiu_base_addr);
+        return ret;
+    }
+    data_segment_length = be16_to_cpu(req_upiu->header.data_segment_length);
+
+    copy_size = sizeof(UtpUpiuHeader) + UFS_TRANSACTION_SPECIFIC_FIELD_SIZE +
+                data_segment_length;
+
+    ret = ufs_addr_read(u, req_upiu_base_addr, &req->req_upiu, copy_size);
+    if (ret) {
+        trace_ufs_err_dma_read_req_upiu(req->slot, req_upiu_base_addr);
+    }
+    return ret;
+}
+
+static MemTxResult ufs_dma_read_prdt(UfsRequest *req)
+{
+    UfsHc *u = req->hc;
+    uint16_t prdt_len = le16_to_cpu(req->utrd.prd_table_length);
+    uint16_t prdt_byte_off =
+        le16_to_cpu(req->utrd.prd_table_offset) * sizeof(uint32_t);
+    uint32_t prdt_size = prdt_len * sizeof(UfshcdSgEntry);
+    g_autofree UfshcdSgEntry *prd_entries = NULL;
+    hwaddr req_upiu_base_addr, prdt_base_addr;
+    int err;
+
+    assert(!req->sg);
+
+    if (prdt_size == 0) {
+        return MEMTX_OK;
+    }
+    prd_entries = g_new(UfshcdSgEntry, prdt_size);
+
+    req_upiu_base_addr = ufs_get_req_upiu_base_addr(&req->utrd);
+    prdt_base_addr = req_upiu_base_addr + prdt_byte_off;
+
+    err = ufs_addr_read(u, prdt_base_addr, prd_entries, prdt_size);
+    if (err) {
+        trace_ufs_err_dma_read_prdt(req->slot, prdt_base_addr);
+        return err;
+    }
+
+    req->sg = g_malloc0(sizeof(QEMUSGList));
+    pci_dma_sglist_init(req->sg, PCI_DEVICE(u), prdt_len);
+
+    for (uint16_t i = 0; i < prdt_len; ++i) {
+        hwaddr data_dma_addr = le64_to_cpu(prd_entries[i].addr);
+        uint32_t data_byte_count = le32_to_cpu(prd_entries[i].size) + 1;
+        qemu_sglist_add(req->sg, data_dma_addr, data_byte_count);
+    }
+    return MEMTX_OK;
+}
+
+static MemTxResult ufs_dma_read_upiu(UfsRequest *req)
+{
+    MemTxResult ret;
+
+    ret = ufs_dma_read_utrd(req);
+    if (ret) {
+        return ret;
+    }
+
+    ret = ufs_dma_read_req_upiu(req);
+    if (ret) {
+        return ret;
+    }
+
+    ret = ufs_dma_read_prdt(req);
+    if (ret) {
+        return ret;
+    }
+
+    return 0;
+}
+
+static MemTxResult ufs_dma_write_utrd(UfsRequest *req)
+{
+    UfsHc *u = req->hc;
+    hwaddr utrd_addr = ufs_get_utrd_addr(u, req->slot);
+    MemTxResult ret;
+
+    ret = ufs_addr_write(u, utrd_addr, &req->utrd, sizeof(req->utrd));
+    if (ret) {
+        trace_ufs_err_dma_write_utrd(req->slot, utrd_addr);
+    }
+    return ret;
+}
+
+static MemTxResult ufs_dma_write_rsp_upiu(UfsRequest *req)
+{
+    UfsHc *u = req->hc;
+    hwaddr rsp_upiu_base_addr = ufs_get_rsp_upiu_base_addr(&req->utrd);
+    uint32_t rsp_upiu_byte_len =
+        le16_to_cpu(req->utrd.response_upiu_length) * sizeof(uint32_t);
+    uint16_t data_segment_length =
+        be16_to_cpu(req->rsp_upiu.header.data_segment_length);
+    uint32_t copy_size = sizeof(UtpUpiuHeader) +
+                         UFS_TRANSACTION_SPECIFIC_FIELD_SIZE +
+                         data_segment_length;
+    MemTxResult ret;
+
+    if (copy_size > rsp_upiu_byte_len) {
+        copy_size = rsp_upiu_byte_len;
+    }
+
+    ret = ufs_addr_write(u, rsp_upiu_base_addr, &req->rsp_upiu, copy_size);
+    if (ret) {
+        trace_ufs_err_dma_write_rsp_upiu(req->slot, rsp_upiu_base_addr);
+    }
+    return ret;
+}
+
+static MemTxResult ufs_dma_write_upiu(UfsRequest *req)
+{
+    MemTxResult ret;
+
+    ret = ufs_dma_write_rsp_upiu(req);
+    if (ret) {
+        return ret;
+    }
+
+    return ufs_dma_write_utrd(req);
+}
+
+static void ufs_irq_check(UfsHc *u)
+{
+    PCIDevice *pci = PCI_DEVICE(u);
+
+    if ((u->reg.is & UFS_INTR_MASK) & u->reg.ie) {
+        trace_ufs_irq_raise();
+        pci_irq_assert(pci);
+    } else {
+        trace_ufs_irq_lower();
+        pci_irq_deassert(pci);
+    }
+}
+
+static void ufs_process_db(UfsHc *u, uint32_t val)
+{
+    unsigned long doorbell;
+    uint32_t slot;
+    uint32_t nutrs = u->params.nutrs;
+    UfsRequest *req;
+
+    val &= ~u->reg.utrldbr;
+    if (!val) {
+        return;
+    }
+
+    doorbell = val;
+    slot = find_first_bit(&doorbell, nutrs);
+
+    while (slot < nutrs) {
+        req = &u->req_list[slot];
+        if (req->state == UFS_REQUEST_ERROR) {
+            trace_ufs_err_utrl_slot_error(req->slot);
+            return;
+        }
+
+        if (req->state != UFS_REQUEST_IDLE) {
+            trace_ufs_err_utrl_slot_busy(req->slot);
+            return;
+        }
+
+        trace_ufs_process_db(slot);
+        req->state = UFS_REQUEST_READY;
+        slot = find_next_bit(&doorbell, nutrs, slot + 1);
+    }
+
+    qemu_bh_schedule(u->doorbell_bh);
+}
+
+static void ufs_process_uiccmd(UfsHc *u, uint32_t val)
+{
+    trace_ufs_process_uiccmd(val, u->reg.ucmdarg1, u->reg.ucmdarg2,
+                             u->reg.ucmdarg3);
+    /*
+     * Only the essential uic commands for running drivers on Linux and Windows
+     * are implemented.
+     */
+    switch (val) {
+    case UFS_UIC_CMD_DME_LINK_STARTUP:
+        u->reg.hcs = FIELD_DP32(u->reg.hcs, HCS, DP, 1);
+        u->reg.hcs = FIELD_DP32(u->reg.hcs, HCS, UTRLRDY, 1);
+        u->reg.hcs = FIELD_DP32(u->reg.hcs, HCS, UTMRLRDY, 1);
+        u->reg.ucmdarg2 = UFS_UIC_CMD_RESULT_SUCCESS;
+        break;
+    /* TODO: Revisit it when Power Management is implemented */
+    case UFS_UIC_CMD_DME_HIBER_ENTER:
+        u->reg.is = FIELD_DP32(u->reg.is, IS, UHES, 1);
+        u->reg.hcs = FIELD_DP32(u->reg.hcs, HCS, UPMCRS, UFS_PWR_LOCAL);
+        u->reg.ucmdarg2 = UFS_UIC_CMD_RESULT_SUCCESS;
+        break;
+    case UFS_UIC_CMD_DME_HIBER_EXIT:
+        u->reg.is = FIELD_DP32(u->reg.is, IS, UHXS, 1);
+        u->reg.hcs = FIELD_DP32(u->reg.hcs, HCS, UPMCRS, UFS_PWR_LOCAL);
+        u->reg.ucmdarg2 = UFS_UIC_CMD_RESULT_SUCCESS;
+        break;
+    default:
+        u->reg.ucmdarg2 = UFS_UIC_CMD_RESULT_FAILURE;
+    }
+
+    u->reg.is = FIELD_DP32(u->reg.is, IS, UCCS, 1);
+
+    ufs_irq_check(u);
+}
+
+static void ufs_write_reg(UfsHc *u, hwaddr offset, uint32_t data, unsigned size)
+{
+    switch (offset) {
+    case A_IS:
+        u->reg.is &= ~data;
+        ufs_irq_check(u);
+        break;
+    case A_IE:
+        u->reg.ie = data;
+        ufs_irq_check(u);
+        break;
+    case A_HCE:
+        if (!FIELD_EX32(u->reg.hce, HCE, HCE) && FIELD_EX32(data, HCE, HCE)) {
+            u->reg.hcs = FIELD_DP32(u->reg.hcs, HCS, UCRDY, 1);
+            u->reg.hce = FIELD_DP32(u->reg.hce, HCE, HCE, 1);
+        } else if (FIELD_EX32(u->reg.hce, HCE, HCE) &&
+                   !FIELD_EX32(data, HCE, HCE)) {
+            u->reg.hcs = 0;
+            u->reg.hce = FIELD_DP32(u->reg.hce, HCE, HCE, 0);
+        }
+        break;
+    case A_UTRLBA:
+        u->reg.utrlba = data & R_UTRLBA_UTRLBA_MASK;
+        break;
+    case A_UTRLBAU:
+        u->reg.utrlbau = data;
+        break;
+    case A_UTRLDBR:
+        ufs_process_db(u, data);
+        u->reg.utrldbr |= data;
+        break;
+    case A_UTRLRSR:
+        u->reg.utrlrsr = data;
+        break;
+    case A_UTRLCNR:
+        u->reg.utrlcnr &= ~data;
+        break;
+    case A_UTMRLBA:
+        u->reg.utmrlba = data & R_UTMRLBA_UTMRLBA_MASK;
+        break;
+    case A_UTMRLBAU:
+        u->reg.utmrlbau = data;
+        break;
+    case A_UICCMD:
+        ufs_process_uiccmd(u, data);
+        break;
+    case A_UCMDARG1:
+        u->reg.ucmdarg1 = data;
+        break;
+    case A_UCMDARG2:
+        u->reg.ucmdarg2 = data;
+        break;
+    case A_UCMDARG3:
+        u->reg.ucmdarg3 = data;
+        break;
+    case A_UTRLCLR:
+    case A_UTMRLDBR:
+    case A_UTMRLCLR:
+    case A_UTMRLRSR:
+        trace_ufs_err_unsupport_register_offset(offset);
+        break;
+    default:
+        trace_ufs_err_invalid_register_offset(offset);
+        break;
+    }
+}
+
+static uint64_t ufs_mmio_read(void *opaque, hwaddr addr, unsigned size)
+{
+    UfsHc *u = (UfsHc *)opaque;
+    uint8_t *ptr = (uint8_t *)&u->reg;
+    uint64_t value;
+
+    if (addr > sizeof(u->reg) - size) {
+        trace_ufs_err_invalid_register_offset(addr);
+        return 0;
+    }
+
+    value = *(uint32_t *)(ptr + addr);
+    trace_ufs_mmio_read(addr, value, size);
+    return value;
+}
+
+static void ufs_mmio_write(void *opaque, hwaddr addr, uint64_t data,
+                           unsigned size)
+{
+    UfsHc *u = (UfsHc *)opaque;
+
+    if (addr > sizeof(u->reg) - size) {
+        trace_ufs_err_invalid_register_offset(addr);
+        return;
+    }
+
+    trace_ufs_mmio_write(addr, data, size);
+    ufs_write_reg(u, addr, data, size);
+}
+
+static const MemoryRegionOps ufs_mmio_ops = {
+    .read = ufs_mmio_read,
+    .write = ufs_mmio_write,
+    .endianness = DEVICE_LITTLE_ENDIAN,
+    .impl = {
+        .min_access_size = 4,
+        .max_access_size = 4,
+    },
+};
+
+static QEMUSGList *ufs_get_sg_list(SCSIRequest *scsi_req)
+{
+    UfsRequest *req = scsi_req->hba_private;
+    return req->sg;
+}
+
+static void ufs_build_upiu_sense_data(UfsRequest *req, SCSIRequest *scsi_req)
+{
+    req->rsp_upiu.sr.sense_data_len = cpu_to_be16(scsi_req->sense_len);
+    assert(scsi_req->sense_len <= SCSI_SENSE_LEN);
+    memcpy(req->rsp_upiu.sr.sense_data, scsi_req->sense, scsi_req->sense_len);
+}
+
+static void ufs_build_upiu_header(UfsRequest *req, uint8_t trans_type,
+                                  uint8_t flags, uint8_t response,
+                                  uint8_t scsi_status,
+                                  uint16_t data_segment_length)
+{
+    memcpy(&req->rsp_upiu.header, &req->req_upiu.header, sizeof(UtpUpiuHeader));
+    req->rsp_upiu.header.trans_type = trans_type;
+    req->rsp_upiu.header.flags = flags;
+    req->rsp_upiu.header.response = response;
+    req->rsp_upiu.header.scsi_status = scsi_status;
+    req->rsp_upiu.header.data_segment_length = cpu_to_be16(data_segment_length);
+}
+
+static void ufs_scsi_command_complete(SCSIRequest *scsi_req, size_t resid)
+{
+    UfsRequest *req = scsi_req->hba_private;
+    int16_t status = scsi_req->status;
+    uint32_t expected_len = be32_to_cpu(req->req_upiu.sc.exp_data_transfer_len);
+    uint32_t transfered_len = scsi_req->cmd.xfer - resid;
+    uint8_t flags = 0, response = UFS_COMMAND_RESULT_SUCESS;
+    uint16_t data_segment_length;
+
+    if (expected_len > transfered_len) {
+        req->rsp_upiu.sr.residual_transfer_count =
+            cpu_to_be32(expected_len - transfered_len);
+        flags |= UFS_UPIU_FLAG_UNDERFLOW;
+    } else if (expected_len < transfered_len) {
+        req->rsp_upiu.sr.residual_transfer_count =
+            cpu_to_be32(transfered_len - expected_len);
+        flags |= UFS_UPIU_FLAG_OVERFLOW;
+    }
+
+    if (status != 0) {
+        ufs_build_upiu_sense_data(req, scsi_req);
+        response = UFS_COMMAND_RESULT_FAIL;
+    }
+
+    data_segment_length = cpu_to_be16(scsi_req->sense_len +
+                                      sizeof(req->rsp_upiu.sr.sense_data_len));
+    ufs_build_upiu_header(req, UFS_UPIU_TRANSACTION_RESPONSE, flags, response,
+                          status, data_segment_length);
+
+    ufs_complete_req(req, UFS_REQUEST_SUCCESS);
+
+    scsi_req->hba_private = NULL;
+    scsi_req_unref(scsi_req);
+}
+
+static const struct SCSIBusInfo ufs_scsi_info = {
+    .tcq = true,
+    .max_target = 0,
+    .max_lun = UFS_MAX_LUS,
+    .max_channel = 0,
+
+    .get_sg_list = ufs_get_sg_list,
+    .complete = ufs_scsi_command_complete,
+};
+
+static UfsReqResult ufs_exec_scsi_cmd(UfsRequest *req)
+{
+    UfsHc *u = req->hc;
+    uint8_t lun = req->req_upiu.header.lun;
+    uint8_t task_tag = req->req_upiu.header.task_tag;
+    SCSIDevice *dev = NULL;
+
+    trace_ufs_exec_scsi_cmd(req->slot, lun, req->req_upiu.sc.cdb[0]);
+
+    if (!is_wlun(lun)) {
+        if (lun >= u->device_desc.number_lu) {
+            trace_ufs_err_scsi_cmd_invalid_lun(lun);
+            return UFS_REQUEST_FAIL;
+        } else if (u->lus[lun] == NULL) {
+            trace_ufs_err_scsi_cmd_invalid_lun(lun);
+            return UFS_REQUEST_FAIL;
+        }
+    }
+
+    switch (lun) {
+    case UFS_UPIU_REPORT_LUNS_WLUN:
+        dev = &u->report_wlu->qdev;
+        break;
+    case UFS_UPIU_UFS_DEVICE_WLUN:
+        dev = &u->dev_wlu->qdev;
+        break;
+    case UFS_UPIU_BOOT_WLUN:
+        dev = &u->boot_wlu->qdev;
+        break;
+    case UFS_UPIU_RPMB_WLUN:
+        dev = &u->rpmb_wlu->qdev;
+        break;
+    default:
+        dev = &u->lus[lun]->qdev;
+    }
+
+    SCSIRequest *scsi_req = scsi_req_new(
+        dev, task_tag, lun, req->req_upiu.sc.cdb, UFS_CDB_SIZE, req);
+
+    uint32_t len = scsi_req_enqueue(scsi_req);
+    if (len) {
+        scsi_req_continue(scsi_req);
+    }
+
+    return UFS_REQUEST_NO_COMPLETE;
+}
+
+static UfsReqResult ufs_exec_nop_cmd(UfsRequest *req)
+{
+    trace_ufs_exec_nop_cmd(req->slot);
+    ufs_build_upiu_header(req, UFS_UPIU_TRANSACTION_NOP_IN, 0, 0, 0, 0);
+    return UFS_REQUEST_SUCCESS;
+}
+
+/*
+ * This defines the permission of flags based on their IDN. There are some
+ * things that are declared read-only, which is inconsistent with the ufs spec,
+ * because we want to return an error for features that are not yet supported.
+ */
+static const int flag_permission[UFS_QUERY_FLAG_IDN_COUNT] = {
+    [UFS_QUERY_FLAG_IDN_FDEVICEINIT] = UFS_QUERY_FLAG_READ | UFS_QUERY_FLAG_SET,
+    /* Write protection is not supported */
+    [UFS_QUERY_FLAG_IDN_PERMANENT_WPE] = UFS_QUERY_FLAG_READ,
+    [UFS_QUERY_FLAG_IDN_PWR_ON_WPE] = UFS_QUERY_FLAG_READ,
+    [UFS_QUERY_FLAG_IDN_BKOPS_EN] = UFS_QUERY_FLAG_READ | UFS_QUERY_FLAG_SET |
+                                    UFS_QUERY_FLAG_CLEAR |
+                                    UFS_QUERY_FLAG_TOGGLE,
+    [UFS_QUERY_FLAG_IDN_LIFE_SPAN_MODE_ENABLE] =
+        UFS_QUERY_FLAG_READ | UFS_QUERY_FLAG_SET | UFS_QUERY_FLAG_CLEAR |
+        UFS_QUERY_FLAG_TOGGLE,
+    /* Purge Operation is not supported */
+    [UFS_QUERY_FLAG_IDN_PURGE_ENABLE] = UFS_QUERY_FLAG_NONE,
+    /* Refresh Operation is not supported */
+    [UFS_QUERY_FLAG_IDN_REFRESH_ENABLE] = UFS_QUERY_FLAG_NONE,
+    /* Physical Resource Removal is not supported */
+    [UFS_QUERY_FLAG_IDN_FPHYRESOURCEREMOVAL] = UFS_QUERY_FLAG_READ,
+    [UFS_QUERY_FLAG_IDN_BUSY_RTC] = UFS_QUERY_FLAG_READ,
+    [UFS_QUERY_FLAG_IDN_PERMANENTLY_DISABLE_FW_UPDATE] = UFS_QUERY_FLAG_READ,
+    /* Write Booster is not supported */
+    [UFS_QUERY_FLAG_IDN_WB_EN] = UFS_QUERY_FLAG_READ,
+    [UFS_QUERY_FLAG_IDN_WB_BUFF_FLUSH_EN] = UFS_QUERY_FLAG_READ,
+    [UFS_QUERY_FLAG_IDN_WB_BUFF_FLUSH_DURING_HIBERN8] = UFS_QUERY_FLAG_READ,
+};
+
+static inline QueryRespCode ufs_flag_check_idn_valid(uint8_t idn, int op)
+{
+    if (idn >= UFS_QUERY_FLAG_IDN_COUNT) {
+        return UFS_QUERY_RESULT_INVALID_IDN;
+    }
+
+    if (!(flag_permission[idn] & op)) {
+        if (op == UFS_QUERY_FLAG_READ) {
+            trace_ufs_err_query_flag_not_readable(idn);
+            return UFS_QUERY_RESULT_NOT_READABLE;
+        }
+        trace_ufs_err_query_flag_not_writable(idn);
+        return UFS_QUERY_RESULT_NOT_WRITEABLE;
+    }
+
+    return UFS_QUERY_RESULT_SUCCESS;
+}
+
+static const int attr_permission[UFS_QUERY_ATTR_IDN_COUNT] = {
+    /* booting is not supported */
+    [UFS_QUERY_ATTR_IDN_BOOT_LU_EN] = UFS_QUERY_ATTR_READ,
+    [UFS_QUERY_ATTR_IDN_POWER_MODE] = UFS_QUERY_ATTR_READ,
+    [UFS_QUERY_ATTR_IDN_ACTIVE_ICC_LVL] =
+        UFS_QUERY_ATTR_READ | UFS_QUERY_ATTR_WRITE,
+    [UFS_QUERY_ATTR_IDN_OOO_DATA_EN] = UFS_QUERY_ATTR_READ,
+    [UFS_QUERY_ATTR_IDN_BKOPS_STATUS] = UFS_QUERY_ATTR_READ,
+    [UFS_QUERY_ATTR_IDN_PURGE_STATUS] = UFS_QUERY_ATTR_READ,
+    [UFS_QUERY_ATTR_IDN_MAX_DATA_IN] =
+        UFS_QUERY_ATTR_READ | UFS_QUERY_ATTR_WRITE,
+    [UFS_QUERY_ATTR_IDN_MAX_DATA_OUT] =
+        UFS_QUERY_ATTR_READ | UFS_QUERY_ATTR_WRITE,
+    [UFS_QUERY_ATTR_IDN_DYN_CAP_NEEDED] = UFS_QUERY_ATTR_READ,
+    [UFS_QUERY_ATTR_IDN_REF_CLK_FREQ] =
+        UFS_QUERY_ATTR_READ | UFS_QUERY_ATTR_WRITE,
+    [UFS_QUERY_ATTR_IDN_CONF_DESC_LOCK] = UFS_QUERY_ATTR_READ,
+    [UFS_QUERY_ATTR_IDN_MAX_NUM_OF_RTT] =
+        UFS_QUERY_ATTR_READ | UFS_QUERY_ATTR_WRITE,
+    [UFS_QUERY_ATTR_IDN_EE_CONTROL] =
+        UFS_QUERY_ATTR_READ | UFS_QUERY_ATTR_WRITE,
+    [UFS_QUERY_ATTR_IDN_EE_STATUS] = UFS_QUERY_ATTR_READ,
+    [UFS_QUERY_ATTR_IDN_SECONDS_PASSED] = UFS_QUERY_ATTR_WRITE,
+    [UFS_QUERY_ATTR_IDN_CNTX_CONF] = UFS_QUERY_ATTR_READ,
+    [UFS_QUERY_ATTR_IDN_FFU_STATUS] = UFS_QUERY_ATTR_READ,
+    [UFS_QUERY_ATTR_IDN_PSA_STATE] = UFS_QUERY_ATTR_READ | UFS_QUERY_ATTR_WRITE,
+    [UFS_QUERY_ATTR_IDN_PSA_DATA_SIZE] =
+        UFS_QUERY_ATTR_READ | UFS_QUERY_ATTR_WRITE,
+    [UFS_QUERY_ATTR_IDN_REF_CLK_GATING_WAIT_TIME] = UFS_QUERY_ATTR_READ,
+    [UFS_QUERY_ATTR_IDN_CASE_ROUGH_TEMP] = UFS_QUERY_ATTR_READ,
+    [UFS_QUERY_ATTR_IDN_HIGH_TEMP_BOUND] = UFS_QUERY_ATTR_READ,
+    [UFS_QUERY_ATTR_IDN_LOW_TEMP_BOUND] = UFS_QUERY_ATTR_READ,
+    [UFS_QUERY_ATTR_IDN_THROTTLING_STATUS] = UFS_QUERY_ATTR_READ,
+    [UFS_QUERY_ATTR_IDN_WB_FLUSH_STATUS] = UFS_QUERY_ATTR_READ,
+    [UFS_QUERY_ATTR_IDN_AVAIL_WB_BUFF_SIZE] = UFS_QUERY_ATTR_READ,
+    [UFS_QUERY_ATTR_IDN_WB_BUFF_LIFE_TIME_EST] = UFS_QUERY_ATTR_READ,
+    [UFS_QUERY_ATTR_IDN_CURR_WB_BUFF_SIZE] = UFS_QUERY_ATTR_READ,
+    /* refresh operation is not supported */
+    [UFS_QUERY_ATTR_IDN_REFRESH_STATUS] = UFS_QUERY_ATTR_READ,
+    [UFS_QUERY_ATTR_IDN_REFRESH_FREQ] = UFS_QUERY_ATTR_READ,
+    [UFS_QUERY_ATTR_IDN_REFRESH_UNIT] = UFS_QUERY_ATTR_READ,
+};
+
+static inline QueryRespCode ufs_attr_check_idn_valid(uint8_t idn, int op)
+{
+    if (idn >= UFS_QUERY_ATTR_IDN_COUNT) {
+        return UFS_QUERY_RESULT_INVALID_IDN;
+    }
+
+    if (!(attr_permission[idn] & op)) {
+        if (op == UFS_QUERY_ATTR_READ) {
+            trace_ufs_err_query_attr_not_readable(idn);
+            return UFS_QUERY_RESULT_NOT_READABLE;
+        }
+        trace_ufs_err_query_attr_not_writable(idn);
+        return UFS_QUERY_RESULT_NOT_WRITEABLE;
+    }
+
+    return UFS_QUERY_RESULT_SUCCESS;
+}
+
+static QueryRespCode ufs_exec_query_flag(UfsRequest *req, int op)
+{
+    UfsHc *u = req->hc;
+    uint8_t idn = req->req_upiu.qr.idn;
+    uint32_t value;
+    QueryRespCode ret;
+
+    ret = ufs_flag_check_idn_valid(idn, op);
+    if (ret) {
+        return ret;
+    }
+
+    if (idn == UFS_QUERY_FLAG_IDN_FDEVICEINIT) {
+        value = 0;
+    } else if (op == UFS_QUERY_FLAG_READ) {
+        value = *(((uint8_t *)&u->flags) + idn);
+    } else if (op == UFS_QUERY_FLAG_SET) {
+        value = 1;
+    } else if (op == UFS_QUERY_FLAG_CLEAR) {
+        value = 0;
+    } else if (op == UFS_QUERY_FLAG_TOGGLE) {
+        value = *(((uint8_t *)&u->flags) + idn);
+        value = !value;
+    } else {
+        trace_ufs_err_query_invalid_opcode(op);
+        return UFS_QUERY_RESULT_INVALID_OPCODE;
+    }
+
+    *(((uint8_t *)&u->flags) + idn) = value;
+    req->rsp_upiu.qr.value = cpu_to_be32(value);
+    return UFS_QUERY_RESULT_SUCCESS;
+}
+
+static uint32_t ufs_read_attr_value(UfsHc *u, uint8_t idn)
+{
+    switch (idn) {
+    case UFS_QUERY_ATTR_IDN_BOOT_LU_EN:
+        return u->attributes.boot_lun_en;
+    case UFS_QUERY_ATTR_IDN_POWER_MODE:
+        return u->attributes.current_power_mode;
+    case UFS_QUERY_ATTR_IDN_ACTIVE_ICC_LVL:
+        return u->attributes.active_icc_level;
+    case UFS_QUERY_ATTR_IDN_OOO_DATA_EN:
+        return u->attributes.out_of_order_data_en;
+    case UFS_QUERY_ATTR_IDN_BKOPS_STATUS:
+        return u->attributes.background_op_status;
+    case UFS_QUERY_ATTR_IDN_PURGE_STATUS:
+        return u->attributes.purge_status;
+    case UFS_QUERY_ATTR_IDN_MAX_DATA_IN:
+        return u->attributes.max_data_in_size;
+    case UFS_QUERY_ATTR_IDN_MAX_DATA_OUT:
+        return u->attributes.max_data_out_size;
+    case UFS_QUERY_ATTR_IDN_DYN_CAP_NEEDED:
+        return be32_to_cpu(u->attributes.dyn_cap_needed);
+    case UFS_QUERY_ATTR_IDN_REF_CLK_FREQ:
+        return u->attributes.ref_clk_freq;
+    case UFS_QUERY_ATTR_IDN_CONF_DESC_LOCK:
+        return u->attributes.config_descr_lock;
+    case UFS_QUERY_ATTR_IDN_MAX_NUM_OF_RTT:
+        return u->attributes.max_num_of_rtt;
+    case UFS_QUERY_ATTR_IDN_EE_CONTROL:
+        return be16_to_cpu(u->attributes.exception_event_control);
+    case UFS_QUERY_ATTR_IDN_EE_STATUS:
+        return be16_to_cpu(u->attributes.exception_event_status);
+    case UFS_QUERY_ATTR_IDN_SECONDS_PASSED:
+        return be32_to_cpu(u->attributes.seconds_passed);
+    case UFS_QUERY_ATTR_IDN_CNTX_CONF:
+        return be16_to_cpu(u->attributes.context_conf);
+    case UFS_QUERY_ATTR_IDN_FFU_STATUS:
+        return u->attributes.device_ffu_status;
+    case UFS_QUERY_ATTR_IDN_PSA_STATE:
+        return be32_to_cpu(u->attributes.psa_state);
+    case UFS_QUERY_ATTR_IDN_PSA_DATA_SIZE:
+        return be32_to_cpu(u->attributes.psa_data_size);
+    case UFS_QUERY_ATTR_IDN_REF_CLK_GATING_WAIT_TIME:
+        return u->attributes.ref_clk_gating_wait_time;
+    case UFS_QUERY_ATTR_IDN_CASE_ROUGH_TEMP:
+        return u->attributes.device_case_rough_temperaure;
+    case UFS_QUERY_ATTR_IDN_HIGH_TEMP_BOUND:
+        return u->attributes.device_too_high_temp_boundary;
+    case UFS_QUERY_ATTR_IDN_LOW_TEMP_BOUND:
+        return u->attributes.device_too_low_temp_boundary;
+    case UFS_QUERY_ATTR_IDN_THROTTLING_STATUS:
+        return u->attributes.throttling_status;
+    case UFS_QUERY_ATTR_IDN_WB_FLUSH_STATUS:
+        return u->attributes.wb_buffer_flush_status;
+    case UFS_QUERY_ATTR_IDN_AVAIL_WB_BUFF_SIZE:
+        return u->attributes.available_wb_buffer_size;
+    case UFS_QUERY_ATTR_IDN_WB_BUFF_LIFE_TIME_EST:
+        return u->attributes.wb_buffer_life_time_est;
+    case UFS_QUERY_ATTR_IDN_CURR_WB_BUFF_SIZE:
+        return be32_to_cpu(u->attributes.current_wb_buffer_size);
+    case UFS_QUERY_ATTR_IDN_REFRESH_STATUS:
+        return u->attributes.refresh_status;
+    case UFS_QUERY_ATTR_IDN_REFRESH_FREQ:
+        return u->attributes.refresh_freq;
+    case UFS_QUERY_ATTR_IDN_REFRESH_UNIT:
+        return u->attributes.refresh_unit;
+    }
+    return 0;
+}
+
+static void ufs_write_attr_value(UfsHc *u, uint8_t idn, uint32_t value)
+{
+    switch (idn) {
+    case UFS_QUERY_ATTR_IDN_ACTIVE_ICC_LVL:
+        u->attributes.active_icc_level = value;
+        break;
+    case UFS_QUERY_ATTR_IDN_MAX_DATA_IN:
+        u->attributes.max_data_in_size = value;
+        break;
+    case UFS_QUERY_ATTR_IDN_MAX_DATA_OUT:
+        u->attributes.max_data_out_size = value;
+        break;
+    case UFS_QUERY_ATTR_IDN_REF_CLK_FREQ:
+        u->attributes.ref_clk_freq = value;
+        break;
+    case UFS_QUERY_ATTR_IDN_MAX_NUM_OF_RTT:
+        u->attributes.max_num_of_rtt = value;
+        break;
+    case UFS_QUERY_ATTR_IDN_EE_CONTROL:
+        u->attributes.exception_event_control = cpu_to_be16(value);
+        break;
+    case UFS_QUERY_ATTR_IDN_SECONDS_PASSED:
+        u->attributes.seconds_passed = cpu_to_be32(value);
+        break;
+    case UFS_QUERY_ATTR_IDN_PSA_STATE:
+        u->attributes.psa_state = value;
+        break;
+    case UFS_QUERY_ATTR_IDN_PSA_DATA_SIZE:
+        u->attributes.psa_data_size = cpu_to_be32(value);
+        break;
+    }
+}
+
+static QueryRespCode ufs_exec_query_attr(UfsRequest *req, int op)
+{
+    UfsHc *u = req->hc;
+    uint8_t idn = req->req_upiu.qr.idn;
+    uint32_t value;
+    QueryRespCode ret;
+
+    ret = ufs_attr_check_idn_valid(idn, op);
+    if (ret) {
+        return ret;
+    }
+
+    if (op == UFS_QUERY_ATTR_READ) {
+        value = ufs_read_attr_value(u, idn);
+    } else {
+        value = be32_to_cpu(req->req_upiu.qr.value);
+        ufs_write_attr_value(u, idn, value);
+    }
+
+    req->rsp_upiu.qr.value = cpu_to_be32(value);
+    return UFS_QUERY_RESULT_SUCCESS;
+}
+
+static const RpmbUnitDescriptor rpmb_unit_desc = {
+    .length = sizeof(RpmbUnitDescriptor),
+    .descriptor_idn = 2,
+    .unit_index = UFS_UPIU_RPMB_WLUN,
+    .lu_enable = 0,
+};
+
+static QueryRespCode ufs_read_unit_desc(UfsRequest *req)
+{
+    UfsHc *u = req->hc;
+    uint8_t lun = req->req_upiu.qr.index;
+
+    if (lun != UFS_UPIU_RPMB_WLUN &&
+        (lun > UFS_MAX_LUS || u->lus[lun] == NULL)) {
+        trace_ufs_err_query_invalid_index(req->req_upiu.qr.opcode, lun);
+        return UFS_QUERY_RESULT_INVALID_INDEX;
+    }
+
+    if (lun == UFS_UPIU_RPMB_WLUN) {
+        memcpy(&req->rsp_upiu.qr.data, &rpmb_unit_desc, rpmb_unit_desc.length);
+    } else {
+        memcpy(&req->rsp_upiu.qr.data, &u->lus[lun]->unit_desc,
+               sizeof(u->lus[lun]->unit_desc));
+    }
+
+    return UFS_QUERY_RESULT_SUCCESS;
+}
+
+static inline StringDescriptor manufacturer_str_desc(void)
+{
+    StringDescriptor desc = {
+        .length = 0x12,
+        .descriptor_idn = UFS_QUERY_DESC_IDN_STRING,
+    };
+    desc.UC[0] = cpu_to_be16('R');
+    desc.UC[1] = cpu_to_be16('E');
+    desc.UC[2] = cpu_to_be16('D');
+    desc.UC[3] = cpu_to_be16('H');
+    desc.UC[4] = cpu_to_be16('A');
+    desc.UC[5] = cpu_to_be16('T');
+    return desc;
+}
+
+static inline StringDescriptor product_name_str_desc(void)
+{
+    StringDescriptor desc = {
+        .length = 0x22,
+        .descriptor_idn = UFS_QUERY_DESC_IDN_STRING,
+    };
+    desc.UC[0] = cpu_to_be16('Q');
+    desc.UC[1] = cpu_to_be16('E');
+    desc.UC[2] = cpu_to_be16('M');
+    desc.UC[3] = cpu_to_be16('U');
+    desc.UC[4] = cpu_to_be16(' ');
+    desc.UC[5] = cpu_to_be16('U');
+    desc.UC[6] = cpu_to_be16('F');
+    desc.UC[7] = cpu_to_be16('S');
+    return desc;
+}
+
+static inline StringDescriptor product_rev_level_str_desc(void)
+{
+    StringDescriptor desc = {
+        .length = 0x0a,
+        .descriptor_idn = UFS_QUERY_DESC_IDN_STRING,
+    };
+    desc.UC[0] = cpu_to_be16('0');
+    desc.UC[1] = cpu_to_be16('0');
+    desc.UC[2] = cpu_to_be16('0');
+    desc.UC[3] = cpu_to_be16('1');
+    return desc;
+}
+
+static const StringDescriptor null_str_desc = {
+    .length = 0x02,
+    .descriptor_idn = UFS_QUERY_DESC_IDN_STRING,
+};
+
+static QueryRespCode ufs_read_string_desc(UfsRequest *req)
+{
+    UfsHc *u = req->hc;
+    uint8_t index = req->req_upiu.qr.index;
+    StringDescriptor desc;
+
+    if (index == u->device_desc.manufacturer_name) {
+        desc = manufacturer_str_desc();
+        memcpy(&req->rsp_upiu.qr.data, &desc, desc.length);
+    } else if (index == u->device_desc.product_name) {
+        desc = product_name_str_desc();
+        memcpy(&req->rsp_upiu.qr.data, &desc, desc.length);
+    } else if (index == u->device_desc.serial_number) {
+        memcpy(&req->rsp_upiu.qr.data, &null_str_desc, null_str_desc.length);
+    } else if (index == u->device_desc.oem_id) {
+        memcpy(&req->rsp_upiu.qr.data, &null_str_desc, null_str_desc.length);
+    } else if (index == u->device_desc.product_revision_level) {
+        desc = product_rev_level_str_desc();
+        memcpy(&req->rsp_upiu.qr.data, &desc, desc.length);
+    } else {
+        trace_ufs_err_query_invalid_index(req->req_upiu.qr.opcode, index);
+        return UFS_QUERY_RESULT_INVALID_INDEX;
+    }
+    return UFS_QUERY_RESULT_SUCCESS;
+}
+
+static inline InterconnectDescriptor interconnect_desc(void)
+{
+    InterconnectDescriptor desc = {
+        .length = sizeof(InterconnectDescriptor),
+        .descriptor_idn = UFS_QUERY_DESC_IDN_INTERCONNECT,
+    };
+    desc.bcd_unipro_version = cpu_to_be16(0x180);
+    desc.bcd_mphy_version = cpu_to_be16(0x410);
+    return desc;
+}
+
+static QueryRespCode ufs_read_desc(UfsRequest *req)
+{
+    UfsHc *u = req->hc;
+    QueryRespCode status;
+    uint8_t idn = req->req_upiu.qr.idn;
+    uint16_t length = be16_to_cpu(req->req_upiu.qr.length);
+    InterconnectDescriptor desc;
+
+    switch (idn) {
+    case UFS_QUERY_DESC_IDN_DEVICE:
+        memcpy(&req->rsp_upiu.qr.data, &u->device_desc, sizeof(u->device_desc));
+        status = UFS_QUERY_RESULT_SUCCESS;
+        break;
+    case UFS_QUERY_DESC_IDN_UNIT:
+        status = ufs_read_unit_desc(req);
+        break;
+    case UFS_QUERY_DESC_IDN_GEOMETRY:
+        memcpy(&req->rsp_upiu.qr.data, &u->geometry_desc,
+               sizeof(u->geometry_desc));
+        status = UFS_QUERY_RESULT_SUCCESS;
+        break;
+    case UFS_QUERY_DESC_IDN_INTERCONNECT: {
+        desc = interconnect_desc();
+        memcpy(&req->rsp_upiu.qr.data, &desc, sizeof(InterconnectDescriptor));
+        status = UFS_QUERY_RESULT_SUCCESS;
+        break;
+    }
+    case UFS_QUERY_DESC_IDN_STRING:
+        status = ufs_read_string_desc(req);
+        break;
+    case UFS_QUERY_DESC_IDN_POWER:
+        /* mocking of power descriptor is not supported */
+        memset(&req->rsp_upiu.qr.data, 0, sizeof(PowerParametersDescriptor));
+        req->rsp_upiu.qr.data[0] = sizeof(PowerParametersDescriptor);
+        req->rsp_upiu.qr.data[1] = UFS_QUERY_DESC_IDN_POWER;
+        status = UFS_QUERY_RESULT_SUCCESS;
+        break;
+    case UFS_QUERY_DESC_IDN_HEALTH:
+        /* mocking of health descriptor is not supported */
+        memset(&req->rsp_upiu.qr.data, 0, sizeof(DeviceHealthDescriptor));
+        req->rsp_upiu.qr.data[0] = sizeof(DeviceHealthDescriptor);
+        req->rsp_upiu.qr.data[1] = UFS_QUERY_DESC_IDN_HEALTH;
+        status = UFS_QUERY_RESULT_SUCCESS;
+        break;
+    default:
+        length = 0;
+        trace_ufs_err_query_invalid_idn(req->req_upiu.qr.opcode, idn);
+        status = UFS_QUERY_RESULT_INVALID_IDN;
+    }
+
+    if (length > req->rsp_upiu.qr.data[0]) {
+        length = req->rsp_upiu.qr.data[0];
+    }
+    req->rsp_upiu.qr.opcode = req->req_upiu.qr.opcode;
+    req->rsp_upiu.qr.idn = req->req_upiu.qr.idn;
+    req->rsp_upiu.qr.index = req->req_upiu.qr.index;
+    req->rsp_upiu.qr.selector = req->req_upiu.qr.selector;
+    req->rsp_upiu.qr.length = cpu_to_be16(length);
+
+    return status;
+}
+
+static QueryRespCode ufs_exec_query_read(UfsRequest *req)
+{
+    QueryRespCode status;
+    switch (req->req_upiu.qr.opcode) {
+    case UFS_UPIU_QUERY_OPCODE_NOP:
+        status = UFS_QUERY_RESULT_SUCCESS;
+        break;
+    case UFS_UPIU_QUERY_OPCODE_READ_DESC:
+        status = ufs_read_desc(req);
+        break;
+    case UFS_UPIU_QUERY_OPCODE_READ_ATTR:
+        status = ufs_exec_query_attr(req, UFS_QUERY_ATTR_READ);
+        break;
+    case UFS_UPIU_QUERY_OPCODE_READ_FLAG:
+        status = ufs_exec_query_flag(req, UFS_QUERY_FLAG_READ);
+        break;
+    default:
+        trace_ufs_err_query_invalid_opcode(req->req_upiu.qr.opcode);
+        status = UFS_QUERY_RESULT_INVALID_OPCODE;
+        break;
+    }
+
+    return status;
+}
+
+static QueryRespCode ufs_exec_query_write(UfsRequest *req)
+{
+    QueryRespCode status;
+    switch (req->req_upiu.qr.opcode) {
+    case UFS_UPIU_QUERY_OPCODE_NOP:
+        status = UFS_QUERY_RESULT_SUCCESS;
+        break;
+    case UFS_UPIU_QUERY_OPCODE_WRITE_DESC:
+        /* write descriptor is not supported */
+        status = UFS_QUERY_RESULT_NOT_WRITEABLE;
+        break;
+    case UFS_UPIU_QUERY_OPCODE_WRITE_ATTR:
+        status = ufs_exec_query_attr(req, UFS_QUERY_ATTR_WRITE);
+        break;
+    case UFS_UPIU_QUERY_OPCODE_SET_FLAG:
+        status = ufs_exec_query_flag(req, UFS_QUERY_FLAG_SET);
+        break;
+    case UFS_UPIU_QUERY_OPCODE_CLEAR_FLAG:
+        status = ufs_exec_query_flag(req, UFS_QUERY_FLAG_CLEAR);
+        break;
+    case UFS_UPIU_QUERY_OPCODE_TOGGLE_FLAG:
+        status = ufs_exec_query_flag(req, UFS_QUERY_FLAG_TOGGLE);
+        break;
+    default:
+        trace_ufs_err_query_invalid_opcode(req->req_upiu.qr.opcode);
+        status = UFS_QUERY_RESULT_INVALID_OPCODE;
+        break;
+    }
+
+    return status;
+}
+
+static UfsReqResult ufs_exec_query_cmd(UfsRequest *req)
+{
+    uint8_t query_func = req->req_upiu.header.query_func;
+    uint16_t data_segment_length;
+    QueryRespCode status;
+
+    trace_ufs_exec_query_cmd(req->slot, req->req_upiu.qr.opcode);
+    if (query_func == UFS_UPIU_QUERY_FUNC_STANDARD_READ_REQUEST) {
+        status = ufs_exec_query_read(req);
+    } else if (query_func == UFS_UPIU_QUERY_FUNC_STANDARD_WRITE_REQUEST) {
+        status = ufs_exec_query_write(req);
+    } else {
+        status = UFS_QUERY_RESULT_GENERAL_FAILURE;
+    }
+
+    data_segment_length = be16_to_cpu(req->rsp_upiu.qr.length);
+    ufs_build_upiu_header(req, UFS_UPIU_TRANSACTION_QUERY_RSP, 0, status, 0,
+                          data_segment_length);
+
+    if (status != UFS_QUERY_RESULT_SUCCESS) {
+        return UFS_REQUEST_FAIL;
+    }
+    return UFS_REQUEST_SUCCESS;
+}
+
+static void ufs_exec_req(UfsRequest *req)
+{
+    UfsReqResult req_result;
+
+    if (ufs_dma_read_upiu(req)) {
+        return;
+    }
+
+    switch (req->req_upiu.header.trans_type) {
+    case UFS_UPIU_TRANSACTION_NOP_OUT:
+        req_result = ufs_exec_nop_cmd(req);
+        break;
+    case UFS_UPIU_TRANSACTION_COMMAND:
+        req_result = ufs_exec_scsi_cmd(req);
+        break;
+    case UFS_UPIU_TRANSACTION_QUERY_REQ:
+        req_result = ufs_exec_query_cmd(req);
+        break;
+    default:
+        trace_ufs_err_invalid_trans_code(req->slot,
+                                         req->req_upiu.header.trans_type);
+        req_result = UFS_REQUEST_FAIL;
+    }
+
+    /*
+     * The ufs_complete_req for scsi commands is handled by the
+     * ufs_scsi_command_complete() callback function. Therefore, to avoid
+     * duplicate processing, ufs_complete_req() is not called for scsi commands.
+     */
+    if (req_result != UFS_REQUEST_NO_COMPLETE) {
+        ufs_complete_req(req, req_result);
+    }
+}
+
+static void ufs_process_req(void *opaque)
+{
+    UfsHc *u = opaque;
+    UfsRequest *req;
+    int slot;
+
+    for (slot = 0; slot < u->params.nutrs; slot++) {
+        req = &u->req_list[slot];
+
+        if (req->state != UFS_REQUEST_READY) {
+            continue;
+        }
+        trace_ufs_process_req(slot);
+        req->state = UFS_REQUEST_RUNNING;
+
+        ufs_exec_req(req);
+    }
+}
+
+static void ufs_complete_req(UfsRequest *req, UfsReqResult req_result)
+{
+    UfsHc *u = req->hc;
+    assert(req->state == UFS_REQUEST_RUNNING);
+
+    if (req_result == UFS_REQUEST_SUCCESS) {
+        req->utrd.header.dword_2 = cpu_to_le32(UFS_OCS_SUCCESS);
+    } else {
+        req->utrd.header.dword_2 = cpu_to_le32(UFS_OCS_INVALID_CMD_TABLE_ATTR);
+    }
+
+    trace_ufs_complete_req(req->slot);
+    req->state = UFS_REQUEST_COMPLETE;
+    qemu_bh_schedule(u->complete_bh);
+}
+
+static void ufs_clear_req(UfsRequest *req)
+{
+    if (req->sg != NULL) {
+        qemu_sglist_destroy(req->sg);
+        g_free(req->sg);
+        req->sg = NULL;
+    }
+
+    memset(&req->utrd, 0, sizeof(req->utrd));
+    memset(&req->req_upiu, 0, sizeof(req->req_upiu));
+    memset(&req->rsp_upiu, 0, sizeof(req->rsp_upiu));
+}
+
+static void ufs_sendback_req(void *opaque)
+{
+    UfsHc *u = opaque;
+    UfsRequest *req;
+    int slot;
+
+    for (slot = 0; slot < u->params.nutrs; slot++) {
+        req = &u->req_list[slot];
+
+        if (req->state != UFS_REQUEST_COMPLETE) {
+            continue;
+        }
+
+        if (ufs_dma_write_upiu(req)) {
+            req->state = UFS_REQUEST_ERROR;
+            continue;
+        }
+
+        /*
+         * TODO: UTP Transfer Request Interrupt Aggregation Control is not yet
+         * supported
+         */
+        if (le32_to_cpu(req->utrd.header.dword_2) != UFS_OCS_SUCCESS ||
+            le32_to_cpu(req->utrd.header.dword_0) & UFS_UTP_REQ_DESC_INT_CMD) {
+            u->reg.is = FIELD_DP32(u->reg.is, IS, UTRCS, 1);
+        }
+
+        u->reg.utrldbr &= ~(1 << slot);
+        u->reg.utrlcnr |= (1 << slot);
+
+        trace_ufs_sendback_req(req->slot);
+
+        ufs_clear_req(req);
+        req->state = UFS_REQUEST_IDLE;
+    }
+
+    ufs_irq_check(u);
+}
+
+static bool ufs_check_constraints(UfsHc *u, Error **errp)
+{
+    if (u->params.nutrs > UFS_MAX_NUTRS) {
+        error_setg(errp, "nutrs must be less than or equal to %d",
+                   UFS_MAX_NUTRS);
+        return false;
+    }
+
+    if (u->params.nutmrs > UFS_MAX_NUTMRS) {
+        error_setg(errp, "nutmrs must be less than or equal to %d",
+                   UFS_MAX_NUTMRS);
+        return false;
+    }
+
+    return true;
+}
+
+static void ufs_init_pci(UfsHc *u, PCIDevice *pci_dev)
+{
+    uint8_t *pci_conf = pci_dev->config;
+
+    pci_conf[PCI_INTERRUPT_PIN] = 1;
+    pci_config_set_prog_interface(pci_conf, 0x1);
+
+    memory_region_init_io(&u->iomem, OBJECT(u), &ufs_mmio_ops, u, "ufs",
+                          u->reg_size);
+    pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &u->iomem);
+    u->irq = pci_allocate_irq(pci_dev);
+}
+
+static void ufs_init_state(UfsHc *u)
+{
+    u->req_list = g_new0(UfsRequest, u->params.nutrs);
+
+    for (int i = 0; i < u->params.nutrs; i++) {
+        u->req_list[i].hc = u;
+        u->req_list[i].slot = i;
+        u->req_list[i].sg = NULL;
+        u->req_list[i].state = UFS_REQUEST_IDLE;
+    }
+
+    u->doorbell_bh = qemu_bh_new_guarded(ufs_process_req, u,
+                                         &DEVICE(u)->mem_reentrancy_guard);
+    u->complete_bh = qemu_bh_new_guarded(ufs_sendback_req, u,
+                                         &DEVICE(u)->mem_reentrancy_guard);
+}
+
+static void ufs_init_hc(UfsHc *u)
+{
+    uint32_t cap = 0;
+
+    u->reg_size = pow2ceil(sizeof(UfsReg));
+
+    memset(&u->reg, 0, sizeof(u->reg));
+    cap = FIELD_DP32(cap, CAP, NUTRS, (u->params.nutrs - 1));
+    cap = FIELD_DP32(cap, CAP, RTT, 2);
+    cap = FIELD_DP32(cap, CAP, NUTMRS, (u->params.nutmrs - 1));
+    cap = FIELD_DP32(cap, CAP, AUTOH8, 0);
+    cap = FIELD_DP32(cap, CAP, 64AS, 1);
+    cap = FIELD_DP32(cap, CAP, OODDS, 0);
+    cap = FIELD_DP32(cap, CAP, UICDMETMS, 0);
+    cap = FIELD_DP32(cap, CAP, CS, 0);
+    u->reg.cap = cap;
+    u->reg.ver = UFS_SPEC_VER;
+
+    memset(&u->device_desc, 0, sizeof(DeviceDescriptor));
+    u->device_desc.length = sizeof(DeviceDescriptor);
+    u->device_desc.descriptor_idn = UFS_QUERY_DESC_IDN_DEVICE;
+    u->device_desc.device_sub_class = 0x01;
+    u->device_desc.number_lu = 0x00;
+    u->device_desc.number_wlu = 0x04;
+    /* TODO: Revisit it when Power Management is implemented */
+    u->device_desc.init_power_mode = 0x01; /* Active Mode */
+    u->device_desc.high_priority_lun = 0x7F; /* Same Priority */
+    u->device_desc.spec_version = cpu_to_be16(UFS_SPEC_VER);
+    u->device_desc.manufacturer_name = 0x00;
+    u->device_desc.product_name = 0x01;
+    u->device_desc.serial_number = 0x02;
+    u->device_desc.oem_id = 0x03;
+    u->device_desc.ud_0_base_offset = 0x16;
+    u->device_desc.ud_config_p_length = 0x1A;
+    u->device_desc.device_rtt_cap = 0x02;
+    u->device_desc.queue_depth = u->params.nutrs;
+    u->device_desc.product_revision_level = 0x04;
+
+    memset(&u->geometry_desc, 0, sizeof(GeometryDescriptor));
+    u->geometry_desc.length = sizeof(GeometryDescriptor);
+    u->geometry_desc.descriptor_idn = UFS_QUERY_DESC_IDN_GEOMETRY;
+    u->geometry_desc.max_number_lu = (UFS_MAX_LUS == 32) ? 0x1 : 0x0;
+    u->geometry_desc.segment_size = cpu_to_be32(0x2000); /* 4KB */
+    u->geometry_desc.allocation_unit_size = 0x1; /* 4KB */
+    u->geometry_desc.min_addr_block_size = 0x8; /* 4KB */
+    u->geometry_desc.max_in_buffer_size = 0x8;
+    u->geometry_desc.max_out_buffer_size = 0x8;
+    u->geometry_desc.rpmb_read_write_size = 0x40;
+    u->geometry_desc.data_ordering =
+        0x0; /* out-of-order data transfer is not supported */
+    u->geometry_desc.max_context_id_number = 0x5;
+    u->geometry_desc.supported_memory_types = cpu_to_be16(0x8001);
+
+    memset(&u->attributes, 0, sizeof(u->attributes));
+    u->attributes.max_data_in_size = 0x08;
+    u->attributes.max_data_out_size = 0x08;
+    u->attributes.ref_clk_freq = 0x01; /* 26 MHz */
+    /* configure descriptor is not supported */
+    u->attributes.config_descr_lock = 0x01;
+    u->attributes.max_num_of_rtt = 0x02;
+
+    memset(&u->flags, 0, sizeof(u->flags));
+    u->flags.permanently_disable_fw_update = 1;
+}
+
+static bool ufs_init_wlu(UfsHc *u, UfsWLu **wlu, uint8_t wlun, Error **errp)
+{
+    UfsWLu *new_wlu = UFSWLU(qdev_new(TYPE_UFS_WLU));
+
+    qdev_prop_set_uint32(DEVICE(new_wlu), "lun", wlun);
+
+    /*
+     * The well-known lu shares the same bus as the normal lu. If the well-known
+     * lu writes the same channel value as the normal lu, the report will be
+     * made not only for the normal lu but also for the well-known lu at
+     * REPORT_LUN time. To prevent this, the channel value of normal lu is fixed
+     * to 0 and the channel value of well-known lu is fixed to 1.
+     */
+    qdev_prop_set_uint32(DEVICE(new_wlu), "channel", 1);
+    if (!qdev_realize_and_unref(DEVICE(new_wlu), BUS(&u->bus), errp)) {
+        return false;
+    }
+
+    *wlu = new_wlu;
+    return true;
+}
+
+static void ufs_realize(PCIDevice *pci_dev, Error **errp)
+{
+    UfsHc *u = UFS(pci_dev);
+
+    if (!ufs_check_constraints(u, errp)) {
+        return;
+    }
+
+    qbus_init(&u->bus, sizeof(UfsBus), TYPE_UFS_BUS, &pci_dev->qdev,
+              u->parent_obj.qdev.id);
+    u->bus.parent_bus.info = &ufs_scsi_info;
+
+    ufs_init_state(u);
+    ufs_init_hc(u);
+    ufs_init_pci(u, pci_dev);
+
+    if (!ufs_init_wlu(u, &u->report_wlu, UFS_UPIU_REPORT_LUNS_WLUN, errp)) {
+        return;
+    }
+
+    if (!ufs_init_wlu(u, &u->dev_wlu, UFS_UPIU_UFS_DEVICE_WLUN, errp)) {
+        return;
+    }
+
+    if (!ufs_init_wlu(u, &u->boot_wlu, UFS_UPIU_BOOT_WLUN, errp)) {
+        return;
+    }
+
+    if (!ufs_init_wlu(u, &u->rpmb_wlu, UFS_UPIU_RPMB_WLUN, errp)) {
+        return;
+    }
+}
+
+static void ufs_exit(PCIDevice *pci_dev)
+{
+    UfsHc *u = UFS(pci_dev);
+
+    if (u->dev_wlu) {
+        object_unref(OBJECT(u->dev_wlu));
+        u->dev_wlu = NULL;
+    }
+
+    if (u->report_wlu) {
+        object_unref(OBJECT(u->report_wlu));
+        u->report_wlu = NULL;
+    }
+
+    if (u->rpmb_wlu) {
+        object_unref(OBJECT(u->rpmb_wlu));
+        u->rpmb_wlu = NULL;
+    }
+
+    if (u->boot_wlu) {
+        object_unref(OBJECT(u->boot_wlu));
+        u->boot_wlu = NULL;
+    }
+
+    qemu_bh_delete(u->doorbell_bh);
+    qemu_bh_delete(u->complete_bh);
+
+    for (int i = 0; i < u->params.nutrs; i++) {
+        ufs_clear_req(&u->req_list[i]);
+    }
+    g_free(u->req_list);
+}
+
+static Property ufs_props[] = {
+    DEFINE_PROP_STRING("serial", UfsHc, params.serial),
+    DEFINE_PROP_UINT8("nutrs", UfsHc, params.nutrs, 32),
+    DEFINE_PROP_UINT8("nutmrs", UfsHc, params.nutmrs, 8),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static const VMStateDescription ufs_vmstate = {
+    .name = "ufs",
+    .unmigratable = 1,
+};
+
+static void ufs_class_init(ObjectClass *oc, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(oc);
+    PCIDeviceClass *pc = PCI_DEVICE_CLASS(oc);
+
+    pc->realize = ufs_realize;
+    pc->exit = ufs_exit;
+    pc->vendor_id = PCI_VENDOR_ID_REDHAT;
+    pc->device_id = PCI_DEVICE_ID_REDHAT_UFS;
+    pc->class_id = PCI_CLASS_STORAGE_UFS;
+
+    set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
+    dc->desc = "Universal Flash Storage";
+    device_class_set_props(dc, ufs_props);
+    dc->vmsd = &ufs_vmstate;
+}
+
+static bool ufs_bus_check_address(BusState *qbus, DeviceState *qdev,
+                                  Error **errp)
+{
+    SCSIDevice *dev = SCSI_DEVICE(qdev);
+    UfsBusClass *ubc = UFS_BUS_GET_CLASS(qbus);
+    UfsHc *u = UFS(qbus->parent);
+
+    if (strcmp(object_get_typename(OBJECT(dev)), TYPE_UFS_WLU) == 0) {
+        if (dev->lun != UFS_UPIU_REPORT_LUNS_WLUN &&
+            dev->lun != UFS_UPIU_UFS_DEVICE_WLUN &&
+            dev->lun != UFS_UPIU_BOOT_WLUN && dev->lun != UFS_UPIU_RPMB_WLUN) {
+            error_setg(errp, "bad well-known lun: %d", dev->lun);
+            return false;
+        }
+
+        if ((dev->lun == UFS_UPIU_REPORT_LUNS_WLUN && u->report_wlu != NULL) ||
+            (dev->lun == UFS_UPIU_UFS_DEVICE_WLUN && u->dev_wlu != NULL) ||
+            (dev->lun == UFS_UPIU_BOOT_WLUN && u->boot_wlu != NULL) ||
+            (dev->lun == UFS_UPIU_RPMB_WLUN && u->rpmb_wlu != NULL)) {
+            error_setg(errp, "well-known lun %d already exists", dev->lun);
+            return false;
+        }
+
+        return true;
+    }
+
+    if (strcmp(object_get_typename(OBJECT(dev)), TYPE_UFS_LU) != 0) {
+        error_setg(errp, "%s cannot be connected to ufs-bus",
+                   object_get_typename(OBJECT(dev)));
+        return false;
+    }
+
+    return ubc->parent_check_address(qbus, qdev, errp);
+}
+
+static void ufs_bus_class_init(ObjectClass *class, void *data)
+{
+    BusClass *bc = BUS_CLASS(class);
+    UfsBusClass *ubc = UFS_BUS_CLASS(class);
+    ubc->parent_check_address = bc->check_address;
+    bc->check_address = ufs_bus_check_address;
+}
+
+static const TypeInfo ufs_info = {
+    .name = TYPE_UFS,
+    .parent = TYPE_PCI_DEVICE,
+    .class_init = ufs_class_init,
+    .instance_size = sizeof(UfsHc),
+    .interfaces = (InterfaceInfo[]){ { INTERFACE_PCIE_DEVICE }, {} },
+};
+
+static const TypeInfo ufs_bus_info = {
+    .name = TYPE_UFS_BUS,
+    .parent = TYPE_SCSI_BUS,
+    .class_init = ufs_bus_class_init,
+    .class_size = sizeof(UfsBusClass),
+    .instance_size = sizeof(UfsBus),
+};
+
+static void ufs_register_types(void)
+{
+    type_register_static(&ufs_info);
+    type_register_static(&ufs_bus_info);
+}
+
+type_init(ufs_register_types)
diff --git a/hw/ufs/ufs.h b/hw/ufs/ufs.h
new file mode 100644
index 0000000000..f244228617
--- /dev/null
+++ b/hw/ufs/ufs.h
@@ -0,0 +1,131 @@
+/*
+ * QEMU UFS
+ *
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All rights reserved.
+ *
+ * Written by Jeuk Kim <jeuk20.kim@samsung.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef HW_UFS_UFS_H
+#define HW_UFS_UFS_H
+
+#include "hw/pci/pci_device.h"
+#include "hw/scsi/scsi.h"
+#include "block/ufs.h"
+
+#define UFS_MAX_LUS 32
+#define UFS_BLOCK_SIZE 4096
+
+typedef struct UfsBusClass {
+    BusClass parent_class;
+    bool (*parent_check_address)(BusState *bus, DeviceState *dev, Error **errp);
+} UfsBusClass;
+
+typedef struct UfsBus {
+    SCSIBus parent_bus;
+} UfsBus;
+
+#define TYPE_UFS_BUS "ufs-bus"
+DECLARE_OBJ_CHECKERS(UfsBus, UfsBusClass, UFS_BUS, TYPE_UFS_BUS)
+
+typedef enum UfsRequestState {
+    UFS_REQUEST_IDLE = 0,
+    UFS_REQUEST_READY = 1,
+    UFS_REQUEST_RUNNING = 2,
+    UFS_REQUEST_COMPLETE = 3,
+    UFS_REQUEST_ERROR = 4,
+} UfsRequestState;
+
+typedef enum UfsReqResult {
+    UFS_REQUEST_SUCCESS = 0,
+    UFS_REQUEST_FAIL = 1,
+    UFS_REQUEST_NO_COMPLETE = 2,
+} UfsReqResult;
+
+typedef struct UfsRequest {
+    struct UfsHc *hc;
+    UfsRequestState state;
+    int slot;
+
+    UtpTransferReqDesc utrd;
+    UtpUpiuReq req_upiu;
+    UtpUpiuRsp rsp_upiu;
+
+    /* for scsi command */
+    QEMUSGList *sg;
+} UfsRequest;
+
+typedef struct UfsLu {
+    SCSIDevice qdev;
+    uint8_t lun;
+    UnitDescriptor unit_desc;
+} UfsLu;
+
+typedef struct UfsWLu {
+    SCSIDevice qdev;
+    uint8_t lun;
+} UfsWLu;
+
+typedef struct UfsParams {
+    char *serial;
+    uint8_t nutrs; /* Number of UTP Transfer Request Slots */
+    uint8_t nutmrs; /* Number of UTP Task Management Request Slots */
+} UfsParams;
+
+typedef struct UfsHc {
+    PCIDevice parent_obj;
+    UfsBus bus;
+    MemoryRegion iomem;
+    UfsReg reg;
+    UfsParams params;
+    uint32_t reg_size;
+    UfsRequest *req_list;
+
+    UfsLu *lus[UFS_MAX_LUS];
+    UfsWLu *report_wlu;
+    UfsWLu *dev_wlu;
+    UfsWLu *boot_wlu;
+    UfsWLu *rpmb_wlu;
+    DeviceDescriptor device_desc;
+    GeometryDescriptor geometry_desc;
+    Attributes attributes;
+    Flags flags;
+
+    qemu_irq irq;
+    QEMUBH *doorbell_bh;
+    QEMUBH *complete_bh;
+} UfsHc;
+
+#define TYPE_UFS "ufs"
+#define UFS(obj) OBJECT_CHECK(UfsHc, (obj), TYPE_UFS)
+
+#define TYPE_UFS_LU "ufs-lu"
+#define UFSLU(obj) OBJECT_CHECK(UfsLu, (obj), TYPE_UFS_LU)
+
+#define TYPE_UFS_WLU "ufs-wlu"
+#define UFSWLU(obj) OBJECT_CHECK(UfsWLu, (obj), TYPE_UFS_WLU)
+
+typedef enum UfsQueryFlagPerm {
+    UFS_QUERY_FLAG_NONE = 0x0,
+    UFS_QUERY_FLAG_READ = 0x1,
+    UFS_QUERY_FLAG_SET = 0x2,
+    UFS_QUERY_FLAG_CLEAR = 0x4,
+    UFS_QUERY_FLAG_TOGGLE = 0x8,
+} UfsQueryFlagPerm;
+
+typedef enum UfsQueryAttrPerm {
+    UFS_QUERY_ATTR_NONE = 0x0,
+    UFS_QUERY_ATTR_READ = 0x1,
+    UFS_QUERY_ATTR_WRITE = 0x2,
+} UfsQueryAttrPerm;
+
+static inline bool is_wlun(uint8_t lun)
+{
+    return (lun == UFS_UPIU_REPORT_LUNS_WLUN ||
+            lun == UFS_UPIU_UFS_DEVICE_WLUN || lun == UFS_UPIU_BOOT_WLUN ||
+            lun == UFS_UPIU_RPMB_WLUN);
+}
+
+#endif /* HW_UFS_UFS_H */
diff --git a/hw/usb/ccid-card-emulated.c b/hw/usb/ccid-card-emulated.c
index c328660075..3ee9c73b87 100644
--- a/hw/usb/ccid-card-emulated.c
+++ b/hw/usb/ccid-card-emulated.c
@@ -518,7 +518,7 @@ static void emulated_realize(CCIDCardState *base, Error **errp)
         goto out2;
     }
 
-    /* TODO: a passthru backened that works on local machine. third card type?*/
+    /* TODO: a passthru backend that works on local machine. third card type?*/
     if (card->backend == BACKEND_CERTIFICATES) {
         if (card->cert1 != NULL && card->cert2 != NULL && card->cert3 != NULL) {
             ret = emulated_initialize_vcard_from_certificates(card);
diff --git a/hw/usb/hcd-ehci.c b/hw/usb/hcd-ehci.c
index c930c60921..19b4534c20 100644
--- a/hw/usb/hcd-ehci.c
+++ b/hw/usb/hcd-ehci.c
@@ -1464,7 +1464,7 @@ static int ehci_process_itd(EHCIState *ehci,
                     usb_handle_packet(dev, &ehci->ipacket);
                     usb_packet_unmap(&ehci->ipacket, &ehci->isgl);
                 } else {
-                    DPRINTF("ISOCH: attempt to addess non-iso endpoint\n");
+                    DPRINTF("ISOCH: attempt to address non-iso endpoint\n");
                     ehci->ipacket.status = USB_RET_NAK;
                     ehci->ipacket.actual_length = 0;
                 }
@@ -1513,7 +1513,7 @@ static int ehci_process_itd(EHCIState *ehci,
 
 
 /*  This state is the entry point for asynchronous schedule
- *  processing.  Entry here consitutes a EHCI start event state (4.8.5)
+ *  processing.  Entry here constitutes a EHCI start event state (4.8.5)
  */
 static int ehci_state_waitlisthead(EHCIState *ehci,  int async)
 {
@@ -2458,7 +2458,7 @@ static void usb_ehci_vm_state_change(void *opaque, bool running, RunState state)
     /*
      * The schedule rebuilt from guest memory could cause the migration dest
      * to miss a QH unlink, and fail to cancel packets, since the unlinked QH
-     * will never have existed on the destination. Therefor we must flush the
+     * will never have existed on the destination. Therefore we must flush the
      * async schedule on savevm to catch any not yet noticed unlinks.
      */
     if (state == RUN_STATE_SAVE_VM) {
diff --git a/hw/usb/hcd-ohci.c b/hw/usb/hcd-ohci.c
index cc5cde6983..7ff1b65ced 100644
--- a/hw/usb/hcd-ohci.c
+++ b/hw/usb/hcd-ohci.c
@@ -1355,7 +1355,7 @@ static uint32_t ohci_get_frame_remaining(OHCIState *ohci)
     if ((ohci->ctl & OHCI_CTL_HCFS) != OHCI_USB_OPERATIONAL) {
         return ohci->frt << 31;
     }
-    /* Being in USB operational state guarnatees sof_time was set already. */
+    /* Being in USB operational state guarantees sof_time was set already. */
     tks = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) - ohci->sof_time;
     if (tks < 0) {
         tks = 0;
diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c
index b89b618ec2..324177ad5d 100644
--- a/hw/usb/hcd-xhci.c
+++ b/hw/usb/hcd-xhci.c
@@ -2434,7 +2434,6 @@ static void xhci_detach_slot(XHCIState *xhci, USBPort *uport)
 static TRBCCode xhci_get_port_bandwidth(XHCIState *xhci, uint64_t pctx)
 {
     dma_addr_t ctx;
-    uint8_t bw_ctx[xhci->numports+1];
 
     DPRINTF("xhci_get_port_bandwidth()\n");
 
@@ -2442,11 +2441,10 @@ static TRBCCode xhci_get_port_bandwidth(XHCIState *xhci, uint64_t pctx)
 
     DPRINTF("xhci: bandwidth context at "DMA_ADDR_FMT"\n", ctx);
 
-    /* TODO: actually implement real values here */
-    bw_ctx[0] = 0;
-    memset(&bw_ctx[1], 80, xhci->numports); /* 80% */
-    if (dma_memory_write(xhci->as, ctx, bw_ctx, sizeof(bw_ctx),
-                     MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
+    /* TODO: actually implement real values here. This is 80% for all ports. */
+    if (stb_dma(xhci->as, ctx, 0, MEMTXATTRS_UNSPECIFIED) != MEMTX_OK ||
+        dma_memory_set(xhci->as, ctx + 1, 80, xhci->numports,
+                       MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
         qemu_log_mask(LOG_GUEST_ERROR, "%s: DMA memory write failed!\n",
                       __func__);
         return CC_TRB_ERROR;
diff --git a/hw/usb/quirks.h b/hw/usb/quirks.h
index c3e595f40b..94b2c95341 100644
--- a/hw/usb/quirks.h
+++ b/hw/usb/quirks.h
@@ -67,7 +67,7 @@ static const struct usb_device_id usbredir_raw_serial_ids[] = {
     { USB_DEVICE(0x10C4, 0x800A) }, /* SPORTident BSM7-D-USB main station */
     { USB_DEVICE(0x10C4, 0x803B) }, /* Pololu USB-serial converter */
     { USB_DEVICE(0x10C4, 0x8044) }, /* Cygnal Debug Adapter */
-    { USB_DEVICE(0x10C4, 0x804E) }, /* Software Bisque Paramount ME build-in converter */
+    { USB_DEVICE(0x10C4, 0x804E) }, /* Software Bisque Paramount ME built-in converter */
     { USB_DEVICE(0x10C4, 0x8053) }, /* Enfora EDG1228 */
     { USB_DEVICE(0x10C4, 0x8054) }, /* Enfora GSM2228 */
     { USB_DEVICE(0x10C4, 0x8066) }, /* Argussoft In-System Programmer */
diff --git a/hw/usb/redirect.c b/hw/usb/redirect.c
index 39fbaaab16..c9893df867 100644
--- a/hw/usb/redirect.c
+++ b/hw/usb/redirect.c
@@ -278,7 +278,7 @@ static gboolean usbredir_write_unblocked(void *do_not_use, GIOCondition cond,
     dev->watch = 0;
     usbredirparser_do_write(dev->parser);
 
-    return FALSE;
+    return G_SOURCE_REMOVE;
 }
 
 static int usbredir_write(void *priv, uint8_t *data, int count)
@@ -471,7 +471,7 @@ static int bufp_alloc(USBRedirDevice *dev, uint8_t *data, uint16_t len,
         DPRINTF("bufpq overflow, dropping packets ep %02X\n", ep);
         dev->endpoint[EP2I(ep)].bufpq_dropping_packets = 1;
     }
-    /* Since we're interupting the stream anyways, drop enough packets to get
+    /* Since we're interrupting the stream anyways, drop enough packets to get
        back to our target buffer size */
     if (dev->endpoint[EP2I(ep)].bufpq_dropping_packets) {
         if (dev->endpoint[EP2I(ep)].bufpq_size >
diff --git a/hw/usb/trace-events b/hw/usb/trace-events
index 6bb9655c8d..ed7dc210d3 100644
--- a/hw/usb/trace-events
+++ b/hw/usb/trace-events
@@ -248,7 +248,7 @@ usb_set_device_feature(int addr, int feature, int ret) "dev %d, feature %d, ret
 
 # dev-hub.c
 usb_hub_reset(int addr) "dev %d"
-usb_hub_control(int addr, int request, int value, int index, int length) "dev %d, req 0x%x, value %d, index %d, langth %d"
+usb_hub_control(int addr, int request, int value, int index, int length) "dev %d, req 0x%x, value %d, index %d, length %d"
 usb_hub_get_port_status(int addr, int nr, int status, int changed) "dev %d, port %d, status 0x%x, changed 0x%x"
 usb_hub_set_port_feature(int addr, int nr, const char *f) "dev %d, port %d, feature %s"
 usb_hub_clear_port_feature(int addr, int nr, const char *f) "dev %d, port %d, feature %s"
diff --git a/hw/usb/xen-usb.c b/hw/usb/xen-usb.c
index 38ee660a30..09ec326aea 100644
--- a/hw/usb/xen-usb.c
+++ b/hw/usb/xen-usb.c
@@ -451,7 +451,7 @@ static int usbback_check_and_submit(struct usbback_req *usbback_req)
     wValue = le16_to_cpu(ctrl->wValue);
 
     /*
-     * When the device is first connected or resetted, USB device has no
+     * When the device is first connected or reset, USB device has no
      * address. In this initial state, following requests are sent to device
      * address (#0),
      *
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 9aac21abb7..134649226d 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -27,6 +27,7 @@
 
 #include "hw/vfio/vfio-common.h"
 #include "hw/vfio/vfio.h"
+#include "hw/vfio/pci.h"
 #include "exec/address-spaces.h"
 #include "exec/memory.h"
 #include "exec/ram_addr.h"
@@ -363,41 +364,54 @@ bool vfio_mig_active(void)
 
 static Error *multiple_devices_migration_blocker;
 
-static unsigned int vfio_migratable_device_num(void)
+/*
+ * Multiple devices migration is allowed only if all devices support P2P
+ * migration. Single device migration is allowed regardless of P2P migration
+ * support.
+ */
+static bool vfio_multiple_devices_migration_is_supported(void)
 {
     VFIOGroup *group;
     VFIODevice *vbasedev;
     unsigned int device_num = 0;
+    bool all_support_p2p = true;
 
     QLIST_FOREACH(group, &vfio_group_list, next) {
         QLIST_FOREACH(vbasedev, &group->device_list, next) {
             if (vbasedev->migration) {
                 device_num++;
+
+                if (!(vbasedev->migration->mig_flags & VFIO_MIGRATION_P2P)) {
+                    all_support_p2p = false;
+                }
             }
         }
     }
 
-    return device_num;
+    return all_support_p2p || device_num <= 1;
 }
 
 int vfio_block_multiple_devices_migration(VFIODevice *vbasedev, Error **errp)
 {
     int ret;
 
-    if (multiple_devices_migration_blocker ||
-        vfio_migratable_device_num() <= 1) {
+    if (vfio_multiple_devices_migration_is_supported()) {
         return 0;
     }
 
     if (vbasedev->enable_migration == ON_OFF_AUTO_ON) {
-        error_setg(errp, "Migration is currently not supported with multiple "
-                         "VFIO devices");
+        error_setg(errp, "Multiple VFIO devices migration is supported only if "
+                         "all of them support P2P migration");
         return -EINVAL;
     }
 
+    if (multiple_devices_migration_blocker) {
+        return 0;
+    }
+
     error_setg(&multiple_devices_migration_blocker,
-               "Migration is currently not supported with multiple "
-               "VFIO devices");
+               "Multiple VFIO devices migration is supported only if all of "
+               "them support P2P migration");
     ret = migrate_add_blocker(multiple_devices_migration_blocker, errp);
     if (ret < 0) {
         error_free(multiple_devices_migration_blocker);
@@ -410,7 +424,7 @@ int vfio_block_multiple_devices_migration(VFIODevice *vbasedev, Error **errp)
 void vfio_unblock_multiple_devices_migration(void)
 {
     if (!multiple_devices_migration_blocker ||
-        vfio_migratable_device_num() > 1) {
+        !vfio_multiple_devices_migration_is_supported()) {
         return;
     }
 
@@ -437,6 +451,22 @@ static void vfio_set_migration_error(int err)
     }
 }
 
+bool vfio_device_state_is_running(VFIODevice *vbasedev)
+{
+    VFIOMigration *migration = vbasedev->migration;
+
+    return migration->device_state == VFIO_DEVICE_STATE_RUNNING ||
+           migration->device_state == VFIO_DEVICE_STATE_RUNNING_P2P;
+}
+
+bool vfio_device_state_is_precopy(VFIODevice *vbasedev)
+{
+    VFIOMigration *migration = vbasedev->migration;
+
+    return migration->device_state == VFIO_DEVICE_STATE_PRE_COPY ||
+           migration->device_state == VFIO_DEVICE_STATE_PRE_COPY_P2P;
+}
+
 static bool vfio_devices_all_dirty_tracking(VFIOContainer *container)
 {
     VFIOGroup *group;
@@ -457,8 +487,8 @@ static bool vfio_devices_all_dirty_tracking(VFIOContainer *container)
             }
 
             if (vbasedev->pre_copy_dirty_page_tracking == ON_OFF_AUTO_OFF &&
-                (migration->device_state == VFIO_DEVICE_STATE_RUNNING ||
-                 migration->device_state == VFIO_DEVICE_STATE_PRE_COPY)) {
+                (vfio_device_state_is_running(vbasedev) ||
+                 vfio_device_state_is_precopy(vbasedev))) {
                 return false;
             }
         }
@@ -503,8 +533,8 @@ static bool vfio_devices_all_running_and_mig_active(VFIOContainer *container)
                 return false;
             }
 
-            if (migration->device_state == VFIO_DEVICE_STATE_RUNNING ||
-                migration->device_state == VFIO_DEVICE_STATE_PRE_COPY) {
+            if (vfio_device_state_is_running(vbasedev) ||
+                vfio_device_state_is_precopy(vbasedev)) {
                 continue;
             } else {
                 return false;
@@ -1371,6 +1401,8 @@ typedef struct VFIODirtyRanges {
     hwaddr max32;
     hwaddr min64;
     hwaddr max64;
+    hwaddr minpci64;
+    hwaddr maxpci64;
 } VFIODirtyRanges;
 
 typedef struct VFIODirtyRangesListener {
@@ -1379,6 +1411,31 @@ typedef struct VFIODirtyRangesListener {
     MemoryListener listener;
 } VFIODirtyRangesListener;
 
+static bool vfio_section_is_vfio_pci(MemoryRegionSection *section,
+                                     VFIOContainer *container)
+{
+    VFIOPCIDevice *pcidev;
+    VFIODevice *vbasedev;
+    VFIOGroup *group;
+    Object *owner;
+
+    owner = memory_region_owner(section->mr);
+
+    QLIST_FOREACH(group, &container->group_list, container_next) {
+        QLIST_FOREACH(vbasedev, &group->device_list, next) {
+            if (vbasedev->type != VFIO_DEVICE_TYPE_PCI) {
+                continue;
+            }
+            pcidev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
+            if (OBJECT(pcidev) == owner) {
+                return true;
+            }
+        }
+    }
+
+    return false;
+}
+
 static void vfio_dirty_tracking_update(MemoryListener *listener,
                                        MemoryRegionSection *section)
 {
@@ -1395,19 +1452,32 @@ static void vfio_dirty_tracking_update(MemoryListener *listener,
     }
 
     /*
-     * The address space passed to the dirty tracker is reduced to two ranges:
-     * one for 32-bit DMA ranges, and another one for 64-bit DMA ranges.
+     * The address space passed to the dirty tracker is reduced to three ranges:
+     * one for 32-bit DMA ranges, one for 64-bit DMA ranges and one for the
+     * PCI 64-bit hole.
+     *
      * The underlying reports of dirty will query a sub-interval of each of
      * these ranges.
      *
-     * The purpose of the dual range handling is to handle known cases of big
-     * holes in the address space, like the x86 AMD 1T hole. The alternative
-     * would be an IOVATree but that has a much bigger runtime overhead and
-     * unnecessary complexity.
+     * The purpose of the three range handling is to handle known cases of big
+     * holes in the address space, like the x86 AMD 1T hole, and firmware (like
+     * OVMF) which may relocate the pci-hole64 to the end of the address space.
+     * The latter would otherwise generate large ranges for tracking, stressing
+     * the limits of supported hardware. The pci-hole32 will always be below 4G
+     * (overlapping or not) so it doesn't need special handling and is part of
+     * the 32-bit range.
+     *
+     * The alternative would be an IOVATree but that has a much bigger runtime
+     * overhead and unnecessary complexity.
      */
-    min = (end <= UINT32_MAX) ? &range->min32 : &range->min64;
-    max = (end <= UINT32_MAX) ? &range->max32 : &range->max64;
-
+    if (vfio_section_is_vfio_pci(section, dirty->container) &&
+        iova >= UINT32_MAX) {
+        min = &range->minpci64;
+        max = &range->maxpci64;
+    } else {
+        min = (end <= UINT32_MAX) ? &range->min32 : &range->min64;
+        max = (end <= UINT32_MAX) ? &range->max32 : &range->max64;
+    }
     if (*min > iova) {
         *min = iova;
     }
@@ -1432,6 +1502,7 @@ static void vfio_dirty_tracking_init(VFIOContainer *container,
     memset(&dirty, 0, sizeof(dirty));
     dirty.ranges.min32 = UINT32_MAX;
     dirty.ranges.min64 = UINT64_MAX;
+    dirty.ranges.minpci64 = UINT64_MAX;
     dirty.listener = vfio_dirty_tracking_listener;
     dirty.container = container;
 
@@ -1502,7 +1573,8 @@ vfio_device_feature_dma_logging_start_create(VFIOContainer *container,
      * DMA logging uAPI guarantees to support at least a number of ranges that
      * fits into a single host kernel base page.
      */
-    control->num_ranges = !!tracking->max32 + !!tracking->max64;
+    control->num_ranges = !!tracking->max32 + !!tracking->max64 +
+        !!tracking->maxpci64;
     ranges = g_try_new0(struct vfio_device_feature_dma_logging_range,
                         control->num_ranges);
     if (!ranges) {
@@ -1521,11 +1593,17 @@ vfio_device_feature_dma_logging_start_create(VFIOContainer *container,
     if (tracking->max64) {
         ranges->iova = tracking->min64;
         ranges->length = (tracking->max64 - tracking->min64) + 1;
+        ranges++;
+    }
+    if (tracking->maxpci64) {
+        ranges->iova = tracking->minpci64;
+        ranges->length = (tracking->maxpci64 - tracking->minpci64) + 1;
     }
 
     trace_vfio_device_dirty_tracking_start(control->num_ranges,
                                            tracking->min32, tracking->max32,
-                                           tracking->min64, tracking->max64);
+                                           tracking->min64, tracking->max64,
+                                           tracking->minpci64, tracking->maxpci64);
 
     return feature;
 }
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index 2674f4bc47..da43dcd2fe 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -71,8 +71,12 @@ static const char *mig_state_to_str(enum vfio_device_mig_state state)
         return "STOP_COPY";
     case VFIO_DEVICE_STATE_RESUMING:
         return "RESUMING";
+    case VFIO_DEVICE_STATE_RUNNING_P2P:
+        return "RUNNING_P2P";
     case VFIO_DEVICE_STATE_PRE_COPY:
         return "PRE_COPY";
+    case VFIO_DEVICE_STATE_PRE_COPY_P2P:
+        return "PRE_COPY_P2P";
     default:
         return "UNKNOWN STATE";
     }
@@ -331,6 +335,36 @@ static bool vfio_precopy_supported(VFIODevice *vbasedev)
 
 /* ---------------------------------------------------------------------- */
 
+static int vfio_save_prepare(void *opaque, Error **errp)
+{
+    VFIODevice *vbasedev = opaque;
+
+    /*
+     * Snapshot doesn't use postcopy nor background snapshot, so allow snapshot
+     * even if they are on.
+     */
+    if (runstate_check(RUN_STATE_SAVE_VM)) {
+        return 0;
+    }
+
+    if (migrate_postcopy_ram()) {
+        error_setg(
+            errp, "%s: VFIO migration is not supported with postcopy migration",
+            vbasedev->name);
+        return -EOPNOTSUPP;
+    }
+
+    if (migrate_background_snapshot()) {
+        error_setg(
+            errp,
+            "%s: VFIO migration is not supported with background snapshot",
+            vbasedev->name);
+        return -EOPNOTSUPP;
+    }
+
+    return 0;
+}
+
 static int vfio_save_setup(QEMUFile *f, void *opaque)
 {
     VFIODevice *vbasedev = opaque;
@@ -383,6 +417,19 @@ static void vfio_save_cleanup(void *opaque)
     VFIODevice *vbasedev = opaque;
     VFIOMigration *migration = vbasedev->migration;
 
+    /*
+     * Changing device state from STOP_COPY to STOP can take time. Do it here,
+     * after migration has completed, so it won't increase downtime.
+     */
+    if (migration->device_state == VFIO_DEVICE_STATE_STOP_COPY) {
+        /*
+         * If setting the device in STOP state fails, the device should be
+         * reset. To do so, use ERROR state as a recover state.
+         */
+        vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_STOP,
+                                 VFIO_DEVICE_STATE_ERROR);
+    }
+
     g_free(migration->data_buffer);
     migration->data_buffer = NULL;
     migration->precopy_init_size = 0;
@@ -398,7 +445,7 @@ static void vfio_state_pending_estimate(void *opaque, uint64_t *must_precopy,
     VFIODevice *vbasedev = opaque;
     VFIOMigration *migration = vbasedev->migration;
 
-    if (migration->device_state != VFIO_DEVICE_STATE_PRE_COPY) {
+    if (!vfio_device_state_is_precopy(vbasedev)) {
         return;
     }
 
@@ -431,7 +478,7 @@ static void vfio_state_pending_exact(void *opaque, uint64_t *must_precopy,
     vfio_query_stop_copy_size(vbasedev, &stop_copy_size);
     *must_precopy += stop_copy_size;
 
-    if (migration->device_state == VFIO_DEVICE_STATE_PRE_COPY) {
+    if (vfio_device_state_is_precopy(vbasedev)) {
         vfio_query_precopy_size(migration);
 
         *must_precopy +=
@@ -446,9 +493,8 @@ static void vfio_state_pending_exact(void *opaque, uint64_t *must_precopy,
 static bool vfio_is_active_iterate(void *opaque)
 {
     VFIODevice *vbasedev = opaque;
-    VFIOMigration *migration = vbasedev->migration;
 
-    return migration->device_state == VFIO_DEVICE_STATE_PRE_COPY;
+    return vfio_device_state_is_precopy(vbasedev);
 }
 
 static int vfio_save_iterate(QEMUFile *f, void *opaque)
@@ -508,12 +554,6 @@ static int vfio_save_complete_precopy(QEMUFile *f, void *opaque)
         return ret;
     }
 
-    /*
-     * If setting the device in STOP state fails, the device should be reset.
-     * To do so, use ERROR state as a recover state.
-     */
-    ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_STOP,
-                                   VFIO_DEVICE_STATE_ERROR);
     trace_vfio_save_complete_precopy(vbasedev->name, ret);
 
     return ret;
@@ -630,6 +670,7 @@ static bool vfio_switchover_ack_needed(void *opaque)
 }
 
 static const SaveVMHandlers savevm_vfio_handlers = {
+    .save_prepare = vfio_save_prepare,
     .save_setup = vfio_save_setup,
     .save_cleanup = vfio_save_cleanup,
     .state_pending_estimate = vfio_state_pending_estimate,
@@ -646,18 +687,50 @@ static const SaveVMHandlers savevm_vfio_handlers = {
 
 /* ---------------------------------------------------------------------- */
 
-static void vfio_vmstate_change(void *opaque, bool running, RunState state)
+static void vfio_vmstate_change_prepare(void *opaque, bool running,
+                                        RunState state)
 {
     VFIODevice *vbasedev = opaque;
     VFIOMigration *migration = vbasedev->migration;
     enum vfio_device_mig_state new_state;
     int ret;
 
+    new_state = migration->device_state == VFIO_DEVICE_STATE_PRE_COPY ?
+                    VFIO_DEVICE_STATE_PRE_COPY_P2P :
+                    VFIO_DEVICE_STATE_RUNNING_P2P;
+
+    /*
+     * If setting the device in new_state fails, the device should be reset.
+     * To do so, use ERROR state as a recover state.
+     */
+    ret = vfio_migration_set_state(vbasedev, new_state,
+                                   VFIO_DEVICE_STATE_ERROR);
+    if (ret) {
+        /*
+         * Migration should be aborted in this case, but vm_state_notify()
+         * currently does not support reporting failures.
+         */
+        if (migrate_get_current()->to_dst_file) {
+            qemu_file_set_error(migrate_get_current()->to_dst_file, ret);
+        }
+    }
+
+    trace_vfio_vmstate_change_prepare(vbasedev->name, running,
+                                      RunState_str(state),
+                                      mig_state_to_str(new_state));
+}
+
+static void vfio_vmstate_change(void *opaque, bool running, RunState state)
+{
+    VFIODevice *vbasedev = opaque;
+    enum vfio_device_mig_state new_state;
+    int ret;
+
     if (running) {
         new_state = VFIO_DEVICE_STATE_RUNNING;
     } else {
         new_state =
-            (migration->device_state == VFIO_DEVICE_STATE_PRE_COPY &&
+            (vfio_device_state_is_precopy(vbasedev) &&
              (state == RUN_STATE_FINISH_MIGRATE || state == RUN_STATE_PAUSED)) ?
                 VFIO_DEVICE_STATE_STOP_COPY :
                 VFIO_DEVICE_STATE_STOP;
@@ -753,6 +826,7 @@ static int vfio_migration_init(VFIODevice *vbasedev)
     char id[256] = "";
     g_autofree char *path = NULL, *oid = NULL;
     uint64_t mig_flags = 0;
+    VMChangeStateHandler *prepare_cb;
 
     if (!vbasedev->ops->vfio_get_object) {
         return -EINVAL;
@@ -793,9 +867,11 @@ static int vfio_migration_init(VFIODevice *vbasedev)
     register_savevm_live(id, VMSTATE_INSTANCE_ID_ANY, 1, &savevm_vfio_handlers,
                          vbasedev);
 
-    migration->vm_state = qdev_add_vm_change_state_handler(vbasedev->dev,
-                                                           vfio_vmstate_change,
-                                                           vbasedev);
+    prepare_cb = migration->mig_flags & VFIO_MIGRATION_P2P ?
+                     vfio_vmstate_change_prepare :
+                     NULL;
+    migration->vm_state = qdev_add_vm_change_state_handler_full(
+        vbasedev->dev, vfio_vmstate_change, prepare_cb, vbasedev);
     migration->migration_state.notify = vfio_migration_state_notifier;
     add_migration_state_change_notifier(&migration->migration_state);
 
diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c
index f4ff836805..84b1a7b948 100644
--- a/hw/vfio/pci-quirks.c
+++ b/hw/vfio/pci-quirks.c
@@ -1610,121 +1610,6 @@ static int vfio_add_nv_gpudirect_cap(VFIOPCIDevice *vdev, Error **errp)
     return 0;
 }
 
-int vfio_pci_nvidia_v100_ram_init(VFIOPCIDevice *vdev, Error **errp)
-{
-    int ret;
-    void *p;
-    struct vfio_region_info *nv2reg = NULL;
-    struct vfio_info_cap_header *hdr;
-    struct vfio_region_info_cap_nvlink2_ssatgt *cap;
-    VFIOQuirk *quirk;
-
-    ret = vfio_get_dev_region_info(&vdev->vbasedev,
-                                   VFIO_REGION_TYPE_PCI_VENDOR_TYPE |
-                                   PCI_VENDOR_ID_NVIDIA,
-                                   VFIO_REGION_SUBTYPE_NVIDIA_NVLINK2_RAM,
-                                   &nv2reg);
-    if (ret) {
-        return ret;
-    }
-
-    hdr = vfio_get_region_info_cap(nv2reg, VFIO_REGION_INFO_CAP_NVLINK2_SSATGT);
-    if (!hdr) {
-        ret = -ENODEV;
-        goto free_exit;
-    }
-    cap = (void *) hdr;
-
-    p = mmap(NULL, nv2reg->size, PROT_READ | PROT_WRITE,
-             MAP_SHARED, vdev->vbasedev.fd, nv2reg->offset);
-    if (p == MAP_FAILED) {
-        ret = -errno;
-        goto free_exit;
-    }
-
-    quirk = vfio_quirk_alloc(1);
-    memory_region_init_ram_ptr(&quirk->mem[0], OBJECT(vdev), "nvlink2-mr",
-                               nv2reg->size, p);
-    QLIST_INSERT_HEAD(&vdev->bars[0].quirks, quirk, next);
-
-    object_property_add_uint64_ptr(OBJECT(vdev), "nvlink2-tgt",
-                                   (uint64_t *) &cap->tgt,
-                                   OBJ_PROP_FLAG_READ);
-    trace_vfio_pci_nvidia_gpu_setup_quirk(vdev->vbasedev.name, cap->tgt,
-                                          nv2reg->size);
-free_exit:
-    g_free(nv2reg);
-
-    return ret;
-}
-
-int vfio_pci_nvlink2_init(VFIOPCIDevice *vdev, Error **errp)
-{
-    int ret;
-    void *p;
-    struct vfio_region_info *atsdreg = NULL;
-    struct vfio_info_cap_header *hdr;
-    struct vfio_region_info_cap_nvlink2_ssatgt *captgt;
-    struct vfio_region_info_cap_nvlink2_lnkspd *capspeed;
-    VFIOQuirk *quirk;
-
-    ret = vfio_get_dev_region_info(&vdev->vbasedev,
-                                   VFIO_REGION_TYPE_PCI_VENDOR_TYPE |
-                                   PCI_VENDOR_ID_IBM,
-                                   VFIO_REGION_SUBTYPE_IBM_NVLINK2_ATSD,
-                                   &atsdreg);
-    if (ret) {
-        return ret;
-    }
-
-    hdr = vfio_get_region_info_cap(atsdreg,
-                                   VFIO_REGION_INFO_CAP_NVLINK2_SSATGT);
-    if (!hdr) {
-        ret = -ENODEV;
-        goto free_exit;
-    }
-    captgt = (void *) hdr;
-
-    hdr = vfio_get_region_info_cap(atsdreg,
-                                   VFIO_REGION_INFO_CAP_NVLINK2_LNKSPD);
-    if (!hdr) {
-        ret = -ENODEV;
-        goto free_exit;
-    }
-    capspeed = (void *) hdr;
-
-    /* Some NVLink bridges may not have assigned ATSD */
-    if (atsdreg->size) {
-        p = mmap(NULL, atsdreg->size, PROT_READ | PROT_WRITE,
-                 MAP_SHARED, vdev->vbasedev.fd, atsdreg->offset);
-        if (p == MAP_FAILED) {
-            ret = -errno;
-            goto free_exit;
-        }
-
-        quirk = vfio_quirk_alloc(1);
-        memory_region_init_ram_device_ptr(&quirk->mem[0], OBJECT(vdev),
-                                          "nvlink2-atsd-mr", atsdreg->size, p);
-        QLIST_INSERT_HEAD(&vdev->bars[0].quirks, quirk, next);
-    }
-
-    object_property_add_uint64_ptr(OBJECT(vdev), "nvlink2-tgt",
-                                   (uint64_t *) &captgt->tgt,
-                                   OBJ_PROP_FLAG_READ);
-    trace_vfio_pci_nvlink2_setup_quirk_ssatgt(vdev->vbasedev.name, captgt->tgt,
-                                              atsdreg->size);
-
-    object_property_add_uint32_ptr(OBJECT(vdev), "nvlink2-link-speed",
-                                   &capspeed->link_speed,
-                                   OBJ_PROP_FLAG_READ);
-    trace_vfio_pci_nvlink2_setup_quirk_lnkspd(vdev->vbasedev.name,
-                                              capspeed->link_speed);
-free_exit:
-    g_free(atsdreg);
-
-    return ret;
-}
-
 /*
  * The VMD endpoint provides a real PCIe domain to the guest and the guest
  * kernel performs enumeration of the VMD sub-device domain. Guest transactions
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index a205c6b113..3b2ca3c24c 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -3271,20 +3271,6 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
         }
     }
 
-    if (vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID)) {
-        ret = vfio_pci_nvidia_v100_ram_init(vdev, errp);
-        if (ret && ret != -ENODEV) {
-            error_report("Failed to setup NVIDIA V100 GPU RAM");
-        }
-    }
-
-    if (vfio_pci_is(vdev, PCI_VENDOR_ID_IBM, PCI_ANY_ID)) {
-        ret = vfio_pci_nvlink2_init(vdev, errp);
-        if (ret && ret != -ENODEV) {
-            error_report("Failed to setup NVlink2 bridge");
-        }
-    }
-
     if (!pdev->failover_pair_id) {
         if (!vfio_migration_realize(vbasedev, errp)) {
             goto out_deregister;
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index a2771b9ff3..2d836093a8 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -221,8 +221,6 @@ int vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp);
 int vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev,
                                struct vfio_region_info *info,
                                Error **errp);
-int vfio_pci_nvidia_v100_ram_init(VFIOPCIDevice *vdev, Error **errp);
-int vfio_pci_nvlink2_init(VFIOPCIDevice *vdev, Error **errp);
 
 void vfio_display_reset(VFIOPCIDevice *vdev);
 int vfio_display_probe(VFIOPCIDevice *vdev, Error **errp);
diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
index ee7509e68e..e64ca4a019 100644
--- a/hw/vfio/trace-events
+++ b/hw/vfio/trace-events
@@ -82,10 +82,6 @@ vfio_ioeventfd_handler(const char *name, uint64_t addr, unsigned size, uint64_t
 vfio_ioeventfd_init(const char *name, uint64_t addr, unsigned size, uint64_t data, bool vfio) "%s+0x%"PRIx64"[%d]:0x%"PRIx64" vfio:%d"
 vfio_pci_igd_opregion_enabled(const char *name) "%s"
 
-vfio_pci_nvidia_gpu_setup_quirk(const char *name, uint64_t tgt, uint64_t size) "%s tgt=0x%"PRIx64" size=0x%"PRIx64
-vfio_pci_nvlink2_setup_quirk_ssatgt(const char *name, uint64_t tgt, uint64_t size) "%s tgt=0x%"PRIx64" size=0x%"PRIx64
-vfio_pci_nvlink2_setup_quirk_lnkspd(const char *name, uint32_t link_speed) "%s link_speed=0x%x"
-
 # igd.c
 vfio_pci_igd_bar4_write(const char *name, uint32_t index, uint32_t data, uint32_t base) "%s [0x%03x] 0x%08x -> 0x%08x"
 vfio_pci_igd_bdsm_enabled(const char *name, int size) "%s %dMB"
@@ -104,7 +100,7 @@ vfio_known_safe_misalignment(const char *name, uint64_t iova, uint64_t offset_wi
 vfio_listener_region_add_no_dma_map(const char *name, uint64_t iova, uint64_t size, uint64_t page_size) "Region \"%s\" 0x%"PRIx64" size=0x%"PRIx64" is not aligned to 0x%"PRIx64" and cannot be mapped for DMA"
 vfio_listener_region_del(uint64_t start, uint64_t end) "region_del 0x%"PRIx64" - 0x%"PRIx64
 vfio_device_dirty_tracking_update(uint64_t start, uint64_t end, uint64_t min, uint64_t max) "section 0x%"PRIx64" - 0x%"PRIx64" -> update [0x%"PRIx64" - 0x%"PRIx64"]"
-vfio_device_dirty_tracking_start(int nr_ranges, uint64_t min32, uint64_t max32, uint64_t min64, uint64_t max64) "nr_ranges %d 32:[0x%"PRIx64" - 0x%"PRIx64"], 64:[0x%"PRIx64" - 0x%"PRIx64"]"
+vfio_device_dirty_tracking_start(int nr_ranges, uint64_t min32, uint64_t max32, uint64_t min64, uint64_t max64, uint64_t minpci, uint64_t maxpci) "nr_ranges %d 32:[0x%"PRIx64" - 0x%"PRIx64"], 64:[0x%"PRIx64" - 0x%"PRIx64"], pci64:[0x%"PRIx64" - 0x%"PRIx64"]"
 vfio_disconnect_container(int fd) "close container->fd=%d"
 vfio_put_group(int fd) "close group->fd=%d"
 vfio_get_device(const char * name, unsigned int flags, unsigned int num_regions, unsigned int num_irqs) "Device %s flags: %u, regions: %u, irqs: %u"
@@ -167,3 +163,4 @@ vfio_save_setup(const char *name, uint64_t data_buffer_size) " (%s) data buffer
 vfio_state_pending_estimate(const char *name, uint64_t precopy, uint64_t postcopy, uint64_t precopy_init_size, uint64_t precopy_dirty_size) " (%s) precopy 0x%"PRIx64" postcopy 0x%"PRIx64" precopy initial size 0x%"PRIx64" precopy dirty size 0x%"PRIx64
 vfio_state_pending_exact(const char *name, uint64_t precopy, uint64_t postcopy, uint64_t stopcopy_size, uint64_t precopy_init_size, uint64_t precopy_dirty_size) " (%s) precopy 0x%"PRIx64" postcopy 0x%"PRIx64" stopcopy size 0x%"PRIx64" precopy initial size 0x%"PRIx64" precopy dirty size 0x%"PRIx64
 vfio_vmstate_change(const char *name, int running, const char *reason, const char *dev_state) " (%s) running %d reason %s device state %s"
+vfio_vmstate_change_prepare(const char *name, int running, const char *reason, const char *dev_state) " (%s) running %d reason %s device state %s"
diff --git a/hw/virtio/virtio-crypto.c b/hw/virtio/virtio-crypto.c
index 13aec771e1..0e2cc8d5a8 100644
--- a/hw/virtio/virtio-crypto.c
+++ b/hw/virtio/virtio-crypto.c
@@ -655,7 +655,7 @@ virtio_crypto_sym_op_helper(VirtIODevice *vdev,
     op_info->len_to_hash = len_to_hash;
     op_info->cipher_start_src_offset = cipher_start_src_offset;
     op_info->len_to_cipher = len_to_cipher;
-    /* Handle the initilization vector */
+    /* Handle the initialization vector */
     if (op_info->iv_len > 0) {
         DPRINTF("iv_len=%" PRIu32 "\n", op_info->iv_len);
         op_info->iv = op_info->data + curr_size;
@@ -1278,7 +1278,7 @@ static void virtio_crypto_instance_init(Object *obj)
 
     /*
      * The default config_size is sizeof(struct virtio_crypto_config).
-     * Can be overriden with virtio_crypto_set_config_size.
+     * Can be overridden with virtio_crypto_set_config_size.
      */
     vcrypto->config_size = sizeof(struct virtio_crypto_config);
 }
diff --git a/hw/virtio/virtio-mem.c b/hw/virtio/virtio-mem.c
index b6e781741e..da5b09cefc 100644
--- a/hw/virtio/virtio-mem.c
+++ b/hw/virtio/virtio-mem.c
@@ -1119,7 +1119,7 @@ static int virtio_mem_mig_sanity_checks_post_load(void *opaque, int version_id)
         return -EINVAL;
     }
     /*
-     * Note: Preparation for resizeable memory regions. The maximum size
+     * Note: Preparation for resizable memory regions. The maximum size
      * of the memory region must not change during migration.
      */
     if (tmp->region_size != new_region_size) {
diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 309038fd46..4577f3f5b3 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -2096,7 +2096,7 @@ void virtio_queue_enable(VirtIODevice *vdev, uint32_t queue_index)
      * being converted to LOG_GUEST_ERROR.
      *
     if (!virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
-        error_report("queue_enable is only suppported in devices of virtio "
+        error_report("queue_enable is only supported in devices of virtio "
                      "1.0 or later.");
     }
     */
@@ -2825,8 +2825,9 @@ static int virtio_device_put(QEMUFile *f, void *opaque, size_t size,
 }
 
 /* A wrapper for use as a VMState .get function */
-static int virtio_device_get(QEMUFile *f, void *opaque, size_t size,
-                             const VMStateField *field)
+static int coroutine_mixed_fn
+virtio_device_get(QEMUFile *f, void *opaque, size_t size,
+                  const VMStateField *field)
 {
     VirtIODevice *vdev = VIRTIO_DEVICE(opaque);
     DeviceClass *dc = DEVICE_CLASS(VIRTIO_DEVICE_GET_CLASS(vdev));
@@ -2853,6 +2854,39 @@ static int virtio_set_features_nocheck(VirtIODevice *vdev, uint64_t val)
     return bad ? -1 : 0;
 }
 
+typedef struct VirtioSetFeaturesNocheckData {
+    Coroutine *co;
+    VirtIODevice *vdev;
+    uint64_t val;
+    int ret;
+} VirtioSetFeaturesNocheckData;
+
+static void virtio_set_features_nocheck_bh(void *opaque)
+{
+    VirtioSetFeaturesNocheckData *data = opaque;
+
+    data->ret = virtio_set_features_nocheck(data->vdev, data->val);
+    aio_co_wake(data->co);
+}
+
+static int coroutine_mixed_fn
+virtio_set_features_nocheck_maybe_co(VirtIODevice *vdev, uint64_t val)
+{
+    if (qemu_in_coroutine()) {
+        VirtioSetFeaturesNocheckData data = {
+            .co = qemu_coroutine_self(),
+            .vdev = vdev,
+            .val = val,
+        };
+        aio_bh_schedule_oneshot(qemu_get_current_aio_context(),
+                                virtio_set_features_nocheck_bh, &data);
+        qemu_coroutine_yield();
+        return data.ret;
+    } else {
+        return virtio_set_features_nocheck(vdev, val);
+    }
+}
+
 int virtio_set_features(VirtIODevice *vdev, uint64_t val)
 {
     int ret;
@@ -2906,7 +2940,8 @@ size_t virtio_get_config_size(const VirtIOConfigSizeParams *params,
     return config_size;
 }
 
-int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
+int coroutine_mixed_fn
+virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
 {
     int i, ret;
     int32_t config_len;
@@ -3023,14 +3058,14 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
          * host_features.
          */
         uint64_t features64 = vdev->guest_features;
-        if (virtio_set_features_nocheck(vdev, features64) < 0) {
+        if (virtio_set_features_nocheck_maybe_co(vdev, features64) < 0) {
             error_report("Features 0x%" PRIx64 " unsupported. "
                          "Allowed features: 0x%" PRIx64,
                          features64, vdev->host_features);
             return -1;
         }
     } else {
-        if (virtio_set_features_nocheck(vdev, features) < 0) {
+        if (virtio_set_features_nocheck_maybe_co(vdev, features) < 0) {
             error_report("Features 0x%x unsupported. "
                          "Allowed features: 0x%" PRIx64,
                          features, vdev->host_features);
diff --git a/hw/xen/xen_pt.h b/hw/xen/xen_pt.h
index b20744f7c7..31bcfdf705 100644
--- a/hw/xen/xen_pt.h
+++ b/hw/xen/xen_pt.h
@@ -340,11 +340,9 @@ static inline bool xen_pt_has_msix_mapping(XenPCIPassthroughState *s, int bar)
     return s->msix && s->msix->bar_index == bar;
 }
 
-extern void *pci_assign_dev_load_option_rom(PCIDevice *dev,
-                                            int *size,
-                                            unsigned int domain,
-                                            unsigned int bus, unsigned int slot,
-                                            unsigned int function);
+void *pci_assign_dev_load_option_rom(PCIDevice *dev, int *size,
+                                     unsigned int domain, unsigned int bus,
+                                     unsigned int slot, unsigned int function);
 static inline bool is_igd_vga_passthrough(XenHostPCIDevice *dev)
 {
     return (xen_igd_gfx_pt_enabled()
diff --git a/hw/xen/xen_pvdev.c b/hw/xen/xen_pvdev.c
index be1504b82c..c5ad71e8dc 100644
--- a/hw/xen/xen_pvdev.c
+++ b/hw/xen/xen_pvdev.c
@@ -89,7 +89,7 @@ char *xenstore_read_str(const char *base, const char *node)
     str = qemu_xen_xs_read(xenstore, 0, abspath, &len);
     if (str != NULL) {
         /* move to qemu-allocated memory to make sure
-         * callers can savely g_free() stuff. */
+         * callers can safely g_free() stuff. */
         ret = g_strdup(str);
         free(str);
     }
diff --git a/hw/xtensa/pic_cpu.c b/hw/xtensa/pic_cpu.c
index 6c9447565d..8cef88c61b 100644
--- a/hw/xtensa/pic_cpu.c
+++ b/hw/xtensa/pic_cpu.c
@@ -30,6 +30,7 @@
 #include "hw/irq.h"
 #include "qemu/log.h"
 #include "qemu/timer.h"
+#include "qemu/atomic.h"
 
 void check_interrupts(CPUXtensaState *env)
 {
diff --git a/hw/xtensa/sim.c b/hw/xtensa/sim.c
index 946c71cb5b..2160e61964 100644
--- a/hw/xtensa/sim.c
+++ b/hw/xtensa/sim.c
@@ -96,16 +96,11 @@ XtensaCPU *xtensa_sim_common_init(MachineState *machine)
 void xtensa_sim_load_kernel(XtensaCPU *cpu, MachineState *machine)
 {
     const char *kernel_filename = machine->kernel_filename;
-#if TARGET_BIG_ENDIAN
-    int big_endian = true;
-#else
-    int big_endian = false;
-#endif
 
     if (kernel_filename) {
         uint64_t elf_entry;
         int success = load_elf(kernel_filename, NULL, translate_phys_addr, cpu,
-                               &elf_entry, NULL, NULL, NULL, big_endian,
+                               &elf_entry, NULL, NULL, NULL, TARGET_BIG_ENDIAN,
                                EM_XTENSA, 0, 0);
 
         if (success > 0) {
diff --git a/hw/xtensa/xtfpga.c b/hw/xtensa/xtfpga.c
index 2a5556a35f..fbad1c83a3 100644
--- a/hw/xtensa/xtfpga.c
+++ b/hw/xtensa/xtfpga.c
@@ -219,11 +219,6 @@ static const MemoryRegionOps xtfpga_io_ops = {
 
 static void xtfpga_init(const XtfpgaBoardDesc *board, MachineState *machine)
 {
-#if TARGET_BIG_ENDIAN
-    int be = 1;
-#else
-    int be = 0;
-#endif
     MemoryRegion *system_memory = get_system_memory();
     XtensaCPU *cpu = NULL;
     CPUXtensaState *env = NULL;
@@ -316,7 +311,7 @@ static void xtfpga_init(const XtfpgaBoardDesc *board, MachineState *machine)
 
     dinfo = drive_get(IF_PFLASH, 0, 0);
     if (dinfo) {
-        flash = xtfpga_flash_init(system_io, board, dinfo, be);
+        flash = xtfpga_flash_init(system_io, board, dinfo, TARGET_BIG_ENDIAN);
     }
 
     /* Use presence of kernel file name as 'boot from SRAM' switch. */
@@ -412,7 +407,8 @@ static void xtfpga_init(const XtfpgaBoardDesc *board, MachineState *machine)
 
         uint64_t elf_entry;
         int success = load_elf(kernel_filename, NULL, translate_phys_addr, cpu,
-                &elf_entry, NULL, NULL, NULL, be, EM_XTENSA, 0, 0);
+                               &elf_entry, NULL, NULL, NULL, TARGET_BIG_ENDIAN,
+                               EM_XTENSA, 0, 0);
         if (success > 0) {
             entry_point = elf_entry;
         } else {
diff --git a/include/block/aio.h b/include/block/aio.h
index 32042e8905..f08b358077 100644
--- a/include/block/aio.h
+++ b/include/block/aio.h
@@ -31,7 +31,6 @@ typedef void BlockCompletionFunc(void *opaque, int ret);
 
 typedef struct AIOCBInfo {
     void (*cancel_async)(BlockAIOCB *acb);
-    AioContext *(*get_aio_context)(BlockAIOCB *acb);
     size_t aiocb_size;
 } AIOCBInfo;
 
@@ -468,7 +467,7 @@ void aio_dispatch(AioContext *ctx);
  * or more AIO events have completed, to ensure something has moved
  * before returning.
  */
-bool aio_poll(AioContext *ctx, bool blocking);
+bool no_coroutine_fn aio_poll(AioContext *ctx, bool blocking);
 
 /* Register a file descriptor and associated callbacks.  Behaves very similarly
  * to qemu_set_fd_handler.  Unlike qemu_set_fd_handler, these callbacks will
diff --git a/include/block/block-common.h b/include/block/block-common.h
index df5ffc8d09..2d2af7230d 100644
--- a/include/block/block-common.h
+++ b/include/block/block-common.h
@@ -66,10 +66,14 @@
  * function. The coroutine yields after scheduling the BH and is reentered when
  * the wrapped function returns.
  *
+ * A no_co_wrapper_bdrv_wrlock function is a no_co_wrapper function that
+ * automatically takes the graph wrlock when calling the wrapped function.
+ *
  * If the first parameter of the function is a BlockDriverState, BdrvChild or
  * BlockBackend pointer, the AioContext lock for it is taken in the wrapper.
  */
 #define no_co_wrapper
+#define no_co_wrapper_bdrv_wrlock
 
 #include "block/blockjob.h"
 
@@ -287,6 +291,8 @@ typedef enum {
  *                       layer rather than any backing, set by block layer
  * BDRV_BLOCK_EOF: the returned pnum covers through end of file for this
  *                 layer, set by block layer
+ * BDRV_BLOCK_COMPRESSED: the underlying data is compressed; only valid for
+ *                        the formats supporting compression: qcow, qcow2
  *
  * Internal flags:
  * BDRV_BLOCK_RAW: for use by passthrough drivers, such as raw, to request
@@ -322,6 +328,7 @@ typedef enum {
 #define BDRV_BLOCK_ALLOCATED    0x10
 #define BDRV_BLOCK_EOF          0x20
 #define BDRV_BLOCK_RECURSE      0x40
+#define BDRV_BLOCK_COMPRESSED   0x80
 
 typedef QTAILQ_HEAD(BlockReopenQueue, BlockReopenQueueEntry) BlockReopenQueue;
 
diff --git a/include/block/block-global-state.h b/include/block/block-global-state.h
index f347199bff..6061220a6c 100644
--- a/include/block/block-global-state.h
+++ b/include/block/block-global-state.h
@@ -185,6 +185,8 @@ void bdrv_drain_all_begin_nopoll(void);
 void bdrv_drain_all_end(void);
 void bdrv_drain_all(void);
 
+void bdrv_aio_cancel(BlockAIOCB *acb);
+
 int bdrv_has_zero_init_1(BlockDriverState *bs);
 int bdrv_has_zero_init(BlockDriverState *bs);
 BlockDriverState *bdrv_find_node(const char *node_name);
@@ -224,13 +226,21 @@ void bdrv_img_create(const char *filename, const char *fmt,
 void bdrv_ref(BlockDriverState *bs);
 void no_coroutine_fn bdrv_unref(BlockDriverState *bs);
 void coroutine_fn no_co_wrapper bdrv_co_unref(BlockDriverState *bs);
-void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child);
-BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
-                             BlockDriverState *child_bs,
-                             const char *child_name,
-                             const BdrvChildClass *child_class,
-                             BdrvChildRole child_role,
-                             Error **errp);
+void GRAPH_WRLOCK bdrv_schedule_unref(BlockDriverState *bs);
+
+void GRAPH_WRLOCK
+bdrv_unref_child(BlockDriverState *parent, BdrvChild *child);
+
+void coroutine_fn no_co_wrapper_bdrv_wrlock
+bdrv_co_unref_child(BlockDriverState *parent, BdrvChild *child);
+
+BdrvChild * GRAPH_WRLOCK
+bdrv_attach_child(BlockDriverState *parent_bs,
+                  BlockDriverState *child_bs,
+                  const char *child_name,
+                  const BdrvChildClass *child_class,
+                  BdrvChildRole child_role,
+                  Error **errp);
 
 bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp);
 void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason);
@@ -268,9 +278,11 @@ int bdrv_try_change_aio_context(BlockDriverState *bs, AioContext *ctx,
 int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz);
 int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo);
 
-void bdrv_add_child(BlockDriverState *parent, BlockDriverState *child,
-                    Error **errp);
-void bdrv_del_child(BlockDriverState *parent, BdrvChild *child, Error **errp);
+void GRAPH_WRLOCK
+bdrv_add_child(BlockDriverState *parent, BlockDriverState *child, Error **errp);
+
+void GRAPH_WRLOCK
+bdrv_del_child(BlockDriverState *parent, BdrvChild *child, Error **errp);
 
 /**
  *
diff --git a/include/block/block-io.h b/include/block/block-io.h
index 6db48f2d35..f1c796a1ce 100644
--- a/include/block/block-io.h
+++ b/include/block/block-io.h
@@ -101,7 +101,6 @@ bdrv_co_delete_file_noerr(BlockDriverState *bs);
 
 
 /* async block I/O */
-void bdrv_aio_cancel(BlockAIOCB *acb);
 void bdrv_aio_cancel_async(BlockAIOCB *acb);
 
 /* sg packet commands */
diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h
index 74195c3004..2ca3758cb8 100644
--- a/include/block/block_int-common.h
+++ b/include/block/block_int-common.h
@@ -311,7 +311,7 @@ struct BlockDriver {
      */
     void (*bdrv_cancel_in_flight)(BlockDriverState *bs);
 
-    int (*bdrv_inactivate)(BlockDriverState *bs);
+    int GRAPH_RDLOCK_PTR (*bdrv_inactivate)(BlockDriverState *bs);
 
     int (*bdrv_snapshot_create)(BlockDriverState *bs,
                                 QEMUSnapshotInfo *sn_info);
@@ -393,10 +393,11 @@ struct BlockDriver {
      */
     int (*bdrv_probe_geometry)(BlockDriverState *bs, HDGeometry *geo);
 
-    void (*bdrv_add_child)(BlockDriverState *parent, BlockDriverState *child,
-                           Error **errp);
-    void (*bdrv_del_child)(BlockDriverState *parent, BdrvChild *child,
-                           Error **errp);
+    void GRAPH_WRLOCK_PTR (*bdrv_add_child)(
+        BlockDriverState *parent, BlockDriverState *child, Error **errp);
+
+    void GRAPH_WRLOCK_PTR (*bdrv_del_child)(
+        BlockDriverState *parent, BdrvChild *child, Error **errp);
 
     /**
      * Informs the block driver that a permission change is intended. The
@@ -413,12 +414,12 @@ struct BlockDriver {
      * If both conditions are met, 0 is returned. Otherwise, -errno is returned
      * and errp is set to an error describing the conflict.
      */
-    int (*bdrv_check_perm)(BlockDriverState *bs, uint64_t perm,
-                           uint64_t shared, Error **errp);
+    int GRAPH_RDLOCK_PTR (*bdrv_check_perm)(BlockDriverState *bs, uint64_t perm,
+                                            uint64_t shared, Error **errp);
 
     /**
      * Called to inform the driver that the set of cumulative set of used
-     * permissions for @bs has changed to @perm, and the set of sharable
+     * permissions for @bs has changed to @perm, and the set of shareable
      * permission to @shared. The driver can use this to propagate changes to
      * its children (i.e. request permissions only if a parent actually needs
      * them).
@@ -426,7 +427,8 @@ struct BlockDriver {
      * This function is only invoked after bdrv_check_perm(), so block drivers
      * may rely on preparations made in their .bdrv_check_perm implementation.
      */
-    void (*bdrv_set_perm)(BlockDriverState *bs, uint64_t perm, uint64_t shared);
+    void GRAPH_RDLOCK_PTR (*bdrv_set_perm)(
+        BlockDriverState *bs, uint64_t perm, uint64_t shared);
 
     /*
      * Called to inform the driver that after a previous bdrv_check_perm()
@@ -436,7 +438,7 @@ struct BlockDriver {
      * This function can be called even for nodes that never saw a
      * bdrv_check_perm() call. It is a no-op then.
      */
-    void (*bdrv_abort_perm_update)(BlockDriverState *bs);
+    void GRAPH_RDLOCK_PTR (*bdrv_abort_perm_update)(BlockDriverState *bs);
 
     /**
      * Returns in @nperm and @nshared the permissions that the driver for @bs
@@ -450,11 +452,11 @@ struct BlockDriver {
      * permissions, but those that will be needed after applying the
      * @reopen_queue.
      */
-     void (*bdrv_child_perm)(BlockDriverState *bs, BdrvChild *c,
-                             BdrvChildRole role,
-                             BlockReopenQueue *reopen_queue,
-                             uint64_t parent_perm, uint64_t parent_shared,
-                             uint64_t *nperm, uint64_t *nshared);
+     void GRAPH_RDLOCK_PTR (*bdrv_child_perm)(
+        BlockDriverState *bs, BdrvChild *c, BdrvChildRole role,
+        BlockReopenQueue *reopen_queue,
+        uint64_t parent_perm, uint64_t parent_shared,
+        uint64_t *nperm, uint64_t *nshared);
 
     /**
      * Register/unregister a buffer for I/O. For example, when the driver is
@@ -944,8 +946,8 @@ struct BdrvChildClass {
      * when migration is completing) and it can start/stop requesting
      * permissions and doing I/O on it.
      */
-    void (*activate)(BdrvChild *child, Error **errp);
-    int (*inactivate)(BdrvChild *child);
+    void GRAPH_RDLOCK_PTR (*activate)(BdrvChild *child, Error **errp);
+    int GRAPH_RDLOCK_PTR (*inactivate)(BdrvChild *child);
 
     void GRAPH_WRLOCK_PTR (*attach)(BdrvChild *child);
     void GRAPH_WRLOCK_PTR (*detach)(BdrvChild *child);
@@ -1231,7 +1233,7 @@ struct BlockDriverState {
     unsigned int write_gen;               /* Current data generation */
 
     /* Protected by reqs_lock.  */
-    CoMutex reqs_lock;
+    QemuMutex reqs_lock;
     QLIST_HEAD(, BdrvTrackedRequest) tracked_requests;
     CoQueue flush_queue;                  /* Serializing flush queue */
     bool active_flush_req;                /* Flush request in flight? */
diff --git a/include/block/block_int-global-state.h b/include/block/block_int-global-state.h
index da5fb31089..074b677838 100644
--- a/include/block/block_int-global-state.h
+++ b/include/block/block_int-global-state.h
@@ -202,18 +202,19 @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
                                   BdrvChildRole child_role,
                                   uint64_t perm, uint64_t shared_perm,
                                   void *opaque, Error **errp);
-void bdrv_root_unref_child(BdrvChild *child);
+void GRAPH_WRLOCK bdrv_root_unref_child(BdrvChild *child);
 
-void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm,
-                              uint64_t *shared_perm);
+void GRAPH_RDLOCK bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm,
+                                           uint64_t *shared_perm);
 
 /**
  * Sets a BdrvChild's permissions.  Avoid if the parent is a BDS; use
  * bdrv_child_refresh_perms() instead and make the parent's
  * .bdrv_child_perm() implementation return the correct values.
  */
-int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared,
-                            Error **errp);
+int GRAPH_RDLOCK
+bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared,
+                        Error **errp);
 
 /**
  * Calls bs->drv->bdrv_child_perm() and updates the child's permission
@@ -223,7 +224,8 @@ int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared,
  * values than before, but which will not result in the block layer
  * automatically refreshing the permissions.
  */
-int bdrv_child_refresh_perms(BlockDriverState *bs, BdrvChild *c, Error **errp);
+int GRAPH_RDLOCK
+bdrv_child_refresh_perms(BlockDriverState *bs, BdrvChild *c, Error **errp);
 
 bool GRAPH_RDLOCK bdrv_recurse_can_replace(BlockDriverState *bs,
                                            BlockDriverState *to_replace);
diff --git a/include/block/nbd.h b/include/block/nbd.h
index 4428bcffbb..f672b76173 100644
--- a/include/block/nbd.h
+++ b/include/block/nbd.h
@@ -324,8 +324,7 @@ typedef struct NBDExportInfo {
     char **contexts;
 } NBDExportInfo;
 
-int nbd_receive_negotiate(AioContext *aio_context, QIOChannel *ioc,
-                          QCryptoTLSCreds *tlscreds,
+int nbd_receive_negotiate(QIOChannel *ioc, QCryptoTLSCreds *tlscreds,
                           const char *hostname, QIOChannel **outioc,
                           NBDExportInfo *info, Error **errp);
 void nbd_free_export_list(NBDExportInfo *info, int count);
diff --git a/include/block/qapi.h b/include/block/qapi.h
index 18d48ddb70..8663971c58 100644
--- a/include/block/qapi.h
+++ b/include/block/qapi.h
@@ -36,9 +36,6 @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk,
 int bdrv_query_snapshot_info_list(BlockDriverState *bs,
                                   SnapshotInfoList **p_list,
                                   Error **errp);
-void bdrv_query_block_node_info(BlockDriverState *bs,
-                                BlockNodeInfo **p_info,
-                                Error **errp);
 void bdrv_query_image_info(BlockDriverState *bs,
                            ImageInfo **p_info,
                            bool flat,
diff --git a/include/block/throttle-groups.h b/include/block/throttle-groups.h
index ff282fc0f8..2355e8d9de 100644
--- a/include/block/throttle-groups.h
+++ b/include/block/throttle-groups.h
@@ -37,7 +37,7 @@ typedef struct ThrottleGroupMember {
     AioContext   *aio_context;
     /* throttled_reqs_lock protects the CoQueues for throttled requests.  */
     CoMutex      throttled_reqs_lock;
-    CoQueue      throttled_reqs[2];
+    CoQueue      throttled_reqs[THROTTLE_MAX];
 
     /* Nonzero if the I/O limits are currently being ignored; generally
      * it is zero.  Accessed with atomic operations.
@@ -54,7 +54,7 @@ typedef struct ThrottleGroupMember {
      * throttle_state tells us if I/O limits are configured. */
     ThrottleState *throttle_state;
     ThrottleTimers throttle_timers;
-    unsigned       pending_reqs[2];
+    unsigned       pending_reqs[THROTTLE_MAX];
     QLIST_ENTRY(ThrottleGroupMember) round_robin;
 
 } ThrottleGroupMember;
@@ -78,7 +78,7 @@ void throttle_group_restart_tgm(ThrottleGroupMember *tgm);
 
 void coroutine_fn throttle_group_co_io_limits_intercept(ThrottleGroupMember *tgm,
                                                         int64_t bytes,
-                                                        bool is_write);
+                                                        ThrottleDirection direction);
 void throttle_group_attach_aio_context(ThrottleGroupMember *tgm,
                                        AioContext *new_context);
 void throttle_group_detach_aio_context(ThrottleGroupMember *tgm);
diff --git a/include/block/ufs.h b/include/block/ufs.h
new file mode 100644
index 0000000000..fd884eb8ce
--- /dev/null
+++ b/include/block/ufs.h
@@ -0,0 +1,1090 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#ifndef BLOCK_UFS_H
+#define BLOCK_UFS_H
+
+#include "hw/registerfields.h"
+
+typedef struct QEMU_PACKED UfsReg {
+    uint32_t cap;
+    uint32_t rsvd0;
+    uint32_t ver;
+    uint32_t rsvd1;
+    uint32_t hcpid;
+    uint32_t hcmid;
+    uint32_t ahit;
+    uint32_t rsvd2;
+    uint32_t is;
+    uint32_t ie;
+    uint32_t rsvd3[2];
+    uint32_t hcs;
+    uint32_t hce;
+    uint32_t uecpa;
+    uint32_t uecdl;
+    uint32_t uecn;
+    uint32_t uect;
+    uint32_t uecdme;
+    uint32_t utriacr;
+    uint32_t utrlba;
+    uint32_t utrlbau;
+    uint32_t utrldbr;
+    uint32_t utrlclr;
+    uint32_t utrlrsr;
+    uint32_t utrlcnr;
+    uint32_t rsvd4[2];
+    uint32_t utmrlba;
+    uint32_t utmrlbau;
+    uint32_t utmrldbr;
+    uint32_t utmrlclr;
+    uint32_t utmrlrsr;
+    uint32_t rsvd5[3];
+    uint32_t uiccmd;
+    uint32_t ucmdarg1;
+    uint32_t ucmdarg2;
+    uint32_t ucmdarg3;
+    uint32_t rsvd6[4];
+    uint32_t rsvd7[4];
+    uint32_t rsvd8[16];
+    uint32_t ccap;
+} UfsReg;
+
+REG32(CAP, offsetof(UfsReg, cap))
+    FIELD(CAP, NUTRS, 0, 5)
+    FIELD(CAP, RTT, 8, 8)
+    FIELD(CAP, NUTMRS, 16, 3)
+    FIELD(CAP, AUTOH8, 23, 1)
+    FIELD(CAP, 64AS, 24, 1)
+    FIELD(CAP, OODDS, 25, 1)
+    FIELD(CAP, UICDMETMS, 26, 1)
+    FIELD(CAP, CS, 28, 1)
+REG32(VER, offsetof(UfsReg, ver))
+REG32(HCPID, offsetof(UfsReg, hcpid))
+REG32(HCMID, offsetof(UfsReg, hcmid))
+REG32(AHIT, offsetof(UfsReg, ahit))
+REG32(IS, offsetof(UfsReg, is))
+    FIELD(IS, UTRCS, 0, 1)
+    FIELD(IS, UDEPRI, 1, 1)
+    FIELD(IS, UE, 2, 1)
+    FIELD(IS, UTMS, 3, 1)
+    FIELD(IS, UPMS, 4, 1)
+    FIELD(IS, UHXS, 5, 1)
+    FIELD(IS, UHES, 6, 1)
+    FIELD(IS, ULLS, 7, 1)
+    FIELD(IS, ULSS, 8, 1)
+    FIELD(IS, UTMRCS, 9, 1)
+    FIELD(IS, UCCS, 10, 1)
+    FIELD(IS, DFES, 11, 1)
+    FIELD(IS, UTPES, 12, 1)
+    FIELD(IS, HCFES, 16, 1)
+    FIELD(IS, SBFES, 17, 1)
+    FIELD(IS, CEFES, 18, 1)
+REG32(IE, offsetof(UfsReg, ie))
+    FIELD(IE, UTRCE, 0, 1)
+    FIELD(IE, UDEPRIE, 1, 1)
+    FIELD(IE, UEE, 2, 1)
+    FIELD(IE, UTMSE, 3, 1)
+    FIELD(IE, UPMSE, 4, 1)
+    FIELD(IE, UHXSE, 5, 1)
+    FIELD(IE, UHESE, 6, 1)
+    FIELD(IE, ULLSE, 7, 1)
+    FIELD(IE, ULSSE, 8, 1)
+    FIELD(IE, UTMRCE, 9, 1)
+    FIELD(IE, UCCE, 10, 1)
+    FIELD(IE, DFEE, 11, 1)
+    FIELD(IE, UTPEE, 12, 1)
+    FIELD(IE, HCFEE, 16, 1)
+    FIELD(IE, SBFEE, 17, 1)
+    FIELD(IE, CEFEE, 18, 1)
+REG32(HCS, offsetof(UfsReg, hcs))
+    FIELD(HCS, DP, 0, 1)
+    FIELD(HCS, UTRLRDY, 1, 1)
+    FIELD(HCS, UTMRLRDY, 2, 1)
+    FIELD(HCS, UCRDY, 3, 1)
+    FIELD(HCS, UPMCRS, 8, 3)
+REG32(HCE, offsetof(UfsReg, hce))
+    FIELD(HCE, HCE, 0, 1)
+    FIELD(HCE, CGE, 1, 1)
+REG32(UECPA, offsetof(UfsReg, uecpa))
+REG32(UECDL, offsetof(UfsReg, uecdl))
+REG32(UECN, offsetof(UfsReg, uecn))
+REG32(UECT, offsetof(UfsReg, uect))
+REG32(UECDME, offsetof(UfsReg, uecdme))
+REG32(UTRIACR, offsetof(UfsReg, utriacr))
+REG32(UTRLBA, offsetof(UfsReg, utrlba))
+    FIELD(UTRLBA, UTRLBA, 9, 22)
+REG32(UTRLBAU, offsetof(UfsReg, utrlbau))
+REG32(UTRLDBR, offsetof(UfsReg, utrldbr))
+REG32(UTRLCLR, offsetof(UfsReg, utrlclr))
+REG32(UTRLRSR, offsetof(UfsReg, utrlrsr))
+REG32(UTRLCNR, offsetof(UfsReg, utrlcnr))
+REG32(UTMRLBA, offsetof(UfsReg, utmrlba))
+    FIELD(UTMRLBA, UTMRLBA, 9, 22)
+REG32(UTMRLBAU, offsetof(UfsReg, utmrlbau))
+REG32(UTMRLDBR, offsetof(UfsReg, utmrldbr))
+REG32(UTMRLCLR, offsetof(UfsReg, utmrlclr))
+REG32(UTMRLRSR, offsetof(UfsReg, utmrlrsr))
+REG32(UICCMD, offsetof(UfsReg, uiccmd))
+REG32(UCMDARG1, offsetof(UfsReg, ucmdarg1))
+REG32(UCMDARG2, offsetof(UfsReg, ucmdarg2))
+REG32(UCMDARG3, offsetof(UfsReg, ucmdarg3))
+REG32(CCAP, offsetof(UfsReg, ccap))
+
+#define UFS_INTR_MASK                                    \
+    ((1 << R_IS_CEFES_SHIFT) | (1 << R_IS_SBFES_SHIFT) | \
+     (1 << R_IS_HCFES_SHIFT) | (1 << R_IS_UTPES_SHIFT) | \
+     (1 << R_IS_DFES_SHIFT) | (1 << R_IS_UCCS_SHIFT) |   \
+     (1 << R_IS_UTMRCS_SHIFT) | (1 << R_IS_ULSS_SHIFT) | \
+     (1 << R_IS_ULLS_SHIFT) | (1 << R_IS_UHES_SHIFT) |   \
+     (1 << R_IS_UHXS_SHIFT) | (1 << R_IS_UPMS_SHIFT) |   \
+     (1 << R_IS_UTMS_SHIFT) | (1 << R_IS_UE_SHIFT) |     \
+     (1 << R_IS_UDEPRI_SHIFT) | (1 << R_IS_UTRCS_SHIFT))
+
+#define UFS_UPIU_HEADER_TRANSACTION_TYPE_SHIFT 24
+#define UFS_UPIU_HEADER_TRANSACTION_TYPE_MASK 0xff
+#define UFS_UPIU_HEADER_TRANSACTION_TYPE(dword0)                       \
+    ((be32_to_cpu(dword0) >> UFS_UPIU_HEADER_TRANSACTION_TYPE_SHIFT) & \
+     UFS_UPIU_HEADER_TRANSACTION_TYPE_MASK)
+
+#define UFS_UPIU_HEADER_QUERY_FUNC_SHIFT 16
+#define UFS_UPIU_HEADER_QUERY_FUNC_MASK 0xff
+#define UFS_UPIU_HEADER_QUERY_FUNC(dword1)                       \
+    ((be32_to_cpu(dword1) >> UFS_UPIU_HEADER_QUERY_FUNC_SHIFT) & \
+     UFS_UPIU_HEADER_QUERY_FUNC_MASK)
+
+#define UFS_UPIU_HEADER_DATA_SEGMENT_LENGTH_SHIFT 0
+#define UFS_UPIU_HEADER_DATA_SEGMENT_LENGTH_MASK 0xffff
+#define UFS_UPIU_HEADER_DATA_SEGMENT_LENGTH(dword2)                       \
+    ((be32_to_cpu(dword2) >> UFS_UPIU_HEADER_DATA_SEGMENT_LENGTH_SHIFT) & \
+     UFS_UPIU_HEADER_DATA_SEGMENT_LENGTH_MASK)
+
+typedef struct QEMU_PACKED DeviceDescriptor {
+    uint8_t length;
+    uint8_t descriptor_idn;
+    uint8_t device;
+    uint8_t device_class;
+    uint8_t device_sub_class;
+    uint8_t protocol;
+    uint8_t number_lu;
+    uint8_t number_wlu;
+    uint8_t boot_enable;
+    uint8_t descr_access_en;
+    uint8_t init_power_mode;
+    uint8_t high_priority_lun;
+    uint8_t secure_removal_type;
+    uint8_t security_lu;
+    uint8_t background_ops_term_lat;
+    uint8_t init_active_icc_level;
+    uint16_t spec_version;
+    uint16_t manufacture_date;
+    uint8_t manufacturer_name;
+    uint8_t product_name;
+    uint8_t serial_number;
+    uint8_t oem_id;
+    uint16_t manufacturer_id;
+    uint8_t ud_0_base_offset;
+    uint8_t ud_config_p_length;
+    uint8_t device_rtt_cap;
+    uint16_t periodic_rtc_update;
+    uint8_t ufs_features_support;
+    uint8_t ffu_timeout;
+    uint8_t queue_depth;
+    uint16_t device_version;
+    uint8_t num_secure_wp_area;
+    uint32_t psa_max_data_size;
+    uint8_t psa_state_timeout;
+    uint8_t product_revision_level;
+    uint8_t reserved[36];
+    uint32_t extended_ufs_features_support;
+    uint8_t write_booster_buffer_preserve_user_space_en;
+    uint8_t write_booster_buffer_type;
+    uint32_t num_shared_write_booster_buffer_alloc_units;
+} DeviceDescriptor;
+
+typedef struct QEMU_PACKED GeometryDescriptor {
+    uint8_t length;
+    uint8_t descriptor_idn;
+    uint8_t media_technology;
+    uint8_t reserved;
+    uint64_t total_raw_device_capacity;
+    uint8_t max_number_lu;
+    uint32_t segment_size;
+    uint8_t allocation_unit_size;
+    uint8_t min_addr_block_size;
+    uint8_t optimal_read_block_size;
+    uint8_t optimal_write_block_size;
+    uint8_t max_in_buffer_size;
+    uint8_t max_out_buffer_size;
+    uint8_t rpmb_read_write_size;
+    uint8_t dynamic_capacity_resource_policy;
+    uint8_t data_ordering;
+    uint8_t max_context_id_number;
+    uint8_t sys_data_tag_unit_size;
+    uint8_t sys_data_tag_res_size;
+    uint8_t supported_sec_r_types;
+    uint16_t supported_memory_types;
+    uint32_t system_code_max_n_alloc_u;
+    uint16_t system_code_cap_adj_fac;
+    uint32_t non_persist_max_n_alloc_u;
+    uint16_t non_persist_cap_adj_fac;
+    uint32_t enhanced_1_max_n_alloc_u;
+    uint16_t enhanced_1_cap_adj_fac;
+    uint32_t enhanced_2_max_n_alloc_u;
+    uint16_t enhanced_2_cap_adj_fac;
+    uint32_t enhanced_3_max_n_alloc_u;
+    uint16_t enhanced_3_cap_adj_fac;
+    uint32_t enhanced_4_max_n_alloc_u;
+    uint16_t enhanced_4_cap_adj_fac;
+    uint32_t optimal_logical_block_size;
+    uint8_t reserved2[7];
+    uint32_t write_booster_buffer_max_n_alloc_units;
+    uint8_t device_max_write_booster_l_us;
+    uint8_t write_booster_buffer_cap_adj_fac;
+    uint8_t supported_write_booster_buffer_user_space_reduction_types;
+    uint8_t supported_write_booster_buffer_types;
+} GeometryDescriptor;
+
+#define UFS_GEOMETRY_CAPACITY_SHIFT 9
+
+typedef struct QEMU_PACKED UnitDescriptor {
+    uint8_t length;
+    uint8_t descriptor_idn;
+    uint8_t unit_index;
+    uint8_t lu_enable;
+    uint8_t boot_lun_id;
+    uint8_t lu_write_protect;
+    uint8_t lu_queue_depth;
+    uint8_t psa_sensitive;
+    uint8_t memory_type;
+    uint8_t data_reliability;
+    uint8_t logical_block_size;
+    uint64_t logical_block_count;
+    uint32_t erase_block_size;
+    uint8_t provisioning_type;
+    uint64_t phy_mem_resource_count;
+    uint16_t context_capabilities;
+    uint8_t large_unit_granularity_m1;
+    uint8_t reserved[6];
+    uint32_t lu_num_write_booster_buffer_alloc_units;
+} UnitDescriptor;
+
+typedef struct QEMU_PACKED RpmbUnitDescriptor {
+    uint8_t length;
+    uint8_t descriptor_idn;
+    uint8_t unit_index;
+    uint8_t lu_enable;
+    uint8_t boot_lun_id;
+    uint8_t lu_write_protect;
+    uint8_t lu_queue_depth;
+    uint8_t psa_sensitive;
+    uint8_t memory_type;
+    uint8_t reserved;
+    uint8_t logical_block_size;
+    uint64_t logical_block_count;
+    uint32_t erase_block_size;
+    uint8_t provisioning_type;
+    uint64_t phy_mem_resource_count;
+    uint8_t reserved2[3];
+} RpmbUnitDescriptor;
+
+typedef struct QEMU_PACKED PowerParametersDescriptor {
+    uint8_t length;
+    uint8_t descriptor_idn;
+    uint16_t active_icc_levels_vcc[16];
+    uint16_t active_icc_levels_vccq[16];
+    uint16_t active_icc_levels_vccq_2[16];
+} PowerParametersDescriptor;
+
+typedef struct QEMU_PACKED InterconnectDescriptor {
+    uint8_t length;
+    uint8_t descriptor_idn;
+    uint16_t bcd_unipro_version;
+    uint16_t bcd_mphy_version;
+} InterconnectDescriptor;
+
+typedef struct QEMU_PACKED StringDescriptor {
+    uint8_t length;
+    uint8_t descriptor_idn;
+    uint16_t UC[126];
+} StringDescriptor;
+
+typedef struct QEMU_PACKED DeviceHealthDescriptor {
+    uint8_t length;
+    uint8_t descriptor_idn;
+    uint8_t pre_eol_info;
+    uint8_t device_life_time_est_a;
+    uint8_t device_life_time_est_b;
+    uint8_t vendor_prop_info[32];
+    uint32_t refresh_total_count;
+    uint32_t refresh_progress;
+} DeviceHealthDescriptor;
+
+typedef struct QEMU_PACKED Flags {
+    uint8_t reserved;
+    uint8_t device_init;
+    uint8_t permanent_wp_en;
+    uint8_t power_on_wp_en;
+    uint8_t background_ops_en;
+    uint8_t device_life_span_mode_en;
+    uint8_t purge_enable;
+    uint8_t refresh_enable;
+    uint8_t phy_resource_removal;
+    uint8_t busy_rtc;
+    uint8_t reserved2;
+    uint8_t permanently_disable_fw_update;
+    uint8_t reserved3[2];
+    uint8_t wb_en;
+    uint8_t wb_buffer_flush_en;
+    uint8_t wb_buffer_flush_during_hibernate;
+    uint8_t reserved4[2];
+} Flags;
+
+typedef struct Attributes {
+    uint8_t boot_lun_en;
+    uint8_t reserved;
+    uint8_t current_power_mode;
+    uint8_t active_icc_level;
+    uint8_t out_of_order_data_en;
+    uint8_t background_op_status;
+    uint8_t purge_status;
+    uint8_t max_data_in_size;
+    uint8_t max_data_out_size;
+    uint32_t dyn_cap_needed;
+    uint8_t ref_clk_freq;
+    uint8_t config_descr_lock;
+    uint8_t max_num_of_rtt;
+    uint16_t exception_event_control;
+    uint16_t exception_event_status;
+    uint32_t seconds_passed;
+    uint16_t context_conf;
+    uint8_t device_ffu_status;
+    uint8_t psa_state;
+    uint32_t psa_data_size;
+    uint8_t ref_clk_gating_wait_time;
+    uint8_t device_case_rough_temperaure;
+    uint8_t device_too_high_temp_boundary;
+    uint8_t device_too_low_temp_boundary;
+    uint8_t throttling_status;
+    uint8_t wb_buffer_flush_status;
+    uint8_t available_wb_buffer_size;
+    uint8_t wb_buffer_life_time_est;
+    uint32_t current_wb_buffer_size;
+    uint8_t refresh_status;
+    uint8_t refresh_freq;
+    uint8_t refresh_unit;
+    uint8_t refresh_method;
+} Attributes;
+
+#define UFS_TRANSACTION_SPECIFIC_FIELD_SIZE 20
+#define UFS_MAX_QUERY_DATA_SIZE 256
+
+/* Command response result code */
+typedef enum CommandRespCode {
+    UFS_COMMAND_RESULT_SUCESS = 0x00,
+    UFS_COMMAND_RESULT_FAIL = 0x01,
+} CommandRespCode;
+
+enum {
+    UFS_UPIU_FLAG_UNDERFLOW = 0x20,
+    UFS_UPIU_FLAG_OVERFLOW = 0x40,
+};
+
+typedef struct QEMU_PACKED UtpUpiuHeader {
+    uint8_t trans_type;
+    uint8_t flags;
+    uint8_t lun;
+    uint8_t task_tag;
+    uint8_t iid_cmd_set_type;
+    uint8_t query_func;
+    uint8_t response;
+    uint8_t scsi_status;
+    uint8_t ehs_len;
+    uint8_t device_inf;
+    uint16_t data_segment_length;
+} UtpUpiuHeader;
+
+/*
+ * The code below is copied from the linux kernel
+ * ("include/uapi/scsi/scsi_bsg_ufs.h") and modified to fit the qemu style.
+ */
+
+typedef struct QEMU_PACKED UtpUpiuQuery {
+    uint8_t opcode;
+    uint8_t idn;
+    uint8_t index;
+    uint8_t selector;
+    uint16_t reserved_osf;
+    uint16_t length;
+    uint32_t value;
+    uint32_t reserved[2];
+    /* EHS length should be 0. We don't have to worry about EHS area. */
+    uint8_t data[UFS_MAX_QUERY_DATA_SIZE];
+} UtpUpiuQuery;
+
+#define UFS_CDB_SIZE 16
+
+/*
+ * struct UtpUpiuCmd - Command UPIU structure
+ * @data_transfer_len: Data Transfer Length DW-3
+ * @cdb: Command Descriptor Block CDB DW-4 to DW-7
+ */
+typedef struct QEMU_PACKED UtpUpiuCmd {
+    uint32_t exp_data_transfer_len;
+    uint8_t cdb[UFS_CDB_SIZE];
+} UtpUpiuCmd;
+
+/*
+ * struct UtpUpiuReq - general upiu request structure
+ * @header:UPIU header structure DW-0 to DW-2
+ * @sc: fields structure for scsi command DW-3 to DW-7
+ * @qr: fields structure for query request DW-3 to DW-7
+ * @uc: use utp_upiu_query to host the 4 dwords of uic command
+ */
+typedef struct QEMU_PACKED UtpUpiuReq {
+    UtpUpiuHeader header;
+    union {
+        UtpUpiuCmd sc;
+        UtpUpiuQuery qr;
+    };
+} UtpUpiuReq;
+
+/*
+ * The code below is copied from the linux kernel ("include/ufs/ufshci.h") and
+ * modified to fit the qemu style.
+ */
+
+enum {
+    UFS_PWR_OK = 0x0,
+    UFS_PWR_LOCAL = 0x01,
+    UFS_PWR_REMOTE = 0x02,
+    UFS_PWR_BUSY = 0x03,
+    UFS_PWR_ERROR_CAP = 0x04,
+    UFS_PWR_FATAL_ERROR = 0x05,
+};
+
+/* UIC Commands */
+enum uic_cmd_dme {
+    UFS_UIC_CMD_DME_GET = 0x01,
+    UFS_UIC_CMD_DME_SET = 0x02,
+    UFS_UIC_CMD_DME_PEER_GET = 0x03,
+    UFS_UIC_CMD_DME_PEER_SET = 0x04,
+    UFS_UIC_CMD_DME_POWERON = 0x10,
+    UFS_UIC_CMD_DME_POWEROFF = 0x11,
+    UFS_UIC_CMD_DME_ENABLE = 0x12,
+    UFS_UIC_CMD_DME_RESET = 0x14,
+    UFS_UIC_CMD_DME_END_PT_RST = 0x15,
+    UFS_UIC_CMD_DME_LINK_STARTUP = 0x16,
+    UFS_UIC_CMD_DME_HIBER_ENTER = 0x17,
+    UFS_UIC_CMD_DME_HIBER_EXIT = 0x18,
+    UFS_UIC_CMD_DME_TEST_MODE = 0x1A,
+};
+
+/* UIC Config result code / Generic error code */
+enum {
+    UFS_UIC_CMD_RESULT_SUCCESS = 0x00,
+    UFS_UIC_CMD_RESULT_INVALID_ATTR = 0x01,
+    UFS_UIC_CMD_RESULT_FAILURE = 0x01,
+    UFS_UIC_CMD_RESULT_INVALID_ATTR_VALUE = 0x02,
+    UFS_UIC_CMD_RESULT_READ_ONLY_ATTR = 0x03,
+    UFS_UIC_CMD_RESULT_WRITE_ONLY_ATTR = 0x04,
+    UFS_UIC_CMD_RESULT_BAD_INDEX = 0x05,
+    UFS_UIC_CMD_RESULT_LOCKED_ATTR = 0x06,
+    UFS_UIC_CMD_RESULT_BAD_TEST_FEATURE_INDEX = 0x07,
+    UFS_UIC_CMD_RESULT_PEER_COMM_FAILURE = 0x08,
+    UFS_UIC_CMD_RESULT_BUSY = 0x09,
+    UFS_UIC_CMD_RESULT_DME_FAILURE = 0x0A,
+};
+
+#define UFS_MASK_UIC_COMMAND_RESULT 0xFF
+
+/*
+ * Request Descriptor Definitions
+ */
+
+/* Transfer request command type */
+enum {
+    UFS_UTP_CMD_TYPE_SCSI = 0x0,
+    UFS_UTP_CMD_TYPE_UFS = 0x1,
+    UFS_UTP_CMD_TYPE_DEV_MANAGE = 0x2,
+};
+
+/* To accommodate UFS2.0 required Command type */
+enum {
+    UFS_UTP_CMD_TYPE_UFS_STORAGE = 0x1,
+};
+
+enum {
+    UFS_UTP_SCSI_COMMAND = 0x00000000,
+    UFS_UTP_NATIVE_UFS_COMMAND = 0x10000000,
+    UFS_UTP_DEVICE_MANAGEMENT_FUNCTION = 0x20000000,
+    UFS_UTP_REQ_DESC_INT_CMD = 0x01000000,
+    UFS_UTP_REQ_DESC_CRYPTO_ENABLE_CMD = 0x00800000,
+};
+
+/* UTP Transfer Request Data Direction (DD) */
+enum {
+    UFS_UTP_NO_DATA_TRANSFER = 0x00000000,
+    UFS_UTP_HOST_TO_DEVICE = 0x02000000,
+    UFS_UTP_DEVICE_TO_HOST = 0x04000000,
+};
+
+/* Overall command status values */
+enum UtpOcsCodes {
+    UFS_OCS_SUCCESS = 0x0,
+    UFS_OCS_INVALID_CMD_TABLE_ATTR = 0x1,
+    UFS_OCS_INVALID_PRDT_ATTR = 0x2,
+    UFS_OCS_MISMATCH_DATA_BUF_SIZE = 0x3,
+    UFS_OCS_MISMATCH_RESP_UPIU_SIZE = 0x4,
+    UFS_OCS_PEER_COMM_FAILURE = 0x5,
+    UFS_OCS_ABORTED = 0x6,
+    UFS_OCS_FATAL_ERROR = 0x7,
+    UFS_OCS_DEVICE_FATAL_ERROR = 0x8,
+    UFS_OCS_INVALID_CRYPTO_CONFIG = 0x9,
+    UFS_OCS_GENERAL_CRYPTO_ERROR = 0xa,
+    UFS_OCS_INVALID_COMMAND_STATUS = 0xf,
+};
+
+enum {
+    UFS_MASK_OCS = 0x0F,
+};
+
+/*
+ * struct UfshcdSgEntry - UFSHCI PRD Entry
+ * @addr: Physical address; DW-0 and DW-1.
+ * @reserved: Reserved for future use DW-2
+ * @size: size of physical segment DW-3
+ */
+typedef struct QEMU_PACKED UfshcdSgEntry {
+    uint64_t addr;
+    uint32_t reserved;
+    uint32_t size;
+    /*
+     * followed by variant-specific fields if
+     * CONFIG_SCSI_UFS_VARIABLE_SG_ENTRY_SIZE has been defined.
+     */
+} UfshcdSgEntry;
+
+/*
+ * struct RequestDescHeader - Descriptor Header common to both UTRD and UTMRD
+ * @dword0: Descriptor Header DW0
+ * @dword1: Descriptor Header DW1
+ * @dword2: Descriptor Header DW2
+ * @dword3: Descriptor Header DW3
+ */
+typedef struct QEMU_PACKED RequestDescHeader {
+    uint32_t dword_0;
+    uint32_t dword_1;
+    uint32_t dword_2;
+    uint32_t dword_3;
+} RequestDescHeader;
+
+/*
+ * struct UtpTransferReqDesc - UTP Transfer Request Descriptor (UTRD)
+ * @header: UTRD header DW-0 to DW-3
+ * @command_desc_base_addr_lo: UCD base address low DW-4
+ * @command_desc_base_addr_hi: UCD base address high DW-5
+ * @response_upiu_length: response UPIU length DW-6
+ * @response_upiu_offset: response UPIU offset DW-6
+ * @prd_table_length: Physical region descriptor length DW-7
+ * @prd_table_offset: Physical region descriptor offset DW-7
+ */
+typedef struct QEMU_PACKED UtpTransferReqDesc {
+    /* DW 0-3 */
+    RequestDescHeader header;
+
+    /* DW 4-5*/
+    uint32_t command_desc_base_addr_lo;
+    uint32_t command_desc_base_addr_hi;
+
+    /* DW 6 */
+    uint16_t response_upiu_length;
+    uint16_t response_upiu_offset;
+
+    /* DW 7 */
+    uint16_t prd_table_length;
+    uint16_t prd_table_offset;
+} UtpTransferReqDesc;
+
+/*
+ * UTMRD structure.
+ */
+typedef struct QEMU_PACKED UtpTaskReqDesc {
+    /* DW 0-3 */
+    RequestDescHeader header;
+
+    /* DW 4-11 - Task request UPIU structure */
+    struct {
+        UtpUpiuHeader req_header;
+        uint32_t input_param1;
+        uint32_t input_param2;
+        uint32_t input_param3;
+        uint32_t reserved1[2];
+    } upiu_req;
+
+    /* DW 12-19 - Task Management Response UPIU structure */
+    struct {
+        UtpUpiuHeader rsp_header;
+        uint32_t output_param1;
+        uint32_t output_param2;
+        uint32_t reserved2[3];
+    } upiu_rsp;
+} UtpTaskReqDesc;
+
+/*
+ * The code below is copied from the linux kernel ("include/ufs/ufs.h") and
+ * modified to fit the qemu style.
+ */
+
+#define UFS_GENERAL_UPIU_REQUEST_SIZE (sizeof(UtpUpiuReq))
+#define UFS_QUERY_DESC_MAX_SIZE 255
+#define UFS_QUERY_DESC_MIN_SIZE 2
+#define UFS_QUERY_DESC_HDR_SIZE 2
+#define UFS_QUERY_OSF_SIZE (GENERAL_UPIU_REQUEST_SIZE - (sizeof(UtpUpiuHeader)))
+#define UFS_SENSE_SIZE 18
+
+/*
+ * UFS device may have standard LUs and LUN id could be from 0x00 to
+ * 0x7F. Standard LUs use "Peripheral Device Addressing Format".
+ * UFS device may also have the Well Known LUs (also referred as W-LU)
+ * which again could be from 0x00 to 0x7F. For W-LUs, device only use
+ * the "Extended Addressing Format" which means the W-LUNs would be
+ * from 0xc100 (SCSI_W_LUN_BASE) onwards.
+ * This means max. LUN number reported from UFS device could be 0xC17F.
+ */
+#define UFS_UPIU_MAX_UNIT_NUM_ID 0x7F
+#define UFS_UPIU_WLUN_ID (1 << 7)
+
+/* WriteBooster buffer is available only for the logical unit from 0 to 7 */
+#define UFS_UPIU_MAX_WB_LUN_ID 8
+
+/*
+ * WriteBooster buffer lifetime has a limit setted by vendor.
+ * If it is over the limit, WriteBooster feature will be disabled.
+ */
+#define UFS_WB_EXCEED_LIFETIME 0x0B
+
+/*
+ * In UFS Spec, the Extra Header Segment (EHS) starts from byte 32 in UPIU
+ * request/response packet
+ */
+#define UFS_EHS_OFFSET_IN_RESPONSE 32
+
+/* Well known logical unit id in LUN field of UPIU */
+enum {
+    UFS_UPIU_REPORT_LUNS_WLUN = 0x81,
+    UFS_UPIU_UFS_DEVICE_WLUN = 0xD0,
+    UFS_UPIU_BOOT_WLUN = 0xB0,
+    UFS_UPIU_RPMB_WLUN = 0xC4,
+};
+
+/*
+ * UFS Protocol Information Unit related definitions
+ */
+
+/* Task management functions */
+enum {
+    UFS_ABORT_TASK = 0x01,
+    UFS_ABORT_TASK_SET = 0x02,
+    UFS_CLEAR_TASK_SET = 0x04,
+    UFS_LOGICAL_RESET = 0x08,
+    UFS_QUERY_TASK = 0x80,
+    UFS_QUERY_TASK_SET = 0x81,
+};
+
+/* UTP UPIU Transaction Codes Initiator to Target */
+enum {
+    UFS_UPIU_TRANSACTION_NOP_OUT = 0x00,
+    UFS_UPIU_TRANSACTION_COMMAND = 0x01,
+    UFS_UPIU_TRANSACTION_DATA_OUT = 0x02,
+    UFS_UPIU_TRANSACTION_TASK_REQ = 0x04,
+    UFS_UPIU_TRANSACTION_QUERY_REQ = 0x16,
+};
+
+/* UTP UPIU Transaction Codes Target to Initiator */
+enum {
+    UFS_UPIU_TRANSACTION_NOP_IN = 0x20,
+    UFS_UPIU_TRANSACTION_RESPONSE = 0x21,
+    UFS_UPIU_TRANSACTION_DATA_IN = 0x22,
+    UFS_UPIU_TRANSACTION_TASK_RSP = 0x24,
+    UFS_UPIU_TRANSACTION_READY_XFER = 0x31,
+    UFS_UPIU_TRANSACTION_QUERY_RSP = 0x36,
+    UFS_UPIU_TRANSACTION_REJECT_UPIU = 0x3F,
+};
+
+/* UPIU Read/Write flags */
+enum {
+    UFS_UPIU_CMD_FLAGS_NONE = 0x00,
+    UFS_UPIU_CMD_FLAGS_WRITE = 0x20,
+    UFS_UPIU_CMD_FLAGS_READ = 0x40,
+};
+
+/* UPIU Task Attributes */
+enum {
+    UFS_UPIU_TASK_ATTR_SIMPLE = 0x00,
+    UFS_UPIU_TASK_ATTR_ORDERED = 0x01,
+    UFS_UPIU_TASK_ATTR_HEADQ = 0x02,
+    UFS_UPIU_TASK_ATTR_ACA = 0x03,
+};
+
+/* UPIU Query request function */
+enum {
+    UFS_UPIU_QUERY_FUNC_STANDARD_READ_REQUEST = 0x01,
+    UFS_UPIU_QUERY_FUNC_STANDARD_WRITE_REQUEST = 0x81,
+};
+
+/* Flag idn for Query Requests*/
+enum flag_idn {
+    UFS_QUERY_FLAG_IDN_FDEVICEINIT = 0x01,
+    UFS_QUERY_FLAG_IDN_PERMANENT_WPE = 0x02,
+    UFS_QUERY_FLAG_IDN_PWR_ON_WPE = 0x03,
+    UFS_QUERY_FLAG_IDN_BKOPS_EN = 0x04,
+    UFS_QUERY_FLAG_IDN_LIFE_SPAN_MODE_ENABLE = 0x05,
+    UFS_QUERY_FLAG_IDN_PURGE_ENABLE = 0x06,
+    UFS_QUERY_FLAG_IDN_REFRESH_ENABLE = 0x07,
+    UFS_QUERY_FLAG_IDN_FPHYRESOURCEREMOVAL = 0x08,
+    UFS_QUERY_FLAG_IDN_BUSY_RTC = 0x09,
+    UFS_QUERY_FLAG_IDN_RESERVED3 = 0x0A,
+    UFS_QUERY_FLAG_IDN_PERMANENTLY_DISABLE_FW_UPDATE = 0x0B,
+    UFS_QUERY_FLAG_IDN_WB_EN = 0x0E,
+    UFS_QUERY_FLAG_IDN_WB_BUFF_FLUSH_EN = 0x0F,
+    UFS_QUERY_FLAG_IDN_WB_BUFF_FLUSH_DURING_HIBERN8 = 0x10,
+    UFS_QUERY_FLAG_IDN_HPB_RESET = 0x11,
+    UFS_QUERY_FLAG_IDN_HPB_EN = 0x12,
+    UFS_QUERY_FLAG_IDN_COUNT,
+};
+
+/* Attribute idn for Query requests */
+enum attr_idn {
+    UFS_QUERY_ATTR_IDN_BOOT_LU_EN = 0x00,
+    UFS_QUERY_ATTR_IDN_MAX_HPB_SINGLE_CMD = 0x01,
+    UFS_QUERY_ATTR_IDN_POWER_MODE = 0x02,
+    UFS_QUERY_ATTR_IDN_ACTIVE_ICC_LVL = 0x03,
+    UFS_QUERY_ATTR_IDN_OOO_DATA_EN = 0x04,
+    UFS_QUERY_ATTR_IDN_BKOPS_STATUS = 0x05,
+    UFS_QUERY_ATTR_IDN_PURGE_STATUS = 0x06,
+    UFS_QUERY_ATTR_IDN_MAX_DATA_IN = 0x07,
+    UFS_QUERY_ATTR_IDN_MAX_DATA_OUT = 0x08,
+    UFS_QUERY_ATTR_IDN_DYN_CAP_NEEDED = 0x09,
+    UFS_QUERY_ATTR_IDN_REF_CLK_FREQ = 0x0A,
+    UFS_QUERY_ATTR_IDN_CONF_DESC_LOCK = 0x0B,
+    UFS_QUERY_ATTR_IDN_MAX_NUM_OF_RTT = 0x0C,
+    UFS_QUERY_ATTR_IDN_EE_CONTROL = 0x0D,
+    UFS_QUERY_ATTR_IDN_EE_STATUS = 0x0E,
+    UFS_QUERY_ATTR_IDN_SECONDS_PASSED = 0x0F,
+    UFS_QUERY_ATTR_IDN_CNTX_CONF = 0x10,
+    UFS_QUERY_ATTR_IDN_CORR_PRG_BLK_NUM = 0x11,
+    UFS_QUERY_ATTR_IDN_RESERVED2 = 0x12,
+    UFS_QUERY_ATTR_IDN_RESERVED3 = 0x13,
+    UFS_QUERY_ATTR_IDN_FFU_STATUS = 0x14,
+    UFS_QUERY_ATTR_IDN_PSA_STATE = 0x15,
+    UFS_QUERY_ATTR_IDN_PSA_DATA_SIZE = 0x16,
+    UFS_QUERY_ATTR_IDN_REF_CLK_GATING_WAIT_TIME = 0x17,
+    UFS_QUERY_ATTR_IDN_CASE_ROUGH_TEMP = 0x18,
+    UFS_QUERY_ATTR_IDN_HIGH_TEMP_BOUND = 0x19,
+    UFS_QUERY_ATTR_IDN_LOW_TEMP_BOUND = 0x1A,
+    UFS_QUERY_ATTR_IDN_THROTTLING_STATUS = 0x1B,
+    UFS_QUERY_ATTR_IDN_WB_FLUSH_STATUS = 0x1C,
+    UFS_QUERY_ATTR_IDN_AVAIL_WB_BUFF_SIZE = 0x1D,
+    UFS_QUERY_ATTR_IDN_WB_BUFF_LIFE_TIME_EST = 0x1E,
+    UFS_QUERY_ATTR_IDN_CURR_WB_BUFF_SIZE = 0x1F,
+    UFS_QUERY_ATTR_IDN_REFRESH_STATUS = 0x2C,
+    UFS_QUERY_ATTR_IDN_REFRESH_FREQ = 0x2D,
+    UFS_QUERY_ATTR_IDN_REFRESH_UNIT = 0x2E,
+    UFS_QUERY_ATTR_IDN_COUNT,
+};
+
+/* Descriptor idn for Query requests */
+enum desc_idn {
+    UFS_QUERY_DESC_IDN_DEVICE = 0x0,
+    UFS_QUERY_DESC_IDN_CONFIGURATION = 0x1,
+    UFS_QUERY_DESC_IDN_UNIT = 0x2,
+    UFS_QUERY_DESC_IDN_RFU_0 = 0x3,
+    UFS_QUERY_DESC_IDN_INTERCONNECT = 0x4,
+    UFS_QUERY_DESC_IDN_STRING = 0x5,
+    UFS_QUERY_DESC_IDN_RFU_1 = 0x6,
+    UFS_QUERY_DESC_IDN_GEOMETRY = 0x7,
+    UFS_QUERY_DESC_IDN_POWER = 0x8,
+    UFS_QUERY_DESC_IDN_HEALTH = 0x9,
+    UFS_QUERY_DESC_IDN_MAX,
+};
+
+enum desc_header_offset {
+    UFS_QUERY_DESC_LENGTH_OFFSET = 0x00,
+    UFS_QUERY_DESC_DESC_TYPE_OFFSET = 0x01,
+};
+
+/* Unit descriptor parameters offsets in bytes*/
+enum unit_desc_param {
+    UFS_UNIT_DESC_PARAM_LEN = 0x0,
+    UFS_UNIT_DESC_PARAM_TYPE = 0x1,
+    UFS_UNIT_DESC_PARAM_UNIT_INDEX = 0x2,
+    UFS_UNIT_DESC_PARAM_LU_ENABLE = 0x3,
+    UFS_UNIT_DESC_PARAM_BOOT_LUN_ID = 0x4,
+    UFS_UNIT_DESC_PARAM_LU_WR_PROTECT = 0x5,
+    UFS_UNIT_DESC_PARAM_LU_Q_DEPTH = 0x6,
+    UFS_UNIT_DESC_PARAM_PSA_SENSITIVE = 0x7,
+    UFS_UNIT_DESC_PARAM_MEM_TYPE = 0x8,
+    UFS_UNIT_DESC_PARAM_DATA_RELIABILITY = 0x9,
+    UFS_UNIT_DESC_PARAM_LOGICAL_BLK_SIZE = 0xA,
+    UFS_UNIT_DESC_PARAM_LOGICAL_BLK_COUNT = 0xB,
+    UFS_UNIT_DESC_PARAM_ERASE_BLK_SIZE = 0x13,
+    UFS_UNIT_DESC_PARAM_PROVISIONING_TYPE = 0x17,
+    UFS_UNIT_DESC_PARAM_PHY_MEM_RSRC_CNT = 0x18,
+    UFS_UNIT_DESC_PARAM_CTX_CAPABILITIES = 0x20,
+    UFS_UNIT_DESC_PARAM_LARGE_UNIT_SIZE_M1 = 0x22,
+    UFS_UNIT_DESC_PARAM_HPB_LU_MAX_ACTIVE_RGNS = 0x23,
+    UFS_UNIT_DESC_PARAM_HPB_PIN_RGN_START_OFF = 0x25,
+    UFS_UNIT_DESC_PARAM_HPB_NUM_PIN_RGNS = 0x27,
+    UFS_UNIT_DESC_PARAM_WB_BUF_ALLOC_UNITS = 0x29,
+};
+
+/* RPMB Unit descriptor parameters offsets in bytes*/
+enum rpmb_unit_desc_param {
+    UFS_RPMB_UNIT_DESC_PARAM_LEN = 0x0,
+    UFS_RPMB_UNIT_DESC_PARAM_TYPE = 0x1,
+    UFS_RPMB_UNIT_DESC_PARAM_UNIT_INDEX = 0x2,
+    UFS_RPMB_UNIT_DESC_PARAM_LU_ENABLE = 0x3,
+    UFS_RPMB_UNIT_DESC_PARAM_BOOT_LUN_ID = 0x4,
+    UFS_RPMB_UNIT_DESC_PARAM_LU_WR_PROTECT = 0x5,
+    UFS_RPMB_UNIT_DESC_PARAM_LU_Q_DEPTH = 0x6,
+    UFS_RPMB_UNIT_DESC_PARAM_PSA_SENSITIVE = 0x7,
+    UFS_RPMB_UNIT_DESC_PARAM_MEM_TYPE = 0x8,
+    UFS_RPMB_UNIT_DESC_PARAM_REGION_EN = 0x9,
+    UFS_RPMB_UNIT_DESC_PARAM_LOGICAL_BLK_SIZE = 0xA,
+    UFS_RPMB_UNIT_DESC_PARAM_LOGICAL_BLK_COUNT = 0xB,
+    UFS_RPMB_UNIT_DESC_PARAM_REGION0_SIZE = 0x13,
+    UFS_RPMB_UNIT_DESC_PARAM_REGION1_SIZE = 0x14,
+    UFS_RPMB_UNIT_DESC_PARAM_REGION2_SIZE = 0x15,
+    UFS_RPMB_UNIT_DESC_PARAM_REGION3_SIZE = 0x16,
+    UFS_RPMB_UNIT_DESC_PARAM_PROVISIONING_TYPE = 0x17,
+    UFS_RPMB_UNIT_DESC_PARAM_PHY_MEM_RSRC_CNT = 0x18,
+};
+
+/* Device descriptor parameters offsets in bytes*/
+enum device_desc_param {
+    UFS_DEVICE_DESC_PARAM_LEN = 0x0,
+    UFS_DEVICE_DESC_PARAM_TYPE = 0x1,
+    UFS_DEVICE_DESC_PARAM_DEVICE_TYPE = 0x2,
+    UFS_DEVICE_DESC_PARAM_DEVICE_CLASS = 0x3,
+    UFS_DEVICE_DESC_PARAM_DEVICE_SUB_CLASS = 0x4,
+    UFS_DEVICE_DESC_PARAM_PRTCL = 0x5,
+    UFS_DEVICE_DESC_PARAM_NUM_LU = 0x6,
+    UFS_DEVICE_DESC_PARAM_NUM_WLU = 0x7,
+    UFS_DEVICE_DESC_PARAM_BOOT_ENBL = 0x8,
+    UFS_DEVICE_DESC_PARAM_DESC_ACCSS_ENBL = 0x9,
+    UFS_DEVICE_DESC_PARAM_INIT_PWR_MODE = 0xA,
+    UFS_DEVICE_DESC_PARAM_HIGH_PR_LUN = 0xB,
+    UFS_DEVICE_DESC_PARAM_SEC_RMV_TYPE = 0xC,
+    UFS_DEVICE_DESC_PARAM_SEC_LU = 0xD,
+    UFS_DEVICE_DESC_PARAM_BKOP_TERM_LT = 0xE,
+    UFS_DEVICE_DESC_PARAM_ACTVE_ICC_LVL = 0xF,
+    UFS_DEVICE_DESC_PARAM_SPEC_VER = 0x10,
+    UFS_DEVICE_DESC_PARAM_MANF_DATE = 0x12,
+    UFS_DEVICE_DESC_PARAM_MANF_NAME = 0x14,
+    UFS_DEVICE_DESC_PARAM_PRDCT_NAME = 0x15,
+    UFS_DEVICE_DESC_PARAM_SN = 0x16,
+    UFS_DEVICE_DESC_PARAM_OEM_ID = 0x17,
+    UFS_DEVICE_DESC_PARAM_MANF_ID = 0x18,
+    UFS_DEVICE_DESC_PARAM_UD_OFFSET = 0x1A,
+    UFS_DEVICE_DESC_PARAM_UD_LEN = 0x1B,
+    UFS_DEVICE_DESC_PARAM_RTT_CAP = 0x1C,
+    UFS_DEVICE_DESC_PARAM_FRQ_RTC = 0x1D,
+    UFS_DEVICE_DESC_PARAM_UFS_FEAT = 0x1F,
+    UFS_DEVICE_DESC_PARAM_FFU_TMT = 0x20,
+    UFS_DEVICE_DESC_PARAM_Q_DPTH = 0x21,
+    UFS_DEVICE_DESC_PARAM_DEV_VER = 0x22,
+    UFS_DEVICE_DESC_PARAM_NUM_SEC_WPA = 0x24,
+    UFS_DEVICE_DESC_PARAM_PSA_MAX_DATA = 0x25,
+    UFS_DEVICE_DESC_PARAM_PSA_TMT = 0x29,
+    UFS_DEVICE_DESC_PARAM_PRDCT_REV = 0x2A,
+    UFS_DEVICE_DESC_PARAM_HPB_VER = 0x40,
+    UFS_DEVICE_DESC_PARAM_HPB_CONTROL = 0x42,
+    UFS_DEVICE_DESC_PARAM_EXT_UFS_FEATURE_SUP = 0x4F,
+    UFS_DEVICE_DESC_PARAM_WB_PRESRV_USRSPC_EN = 0x53,
+    UFS_DEVICE_DESC_PARAM_WB_TYPE = 0x54,
+    UFS_DEVICE_DESC_PARAM_WB_SHARED_ALLOC_UNITS = 0x55,
+};
+
+/* Interconnect descriptor parameters offsets in bytes*/
+enum interconnect_desc_param {
+    UFS_INTERCONNECT_DESC_PARAM_LEN = 0x0,
+    UFS_INTERCONNECT_DESC_PARAM_TYPE = 0x1,
+    UFS_INTERCONNECT_DESC_PARAM_UNIPRO_VER = 0x2,
+    UFS_INTERCONNECT_DESC_PARAM_MPHY_VER = 0x4,
+};
+
+/* Geometry descriptor parameters offsets in bytes*/
+enum geometry_desc_param {
+    UFS_GEOMETRY_DESC_PARAM_LEN = 0x0,
+    UFS_GEOMETRY_DESC_PARAM_TYPE = 0x1,
+    UFS_GEOMETRY_DESC_PARAM_DEV_CAP = 0x4,
+    UFS_GEOMETRY_DESC_PARAM_MAX_NUM_LUN = 0xC,
+    UFS_GEOMETRY_DESC_PARAM_SEG_SIZE = 0xD,
+    UFS_GEOMETRY_DESC_PARAM_ALLOC_UNIT_SIZE = 0x11,
+    UFS_GEOMETRY_DESC_PARAM_MIN_BLK_SIZE = 0x12,
+    UFS_GEOMETRY_DESC_PARAM_OPT_RD_BLK_SIZE = 0x13,
+    UFS_GEOMETRY_DESC_PARAM_OPT_WR_BLK_SIZE = 0x14,
+    UFS_GEOMETRY_DESC_PARAM_MAX_IN_BUF_SIZE = 0x15,
+    UFS_GEOMETRY_DESC_PARAM_MAX_OUT_BUF_SIZE = 0x16,
+    UFS_GEOMETRY_DESC_PARAM_RPMB_RW_SIZE = 0x17,
+    UFS_GEOMETRY_DESC_PARAM_DYN_CAP_RSRC_PLC = 0x18,
+    UFS_GEOMETRY_DESC_PARAM_DATA_ORDER = 0x19,
+    UFS_GEOMETRY_DESC_PARAM_MAX_NUM_CTX = 0x1A,
+    UFS_GEOMETRY_DESC_PARAM_TAG_UNIT_SIZE = 0x1B,
+    UFS_GEOMETRY_DESC_PARAM_TAG_RSRC_SIZE = 0x1C,
+    UFS_GEOMETRY_DESC_PARAM_SEC_RM_TYPES = 0x1D,
+    UFS_GEOMETRY_DESC_PARAM_MEM_TYPES = 0x1E,
+    UFS_GEOMETRY_DESC_PARAM_SCM_MAX_NUM_UNITS = 0x20,
+    UFS_GEOMETRY_DESC_PARAM_SCM_CAP_ADJ_FCTR = 0x24,
+    UFS_GEOMETRY_DESC_PARAM_NPM_MAX_NUM_UNITS = 0x26,
+    UFS_GEOMETRY_DESC_PARAM_NPM_CAP_ADJ_FCTR = 0x2A,
+    UFS_GEOMETRY_DESC_PARAM_ENM1_MAX_NUM_UNITS = 0x2C,
+    UFS_GEOMETRY_DESC_PARAM_ENM1_CAP_ADJ_FCTR = 0x30,
+    UFS_GEOMETRY_DESC_PARAM_ENM2_MAX_NUM_UNITS = 0x32,
+    UFS_GEOMETRY_DESC_PARAM_ENM2_CAP_ADJ_FCTR = 0x36,
+    UFS_GEOMETRY_DESC_PARAM_ENM3_MAX_NUM_UNITS = 0x38,
+    UFS_GEOMETRY_DESC_PARAM_ENM3_CAP_ADJ_FCTR = 0x3C,
+    UFS_GEOMETRY_DESC_PARAM_ENM4_MAX_NUM_UNITS = 0x3E,
+    UFS_GEOMETRY_DESC_PARAM_ENM4_CAP_ADJ_FCTR = 0x42,
+    UFS_GEOMETRY_DESC_PARAM_OPT_LOG_BLK_SIZE = 0x44,
+    UFS_GEOMETRY_DESC_PARAM_HPB_REGION_SIZE = 0x48,
+    UFS_GEOMETRY_DESC_PARAM_HPB_NUMBER_LU = 0x49,
+    UFS_GEOMETRY_DESC_PARAM_HPB_SUBREGION_SIZE = 0x4A,
+    UFS_GEOMETRY_DESC_PARAM_HPB_MAX_ACTIVE_REGS = 0x4B,
+    UFS_GEOMETRY_DESC_PARAM_WB_MAX_ALLOC_UNITS = 0x4F,
+    UFS_GEOMETRY_DESC_PARAM_WB_MAX_WB_LUNS = 0x53,
+    UFS_GEOMETRY_DESC_PARAM_WB_BUFF_CAP_ADJ = 0x54,
+    UFS_GEOMETRY_DESC_PARAM_WB_SUP_RED_TYPE = 0x55,
+    UFS_GEOMETRY_DESC_PARAM_WB_SUP_WB_TYPE = 0x56,
+};
+
+/* Health descriptor parameters offsets in bytes*/
+enum health_desc_param {
+    UFS_HEALTH_DESC_PARAM_LEN = 0x0,
+    UFS_HEALTH_DESC_PARAM_TYPE = 0x1,
+    UFS_HEALTH_DESC_PARAM_EOL_INFO = 0x2,
+    UFS_HEALTH_DESC_PARAM_LIFE_TIME_EST_A = 0x3,
+    UFS_HEALTH_DESC_PARAM_LIFE_TIME_EST_B = 0x4,
+};
+
+/* WriteBooster buffer mode */
+enum {
+    UFS_WB_BUF_MODE_LU_DEDICATED = 0x0,
+    UFS_WB_BUF_MODE_SHARED = 0x1,
+};
+
+/*
+ * Logical Unit Write Protect
+ * 00h: LU not write protected
+ * 01h: LU write protected when fPowerOnWPEn =1
+ * 02h: LU permanently write protected when fPermanentWPEn =1
+ */
+enum ufs_lu_wp_type {
+    UFS_LU_NO_WP = 0x00,
+    UFS_LU_POWER_ON_WP = 0x01,
+    UFS_LU_PERM_WP = 0x02,
+};
+
+/* UTP QUERY Transaction Specific Fields OpCode */
+enum query_opcode {
+    UFS_UPIU_QUERY_OPCODE_NOP = 0x0,
+    UFS_UPIU_QUERY_OPCODE_READ_DESC = 0x1,
+    UFS_UPIU_QUERY_OPCODE_WRITE_DESC = 0x2,
+    UFS_UPIU_QUERY_OPCODE_READ_ATTR = 0x3,
+    UFS_UPIU_QUERY_OPCODE_WRITE_ATTR = 0x4,
+    UFS_UPIU_QUERY_OPCODE_READ_FLAG = 0x5,
+    UFS_UPIU_QUERY_OPCODE_SET_FLAG = 0x6,
+    UFS_UPIU_QUERY_OPCODE_CLEAR_FLAG = 0x7,
+    UFS_UPIU_QUERY_OPCODE_TOGGLE_FLAG = 0x8,
+};
+
+/* Query response result code */
+typedef enum QueryRespCode {
+    UFS_QUERY_RESULT_SUCCESS = 0x00,
+    UFS_QUERY_RESULT_NOT_READABLE = 0xF6,
+    UFS_QUERY_RESULT_NOT_WRITEABLE = 0xF7,
+    UFS_QUERY_RESULT_ALREADY_WRITTEN = 0xF8,
+    UFS_QUERY_RESULT_INVALID_LENGTH = 0xF9,
+    UFS_QUERY_RESULT_INVALID_VALUE = 0xFA,
+    UFS_QUERY_RESULT_INVALID_SELECTOR = 0xFB,
+    UFS_QUERY_RESULT_INVALID_INDEX = 0xFC,
+    UFS_QUERY_RESULT_INVALID_IDN = 0xFD,
+    UFS_QUERY_RESULT_INVALID_OPCODE = 0xFE,
+    UFS_QUERY_RESULT_GENERAL_FAILURE = 0xFF,
+} QueryRespCode;
+
+/* UTP Transfer Request Command Type (CT) */
+enum {
+    UFS_UPIU_COMMAND_SET_TYPE_SCSI = 0x0,
+    UFS_UPIU_COMMAND_SET_TYPE_UFS = 0x1,
+    UFS_UPIU_COMMAND_SET_TYPE_QUERY = 0x2,
+};
+
+/* Task management service response */
+enum {
+    UFS_UPIU_TASK_MANAGEMENT_FUNC_COMPL = 0x00,
+    UFS_UPIU_TASK_MANAGEMENT_FUNC_NOT_SUPPORTED = 0x04,
+    UFS_UPIU_TASK_MANAGEMENT_FUNC_SUCCEEDED = 0x08,
+    UFS_UPIU_TASK_MANAGEMENT_FUNC_FAILED = 0x05,
+    UFS_UPIU_INCORRECT_LOGICAL_UNIT_NO = 0x09,
+};
+
+/* UFS device power modes */
+enum ufs_dev_pwr_mode {
+    UFS_ACTIVE_PWR_MODE = 1,
+    UFS_SLEEP_PWR_MODE = 2,
+    UFS_POWERDOWN_PWR_MODE = 3,
+    UFS_DEEPSLEEP_PWR_MODE = 4,
+};
+
+/*
+ * struct UtpCmdRsp - Response UPIU structure
+ * @residual_transfer_count: Residual transfer count DW-3
+ * @reserved: Reserved double words DW-4 to DW-7
+ * @sense_data_len: Sense data length DW-8 U16
+ * @sense_data: Sense data field DW-8 to DW-12
+ */
+typedef struct QEMU_PACKED UtpCmdRsp {
+    uint32_t residual_transfer_count;
+    uint32_t reserved[4];
+    uint16_t sense_data_len;
+    uint8_t sense_data[UFS_SENSE_SIZE];
+} UtpCmdRsp;
+
+/*
+ * struct UtpUpiuRsp - general upiu response structure
+ * @header: UPIU header structure DW-0 to DW-2
+ * @sr: fields structure for scsi command DW-3 to DW-12
+ * @qr: fields structure for query request DW-3 to DW-7
+ */
+typedef struct QEMU_PACKED UtpUpiuRsp {
+    UtpUpiuHeader header;
+    union {
+        UtpCmdRsp sr;
+        UtpUpiuQuery qr;
+    };
+} UtpUpiuRsp;
+
+static inline void _ufs_check_size(void)
+{
+    QEMU_BUILD_BUG_ON(sizeof(UfsReg) != 0x104);
+    QEMU_BUILD_BUG_ON(sizeof(DeviceDescriptor) != 89);
+    QEMU_BUILD_BUG_ON(sizeof(GeometryDescriptor) != 87);
+    QEMU_BUILD_BUG_ON(sizeof(UnitDescriptor) != 45);
+    QEMU_BUILD_BUG_ON(sizeof(RpmbUnitDescriptor) != 35);
+    QEMU_BUILD_BUG_ON(sizeof(PowerParametersDescriptor) != 98);
+    QEMU_BUILD_BUG_ON(sizeof(InterconnectDescriptor) != 6);
+    QEMU_BUILD_BUG_ON(sizeof(StringDescriptor) != 254);
+    QEMU_BUILD_BUG_ON(sizeof(DeviceHealthDescriptor) != 45);
+    QEMU_BUILD_BUG_ON(sizeof(Flags) != 0x13);
+    QEMU_BUILD_BUG_ON(sizeof(UtpUpiuHeader) != 12);
+    QEMU_BUILD_BUG_ON(sizeof(UtpUpiuQuery) != 276);
+    QEMU_BUILD_BUG_ON(sizeof(UtpUpiuCmd) != 20);
+    QEMU_BUILD_BUG_ON(sizeof(UtpUpiuReq) != 288);
+    QEMU_BUILD_BUG_ON(sizeof(UfshcdSgEntry) != 16);
+    QEMU_BUILD_BUG_ON(sizeof(RequestDescHeader) != 16);
+    QEMU_BUILD_BUG_ON(sizeof(UtpTransferReqDesc) != 32);
+    QEMU_BUILD_BUG_ON(sizeof(UtpTaskReqDesc) != 80);
+    QEMU_BUILD_BUG_ON(sizeof(UtpCmdRsp) != 40);
+    QEMU_BUILD_BUG_ON(sizeof(UtpUpiuRsp) != 288);
+}
+#endif
diff --git a/include/chardev/char-fe.h b/include/chardev/char-fe.h
index 8c420fa36e..0ff6f87511 100644
--- a/include/chardev/char-fe.h
+++ b/include/chardev/char-fe.h
@@ -78,7 +78,7 @@ bool qemu_chr_fe_backend_open(CharBackend *be);
  *             is not supported and will not be attempted
  * @opaque: an opaque pointer for the callbacks
  * @context: a main loop context or NULL for the default
- * @set_open: whether to call qemu_chr_fe_set_open() implicitely when
+ * @set_open: whether to call qemu_chr_fe_set_open() implicitly when
  * any of the handler is non-NULL
  * @sync_state: whether to issue event callback with updated state
  *
@@ -138,7 +138,7 @@ void qemu_chr_fe_disconnect(CharBackend *be);
 /**
  * qemu_chr_fe_wait_connected:
  *
- * Wait for characted backend to be connected, return < 0 on error or
+ * Wait for character backend to be connected, return < 0 on error or
  * if no associated Chardev.
  */
 int qemu_chr_fe_wait_connected(CharBackend *be, Error **errp);
@@ -175,6 +175,20 @@ void qemu_chr_fe_printf(CharBackend *be, const char *fmt, ...)
     G_GNUC_PRINTF(2, 3);
 
 
+/**
+ * FEWatchFunc: a #GSourceFunc called when any conditions requested by
+ *              qemu_chr_fe_add_watch() is satisfied.
+ * @do_not_use: depending on the underlying chardev, a GIOChannel or a
+ *              QIOChannel. DO NOT USE!
+ * @cond: bitwise combination of conditions watched and satisfied
+ *              before calling this callback.
+ * @data: user data passed at creation to qemu_chr_fe_add_watch(). Can
+ *              be NULL.
+ *
+ * Returns: G_SOURCE_REMOVE if the GSource should be removed from the
+ *              main loop, or G_SOURCE_CONTINUE to leave the GSource in
+ *              the main loop.
+ */
 typedef gboolean (*FEWatchFunc)(void *do_not_use, GIOCondition condition, void *data);
 
 /**
diff --git a/include/chardev/char.h b/include/chardev/char.h
index 44cd82e405..01df55f9e8 100644
--- a/include/chardev/char.h
+++ b/include/chardev/char.h
@@ -320,7 +320,4 @@ GSource *qemu_chr_timeout_add_ms(Chardev *chr, guint ms,
 void suspend_mux_open(void);
 void resume_mux_open(void);
 
-/* console.c */
-void qemu_chr_parse_vc(QemuOpts *opts, ChardevBackend *backend, Error **errp);
-
 #endif
diff --git a/include/crypto/aes.h b/include/crypto/aes.h
index 709d4d226b..381f24c902 100644
--- a/include/crypto/aes.h
+++ b/include/crypto/aes.h
@@ -30,4 +30,11 @@ void AES_decrypt(const unsigned char *in, unsigned char *out,
 extern const uint8_t AES_sbox[256];
 extern const uint8_t AES_isbox[256];
 
+/*
+AES_Te0[x] = S [x].[02, 01, 01, 03];
+AES_Td0[x] = Si[x].[0e, 09, 0d, 0b];
+*/
+
+extern const uint32_t AES_Te0[256], AES_Td0[256];
+
 #endif
diff --git a/include/crypto/akcipher.h b/include/crypto/akcipher.h
index 214e58ca47..8756105f22 100644
--- a/include/crypto/akcipher.h
+++ b/include/crypto/akcipher.h
@@ -30,7 +30,7 @@ typedef struct QCryptoAkCipher QCryptoAkCipher;
  * qcrypto_akcipher_supports:
  * @opts: the asymmetric key algorithm and related options
  *
- * Determine if asymmetric key cipher decribed with @opts is
+ * Determine if asymmetric key cipher described with @opts is
  * supported by the current configured build
  *
  * Returns: true if it is supported, false otherwise.
diff --git a/include/crypto/clmul.h b/include/crypto/clmul.h
new file mode 100644
index 0000000000..446931fe05
--- /dev/null
+++ b/include/crypto/clmul.h
@@ -0,0 +1,83 @@
+/*
+ * Carry-less multiply operations.
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * Copyright (C) 2023 Linaro, Ltd.
+ */
+
+#ifndef CRYPTO_CLMUL_H
+#define CRYPTO_CLMUL_H
+
+#include "qemu/int128.h"
+#include "host/crypto/clmul.h"
+
+/**
+ * clmul_8x8_low:
+ *
+ * Perform eight 8x8->8 carry-less multiplies.
+ */
+uint64_t clmul_8x8_low(uint64_t, uint64_t);
+
+/**
+ * clmul_8x4_even:
+ *
+ * Perform four 8x8->16 carry-less multiplies.
+ * The odd bytes of the inputs are ignored.
+ */
+uint64_t clmul_8x4_even(uint64_t, uint64_t);
+
+/**
+ * clmul_8x4_odd:
+ *
+ * Perform four 8x8->16 carry-less multiplies.
+ * The even bytes of the inputs are ignored.
+ */
+uint64_t clmul_8x4_odd(uint64_t, uint64_t);
+
+/**
+ * clmul_8x4_packed:
+ *
+ * Perform four 8x8->16 carry-less multiplies.
+ */
+uint64_t clmul_8x4_packed(uint32_t, uint32_t);
+
+/**
+ * clmul_16x2_even:
+ *
+ * Perform two 16x16->32 carry-less multiplies.
+ * The odd words of the inputs are ignored.
+ */
+uint64_t clmul_16x2_even(uint64_t, uint64_t);
+
+/**
+ * clmul_16x2_odd:
+ *
+ * Perform two 16x16->32 carry-less multiplies.
+ * The even words of the inputs are ignored.
+ */
+uint64_t clmul_16x2_odd(uint64_t, uint64_t);
+
+/**
+ * clmul_32:
+ *
+ * Perform a 32x32->64 carry-less multiply.
+ */
+uint64_t clmul_32(uint32_t, uint32_t);
+
+/**
+ * clmul_64:
+ *
+ * Perform a 64x64->128 carry-less multiply.
+ */
+Int128 clmul_64_gen(uint64_t, uint64_t);
+
+static inline Int128 clmul_64(uint64_t a, uint64_t b)
+{
+    if (HAVE_CLMUL_ACCEL) {
+        return clmul_64_accel(a, b);
+    } else {
+        return clmul_64_gen(a, b);
+    }
+}
+
+#endif /* CRYPTO_CLMUL_H */
diff --git a/include/crypto/ivgen.h b/include/crypto/ivgen.h
index e41521519c..a09d5732da 100644
--- a/include/crypto/ivgen.h
+++ b/include/crypto/ivgen.h
@@ -32,7 +32,7 @@
  * sector.
  *
  * <example>
- *   <title>Encrypting block data with initialiation vectors</title>
+ *   <title>Encrypting block data with initialization vectors</title>
  *   <programlisting>
  * uint8_t *data = ....data to encrypt...
  * size_t ndata = XXX;
@@ -147,7 +147,7 @@ QCryptoIVGen *qcrypto_ivgen_new(QCryptoIVGenAlgorithm alg,
  * @niv: the number of bytes in @iv
  * @errp: pointer to a NULL-initialized error object
  *
- * Calculate a new initialiation vector for the data
+ * Calculate a new initialization vector for the data
  * to be stored in sector @sector. The IV will be
  * written into the buffer @iv of size @niv.
  *
diff --git a/include/crypto/secret_common.h b/include/crypto/secret_common.h
index 42c7ff7af6..a0a22e1abd 100644
--- a/include/crypto/secret_common.h
+++ b/include/crypto/secret_common.h
@@ -48,13 +48,11 @@ struct QCryptoSecretCommonClass {
 };
 
 
-extern int qcrypto_secret_lookup(const char *secretid,
-                                 uint8_t **data,
-                                 size_t *datalen,
-                                 Error **errp);
-extern char *qcrypto_secret_lookup_as_utf8(const char *secretid,
-                                           Error **errp);
-extern char *qcrypto_secret_lookup_as_base64(const char *secretid,
-                                             Error **errp);
+int qcrypto_secret_lookup(const char *secretid,
+                          uint8_t **data,
+                          size_t *datalen,
+                          Error **errp);
+char *qcrypto_secret_lookup_as_utf8(const char *secretid, Error **errp);
+char *qcrypto_secret_lookup_as_base64(const char *secretid, Error **errp);
 
 #endif /* QCRYPTO_SECRET_COMMON_H */
diff --git a/include/crypto/sm4.h b/include/crypto/sm4.h
index 9bd3ebc62e..382b26d922 100644
--- a/include/crypto/sm4.h
+++ b/include/crypto/sm4.h
@@ -2,5 +2,14 @@
 #define QEMU_SM4_H
 
 extern const uint8_t sm4_sbox[256];
+extern const uint32_t sm4_ck[32];
+
+static inline uint32_t sm4_subword(uint32_t word)
+{
+    return sm4_sbox[word & 0xff] |
+           sm4_sbox[(word >> 8) & 0xff] << 8 |
+           sm4_sbox[(word >> 16) & 0xff] << 16 |
+           sm4_sbox[(word >> 24) & 0xff] << 24;
+}
 
 #endif
diff --git a/include/elf.h b/include/elf.h
index ec9755e73b..e7259ec366 100644
--- a/include/elf.h
+++ b/include/elf.h
@@ -1125,9 +1125,9 @@ typedef struct {
 #define EFA_PARISC_1_1      0x0210 /* PA-RISC 1.1 big-endian.  */
 #define EFA_PARISC_2_0      0x0214 /* PA-RISC 2.0 big-endian.  */
 
-/* Additional section indeces.  */
+/* Additional section indices.  */
 
-#define SHN_PARISC_ANSI_COMMON  0xff00   /* Section for tenatively declared
+#define SHN_PARISC_ANSI_COMMON  0xff00   /* Section for tentatively declared
                                               symbols in ANSI C.  */
 #define SHN_PARISC_HUGE_COMMON  0xff01   /* Common blocks in huge model.  */
 
diff --git a/include/exec/address-spaces.h b/include/exec/address-spaces.h
index db8bfa9a92..0d0aa61d68 100644
--- a/include/exec/address-spaces.h
+++ b/include/exec/address-spaces.h
@@ -19,8 +19,6 @@
  * you're one of them.
  */
 
-#include "exec/memory.h"
-
 #ifndef CONFIG_USER_ONLY
 
 /* Get the root memory region.  This interface should only be used temporarily
diff --git a/include/exec/cpu-defs.h b/include/exec/cpu-defs.h
index fb4c8d480f..350287852e 100644
--- a/include/exec/cpu-defs.h
+++ b/include/exec/cpu-defs.h
@@ -100,12 +100,12 @@
 typedef struct CPUTLBEntryFull {
     /*
      * @xlat_section contains:
-     *  - in the lower TARGET_PAGE_BITS, a physical section number
-     *  - with the lower TARGET_PAGE_BITS masked off, an offset which
-     *    must be added to the virtual address to obtain:
-     *     + the ram_addr_t of the target RAM (if the physical section
-     *       number is PHYS_SECTION_NOTDIRTY or PHYS_SECTION_ROM)
-     *     + the offset within the target MemoryRegion (otherwise)
+     *  - For ram, an offset which must be added to the virtual address
+     *    to obtain the ram_addr_t of the target RAM
+     *  - For other memory regions,
+     *     + in the lower TARGET_PAGE_BITS, the physical section number
+     *     + with the TARGET_PAGE_BITS masked off, the offset within
+     *       the target MemoryRegion
      */
     hwaddr xlat_section;
 
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index d02517e95f..b2f5cd4c2a 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -21,7 +21,7 @@
 #define EXEC_ALL_H
 
 #include "cpu.h"
-#ifdef CONFIG_TCG
+#if defined(CONFIG_USER_ONLY)
 #include "exec/cpu_ldst.h"
 #endif
 #include "exec/translation-block.h"
diff --git a/include/exec/memory.h b/include/exec/memory.h
index 68284428f8..ef23d65afc 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -235,6 +235,12 @@ typedef struct IOMMUTLBEvent {
 /* RAM is an mmap-ed named file */
 #define RAM_NAMED_FILE (1 << 9)
 
+/* RAM is mmap-ed read-only */
+#define RAM_READONLY (1 << 10)
+
+/* RAM FD is opened read-only */
+#define RAM_READONLY_FD (1 << 11)
+
 static inline void iommu_notifier_init(IOMMUNotifier *n, IOMMUNotify fn,
                                        IOMMUNotifierFlag flags,
                                        hwaddr start, hwaddr end,
@@ -1089,6 +1095,7 @@ struct AddressSpace {
     struct FlatView *current_map;
 
     int ioeventfd_nb;
+    int ioeventfd_notifiers;
     struct MemoryRegionIoeventfd *ioeventfds;
     QTAILQ_HEAD(, MemoryListener) listeners;
     QTAILQ_ENTRY(AddressSpace) address_spaces_link;
@@ -1331,10 +1338,10 @@ void memory_region_init_resizeable_ram(MemoryRegion *mr,
  * @align: alignment of the region base address; if 0, the default alignment
  *         (getpagesize()) will be used.
  * @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_PMEM,
- *             RAM_NORESERVE,
+ *             RAM_NORESERVE, RAM_PROTECTED, RAM_NAMED_FILE, RAM_READONLY,
+ *             RAM_READONLY_FD
  * @path: the path in which to allocate the RAM.
  * @offset: offset within the file referenced by path
- * @readonly: true to open @path for reading, false for read/write.
  * @errp: pointer to Error*, to store an error if it happens.
  *
  * Note that this function does not do anything to cause the data in the
@@ -1348,7 +1355,6 @@ void memory_region_init_ram_from_file(MemoryRegion *mr,
                                       uint32_t ram_flags,
                                       const char *path,
                                       ram_addr_t offset,
-                                      bool readonly,
                                       Error **errp);
 
 /**
@@ -1360,7 +1366,8 @@ void memory_region_init_ram_from_file(MemoryRegion *mr,
  * @name: the name of the region.
  * @size: size of the region.
  * @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_PMEM,
- *             RAM_NORESERVE, RAM_PROTECTED.
+ *             RAM_NORESERVE, RAM_PROTECTED, RAM_NAMED_FILE, RAM_READONLY,
+ *             RAM_READONLY_FD
  * @fd: the fd to mmap.
  * @offset: offset within the file referenced by fd
  * @errp: pointer to Error*, to store an error if it happens.
diff --git a/include/exec/memory_ldst.h.inc b/include/exec/memory_ldst.h.inc
index 7c3a641f7e..92ad74e956 100644
--- a/include/exec/memory_ldst.h.inc
+++ b/include/exec/memory_ldst.h.inc
@@ -20,48 +20,48 @@
  */
 
 #ifdef TARGET_ENDIANNESS
-extern uint16_t glue(address_space_lduw, SUFFIX)(ARG1_DECL,
+uint16_t glue(address_space_lduw, SUFFIX)(ARG1_DECL,
     hwaddr addr, MemTxAttrs attrs, MemTxResult *result);
-extern uint32_t glue(address_space_ldl, SUFFIX)(ARG1_DECL,
+uint32_t glue(address_space_ldl, SUFFIX)(ARG1_DECL,
     hwaddr addr, MemTxAttrs attrs, MemTxResult *result);
-extern uint64_t glue(address_space_ldq, SUFFIX)(ARG1_DECL,
+uint64_t glue(address_space_ldq, SUFFIX)(ARG1_DECL,
     hwaddr addr, MemTxAttrs attrs, MemTxResult *result);
-extern void glue(address_space_stl_notdirty, SUFFIX)(ARG1_DECL,
+void glue(address_space_stl_notdirty, SUFFIX)(ARG1_DECL,
     hwaddr addr, uint32_t val, MemTxAttrs attrs, MemTxResult *result);
-extern void glue(address_space_stw, SUFFIX)(ARG1_DECL,
+void glue(address_space_stw, SUFFIX)(ARG1_DECL,
     hwaddr addr, uint16_t val, MemTxAttrs attrs, MemTxResult *result);
-extern void glue(address_space_stl, SUFFIX)(ARG1_DECL,
+void glue(address_space_stl, SUFFIX)(ARG1_DECL,
     hwaddr addr, uint32_t val, MemTxAttrs attrs, MemTxResult *result);
-extern void glue(address_space_stq, SUFFIX)(ARG1_DECL,
+void glue(address_space_stq, SUFFIX)(ARG1_DECL,
     hwaddr addr, uint64_t val, MemTxAttrs attrs, MemTxResult *result);
 #else
-extern uint8_t glue(address_space_ldub, SUFFIX)(ARG1_DECL,
+uint8_t glue(address_space_ldub, SUFFIX)(ARG1_DECL,
     hwaddr addr, MemTxAttrs attrs, MemTxResult *result);
-extern uint16_t glue(address_space_lduw_le, SUFFIX)(ARG1_DECL,
+uint16_t glue(address_space_lduw_le, SUFFIX)(ARG1_DECL,
     hwaddr addr, MemTxAttrs attrs, MemTxResult *result);
-extern uint16_t glue(address_space_lduw_be, SUFFIX)(ARG1_DECL,
+uint16_t glue(address_space_lduw_be, SUFFIX)(ARG1_DECL,
     hwaddr addr, MemTxAttrs attrs, MemTxResult *result);
-extern uint32_t glue(address_space_ldl_le, SUFFIX)(ARG1_DECL,
+uint32_t glue(address_space_ldl_le, SUFFIX)(ARG1_DECL,
     hwaddr addr, MemTxAttrs attrs, MemTxResult *result);
-extern uint32_t glue(address_space_ldl_be, SUFFIX)(ARG1_DECL,
+uint32_t glue(address_space_ldl_be, SUFFIX)(ARG1_DECL,
     hwaddr addr, MemTxAttrs attrs, MemTxResult *result);
-extern uint64_t glue(address_space_ldq_le, SUFFIX)(ARG1_DECL,
+uint64_t glue(address_space_ldq_le, SUFFIX)(ARG1_DECL,
     hwaddr addr, MemTxAttrs attrs, MemTxResult *result);
-extern uint64_t glue(address_space_ldq_be, SUFFIX)(ARG1_DECL,
+uint64_t glue(address_space_ldq_be, SUFFIX)(ARG1_DECL,
     hwaddr addr, MemTxAttrs attrs, MemTxResult *result);
-extern void glue(address_space_stb, SUFFIX)(ARG1_DECL,
+void glue(address_space_stb, SUFFIX)(ARG1_DECL,
     hwaddr addr, uint8_t val, MemTxAttrs attrs, MemTxResult *result);
-extern void glue(address_space_stw_le, SUFFIX)(ARG1_DECL,
+void glue(address_space_stw_le, SUFFIX)(ARG1_DECL,
     hwaddr addr, uint16_t val, MemTxAttrs attrs, MemTxResult *result);
-extern void glue(address_space_stw_be, SUFFIX)(ARG1_DECL,
+void glue(address_space_stw_be, SUFFIX)(ARG1_DECL,
     hwaddr addr, uint16_t val, MemTxAttrs attrs, MemTxResult *result);
-extern void glue(address_space_stl_le, SUFFIX)(ARG1_DECL,
+void glue(address_space_stl_le, SUFFIX)(ARG1_DECL,
     hwaddr addr, uint32_t val, MemTxAttrs attrs, MemTxResult *result);
-extern void glue(address_space_stl_be, SUFFIX)(ARG1_DECL,
+void glue(address_space_stl_be, SUFFIX)(ARG1_DECL,
     hwaddr addr, uint32_t val, MemTxAttrs attrs, MemTxResult *result);
-extern void glue(address_space_stq_le, SUFFIX)(ARG1_DECL,
+void glue(address_space_stq_le, SUFFIX)(ARG1_DECL,
     hwaddr addr, uint64_t val, MemTxAttrs attrs, MemTxResult *result);
-extern void glue(address_space_stq_be, SUFFIX)(ARG1_DECL,
+void glue(address_space_stq_be, SUFFIX)(ARG1_DECL,
     hwaddr addr, uint64_t val, MemTxAttrs attrs, MemTxResult *result);
 #endif
 
diff --git a/include/exec/page-vary.h b/include/exec/page-vary.h
index ebbe9b169b..54ddde308a 100644
--- a/include/exec/page-vary.h
+++ b/include/exec/page-vary.h
@@ -27,8 +27,8 @@ typedef struct {
 } TargetPageBits;
 
 #ifdef IN_PAGE_VARY
-extern bool set_preferred_target_page_bits_common(int bits);
-extern void finalize_target_page_bits_common(int min);
+bool set_preferred_target_page_bits_common(int bits);
+void finalize_target_page_bits_common(int min);
 #endif
 
 /**
diff --git a/include/exec/poison.h b/include/exec/poison.h
index e94ee8dfef..1ea5633eb3 100644
--- a/include/exec/poison.h
+++ b/include/exec/poison.h
@@ -81,7 +81,6 @@
 #pragma GCC poison CONFIG_SPARC_DIS
 #pragma GCC poison CONFIG_XTENSA_DIS
 
-#pragma GCC poison CONFIG_HAX
 #pragma GCC poison CONFIG_HVF
 #pragma GCC poison CONFIG_LINUX_USER
 #pragma GCC poison CONFIG_KVM
diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h
index 9f2e3893f5..90676093f5 100644
--- a/include/exec/ram_addr.h
+++ b/include/exec/ram_addr.h
@@ -108,10 +108,10 @@ long qemu_maxrampagesize(void);
  *  @size: the size in bytes of the ram block
  *  @mr: the memory region where the ram block is
  *  @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_PMEM,
- *              RAM_NORESERVE.
+ *              RAM_NORESERVE, RAM_PROTECTED, RAM_NAMED_FILE, RAM_READONLY,
+ *              RAM_READONLY_FD
  *  @mem_path or @fd: specify the backing file or device
  *  @offset: Offset into target file
- *  @readonly: true to open @path for reading, false for read/write.
  *  @errp: pointer to Error*, to store an error if it happens
  *
  * Return:
@@ -120,10 +120,10 @@ long qemu_maxrampagesize(void);
  */
 RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
                                    uint32_t ram_flags, const char *mem_path,
-                                   off_t offset, bool readonly, Error **errp);
+                                   off_t offset, Error **errp);
 RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
                                  uint32_t ram_flags, int fd, off_t offset,
-                                 bool readonly, Error **errp);
+                                 Error **errp);
 
 RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
                                   MemoryRegion *mr, Error **errp);
diff --git a/include/exec/translation-block.h b/include/exec/translation-block.h
index 5119924927..b785751774 100644
--- a/include/exec/translation-block.h
+++ b/include/exec/translation-block.h
@@ -7,11 +7,11 @@
 #ifndef EXEC_TRANSLATION_BLOCK_H
 #define EXEC_TRANSLATION_BLOCK_H
 
-#include "qemu/atomic.h"
 #include "qemu/thread.h"
-#include "qemu/interval-tree.h"
 #include "exec/cpu-common.h"
-#include "exec/target_page.h"
+#ifdef CONFIG_USER_ONLY
+#include "qemu/interval-tree.h"
+#endif
 
 /*
  * Page tracking code uses ram addresses in system mode, and virtual
diff --git a/include/exec/translator.h b/include/exec/translator.h
index a53d3243d4..4e17c4f401 100644
--- a/include/exec/translator.h
+++ b/include/exec/translator.h
@@ -159,7 +159,7 @@ bool translator_use_goto_tb(DisasContextBase *db, vaddr dest);
  * translator_io_start
  * @db: Disassembly context
  *
- * If icount is enabled, set cpu->can_to_io, adjust db->is_jmp to
+ * If icount is enabled, set cpu->can_do_io, adjust db->is_jmp to
  * DISAS_TOO_MANY if it is still DISAS_NEXT, and return true.
  * Otherwise return false.
  */
diff --git a/include/exec/user/thunk.h b/include/exec/user/thunk.h
index 6eedef48d8..2ebfecf58e 100644
--- a/include/exec/user/thunk.h
+++ b/include/exec/user/thunk.h
@@ -111,8 +111,7 @@ static inline int thunk_type_size(const argtype *type_ptr, int is_host)
         if (is_host) {
 #if defined(HOST_X86_64)
             return 8;
-#elif defined(HOST_ALPHA) || defined(HOST_IA64) || defined(HOST_MIPS) || \
-      defined(HOST_PARISC) || defined(HOST_SPARC64)
+#elif defined(HOST_MIPS) || defined(HOST_SPARC64)
             return 4;
 #elif defined(HOST_PPC)
             return sizeof(void *);
diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h
index cd130564d8..eb64075b9c 100644
--- a/include/fpu/softfloat.h
+++ b/include/fpu/softfloat.h
@@ -366,6 +366,8 @@ float32 bfloat16_to_float32(bfloat16, float_status *status);
 bfloat16 float64_to_bfloat16(float64 a, float_status *status);
 float64 bfloat16_to_float64(bfloat16 a, float_status *status);
 
+int8_t bfloat16_to_int8_scalbn(bfloat16, FloatRoundMode,
+                               int, float_status *status);
 int16_t bfloat16_to_int16_scalbn(bfloat16, FloatRoundMode,
                                  int, float_status *status);
 int32_t bfloat16_to_int32_scalbn(bfloat16, FloatRoundMode,
@@ -373,14 +375,18 @@ int32_t bfloat16_to_int32_scalbn(bfloat16, FloatRoundMode,
 int64_t bfloat16_to_int64_scalbn(bfloat16, FloatRoundMode,
                                  int, float_status *status);
 
+int8_t bfloat16_to_int8(bfloat16, float_status *status);
 int16_t bfloat16_to_int16(bfloat16, float_status *status);
 int32_t bfloat16_to_int32(bfloat16, float_status *status);
 int64_t bfloat16_to_int64(bfloat16, float_status *status);
 
+int8_t bfloat16_to_int8_round_to_zero(bfloat16, float_status *status);
 int16_t bfloat16_to_int16_round_to_zero(bfloat16, float_status *status);
 int32_t bfloat16_to_int32_round_to_zero(bfloat16, float_status *status);
 int64_t bfloat16_to_int64_round_to_zero(bfloat16, float_status *status);
 
+uint8_t bfloat16_to_uint8_scalbn(bfloat16 a, FloatRoundMode,
+                                 int, float_status *status);
 uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode,
                                    int, float_status *status);
 uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode,
@@ -388,24 +394,30 @@ uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode,
 uint64_t bfloat16_to_uint64_scalbn(bfloat16 a, FloatRoundMode,
                                    int, float_status *status);
 
+uint8_t bfloat16_to_uint8(bfloat16 a, float_status *status);
 uint16_t bfloat16_to_uint16(bfloat16 a, float_status *status);
 uint32_t bfloat16_to_uint32(bfloat16 a, float_status *status);
 uint64_t bfloat16_to_uint64(bfloat16 a, float_status *status);
 
+uint8_t bfloat16_to_uint8_round_to_zero(bfloat16 a, float_status *status);
 uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *status);
 uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a, float_status *status);
 uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a, float_status *status);
 
+bfloat16 int8_to_bfloat16_scalbn(int8_t a, int, float_status *status);
 bfloat16 int16_to_bfloat16_scalbn(int16_t a, int, float_status *status);
 bfloat16 int32_to_bfloat16_scalbn(int32_t a, int, float_status *status);
 bfloat16 int64_to_bfloat16_scalbn(int64_t a, int, float_status *status);
+bfloat16 uint8_to_bfloat16_scalbn(uint8_t a, int, float_status *status);
 bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int, float_status *status);
 bfloat16 uint32_to_bfloat16_scalbn(uint32_t a, int, float_status *status);
 bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int, float_status *status);
 
+bfloat16 int8_to_bfloat16(int8_t a, float_status *status);
 bfloat16 int16_to_bfloat16(int16_t a, float_status *status);
 bfloat16 int32_to_bfloat16(int32_t a, float_status *status);
 bfloat16 int64_to_bfloat16(int64_t a, float_status *status);
+bfloat16 uint8_to_bfloat16(uint8_t a, float_status *status);
 bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status);
 bfloat16 uint32_to_bfloat16(uint32_t a, float_status *status);
 bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status);
diff --git a/include/hw/acpi/aml-build.h b/include/hw/acpi/aml-build.h
index d1fb08514b..ff2a310270 100644
--- a/include/hw/acpi/aml-build.h
+++ b/include/hw/acpi/aml-build.h
@@ -277,7 +277,7 @@ void free_aml_allocator(void);
  * @child: element that is copied into @parent_ctx context
  *
  * Joins Aml elements together and helps to construct AML tables
- * Examle of usage:
+ * Example of usage:
  *   Aml *table = aml_def_block("SSDT", ...);
  *   Aml *sb = aml_scope("\\_SB");
  *   Aml *dev = aml_device("PCI0");
diff --git a/include/hw/acpi/pc-hotplug.h b/include/hw/acpi/pc-hotplug.h
index 31bc9191c3..8a654248e9 100644
--- a/include/hw/acpi/pc-hotplug.h
+++ b/include/hw/acpi/pc-hotplug.h
@@ -13,7 +13,7 @@
 #define PC_HOTPLUG_H
 
 /*
- * ONLY DEFINEs are permited in this file since it's shared
+ * ONLY DEFINEs are permitted in this file since it's shared
  * between C and ASL code.
  */
 
diff --git a/include/hw/acpi/vmgenid.h b/include/hw/acpi/vmgenid.h
index dc8bb3433e..fb135d5bcb 100644
--- a/include/hw/acpi/vmgenid.h
+++ b/include/hw/acpi/vmgenid.h
@@ -13,7 +13,7 @@
 
 #define VMGENID_FW_CFG_SIZE      4096 /* Occupy a page of memory */
 #define VMGENID_GUID_OFFSET      40   /* allow space for
-                                       * OVMF SDT Header Probe Supressor
+                                       * OVMF SDT Header Probe Suppressor
                                        */
 
 OBJECT_DECLARE_SIMPLE_TYPE(VmGenIdState, VMGENID)
diff --git a/include/hw/arm/xlnx-versal.h b/include/hw/arm/xlnx-versal.h
index 39ee31185c..7b419f88c2 100644
--- a/include/hw/arm/xlnx-versal.h
+++ b/include/hw/arm/xlnx-versal.h
@@ -32,6 +32,8 @@
 #include "hw/misc/xlnx-versal-crl.h"
 #include "hw/misc/xlnx-versal-pmc-iou-slcr.h"
 #include "hw/net/xlnx-versal-canfd.h"
+#include "hw/misc/xlnx-versal-cfu.h"
+#include "hw/misc/xlnx-versal-cframe-reg.h"
 
 #define TYPE_XLNX_VERSAL "xlnx-versal"
 OBJECT_DECLARE_SIMPLE_TYPE(Versal, XLNX_VERSAL)
@@ -46,6 +48,7 @@ OBJECT_DECLARE_SIMPLE_TYPE(Versal, XLNX_VERSAL)
 #define XLNX_VERSAL_NR_IRQS    192
 #define XLNX_VERSAL_NR_CANFD   2
 #define XLNX_VERSAL_CANFD_REF_CLK (24 * 1000 * 1000)
+#define XLNX_VERSAL_NR_CFRAME  15
 
 struct Versal {
     /*< private >*/
@@ -117,6 +120,11 @@ struct Versal {
         XlnxEFuse efuse;
         XlnxVersalEFuseCtrl efuse_ctrl;
         XlnxVersalEFuseCache efuse_cache;
+        XlnxVersalCFUAPB cfu_apb;
+        XlnxVersalCFUFDRO cfu_fdro;
+        XlnxVersalCFUSFR cfu_sfr;
+        XlnxVersalCFrameReg cframe[XLNX_VERSAL_NR_CFRAME];
+        XlnxVersalCFrameBcastReg cframe_bcast;
 
         OrIRQState apb_irq_orgate;
     } pmc;
@@ -147,6 +155,7 @@ struct Versal {
 #define VERSAL_GEM1_WAKE_IRQ_0     59
 #define VERSAL_ADMA_IRQ_0          60
 #define VERSAL_XRAM_IRQ_0          79
+#define VERSAL_CFU_IRQ_0           120
 #define VERSAL_PMC_APB_IRQ         121
 #define VERSAL_OSPI_IRQ            124
 #define VERSAL_SD0_IRQ_0           126
@@ -240,6 +249,82 @@ struct Versal {
 #define MM_PMC_EFUSE_CACHE          0xf1250000
 #define MM_PMC_EFUSE_CACHE_SIZE     0x00C00
 
+#define MM_PMC_CFU_APB              0xf12b0000
+#define MM_PMC_CFU_APB_SIZE         0x10000
+#define MM_PMC_CFU_STREAM           0xf12c0000
+#define MM_PMC_CFU_STREAM_SIZE      0x1000
+#define MM_PMC_CFU_SFR              0xf12c1000
+#define MM_PMC_CFU_SFR_SIZE         0x1000
+#define MM_PMC_CFU_FDRO             0xf12c2000
+#define MM_PMC_CFU_FDRO_SIZE        0x1000
+#define MM_PMC_CFU_STREAM_2         0xf1f80000
+#define MM_PMC_CFU_STREAM_2_SIZE    0x40000
+
+#define MM_PMC_CFRAME0_REG          0xf12d0000
+#define MM_PMC_CFRAME0_REG_SIZE     0x1000
+#define MM_PMC_CFRAME0_FDRI         0xf12d1000
+#define MM_PMC_CFRAME0_FDRI_SIZE    0x1000
+#define MM_PMC_CFRAME1_REG          0xf12d2000
+#define MM_PMC_CFRAME1_REG_SIZE     0x1000
+#define MM_PMC_CFRAME1_FDRI         0xf12d3000
+#define MM_PMC_CFRAME1_FDRI_SIZE    0x1000
+#define MM_PMC_CFRAME2_REG          0xf12d4000
+#define MM_PMC_CFRAME2_REG_SIZE     0x1000
+#define MM_PMC_CFRAME2_FDRI         0xf12d5000
+#define MM_PMC_CFRAME2_FDRI_SIZE    0x1000
+#define MM_PMC_CFRAME3_REG          0xf12d6000
+#define MM_PMC_CFRAME3_REG_SIZE     0x1000
+#define MM_PMC_CFRAME3_FDRI         0xf12d7000
+#define MM_PMC_CFRAME3_FDRI_SIZE    0x1000
+#define MM_PMC_CFRAME4_REG          0xf12d8000
+#define MM_PMC_CFRAME4_REG_SIZE     0x1000
+#define MM_PMC_CFRAME4_FDRI         0xf12d9000
+#define MM_PMC_CFRAME4_FDRI_SIZE    0x1000
+#define MM_PMC_CFRAME5_REG          0xf12da000
+#define MM_PMC_CFRAME5_REG_SIZE     0x1000
+#define MM_PMC_CFRAME5_FDRI         0xf12db000
+#define MM_PMC_CFRAME5_FDRI_SIZE    0x1000
+#define MM_PMC_CFRAME6_REG          0xf12dc000
+#define MM_PMC_CFRAME6_REG_SIZE     0x1000
+#define MM_PMC_CFRAME6_FDRI         0xf12dd000
+#define MM_PMC_CFRAME6_FDRI_SIZE    0x1000
+#define MM_PMC_CFRAME7_REG          0xf12de000
+#define MM_PMC_CFRAME7_REG_SIZE     0x1000
+#define MM_PMC_CFRAME7_FDRI         0xf12df000
+#define MM_PMC_CFRAME7_FDRI_SIZE    0x1000
+#define MM_PMC_CFRAME8_REG          0xf12e0000
+#define MM_PMC_CFRAME8_REG_SIZE     0x1000
+#define MM_PMC_CFRAME8_FDRI         0xf12e1000
+#define MM_PMC_CFRAME8_FDRI_SIZE    0x1000
+#define MM_PMC_CFRAME9_REG          0xf12e2000
+#define MM_PMC_CFRAME9_REG_SIZE     0x1000
+#define MM_PMC_CFRAME9_FDRI         0xf12e3000
+#define MM_PMC_CFRAME9_FDRI_SIZE    0x1000
+#define MM_PMC_CFRAME10_REG         0xf12e4000
+#define MM_PMC_CFRAME10_REG_SIZE    0x1000
+#define MM_PMC_CFRAME10_FDRI        0xf12e5000
+#define MM_PMC_CFRAME10_FDRI_SIZE   0x1000
+#define MM_PMC_CFRAME11_REG         0xf12e6000
+#define MM_PMC_CFRAME11_REG_SIZE    0x1000
+#define MM_PMC_CFRAME11_FDRI        0xf12e7000
+#define MM_PMC_CFRAME11_FDRI_SIZE   0x1000
+#define MM_PMC_CFRAME12_REG         0xf12e8000
+#define MM_PMC_CFRAME12_REG_SIZE    0x1000
+#define MM_PMC_CFRAME12_FDRI        0xf12e9000
+#define MM_PMC_CFRAME12_FDRI_SIZE   0x1000
+#define MM_PMC_CFRAME13_REG         0xf12ea000
+#define MM_PMC_CFRAME13_REG_SIZE    0x1000
+#define MM_PMC_CFRAME13_FDRI        0xf12eb000
+#define MM_PMC_CFRAME13_FDRI_SIZE   0x1000
+#define MM_PMC_CFRAME14_REG         0xf12ec000
+#define MM_PMC_CFRAME14_REG_SIZE    0x1000
+#define MM_PMC_CFRAME14_FDRI        0xf12ed000
+#define MM_PMC_CFRAME14_FDRI_SIZE   0x1000
+#define MM_PMC_CFRAME_BCAST_REG       0xf12ee000
+#define MM_PMC_CFRAME_BCAST_REG_SIZE  0x1000
+#define MM_PMC_CFRAME_BCAST_FDRI      0xf12ef000
+#define MM_PMC_CFRAME_BCAST_FDRI_SIZE 0x1000
+
 #define MM_PMC_CRP                  0xf1260000U
 #define MM_PMC_CRP_SIZE             0x10000
 #define MM_PMC_RTC                  0xf12a0000
diff --git a/include/hw/block/flash.h b/include/hw/block/flash.h
index 7198953702..de93756cbe 100644
--- a/include/hw/block/flash.h
+++ b/include/hw/block/flash.h
@@ -76,4 +76,8 @@ uint8_t ecc_digest(ECCState *s, uint8_t sample);
 void ecc_reset(ECCState *s);
 extern const VMStateDescription vmstate_ecc_state;
 
+/* m25p80.c */
+
+BlockBackend *m25p80_get_blk(DeviceState *dev);
+
 #endif
diff --git a/include/hw/boards.h b/include/hw/boards.h
index 3b541ffd24..6c67af196a 100644
--- a/include/hw/boards.h
+++ b/include/hw/boards.h
@@ -158,7 +158,7 @@ typedef struct {
  *    any actions to be performed by hotplug handler.
  * @cpu_index_to_instance_props:
  *    used to provide @cpu_index to socket/core/thread number mapping, allowing
- *    legacy code to perform maping from cpu_index to topology properties
+ *    legacy code to perform mapping from cpu_index to topology properties
  *    Returns: tuple of socket/core/thread ids given cpu_index belongs to.
  *    used to provide @cpu_index to socket number mapping, allowing
  *    a machine to group CPU threads belonging to the same socket/package
@@ -211,10 +211,10 @@ typedef struct {
  *    the rejection.  If the hook is not provided, all hotplug will be
  *    allowed.
  * @default_ram_id:
- *    Specifies inital RAM MemoryRegion name to be used for default backend
+ *    Specifies initial RAM MemoryRegion name to be used for default backend
  *    creation if user explicitly hasn't specified backend with "memory-backend"
  *    property.
- *    It also will be used as a way to optin into "-m" option support.
+ *    It also will be used as a way to option into "-m" option support.
  *    If it's not set by board, '-m' will be ignored and generic code will
  *    not create default RAM MemoryRegion.
  * @fixup_ram_size:
diff --git a/include/hw/char/avr_usart.h b/include/hw/char/avr_usart.h
index 62eaa1528e..0cc599e9b1 100644
--- a/include/hw/char/avr_usart.h
+++ b/include/hw/char/avr_usart.h
@@ -34,7 +34,7 @@
 #define USART_BRRH 0x05
 #define USART_BRRL 0x04
 
-/* Relevant bits in regiters. */
+/* Relevant bits in registers. */
 #define USART_CSRA_RXC    (1 << 7)
 #define USART_CSRA_TXC    (1 << 6)
 #define USART_CSRA_DRE    (1 << 5)
diff --git a/include/hw/clock.h b/include/hw/clock.h
index 5c927cee7f..bb12117f67 100644
--- a/include/hw/clock.h
+++ b/include/hw/clock.h
@@ -204,7 +204,7 @@ static inline bool clock_set_ns(Clock *clk, unsigned ns)
  * Propagate the clock period that has been previously configured using
  * @clock_set(). This will update recursively all connected clocks.
  * It is an error to call this function on a clock which has a source.
- * Note: this function must not be called during device inititialization
+ * Note: this function must not be called during device initialization
  * or migration.
  */
 void clock_propagate(Clock *clk);
diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
index fdcbe87352..648b5b3586 100644
--- a/include/hw/core/cpu.h
+++ b/include/hw/core/cpu.h
@@ -227,17 +227,6 @@ struct CPUWatchpoint {
     QTAILQ_ENTRY(CPUWatchpoint) entry;
 };
 
-#ifdef CONFIG_PLUGIN
-/*
- * For plugins we sometime need to save the resolved iotlb data before
- * the memory regions get moved around  by io_writex.
- */
-typedef struct SavedIOTLB {
-    MemoryRegionSection *section;
-    hwaddr mr_offset;
-} SavedIOTLB;
-#endif
-
 struct KVMState;
 struct kvm_run;
 
@@ -409,8 +398,6 @@ struct CPUState {
 
 #ifdef CONFIG_PLUGIN
     GArray *plugin_mem_cbs;
-    /* saved iotlb data from io_writex */
-    SavedIOTLB saved_iotlb;
 #endif
 
     /* TODO Move common fields from CPUArchState here. */
@@ -422,7 +409,7 @@ struct CPUState {
     int32_t exception_index;
 
     AccelCPUState *accel;
-    /* shared by kvm, hax and hvf */
+    /* shared by kvm and hvf */
     bool vcpu_dirty;
 
     /* Used to keep track of an outstanding cpu throttle thread for migration
diff --git a/include/hw/core/tcg-cpu-ops.h b/include/hw/core/tcg-cpu-ops.h
index 3e8b1b737a..479713a36e 100644
--- a/include/hw/core/tcg-cpu-ops.h
+++ b/include/hw/core/tcg-cpu-ops.h
@@ -14,7 +14,7 @@
 
 struct TCGCPUOps {
     /**
-     * @initialize: Initalize TCG state
+     * @initialize: Initialize TCG state
      *
      * Called when the first CPU is realized.
      */
diff --git a/include/hw/cxl/cxl.h b/include/hw/cxl/cxl.h
index 56c9e7676e..4944725849 100644
--- a/include/hw/cxl/cxl.h
+++ b/include/hw/cxl/cxl.h
@@ -29,7 +29,7 @@ typedef struct PXBCXLDev PXBCXLDev;
 typedef struct CXLFixedWindow {
     uint64_t size;
     char **targets;
-    PXBCXLDev *target_hbs[8];
+    PXBCXLDev *target_hbs[16];
     uint8_t num_targets;
     uint8_t enc_int_ways;
     uint8_t enc_int_gran;
diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
index 1978730fba..51cd0d9ce3 100644
--- a/include/hw/cxl/cxl_device.h
+++ b/include/hw/cxl/cxl_device.h
@@ -208,7 +208,7 @@ void cxl_event_set_status(CXLDeviceState *cxl_dstate, CXLEventLogType log_type,
  * > is the maximum access size allowed for these registers. If this rule is not
  * > followed, the behavior is undefined
  *
- * CXL 2.0 Errata F4 states futher that the layouts in the specification are
+ * CXL 2.0 Errata F4 states further that the layouts in the specification are
  * shown as greater than 128 bits, but implementations are expected to
  * use any size of access up to 64 bits.
  *
@@ -300,7 +300,7 @@ REG64(CXL_MEM_DEV_STS, 0)
 typedef struct CXLError {
     QTAILQ_ENTRY(CXLError) node;
     int type; /* Error code as per FE definition */
-    uint32_t header[32];
+    uint32_t header[CXL_RAS_ERR_HEADER_NUM];
 } CXLError;
 
 typedef QTAILQ_HEAD(, CXLError) CXLErrorList;
diff --git a/include/hw/hyperv/vmbus.h b/include/hw/hyperv/vmbus.h
index 8ea660dd8e..5c505852f2 100644
--- a/include/hw/hyperv/vmbus.h
+++ b/include/hw/hyperv/vmbus.h
@@ -51,7 +51,7 @@ struct VMBusDeviceClass {
     uint16_t channel_flags;
     uint16_t mmio_size_mb;
 
-    /* Extentions to standard device callbacks */
+    /* Extensions to standard device callbacks */
     void (*vmdev_realize)(VMBusDevice *vdev, Error **errp);
     void (*vmdev_unrealize)(VMBusDevice *vdev);
     void (*vmdev_reset)(VMBusDevice *vdev);
diff --git a/include/hw/i2c/aspeed_i2c.h b/include/hw/i2c/aspeed_i2c.h
index 51c944efea..a064479e59 100644
--- a/include/hw/i2c/aspeed_i2c.h
+++ b/include/hw/i2c/aspeed_i2c.h
@@ -139,10 +139,11 @@ REG32(I2CD_CMD, 0x14) /* I2CD Command/Status */
 REG32(I2CD_DEV_ADDR, 0x18) /* Slave Device Address */
     SHARED_FIELD(SLAVE_DEV_ADDR1, 0, 7)
 REG32(I2CD_POOL_CTRL, 0x1C) /* Pool Buffer Control */
-    SHARED_FIELD(RX_COUNT, 24, 5)
+    SHARED_FIELD(RX_COUNT, 24, 6)
     SHARED_FIELD(RX_SIZE, 16, 5)
-    SHARED_FIELD(TX_COUNT, 9, 5)
+    SHARED_FIELD(TX_COUNT, 8, 5)
     FIELD(I2CD_POOL_CTRL, OFFSET, 2, 6) /* AST2400 */
+    SHARED_FIELD(BUF_ORGANIZATION, 0, 1) /* AST2600 */
 REG32(I2CD_BYTE_BUF, 0x20) /* Transmit/Receive Byte Buffer */
     SHARED_FIELD(RX_BUF, 8, 8)
     SHARED_FIELD(TX_BUF, 0, 8)
diff --git a/include/hw/i2c/npcm7xx_smbus.h b/include/hw/i2c/npcm7xx_smbus.h
index 3555e6836f..dc45963c0e 100644
--- a/include/hw/i2c/npcm7xx_smbus.h
+++ b/include/hw/i2c/npcm7xx_smbus.h
@@ -58,7 +58,7 @@ typedef enum NPCM7xxSMBusStatus {
  * @sclht: The SCL high time register.
  * @fif_ctl: The FIFO control register.
  * @fif_cts: The FIFO control status register.
- * @fair_per: The fair preriod register.
+ * @fair_per: The fair period register.
  * @txf_ctl: The transmit FIFO control register.
  * @t_out: The SMBus timeout register.
  * @txf_sts: The transmit FIFO status register.
diff --git a/include/hw/i386/topology.h b/include/hw/i386/topology.h
index 81573f6cfd..380cb27ded 100644
--- a/include/hw/i386/topology.h
+++ b/include/hw/i386/topology.h
@@ -31,7 +31,7 @@
  *
  * This code should be compatible with AMD's "Extended Method" described at:
  *   AMD CPUID Specification (Publication #25481)
- *   Section 3: Multiple Core Calcuation
+ *   Section 3: Multiple Core Calculation
  * as long as:
  *  nr_threads is set to 1;
  *  OFFSET_IDX is assumed to be 0;
diff --git a/include/hw/mem/nvdimm.h b/include/hw/mem/nvdimm.h
index acf887c83d..d3b763453a 100644
--- a/include/hw/mem/nvdimm.h
+++ b/include/hw/mem/nvdimm.h
@@ -77,6 +77,12 @@ struct NVDIMMDevice {
      */
     bool unarmed;
 
+    /*
+     * Whether our DIMM is backed by ROM, and even label data cannot be
+     * written. If set, implies that "unarmed" is also set.
+     */
+    bool readonly;
+
     /*
      * The PPC64 - spapr requires each nvdimm device have a uuid.
      */
diff --git a/include/hw/misc/aspeed_scu.h b/include/hw/misc/aspeed_scu.h
index 5c7c04eedf..7cb6018dbc 100644
--- a/include/hw/misc/aspeed_scu.h
+++ b/include/hw/misc/aspeed_scu.h
@@ -51,7 +51,7 @@ struct AspeedSCUState {
 
 #define ASPEED_IS_AST2500(si_rev)     ((((si_rev) >> 24) & 0xff) == 0x04)
 
-extern bool is_supported_silicon_rev(uint32_t silicon_rev);
+bool is_supported_silicon_rev(uint32_t silicon_rev);
 
 
 struct AspeedSCUClass {
diff --git a/include/hw/misc/auxbus.h b/include/hw/misc/auxbus.h
index b05799d2f7..03cacdee42 100644
--- a/include/hw/misc/auxbus.h
+++ b/include/hw/misc/auxbus.h
@@ -106,7 +106,7 @@ void aux_bus_realize(AUXBus *bus);
  *
  * Returns the reply of the request.
  *
- * @bus Ths bus where the request happen.
+ * @bus The bus where the request happen.
  * @cmd The command requested.
  * @address The 20bits address of the slave.
  * @len The length of the read or write.
diff --git a/include/hw/misc/macio/pmu.h b/include/hw/misc/macio/pmu.h
index ba76afb52a..ceb12082ae 100644
--- a/include/hw/misc/macio/pmu.h
+++ b/include/hw/misc/macio/pmu.h
@@ -76,7 +76,7 @@
 #define PMU_INT_WAITING_CHARGER    0x01    /* ??? */
 #define PMU_INT_AUTO_SRQ_POLL      0x02    /* ??? */
 
-/* Bits in the environement message (either obtained via PMU_GET_COVER,
+/* Bits in the environment message (either obtained via PMU_GET_COVER,
  * or via PMU_INT_ENVIRONMENT on core99 */
 #define PMU_ENV_LID_CLOSED     0x01    /* The lid is closed */
 
diff --git a/include/hw/misc/xlnx-cfi-if.h b/include/hw/misc/xlnx-cfi-if.h
new file mode 100644
index 0000000000..f9bd12292d
--- /dev/null
+++ b/include/hw/misc/xlnx-cfi-if.h
@@ -0,0 +1,59 @@
+/*
+ * Xilinx CFI interface
+ *
+ * Copyright (C) 2023, Advanced Micro Devices, Inc.
+ *
+ * Written by Francisco Iglesias <francisco.iglesias@amd.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+#ifndef XLNX_CFI_IF_H
+#define XLNX_CFI_IF_H 1
+
+#include "qemu/help-texts.h"
+#include "hw/hw.h"
+#include "qom/object.h"
+
+#define TYPE_XLNX_CFI_IF "xlnx-cfi-if"
+typedef struct XlnxCfiIfClass XlnxCfiIfClass;
+DECLARE_CLASS_CHECKERS(XlnxCfiIfClass, XLNX_CFI_IF, TYPE_XLNX_CFI_IF)
+
+#define XLNX_CFI_IF(obj) \
+     INTERFACE_CHECK(XlnxCfiIf, (obj), TYPE_XLNX_CFI_IF)
+
+typedef enum {
+    PACKET_TYPE_CFU = 0x52,
+    PACKET_TYPE_CFRAME = 0xA1,
+} xlnx_cfi_packet_type;
+
+typedef enum {
+    CFRAME_FAR = 1,
+    CFRAME_SFR = 2,
+    CFRAME_FDRI = 4,
+    CFRAME_CMD = 6,
+} xlnx_cfi_reg_addr;
+
+typedef struct XlnxCfiPacket {
+    uint8_t reg_addr;
+    uint32_t data[4];
+} XlnxCfiPacket;
+
+typedef struct XlnxCfiIf {
+    Object Parent;
+} XlnxCfiIf;
+
+typedef struct XlnxCfiIfClass {
+    InterfaceClass parent;
+
+    void (*cfi_transfer_packet)(XlnxCfiIf *cfi_if, XlnxCfiPacket *pkt);
+} XlnxCfiIfClass;
+
+/**
+ * Transfer a XlnxCfiPacket.
+ *
+ * @cfi_if: the object implementing this interface
+ * @XlnxCfiPacket: a pointer to the XlnxCfiPacket to transfer
+ */
+void xlnx_cfi_transfer_packet(XlnxCfiIf *cfi_if, XlnxCfiPacket *pkt);
+
+#endif /* XLNX_CFI_IF_H */
diff --git a/include/hw/misc/xlnx-versal-cframe-reg.h b/include/hw/misc/xlnx-versal-cframe-reg.h
new file mode 100644
index 0000000000..a14fbd7fe4
--- /dev/null
+++ b/include/hw/misc/xlnx-versal-cframe-reg.h
@@ -0,0 +1,303 @@
+/*
+ * QEMU model of the Configuration Frame Control module
+ *
+ * Copyright (C) 2023, Advanced Micro Devices, Inc.
+ *
+ * Written by Francisco Iglesias <francisco.iglesias@amd.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * References:
+ * [1] Versal ACAP Technical Reference Manual,
+ *     https://www.xilinx.com/support/documentation/architecture-manuals/am011-versal-acap-trm.pdf
+ *
+ * [2] Versal ACAP Register Reference,
+ *     https://www.xilinx.com/htmldocs/registers/am012/am012-versal-register-reference.html
+ */
+#ifndef HW_MISC_XLNX_VERSAL_CFRAME_REG_H
+#define HW_MISC_XLNX_VERSAL_CFRAME_REG_H
+
+#include "hw/sysbus.h"
+#include "hw/register.h"
+#include "hw/misc/xlnx-cfi-if.h"
+#include "hw/misc/xlnx-versal-cfu.h"
+#include "qemu/fifo32.h"
+
+#define TYPE_XLNX_VERSAL_CFRAME_REG "xlnx,cframe-reg"
+OBJECT_DECLARE_SIMPLE_TYPE(XlnxVersalCFrameReg, XLNX_VERSAL_CFRAME_REG)
+
+#define TYPE_XLNX_VERSAL_CFRAME_BCAST_REG "xlnx.cframe-bcast-reg"
+OBJECT_DECLARE_SIMPLE_TYPE(XlnxVersalCFrameBcastReg,
+                           XLNX_VERSAL_CFRAME_BCAST_REG)
+
+/*
+ * The registers in this module are 128 bits wide but it is ok to write
+ * and read them through 4 sequential 32 bit accesses (address[3:2] = 0,
+ * 1, 2, 3).
+ */
+REG32(CRC0, 0x0)
+    FIELD(CRC, CRC, 0, 32)
+REG32(CRC1, 0x4)
+REG32(CRC2, 0x8)
+REG32(CRC3, 0xc)
+REG32(FAR0, 0x10)
+    FIELD(FAR0, SEGMENT, 23, 2)
+    FIELD(FAR0, BLOCKTYPE, 20, 3)
+    FIELD(FAR0, FRAME_ADDR, 0, 20)
+REG32(FAR1, 0x14)
+REG32(FAR2, 0x18)
+REG32(FAR3, 0x1c)
+REG32(FAR_SFR0, 0x20)
+    FIELD(FAR_SFR0, BLOCKTYPE, 20, 3)
+    FIELD(FAR_SFR0, FRAME_ADDR, 0, 20)
+REG32(FAR_SFR1, 0x24)
+REG32(FAR_SFR2, 0x28)
+REG32(FAR_SFR3, 0x2c)
+REG32(FDRI0, 0x40)
+REG32(FDRI1, 0x44)
+REG32(FDRI2, 0x48)
+REG32(FDRI3, 0x4c)
+REG32(FRCNT0, 0x50)
+    FIELD(FRCNT0, FRCNT, 0, 32)
+REG32(FRCNT1, 0x54)
+REG32(FRCNT2, 0x58)
+REG32(FRCNT3, 0x5c)
+REG32(CMD0, 0x60)
+    FIELD(CMD0, CMD, 0, 5)
+REG32(CMD1, 0x64)
+REG32(CMD2, 0x68)
+REG32(CMD3, 0x6c)
+REG32(CR_MASK0, 0x70)
+REG32(CR_MASK1, 0x74)
+REG32(CR_MASK2, 0x78)
+REG32(CR_MASK3, 0x7c)
+REG32(CTL0, 0x80)
+    FIELD(CTL, PER_FRAME_CRC, 0, 1)
+REG32(CTL1, 0x84)
+REG32(CTL2, 0x88)
+REG32(CTL3, 0x8c)
+REG32(CFRM_ISR0, 0x150)
+    FIELD(CFRM_ISR0, READ_BROADCAST_ERROR, 21, 1)
+    FIELD(CFRM_ISR0, CMD_MISSING_ERROR, 20, 1)
+    FIELD(CFRM_ISR0, RW_ROWOFF_ERROR, 19, 1)
+    FIELD(CFRM_ISR0, READ_REG_ADDR_ERROR, 18, 1)
+    FIELD(CFRM_ISR0, READ_BLK_TYPE_ERROR, 17, 1)
+    FIELD(CFRM_ISR0, READ_FRAME_ADDR_ERROR, 16, 1)
+    FIELD(CFRM_ISR0, WRITE_REG_ADDR_ERROR, 15, 1)
+    FIELD(CFRM_ISR0, WRITE_BLK_TYPE_ERROR, 13, 1)
+    FIELD(CFRM_ISR0, WRITE_FRAME_ADDR_ERROR, 12, 1)
+    FIELD(CFRM_ISR0, MFW_OVERRUN_ERROR, 11, 1)
+    FIELD(CFRM_ISR0, FAR_FIFO_UNDERFLOW, 10, 1)
+    FIELD(CFRM_ISR0, FAR_FIFO_OVERFLOW, 9, 1)
+    FIELD(CFRM_ISR0, PER_FRAME_SEQ_ERROR, 8, 1)
+    FIELD(CFRM_ISR0, CRC_ERROR, 7, 1)
+    FIELD(CFRM_ISR0, WRITE_OVERRUN_ERROR, 6, 1)
+    FIELD(CFRM_ISR0, READ_OVERRUN_ERROR, 5, 1)
+    FIELD(CFRM_ISR0, CMD_INTERRUPT_ERROR, 4, 1)
+    FIELD(CFRM_ISR0, WRITE_INTERRUPT_ERROR, 3, 1)
+    FIELD(CFRM_ISR0, READ_INTERRUPT_ERROR, 2, 1)
+    FIELD(CFRM_ISR0, SEU_CRC_ERROR, 1, 1)
+    FIELD(CFRM_ISR0, SEU_ECC_ERROR, 0, 1)
+REG32(CFRM_ISR1, 0x154)
+REG32(CFRM_ISR2, 0x158)
+REG32(CFRM_ISR3, 0x15c)
+REG32(CFRM_IMR0, 0x160)
+    FIELD(CFRM_IMR0, READ_BROADCAST_ERROR, 21, 1)
+    FIELD(CFRM_IMR0, CMD_MISSING_ERROR, 20, 1)
+    FIELD(CFRM_IMR0, RW_ROWOFF_ERROR, 19, 1)
+    FIELD(CFRM_IMR0, READ_REG_ADDR_ERROR, 18, 1)
+    FIELD(CFRM_IMR0, READ_BLK_TYPE_ERROR, 17, 1)
+    FIELD(CFRM_IMR0, READ_FRAME_ADDR_ERROR, 16, 1)
+    FIELD(CFRM_IMR0, WRITE_REG_ADDR_ERROR, 15, 1)
+    FIELD(CFRM_IMR0, WRITE_BLK_TYPE_ERROR, 13, 1)
+    FIELD(CFRM_IMR0, WRITE_FRAME_ADDR_ERROR, 12, 1)
+    FIELD(CFRM_IMR0, MFW_OVERRUN_ERROR, 11, 1)
+    FIELD(CFRM_IMR0, FAR_FIFO_UNDERFLOW, 10, 1)
+    FIELD(CFRM_IMR0, FAR_FIFO_OVERFLOW, 9, 1)
+    FIELD(CFRM_IMR0, PER_FRAME_SEQ_ERROR, 8, 1)
+    FIELD(CFRM_IMR0, CRC_ERROR, 7, 1)
+    FIELD(CFRM_IMR0, WRITE_OVERRUN_ERROR, 6, 1)
+    FIELD(CFRM_IMR0, READ_OVERRUN_ERROR, 5, 1)
+    FIELD(CFRM_IMR0, CMD_INTERRUPT_ERROR, 4, 1)
+    FIELD(CFRM_IMR0, WRITE_INTERRUPT_ERROR, 3, 1)
+    FIELD(CFRM_IMR0, READ_INTERRUPT_ERROR, 2, 1)
+    FIELD(CFRM_IMR0, SEU_CRC_ERROR, 1, 1)
+    FIELD(CFRM_IMR0, SEU_ECC_ERROR, 0, 1)
+REG32(CFRM_IMR1, 0x164)
+REG32(CFRM_IMR2, 0x168)
+REG32(CFRM_IMR3, 0x16c)
+REG32(CFRM_IER0, 0x170)
+    FIELD(CFRM_IER0, READ_BROADCAST_ERROR, 21, 1)
+    FIELD(CFRM_IER0, CMD_MISSING_ERROR, 20, 1)
+    FIELD(CFRM_IER0, RW_ROWOFF_ERROR, 19, 1)
+    FIELD(CFRM_IER0, READ_REG_ADDR_ERROR, 18, 1)
+    FIELD(CFRM_IER0, READ_BLK_TYPE_ERROR, 17, 1)
+    FIELD(CFRM_IER0, READ_FRAME_ADDR_ERROR, 16, 1)
+    FIELD(CFRM_IER0, WRITE_REG_ADDR_ERROR, 15, 1)
+    FIELD(CFRM_IER0, WRITE_BLK_TYPE_ERROR, 13, 1)
+    FIELD(CFRM_IER0, WRITE_FRAME_ADDR_ERROR, 12, 1)
+    FIELD(CFRM_IER0, MFW_OVERRUN_ERROR, 11, 1)
+    FIELD(CFRM_IER0, FAR_FIFO_UNDERFLOW, 10, 1)
+    FIELD(CFRM_IER0, FAR_FIFO_OVERFLOW, 9, 1)
+    FIELD(CFRM_IER0, PER_FRAME_SEQ_ERROR, 8, 1)
+    FIELD(CFRM_IER0, CRC_ERROR, 7, 1)
+    FIELD(CFRM_IER0, WRITE_OVERRUN_ERROR, 6, 1)
+    FIELD(CFRM_IER0, READ_OVERRUN_ERROR, 5, 1)
+    FIELD(CFRM_IER0, CMD_INTERRUPT_ERROR, 4, 1)
+    FIELD(CFRM_IER0, WRITE_INTERRUPT_ERROR, 3, 1)
+    FIELD(CFRM_IER0, READ_INTERRUPT_ERROR, 2, 1)
+    FIELD(CFRM_IER0, SEU_CRC_ERROR, 1, 1)
+    FIELD(CFRM_IER0, SEU_ECC_ERROR, 0, 1)
+REG32(CFRM_IER1, 0x174)
+REG32(CFRM_IER2, 0x178)
+REG32(CFRM_IER3, 0x17c)
+REG32(CFRM_IDR0, 0x180)
+    FIELD(CFRM_IDR0, READ_BROADCAST_ERROR, 21, 1)
+    FIELD(CFRM_IDR0, CMD_MISSING_ERROR, 20, 1)
+    FIELD(CFRM_IDR0, RW_ROWOFF_ERROR, 19, 1)
+    FIELD(CFRM_IDR0, READ_REG_ADDR_ERROR, 18, 1)
+    FIELD(CFRM_IDR0, READ_BLK_TYPE_ERROR, 17, 1)
+    FIELD(CFRM_IDR0, READ_FRAME_ADDR_ERROR, 16, 1)
+    FIELD(CFRM_IDR0, WRITE_REG_ADDR_ERROR, 15, 1)
+    FIELD(CFRM_IDR0, WRITE_BLK_TYPE_ERROR, 13, 1)
+    FIELD(CFRM_IDR0, WRITE_FRAME_ADDR_ERROR, 12, 1)
+    FIELD(CFRM_IDR0, MFW_OVERRUN_ERROR, 11, 1)
+    FIELD(CFRM_IDR0, FAR_FIFO_UNDERFLOW, 10, 1)
+    FIELD(CFRM_IDR0, FAR_FIFO_OVERFLOW, 9, 1)
+    FIELD(CFRM_IDR0, PER_FRAME_SEQ_ERROR, 8, 1)
+    FIELD(CFRM_IDR0, CRC_ERROR, 7, 1)
+    FIELD(CFRM_IDR0, WRITE_OVERRUN_ERROR, 6, 1)
+    FIELD(CFRM_IDR0, READ_OVERRUN_ERROR, 5, 1)
+    FIELD(CFRM_IDR0, CMD_INTERRUPT_ERROR, 4, 1)
+    FIELD(CFRM_IDR0, WRITE_INTERRUPT_ERROR, 3, 1)
+    FIELD(CFRM_IDR0, READ_INTERRUPT_ERROR, 2, 1)
+    FIELD(CFRM_IDR0, SEU_CRC_ERROR, 1, 1)
+    FIELD(CFRM_IDR0, SEU_ECC_ERROR, 0, 1)
+REG32(CFRM_IDR1, 0x184)
+REG32(CFRM_IDR2, 0x188)
+REG32(CFRM_IDR3, 0x18c)
+REG32(CFRM_ITR0, 0x190)
+    FIELD(CFRM_ITR0, READ_BROADCAST_ERROR, 21, 1)
+    FIELD(CFRM_ITR0, CMD_MISSING_ERROR, 20, 1)
+    FIELD(CFRM_ITR0, RW_ROWOFF_ERROR, 19, 1)
+    FIELD(CFRM_ITR0, READ_REG_ADDR_ERROR, 18, 1)
+    FIELD(CFRM_ITR0, READ_BLK_TYPE_ERROR, 17, 1)
+    FIELD(CFRM_ITR0, READ_FRAME_ADDR_ERROR, 16, 1)
+    FIELD(CFRM_ITR0, WRITE_REG_ADDR_ERROR, 15, 1)
+    FIELD(CFRM_ITR0, WRITE_BLK_TYPE_ERROR, 13, 1)
+    FIELD(CFRM_ITR0, WRITE_FRAME_ADDR_ERROR, 12, 1)
+    FIELD(CFRM_ITR0, MFW_OVERRUN_ERROR, 11, 1)
+    FIELD(CFRM_ITR0, FAR_FIFO_UNDERFLOW, 10, 1)
+    FIELD(CFRM_ITR0, FAR_FIFO_OVERFLOW, 9, 1)
+    FIELD(CFRM_ITR0, PER_FRAME_SEQ_ERROR, 8, 1)
+    FIELD(CFRM_ITR0, CRC_ERROR, 7, 1)
+    FIELD(CFRM_ITR0, WRITE_OVERRUN_ERROR, 6, 1)
+    FIELD(CFRM_ITR0, READ_OVERRUN_ERROR, 5, 1)
+    FIELD(CFRM_ITR0, CMD_INTERRUPT_ERROR, 4, 1)
+    FIELD(CFRM_ITR0, WRITE_INTERRUPT_ERROR, 3, 1)
+    FIELD(CFRM_ITR0, READ_INTERRUPT_ERROR, 2, 1)
+    FIELD(CFRM_ITR0, SEU_CRC_ERROR, 1, 1)
+    FIELD(CFRM_ITR0, SEU_ECC_ERROR, 0, 1)
+REG32(CFRM_ITR1, 0x194)
+REG32(CFRM_ITR2, 0x198)
+REG32(CFRM_ITR3, 0x19c)
+REG32(SEU_SYNDRM00, 0x1a0)
+REG32(SEU_SYNDRM01, 0x1a4)
+REG32(SEU_SYNDRM02, 0x1a8)
+REG32(SEU_SYNDRM03, 0x1ac)
+REG32(SEU_SYNDRM10, 0x1b0)
+REG32(SEU_SYNDRM11, 0x1b4)
+REG32(SEU_SYNDRM12, 0x1b8)
+REG32(SEU_SYNDRM13, 0x1bc)
+REG32(SEU_SYNDRM20, 0x1c0)
+REG32(SEU_SYNDRM21, 0x1c4)
+REG32(SEU_SYNDRM22, 0x1c8)
+REG32(SEU_SYNDRM23, 0x1cc)
+REG32(SEU_SYNDRM30, 0x1d0)
+REG32(SEU_SYNDRM31, 0x1d4)
+REG32(SEU_SYNDRM32, 0x1d8)
+REG32(SEU_SYNDRM33, 0x1dc)
+REG32(SEU_VIRTUAL_SYNDRM0, 0x1e0)
+REG32(SEU_VIRTUAL_SYNDRM1, 0x1e4)
+REG32(SEU_VIRTUAL_SYNDRM2, 0x1e8)
+REG32(SEU_VIRTUAL_SYNDRM3, 0x1ec)
+REG32(SEU_CRC0, 0x1f0)
+REG32(SEU_CRC1, 0x1f4)
+REG32(SEU_CRC2, 0x1f8)
+REG32(SEU_CRC3, 0x1fc)
+REG32(CFRAME_FAR_BOT0, 0x200)
+REG32(CFRAME_FAR_BOT1, 0x204)
+REG32(CFRAME_FAR_BOT2, 0x208)
+REG32(CFRAME_FAR_BOT3, 0x20c)
+REG32(CFRAME_FAR_TOP0, 0x210)
+REG32(CFRAME_FAR_TOP1, 0x214)
+REG32(CFRAME_FAR_TOP2, 0x218)
+REG32(CFRAME_FAR_TOP3, 0x21c)
+REG32(LAST_FRAME_BOT0, 0x220)
+    FIELD(LAST_FRAME_BOT0, BLOCKTYPE1_LAST_FRAME_LSB, 20, 12)
+    FIELD(LAST_FRAME_BOT0, BLOCKTYPE0_LAST_FRAME, 0, 20)
+REG32(LAST_FRAME_BOT1, 0x224)
+    FIELD(LAST_FRAME_BOT1, BLOCKTYPE3_LAST_FRAME_LSB, 28, 4)
+    FIELD(LAST_FRAME_BOT1, BLOCKTYPE2_LAST_FRAME, 8, 20)
+    FIELD(LAST_FRAME_BOT1, BLOCKTYPE1_LAST_FRAME_MSB, 0, 8)
+REG32(LAST_FRAME_BOT2, 0x228)
+    FIELD(LAST_FRAME_BOT2, BLOCKTYPE3_LAST_FRAME_MSB, 0, 16)
+REG32(LAST_FRAME_BOT3, 0x22c)
+REG32(LAST_FRAME_TOP0, 0x230)
+    FIELD(LAST_FRAME_TOP0, BLOCKTYPE5_LAST_FRAME_LSB, 20, 12)
+    FIELD(LAST_FRAME_TOP0, BLOCKTYPE4_LAST_FRAME, 0, 20)
+REG32(LAST_FRAME_TOP1, 0x234)
+    FIELD(LAST_FRAME_TOP1, BLOCKTYPE6_LAST_FRAME, 8, 20)
+    FIELD(LAST_FRAME_TOP1, BLOCKTYPE5_LAST_FRAME_MSB, 0, 8)
+REG32(LAST_FRAME_TOP2, 0x238)
+REG32(LAST_FRAME_TOP3, 0x23c)
+
+#define CFRAME_REG_R_MAX (R_LAST_FRAME_TOP3 + 1)
+
+#define FRAME_NUM_QWORDS 25
+#define FRAME_NUM_WORDS (FRAME_NUM_QWORDS * 4) /* 25 * 128 bits */
+
+typedef struct XlnxCFrame {
+    uint32_t data[FRAME_NUM_WORDS];
+} XlnxCFrame;
+
+struct XlnxVersalCFrameReg {
+    SysBusDevice parent_obj;
+    MemoryRegion iomem;
+    MemoryRegion iomem_fdri;
+    qemu_irq irq_cfrm_imr;
+
+    /* 128-bit wfifo.  */
+    uint32_t wfifo[WFIFO_SZ];
+
+    uint32_t regs[CFRAME_REG_R_MAX];
+    RegisterInfo regs_info[CFRAME_REG_R_MAX];
+
+    bool rowon;
+    bool wcfg;
+    bool rcfg;
+
+    GTree *cframes;
+    Fifo32 new_f_data;
+
+    struct {
+        XlnxCfiIf *cfu_fdro;
+        uint32_t blktype_num_frames[7];
+    } cfg;
+    bool row_configured;
+};
+
+struct XlnxVersalCFrameBcastReg {
+    SysBusDevice parent_obj;
+    MemoryRegion iomem_reg;
+    MemoryRegion iomem_fdri;
+
+    /* 128-bit wfifo. */
+    uint32_t wfifo[WFIFO_SZ];
+
+    struct {
+        XlnxCfiIf *cframe[15];
+    } cfg;
+};
+
+#endif
diff --git a/include/hw/misc/xlnx-versal-cfu.h b/include/hw/misc/xlnx-versal-cfu.h
new file mode 100644
index 0000000000..86fb841053
--- /dev/null
+++ b/include/hw/misc/xlnx-versal-cfu.h
@@ -0,0 +1,258 @@
+/*
+ * QEMU model of the CFU Configuration Unit.
+ *
+ * Copyright (C) 2023, Advanced Micro Devices, Inc.
+ *
+ * Written by Francisco Iglesias <francisco.iglesias@amd.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * References:
+ * [1] Versal ACAP Technical Reference Manual,
+ *     https://www.xilinx.com/support/documentation/architecture-manuals/am011-versal-acap-trm.pdf
+ *
+ * [2] Versal ACAP Register Reference,
+ *     https://www.xilinx.com/htmldocs/registers/am012/am012-versal-register-reference.html
+ */
+#ifndef HW_MISC_XLNX_VERSAL_CFU_APB_H
+#define HW_MISC_XLNX_VERSAL_CFU_APB_H
+
+#include "hw/sysbus.h"
+#include "hw/register.h"
+#include "hw/misc/xlnx-cfi-if.h"
+#include "qemu/fifo32.h"
+
+#define TYPE_XLNX_VERSAL_CFU_APB "xlnx,versal-cfu-apb"
+OBJECT_DECLARE_SIMPLE_TYPE(XlnxVersalCFUAPB, XLNX_VERSAL_CFU_APB)
+
+#define TYPE_XLNX_VERSAL_CFU_FDRO "xlnx,versal-cfu-fdro"
+OBJECT_DECLARE_SIMPLE_TYPE(XlnxVersalCFUFDRO, XLNX_VERSAL_CFU_FDRO)
+
+#define TYPE_XLNX_VERSAL_CFU_SFR "xlnx,versal-cfu-sfr"
+OBJECT_DECLARE_SIMPLE_TYPE(XlnxVersalCFUSFR, XLNX_VERSAL_CFU_SFR)
+
+REG32(CFU_ISR, 0x0)
+    FIELD(CFU_ISR, USR_GTS_EVENT, 9, 1)
+    FIELD(CFU_ISR, USR_GSR_EVENT, 8, 1)
+    FIELD(CFU_ISR, SLVERR, 7, 1)
+    FIELD(CFU_ISR, DECOMP_ERROR, 6, 1)
+    FIELD(CFU_ISR, BAD_CFI_PACKET, 5, 1)
+    FIELD(CFU_ISR, AXI_ALIGN_ERROR, 4, 1)
+    FIELD(CFU_ISR, CFI_ROW_ERROR, 3, 1)
+    FIELD(CFU_ISR, CRC32_ERROR, 2, 1)
+    FIELD(CFU_ISR, CRC8_ERROR, 1, 1)
+    FIELD(CFU_ISR, SEU_ENDOFCALIB, 0, 1)
+REG32(CFU_IMR, 0x4)
+    FIELD(CFU_IMR, USR_GTS_EVENT, 9, 1)
+    FIELD(CFU_IMR, USR_GSR_EVENT, 8, 1)
+    FIELD(CFU_IMR, SLVERR, 7, 1)
+    FIELD(CFU_IMR, DECOMP_ERROR, 6, 1)
+    FIELD(CFU_IMR, BAD_CFI_PACKET, 5, 1)
+    FIELD(CFU_IMR, AXI_ALIGN_ERROR, 4, 1)
+    FIELD(CFU_IMR, CFI_ROW_ERROR, 3, 1)
+    FIELD(CFU_IMR, CRC32_ERROR, 2, 1)
+    FIELD(CFU_IMR, CRC8_ERROR, 1, 1)
+    FIELD(CFU_IMR, SEU_ENDOFCALIB, 0, 1)
+REG32(CFU_IER, 0x8)
+    FIELD(CFU_IER, USR_GTS_EVENT, 9, 1)
+    FIELD(CFU_IER, USR_GSR_EVENT, 8, 1)
+    FIELD(CFU_IER, SLVERR, 7, 1)
+    FIELD(CFU_IER, DECOMP_ERROR, 6, 1)
+    FIELD(CFU_IER, BAD_CFI_PACKET, 5, 1)
+    FIELD(CFU_IER, AXI_ALIGN_ERROR, 4, 1)
+    FIELD(CFU_IER, CFI_ROW_ERROR, 3, 1)
+    FIELD(CFU_IER, CRC32_ERROR, 2, 1)
+    FIELD(CFU_IER, CRC8_ERROR, 1, 1)
+    FIELD(CFU_IER, SEU_ENDOFCALIB, 0, 1)
+REG32(CFU_IDR, 0xc)
+    FIELD(CFU_IDR, USR_GTS_EVENT, 9, 1)
+    FIELD(CFU_IDR, USR_GSR_EVENT, 8, 1)
+    FIELD(CFU_IDR, SLVERR, 7, 1)
+    FIELD(CFU_IDR, DECOMP_ERROR, 6, 1)
+    FIELD(CFU_IDR, BAD_CFI_PACKET, 5, 1)
+    FIELD(CFU_IDR, AXI_ALIGN_ERROR, 4, 1)
+    FIELD(CFU_IDR, CFI_ROW_ERROR, 3, 1)
+    FIELD(CFU_IDR, CRC32_ERROR, 2, 1)
+    FIELD(CFU_IDR, CRC8_ERROR, 1, 1)
+    FIELD(CFU_IDR, SEU_ENDOFCALIB, 0, 1)
+REG32(CFU_ITR, 0x10)
+    FIELD(CFU_ITR, USR_GTS_EVENT, 9, 1)
+    FIELD(CFU_ITR, USR_GSR_EVENT, 8, 1)
+    FIELD(CFU_ITR, SLVERR, 7, 1)
+    FIELD(CFU_ITR, DECOMP_ERROR, 6, 1)
+    FIELD(CFU_ITR, BAD_CFI_PACKET, 5, 1)
+    FIELD(CFU_ITR, AXI_ALIGN_ERROR, 4, 1)
+    FIELD(CFU_ITR, CFI_ROW_ERROR, 3, 1)
+    FIELD(CFU_ITR, CRC32_ERROR, 2, 1)
+    FIELD(CFU_ITR, CRC8_ERROR, 1, 1)
+    FIELD(CFU_ITR, SEU_ENDOFCALIB, 0, 1)
+REG32(CFU_PROTECT, 0x14)
+    FIELD(CFU_PROTECT, ACTIVE, 0, 1)
+REG32(CFU_FGCR, 0x18)
+    FIELD(CFU_FGCR, GCLK_CAL, 14, 1)
+    FIELD(CFU_FGCR, SC_HBC_TRIGGER, 13, 1)
+    FIELD(CFU_FGCR, GLOW, 12, 1)
+    FIELD(CFU_FGCR, GPWRDWN, 11, 1)
+    FIELD(CFU_FGCR, GCAP, 10, 1)
+    FIELD(CFU_FGCR, GSCWE, 9, 1)
+    FIELD(CFU_FGCR, GHIGH_B, 8, 1)
+    FIELD(CFU_FGCR, GMC_B, 7, 1)
+    FIELD(CFU_FGCR, GWE, 6, 1)
+    FIELD(CFU_FGCR, GRESTORE, 5, 1)
+    FIELD(CFU_FGCR, GTS_CFG_B, 4, 1)
+    FIELD(CFU_FGCR, GLUTMASK, 3, 1)
+    FIELD(CFU_FGCR, EN_GLOBS_B, 2, 1)
+    FIELD(CFU_FGCR, EOS, 1, 1)
+    FIELD(CFU_FGCR, INIT_COMPLETE, 0, 1)
+REG32(CFU_CTL, 0x1c)
+    FIELD(CFU_CTL, GSR_GSC, 15, 1)
+    FIELD(CFU_CTL, SLVERR_EN, 14, 1)
+    FIELD(CFU_CTL, CRC32_RESET, 13, 1)
+    FIELD(CFU_CTL, AXI_ERROR_EN, 12, 1)
+    FIELD(CFU_CTL, FLUSH_AXI, 11, 1)
+    FIELD(CFU_CTL, SSI_PER_SLR_PR, 10, 1)
+    FIELD(CFU_CTL, GCAP_CLK_EN, 9, 1)
+    FIELD(CFU_CTL, STATUS_SYNC_DISABLE, 8, 1)
+    FIELD(CFU_CTL, IGNORE_CFI_ERROR, 7, 1)
+    FIELD(CFU_CTL, CFRAME_DISABLE, 6, 1)
+    FIELD(CFU_CTL, QWORD_CNT_RESET, 5, 1)
+    FIELD(CFU_CTL, CRC8_DISABLE, 4, 1)
+    FIELD(CFU_CTL, CRC32_CHECK, 3, 1)
+    FIELD(CFU_CTL, DECOMPRESS, 2, 1)
+    FIELD(CFU_CTL, SEU_GO, 1, 1)
+    FIELD(CFU_CTL, CFI_LOCAL_RESET, 0, 1)
+REG32(CFU_CRAM_RW, 0x20)
+    FIELD(CFU_CRAM_RW, RFIFO_AFULL_DEPTH, 18, 9)
+    FIELD(CFU_CRAM_RW, RD_WAVE_CNT_LEFT, 12, 6)
+    FIELD(CFU_CRAM_RW, RD_WAVE_CNT, 6, 6)
+    FIELD(CFU_CRAM_RW, WR_WAVE_CNT, 0, 6)
+REG32(CFU_MASK, 0x28)
+REG32(CFU_CRC_EXPECT, 0x2c)
+REG32(CFU_CFRAME_LEFT_T0, 0x60)
+    FIELD(CFU_CFRAME_LEFT_T0, NUM, 0, 20)
+REG32(CFU_CFRAME_LEFT_T1, 0x64)
+    FIELD(CFU_CFRAME_LEFT_T1, NUM, 0, 20)
+REG32(CFU_CFRAME_LEFT_T2, 0x68)
+    FIELD(CFU_CFRAME_LEFT_T2, NUM, 0, 20)
+REG32(CFU_ROW_RANGE, 0x6c)
+    FIELD(CFU_ROW_RANGE, HALF_FSR, 5, 1)
+    FIELD(CFU_ROW_RANGE, NUM, 0, 5)
+REG32(CFU_STATUS, 0x100)
+    FIELD(CFU_STATUS, SEU_WRITE_ERROR, 30, 1)
+    FIELD(CFU_STATUS, FRCNT_ERROR, 29, 1)
+    FIELD(CFU_STATUS, RSVD_ERROR, 28, 1)
+    FIELD(CFU_STATUS, FDRO_ERROR, 27, 1)
+    FIELD(CFU_STATUS, FDRI_ERROR, 26, 1)
+    FIELD(CFU_STATUS, FDRI_READ_ERROR, 25, 1)
+    FIELD(CFU_STATUS, READ_FDRI_ERROR, 24, 1)
+    FIELD(CFU_STATUS, READ_SFR_ERROR, 23, 1)
+    FIELD(CFU_STATUS, READ_STREAM_ERROR, 22, 1)
+    FIELD(CFU_STATUS, UNKNOWN_STREAM_PKT, 21, 1)
+    FIELD(CFU_STATUS, USR_GTS, 20, 1)
+    FIELD(CFU_STATUS, USR_GSR, 19, 1)
+    FIELD(CFU_STATUS, AXI_BAD_WSTRB, 18, 1)
+    FIELD(CFU_STATUS, AXI_BAD_AR_SIZE, 17, 1)
+    FIELD(CFU_STATUS, AXI_BAD_AW_SIZE, 16, 1)
+    FIELD(CFU_STATUS, AXI_BAD_ARADDR, 15, 1)
+    FIELD(CFU_STATUS, AXI_BAD_AWADDR, 14, 1)
+    FIELD(CFU_STATUS, SCAN_CLEAR_PASS, 13, 1)
+    FIELD(CFU_STATUS, HC_SEC_ERROR, 12, 1)
+    FIELD(CFU_STATUS, GHIGH_B_ISHIGH, 11, 1)
+    FIELD(CFU_STATUS, GHIGH_B_ISLOW, 10, 1)
+    FIELD(CFU_STATUS, GMC_B_ISHIGH, 9, 1)
+    FIELD(CFU_STATUS, GMC_B_ISLOW, 8, 1)
+    FIELD(CFU_STATUS, GPWRDWN_B_ISHIGH, 7, 1)
+    FIELD(CFU_STATUS, CFI_SEU_CRC_ERROR, 6, 1)
+    FIELD(CFU_STATUS, CFI_SEU_ECC_ERROR, 5, 1)
+    FIELD(CFU_STATUS, CFI_SEU_HEARTBEAT, 4, 1)
+    FIELD(CFU_STATUS, SCAN_CLEAR_DONE, 3, 1)
+    FIELD(CFU_STATUS, HC_COMPLETE, 2, 1)
+    FIELD(CFU_STATUS, CFI_CFRAME_BUSY, 1, 1)
+    FIELD(CFU_STATUS, CFU_STREAM_BUSY, 0, 1)
+REG32(CFU_INTERNAL_STATUS, 0x104)
+    FIELD(CFU_INTERNAL_STATUS, SSI_EOS, 22, 1)
+    FIELD(CFU_INTERNAL_STATUS, SSI_GWE, 21, 1)
+    FIELD(CFU_INTERNAL_STATUS, RFIFO_EMPTY, 20, 1)
+    FIELD(CFU_INTERNAL_STATUS, RFIFO_FULL, 19, 1)
+    FIELD(CFU_INTERNAL_STATUS, SEL_SFR, 18, 1)
+    FIELD(CFU_INTERNAL_STATUS, STREAM_CFRAME, 17, 1)
+    FIELD(CFU_INTERNAL_STATUS, FDRI_PHASE, 16, 1)
+    FIELD(CFU_INTERNAL_STATUS, CFI_PIPE_EN, 15, 1)
+    FIELD(CFU_INTERNAL_STATUS, AWFIFO_DCNT, 10, 5)
+    FIELD(CFU_INTERNAL_STATUS, WFIFO_DCNT, 5, 5)
+    FIELD(CFU_INTERNAL_STATUS, REPAIR_BUSY, 4, 1)
+    FIELD(CFU_INTERNAL_STATUS, TRIMU_BUSY, 3, 1)
+    FIELD(CFU_INTERNAL_STATUS, TRIMB_BUSY, 2, 1)
+    FIELD(CFU_INTERNAL_STATUS, HCLEANR_BUSY, 1, 1)
+    FIELD(CFU_INTERNAL_STATUS, HCLEAN_BUSY, 0, 1)
+REG32(CFU_QWORD_CNT, 0x108)
+REG32(CFU_CRC_LIVE, 0x10c)
+REG32(CFU_PENDING_READ_CNT, 0x110)
+    FIELD(CFU_PENDING_READ_CNT, NUM, 0, 25)
+REG32(CFU_FDRI_CNT, 0x114)
+REG32(CFU_ECO1, 0x118)
+REG32(CFU_ECO2, 0x11c)
+
+#define R_MAX (R_CFU_ECO2 + 1)
+
+#define NUM_STREAM 2
+#define WFIFO_SZ 4
+
+struct XlnxVersalCFUAPB {
+    SysBusDevice parent_obj;
+    MemoryRegion iomem;
+    MemoryRegion iomem_stream[NUM_STREAM];
+    qemu_irq irq_cfu_imr;
+
+    /* 128-bit wfifo.  */
+    uint32_t wfifo[WFIFO_SZ];
+
+    uint32_t regs[R_MAX];
+    RegisterInfo regs_info[R_MAX];
+
+    uint8_t fdri_row_addr;
+
+    struct {
+        XlnxCfiIf *cframe[15];
+    } cfg;
+};
+
+
+struct XlnxVersalCFUFDRO {
+    SysBusDevice parent_obj;
+    MemoryRegion iomem_fdro;
+
+    Fifo32 fdro_data;
+};
+
+struct XlnxVersalCFUSFR {
+    SysBusDevice parent_obj;
+    MemoryRegion iomem_sfr;
+
+    /* 128-bit wfifo. */
+    uint32_t wfifo[WFIFO_SZ];
+
+    struct {
+        XlnxVersalCFUAPB *cfu;
+    } cfg;
+};
+
+/**
+ * This is a helper function for updating a CFI data write fifo, an array of 4
+ * uint32_t and 128 bits of data that are allowed to be written through 4
+ * sequential 32 bit accesses. After the last index has been written into the
+ * write fifo (wfifo), the data is copied to and returned in a secondary fifo
+ * provided to the function (wfifo_ret), and the write fifo is cleared
+ * (zeroized).
+ *
+ * @addr: the address used when calculating the wfifo array index to update
+ * @value: the value to write into the wfifo array
+ * @wfifo: the wfifo to update
+ * @wfifo_out: will return the wfifo data when all 128 bits have been written
+ *
+ * @return: true if all 128 bits have been updated.
+ */
+bool update_wfifo(hwaddr addr, uint64_t value,
+                  uint32_t *wfifo, uint32_t *wfifo_ret);
+
+#endif
diff --git a/include/hw/net/mii.h b/include/hw/net/mii.h
index ed1bb52b0f..f7feddac9b 100644
--- a/include/hw/net/mii.h
+++ b/include/hw/net/mii.h
@@ -71,7 +71,7 @@
 #define MII_BMSR_JABBER     (1 << 1)  /* Jabber detected */
 #define MII_BMSR_EXTCAP     (1 << 0)  /* Ext-reg capability */
 
-#define MII_ANAR_PAUSE_ASYM (1 << 11) /* Try for asymetric pause */
+#define MII_ANAR_PAUSE_ASYM (1 << 11) /* Try for asymmetric pause */
 #define MII_ANAR_PAUSE      (1 << 10) /* Try for pause */
 #define MII_ANAR_TXFD       (1 << 8)
 #define MII_ANAR_TX         (1 << 7)
diff --git a/include/hw/nvram/npcm7xx_otp.h b/include/hw/nvram/npcm7xx_otp.h
index 156bbd151a..ea4b5d0731 100644
--- a/include/hw/nvram/npcm7xx_otp.h
+++ b/include/hw/nvram/npcm7xx_otp.h
@@ -73,7 +73,7 @@ typedef struct NPCM7xxOTPClass NPCM7xxOTPClass;
  * Each nibble of data is encoded into a byte, so the number of bytes written
  * to the array will be @len * 2.
  */
-extern void npcm7xx_otp_array_write(NPCM7xxOTPState *s, const void *data,
-                                    unsigned int offset, unsigned int len);
+void npcm7xx_otp_array_write(NPCM7xxOTPState *s, const void *data,
+                             unsigned int offset, unsigned int len);
 
 #endif /* NPCM7XX_OTP_H */
diff --git a/include/hw/pci-host/dino.h b/include/hw/pci-host/dino.h
index a1b0184940..fd7975c798 100644
--- a/include/hw/pci-host/dino.h
+++ b/include/hw/pci-host/dino.h
@@ -1,5 +1,5 @@
 /*
- * HP-PARISC Dino PCI chipset emulation, as in B160L and similiar machines
+ * HP-PARISC Dino PCI chipset emulation, as in B160L and similar machines
  *
  * (C) 2017-2019 by Helge Deller <deller@gmx.de>
  *
diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h
index 5b03a7b0eb..3778aac27b 100644
--- a/include/hw/pci-host/spapr.h
+++ b/include/hw/pci-host/spapr.h
@@ -47,8 +47,6 @@ typedef struct SpaprPciLsi {
     uint32_t irq;
 } SpaprPciLsi;
 
-typedef struct SpaprPhbPciNvGpuConfig SpaprPhbPciNvGpuConfig;
-
 struct SpaprPhbState {
     PCIHostState parent_obj;
 
@@ -90,9 +88,6 @@ struct SpaprPhbState {
     uint32_t mig_liobn;
     hwaddr mig_mem_win_addr, mig_mem_win_size;
     hwaddr mig_io_win_addr, mig_io_win_size;
-    hwaddr nv2_gpa_win_addr;
-    hwaddr nv2_atsd_win_addr;
-    SpaprPhbPciNvGpuConfig *nvgpus;
     bool pre_5_1_assoc;
 };
 
@@ -112,20 +107,6 @@ struct SpaprPhbState {
 
 #define SPAPR_PCI_MSI_WINDOW         0x40000000000ULL
 
-#define SPAPR_PCI_NV2RAM64_WIN_BASE  SPAPR_PCI_LIMIT
-#define SPAPR_PCI_NV2RAM64_WIN_SIZE  (2 * TiB) /* For up to 6 GPUs 256GB each */
-
-/* Max number of NVLinks per GPU in any physical box */
-#define NVGPU_MAX_LINKS              3
-
-/*
- * GPU RAM starts at 64TiB so huge DMA window to cover it all ends at 128TiB
- * which is enough. We do not need DMA for ATSD so we put them at 128TiB.
- */
-#define SPAPR_PCI_NV2ATSD_WIN_BASE   (128 * TiB)
-#define SPAPR_PCI_NV2ATSD_WIN_SIZE   (NVGPU_MAX_NUM * NVGPU_MAX_LINKS * \
-                                      64 * KiB)
-
 int spapr_dt_phb(SpaprMachineState *spapr, SpaprPhbState *phb,
                  uint32_t intc_phandle, void *fdt, int *node_offset);
 
@@ -149,13 +130,6 @@ int spapr_phb_vfio_eeh_get_state(SpaprPhbState *sphb, int *state);
 int spapr_phb_vfio_eeh_reset(SpaprPhbState *sphb, int option);
 int spapr_phb_vfio_eeh_configure(SpaprPhbState *sphb);
 void spapr_phb_vfio_reset(DeviceState *qdev);
-void spapr_phb_nvgpu_setup(SpaprPhbState *sphb, Error **errp);
-void spapr_phb_nvgpu_free(SpaprPhbState *sphb);
-void spapr_phb_nvgpu_populate_dt(SpaprPhbState *sphb, void *fdt, int bus_off,
-                                 Error **errp);
-void spapr_phb_nvgpu_ram_populate_dt(SpaprPhbState *sphb, void *fdt);
-void spapr_phb_nvgpu_populate_pcidev_dt(PCIDevice *dev, void *fdt, int offset,
-                                        SpaprPhbState *sphb);
 #else
 static inline bool spapr_phb_eeh_available(SpaprPhbState *sphb)
 {
@@ -182,25 +156,6 @@ static inline int spapr_phb_vfio_eeh_configure(SpaprPhbState *sphb)
 static inline void spapr_phb_vfio_reset(DeviceState *qdev)
 {
 }
-static inline void spapr_phb_nvgpu_setup(SpaprPhbState *sphb, Error **errp)
-{
-}
-static inline void spapr_phb_nvgpu_free(SpaprPhbState *sphb)
-{
-}
-static inline void spapr_phb_nvgpu_populate_dt(SpaprPhbState *sphb, void *fdt,
-                                               int bus_off, Error **errp)
-{
-}
-static inline void spapr_phb_nvgpu_ram_populate_dt(SpaprPhbState *sphb,
-                                                   void *fdt)
-{
-}
-static inline void spapr_phb_nvgpu_populate_pcidev_dt(PCIDevice *dev, void *fdt,
-                                                      int offset,
-                                                      SpaprPhbState *sphb)
-{
-}
 #endif
 
 void spapr_phb_dma_reset(SpaprPhbState *sphb);
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index abdc1ef103..b70a0b95ff 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -114,6 +114,7 @@ extern bool pci_available;
 #define PCI_DEVICE_ID_REDHAT_NVME        0x0010
 #define PCI_DEVICE_ID_REDHAT_PVPANIC     0x0011
 #define PCI_DEVICE_ID_REDHAT_ACPI_ERST   0x0012
+#define PCI_DEVICE_ID_REDHAT_UFS         0x0013
 #define PCI_DEVICE_ID_REDHAT_QXL         0x0100
 
 #define FMT_PCIBUS                      PRIx64
diff --git a/include/hw/pci/pci_ids.h b/include/hw/pci/pci_ids.h
index e4386ebb20..85469b9b53 100644
--- a/include/hw/pci/pci_ids.h
+++ b/include/hw/pci/pci_ids.h
@@ -26,6 +26,7 @@
 #define PCI_CLASS_STORAGE_SATA           0x0106
 #define PCI_CLASS_STORAGE_SAS            0x0107
 #define PCI_CLASS_STORAGE_EXPRESS        0x0108
+#define PCI_CLASS_STORAGE_UFS            0x0109
 #define PCI_CLASS_STORAGE_OTHER          0x0180
 
 #define PCI_BASE_CLASS_NETWORK           0x02
diff --git a/include/hw/pci/pcie_aer.h b/include/hw/pci/pcie_aer.h
index 1234fdc4e2..4a9f0ea69d 100644
--- a/include/hw/pci/pcie_aer.h
+++ b/include/hw/pci/pcie_aer.h
@@ -40,7 +40,7 @@ struct PCIEAERLog {
      * The specified value will be clipped down to PCIE_AER_LOG_MAX_LIMIT
      * to avoid unreasonable memory usage.
      * I bet that 128 log size would be big enough, otherwise too many errors
-     * for system to function normaly. But could consecutive errors occur?
+     * for system to function normally. But could consecutive errors occur?
      */
 #define PCIE_AER_LOG_MAX_DEFAULT        8
 #define PCIE_AER_LOG_MAX_LIMIT          128
diff --git a/include/hw/ppc/openpic.h b/include/hw/ppc/openpic.h
index bae8dafe16..9c6af8e207 100644
--- a/include/hw/ppc/openpic.h
+++ b/include/hw/ppc/openpic.h
@@ -14,7 +14,7 @@ enum {
     OPENPIC_OUTPUT_INT = 0, /* IRQ                       */
     OPENPIC_OUTPUT_CINT,    /* critical IRQ              */
     OPENPIC_OUTPUT_MCK,     /* Machine check event       */
-    OPENPIC_OUTPUT_DEBUG,   /* Inconditional debug event */
+    OPENPIC_OUTPUT_DEBUG,   /* Unconditional debug event */
     OPENPIC_OUTPUT_RESET,   /* Core reset event          */
     OPENPIC_OUTPUT_NB,
 };
diff --git a/include/hw/ppc/ppc.h b/include/hw/ppc/ppc.h
index e095c002dc..17a8dfc107 100644
--- a/include/hw/ppc/ppc.h
+++ b/include/hw/ppc/ppc.h
@@ -54,7 +54,8 @@ struct ppc_tb_t {
                                                */
 
 uint64_t cpu_ppc_get_tb(ppc_tb_t *tb_env, uint64_t vmclk, int64_t tb_offset);
-clk_setup_cb cpu_ppc_tb_init (CPUPPCState *env, uint32_t freq);
+void cpu_ppc_tb_init(CPUPPCState *env, uint32_t freq);
+void cpu_ppc_tb_reset(CPUPPCState *env);
 void cpu_ppc_tb_free(CPUPPCState *env);
 void cpu_ppc_hdecr_init(CPUPPCState *env);
 void cpu_ppc_hdecr_exit(CPUPPCState *env);
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 538b2dfb89..e91791a1a9 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -103,11 +103,8 @@ typedef enum {
 
 #define FDT_MAX_SIZE                    0x200000
 
-/* Max number of GPUs per system */
-#define NVGPU_MAX_NUM              6
-
 /* Max number of NUMA nodes */
-#define NUMA_NODES_MAX_NUM         (MAX_NODES + NVGPU_MAX_NUM)
+#define NUMA_NODES_MAX_NUM         (MAX_NODES)
 
 /*
  * NUMA FORM1 macros. FORM1_DIST_REF_POINTS was taken from
@@ -160,8 +157,7 @@ struct SpaprMachineClass {
     bool (*phb_placement)(SpaprMachineState *spapr, uint32_t index,
                           uint64_t *buid, hwaddr *pio,
                           hwaddr *mmio32, hwaddr *mmio64,
-                          unsigned n_dma, uint32_t *liobns, hwaddr *nv2gpa,
-                          hwaddr *nv2atsd, Error **errp);
+                          unsigned n_dma, uint32_t *liobns, Error **errp);
     SpaprResizeHpt resize_hpt_default;
     SpaprCapabilities default_caps;
     SpaprIrq *irq;
@@ -197,13 +193,14 @@ struct SpaprMachineState {
     SpaprResizeHpt resize_hpt;
     void *htab;
     uint32_t htab_shift;
-    uint64_t patb_entry; /* Process tbl registed in H_REGISTER_PROC_TBL */
+    uint64_t patb_entry; /* Process tbl registered in H_REGISTER_PROC_TBL */
     SpaprPendingHpt *pending_hpt; /* in-progress resize */
 
     hwaddr rma_size;
     uint32_t fdt_size;
     uint32_t fdt_initial_size;
     void *fdt_blob;
+    uint8_t fdt_rng_seed[32];
     long kernel_size;
     bool kernel_le;
     uint64_t kernel_addr;
@@ -275,7 +272,6 @@ struct SpaprMachineState {
     bool cmd_line_caps[SPAPR_CAP_NUM];
     SpaprCapabilities def, eff, mig;
 
-    unsigned gpu_numa_id;
     SpaprTpmProxy *tpm_proxy;
 
     uint32_t FORM1_assoc_array[NUMA_NODES_MAX_NUM][FORM1_NUMA_ASSOC_SIZE];
@@ -1012,6 +1008,7 @@ bool spapr_check_pagesize(SpaprMachineState *spapr, hwaddr pagesize,
 #define SPAPR_OV5_XIVE_BOTH     0x80 /* Only to advertise on the platform */
 
 void spapr_set_all_lpcrs(target_ulong value, target_ulong mask);
+void spapr_init_all_lpcrs(target_ulong value, target_ulong mask);
 hwaddr spapr_get_rtas_addr(void);
 bool spapr_memory_hot_unplug_supported(SpaprMachineState *spapr);
 
diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h
index 9f580a2699..f120874e0f 100644
--- a/include/hw/ppc/xive.h
+++ b/include/hw/ppc/xive.h
@@ -401,6 +401,7 @@ struct XiveRouterClass {
     int (*write_nvt)(XiveRouter *xrtr, uint8_t nvt_blk, uint32_t nvt_idx,
                      XiveNVT *nvt, uint8_t word_number);
     uint8_t (*get_block_id)(XiveRouter *xrtr);
+    void (*end_notify)(XiveRouter *xrtr, XiveEAS *eas);
 };
 
 int xive_router_get_eas(XiveRouter *xrtr, uint8_t eas_blk, uint32_t eas_idx,
@@ -414,6 +415,7 @@ int xive_router_get_nvt(XiveRouter *xrtr, uint8_t nvt_blk, uint32_t nvt_idx,
 int xive_router_write_nvt(XiveRouter *xrtr, uint8_t nvt_blk, uint32_t nvt_idx,
                           XiveNVT *nvt, uint8_t word_number);
 void xive_router_notify(XiveNotifier *xn, uint32_t lisn, bool pq_checked);
+void xive_router_end_notify(XiveRouter *xrtr, XiveEAS *eas);
 
 /*
  * XIVE Presenter
diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h
index 884c726a87..151d968238 100644
--- a/include/hw/qdev-core.h
+++ b/include/hw/qdev-core.h
@@ -1086,7 +1086,7 @@ typedef enum MachineInitPhase {
     PHASE_MACHINE_READY,
 } MachineInitPhase;
 
-extern bool phase_check(MachineInitPhase phase);
-extern void phase_advance(MachineInitPhase phase);
+bool phase_check(MachineInitPhase phase);
+void phase_advance(MachineInitPhase phase);
 
 #endif
diff --git a/include/hw/riscv/riscv_hart.h b/include/hw/riscv/riscv_hart.h
index bbc21cdc9a..912b4a2682 100644
--- a/include/hw/riscv/riscv_hart.h
+++ b/include/hw/riscv/riscv_hart.h
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 2017 SiFive, Inc.
  *
- * Holds the state of a heterogenous array of RISC-V harts
+ * Holds the state of a heterogeneous array of RISC-V harts
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
diff --git a/include/hw/sd/sd.h b/include/hw/sd/sd.h
index 3047adb2fc..2c8748fb9b 100644
--- a/include/hw/sd/sd.h
+++ b/include/hw/sd/sd.h
@@ -93,6 +93,9 @@ typedef struct {
 #define TYPE_SD_CARD "sd-card"
 OBJECT_DECLARE_TYPE(SDState, SDCardClass, SD_CARD)
 
+#define TYPE_SD_CARD_SPI "sd-card-spi"
+DECLARE_INSTANCE_CHECKER(SDState, SD_CARD_SPI, TYPE_SD_CARD_SPI)
+
 struct SDCardClass {
     /*< private >*/
     DeviceClass parent_class;
@@ -124,6 +127,8 @@ struct SDCardClass {
     void (*enable)(SDState *sd, bool enable);
     bool (*get_inserted)(SDState *sd);
     bool (*get_readonly)(SDState *sd);
+
+    const struct SDProto *proto;
 };
 
 #define TYPE_SD_BUS "sd-bus"
diff --git a/include/hw/ssi/ssi.h b/include/hw/ssi/ssi.h
index 6950f86810..3cdcbd5390 100644
--- a/include/hw/ssi/ssi.h
+++ b/include/hw/ssi/ssi.h
@@ -64,6 +64,9 @@ struct SSIPeripheral {
 
     /* Chip select state */
     bool cs;
+
+    /* Chip select index */
+    uint8_t cs_index;
 };
 
 extern const VMStateDescription vmstate_ssi_peripheral;
@@ -109,4 +112,6 @@ SSIBus *ssi_create_bus(DeviceState *parent, const char *name);
 
 uint32_t ssi_transfer(SSIBus *bus, uint32_t val);
 
+DeviceState *ssi_get_cs(SSIBus *bus, uint8_t cs_index);
+
 #endif
diff --git a/include/hw/ssi/xilinx_spips.h b/include/hw/ssi/xilinx_spips.h
index 06bfd18312..1386d5ac8f 100644
--- a/include/hw/ssi/xilinx_spips.h
+++ b/include/hw/ssi/xilinx_spips.h
@@ -104,7 +104,7 @@ struct XlnxZynqMPQSPIPS {
 
     uint32_t regs[XLNX_ZYNQMP_SPIPS_R_MAX];
 
-    /* GQSPI has seperate tx/rx fifos */
+    /* GQSPI has separate tx/rx fifos */
     Fifo8 rx_fifo_g;
     Fifo8 tx_fifo_g;
     Fifo32 fifo_g;
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index da43d27352..e9b8954595 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -230,6 +230,8 @@ void vfio_unblock_multiple_devices_migration(void);
 bool vfio_viommu_preset(VFIODevice *vbasedev);
 int64_t vfio_mig_bytes_transferred(void);
 void vfio_reset_bytes_transferred(void);
+bool vfio_device_state_is_running(VFIODevice *vbasedev);
+bool vfio_device_state_is_precopy(VFIODevice *vbasedev);
 
 #ifdef CONFIG_LINUX
 int vfio_get_region_info(VFIODevice *vbasedev, int index,
diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h
index 5f5dcb4572..e07a723027 100644
--- a/include/hw/virtio/virtio-net.h
+++ b/include/hw/virtio/virtio-net.h
@@ -109,7 +109,7 @@ typedef struct VirtioNetRscSeg {
     size_t size;
     uint16_t packets;
     uint16_t dup_ack;
-    bool is_coalesced;      /* need recal ipv4 header checksum, mark here */
+    bool is_coalesced;      /* need recall ipv4 header checksum, mark here */
     VirtioNetRscUnit unit;
     NetClientState *nc;
 } VirtioNetRscSeg;
diff --git a/include/io/channel-socket.h b/include/io/channel-socket.h
index 513c428fe4..ab15577d38 100644
--- a/include/io/channel-socket.h
+++ b/include/io/channel-socket.h
@@ -124,7 +124,7 @@ void qio_channel_socket_connect_async(QIOChannelSocket *ioc,
  * qio_channel_socket_listen_sync:
  * @ioc: the socket channel object
  * @addr: the address to listen to
- * @num: the expected ammount of connections
+ * @num: the expected amount of connections
  * @errp: pointer to a NULL-initialized error object
  *
  * Attempt to listen to the address @addr. This method
@@ -141,7 +141,7 @@ int qio_channel_socket_listen_sync(QIOChannelSocket *ioc,
  * qio_channel_socket_listen_async:
  * @ioc: the socket channel object
  * @addr: the address to listen to
- * @num: the expected ammount of connections
+ * @num: the expected amount of connections
  * @callback: the function to invoke on completion
  * @opaque: user data to pass to @callback
  * @destroy: the function to free @opaque
diff --git a/include/io/channel-util.h b/include/io/channel-util.h
index a5d720d9a0..fa18a3756d 100644
--- a/include/io/channel-util.h
+++ b/include/io/channel-util.h
@@ -49,4 +49,27 @@
 QIOChannel *qio_channel_new_fd(int fd,
                                Error **errp);
 
+/**
+ * qio_channel_util_set_aio_fd_handler:
+ * @read_fd: the file descriptor for the read handler
+ * @read_ctx: the AioContext for the read handler
+ * @io_read: the read handler
+ * @write_fd: the file descriptor for the write handler
+ * @write_ctx: the AioContext for the write handler
+ * @io_write: the write handler
+ * @opaque: the opaque argument to the read and write handler
+ *
+ * Set the read and write handlers when @read_ctx and @write_ctx are non-NULL,
+ * respectively. To leave a handler in its current state, pass a NULL
+ * AioContext. To clear a handler, pass a non-NULL AioContext and a NULL
+ * handler.
+ */
+void qio_channel_util_set_aio_fd_handler(int read_fd,
+                                         AioContext *read_ctx,
+                                         IOHandler *io_read,
+                                         int write_fd,
+                                         AioContext *write_ctx,
+                                         IOHandler *io_write,
+                                         void *opaque);
+
 #endif /* QIO_CHANNEL_UTIL_H */
diff --git a/include/io/channel.h b/include/io/channel.h
index 229bf36910..5f9dbaab65 100644
--- a/include/io/channel.h
+++ b/include/io/channel.h
@@ -81,9 +81,11 @@ struct QIOChannel {
     Object parent;
     unsigned int features; /* bitmask of QIOChannelFeatures */
     char *name;
-    AioContext *ctx;
+    AioContext *read_ctx;
     Coroutine *read_coroutine;
+    AioContext *write_ctx;
     Coroutine *write_coroutine;
+    bool follow_coroutine_ctx;
 #ifdef _WIN32
     HANDLE event; /* For use with GSource on Win32 */
 #endif
@@ -140,8 +142,9 @@ struct QIOChannelClass {
                      int whence,
                      Error **errp);
     void (*io_set_aio_fd_handler)(QIOChannel *ioc,
-                                  AioContext *ctx,
+                                  AioContext *read_ctx,
                                   IOHandler *io_read,
+                                  AioContext *write_ctx,
                                   IOHandler *io_write,
                                   void *opaque);
     int (*io_flush)(QIOChannel *ioc,
@@ -498,6 +501,21 @@ int qio_channel_set_blocking(QIOChannel *ioc,
                              bool enabled,
                              Error **errp);
 
+/**
+ * qio_channel_set_follow_coroutine_ctx:
+ * @ioc: the channel object
+ * @enabled: whether or not to follow the coroutine's AioContext
+ *
+ * If @enabled is true, calls to qio_channel_yield() use the current
+ * coroutine's AioContext. Usually this is desirable.
+ *
+ * If @enabled is false, calls to qio_channel_yield() use the global iohandler
+ * AioContext. This is may be used by coroutines that run in the main loop and
+ * do not wish to respond to I/O during nested event loops. This is the
+ * default for compatibility with code that is not aware of AioContexts.
+ */
+void qio_channel_set_follow_coroutine_ctx(QIOChannel *ioc, bool enabled);
+
 /**
  * qio_channel_close:
  * @ioc: the channel object
@@ -703,41 +721,6 @@ GSource *qio_channel_add_watch_source(QIOChannel *ioc,
                                       GDestroyNotify notify,
                                       GMainContext *context);
 
-/**
- * qio_channel_attach_aio_context:
- * @ioc: the channel object
- * @ctx: the #AioContext to set the handlers on
- *
- * Request that qio_channel_yield() sets I/O handlers on
- * the given #AioContext.  If @ctx is %NULL, qio_channel_yield()
- * uses QEMU's main thread event loop.
- *
- * You can move a #QIOChannel from one #AioContext to another even if
- * I/O handlers are set for a coroutine.  However, #QIOChannel provides
- * no synchronization between the calls to qio_channel_yield() and
- * qio_channel_attach_aio_context().
- *
- * Therefore you should first call qio_channel_detach_aio_context()
- * to ensure that the coroutine is not entered concurrently.  Then,
- * while the coroutine has yielded, call qio_channel_attach_aio_context(),
- * and then aio_co_schedule() to place the coroutine on the new
- * #AioContext.  The calls to qio_channel_detach_aio_context()
- * and qio_channel_attach_aio_context() should be protected with
- * aio_context_acquire() and aio_context_release().
- */
-void qio_channel_attach_aio_context(QIOChannel *ioc,
-                                    AioContext *ctx);
-
-/**
- * qio_channel_detach_aio_context:
- * @ioc: the channel object
- *
- * Disable any I/O handlers set by qio_channel_yield().  With the
- * help of aio_co_schedule(), this allows moving a coroutine that was
- * paused by qio_channel_yield() to another context.
- */
-void qio_channel_detach_aio_context(QIOChannel *ioc);
-
 /**
  * qio_channel_yield:
  * @ioc: the channel object
@@ -785,8 +768,9 @@ void qio_channel_wait(QIOChannel *ioc,
 /**
  * qio_channel_set_aio_fd_handler:
  * @ioc: the channel object
- * @ctx: the AioContext to set the handlers on
+ * @read_ctx: the AioContext to set the read handler on or NULL
  * @io_read: the read handler
+ * @write_ctx: the AioContext to set the write handler on or NULL
  * @io_write: the write handler
  * @opaque: the opaque value passed to the handler
  *
@@ -794,10 +778,17 @@ void qio_channel_wait(QIOChannel *ioc,
  * be used by channel implementations to forward the handlers
  * to another channel (e.g. from #QIOChannelTLS to the
  * underlying socket).
+ *
+ * When @read_ctx is NULL, don't touch the read handler. When @write_ctx is
+ * NULL, don't touch the write handler. Note that setting the read handler
+ * clears the write handler, and vice versa, if they share the same AioContext.
+ * Therefore the caller must pass both handlers together when sharing the same
+ * AioContext.
  */
 void qio_channel_set_aio_fd_handler(QIOChannel *ioc,
-                                    AioContext *ctx,
+                                    AioContext *read_ctx,
                                     IOHandler *io_read,
+                                    AioContext *write_ctx,
                                     IOHandler *io_write,
                                     void *opaque);
 
diff --git a/include/io/task.h b/include/io/task.h
index beec4f5cfd..dc7d32ebd0 100644
--- a/include/io/task.h
+++ b/include/io/task.h
@@ -145,7 +145,7 @@ typedef void (*QIOTaskWorker)(QIOTask *task,
  * The QIOTask module can also be used to perform operations
  * in a background thread context, while still reporting the
  * results in the main event thread. This allows code which
- * cannot easily be rewritten to be asychronous (such as DNS
+ * cannot easily be rewritten to be asynchronous (such as DNS
  * lookups) to be easily run non-blocking. Reporting the
  * results in the main thread context means that the caller
  * typically does not need to be concerned about thread
diff --git a/include/migration/register.h b/include/migration/register.h
index 90914f32f5..2b12c6adec 100644
--- a/include/migration/register.h
+++ b/include/migration/register.h
@@ -20,6 +20,11 @@ typedef struct SaveVMHandlers {
     /* This runs inside the iothread lock.  */
     SaveStateHandler *save_state;
 
+    /*
+     * save_prepare is called early, even before migration starts, and can be
+     * used to perform early checks.
+     */
+    int (*save_prepare)(void *opaque, Error **errp);
     void (*save_cleanup)(void *opaque);
     int (*save_live_complete_postcopy)(QEMUFile *f, void *opaque);
     int (*save_live_complete_precopy)(QEMUFile *f, void *opaque);
diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
index d1b8abe08d..e4db910339 100644
--- a/include/migration/vmstate.h
+++ b/include/migration/vmstate.h
@@ -41,9 +41,11 @@ typedef struct VMStateField VMStateField;
  */
 struct VMStateInfo {
     const char *name;
-    int (*get)(QEMUFile *f, void *pv, size_t size, const VMStateField *field);
-    int (*put)(QEMUFile *f, void *pv, size_t size, const VMStateField *field,
-               JSONWriter *vmdesc);
+    int coroutine_mixed_fn (*get)(QEMUFile *f, void *pv, size_t size,
+                                  const VMStateField *field);
+    int coroutine_mixed_fn (*put)(QEMUFile *f, void *pv, size_t size,
+                                  const VMStateField *field,
+                                  JSONWriter *vmdesc);
 };
 
 enum VMStateFlags {
diff --git a/include/net/net.h b/include/net/net.h
index 1448d00afb..330d285930 100644
--- a/include/net/net.h
+++ b/include/net/net.h
@@ -54,11 +54,12 @@ typedef void (LinkStatusChanged)(NetClientState *);
 typedef void (NetClientDestructor)(NetClientState *);
 typedef RxFilterInfo *(QueryRxFilter)(NetClientState *);
 typedef bool (HasUfo)(NetClientState *);
+typedef bool (HasUso)(NetClientState *);
 typedef bool (HasVnetHdr)(NetClientState *);
 typedef bool (HasVnetHdrLen)(NetClientState *, int);
 typedef bool (GetUsingVnetHdr)(NetClientState *);
 typedef void (UsingVnetHdr)(NetClientState *, bool);
-typedef void (SetOffload)(NetClientState *, int, int, int, int, int);
+typedef void (SetOffload)(NetClientState *, int, int, int, int, int, int, int);
 typedef int (GetVnetHdrLen)(NetClientState *);
 typedef void (SetVnetHdrLen)(NetClientState *, int);
 typedef int (SetVnetLE)(NetClientState *, bool);
@@ -84,6 +85,7 @@ typedef struct NetClientInfo {
     QueryRxFilter *query_rx_filter;
     NetPoll *poll;
     HasUfo *has_ufo;
+    HasUso *has_uso;
     HasVnetHdr *has_vnet_hdr;
     HasVnetHdrLen *has_vnet_hdr_len;
     GetUsingVnetHdr *get_using_vnet_hdr;
@@ -187,12 +189,13 @@ void qemu_set_info_str(NetClientState *nc,
                        const char *fmt, ...) G_GNUC_PRINTF(2, 3);
 void qemu_format_nic_info_str(NetClientState *nc, uint8_t macaddr[6]);
 bool qemu_has_ufo(NetClientState *nc);
+bool qemu_has_uso(NetClientState *nc);
 bool qemu_has_vnet_hdr(NetClientState *nc);
 bool qemu_has_vnet_hdr_len(NetClientState *nc, int len);
 bool qemu_get_using_vnet_hdr(NetClientState *nc);
 void qemu_using_vnet_hdr(NetClientState *nc, bool enable);
 void qemu_set_offload(NetClientState *nc, int csum, int tso4, int tso6,
-                      int ecn, int ufo);
+                      int ecn, int ufo, int uso4, int uso6);
 int qemu_get_vnet_hdr_len(NetClientState *nc);
 void qemu_set_vnet_hdr_len(NetClientState *nc, int len);
 int qemu_set_vnet_le(NetClientState *nc, bool is_le);
diff --git a/include/qemu/cpuid.h b/include/qemu/cpuid.h
index 35325f1995..b11161555b 100644
--- a/include/qemu/cpuid.h
+++ b/include/qemu/cpuid.h
@@ -25,6 +25,9 @@
 #endif
 
 /* Leaf 1, %ecx */
+#ifndef bit_PCLMUL
+#define bit_PCLMUL      (1 << 1)
+#endif
 #ifndef bit_SSE4_1
 #define bit_SSE4_1      (1 << 19)
 #endif
diff --git a/include/qemu/crc-ccitt.h b/include/qemu/crc-ccitt.h
index d6eb49146d..8918dafe07 100644
--- a/include/qemu/crc-ccitt.h
+++ b/include/qemu/crc-ccitt.h
@@ -17,8 +17,8 @@
 extern uint16_t const crc_ccitt_table[256];
 extern uint16_t const crc_ccitt_false_table[256];
 
-extern uint16_t crc_ccitt(uint16_t crc, const uint8_t *buffer, size_t len);
-extern uint16_t crc_ccitt_false(uint16_t crc, const uint8_t *buffer, size_t len);
+uint16_t crc_ccitt(uint16_t crc, const uint8_t *buffer, size_t len);
+uint16_t crc_ccitt_false(uint16_t crc, const uint8_t *buffer, size_t len);
 
 static inline uint16_t crc_ccitt_byte(uint16_t crc, const uint8_t c)
 {
diff --git a/include/qemu/fifo8.h b/include/qemu/fifo8.h
index 28bf2cee57..16be02f361 100644
--- a/include/qemu/fifo8.h
+++ b/include/qemu/fifo8.h
@@ -46,7 +46,7 @@ void fifo8_push(Fifo8 *fifo, uint8_t data);
  * fifo8_push_all:
  * @fifo: FIFO to push to
  * @data: data to push
- * @size: number of bytes to push
+ * @num: number of bytes to push
  *
  * Push a byte array to the FIFO. Behaviour is undefined if the FIFO is full.
  * Clients are responsible for checking the space left in the FIFO using
diff --git a/include/qemu/host-utils.h b/include/qemu/host-utils.h
index 011618373e..ead97d354d 100644
--- a/include/qemu/host-utils.h
+++ b/include/qemu/host-utils.h
@@ -56,6 +56,11 @@ static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
     return (__int128_t)a * b / c;
 }
 
+static inline uint64_t muldiv64_round_up(uint64_t a, uint32_t b, uint32_t c)
+{
+    return ((__int128_t)a * b + c - 1) / c;
+}
+
 static inline uint64_t divu128(uint64_t *plow, uint64_t *phigh,
                                uint64_t divisor)
 {
@@ -83,7 +88,8 @@ void mulu64(uint64_t *plow, uint64_t *phigh, uint64_t a, uint64_t b);
 uint64_t divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor);
 int64_t divs128(uint64_t *plow, int64_t *phigh, int64_t divisor);
 
-static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
+static inline uint64_t muldiv64_rounding(uint64_t a, uint32_t b, uint32_t c,
+                                  bool round_up)
 {
     union {
         uint64_t ll;
@@ -99,12 +105,25 @@ static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
 
     u.ll = a;
     rl = (uint64_t)u.l.low * (uint64_t)b;
+    if (round_up) {
+        rl += c - 1;
+    }
     rh = (uint64_t)u.l.high * (uint64_t)b;
     rh += (rl >> 32);
     res.l.high = rh / c;
     res.l.low = (((rh % c) << 32) + (rl & 0xffffffff)) / c;
     return res.ll;
 }
+
+static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
+{
+    return muldiv64_rounding(a, b, c, false);
+}
+
+static inline uint64_t muldiv64_round_up(uint64_t a, uint32_t b, uint32_t c)
+{
+    return muldiv64_rounding(a, b, c, true);
+}
 #endif
 
 /**
diff --git a/include/qemu/iova-tree.h b/include/qemu/iova-tree.h
index 8528e5c98f..2a10a7052e 100644
--- a/include/qemu/iova-tree.h
+++ b/include/qemu/iova-tree.h
@@ -15,7 +15,7 @@
  * Currently the iova tree will only allow to keep ranges
  * information, and no extra user data is allowed for each element.  A
  * benefit is that we can merge adjacent ranges internally within the
- * tree.  It can save a lot of memory when the ranges are splitted but
+ * tree.  It can save a lot of memory when the ranges are split but
  * mostly continuous.
  *
  * Note that current implementation does not provide any thread
@@ -128,7 +128,7 @@ const DMAMap *iova_tree_find_address(const IOVATree *tree, hwaddr iova);
  * iova_tree_foreach:
  *
  * @tree: the iova tree to iterate on
- * @iterator: the interator for the mappings, return true to stop
+ * @iterator: the iterator for the mappings, return true to stop
  *
  * Iterate over the iova tree.
  *
diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h
index 2cae135280..2897720fac 100644
--- a/include/qemu/osdep.h
+++ b/include/qemu/osdep.h
@@ -250,7 +250,7 @@ extern "C" {
  * supports QEMU_ERROR, this will be reported at compile time; otherwise
  * this will be reported at link time due to the missing symbol.
  */
-G_NORETURN extern
+G_NORETURN
 void QEMU_ERROR("code path is reachable")
     qemu_build_not_reached_always(void);
 #if defined(__OPTIMIZE__) && !defined(__NO_INLINE__)
@@ -506,7 +506,7 @@ void qemu_anon_ram_free(void *ptr, size_t size);
  * See MySQL bug #7156 (http://bugs.mysql.com/bug.php?id=7156) for discussion
  * about Solaris missing the madvise() prototype.
  */
-extern int madvise(char *, size_t, int);
+int madvise(char *, size_t, int);
 #endif
 
 #if defined(CONFIG_LINUX)
diff --git a/include/qemu/plugin-memory.h b/include/qemu/plugin-memory.h
index 43165f2452..71c1123308 100644
--- a/include/qemu/plugin-memory.h
+++ b/include/qemu/plugin-memory.h
@@ -15,15 +15,8 @@
 struct qemu_plugin_hwaddr {
     bool is_io;
     bool is_store;
-    union {
-        struct {
-            MemoryRegionSection *section;
-            hwaddr    offset;
-        } io;
-        struct {
-            void *hostaddr;
-        } ram;
-    } v;
+    hwaddr phys_addr;
+    MemoryRegion *mr;
 };
 
 /**
diff --git a/include/qemu/processor.h b/include/qemu/processor.h
index 8e16c9277d..9f0dcdf28f 100644
--- a/include/qemu/processor.h
+++ b/include/qemu/processor.h
@@ -7,8 +7,6 @@
 #ifndef QEMU_PROCESSOR_H
 #define QEMU_PROCESSOR_H
 
-#include "qemu/atomic.h"
-
 #if defined(__i386__) || defined(__x86_64__)
 # define cpu_relax() asm volatile("rep; nop" ::: "memory")
 
diff --git a/include/qemu/qemu-options.h b/include/qemu/qemu-options.h
deleted file mode 100644
index 4a62c83c45..0000000000
--- a/include/qemu/qemu-options.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * qemu-options.h
- *
- * Defines needed for command line argument processing.
- *
- * Copyright (c) 2003-2008 Fabrice Bellard
- * Copyright (c) 2010 Jes Sorensen <Jes.Sorensen@redhat.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-#ifndef QEMU_OPTIONS_H
-#define QEMU_OPTIONS_H
-
-enum {
-
-#define DEF(option, opt_arg, opt_enum, opt_help, arch_mask)     \
-    opt_enum,
-#define DEFHEADING(text)
-#define ARCHHEADING(text, arch_mask)
-
-#include "qemu-options.def"
-};
-
-#endif
diff --git a/include/qemu/rcu.h b/include/qemu/rcu.h
index 661c1a1468..fea058aa9f 100644
--- a/include/qemu/rcu.h
+++ b/include/qemu/rcu.h
@@ -118,19 +118,19 @@ static inline void rcu_read_unlock(void)
     }
 }
 
-extern void synchronize_rcu(void);
+void synchronize_rcu(void);
 
 /*
  * Reader thread registration.
  */
-extern void rcu_register_thread(void);
-extern void rcu_unregister_thread(void);
+void rcu_register_thread(void);
+void rcu_unregister_thread(void);
 
 /*
  * Support for fork().  fork() support is enabled at startup.
  */
-extern void rcu_enable_atfork(void);
-extern void rcu_disable_atfork(void);
+void rcu_enable_atfork(void);
+void rcu_disable_atfork(void);
 
 struct rcu_head;
 typedef void RCUCBFunc(struct rcu_head *head);
@@ -140,8 +140,8 @@ struct rcu_head {
     RCUCBFunc *func;
 };
 
-extern void call_rcu1(struct rcu_head *head, RCUCBFunc *func);
-extern void drain_call_rcu(void);
+void call_rcu1(struct rcu_head *head, RCUCBFunc *func);
+void drain_call_rcu(void);
 
 /* The operands of the minus operator must have the same type,
  * which must be the one that we specify in the cast.
diff --git a/include/qemu/selfmap.h b/include/qemu/selfmap.h
index 7d938945cb..1690a74f4b 100644
--- a/include/qemu/selfmap.h
+++ b/include/qemu/selfmap.h
@@ -20,10 +20,10 @@ typedef struct {
     bool is_exec;
     bool is_priv;
 
+    dev_t dev;
+    ino_t inode;
     uint64_t offset;
-    uint64_t inode;
     const char *path;
-    char dev[];
 } MapInfo;
 
 /**
diff --git a/include/qemu/sys_membarrier.h b/include/qemu/sys_membarrier.h
index b5bfa21d52..e7774891f8 100644
--- a/include/qemu/sys_membarrier.h
+++ b/include/qemu/sys_membarrier.h
@@ -14,8 +14,8 @@
  * side.  The slow side forces processor-level ordering on all other cores
  * through a system call.
  */
-extern void smp_mb_global_init(void);
-extern void smp_mb_global(void);
+void smp_mb_global_init(void);
+void smp_mb_global(void);
 #define smp_mb_placeholder()       barrier()
 #else
 /* Keep it simple, execute a real memory barrier on both sides.  */
diff --git a/include/qemu/throttle.h b/include/qemu/throttle.h
index 05f6346137..181245d29b 100644
--- a/include/qemu/throttle.h
+++ b/include/qemu/throttle.h
@@ -99,13 +99,18 @@ typedef struct ThrottleState {
     int64_t previous_leak;    /* timestamp of the last leak done */
 } ThrottleState;
 
+typedef enum {
+    THROTTLE_READ = 0,
+    THROTTLE_WRITE,
+    THROTTLE_MAX
+} ThrottleDirection;
+
 typedef struct ThrottleTimers {
-    QEMUTimer *timers[2];     /* timers used to do the throttling */
+    QEMUTimer *timers[THROTTLE_MAX];    /* timers used to do the throttling */
     QEMUClockType clock_type; /* the clock used */
 
     /* Callbacks */
-    QEMUTimerCB *read_timer_cb;
-    QEMUTimerCB *write_timer_cb;
+    QEMUTimerCB *timer_cb[THROTTLE_MAX];
     void *timer_opaque;
 } ThrottleTimers;
 
@@ -149,9 +154,10 @@ void throttle_config_init(ThrottleConfig *cfg);
 /* usage */
 bool throttle_schedule_timer(ThrottleState *ts,
                              ThrottleTimers *tt,
-                             bool is_write);
+                             ThrottleDirection direction);
 
-void throttle_account(ThrottleState *ts, bool is_write, uint64_t size);
+void throttle_account(ThrottleState *ts, ThrottleDirection direction,
+                      uint64_t size);
 void throttle_limits_to_config(ThrottleLimits *arg, ThrottleConfig *cfg,
                                Error **errp);
 void throttle_config_to_limits(ThrottleConfig *cfg, ThrottleLimits *var);
diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h
index 834b0e47a0..5abdbc3874 100644
--- a/include/qemu/typedefs.h
+++ b/include/qemu/typedefs.h
@@ -129,7 +129,6 @@ typedef struct QString QString;
 typedef struct RAMBlock RAMBlock;
 typedef struct Range Range;
 typedef struct ReservedRegion ReservedRegion;
-typedef struct SavedIOTLB SavedIOTLB;
 typedef struct SHPCDevice SHPCDevice;
 typedef struct SSIBus SSIBus;
 typedef struct TCGHelperInfo TCGHelperInfo;
diff --git a/include/qemu/uri.h b/include/qemu/uri.h
index b43f35a6a6..1855b764f2 100644
--- a/include/qemu/uri.h
+++ b/include/qemu/uri.h
@@ -96,8 +96,8 @@ typedef struct QueryParams {
   QueryParam *p;       /* array of parameters */
 } QueryParams;
 
-struct QueryParams *query_params_new (int init_alloc);
-extern QueryParams *query_params_parse (const char *query);
-extern void query_params_free (QueryParams *ps);
+QueryParams *query_params_new(int init_alloc);
+QueryParams *query_params_parse(const char *query);
+void query_params_free(QueryParams *ps);
 
 #endif /* QEMU_URI_H */
diff --git a/include/qemu/vhost-user-server.h b/include/qemu/vhost-user-server.h
index b1c1cda886..64ad701015 100644
--- a/include/qemu/vhost-user-server.h
+++ b/include/qemu/vhost-user-server.h
@@ -43,6 +43,7 @@ typedef struct {
     unsigned int in_flight; /* atomic */
 
     /* Protected by ctx lock */
+    bool in_qio_channel_yield;
     bool wait_idle;
     VuDev vu_dev;
     QIOChannel *ioc; /* The I/O channel with the client */
diff --git a/include/qemu/yank.h b/include/qemu/yank.h
index 5375a1f195..1907150933 100644
--- a/include/qemu/yank.h
+++ b/include/qemu/yank.h
@@ -25,7 +25,7 @@ typedef void (YankFn)(void *opaque);
  * @instance: The instance.
  * @errp: Error object.
  *
- * Returns true on success or false if an error occured.
+ * Returns true on success or false if an error occurred.
  */
 bool yank_register_instance(const YankInstance *instance, Error **errp);
 
diff --git a/include/scsi/constants.h b/include/scsi/constants.h
index 6a8bad556a..9b98451912 100644
--- a/include/scsi/constants.h
+++ b/include/scsi/constants.h
@@ -231,6 +231,7 @@
 #define MODE_PAGE_FLEXIBLE_DISK_GEOMETRY      0x05
 #define MODE_PAGE_CACHING                     0x08
 #define MODE_PAGE_AUDIO_CTL                   0x0e
+#define MODE_PAGE_CONTROL                     0x0a
 #define MODE_PAGE_POWER                       0x1a
 #define MODE_PAGE_FAULT_FAIL                  0x1c
 #define MODE_PAGE_TO_PROTECT                  0x1d
diff --git a/include/standard-headers/linux/fuse.h b/include/standard-headers/linux/fuse.h
index 35c131a107..6b9793842c 100644
--- a/include/standard-headers/linux/fuse.h
+++ b/include/standard-headers/linux/fuse.h
@@ -206,6 +206,11 @@
  *  - add extension header
  *  - add FUSE_EXT_GROUPS
  *  - add FUSE_CREATE_SUPP_GROUP
+ *  - add FUSE_HAS_EXPIRE_ONLY
+ *
+ *  7.39
+ *  - add FUSE_DIRECT_IO_RELAX
+ *  - add FUSE_STATX and related structures
  */
 
 #ifndef _LINUX_FUSE_H
@@ -237,7 +242,7 @@
 #define FUSE_KERNEL_VERSION 7
 
 /** Minor version number of this interface */
-#define FUSE_KERNEL_MINOR_VERSION 38
+#define FUSE_KERNEL_MINOR_VERSION 39
 
 /** The node ID of the root inode */
 #define FUSE_ROOT_ID 1
@@ -264,6 +269,40 @@ struct fuse_attr {
 	uint32_t	flags;
 };
 
+/*
+ * The following structures are bit-for-bit compatible with the statx(2) ABI in
+ * Linux.
+ */
+struct fuse_sx_time {
+	int64_t		tv_sec;
+	uint32_t	tv_nsec;
+	int32_t		__reserved;
+};
+
+struct fuse_statx {
+	uint32_t	mask;
+	uint32_t	blksize;
+	uint64_t	attributes;
+	uint32_t	nlink;
+	uint32_t	uid;
+	uint32_t	gid;
+	uint16_t	mode;
+	uint16_t	__spare0[1];
+	uint64_t	ino;
+	uint64_t	size;
+	uint64_t	blocks;
+	uint64_t	attributes_mask;
+	struct fuse_sx_time	atime;
+	struct fuse_sx_time	btime;
+	struct fuse_sx_time	ctime;
+	struct fuse_sx_time	mtime;
+	uint32_t	rdev_major;
+	uint32_t	rdev_minor;
+	uint32_t	dev_major;
+	uint32_t	dev_minor;
+	uint64_t	__spare2[14];
+};
+
 struct fuse_kstatfs {
 	uint64_t	blocks;
 	uint64_t	bfree;
@@ -365,6 +404,9 @@ struct fuse_file_lock {
  * FUSE_HAS_INODE_DAX:  use per inode DAX
  * FUSE_CREATE_SUPP_GROUP: add supplementary group info to create, mkdir,
  *			symlink and mknod (single group that matches parent)
+ * FUSE_HAS_EXPIRE_ONLY: kernel supports expiry-only entry invalidation
+ * FUSE_DIRECT_IO_RELAX: relax restrictions in FOPEN_DIRECT_IO mode, for now
+ *                       allow shared mmap
  */
 #define FUSE_ASYNC_READ		(1 << 0)
 #define FUSE_POSIX_LOCKS	(1 << 1)
@@ -402,6 +444,8 @@ struct fuse_file_lock {
 #define FUSE_SECURITY_CTX	(1ULL << 32)
 #define FUSE_HAS_INODE_DAX	(1ULL << 33)
 #define FUSE_CREATE_SUPP_GROUP	(1ULL << 34)
+#define FUSE_HAS_EXPIRE_ONLY	(1ULL << 35)
+#define FUSE_DIRECT_IO_RELAX	(1ULL << 36)
 
 /**
  * CUSE INIT request/reply flags
@@ -568,6 +612,7 @@ enum fuse_opcode {
 	FUSE_REMOVEMAPPING	= 49,
 	FUSE_SYNCFS		= 50,
 	FUSE_TMPFILE		= 51,
+	FUSE_STATX		= 52,
 
 	/* CUSE specific operations */
 	CUSE_INIT		= 4096,
@@ -632,6 +677,22 @@ struct fuse_attr_out {
 	struct fuse_attr attr;
 };
 
+struct fuse_statx_in {
+	uint32_t	getattr_flags;
+	uint32_t	reserved;
+	uint64_t	fh;
+	uint32_t	sx_flags;
+	uint32_t	sx_mask;
+};
+
+struct fuse_statx_out {
+	uint64_t	attr_valid;	/* Cache timeout for the attributes */
+	uint32_t	attr_valid_nsec;
+	uint32_t	flags;
+	uint64_t	spare[2];
+	struct fuse_statx stat;
+};
+
 #define FUSE_COMPAT_MKNOD_IN_SIZE 8
 
 struct fuse_mknod_in {
diff --git a/include/standard-headers/linux/vhost_types.h b/include/standard-headers/linux/vhost_types.h
index 6691a3ce24..5ad07e134a 100644
--- a/include/standard-headers/linux/vhost_types.h
+++ b/include/standard-headers/linux/vhost_types.h
@@ -181,5 +181,9 @@ struct vhost_vdpa_iova_range {
 #define VHOST_BACKEND_F_SUSPEND  0x4
 /* Device can be resumed */
 #define VHOST_BACKEND_F_RESUME  0x5
+/* Device supports the driver enabling virtqueues both before and after
+ * DRIVER_OK
+ */
+#define VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK  0x6
 
 #endif
diff --git a/include/standard-headers/linux/virtio_net.h b/include/standard-headers/linux/virtio_net.h
index 2325485f2c..0f88417742 100644
--- a/include/standard-headers/linux/virtio_net.h
+++ b/include/standard-headers/linux/virtio_net.h
@@ -56,6 +56,7 @@
 #define VIRTIO_NET_F_MQ	22	/* Device supports Receive Flow
 					 * Steering */
 #define VIRTIO_NET_F_CTRL_MAC_ADDR 23	/* Set MAC address */
+#define VIRTIO_NET_F_VQ_NOTF_COAL 52	/* Device supports virtqueue notification coalescing */
 #define VIRTIO_NET_F_NOTF_COAL	53	/* Device supports notifications coalescing */
 #define VIRTIO_NET_F_GUEST_USO4	54	/* Guest can handle USOv4 in. */
 #define VIRTIO_NET_F_GUEST_USO6	55	/* Guest can handle USOv6 in. */
@@ -391,5 +392,18 @@ struct virtio_net_ctrl_coal_rx {
 };
 
 #define VIRTIO_NET_CTRL_NOTF_COAL_RX_SET		1
+#define VIRTIO_NET_CTRL_NOTF_COAL_VQ_SET		2
+#define VIRTIO_NET_CTRL_NOTF_COAL_VQ_GET		3
+
+struct virtio_net_ctrl_coal {
+	uint32_t max_packets;
+	uint32_t max_usecs;
+};
+
+struct  virtio_net_ctrl_coal_vq {
+	uint16_t vqn;
+	uint16_t reserved;
+	struct virtio_net_ctrl_coal coal;
+};
 
 #endif /* _LINUX_VIRTIO_NET_H */
diff --git a/include/sysemu/accel-blocker.h b/include/sysemu/accel-blocker.h
index 0733783bcc..f07f368358 100644
--- a/include/sysemu/accel-blocker.h
+++ b/include/sysemu/accel-blocker.h
@@ -16,7 +16,7 @@
 
 #include "sysemu/cpus.h"
 
-extern void accel_blocker_init(void);
+void accel_blocker_init(void);
 
 /*
  * accel_{cpu_}ioctl_begin/end:
@@ -26,10 +26,10 @@ extern void accel_blocker_init(void);
  * called, preventing new ioctls to run. They will continue only after
  * accel_ioctl_inibith_end().
  */
-extern void accel_ioctl_begin(void);
-extern void accel_ioctl_end(void);
-extern void accel_cpu_ioctl_begin(CPUState *cpu);
-extern void accel_cpu_ioctl_end(CPUState *cpu);
+void accel_ioctl_begin(void);
+void accel_ioctl_end(void);
+void accel_cpu_ioctl_begin(CPUState *cpu);
+void accel_cpu_ioctl_end(CPUState *cpu);
 
 /*
  * accel_ioctl_inhibit_begin: start critical section
@@ -42,7 +42,7 @@ extern void accel_cpu_ioctl_end(CPUState *cpu);
  * This allows the caller to access shared data or perform operations without
  * worrying of concurrent vcpus accesses.
  */
-extern void accel_ioctl_inhibit_begin(void);
+void accel_ioctl_inhibit_begin(void);
 
 /*
  * accel_ioctl_inhibit_end: end critical section started by
@@ -50,6 +50,6 @@ extern void accel_ioctl_inhibit_begin(void);
  *
  * This function allows blocked accel_{cpu_}ioctl_begin() to continue.
  */
-extern void accel_ioctl_inhibit_end(void);
+void accel_ioctl_inhibit_end(void);
 
 #endif /* ACCEL_BLOCKER_H */
diff --git a/include/sysemu/block-backend-global-state.h b/include/sysemu/block-backend-global-state.h
index 184e667ebd..d5f675493a 100644
--- a/include/sysemu/block-backend-global-state.h
+++ b/include/sysemu/block-backend-global-state.h
@@ -61,8 +61,8 @@ int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp);
 int blk_replace_bs(BlockBackend *blk, BlockDriverState *new_bs, Error **errp);
 bool bdrv_has_blk(BlockDriverState *bs);
 bool bdrv_is_root_node(BlockDriverState *bs);
-int blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm,
-                 Error **errp);
+int GRAPH_UNLOCKED blk_set_perm(BlockBackend *blk, uint64_t perm,
+                                uint64_t shared_perm, Error **errp);
 void blk_get_perm(BlockBackend *blk, uint64_t *perm, uint64_t *shared_perm);
 
 void blk_iostatus_enable(BlockBackend *blk);
diff --git a/include/sysemu/cryptodev-vhost.h b/include/sysemu/cryptodev-vhost.h
index e8cab1356e..4c3c22acae 100644
--- a/include/sysemu/cryptodev-vhost.h
+++ b/include/sysemu/cryptodev-vhost.h
@@ -79,7 +79,7 @@ cryptodev_vhost_init(
  * cryptodev_vhost_cleanup:
  * @crypto: the cryptodev backend common vhost object
  *
- * Clean the resouce associated with @crypto that realizaed
+ * Clean the resource associated with @crypto that realizaed
  * by cryptodev_vhost_init()
  *
  */
diff --git a/include/sysemu/cryptodev.h b/include/sysemu/cryptodev.h
index bc021ce847..96d3998b93 100644
--- a/include/sysemu/cryptodev.h
+++ b/include/sysemu/cryptodev.h
@@ -339,7 +339,7 @@ void cryptodev_backend_free_client(
  * @backend: the cryptodev backend object
  * @errp: pointer to a NULL-initialized error object
  *
- * Clean the resouce associated with @backend that realizaed
+ * Clean the resource associated with @backend that realizaed
  * by the specific backend's init() callback
  */
 void cryptodev_backend_cleanup(
@@ -407,7 +407,7 @@ int cryptodev_backend_crypto_operation(
 /**
  * cryptodev_backend_set_used:
  * @backend: the cryptodev backend object
- * @used: ture or false
+ * @used: true or false
  *
  * Set the cryptodev backend is used by virtio-crypto or not
  */
@@ -427,7 +427,7 @@ bool cryptodev_backend_is_used(CryptoDevBackend *backend);
 /**
  * cryptodev_backend_set_ready:
  * @backend: the cryptodev backend object
- * @ready: ture or false
+ * @ready: true or false
  *
  * Set the cryptodev backend is ready or not, which is called
  * by the children of the cryptodev banckend interface.
diff --git a/include/sysemu/hax.h b/include/sysemu/hax.h
deleted file mode 100644
index 80fc716f80..0000000000
--- a/include/sysemu/hax.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * QEMU HAXM support
- *
- * Copyright IBM, Corp. 2008
- *
- * Authors:
- *  Anthony Liguori   <aliguori@us.ibm.com>
- *
- * Copyright (c) 2011 Intel Corporation
- *  Written by:
- *  Jiang Yunhong<yunhong.jiang@intel.com>
- *  Xin Xiaohui<xiaohui.xin@intel.com>
- *  Zhang Xiantao<xiantao.zhang@intel.com>
- *
- * Copyright 2016 Google, Inc.
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- *
- */
-
-/* header to be included in non-HAX-specific code */
-
-#ifndef QEMU_HAX_H
-#define QEMU_HAX_H
-
-int hax_sync_vcpus(void);
-
-#ifdef NEED_CPU_H
-# ifdef CONFIG_HAX
-#  define CONFIG_HAX_IS_POSSIBLE
-# endif
-#else /* !NEED_CPU_H */
-# define CONFIG_HAX_IS_POSSIBLE
-#endif
-
-#ifdef CONFIG_HAX_IS_POSSIBLE
-
-extern bool hax_allowed;
-
-#define hax_enabled()               (hax_allowed)
-
-#else /* !CONFIG_HAX_IS_POSSIBLE */
-
-#define hax_enabled()               (0)
-
-#endif /* CONFIG_HAX_IS_POSSIBLE */
-
-#endif /* QEMU_HAX_H */
diff --git a/include/sysemu/hw_accel.h b/include/sysemu/hw_accel.h
index 22903a55f7..c71b77e71f 100644
--- a/include/sysemu/hw_accel.h
+++ b/include/sysemu/hw_accel.h
@@ -12,7 +12,6 @@
 #define QEMU_HW_ACCEL_H
 
 #include "hw/core/cpu.h"
-#include "sysemu/hax.h"
 #include "sysemu/kvm.h"
 #include "sysemu/hvf.h"
 #include "sysemu/whpx.h"
diff --git a/include/sysemu/iothread.h b/include/sysemu/iothread.h
index 8f8601d6ab..2102a90eca 100644
--- a/include/sysemu/iothread.h
+++ b/include/sysemu/iothread.h
@@ -59,7 +59,7 @@ void iothread_stop(IOThread *iothread);
 void iothread_destroy(IOThread *iothread);
 
 /*
- * Returns true if executing withing IOThread context,
+ * Returns true if executing within IOThread context,
  * false otherwise.
  */
 bool qemu_in_iothread(void);
diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
index ebdca41052..ee9025f8e9 100644
--- a/include/sysemu/kvm.h
+++ b/include/sysemu/kvm.h
@@ -188,7 +188,6 @@ extern bool kvm_msi_use_devid;
 #endif  /* CONFIG_KVM_IS_POSSIBLE */
 
 struct kvm_run;
-struct kvm_lapic_state;
 struct kvm_irq_routing_entry;
 
 typedef struct KVMCapabilityInfo {
@@ -222,7 +221,6 @@ int kvm_has_vcpu_events(void);
 int kvm_has_robust_singlestep(void);
 int kvm_has_debugregs(void);
 int kvm_max_nested_state_length(void);
-int kvm_has_pit_state2(void);
 int kvm_has_many_ioeventfds(void);
 int kvm_has_gsi_routing(void);
 int kvm_has_intx_set_mask(void);
@@ -407,8 +405,6 @@ void kvm_irqchip_add_change_notifier(Notifier *n);
 void kvm_irqchip_remove_change_notifier(Notifier *n);
 void kvm_irqchip_change_notify(void);
 
-void kvm_get_apic_state(DeviceState *d, struct kvm_lapic_state *kapic);
-
 struct kvm_guest_debug;
 struct kvm_debug_exit_arch;
 
@@ -464,11 +460,6 @@ int kvm_vm_check_extension(KVMState *s, unsigned int extension);
         kvm_vcpu_ioctl(cpu, KVM_ENABLE_CAP, &cap);                   \
     })
 
-uint32_t kvm_arch_get_supported_cpuid(KVMState *env, uint32_t function,
-                                      uint32_t index, int reg);
-uint64_t kvm_arch_get_supported_msr_feature(KVMState *s, uint32_t index);
-
-
 void kvm_set_sigmask_len(KVMState *s, unsigned int sigmask_len);
 
 int kvm_physical_memory_addr_from_host(KVMState *s, void *ram_addr,
@@ -523,7 +514,6 @@ int kvm_irqchip_add_irqfd_notifier(KVMState *s, EventNotifier *n,
 int kvm_irqchip_remove_irqfd_notifier(KVMState *s, EventNotifier *n,
                                       qemu_irq irq);
 void kvm_irqchip_set_qemuirq_gsi(KVMState *s, qemu_irq irq, int gsi);
-void kvm_pc_setup_irq_routing(bool pci_enabled);
 void kvm_init_irq_routing(KVMState *s);
 
 bool kvm_kernel_irqchip_allowed(void);
diff --git a/include/sysemu/kvm_int.h b/include/sysemu/kvm_int.h
index 511b42bde5..a5b9122cb8 100644
--- a/include/sysemu/kvm_int.h
+++ b/include/sysemu/kvm_int.h
@@ -116,6 +116,7 @@ struct KVMState
     uint64_t kvm_dirty_ring_bytes;  /* Size of the per-vcpu dirty ring */
     uint32_t kvm_dirty_ring_size;   /* Number of dirty GFNs per ring */
     bool kvm_dirty_ring_with_bitmap;
+    uint64_t kvm_eager_split_size;  /* Eager Page Splitting chunk size */
     struct KVMDirtyRingReaper reaper;
     NotifyVmexitOption notify_vmexit;
     uint32_t notify_window;
diff --git a/include/sysemu/os-posix.h b/include/sysemu/os-posix.h
index 1030d39904..6dfdcbb086 100644
--- a/include/sysemu/os-posix.h
+++ b/include/sysemu/os-posix.h
@@ -42,17 +42,17 @@
 extern "C" {
 #endif
 
-int os_parse_cmd_args(int index, const char *optarg);
 void os_set_line_buffering(void);
 void os_setup_early_signal_handling(void);
 void os_set_proc_name(const char *s);
 void os_setup_signal_handling(void);
-void os_daemonize(void);
-void os_setup_post(void);
-int os_mlock(void);
-
 int os_set_daemonize(bool d);
 bool is_daemonized(void);
+void os_daemonize(void);
+bool os_set_runas(const char *optarg);
+void os_set_chroot(const char *optarg);
+void os_setup_post(void);
+int os_mlock(void);
 
 /**
  * qemu_alloc_stack:
diff --git a/include/sysemu/os-win32.h b/include/sysemu/os-win32.h
index 91aa0d7ec0..1047d260cb 100644
--- a/include/sysemu/os-win32.h
+++ b/include/sysemu/os-win32.h
@@ -66,8 +66,8 @@ extern "C" {
  * setjmp to _setjmpex instead. However, they are still defined in libmingwex.a,
  * which gets linked automatically.
  */
-extern int __mingw_setjmp(jmp_buf);
-extern void __attribute__((noreturn)) __mingw_longjmp(jmp_buf, int);
+int __mingw_setjmp(jmp_buf);
+void __attribute__((noreturn)) __mingw_longjmp(jmp_buf, int);
 #define setjmp(env) __mingw_setjmp(env)
 #define longjmp(env, val) __mingw_longjmp(env, val)
 #elif defined(_WIN64)
@@ -101,7 +101,6 @@ static inline void os_setup_signal_handling(void) {}
 static inline void os_daemonize(void) {}
 static inline void os_setup_post(void) {}
 static inline void os_set_proc_name(const char *dummy) {}
-static inline int os_parse_cmd_args(int index, const char *optarg) { return -1; }
 void os_set_line_buffering(void);
 void os_setup_early_signal_handling(void);
 
diff --git a/include/sysemu/runstate.h b/include/sysemu/runstate.h
index 7beb29c2e2..08afb97695 100644
--- a/include/sysemu/runstate.h
+++ b/include/sysemu/runstate.h
@@ -16,9 +16,16 @@ VMChangeStateEntry *qemu_add_vm_change_state_handler(VMChangeStateHandler *cb,
                                                      void *opaque);
 VMChangeStateEntry *qemu_add_vm_change_state_handler_prio(
         VMChangeStateHandler *cb, void *opaque, int priority);
+VMChangeStateEntry *
+qemu_add_vm_change_state_handler_prio_full(VMChangeStateHandler *cb,
+                                           VMChangeStateHandler *prepare_cb,
+                                           void *opaque, int priority);
 VMChangeStateEntry *qdev_add_vm_change_state_handler(DeviceState *dev,
                                                      VMChangeStateHandler *cb,
                                                      void *opaque);
+VMChangeStateEntry *qdev_add_vm_change_state_handler_full(
+    DeviceState *dev, VMChangeStateHandler *cb,
+    VMChangeStateHandler *prepare_cb, void *opaque);
 void qemu_del_vm_change_state_handler(VMChangeStateEntry *e);
 /**
  * vm_state_notify: Notify the state of the VM
diff --git a/include/sysemu/stats.h b/include/sysemu/stats.h
index fcf0983154..42c236c795 100644
--- a/include/sysemu/stats.h
+++ b/include/sysemu/stats.h
@@ -34,7 +34,7 @@ void add_stats_schema(StatsSchemaList **, StatsProvider, StatsTarget,
                       StatsSchemaValueList *);
 
 /*
- * True if a string matches the filter passed to the stats_fn callabck,
+ * True if a string matches the filter passed to the stats_fn callback,
  * false otherwise.
  *
  * Note that an empty list means no filtering, i.e. all strings will
diff --git a/include/sysemu/tpm_backend.h b/include/sysemu/tpm_backend.h
index 8fd3269c11..7fabafefee 100644
--- a/include/sysemu/tpm_backend.h
+++ b/include/sysemu/tpm_backend.h
@@ -115,7 +115,7 @@ int tpm_backend_startup_tpm(TPMBackend *s, size_t buffersize);
 
 /**
  * tpm_backend_had_startup_error:
- * @s: the backend to query for a statup error
+ * @s: the backend to query for a startup error
  *
  * Check whether the backend had an error during startup. Returns
  * false if no error occurred and the backend can be used, true
diff --git a/include/tcg/helper-info.h b/include/tcg/helper-info.h
index 4b6c9b43e8..7c27d6164a 100644
--- a/include/tcg/helper-info.h
+++ b/include/tcg/helper-info.h
@@ -1,5 +1,5 @@
 /*
- * TCG Helper Infomation Structure
+ * TCG Helper Information Structure
  *
  * Copyright (c) 2023 Linaro Ltd
  *
diff --git a/include/tcg/tcg-op-gvec-common.h b/include/tcg/tcg-op-gvec-common.h
index e2683d487f..4db8a58c14 100644
--- a/include/tcg/tcg-op-gvec-common.h
+++ b/include/tcg/tcg-op-gvec-common.h
@@ -374,6 +374,12 @@ void tcg_gen_gvec_rotrv(unsigned vece, uint32_t dofs, uint32_t aofs,
 void tcg_gen_gvec_cmp(TCGCond cond, unsigned vece, uint32_t dofs,
                       uint32_t aofs, uint32_t bofs,
                       uint32_t oprsz, uint32_t maxsz);
+void tcg_gen_gvec_cmpi(TCGCond cond, unsigned vece, uint32_t dofs,
+                       uint32_t aofs, int64_t c,
+                       uint32_t oprsz, uint32_t maxsz);
+void tcg_gen_gvec_cmps(TCGCond cond, unsigned vece, uint32_t dofs,
+                       uint32_t aofs, TCGv_i64 c,
+                       uint32_t oprsz, uint32_t maxsz);
 
 /*
  * Perform vector bit select: d = (b & a) | (c & ~a).
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
index 61d7c81b44..c9c6d770d0 100644
--- a/include/tcg/tcg.h
+++ b/include/tcg/tcg.h
@@ -879,7 +879,7 @@ enum {
     /* Instruction operands are 64-bits (otherwise 32-bits).  */
     TCG_OPF_64BIT        = 0x10,
     /* Instruction is optional and not implemented by the host, or insn
-       is generic and should not be implemened by the host.  */
+       is generic and should not be implemented by the host.  */
     TCG_OPF_NOT_PRESENT  = 0x20,
     /* Instruction operands are vectors.  */
     TCG_OPF_VECTOR       = 0x40,
@@ -1123,7 +1123,7 @@ static inline int tcg_can_emit_vec_op(TCGOpcode o, TCGType t, unsigned ve)
 /* Expand the tuple (opc, type, vece) on the given arguments.  */
 void tcg_expand_vec_op(TCGOpcode, TCGType, unsigned, TCGArg, ...);
 
-/* Replicate a constant C accoring to the log2 of the element size.  */
+/* Replicate a constant C according to the log2 of the element size.  */
 uint64_t dup_const(unsigned vece, uint64_t c);
 
 #define dup_const(VECE, C)                                         \
diff --git a/include/ui/console.h b/include/ui/console.h
index 3e8b22d6c6..28882f15a5 100644
--- a/include/ui/console.h
+++ b/include/ui/console.h
@@ -6,11 +6,28 @@
 #include "qemu/notify.h"
 #include "qapi/qapi-types-ui.h"
 #include "ui/input.h"
+#include "ui/surface.h"
 
-#ifdef CONFIG_OPENGL
-# include <epoxy/gl.h>
-# include "ui/shader.h"
-#endif
+#define TYPE_QEMU_CONSOLE "qemu-console"
+OBJECT_DECLARE_TYPE(QemuConsole, QemuConsoleClass, QEMU_CONSOLE)
+
+#define TYPE_QEMU_GRAPHIC_CONSOLE "qemu-graphic-console"
+OBJECT_DECLARE_SIMPLE_TYPE(QemuGraphicConsole, QEMU_GRAPHIC_CONSOLE)
+
+#define TYPE_QEMU_TEXT_CONSOLE "qemu-text-console"
+OBJECT_DECLARE_SIMPLE_TYPE(QemuTextConsole, QEMU_TEXT_CONSOLE)
+
+#define TYPE_QEMU_FIXED_TEXT_CONSOLE "qemu-fixed-text-console"
+OBJECT_DECLARE_SIMPLE_TYPE(QemuFixedTextConsole, QEMU_FIXED_TEXT_CONSOLE)
+
+#define QEMU_IS_GRAPHIC_CONSOLE(c) \
+    object_dynamic_cast(OBJECT(c), TYPE_QEMU_GRAPHIC_CONSOLE)
+
+#define QEMU_IS_TEXT_CONSOLE(c) \
+    object_dynamic_cast(OBJECT(c), TYPE_QEMU_TEXT_CONSOLE)
+
+#define QEMU_IS_FIXED_TEXT_CONSOLE(c) \
+    object_dynamic_cast(OBJECT(c), TYPE_QEMU_FIXED_TEXT_CONSOLE)
 
 /* keyboard/mouse support */
 
@@ -91,10 +108,9 @@ bool qemu_mouse_set(int index, Error **errp);
 #define QEMU_KEY_CTRL_PAGEUP     0xe406
 #define QEMU_KEY_CTRL_PAGEDOWN   0xe407
 
-void kbd_put_keysym_console(QemuConsole *s, int keysym);
-bool kbd_put_qcode_console(QemuConsole *s, int qcode, bool ctrl);
-void kbd_put_string_console(QemuConsole *s, const char *str, int len);
-void kbd_put_keysym(int keysym);
+void qemu_text_console_put_keysym(QemuTextConsole *s, int keysym);
+bool qemu_text_console_put_qcode(QemuTextConsole *s, int qcode, bool ctrl);
+void qemu_text_console_put_string(QemuTextConsole *s, const char *str, int len);
 
 /* Touch devices */
 typedef struct touch_slot {
@@ -112,17 +128,10 @@ void console_handle_touch_event(QemuConsole *con,
                                 Error **errp);
 /* consoles */
 
-#define TYPE_QEMU_CONSOLE "qemu-console"
-OBJECT_DECLARE_TYPE(QemuConsole, QemuConsoleClass, QEMU_CONSOLE)
-
-
 struct QemuConsoleClass {
     ObjectClass parent_class;
 };
 
-#define QEMU_ALLOCATED_FLAG     0x01
-#define QEMU_PLACEHOLDER_FLAG   0x02
-
 typedef struct ScanoutTexture {
     uint32_t backing_id;
     bool backing_y_0_top;
@@ -135,21 +144,6 @@ typedef struct ScanoutTexture {
     void *d3d_tex2d;
 } ScanoutTexture;
 
-typedef struct DisplaySurface {
-    pixman_format_code_t format;
-    pixman_image_t *image;
-    uint8_t flags;
-#ifdef CONFIG_OPENGL
-    GLenum glformat;
-    GLenum gltype;
-    GLuint texture;
-#endif
-#ifdef WIN32
-    HANDLE handle;
-    uint32_t handle_offset;
-#endif
-} DisplaySurface;
-
 typedef struct QemuUIInfo {
     /* physical dimension */
     uint16_t width_mm;
@@ -329,37 +323,13 @@ struct DisplayGLCtx {
 };
 
 DisplayState *init_displaystate(void);
-DisplaySurface *qemu_create_displaysurface_from(int width, int height,
-                                                pixman_format_code_t format,
-                                                int linesize, uint8_t *data);
-DisplaySurface *qemu_create_displaysurface_pixman(pixman_image_t *image);
-DisplaySurface *qemu_create_placeholder_surface(int w, int h,
-                                                const char *msg);
-#ifdef WIN32
-void qemu_displaysurface_win32_set_handle(DisplaySurface *surface,
-                                          HANDLE h, uint32_t offset);
-#endif
-PixelFormat qemu_default_pixelformat(int bpp);
-
-DisplaySurface *qemu_create_displaysurface(int width, int height);
-void qemu_free_displaysurface(DisplaySurface *surface);
-
-static inline int is_buffer_shared(DisplaySurface *surface)
-{
-    return !(surface->flags & QEMU_ALLOCATED_FLAG);
-}
-
-static inline int is_placeholder(DisplaySurface *surface)
-{
-    return surface->flags & QEMU_PLACEHOLDER_FLAG;
-}
 
 void register_displaychangelistener(DisplayChangeListener *dcl);
 void update_displaychangelistener(DisplayChangeListener *dcl,
                                   uint64_t interval);
 void unregister_displaychangelistener(DisplayChangeListener *dcl);
 
-bool dpy_ui_info_supported(QemuConsole *con);
+bool dpy_ui_info_supported(const QemuConsole *con);
 const QemuUIInfo *dpy_get_ui_info(const QemuConsole *con);
 int dpy_set_ui_info(QemuConsole *con, QemuUIInfo *info, bool delay);
 
@@ -400,43 +370,6 @@ int dpy_gl_ctx_make_current(QemuConsole *con, QEMUGLContext ctx);
 
 bool console_has_gl(QemuConsole *con);
 
-static inline int surface_stride(DisplaySurface *s)
-{
-    return pixman_image_get_stride(s->image);
-}
-
-static inline void *surface_data(DisplaySurface *s)
-{
-    return pixman_image_get_data(s->image);
-}
-
-static inline int surface_width(DisplaySurface *s)
-{
-    return pixman_image_get_width(s->image);
-}
-
-static inline int surface_height(DisplaySurface *s)
-{
-    return pixman_image_get_height(s->image);
-}
-
-static inline int surface_bits_per_pixel(DisplaySurface *s)
-{
-    int bits = PIXMAN_FORMAT_BPP(s->format);
-    return bits;
-}
-
-static inline int surface_bytes_per_pixel(DisplaySurface *s)
-{
-    int bits = PIXMAN_FORMAT_BPP(s->format);
-    return DIV_ROUND_UP(bits, 8);
-}
-
-static inline pixman_format_code_t surface_format(DisplaySurface *s)
-{
-    return s->format;
-}
-
 typedef uint32_t console_ch_t;
 
 static inline void console_write_ch(console_ch_t *dest, uint32_t ch)
@@ -484,7 +417,6 @@ QemuConsole *qemu_console_lookup_by_index(unsigned int index);
 QemuConsole *qemu_console_lookup_by_device(DeviceState *dev, uint32_t head);
 QemuConsole *qemu_console_lookup_by_device_name(const char *device_id,
                                                 uint32_t head, Error **errp);
-QemuConsole *qemu_console_lookup_unused(void);
 QEMUCursor *qemu_console_get_cursor(QemuConsole *con);
 bool qemu_console_is_visible(QemuConsole *con);
 bool qemu_console_is_graphic(QemuConsole *con);
@@ -504,6 +436,8 @@ void qemu_console_set_window_id(QemuConsole *con, int window_id);
 void console_select(unsigned int index);
 void qemu_console_resize(QemuConsole *con, int width, int height);
 DisplaySurface *qemu_console_surface(QemuConsole *con);
+void coroutine_fn qemu_console_co_wait_update(QemuConsole *con);
+int qemu_invalidate_text_consoles(void);
 
 /* console-gl.c */
 #ifdef CONFIG_OPENGL
diff --git a/include/ui/kbd-state.h b/include/ui/kbd-state.h
index eb9067dd53..fb79776128 100644
--- a/include/ui/kbd-state.h
+++ b/include/ui/kbd-state.h
@@ -65,7 +65,7 @@ void qkbd_state_key_event(QKbdState *kbd, QKeyCode qcode, bool down);
  * using qemu_input_event_send_key_delay().
  *
  * @kbd: state tracker state.
- * @delay_ms: the delay in miliseconds.
+ * @delay_ms: the delay in milliseconds.
  */
 void qkbd_state_set_delay(QKbdState *kbd, int delay_ms);
 
diff --git a/include/ui/qemu-pixman.h b/include/ui/qemu-pixman.h
index 0c775604d1..51f8709327 100644
--- a/include/ui/qemu-pixman.h
+++ b/include/ui/qemu-pixman.h
@@ -47,6 +47,12 @@
 # define PIXMAN_LE_x8r8g8b8   PIXMAN_x8r8g8b8
 #endif
 
+#define QEMU_PIXMAN_COLOR(r, g, b)                                               \
+    { .red = r << 8, .green = g << 8, .blue = b << 8, .alpha = 0xffff }
+
+#define QEMU_PIXMAN_COLOR_BLACK QEMU_PIXMAN_COLOR(0x00, 0x00, 0x00)
+#define QEMU_PIXMAN_COLOR_GRAY QEMU_PIXMAN_COLOR(0xaa, 0xaa, 0xaa)
+
 /* -------------------------------------------------------------------- */
 
 typedef struct PixelFormat {
@@ -72,13 +78,10 @@ pixman_image_t *qemu_pixman_linebuf_create(pixman_format_code_t format,
                                            int width);
 void qemu_pixman_linebuf_fill(pixman_image_t *linebuf, pixman_image_t *fb,
                               int width, int x, int y);
-void qemu_pixman_linebuf_copy(pixman_image_t *fb, int width, int x, int y,
-                              pixman_image_t *linebuf);
 pixman_image_t *qemu_pixman_mirror_create(pixman_format_code_t format,
                                           pixman_image_t *image);
 void qemu_pixman_image_unref(pixman_image_t *image);
 
-pixman_color_t qemu_pixman_color(PixelFormat *pf, uint32_t color);
 pixman_image_t *qemu_pixman_glyph_from_vgafont(int height, const uint8_t *font,
                                                unsigned int ch);
 void qemu_pixman_glyph_render(pixman_image_t *glyph,
diff --git a/include/ui/spice-display.h b/include/ui/spice-display.h
index 5aa13664d6..e1a9b36185 100644
--- a/include/ui/spice-display.h
+++ b/include/ui/spice-display.h
@@ -42,7 +42,7 @@
 #define NUM_MEMSLOTS_GROUPS 2
 
 /*
- * Internal enum to differenciate between options for
+ * Internal enum to differentiate between options for
  * io calls that have a sync (old) version and an _async (new)
  * version:
  *  QXL_SYNC: use the old version
diff --git a/include/ui/surface.h b/include/ui/surface.h
new file mode 100644
index 0000000000..4244e0ca4a
--- /dev/null
+++ b/include/ui/surface.h
@@ -0,0 +1,95 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ * QEMU UI Console
+ */
+#ifndef SURFACE_H
+#define SURFACE_H
+
+#include "ui/qemu-pixman.h"
+
+#ifdef CONFIG_OPENGL
+# include <epoxy/gl.h>
+# include "ui/shader.h"
+#endif
+
+#define QEMU_ALLOCATED_FLAG     0x01
+#define QEMU_PLACEHOLDER_FLAG   0x02
+
+typedef struct DisplaySurface {
+    pixman_image_t *image;
+    uint8_t flags;
+#ifdef CONFIG_OPENGL
+    GLenum glformat;
+    GLenum gltype;
+    GLuint texture;
+#endif
+#ifdef WIN32
+    HANDLE handle;
+    uint32_t handle_offset;
+#endif
+} DisplaySurface;
+
+PixelFormat qemu_default_pixelformat(int bpp);
+
+DisplaySurface *qemu_create_displaysurface_from(int width, int height,
+                                                pixman_format_code_t format,
+                                                int linesize, uint8_t *data);
+DisplaySurface *qemu_create_displaysurface_pixman(pixman_image_t *image);
+DisplaySurface *qemu_create_placeholder_surface(int w, int h,
+                                                const char *msg);
+#ifdef WIN32
+void qemu_displaysurface_win32_set_handle(DisplaySurface *surface,
+                                          HANDLE h, uint32_t offset);
+#endif
+
+DisplaySurface *qemu_create_displaysurface(int width, int height);
+void qemu_free_displaysurface(DisplaySurface *surface);
+
+static inline int is_buffer_shared(DisplaySurface *surface)
+{
+    return !(surface->flags & QEMU_ALLOCATED_FLAG);
+}
+
+static inline int is_placeholder(DisplaySurface *surface)
+{
+    return surface->flags & QEMU_PLACEHOLDER_FLAG;
+}
+
+static inline int surface_stride(DisplaySurface *s)
+{
+    return pixman_image_get_stride(s->image);
+}
+
+static inline void *surface_data(DisplaySurface *s)
+{
+    return pixman_image_get_data(s->image);
+}
+
+static inline int surface_width(DisplaySurface *s)
+{
+    return pixman_image_get_width(s->image);
+}
+
+static inline int surface_height(DisplaySurface *s)
+{
+    return pixman_image_get_height(s->image);
+}
+
+static inline pixman_format_code_t surface_format(DisplaySurface *s)
+{
+    return pixman_image_get_format(s->image);
+}
+
+static inline int surface_bits_per_pixel(DisplaySurface *s)
+{
+    int bits = PIXMAN_FORMAT_BPP(surface_format(s));
+    return bits;
+}
+
+static inline int surface_bytes_per_pixel(DisplaySurface *s)
+{
+    int bits = PIXMAN_FORMAT_BPP(surface_format(s));
+    return DIV_ROUND_UP(bits, 8);
+}
+
+#endif
diff --git a/include/user/safe-syscall.h b/include/user/safe-syscall.h
index ddceef12e2..27b71cdbd8 100644
--- a/include/user/safe-syscall.h
+++ b/include/user/safe-syscall.h
@@ -91,7 +91,7 @@
  *
  * The basic setup is that we make the host syscall via a known
  * section of host native assembly. If a signal occurs, our signal
- * handler checks the interrupted host PC against the addresse of that
+ * handler checks the interrupted host PC against the address of that
  * known section. If the PC is before or at the address of the syscall
  * instruction then we change the PC to point at a "return
  * -QEMU_ERESTARTSYS" code path instead, and then exit the signal handler
@@ -126,8 +126,8 @@
  */
 
 /* The core part of this function is implemented in assembly */
-extern long safe_syscall_base(int *pending, long number, ...);
-extern long safe_syscall_set_errno_tail(int value);
+long safe_syscall_base(int *pending, long number, ...);
+long safe_syscall_set_errno_tail(int value);
 
 /* These are defined by the safe-syscall.inc.S file */
 extern char safe_syscall_start[];
diff --git a/io/channel-command.c b/io/channel-command.c
index 7ed726c802..6d5f64e146 100644
--- a/io/channel-command.c
+++ b/io/channel-command.c
@@ -20,6 +20,7 @@
 
 #include "qemu/osdep.h"
 #include "io/channel-command.h"
+#include "io/channel-util.h"
 #include "io/channel-watch.h"
 #include "qapi/error.h"
 #include "qemu/module.h"
@@ -331,14 +332,17 @@ static int qio_channel_command_close(QIOChannel *ioc,
 
 
 static void qio_channel_command_set_aio_fd_handler(QIOChannel *ioc,
-                                                   AioContext *ctx,
+                                                   AioContext *read_ctx,
                                                    IOHandler *io_read,
+                                                   AioContext *write_ctx,
                                                    IOHandler *io_write,
                                                    void *opaque)
 {
     QIOChannelCommand *cioc = QIO_CHANNEL_COMMAND(ioc);
-    aio_set_fd_handler(ctx, cioc->readfd, io_read, NULL, NULL, NULL, opaque);
-    aio_set_fd_handler(ctx, cioc->writefd, NULL, io_write, NULL, NULL, opaque);
+
+    qio_channel_util_set_aio_fd_handler(cioc->readfd, read_ctx, io_read,
+                                        cioc->writefd, write_ctx, io_write,
+                                        opaque);
 }
 
 
diff --git a/io/channel-file.c b/io/channel-file.c
index 8b5821f452..4a12c61886 100644
--- a/io/channel-file.c
+++ b/io/channel-file.c
@@ -20,6 +20,7 @@
 
 #include "qemu/osdep.h"
 #include "io/channel-file.h"
+#include "io/channel-util.h"
 #include "io/channel-watch.h"
 #include "qapi/error.h"
 #include "qemu/module.h"
@@ -192,13 +193,17 @@ static int qio_channel_file_close(QIOChannel *ioc,
 
 
 static void qio_channel_file_set_aio_fd_handler(QIOChannel *ioc,
-                                                AioContext *ctx,
+                                                AioContext *read_ctx,
                                                 IOHandler *io_read,
+                                                AioContext *write_ctx,
                                                 IOHandler *io_write,
                                                 void *opaque)
 {
     QIOChannelFile *fioc = QIO_CHANNEL_FILE(ioc);
-    aio_set_fd_handler(ctx, fioc->fd, io_read, io_write, NULL, NULL, opaque);
+
+    qio_channel_util_set_aio_fd_handler(fioc->fd, read_ctx, io_read,
+                                        fioc->fd, write_ctx, io_write,
+                                        opaque);
 }
 
 static GSource *qio_channel_file_create_watch(QIOChannel *ioc,
diff --git a/io/channel-null.c b/io/channel-null.c
index 4fafdb770d..ef99586348 100644
--- a/io/channel-null.c
+++ b/io/channel-null.c
@@ -128,8 +128,9 @@ qio_channel_null_close(QIOChannel *ioc,
 
 static void
 qio_channel_null_set_aio_fd_handler(QIOChannel *ioc G_GNUC_UNUSED,
-                                    AioContext *ctx G_GNUC_UNUSED,
+                                    AioContext *read_ctx G_GNUC_UNUSED,
                                     IOHandler *io_read G_GNUC_UNUSED,
+                                    AioContext *write_ctx G_GNUC_UNUSED,
                                     IOHandler *io_write G_GNUC_UNUSED,
                                     void *opaque G_GNUC_UNUSED)
 {
diff --git a/io/channel-socket.c b/io/channel-socket.c
index d99945ebec..02ffb51e99 100644
--- a/io/channel-socket.c
+++ b/io/channel-socket.c
@@ -22,6 +22,7 @@
 #include "qapi/qapi-visit-sockets.h"
 #include "qemu/module.h"
 #include "io/channel-socket.h"
+#include "io/channel-util.h"
 #include "io/channel-watch.h"
 #include "trace.h"
 #include "qapi/clone-visitor.h"
@@ -893,13 +894,17 @@ qio_channel_socket_shutdown(QIOChannel *ioc,
 }
 
 static void qio_channel_socket_set_aio_fd_handler(QIOChannel *ioc,
-                                                  AioContext *ctx,
+                                                  AioContext *read_ctx,
                                                   IOHandler *io_read,
+                                                  AioContext *write_ctx,
                                                   IOHandler *io_write,
                                                   void *opaque)
 {
     QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
-    aio_set_fd_handler(ctx, sioc->fd, io_read, io_write, NULL, NULL, opaque);
+
+    qio_channel_util_set_aio_fd_handler(sioc->fd, read_ctx, io_read,
+                                        sioc->fd, write_ctx, io_write,
+                                        opaque);
 }
 
 static GSource *qio_channel_socket_create_watch(QIOChannel *ioc,
diff --git a/io/channel-tls.c b/io/channel-tls.c
index 847d5297c3..58fe1aceee 100644
--- a/io/channel-tls.c
+++ b/io/channel-tls.c
@@ -388,14 +388,16 @@ static int qio_channel_tls_close(QIOChannel *ioc,
 }
 
 static void qio_channel_tls_set_aio_fd_handler(QIOChannel *ioc,
-                                               AioContext *ctx,
+                                               AioContext *read_ctx,
                                                IOHandler *io_read,
+                                               AioContext *write_ctx,
                                                IOHandler *io_write,
                                                void *opaque)
 {
     QIOChannelTLS *tioc = QIO_CHANNEL_TLS(ioc);
 
-    qio_channel_set_aio_fd_handler(tioc->master, ctx, io_read, io_write, opaque);
+    qio_channel_set_aio_fd_handler(tioc->master, read_ctx, io_read,
+            write_ctx, io_write, opaque);
 }
 
 typedef struct QIOChannelTLSSource QIOChannelTLSSource;
diff --git a/io/channel-util.c b/io/channel-util.c
index 848a7a43d6..4b340d46d7 100644
--- a/io/channel-util.c
+++ b/io/channel-util.c
@@ -36,3 +36,27 @@ QIOChannel *qio_channel_new_fd(int fd,
     }
     return ioc;
 }
+
+
+void qio_channel_util_set_aio_fd_handler(int read_fd,
+                                         AioContext *read_ctx,
+                                         IOHandler *io_read,
+                                         int write_fd,
+                                         AioContext *write_ctx,
+                                         IOHandler *io_write,
+                                         void *opaque)
+{
+    if (read_fd == write_fd && read_ctx == write_ctx) {
+        aio_set_fd_handler(read_ctx, read_fd, io_read, io_write,
+                NULL, NULL, opaque);
+    } else {
+        if (read_ctx) {
+            aio_set_fd_handler(read_ctx, read_fd, io_read, NULL,
+                    NULL, NULL, opaque);
+        }
+        if (write_ctx) {
+            aio_set_fd_handler(write_ctx, write_fd, NULL, io_write,
+                    NULL, NULL, opaque);
+        }
+    }
+}
diff --git a/io/channel.c b/io/channel.c
index 72f0066af5..86c5834510 100644
--- a/io/channel.c
+++ b/io/channel.c
@@ -365,6 +365,12 @@ int qio_channel_set_blocking(QIOChannel *ioc,
 }
 
 
+void qio_channel_set_follow_coroutine_ctx(QIOChannel *ioc, bool enabled)
+{
+    ioc->follow_coroutine_ctx = enabled;
+}
+
+
 int qio_channel_close(QIOChannel *ioc,
                       Error **errp)
 {
@@ -388,14 +394,16 @@ GSource *qio_channel_create_watch(QIOChannel *ioc,
 
 
 void qio_channel_set_aio_fd_handler(QIOChannel *ioc,
-                                    AioContext *ctx,
+                                    AioContext *read_ctx,
                                     IOHandler *io_read,
+                                    AioContext *write_ctx,
                                     IOHandler *io_write,
                                     void *opaque)
 {
     QIOChannelClass *klass = QIO_CHANNEL_GET_CLASS(ioc);
 
-    klass->io_set_aio_fd_handler(ioc, ctx, io_read, io_write, opaque);
+    klass->io_set_aio_fd_handler(ioc, read_ctx, io_read, write_ctx, io_write,
+            opaque);
 }
 
 guint qio_channel_add_watch_full(QIOChannel *ioc,
@@ -542,56 +550,101 @@ static void qio_channel_restart_write(void *opaque)
     aio_co_wake(co);
 }
 
-static void qio_channel_set_aio_fd_handlers(QIOChannel *ioc)
+static void coroutine_fn
+qio_channel_set_fd_handlers(QIOChannel *ioc, GIOCondition condition)
 {
-    IOHandler *rd_handler = NULL, *wr_handler = NULL;
+    AioContext *ctx = ioc->follow_coroutine_ctx ?
+        qemu_coroutine_get_aio_context(qemu_coroutine_self()) :
+        iohandler_get_aio_context();
+    AioContext *read_ctx = NULL;
+    IOHandler *io_read = NULL;
+    AioContext *write_ctx = NULL;
+    IOHandler *io_write = NULL;
+
+    if (condition == G_IO_IN) {
+        ioc->read_coroutine = qemu_coroutine_self();
+        ioc->read_ctx = ctx;
+        read_ctx = ctx;
+        io_read = qio_channel_restart_read;
+
+        /*
+         * Thread safety: if the other coroutine is set and its AioContext
+         * matches ours, then there is mutual exclusion between read and write
+         * because they share a single thread and it's safe to set both read
+         * and write fd handlers here. If the AioContext does not match ours,
+         * then both threads may run in parallel but there is no shared state
+         * to worry about.
+         */
+        if (ioc->write_coroutine && ioc->write_ctx == ctx) {
+            write_ctx = ctx;
+            io_write = qio_channel_restart_write;
+        }
+    } else if (condition == G_IO_OUT) {
+        ioc->write_coroutine = qemu_coroutine_self();
+        ioc->write_ctx = ctx;
+        write_ctx = ctx;
+        io_write = qio_channel_restart_write;
+        if (ioc->read_coroutine && ioc->read_ctx == ctx) {
+            read_ctx = ctx;
+            io_read = qio_channel_restart_read;
+        }
+    } else {
+        abort();
+    }
+
+    qio_channel_set_aio_fd_handler(ioc, read_ctx, io_read,
+            write_ctx, io_write, ioc);
+}
+
+static void coroutine_fn
+qio_channel_clear_fd_handlers(QIOChannel *ioc, GIOCondition condition)
+{
+    AioContext *read_ctx = NULL;
+    IOHandler *io_read = NULL;
+    AioContext *write_ctx = NULL;
+    IOHandler *io_write = NULL;
     AioContext *ctx;
 
-    if (ioc->read_coroutine) {
-        rd_handler = qio_channel_restart_read;
-    }
-    if (ioc->write_coroutine) {
-        wr_handler = qio_channel_restart_write;
+    if (condition == G_IO_IN) {
+        ctx = ioc->read_ctx;
+        read_ctx = ctx;
+        io_read = NULL;
+        if (ioc->write_coroutine && ioc->write_ctx == ctx) {
+            write_ctx = ctx;
+            io_write = qio_channel_restart_write;
+        }
+    } else if (condition == G_IO_OUT) {
+        ctx = ioc->write_ctx;
+        write_ctx = ctx;
+        io_write = NULL;
+        if (ioc->read_coroutine && ioc->read_ctx == ctx) {
+            read_ctx = ctx;
+            io_read = qio_channel_restart_read;
+        }
+    } else {
+        abort();
     }
 
-    ctx = ioc->ctx ? ioc->ctx : iohandler_get_aio_context();
-    qio_channel_set_aio_fd_handler(ioc, ctx, rd_handler, wr_handler, ioc);
-}
-
-void qio_channel_attach_aio_context(QIOChannel *ioc,
-                                    AioContext *ctx)
-{
-    assert(!ioc->read_coroutine);
-    assert(!ioc->write_coroutine);
-    ioc->ctx = ctx;
-}
-
-void qio_channel_detach_aio_context(QIOChannel *ioc)
-{
-    ioc->read_coroutine = NULL;
-    ioc->write_coroutine = NULL;
-    qio_channel_set_aio_fd_handlers(ioc);
-    ioc->ctx = NULL;
+    qio_channel_set_aio_fd_handler(ioc, read_ctx, io_read,
+            write_ctx, io_write, ioc);
 }
 
 void coroutine_fn qio_channel_yield(QIOChannel *ioc,
                                     GIOCondition condition)
 {
-    AioContext *ioc_ctx = ioc->ctx ?: qemu_get_aio_context();
+    AioContext *ioc_ctx;
 
     assert(qemu_in_coroutine());
-    assert(in_aio_context_home_thread(ioc_ctx));
+    ioc_ctx = qemu_coroutine_get_aio_context(qemu_coroutine_self());
 
     if (condition == G_IO_IN) {
         assert(!ioc->read_coroutine);
-        ioc->read_coroutine = qemu_coroutine_self();
     } else if (condition == G_IO_OUT) {
         assert(!ioc->write_coroutine);
-        ioc->write_coroutine = qemu_coroutine_self();
     } else {
         abort();
     }
-    qio_channel_set_aio_fd_handlers(ioc);
+    qio_channel_set_fd_handlers(ioc, condition);
     qemu_coroutine_yield();
     assert(in_aio_context_home_thread(ioc_ctx));
 
@@ -599,11 +652,10 @@ void coroutine_fn qio_channel_yield(QIOChannel *ioc,
      * through the aio_fd_handlers. */
     if (condition == G_IO_IN) {
         assert(ioc->read_coroutine == NULL);
-        qio_channel_set_aio_fd_handlers(ioc);
     } else if (condition == G_IO_OUT) {
         assert(ioc->write_coroutine == NULL);
-        qio_channel_set_aio_fd_handlers(ioc);
     }
+    qio_channel_clear_fd_handlers(ioc, condition);
 }
 
 void qio_channel_wake_read(QIOChannel *ioc)
@@ -653,6 +705,10 @@ static void qio_channel_finalize(Object *obj)
 {
     QIOChannel *ioc = QIO_CHANNEL(obj);
 
+    /* Must not have coroutines in qio_channel_yield() */
+    assert(!ioc->read_coroutine);
+    assert(!ioc->write_coroutine);
+
     g_free(ioc->name);
 
 #ifdef _WIN32
diff --git a/iothread.c b/iothread.c
index b41c305bd9..b753286414 100644
--- a/iothread.c
+++ b/iothread.c
@@ -138,12 +138,14 @@ static void iothread_instance_finalize(Object *obj)
     qemu_sem_destroy(&iothread->init_done_sem);
 }
 
-static void iothread_init_gcontext(IOThread *iothread)
+static void iothread_init_gcontext(IOThread *iothread, const char *thread_name)
 {
     GSource *source;
+    g_autofree char *name = g_strdup_printf("%s aio-context", thread_name);
 
     iothread->worker_context = g_main_context_new();
     source = aio_get_g_source(iothread_get_aio_context(iothread));
+    g_source_set_name(source, name);
     g_source_attach(source, iothread->worker_context);
     g_source_unref(source);
     iothread->main_loop = g_main_loop_new(iothread->worker_context, TRUE);
@@ -180,7 +182,7 @@ static void iothread_init(EventLoopBase *base, Error **errp)
 {
     Error *local_error = NULL;
     IOThread *iothread = IOTHREAD(base);
-    char *thread_name;
+    g_autofree char *thread_name = NULL;
 
     iothread->stopping = false;
     iothread->running = true;
@@ -189,11 +191,14 @@ static void iothread_init(EventLoopBase *base, Error **errp)
         return;
     }
 
+    thread_name = g_strdup_printf("IO %s",
+                        object_get_canonical_path_component(OBJECT(base)));
+
     /*
      * Init one GMainContext for the iothread unconditionally, even if
      * it's not used
      */
-    iothread_init_gcontext(iothread);
+    iothread_init_gcontext(iothread, thread_name);
 
     iothread_set_aio_context_params(base, &local_error);
     if (local_error) {
@@ -206,11 +211,8 @@ static void iothread_init(EventLoopBase *base, Error **errp)
     /* This assumes we are called from a thread with useful CPU affinity for us
      * to inherit.
      */
-    thread_name = g_strdup_printf("IO %s",
-                        object_get_canonical_path_component(OBJECT(base)));
     qemu_thread_create(&iothread->thread, thread_name, iothread_run,
                        iothread, QEMU_THREAD_JOINABLE);
-    g_free(thread_name);
 
     /* Wait for initialization to complete */
     while (iothread->thread_id == -1) {
diff --git a/linux-headers/asm-arm64/bitsperlong.h b/linux-headers/asm-arm64/bitsperlong.h
index 6dc0bb0c13..485d60bee2 100644
--- a/linux-headers/asm-arm64/bitsperlong.h
+++ b/linux-headers/asm-arm64/bitsperlong.h
@@ -1 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_BITSPERLONG_H
+#define __ASM_BITSPERLONG_H
+
+#define __BITS_PER_LONG 64
+
 #include <asm-generic/bitsperlong.h>
+
+#endif	/* __ASM_BITSPERLONG_H */
diff --git a/linux-headers/asm-generic/unistd.h b/linux-headers/asm-generic/unistd.h
index fd6c1cb585..abe087c53b 100644
--- a/linux-headers/asm-generic/unistd.h
+++ b/linux-headers/asm-generic/unistd.h
@@ -820,8 +820,11 @@ __SYSCALL(__NR_set_mempolicy_home_node, sys_set_mempolicy_home_node)
 #define __NR_cachestat 451
 __SYSCALL(__NR_cachestat, sys_cachestat)
 
+#define __NR_fchmodat2 452
+__SYSCALL(__NR_fchmodat2, sys_fchmodat2)
+
 #undef __NR_syscalls
-#define __NR_syscalls 452
+#define __NR_syscalls 453
 
 /*
  * 32 bit systems traditionally used different
diff --git a/linux-headers/asm-mips/unistd_n32.h b/linux-headers/asm-mips/unistd_n32.h
index 8233f061c4..46d8500654 100644
--- a/linux-headers/asm-mips/unistd_n32.h
+++ b/linux-headers/asm-mips/unistd_n32.h
@@ -380,5 +380,6 @@
 #define __NR_futex_waitv (__NR_Linux + 449)
 #define __NR_set_mempolicy_home_node (__NR_Linux + 450)
 #define __NR_cachestat (__NR_Linux + 451)
+#define __NR_fchmodat2 (__NR_Linux + 452)
 
 #endif /* _ASM_UNISTD_N32_H */
diff --git a/linux-headers/asm-mips/unistd_n64.h b/linux-headers/asm-mips/unistd_n64.h
index a174edc768..c2f7ac673b 100644
--- a/linux-headers/asm-mips/unistd_n64.h
+++ b/linux-headers/asm-mips/unistd_n64.h
@@ -356,5 +356,6 @@
 #define __NR_futex_waitv (__NR_Linux + 449)
 #define __NR_set_mempolicy_home_node (__NR_Linux + 450)
 #define __NR_cachestat (__NR_Linux + 451)
+#define __NR_fchmodat2 (__NR_Linux + 452)
 
 #endif /* _ASM_UNISTD_N64_H */
diff --git a/linux-headers/asm-mips/unistd_o32.h b/linux-headers/asm-mips/unistd_o32.h
index c1a5351d9b..757c68f2ad 100644
--- a/linux-headers/asm-mips/unistd_o32.h
+++ b/linux-headers/asm-mips/unistd_o32.h
@@ -426,5 +426,6 @@
 #define __NR_futex_waitv (__NR_Linux + 449)
 #define __NR_set_mempolicy_home_node (__NR_Linux + 450)
 #define __NR_cachestat (__NR_Linux + 451)
+#define __NR_fchmodat2 (__NR_Linux + 452)
 
 #endif /* _ASM_UNISTD_O32_H */
diff --git a/linux-headers/asm-powerpc/unistd_32.h b/linux-headers/asm-powerpc/unistd_32.h
index 8206758691..8ef94bbac1 100644
--- a/linux-headers/asm-powerpc/unistd_32.h
+++ b/linux-headers/asm-powerpc/unistd_32.h
@@ -433,6 +433,7 @@
 #define __NR_futex_waitv 449
 #define __NR_set_mempolicy_home_node 450
 #define __NR_cachestat 451
+#define __NR_fchmodat2 452
 
 
 #endif /* _ASM_UNISTD_32_H */
diff --git a/linux-headers/asm-powerpc/unistd_64.h b/linux-headers/asm-powerpc/unistd_64.h
index 7be98c15f0..0e7ee43e88 100644
--- a/linux-headers/asm-powerpc/unistd_64.h
+++ b/linux-headers/asm-powerpc/unistd_64.h
@@ -405,6 +405,7 @@
 #define __NR_futex_waitv 449
 #define __NR_set_mempolicy_home_node 450
 #define __NR_cachestat 451
+#define __NR_fchmodat2 452
 
 
 #endif /* _ASM_UNISTD_64_H */
diff --git a/linux-headers/asm-riscv/bitsperlong.h b/linux-headers/asm-riscv/bitsperlong.h
index 6dc0bb0c13..cc5c45a9ce 100644
--- a/linux-headers/asm-riscv/bitsperlong.h
+++ b/linux-headers/asm-riscv/bitsperlong.h
@@ -1 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ * Copyright (C) 2015 Regents of the University of California
+ */
+
+#ifndef _ASM_RISCV_BITSPERLONG_H
+#define _ASM_RISCV_BITSPERLONG_H
+
+#define __BITS_PER_LONG (__SIZEOF_POINTER__ * 8)
+
 #include <asm-generic/bitsperlong.h>
+
+#endif /* _ASM_RISCV_BITSPERLONG_H */
diff --git a/linux-headers/asm-riscv/kvm.h b/linux-headers/asm-riscv/kvm.h
index 930fdc4101..992c5e4071 100644
--- a/linux-headers/asm-riscv/kvm.h
+++ b/linux-headers/asm-riscv/kvm.h
@@ -55,6 +55,7 @@ struct kvm_riscv_config {
 	unsigned long marchid;
 	unsigned long mimpid;
 	unsigned long zicboz_block_size;
+	unsigned long satp_mode;
 };
 
 /* CORE registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */
@@ -124,6 +125,12 @@ enum KVM_RISCV_ISA_EXT_ID {
 	KVM_RISCV_ISA_EXT_SSAIA,
 	KVM_RISCV_ISA_EXT_V,
 	KVM_RISCV_ISA_EXT_SVNAPOT,
+	KVM_RISCV_ISA_EXT_ZBA,
+	KVM_RISCV_ISA_EXT_ZBS,
+	KVM_RISCV_ISA_EXT_ZICNTR,
+	KVM_RISCV_ISA_EXT_ZICSR,
+	KVM_RISCV_ISA_EXT_ZIFENCEI,
+	KVM_RISCV_ISA_EXT_ZIHPM,
 	KVM_RISCV_ISA_EXT_MAX,
 };
 
@@ -193,6 +200,15 @@ enum KVM_RISCV_SBI_EXT_ID {
 
 /* ISA Extension registers are mapped as type 7 */
 #define KVM_REG_RISCV_ISA_EXT		(0x07 << KVM_REG_RISCV_TYPE_SHIFT)
+#define KVM_REG_RISCV_ISA_SINGLE	(0x0 << KVM_REG_RISCV_SUBTYPE_SHIFT)
+#define KVM_REG_RISCV_ISA_MULTI_EN	(0x1 << KVM_REG_RISCV_SUBTYPE_SHIFT)
+#define KVM_REG_RISCV_ISA_MULTI_DIS	(0x2 << KVM_REG_RISCV_SUBTYPE_SHIFT)
+#define KVM_REG_RISCV_ISA_MULTI_REG(__ext_id)	\
+		((__ext_id) / __BITS_PER_LONG)
+#define KVM_REG_RISCV_ISA_MULTI_MASK(__ext_id)	\
+		(1UL << ((__ext_id) % __BITS_PER_LONG))
+#define KVM_REG_RISCV_ISA_MULTI_REG_LAST	\
+		KVM_REG_RISCV_ISA_MULTI_REG(KVM_RISCV_ISA_EXT_MAX - 1)
 
 /* SBI extension registers are mapped as type 8 */
 #define KVM_REG_RISCV_SBI_EXT		(0x08 << KVM_REG_RISCV_TYPE_SHIFT)
diff --git a/linux-headers/asm-s390/kvm.h b/linux-headers/asm-s390/kvm.h
index e2afd95420..023a2763a9 100644
--- a/linux-headers/asm-s390/kvm.h
+++ b/linux-headers/asm-s390/kvm.h
@@ -159,6 +159,22 @@ struct kvm_s390_vm_cpu_subfunc {
 	__u8 reserved[1728];
 };
 
+#define KVM_S390_VM_CPU_PROCESSOR_UV_FEAT_GUEST	6
+#define KVM_S390_VM_CPU_MACHINE_UV_FEAT_GUEST	7
+
+#define KVM_S390_VM_CPU_UV_FEAT_NR_BITS	64
+struct kvm_s390_vm_cpu_uv_feat {
+	union {
+		struct {
+			__u64 : 4;
+			__u64 ap : 1;		/* bit 4 */
+			__u64 ap_intr : 1;	/* bit 5 */
+			__u64 : 58;
+		};
+		__u64 feat;
+	};
+};
+
 /* kvm attributes for crypto */
 #define KVM_S390_VM_CRYPTO_ENABLE_AES_KW	0
 #define KVM_S390_VM_CRYPTO_ENABLE_DEA_KW	1
diff --git a/linux-headers/asm-s390/unistd_32.h b/linux-headers/asm-s390/unistd_32.h
index ef772cc5f8..716fa368ca 100644
--- a/linux-headers/asm-s390/unistd_32.h
+++ b/linux-headers/asm-s390/unistd_32.h
@@ -424,5 +424,6 @@
 #define __NR_futex_waitv 449
 #define __NR_set_mempolicy_home_node 450
 #define __NR_cachestat 451
+#define __NR_fchmodat2 452
 
 #endif /* _ASM_S390_UNISTD_32_H */
diff --git a/linux-headers/asm-s390/unistd_64.h b/linux-headers/asm-s390/unistd_64.h
index 32354a0459..b2a11b1d13 100644
--- a/linux-headers/asm-s390/unistd_64.h
+++ b/linux-headers/asm-s390/unistd_64.h
@@ -372,5 +372,6 @@
 #define __NR_futex_waitv 449
 #define __NR_set_mempolicy_home_node 450
 #define __NR_cachestat 451
+#define __NR_fchmodat2 452
 
 #endif /* _ASM_S390_UNISTD_64_H */
diff --git a/linux-headers/asm-x86/mman.h b/linux-headers/asm-x86/mman.h
index 775dbd3aff..46cdc941f9 100644
--- a/linux-headers/asm-x86/mman.h
+++ b/linux-headers/asm-x86/mman.h
@@ -3,14 +3,10 @@
 #define _ASM_X86_MMAN_H
 
 #define MAP_32BIT	0x40		/* only give out 32bit addresses */
+#define MAP_ABOVE4G	0x80		/* only map above 4GB */
 
-#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
-#define arch_calc_vm_prot_bits(prot, key) (		\
-		((key) & 0x1 ? VM_PKEY_BIT0 : 0) |      \
-		((key) & 0x2 ? VM_PKEY_BIT1 : 0) |      \
-		((key) & 0x4 ? VM_PKEY_BIT2 : 0) |      \
-		((key) & 0x8 ? VM_PKEY_BIT3 : 0))
-#endif
+/* Flags for map_shadow_stack(2) */
+#define SHADOW_STACK_SET_TOKEN	(1ULL << 0)	/* Set up a restore token in the shadow stack */
 
 #include <asm-generic/mman.h>
 
diff --git a/linux-headers/asm-x86/unistd_32.h b/linux-headers/asm-x86/unistd_32.h
index 37b32d8139..d749ad1c24 100644
--- a/linux-headers/asm-x86/unistd_32.h
+++ b/linux-headers/asm-x86/unistd_32.h
@@ -442,6 +442,7 @@
 #define __NR_futex_waitv 449
 #define __NR_set_mempolicy_home_node 450
 #define __NR_cachestat 451
+#define __NR_fchmodat2 452
 
 
 #endif /* _ASM_UNISTD_32_H */
diff --git a/linux-headers/asm-x86/unistd_64.h b/linux-headers/asm-x86/unistd_64.h
index 5b55d6729a..cea67282eb 100644
--- a/linux-headers/asm-x86/unistd_64.h
+++ b/linux-headers/asm-x86/unistd_64.h
@@ -364,6 +364,8 @@
 #define __NR_futex_waitv 449
 #define __NR_set_mempolicy_home_node 450
 #define __NR_cachestat 451
+#define __NR_fchmodat2 452
+#define __NR_map_shadow_stack 453
 
 
 #endif /* _ASM_UNISTD_64_H */
diff --git a/linux-headers/asm-x86/unistd_x32.h b/linux-headers/asm-x86/unistd_x32.h
index e8a007543d..5b2e79bf4c 100644
--- a/linux-headers/asm-x86/unistd_x32.h
+++ b/linux-headers/asm-x86/unistd_x32.h
@@ -317,6 +317,7 @@
 #define __NR_futex_waitv (__X32_SYSCALL_BIT + 449)
 #define __NR_set_mempolicy_home_node (__X32_SYSCALL_BIT + 450)
 #define __NR_cachestat (__X32_SYSCALL_BIT + 451)
+#define __NR_fchmodat2 (__X32_SYSCALL_BIT + 452)
 #define __NR_rt_sigaction (__X32_SYSCALL_BIT + 512)
 #define __NR_rt_sigreturn (__X32_SYSCALL_BIT + 513)
 #define __NR_ioctl (__X32_SYSCALL_BIT + 514)
diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h
index 1f3f3333a4..0d74ee999a 100644
--- a/linux-headers/linux/kvm.h
+++ b/linux-headers/linux/kvm.h
@@ -1414,9 +1414,16 @@ struct kvm_device_attr {
 	__u64	addr;		/* userspace address of attr data */
 };
 
-#define  KVM_DEV_VFIO_GROUP			1
-#define   KVM_DEV_VFIO_GROUP_ADD			1
-#define   KVM_DEV_VFIO_GROUP_DEL			2
+#define  KVM_DEV_VFIO_FILE			1
+
+#define   KVM_DEV_VFIO_FILE_ADD			1
+#define   KVM_DEV_VFIO_FILE_DEL			2
+
+/* KVM_DEV_VFIO_GROUP aliases are for compile time uapi compatibility */
+#define  KVM_DEV_VFIO_GROUP	KVM_DEV_VFIO_FILE
+
+#define   KVM_DEV_VFIO_GROUP_ADD	KVM_DEV_VFIO_FILE_ADD
+#define   KVM_DEV_VFIO_GROUP_DEL	KVM_DEV_VFIO_FILE_DEL
 #define   KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE		3
 
 enum kvm_device_type {
diff --git a/linux-headers/linux/stddef.h b/linux-headers/linux/stddef.h
index bb6ea517ef..9bb07083ac 100644
--- a/linux-headers/linux/stddef.h
+++ b/linux-headers/linux/stddef.h
@@ -45,3 +45,7 @@
 		TYPE NAME[]; \
 	}
 #endif
+
+#ifndef __counted_by
+#define __counted_by(m)
+#endif
diff --git a/linux-headers/linux/userfaultfd.h b/linux-headers/linux/userfaultfd.h
index 14e402263a..59978fbaae 100644
--- a/linux-headers/linux/userfaultfd.h
+++ b/linux-headers/linux/userfaultfd.h
@@ -39,7 +39,8 @@
 			   UFFD_FEATURE_MINOR_SHMEM |		\
 			   UFFD_FEATURE_EXACT_ADDRESS |		\
 			   UFFD_FEATURE_WP_HUGETLBFS_SHMEM |	\
-			   UFFD_FEATURE_WP_UNPOPULATED)
+			   UFFD_FEATURE_WP_UNPOPULATED |	\
+			   UFFD_FEATURE_POISON)
 #define UFFD_API_IOCTLS				\
 	((__u64)1 << _UFFDIO_REGISTER |		\
 	 (__u64)1 << _UFFDIO_UNREGISTER |	\
@@ -49,12 +50,14 @@
 	 (__u64)1 << _UFFDIO_COPY |		\
 	 (__u64)1 << _UFFDIO_ZEROPAGE |		\
 	 (__u64)1 << _UFFDIO_WRITEPROTECT |	\
-	 (__u64)1 << _UFFDIO_CONTINUE)
+	 (__u64)1 << _UFFDIO_CONTINUE |		\
+	 (__u64)1 << _UFFDIO_POISON)
 #define UFFD_API_RANGE_IOCTLS_BASIC		\
 	((__u64)1 << _UFFDIO_WAKE |		\
 	 (__u64)1 << _UFFDIO_COPY |		\
+	 (__u64)1 << _UFFDIO_WRITEPROTECT |	\
 	 (__u64)1 << _UFFDIO_CONTINUE |		\
-	 (__u64)1 << _UFFDIO_WRITEPROTECT)
+	 (__u64)1 << _UFFDIO_POISON)
 
 /*
  * Valid ioctl command number range with this API is from 0x00 to
@@ -71,6 +74,7 @@
 #define _UFFDIO_ZEROPAGE		(0x04)
 #define _UFFDIO_WRITEPROTECT		(0x06)
 #define _UFFDIO_CONTINUE		(0x07)
+#define _UFFDIO_POISON			(0x08)
 #define _UFFDIO_API			(0x3F)
 
 /* userfaultfd ioctl ids */
@@ -91,6 +95,8 @@
 				      struct uffdio_writeprotect)
 #define UFFDIO_CONTINUE		_IOWR(UFFDIO, _UFFDIO_CONTINUE,	\
 				      struct uffdio_continue)
+#define UFFDIO_POISON		_IOWR(UFFDIO, _UFFDIO_POISON, \
+				      struct uffdio_poison)
 
 /* read() structure */
 struct uffd_msg {
@@ -225,6 +231,7 @@ struct uffdio_api {
 #define UFFD_FEATURE_EXACT_ADDRESS		(1<<11)
 #define UFFD_FEATURE_WP_HUGETLBFS_SHMEM		(1<<12)
 #define UFFD_FEATURE_WP_UNPOPULATED		(1<<13)
+#define UFFD_FEATURE_POISON			(1<<14)
 	__u64 features;
 
 	__u64 ioctls;
@@ -321,6 +328,18 @@ struct uffdio_continue {
 	__s64 mapped;
 };
 
+struct uffdio_poison {
+	struct uffdio_range range;
+#define UFFDIO_POISON_MODE_DONTWAKE		((__u64)1<<0)
+	__u64 mode;
+
+	/*
+	 * Fields below here are written by the ioctl and must be at the end:
+	 * the copy_from_user will not read past here.
+	 */
+	__s64 updated;
+};
+
 /*
  * Flags for the userfaultfd(2) system call itself.
  */
diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h
index 16db89071e..acf72b4999 100644
--- a/linux-headers/linux/vfio.h
+++ b/linux-headers/linux/vfio.h
@@ -217,6 +217,7 @@ struct vfio_device_info {
 	__u32	num_regions;	/* Max region index + 1 */
 	__u32	num_irqs;	/* Max IRQ index + 1 */
 	__u32   cap_offset;	/* Offset within info struct of first cap */
+	__u32   pad;
 };
 #define VFIO_DEVICE_GET_INFO		_IO(VFIO_TYPE, VFIO_BASE + 7)
 
@@ -677,11 +678,60 @@ enum {
  * VFIO_DEVICE_GET_PCI_HOT_RESET_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 12,
  *					      struct vfio_pci_hot_reset_info)
  *
+ * This command is used to query the affected devices in the hot reset for
+ * a given device.
+ *
+ * This command always reports the segment, bus, and devfn information for
+ * each affected device, and selectively reports the group_id or devid per
+ * the way how the calling device is opened.
+ *
+ *	- If the calling device is opened via the traditional group/container
+ *	  API, group_id is reported.  User should check if it has owned all
+ *	  the affected devices and provides a set of group fds to prove the
+ *	  ownership in VFIO_DEVICE_PCI_HOT_RESET ioctl.
+ *
+ *	- If the calling device is opened as a cdev, devid is reported.
+ *	  Flag VFIO_PCI_HOT_RESET_FLAG_DEV_ID is set to indicate this
+ *	  data type.  All the affected devices should be represented in
+ *	  the dev_set, ex. bound to a vfio driver, and also be owned by
+ *	  this interface which is determined by the following conditions:
+ *	  1) Has a valid devid within the iommufd_ctx of the calling device.
+ *	     Ownership cannot be determined across separate iommufd_ctx and
+ *	     the cdev calling conventions do not support a proof-of-ownership
+ *	     model as provided in the legacy group interface.  In this case
+ *	     valid devid with value greater than zero is provided in the return
+ *	     structure.
+ *	  2) Does not have a valid devid within the iommufd_ctx of the calling
+ *	     device, but belongs to the same IOMMU group as the calling device
+ *	     or another opened device that has a valid devid within the
+ *	     iommufd_ctx of the calling device.  This provides implicit ownership
+ *	     for devices within the same DMA isolation context.  In this case
+ *	     the devid value of VFIO_PCI_DEVID_OWNED is provided in the return
+ *	     structure.
+ *
+ *	  A devid value of VFIO_PCI_DEVID_NOT_OWNED is provided in the return
+ *	  structure for affected devices where device is NOT represented in the
+ *	  dev_set or ownership is not available.  Such devices prevent the use
+ *	  of VFIO_DEVICE_PCI_HOT_RESET ioctl outside of the proof-of-ownership
+ *	  calling conventions (ie. via legacy group accessed devices).  Flag
+ *	  VFIO_PCI_HOT_RESET_FLAG_DEV_ID_OWNED would be set when all the
+ *	  affected devices are represented in the dev_set and also owned by
+ *	  the user.  This flag is available only when
+ *	  flag VFIO_PCI_HOT_RESET_FLAG_DEV_ID is set, otherwise reserved.
+ *	  When set, user could invoke VFIO_DEVICE_PCI_HOT_RESET with a zero
+ *	  length fd array on the calling device as the ownership is validated
+ *	  by iommufd_ctx.
+ *
  * Return: 0 on success, -errno on failure:
  *	-enospc = insufficient buffer, -enodev = unsupported for device.
  */
 struct vfio_pci_dependent_device {
-	__u32	group_id;
+	union {
+		__u32   group_id;
+		__u32	devid;
+#define VFIO_PCI_DEVID_OWNED		0
+#define VFIO_PCI_DEVID_NOT_OWNED	-1
+	};
 	__u16	segment;
 	__u8	bus;
 	__u8	devfn; /* Use PCI_SLOT/PCI_FUNC */
@@ -690,6 +740,8 @@ struct vfio_pci_dependent_device {
 struct vfio_pci_hot_reset_info {
 	__u32	argsz;
 	__u32	flags;
+#define VFIO_PCI_HOT_RESET_FLAG_DEV_ID		(1 << 0)
+#define VFIO_PCI_HOT_RESET_FLAG_DEV_ID_OWNED	(1 << 1)
 	__u32	count;
 	struct vfio_pci_dependent_device	devices[];
 };
@@ -700,6 +752,24 @@ struct vfio_pci_hot_reset_info {
  * VFIO_DEVICE_PCI_HOT_RESET - _IOW(VFIO_TYPE, VFIO_BASE + 13,
  *				    struct vfio_pci_hot_reset)
  *
+ * A PCI hot reset results in either a bus or slot reset which may affect
+ * other devices sharing the bus/slot.  The calling user must have
+ * ownership of the full set of affected devices as determined by the
+ * VFIO_DEVICE_GET_PCI_HOT_RESET_INFO ioctl.
+ *
+ * When called on a device file descriptor acquired through the vfio
+ * group interface, the user is required to provide proof of ownership
+ * of those affected devices via the group_fds array in struct
+ * vfio_pci_hot_reset.
+ *
+ * When called on a direct cdev opened vfio device, the flags field of
+ * struct vfio_pci_hot_reset_info reports the ownership status of the
+ * affected devices and this ioctl must be called with an empty group_fds
+ * array.  See above INFO ioctl definition for ownership requirements.
+ *
+ * Mixed usage of legacy groups and cdevs across the set of affected
+ * devices is not supported.
+ *
  * Return: 0 on success, -errno on failure.
  */
 struct vfio_pci_hot_reset {
@@ -828,6 +898,83 @@ struct vfio_device_feature {
 
 #define VFIO_DEVICE_FEATURE		_IO(VFIO_TYPE, VFIO_BASE + 17)
 
+/*
+ * VFIO_DEVICE_BIND_IOMMUFD - _IOR(VFIO_TYPE, VFIO_BASE + 18,
+ *				   struct vfio_device_bind_iommufd)
+ * @argsz:	 User filled size of this data.
+ * @flags:	 Must be 0.
+ * @iommufd:	 iommufd to bind.
+ * @out_devid:	 The device id generated by this bind. devid is a handle for
+ *		 this device/iommufd bond and can be used in IOMMUFD commands.
+ *
+ * Bind a vfio_device to the specified iommufd.
+ *
+ * User is restricted from accessing the device before the binding operation
+ * is completed.  Only allowed on cdev fds.
+ *
+ * Unbind is automatically conducted when device fd is closed.
+ *
+ * Return: 0 on success, -errno on failure.
+ */
+struct vfio_device_bind_iommufd {
+	__u32		argsz;
+	__u32		flags;
+	__s32		iommufd;
+	__u32		out_devid;
+};
+
+#define VFIO_DEVICE_BIND_IOMMUFD	_IO(VFIO_TYPE, VFIO_BASE + 18)
+
+/*
+ * VFIO_DEVICE_ATTACH_IOMMUFD_PT - _IOW(VFIO_TYPE, VFIO_BASE + 19,
+ *					struct vfio_device_attach_iommufd_pt)
+ * @argsz:	User filled size of this data.
+ * @flags:	Must be 0.
+ * @pt_id:	Input the target id which can represent an ioas or a hwpt
+ *		allocated via iommufd subsystem.
+ *		Output the input ioas id or the attached hwpt id which could
+ *		be the specified hwpt itself or a hwpt automatically created
+ *		for the specified ioas by kernel during the attachment.
+ *
+ * Associate the device with an address space within the bound iommufd.
+ * Undo by VFIO_DEVICE_DETACH_IOMMUFD_PT or device fd close.  This is only
+ * allowed on cdev fds.
+ *
+ * If a vfio device is currently attached to a valid hw_pagetable, without doing
+ * a VFIO_DEVICE_DETACH_IOMMUFD_PT, a second VFIO_DEVICE_ATTACH_IOMMUFD_PT ioctl
+ * passing in another hw_pagetable (hwpt) id is allowed. This action, also known
+ * as a hw_pagetable replacement, will replace the device's currently attached
+ * hw_pagetable with a new hw_pagetable corresponding to the given pt_id.
+ *
+ * Return: 0 on success, -errno on failure.
+ */
+struct vfio_device_attach_iommufd_pt {
+	__u32	argsz;
+	__u32	flags;
+	__u32	pt_id;
+};
+
+#define VFIO_DEVICE_ATTACH_IOMMUFD_PT		_IO(VFIO_TYPE, VFIO_BASE + 19)
+
+/*
+ * VFIO_DEVICE_DETACH_IOMMUFD_PT - _IOW(VFIO_TYPE, VFIO_BASE + 20,
+ *					struct vfio_device_detach_iommufd_pt)
+ * @argsz:	User filled size of this data.
+ * @flags:	Must be 0.
+ *
+ * Remove the association of the device and its current associated address
+ * space.  After it, the device should be in a blocking DMA state.  This is only
+ * allowed on cdev fds.
+ *
+ * Return: 0 on success, -errno on failure.
+ */
+struct vfio_device_detach_iommufd_pt {
+	__u32	argsz;
+	__u32	flags;
+};
+
+#define VFIO_DEVICE_DETACH_IOMMUFD_PT		_IO(VFIO_TYPE, VFIO_BASE + 20)
+
 /*
  * Provide support for setting a PCI VF Token, which is used as a shared
  * secret between PF and VF drivers.  This feature may only be set on a
@@ -1304,6 +1451,7 @@ struct vfio_iommu_type1_info {
 #define VFIO_IOMMU_INFO_CAPS	(1 << 1)	/* Info supports caps */
 	__u64	iova_pgsizes;	/* Bitmap of supported page sizes */
 	__u32   cap_offset;	/* Offset within info struct of first cap */
+	__u32   pad;
 };
 
 /*
diff --git a/linux-user/aarch64/target_proc.h b/linux-user/aarch64/target_proc.h
new file mode 100644
index 0000000000..907df4dcd2
--- /dev/null
+++ b/linux-user/aarch64/target_proc.h
@@ -0,0 +1 @@
+#include "../arm/target_proc.h"
diff --git a/linux-user/alpha/target_proc.h b/linux-user/alpha/target_proc.h
new file mode 100644
index 0000000000..dac37dffc9
--- /dev/null
+++ b/linux-user/alpha/target_proc.h
@@ -0,0 +1,67 @@
+/*
+ * Alpha specific proc functions for linux-user
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+#ifndef ALPHA_TARGET_PROC_H
+#define ALPHA_TARGET_PROC_H
+
+static int open_cpuinfo(CPUArchState *cpu_env, int fd)
+{
+    int max_cpus = sysconf(_SC_NPROCESSORS_CONF);
+    int num_cpus = sysconf(_SC_NPROCESSORS_ONLN);
+    unsigned long cpu_mask;
+    char model[32];
+    const char *p, *q;
+    int t;
+
+    p = object_class_get_name(OBJECT_CLASS(CPU_GET_CLASS(env_cpu(cpu_env))));
+    q = strchr(p, '-');
+    t = q - p;
+    assert(t < sizeof(model));
+    memcpy(model, p, t);
+    model[t] = 0;
+
+    t = sched_getaffinity(getpid(), sizeof(cpu_mask), (cpu_set_t *)&cpu_mask);
+    if (t < 0) {
+        if (num_cpus >= sizeof(cpu_mask) * 8) {
+            cpu_mask = -1;
+        } else {
+            cpu_mask = (1UL << num_cpus) - 1;
+        }
+    }
+
+    dprintf(fd,
+            "cpu\t\t\t: Alpha\n"
+            "cpu model\t\t: %s\n"
+            "cpu variation\t\t: 0\n"
+            "cpu revision\t\t: 0\n"
+            "cpu serial number\t: JA00000000\n"
+            "system type\t\t: QEMU\n"
+            "system variation\t: QEMU_v" QEMU_VERSION "\n"
+            "system revision\t\t: 0\n"
+            "system serial number\t: AY00000000\n"
+            "cycle frequency [Hz]\t: 250000000\n"
+            "timer frequency [Hz]\t: 250.00\n"
+            "page size [bytes]\t: %d\n"
+            "phys. address bits\t: %d\n"
+            "max. addr. space #\t: 255\n"
+            "BogoMIPS\t\t: 2500.00\n"
+            "kernel unaligned acc\t: 0 (pc=0,va=0)\n"
+            "user unaligned acc\t: 0 (pc=0,va=0)\n"
+            "platform string\t\t: AlphaServer QEMU user-mode VM\n"
+            "cpus detected\t\t: %d\n"
+            "cpus active\t\t: %d\n"
+            "cpu active mask\t\t: %016lx\n"
+            "L1 Icache\t\t: n/a\n"
+            "L1 Dcache\t\t: n/a\n"
+            "L2 cache\t\t: n/a\n"
+            "L3 cache\t\t: n/a\n",
+            model, TARGET_PAGE_SIZE, TARGET_PHYS_ADDR_SPACE_BITS,
+            max_cpus, num_cpus, cpu_mask);
+
+    return 0;
+}
+#define HAVE_ARCH_PROC_CPUINFO
+
+#endif /* ALPHA_TARGET_PROC_H */
diff --git a/linux-user/arm/target_proc.h b/linux-user/arm/target_proc.h
new file mode 100644
index 0000000000..ac75af9ca6
--- /dev/null
+++ b/linux-user/arm/target_proc.h
@@ -0,0 +1,101 @@
+/*
+ * Arm specific proc functions for linux-user
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+#ifndef ARM_TARGET_PROC_H
+#define ARM_TARGET_PROC_H
+
+static int open_cpuinfo(CPUArchState *cpu_env, int fd)
+{
+    ARMCPU *cpu = env_archcpu(cpu_env);
+    int arch, midr_rev, midr_part, midr_var, midr_impl;
+    target_ulong elf_hwcap = get_elf_hwcap();
+    target_ulong elf_hwcap2 = get_elf_hwcap2();
+    const char *elf_name;
+    int num_cpus, len_part, len_var;
+
+#if TARGET_BIG_ENDIAN
+# define END_SUFFIX "b"
+#else
+# define END_SUFFIX "l"
+#endif
+
+    arch = 8;
+    elf_name = "v8" END_SUFFIX;
+    midr_rev = FIELD_EX32(cpu->midr, MIDR_EL1, REVISION);
+    midr_part = FIELD_EX32(cpu->midr, MIDR_EL1, PARTNUM);
+    midr_var = FIELD_EX32(cpu->midr, MIDR_EL1, VARIANT);
+    midr_impl = FIELD_EX32(cpu->midr, MIDR_EL1, IMPLEMENTER);
+    len_part = 3;
+    len_var = 1;
+
+#ifndef TARGET_AARCH64
+    /* For simplicity, treat ARMv8 as an arm64 kernel with CONFIG_COMPAT. */
+    if (!arm_feature(&cpu->env, ARM_FEATURE_V8)) {
+        if (arm_feature(&cpu->env, ARM_FEATURE_V7)) {
+            arch = 7;
+            midr_var = (cpu->midr >> 16) & 0x7f;
+            len_var = 2;
+            if (arm_feature(&cpu->env, ARM_FEATURE_M)) {
+                elf_name = "armv7m" END_SUFFIX;
+            } else {
+                elf_name = "armv7" END_SUFFIX;
+            }
+        } else {
+            midr_part = cpu->midr >> 4;
+            len_part = 7;
+            if (arm_feature(&cpu->env, ARM_FEATURE_V6)) {
+                arch = 6;
+                elf_name = "armv6" END_SUFFIX;
+            } else if (arm_feature(&cpu->env, ARM_FEATURE_V5)) {
+                arch = 5;
+                elf_name = "armv5t" END_SUFFIX;
+            } else {
+                arch = 4;
+                elf_name = "armv4" END_SUFFIX;
+            }
+        }
+    }
+#endif
+
+#undef END_SUFFIX
+
+    num_cpus = sysconf(_SC_NPROCESSORS_ONLN);
+    for (int i = 0; i < num_cpus; i++) {
+        dprintf(fd,
+                "processor\t: %d\n"
+                "model name\t: ARMv%d Processor rev %d (%s)\n"
+                "BogoMIPS\t: 100.00\n"
+                "Features\t:",
+                i, arch, midr_rev, elf_name);
+
+        for (target_ulong j = elf_hwcap; j ; j &= j - 1) {
+            dprintf(fd, " %s", elf_hwcap_str(ctz64(j)));
+        }
+        for (target_ulong j = elf_hwcap2; j ; j &= j - 1) {
+            dprintf(fd, " %s", elf_hwcap2_str(ctz64(j)));
+        }
+
+        dprintf(fd, "\n"
+                "CPU implementer\t: 0x%02x\n"
+                "CPU architecture: %d\n"
+                "CPU variant\t: 0x%0*x\n",
+                midr_impl, arch, len_var, midr_var);
+        if (arch >= 7) {
+            dprintf(fd, "CPU part\t: 0x%0*x\n", len_part, midr_part);
+        }
+        dprintf(fd, "CPU revision\t: %d\n\n", midr_rev);
+    }
+
+    if (arch < 8) {
+        dprintf(fd, "Hardware\t: QEMU v%s %s\n", QEMU_VERSION,
+                cpu->dtb_compatible ? : "");
+        dprintf(fd, "Revision\t: 0000\n");
+        dprintf(fd, "Serial\t\t: 0000000000000000\n");
+    }
+    return 0;
+}
+#define HAVE_ARCH_PROC_CPUINFO
+
+#endif /* ARM_TARGET_PROC_H */
diff --git a/linux-user/cris/target_proc.h b/linux-user/cris/target_proc.h
new file mode 100644
index 0000000000..43fe29ca72
--- /dev/null
+++ b/linux-user/cris/target_proc.h
@@ -0,0 +1 @@
+/* No target-specific /proc support */
diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index 161dd95584..e02ac29d1f 100644
--- a/linux-user/elfload.c
+++ b/linux-user/elfload.c
@@ -143,8 +143,6 @@ static uint32_t get_elf_hwcap(void)
 }
 
 #ifdef TARGET_X86_64
-#define ELF_START_MMAP 0x2aaaaab000ULL
-
 #define ELF_CLASS      ELFCLASS64
 #define ELF_ARCH       EM_X86_64
 
@@ -221,8 +219,6 @@ static bool init_guest_commpage(void)
 #endif
 #else
 
-#define ELF_START_MMAP 0x80000000
-
 /*
  * This is used to ensure we don't load something for the wrong architecture.
  */
@@ -308,8 +304,6 @@ static void elf_core_copy_regs(target_elf_gregset_t *regs, const CPUX86State *en
 #ifndef TARGET_AARCH64
 /* 32 bit ARM definitions */
 
-#define ELF_START_MMAP 0x80000000
-
 #define ELF_ARCH        EM_ARM
 #define ELF_CLASS       ELFCLASS32
 #define EXSTACK_DEFAULT true
@@ -466,7 +460,7 @@ static bool init_guest_commpage(void)
 #define ELF_HWCAP get_elf_hwcap()
 #define ELF_HWCAP2 get_elf_hwcap2()
 
-static uint32_t get_elf_hwcap(void)
+uint32_t get_elf_hwcap(void)
 {
     ARMCPU *cpu = ARM_CPU(thread_cpu);
     uint32_t hwcaps = 0;
@@ -508,7 +502,7 @@ static uint32_t get_elf_hwcap(void)
     return hwcaps;
 }
 
-static uint32_t get_elf_hwcap2(void)
+uint32_t get_elf_hwcap2(void)
 {
     ARMCPU *cpu = ARM_CPU(thread_cpu);
     uint32_t hwcaps = 0;
@@ -521,6 +515,49 @@ static uint32_t get_elf_hwcap2(void)
     return hwcaps;
 }
 
+const char *elf_hwcap_str(uint32_t bit)
+{
+    static const char *hwcap_str[] = {
+    [__builtin_ctz(ARM_HWCAP_ARM_SWP      )] = "swp",
+    [__builtin_ctz(ARM_HWCAP_ARM_HALF     )] = "half",
+    [__builtin_ctz(ARM_HWCAP_ARM_THUMB    )] = "thumb",
+    [__builtin_ctz(ARM_HWCAP_ARM_26BIT    )] = "26bit",
+    [__builtin_ctz(ARM_HWCAP_ARM_FAST_MULT)] = "fast_mult",
+    [__builtin_ctz(ARM_HWCAP_ARM_FPA      )] = "fpa",
+    [__builtin_ctz(ARM_HWCAP_ARM_VFP      )] = "vfp",
+    [__builtin_ctz(ARM_HWCAP_ARM_EDSP     )] = "edsp",
+    [__builtin_ctz(ARM_HWCAP_ARM_JAVA     )] = "java",
+    [__builtin_ctz(ARM_HWCAP_ARM_IWMMXT   )] = "iwmmxt",
+    [__builtin_ctz(ARM_HWCAP_ARM_CRUNCH   )] = "crunch",
+    [__builtin_ctz(ARM_HWCAP_ARM_THUMBEE  )] = "thumbee",
+    [__builtin_ctz(ARM_HWCAP_ARM_NEON     )] = "neon",
+    [__builtin_ctz(ARM_HWCAP_ARM_VFPv3    )] = "vfpv3",
+    [__builtin_ctz(ARM_HWCAP_ARM_VFPv3D16 )] = "vfpv3d16",
+    [__builtin_ctz(ARM_HWCAP_ARM_TLS      )] = "tls",
+    [__builtin_ctz(ARM_HWCAP_ARM_VFPv4    )] = "vfpv4",
+    [__builtin_ctz(ARM_HWCAP_ARM_IDIVA    )] = "idiva",
+    [__builtin_ctz(ARM_HWCAP_ARM_IDIVT    )] = "idivt",
+    [__builtin_ctz(ARM_HWCAP_ARM_VFPD32   )] = "vfpd32",
+    [__builtin_ctz(ARM_HWCAP_ARM_LPAE     )] = "lpae",
+    [__builtin_ctz(ARM_HWCAP_ARM_EVTSTRM  )] = "evtstrm",
+    };
+
+    return bit < ARRAY_SIZE(hwcap_str) ? hwcap_str[bit] : NULL;
+}
+
+const char *elf_hwcap2_str(uint32_t bit)
+{
+    static const char *hwcap_str[] = {
+    [__builtin_ctz(ARM_HWCAP2_ARM_AES  )] = "aes",
+    [__builtin_ctz(ARM_HWCAP2_ARM_PMULL)] = "pmull",
+    [__builtin_ctz(ARM_HWCAP2_ARM_SHA1 )] = "sha1",
+    [__builtin_ctz(ARM_HWCAP2_ARM_SHA2 )] = "sha2",
+    [__builtin_ctz(ARM_HWCAP2_ARM_CRC32)] = "crc32",
+    };
+
+    return bit < ARRAY_SIZE(hwcap_str) ? hwcap_str[bit] : NULL;
+}
+
 #undef GET_FEATURE
 #undef GET_FEATURE_ID
 
@@ -557,7 +594,6 @@ static const char *get_elf_platform(void)
 
 #else
 /* 64 bit ARM definitions */
-#define ELF_START_MMAP 0x80000000
 
 #define ELF_ARCH        EM_AARCH64
 #define ELF_CLASS       ELFCLASS64
@@ -668,7 +704,7 @@ enum {
 #define GET_FEATURE_ID(feat, hwcap) \
     do { if (cpu_isar_feature(feat, cpu)) { hwcaps |= hwcap; } } while (0)
 
-static uint32_t get_elf_hwcap(void)
+uint32_t get_elf_hwcap(void)
 {
     ARMCPU *cpu = ARM_CPU(thread_cpu);
     uint32_t hwcaps = 0;
@@ -706,7 +742,7 @@ static uint32_t get_elf_hwcap(void)
     return hwcaps;
 }
 
-static uint32_t get_elf_hwcap2(void)
+uint32_t get_elf_hwcap2(void)
 {
     ARMCPU *cpu = ARM_CPU(thread_cpu);
     uint32_t hwcaps = 0;
@@ -741,6 +777,85 @@ static uint32_t get_elf_hwcap2(void)
     return hwcaps;
 }
 
+const char *elf_hwcap_str(uint32_t bit)
+{
+    static const char *hwcap_str[] = {
+    [__builtin_ctz(ARM_HWCAP_A64_FP      )] = "fp",
+    [__builtin_ctz(ARM_HWCAP_A64_ASIMD   )] = "asimd",
+    [__builtin_ctz(ARM_HWCAP_A64_EVTSTRM )] = "evtstrm",
+    [__builtin_ctz(ARM_HWCAP_A64_AES     )] = "aes",
+    [__builtin_ctz(ARM_HWCAP_A64_PMULL   )] = "pmull",
+    [__builtin_ctz(ARM_HWCAP_A64_SHA1    )] = "sha1",
+    [__builtin_ctz(ARM_HWCAP_A64_SHA2    )] = "sha2",
+    [__builtin_ctz(ARM_HWCAP_A64_CRC32   )] = "crc32",
+    [__builtin_ctz(ARM_HWCAP_A64_ATOMICS )] = "atomics",
+    [__builtin_ctz(ARM_HWCAP_A64_FPHP    )] = "fphp",
+    [__builtin_ctz(ARM_HWCAP_A64_ASIMDHP )] = "asimdhp",
+    [__builtin_ctz(ARM_HWCAP_A64_CPUID   )] = "cpuid",
+    [__builtin_ctz(ARM_HWCAP_A64_ASIMDRDM)] = "asimdrdm",
+    [__builtin_ctz(ARM_HWCAP_A64_JSCVT   )] = "jscvt",
+    [__builtin_ctz(ARM_HWCAP_A64_FCMA    )] = "fcma",
+    [__builtin_ctz(ARM_HWCAP_A64_LRCPC   )] = "lrcpc",
+    [__builtin_ctz(ARM_HWCAP_A64_DCPOP   )] = "dcpop",
+    [__builtin_ctz(ARM_HWCAP_A64_SHA3    )] = "sha3",
+    [__builtin_ctz(ARM_HWCAP_A64_SM3     )] = "sm3",
+    [__builtin_ctz(ARM_HWCAP_A64_SM4     )] = "sm4",
+    [__builtin_ctz(ARM_HWCAP_A64_ASIMDDP )] = "asimddp",
+    [__builtin_ctz(ARM_HWCAP_A64_SHA512  )] = "sha512",
+    [__builtin_ctz(ARM_HWCAP_A64_SVE     )] = "sve",
+    [__builtin_ctz(ARM_HWCAP_A64_ASIMDFHM)] = "asimdfhm",
+    [__builtin_ctz(ARM_HWCAP_A64_DIT     )] = "dit",
+    [__builtin_ctz(ARM_HWCAP_A64_USCAT   )] = "uscat",
+    [__builtin_ctz(ARM_HWCAP_A64_ILRCPC  )] = "ilrcpc",
+    [__builtin_ctz(ARM_HWCAP_A64_FLAGM   )] = "flagm",
+    [__builtin_ctz(ARM_HWCAP_A64_SSBS    )] = "ssbs",
+    [__builtin_ctz(ARM_HWCAP_A64_SB      )] = "sb",
+    [__builtin_ctz(ARM_HWCAP_A64_PACA    )] = "paca",
+    [__builtin_ctz(ARM_HWCAP_A64_PACG    )] = "pacg",
+    };
+
+    return bit < ARRAY_SIZE(hwcap_str) ? hwcap_str[bit] : NULL;
+}
+
+const char *elf_hwcap2_str(uint32_t bit)
+{
+    static const char *hwcap_str[] = {
+    [__builtin_ctz(ARM_HWCAP2_A64_DCPODP       )] = "dcpodp",
+    [__builtin_ctz(ARM_HWCAP2_A64_SVE2         )] = "sve2",
+    [__builtin_ctz(ARM_HWCAP2_A64_SVEAES       )] = "sveaes",
+    [__builtin_ctz(ARM_HWCAP2_A64_SVEPMULL     )] = "svepmull",
+    [__builtin_ctz(ARM_HWCAP2_A64_SVEBITPERM   )] = "svebitperm",
+    [__builtin_ctz(ARM_HWCAP2_A64_SVESHA3      )] = "svesha3",
+    [__builtin_ctz(ARM_HWCAP2_A64_SVESM4       )] = "svesm4",
+    [__builtin_ctz(ARM_HWCAP2_A64_FLAGM2       )] = "flagm2",
+    [__builtin_ctz(ARM_HWCAP2_A64_FRINT        )] = "frint",
+    [__builtin_ctz(ARM_HWCAP2_A64_SVEI8MM      )] = "svei8mm",
+    [__builtin_ctz(ARM_HWCAP2_A64_SVEF32MM     )] = "svef32mm",
+    [__builtin_ctz(ARM_HWCAP2_A64_SVEF64MM     )] = "svef64mm",
+    [__builtin_ctz(ARM_HWCAP2_A64_SVEBF16      )] = "svebf16",
+    [__builtin_ctz(ARM_HWCAP2_A64_I8MM         )] = "i8mm",
+    [__builtin_ctz(ARM_HWCAP2_A64_BF16         )] = "bf16",
+    [__builtin_ctz(ARM_HWCAP2_A64_DGH          )] = "dgh",
+    [__builtin_ctz(ARM_HWCAP2_A64_RNG          )] = "rng",
+    [__builtin_ctz(ARM_HWCAP2_A64_BTI          )] = "bti",
+    [__builtin_ctz(ARM_HWCAP2_A64_MTE          )] = "mte",
+    [__builtin_ctz(ARM_HWCAP2_A64_ECV          )] = "ecv",
+    [__builtin_ctz(ARM_HWCAP2_A64_AFP          )] = "afp",
+    [__builtin_ctz(ARM_HWCAP2_A64_RPRES        )] = "rpres",
+    [__builtin_ctz(ARM_HWCAP2_A64_MTE3         )] = "mte3",
+    [__builtin_ctz(ARM_HWCAP2_A64_SME          )] = "sme",
+    [__builtin_ctz(ARM_HWCAP2_A64_SME_I16I64   )] = "sme_i16i64",
+    [__builtin_ctz(ARM_HWCAP2_A64_SME_F64F64   )] = "sme_f64f64",
+    [__builtin_ctz(ARM_HWCAP2_A64_SME_I8I32    )] = "sme_i8i32",
+    [__builtin_ctz(ARM_HWCAP2_A64_SME_F16F32   )] = "sme_f16f32",
+    [__builtin_ctz(ARM_HWCAP2_A64_SME_B16F32   )] = "sme_b16f32",
+    [__builtin_ctz(ARM_HWCAP2_A64_SME_F32F32   )] = "sme_f32f32",
+    [__builtin_ctz(ARM_HWCAP2_A64_SME_FA64     )] = "sme_fa64",
+    };
+
+    return bit < ARRAY_SIZE(hwcap_str) ? hwcap_str[bit] : NULL;
+}
+
 #undef GET_FEATURE_ID
 
 #endif /* not TARGET_AARCH64 */
@@ -749,7 +864,6 @@ static uint32_t get_elf_hwcap2(void)
 #ifdef TARGET_SPARC
 #ifdef TARGET_SPARC64
 
-#define ELF_START_MMAP 0x80000000
 #define ELF_HWCAP  (HWCAP_SPARC_FLUSH | HWCAP_SPARC_STBAR | HWCAP_SPARC_SWAP \
                     | HWCAP_SPARC_MULDIV | HWCAP_SPARC_V9)
 #ifndef TARGET_ABI32
@@ -761,7 +875,6 @@ static uint32_t get_elf_hwcap2(void)
 #define ELF_CLASS   ELFCLASS64
 #define ELF_ARCH    EM_SPARCV9
 #else
-#define ELF_START_MMAP 0x80000000
 #define ELF_HWCAP  (HWCAP_SPARC_FLUSH | HWCAP_SPARC_STBAR | HWCAP_SPARC_SWAP \
                     | HWCAP_SPARC_MULDIV)
 #define ELF_CLASS   ELFCLASS32
@@ -783,7 +896,6 @@ static inline void init_thread(struct target_pt_regs *regs,
 #ifdef TARGET_PPC
 
 #define ELF_MACHINE    PPC_ELF_MACHINE
-#define ELF_START_MMAP 0x80000000
 
 #if defined(TARGET_PPC64)
 
@@ -986,8 +1098,6 @@ static void elf_core_copy_regs(target_elf_gregset_t *regs, const CPUPPCState *en
 
 #ifdef TARGET_LOONGARCH64
 
-#define ELF_START_MMAP 0x80000000
-
 #define ELF_CLASS   ELFCLASS64
 #define ELF_ARCH    EM_LOONGARCH
 #define EXSTACK_DEFAULT true
@@ -1078,8 +1188,6 @@ static uint32_t get_elf_hwcap(void)
 
 #ifdef TARGET_MIPS
 
-#define ELF_START_MMAP 0x80000000
-
 #ifdef TARGET_MIPS64
 #define ELF_CLASS   ELFCLASS64
 #else
@@ -1237,8 +1345,6 @@ static uint32_t get_elf_hwcap(void)
 
 #ifdef TARGET_MICROBLAZE
 
-#define ELF_START_MMAP 0x80000000
-
 #define elf_check_arch(x) ( (x) == EM_MICROBLAZE || (x) == EM_MICROBLAZE_OLD)
 
 #define ELF_CLASS   ELFCLASS32
@@ -1279,8 +1385,6 @@ static void elf_core_copy_regs(target_elf_gregset_t *regs, const CPUMBState *env
 
 #ifdef TARGET_NIOS2
 
-#define ELF_START_MMAP 0x80000000
-
 #define elf_check_arch(x) ((x) == EM_ALTERA_NIOS2)
 
 #define ELF_CLASS   ELFCLASS32
@@ -1376,8 +1480,6 @@ static void elf_core_copy_regs(target_elf_gregset_t *regs,
 
 #ifdef TARGET_OPENRISC
 
-#define ELF_START_MMAP 0x08000000
-
 #define ELF_ARCH EM_OPENRISC
 #define ELF_CLASS ELFCLASS32
 #define ELF_DATA  ELFDATA2MSB
@@ -1414,8 +1516,6 @@ static void elf_core_copy_regs(target_elf_gregset_t *regs,
 
 #ifdef TARGET_SH4
 
-#define ELF_START_MMAP 0x80000000
-
 #define ELF_CLASS ELFCLASS32
 #define ELF_ARCH  EM_SH
 
@@ -1496,8 +1596,6 @@ static uint32_t get_elf_hwcap(void)
 
 #ifdef TARGET_CRIS
 
-#define ELF_START_MMAP 0x80000000
-
 #define ELF_CLASS ELFCLASS32
 #define ELF_ARCH  EM_CRIS
 
@@ -1513,8 +1611,6 @@ static inline void init_thread(struct target_pt_regs *regs,
 
 #ifdef TARGET_M68K
 
-#define ELF_START_MMAP 0x80000000
-
 #define ELF_CLASS       ELFCLASS32
 #define ELF_ARCH        EM_68K
 
@@ -1564,8 +1660,6 @@ static void elf_core_copy_regs(target_elf_gregset_t *regs, const CPUM68KState *e
 
 #ifdef TARGET_ALPHA
 
-#define ELF_START_MMAP (0x30000000000ULL)
-
 #define ELF_CLASS      ELFCLASS64
 #define ELF_ARCH       EM_ALPHA
 
@@ -1583,8 +1677,6 @@ static inline void init_thread(struct target_pt_regs *regs,
 
 #ifdef TARGET_S390X
 
-#define ELF_START_MMAP (0x20000000000ULL)
-
 #define ELF_CLASS	ELFCLASS64
 #define ELF_DATA	ELFDATA2MSB
 #define ELF_ARCH	EM_S390
@@ -1695,7 +1787,6 @@ static void elf_core_copy_regs(target_elf_gregset_t *regs,
 
 #ifdef TARGET_RISCV
 
-#define ELF_START_MMAP 0x80000000
 #define ELF_ARCH  EM_RISCV
 
 #ifdef TARGET_RISCV32
@@ -1731,7 +1822,6 @@ static inline void init_thread(struct target_pt_regs *regs,
 
 #ifdef TARGET_HPPA
 
-#define ELF_START_MMAP  0x80000000
 #define ELF_CLASS       ELFCLASS32
 #define ELF_ARCH        EM_PARISC
 #define ELF_PLATFORM    "PARISC"
@@ -1783,8 +1873,6 @@ static bool init_guest_commpage(void)
 
 #ifdef TARGET_XTENSA
 
-#define ELF_START_MMAP 0x20000000
-
 #define ELF_CLASS       ELFCLASS32
 #define ELF_ARCH        EM_XTENSA
 
@@ -1850,8 +1938,6 @@ static void elf_core_copy_regs(target_elf_gregset_t *regs,
 
 #ifdef TARGET_HEXAGON
 
-#define ELF_START_MMAP 0x20000000
-
 #define ELF_CLASS       ELFCLASS32
 #define ELF_ARCH        EM_HEXAGON
 
@@ -3209,7 +3295,7 @@ static void load_elf_image(const char *image_name, int image_fd,
     info->start_data = -1;
     info->end_data = 0;
     /* Usual start for brk is after all sections of the main executable. */
-    info->brk = TARGET_PAGE_ALIGN(hiaddr);
+    info->brk = TARGET_PAGE_ALIGN(hiaddr + load_bias);
     info->elf_flags = ehdr->e_flags;
 
     prot_exec = PROT_EXEC;
@@ -3581,8 +3667,6 @@ int load_elf_binary(struct linux_binprm *bprm, struct image_info *info)
     interp_info.fp_abi = MIPS_ABI_FP_UNKNOWN;
 #endif
 
-    info->start_mmap = (abi_ulong)ELF_START_MMAP;
-
     load_elf_image(bprm->filename, bprm->fd, info,
                    &elf_interpreter, bprm->buf);
 
diff --git a/linux-user/flatload.c b/linux-user/flatload.c
index 8f5e9f489b..4331a11bf0 100644
--- a/linux-user/flatload.c
+++ b/linux-user/flatload.c
@@ -780,7 +780,7 @@ int load_flt_binary(struct linux_binprm *bprm, struct image_info *info)
     /* Enforce final stack alignment of 16 bytes.  This is sufficient
        for all current targets, and excess alignment is harmless.  */
     stack_len = bprm->envc + bprm->argc + 2;
-    stack_len += flat_argvp_envp_on_stack() ? 2 : 0; /* arvg, argp */
+    stack_len += flat_argvp_envp_on_stack() ? 2 : 0; /* argv, argp */
     stack_len += 1; /* argc */
     stack_len *= sizeof(abi_ulong);
     sp -= (sp - stack_len) & 15;
diff --git a/linux-user/hexagon/target_proc.h b/linux-user/hexagon/target_proc.h
new file mode 100644
index 0000000000..43fe29ca72
--- /dev/null
+++ b/linux-user/hexagon/target_proc.h
@@ -0,0 +1 @@
+/* No target-specific /proc support */
diff --git a/linux-user/hppa/signal.c b/linux-user/hppa/signal.c
index f253a15864..bda6e54655 100644
--- a/linux-user/hppa/signal.c
+++ b/linux-user/hppa/signal.c
@@ -149,16 +149,18 @@ void setup_rt_frame(int sig, struct target_sigaction *ka,
         target_ulong *fdesc, dest;
 
         haddr &= -4;
-        if (!lock_user_struct(VERIFY_READ, fdesc, haddr, 1)) {
+        fdesc = lock_user(VERIFY_READ, haddr, 2 * sizeof(target_ulong), 1);
+        if (!fdesc) {
             goto give_sigsegv;
         }
         __get_user(dest, fdesc);
         __get_user(env->gr[19], fdesc + 1);
-        unlock_user_struct(fdesc, haddr, 1);
+        unlock_user(fdesc, haddr, 0);
         haddr = dest;
     }
     env->iaoq_f = haddr;
     env->iaoq_b = haddr + 4;
+    env->psw_n = 0;
     return;
 
  give_sigsegv:
diff --git a/linux-user/hppa/target_proc.h b/linux-user/hppa/target_proc.h
new file mode 100644
index 0000000000..9340c3b6af
--- /dev/null
+++ b/linux-user/hppa/target_proc.h
@@ -0,0 +1,26 @@
+/*
+ * HPPA specific proc functions for linux-user
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+#ifndef HPPA_TARGET_PROC_H
+#define HPPA_TARGET_PROC_H
+
+static int open_cpuinfo(CPUArchState *cpu_env, int fd)
+{
+    int i, num_cpus;
+
+    num_cpus = sysconf(_SC_NPROCESSORS_ONLN);
+    for (i = 0; i < num_cpus; i++) {
+        dprintf(fd, "processor\t: %d\n", i);
+        dprintf(fd, "cpu family\t: PA-RISC 1.1e\n");
+        dprintf(fd, "cpu\t\t: PA7300LC (PCX-L2)\n");
+        dprintf(fd, "capabilities\t: os32\n");
+        dprintf(fd, "model\t\t: 9000/778/B160L - "
+                    "Merlin L2 160 QEMU (9000/778/B160L)\n\n");
+    }
+    return 0;
+}
+#define HAVE_ARCH_PROC_CPUINFO
+
+#endif /* HPPA_TARGET_PROC_H */
diff --git a/linux-user/i386/target_proc.h b/linux-user/i386/target_proc.h
new file mode 100644
index 0000000000..43fe29ca72
--- /dev/null
+++ b/linux-user/i386/target_proc.h
@@ -0,0 +1 @@
+/* No target-specific /proc support */
diff --git a/linux-user/loader.h b/linux-user/loader.h
index 59cbeacf24..324e5c872a 100644
--- a/linux-user/loader.h
+++ b/linux-user/loader.h
@@ -56,9 +56,13 @@ abi_long memcpy_to_target(abi_ulong dest, const void *src,
 
 extern unsigned long guest_stack_size;
 
-#ifdef TARGET_S390X
+#if defined(TARGET_S390X) || defined(TARGET_AARCH64) || defined(TARGET_ARM)
 uint32_t get_elf_hwcap(void);
 const char *elf_hwcap_str(uint32_t bit);
 #endif
+#if defined(TARGET_AARCH64) || defined(TARGET_ARM)
+uint32_t get_elf_hwcap2(void);
+const char *elf_hwcap2_str(uint32_t bit);
+#endif
 
 #endif /* LINUX_USER_LOADER_H */
diff --git a/linux-user/loongarch64/signal.c b/linux-user/loongarch64/signal.c
index bb8efb1172..39572c1190 100644
--- a/linux-user/loongarch64/signal.c
+++ b/linux-user/loongarch64/signal.c
@@ -12,6 +12,7 @@
 #include "linux-user/trace.h"
 
 #include "target/loongarch/internals.h"
+#include "target/loongarch/vec.h"
 
 /* FP context was used */
 #define SC_USED_FP              (1 << 0)
diff --git a/linux-user/loongarch64/target_proc.h b/linux-user/loongarch64/target_proc.h
new file mode 100644
index 0000000000..43fe29ca72
--- /dev/null
+++ b/linux-user/loongarch64/target_proc.h
@@ -0,0 +1 @@
+/* No target-specific /proc support */
diff --git a/linux-user/m68k/target_proc.h b/linux-user/m68k/target_proc.h
new file mode 100644
index 0000000000..3df8f28e22
--- /dev/null
+++ b/linux-user/m68k/target_proc.h
@@ -0,0 +1,16 @@
+/*
+ * M68K specific proc functions for linux-user
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+#ifndef M68K_TARGET_PROC_H
+#define M68K_TARGET_PROC_H
+
+static int open_hardware(CPUArchState *cpu_env, int fd)
+{
+    dprintf(fd, "Model:\t\tqemu-m68k\n");
+    return 0;
+}
+#define HAVE_ARCH_PROC_HARDWARE
+
+#endif /* M68K_TARGET_PROC_H */
diff --git a/linux-user/microblaze/target_proc.h b/linux-user/microblaze/target_proc.h
new file mode 100644
index 0000000000..43fe29ca72
--- /dev/null
+++ b/linux-user/microblaze/target_proc.h
@@ -0,0 +1 @@
+/* No target-specific /proc support */
diff --git a/linux-user/mips/target_proc.h b/linux-user/mips/target_proc.h
new file mode 100644
index 0000000000..43fe29ca72
--- /dev/null
+++ b/linux-user/mips/target_proc.h
@@ -0,0 +1 @@
+/* No target-specific /proc support */
diff --git a/linux-user/mips64/target_proc.h b/linux-user/mips64/target_proc.h
new file mode 100644
index 0000000000..43fe29ca72
--- /dev/null
+++ b/linux-user/mips64/target_proc.h
@@ -0,0 +1 @@
+/* No target-specific /proc support */
diff --git a/linux-user/mmap.c b/linux-user/mmap.c
index 9aab48d4a3..8eaf57b208 100644
--- a/linux-user/mmap.c
+++ b/linux-user/mmap.c
@@ -17,12 +17,14 @@
  *  along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 #include "qemu/osdep.h"
+#include <sys/shm.h>
 #include "trace.h"
 #include "exec/log.h"
 #include "qemu.h"
 #include "user-internals.h"
 #include "user-mmap.h"
 #include "target_mman.h"
+#include "qemu/interval-tree.h"
 
 static pthread_mutex_t mmap_mutex = PTHREAD_MUTEX_INITIALIZER;
 static __thread int mmap_lock_count;
@@ -64,6 +66,44 @@ void mmap_fork_end(int child)
     }
 }
 
+/* Protected by mmap_lock. */
+static IntervalTreeRoot shm_regions;
+
+static void shm_region_add(abi_ptr start, abi_ptr last)
+{
+    IntervalTreeNode *i = g_new0(IntervalTreeNode, 1);
+
+    i->start = start;
+    i->last = last;
+    interval_tree_insert(i, &shm_regions);
+}
+
+static abi_ptr shm_region_find(abi_ptr start)
+{
+    IntervalTreeNode *i;
+
+    for (i = interval_tree_iter_first(&shm_regions, start, start); i;
+         i = interval_tree_iter_next(i, start, start)) {
+        if (i->start == start) {
+            return i->last;
+        }
+    }
+    return 0;
+}
+
+static void shm_region_rm_complete(abi_ptr start, abi_ptr last)
+{
+    IntervalTreeNode *i, *n;
+
+    for (i = interval_tree_iter_first(&shm_regions, start, last); i; i = n) {
+        n = interval_tree_iter_next(i, start, last);
+        if (i->start >= start && i->last <= last) {
+            interval_tree_remove(i, &shm_regions);
+            g_free(i);
+        }
+    }
+}
+
 /*
  * Validate target prot bitmask.
  * Return the prot bitmask for the host in *HOST_PROT.
@@ -720,6 +760,7 @@ abi_long target_mmap(abi_ulong start, abi_ulong len, int target_prot,
             page_set_flags(passthrough_last + 1, last, page_flags);
         }
     }
+    shm_region_rm_complete(start, last);
  the_end:
     trace_target_mmap_complete(start);
     if (qemu_loglevel_mask(CPU_LOG_PAGE)) {
@@ -817,6 +858,7 @@ int target_munmap(abi_ulong start, abi_ulong len)
     mmap_lock();
     mmap_reserve_or_unmap(start, len);
     page_set_flags(start, start + len - 1, 0);
+    shm_region_rm_complete(start, start + len - 1);
     mmap_unlock();
 
     return 0;
@@ -906,8 +948,10 @@ abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size,
         new_addr = h2g(host_addr);
         prot = page_get_flags(old_addr);
         page_set_flags(old_addr, old_addr + old_size - 1, 0);
+        shm_region_rm_complete(old_addr, old_addr + old_size - 1);
         page_set_flags(new_addr, new_addr + new_size - 1,
                        prot | PAGE_VALID | PAGE_RESET);
+        shm_region_rm_complete(new_addr, new_addr + new_size - 1);
     }
     mmap_unlock();
     return new_addr;
@@ -981,3 +1025,127 @@ abi_long target_madvise(abi_ulong start, abi_ulong len_in, int advice)
 
     return ret;
 }
+
+#ifndef TARGET_FORCE_SHMLBA
+/*
+ * For most architectures, SHMLBA is the same as the page size;
+ * some architectures have larger values, in which case they should
+ * define TARGET_FORCE_SHMLBA and provide a target_shmlba() function.
+ * This corresponds to the kernel arch code defining __ARCH_FORCE_SHMLBA
+ * and defining its own value for SHMLBA.
+ *
+ * The kernel also permits SHMLBA to be set by the architecture to a
+ * value larger than the page size without setting __ARCH_FORCE_SHMLBA;
+ * this means that addresses are rounded to the large size if
+ * SHM_RND is set but addresses not aligned to that size are not rejected
+ * as long as they are at least page-aligned. Since the only architecture
+ * which uses this is ia64 this code doesn't provide for that oddity.
+ */
+static inline abi_ulong target_shmlba(CPUArchState *cpu_env)
+{
+    return TARGET_PAGE_SIZE;
+}
+#endif
+
+abi_ulong target_shmat(CPUArchState *cpu_env, int shmid,
+                       abi_ulong shmaddr, int shmflg)
+{
+    CPUState *cpu = env_cpu(cpu_env);
+    abi_ulong raddr;
+    struct shmid_ds shm_info;
+    int ret;
+    abi_ulong shmlba;
+
+    /* shmat pointers are always untagged */
+
+    /* find out the length of the shared memory segment */
+    ret = get_errno(shmctl(shmid, IPC_STAT, &shm_info));
+    if (is_error(ret)) {
+        /* can't get length, bail out */
+        return ret;
+    }
+
+    shmlba = target_shmlba(cpu_env);
+
+    if (shmaddr & (shmlba - 1)) {
+        if (shmflg & SHM_RND) {
+            shmaddr &= ~(shmlba - 1);
+        } else {
+            return -TARGET_EINVAL;
+        }
+    }
+    if (!guest_range_valid_untagged(shmaddr, shm_info.shm_segsz)) {
+        return -TARGET_EINVAL;
+    }
+
+    WITH_MMAP_LOCK_GUARD() {
+        void *host_raddr;
+        abi_ulong last;
+
+        if (shmaddr) {
+            host_raddr = shmat(shmid, (void *)g2h_untagged(shmaddr), shmflg);
+        } else {
+            abi_ulong mmap_start;
+
+            /* In order to use the host shmat, we need to honor host SHMLBA.  */
+            mmap_start = mmap_find_vma(0, shm_info.shm_segsz,
+                                       MAX(SHMLBA, shmlba));
+
+            if (mmap_start == -1) {
+                return -TARGET_ENOMEM;
+            }
+            host_raddr = shmat(shmid, g2h_untagged(mmap_start),
+                               shmflg | SHM_REMAP);
+        }
+
+        if (host_raddr == (void *)-1) {
+            return get_errno(-1);
+        }
+        raddr = h2g(host_raddr);
+        last = raddr + shm_info.shm_segsz - 1;
+
+        page_set_flags(raddr, last,
+                       PAGE_VALID | PAGE_RESET | PAGE_READ |
+                       (shmflg & SHM_RDONLY ? 0 : PAGE_WRITE));
+
+        shm_region_rm_complete(raddr, last);
+        shm_region_add(raddr, last);
+    }
+
+    /*
+     * We're mapping shared memory, so ensure we generate code for parallel
+     * execution and flush old translations.  This will work up to the level
+     * supported by the host -- anything that requires EXCP_ATOMIC will not
+     * be atomic with respect to an external process.
+     */
+    if (!(cpu->tcg_cflags & CF_PARALLEL)) {
+        cpu->tcg_cflags |= CF_PARALLEL;
+        tb_flush(cpu);
+    }
+
+    return raddr;
+}
+
+abi_long target_shmdt(abi_ulong shmaddr)
+{
+    abi_long rv;
+
+    /* shmdt pointers are always untagged */
+
+    WITH_MMAP_LOCK_GUARD() {
+        abi_ulong last = shm_region_find(shmaddr);
+        if (last == 0) {
+            return -TARGET_EINVAL;
+        }
+
+        rv = get_errno(shmdt(g2h_untagged(shmaddr)));
+        if (rv == 0) {
+            abi_ulong size = last - shmaddr + 1;
+
+            page_set_flags(shmaddr, last, 0);
+            shm_region_rm_complete(shmaddr, last);
+            mmap_reserve_or_unmap(shmaddr, size);
+        }
+    }
+    return rv;
+}
diff --git a/linux-user/nios2/target_proc.h b/linux-user/nios2/target_proc.h
new file mode 100644
index 0000000000..43fe29ca72
--- /dev/null
+++ b/linux-user/nios2/target_proc.h
@@ -0,0 +1 @@
+/* No target-specific /proc support */
diff --git a/linux-user/openrisc/target_proc.h b/linux-user/openrisc/target_proc.h
new file mode 100644
index 0000000000..43fe29ca72
--- /dev/null
+++ b/linux-user/openrisc/target_proc.h
@@ -0,0 +1 @@
+/* No target-specific /proc support */
diff --git a/linux-user/ppc/target_proc.h b/linux-user/ppc/target_proc.h
new file mode 100644
index 0000000000..43fe29ca72
--- /dev/null
+++ b/linux-user/ppc/target_proc.h
@@ -0,0 +1 @@
+/* No target-specific /proc support */
diff --git a/linux-user/qemu.h b/linux-user/qemu.h
index 4f8b55e2fb..12f638336a 100644
--- a/linux-user/qemu.h
+++ b/linux-user/qemu.h
@@ -30,7 +30,6 @@ struct image_info {
         abi_ulong       start_data;
         abi_ulong       end_data;
         abi_ulong       brk;
-        abi_ulong       start_mmap;
         abi_ulong       start_stack;
         abi_ulong       stack_limit;
         abi_ulong       entry;
diff --git a/linux-user/riscv/signal.c b/linux-user/riscv/signal.c
index eaa168199a..f989f7f51f 100644
--- a/linux-user/riscv/signal.c
+++ b/linux-user/riscv/signal.c
@@ -38,8 +38,8 @@ struct target_sigcontext {
 }; /* cf. riscv-linux:arch/riscv/include/uapi/asm/ptrace.h */
 
 struct target_ucontext {
-    unsigned long uc_flags;
-    struct target_ucontext *uc_link;
+    abi_ulong uc_flags;
+    abi_ptr uc_link;
     target_stack_t uc_stack;
     target_sigset_t uc_sigmask;
     uint8_t   __unused[1024 / 8 - sizeof(target_sigset_t)];
diff --git a/linux-user/riscv/target_proc.h b/linux-user/riscv/target_proc.h
new file mode 100644
index 0000000000..c77c003d65
--- /dev/null
+++ b/linux-user/riscv/target_proc.h
@@ -0,0 +1,37 @@
+/*
+ * RISC-V specific proc functions for linux-user
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+#ifndef RISCV_TARGET_PROC_H
+#define RISCV_TARGET_PROC_H
+
+static int open_cpuinfo(CPUArchState *cpu_env, int fd)
+{
+    int i;
+    int num_cpus = sysconf(_SC_NPROCESSORS_ONLN);
+    RISCVCPU *cpu = env_archcpu(cpu_env);
+    const RISCVCPUConfig *cfg = riscv_cpu_cfg((CPURISCVState *) cpu_env);
+    char *isa_string = riscv_isa_string(cpu);
+    const char *mmu;
+
+    if (cfg->mmu) {
+        mmu = (cpu_env->xl == MXL_RV32) ? "sv32"  : "sv48";
+    } else {
+        mmu = "none";
+    }
+
+    for (i = 0; i < num_cpus; i++) {
+        dprintf(fd, "processor\t: %d\n", i);
+        dprintf(fd, "hart\t\t: %d\n", i);
+        dprintf(fd, "isa\t\t: %s\n", isa_string);
+        dprintf(fd, "mmu\t\t: %s\n", mmu);
+        dprintf(fd, "uarch\t\t: qemu\n\n");
+    }
+
+    g_free(isa_string);
+    return 0;
+}
+#define HAVE_ARCH_PROC_CPUINFO
+
+#endif /* RISCV_TARGET_PROC_H */
diff --git a/linux-user/s390x/target_proc.h b/linux-user/s390x/target_proc.h
new file mode 100644
index 0000000000..a4a4821ea5
--- /dev/null
+++ b/linux-user/s390x/target_proc.h
@@ -0,0 +1,109 @@
+/*
+ * S390X specific proc functions for linux-user
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+#ifndef S390X_TARGET_PROC_H
+#define S390X_TARGET_PROC_H
+
+/*
+ * Emulate what a Linux kernel running in qemu-system-s390x -M accel=tcg would
+ * show in /proc/cpuinfo.
+ *
+ * Skip the following in order to match the missing support in op_ecag():
+ * - show_cacheinfo().
+ * - show_cpu_topology().
+ * - show_cpu_mhz().
+ *
+ * Use fixed values for certain fields:
+ * - bogomips per cpu - from a qemu-system-s390x run.
+ * - max thread id = 0, since SMT / SIGP_SET_MULTI_THREADING is not supported.
+ *
+ * Keep the code structure close to arch/s390/kernel/processor.c.
+ */
+
+static void show_facilities(int fd)
+{
+    size_t sizeof_stfl_bytes = 2048;
+    g_autofree uint8_t *stfl_bytes = g_new0(uint8_t, sizeof_stfl_bytes);
+    unsigned int bit;
+
+    dprintf(fd, "facilities      :");
+    s390_get_feat_block(S390_FEAT_TYPE_STFL, stfl_bytes);
+    for (bit = 0; bit < sizeof_stfl_bytes * 8; bit++) {
+        if (test_be_bit(bit, stfl_bytes)) {
+            dprintf(fd, " %d", bit);
+        }
+    }
+    dprintf(fd, "\n");
+}
+
+static int cpu_ident(unsigned long n)
+{
+    return deposit32(0, CPU_ID_BITS - CPU_PHYS_ADDR_BITS, CPU_PHYS_ADDR_BITS,
+                     n);
+}
+
+static void show_cpu_summary(CPUArchState *cpu_env, int fd)
+{
+    S390CPUModel *model = env_archcpu(cpu_env)->model;
+    int num_cpus = sysconf(_SC_NPROCESSORS_ONLN);
+    uint32_t elf_hwcap = get_elf_hwcap();
+    const char *hwcap_str;
+    int i;
+
+    dprintf(fd, "vendor_id       : IBM/S390\n"
+                "# processors    : %i\n"
+                "bogomips per cpu: 13370.00\n",
+            num_cpus);
+    dprintf(fd, "max thread id   : 0\n");
+    dprintf(fd, "features\t: ");
+    for (i = 0; i < sizeof(elf_hwcap) * 8; i++) {
+        if (!(elf_hwcap & (1 << i))) {
+            continue;
+        }
+        hwcap_str = elf_hwcap_str(i);
+        if (hwcap_str) {
+            dprintf(fd, "%s ", hwcap_str);
+        }
+    }
+    dprintf(fd, "\n");
+    show_facilities(fd);
+    for (i = 0; i < num_cpus; i++) {
+        dprintf(fd, "processor %d: "
+               "version = %02X,  "
+               "identification = %06X,  "
+               "machine = %04X\n",
+               i, model->cpu_ver, cpu_ident(i), model->def->type);
+    }
+}
+
+static void show_cpu_ids(CPUArchState *cpu_env, int fd, unsigned long n)
+{
+    S390CPUModel *model = env_archcpu(cpu_env)->model;
+
+    dprintf(fd, "version         : %02X\n", model->cpu_ver);
+    dprintf(fd, "identification  : %06X\n", cpu_ident(n));
+    dprintf(fd, "machine         : %04X\n", model->def->type);
+}
+
+static void show_cpuinfo(CPUArchState *cpu_env, int fd, unsigned long n)
+{
+    dprintf(fd, "\ncpu number      : %ld\n", n);
+    show_cpu_ids(cpu_env, fd, n);
+}
+
+static int open_cpuinfo(CPUArchState *cpu_env, int fd)
+{
+    int num_cpus = sysconf(_SC_NPROCESSORS_ONLN);
+    int i;
+
+    show_cpu_summary(cpu_env, fd);
+    for (i = 0; i < num_cpus; i++) {
+        show_cpuinfo(cpu_env, fd, i);
+    }
+    return 0;
+}
+#define HAVE_ARCH_PROC_CPUINFO
+
+#endif /* S390X_TARGET_PROC_H */
diff --git a/linux-user/sh4/target_proc.h b/linux-user/sh4/target_proc.h
new file mode 100644
index 0000000000..43fe29ca72
--- /dev/null
+++ b/linux-user/sh4/target_proc.h
@@ -0,0 +1 @@
+/* No target-specific /proc support */
diff --git a/linux-user/sparc/target_proc.h b/linux-user/sparc/target_proc.h
new file mode 100644
index 0000000000..3bb3134a47
--- /dev/null
+++ b/linux-user/sparc/target_proc.h
@@ -0,0 +1,16 @@
+/*
+ * Sparc specific proc functions for linux-user
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+#ifndef SPARC_TARGET_PROC_H
+#define SPARC_TARGET_PROC_H
+
+static int open_cpuinfo(CPUArchState *cpu_env, int fd)
+{
+    dprintf(fd, "type\t\t: sun4u\n");
+    return 0;
+}
+#define HAVE_ARCH_PROC_CPUINFO
+
+#endif /* SPARC_TARGET_PROC_H */
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index c60828d6d0..a08d71d79a 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -1826,7 +1826,7 @@ static inline abi_long target_to_host_cmsg(struct msghdr *msgh,
             uint32_t *dst = (uint32_t *)data;
 
             memcpy(dst, target_data, len);
-            /* fix endianess of first 32-bit word */
+            /* fix endianness of first 32-bit word */
             if (len >= sizeof(uint32_t)) {
                 *dst = tswap32(*dst);
             }
@@ -2937,7 +2937,7 @@ get_timeout:
                 unlock_user(results, optval_addr, 0);
                 return ret;
             }
-            /* swap host endianess to target endianess. */
+            /* swap host endianness to target endianness. */
             for (i = 0; i < (len / sizeof(uint32_t)); i++) {
                 results[i] = tswap32(results[i]);
             }
@@ -3742,14 +3742,6 @@ static abi_long do_socketcall(int num, abi_ulong vptr)
 }
 #endif
 
-#define N_SHM_REGIONS	32
-
-static struct shm_region {
-    abi_ulong start;
-    abi_ulong size;
-    bool in_use;
-} shm_regions[N_SHM_REGIONS];
-
 #ifndef TARGET_SEMID64_DS
 /* asm-generic version of this struct */
 struct target_semid64_ds
@@ -4499,133 +4491,6 @@ static inline abi_long do_shmctl(int shmid, int cmd, abi_long buf)
     return ret;
 }
 
-#ifndef TARGET_FORCE_SHMLBA
-/* For most architectures, SHMLBA is the same as the page size;
- * some architectures have larger values, in which case they should
- * define TARGET_FORCE_SHMLBA and provide a target_shmlba() function.
- * This corresponds to the kernel arch code defining __ARCH_FORCE_SHMLBA
- * and defining its own value for SHMLBA.
- *
- * The kernel also permits SHMLBA to be set by the architecture to a
- * value larger than the page size without setting __ARCH_FORCE_SHMLBA;
- * this means that addresses are rounded to the large size if
- * SHM_RND is set but addresses not aligned to that size are not rejected
- * as long as they are at least page-aligned. Since the only architecture
- * which uses this is ia64 this code doesn't provide for that oddity.
- */
-static inline abi_ulong target_shmlba(CPUArchState *cpu_env)
-{
-    return TARGET_PAGE_SIZE;
-}
-#endif
-
-static abi_ulong do_shmat(CPUArchState *cpu_env, int shmid,
-                          abi_ulong shmaddr, int shmflg)
-{
-    CPUState *cpu = env_cpu(cpu_env);
-    abi_ulong raddr;
-    void *host_raddr;
-    struct shmid_ds shm_info;
-    int i, ret;
-    abi_ulong shmlba;
-
-    /* shmat pointers are always untagged */
-
-    /* find out the length of the shared memory segment */
-    ret = get_errno(shmctl(shmid, IPC_STAT, &shm_info));
-    if (is_error(ret)) {
-        /* can't get length, bail out */
-        return ret;
-    }
-
-    shmlba = target_shmlba(cpu_env);
-
-    if (shmaddr & (shmlba - 1)) {
-        if (shmflg & SHM_RND) {
-            shmaddr &= ~(shmlba - 1);
-        } else {
-            return -TARGET_EINVAL;
-        }
-    }
-    if (!guest_range_valid_untagged(shmaddr, shm_info.shm_segsz)) {
-        return -TARGET_EINVAL;
-    }
-
-    mmap_lock();
-
-    /*
-     * We're mapping shared memory, so ensure we generate code for parallel
-     * execution and flush old translations.  This will work up to the level
-     * supported by the host -- anything that requires EXCP_ATOMIC will not
-     * be atomic with respect to an external process.
-     */
-    if (!(cpu->tcg_cflags & CF_PARALLEL)) {
-        cpu->tcg_cflags |= CF_PARALLEL;
-        tb_flush(cpu);
-    }
-
-    if (shmaddr)
-        host_raddr = shmat(shmid, (void *)g2h_untagged(shmaddr), shmflg);
-    else {
-        abi_ulong mmap_start;
-
-        /* In order to use the host shmat, we need to honor host SHMLBA.  */
-        mmap_start = mmap_find_vma(0, shm_info.shm_segsz, MAX(SHMLBA, shmlba));
-
-        if (mmap_start == -1) {
-            errno = ENOMEM;
-            host_raddr = (void *)-1;
-        } else
-            host_raddr = shmat(shmid, g2h_untagged(mmap_start),
-                               shmflg | SHM_REMAP);
-    }
-
-    if (host_raddr == (void *)-1) {
-        mmap_unlock();
-        return get_errno((intptr_t)host_raddr);
-    }
-    raddr = h2g((uintptr_t)host_raddr);
-
-    page_set_flags(raddr, raddr + shm_info.shm_segsz - 1,
-                   PAGE_VALID | PAGE_RESET | PAGE_READ |
-                   (shmflg & SHM_RDONLY ? 0 : PAGE_WRITE));
-
-    for (i = 0; i < N_SHM_REGIONS; i++) {
-        if (!shm_regions[i].in_use) {
-            shm_regions[i].in_use = true;
-            shm_regions[i].start = raddr;
-            shm_regions[i].size = shm_info.shm_segsz;
-            break;
-        }
-    }
-
-    mmap_unlock();
-    return raddr;
-}
-
-static inline abi_long do_shmdt(abi_ulong shmaddr)
-{
-    int i;
-    abi_long rv;
-
-    /* shmdt pointers are always untagged */
-
-    mmap_lock();
-
-    for (i = 0; i < N_SHM_REGIONS; ++i) {
-        if (shm_regions[i].in_use && shm_regions[i].start == shmaddr) {
-            shm_regions[i].in_use = false;
-            page_set_flags(shmaddr, shmaddr + shm_regions[i].size - 1, 0);
-            break;
-        }
-    }
-    rv = get_errno(shmdt(g2h_untagged(shmaddr)));
-
-    mmap_unlock();
-
-    return rv;
-}
-
 #ifdef TARGET_NR_ipc
 /* ??? This only works with linear mappings.  */
 /* do_ipc() must return target values and target errnos. */
@@ -4712,7 +4577,7 @@ static abi_long do_ipc(CPUArchState *cpu_env,
         default:
         {
             abi_ulong raddr;
-            raddr = do_shmat(cpu_env, first, ptr, second);
+            raddr = target_shmat(cpu_env, first, ptr, second);
             if (is_error(raddr))
                 return get_errno(raddr);
             if (put_user_ual(raddr, third))
@@ -4725,7 +4590,7 @@ static abi_long do_ipc(CPUArchState *cpu_env,
         }
 	break;
     case IPCOP_shmdt:
-        ret = do_shmdt(ptr);
+        ret = target_shmdt(ptr);
 	break;
 
     case IPCOP_shmget:
@@ -8131,12 +7996,68 @@ static int open_self_cmdline(CPUArchState *cpu_env, int fd)
     return 0;
 }
 
-static void show_smaps(int fd, unsigned long size)
-{
-    unsigned long page_size_kb = TARGET_PAGE_SIZE >> 10;
-    unsigned long size_kb = size >> 10;
+struct open_self_maps_data {
+    TaskState *ts;
+    IntervalTreeRoot *host_maps;
+    int fd;
+    bool smaps;
+};
 
-    dprintf(fd, "Size:                  %lu kB\n"
+/*
+ * Subroutine to output one line of /proc/self/maps,
+ * or one region of /proc/self/smaps.
+ */
+
+#ifdef TARGET_HPPA
+# define test_stack(S, E, L)  (E == L)
+#else
+# define test_stack(S, E, L)  (S == L)
+#endif
+
+static void open_self_maps_4(const struct open_self_maps_data *d,
+                             const MapInfo *mi, abi_ptr start,
+                             abi_ptr end, unsigned flags)
+{
+    const struct image_info *info = d->ts->info;
+    const char *path = mi->path;
+    uint64_t offset;
+    int fd = d->fd;
+    int count;
+
+    if (test_stack(start, end, info->stack_limit)) {
+        path = "[stack]";
+    } else if (start == info->brk) {
+        path = "[heap]";
+    }
+
+    /* Except null device (MAP_ANON), adjust offset for this fragment. */
+    offset = mi->offset;
+    if (mi->dev) {
+        uintptr_t hstart = (uintptr_t)g2h_untagged(start);
+        offset += hstart - mi->itree.start;
+    }
+
+    count = dprintf(fd, TARGET_ABI_FMT_ptr "-" TARGET_ABI_FMT_ptr
+                    " %c%c%c%c %08" PRIx64 " %02x:%02x %"PRId64,
+                    start, end,
+                    (flags & PAGE_READ) ? 'r' : '-',
+                    (flags & PAGE_WRITE_ORG) ? 'w' : '-',
+                    (flags & PAGE_EXEC) ? 'x' : '-',
+                    mi->is_priv ? 'p' : 's',
+                    offset, major(mi->dev), minor(mi->dev),
+                    (uint64_t)mi->inode);
+    if (path) {
+        dprintf(fd, "%*s%s\n", 73 - count, "", path);
+    } else {
+        dprintf(fd, "\n");
+    }
+
+    if (d->smaps) {
+        unsigned long size = end - start;
+        unsigned long page_size_kb = TARGET_PAGE_SIZE >> 10;
+        unsigned long size_kb = size >> 10;
+
+        dprintf(fd, "Size:                  %lu kB\n"
                 "KernelPageSize:        %lu kB\n"
                 "MMUPageSize:           %lu kB\n"
                 "Rss:                   0 kB\n"
@@ -8147,7 +8068,7 @@ static void show_smaps(int fd, unsigned long size)
                 "Private_Clean:         0 kB\n"
                 "Private_Dirty:         0 kB\n"
                 "Referenced:            0 kB\n"
-                "Anonymous:             0 kB\n"
+                "Anonymous:             %lu kB\n"
                 "LazyFree:              0 kB\n"
                 "AnonHugePages:         0 kB\n"
                 "ShmemPmdMapped:        0 kB\n"
@@ -8157,89 +8078,76 @@ static void show_smaps(int fd, unsigned long size)
                 "Swap:                  0 kB\n"
                 "SwapPss:               0 kB\n"
                 "Locked:                0 kB\n"
-                "THPeligible:    0\n", size_kb, page_size_kb, page_size_kb);
+                "THPeligible:    0\n"
+                "VmFlags:%s%s%s%s%s%s%s%s\n",
+                size_kb, page_size_kb, page_size_kb,
+                (flags & PAGE_ANON ? size_kb : 0),
+                (flags & PAGE_READ) ? " rd" : "",
+                (flags & PAGE_WRITE_ORG) ? " wr" : "",
+                (flags & PAGE_EXEC) ? " ex" : "",
+                mi->is_priv ? "" : " sh",
+                (flags & PAGE_READ) ? " mr" : "",
+                (flags & PAGE_WRITE_ORG) ? " mw" : "",
+                (flags & PAGE_EXEC) ? " me" : "",
+                mi->is_priv ? "" : " ms");
+    }
 }
 
-static int open_self_maps_1(CPUArchState *cpu_env, int fd, bool smaps)
+/*
+ * Callback for walk_memory_regions, when read_self_maps() fails.
+ * Proceed without the benefit of host /proc/self/maps cross-check.
+ */
+static int open_self_maps_3(void *opaque, target_ulong guest_start,
+                            target_ulong guest_end, unsigned long flags)
 {
-    CPUState *cpu = env_cpu(cpu_env);
-    TaskState *ts = cpu->opaque;
-    IntervalTreeRoot *map_info = read_self_maps();
-    IntervalTreeNode *s;
-    int count;
+    static const MapInfo mi = { .is_priv = true };
 
-    for (s = interval_tree_iter_first(map_info, 0, -1); s;
-         s = interval_tree_iter_next(s, 0, -1)) {
-        MapInfo *e = container_of(s, MapInfo, itree);
+    open_self_maps_4(opaque, &mi, guest_start, guest_end, flags);
+    return 0;
+}
 
-        if (h2g_valid(e->itree.start)) {
-            unsigned long min = e->itree.start;
-            unsigned long max = e->itree.last + 1;
-            int flags = page_get_flags(h2g(min));
-            const char *path;
+/*
+ * Callback for walk_memory_regions, when read_self_maps() succeeds.
+ */
+static int open_self_maps_2(void *opaque, target_ulong guest_start,
+                            target_ulong guest_end, unsigned long flags)
+{
+    const struct open_self_maps_data *d = opaque;
+    uintptr_t host_start = (uintptr_t)g2h_untagged(guest_start);
+    uintptr_t host_last = (uintptr_t)g2h_untagged(guest_end - 1);
 
-            max = h2g_valid(max - 1) ?
-                max : (uintptr_t) g2h_untagged(GUEST_ADDR_MAX) + 1;
+    while (1) {
+        IntervalTreeNode *n =
+            interval_tree_iter_first(d->host_maps, host_start, host_start);
+        MapInfo *mi = container_of(n, MapInfo, itree);
+        uintptr_t this_hlast = MIN(host_last, n->last);
+        target_ulong this_gend = h2g(this_hlast) + 1;
 
-            if (!page_check_range(h2g(min), max - min, flags)) {
-                continue;
-            }
+        open_self_maps_4(d, mi, guest_start, this_gend, flags);
 
-#ifdef TARGET_HPPA
-            if (h2g(max) == ts->info->stack_limit) {
-#else
-            if (h2g(min) == ts->info->stack_limit) {
-#endif
-                path = "[stack]";
-            } else {
-                path = e->path;
-            }
-
-            count = dprintf(fd, TARGET_ABI_FMT_ptr "-" TARGET_ABI_FMT_ptr
-                            " %c%c%c%c %08" PRIx64 " %s %"PRId64,
-                            h2g(min), h2g(max - 1) + 1,
-                            (flags & PAGE_READ) ? 'r' : '-',
-                            (flags & PAGE_WRITE_ORG) ? 'w' : '-',
-                            (flags & PAGE_EXEC) ? 'x' : '-',
-                            e->is_priv ? 'p' : 's',
-                            (uint64_t) e->offset, e->dev, e->inode);
-            if (path) {
-                dprintf(fd, "%*s%s\n", 73 - count, "", path);
-            } else {
-                dprintf(fd, "\n");
-            }
-            if (smaps) {
-                show_smaps(fd, max - min);
-                dprintf(fd, "VmFlags:%s%s%s%s%s%s%s%s\n",
-                        (flags & PAGE_READ) ? " rd" : "",
-                        (flags & PAGE_WRITE_ORG) ? " wr" : "",
-                        (flags & PAGE_EXEC) ? " ex" : "",
-                        e->is_priv ? "" : " sh",
-                        (flags & PAGE_READ) ? " mr" : "",
-                        (flags & PAGE_WRITE_ORG) ? " mw" : "",
-                        (flags & PAGE_EXEC) ? " me" : "",
-                        e->is_priv ? "" : " ms");
-            }
+        if (this_hlast == host_last) {
+            return 0;
         }
+        host_start = this_hlast + 1;
+        guest_start = h2g(host_start);
     }
+}
 
-    free_self_maps(map_info);
+static int open_self_maps_1(CPUArchState *env, int fd, bool smaps)
+{
+    struct open_self_maps_data d = {
+        .ts = env_cpu(env)->opaque,
+        .host_maps = read_self_maps(),
+        .fd = fd,
+        .smaps = smaps
+    };
 
-#ifdef TARGET_VSYSCALL_PAGE
-    /*
-     * We only support execution from the vsyscall page.
-     * This is as if CONFIG_LEGACY_VSYSCALL_XONLY=y from v5.3.
-     */
-    count = dprintf(fd, TARGET_FMT_lx "-" TARGET_FMT_lx
-                    " --xp 00000000 00:00 0",
-                    TARGET_VSYSCALL_PAGE, TARGET_VSYSCALL_PAGE + TARGET_PAGE_SIZE);
-    dprintf(fd, "%*s%s\n", 73 - count, "",  "[vsyscall]");
-    if (smaps) {
-        show_smaps(fd, TARGET_PAGE_SIZE);
-        dprintf(fd, "VmFlags: ex\n");
+    if (d.host_maps) {
+        walk_memory_regions(&d, open_self_maps_2);
+        free_self_maps(d.host_maps);
+    } else {
+        walk_memory_regions(&d, open_self_maps_3);
     }
-#endif
-
     return 0;
 }
 
@@ -8375,9 +8283,11 @@ void target_exception_dump(CPUArchState *env, const char *fmt, int code)
     }
 }
 
+#include "target_proc.h"
+
 #if HOST_BIG_ENDIAN != TARGET_BIG_ENDIAN || \
-    defined(TARGET_SPARC) || defined(TARGET_M68K) || defined(TARGET_HPPA) || \
-    defined(TARGET_RISCV) || defined(TARGET_S390X)
+    defined(HAVE_ARCH_PROC_CPUINFO) || \
+    defined(HAVE_ARCH_PROC_HARDWARE)
 static int is_proc(const char *filename, const char *entry)
 {
     return strcmp(filename, entry) == 0;
@@ -8429,171 +8339,6 @@ static int open_net_route(CPUArchState *cpu_env, int fd)
 }
 #endif
 
-#if defined(TARGET_SPARC)
-static int open_cpuinfo(CPUArchState *cpu_env, int fd)
-{
-    dprintf(fd, "type\t\t: sun4u\n");
-    return 0;
-}
-#endif
-
-#if defined(TARGET_HPPA)
-static int open_cpuinfo(CPUArchState *cpu_env, int fd)
-{
-    int i, num_cpus;
-
-    num_cpus = sysconf(_SC_NPROCESSORS_ONLN);
-    for (i = 0; i < num_cpus; i++) {
-        dprintf(fd, "processor\t: %d\n", i);
-        dprintf(fd, "cpu family\t: PA-RISC 1.1e\n");
-        dprintf(fd, "cpu\t\t: PA7300LC (PCX-L2)\n");
-        dprintf(fd, "capabilities\t: os32\n");
-        dprintf(fd, "model\t\t: 9000/778/B160L - "
-                    "Merlin L2 160 QEMU (9000/778/B160L)\n\n");
-    }
-    return 0;
-}
-#endif
-
-#if defined(TARGET_RISCV)
-static int open_cpuinfo(CPUArchState *cpu_env, int fd)
-{
-    int i;
-    int num_cpus = sysconf(_SC_NPROCESSORS_ONLN);
-    RISCVCPU *cpu = env_archcpu(cpu_env);
-    const RISCVCPUConfig *cfg = riscv_cpu_cfg((CPURISCVState *) cpu_env);
-    char *isa_string = riscv_isa_string(cpu);
-    const char *mmu;
-
-    if (cfg->mmu) {
-        mmu = (cpu_env->xl == MXL_RV32) ? "sv32"  : "sv48";
-    } else {
-        mmu = "none";
-    }
-
-    for (i = 0; i < num_cpus; i++) {
-        dprintf(fd, "processor\t: %d\n", i);
-        dprintf(fd, "hart\t\t: %d\n", i);
-        dprintf(fd, "isa\t\t: %s\n", isa_string);
-        dprintf(fd, "mmu\t\t: %s\n", mmu);
-        dprintf(fd, "uarch\t\t: qemu\n\n");
-    }
-
-    g_free(isa_string);
-    return 0;
-}
-#endif
-
-#if defined(TARGET_S390X)
-/*
- * Emulate what a Linux kernel running in qemu-system-s390x -M accel=tcg would
- * show in /proc/cpuinfo.
- *
- * Skip the following in order to match the missing support in op_ecag():
- * - show_cacheinfo().
- * - show_cpu_topology().
- * - show_cpu_mhz().
- *
- * Use fixed values for certain fields:
- * - bogomips per cpu - from a qemu-system-s390x run.
- * - max thread id = 0, since SMT / SIGP_SET_MULTI_THREADING is not supported.
- *
- * Keep the code structure close to arch/s390/kernel/processor.c.
- */
-
-static void show_facilities(int fd)
-{
-    size_t sizeof_stfl_bytes = 2048;
-    g_autofree uint8_t *stfl_bytes = g_new0(uint8_t, sizeof_stfl_bytes);
-    unsigned int bit;
-
-    dprintf(fd, "facilities      :");
-    s390_get_feat_block(S390_FEAT_TYPE_STFL, stfl_bytes);
-    for (bit = 0; bit < sizeof_stfl_bytes * 8; bit++) {
-        if (test_be_bit(bit, stfl_bytes)) {
-            dprintf(fd, " %d", bit);
-        }
-    }
-    dprintf(fd, "\n");
-}
-
-static int cpu_ident(unsigned long n)
-{
-    return deposit32(0, CPU_ID_BITS - CPU_PHYS_ADDR_BITS, CPU_PHYS_ADDR_BITS,
-                     n);
-}
-
-static void show_cpu_summary(CPUArchState *cpu_env, int fd)
-{
-    S390CPUModel *model = env_archcpu(cpu_env)->model;
-    int num_cpus = sysconf(_SC_NPROCESSORS_ONLN);
-    uint32_t elf_hwcap = get_elf_hwcap();
-    const char *hwcap_str;
-    int i;
-
-    dprintf(fd, "vendor_id       : IBM/S390\n"
-                "# processors    : %i\n"
-                "bogomips per cpu: 13370.00\n",
-            num_cpus);
-    dprintf(fd, "max thread id   : 0\n");
-    dprintf(fd, "features\t: ");
-    for (i = 0; i < sizeof(elf_hwcap) * 8; i++) {
-        if (!(elf_hwcap & (1 << i))) {
-            continue;
-        }
-        hwcap_str = elf_hwcap_str(i);
-        if (hwcap_str) {
-            dprintf(fd, "%s ", hwcap_str);
-        }
-    }
-    dprintf(fd, "\n");
-    show_facilities(fd);
-    for (i = 0; i < num_cpus; i++) {
-        dprintf(fd, "processor %d: "
-               "version = %02X,  "
-               "identification = %06X,  "
-               "machine = %04X\n",
-               i, model->cpu_ver, cpu_ident(i), model->def->type);
-    }
-}
-
-static void show_cpu_ids(CPUArchState *cpu_env, int fd, unsigned long n)
-{
-    S390CPUModel *model = env_archcpu(cpu_env)->model;
-
-    dprintf(fd, "version         : %02X\n", model->cpu_ver);
-    dprintf(fd, "identification  : %06X\n", cpu_ident(n));
-    dprintf(fd, "machine         : %04X\n", model->def->type);
-}
-
-static void show_cpuinfo(CPUArchState *cpu_env, int fd, unsigned long n)
-{
-    dprintf(fd, "\ncpu number      : %ld\n", n);
-    show_cpu_ids(cpu_env, fd, n);
-}
-
-static int open_cpuinfo(CPUArchState *cpu_env, int fd)
-{
-    int num_cpus = sysconf(_SC_NPROCESSORS_ONLN);
-    int i;
-
-    show_cpu_summary(cpu_env, fd);
-    for (i = 0; i < num_cpus; i++) {
-        show_cpuinfo(cpu_env, fd, i);
-    }
-    return 0;
-}
-#endif
-
-#if defined(TARGET_M68K)
-static int open_hardware(CPUArchState *cpu_env, int fd)
-{
-    dprintf(fd, "Model:\t\tqemu-m68k\n");
-    return 0;
-}
-#endif
-
-
 int do_guest_openat(CPUArchState *cpu_env, int dirfd, const char *fname,
                     int flags, mode_t mode, bool safe)
 {
@@ -8614,11 +8359,10 @@ int do_guest_openat(CPUArchState *cpu_env, int dirfd, const char *fname,
 #if HOST_BIG_ENDIAN != TARGET_BIG_ENDIAN
         { "/proc/net/route", open_net_route, is_proc },
 #endif
-#if defined(TARGET_SPARC) || defined(TARGET_HPPA) || \
-    defined(TARGET_RISCV) || defined(TARGET_S390X)
+#if defined(HAVE_ARCH_PROC_CPUINFO)
         { "/proc/cpuinfo", open_cpuinfo, is_proc },
 #endif
-#if defined(TARGET_M68K)
+#if defined(HAVE_ARCH_PROC_HARDWARE)
         { "/proc/hardware", open_hardware, is_proc },
 #endif
         { NULL, NULL, NULL }
@@ -9085,6 +8829,10 @@ static int do_getdents64(abi_long dirfd, abi_long arg2, abi_long count)
 #define RISCV_HWPROBE_KEY_IMA_EXT_0     4
 #define     RISCV_HWPROBE_IMA_FD       (1 << 0)
 #define     RISCV_HWPROBE_IMA_C        (1 << 1)
+#define     RISCV_HWPROBE_IMA_V        (1 << 2)
+#define     RISCV_HWPROBE_EXT_ZBA      (1 << 3)
+#define     RISCV_HWPROBE_EXT_ZBB      (1 << 4)
+#define     RISCV_HWPROBE_EXT_ZBS      (1 << 5)
 
 #define RISCV_HWPROBE_KEY_CPUPERF_0     5
 #define     RISCV_HWPROBE_MISALIGNED_UNKNOWN     (0 << 0)
@@ -9132,7 +8880,15 @@ static void risc_hwprobe_fill_pairs(CPURISCVState *env,
                     riscv_has_ext(env, RVD) ?
                     RISCV_HWPROBE_IMA_FD : 0;
             value |= riscv_has_ext(env, RVC) ?
-                     RISCV_HWPROBE_IMA_C : pair->value;
+                     RISCV_HWPROBE_IMA_C : 0;
+            value |= riscv_has_ext(env, RVV) ?
+                     RISCV_HWPROBE_IMA_V : 0;
+            value |= cfg->ext_zba ?
+                     RISCV_HWPROBE_EXT_ZBA : 0;
+            value |= cfg->ext_zbb ?
+                     RISCV_HWPROBE_EXT_ZBB : 0;
+            value |= cfg->ext_zbs ?
+                     RISCV_HWPROBE_EXT_ZBS : 0;
             __put_user(value, &pair->value);
             break;
         case RISCV_HWPROBE_KEY_CPUPERF_0:
@@ -11165,11 +10921,11 @@ static abi_long do_syscall1(CPUArchState *cpu_env, int num, abi_long arg1,
 #endif
 #ifdef TARGET_NR_shmat
     case TARGET_NR_shmat:
-        return do_shmat(cpu_env, arg1, arg2, arg3);
+        return target_shmat(cpu_env, arg1, arg2, arg3);
 #endif
 #ifdef TARGET_NR_shmdt
     case TARGET_NR_shmdt:
-        return do_shmdt(arg1);
+        return target_shmdt(arg1);
 #endif
     case TARGET_NR_fsync:
         return get_errno(fsync(arg1));
diff --git a/linux-user/user-mmap.h b/linux-user/user-mmap.h
index 0f4883eb57..b94bcdcf83 100644
--- a/linux-user/user-mmap.h
+++ b/linux-user/user-mmap.h
@@ -58,4 +58,8 @@ abi_ulong mmap_find_vma(abi_ulong, abi_ulong, abi_ulong);
 void mmap_fork_start(void);
 void mmap_fork_end(int child);
 
+abi_ulong target_shmat(CPUArchState *cpu_env, int shmid,
+                       abi_ulong shmaddr, int shmflg);
+abi_long target_shmdt(abi_ulong shmaddr);
+
 #endif /* LINUX_USER_USER_MMAP_H */
diff --git a/linux-user/x86_64/target_proc.h b/linux-user/x86_64/target_proc.h
new file mode 100644
index 0000000000..43fe29ca72
--- /dev/null
+++ b/linux-user/x86_64/target_proc.h
@@ -0,0 +1 @@
+/* No target-specific /proc support */
diff --git a/linux-user/xtensa/target_proc.h b/linux-user/xtensa/target_proc.h
new file mode 100644
index 0000000000..43fe29ca72
--- /dev/null
+++ b/linux-user/xtensa/target_proc.h
@@ -0,0 +1 @@
+/* No target-specific /proc support */
diff --git a/meson.build b/meson.build
index d1869ba381..7024b347bd 100644
--- a/meson.build
+++ b/meson.build
@@ -140,7 +140,6 @@ if cpu in ['x86', 'x86_64', 'arm', 'aarch64']
 endif
 if cpu in ['x86', 'x86_64']
   accelerator_targets += {
-    'CONFIG_HAX': ['i386-softmmu', 'x86_64-softmmu'],
     'CONFIG_HVF': ['x86_64-softmmu'],
     'CONFIG_NVMM': ['i386-softmmu', 'x86_64-softmmu'],
     'CONFIG_WHPX': ['i386-softmmu', 'x86_64-softmmu'],
@@ -224,8 +223,10 @@ qemu_ldflags = []
 if targetos == 'darwin'
   # Disable attempts to use ObjectiveC features in os/object.h since they
   # won't work when we're compiling with gcc as a C compiler.
-  qemu_common_flags += '-DOS_OBJECT_USE_OBJC=0'
-elif targetos == 'solaris'
+  if compiler.get_id() == 'gcc'
+    qemu_common_flags += '-DOS_OBJECT_USE_OBJC=0'
+  endif
+elif targetos == 'sunos'
   # needed for CMSG_ macros in sys/socket.h
   qemu_common_flags += '-D_XOPEN_SOURCE=600'
   # needed for TIOCWIN* defines in termios.h
@@ -663,11 +664,6 @@ if get_option('hvf').allowed()
     accelerators += 'CONFIG_HVF'
   endif
 endif
-if get_option('hax').allowed()
-  if get_option('hax').enabled() or targetos in ['windows', 'darwin', 'netbsd']
-    accelerators += 'CONFIG_HAX'
-  endif
-endif
 if targetos == 'netbsd'
   nvmm = cc.find_library('nvmm', required: get_option('nvmm'))
   if nvmm.found()
@@ -678,9 +674,7 @@ endif
 tcg_arch = host_arch
 if get_option('tcg').allowed()
   if host_arch == 'unknown'
-    if get_option('tcg_interpreter')
-      warning('Unsupported CPU @0@, will use TCG with TCI (slow)'.format(cpu))
-    else
+    if not get_option('tcg_interpreter')
       error('Unsupported CPU @0@, try --enable-tcg-interpreter'.format(cpu))
     endif
   elif get_option('tcg_interpreter')
@@ -693,7 +687,6 @@ if get_option('tcg').allowed()
   endif
   if get_option('tcg_interpreter')
     tcg_arch = 'tci'
-    config_host += { 'CONFIG_TCG_INTERPRETER': 'y' }
   elif host_arch == 'x86_64'
     tcg_arch = 'i386'
   elif host_arch == 'ppc64'
@@ -703,7 +696,6 @@ if get_option('tcg').allowed()
                         language: all_languages)
 
   accelerators += 'CONFIG_TCG'
-  config_host += { 'CONFIG_TCG': 'y' }
 endif
 
 if 'CONFIG_KVM' not in accelerators and get_option('kvm').enabled()
@@ -732,7 +724,7 @@ glib_cflags = []
 if enable_modules
   gmodule = dependency('gmodule-export-2.0', version: glib_req_ver, required: true,
                        method: 'pkg-config')
-elif config_host.has_key('CONFIG_PLUGIN')
+elif get_option('plugins')
   gmodule = dependency('gmodule-no-export-2.0', version: glib_req_ver, required: true,
                        method: 'pkg-config')
 else
@@ -1881,6 +1873,13 @@ if libbpf.found() and not cc.links('''
   endif
 endif
 
+# libxdp
+libxdp = not_found
+if not get_option('af_xdp').auto() or have_system
+    libxdp = dependency('libxdp', required: get_option('af_xdp'),
+                        version: '>=1.4.0', method: 'pkg-config')
+endif
+
 # libdw
 libdw = not_found
 if not get_option('libdw').auto() or \
@@ -2056,7 +2055,7 @@ have_slirp_smbd = get_option('slirp_smbd') \
 if have_slirp_smbd
   smbd_path = get_option('smbd')
   if smbd_path == ''
-    smbd_path = (targetos == 'solaris' ? '/usr/sfw/sbin/smbd' : '/usr/sbin/smbd')
+    smbd_path = (targetos == 'sunos' ? '/usr/sfw/sbin/smbd' : '/usr/sbin/smbd')
   endif
   config_host_data.set_quoted('CONFIG_SMBD_COMMAND', smbd_path)
 endif
@@ -2071,10 +2070,15 @@ config_host_data.set('CONFIG_MODULE_UPGRADES', get_option('module_upgrades'))
 config_host_data.set('CONFIG_ATTR', libattr.found())
 config_host_data.set('CONFIG_BDRV_WHITELIST_TOOLS', get_option('block_drv_whitelist_in_tools'))
 config_host_data.set('CONFIG_BRLAPI', brlapi.found())
+config_host_data.set('CONFIG_BSD', targetos in bsd_oses)
 config_host_data.set('CONFIG_COCOA', cocoa.found())
+config_host_data.set('CONFIG_DARWIN', targetos == 'darwin')
 config_host_data.set('CONFIG_FUZZ', get_option('fuzzing'))
 config_host_data.set('CONFIG_GCOV', get_option('b_coverage'))
 config_host_data.set('CONFIG_LIBUDEV', libudev.found())
+config_host_data.set('CONFIG_LINUX', targetos == 'linux')
+config_host_data.set('CONFIG_POSIX', targetos != 'windows')
+config_host_data.set('CONFIG_WIN32', targetos == 'windows')
 config_host_data.set('CONFIG_LZO', lzo.found())
 config_host_data.set('CONFIG_MPATH', mpathpersist.found())
 config_host_data.set('CONFIG_BLKIO', blkio.found())
@@ -2102,6 +2106,7 @@ config_host_data.set('CONFIG_HEXAGON_IDEF_PARSER', get_option('hexagon_idef_pars
 config_host_data.set('CONFIG_LIBATTR', have_old_libattr)
 config_host_data.set('CONFIG_LIBCAP_NG', libcap_ng.found())
 config_host_data.set('CONFIG_EBPF', libbpf.found())
+config_host_data.set('CONFIG_AF_XDP', libxdp.found())
 config_host_data.set('CONFIG_LIBDAXCTL', libdaxctl.found())
 config_host_data.set('CONFIG_LIBISCSI', libiscsi.found())
 config_host_data.set('CONFIG_LIBNFS', libnfs.found())
@@ -2117,6 +2122,7 @@ if numa.found()
                                        dependencies: numa))
 endif
 config_host_data.set('CONFIG_OPENGL', opengl.found())
+config_host_data.set('CONFIG_PLUGIN', get_option('plugins'))
 config_host_data.set('CONFIG_RBD', rbd.found())
 config_host_data.set('CONFIG_RDMA', rdma.found())
 config_host_data.set('CONFIG_SAFESTACK', get_option('safe_stack'))
@@ -2127,6 +2133,11 @@ if seccomp.found()
   config_host_data.set('CONFIG_SECCOMP_SYSRAWRC', seccomp_has_sysrawrc)
 endif
 config_host_data.set('CONFIG_SNAPPY', snappy.found())
+config_host_data.set('CONFIG_SOLARIS', targetos == 'sunos')
+if get_option('tcg').allowed()
+  config_host_data.set('CONFIG_TCG', 1)
+  config_host_data.set('CONFIG_TCG_INTERPRETER', tcg_arch == 'tci')
+endif
 config_host_data.set('CONFIG_TPM', have_tpm)
 config_host_data.set('CONFIG_TSAN', get_option('tsan'))
 config_host_data.set('CONFIG_USB_LIBUSB', libusb.found())
@@ -2201,6 +2212,7 @@ config_host_data.set10('CONFIG_COROUTINE_POOL', have_coroutine_pool)
 config_host_data.set('CONFIG_DEBUG_GRAPH_LOCK', get_option('debug_graph_lock'))
 config_host_data.set('CONFIG_DEBUG_MUTEX', get_option('debug_mutex'))
 config_host_data.set('CONFIG_DEBUG_STACK_USAGE', get_option('debug_stack_usage'))
+config_host_data.set('CONFIG_DEBUG_TCG', get_option('debug_tcg'))
 config_host_data.set('CONFIG_GPROF', get_option('gprof'))
 config_host_data.set('CONFIG_LIVE_BLOCK_MIGRATION', get_option('live_block_migration').allowed())
 config_host_data.set('CONFIG_QOM_CAST_DEBUG', get_option('qom_cast_debug'))
@@ -2754,12 +2766,6 @@ if targetos == 'windows' and 'cpp' in all_languages
 endif
 config_host_data.set('HAVE_VSS_SDK', have_vss_sdk)
 
-foreach k, v: config_host
-  if k.startswith('CONFIG_')
-    config_host_data.set(k, v == 'y' ? 1 : v)
-  endif
-endforeach
-
 # Older versions of MinGW do not import _lock_file and _unlock_file properly.
 # This was fixed for v6.0.0 with commit b48e3ac8969d.
 if targetos == 'windows'
@@ -2798,6 +2804,18 @@ endif
 ########################
 
 minikconf = find_program('scripts/minikconf.py')
+config_targetos = {
+  (targetos == 'windows' ? 'CONFIG_WIN32' : 'CONFIG_POSIX'): 'y'
+}
+if targetos == 'darwin'
+  config_targetos += {'CONFIG_DARWIN': 'y'}
+elif targetos == 'linux'
+  config_targetos += {'CONFIG_LINUX': 'y'}
+endif
+if targetos in bsd_oses
+  config_targetos += {'CONFIG_BSD': 'y'}
+endif
+
 config_all = {}
 config_all_devices = {}
 config_all_disas = {}
@@ -2841,7 +2859,7 @@ host_kconfig = \
   (have_vhost_vdpa ? ['CONFIG_VHOST_VDPA=y'] : []) + \
   (have_vhost_kernel ? ['CONFIG_VHOST_KERNEL=y'] : []) + \
   (have_virtfs ? ['CONFIG_VIRTFS=y'] : []) + \
-  ('CONFIG_LINUX' in config_host ? ['CONFIG_LINUX=y'] : []) + \
+  (targetos == 'linux' ? ['CONFIG_LINUX=y'] : []) + \
   (have_pvrdma ? ['CONFIG_PVRDMA=y'] : []) + \
   (multiprocess_allowed ? ['CONFIG_MULTIPROCESS_ALLOWED=y'] : []) + \
   (vfio_user_server_allowed ? ['CONFIG_VFIO_USER_SERVER_ALLOWED=y'] : [])
@@ -2862,7 +2880,7 @@ foreach target : target_dirs
     endif
     config_target += { 'CONFIG_LINUX_USER': 'y' }
   elif target.endswith('bsd-user')
-    if 'CONFIG_BSD' not in config_host
+    if targetos not in bsd_oses
       if default_targets
         continue
       endif
@@ -2993,7 +3011,7 @@ target_dirs = actual_target_dirs
 # pseudo symbol replaces it.
 
 config_all += config_all_devices
-config_all += config_host
+config_all += config_targetos
 config_all += config_all_disas
 config_all += {
   'CONFIG_XEN': xen.found(),
@@ -3281,6 +3299,7 @@ if have_system
     'hw/ssi',
     'hw/timer',
     'hw/tpm',
+    'hw/ufs',
     'hw/usb',
     'hw/vfio',
     'hw/virtio',
@@ -3344,7 +3363,7 @@ if enable_modules
   modulecommon = declare_dependency(link_whole: libmodulecommon, compile_args: '-DBUILD_DSO')
 endif
 
-qom_ss = qom_ss.apply(config_host, strict: false)
+qom_ss = qom_ss.apply(config_targetos, strict: false)
 libqom = static_library('qom', qom_ss.sources() + genh,
                         dependencies: [qom_ss.dependencies()],
                         name_suffix: 'fa',
@@ -3522,7 +3541,7 @@ foreach d, list : target_modules
       foreach target : target_dirs
         if target.endswith('-softmmu')
           config_target = config_target_mak[target]
-          config_target += config_host
+          config_target += config_targetos
           target_inc = [include_directories('target' / config_target['TARGET_BASE_ARCH'])]
           c_args = ['-DNEED_CPU_H',
                     '-DCONFIG_TARGET="@0@-config-target.h"'.format(target),
@@ -3583,7 +3602,7 @@ qemu_syms = custom_target('qemu.syms', output: 'qemu.syms',
                              capture: true,
                              command: [undefsym, nm, '@INPUT@'])
 
-authz_ss = authz_ss.apply(config_host, strict: false)
+authz_ss = authz_ss.apply(config_targetos, strict: false)
 libauthz = static_library('authz', authz_ss.sources() + genh,
                           dependencies: [authz_ss.dependencies()],
                           name_suffix: 'fa',
@@ -3593,7 +3612,7 @@ libauthz = static_library('authz', authz_ss.sources() + genh,
 authz = declare_dependency(link_whole: libauthz,
                            dependencies: qom)
 
-crypto_ss = crypto_ss.apply(config_host, strict: false)
+crypto_ss = crypto_ss.apply(config_targetos, strict: false)
 libcrypto = static_library('crypto', crypto_ss.sources() + genh,
                            dependencies: [crypto_ss.dependencies()],
                            name_suffix: 'fa',
@@ -3603,7 +3622,7 @@ libcrypto = static_library('crypto', crypto_ss.sources() + genh,
 crypto = declare_dependency(link_whole: libcrypto,
                             dependencies: [authz, qom])
 
-io_ss = io_ss.apply(config_host, strict: false)
+io_ss = io_ss.apply(config_targetos, strict: false)
 libio = static_library('io', io_ss.sources() + genh,
                        dependencies: [io_ss.dependencies()],
                        link_with: libqemuutil,
@@ -3621,7 +3640,7 @@ migration = declare_dependency(link_with: libmigration,
                                dependencies: [zlib, qom, io])
 system_ss.add(migration)
 
-block_ss = block_ss.apply(config_host, strict: false)
+block_ss = block_ss.apply(config_targetos, strict: false)
 libblock = static_library('block', block_ss.sources() + genh,
                           dependencies: block_ss.dependencies(),
                           link_depends: block_syms,
@@ -3633,7 +3652,7 @@ block = declare_dependency(link_whole: [libblock],
                            link_args: '@block.syms',
                            dependencies: [crypto, io])
 
-blockdev_ss = blockdev_ss.apply(config_host, strict: false)
+blockdev_ss = blockdev_ss.apply(config_targetos, strict: false)
 libblockdev = static_library('blockdev', blockdev_ss.sources() + genh,
                              dependencies: blockdev_ss.dependencies(),
                              name_suffix: 'fa',
@@ -3643,7 +3662,7 @@ libblockdev = static_library('blockdev', blockdev_ss.sources() + genh,
 blockdev = declare_dependency(link_whole: [libblockdev],
                               dependencies: [block, event_loop_base])
 
-qmp_ss = qmp_ss.apply(config_host, strict: false)
+qmp_ss = qmp_ss.apply(config_targetos, strict: false)
 libqmp = static_library('qmp', qmp_ss.sources() + genh,
                         dependencies: qmp_ss.dependencies(),
                         name_suffix: 'fa',
@@ -3660,7 +3679,7 @@ libchardev = static_library('chardev', chardev_ss.sources() + genh,
 
 chardev = declare_dependency(link_whole: libchardev)
 
-hwcore_ss = hwcore_ss.apply(config_host, strict: false)
+hwcore_ss = hwcore_ss.apply(config_targetos, strict: false)
 libhwcore = static_library('hwcore', sources: hwcore_ss.sources() + genh,
                            name_suffix: 'fa',
                            build_by_default: false,
@@ -3719,7 +3738,7 @@ foreach target : target_dirs
             '-DCONFIG_DEVICES="@0@-config-devices.h"'.format(target)]
   link_args = emulator_link_args
 
-  config_target += config_host
+  config_target += config_targetos
   target_inc = [include_directories('target' / config_target['TARGET_BASE_ARCH'])]
   if targetos == 'linux'
     target_inc += include_directories('linux-headers', is_system: true)
@@ -3921,7 +3940,7 @@ endforeach
 
 # Other build targets
 
-if 'CONFIG_PLUGIN' in config_host
+if get_option('plugins')
   install_headers('include/qemu/qemu-plugin.h')
 endif
 
@@ -4177,7 +4196,6 @@ endif
 summary_info = {}
 if have_system
   summary_info += {'KVM support':       config_all.has_key('CONFIG_KVM')}
-  summary_info += {'HAX support':       config_all.has_key('CONFIG_HAX')}
   summary_info += {'HVF support':       config_all.has_key('CONFIG_HVF')}
   summary_info += {'WHPX support':      config_all.has_key('CONFIG_WHPX')}
   summary_info += {'NVMM support':      config_all.has_key('CONFIG_NVMM')}
@@ -4194,8 +4212,8 @@ if config_all.has_key('CONFIG_TCG')
   else
     summary_info += {'TCG backend':   'native (@0@)'.format(cpu)}
   endif
-  summary_info += {'TCG plugins': config_host.has_key('CONFIG_PLUGIN')}
-  summary_info += {'TCG debug enabled': config_host.has_key('CONFIG_DEBUG_TCG')}
+  summary_info += {'TCG plugins':       get_option('plugins')}
+  summary_info += {'TCG debug enabled': get_option('debug_tcg')}
 endif
 summary_info += {'target list':       ' '.join(target_dirs)}
 if have_system
@@ -4297,6 +4315,7 @@ summary_info = {}
 if targetos == 'darwin'
   summary_info += {'vmnet.framework support': vmnet}
 endif
+summary_info += {'AF_XDP support':    libxdp}
 summary_info += {'slirp support':     slirp}
 summary_info += {'vde support':       vde}
 summary_info += {'netmap support':    have_netmap}
@@ -4354,28 +4373,37 @@ summary_info += {'selinux':           selinux}
 summary_info += {'libdw':             libdw}
 summary(summary_info, bool_yn: true, section: 'Dependencies')
 
-if not supported_cpus.contains(cpu)
+if host_arch == 'unknown'
   message()
-  warning('SUPPORT FOR THIS HOST CPU WILL GO AWAY IN FUTURE RELEASES!')
+  warning('UNSUPPORTED HOST CPU')
   message()
-  message('CPU host architecture ' + cpu + ' support is not currently maintained.')
-  message('The QEMU project intends to remove support for this host CPU in')
-  message('a future release if nobody volunteers to maintain it and to')
-  message('provide a build host for our continuous integration setup.')
-  message('configure has succeeded and you can continue to build, but')
-  message('if you care about QEMU on this platform you should contact')
-  message('us upstream at qemu-devel@nongnu.org.')
+  message('Support for CPU host architecture ' + cpu + ' is not currently')
+  message('maintained. The QEMU project does not guarantee that QEMU will')
+  message('compile or work on this host CPU. You can help by volunteering')
+  message('to maintain it and providing a build host for our continuous')
+  message('integration setup.')
+  if get_option('tcg').allowed() and target_dirs.length() > 0
+    message()
+    message('configure has succeeded and you can continue to build, but')
+    message('QEMU will use a slow interpreter to emulate the target CPU.')
+  endif
 endif
 
 if not supported_oses.contains(targetos)
   message()
-  warning('WARNING: SUPPORT FOR THIS HOST OS WILL GO AWAY IN FUTURE RELEASES!')
+  warning('UNSUPPORTED HOST OS')
   message()
-  message('Host OS ' + targetos + 'support is not currently maintained.')
-  message('The QEMU project intends to remove support for this host OS in')
-  message('a future release if nobody volunteers to maintain it and to')
-  message('provide a build host for our continuous integration setup.')
+  message('Support for host OS ' + targetos + 'is not currently maintained.')
   message('configure has succeeded and you can continue to build, but')
-  message('if you care about QEMU on this platform you should contact')
-  message('us upstream at qemu-devel@nongnu.org.')
+  message('the QEMU project does not guarantee that QEMU will compile or')
+  message('work on this operating system. You can help by volunteering')
+  message('to maintain it and providing a build host for our continuous')
+  message('integration setup. This will ensure that future versions of QEMU')
+  message('will keep working on ' + targetos + '.')
+endif
+
+if host_arch == 'unknown' or not supported_oses.contains(targetos)
+  message()
+  message('If you want to help supporting QEMU on this platform, please')
+  message('contact the developers at qemu-devel@nongnu.org.')
 endif
diff --git a/meson_options.txt b/meson_options.txt
index aaea5ddd77..2ca40f22e9 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -69,8 +69,6 @@ option('malloc', type : 'combo', choices : ['system', 'tcmalloc', 'jemalloc'],
 
 option('kvm', type: 'feature', value: 'auto',
        description: 'KVM acceleration support')
-option('hax', type: 'feature', value: 'auto',
-       description: 'HAX acceleration support')
 option('whpx', type: 'feature', value: 'auto',
        description: 'WHPX acceleration support')
 option('hvf', type: 'feature', value: 'auto',
@@ -83,6 +81,10 @@ option('xen_pci_passthrough', type: 'feature', value: 'auto',
        description: 'Xen PCI passthrough support')
 option('tcg', type: 'feature', value: 'enabled',
        description: 'TCG support')
+option('plugins', type: 'boolean', value: false,
+       description: 'TCG plugins via shared library loading')
+option('debug_tcg', type: 'boolean', value: false,
+       description: 'TCG debugging')
 option('tcg_interpreter', type: 'boolean', value: false,
        description: 'TCG with bytecode interpreter (slow)')
 option('safe_stack', type: 'boolean', value: false,
@@ -120,6 +122,8 @@ option('avx512bw', type: 'feature', value: 'auto',
 option('keyring', type: 'feature', value: 'auto',
        description: 'Linux keyring support')
 
+option('af_xdp', type : 'feature', value : 'auto',
+       description: 'AF_XDP network backend support')
 option('attr', type : 'feature', value : 'auto',
        description: 'attr/xattr support')
 option('auth_pam', type : 'feature', value : 'auto',
diff --git a/migration/channel-block.c b/migration/channel-block.c
index b7374363c3..fff8d87094 100644
--- a/migration/channel-block.c
+++ b/migration/channel-block.c
@@ -158,8 +158,9 @@ qio_channel_block_close(QIOChannel *ioc,
 
 static void
 qio_channel_block_set_aio_fd_handler(QIOChannel *ioc,
-                                     AioContext *ctx,
+                                     AioContext *read_ctx,
                                      IOHandler *io_read,
+                                     AioContext *write_ctx,
                                      IOHandler *io_write,
                                      void *opaque)
 {
diff --git a/migration/migration.c b/migration/migration.c
index 5528acb65e..d61e572742 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -1039,7 +1039,7 @@ static void fill_source_migration_info(MigrationInfo *info)
         populate_time_info(info, s);
         populate_ram_info(info, s);
         populate_disk_info(info);
-        populate_vfio_info(info);
+        migration_populate_vfio_info(info);
         break;
     case MIGRATION_STATUS_COLO:
         info->has_status = true;
@@ -1048,7 +1048,7 @@ static void fill_source_migration_info(MigrationInfo *info)
     case MIGRATION_STATUS_COMPLETED:
         populate_time_info(info, s);
         populate_ram_info(info, s);
-        populate_vfio_info(info);
+        migration_populate_vfio_info(info);
         break;
     case MIGRATION_STATUS_FAILED:
         info->has_status = true;
@@ -1392,8 +1392,15 @@ bool migration_is_active(MigrationState *s)
             s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE);
 }
 
-void migrate_init(MigrationState *s)
+int migrate_init(MigrationState *s, Error **errp)
 {
+    int ret;
+
+    ret = qemu_savevm_state_prepare(errp);
+    if (ret) {
+        return ret;
+    }
+
     /*
      * Reinitialise all migration state, except
      * parameters/capabilities that the user set, and
@@ -1425,6 +1432,15 @@ void migrate_init(MigrationState *s)
     s->iteration_initial_bytes = 0;
     s->threshold_size = 0;
     s->switchover_acked = false;
+    /*
+     * set mig_stats compression_counters memory to zero for a
+     * new migration
+     */
+    memset(&mig_stats, 0, sizeof(mig_stats));
+    memset(&compression_counters, 0, sizeof(compression_counters));
+    migration_reset_vfio_bytes_transferred();
+
+    return 0;
 }
 
 int migrate_add_blocker_internal(Error *reason, Error **errp)
@@ -1634,14 +1650,9 @@ static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc,
         migrate_set_block_incremental(true);
     }
 
-    migrate_init(s);
-    /*
-     * set mig_stats compression_counters memory to zero for a
-     * new migration
-     */
-    memset(&mig_stats, 0, sizeof(mig_stats));
-    memset(&compression_counters, 0, sizeof(compression_counters));
-    reset_vfio_bytes_transferred();
+    if (migrate_init(s, errp)) {
+        return false;
+    }
 
     return true;
 }
diff --git a/migration/migration.h b/migration/migration.h
index 6eea18db36..c390500604 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -472,7 +472,7 @@ void migrate_fd_connect(MigrationState *s, Error *error_in);
 bool migration_is_setup_or_active(int state);
 bool migration_is_running(int state);
 
-void migrate_init(MigrationState *s);
+int migrate_init(MigrationState *s, Error **errp);
 bool migration_is_blocked(Error **errp);
 /* True if outgoing migration has entered postcopy phase */
 bool migration_in_postcopy(void);
@@ -512,8 +512,8 @@ void migration_consume_urgent_request(void);
 bool migration_rate_limit(void);
 void migration_cancel(const Error *error);
 
-void populate_vfio_info(MigrationInfo *info);
-void reset_vfio_bytes_transferred(void);
+void migration_populate_vfio_info(MigrationInfo *info);
+void migration_reset_vfio_bytes_transferred(void);
 void postcopy_temp_page_reset(PostcopyTmpPage *tmp_page);
 
 #endif
diff --git a/migration/rdma.c b/migration/rdma.c
index ca430d319d..a2a3db35b1 100644
--- a/migration/rdma.c
+++ b/migration/rdma.c
@@ -3103,22 +3103,23 @@ static GSource *qio_channel_rdma_create_watch(QIOChannel *ioc,
 }
 
 static void qio_channel_rdma_set_aio_fd_handler(QIOChannel *ioc,
-                                                  AioContext *ctx,
-                                                  IOHandler *io_read,
-                                                  IOHandler *io_write,
-                                                  void *opaque)
+                                                AioContext *read_ctx,
+                                                IOHandler *io_read,
+                                                AioContext *write_ctx,
+                                                IOHandler *io_write,
+                                                void *opaque)
 {
     QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc);
     if (io_read) {
-        aio_set_fd_handler(ctx, rioc->rdmain->recv_comp_channel->fd, io_read,
-                           io_write, NULL, NULL, opaque);
-        aio_set_fd_handler(ctx, rioc->rdmain->send_comp_channel->fd, io_read,
-                           io_write, NULL, NULL, opaque);
+        aio_set_fd_handler(read_ctx, rioc->rdmain->recv_comp_channel->fd,
+                           io_read, io_write, NULL, NULL, opaque);
+        aio_set_fd_handler(read_ctx, rioc->rdmain->send_comp_channel->fd,
+                           io_read, io_write, NULL, NULL, opaque);
     } else {
-        aio_set_fd_handler(ctx, rioc->rdmaout->recv_comp_channel->fd, io_read,
-                           io_write, NULL, NULL, opaque);
-        aio_set_fd_handler(ctx, rioc->rdmaout->send_comp_channel->fd, io_read,
-                           io_write, NULL, NULL, opaque);
+        aio_set_fd_handler(write_ctx, rioc->rdmaout->recv_comp_channel->fd,
+                           io_read, io_write, NULL, NULL, opaque);
+        aio_set_fd_handler(write_ctx, rioc->rdmaout->send_comp_channel->fd,
+                           io_read, io_write, NULL, NULL, opaque);
     }
 }
 
diff --git a/migration/savevm.c b/migration/savevm.c
index cb3dc5c79e..471f854775 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -1210,6 +1210,30 @@ bool qemu_savevm_state_guest_unplug_pending(void)
     return false;
 }
 
+int qemu_savevm_state_prepare(Error **errp)
+{
+    SaveStateEntry *se;
+    int ret;
+
+    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
+        if (!se->ops || !se->ops->save_prepare) {
+            continue;
+        }
+        if (se->ops->is_active) {
+            if (!se->ops->is_active(se->opaque)) {
+                continue;
+            }
+        }
+
+        ret = se->ops->save_prepare(se->opaque, errp);
+        if (ret < 0) {
+            return ret;
+        }
+    }
+
+    return 0;
+}
+
 void qemu_savevm_state_setup(QEMUFile *f)
 {
     MigrationState *ms = migrate_get_current();
@@ -1596,10 +1620,10 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp)
         return -EINVAL;
     }
 
-    migrate_init(ms);
-    memset(&mig_stats, 0, sizeof(mig_stats));
-    memset(&compression_counters, 0, sizeof(compression_counters));
-    reset_vfio_bytes_transferred();
+    ret = migrate_init(ms, errp);
+    if (ret) {
+        return ret;
+    }
     ms->to_dst_file = f;
 
     qemu_mutex_unlock_iothread();
diff --git a/migration/savevm.h b/migration/savevm.h
index 31559e2198..aa15828e83 100644
--- a/migration/savevm.h
+++ b/migration/savevm.h
@@ -75,6 +75,7 @@ typedef struct SaveState {
 
 bool qemu_savevm_state_blocked(Error **errp);
 void qemu_savevm_non_migratable_list(strList **reasons);
+int qemu_savevm_state_prepare(Error **errp);
 void qemu_savevm_state_setup(QEMUFile *f);
 bool qemu_savevm_state_guest_unplug_pending(void);
 int qemu_savevm_state_resume_prepare(MigrationState *s);
diff --git a/migration/target.c b/migration/target.c
index f39c9a8d88..a6ffa9a5ce 100644
--- a/migration/target.c
+++ b/migration/target.c
@@ -15,7 +15,7 @@
 #endif
 
 #ifdef CONFIG_VFIO
-void populate_vfio_info(MigrationInfo *info)
+void migration_populate_vfio_info(MigrationInfo *info)
 {
     if (vfio_mig_active()) {
         info->vfio = g_malloc0(sizeof(*info->vfio));
@@ -23,16 +23,16 @@ void populate_vfio_info(MigrationInfo *info)
     }
 }
 
-void reset_vfio_bytes_transferred(void)
+void migration_reset_vfio_bytes_transferred(void)
 {
     vfio_reset_bytes_transferred();
 }
 #else
-void populate_vfio_info(MigrationInfo *info)
+void migration_populate_vfio_info(MigrationInfo *info)
 {
 }
 
-void reset_vfio_bytes_transferred(void)
+void migration_reset_vfio_bytes_transferred(void)
 {
 }
 #endif
diff --git a/monitor/monitor.c b/monitor/monitor.c
index dc352f9e9d..941f87815a 100644
--- a/monitor/monitor.c
+++ b/monitor/monitor.c
@@ -144,7 +144,7 @@ static gboolean monitor_unblocked(void *do_not_use, GIOCondition cond,
     QEMU_LOCK_GUARD(&mon->mon_lock);
     mon->out_watch = 0;
     monitor_flush_locked(mon);
-    return FALSE;
+    return G_SOURCE_REMOVE;
 }
 
 /* Caller must hold mon->mon_lock */
diff --git a/nbd/client-connection.c b/nbd/client-connection.c
index 3d14296c04..53a6549914 100644
--- a/nbd/client-connection.c
+++ b/nbd/client-connection.c
@@ -146,8 +146,7 @@ static int nbd_connect(QIOChannelSocket *sioc, SocketAddress *addr,
         return 0;
     }
 
-    ret = nbd_receive_negotiate(NULL, QIO_CHANNEL(sioc), tlscreds,
-                                tlshostname,
+    ret = nbd_receive_negotiate(QIO_CHANNEL(sioc), tlscreds, tlshostname,
                                 outioc, info, errp);
     if (ret < 0) {
         /*
@@ -197,7 +196,7 @@ static void *connect_thread_func(void *opaque)
          * conn->updated_info will finally be returned to the user. Clear the
          * pointers to our internally allocated strings, which are IN parameters
          * of nbd_receive_negotiate() and therefore nbd_connect(). Caller
-         * shoudn't be interested in these fields.
+         * shouldn't be interested in these fields.
          */
         conn->updated_info.x_dirty_bitmap = NULL;
         conn->updated_info.name = NULL;
diff --git a/nbd/client.c b/nbd/client.c
index 479208d5d9..bd7e200136 100644
--- a/nbd/client.c
+++ b/nbd/client.c
@@ -877,8 +877,7 @@ static int nbd_list_meta_contexts(QIOChannel *ioc,
  * Returns: negative errno: failure talking to server
  *          non-negative: enum NBDMode describing server abilities
  */
-static int nbd_start_negotiate(AioContext *aio_context, QIOChannel *ioc,
-                               QCryptoTLSCreds *tlscreds,
+static int nbd_start_negotiate(QIOChannel *ioc, QCryptoTLSCreds *tlscreds,
                                const char *hostname, QIOChannel **outioc,
                                bool structured_reply, bool *zeroes,
                                Error **errp)
@@ -946,10 +945,6 @@ static int nbd_start_negotiate(AioContext *aio_context, QIOChannel *ioc,
                     return -EINVAL;
                 }
                 ioc = *outioc;
-                if (aio_context) {
-                    qio_channel_set_blocking(ioc, false, NULL);
-                    qio_channel_attach_aio_context(ioc, aio_context);
-                }
             } else {
                 error_setg(errp, "Server does not support STARTTLS");
                 return -EINVAL;
@@ -1014,8 +1009,7 @@ static int nbd_negotiate_finish_oldstyle(QIOChannel *ioc, NBDExportInfo *info,
  * Returns: negative errno: failure talking to server
  *          0: server is connected
  */
-int nbd_receive_negotiate(AioContext *aio_context, QIOChannel *ioc,
-                          QCryptoTLSCreds *tlscreds,
+int nbd_receive_negotiate(QIOChannel *ioc, QCryptoTLSCreds *tlscreds,
                           const char *hostname, QIOChannel **outioc,
                           NBDExportInfo *info, Error **errp)
 {
@@ -1027,7 +1021,7 @@ int nbd_receive_negotiate(AioContext *aio_context, QIOChannel *ioc,
     assert(info->name && strlen(info->name) <= NBD_MAX_STRING_SIZE);
     trace_nbd_receive_negotiate_name(info->name);
 
-    result = nbd_start_negotiate(aio_context, ioc, tlscreds, hostname, outioc,
+    result = nbd_start_negotiate(ioc, tlscreds, hostname, outioc,
                                  info->structured_reply, &zeroes, errp);
     if (result < 0) {
         return result;
@@ -1150,7 +1144,7 @@ int nbd_receive_export_list(QIOChannel *ioc, QCryptoTLSCreds *tlscreds,
     QIOChannel *sioc = NULL;
 
     *info = NULL;
-    result = nbd_start_negotiate(NULL, ioc, tlscreds, hostname, &sioc, true,
+    result = nbd_start_negotiate(ioc, tlscreds, hostname, &sioc, true,
                                  NULL, errp);
     if (tlscreds && sioc) {
         ioc = sioc;
diff --git a/nbd/server.c b/nbd/server.c
index 8486b64b15..b5f93a20c9 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -1333,6 +1333,7 @@ static coroutine_fn int nbd_negotiate(NBDClient *client, Error **errp)
      */
 
     qio_channel_set_blocking(client->ioc, false, NULL);
+    qio_channel_set_follow_coroutine_ctx(client->ioc, true);
 
     trace_nbd_negotiate_begin();
     memcpy(buf, "NBDMAGIC", 8);
@@ -1352,11 +1353,6 @@ static coroutine_fn int nbd_negotiate(NBDClient *client, Error **errp)
         return ret;
     }
 
-    /* Attach the channel to the same AioContext as the export */
-    if (client->exp && client->exp->common.ctx) {
-        qio_channel_attach_aio_context(client->ioc, client->exp->common.ctx);
-    }
-
     assert(!client->optlen);
     trace_nbd_negotiate_success();
 
@@ -1465,7 +1461,6 @@ void nbd_client_put(NBDClient *client)
          */
         assert(client->closing);
 
-        qio_channel_detach_aio_context(client->ioc);
         object_unref(OBJECT(client->sioc));
         object_unref(OBJECT(client->ioc));
         if (client->tlscreds) {
@@ -1544,8 +1539,6 @@ static void blk_aio_attached(AioContext *ctx, void *opaque)
     exp->common.ctx = ctx;
 
     QTAILQ_FOREACH(client, &exp->clients, next) {
-        qio_channel_attach_aio_context(client->ioc, ctx);
-
         assert(client->nb_requests == 0);
         assert(client->recv_coroutine == NULL);
         assert(client->send_coroutine == NULL);
@@ -1555,14 +1548,9 @@ static void blk_aio_attached(AioContext *ctx, void *opaque)
 static void blk_aio_detach(void *opaque)
 {
     NBDExport *exp = opaque;
-    NBDClient *client;
 
     trace_nbd_blk_aio_detach(exp->name, exp->common.ctx);
 
-    QTAILQ_FOREACH(client, &exp->clients, next) {
-        qio_channel_detach_aio_context(client->ioc);
-    }
-
     exp->common.ctx = NULL;
 }
 
diff --git a/net/af-xdp.c b/net/af-xdp.c
new file mode 100644
index 0000000000..6c65028fb0
--- /dev/null
+++ b/net/af-xdp.c
@@ -0,0 +1,526 @@
+/*
+ * AF_XDP network backend.
+ *
+ * Copyright (c) 2023 Red Hat, Inc.
+ *
+ * Authors:
+ *  Ilya Maximets <i.maximets@ovn.org>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+
+#include "qemu/osdep.h"
+#include <bpf/bpf.h>
+#include <inttypes.h>
+#include <linux/if_link.h>
+#include <linux/if_xdp.h>
+#include <net/if.h>
+#include <xdp/xsk.h>
+
+#include "clients.h"
+#include "monitor/monitor.h"
+#include "net/net.h"
+#include "qapi/error.h"
+#include "qemu/cutils.h"
+#include "qemu/error-report.h"
+#include "qemu/iov.h"
+#include "qemu/main-loop.h"
+#include "qemu/memalign.h"
+
+
+typedef struct AFXDPState {
+    NetClientState       nc;
+
+    struct xsk_socket    *xsk;
+    struct xsk_ring_cons rx;
+    struct xsk_ring_prod tx;
+    struct xsk_ring_cons cq;
+    struct xsk_ring_prod fq;
+
+    char                 ifname[IFNAMSIZ];
+    int                  ifindex;
+    bool                 read_poll;
+    bool                 write_poll;
+    uint32_t             outstanding_tx;
+
+    uint64_t             *pool;
+    uint32_t             n_pool;
+    char                 *buffer;
+    struct xsk_umem      *umem;
+
+    uint32_t             n_queues;
+    uint32_t             xdp_flags;
+    bool                 inhibit;
+} AFXDPState;
+
+#define AF_XDP_BATCH_SIZE 64
+
+static void af_xdp_send(void *opaque);
+static void af_xdp_writable(void *opaque);
+
+/* Set the event-loop handlers for the af-xdp backend. */
+static void af_xdp_update_fd_handler(AFXDPState *s)
+{
+    qemu_set_fd_handler(xsk_socket__fd(s->xsk),
+                        s->read_poll ? af_xdp_send : NULL,
+                        s->write_poll ? af_xdp_writable : NULL,
+                        s);
+}
+
+/* Update the read handler. */
+static void af_xdp_read_poll(AFXDPState *s, bool enable)
+{
+    if (s->read_poll != enable) {
+        s->read_poll = enable;
+        af_xdp_update_fd_handler(s);
+    }
+}
+
+/* Update the write handler. */
+static void af_xdp_write_poll(AFXDPState *s, bool enable)
+{
+    if (s->write_poll != enable) {
+        s->write_poll = enable;
+        af_xdp_update_fd_handler(s);
+    }
+}
+
+static void af_xdp_poll(NetClientState *nc, bool enable)
+{
+    AFXDPState *s = DO_UPCAST(AFXDPState, nc, nc);
+
+    if (s->read_poll != enable || s->write_poll != enable) {
+        s->write_poll = enable;
+        s->read_poll  = enable;
+        af_xdp_update_fd_handler(s);
+    }
+}
+
+static void af_xdp_complete_tx(AFXDPState *s)
+{
+    uint32_t idx = 0;
+    uint32_t done, i;
+    uint64_t *addr;
+
+    done = xsk_ring_cons__peek(&s->cq, XSK_RING_CONS__DEFAULT_NUM_DESCS, &idx);
+
+    for (i = 0; i < done; i++) {
+        addr = (void *) xsk_ring_cons__comp_addr(&s->cq, idx++);
+        s->pool[s->n_pool++] = *addr;
+        s->outstanding_tx--;
+    }
+
+    if (done) {
+        xsk_ring_cons__release(&s->cq, done);
+    }
+}
+
+/*
+ * The fd_write() callback, invoked if the fd is marked as writable
+ * after a poll.
+ */
+static void af_xdp_writable(void *opaque)
+{
+    AFXDPState *s = opaque;
+
+    /* Try to recover buffers that are already sent. */
+    af_xdp_complete_tx(s);
+
+    /*
+     * Unregister the handler, unless we still have packets to transmit
+     * and kernel needs a wake up.
+     */
+    if (!s->outstanding_tx || !xsk_ring_prod__needs_wakeup(&s->tx)) {
+        af_xdp_write_poll(s, false);
+    }
+
+    /* Flush any buffered packets. */
+    qemu_flush_queued_packets(&s->nc);
+}
+
+static ssize_t af_xdp_receive(NetClientState *nc,
+                              const uint8_t *buf, size_t size)
+{
+    AFXDPState *s = DO_UPCAST(AFXDPState, nc, nc);
+    struct xdp_desc *desc;
+    uint32_t idx;
+    void *data;
+
+    /* Try to recover buffers that are already sent. */
+    af_xdp_complete_tx(s);
+
+    if (size > XSK_UMEM__DEFAULT_FRAME_SIZE) {
+        /* We can't transmit packet this size... */
+        return size;
+    }
+
+    if (!s->n_pool || !xsk_ring_prod__reserve(&s->tx, 1, &idx)) {
+        /*
+         * Out of buffers or space in tx ring.  Poll until we can write.
+         * This will also kick the Tx, if it was waiting on CQ.
+         */
+        af_xdp_write_poll(s, true);
+        return 0;
+    }
+
+    desc = xsk_ring_prod__tx_desc(&s->tx, idx);
+    desc->addr = s->pool[--s->n_pool];
+    desc->len = size;
+
+    data = xsk_umem__get_data(s->buffer, desc->addr);
+    memcpy(data, buf, size);
+
+    xsk_ring_prod__submit(&s->tx, 1);
+    s->outstanding_tx++;
+
+    if (xsk_ring_prod__needs_wakeup(&s->tx)) {
+        af_xdp_write_poll(s, true);
+    }
+
+    return size;
+}
+
+/*
+ * Complete a previous send (backend --> guest) and enable the
+ * fd_read callback.
+ */
+static void af_xdp_send_completed(NetClientState *nc, ssize_t len)
+{
+    AFXDPState *s = DO_UPCAST(AFXDPState, nc, nc);
+
+    af_xdp_read_poll(s, true);
+}
+
+static void af_xdp_fq_refill(AFXDPState *s, uint32_t n)
+{
+    uint32_t i, idx = 0;
+
+    /* Leave one packet for Tx, just in case. */
+    if (s->n_pool < n + 1) {
+        n = s->n_pool;
+    }
+
+    if (!n || !xsk_ring_prod__reserve(&s->fq, n, &idx)) {
+        return;
+    }
+
+    for (i = 0; i < n; i++) {
+        *xsk_ring_prod__fill_addr(&s->fq, idx++) = s->pool[--s->n_pool];
+    }
+    xsk_ring_prod__submit(&s->fq, n);
+
+    if (xsk_ring_prod__needs_wakeup(&s->fq)) {
+        /* Receive was blocked by not having enough buffers.  Wake it up. */
+        af_xdp_read_poll(s, true);
+    }
+}
+
+static void af_xdp_send(void *opaque)
+{
+    uint32_t i, n_rx, idx = 0;
+    AFXDPState *s = opaque;
+
+    n_rx = xsk_ring_cons__peek(&s->rx, AF_XDP_BATCH_SIZE, &idx);
+    if (!n_rx) {
+        return;
+    }
+
+    for (i = 0; i < n_rx; i++) {
+        const struct xdp_desc *desc;
+        struct iovec iov;
+
+        desc = xsk_ring_cons__rx_desc(&s->rx, idx++);
+
+        iov.iov_base = xsk_umem__get_data(s->buffer, desc->addr);
+        iov.iov_len = desc->len;
+
+        s->pool[s->n_pool++] = desc->addr;
+
+        if (!qemu_sendv_packet_async(&s->nc, &iov, 1,
+                                     af_xdp_send_completed)) {
+            /*
+             * The peer does not receive anymore.  Packet is queued, stop
+             * reading from the backend until af_xdp_send_completed().
+             */
+            af_xdp_read_poll(s, false);
+
+            /* Return unused descriptors to not break the ring cache. */
+            xsk_ring_cons__cancel(&s->rx, n_rx - i - 1);
+            n_rx = i + 1;
+            break;
+        }
+    }
+
+    /* Release actually sent descriptors and try to re-fill. */
+    xsk_ring_cons__release(&s->rx, n_rx);
+    af_xdp_fq_refill(s, AF_XDP_BATCH_SIZE);
+}
+
+/* Flush and close. */
+static void af_xdp_cleanup(NetClientState *nc)
+{
+    AFXDPState *s = DO_UPCAST(AFXDPState, nc, nc);
+
+    qemu_purge_queued_packets(nc);
+
+    af_xdp_poll(nc, false);
+
+    xsk_socket__delete(s->xsk);
+    s->xsk = NULL;
+    g_free(s->pool);
+    s->pool = NULL;
+    xsk_umem__delete(s->umem);
+    s->umem = NULL;
+    qemu_vfree(s->buffer);
+    s->buffer = NULL;
+
+    /* Remove the program if it's the last open queue. */
+    if (!s->inhibit && nc->queue_index == s->n_queues - 1 && s->xdp_flags
+        && bpf_xdp_detach(s->ifindex, s->xdp_flags, NULL) != 0) {
+        fprintf(stderr,
+                "af-xdp: unable to remove XDP program from '%s', ifindex: %d\n",
+                s->ifname, s->ifindex);
+    }
+}
+
+static int af_xdp_umem_create(AFXDPState *s, int sock_fd, Error **errp)
+{
+    struct xsk_umem_config config = {
+        .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
+        .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
+        .frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE,
+        .frame_headroom = 0,
+    };
+    uint64_t n_descs;
+    uint64_t size;
+    int64_t i;
+    int ret;
+
+    /* Number of descriptors if all 4 queues (rx, tx, cq, fq) are full. */
+    n_descs = (XSK_RING_PROD__DEFAULT_NUM_DESCS
+               + XSK_RING_CONS__DEFAULT_NUM_DESCS) * 2;
+    size = n_descs * XSK_UMEM__DEFAULT_FRAME_SIZE;
+
+    s->buffer = qemu_memalign(qemu_real_host_page_size(), size);
+    memset(s->buffer, 0, size);
+
+    if (sock_fd < 0) {
+        ret = xsk_umem__create(&s->umem, s->buffer, size,
+                               &s->fq, &s->cq, &config);
+    } else {
+        ret = xsk_umem__create_with_fd(&s->umem, sock_fd, s->buffer, size,
+                                       &s->fq, &s->cq, &config);
+    }
+
+    if (ret) {
+        qemu_vfree(s->buffer);
+        error_setg_errno(errp, errno,
+                         "failed to create umem for %s queue_index: %d",
+                         s->ifname, s->nc.queue_index);
+        return -1;
+    }
+
+    s->pool = g_new(uint64_t, n_descs);
+    /* Fill the pool in the opposite order, because it's a LIFO queue. */
+    for (i = n_descs; i >= 0; i--) {
+        s->pool[i] = i * XSK_UMEM__DEFAULT_FRAME_SIZE;
+    }
+    s->n_pool = n_descs;
+
+    af_xdp_fq_refill(s, XSK_RING_PROD__DEFAULT_NUM_DESCS);
+
+    return 0;
+}
+
+static int af_xdp_socket_create(AFXDPState *s,
+                                const NetdevAFXDPOptions *opts, Error **errp)
+{
+    struct xsk_socket_config cfg = {
+        .rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
+        .tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
+        .libxdp_flags = 0,
+        .bind_flags = XDP_USE_NEED_WAKEUP,
+        .xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST,
+    };
+    int queue_id, error = 0;
+
+    s->inhibit = opts->has_inhibit && opts->inhibit;
+    if (s->inhibit) {
+        cfg.libxdp_flags |= XSK_LIBXDP_FLAGS__INHIBIT_PROG_LOAD;
+    }
+
+    if (opts->has_force_copy && opts->force_copy) {
+        cfg.bind_flags |= XDP_COPY;
+    }
+
+    queue_id = s->nc.queue_index;
+    if (opts->has_start_queue && opts->start_queue > 0) {
+        queue_id += opts->start_queue;
+    }
+
+    if (opts->has_mode) {
+        /* Specific mode requested. */
+        cfg.xdp_flags |= (opts->mode == AFXDP_MODE_NATIVE)
+                         ? XDP_FLAGS_DRV_MODE : XDP_FLAGS_SKB_MODE;
+        if (xsk_socket__create(&s->xsk, s->ifname, queue_id,
+                               s->umem, &s->rx, &s->tx, &cfg)) {
+            error = errno;
+        }
+    } else {
+        /* No mode requested, try native first. */
+        cfg.xdp_flags |= XDP_FLAGS_DRV_MODE;
+
+        if (xsk_socket__create(&s->xsk, s->ifname, queue_id,
+                               s->umem, &s->rx, &s->tx, &cfg)) {
+            /* Can't use native mode, try skb. */
+            cfg.xdp_flags &= ~XDP_FLAGS_DRV_MODE;
+            cfg.xdp_flags |= XDP_FLAGS_SKB_MODE;
+
+            if (xsk_socket__create(&s->xsk, s->ifname, queue_id,
+                                   s->umem, &s->rx, &s->tx, &cfg)) {
+                error = errno;
+            }
+        }
+    }
+
+    if (error) {
+        error_setg_errno(errp, error,
+                         "failed to create AF_XDP socket for %s queue_id: %d",
+                         s->ifname, queue_id);
+        return -1;
+    }
+
+    s->xdp_flags = cfg.xdp_flags;
+
+    return 0;
+}
+
+/* NetClientInfo methods. */
+static NetClientInfo net_af_xdp_info = {
+    .type = NET_CLIENT_DRIVER_AF_XDP,
+    .size = sizeof(AFXDPState),
+    .receive = af_xdp_receive,
+    .poll = af_xdp_poll,
+    .cleanup = af_xdp_cleanup,
+};
+
+static int *parse_socket_fds(const char *sock_fds_str,
+                             int64_t n_expected, Error **errp)
+{
+    gchar **substrings = g_strsplit(sock_fds_str, ":", -1);
+    int64_t i, n_sock_fds = g_strv_length(substrings);
+    int *sock_fds = NULL;
+
+    if (n_sock_fds != n_expected) {
+        error_setg(errp, "expected %"PRIi64" socket fds, got %"PRIi64,
+                   n_expected, n_sock_fds);
+        goto exit;
+    }
+
+    sock_fds = g_new(int, n_sock_fds);
+
+    for (i = 0; i < n_sock_fds; i++) {
+        sock_fds[i] = monitor_fd_param(monitor_cur(), substrings[i], errp);
+        if (sock_fds[i] < 0) {
+            g_free(sock_fds);
+            sock_fds = NULL;
+            goto exit;
+        }
+    }
+
+exit:
+    g_strfreev(substrings);
+    return sock_fds;
+}
+
+/*
+ * The exported init function.
+ *
+ * ... -netdev af-xdp,ifname="..."
+ */
+int net_init_af_xdp(const Netdev *netdev,
+                    const char *name, NetClientState *peer, Error **errp)
+{
+    const NetdevAFXDPOptions *opts = &netdev->u.af_xdp;
+    NetClientState *nc, *nc0 = NULL;
+    unsigned int ifindex;
+    uint32_t prog_id = 0;
+    int *sock_fds = NULL;
+    int64_t i, queues;
+    Error *err = NULL;
+    AFXDPState *s;
+
+    ifindex = if_nametoindex(opts->ifname);
+    if (!ifindex) {
+        error_setg_errno(errp, errno, "failed to get ifindex for '%s'",
+                         opts->ifname);
+        return -1;
+    }
+
+    queues = opts->has_queues ? opts->queues : 1;
+    if (queues < 1) {
+        error_setg(errp, "invalid number of queues (%" PRIi64 ") for '%s'",
+                   queues, opts->ifname);
+        return -1;
+    }
+
+    if ((opts->has_inhibit && opts->inhibit) != !!opts->sock_fds) {
+        error_setg(errp, "'inhibit=on' requires 'sock-fds' and vice versa");
+        return -1;
+    }
+
+    if (opts->sock_fds) {
+        sock_fds = parse_socket_fds(opts->sock_fds, queues, errp);
+        if (!sock_fds) {
+            return -1;
+        }
+    }
+
+    for (i = 0; i < queues; i++) {
+        nc = qemu_new_net_client(&net_af_xdp_info, peer, "af-xdp", name);
+        qemu_set_info_str(nc, "af-xdp%"PRIi64" to %s", i, opts->ifname);
+        nc->queue_index = i;
+
+        if (!nc0) {
+            nc0 = nc;
+        }
+
+        s = DO_UPCAST(AFXDPState, nc, nc);
+
+        pstrcpy(s->ifname, sizeof(s->ifname), opts->ifname);
+        s->ifindex = ifindex;
+        s->n_queues = queues;
+
+        if (af_xdp_umem_create(s, sock_fds ? sock_fds[i] : -1, errp)
+            || af_xdp_socket_create(s, opts, errp)) {
+            /* Make sure the XDP program will be removed. */
+            s->n_queues = i;
+            error_propagate(errp, err);
+            goto err;
+        }
+    }
+
+    if (nc0) {
+        s = DO_UPCAST(AFXDPState, nc, nc0);
+        if (bpf_xdp_query_id(s->ifindex, s->xdp_flags, &prog_id) || !prog_id) {
+            error_setg_errno(errp, errno,
+                             "no XDP program loaded on '%s', ifindex: %d",
+                             s->ifname, s->ifindex);
+            goto err;
+        }
+    }
+
+    af_xdp_read_poll(s, true); /* Initially only poll for reads. */
+
+    return 0;
+
+err:
+    g_free(sock_fds);
+    if (nc0) {
+        qemu_del_net_client(nc0);
+    }
+
+    return -1;
+}
diff --git a/net/checksum.c b/net/checksum.c
index 68245fd748..1a957e4c0b 100644
--- a/net/checksum.c
+++ b/net/checksum.c
@@ -74,7 +74,7 @@ void net_checksum_calculate(uint8_t *data, int length, int csum_flag)
         return;
     }
 
-    /* Handle the optionnal VLAN headers */
+    /* Handle the optional VLAN headers */
     switch (lduw_be_p(&PKT_GET_ETH_HDR(data)->h_proto)) {
     case ETH_P_VLAN:
         mac_hdr_len = sizeof(struct eth_header) +
@@ -96,7 +96,7 @@ void net_checksum_calculate(uint8_t *data, int length, int csum_flag)
 
     length -= mac_hdr_len;
 
-    /* Now check we have an IP header (with an optionnal VLAN header) */
+    /* Now check we have an IP header (with an optional VLAN header) */
     if (length < sizeof(struct ip_header)) {
         return;
     }
diff --git a/net/clients.h b/net/clients.h
index ed8bdfff1e..be53794582 100644
--- a/net/clients.h
+++ b/net/clients.h
@@ -64,6 +64,11 @@ int net_init_netmap(const Netdev *netdev, const char *name,
                     NetClientState *peer, Error **errp);
 #endif
 
+#ifdef CONFIG_AF_XDP
+int net_init_af_xdp(const Netdev *netdev, const char *name,
+                    NetClientState *peer, Error **errp);
+#endif
+
 int net_init_vhost_user(const Netdev *netdev, const char *name,
                         NetClientState *peer, Error **errp);
 
diff --git a/net/dump.c b/net/dump.c
index 7d05f16ca7..16073f2458 100644
--- a/net/dump.c
+++ b/net/dump.c
@@ -68,7 +68,7 @@ static ssize_t dump_receive_iov(DumpState *s, const struct iovec *iov, int cnt,
     int64_t ts;
     int caplen;
     size_t size = iov_size(iov, cnt) - offset;
-    struct iovec dumpiov[cnt + 1];
+    g_autofree struct iovec *dumpiov = g_new(struct iovec, cnt + 1);
 
     /* Early return in case of previous error. */
     if (s->fd < 0) {
diff --git a/net/filter.c b/net/filter.c
index 3fe88fa43f..3335908771 100644
--- a/net/filter.c
+++ b/net/filter.c
@@ -91,8 +91,8 @@ ssize_t qemu_netfilter_pass_to_next(NetClientState *sender,
     next = netfilter_next(nf, direction);
     while (next) {
         /*
-         * if qemu_netfilter_pass_to_next been called, means that
-         * the packet has been hold by filter and has already retured size
+         * if qemu_netfilter_pass_to_next has been called, it means that
+         * the packet was held by  a filter and has already returned size
          * to the sender, so sent_cb shouldn't be called later, just
          * pass NULL to next.
          */
@@ -106,7 +106,7 @@ ssize_t qemu_netfilter_pass_to_next(NetClientState *sender,
 
     /*
      * We have gone through all filters, pass it to receiver.
-     * Do the valid check again incase sender or receiver been
+     * Do the valid check again in case sender or receiver been
      * deleted while we go through filters.
      */
     if (sender && sender->peer) {
diff --git a/net/meson.build b/net/meson.build
index bdf564a57b..ce99bd4447 100644
--- a/net/meson.build
+++ b/net/meson.build
@@ -36,20 +36,25 @@ system_ss.add(when: vde, if_true: files('vde.c'))
 if have_netmap
   system_ss.add(files('netmap.c'))
 endif
+
+system_ss.add(when: libxdp, if_true: files('af-xdp.c'))
+
 if have_vhost_net_user
   system_ss.add(when: 'CONFIG_VIRTIO_NET', if_true: files('vhost-user.c'), if_false: files('vhost-user-stub.c'))
   system_ss.add(when: 'CONFIG_ALL', if_true: files('vhost-user-stub.c'))
 endif
 
-system_ss.add(when: 'CONFIG_LINUX', if_true: files('tap-linux.c'))
-system_ss.add(when: 'CONFIG_BSD', if_true: files('tap-bsd.c'))
-system_ss.add(when: 'CONFIG_SOLARIS', if_true: files('tap-solaris.c'))
-tap_posix = ['tap.c']
-if not config_host.has_key('CONFIG_LINUX') and not config_host.has_key('CONFIG_BSD') and not config_host.has_key('CONFIG_SOLARIS')
-  tap_posix += 'tap-stub.c'
+if targetos == 'windows'
+  system_ss.add(files('tap-win32.c'))
+elif targetos == 'linux'
+  system_ss.add(files('tap.c', 'tap-linux.c'))
+elif targetos in bsd_oses
+  system_ss.add(files('tap.c', 'tap-bsd.c'))
+elif targetos == 'sunos'
+  system_ss.add(files('tap.c', 'tap-solaris.c'))
+else
+  system_ss.add(files('tap.c', 'tap-stub.c'))
 endif
-system_ss.add(when: 'CONFIG_POSIX', if_true: files(tap_posix))
-system_ss.add(when: 'CONFIG_WIN32', if_true: files('tap-win32.c'))
 if have_vhost_net_vdpa
   system_ss.add(when: 'CONFIG_VIRTIO_NET', if_true: files('vhost-vdpa.c'), if_false: files('vhost-vdpa-stub.c'))
   system_ss.add(when: 'CONFIG_ALL', if_true: files('vhost-vdpa-stub.c'))
diff --git a/net/net.c b/net/net.c
index 6492ad530e..1c0bfdaa6c 100644
--- a/net/net.c
+++ b/net/net.c
@@ -495,6 +495,15 @@ bool qemu_has_ufo(NetClientState *nc)
     return nc->info->has_ufo(nc);
 }
 
+bool qemu_has_uso(NetClientState *nc)
+{
+    if (!nc || !nc->info->has_uso) {
+        return false;
+    }
+
+    return nc->info->has_uso(nc);
+}
+
 bool qemu_has_vnet_hdr(NetClientState *nc)
 {
     if (!nc || !nc->info->has_vnet_hdr) {
@@ -532,13 +541,13 @@ void qemu_using_vnet_hdr(NetClientState *nc, bool enable)
 }
 
 void qemu_set_offload(NetClientState *nc, int csum, int tso4, int tso6,
-                          int ecn, int ufo)
+                          int ecn, int ufo, int uso4, int uso6)
 {
     if (!nc || !nc->info->set_offload) {
         return;
     }
 
-    nc->info->set_offload(nc, csum, tso4, tso6, ecn, ufo);
+    nc->info->set_offload(nc, csum, tso4, tso6, ecn, ufo, uso4, uso6);
 }
 
 int qemu_get_vnet_hdr_len(NetClientState *nc)
@@ -1082,6 +1091,9 @@ static int (* const net_client_init_fun[NET_CLIENT_DRIVER__MAX])(
 #ifdef CONFIG_NETMAP
         [NET_CLIENT_DRIVER_NETMAP]    = net_init_netmap,
 #endif
+#ifdef CONFIG_AF_XDP
+        [NET_CLIENT_DRIVER_AF_XDP]    = net_init_af_xdp,
+#endif
 #ifdef CONFIG_NET_BRIDGE
         [NET_CLIENT_DRIVER_BRIDGE]    = net_init_bridge,
 #endif
@@ -1186,6 +1198,9 @@ void show_netdevs(void)
 #ifdef CONFIG_NETMAP
         "netmap",
 #endif
+#ifdef CONFIG_AF_XDP
+        "af-xdp",
+#endif
 #ifdef CONFIG_POSIX
         "vhost-user",
 #endif
diff --git a/net/netmap.c b/net/netmap.c
index 9e0cec58d3..241b27c8e9 100644
--- a/net/netmap.c
+++ b/net/netmap.c
@@ -371,7 +371,7 @@ static void netmap_set_vnet_hdr_len(NetClientState *nc, int len)
 }
 
 static void netmap_set_offload(NetClientState *nc, int csum, int tso4, int tso6,
-                               int ecn, int ufo)
+                               int ecn, int ufo, int uso4, int uso6)
 {
     NetmapState *s = DO_UPCAST(NetmapState, nc, nc);
 
diff --git a/net/tap-bsd.c b/net/tap-bsd.c
index 4c98fdd337..274ea7bd2c 100644
--- a/net/tap-bsd.c
+++ b/net/tap-bsd.c
@@ -212,6 +212,11 @@ int tap_probe_has_ufo(int fd)
     return 0;
 }
 
+int tap_probe_has_uso(int fd)
+{
+    return 0;
+}
+
 int tap_probe_vnet_hdr_len(int fd, int len)
 {
     return 0;
@@ -232,7 +237,7 @@ int tap_fd_set_vnet_be(int fd, int is_be)
 }
 
 void tap_fd_set_offload(int fd, int csum, int tso4,
-                        int tso6, int ecn, int ufo)
+                        int tso6, int ecn, int ufo, int uso4, int uso6)
 {
 }
 
diff --git a/net/tap-linux.c b/net/tap-linux.c
index f54f308d35..c7e514ecb0 100644
--- a/net/tap-linux.c
+++ b/net/tap-linux.c
@@ -173,6 +173,18 @@ int tap_probe_has_ufo(int fd)
     return 1;
 }
 
+int tap_probe_has_uso(int fd)
+{
+    unsigned offload;
+
+    offload = TUN_F_CSUM | TUN_F_USO4 | TUN_F_USO6;
+
+    if (ioctl(fd, TUNSETOFFLOAD, offload) < 0) {
+        return 0;
+    }
+    return 1;
+}
+
 /* Verify that we can assign given length */
 int tap_probe_vnet_hdr_len(int fd, int len)
 {
@@ -237,7 +249,7 @@ int tap_fd_set_vnet_be(int fd, int is_be)
 }
 
 void tap_fd_set_offload(int fd, int csum, int tso4,
-                        int tso6, int ecn, int ufo)
+                        int tso6, int ecn, int ufo, int uso4, int uso6)
 {
     unsigned int offload = 0;
 
@@ -256,13 +268,22 @@ void tap_fd_set_offload(int fd, int csum, int tso4,
             offload |= TUN_F_TSO_ECN;
         if (ufo)
             offload |= TUN_F_UFO;
+        if (uso4) {
+            offload |= TUN_F_USO4;
+        }
+        if (uso6) {
+            offload |= TUN_F_USO6;
+        }
     }
 
     if (ioctl(fd, TUNSETOFFLOAD, offload) != 0) {
-        offload &= ~TUN_F_UFO;
+        offload &= ~(TUN_F_USO4 | TUN_F_USO6);
         if (ioctl(fd, TUNSETOFFLOAD, offload) != 0) {
-            fprintf(stderr, "TUNSETOFFLOAD ioctl() failed: %s\n",
+            offload &= ~TUN_F_UFO;
+            if (ioctl(fd, TUNSETOFFLOAD, offload) != 0) {
+                fprintf(stderr, "TUNSETOFFLOAD ioctl() failed: %s\n",
                     strerror(errno));
+            }
         }
     }
 }
diff --git a/net/tap-linux.h b/net/tap-linux.h
index bbbb62c2a7..9a58cecb7f 100644
--- a/net/tap-linux.h
+++ b/net/tap-linux.h
@@ -50,5 +50,7 @@
 #define TUN_F_TSO6    0x04    /* I can handle TSO for IPv6 packets */
 #define TUN_F_TSO_ECN 0x08    /* I can handle TSO with ECN bits. */
 #define TUN_F_UFO     0x10    /* I can handle UFO packets */
+#define TUN_F_USO4    0x20    /* I can handle USO for IPv4 packets */
+#define TUN_F_USO6    0x40    /* I can handle USO for IPv6 packets */
 
 #endif /* QEMU_TAP_LINUX_H */
diff --git a/net/tap-solaris.c b/net/tap-solaris.c
index 38e15028bf..08b13af512 100644
--- a/net/tap-solaris.c
+++ b/net/tap-solaris.c
@@ -216,6 +216,11 @@ int tap_probe_has_ufo(int fd)
     return 0;
 }
 
+int tap_probe_has_uso(int fd)
+{
+    return 0;
+}
+
 int tap_probe_vnet_hdr_len(int fd, int len)
 {
     return 0;
@@ -236,7 +241,7 @@ int tap_fd_set_vnet_be(int fd, int is_be)
 }
 
 void tap_fd_set_offload(int fd, int csum, int tso4,
-                        int tso6, int ecn, int ufo)
+                        int tso6, int ecn, int ufo, int uso4, int uso6)
 {
 }
 
diff --git a/net/tap-stub.c b/net/tap-stub.c
index a0fa25804b..4b24f61e3a 100644
--- a/net/tap-stub.c
+++ b/net/tap-stub.c
@@ -47,6 +47,11 @@ int tap_probe_has_ufo(int fd)
     return 0;
 }
 
+int tap_probe_has_uso(int fd)
+{
+    return 0;
+}
+
 int tap_probe_vnet_hdr_len(int fd, int len)
 {
     return 0;
@@ -67,7 +72,7 @@ int tap_fd_set_vnet_be(int fd, int is_be)
 }
 
 void tap_fd_set_offload(int fd, int csum, int tso4,
-                        int tso6, int ecn, int ufo)
+                        int tso6, int ecn, int ufo, int uso4, int uso6)
 {
 }
 
diff --git a/net/tap-win32.c b/net/tap-win32.c
index f327d62ab0..7b8b4be02c 100644
--- a/net/tap-win32.c
+++ b/net/tap-win32.c
@@ -741,7 +741,7 @@ static void tap_using_vnet_hdr(NetClientState *nc, bool using_vnet_hdr)
 }
 
 static void tap_set_offload(NetClientState *nc, int csum, int tso4,
-                     int tso6, int ecn, int ufo)
+                     int tso6, int ecn, int ufo, int uso4, int uso6)
 {
 }
 
diff --git a/net/tap.c b/net/tap.c
index 1bf085d422..c23d0323c2 100644
--- a/net/tap.c
+++ b/net/tap.c
@@ -57,6 +57,7 @@ typedef struct TAPState {
     bool write_poll;
     bool using_vnet_hdr;
     bool has_ufo;
+    bool has_uso;
     bool enabled;
     VHostNetState *vhost_net;
     unsigned host_vnet_hdr_len;
@@ -117,10 +118,11 @@ static ssize_t tap_receive_iov(NetClientState *nc, const struct iovec *iov,
 {
     TAPState *s = DO_UPCAST(TAPState, nc, nc);
     const struct iovec *iovp = iov;
-    struct iovec iov_copy[iovcnt + 1];
+    g_autofree struct iovec *iov_copy = NULL;
     struct virtio_net_hdr_mrg_rxbuf hdr = { };
 
     if (s->host_vnet_hdr_len && !s->using_vnet_hdr) {
+        iov_copy = g_new(struct iovec, iovcnt + 1);
         iov_copy[0].iov_base = &hdr;
         iov_copy[0].iov_len =  s->host_vnet_hdr_len;
         memcpy(&iov_copy[1], iov, iovcnt * sizeof(*iov));
@@ -237,6 +239,15 @@ static bool tap_has_ufo(NetClientState *nc)
     return s->has_ufo;
 }
 
+static bool tap_has_uso(NetClientState *nc)
+{
+    TAPState *s = DO_UPCAST(TAPState, nc, nc);
+
+    assert(nc->info->type == NET_CLIENT_DRIVER_TAP);
+
+    return s->has_uso;
+}
+
 static bool tap_has_vnet_hdr(NetClientState *nc)
 {
     TAPState *s = DO_UPCAST(TAPState, nc, nc);
@@ -307,14 +318,14 @@ static int tap_set_vnet_be(NetClientState *nc, bool is_be)
 }
 
 static void tap_set_offload(NetClientState *nc, int csum, int tso4,
-                     int tso6, int ecn, int ufo)
+                     int tso6, int ecn, int ufo, int uso4, int uso6)
 {
     TAPState *s = DO_UPCAST(TAPState, nc, nc);
     if (s->fd < 0) {
         return;
     }
 
-    tap_fd_set_offload(s->fd, csum, tso4, tso6, ecn, ufo);
+    tap_fd_set_offload(s->fd, csum, tso4, tso6, ecn, ufo, uso4, uso6);
 }
 
 static void tap_exit_notify(Notifier *notifier, void *data)
@@ -384,6 +395,7 @@ static NetClientInfo net_tap_info = {
     .poll = tap_poll,
     .cleanup = tap_cleanup,
     .has_ufo = tap_has_ufo,
+    .has_uso = tap_has_uso,
     .has_vnet_hdr = tap_has_vnet_hdr,
     .has_vnet_hdr_len = tap_has_vnet_hdr_len,
     .get_using_vnet_hdr = tap_get_using_vnet_hdr,
@@ -413,8 +425,9 @@ static TAPState *net_tap_fd_init(NetClientState *peer,
     s->host_vnet_hdr_len = vnet_hdr ? sizeof(struct virtio_net_hdr) : 0;
     s->using_vnet_hdr = false;
     s->has_ufo = tap_probe_has_ufo(s->fd);
+    s->has_uso = tap_probe_has_uso(s->fd);
     s->enabled = true;
-    tap_set_offload(&s->nc, 0, 0, 0, 0, 0);
+    tap_set_offload(&s->nc, 0, 0, 0, 0, 0, 0, 0);
     /*
      * Make sure host header length is set correctly in tap:
      * it might have been modified by another instance of qemu.
diff --git a/net/tap_int.h b/net/tap_int.h
index 547f8a5a28..9a2175655b 100644
--- a/net/tap_int.h
+++ b/net/tap_int.h
@@ -37,7 +37,9 @@ void tap_set_sndbuf(int fd, const NetdevTapOptions *tap, Error **errp);
 int tap_probe_vnet_hdr(int fd, Error **errp);
 int tap_probe_vnet_hdr_len(int fd, int len);
 int tap_probe_has_ufo(int fd);
-void tap_fd_set_offload(int fd, int csum, int tso4, int tso6, int ecn, int ufo);
+int tap_probe_has_uso(int fd);
+void tap_fd_set_offload(int fd, int csum, int tso4, int tso6, int ecn, int ufo,
+                        int uso4, int uso6);
 void tap_fd_set_vnet_hdr_len(int fd, int len);
 int tap_fd_set_vnet_le(int fd, int vnet_is_le);
 int tap_fd_set_vnet_be(int fd, int vnet_is_be);
diff --git a/net/vhost-user.c b/net/vhost-user.c
index 5993e4afca..12555518e8 100644
--- a/net/vhost-user.c
+++ b/net/vhost-user.c
@@ -239,7 +239,7 @@ static gboolean net_vhost_user_watch(void *do_not_use, GIOCondition cond,
 
     qemu_chr_fe_disconnect(&s->chr);
 
-    return TRUE;
+    return G_SOURCE_CONTINUE;
 }
 
 static void net_vhost_user_event(void *opaque, QEMUChrEvent event);
diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
index 9795306742..4e94c50bc7 100644
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -75,11 +75,14 @@ const int vdpa_feature_bits[] = {
     VIRTIO_NET_F_GUEST_TSO4,
     VIRTIO_NET_F_GUEST_TSO6,
     VIRTIO_NET_F_GUEST_UFO,
+    VIRTIO_NET_F_GUEST_USO4,
+    VIRTIO_NET_F_GUEST_USO6,
     VIRTIO_NET_F_HASH_REPORT,
     VIRTIO_NET_F_HOST_ECN,
     VIRTIO_NET_F_HOST_TSO4,
     VIRTIO_NET_F_HOST_TSO6,
     VIRTIO_NET_F_HOST_UFO,
+    VIRTIO_NET_F_HOST_USO,
     VIRTIO_NET_F_MQ,
     VIRTIO_NET_F_MRG_RXBUF,
     VIRTIO_NET_F_MTU,
@@ -821,7 +824,7 @@ static int vhost_vdpa_net_load_rx(VhostVDPAState *s,
      * According to virtio_net_reset(), device turns promiscuous mode
      * on by default.
      *
-     * Addtionally, according to VirtIO standard, "Since there are
+     * Additionally, according to VirtIO standard, "Since there are
      * no guarantees, it can use a hash filter or silently switch to
      * allmulti or promiscuous mode if it is given too many addresses.".
      * QEMU marks `n->mac_table.uni_overflow` if guest sets too many
@@ -1130,7 +1133,7 @@ static int vhost_vdpa_net_excessive_mac_filter_cvq_add(VhostVDPAState *s,
      * Pack the non-multicast MAC addresses part for fake CVQ command.
      *
      * According to virtio_net_handle_mac(), QEMU doesn't verify the MAC
-     * addresses provieded in CVQ command. Therefore, only the entries
+     * addresses provided in CVQ command. Therefore, only the entries
      * field need to be prepared in the CVQ command.
      */
     mac_ptr = out->iov_base + cursor;
@@ -1141,7 +1144,7 @@ static int vhost_vdpa_net_excessive_mac_filter_cvq_add(VhostVDPAState *s,
      * Pack the multicast MAC addresses part for fake CVQ command.
      *
      * According to virtio_net_handle_mac(), QEMU doesn't verify the MAC
-     * addresses provieded in CVQ command. Therefore, only the entries
+     * addresses provided in CVQ command. Therefore, only the entries
      * field need to be prepared in the CVQ command.
      */
     mac_ptr = out->iov_base + cursor;
@@ -1202,7 +1205,7 @@ static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq,
          * rejects the flawed CVQ command.
          *
          * Therefore, QEMU must handle this situation instead of sending
-         * the CVQ command direclty.
+         * the CVQ command directly.
          */
         dev_written = vhost_vdpa_net_excessive_mac_filter_cvq_add(s, elem,
                                                                   &out);
diff --git a/os-posix.c b/os-posix.c
index cfcb96533c..f90dfda9b0 100644
--- a/os-posix.c
+++ b/os-posix.c
@@ -29,33 +29,15 @@
 #include <grp.h>
 #include <libgen.h>
 
-/* Needed early for CONFIG_BSD etc. */
-#include "net/slirp.h"
-#include "qemu/qemu-options.h"
 #include "qemu/error-report.h"
 #include "qemu/log.h"
 #include "sysemu/runstate.h"
 #include "qemu/cutils.h"
-#include "qemu/config-file.h"
-#include "qemu/option.h"
-#include "qemu/module.h"
 
 #ifdef CONFIG_LINUX
 #include <sys/prctl.h>
-#include "qemu/async-teardown.h"
 #endif
 
-/*
- * Must set all three of these at once.
- * Legal combinations are              unset   by name   by uid
- */
-static struct passwd *user_pwd;    /*   NULL   non-NULL   NULL   */
-static uid_t user_uid = (uid_t)-1; /*   -1      -1        >=0    */
-static gid_t user_gid = (gid_t)-1; /*   -1      -1        >=0    */
-
-static const char *chroot_dir;
-static int daemonize;
-static int daemon_pipe;
 
 void os_setup_early_signal_handling(void)
 {
@@ -103,7 +85,22 @@ void os_set_proc_name(const char *s)
 }
 
 
-static bool os_parse_runas_uid_gid(const char *optarg)
+/*
+ * Must set all three of these at once.
+ * Legal combinations are              unset   by name   by uid
+ */
+static struct passwd *user_pwd;    /*   NULL   non-NULL   NULL   */
+static uid_t user_uid = (uid_t)-1; /*   -1      -1        >=0    */
+static gid_t user_gid = (gid_t)-1; /*   -1      -1        >=0    */
+
+/*
+ * Prepare to change user ID. optarg can be one of 3 forms:
+ *   - a username, in which case user ID will be changed to its uid,
+ *     with primary and supplementary groups set up too;
+ *   - a numeric uid, in which case only the uid will be set;
+ *   - a pair of numeric uid:gid.
+ */
+bool os_set_runas(const char *optarg)
 {
     unsigned long lv;
     const char *ep;
@@ -111,6 +108,13 @@ static bool os_parse_runas_uid_gid(const char *optarg)
     gid_t got_gid;
     int rc;
 
+    user_pwd = getpwnam(optarg);
+    if (user_pwd) {
+        user_uid = -1;
+        user_gid = -1;
+        return true;
+    }
+
     rc = qemu_strtoul(optarg, &ep, 0, &lv);
     got_uid = lv; /* overflow here is ID in C99 */
     if (rc || *ep != ':' || got_uid != lv || got_uid == (uid_t)-1) {
@@ -129,63 +133,6 @@ static bool os_parse_runas_uid_gid(const char *optarg)
     return true;
 }
 
-/*
- * Parse OS specific command line options.
- * return 0 if option handled, -1 otherwise
- */
-int os_parse_cmd_args(int index, const char *optarg)
-{
-    switch (index) {
-    case QEMU_OPTION_runas:
-        user_pwd = getpwnam(optarg);
-        if (user_pwd) {
-            user_uid = -1;
-            user_gid = -1;
-        } else if (!os_parse_runas_uid_gid(optarg)) {
-            error_report("User \"%s\" doesn't exist"
-                         " (and is not <uid>:<gid>)",
-                         optarg);
-            exit(1);
-        }
-        break;
-    case QEMU_OPTION_chroot:
-        warn_report("option is deprecated, use '-run-with chroot=...' instead");
-        chroot_dir = optarg;
-        break;
-    case QEMU_OPTION_daemonize:
-        daemonize = 1;
-        break;
-#if defined(CONFIG_LINUX)
-    /* deprecated */
-    case QEMU_OPTION_asyncteardown:
-        init_async_teardown();
-        break;
-#endif
-    case QEMU_OPTION_run_with: {
-        const char *str;
-        QemuOpts *opts = qemu_opts_parse_noisily(qemu_find_opts("run-with"),
-                                                 optarg, false);
-        if (!opts) {
-            exit(1);
-        }
-#if defined(CONFIG_LINUX)
-        if (qemu_opt_get_bool(opts, "async-teardown", false)) {
-            init_async_teardown();
-        }
-#endif
-        str = qemu_opt_get(opts, "chroot");
-        if (str) {
-            chroot_dir = str;
-        }
-        break;
-    }
-    default:
-        return -1;
-    }
-
-    return 0;
-}
-
 static void change_process_uid(void)
 {
     assert((user_uid == (uid_t)-1) || user_pwd == NULL);
@@ -223,6 +170,14 @@ static void change_process_uid(void)
     }
 }
 
+
+static const char *chroot_dir;
+
+void os_set_chroot(const char *optarg)
+{
+    chroot_dir = optarg;
+}
+
 static void change_root(void)
 {
     if (chroot_dir) {
@@ -238,6 +193,21 @@ static void change_root(void)
 
 }
 
+
+static int daemonize;
+static int daemon_pipe;
+
+bool is_daemonized(void)
+{
+    return daemonize;
+}
+
+int os_set_daemonize(bool d)
+{
+    daemonize = d;
+    return 0;
+}
+
 void os_daemonize(void)
 {
     if (daemonize) {
@@ -331,17 +301,6 @@ void os_set_line_buffering(void)
     setvbuf(stdout, NULL, _IOLBF, 0);
 }
 
-bool is_daemonized(void)
-{
-    return daemonize;
-}
-
-int os_set_daemonize(bool d)
-{
-    daemonize = d;
-    return 0;
-}
-
 int os_mlock(void)
 {
 #ifdef HAVE_MLOCKALL
@@ -357,27 +316,3 @@ int os_mlock(void)
     return -ENOSYS;
 #endif
 }
-
-static QemuOptsList qemu_run_with_opts = {
-    .name = "run-with",
-    .head = QTAILQ_HEAD_INITIALIZER(qemu_run_with_opts.head),
-    .desc = {
-#if defined(CONFIG_LINUX)
-        {
-            .name = "async-teardown",
-            .type = QEMU_OPT_BOOL,
-        },
-#endif
-        {
-            .name = "chroot",
-            .type = QEMU_OPT_STRING,
-        },
-        { /* end of list */ }
-    },
-};
-
-static void register_runwith(void)
-{
-    qemu_add_opts(&qemu_run_with_opts);
-}
-opts_init(register_runwith);
diff --git a/pc-bios/Makefile b/pc-bios/Makefile
deleted file mode 100644
index 315288df84..0000000000
--- a/pc-bios/Makefile
+++ /dev/null
@@ -1,19 +0,0 @@
-#
-# NOTE: only compilable with x86 cross compile tools
-#
-include ../config-host.mak
-
-DEFINES=
-
-TARGETS=
-
-all: $(TARGETS)
-
-%.o: %.S
-	$(CC) $(DEFINES) -c -o $@ $<
-
-%.dtb: %.dts
-	dtc -I dts -O dtb -o $@ $<
-
-clean:
-	rm -f $(TARGETS) *.o *~
diff --git a/pc-bios/edk2-aarch64-code.fd.bz2 b/pc-bios/edk2-aarch64-code.fd.bz2
index 4bc824e48d..985e69a66a 100644
Binary files a/pc-bios/edk2-aarch64-code.fd.bz2 and b/pc-bios/edk2-aarch64-code.fd.bz2 differ
diff --git a/pc-bios/edk2-arm-code.fd.bz2 b/pc-bios/edk2-arm-code.fd.bz2
index 7899fca426..ae797a8c8e 100644
Binary files a/pc-bios/edk2-arm-code.fd.bz2 and b/pc-bios/edk2-arm-code.fd.bz2 differ
diff --git a/pc-bios/edk2-i386-code.fd.bz2 b/pc-bios/edk2-i386-code.fd.bz2
index a68ae4fa15..e703c2f954 100644
Binary files a/pc-bios/edk2-i386-code.fd.bz2 and b/pc-bios/edk2-i386-code.fd.bz2 differ
diff --git a/pc-bios/edk2-i386-secure-code.fd.bz2 b/pc-bios/edk2-i386-secure-code.fd.bz2
index 91936ebbc9..7230d44615 100644
Binary files a/pc-bios/edk2-i386-secure-code.fd.bz2 and b/pc-bios/edk2-i386-secure-code.fd.bz2 differ
diff --git a/pc-bios/edk2-riscv-code.fd.bz2 b/pc-bios/edk2-riscv-code.fd.bz2
new file mode 100644
index 0000000000..c1cc08561d
Binary files /dev/null and b/pc-bios/edk2-riscv-code.fd.bz2 differ
diff --git a/pc-bios/edk2-riscv-vars.fd.bz2 b/pc-bios/edk2-riscv-vars.fd.bz2
new file mode 100644
index 0000000000..40da6591ad
Binary files /dev/null and b/pc-bios/edk2-riscv-vars.fd.bz2 differ
diff --git a/pc-bios/edk2-riscv.fd.bz2 b/pc-bios/edk2-riscv.fd.bz2
deleted file mode 100644
index ef566b374a..0000000000
Binary files a/pc-bios/edk2-riscv.fd.bz2 and /dev/null differ
diff --git a/pc-bios/edk2-x86_64-code.fd.bz2 b/pc-bios/edk2-x86_64-code.fd.bz2
index 35c2340e46..9b7767a3ac 100644
Binary files a/pc-bios/edk2-x86_64-code.fd.bz2 and b/pc-bios/edk2-x86_64-code.fd.bz2 differ
diff --git a/pc-bios/edk2-x86_64-microvm.fd.bz2 b/pc-bios/edk2-x86_64-microvm.fd.bz2
index 742abf06c5..17460dd380 100644
Binary files a/pc-bios/edk2-x86_64-microvm.fd.bz2 and b/pc-bios/edk2-x86_64-microvm.fd.bz2 differ
diff --git a/pc-bios/edk2-x86_64-secure-code.fd.bz2 b/pc-bios/edk2-x86_64-secure-code.fd.bz2
index 311b7b91d7..fd0efeacbf 100644
Binary files a/pc-bios/edk2-x86_64-secure-code.fd.bz2 and b/pc-bios/edk2-x86_64-secure-code.fd.bz2 differ
diff --git a/pc-bios/hppa-firmware.img b/pc-bios/hppa-firmware.img
index 0fa3808f16..c7196143b1 100644
Binary files a/pc-bios/hppa-firmware.img and b/pc-bios/hppa-firmware.img differ
diff --git a/pc-bios/meson.build b/pc-bios/meson.build
index a7224ef469..e67fa433a1 100644
--- a/pc-bios/meson.build
+++ b/pc-bios/meson.build
@@ -57,10 +57,6 @@ blobs = [
   'efi-e1000e.rom',
   'efi-vmxnet3.rom',
   'qemu-nsis.bmp',
-  'bamboo.dtb',
-  'canyonlands.dtb',
-  'petalogix-s3adsp1800.dtb',
-  'petalogix-ml605.dtb',
   'multiboot.bin',
   'multiboot_dma.bin',
   'linuxboot.bin',
@@ -84,6 +80,27 @@ blobs = [
   'vof-nvram.bin',
 ]
 
+dtc = find_program('dtc', required: false)
+foreach f : [
+  'bamboo.dts',
+  'canyonlands.dts',
+  'petalogix-s3adsp1800.dts',
+  'petalogix-ml605.dts',
+]
+  out = fs.replace_suffix(f, '.dtb')
+  if dtc.found()
+    custom_target(f,
+        build_by_default: have_system,
+        input: files(f),
+        output: out,
+        install: get_option('install_blobs'),
+        install_dir: qemu_datadir,
+        command: [ dtc, '-I', 'dts', '-O', 'dtb', '-o', '@OUTPUT@', '@INPUT0@' ])
+  else
+    blobs += out
+  endif
+endforeach
+
 if get_option('install_blobs')
   install_data(blobs, install_dir: qemu_datadir)
 endif
diff --git a/plugins/api.c b/plugins/api.c
index 2078b16edb..5521b0ad36 100644
--- a/plugins/api.c
+++ b/plugins/api.c
@@ -316,22 +316,7 @@ uint64_t qemu_plugin_hwaddr_phys_addr(const struct qemu_plugin_hwaddr *haddr)
 {
 #ifdef CONFIG_SOFTMMU
     if (haddr) {
-        if (!haddr->is_io) {
-            RAMBlock *block;
-            ram_addr_t offset;
-            void *hostaddr = haddr->v.ram.hostaddr;
-
-            block = qemu_ram_block_from_host(hostaddr, false, &offset);
-            if (!block) {
-                error_report("Bad host ram pointer %p", haddr->v.ram.hostaddr);
-                abort();
-            }
-
-            return block->offset + offset + block->mr->addr;
-        } else {
-            MemoryRegionSection *mrs = haddr->v.io.section;
-            return mrs->offset_within_address_space + haddr->v.io.offset;
-        }
+        return haddr->phys_addr;
     }
 #endif
     return 0;
@@ -341,13 +326,13 @@ const char *qemu_plugin_hwaddr_device_name(const struct qemu_plugin_hwaddr *h)
 {
 #ifdef CONFIG_SOFTMMU
     if (h && h->is_io) {
-        MemoryRegionSection *mrs = h->v.io.section;
-        if (!mrs->mr->name) {
-            unsigned long maddr = 0xffffffff & (uintptr_t) mrs->mr;
-            g_autofree char *temp = g_strdup_printf("anon%08lx", maddr);
+        MemoryRegion *mr = h->mr;
+        if (!mr->name) {
+            unsigned maddr = (uintptr_t)mr;
+            g_autofree char *temp = g_strdup_printf("anon%08x", maddr);
             return g_intern_string(temp);
         } else {
-            return g_intern_string(mrs->mr->name);
+            return g_intern_string(mr->name);
         }
     } else {
         return g_intern_static_string("RAM");
diff --git a/plugins/meson.build b/plugins/meson.build
index 752377c66d..71ed996ed3 100644
--- a/plugins/meson.build
+++ b/plugins/meson.build
@@ -13,8 +13,10 @@ if not enable_modules
   endif
 endif
 
-specific_ss.add(when: 'CONFIG_PLUGIN', if_true: [files(
-  'loader.c',
-  'core.c',
-  'api.c',
-), declare_dependency(link_args: plugin_ldflags)])
+if get_option('plugins')
+  specific_ss.add(files(
+    'loader.c',
+    'core.c',
+    'api.c',
+  ), declare_dependency(link_args: plugin_ldflags))
+endif
diff --git a/python/Makefile b/python/Makefile
index 7c70dcc8d1..1fa4ba2498 100644
--- a/python/Makefile
+++ b/python/Makefile
@@ -9,13 +9,13 @@ help:
 	@echo "make check-minreqs:"
 	@echo "    Run tests in the minreqs virtual environment."
 	@echo "    These tests use the oldest dependencies."
-	@echo "    Requires: Python 3.7"
-	@echo "    Hint (Fedora): 'sudo dnf install python3.7'"
+	@echo "    Requires: Python 3.8"
+	@echo "    Hint (Fedora): 'sudo dnf install python3.8'"
 	@echo ""
 	@echo "make check-tox:"
 	@echo "    Run tests against multiple python versions."
 	@echo "    These tests use the newest dependencies."
-	@echo "    Requires: Python 3.7 - 3.11, and tox."
+	@echo "    Requires: Python 3.8 - 3.11, and tox."
 	@echo "    Hint (Fedora): 'sudo dnf install python3-tox python3.11'"
 	@echo "    The variable QEMU_TOX_EXTRA_ARGS can be use to pass extra"
 	@echo "    arguments to tox".
@@ -59,7 +59,7 @@ PIP_INSTALL = pip install --disable-pip-version-check
 min-venv: $(QEMU_MINVENV_DIR) $(QEMU_MINVENV_DIR)/bin/activate
 $(QEMU_MINVENV_DIR) $(QEMU_MINVENV_DIR)/bin/activate: setup.cfg tests/minreqs.txt
 	@echo "VENV $(QEMU_MINVENV_DIR)"
-	@python3.7 -m venv $(QEMU_MINVENV_DIR)
+	@python3.8 -m venv $(QEMU_MINVENV_DIR)
 	@(								\
 		echo "ACTIVATE $(QEMU_MINVENV_DIR)";			\
 		. $(QEMU_MINVENV_DIR)/bin/activate;			\
diff --git a/python/qemu/machine/machine.py b/python/qemu/machine/machine.py
index c16a0b6fed..35d5a672db 100644
--- a/python/qemu/machine/machine.py
+++ b/python/qemu/machine/machine.py
@@ -191,6 +191,7 @@ class QEMUMachine:
             self.sock_dir, f"{self._name}.con"
         )
         self._console_socket: Optional[socket.socket] = None
+        self._console_file: Optional[socket.SocketIO] = None
         self._remove_files: List[str] = []
         self._user_killed = False
         self._quit_issued = False
@@ -509,6 +510,11 @@ class QEMUMachine:
         # If we keep the console socket open, we may deadlock waiting
         # for QEMU to exit, while QEMU is waiting for the socket to
         # become writable.
+        if self._console_file is not None:
+            LOG.debug("Closing console file")
+            self._console_file.close()
+            self._console_file = None
+
         if self._console_socket is not None:
             LOG.debug("Closing console socket")
             self._console_socket.close()
@@ -874,12 +880,25 @@ class QEMUMachine:
         Returns a socket connected to the console
         """
         if self._console_socket is None:
+            LOG.debug("Opening console socket")
             self._console_socket = console_socket.ConsoleSocket(
                 self._console_address,
                 file=self._console_log_path,
                 drain=self._drain_console)
         return self._console_socket
 
+    @property
+    def console_file(self) -> socket.SocketIO:
+        """
+        Returns a file associated with the console socket
+        """
+        if self._console_file is None:
+            LOG.debug("Opening console file")
+            self._console_file = self.console_socket.makefile(mode='rb',
+                                                              buffering=0,
+                                                              encoding='utf-8')
+        return self._console_file
+
     @property
     def temp_dir(self) -> str:
         """
diff --git a/python/scripts/mkvenv.py b/python/scripts/mkvenv.py
index 4f2349fbb6..d0b9c215ca 100644
--- a/python/scripts/mkvenv.py
+++ b/python/scripts/mkvenv.py
@@ -61,9 +61,6 @@ options:
 
 """
 
-# The duplication between importlib and pkg_resources does not help
-# pylint: disable=too-many-lines
-
 # Copyright (C) 2022-2023 Red Hat, Inc.
 #
 # Authors:
@@ -74,6 +71,13 @@ options:
 # later. See the COPYING file in the top-level directory.
 
 import argparse
+from importlib.metadata import (
+    Distribution,
+    EntryPoint,
+    PackageNotFoundError,
+    distribution,
+    version,
+)
 from importlib.util import find_spec
 import logging
 import os
@@ -189,7 +193,7 @@ class QemuEnvBuilder(venv.EnvBuilder):
             ):
                 kwargs["with_pip"] = False
             else:
-                check_ensurepip(suggest_remedy=True)
+                check_ensurepip()
 
         super().__init__(*args, **kwargs)
 
@@ -294,7 +298,7 @@ def need_ensurepip() -> bool:
     return True
 
 
-def check_ensurepip(prefix: str = "", suggest_remedy: bool = False) -> None:
+def check_ensurepip() -> None:
     """
     Check that we have ensurepip.
 
@@ -305,15 +309,12 @@ def check_ensurepip(prefix: str = "", suggest_remedy: bool = False) -> None:
             "Python's ensurepip module is not found.\n"
             "It's normally part of the Python standard library, "
             "maybe your distribution packages it separately?\n"
-            "(Debian puts ensurepip in its python3-venv package.)\n"
+            "Either install ensurepip, or alleviate the need for it in the "
+            "first place by installing pip and setuptools for "
+            f"'{sys.executable}'.\n"
+            "(Hint: Debian puts ensurepip in its python3-venv package.)"
         )
-        if suggest_remedy:
-            msg += (
-                "Either install ensurepip, or alleviate the need for it in the"
-                " first place by installing pip and setuptools for "
-                f"'{sys.executable}'.\n"
-            )
-        raise Ouch(prefix + msg)
+        raise Ouch(msg)
 
     # ensurepip uses pyexpat, which can also go missing on us:
     if not find_spec("pyexpat"):
@@ -321,15 +322,12 @@ def check_ensurepip(prefix: str = "", suggest_remedy: bool = False) -> None:
             "Python's pyexpat module is not found.\n"
             "It's normally part of the Python standard library, "
             "maybe your distribution packages it separately?\n"
-            "(NetBSD's pkgsrc debundles this to e.g. 'py310-expat'.)\n"
+            "Either install pyexpat, or alleviate the need for it in the "
+            "first place by installing pip and setuptools for "
+            f"'{sys.executable}'.\n\n"
+            "(Hint: NetBSD's pkgsrc debundles this to e.g. 'py310-expat'.)"
         )
-        if suggest_remedy:
-            msg += (
-                "Either install pyexpat, or alleviate the need for it in the "
-                "first place by installing pip and setuptools for "
-                f"'{sys.executable}'.\n"
-            )
-        raise Ouch(prefix + msg)
+        raise Ouch(msg)
 
 
 def make_venv(  # pylint: disable=too-many-arguments
@@ -428,28 +426,13 @@ def make_venv(  # pylint: disable=too-many-arguments
     print(builder.get_value("env_exe"))
 
 
-def _gen_importlib(packages: Sequence[str]) -> Iterator[str]:
-    # pylint: disable=import-outside-toplevel
-    # pylint: disable=no-name-in-module
-    # pylint: disable=import-error
-    try:
-        # First preference: Python 3.8+ stdlib
-        from importlib.metadata import (  # type: ignore
-            PackageNotFoundError,
-            distribution,
-        )
-    except ImportError as exc:
-        logger.debug("%s", str(exc))
-        # Second preference: Commonly available PyPI backport
-        from importlib_metadata import (  # type: ignore
-            PackageNotFoundError,
-            distribution,
-        )
+def _get_entry_points(packages: Sequence[str]) -> Iterator[str]:
 
     def _generator() -> Iterator[str]:
         for package in packages:
             try:
-                entry_points = distribution(package).entry_points
+                entry_points: Iterator[EntryPoint] = \
+                    iter(distribution(package).entry_points)
             except PackageNotFoundError:
                 continue
 
@@ -465,24 +448,6 @@ def _gen_importlib(packages: Sequence[str]) -> Iterator[str]:
     return _generator()
 
 
-def _gen_pkg_resources(packages: Sequence[str]) -> Iterator[str]:
-    # pylint: disable=import-outside-toplevel
-    # Bundled with setuptools; has a good chance of being available.
-    import pkg_resources
-
-    def _generator() -> Iterator[str]:
-        for package in packages:
-            try:
-                eps = pkg_resources.get_entry_map(package, "console_scripts")
-            except pkg_resources.DistributionNotFound:
-                continue
-
-            for entry_point in eps.values():
-                yield str(entry_point)
-
-    return _generator()
-
-
 def generate_console_scripts(
     packages: Sequence[str],
     python_path: Optional[str] = None,
@@ -507,66 +472,15 @@ def generate_console_scripts(
     if not packages:
         return
 
-    def _get_entry_points() -> Iterator[str]:
-        """Python 3.7 compatibility shim for iterating entry points."""
-        # Python 3.8+, or Python 3.7 with importlib_metadata installed.
-        try:
-            return _gen_importlib(packages)
-        except ImportError as exc:
-            logger.debug("%s", str(exc))
-
-        # Python 3.7 with setuptools installed.
-        try:
-            return _gen_pkg_resources(packages)
-        except ImportError as exc:
-            logger.debug("%s", str(exc))
-            raise Ouch(
-                "Neither importlib.metadata nor pkg_resources found, "
-                "can't generate console script shims.\n"
-                "Use Python 3.8+, or install importlib-metadata or setuptools."
-            ) from exc
-
     maker = distlib.scripts.ScriptMaker(None, bin_path)
     maker.variants = {""}
     maker.clobber = False
 
-    for entry_point in _get_entry_points():
+    for entry_point in _get_entry_points(packages):
         for filename in maker.make(entry_point):
             logger.debug("wrote console_script '%s'", filename)
 
 
-def checkpip() -> bool:
-    """
-    Debian10 has a pip that's broken when used inside of a virtual environment.
-
-    We try to detect and correct that case here.
-    """
-    try:
-        # pylint: disable=import-outside-toplevel,unused-import,import-error
-        # pylint: disable=redefined-outer-name
-        import pip._internal  # type: ignore  # noqa: F401
-
-        logger.debug("pip appears to be working correctly.")
-        return False
-    except ModuleNotFoundError as exc:
-        if exc.name == "pip._internal":
-            # Uh, fair enough. They did say "internal".
-            # Let's just assume it's fine.
-            return False
-        logger.warning("pip appears to be malfunctioning: %s", str(exc))
-
-    check_ensurepip("pip appears to be non-functional, and ")
-
-    logger.debug("Attempting to repair pip ...")
-    subprocess.run(
-        (sys.executable, "-m", "ensurepip"),
-        stdout=subprocess.DEVNULL,
-        check=True,
-    )
-    logger.debug("Pip is now (hopefully) repaired!")
-    return True
-
-
 def pkgname_from_depspec(dep_spec: str) -> str:
     """
     Parse package name out of a PEP-508 depspec.
@@ -584,57 +498,6 @@ def pkgname_from_depspec(dep_spec: str) -> str:
     return match.group(0)
 
 
-def _get_path_importlib(package: str) -> Optional[str]:
-    # pylint: disable=import-outside-toplevel
-    # pylint: disable=no-name-in-module
-    # pylint: disable=import-error
-    try:
-        # First preference: Python 3.8+ stdlib
-        from importlib.metadata import (  # type: ignore
-            PackageNotFoundError,
-            distribution,
-        )
-    except ImportError as exc:
-        logger.debug("%s", str(exc))
-        # Second preference: Commonly available PyPI backport
-        from importlib_metadata import (  # type: ignore
-            PackageNotFoundError,
-            distribution,
-        )
-
-    try:
-        return str(distribution(package).locate_file("."))
-    except PackageNotFoundError:
-        return None
-
-
-def _get_path_pkg_resources(package: str) -> Optional[str]:
-    # pylint: disable=import-outside-toplevel
-    # Bundled with setuptools; has a good chance of being available.
-    import pkg_resources
-
-    try:
-        return str(pkg_resources.get_distribution(package).location)
-    except pkg_resources.DistributionNotFound:
-        return None
-
-
-def _get_path(package: str) -> Optional[str]:
-    try:
-        return _get_path_importlib(package)
-    except ImportError as exc:
-        logger.debug("%s", str(exc))
-
-    try:
-        return _get_path_pkg_resources(package)
-    except ImportError as exc:
-        logger.debug("%s", str(exc))
-        raise Ouch(
-            "Neither importlib.metadata nor pkg_resources found. "
-            "Use Python 3.8+, or install importlib-metadata or setuptools."
-        ) from exc
-
-
 def _path_is_prefix(prefix: Optional[str], path: str) -> bool:
     try:
         return (
@@ -644,65 +507,14 @@ def _path_is_prefix(prefix: Optional[str], path: str) -> bool:
         return False
 
 
-def _is_system_package(package: str) -> bool:
-    path = _get_path(package)
-    return path is not None and not (
+def _is_system_package(dist: Distribution) -> bool:
+    path = str(dist.locate_file("."))
+    return not (
         _path_is_prefix(sysconfig.get_path("purelib"), path)
         or _path_is_prefix(sysconfig.get_path("platlib"), path)
     )
 
 
-def _get_version_importlib(package: str) -> Optional[str]:
-    # pylint: disable=import-outside-toplevel
-    # pylint: disable=no-name-in-module
-    # pylint: disable=import-error
-    try:
-        # First preference: Python 3.8+ stdlib
-        from importlib.metadata import (  # type: ignore
-            PackageNotFoundError,
-            distribution,
-        )
-    except ImportError as exc:
-        logger.debug("%s", str(exc))
-        # Second preference: Commonly available PyPI backport
-        from importlib_metadata import (  # type: ignore
-            PackageNotFoundError,
-            distribution,
-        )
-
-    try:
-        return str(distribution(package).version)
-    except PackageNotFoundError:
-        return None
-
-
-def _get_version_pkg_resources(package: str) -> Optional[str]:
-    # pylint: disable=import-outside-toplevel
-    # Bundled with setuptools; has a good chance of being available.
-    import pkg_resources
-
-    try:
-        return str(pkg_resources.get_distribution(package).version)
-    except pkg_resources.DistributionNotFound:
-        return None
-
-
-def _get_version(package: str) -> Optional[str]:
-    try:
-        return _get_version_importlib(package)
-    except ImportError as exc:
-        logger.debug("%s", str(exc))
-
-    try:
-        return _get_version_pkg_resources(package)
-    except ImportError as exc:
-        logger.debug("%s", str(exc))
-        raise Ouch(
-            "Neither importlib.metadata nor pkg_resources found. "
-            "Use Python 3.8+, or install importlib-metadata or setuptools."
-        ) from exc
-
-
 def diagnose(
     dep_spec: str,
     online: bool,
@@ -728,7 +540,11 @@ def diagnose(
     bad = False
 
     pkg_name = pkgname_from_depspec(dep_spec)
-    pkg_version = _get_version(pkg_name)
+    pkg_version: Optional[str] = None
+    try:
+        pkg_version = version(pkg_name)
+    except PackageNotFoundError:
+        pass
 
     lines = []
 
@@ -865,19 +681,25 @@ def _do_ensure(
         constraint = _make_version_constraint(info, False)
         matcher = distlib.version.LegacyMatcher(name + constraint)
         print(f"mkvenv: checking for {matcher}", file=sys.stderr)
-        ver = _get_version(name)
+
+        dist: Optional[Distribution] = None
+        try:
+            dist = distribution(matcher.name)
+        except PackageNotFoundError:
+            pass
+
         if (
-            ver is None
+            dist is None
             # Always pass installed package to pip, so that they can be
             # updated if the requested version changes
-            or not _is_system_package(name)
-            or not matcher.match(distlib.version.LegacyVersion(ver))
+            or not _is_system_package(dist)
+            or not matcher.match(distlib.version.LegacyVersion(dist.version))
         ):
             absent.append(name + _make_version_constraint(info, True))
             if len(absent) == 1:
                 canary = info.get("canary", None)
         else:
-            logger.info("found %s %s", name, ver)
+            logger.info("found %s %s", name, dist.version)
             present.append(name)
 
     if present:
@@ -1015,12 +837,10 @@ def post_venv_setup() -> None:
     This is intended to be run *inside the venv* after it is created.
     """
     logger.debug("post_venv_setup()")
-    # Test for a broken pip (Debian 10 or derivative?) and fix it if needed
-    if not checkpip():
-        # Finally, generate a 'pip' script so the venv is usable in a normal
-        # way from the CLI. This only happens when we inherited pip from a
-        # parent/system-site and haven't run ensurepip in some way.
-        generate_console_scripts(["pip"])
+    # Generate a 'pip' script so the venv is usable in a normal
+    # way from the CLI. This only happens when we inherited pip from a
+    # parent/system-site and haven't run ensurepip in some way.
+    generate_console_scripts(["pip"])
 
 
 def _add_create_subcommand(subparsers: Any) -> None:
diff --git a/python/setup.cfg b/python/setup.cfg
index e74b58a8c2..8c67dce457 100644
--- a/python/setup.cfg
+++ b/python/setup.cfg
@@ -14,7 +14,6 @@ classifiers =
     Natural Language :: English
     Operating System :: OS Independent
     Programming Language :: Python :: 3 :: Only
-    Programming Language :: Python :: 3.7
     Programming Language :: Python :: 3.8
     Programming Language :: Python :: 3.9
     Programming Language :: Python :: 3.10
@@ -22,7 +21,7 @@ classifiers =
     Typing :: Typed
 
 [options]
-python_requires = >= 3.7
+python_requires = >= 3.8
 packages =
     qemu.qmp
     qemu.machine
@@ -76,7 +75,7 @@ exclude = __pycache__,
 
 [mypy]
 strict = True
-python_version = 3.7
+python_version = 3.8
 warn_unused_configs = True
 namespace_packages = True
 warn_unused_ignores = False
@@ -109,15 +108,6 @@ ignore_missing_imports = True
 [mypy-pygments]
 ignore_missing_imports = True
 
-[mypy-importlib.metadata]
-ignore_missing_imports = True
-
-[mypy-importlib_metadata]
-ignore_missing_imports = True
-
-[mypy-pkg_resources]
-ignore_missing_imports = True
-
 [mypy-distlib]
 ignore_missing_imports = True
 
@@ -192,7 +182,7 @@ multi_line_output=3
 # of python available on your system to run this test.
 
 [tox:tox]
-envlist = py37, py38, py39, py310, py311
+envlist = py38, py39, py310, py311
 skip_missing_interpreters = true
 
 [testenv]
diff --git a/python/tests/minreqs.txt b/python/tests/minreqs.txt
index 979461be6b..a3f423efd8 100644
--- a/python/tests/minreqs.txt
+++ b/python/tests/minreqs.txt
@@ -1,5 +1,5 @@
 # This file lists the ***oldest possible dependencies*** needed to run
-# "make check" successfully under ***Python 3.7***. It is used primarily
+# "make check" successfully under ***Python 3.8***. It is used primarily
 # by GitLab CI to ensure that our stated minimum versions in setup.cfg
 # are truthful and regularly validated.
 #
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 2b1d493d6e..89751d81f2 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -409,6 +409,8 @@
 #
 # @zero: whether the virtual blocks read as zeroes
 #
+# @compressed: true if the data is stored compressed (since 8.2)
+#
 # @depth: number of layers (0 = top image, 1 = top image's backing
 #     file, ..., n - 1 = bottom image (where n is the number of images
 #     in the chain)) before reaching one for which the range is
@@ -426,8 +428,8 @@
 ##
 { 'struct': 'MapEntry',
   'data': {'start': 'int', 'length': 'int', 'data': 'bool',
-           'zero': 'bool', 'depth': 'int', 'present': 'bool',
-           '*offset': 'int', '*filename': 'str' } }
+           'zero': 'bool', 'compressed': 'bool', 'depth': 'int',
+           'present': 'bool', '*offset': 'int', '*filename': 'str' } }
 
 ##
 # @BlockdevCacheInfo:
diff --git a/qapi/char.json b/qapi/char.json
index 52aaff25eb..c1bab7b855 100644
--- a/qapi/char.json
+++ b/qapi/char.json
@@ -390,6 +390,10 @@
 #
 # @rows: console height, in chars
 #
+# Note: the options are only effective when the VNC or SDL graphical
+# display backend is active. They are ignored with the GTK, Spice, VNC
+# and D-Bus display backends.
+#
 # Since: 1.5
 ##
 { 'struct': 'ChardevVC',
diff --git a/qapi/net.json b/qapi/net.json
index 313c8a606e..8095b68fa8 100644
--- a/qapi/net.json
+++ b/qapi/net.json
@@ -408,6 +408,60 @@
     'ifname':     'str',
     '*devname':    'str' } }
 
+##
+# @AFXDPMode:
+#
+# Attach mode for a default XDP program
+#
+# @skb: generic mode, no driver support necessary
+#
+# @native: DRV mode, program is attached to a driver, packets are passed to
+#     the socket without allocation of skb.
+#
+# Since: 8.2
+##
+{ 'enum': 'AFXDPMode',
+  'data': [ 'native', 'skb' ],
+  'if': 'CONFIG_AF_XDP' }
+
+##
+# @NetdevAFXDPOptions:
+#
+# AF_XDP network backend
+#
+# @ifname: The name of an existing network interface.
+#
+# @mode: Attach mode for a default XDP program.  If not specified, then
+#     'native' will be tried first, then 'skb'.
+#
+# @force-copy: Force XDP copy mode even if device supports zero-copy.
+#     (default: false)
+#
+# @queues: number of queues to be used for multiqueue interfaces (default: 1).
+#
+# @start-queue: Use @queues starting from this queue number (default: 0).
+#
+# @inhibit: Don't load a default XDP program, use one already loaded to
+#     the interface (default: false).  Requires @sock-fds.
+#
+# @sock-fds: A colon (:) separated list of file descriptors for already open
+#     but not bound AF_XDP sockets in the queue order.  One fd per queue.
+#     These descriptors should already be added into XDP socket map for
+#     corresponding queues.  Requires @inhibit.
+#
+# Since: 8.2
+##
+{ 'struct': 'NetdevAFXDPOptions',
+  'data': {
+    'ifname':       'str',
+    '*mode':        'AFXDPMode',
+    '*force-copy':  'bool',
+    '*queues':      'int',
+    '*start-queue': 'int',
+    '*inhibit':     'bool',
+    '*sock-fds':    'str' },
+  'if': 'CONFIG_AF_XDP' }
+
 ##
 # @NetdevVhostUserOptions:
 #
@@ -642,6 +696,7 @@
 # @vmnet-bridged: since 7.1
 # @stream: since 7.2
 # @dgram: since 7.2
+# @af-xdp: since 8.2
 #
 # Since: 2.7
 ##
@@ -649,6 +704,7 @@
   'data': [ 'none', 'nic', 'user', 'tap', 'l2tpv3', 'socket', 'stream',
             'dgram', 'vde', 'bridge', 'hubport', 'netmap', 'vhost-user',
             'vhost-vdpa',
+            { 'name': 'af-xdp', 'if': 'CONFIG_AF_XDP' },
             { 'name': 'vmnet-host', 'if': 'CONFIG_VMNET' },
             { 'name': 'vmnet-shared', 'if': 'CONFIG_VMNET' },
             { 'name': 'vmnet-bridged', 'if': 'CONFIG_VMNET' }] }
@@ -679,6 +735,8 @@
     'bridge':   'NetdevBridgeOptions',
     'hubport':  'NetdevHubPortOptions',
     'netmap':   'NetdevNetmapOptions',
+    'af-xdp':   { 'type': 'NetdevAFXDPOptions',
+                  'if': 'CONFIG_AF_XDP' },
     'vhost-user': 'NetdevVhostUserOptions',
     'vhost-vdpa': 'NetdevVhostVDPAOptions',
     'vmnet-host': { 'type': 'NetdevVmnetHostOptions',
diff --git a/qapi/qom.json b/qapi/qom.json
index fa3e88c8e6..c53ef978ff 100644
--- a/qapi/qom.json
+++ b/qapi/qom.json
@@ -668,6 +668,20 @@
 # @readonly: if true, the backing file is opened read-only; if false,
 #     it is opened read-write.  (default: false)
 #
+# @rom: whether to create Read Only Memory (ROM) that cannot be modified
+#       by the VM.  Any write attempts to such ROM will be denied.  Most
+#       use cases want writable RAM instead of ROM.  However, selected use
+#       cases, like R/O NVDIMMs, can benefit from ROM.  If set to 'on',
+#       create ROM; if set to 'off', create writable RAM;  if set to
+#       'auto', the value of the @readonly property is used.  This
+#       property is primarily helpful when we want to have proper RAM in
+#       configurations that would traditionally create ROM before this
+#       property was introduced: VM templating, where we want to open a
+#       file readonly (@readonly set to true) and mark the memory to be
+#       private for QEMU (@share set to false).  For this use case, we need
+#       writable RAM instead of ROM, and want to set this property to 'off'.
+#       (default: auto, since 8.2)
+#
 # Since: 2.1
 ##
 { 'struct': 'MemoryBackendFileProperties',
@@ -677,7 +691,8 @@
             '*discard-data': 'bool',
             'mem-path': 'str',
             '*pmem': { 'type': 'bool', 'if': 'CONFIG_LIBPMEM' },
-            '*readonly': 'bool' } }
+            '*readonly': 'bool',
+            '*rom': 'OnOffAuto' } }
 
 ##
 # @MemoryBackendMemfdProperties:
diff --git a/qemu-img.c b/qemu-img.c
index 27f48051b0..a48edb7101 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -3108,10 +3108,12 @@ static int dump_map_entry(OutputFormat output_format, MapEntry *e,
     case OFORMAT_JSON:
         printf("{ \"start\": %"PRId64", \"length\": %"PRId64","
                " \"depth\": %"PRId64", \"present\": %s, \"zero\": %s,"
-               " \"data\": %s", e->start, e->length, e->depth,
+               " \"data\": %s, \"compressed\": %s",
+               e->start, e->length, e->depth,
                e->present ? "true" : "false",
                e->zero ? "true" : "false",
-               e->data ? "true" : "false");
+               e->data ? "true" : "false",
+               e->compressed ? "true" : "false");
         if (e->has_offset) {
             printf(", \"offset\": %"PRId64"", e->offset);
         }
@@ -3172,6 +3174,7 @@ static int get_block_status(BlockDriverState *bs, int64_t offset,
         .length = bytes,
         .data = !!(ret & BDRV_BLOCK_DATA),
         .zero = !!(ret & BDRV_BLOCK_ZERO),
+        .compressed = !!(ret & BDRV_BLOCK_COMPRESSED),
         .offset = map,
         .has_offset = has_offset,
         .depth = depth,
@@ -3189,6 +3192,7 @@ static inline bool entry_mergeable(const MapEntry *curr, const MapEntry *next)
     }
     if (curr->zero != next->zero ||
         curr->data != next->data ||
+        curr->compressed != next->compressed ||
         curr->depth != next->depth ||
         curr->present != next->present ||
         !curr->filename != !next->filename ||
@@ -3468,8 +3472,8 @@ static int img_snapshot(int argc, char **argv)
 
         ret = bdrv_snapshot_create(bs, &sn);
         if (ret) {
-            error_report("Could not create snapshot '%s': %d (%s)",
-                snapshot_name, ret, strerror(-ret));
+            error_report("Could not create snapshot '%s': %s",
+                snapshot_name, strerror(-ret));
         }
         break;
 
diff --git a/qemu-nbd.c b/qemu-nbd.c
index aaccaa3318..30eeb6f3c7 100644
--- a/qemu-nbd.c
+++ b/qemu-nbd.c
@@ -73,8 +73,6 @@
 
 #define MBR_SIZE 512
 
-static char *srcpath;
-static SocketAddress *saddr;
 static int persistent = 0;
 static enum { RUNNING, TERMINATE, TERMINATED } state;
 static int shared = 1;
@@ -253,6 +251,29 @@ static int qemu_nbd_client_list(SocketAddress *saddr, QCryptoTLSCreds *tls,
 }
 
 
+struct NbdClientOpts {
+    char *device;
+    char *srcpath;
+    SocketAddress *saddr;
+    int old_stderr;
+    bool fork_process;
+    bool verbose;
+};
+
+static void nbd_client_release_pipe(int old_stderr)
+{
+    /* Close stderr so that the qemu-nbd process exits.  */
+    if (dup2(old_stderr, STDERR_FILENO) < 0) {
+        error_report("Could not release pipe to parent: %s",
+                     strerror(errno));
+        exit(EXIT_FAILURE);
+    }
+    if (old_stderr != STDOUT_FILENO && close(old_stderr) < 0) {
+        error_report("Could not release qemu-nbd: %s", strerror(errno));
+        exit(EXIT_FAILURE);
+    }
+}
+
 #if HAVE_NBD_DEVICE
 static void *show_parts(void *arg)
 {
@@ -271,12 +292,6 @@ static void *show_parts(void *arg)
     return NULL;
 }
 
-struct NbdClientOpts {
-    char *device;
-    bool fork_process;
-    bool verbose;
-};
-
 static void *nbd_client_thread(void *arg)
 {
     struct NbdClientOpts *opts = arg;
@@ -289,14 +304,14 @@ static void *nbd_client_thread(void *arg)
 
     sioc = qio_channel_socket_new();
     if (qio_channel_socket_connect_sync(sioc,
-                                        saddr,
+                                        opts->saddr,
                                         &local_error) < 0) {
         error_report_err(local_error);
         goto out;
     }
 
-    if (nbd_receive_negotiate(NULL, QIO_CHANNEL(sioc),
-                              NULL, NULL, NULL, &info, &local_error) < 0) {
+    if (nbd_receive_negotiate(QIO_CHANNEL(sioc), NULL, NULL, NULL,
+                              &info, &local_error) < 0) {
         if (local_error) {
             error_report_err(local_error);
         }
@@ -320,14 +335,9 @@ static void *nbd_client_thread(void *arg)
 
     if (opts->verbose && !opts->fork_process) {
         fprintf(stderr, "NBD device %s is now connected to %s\n",
-                opts->device, srcpath);
+                opts->device, opts->srcpath);
     } else {
-        /* Close stderr so that the qemu-nbd process exits.  */
-        if (dup2(STDOUT_FILENO, STDERR_FILENO) < 0) {
-            error_report("Could not set stderr to /dev/null: %s",
-                         strerror(errno));
-            exit(EXIT_FAILURE);
-        }
+        nbd_client_release_pipe(opts->old_stderr);
     }
 
     if (nbd_client(fd) < 0) {
@@ -519,7 +529,6 @@ int main(int argc, char **argv)
     const char *bindto = NULL;
     const char *port = NULL;
     char *sockpath = NULL;
-    char *device = NULL;
     QemuOpts *sn_opts = NULL;
     const char *sn_id_or_name = NULL;
     const char *sopt = "hVb:o:p:rsnc:dvk:e:f:tl:x:T:D:AB:L";
@@ -582,16 +591,19 @@ int main(int argc, char **argv)
     const char *tlshostname = NULL;
     bool imageOpts = false;
     bool writethrough = false; /* Client will flush as needed. */
-    bool verbose = false;
-    bool fork_process = false;
     bool list = false;
     unsigned socket_activation;
     const char *pid_file_name = NULL;
     const char *selinux_label = NULL;
     BlockExportOptions *export_opts;
-#if HAVE_NBD_DEVICE
-    struct NbdClientOpts opts;
-#endif
+    struct NbdClientOpts opts = {
+        .fork_process = false,
+        .verbose = false,
+        .device = NULL,
+        .srcpath = NULL,
+        .saddr = NULL,
+        .old_stderr = STDOUT_FILENO,
+    };
 
 #ifdef CONFIG_POSIX
     os_setup_early_signal_handling();
@@ -719,7 +731,7 @@ int main(int argc, char **argv)
             disconnect = true;
             break;
         case 'c':
-            device = optarg;
+            opts.device = optarg;
             break;
         case 'e':
             if (qemu_strtoi(optarg, NULL, 0, &shared) < 0 ||
@@ -750,7 +762,7 @@ int main(int argc, char **argv)
             }
             break;
         case 'v':
-            verbose = true;
+            opts.verbose = true;
             break;
         case 'V':
             version(argv[0]);
@@ -782,7 +794,7 @@ int main(int argc, char **argv)
             tlsauthz = optarg;
             break;
         case QEMU_NBD_OPT_FORK:
-            fork_process = true;
+            opts.fork_process = true;
             break;
         case 'L':
             list = true;
@@ -802,12 +814,12 @@ int main(int argc, char **argv)
             exit(EXIT_FAILURE);
         }
         if (export_name || export_description || dev_offset ||
-            device || disconnect || fmt || sn_id_or_name || bitmaps ||
+            opts.device || disconnect || fmt || sn_id_or_name || bitmaps ||
             alloc_depth || seen_aio || seen_discard || seen_cache) {
             error_report("List mode is incompatible with per-device settings");
             exit(EXIT_FAILURE);
         }
-        if (fork_process) {
+        if (opts.fork_process) {
             error_report("List mode is incompatible with forking");
             exit(EXIT_FAILURE);
         }
@@ -832,7 +844,8 @@ int main(int argc, char **argv)
         }
     } else {
         /* Using socket activation - check user didn't use -p etc. */
-        const char *err_msg = socket_activation_validate_opts(device, sockpath,
+        const char *err_msg = socket_activation_validate_opts(opts.device,
+                                                              sockpath,
                                                               bindto, port,
                                                               selinux_label,
                                                               list);
@@ -850,7 +863,7 @@ int main(int argc, char **argv)
     }
 
     if (tlscredsid) {
-        if (device) {
+        if (opts.device) {
             error_report("TLS is not supported with a host device");
             exit(EXIT_FAILURE);
         }
@@ -880,7 +893,7 @@ int main(int argc, char **argv)
 
     if (selinux_label) {
 #ifdef CONFIG_SELINUX
-        if (sockpath == NULL && device == NULL) {
+        if (sockpath == NULL && opts.device == NULL) {
             error_report("--selinux-label is not permitted without --socket");
             exit(EXIT_FAILURE);
         }
@@ -891,13 +904,13 @@ int main(int argc, char **argv)
     }
 
     if (list) {
-        saddr = nbd_build_socket_address(sockpath, bindto, port);
-        return qemu_nbd_client_list(saddr, tlscreds,
+        opts.saddr = nbd_build_socket_address(sockpath, bindto, port);
+        return qemu_nbd_client_list(opts.saddr, tlscreds,
                                     tlshostname ? tlshostname : bindto);
     }
 
 #if !HAVE_NBD_DEVICE
-    if (disconnect || device) {
+    if (disconnect || opts.device) {
         error_report("Kernel /dev/nbdN support not available");
         exit(EXIT_FAILURE);
     }
@@ -919,7 +932,7 @@ int main(int argc, char **argv)
     }
 #endif
 
-    if ((device && !verbose) || fork_process) {
+    if ((opts.device && !opts.verbose) || opts.fork_process) {
 #ifndef WIN32
         g_autoptr(GError) err = NULL;
         int stderr_fd[2];
@@ -944,6 +957,16 @@ int main(int argc, char **argv)
 
             close(stderr_fd[0]);
 
+            /* Remember parent's stderr if we will be restoring it. */
+            if (opts.verbose /* fork_process is set */) {
+                opts.old_stderr = dup(STDERR_FILENO);
+                if (opts.old_stderr < 0) {
+                    error_report("Could not dup original stderr: %s",
+                                 strerror(errno));
+                    exit(EXIT_FAILURE);
+                }
+            }
+
             ret = qemu_daemon(1, 0);
             saved_errno = errno;    /* dup2 will overwrite error below */
 
@@ -1002,9 +1025,9 @@ int main(int argc, char **argv)
 #endif /* WIN32 */
     }
 
-    if (device != NULL && sockpath == NULL) {
+    if (opts.device != NULL && sockpath == NULL) {
         sockpath = g_malloc(128);
-        snprintf(sockpath, 128, SOCKET_PATH, basename(device));
+        snprintf(sockpath, 128, SOCKET_PATH, basename(opts.device));
     }
 
     server = qio_net_listener_new();
@@ -1023,8 +1046,8 @@ int main(int argc, char **argv)
             exit(EXIT_FAILURE);
         }
 #endif
-        saddr = nbd_build_socket_address(sockpath, bindto, port);
-        if (qio_net_listener_open_sync(server, saddr, backlog,
+        opts.saddr = nbd_build_socket_address(sockpath, bindto, port);
+        if (qio_net_listener_open_sync(server, opts.saddr, backlog,
                                        &local_err) < 0) {
             object_unref(OBJECT(server));
             error_report_err(local_err);
@@ -1059,19 +1082,19 @@ int main(int argc, char **argv)
     bdrv_init();
     atexit(qemu_nbd_shutdown);
 
-    srcpath = argv[optind];
+    opts.srcpath = argv[optind];
     if (imageOpts) {
-        QemuOpts *opts;
+        QemuOpts *o;
         if (fmt) {
             error_report("--image-opts and -f are mutually exclusive");
             exit(EXIT_FAILURE);
         }
-        opts = qemu_opts_parse_noisily(&file_opts, srcpath, true);
-        if (!opts) {
+        o = qemu_opts_parse_noisily(&file_opts, opts.srcpath, true);
+        if (!o) {
             qemu_opts_reset(&file_opts);
             exit(EXIT_FAILURE);
         }
-        options = qemu_opts_to_qdict(opts, NULL);
+        options = qemu_opts_to_qdict(o, NULL);
         qemu_opts_reset(&file_opts);
         blk = blk_new_open(NULL, NULL, options, flags, &local_err);
     } else {
@@ -1079,7 +1102,7 @@ int main(int argc, char **argv)
             options = qdict_new();
             qdict_put_str(options, "driver", fmt);
         }
-        blk = blk_new_open(srcpath, NULL, options, flags, &local_err);
+        blk = blk_new_open(opts.srcpath, NULL, options, flags, &local_err);
     }
 
     if (!blk) {
@@ -1145,15 +1168,9 @@ int main(int argc, char **argv)
     blk_exp_add(export_opts, &error_fatal);
     qapi_free_BlockExportOptions(export_opts);
 
-    if (device) {
+    if (opts.device) {
 #if HAVE_NBD_DEVICE
         int ret;
-        opts = (struct NbdClientOpts) {
-            .device = device,
-            .fork_process = fork_process,
-            .verbose = verbose,
-        };
-
         ret = pthread_create(&client_thread, NULL, nbd_client_thread, &opts);
         if (ret != 0) {
             error_report("Failed to create client thread: %s", strerror(ret));
@@ -1179,12 +1196,8 @@ int main(int argc, char **argv)
         exit(EXIT_FAILURE);
     }
 
-    if (fork_process) {
-        if (dup2(STDOUT_FILENO, STDERR_FILENO) < 0) {
-            error_report("Could not set stderr to /dev/null: %s",
-                         strerror(errno));
-            exit(EXIT_FAILURE);
-        }
+    if (opts.fork_process) {
+        nbd_client_release_pipe(opts.old_stderr);
     }
 
     state = RUNNING;
@@ -1203,7 +1216,7 @@ int main(int argc, char **argv)
 
     qemu_opts_del(sn_opts);
 
-    if (device) {
+    if (opts.device) {
         void *ret;
         pthread_join(client_thread, &ret);
         exit(ret != NULL);
diff --git a/qemu-options.hx b/qemu-options.hx
index 29b98c3d4c..bcd77255cb 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -26,7 +26,7 @@ DEF("machine", HAS_ARG, QEMU_OPTION_machine, \
     "-machine [type=]name[,prop[=value][,...]]\n"
     "                selects emulated machine ('-machine help' for list)\n"
     "                property accel=accel1[:accel2[:...]] selects accelerator\n"
-    "                supported accelerators are kvm, xen, hax, hvf, nvmm, whpx or tcg (default: tcg)\n"
+    "                supported accelerators are kvm, xen, hvf, nvmm, whpx or tcg (default: tcg)\n"
     "                vmport=on|off|auto controls emulation of vmport (default: auto)\n"
     "                dump-guest-core=on|off include guest memory in a core dump (default=on)\n"
     "                mem-merge=on|off controls memory merge support (default: on)\n"
@@ -59,7 +59,7 @@ SRST
 
     ``accel=accels1[:accels2[:...]]``
         This is used to enable an accelerator. Depending on the target
-        architecture, kvm, xen, hax, hvf, nvmm, whpx or tcg can be available.
+        architecture, kvm, xen, hvf, nvmm, whpx or tcg can be available.
         By default, tcg is used. If there is more than one accelerator
         specified, the next one is used if the previous one fails to
         initialize.
@@ -178,7 +178,7 @@ ERST
 
 DEF("accel", HAS_ARG, QEMU_OPTION_accel,
     "-accel [accel=]accelerator[,prop[=value][,...]]\n"
-    "                select accelerator (kvm, xen, hax, hvf, nvmm, whpx or tcg; use 'help' for a list)\n"
+    "                select accelerator (kvm, xen, hvf, nvmm, whpx or tcg; use 'help' for a list)\n"
     "                igd-passthru=on|off (enable Xen integrated Intel graphics passthrough, default=off)\n"
     "                kernel-irqchip=on|off|split controls accelerated irqchip support (default=on)\n"
     "                kvm-shadow-mem=size of KVM shadow MMU in bytes\n"
@@ -186,12 +186,13 @@ DEF("accel", HAS_ARG, QEMU_OPTION_accel,
     "                split-wx=on|off (enable TCG split w^x mapping)\n"
     "                tb-size=n (TCG translation block cache size)\n"
     "                dirty-ring-size=n (KVM dirty ring GFN count, default 0)\n"
+    "                eager-split-size=n (KVM Eager Page Split chunk size, default 0, disabled. ARM only)\n"
     "                notify-vmexit=run|internal-error|disable,notify-window=n (enable notify VM exit and set notify window, x86 only)\n"
     "                thread=single|multi (enable multi-threaded TCG)\n", QEMU_ARCH_ALL)
 SRST
 ``-accel name[,prop=value[,...]]``
     This is used to enable an accelerator. Depending on the target
-    architecture, kvm, xen, hax, hvf, nvmm, whpx or tcg can be available. By
+    architecture, kvm, xen, hvf, nvmm, whpx or tcg can be available. By
     default, tcg is used. If there is more than one accelerator
     specified, the next one is used if the previous one fails to
     initialize.
@@ -244,6 +245,20 @@ SRST
         is disabled (dirty-ring-size=0).  When enabled, KVM will instead
         record dirty pages in a bitmap.
 
+    ``eager-split-size=n``
+        KVM implements dirty page logging at the PAGE_SIZE granularity and
+        enabling dirty-logging on a huge-page requires breaking it into
+        PAGE_SIZE pages in the first place. KVM on ARM does this splitting
+        lazily by default. There are performance benefits in doing huge-page
+        split eagerly, especially in situations where TLBI costs associated
+        with break-before-make sequences are considerable and also if guest
+        workloads are read intensive. The size here specifies how many pages
+        to break at a time and needs to be a valid block size which is
+        1GB/2MB/4KB, 32MB/16KB and 512MB/64KB for 4KB/16KB/64KB PAGE_SIZE
+        respectively. Be wary of specifying a higher size as it will have an
+        impact on the memory. By default, this feature is disabled
+        (eager-split-size=0).
+
     ``notify-vmexit=run|internal-error|disable,notify-window=n``
         Enables or disables notify VM exit support on x86 host and specify
         the corresponding notify window to trigger the VM exit if enabled.
@@ -1209,10 +1224,10 @@ SRST
 ERST
 
 DEF("hda", HAS_ARG, QEMU_OPTION_hda,
-    "-hda/-hdb file  use 'file' as IDE hard disk 0/1 image\n", QEMU_ARCH_ALL)
+    "-hda/-hdb file  use 'file' as hard disk 0/1 image\n", QEMU_ARCH_ALL)
 DEF("hdb", HAS_ARG, QEMU_OPTION_hdb, "", QEMU_ARCH_ALL)
 DEF("hdc", HAS_ARG, QEMU_OPTION_hdc,
-    "-hdc/-hdd file  use 'file' as IDE hard disk 2/3 image\n", QEMU_ARCH_ALL)
+    "-hdc/-hdd file  use 'file' as hard disk 2/3 image\n", QEMU_ARCH_ALL)
 DEF("hdd", HAS_ARG, QEMU_OPTION_hdd, "", QEMU_ARCH_ALL)
 SRST
 ``-hda file``
@@ -1222,18 +1237,22 @@ SRST
 ``-hdc file``
   \ 
 ``-hdd file``
-    Use file as hard disk 0, 1, 2 or 3 image (see the :ref:`disk images`
-    chapter in the System Emulation Users Guide).
+    Use file as hard disk 0, 1, 2 or 3 image on the default bus of the
+    emulated machine (this is for example the IDE bus on most x86 machines,
+    but it can also be SCSI, virtio or something else on other target
+    architectures). See also the :ref:`disk images` chapter in the System
+    Emulation Users Guide.
 ERST
 
 DEF("cdrom", HAS_ARG, QEMU_OPTION_cdrom,
-    "-cdrom file     use 'file' as IDE cdrom image (cdrom is ide1 master)\n",
+    "-cdrom file     use 'file' as CD-ROM image\n",
     QEMU_ARCH_ALL)
 SRST
 ``-cdrom file``
-    Use file as CD-ROM image (you cannot use ``-hdc`` and ``-cdrom`` at
-    the same time). You can use the host CD-ROM by using ``/dev/cdrom``
-    as filename.
+    Use file as CD-ROM image on the default bus of the emulated machine
+    (which is IDE1 master on x86, so you cannot use ``-hdc`` and ``-cdrom``
+    at the same time there). On systems that support it, you can use the
+    host CD-ROM by using ``/dev/cdrom`` as filename.
 ERST
 
 DEF("blockdev", HAS_ARG, QEMU_OPTION_blockdev,
@@ -2863,6 +2882,19 @@ DEF("netdev", HAS_ARG, QEMU_OPTION_netdev,
     "                VALE port (created on the fly) called 'name' ('nmname' is name of the \n"
     "                netmap device, defaults to '/dev/netmap')\n"
 #endif
+#ifdef CONFIG_AF_XDP
+    "-netdev af-xdp,id=str,ifname=name[,mode=native|skb][,force-copy=on|off]\n"
+    "         [,queues=n][,start-queue=m][,inhibit=on|off][,sock-fds=x:y:...:z]\n"
+    "                attach to the existing network interface 'name' with AF_XDP socket\n"
+    "                use 'mode=MODE' to specify an XDP program attach mode\n"
+    "                use 'force-copy=on|off' to force XDP copy mode even if device supports zero-copy (default: off)\n"
+    "                use 'inhibit=on|off' to inhibit loading of a default XDP program (default: off)\n"
+    "                with inhibit=on,\n"
+    "                  use 'sock-fds' to provide file descriptors for already open AF_XDP sockets\n"
+    "                  added to a socket map in XDP program.  One socket per queue.\n"
+    "                use 'queues=n' to specify how many queues of a multiqueue interface should be used\n"
+    "                use 'start-queue=m' to specify the first queue that should be used\n"
+#endif
 #ifdef CONFIG_POSIX
     "-netdev vhost-user,id=str,chardev=dev[,vhostforce=on|off]\n"
     "                configure a vhost-user network, backed by a chardev 'dev'\n"
@@ -2908,6 +2940,9 @@ DEF("nic", HAS_ARG, QEMU_OPTION_nic,
 #ifdef CONFIG_NETMAP
     "netmap|"
 #endif
+#ifdef CONFIG_AF_XDP
+    "af-xdp|"
+#endif
 #ifdef CONFIG_POSIX
     "vhost-user|"
 #endif
@@ -2936,6 +2971,9 @@ DEF("net", HAS_ARG, QEMU_OPTION_net,
 #ifdef CONFIG_NETMAP
     "netmap|"
 #endif
+#ifdef CONFIG_AF_XDP
+    "af-xdp|"
+#endif
 #ifdef CONFIG_VMNET
     "vmnet-host|vmnet-shared|vmnet-bridged|"
 #endif
@@ -2943,7 +2981,7 @@ DEF("net", HAS_ARG, QEMU_OPTION_net,
     "                old way to initialize a host network interface\n"
     "                (use the -netdev option if possible instead)\n", QEMU_ARCH_ALL)
 SRST
-``-nic [tap|bridge|user|l2tpv3|vde|netmap|vhost-user|socket][,...][,mac=macaddr][,model=mn]``
+``-nic [tap|bridge|user|l2tpv3|vde|netmap|af-xdp|vhost-user|socket][,...][,mac=macaddr][,model=mn]``
     This option is a shortcut for configuring both the on-board
     (default) guest NIC hardware and the host network backend in one go.
     The host backend options are the same as with the corresponding
@@ -3357,6 +3395,55 @@ SRST
         # launch QEMU instance
         |qemu_system| linux.img -nic vde,sock=/tmp/myswitch
 
+``-netdev af-xdp,id=str,ifname=name[,mode=native|skb][,force-copy=on|off][,queues=n][,start-queue=m][,inhibit=on|off][,sock-fds=x:y:...:z]``
+    Configure AF_XDP backend to connect to a network interface 'name'
+    using AF_XDP socket.  A specific program attach mode for a default
+    XDP program can be forced with 'mode', defaults to best-effort,
+    where the likely most performant mode will be in use.  Number of queues
+    'n' should generally match the number or queues in the interface,
+    defaults to 1.  Traffic arriving on non-configured device queues will
+    not be delivered to the network backend.
+
+    .. parsed-literal::
+
+        # set number of queues to 4
+        ethtool -L eth0 combined 4
+        # launch QEMU instance
+        |qemu_system| linux.img -device virtio-net-pci,netdev=n1 \\
+            -netdev af-xdp,id=n1,ifname=eth0,queues=4
+
+    'start-queue' option can be specified if a particular range of queues
+    [m, m + n] should be in use.  For example, this is may be necessary in
+    order to use certain NICs in native mode.  Kernel allows the driver to
+    create a separate set of XDP queues on top of regular ones, and only
+    these queues can be used for AF_XDP sockets.  NICs that work this way
+    may also require an additional traffic redirection with ethtool to these
+    special queues.
+
+    .. parsed-literal::
+
+        # set number of queues to 1
+        ethtool -L eth0 combined 1
+        # redirect all the traffic to the second queue (id: 1)
+        # note: drivers may require non-empty key/mask pair.
+        ethtool -N eth0 flow-type ether \\
+            dst 00:00:00:00:00:00 m FF:FF:FF:FF:FF:FE action 1
+        ethtool -N eth0 flow-type ether \\
+            dst 00:00:00:00:00:01 m FF:FF:FF:FF:FF:FE action 1
+        # launch QEMU instance
+        |qemu_system| linux.img -device virtio-net-pci,netdev=n1 \\
+            -netdev af-xdp,id=n1,ifname=eth0,queues=1,start-queue=1
+
+    XDP program can also be loaded externally.  In this case 'inhibit' option
+    should be set to 'on' and 'sock-fds' provided with file descriptors for
+    already open but not bound XDP sockets already added to a socket map for
+    corresponding queues.  One socket per queue.
+
+    .. parsed-literal::
+
+        |qemu_system| linux.img -device virtio-net-pci,netdev=n1 \\
+            -netdev af-xdp,id=n1,ifname=eth0,queues=3,inhibit=on,sock-fds=15:16:17
+
 ``-netdev vhost-user,chardev=id[,vhostforce=on|off][,queues=n]``
     Establish a vhost-user netdev, backed by a chardev id. The chardev
     should be a unix domain socket backed one. The vhost-user uses a
@@ -4976,7 +5063,7 @@ SRST
     they are specified. Note that the 'id' property must be set. These
     objects are placed in the '/objects' path.
 
-    ``-object memory-backend-file,id=id,size=size,mem-path=dir,share=on|off,discard-data=on|off,merge=on|off,dump=on|off,prealloc=on|off,host-nodes=host-nodes,policy=default|preferred|bind|interleave,align=align,offset=offset,readonly=on|off``
+    ``-object memory-backend-file,id=id,size=size,mem-path=dir,share=on|off,discard-data=on|off,merge=on|off,dump=on|off,prealloc=on|off,host-nodes=host-nodes,policy=default|preferred|bind|interleave,align=align,offset=offset,readonly=on|off,rom=on|off|auto``
         Creates a memory file backend object, which can be used to back
         the guest RAM with huge pages.
 
@@ -5066,6 +5153,20 @@ SRST
         The ``readonly`` option specifies whether the backing file is opened
         read-only or read-write (default).
 
+        The ``rom`` option specifies whether to create Read Only Memory
+        (ROM) that cannot be modified by the VM. Any write attempts to such
+        ROM will be denied. Most use cases want proper RAM instead of ROM.
+        However, selected use cases, like R/O NVDIMMs, can benefit from
+        ROM. If set to ``on``, create ROM; if set to ``off``, create
+        writable RAM; if set to ``auto`` (default), the value of the
+        ``readonly`` option is used. This option is primarily helpful when
+        we want to have writable RAM in configurations that would
+        traditionally create ROM before the ``rom`` option was introduced:
+        VM templating, where we want to open a file readonly
+        (``readonly=on``) and mark the memory to be private for QEMU
+        (``share=off``). For this use case, we need writable RAM instead
+        of ROM, and want to also set ``rom=off``.
+
     ``-object memory-backend-ram,id=id,merge=on|off,dump=on|off,share=on|off,prealloc=on|off,size=size,host-nodes=host-nodes,policy=default|preferred|bind|interleave``
         Creates a memory backend object, which can be used to back the
         guest RAM. Memory backend objects offer more control than the
diff --git a/qga/channel-posix.c b/qga/channel-posix.c
index 0c5175d957..465d688ecb 100644
--- a/qga/channel-posix.c
+++ b/qga/channel-posix.c
@@ -152,7 +152,7 @@ static gboolean ga_channel_open(GAChannel *c, const gchar *path,
 #ifdef __FreeBSD__
         /*
          * In the default state channel sends echo of every command to a
-         * client. The client programm doesn't expect this and raises an
+         * client. The client program doesn't expect this and raises an
          * error. Suppress echo by resetting ECHO terminal flag.
          */
         struct termios tio;
diff --git a/qga/commands-posix-ssh.c b/qga/commands-posix-ssh.c
index f3a580b8cc..236f80de44 100644
--- a/qga/commands-posix-ssh.c
+++ b/qga/commands-posix-ssh.c
@@ -382,7 +382,7 @@ test_add_keys(void)
                                       &err);
     g_assert(err == NULL);
 
-    /*  key2 came first, and should'nt be duplicated */
+    /*  key2 came first, and shouldn't be duplicated */
     test_authorized_keys_equal("algo key2 comments\n"
                                "algo key1 comments");
 }
diff --git a/qga/commands-posix.c b/qga/commands-posix.c
index def857d773..6169bbf7a0 100644
--- a/qga/commands-posix.c
+++ b/qga/commands-posix.c
@@ -3249,7 +3249,7 @@ GuestUserList *qmp_guest_get_users(Error **errp)
 
 #endif
 
-/* Replace escaped special characters with theire real values. The replacement
+/* Replace escaped special characters with their real values. The replacement
  * is done in place -- returned value is in the original string.
  */
 static void ga_osrelease_replace_special(gchar *value)
diff --git a/qga/commands-win32.c b/qga/commands-win32.c
index d23875264f..6beae659b7 100644
--- a/qga/commands-win32.c
+++ b/qga/commands-win32.c
@@ -487,7 +487,7 @@ static GuestDiskBusType win2qemu[] = {
     [BusTypeVirtual] = GUEST_DISK_BUS_TYPE_VIRTUAL,
     [BusTypeFileBackedVirtual] = GUEST_DISK_BUS_TYPE_FILE_BACKED_VIRTUAL,
     /*
-     * BusTypeSpaces currently is not suported
+     * BusTypeSpaces currently is not supported
      */
     [BusTypeSpaces] = GUEST_DISK_BUS_TYPE_UNKNOWN,
     [BusTypeNvme] = GUEST_DISK_BUS_TYPE_NVME,
@@ -2259,7 +2259,7 @@ static char *ga_get_win_product_name(Error **errp)
         }
     }
     if (err != ERROR_SUCCESS) {
-        error_setg_win32(errp, err, "failed to retrive ProductName");
+        error_setg_win32(errp, err, "failed to retrieve ProductName");
         goto fail;
     }
 
diff --git a/qga/main.c b/qga/main.c
index 002161a0cc..8668b9f3d3 100644
--- a/qga/main.c
+++ b/qga/main.c
@@ -1333,7 +1333,7 @@ static bool check_is_frozen(GAState *s)
     /* check if a previous instance of qemu-ga exited with filesystems' state
      * marked as frozen. this could be a stale value (a non-qemu-ga process
      * or reboot may have since unfrozen them), but better to require an
-     * uneeded unfreeze than to risk hanging on start-up
+     * unneeded unfreeze than to risk hanging on start-up
      */
     struct stat st;
     if (stat(s->state_filepath_isfrozen, &st) == -1) {
diff --git a/qga/meson.build b/qga/meson.build
index dd18092f56..59cae0cc6e 100644
--- a/qga/meson.build
+++ b/qga/meson.build
@@ -85,7 +85,7 @@ qga_ss.add(when: 'CONFIG_WIN32', if_true: files(
   'vss-win32.c'
 ))
 
-qga_ss = qga_ss.apply(config_host, strict: false)
+qga_ss = qga_ss.apply(config_targetos, strict: false)
 
 gen_tlb = []
 qga_libs = []
@@ -180,7 +180,7 @@ test_env.set('G_TEST_BUILDDIR', meson.current_build_dir())
 # the leak detector in build-oss-fuzz Gitlab CI test. we should re-enable
 # this when an alternative is implemented or when the underlying glib
 # issue is identified/fix
-#if 'CONFIG_POSIX' in config_host
+#if targetos != 'windows'
 if false
   srcs = [files('commands-posix-ssh.c')]
   i = 0
diff --git a/qga/vss-win32/install.cpp b/qga/vss-win32/install.cpp
index ae38662a62..84944133f7 100644
--- a/qga/vss-win32/install.cpp
+++ b/qga/vss-win32/install.cpp
@@ -343,7 +343,7 @@ STDAPI COMRegister(void)
                                    _bstr_t(dllPath), _bstr_t(tlbPath),
                                    _bstr_t("")));
 
-    /* Setup roles of the applicaion */
+    /* Setup roles of the application */
 
     chk(getNameByStringSID(administratorsGroupSID, buffer, &bufferLen));
     chk(pApps->GetCollection(_bstr_t(L"Roles"), key,
@@ -439,7 +439,7 @@ STDAPI DllRegisterServer(void)
         goto out;
     }
 
-    /* Add this module to registery */
+    /* Add this module to registry */
 
     sprintf(key, "CLSID\\%s", g_szClsid);
     if (!CreateRegistryKey(key, NULL, g_szClsid)) {
diff --git a/roms/edk2 b/roms/edk2
index f80f052277..819cfc6b42 160000
--- a/roms/edk2
+++ b/roms/edk2
@@ -1 +1 @@
-Subproject commit f80f052277c88a67c55e107b550f504eeea947d3
+Subproject commit 819cfc6b42a68790a23509e4fcc58ceb70e1965e
diff --git a/roms/edk2-build.config b/roms/edk2-build.config
index 66ef9ffcb9..bab6a9caeb 100644
--- a/roms/edk2-build.config
+++ b/roms/edk2-build.config
@@ -26,6 +26,9 @@ DEBUG_PRINT_ERROR_LEVEL  = 0x80000000
 # grub.efi uses EfiLoaderData for code
 PcdDxeNxMemoryProtectionPolicy = 0xC000000000007FD1
 
+[pcds.workaround.202308]
+PcdFirstTimeWakeUpAPsBySipi = FALSE
+
 ####################################################################################
 # i386
 
@@ -57,6 +60,7 @@ desc = ovmf build (64-bit)
 conf = OvmfPkg/OvmfPkgX64.dsc
 arch = X64
 opts = common
+pcds = workaround.202308
 plat = OvmfX64
 dest = ../pc-bios
 cpy1 = FV/OVMF_CODE.fd edk2-x86_64-code.fd
@@ -67,6 +71,7 @@ conf = OvmfPkg/OvmfPkgIa32X64.dsc
 arch = IA32 X64
 opts = common
        ovmf.sb.smm
+pcds = workaround.202308
 plat = Ovmf3264
 dest = ../pc-bios
 cpy1 = FV/OVMF_CODE.fd edk2-x86_64-secure-code.fd
@@ -76,6 +81,7 @@ desc = ovmf build for microvm
 conf = OvmfPkg/Microvm/MicrovmX64.dsc
 arch = X64
 opts = common
+pcds = workaround.202308
 plat = MicrovmX64
 dest = ../pc-bios
 cpy1 = FV/MICROVM.fd  edk2-x86_64-microvm.fd
@@ -120,5 +126,7 @@ conf = OvmfPkg/RiscVVirt/RiscVVirtQemu.dsc
 arch = RISCV64
 plat = RiscVVirtQemu
 dest = ../pc-bios
-cpy1 = FV/RISCV_VIRT.fd  edk2-riscv.fd
-pad1 = edk2-riscv.fd     32m
+cpy1 = FV/RISCV_VIRT_CODE.fd  edk2-riscv-code.fd
+cpy2 = FV/RISCV_VIRT_VARS.fd  edk2-riscv-vars.fd
+pad1 = edk2-riscv-code.fd     32m
+pad2 = edk2-riscv-vars.fd     32m
diff --git a/roms/edk2-build.py b/roms/edk2-build.py
index 870893f7c8..e564765aaa 100755
--- a/roms/edk2-build.py
+++ b/roms/edk2-build.py
@@ -6,6 +6,7 @@ https://gitlab.com/kraxel/edk2-build-config
 """
 import os
 import sys
+import time
 import shutil
 import argparse
 import subprocess
@@ -45,19 +46,28 @@ def get_coredir(cfg):
         return os.path.abspath(cfg['global']['core'])
     return os.getcwd()
 
-def get_version(cfg):
+def get_toolchain(cfg, build):
+    if cfg.has_option(build, 'tool'):
+        return cfg[build]['tool']
+    if cfg.has_option('global', 'tool'):
+        return cfg['global']['tool']
+    return 'GCC5'
+
+def get_version(cfg, silent = False):
     coredir = get_coredir(cfg)
     if version_override:
         version = version_override
-        print('')
-        print(f'### version [override]: {version}')
+        if not silent:
+            print('')
+            print(f'### version [override]: {version}')
         return version
     if os.environ.get('RPM_PACKAGE_NAME'):
         version = os.environ.get('RPM_PACKAGE_NAME')
         version += '-' + os.environ.get('RPM_PACKAGE_VERSION')
         version += '-' + os.environ.get('RPM_PACKAGE_RELEASE')
-        print('')
-        print(f'### version [rpmbuild]: {version}')
+        if not silent:
+            print('')
+            print(f'### version [rpmbuild]: {version}')
         return version
     if os.path.exists(coredir + '/.git'):
         cmdline = [ 'git', 'describe', '--tags', '--abbrev=8',
@@ -66,16 +76,17 @@ def get_version(cfg):
                                 stdout = subprocess.PIPE,
                                 check = True)
         version = result.stdout.decode().strip()
-        print('')
-        print(f'### version [git]: {version}')
+        if not silent:
+            print('')
+            print(f'### version [git]: {version}')
         return version
     return None
 
 def pcd_string(name, value):
     return f'{name}=L{value}\\0'
 
-def pcd_version(cfg):
-    version = get_version(cfg)
+def pcd_version(cfg, silent = False):
+    version = get_version(cfg, silent)
     if version is None:
         return []
     return [ '--pcd', pcd_string('PcdFirmwareVersionString', version) ]
@@ -85,49 +96,58 @@ def pcd_release_date():
         return []
     return [ '--pcd', pcd_string('PcdFirmwareReleaseDateString', release_date) ]
 
-def build_message(line, line2 = None):
+def build_message(line, line2 = None, silent = False):
     if os.environ.get('TERM') in [ 'xterm', 'xterm-256color' ]:
         # setxterm  title
         start  = '\x1b]2;'
         end    = '\x07'
         print(f'{start}{rebase_prefix}{line}{end}', end = '')
 
-    print('')
-    print('###')
-    print(f'### {rebase_prefix}{line}')
-    if line2:
-        print(f'### {line2}')
-    print('###', flush = True)
-
-def build_run(cmdline, name, section, silent = False):
-    print(cmdline, flush = True)
     if silent:
-        print('### building in silent mode ...', flush = True)
+        print(f'### {rebase_prefix}{line}', flush = True)
+    else:
+        print('')
+        print('###')
+        print(f'### {rebase_prefix}{line}')
+        if line2:
+            print(f'### {line2}')
+        print('###', flush = True)
+
+def build_run(cmdline, name, section, silent = False, nologs = False):
+    if silent:
+        logfile = f'{section}.log'
+        if nologs:
+            print(f'### building in silent mode [no log] ...', flush = True)
+        else:
+            print(f'### building in silent mode [{logfile}] ...', flush = True)
+        start = time.time()
         result = subprocess.run(cmdline, check = False,
                                 stdout = subprocess.PIPE,
                                 stderr = subprocess.STDOUT)
-
-        logfile = f'{section}.log'
-        print(f'### writing log to {logfile} ...')
-        with open(logfile, 'wb') as f:
-            f.write(result.stdout)
+        if not nologs:
+            with open(logfile, 'wb') as f:
+                f.write(result.stdout)
 
         if result.returncode:
             print('### BUILD FAILURE')
+            print('### cmdline')
+            print(cmdline)
             print('### output')
             print(result.stdout.decode())
             print(f'### exit code: {result.returncode}')
         else:
-            print('### OK')
+            secs = int(time.time() - start)
+            print(f'### OK ({int(secs/60)}:{secs%60:02d})')
     else:
+        print(cmdline, flush = True)
         result = subprocess.run(cmdline, check = False)
     if result.returncode:
         print(f'ERROR: {cmdline[0]} exited with {result.returncode}'
               f' while building {name}')
         sys.exit(result.returncode)
 
-def build_copy(plat, tgt, dstdir, copy):
-    srcdir = f'Build/{plat}/{tgt}_GCC5'
+def build_copy(plat, tgt, toolchain, dstdir, copy):
+    srcdir = f'Build/{plat}/{tgt}_{toolchain}'
     names = copy.split()
     srcfile = names[0]
     if len(names) > 1:
@@ -156,66 +176,68 @@ def pad_file(dstdir, pad):
     subprocess.run(cmdline, check = True)
 
 # pylint: disable=too-many-branches
-def build_one(cfg, build, jobs = None, silent = False):
-    cmdline  = [ 'build' ]
-    cmdline += [ '-t', 'GCC5' ]
-    cmdline += [ '-p', cfg[build]['conf'] ]
+def build_one(cfg, build, jobs = None, silent = False, nologs = False):
+    b = cfg[build]
 
-    if (cfg[build]['conf'].startswith('OvmfPkg/') or
-        cfg[build]['conf'].startswith('ArmVirtPkg/')):
-        cmdline += pcd_version(cfg)
+    cmdline  = [ 'build' ]
+    cmdline += [ '-t', get_toolchain(cfg, build) ]
+    cmdline += [ '-p', b['conf'] ]
+
+    if (b['conf'].startswith('OvmfPkg/') or
+        b['conf'].startswith('ArmVirtPkg/')):
+        cmdline += pcd_version(cfg, silent)
         cmdline += pcd_release_date()
 
     if jobs:
         cmdline += [ '-n', jobs ]
-    for arch in cfg[build]['arch'].split():
+    for arch in b['arch'].split():
         cmdline += [ '-a', arch ]
-    if 'opts' in cfg[build]:
-        for name in cfg[build]['opts'].split():
+    if 'opts' in b:
+        for name in b['opts'].split():
             section = 'opts.' + name
             for opt in cfg[section]:
                 cmdline += [ '-D', opt + '=' + cfg[section][opt] ]
-    if 'pcds' in cfg[build]:
-        for name in cfg[build]['pcds'].split():
+    if 'pcds' in b:
+        for name in b['pcds'].split():
             section = 'pcds.' + name
             for pcd in cfg[section]:
                 cmdline += [ '--pcd', pcd + '=' + cfg[section][pcd] ]
-    if 'tgts' in cfg[build]:
-        tgts = cfg[build]['tgts'].split()
+    if 'tgts' in b:
+        tgts = b['tgts'].split()
     else:
         tgts = [ 'DEBUG' ]
     for tgt in tgts:
         desc = None
-        if 'desc' in cfg[build]:
-            desc = cfg[build]['desc']
-        build_message(f'building: {cfg[build]["conf"]} ({cfg[build]["arch"]}, {tgt})',
-                      f'description: {desc}')
+        if 'desc' in b:
+            desc = b['desc']
+        build_message(f'building: {b["conf"]} ({b["arch"]}, {tgt})',
+                      f'description: {desc}',
+                      silent = silent)
         build_run(cmdline + [ '-b', tgt ],
-                  cfg[build]['conf'],
+                  b['conf'],
                   build + '.' + tgt,
-                  silent)
+                  silent,
+                  nologs)
 
-        if 'plat' in cfg[build]:
+        if 'plat' in b:
             # copy files
-            for cpy in cfg[build]:
+            for cpy in b:
                 if not cpy.startswith('cpy'):
                     continue
-                build_copy(cfg[build]['plat'],
-                           tgt,
-                           cfg[build]['dest'],
-                           cfg[build][cpy])
+                build_copy(b['plat'], tgt,
+                           get_toolchain(cfg, build),
+                           b['dest'], b[cpy])
             # pad builds
-            for pad in cfg[build]:
+            for pad in b:
                 if not pad.startswith('pad'):
                     continue
-                pad_file(cfg[build]['dest'],
-                         cfg[build][pad])
+                pad_file(b['dest'], b[pad])
 
-def build_basetools(silent = False):
-    build_message('building: BaseTools')
+def build_basetools(silent = False, nologs = False):
+    build_message('building: BaseTools', silent = silent)
     basedir = os.environ['EDK_TOOLS_PATH']
     cmdline = [ 'make', '-C', basedir ]
-    build_run(cmdline, 'BaseTools', 'build.basetools', silent)
+    build_run(cmdline, 'BaseTools', 'build.basetools', silent, nologs)
 
 def binary_exists(name):
     for pdir in os.environ['PATH'].split(':'):
@@ -223,7 +245,7 @@ def binary_exists(name):
             return True
     return False
 
-def prepare_env(cfg):
+def prepare_env(cfg, silent = False):
     """ mimic Conf/BuildEnv.sh """
     workspace = os.getcwd()
     packages = [ workspace, ]
@@ -253,7 +275,7 @@ def prepare_env(cfg):
     toolsdef = coredir + '/Conf/tools_def.txt'
     if not os.path.exists(toolsdef):
         os.makedirs(os.path.dirname(toolsdef), exist_ok = True)
-        build_message('running BaseTools/BuildEnv')
+        build_message('running BaseTools/BuildEnv', silent = silent)
         cmdline = [ 'bash', 'BaseTools/BuildEnv' ]
         subprocess.run(cmdline, cwd = coredir, check = True)
 
@@ -267,20 +289,32 @@ def prepare_env(cfg):
     os.environ['PYTHONHASHSEED'] = '1'
 
     # for cross builds
-    if binary_exists('arm-linux-gnu-gcc'):
+    if binary_exists('arm-linux-gnueabi-gcc'):
+        # ubuntu
+        os.environ['GCC5_ARM_PREFIX'] = 'arm-linux-gnueabi-'
+        os.environ['GCC_ARM_PREFIX'] = 'arm-linux-gnueabi-'
+    elif binary_exists('arm-linux-gnu-gcc'):
+        # fedora
         os.environ['GCC5_ARM_PREFIX'] = 'arm-linux-gnu-'
+        os.environ['GCC_ARM_PREFIX'] = 'arm-linux-gnu-'
     if binary_exists('loongarch64-linux-gnu-gcc'):
         os.environ['GCC5_LOONGARCH64_PREFIX'] = 'loongarch64-linux-gnu-'
+        os.environ['GCC_LOONGARCH64_PREFIX'] = 'loongarch64-linux-gnu-'
 
     hostarch = os.uname().machine
     if binary_exists('aarch64-linux-gnu-gcc') and hostarch != 'aarch64':
         os.environ['GCC5_AARCH64_PREFIX'] = 'aarch64-linux-gnu-'
+        os.environ['GCC_AARCH64_PREFIX'] = 'aarch64-linux-gnu-'
     if binary_exists('riscv64-linux-gnu-gcc') and hostarch != 'riscv64':
         os.environ['GCC5_RISCV64_PREFIX'] = 'riscv64-linux-gnu-'
+        os.environ['GCC_RISCV64_PREFIX'] = 'riscv64-linux-gnu-'
     if binary_exists('x86_64-linux-gnu-gcc') and hostarch != 'x86_64':
         os.environ['GCC5_IA32_PREFIX'] = 'x86_64-linux-gnu-'
         os.environ['GCC5_X64_PREFIX'] = 'x86_64-linux-gnu-'
         os.environ['GCC5_BIN'] = 'x86_64-linux-gnu-'
+        os.environ['GCC_IA32_PREFIX'] = 'x86_64-linux-gnu-'
+        os.environ['GCC_X64_PREFIX'] = 'x86_64-linux-gnu-'
+        os.environ['GCC_BIN'] = 'x86_64-linux-gnu-'
 
 def build_list(cfg):
     for build in cfg.sections():
@@ -303,10 +337,12 @@ def main():
     parser.add_argument('-j', '--jobs', dest = 'jobs', type = str,
                         help = 'allow up to JOBS parallel build jobs',
                         metavar = 'JOBS')
-    parser.add_argument('-m', '--match', dest = 'match', type = str,
+    parser.add_argument('-m', '--match', dest = 'match',
+                        type = str, action = 'append',
                         help = 'only run builds matching INCLUDE (substring)',
                         metavar = 'INCLUDE')
-    parser.add_argument('-x', '--exclude', dest = 'exclude', type = str,
+    parser.add_argument('-x', '--exclude', dest = 'exclude',
+                        type = str, action = 'append',
                         help = 'skip builds matching EXCLUDE (substring)',
                         metavar = 'EXCLUDE')
     parser.add_argument('-l', '--list', dest = 'list',
@@ -316,6 +352,9 @@ def main():
                         action = 'store_true', default = False,
                         help = 'write build output to logfiles, '
                         'write to console only on errors')
+    parser.add_argument('--no-logs', dest = 'nologs',
+                        action = 'store_true', default = False,
+                        help = 'do not write build log files (with --silent)')
     parser.add_argument('--core', dest = 'core', type = str, metavar = 'DIR',
                         help = 'location of the core edk2 repository '
                         '(i.e. where BuildTools are located)')
@@ -323,6 +362,9 @@ def main():
                         type = str, action = 'append', metavar = 'DIR',
                         help = 'location(s) of additional packages '
                         '(can be specified multiple times)')
+    parser.add_argument('-t', '--toolchain', dest = 'toolchain',
+                        type = str, metavar = 'NAME',
+                        help = 'tool chain to be used to build edk2')
     parser.add_argument('--version-override', dest = 'version_override',
                         type = str, metavar = 'VERSION',
                         help = 'set firmware build version')
@@ -335,7 +377,7 @@ def main():
         os.chdir(options.directory)
 
     if not os.path.exists(options.configfile):
-        print('config file "{options.configfile}" not found')
+        print(f'config file "{options.configfile}" not found')
         return 1
 
     cfg = configparser.ConfigParser()
@@ -344,7 +386,7 @@ def main():
 
     if options.list:
         build_list(cfg)
-        return
+        return 0
 
     if not cfg.has_section('global'):
         cfg.add_section('global')
@@ -352,6 +394,8 @@ def main():
         cfg.set('global', 'core', options.core)
     if options.pkgs:
         cfg.set('global', 'pkgs', ' '.join(options.pkgs))
+    if options.toolchain:
+        cfg.set('global', 'tool', options.toolchain)
 
     global version_override
     global release_date
@@ -361,18 +405,28 @@ def main():
     if options.release_date:
         release_date = options.release_date
 
-    prepare_env(cfg)
-    build_basetools(options.silent)
+    prepare_env(cfg, options.silent)
+    build_basetools(options.silent, options.nologs)
     for build in cfg.sections():
         if not build.startswith('build.'):
             continue
-        if options.match and options.match not in build:
-            print(f'# skipping "{build}" (not matching "{options.match}")')
-            continue
-        if options.exclude and options.exclude in build:
-            print(f'# skipping "{build}" (matching "{options.exclude}")')
-            continue
-        build_one(cfg, build, options.jobs, options.silent)
+        if options.match:
+            matching = False
+            for item in options.match:
+                if item in build:
+                    matching = True
+            if not matching:
+                print(f'# skipping "{build}" (not matching "{"|".join(options.match)}")')
+                continue
+        if options.exclude:
+            exclude = False
+            for item in options.exclude:
+                if item in build:
+                    print(f'# skipping "{build}" (matching "{item}")')
+                    exclude = True
+            if exclude:
+                continue
+        build_one(cfg, build, options.jobs, options.silent, options.nologs)
 
     return 0
 
diff --git a/roms/seabios-hppa b/roms/seabios-hppa
index 673d2595d4..763e3b7349 160000
--- a/roms/seabios-hppa
+++ b/roms/seabios-hppa
@@ -1 +1 @@
-Subproject commit 673d2595d4f773cc266cbf8dbaf2f475a6adb949
+Subproject commit 763e3b73499db5fef94087bd310bfc8ccbcf7858
diff --git a/scripts/block-coroutine-wrapper.py b/scripts/block-coroutine-wrapper.py
index d4a183db61..685d0b4ed4 100644
--- a/scripts/block-coroutine-wrapper.py
+++ b/scripts/block-coroutine-wrapper.py
@@ -71,10 +71,13 @@ class FuncDecl:
         self.args = [ParamDecl(arg.strip()) for arg in args.split(',')]
         self.create_only_co = 'mixed' not in variant
         self.graph_rdlock = 'bdrv_rdlock' in variant
+        self.graph_wrlock = 'bdrv_wrlock' in variant
 
         self.wrapper_type = wrapper_type
 
         if wrapper_type == 'co':
+            if self.graph_wrlock:
+                raise ValueError(f"co function can't be wrlock: {self.name}")
             subsystem, subname = self.name.split('_', 1)
             self.target_name = f'{subsystem}_co_{subname}'
         else:
@@ -102,12 +105,13 @@ class FuncDecl:
 
     def gen_ctx(self, prefix: str = '') -> str:
         t = self.args[0].type
+        name = self.args[0].name
         if t == 'BlockDriverState *':
-            return f'bdrv_get_aio_context({prefix}bs)'
+            return f'bdrv_get_aio_context({prefix}{name})'
         elif t == 'BdrvChild *':
-            return f'bdrv_get_aio_context({prefix}child->bs)'
+            return f'bdrv_get_aio_context({prefix}{name}->bs)'
         elif t == 'BlockBackend *':
-            return f'blk_get_aio_context({prefix}blk)'
+            return f'blk_get_aio_context({prefix}{name})'
         else:
             return 'qemu_get_aio_context()'
 
@@ -250,6 +254,12 @@ def gen_no_co_wrapper(func: FuncDecl) -> str:
     name = func.target_name
     struct_name = func.struct_name
 
+    graph_lock=''
+    graph_unlock=''
+    if func.graph_wrlock:
+        graph_lock='    bdrv_graph_wrlock(NULL);'
+        graph_unlock='    bdrv_graph_wrunlock();'
+
     return f"""\
 /*
  * Wrappers for {name}
@@ -266,9 +276,11 @@ static void {name}_bh(void *opaque)
     {struct_name} *s = opaque;
     AioContext *ctx = {func.gen_ctx('s->')};
 
+{graph_lock}
     aio_context_acquire(ctx);
     {func.get_result}{name}({ func.gen_list('s->{name}') });
     aio_context_release(ctx);
+{graph_unlock}
 
     aio_co_wake(s->co);
 }}
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index eeaec436eb..1ad9ccb74b 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -1621,7 +1621,7 @@ sub process {
 				my $hex =
 					qr/%[-+ *.0-9]*([hljztL]|ll|hh)?(x|X|"\s*PRI[xX][^"]*"?)/;
 
-				# don't consider groups splitted by [.:/ ], like 2A.20:12ab
+				# don't consider groups split by [.:/ ], like 2A.20:12ab
 				my $tmpline = $rawline;
 				$tmpline =~ s/($hex[.:\/ ])+$hex//g;
 
diff --git a/scripts/ci/gitlab-pipeline-status b/scripts/ci/gitlab-pipeline-status
index 924db327ff..e3343b0510 100755
--- a/scripts/ci/gitlab-pipeline-status
+++ b/scripts/ci/gitlab-pipeline-status
@@ -28,7 +28,7 @@ class CommunicationFailure(Exception):
 
 
 class NoPipelineFound(Exception):
-    """Communication is successfull but pipeline is not found."""
+    """Communication is successful but pipeline is not found."""
 
 
 def get_local_branch_commit(branch):
diff --git a/scripts/ci/org.centos/stream/8/x86_64/configure b/scripts/ci/org.centos/stream/8/x86_64/configure
index d02b09a4b9..76781f17f4 100755
--- a/scripts/ci/org.centos/stream/8/x86_64/configure
+++ b/scripts/ci/org.centos/stream/8/x86_64/configure
@@ -35,6 +35,7 @@
 --block-drv-ro-whitelist="vmdk,vhdx,vpc,https,ssh" \
 --with-coroutine=ucontext \
 --tls-priority=@QEMU,SYSTEM \
+--disable-af-xdp \
 --disable-attr \
 --disable-auth-pam \
 --disable-avx2 \
@@ -68,7 +69,6 @@
 --disable-gtk \
 --disable-guest-agent \
 --disable-guest-agent-msi \
---disable-hax \
 --disable-hvf \
 --disable-iconv \
 --disable-kvm \
diff --git a/scripts/codeconverter/codeconverter/qom_macros.py b/scripts/codeconverter/codeconverter/qom_macros.py
index 2d2f2055a3..2b0c8224a1 100644
--- a/scripts/codeconverter/codeconverter/qom_macros.py
+++ b/scripts/codeconverter/codeconverter/qom_macros.py
@@ -142,7 +142,7 @@ class FullStructTypedefMatch(TypedefMatch):
         return name
 
     def strip_typedef(self) -> Patch:
-        """generate patch that will strip typedef from the struct declartion
+        """generate patch that will strip typedef from the struct declaration
 
         The caller is responsible for readding the typedef somewhere else.
         """
diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh
index 9da3fe299b..230119346a 100644
--- a/scripts/meson-buildoptions.sh
+++ b/scripts/meson-buildoptions.sh
@@ -29,6 +29,7 @@ meson_options_help() {
   printf "%s\n" '  --enable-debug-mutex     mutex debugging support'
   printf "%s\n" '  --enable-debug-stack-usage'
   printf "%s\n" '                           measure coroutine stack usage'
+  printf "%s\n" '  --enable-debug-tcg       TCG debugging'
   printf "%s\n" '  --enable-fdt[=CHOICE]    Whether and how to find the libfdt library'
   printf "%s\n" '                           (choices: auto/disabled/enabled/internal/system)'
   printf "%s\n" '  --enable-fuzzing         build fuzzing targets'
@@ -39,6 +40,7 @@ meson_options_help() {
   printf "%s\n" '                           jemalloc/system/tcmalloc)'
   printf "%s\n" '  --enable-module-upgrades try to load modules from alternate paths for'
   printf "%s\n" '                           upgrades'
+  printf "%s\n" '  --enable-plugins         TCG plugins via shared library loading'
   printf "%s\n" '  --enable-rng-none        dummy RNG, avoid using /dev/(u)random and'
   printf "%s\n" '                           getrandom()'
   printf "%s\n" '  --enable-safe-stack      SafeStack Stack Smash Protection (requires'
@@ -74,6 +76,7 @@ meson_options_help() {
   printf "%s\n" 'disabled with --disable-FEATURE, default is enabled if available'
   printf "%s\n" '(unless built with --without-default-features):'
   printf "%s\n" ''
+  printf "%s\n" '  af-xdp          AF_XDP network backend support'
   printf "%s\n" '  alsa            ALSA sound support'
   printf "%s\n" '  attr            attr/xattr support'
   printf "%s\n" '  auth-pam        PAM access control'
@@ -110,7 +113,6 @@ meson_options_help() {
   printf "%s\n" '  gtk-clipboard   clipboard support for the gtk UI (EXPERIMENTAL, MAY HANG)'
   printf "%s\n" '  guest-agent     Build QEMU Guest Agent'
   printf "%s\n" '  guest-agent-msi Build MSI package for the QEMU Guest Agent'
-  printf "%s\n" '  hax             HAX acceleration support'
   printf "%s\n" '  hvf             HVF acceleration support'
   printf "%s\n" '  iconv           Font glyph conversion support'
   printf "%s\n" '  jack            JACK sound support'
@@ -207,6 +209,8 @@ meson_options_help() {
 }
 _meson_option_parse() {
   case $1 in
+    --enable-af-xdp) printf "%s" -Daf_xdp=enabled ;;
+    --disable-af-xdp) printf "%s" -Daf_xdp=disabled ;;
     --enable-alsa) printf "%s" -Dalsa=enabled ;;
     --disable-alsa) printf "%s" -Dalsa=disabled ;;
     --enable-attr) printf "%s" -Dattr=enabled ;;
@@ -276,6 +280,8 @@ _meson_option_parse() {
     --disable-debug-mutex) printf "%s" -Ddebug_mutex=false ;;
     --enable-debug-stack-usage) printf "%s" -Ddebug_stack_usage=true ;;
     --disable-debug-stack-usage) printf "%s" -Ddebug_stack_usage=false ;;
+    --enable-debug-tcg) printf "%s" -Ddebug_tcg=true ;;
+    --disable-debug-tcg) printf "%s" -Ddebug_tcg=false ;;
     --enable-dmg) printf "%s" -Ddmg=enabled ;;
     --disable-dmg) printf "%s" -Ddmg=disabled ;;
     --docdir=*) quote_sh "-Ddocdir=$2" ;;
@@ -312,8 +318,6 @@ _meson_option_parse() {
     --disable-guest-agent) printf "%s" -Dguest_agent=disabled ;;
     --enable-guest-agent-msi) printf "%s" -Dguest_agent_msi=enabled ;;
     --disable-guest-agent-msi) printf "%s" -Dguest_agent_msi=disabled ;;
-    --enable-hax) printf "%s" -Dhax=enabled ;;
-    --disable-hax) printf "%s" -Dhax=disabled ;;
     --enable-hexagon-idef-parser) printf "%s" -Dhexagon_idef_parser=true ;;
     --disable-hexagon-idef-parser) printf "%s" -Dhexagon_idef_parser=false ;;
     --enable-hvf) printf "%s" -Dhvf=enabled ;;
@@ -398,6 +402,8 @@ _meson_option_parse() {
     --enable-pipewire) printf "%s" -Dpipewire=enabled ;;
     --disable-pipewire) printf "%s" -Dpipewire=disabled ;;
     --with-pkgversion=*) quote_sh "-Dpkgversion=$2" ;;
+    --enable-plugins) printf "%s" -Dplugins=true ;;
+    --disable-plugins) printf "%s" -Dplugins=false ;;
     --enable-png) printf "%s" -Dpng=enabled ;;
     --disable-png) printf "%s" -Dpng=disabled ;;
     --enable-pvrdma) printf "%s" -Dpvrdma=enabled ;;
diff --git a/scripts/oss-fuzz/minimize_qtest_trace.py b/scripts/oss-fuzz/minimize_qtest_trace.py
index 20825768c2..d1f3990c16 100755
--- a/scripts/oss-fuzz/minimize_qtest_trace.py
+++ b/scripts/oss-fuzz/minimize_qtest_trace.py
@@ -2,7 +2,7 @@
 # -*- coding: utf-8 -*-
 
 """
-This takes a crashing qtest trace and tries to remove superflous operations
+This takes a crashing qtest trace and tries to remove superfluous operations
 """
 
 import sys
@@ -38,7 +38,7 @@ crash by setting CRASH_TOKEN=
 Options:
 
 -M1: enable a loop around the remove minimizer, which may help decrease some
-     timing dependant instructions. Off by default.
+     timing dependent instructions. Off by default.
 -M2: try setting bits in operand of write/out to zero. Off by default.
 
 """.format((sys.argv[0])))
@@ -177,7 +177,7 @@ def remove_lines(newtrace, outpath):
         # it into two separate write commands. If splitting the data operand
         # from length/2^n bytes to the left does not work, try to move the pivot
         # to the right side, then add one to n, until length/2^n == 0. The idea
-        # is to prune unneccessary bytes from long writes, while accommodating
+        # is to prune unnecessary bytes from long writes, while accommodating
         # arbitrary MemoryRegion access sizes and alignments.
 
         # This algorithm will fail under some rare situations.
@@ -292,7 +292,7 @@ def minimize_trace(inpath, outpath):
     old_len = len(newtrace) + 1
     while(old_len > len(newtrace)):
         old_len = len(newtrace)
-        print("trace lenth = ", old_len)
+        print("trace length = ", old_len)
         remove_lines(newtrace, outpath)
         if not M1 and not M2:
             break
diff --git a/scripts/performance/topN_callgrind.py b/scripts/performance/topN_callgrind.py
index 67c59197af..f3f05fce55 100755
--- a/scripts/performance/topN_callgrind.py
+++ b/scripts/performance/topN_callgrind.py
@@ -4,7 +4,7 @@
 #  Syntax:
 #  topN_callgrind.py [-h] [-n] <number of displayed top functions>  -- \
 #           <qemu executable> [<qemu executable options>] \
-#           <target executable> [<target execurable options>]
+#           <target executable> [<target executable options>]
 #
 #  [-h] - Print the script arguments help message.
 #  [-n] - Specify the number of top functions to print.
diff --git a/scripts/performance/topN_perf.py b/scripts/performance/topN_perf.py
index 07be195fc8..7b19e6a742 100755
--- a/scripts/performance/topN_perf.py
+++ b/scripts/performance/topN_perf.py
@@ -4,7 +4,7 @@
 #  Syntax:
 #  topN_perf.py [-h] [-n] <number of displayed top functions>  -- \
 #           <qemu executable> [<qemu executable options>] \
-#           <target executable> [<target execurable options>]
+#           <target executable> [<target executable options>]
 #
 #  [-h] - Print the script arguments help message.
 #  [-n] - Specify the number of top functions to print.
diff --git a/scripts/qapi/gen.py b/scripts/qapi/gen.py
index 70bc576a10..bf5716b5f3 100644
--- a/scripts/qapi/gen.py
+++ b/scripts/qapi/gen.py
@@ -81,7 +81,7 @@ class QAPIGen:
         if odir:
             os.makedirs(odir, exist_ok=True)
 
-        # use os.open for O_CREAT to create and read a non-existant file
+        # use os.open for O_CREAT to create and read a non-existent file
         fd = os.open(pathname, os.O_RDWR | os.O_CREAT, 0o666)
         with os.fdopen(fd, 'r+', encoding='utf-8') as fp:
             text = self.get_content()
diff --git a/scripts/qapi/mypy.ini b/scripts/qapi/mypy.ini
index 3463307ddc..56e0dfb132 100644
--- a/scripts/qapi/mypy.ini
+++ b/scripts/qapi/mypy.ini
@@ -1,7 +1,7 @@
 [mypy]
 strict = True
 disallow_untyped_calls = False
-python_version = 3.7
+python_version = 3.8
 
 [mypy-qapi.schema]
 disallow_untyped_defs = False
diff --git a/scripts/replay-dump.py b/scripts/replay-dump.py
index 3ba97a6d30..b89dc29555 100755
--- a/scripts/replay-dump.py
+++ b/scripts/replay-dump.py
@@ -111,7 +111,7 @@ def print_event(eid, name, string=None, event_count=None):
 # Decoders for each event type
 
 def decode_unimp(eid, name, _unused_dumpfile):
-    "Unimplimented decoder, will trigger exit"
+    "Unimplemented decoder, will trigger exit"
     print("%s not handled - will now stop" % (name))
     return False
 
diff --git a/scripts/simplebench/bench_block_job.py b/scripts/simplebench/bench_block_job.py
index 56191db44b..e575a3af10 100755
--- a/scripts/simplebench/bench_block_job.py
+++ b/scripts/simplebench/bench_block_job.py
@@ -39,7 +39,7 @@ def bench_block_job(cmd, cmd_args, qemu_args):
                  binary
 
     Returns {'seconds': int} on success and {'error': str} on failure, dict may
-    contain addional 'vm-log' field. Return value is compatible with
+    contain additional 'vm-log' field. Return value is compatible with
     simplebench lib.
     """
 
diff --git a/scsi/qemu-pr-helper.c b/scsi/qemu-pr-helper.c
index ae44a816e1..c6c6347e9b 100644
--- a/scsi/qemu-pr-helper.c
+++ b/scsi/qemu-pr-helper.c
@@ -735,8 +735,7 @@ static void coroutine_fn prh_co_entry(void *opaque)
 
     qio_channel_set_blocking(QIO_CHANNEL(client->ioc),
                              false, NULL);
-    qio_channel_attach_aio_context(QIO_CHANNEL(client->ioc),
-                                   qemu_get_aio_context());
+    qio_channel_set_follow_coroutine_ctx(QIO_CHANNEL(client->ioc), true);
 
     /* A very simple negotiation for future extensibility.  No features
      * are defined so write 0.
@@ -796,7 +795,6 @@ static void coroutine_fn prh_co_entry(void *opaque)
     }
 
 out:
-    qio_channel_detach_aio_context(QIO_CHANNEL(client->ioc));
     object_unref(OBJECT(client->ioc));
     g_free(client);
 }
diff --git a/semihosting/config.c b/semihosting/config.c
index 89a1759687..8ca569735d 100644
--- a/semihosting/config.c
+++ b/semihosting/config.c
@@ -8,7 +8,7 @@
  * targets that support it. Architecture specific handling is handled
  * in target/HW/HW-semi.c
  *
- * Semihosting is sightly strange in that it is also supported by some
+ * Semihosting is slightly strange in that it is also supported by some
  * linux-user targets. However in that use case no configuration of
  * the outputs and command lines is supported.
  *
diff --git a/semihosting/syscalls.c b/semihosting/syscalls.c
index 68899ebb1c..d27574a1e2 100644
--- a/semihosting/syscalls.c
+++ b/semihosting/syscalls.c
@@ -720,7 +720,7 @@ void semihost_sys_read_gf(CPUState *cs, gdb_syscall_complete_cb complete,
                           GuestFD *gf, target_ulong buf, target_ulong len)
 {
     /*
-     * Bound length for 64-bit guests on 32-bit hosts, not overlowing ssize_t.
+     * Bound length for 64-bit guests on 32-bit hosts, not overflowing ssize_t.
      * Note the Linux kernel does this with MAX_RW_COUNT, so it's not a bad
      * idea to do this unconditionally.
      */
@@ -761,7 +761,7 @@ void semihost_sys_write_gf(CPUState *cs, gdb_syscall_complete_cb complete,
                            GuestFD *gf, target_ulong buf, target_ulong len)
 {
     /*
-     * Bound length for 64-bit guests on 32-bit hosts, not overlowing ssize_t.
+     * Bound length for 64-bit guests on 32-bit hosts, not overflowing ssize_t.
      * Note the Linux kernel does this with MAX_RW_COUNT, so it's not a bad
      * idea to do this unconditionally.
      */
diff --git a/util/async-teardown.c b/softmmu/async-teardown.c
similarity index 97%
rename from util/async-teardown.c
rename to softmmu/async-teardown.c
index 62cdeb0f20..396963c091 100644
--- a/util/async-teardown.c
+++ b/softmmu/async-teardown.c
@@ -121,10 +121,7 @@ static void *new_stack_for_clone(void)
 
     /* Allocate a new stack and get a pointer to its top. */
     stack_ptr = qemu_alloc_stack(&stack_size);
-#if !defined(HOST_HPPA)
-    /* The top is at the end of the area, except on HPPA. */
     stack_ptr += stack_size;
-#endif
 
     return stack_ptr;
 }
diff --git a/softmmu/cpus.c b/softmmu/cpus.c
index 6f3346d5e1..556129385e 100644
--- a/softmmu/cpus.c
+++ b/softmmu/cpus.c
@@ -427,12 +427,6 @@ void qemu_wait_io_event(CPUState *cpu)
         qemu_plugin_vcpu_resume_cb(cpu);
     }
 
-#ifdef _WIN32
-    /* Eat dummy APC queued by cpus_kick_thread. */
-    if (hax_enabled()) {
-        SleepEx(0, TRUE);
-    }
-#endif
     qemu_wait_io_event_common(cpu);
 }
 
diff --git a/softmmu/dma-helpers.c b/softmmu/dma-helpers.c
index 2463964805..36211acc7e 100644
--- a/softmmu/dma-helpers.c
+++ b/softmmu/dma-helpers.c
@@ -206,17 +206,9 @@ static void dma_aio_cancel(BlockAIOCB *acb)
     }
 }
 
-static AioContext *dma_get_aio_context(BlockAIOCB *acb)
-{
-    DMAAIOCB *dbs = container_of(acb, DMAAIOCB, common);
-
-    return dbs->ctx;
-}
-
 static const AIOCBInfo dma_aiocb_info = {
     .aiocb_size         = sizeof(DMAAIOCB),
     .cancel_async       = dma_aio_cancel,
-    .get_aio_context    = dma_get_aio_context,
 };
 
 BlockAIOCB *dma_blk_io(AioContext *ctx,
diff --git a/softmmu/icount.c b/softmmu/icount.c
index a5cef9c60a..144e24829c 100644
--- a/softmmu/icount.c
+++ b/softmmu/icount.c
@@ -325,7 +325,7 @@ void icount_start_warp_timer(void)
              * vCPU is sleeping and warp can't be started.
              * It is probably a race condition: notification sent
              * to vCPU was processed in advance and vCPU went to sleep.
-             * Therefore we have to wake it up for doing someting.
+             * Therefore we have to wake it up for doing something.
              */
             if (replay_has_event()) {
                 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
diff --git a/softmmu/ioport.c b/softmmu/ioport.c
index b66e0a5a8e..1824aa808c 100644
--- a/softmmu/ioport.c
+++ b/softmmu/ioport.c
@@ -22,7 +22,7 @@
  * THE SOFTWARE.
  */
 /*
- * splitted out ioport related stuffs from vl.c.
+ * split out ioport related stuffs from vl.c.
  */
 
 #include "qemu/osdep.h"
diff --git a/softmmu/memory.c b/softmmu/memory.c
index 7d9494ce70..c0383a163d 100644
--- a/softmmu/memory.c
+++ b/softmmu/memory.c
@@ -842,6 +842,10 @@ static void address_space_update_ioeventfds(AddressSpace *as)
     AddrRange tmp;
     unsigned i;
 
+    if (!as->ioeventfd_notifiers) {
+        return;
+    }
+
     /*
      * It is likely that the number of ioeventfds hasn't changed much, so use
      * the previous size as the starting value, with some headroom to avoid
@@ -1620,18 +1624,17 @@ void memory_region_init_ram_from_file(MemoryRegion *mr,
                                       uint32_t ram_flags,
                                       const char *path,
                                       ram_addr_t offset,
-                                      bool readonly,
                                       Error **errp)
 {
     Error *err = NULL;
     memory_region_init(mr, owner, name, size);
     mr->ram = true;
-    mr->readonly = readonly;
+    mr->readonly = !!(ram_flags & RAM_READONLY);
     mr->terminates = true;
     mr->destructor = memory_region_destructor_ram;
     mr->align = align;
     mr->ram_block = qemu_ram_alloc_from_file(size, mr, ram_flags, path,
-                                             offset, readonly, &err);
+                                             offset, &err);
     if (err) {
         mr->size = int128_zero();
         object_unparent(OBJECT(mr));
@@ -1651,10 +1654,11 @@ void memory_region_init_ram_from_fd(MemoryRegion *mr,
     Error *err = NULL;
     memory_region_init(mr, owner, name, size);
     mr->ram = true;
+    mr->readonly = !!(ram_flags & RAM_READONLY);
     mr->terminates = true;
     mr->destructor = memory_region_destructor_ram;
     mr->ram_block = qemu_ram_alloc_from_fd(size, mr, ram_flags, fd, offset,
-                                           false, &err);
+                                           &err);
     if (err) {
         mr->size = int128_zero();
         object_unparent(OBJECT(mr));
@@ -3075,6 +3079,10 @@ void memory_listener_register(MemoryListener *listener, AddressSpace *as)
     }
 
     listener_add_address_space(listener, as);
+
+    if (listener->eventfd_add || listener->eventfd_del) {
+        as->ioeventfd_notifiers++;
+    }
 }
 
 void memory_listener_unregister(MemoryListener *listener)
@@ -3083,6 +3091,10 @@ void memory_listener_unregister(MemoryListener *listener)
         return;
     }
 
+    if (listener->eventfd_add || listener->eventfd_del) {
+        listener->address_space->ioeventfd_notifiers--;
+    }
+
     listener_del_address_space(listener, listener->address_space);
     QTAILQ_REMOVE(&memory_listeners, listener, link);
     QTAILQ_REMOVE(&listener->address_space->listeners, listener, link_as);
diff --git a/softmmu/meson.build b/softmmu/meson.build
index ea5603f021..c18b7ad738 100644
--- a/softmmu/meson.build
+++ b/softmmu/meson.build
@@ -37,3 +37,4 @@ endif
 
 system_ss.add(when: seccomp, if_true: files('qemu-seccomp.c'))
 system_ss.add(when: fdt, if_true: files('device_tree.c'))
+system_ss.add(when: 'CONFIG_LINUX', if_true: files('async-teardown.c'))
diff --git a/softmmu/physmem.c b/softmmu/physmem.c
index 18277ddd67..4f6ca653b3 100644
--- a/softmmu/physmem.c
+++ b/softmmu/physmem.c
@@ -1288,8 +1288,7 @@ static int64_t get_file_align(int fd)
 static int file_ram_open(const char *path,
                          const char *region_name,
                          bool readonly,
-                         bool *created,
-                         Error **errp)
+                         bool *created)
 {
     char *filename;
     char *sanitized_name;
@@ -1300,10 +1299,33 @@ static int file_ram_open(const char *path,
     for (;;) {
         fd = open(path, readonly ? O_RDONLY : O_RDWR);
         if (fd >= 0) {
+            /*
+             * open(O_RDONLY) won't fail with EISDIR. Check manually if we
+             * opened a directory and fail similarly to how we fail ENOENT
+             * in readonly mode. Note that mkstemp() would imply O_RDWR.
+             */
+            if (readonly) {
+                struct stat file_stat;
+
+                if (fstat(fd, &file_stat)) {
+                    close(fd);
+                    if (errno == EINTR) {
+                        continue;
+                    }
+                    return -errno;
+                } else if (S_ISDIR(file_stat.st_mode)) {
+                    close(fd);
+                    return -EISDIR;
+                }
+            }
             /* @path names an existing file, use it */
             break;
         }
         if (errno == ENOENT) {
+            if (readonly) {
+                /* Refuse to create new, readonly files. */
+                return -ENOENT;
+            }
             /* @path names a file that doesn't exist, create it */
             fd = open(path, O_RDWR | O_CREAT | O_EXCL, 0644);
             if (fd >= 0) {
@@ -1333,10 +1355,7 @@ static int file_ram_open(const char *path,
             g_free(filename);
         }
         if (errno != EEXIST && errno != EINTR) {
-            error_setg_errno(errp, errno,
-                             "can't open backing store %s for guest RAM",
-                             path);
-            return -1;
+            return -errno;
         }
         /*
          * Try again on EINTR and EEXIST.  The latter happens when
@@ -1350,7 +1369,6 @@ static int file_ram_open(const char *path,
 static void *file_ram_alloc(RAMBlock *block,
                             ram_addr_t memory,
                             int fd,
-                            bool readonly,
                             bool truncate,
                             off_t offset,
                             Error **errp)
@@ -1408,7 +1426,7 @@ static void *file_ram_alloc(RAMBlock *block,
         perror("ftruncate");
     }
 
-    qemu_map_flags = readonly ? QEMU_MAP_READONLY : 0;
+    qemu_map_flags = (block->flags & RAM_READONLY) ? QEMU_MAP_READONLY : 0;
     qemu_map_flags |= (block->flags & RAM_SHARED) ? QEMU_MAP_SHARED : 0;
     qemu_map_flags |= (block->flags & RAM_PMEM) ? QEMU_MAP_SYNC : 0;
     qemu_map_flags |= (block->flags & RAM_NORESERVE) ? QEMU_MAP_NORESERVE : 0;
@@ -1876,7 +1894,7 @@ static void ram_block_add(RAMBlock *new_block, Error **errp)
 #ifdef CONFIG_POSIX
 RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
                                  uint32_t ram_flags, int fd, off_t offset,
-                                 bool readonly, Error **errp)
+                                 Error **errp)
 {
     RAMBlock *new_block;
     Error *local_err = NULL;
@@ -1884,7 +1902,8 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
 
     /* Just support these ram flags by now. */
     assert((ram_flags & ~(RAM_SHARED | RAM_PMEM | RAM_NORESERVE |
-                          RAM_PROTECTED | RAM_NAMED_FILE)) == 0);
+                          RAM_PROTECTED | RAM_NAMED_FILE | RAM_READONLY |
+                          RAM_READONLY_FD)) == 0);
 
     if (xen_enabled()) {
         error_setg(errp, "-mem-path not supported with Xen");
@@ -1919,8 +1938,8 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
     new_block->used_length = size;
     new_block->max_length = size;
     new_block->flags = ram_flags;
-    new_block->host = file_ram_alloc(new_block, size, fd, readonly,
-                                     !file_size, offset, errp);
+    new_block->host = file_ram_alloc(new_block, size, fd, !file_size, offset,
+                                     errp);
     if (!new_block->host) {
         g_free(new_block);
         return NULL;
@@ -1939,20 +1958,40 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
 
 RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
                                    uint32_t ram_flags, const char *mem_path,
-                                   off_t offset, bool readonly, Error **errp)
+                                   off_t offset, Error **errp)
 {
     int fd;
     bool created;
     RAMBlock *block;
 
-    fd = file_ram_open(mem_path, memory_region_name(mr), readonly, &created,
-                       errp);
+    fd = file_ram_open(mem_path, memory_region_name(mr),
+                       !!(ram_flags & RAM_READONLY_FD), &created);
     if (fd < 0) {
+        error_setg_errno(errp, -fd, "can't open backing store %s for guest RAM",
+                         mem_path);
+        if (!(ram_flags & RAM_READONLY_FD) && !(ram_flags & RAM_SHARED) &&
+            fd == -EACCES) {
+            /*
+             * If we can open the file R/O (note: will never create a new file)
+             * and we are dealing with a private mapping, there are still ways
+             * to consume such files and get RAM instead of ROM.
+             */
+            fd = file_ram_open(mem_path, memory_region_name(mr), true,
+                               &created);
+            if (fd < 0) {
+                return NULL;
+            }
+            assert(!created);
+            close(fd);
+            error_append_hint(errp, "Consider opening the backing store"
+                " read-only but still creating writable RAM using"
+                " '-object memory-backend-file,readonly=on,rom=off...'"
+                " (see \"VM templating\" documentation)\n");
+        }
         return NULL;
     }
 
-    block = qemu_ram_alloc_from_fd(size, mr, ram_flags, fd, offset, readonly,
-                                   errp);
+    block = qemu_ram_alloc_from_fd(size, mr, ram_flags, fd, offset, errp);
     if (!block) {
         if (created) {
             unlink(mem_path);
@@ -2070,6 +2109,7 @@ void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
     ram_addr_t offset;
     int flags;
     void *area, *vaddr;
+    int prot;
 
     RAMBLOCK_FOREACH(block) {
         offset = addr - block->offset;
@@ -2084,13 +2124,14 @@ void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
                 flags |= block->flags & RAM_SHARED ?
                          MAP_SHARED : MAP_PRIVATE;
                 flags |= block->flags & RAM_NORESERVE ? MAP_NORESERVE : 0;
+                prot = PROT_READ;
+                prot |= block->flags & RAM_READONLY ? 0 : PROT_WRITE;
                 if (block->fd >= 0) {
-                    area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
-                                flags, block->fd, offset + block->fd_offset);
+                    area = mmap(vaddr, length, prot, flags, block->fd,
+                                offset + block->fd_offset);
                 } else {
                     flags |= MAP_ANONYMOUS;
-                    area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
-                                flags, -1, 0);
+                    area = mmap(vaddr, length, prot, flags, -1, 0);
                 }
                 if (area != vaddr) {
                     error_report("Could not remap addr: "
@@ -3480,6 +3521,16 @@ int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length)
              * so a userfault will trigger.
              */
 #ifdef CONFIG_FALLOCATE_PUNCH_HOLE
+            /*
+             * fallocate() will fail with readonly files. Let's print a
+             * proper error message.
+             */
+            if (rb->flags & RAM_READONLY_FD) {
+                error_report("ram_block_discard_range: Discarding RAM"
+                             " with readonly files is not supported");
+                goto err;
+
+            }
             /*
              * We'll discard data from the actual file, even though we only
              * have a MAP_PRIVATE mapping, possibly messing with other
diff --git a/softmmu/runstate.c b/softmmu/runstate.c
index 32a0e23d9c..f81f903076 100644
--- a/softmmu/runstate.c
+++ b/softmmu/runstate.c
@@ -271,6 +271,7 @@ void qemu_system_vmstop_request(RunState state)
 }
 struct VMChangeStateEntry {
     VMChangeStateHandler *cb;
+    VMChangeStateHandler *prepare_cb;
     void *opaque;
     QTAILQ_ENTRY(VMChangeStateEntry) entries;
     int priority;
@@ -293,12 +294,39 @@ static QTAILQ_HEAD(, VMChangeStateEntry) vm_change_state_head =
  */
 VMChangeStateEntry *qemu_add_vm_change_state_handler_prio(
         VMChangeStateHandler *cb, void *opaque, int priority)
+{
+    return qemu_add_vm_change_state_handler_prio_full(cb, NULL, opaque,
+                                                      priority);
+}
+
+/**
+ * qemu_add_vm_change_state_handler_prio_full:
+ * @cb: the main callback to invoke
+ * @prepare_cb: a callback to invoke before the main callback
+ * @opaque: user data passed to the callbacks
+ * @priority: low priorities execute first when the vm runs and the reverse is
+ *            true when the vm stops
+ *
+ * Register a main callback function and an optional prepare callback function
+ * that are invoked when the vm starts or stops running. The main callback and
+ * the prepare callback are called in two separate phases: First all prepare
+ * callbacks are called and only then all main callbacks are called. As its
+ * name suggests, the prepare callback can be used to do some preparatory work
+ * before invoking the main callback.
+ *
+ * Returns: an entry to be freed using qemu_del_vm_change_state_handler()
+ */
+VMChangeStateEntry *
+qemu_add_vm_change_state_handler_prio_full(VMChangeStateHandler *cb,
+                                           VMChangeStateHandler *prepare_cb,
+                                           void *opaque, int priority)
 {
     VMChangeStateEntry *e;
     VMChangeStateEntry *other;
 
     e = g_malloc0(sizeof(*e));
     e->cb = cb;
+    e->prepare_cb = prepare_cb;
     e->opaque = opaque;
     e->priority = priority;
 
@@ -333,10 +361,22 @@ void vm_state_notify(bool running, RunState state)
     trace_vm_state_notify(running, state, RunState_str(state));
 
     if (running) {
+        QTAILQ_FOREACH_SAFE(e, &vm_change_state_head, entries, next) {
+            if (e->prepare_cb) {
+                e->prepare_cb(e->opaque, running, state);
+            }
+        }
+
         QTAILQ_FOREACH_SAFE(e, &vm_change_state_head, entries, next) {
             e->cb(e->opaque, running, state);
         }
     } else {
+        QTAILQ_FOREACH_REVERSE_SAFE(e, &vm_change_state_head, entries, next) {
+            if (e->prepare_cb) {
+                e->prepare_cb(e->opaque, running, state);
+            }
+        }
+
         QTAILQ_FOREACH_REVERSE_SAFE(e, &vm_change_state_head, entries, next) {
             e->cb(e->opaque, running, state);
         }
diff --git a/softmmu/vl.c b/softmmu/vl.c
index b0b96f67fa..3db4fd2680 100644
--- a/softmmu/vl.c
+++ b/softmmu/vl.c
@@ -49,6 +49,7 @@
 #include "qemu/error-report.h"
 #include "qemu/sockets.h"
 #include "qemu/accel.h"
+#include "qemu/async-teardown.h"
 #include "hw/usb.h"
 #include "hw/isa/isa.h"
 #include "hw/scsi/scsi.h"
@@ -86,11 +87,9 @@
 #include "migration/colo.h"
 #include "migration/postcopy-ram.h"
 #include "sysemu/kvm.h"
-#include "sysemu/hax.h"
 #include "qapi/qobject-input-visitor.h"
 #include "qemu/option.h"
 #include "qemu/config-file.h"
-#include "qemu/qemu-options.h"
 #include "qemu/main-loop.h"
 #ifdef CONFIG_VIRTFS
 #include "fsdev/qemu-fsdev.h"
@@ -748,6 +747,33 @@ static QemuOptsList qemu_smp_opts = {
     },
 };
 
+#if defined(CONFIG_POSIX)
+static QemuOptsList qemu_run_with_opts = {
+    .name = "run-with",
+    .head = QTAILQ_HEAD_INITIALIZER(qemu_run_with_opts.head),
+    .desc = {
+#if defined(CONFIG_LINUX)
+        {
+            .name = "async-teardown",
+            .type = QEMU_OPT_BOOL,
+        },
+#endif
+        {
+            .name = "chroot",
+            .type = QEMU_OPT_STRING,
+        },
+        { /* end of list */ }
+    },
+};
+
+#define qemu_add_run_with_opts() qemu_add_opts(&qemu_run_with_opts)
+
+#else
+
+#define qemu_add_run_with_opts()
+
+#endif /* CONFIG_POSIX */
+
 static void realtime_init(void)
 {
     if (enable_mlock) {
@@ -866,6 +892,16 @@ static void help(int exitcode)
     exit(exitcode);
 }
 
+enum {
+
+#define DEF(option, opt_arg, opt_enum, opt_help, arch_mask)     \
+    opt_enum,
+#define DEFHEADING(text)
+#define ARCHHEADING(text, arch_mask)
+
+#include "qemu-options.def"
+};
+
 #define HAS_ARG 0x0001
 
 typedef struct QEMUOption {
@@ -2546,11 +2582,6 @@ static void qemu_init_board(void)
     drive_check_orphaned();
 
     realtime_init();
-
-    if (hax_enabled()) {
-        /* FIXME: why isn't cpu_synchronize_all_post_init enough? */
-        hax_sync_vcpus();
-    }
 }
 
 static void qemu_create_cli_devices(void)
@@ -2704,6 +2735,7 @@ void qemu_init(int argc, char **argv)
     qemu_add_opts(&qemu_semihosting_config_opts);
     qemu_add_opts(&qemu_fw_cfg_opts);
     qemu_add_opts(&qemu_action_opts);
+    qemu_add_run_with_opts();
     module_call_init(MODULE_INIT_OPTS);
 
     error_init(argv[0]);
@@ -3522,11 +3554,52 @@ void qemu_init(int argc, char **argv)
             case QEMU_OPTION_nouserconfig:
                 /* Nothing to be parsed here. Especially, do not error out below. */
                 break;
-            default:
-                if (os_parse_cmd_args(popt->index, optarg)) {
-                    error_report("Option not supported in this build");
+#if defined(CONFIG_POSIX)
+            case QEMU_OPTION_runas:
+                if (!os_set_runas(optarg)) {
+                    error_report("User \"%s\" doesn't exist"
+                                 " (and is not <uid>:<gid>)",
+                                 optarg);
                     exit(1);
                 }
+                break;
+            case QEMU_OPTION_chroot:
+                warn_report("option is deprecated,"
+                            " use '-run-with chroot=...' instead");
+                os_set_chroot(optarg);
+                break;
+            case QEMU_OPTION_daemonize:
+                os_set_daemonize(true);
+                break;
+#if defined(CONFIG_LINUX)
+            /* deprecated */
+            case QEMU_OPTION_asyncteardown:
+                init_async_teardown();
+                break;
+#endif
+            case QEMU_OPTION_run_with: {
+                const char *str;
+                opts = qemu_opts_parse_noisily(qemu_find_opts("run-with"),
+                                                         optarg, false);
+                if (!opts) {
+                    exit(1);
+                }
+#if defined(CONFIG_LINUX)
+                if (qemu_opt_get_bool(opts, "async-teardown", false)) {
+                    init_async_teardown();
+                }
+#endif
+                str = qemu_opt_get(opts, "chroot");
+                if (str) {
+                    os_set_chroot(str);
+                }
+                break;
+            }
+#endif /* CONFIG_POSIX */
+
+            default:
+                error_report("Option not supported in this build");
+                exit(1);
             }
         }
     }
diff --git a/storage-daemon/meson.build b/storage-daemon/meson.build
index 49c9d2eac9..5e90cd32b4 100644
--- a/storage-daemon/meson.build
+++ b/storage-daemon/meson.build
@@ -5,7 +5,7 @@ qsd_ss.add(blockdev, chardev, qmp, qom, qemuutil, gnutls)
 subdir('qapi')
 
 if have_tools
-  qsd_ss = qsd_ss.apply(config_host, strict: false)
+  qsd_ss = qsd_ss.apply(config_targetos, strict: false)
   qsd = executable('qemu-storage-daemon',
                    qsd_ss.sources(),
                    dependencies: qsd_ss.dependencies(),
diff --git a/subprojects/berkeley-softfloat-3.wrap b/subprojects/berkeley-softfloat-3.wrap
index a8fd87740b..c3e356d42f 100644
--- a/subprojects/berkeley-softfloat-3.wrap
+++ b/subprojects/berkeley-softfloat-3.wrap
@@ -1,5 +1,5 @@
 [wrap-git]
-url = https://gitlab.com/qemu-project/berkeley-softfloat-3
+url = https://gitlab.com/qemu-project/berkeley-softfloat-3.git
 revision = b64af41c3276f97f0e181920400ee056b9c88037
 patch_directory = berkeley-softfloat-3
 depth = 1
diff --git a/subprojects/berkeley-testfloat-3.wrap b/subprojects/berkeley-testfloat-3.wrap
index 6ad80a37b2..b8b12e7629 100644
--- a/subprojects/berkeley-testfloat-3.wrap
+++ b/subprojects/berkeley-testfloat-3.wrap
@@ -1,5 +1,5 @@
 [wrap-git]
-url = https://gitlab.com/qemu-project/berkeley-testfloat-3
-revision = 40619cbb3bf32872df8c53cc457039229428a263
+url = https://gitlab.com/qemu-project/berkeley-testfloat-3.git
+revision = e7af9751d9f9fd3b47911f51a5cfd08af256a9ab
 patch_directory = berkeley-testfloat-3
 depth = 1
diff --git a/subprojects/libblkio.wrap b/subprojects/libblkio.wrap
new file mode 100644
index 0000000000..f77af72210
--- /dev/null
+++ b/subprojects/libblkio.wrap
@@ -0,0 +1,6 @@
+[wrap-git]
+url = https://gitlab.com/libblkio/libblkio
+revision = f84cc963a444e4cb34813b2dcfc5bf8526947dc0
+
+[provide]
+blkio = libblkio_dep
diff --git a/subprojects/slirp.wrap b/subprojects/slirp.wrap
index 08291a4cf9..a93b048962 100644
--- a/subprojects/slirp.wrap
+++ b/subprojects/slirp.wrap
@@ -1,5 +1,5 @@
 [wrap-git]
-url = https://gitlab.freedesktop.org/slirp/libslirp
+url = https://gitlab.freedesktop.org/slirp/libslirp.git
 revision = 26be815b86e8d49add8c9a8b320239b9594ff03d
 
 [provide]
diff --git a/target/alpha/translate.c b/target/alpha/translate.c
index 0839182a1f..9be912c50c 100644
--- a/target/alpha/translate.c
+++ b/target/alpha/translate.c
@@ -24,7 +24,6 @@
 #include "qemu/host-utils.h"
 #include "exec/exec-all.h"
 #include "tcg/tcg-op.h"
-#include "exec/cpu_ldst.h"
 #include "exec/helper-proto.h"
 #include "exec/helper-gen.h"
 #include "exec/translator.h"
diff --git a/target/arm/arm-qmp-cmds.c b/target/arm/arm-qmp-cmds.c
index c8fa524002..b53d5efe13 100644
--- a/target/arm/arm-qmp-cmds.c
+++ b/target/arm/arm-qmp-cmds.c
@@ -95,7 +95,7 @@ static const char *cpu_model_advertised_features[] = {
     "sve640", "sve768", "sve896", "sve1024", "sve1152", "sve1280",
     "sve1408", "sve1536", "sve1664", "sve1792", "sve1920", "sve2048",
     "kvm-no-adjvtime", "kvm-steal-time",
-    "pauth", "pauth-impdef",
+    "pauth", "pauth-impdef", "pauth-qarma3",
     NULL
 };
 
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index 0bb0585441..b9e09a702d 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -243,6 +243,10 @@ static void arm_cpu_reset_hold(Object *obj)
                                   SCTLR_EnDA | SCTLR_EnDB);
         /* Trap on btype=3 for PACIxSP. */
         env->cp15.sctlr_el[1] |= SCTLR_BT0;
+        /* Trap on implementation defined registers. */
+        if (cpu_isar_feature(aa64_tidcp1, cpu)) {
+            env->cp15.sctlr_el[1] |= SCTLR_TIDCP;
+        }
         /* and to the FP/Neon instructions */
         env->cp15.cpacr_el1 = FIELD_DP64(env->cp15.cpacr_el1,
                                          CPACR_EL1, FPEN, 3);
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 278cc135c2..f2e3dc49a6 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -1033,6 +1033,7 @@ struct ArchCPU {
         uint32_t dbgdevid1;
         uint64_t id_aa64isar0;
         uint64_t id_aa64isar1;
+        uint64_t id_aa64isar2;
         uint64_t id_aa64pfr0;
         uint64_t id_aa64pfr1;
         uint64_t id_aa64mmfr0;
@@ -1071,6 +1072,7 @@ struct ArchCPU {
      */
     bool prop_pauth;
     bool prop_pauth_impdef;
+    bool prop_pauth_qarma3;
     bool prop_lpa2;
 
     /* DCZ blocksize, in log_2(words), ie low 4 bits of DCZID_EL0 */
@@ -3208,11 +3210,7 @@ static inline bool bswap_code(bool sctlr_b)
      * The invalid combination SCTLR.B=1/CPSR.E=1/TARGET_BIG_ENDIAN=0
      * would also end up as a mixed-endian mode with BE code, LE data.
      */
-    return
-#if TARGET_BIG_ENDIAN
-        1 ^
-#endif
-        sctlr_b;
+    return TARGET_BIG_ENDIAN ^ sctlr_b;
 #else
     /* All code access in ARM is little endian, and there are no loaders
      * doing swaps that need to be reversed
@@ -3224,11 +3222,7 @@ static inline bool bswap_code(bool sctlr_b)
 #ifdef CONFIG_USER_ONLY
 static inline bool arm_cpu_bswap_data(CPUARMState *env)
 {
-    return
-#if TARGET_BIG_ENDIAN
-       1 ^
-#endif
-       arm_cpu_data_is_big_endian(env);
+    return TARGET_BIG_ENDIAN ^ arm_cpu_data_is_big_endian(env);
 }
 #endif
 
@@ -3803,28 +3797,59 @@ static inline bool isar_feature_aa64_fcma(const ARMISARegisters *id)
     return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, FCMA) != 0;
 }
 
+/*
+ * These are the values from APA/API/APA3.
+ * In general these must be compared '>=', per the normal Arm ARM
+ * treatment of fields in ID registers.
+ */
+typedef enum {
+    PauthFeat_None         = 0,
+    PauthFeat_1            = 1,
+    PauthFeat_EPAC         = 2,
+    PauthFeat_2            = 3,
+    PauthFeat_FPAC         = 4,
+    PauthFeat_FPACCOMBINED = 5,
+} ARMPauthFeature;
+
+static inline ARMPauthFeature
+isar_feature_pauth_feature(const ARMISARegisters *id)
+{
+    /*
+     * Architecturally, only one of {APA,API,APA3} may be active (non-zero)
+     * and the other two must be zero.  Thus we may avoid conditionals.
+     */
+    return (FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, APA) |
+            FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, API) |
+            FIELD_EX64(id->id_aa64isar2, ID_AA64ISAR2, APA3));
+}
+
 static inline bool isar_feature_aa64_pauth(const ARMISARegisters *id)
 {
     /*
      * Return true if any form of pauth is enabled, as this
      * predicate controls migration of the 128-bit keys.
      */
-    return (id->id_aa64isar1 &
-            (FIELD_DP64(0, ID_AA64ISAR1, APA, 0xf) |
-             FIELD_DP64(0, ID_AA64ISAR1, API, 0xf) |
-             FIELD_DP64(0, ID_AA64ISAR1, GPA, 0xf) |
-             FIELD_DP64(0, ID_AA64ISAR1, GPI, 0xf))) != 0;
+    return isar_feature_pauth_feature(id) != PauthFeat_None;
 }
 
-static inline bool isar_feature_aa64_pauth_arch(const ARMISARegisters *id)
+static inline bool isar_feature_aa64_pauth_qarma5(const ARMISARegisters *id)
 {
     /*
-     * Return true if pauth is enabled with the architected QARMA algorithm.
-     * QEMU will always set APA+GPA to the same value.
+     * Return true if pauth is enabled with the architected QARMA5 algorithm.
+     * QEMU will always enable or disable both APA and GPA.
      */
     return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, APA) != 0;
 }
 
+static inline bool isar_feature_aa64_pauth_qarma3(const ARMISARegisters *id)
+{
+    /*
+     * Return true if pauth is enabled with the architected QARMA3 algorithm.
+     * QEMU will always enable or disable both APA3 and GPA3.
+     */
+    return FIELD_EX64(id->id_aa64isar2, ID_AA64ISAR2, APA3) != 0;
+}
+
 static inline bool isar_feature_aa64_tlbirange(const ARMISARegisters *id)
 {
     return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, TLB) == 2;
@@ -3947,6 +3972,11 @@ static inline bool isar_feature_aa64_hcx(const ARMISARegisters *id)
     return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, HCX) != 0;
 }
 
+static inline bool isar_feature_aa64_tidcp1(const ARMISARegisters *id)
+{
+    return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR1, TIDCP1) != 0;
+}
+
 static inline bool isar_feature_aa64_uao(const ARMISARegisters *id)
 {
     return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, UAO) != 0;
diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
index 96158093cc..f3d87e001f 100644
--- a/target/arm/cpu64.c
+++ b/target/arm/cpu64.c
@@ -473,43 +473,80 @@ void aarch64_add_sme_properties(Object *obj)
 
 void arm_cpu_pauth_finalize(ARMCPU *cpu, Error **errp)
 {
-    int arch_val = 0, impdef_val = 0;
-    uint64_t t;
+    ARMPauthFeature features = cpu_isar_feature(pauth_feature, cpu);
+    uint64_t isar1, isar2;
 
-    /* Exit early if PAuth is enabled, and fall through to disable it */
-    if ((kvm_enabled() || hvf_enabled()) && cpu->prop_pauth) {
-        if (!cpu_isar_feature(aa64_pauth, cpu)) {
-            error_setg(errp, "'pauth' feature not supported by %s on this host",
-                       kvm_enabled() ? "KVM" : "hvf");
+    /*
+     * These properties enable or disable Pauth as a whole, or change
+     * the pauth algorithm, but do not change the set of features that
+     * are present.  We have saved a copy of those features above and
+     * will now place it into the field that chooses the algorithm.
+     *
+     * Begin by disabling all fields.
+     */
+    isar1 = cpu->isar.id_aa64isar1;
+    isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, APA, 0);
+    isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, GPA, 0);
+    isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, API, 0);
+    isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, GPI, 0);
+
+    isar2 = cpu->isar.id_aa64isar2;
+    isar2 = FIELD_DP64(isar2, ID_AA64ISAR2, APA3, 0);
+    isar2 = FIELD_DP64(isar2, ID_AA64ISAR2, GPA3, 0);
+
+    if (kvm_enabled() || hvf_enabled()) {
+        /*
+         * Exit early if PAuth is enabled and fall through to disable it.
+         * The algorithm selection properties are not present.
+         */
+        if (cpu->prop_pauth) {
+            if (features == 0) {
+                error_setg(errp, "'pauth' feature not supported by "
+                           "%s on this host", current_accel_name());
+            }
+            return;
+        }
+    } else {
+        /* Pauth properties are only present when the model supports it. */
+        if (features == 0) {
+            assert(!cpu->prop_pauth);
+            return;
         }
 
-        return;
-    }
+        if (cpu->prop_pauth) {
+            if (cpu->prop_pauth_impdef && cpu->prop_pauth_qarma3) {
+                error_setg(errp,
+                           "cannot enable both pauth-impdef and pauth-qarma3");
+                return;
+            }
 
-    /* TODO: Handle HaveEnhancedPAC, HaveEnhancedPAC2, HaveFPAC. */
-    if (cpu->prop_pauth) {
-        if (cpu->prop_pauth_impdef) {
-            impdef_val = 1;
-        } else {
-            arch_val = 1;
+            if (cpu->prop_pauth_impdef) {
+                isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, API, features);
+                isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, GPI, 1);
+            } else if (cpu->prop_pauth_qarma3) {
+                isar2 = FIELD_DP64(isar2, ID_AA64ISAR2, APA3, features);
+                isar2 = FIELD_DP64(isar2, ID_AA64ISAR2, GPA3, 1);
+            } else {
+                isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, APA, features);
+                isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, GPA, 1);
+            }
+        } else if (cpu->prop_pauth_impdef || cpu->prop_pauth_qarma3) {
+            error_setg(errp, "cannot enable pauth-impdef or "
+                       "pauth-qarma3 without pauth");
+            error_append_hint(errp, "Add pauth=on to the CPU property list.\n");
         }
-    } else if (cpu->prop_pauth_impdef) {
-        error_setg(errp, "cannot enable pauth-impdef without pauth");
-        error_append_hint(errp, "Add pauth=on to the CPU property list.\n");
     }
 
-    t = cpu->isar.id_aa64isar1;
-    t = FIELD_DP64(t, ID_AA64ISAR1, APA, arch_val);
-    t = FIELD_DP64(t, ID_AA64ISAR1, GPA, arch_val);
-    t = FIELD_DP64(t, ID_AA64ISAR1, API, impdef_val);
-    t = FIELD_DP64(t, ID_AA64ISAR1, GPI, impdef_val);
-    cpu->isar.id_aa64isar1 = t;
+    cpu->isar.id_aa64isar1 = isar1;
+    cpu->isar.id_aa64isar2 = isar2;
 }
 
 static Property arm_cpu_pauth_property =
     DEFINE_PROP_BOOL("pauth", ARMCPU, prop_pauth, true);
 static Property arm_cpu_pauth_impdef_property =
     DEFINE_PROP_BOOL("pauth-impdef", ARMCPU, prop_pauth_impdef, false);
+static Property arm_cpu_pauth_qarma3_property =
+    DEFINE_PROP_BOOL("pauth-qarma3", ARMCPU, prop_pauth_qarma3, false);
 
 void aarch64_add_pauth_properties(Object *obj)
 {
@@ -529,6 +566,7 @@ void aarch64_add_pauth_properties(Object *obj)
         cpu->prop_pauth = cpu_isar_feature(aa64_pauth, cpu);
     } else {
         qdev_property_add_static(DEVICE(obj), &arm_cpu_pauth_impdef_property);
+        qdev_property_add_static(DEVICE(obj), &arm_cpu_pauth_qarma3_property);
     }
 }
 
diff --git a/target/arm/helper.c b/target/arm/helper.c
index 304b55b779..14bbaa2cf4 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -8435,11 +8435,11 @@ void register_cp_regs_for_features(ARMCPU *cpu)
               .access = PL1_R, .type = ARM_CP_CONST,
               .accessfn = access_aa64_tid3,
               .resetvalue = cpu->isar.id_aa64isar1 },
-            { .name = "ID_AA64ISAR2_EL1_RESERVED", .state = ARM_CP_STATE_AA64,
+            { .name = "ID_AA64ISAR2_EL1", .state = ARM_CP_STATE_AA64,
               .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 6, .opc2 = 2,
               .access = PL1_R, .type = ARM_CP_CONST,
               .accessfn = access_aa64_tid3,
-              .resetvalue = 0 },
+              .resetvalue = cpu->isar.id_aa64isar2 },
             { .name = "ID_AA64ISAR3_EL1_RESERVED", .state = ARM_CP_STATE_AA64,
               .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 6, .opc2 = 3,
               .access = PL1_R, .type = ARM_CP_CONST,
@@ -8682,16 +8682,25 @@ void register_cp_regs_for_features(ARMCPU *cpu)
         };
         modify_arm_cp_regs(v8_idregs, v8_user_idregs);
 #endif
-        /* RVBAR_EL1 is only implemented if EL1 is the highest EL */
+        /*
+         * RVBAR_EL1 and RMR_EL1 only implemented if EL1 is the highest EL.
+         * TODO: For RMR, a write with bit 1 set should do something with
+         * cpu_reset(). In the meantime, "the bit is strictly a request",
+         * so we are in spec just ignoring writes.
+         */
         if (!arm_feature(env, ARM_FEATURE_EL3) &&
             !arm_feature(env, ARM_FEATURE_EL2)) {
-            ARMCPRegInfo rvbar = {
-                .name = "RVBAR_EL1", .state = ARM_CP_STATE_BOTH,
-                .opc0 = 3, .opc1 = 0, .crn = 12, .crm = 0, .opc2 = 1,
-                .access = PL1_R,
-                .fieldoffset = offsetof(CPUARMState, cp15.rvbar),
+            ARMCPRegInfo el1_reset_regs[] = {
+                { .name = "RVBAR_EL1", .state = ARM_CP_STATE_BOTH,
+                  .opc0 = 3, .opc1 = 0, .crn = 12, .crm = 0, .opc2 = 1,
+                  .access = PL1_R,
+                  .fieldoffset = offsetof(CPUARMState, cp15.rvbar) },
+                { .name = "RMR_EL1", .state = ARM_CP_STATE_BOTH,
+                  .opc0 = 3, .opc1 = 0, .crn = 12, .crm = 0, .opc2 = 2,
+                  .access = PL1_RW, .type = ARM_CP_CONST,
+                  .resetvalue = arm_feature(env, ARM_FEATURE_AARCH64) }
             };
-            define_one_arm_cp_reg(cpu, &rvbar);
+            define_arm_cp_regs(cpu, el1_reset_regs);
         }
         define_arm_cp_regs(cpu, v8_idregs);
         define_arm_cp_regs(cpu, v8_cp_reginfo);
@@ -8775,22 +8784,25 @@ void register_cp_regs_for_features(ARMCPU *cpu)
         if (cpu_isar_feature(aa64_sel2, cpu)) {
             define_arm_cp_regs(cpu, el2_sec_cp_reginfo);
         }
-        /* RVBAR_EL2 is only implemented if EL2 is the highest EL */
+        /*
+         * RVBAR_EL2 and RMR_EL2 only implemented if EL2 is the highest EL.
+         * See commentary near RMR_EL1.
+         */
         if (!arm_feature(env, ARM_FEATURE_EL3)) {
-            ARMCPRegInfo rvbar[] = {
-                {
-                    .name = "RVBAR_EL2", .state = ARM_CP_STATE_AA64,
-                    .opc0 = 3, .opc1 = 4, .crn = 12, .crm = 0, .opc2 = 1,
-                    .access = PL2_R,
-                    .fieldoffset = offsetof(CPUARMState, cp15.rvbar),
-                },
-                {   .name = "RVBAR", .type = ARM_CP_ALIAS,
-                    .cp = 15, .opc1 = 0, .crn = 12, .crm = 0, .opc2 = 1,
-                    .access = PL2_R,
-                    .fieldoffset = offsetof(CPUARMState, cp15.rvbar),
-                },
+            static const ARMCPRegInfo el2_reset_regs[] = {
+                { .name = "RVBAR_EL2", .state = ARM_CP_STATE_AA64,
+                  .opc0 = 3, .opc1 = 4, .crn = 12, .crm = 0, .opc2 = 1,
+                  .access = PL2_R,
+                  .fieldoffset = offsetof(CPUARMState, cp15.rvbar) },
+                { .name = "RVBAR", .type = ARM_CP_ALIAS,
+                  .cp = 15, .opc1 = 0, .crn = 12, .crm = 0, .opc2 = 1,
+                  .access = PL2_R,
+                  .fieldoffset = offsetof(CPUARMState, cp15.rvbar) },
+                { .name = "RMR_EL2", .state = ARM_CP_STATE_AA64,
+                  .opc0 = 3, .opc1 = 4, .crn = 12, .crm = 0, .opc2 = 2,
+                  .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 1 },
             };
-            define_arm_cp_regs(cpu, rvbar);
+            define_arm_cp_regs(cpu, el2_reset_regs);
         }
     }
 
@@ -8801,8 +8813,14 @@ void register_cp_regs_for_features(ARMCPU *cpu)
             { .name = "RVBAR_EL3", .state = ARM_CP_STATE_AA64,
               .opc0 = 3, .opc1 = 6, .crn = 12, .crm = 0, .opc2 = 1,
               .access = PL3_R,
-              .fieldoffset = offsetof(CPUARMState, cp15.rvbar),
-            },
+              .fieldoffset = offsetof(CPUARMState, cp15.rvbar), },
+            { .name = "RMR_EL3", .state = ARM_CP_STATE_AA64,
+              .opc0 = 3, .opc1 = 6, .crn = 12, .crm = 0, .opc2 = 2,
+              .access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 1 },
+            { .name = "RMR", .state = ARM_CP_STATE_AA32,
+              .cp = 15, .opc1 = 0, .crn = 12, .crm = 0, .opc2 = 2,
+              .access = PL3_RW, .type = ARM_CP_CONST,
+              .resetvalue = arm_feature(env, ARM_FEATURE_AARCH64) },
             { .name = "SCTLR_EL3", .state = ARM_CP_STATE_AA64,
               .opc0 = 3, .opc1 = 6, .crn = 1, .crm = 0, .opc2 = 0,
               .access = PL3_RW,
diff --git a/target/arm/helper.h b/target/arm/helper.h
index 95e32a697a..2b02733305 100644
--- a/target/arm/helper.h
+++ b/target/arm/helper.h
@@ -81,6 +81,8 @@ DEF_HELPER_FLAGS_2(check_bxj_trap, TCG_CALL_NO_WG, void, env, i32)
 
 DEF_HELPER_4(access_check_cp_reg, cptr, env, i32, i32, i32)
 DEF_HELPER_FLAGS_2(lookup_cp_reg, TCG_CALL_NO_RWG_SE, cptr, env, i32)
+DEF_HELPER_FLAGS_2(tidcp_el0, TCG_CALL_NO_WG, void, env, i32)
+DEF_HELPER_FLAGS_2(tidcp_el1, TCG_CALL_NO_WG, void, env, i32)
 DEF_HELPER_3(set_cp_reg, void, env, cptr, i32)
 DEF_HELPER_2(get_cp_reg, i32, env, cptr)
 DEF_HELPER_3(set_cp_reg64, void, env, cptr, i64)
diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c
index 486f90be1d..546c0e817f 100644
--- a/target/arm/hvf/hvf.c
+++ b/target/arm/hvf/hvf.c
@@ -847,6 +847,7 @@ static bool hvf_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf)
         { HV_SYS_REG_ID_AA64DFR1_EL1, &host_isar.id_aa64dfr1 },
         { HV_SYS_REG_ID_AA64ISAR0_EL1, &host_isar.id_aa64isar0 },
         { HV_SYS_REG_ID_AA64ISAR1_EL1, &host_isar.id_aa64isar1 },
+        /* Add ID_AA64ISAR2_EL1 here when HVF supports it */
         { HV_SYS_REG_ID_AA64MMFR0_EL1, &host_isar.id_aa64mmfr0 },
         { HV_SYS_REG_ID_AA64MMFR1_EL1, &host_isar.id_aa64mmfr1 },
         { HV_SYS_REG_ID_AA64MMFR2_EL1, &host_isar.id_aa64mmfr2 },
diff --git a/target/arm/kvm.c b/target/arm/kvm.c
index 23aeb09949..b66b936a95 100644
--- a/target/arm/kvm.c
+++ b/target/arm/kvm.c
@@ -30,6 +30,7 @@
 #include "exec/address-spaces.h"
 #include "hw/boards.h"
 #include "hw/irq.h"
+#include "qapi/visitor.h"
 #include "qemu/log.h"
 
 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
@@ -287,6 +288,26 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
         }
     }
 
+    if (s->kvm_eager_split_size) {
+        uint32_t sizes;
+
+        sizes = kvm_vm_check_extension(s, KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES);
+        if (!sizes) {
+            s->kvm_eager_split_size = 0;
+            warn_report("Eager Page Split support not available");
+        } else if (!(s->kvm_eager_split_size & sizes)) {
+            error_report("Eager Page Split requested chunk size not valid");
+            ret = -EINVAL;
+        } else {
+            ret = kvm_vm_enable_cap(s, KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE, 0,
+                                    s->kvm_eager_split_size);
+            if (ret < 0) {
+                error_report("Enabling of Eager Page Split failed: %s",
+                             strerror(-ret));
+            }
+        }
+    }
+
     kvm_arm_init_debug(s);
 
     return ret;
@@ -1069,6 +1090,46 @@ bool kvm_arch_cpu_check_are_resettable(void)
     return true;
 }
 
+static void kvm_arch_get_eager_split_size(Object *obj, Visitor *v,
+                                          const char *name, void *opaque,
+                                          Error **errp)
+{
+    KVMState *s = KVM_STATE(obj);
+    uint64_t value = s->kvm_eager_split_size;
+
+    visit_type_size(v, name, &value, errp);
+}
+
+static void kvm_arch_set_eager_split_size(Object *obj, Visitor *v,
+                                          const char *name, void *opaque,
+                                          Error **errp)
+{
+    KVMState *s = KVM_STATE(obj);
+    uint64_t value;
+
+    if (s->fd != -1) {
+        error_setg(errp, "Unable to set early-split-size after KVM has been initialized");
+        return;
+    }
+
+    if (!visit_type_size(v, name, &value, errp)) {
+        return;
+    }
+
+    if (value && !is_power_of_2(value)) {
+        error_setg(errp, "early-split-size must be a power of two");
+        return;
+    }
+
+    s->kvm_eager_split_size = value;
+}
+
 void kvm_arch_accel_class_init(ObjectClass *oc)
 {
+    object_class_property_add(oc, "eager-split-size", "size",
+                              kvm_arch_get_eager_split_size,
+                              kvm_arch_set_eager_split_size, NULL, NULL);
+
+    object_class_property_set_description(oc, "eager-split-size",
+        "Eager Page Split chunk size for hugepages. (default: 0, disabled)");
 }
diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c
index 4d904a1d11..5e95c496bb 100644
--- a/target/arm/kvm64.c
+++ b/target/arm/kvm64.c
@@ -304,6 +304,8 @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf)
                               ARM64_SYS_REG(3, 0, 0, 6, 0));
         err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64isar1,
                               ARM64_SYS_REG(3, 0, 0, 6, 1));
+        err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64isar2,
+                              ARM64_SYS_REG(3, 0, 0, 6, 2));
         err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64mmfr0,
                               ARM64_SYS_REG(3, 0, 0, 7, 0));
         err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64mmfr1,
@@ -672,6 +674,7 @@ typedef struct CPRegStateLevel {
  */
 static const CPRegStateLevel non_runtime_cpregs[] = {
     { KVM_REG_ARM_TIMER_CNT, KVM_PUT_FULL_STATE },
+    { KVM_REG_ARM_PTIMER_CNT, KVM_PUT_FULL_STATE },
 };
 
 int kvm_arm_cpreg_level(uint64_t regidx)
diff --git a/target/arm/syndrome.h b/target/arm/syndrome.h
index 62254d0e51..8a6b8f8162 100644
--- a/target/arm/syndrome.h
+++ b/target/arm/syndrome.h
@@ -49,6 +49,7 @@ enum arm_exception_class {
     EC_SYSTEMREGISTERTRAP     = 0x18,
     EC_SVEACCESSTRAP          = 0x19,
     EC_ERETTRAP               = 0x1a,
+    EC_PACFAIL                = 0x1c,
     EC_SMETRAP                = 0x1d,
     EC_GPC                    = 0x1e,
     EC_INSNABORT              = 0x20,
@@ -232,6 +233,12 @@ static inline uint32_t syn_smetrap(SMEExceptionType etype, bool is_16bit)
         | (is_16bit ? 0 : ARM_EL_IL) | etype;
 }
 
+static inline uint32_t syn_pacfail(bool data, int keynumber)
+{
+    int error_code = (data << 1) | keynumber;
+    return (EC_PACFAIL << ARM_EL_EC_SHIFT) | ARM_EL_IL | error_code;
+}
+
 static inline uint32_t syn_pactrap(void)
 {
     return EC_PACTRAP << ARM_EL_EC_SHIFT;
diff --git a/target/arm/tcg/cpu64.c b/target/arm/tcg/cpu64.c
index 0f8972950d..7264ab5ead 100644
--- a/target/arm/tcg/cpu64.c
+++ b/target/arm/tcg/cpu64.c
@@ -745,6 +745,217 @@ static void aarch64_neoverse_v1_initfn(Object *obj)
     aarch64_add_sve_properties(obj);
 }
 
+static const ARMCPRegInfo cortex_a710_cp_reginfo[] = {
+    { .name = "CPUACTLR_EL1", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 0, .crn = 15, .crm = 1, .opc2 = 0,
+      .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0,
+      .accessfn = access_actlr_w },
+    { .name = "CPUACTLR2_EL1", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 0, .crn = 15, .crm = 1, .opc2 = 1,
+      .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0,
+      .accessfn = access_actlr_w },
+    { .name = "CPUACTLR3_EL1", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 0, .crn = 15, .crm = 1, .opc2 = 2,
+      .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0,
+      .accessfn = access_actlr_w },
+    { .name = "CPUACTLR4_EL1", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 0, .crn = 15, .crm = 1, .opc2 = 3,
+      .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0,
+      .accessfn = access_actlr_w },
+    { .name = "CPUECTLR_EL1", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 0, .crn = 15, .crm = 1, .opc2 = 4,
+      .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0,
+      .accessfn = access_actlr_w },
+    { .name = "CPUECTLR2_EL1", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 0, .crn = 15, .crm = 1, .opc2 = 5,
+      .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0,
+      .accessfn = access_actlr_w },
+    { .name = "CPUPPMCR_EL3", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 0, .crn = 15, .crm = 2, .opc2 = 4,
+      .access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+    { .name = "CPUPWRCTLR_EL1", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 0, .crn = 15, .crm = 2, .opc2 = 7,
+      .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0,
+      .accessfn = access_actlr_w },
+    { .name = "ATCR_EL1", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 0, .crn = 15, .crm = 7, .opc2 = 0,
+      .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+    { .name = "CPUACTLR5_EL1", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 0, .crn = 15, .crm = 8, .opc2 = 0,
+      .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0,
+      .accessfn = access_actlr_w },
+    { .name = "CPUACTLR6_EL1", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 0, .crn = 15, .crm = 8, .opc2 = 1,
+      .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0,
+      .accessfn = access_actlr_w },
+    { .name = "CPUACTLR7_EL1", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 0, .crn = 15, .crm = 8, .opc2 = 2,
+      .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0,
+      .accessfn = access_actlr_w },
+    { .name = "ATCR_EL2", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 4, .crn = 15, .crm = 7, .opc2 = 0,
+      .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+    { .name = "AVTCR_EL2", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 4, .crn = 15, .crm = 7, .opc2 = 1,
+      .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+    { .name = "CPUPPMCR_EL3", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 2, .opc2 = 0,
+      .access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+    { .name = "CPUPPMCR2_EL3", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 2, .opc2 = 1,
+      .access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+    { .name = "CPUPPMCR4_EL3", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 2, .opc2 = 4,
+      .access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+    { .name = "CPUPPMCR5_EL3", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 2, .opc2 = 5,
+      .access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+    { .name = "CPUPPMCR6_EL3", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 2, .opc2 = 6,
+      .access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+    { .name = "CPUACTLR_EL3", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 4, .opc2 = 0,
+      .access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+    { .name = "ATCR_EL3", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 7, .opc2 = 0,
+      .access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+    { .name = "CPUPSELR_EL3", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 8, .opc2 = 0,
+      .access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+    { .name = "CPUPCR_EL3", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 8, .opc2 = 1,
+      .access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+    { .name = "CPUPOR_EL3", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 8, .opc2 = 2,
+      .access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+    { .name = "CPUPMR_EL3", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 8, .opc2 = 3,
+      .access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+    { .name = "CPUPOR2_EL3", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 8, .opc2 = 4,
+      .access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+    { .name = "CPUPMR2_EL3", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 8, .opc2 = 5,
+      .access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+    { .name = "CPUPFR_EL3", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 8, .opc2 = 6,
+      .access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+
+    /*
+     * Stub RAMINDEX, as we don't actually implement caches, BTB,
+     * or anything else with cpu internal memory.
+     * "Read" zeros into the IDATA* and DDATA* output registers.
+     */
+    { .name = "RAMINDEX_EL3", .state = ARM_CP_STATE_AA64,
+      .opc0 = 1, .opc1 = 6, .crn = 15, .crm = 0, .opc2 = 0,
+      .access = PL3_W, .type = ARM_CP_CONST, .resetvalue = 0 },
+    { .name = "IDATA0_EL3", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 0, .opc2 = 0,
+      .access = PL3_R, .type = ARM_CP_CONST, .resetvalue = 0 },
+    { .name = "IDATA1_EL3", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 0, .opc2 = 1,
+      .access = PL3_R, .type = ARM_CP_CONST, .resetvalue = 0 },
+    { .name = "IDATA2_EL3", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 0, .opc2 = 2,
+      .access = PL3_R, .type = ARM_CP_CONST, .resetvalue = 0 },
+    { .name = "DDATA0_EL3", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 1, .opc2 = 0,
+      .access = PL3_R, .type = ARM_CP_CONST, .resetvalue = 0 },
+    { .name = "DDATA1_EL3", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 1, .opc2 = 1,
+      .access = PL3_R, .type = ARM_CP_CONST, .resetvalue = 0 },
+    { .name = "DDATA2_EL3", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 1, .opc2 = 2,
+      .access = PL3_R, .type = ARM_CP_CONST, .resetvalue = 0 },
+};
+
+static void aarch64_a710_initfn(Object *obj)
+{
+    ARMCPU *cpu = ARM_CPU(obj);
+
+    cpu->dtb_compatible = "arm,cortex-a710";
+    set_feature(&cpu->env, ARM_FEATURE_V8);
+    set_feature(&cpu->env, ARM_FEATURE_NEON);
+    set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER);
+    set_feature(&cpu->env, ARM_FEATURE_AARCH64);
+    set_feature(&cpu->env, ARM_FEATURE_CBAR_RO);
+    set_feature(&cpu->env, ARM_FEATURE_EL2);
+    set_feature(&cpu->env, ARM_FEATURE_EL3);
+    set_feature(&cpu->env, ARM_FEATURE_PMU);
+
+    /* Ordered by Section B.4: AArch64 registers */
+    cpu->midr          = 0x412FD471; /* r2p1 */
+    cpu->revidr        = 0;
+    cpu->isar.id_pfr0  = 0x21110131;
+    cpu->isar.id_pfr1  = 0x00010000; /* GIC filled in later */
+    cpu->isar.id_dfr0  = 0x16011099;
+    cpu->id_afr0       = 0;
+    cpu->isar.id_mmfr0 = 0x10201105;
+    cpu->isar.id_mmfr1 = 0x40000000;
+    cpu->isar.id_mmfr2 = 0x01260000;
+    cpu->isar.id_mmfr3 = 0x02122211;
+    cpu->isar.id_isar0 = 0x02101110;
+    cpu->isar.id_isar1 = 0x13112111;
+    cpu->isar.id_isar2 = 0x21232042;
+    cpu->isar.id_isar3 = 0x01112131;
+    cpu->isar.id_isar4 = 0x00010142;
+    cpu->isar.id_isar5 = 0x11011121; /* with Crypto */
+    cpu->isar.id_mmfr4 = 0x21021110;
+    cpu->isar.id_isar6 = 0x01111111;
+    cpu->isar.mvfr0    = 0x10110222;
+    cpu->isar.mvfr1    = 0x13211111;
+    cpu->isar.mvfr2    = 0x00000043;
+    cpu->isar.id_pfr2  = 0x00000011;
+    cpu->isar.id_aa64pfr0  = 0x1201111120111112ull; /* GIC filled in later */
+    cpu->isar.id_aa64pfr1  = 0x0000000000000221ull;
+    cpu->isar.id_aa64zfr0  = 0x0000110100110021ull; /* with Crypto */
+    cpu->isar.id_aa64dfr0  = 0x000011f010305611ull;
+    cpu->isar.id_aa64dfr1  = 0;
+    cpu->id_aa64afr0       = 0;
+    cpu->id_aa64afr1       = 0;
+    cpu->isar.id_aa64isar0 = 0x0221111110212120ull; /* with Crypto */
+    cpu->isar.id_aa64isar1 = 0x0010111101211032ull;
+    cpu->isar.id_aa64mmfr0 = 0x0000022200101122ull;
+    cpu->isar.id_aa64mmfr1 = 0x0000000010212122ull;
+    cpu->isar.id_aa64mmfr2 = 0x1221011110101011ull;
+    cpu->clidr             = 0x0000001482000023ull;
+    cpu->gm_blocksize      = 4;
+    cpu->ctr               = 0x000000049444c004ull;
+    cpu->dcz_blocksize     = 4;
+    /* TODO FEAT_MPAM: mpamidr_el1 = 0x0000_0001_0006_003f */
+
+    /* Section B.5.2: PMCR_EL0 */
+    cpu->isar.reset_pmcr_el0 = 0xa000;  /* with 20 counters */
+
+    /* Section B.6.7: ICH_VTR_EL2 */
+    cpu->gic_num_lrs = 4;
+    cpu->gic_vpribits = 5;
+    cpu->gic_vprebits = 5;
+    cpu->gic_pribits = 5;
+
+    /* Section 14: Scalable Vector Extensions support */
+    cpu->sve_vq.supported = 1 << 0;  /* 128bit */
+
+    /*
+     * The cortex-a710 TRM does not list CCSIDR values.  The layout of
+     * the caches are in text in Table 7-1, Table 8-1, and Table 9-1.
+     *
+     * L1: 4-way set associative 64-byte line size, total either 32K or 64K.
+     * L2: 8-way set associative 64 byte line size, total either 256K or 512K.
+     */
+    cpu->ccsidr[0] = make_ccsidr64(4, 64, 64 * KiB);   /* L1 dcache */
+    cpu->ccsidr[1] = cpu->ccsidr[0];                   /* L1 icache */
+    cpu->ccsidr[2] = make_ccsidr64(8, 64, 512 * KiB);  /* L2 cache */
+
+    /* FIXME: Not documented -- copied from neoverse-v1 */
+    cpu->reset_sctlr = 0x30c50838;
+
+    define_arm_cp_regs(cpu, cortex_a710_cp_reginfo);
+
+    aarch64_add_pauth_properties(obj);
+    aarch64_add_sve_properties(obj);
+}
+
 /*
  * -cpu max: a CPU with as many features enabled as our emulation supports.
  * The version of '-cpu max' for qemu-system-arm is defined in cpu32.c;
@@ -803,6 +1014,8 @@ void aarch64_max_tcg_initfn(Object *obj)
 
     t = cpu->isar.id_aa64isar1;
     t = FIELD_DP64(t, ID_AA64ISAR1, DPB, 2);      /* FEAT_DPB2 */
+    t = FIELD_DP64(t, ID_AA64ISAR1, APA, PauthFeat_FPACCOMBINED);
+    t = FIELD_DP64(t, ID_AA64ISAR1, API, 1);
     t = FIELD_DP64(t, ID_AA64ISAR1, JSCVT, 1);    /* FEAT_JSCVT */
     t = FIELD_DP64(t, ID_AA64ISAR1, FCMA, 1);     /* FEAT_FCMA */
     t = FIELD_DP64(t, ID_AA64ISAR1, LRCPC, 2);    /* FEAT_LRCPC2 */
@@ -858,6 +1071,7 @@ void aarch64_max_tcg_initfn(Object *obj)
     t = FIELD_DP64(t, ID_AA64MMFR1, XNX, 1);      /* FEAT_XNX */
     t = FIELD_DP64(t, ID_AA64MMFR1, ETS, 1);      /* FEAT_ETS */
     t = FIELD_DP64(t, ID_AA64MMFR1, HCX, 1);      /* FEAT_HCX */
+    t = FIELD_DP64(t, ID_AA64MMFR1, TIDCP1, 1);   /* FEAT_TIDCP1 */
     cpu->isar.id_aa64mmfr1 = t;
 
     t = cpu->isar.id_aa64mmfr2;
@@ -934,6 +1148,7 @@ static const ARMCPUInfo aarch64_cpus[] = {
     { .name = "cortex-a55",         .initfn = aarch64_a55_initfn },
     { .name = "cortex-a72",         .initfn = aarch64_a72_initfn },
     { .name = "cortex-a76",         .initfn = aarch64_a76_initfn },
+    { .name = "cortex-a710",        .initfn = aarch64_a710_initfn },
     { .name = "a64fx",              .initfn = aarch64_a64fx_initfn },
     { .name = "neoverse-n1",        .initfn = aarch64_neoverse_n1_initfn },
     { .name = "neoverse-v1",        .initfn = aarch64_neoverse_v1_initfn },
diff --git a/target/arm/tcg/crypto_helper.c b/target/arm/tcg/crypto_helper.c
index fdd70abbfd..7cadd61e12 100644
--- a/target/arm/tcg/crypto_helper.c
+++ b/target/arm/tcg/crypto_helper.c
@@ -614,10 +614,7 @@ static void do_crypto_sm4e(uint64_t *rd, uint64_t *rn, uint64_t *rm)
             CR_ST_WORD(d, (i + 3) % 4) ^
             CR_ST_WORD(n, i);
 
-        t = sm4_sbox[t & 0xff] |
-            sm4_sbox[(t >> 8) & 0xff] << 8 |
-            sm4_sbox[(t >> 16) & 0xff] << 16 |
-            sm4_sbox[(t >> 24) & 0xff] << 24;
+        t = sm4_subword(t);
 
         CR_ST_WORD(d, i) ^= t ^ rol32(t, 2) ^ rol32(t, 10) ^ rol32(t, 18) ^
                             rol32(t, 24);
@@ -651,10 +648,7 @@ static void do_crypto_sm4ekey(uint64_t *rd, uint64_t *rn, uint64_t *rm)
             CR_ST_WORD(d, (i + 3) % 4) ^
             CR_ST_WORD(m, i);
 
-        t = sm4_sbox[t & 0xff] |
-            sm4_sbox[(t >> 8) & 0xff] << 8 |
-            sm4_sbox[(t >> 16) & 0xff] << 16 |
-            sm4_sbox[(t >> 24) & 0xff] << 24;
+        t = sm4_subword(t);
 
         CR_ST_WORD(d, i) ^= t ^ rol32(t, 13) ^ rol32(t, 23);
     }
diff --git a/target/arm/tcg/helper-a64.h b/target/arm/tcg/helper-a64.h
index 3d5957c11f..57cfd68569 100644
--- a/target/arm/tcg/helper-a64.h
+++ b/target/arm/tcg/helper-a64.h
@@ -90,9 +90,13 @@ DEF_HELPER_FLAGS_3(pacda, TCG_CALL_NO_WG, i64, env, i64, i64)
 DEF_HELPER_FLAGS_3(pacdb, TCG_CALL_NO_WG, i64, env, i64, i64)
 DEF_HELPER_FLAGS_3(pacga, TCG_CALL_NO_WG, i64, env, i64, i64)
 DEF_HELPER_FLAGS_3(autia, TCG_CALL_NO_WG, i64, env, i64, i64)
+DEF_HELPER_FLAGS_3(autia_combined, TCG_CALL_NO_WG, i64, env, i64, i64)
 DEF_HELPER_FLAGS_3(autib, TCG_CALL_NO_WG, i64, env, i64, i64)
+DEF_HELPER_FLAGS_3(autib_combined, TCG_CALL_NO_WG, i64, env, i64, i64)
 DEF_HELPER_FLAGS_3(autda, TCG_CALL_NO_WG, i64, env, i64, i64)
+DEF_HELPER_FLAGS_3(autda_combined, TCG_CALL_NO_WG, i64, env, i64, i64)
 DEF_HELPER_FLAGS_3(autdb, TCG_CALL_NO_WG, i64, env, i64, i64)
+DEF_HELPER_FLAGS_3(autdb_combined, TCG_CALL_NO_WG, i64, env, i64, i64)
 DEF_HELPER_FLAGS_2(xpaci, TCG_CALL_NO_RWG_SE, i64, env, i64)
 DEF_HELPER_FLAGS_2(xpacd, TCG_CALL_NO_RWG_SE, i64, env, i64)
 
diff --git a/target/arm/tcg/mve_helper.c b/target/arm/tcg/mve_helper.c
index 403b345ea3..c666a96ba1 100644
--- a/target/arm/tcg/mve_helper.c
+++ b/target/arm/tcg/mve_helper.c
@@ -26,6 +26,7 @@
 #include "exec/exec-all.h"
 #include "tcg/tcg.h"
 #include "fpu/softfloat.h"
+#include "crypto/clmul.h"
 
 static uint16_t mve_eci_mask(CPUARMState *env)
 {
@@ -984,17 +985,10 @@ DO_2OP_L(vmulltuw, 1, 4, uint32_t, 8, uint64_t, DO_MUL)
  * Polynomial multiply. We can always do this generating 64 bits
  * of the result at a time, so we don't need to use DO_2OP_L.
  */
-#define VMULLPH_MASK 0x00ff00ff00ff00ffULL
-#define VMULLPW_MASK 0x0000ffff0000ffffULL
-#define DO_VMULLPBH(N, M) pmull_h((N) & VMULLPH_MASK, (M) & VMULLPH_MASK)
-#define DO_VMULLPTH(N, M) DO_VMULLPBH((N) >> 8, (M) >> 8)
-#define DO_VMULLPBW(N, M) pmull_w((N) & VMULLPW_MASK, (M) & VMULLPW_MASK)
-#define DO_VMULLPTW(N, M) DO_VMULLPBW((N) >> 16, (M) >> 16)
-
-DO_2OP(vmullpbh, 8, uint64_t, DO_VMULLPBH)
-DO_2OP(vmullpth, 8, uint64_t, DO_VMULLPTH)
-DO_2OP(vmullpbw, 8, uint64_t, DO_VMULLPBW)
-DO_2OP(vmullptw, 8, uint64_t, DO_VMULLPTW)
+DO_2OP(vmullpbh, 8, uint64_t, clmul_8x4_even)
+DO_2OP(vmullpth, 8, uint64_t, clmul_8x4_odd)
+DO_2OP(vmullpbw, 8, uint64_t, clmul_16x2_even)
+DO_2OP(vmullptw, 8, uint64_t, clmul_16x2_odd)
 
 /*
  * Because the computation type is at least twice as large as required,
diff --git a/target/arm/tcg/op_helper.c b/target/arm/tcg/op_helper.c
index 3baf8004f6..403f8b09d3 100644
--- a/target/arm/tcg/op_helper.c
+++ b/target/arm/tcg/op_helper.c
@@ -764,6 +764,39 @@ const void *HELPER(lookup_cp_reg)(CPUARMState *env, uint32_t key)
     return ri;
 }
 
+/*
+ * Test for HCR_EL2.TIDCP at EL1.
+ * Since implementation defined registers are rare, and within QEMU
+ * most of them are no-op, do not waste HFLAGS space for this and
+ * always use a helper.
+ */
+void HELPER(tidcp_el1)(CPUARMState *env, uint32_t syndrome)
+{
+    if (arm_hcr_el2_eff(env) & HCR_TIDCP) {
+        raise_exception_ra(env, EXCP_UDEF, syndrome, 2, GETPC());
+    }
+}
+
+/*
+ * Similarly, for FEAT_TIDCP1 at EL0.
+ * We have already checked for the presence of the feature.
+ */
+void HELPER(tidcp_el0)(CPUARMState *env, uint32_t syndrome)
+{
+    /* See arm_sctlr(), but we also need the sctlr el. */
+    ARMMMUIdx mmu_idx = arm_mmu_idx_el(env, 0);
+    int target_el = mmu_idx == ARMMMUIdx_E20_0 ? 2 : 1;
+
+    /*
+     * The bit is not valid unless the target el is aa64, but since the
+     * bit test is simpler perform that first and check validity after.
+     */
+    if ((env->cp15.sctlr_el[target_el] & SCTLR_TIDCP)
+        && arm_el_is_aa64(env, target_el)) {
+        raise_exception_ra(env, EXCP_UDEF, syndrome, target_el, GETPC());
+    }
+}
+
 void HELPER(set_cp_reg)(CPUARMState *env, const void *rip, uint32_t value)
 {
     const ARMCPRegInfo *ri = rip;
diff --git a/target/arm/tcg/pauth_helper.c b/target/arm/tcg/pauth_helper.c
index 62af569341..4da2962ad5 100644
--- a/target/arm/tcg/pauth_helper.c
+++ b/target/arm/tcg/pauth_helper.c
@@ -96,6 +96,21 @@ static uint64_t pac_sub(uint64_t i)
     return o;
 }
 
+static uint64_t pac_sub1(uint64_t i)
+{
+    static const uint8_t sub1[16] = {
+        0xa, 0xd, 0xe, 0x6, 0xf, 0x7, 0x3, 0x5,
+        0x9, 0x8, 0x0, 0xc, 0xb, 0x1, 0x2, 0x4,
+    };
+    uint64_t o = 0;
+    int b;
+
+    for (b = 0; b < 64; b += 4) {
+        o |= (uint64_t)sub1[(i >> b) & 0xf] << b;
+    }
+    return o;
+}
+
 static uint64_t pac_inv_sub(uint64_t i)
 {
     static const uint8_t inv_sub[16] = {
@@ -209,7 +224,7 @@ static uint64_t tweak_inv_shuffle(uint64_t i)
 }
 
 static uint64_t pauth_computepac_architected(uint64_t data, uint64_t modifier,
-                                             ARMPACKey key)
+                                             ARMPACKey key, bool isqarma3)
 {
     static const uint64_t RC[5] = {
         0x0000000000000000ull,
@@ -219,6 +234,7 @@ static uint64_t pauth_computepac_architected(uint64_t data, uint64_t modifier,
         0x452821E638D01377ull,
     };
     const uint64_t alpha = 0xC0AC29B7C97C50DDull;
+    int iterations = isqarma3 ? 2 : 4;
     /*
      * Note that in the ARM pseudocode, key0 contains bits <127:64>
      * and key1 contains bits <63:0> of the 128-bit key.
@@ -231,7 +247,7 @@ static uint64_t pauth_computepac_architected(uint64_t data, uint64_t modifier,
     runningmod = modifier;
     workingval = data ^ key0;
 
-    for (i = 0; i <= 4; ++i) {
+    for (i = 0; i <= iterations; ++i) {
         roundkey = key1 ^ runningmod;
         workingval ^= roundkey;
         workingval ^= RC[i];
@@ -239,32 +255,48 @@ static uint64_t pauth_computepac_architected(uint64_t data, uint64_t modifier,
             workingval = pac_cell_shuffle(workingval);
             workingval = pac_mult(workingval);
         }
-        workingval = pac_sub(workingval);
+        if (isqarma3) {
+            workingval = pac_sub1(workingval);
+        } else {
+            workingval = pac_sub(workingval);
+        }
         runningmod = tweak_shuffle(runningmod);
     }
     roundkey = modk0 ^ runningmod;
     workingval ^= roundkey;
     workingval = pac_cell_shuffle(workingval);
     workingval = pac_mult(workingval);
-    workingval = pac_sub(workingval);
+    if (isqarma3) {
+        workingval = pac_sub1(workingval);
+    } else {
+        workingval = pac_sub(workingval);
+    }
     workingval = pac_cell_shuffle(workingval);
     workingval = pac_mult(workingval);
     workingval ^= key1;
     workingval = pac_cell_inv_shuffle(workingval);
-    workingval = pac_inv_sub(workingval);
+    if (isqarma3) {
+        workingval = pac_sub1(workingval);
+    } else {
+        workingval = pac_inv_sub(workingval);
+    }
     workingval = pac_mult(workingval);
     workingval = pac_cell_inv_shuffle(workingval);
     workingval ^= key0;
     workingval ^= runningmod;
-    for (i = 0; i <= 4; ++i) {
-        workingval = pac_inv_sub(workingval);
-        if (i < 4) {
+    for (i = 0; i <= iterations; ++i) {
+        if (isqarma3) {
+            workingval = pac_sub1(workingval);
+        } else {
+            workingval = pac_inv_sub(workingval);
+        }
+        if (i < iterations) {
             workingval = pac_mult(workingval);
             workingval = pac_cell_inv_shuffle(workingval);
         }
         runningmod = tweak_inv_shuffle(runningmod);
         roundkey = key1 ^ runningmod;
-        workingval ^= RC[4 - i];
+        workingval ^= RC[iterations - i];
         workingval ^= roundkey;
         workingval ^= alpha;
     }
@@ -282,8 +314,10 @@ static uint64_t pauth_computepac_impdef(uint64_t data, uint64_t modifier,
 static uint64_t pauth_computepac(CPUARMState *env, uint64_t data,
                                  uint64_t modifier, ARMPACKey key)
 {
-    if (cpu_isar_feature(aa64_pauth_arch, env_archcpu(env))) {
-        return pauth_computepac_architected(data, modifier, key);
+    if (cpu_isar_feature(aa64_pauth_qarma5, env_archcpu(env))) {
+        return pauth_computepac_architected(data, modifier, key, false);
+    } else if (cpu_isar_feature(aa64_pauth_qarma3, env_archcpu(env))) {
+        return pauth_computepac_architected(data, modifier, key, true);
     } else {
         return pauth_computepac_impdef(data, modifier, key);
     }
@@ -292,8 +326,10 @@ static uint64_t pauth_computepac(CPUARMState *env, uint64_t data,
 static uint64_t pauth_addpac(CPUARMState *env, uint64_t ptr, uint64_t modifier,
                              ARMPACKey *key, bool data)
 {
+    ARMCPU *cpu = env_archcpu(env);
     ARMMMUIdx mmu_idx = arm_stage1_mmu_idx(env);
     ARMVAParameters param = aa64_va_parameters(env, ptr, mmu_idx, data, false);
+    ARMPauthFeature pauth_feature = cpu_isar_feature(pauth_feature, cpu);
     uint64_t pac, ext_ptr, ext, test;
     int bot_bit, top_bit;
 
@@ -317,17 +353,26 @@ static uint64_t pauth_addpac(CPUARMState *env, uint64_t ptr, uint64_t modifier,
      */
     test = sextract64(ptr, bot_bit, top_bit - bot_bit);
     if (test != 0 && test != -1) {
-        /*
-         * Note that our top_bit is one greater than the pseudocode's
-         * version, hence "- 2" here.
-         */
-        pac ^= MAKE_64BIT_MASK(top_bit - 2, 1);
+        if (pauth_feature >= PauthFeat_2) {
+            /* No action required */
+        } else if (pauth_feature == PauthFeat_EPAC) {
+            pac = 0;
+        } else {
+            /*
+             * Note that our top_bit is one greater than the pseudocode's
+             * version, hence "- 2" here.
+             */
+            pac ^= MAKE_64BIT_MASK(top_bit - 2, 1);
+        }
     }
 
     /*
      * Preserve the determination between upper and lower at bit 55,
      * and insert pointer authentication code.
      */
+    if (pauth_feature >= PauthFeat_2) {
+        pac ^= ptr;
+    }
     if (param.tbi) {
         ptr &= ~MAKE_64BIT_MASK(bot_bit, 55 - bot_bit + 1);
         pac &= MAKE_64BIT_MASK(bot_bit, 54 - bot_bit + 1);
@@ -351,21 +396,46 @@ static uint64_t pauth_original_ptr(uint64_t ptr, ARMVAParameters param)
     }
 }
 
-static uint64_t pauth_auth(CPUARMState *env, uint64_t ptr, uint64_t modifier,
-                           ARMPACKey *key, bool data, int keynumber)
+static G_NORETURN
+void pauth_fail_exception(CPUARMState *env, bool data,
+                          int keynumber, uintptr_t ra)
 {
+    raise_exception_ra(env, EXCP_UDEF, syn_pacfail(data, keynumber),
+                       exception_target_el(env), ra);
+}
+
+static uint64_t pauth_auth(CPUARMState *env, uint64_t ptr, uint64_t modifier,
+                           ARMPACKey *key, bool data, int keynumber,
+                           uintptr_t ra, bool is_combined)
+{
+    ARMCPU *cpu = env_archcpu(env);
     ARMMMUIdx mmu_idx = arm_stage1_mmu_idx(env);
     ARMVAParameters param = aa64_va_parameters(env, ptr, mmu_idx, data, false);
+    ARMPauthFeature pauth_feature = cpu_isar_feature(pauth_feature, cpu);
     int bot_bit, top_bit;
-    uint64_t pac, orig_ptr, test;
+    uint64_t pac, orig_ptr, cmp_mask;
 
     orig_ptr = pauth_original_ptr(ptr, param);
     pac = pauth_computepac(env, orig_ptr, modifier, *key);
     bot_bit = 64 - param.tsz;
     top_bit = 64 - 8 * param.tbi;
 
-    test = (pac ^ ptr) & ~MAKE_64BIT_MASK(55, 1);
-    if (unlikely(extract64(test, bot_bit, top_bit - bot_bit))) {
+    cmp_mask = MAKE_64BIT_MASK(bot_bit, top_bit - bot_bit);
+    cmp_mask &= ~MAKE_64BIT_MASK(55, 1);
+
+    if (pauth_feature >= PauthFeat_2) {
+        ARMPauthFeature fault_feature =
+            is_combined ? PauthFeat_FPACCOMBINED : PauthFeat_FPAC;
+        uint64_t result = ptr ^ (pac & cmp_mask);
+
+        if (pauth_feature >= fault_feature
+            && ((result ^ sextract64(result, 55, 1)) & cmp_mask)) {
+            pauth_fail_exception(env, data, keynumber, ra);
+        }
+        return result;
+    }
+
+    if ((pac ^ ptr) & cmp_mask) {
         int error_code = (keynumber << 1) | (keynumber ^ 1);
         if (param.tbi) {
             return deposit64(orig_ptr, 53, 2, error_code);
@@ -466,44 +536,88 @@ uint64_t HELPER(pacga)(CPUARMState *env, uint64_t x, uint64_t y)
     return pac & 0xffffffff00000000ull;
 }
 
-uint64_t HELPER(autia)(CPUARMState *env, uint64_t x, uint64_t y)
+static uint64_t pauth_autia(CPUARMState *env, uint64_t x, uint64_t y,
+                            uintptr_t ra, bool is_combined)
 {
     int el = arm_current_el(env);
     if (!pauth_key_enabled(env, el, SCTLR_EnIA)) {
         return x;
     }
-    pauth_check_trap(env, el, GETPC());
-    return pauth_auth(env, x, y, &env->keys.apia, false, 0);
+    pauth_check_trap(env, el, ra);
+    return pauth_auth(env, x, y, &env->keys.apia, false, 0, ra, is_combined);
 }
 
-uint64_t HELPER(autib)(CPUARMState *env, uint64_t x, uint64_t y)
+uint64_t HELPER(autia)(CPUARMState *env, uint64_t x, uint64_t y)
+{
+    return pauth_autia(env, x, y, GETPC(), false);
+}
+
+uint64_t HELPER(autia_combined)(CPUARMState *env, uint64_t x, uint64_t y)
+{
+    return pauth_autia(env, x, y, GETPC(), true);
+}
+
+static uint64_t pauth_autib(CPUARMState *env, uint64_t x, uint64_t y,
+                            uintptr_t ra, bool is_combined)
 {
     int el = arm_current_el(env);
     if (!pauth_key_enabled(env, el, SCTLR_EnIB)) {
         return x;
     }
-    pauth_check_trap(env, el, GETPC());
-    return pauth_auth(env, x, y, &env->keys.apib, false, 1);
+    pauth_check_trap(env, el, ra);
+    return pauth_auth(env, x, y, &env->keys.apib, false, 1, ra, is_combined);
 }
 
-uint64_t HELPER(autda)(CPUARMState *env, uint64_t x, uint64_t y)
+uint64_t HELPER(autib)(CPUARMState *env, uint64_t x, uint64_t y)
+{
+    return pauth_autib(env, x, y, GETPC(), false);
+}
+
+uint64_t HELPER(autib_combined)(CPUARMState *env, uint64_t x, uint64_t y)
+{
+    return pauth_autib(env, x, y, GETPC(), true);
+}
+
+static uint64_t pauth_autda(CPUARMState *env, uint64_t x, uint64_t y,
+                            uintptr_t ra, bool is_combined)
 {
     int el = arm_current_el(env);
     if (!pauth_key_enabled(env, el, SCTLR_EnDA)) {
         return x;
     }
-    pauth_check_trap(env, el, GETPC());
-    return pauth_auth(env, x, y, &env->keys.apda, true, 0);
+    pauth_check_trap(env, el, ra);
+    return pauth_auth(env, x, y, &env->keys.apda, true, 0, ra, is_combined);
 }
 
-uint64_t HELPER(autdb)(CPUARMState *env, uint64_t x, uint64_t y)
+uint64_t HELPER(autda)(CPUARMState *env, uint64_t x, uint64_t y)
+{
+    return pauth_autda(env, x, y, GETPC(), false);
+}
+
+uint64_t HELPER(autda_combined)(CPUARMState *env, uint64_t x, uint64_t y)
+{
+    return pauth_autda(env, x, y, GETPC(), true);
+}
+
+static uint64_t pauth_autdb(CPUARMState *env, uint64_t x, uint64_t y,
+                            uintptr_t ra, bool is_combined)
 {
     int el = arm_current_el(env);
     if (!pauth_key_enabled(env, el, SCTLR_EnDB)) {
         return x;
     }
-    pauth_check_trap(env, el, GETPC());
-    return pauth_auth(env, x, y, &env->keys.apdb, true, 1);
+    pauth_check_trap(env, el, ra);
+    return pauth_auth(env, x, y, &env->keys.apdb, true, 1, ra, is_combined);
+}
+
+uint64_t HELPER(autdb)(CPUARMState *env, uint64_t x, uint64_t y)
+{
+    return pauth_autdb(env, x, y, GETPC(), false);
+}
+
+uint64_t HELPER(autdb_combined)(CPUARMState *env, uint64_t x, uint64_t y)
+{
+    return pauth_autdb(env, x, y, GETPC(), true);
 }
 
 uint64_t HELPER(xpaci)(CPUARMState *env, uint64_t a)
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index 75c4e7e2ad..6c593fd21f 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -1530,9 +1530,9 @@ static TCGv_i64 auth_branch_target(DisasContext *s, TCGv_i64 dst,
 
     truedst = tcg_temp_new_i64();
     if (use_key_a) {
-        gen_helper_autia(truedst, cpu_env, dst, modifier);
+        gen_helper_autia_combined(truedst, cpu_env, dst, modifier);
     } else {
-        gen_helper_autib(truedst, cpu_env, dst, modifier);
+        gen_helper_autib_combined(truedst, cpu_env, dst, modifier);
     }
     return truedst;
 }
@@ -2154,6 +2154,25 @@ static void handle_sys(DisasContext *s, bool isread,
     bool need_exit_tb = false;
     TCGv_ptr tcg_ri = NULL;
     TCGv_i64 tcg_rt;
+    uint32_t syndrome;
+
+    if (crn == 11 || crn == 15) {
+        /*
+         * Check for TIDCP trap, which must take precedence over
+         * the UNDEF for "no such register" etc.
+         */
+        syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
+        switch (s->current_el) {
+        case 0:
+            if (dc_isar_feature(aa64_tidcp1, s)) {
+                gen_helper_tidcp_el0(cpu_env, tcg_constant_i32(syndrome));
+            }
+            break;
+        case 1:
+            gen_helper_tidcp_el1(cpu_env, tcg_constant_i32(syndrome));
+            break;
+        }
+    }
 
     if (!ri) {
         /* Unknown register; this might be a guest error or a QEMU
@@ -2176,8 +2195,6 @@ static void handle_sys(DisasContext *s, bool isread,
         /* Emit code to perform further access permissions checks at
          * runtime; this may result in an exception.
          */
-        uint32_t syndrome;
-
         syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
         gen_a64_update_pc(s, 0);
         tcg_ri = tcg_temp_new_ptr();
@@ -3020,37 +3037,17 @@ static bool trans_STGP(DisasContext *s, arg_ldstpair *a)
         tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
     }
 
-    if (!s->ata) {
-        /*
-         * TODO: We could rely on the stores below, at least for
-         * system mode, if we arrange to add MO_ALIGN_16.
-         */
-        gen_helper_stg_stub(cpu_env, dirty_addr);
-    } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
-        gen_helper_stg_parallel(cpu_env, dirty_addr, dirty_addr);
-    } else {
-        gen_helper_stg(cpu_env, dirty_addr, dirty_addr);
-    }
-
-    mop = finalize_memop(s, MO_64);
-    clean_addr = gen_mte_checkN(s, dirty_addr, true, false, 2 << MO_64, mop);
-
+    clean_addr = clean_data_tbi(s, dirty_addr);
     tcg_rt = cpu_reg(s, a->rt);
     tcg_rt2 = cpu_reg(s, a->rt2);
 
     /*
-     * STGP is defined as two 8-byte memory operations and one tag operation.
-     * We implement it as one single 16-byte memory operation for convenience.
-     * Rebuild mop as for STP.
-     * TODO: The atomicity with LSE2 is stronger than required.
-     * Need a form of MO_ATOM_WITHIN16_PAIR that never requires
-     * 16-byte atomicity.
+     * STGP is defined as two 8-byte memory operations, aligned to TAG_GRANULE,
+     * and one tag operation.  We implement it as one single aligned 16-byte
+     * memory operation for convenience.  Note that the alignment ensures
+     * MO_ATOM_IFALIGN_PAIR produces 8-byte atomicity for the memory store.
      */
-    mop = MO_128;
-    if (s->align_mem) {
-        mop |= MO_ALIGN_8;
-    }
-    mop = finalize_memop_pair(s, mop);
+    mop = finalize_memop_atom(s, MO_128 | MO_ALIGN, MO_ATOM_IFALIGN_PAIR);
 
     tmp = tcg_temp_new_i128();
     if (s->be_data == MO_LE) {
@@ -3060,6 +3057,15 @@ static bool trans_STGP(DisasContext *s, arg_ldstpair *a)
     }
     tcg_gen_qemu_st_i128(tmp, clean_addr, get_mem_index(s), mop);
 
+    /* Perform the tag store, if tag access enabled. */
+    if (s->ata) {
+        if (tb_cflags(s->base.tb) & CF_PARALLEL) {
+            gen_helper_stg_parallel(cpu_env, dirty_addr, dirty_addr);
+        } else {
+            gen_helper_stg(cpu_env, dirty_addr, dirty_addr);
+        }
+    }
+
     op_addr_ldstpair_post(s, a, dirty_addr, offset);
     return true;
 }
@@ -3352,11 +3358,11 @@ static bool trans_LDRA(DisasContext *s, arg_LDRA *a)
 
     if (s->pauth_active) {
         if (!a->m) {
-            gen_helper_autda(dirty_addr, cpu_env, dirty_addr,
-                             tcg_constant_i64(0));
+            gen_helper_autda_combined(dirty_addr, cpu_env, dirty_addr,
+                                      tcg_constant_i64(0));
         } else {
-            gen_helper_autdb(dirty_addr, cpu_env, dirty_addr,
-                             tcg_constant_i64(0));
+            gen_helper_autdb_combined(dirty_addr, cpu_env, dirty_addr,
+                                      tcg_constant_i64(0));
         }
     }
 
diff --git a/target/arm/tcg/translate.c b/target/arm/tcg/translate.c
index bd82213634..bfd444e87e 100644
--- a/target/arm/tcg/translate.c
+++ b/target/arm/tcg/translate.c
@@ -2949,54 +2949,16 @@ void gen_gvec_sqrdmlsh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
     gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
 }
 
-#define GEN_CMP0(NAME, COND)                                            \
-    static void gen_##NAME##0_i32(TCGv_i32 d, TCGv_i32 a)               \
-    {                                                                   \
-        tcg_gen_negsetcond_i32(COND, d, a, tcg_constant_i32(0));        \
-    }                                                                   \
-    static void gen_##NAME##0_i64(TCGv_i64 d, TCGv_i64 a)               \
-    {                                                                   \
-        tcg_gen_negsetcond_i64(COND, d, a, tcg_constant_i64(0));        \
-    }                                                                   \
-    static void gen_##NAME##0_vec(unsigned vece, TCGv_vec d, TCGv_vec a) \
-    {                                                                   \
-        TCGv_vec zero = tcg_constant_vec_matching(d, vece, 0);          \
-        tcg_gen_cmp_vec(COND, vece, d, a, zero);                        \
-    }                                                                   \
-    void gen_gvec_##NAME##0(unsigned vece, uint32_t d, uint32_t m,      \
-                            uint32_t opr_sz, uint32_t max_sz)           \
-    {                                                                   \
-        const GVecGen2 op[4] = {                                        \
-            { .fno = gen_helper_gvec_##NAME##0_b,                       \
-              .fniv = gen_##NAME##0_vec,                                \
-              .opt_opc = vecop_list_cmp,                                \
-              .vece = MO_8 },                                           \
-            { .fno = gen_helper_gvec_##NAME##0_h,                       \
-              .fniv = gen_##NAME##0_vec,                                \
-              .opt_opc = vecop_list_cmp,                                \
-              .vece = MO_16 },                                          \
-            { .fni4 = gen_##NAME##0_i32,                                \
-              .fniv = gen_##NAME##0_vec,                                \
-              .opt_opc = vecop_list_cmp,                                \
-              .vece = MO_32 },                                          \
-            { .fni8 = gen_##NAME##0_i64,                                \
-              .fniv = gen_##NAME##0_vec,                                \
-              .opt_opc = vecop_list_cmp,                                \
-              .prefer_i64 = TCG_TARGET_REG_BITS == 64,                  \
-              .vece = MO_64 },                                          \
-        };                                                              \
-        tcg_gen_gvec_2(d, m, opr_sz, max_sz, &op[vece]);                \
-    }
+#define GEN_CMP0(NAME, COND)                              \
+    void NAME(unsigned vece, uint32_t d, uint32_t m,      \
+              uint32_t opr_sz, uint32_t max_sz)           \
+    { tcg_gen_gvec_cmpi(COND, vece, d, m, 0, opr_sz, max_sz); }
 
-static const TCGOpcode vecop_list_cmp[] = {
-    INDEX_op_cmp_vec, 0
-};
-
-GEN_CMP0(ceq, TCG_COND_EQ)
-GEN_CMP0(cle, TCG_COND_LE)
-GEN_CMP0(cge, TCG_COND_GE)
-GEN_CMP0(clt, TCG_COND_LT)
-GEN_CMP0(cgt, TCG_COND_GT)
+GEN_CMP0(gen_gvec_ceq0, TCG_COND_EQ)
+GEN_CMP0(gen_gvec_cle0, TCG_COND_LE)
+GEN_CMP0(gen_gvec_cge0, TCG_COND_GE)
+GEN_CMP0(gen_gvec_clt0, TCG_COND_LT)
+GEN_CMP0(gen_gvec_cgt0, TCG_COND_GT)
 
 #undef GEN_CMP0
 
@@ -4544,6 +4506,20 @@ void gen_gvec_uaba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
 }
 
+static bool aa32_cpreg_encoding_in_impdef_space(uint8_t crn, uint8_t crm)
+{
+    static const uint16_t mask[3] = {
+        0b0000000111100111,  /* crn ==  9, crm == {c0-c2, c5-c8}   */
+        0b0000000100010011,  /* crn == 10, crm == {c0, c1, c4, c8} */
+        0b1000000111111111,  /* crn == 11, crm == {c0-c8, c15}     */
+    };
+
+    if (crn >= 9 && crn <= 11) {
+        return (mask[crn - 9] >> crm) & 1;
+    }
+    return false;
+}
+
 static void do_coproc_insn(DisasContext *s, int cpnum, int is64,
                            int opc1, int crn, int crm, int opc2,
                            bool isread, int rt, int rt2)
@@ -4625,6 +4601,25 @@ static void do_coproc_insn(DisasContext *s, int cpnum, int is64,
         }
     }
 
+    if (cpnum == 15 && aa32_cpreg_encoding_in_impdef_space(crn, crm)) {
+        /*
+         * Check for TIDCP trap, which must take precedence over the UNDEF
+         * for "no such register" etc.  It shares precedence with HSTR,
+         * but raises the same exception, so order doesn't matter.
+         */
+        switch (s->current_el) {
+        case 0:
+            if (arm_dc_feature(s, ARM_FEATURE_AARCH64)
+                && dc_isar_feature(aa64_tidcp1, s)) {
+                gen_helper_tidcp_el0(cpu_env, tcg_constant_i32(syndrome));
+            }
+            break;
+        case 1:
+            gen_helper_tidcp_el1(cpu_env, tcg_constant_i32(syndrome));
+            break;
+        }
+    }
+
     if (!ri) {
         /*
          * Unknown register; this might be a guest error or a QEMU
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
index 6712a2c790..1f93510b85 100644
--- a/target/arm/tcg/vec_helper.c
+++ b/target/arm/tcg/vec_helper.c
@@ -23,6 +23,7 @@
 #include "tcg/tcg-gvec-desc.h"
 #include "fpu/softfloat.h"
 #include "qemu/int128.h"
+#include "crypto/clmul.h"
 #include "vec_internal.h"
 
 /*
@@ -1986,21 +1987,11 @@ void HELPER(gvec_ushl_h)(void *vd, void *vn, void *vm, uint32_t desc)
  */
 void HELPER(gvec_pmul_b)(void *vd, void *vn, void *vm, uint32_t desc)
 {
-    intptr_t i, j, opr_sz = simd_oprsz(desc);
+    intptr_t i, opr_sz = simd_oprsz(desc);
     uint64_t *d = vd, *n = vn, *m = vm;
 
     for (i = 0; i < opr_sz / 8; ++i) {
-        uint64_t nn = n[i];
-        uint64_t mm = m[i];
-        uint64_t rr = 0;
-
-        for (j = 0; j < 8; ++j) {
-            uint64_t mask = (nn & 0x0101010101010101ull) * 0xff;
-            rr ^= mm & mask;
-            mm = (mm << 1) & 0xfefefefefefefefeull;
-            nn >>= 1;
-        }
-        d[i] = rr;
+        d[i] = clmul_8x8_low(n[i], m[i]);
     }
     clear_tail(d, opr_sz, simd_maxsz(desc));
 }
@@ -2012,84 +2003,28 @@ void HELPER(gvec_pmul_b)(void *vd, void *vn, void *vm, uint32_t desc)
  */
 void HELPER(gvec_pmull_q)(void *vd, void *vn, void *vm, uint32_t desc)
 {
-    intptr_t i, j, opr_sz = simd_oprsz(desc);
+    intptr_t i, opr_sz = simd_oprsz(desc);
     intptr_t hi = simd_data(desc);
     uint64_t *d = vd, *n = vn, *m = vm;
 
     for (i = 0; i < opr_sz / 8; i += 2) {
-        uint64_t nn = n[i + hi];
-        uint64_t mm = m[i + hi];
-        uint64_t rhi = 0;
-        uint64_t rlo = 0;
-
-        /* Bit 0 can only influence the low 64-bit result.  */
-        if (nn & 1) {
-            rlo = mm;
-        }
-
-        for (j = 1; j < 64; ++j) {
-            uint64_t mask = -((nn >> j) & 1);
-            rlo ^= (mm << j) & mask;
-            rhi ^= (mm >> (64 - j)) & mask;
-        }
-        d[i] = rlo;
-        d[i + 1] = rhi;
+        Int128 r = clmul_64(n[i + hi], m[i + hi]);
+        d[i] = int128_getlo(r);
+        d[i + 1] = int128_gethi(r);
     }
     clear_tail(d, opr_sz, simd_maxsz(desc));
 }
 
-/*
- * 8x8->16 polynomial multiply.
- *
- * The byte inputs are expanded to (or extracted from) half-words.
- * Note that neon and sve2 get the inputs from different positions.
- * This allows 4 bytes to be processed in parallel with uint64_t.
- */
-
-static uint64_t expand_byte_to_half(uint64_t x)
-{
-    return  (x & 0x000000ff)
-         | ((x & 0x0000ff00) << 8)
-         | ((x & 0x00ff0000) << 16)
-         | ((x & 0xff000000) << 24);
-}
-
-uint64_t pmull_w(uint64_t op1, uint64_t op2)
-{
-    uint64_t result = 0;
-    int i;
-    for (i = 0; i < 16; ++i) {
-        uint64_t mask = (op1 & 0x0000000100000001ull) * 0xffffffff;
-        result ^= op2 & mask;
-        op1 >>= 1;
-        op2 <<= 1;
-    }
-    return result;
-}
-
-uint64_t pmull_h(uint64_t op1, uint64_t op2)
-{
-    uint64_t result = 0;
-    int i;
-    for (i = 0; i < 8; ++i) {
-        uint64_t mask = (op1 & 0x0001000100010001ull) * 0xffff;
-        result ^= op2 & mask;
-        op1 >>= 1;
-        op2 <<= 1;
-    }
-    return result;
-}
-
 void HELPER(neon_pmull_h)(void *vd, void *vn, void *vm, uint32_t desc)
 {
     int hi = simd_data(desc);
     uint64_t *d = vd, *n = vn, *m = vm;
     uint64_t nn = n[hi], mm = m[hi];
 
-    d[0] = pmull_h(expand_byte_to_half(nn), expand_byte_to_half(mm));
+    d[0] = clmul_8x4_packed(nn, mm);
     nn >>= 32;
     mm >>= 32;
-    d[1] = pmull_h(expand_byte_to_half(nn), expand_byte_to_half(mm));
+    d[1] = clmul_8x4_packed(nn, mm);
 
     clear_tail(d, 16, simd_maxsz(desc));
 }
@@ -2102,25 +2037,10 @@ void HELPER(sve2_pmull_h)(void *vd, void *vn, void *vm, uint32_t desc)
     uint64_t *d = vd, *n = vn, *m = vm;
 
     for (i = 0; i < opr_sz / 8; ++i) {
-        uint64_t nn = (n[i] >> shift) & 0x00ff00ff00ff00ffull;
-        uint64_t mm = (m[i] >> shift) & 0x00ff00ff00ff00ffull;
-
-        d[i] = pmull_h(nn, mm);
+        d[i] = clmul_8x4_even(n[i] >> shift, m[i] >> shift);
     }
 }
 
-static uint64_t pmull_d(uint64_t op1, uint64_t op2)
-{
-    uint64_t result = 0;
-    int i;
-
-    for (i = 0; i < 32; ++i) {
-        uint64_t mask = -((op1 >> i) & 1);
-        result ^= (op2 << i) & mask;
-    }
-    return result;
-}
-
 void HELPER(sve2_pmull_d)(void *vd, void *vn, void *vm, uint32_t desc)
 {
     intptr_t sel = H4(simd_data(desc));
@@ -2129,7 +2049,7 @@ void HELPER(sve2_pmull_d)(void *vd, void *vn, void *vm, uint32_t desc)
     uint64_t *d = vd;
 
     for (i = 0; i < opr_sz / 8; ++i) {
-        d[i] = pmull_d(n[2 * i + sel], m[2 * i + sel]);
+        d[i] = clmul_32(n[2 * i + sel], m[2 * i + sel]);
     }
 }
 #endif
diff --git a/target/arm/tcg/vec_internal.h b/target/arm/tcg/vec_internal.h
index 1f4ed80ff7..3ca1b94ccf 100644
--- a/target/arm/tcg/vec_internal.h
+++ b/target/arm/tcg/vec_internal.h
@@ -219,17 +219,6 @@ int16_t do_sqrdmlah_h(int16_t, int16_t, int16_t, bool, bool, uint32_t *);
 int32_t do_sqrdmlah_s(int32_t, int32_t, int32_t, bool, bool, uint32_t *);
 int64_t do_sqrdmlah_d(int64_t, int64_t, int64_t, bool, bool);
 
-/*
- * 8 x 8 -> 16 vector polynomial multiply where the inputs are
- * in the low 8 bits of each 16-bit element
-*/
-uint64_t pmull_h(uint64_t op1, uint64_t op2);
-/*
- * 16 x 16 -> 32 vector polynomial multiply where the inputs are
- * in the low 16 bits of each 32-bit element
- */
-uint64_t pmull_w(uint64_t op1, uint64_t op2);
-
 /**
  * bfdotadd:
  * @sum: addend
diff --git a/target/avr/helper.c b/target/avr/helper.c
index e6e7d51487..fdc9884ea0 100644
--- a/target/avr/helper.c
+++ b/target/avr/helper.c
@@ -24,6 +24,7 @@
 #include "cpu.h"
 #include "hw/core/tcg-cpu-ops.h"
 #include "exec/exec-all.h"
+#include "exec/cpu_ldst.h"
 #include "exec/address-spaces.h"
 #include "exec/helper-proto.h"
 
diff --git a/target/cris/op_helper.c b/target/cris/op_helper.c
index 40cb74ce73..98a9aaf504 100644
--- a/target/cris/op_helper.c
+++ b/target/cris/op_helper.c
@@ -24,7 +24,6 @@
 #include "exec/helper-proto.h"
 #include "qemu/host-utils.h"
 #include "exec/exec-all.h"
-#include "exec/cpu_ldst.h"
 
 //#define CRIS_OP_HELPER_DEBUG
 
diff --git a/target/hexagon/README b/target/hexagon/README
index 43811178e9..e757bcb64a 100644
--- a/target/hexagon/README
+++ b/target/hexagon/README
@@ -239,7 +239,7 @@ helper_funcs_generated.c.inc.  There are also several helpers used for debugging
 
 VLIW packet semantics differ from serial semantics in that all input operands
 are read, then the operations are performed, then all the results are written.
-For exmaple, this packet performs a swap of registers r0 and r1
+For example, this packet performs a swap of registers r0 and r1
     { r0 = r1; r1 = r0 }
 Note that the result is different if the instructions are executed serially.
 
diff --git a/target/hexagon/fma_emu.c b/target/hexagon/fma_emu.c
index d3b45d494f..05a56d8c10 100644
--- a/target/hexagon/fma_emu.c
+++ b/target/hexagon/fma_emu.c
@@ -415,7 +415,7 @@ static SUFFIX accum_round_##SUFFIX(Accum a, float_status * fp_status) \
      * We want to normalize left until we have a leading one in bit 24 \
      * Theoretically, we only need to shift a maximum of one to the left if we \
      * shifted out lots of bits from B, or if we had no shift / 1 shift sticky \
-     * shoudl be 0  \
+     * should be 0  \
      */ \
     while ((int128_getlo(a.mant) & (1ULL << MANTBITS)) == 0) { \
         a = accum_norm_left(a); \
diff --git a/target/hexagon/idef-parser/README.rst b/target/hexagon/idef-parser/README.rst
index debeddfde5..d0aa34309b 100644
--- a/target/hexagon/idef-parser/README.rst
+++ b/target/hexagon/idef-parser/README.rst
@@ -440,7 +440,7 @@ interested part of the grammar.
 
 Run-time errors can be divided between lexing and parsing errors, lexing errors
 are hard to detect, since the ``var`` token will catch everything which is not
-catched by other tokens, but easy to fix, because most of the time a simple
+caught by other tokens, but easy to fix, because most of the time a simple
 regex editing will be enough.
 
 idef-parser features a fancy parsing error reporting scheme, which for each
diff --git a/target/hexagon/idef-parser/idef-parser.h b/target/hexagon/idef-parser/idef-parser.h
index d23e71f13b..3faa1deecd 100644
--- a/target/hexagon/idef-parser/idef-parser.h
+++ b/target/hexagon/idef-parser/idef-parser.h
@@ -73,7 +73,7 @@ typedef struct HexTmp {
 } HexTmp;
 
 /**
- * Enum of the possible immediated, an immediate is a value which is known
+ * Enum of the possible immediate, an immediate is a value which is known
  * at tinycode generation time, e.g. an integer value, not a TCGv
  */
 enum ImmUnionTag {
diff --git a/target/hexagon/idef-parser/parser-helpers.c b/target/hexagon/idef-parser/parser-helpers.c
index 7b5ebafec2..ec43343801 100644
--- a/target/hexagon/idef-parser/parser-helpers.c
+++ b/target/hexagon/idef-parser/parser-helpers.c
@@ -459,7 +459,7 @@ static bool try_find_variable(Context *c, YYLTYPE *locp,
     return false;
 }
 
-/* Calls `try_find_variable` and asserts succcess. */
+/* Calls `try_find_variable` and asserts success. */
 static void find_variable(Context *c, YYLTYPE *locp,
                           HexValue *dst,
                           HexValue *varid)
@@ -549,7 +549,7 @@ HexValue gen_bin_cmp(Context *c,
             ");\n");
         break;
     default:
-        fprintf(stderr, "Error in evalutating immediateness!");
+        fprintf(stderr, "Error in evaluating immediateness!");
         abort();
     }
     return res;
@@ -1164,7 +1164,7 @@ void gen_rdeposit_op(Context *c,
 {
     /*
      * Otherwise if the width is not known, we fallback on reimplementing
-     * desposit in TCG.
+     * deposit in TCG.
      */
     HexValue begin_m = *begin;
     HexValue value_m = *value;
diff --git a/target/hexagon/imported/alu.idef b/target/hexagon/imported/alu.idef
index 58477ae40a..12d2aac5d4 100644
--- a/target/hexagon/imported/alu.idef
+++ b/target/hexagon/imported/alu.idef
@@ -292,16 +292,16 @@ Q6INSN(A4_combineii,"Rdd32=combine(#s8,#U6)",ATTRIBS(),"Set two small immediates
 
 
 Q6INSN(A2_combine_hh,"Rd32=combine(Rt.H32,Rs.H32)",ATTRIBS(),
-"Combine two halfs into a register", {RdV = (fGETUHALF(1,RtV)<<16) | fGETUHALF(1,RsV);})
+"Combine two halves into a register", {RdV = (fGETUHALF(1,RtV)<<16) | fGETUHALF(1,RsV);})
 
 Q6INSN(A2_combine_hl,"Rd32=combine(Rt.H32,Rs.L32)",ATTRIBS(),
-"Combine two halfs into a register", {RdV = (fGETUHALF(1,RtV)<<16) | fGETUHALF(0,RsV);})
+"Combine two halves into a register", {RdV = (fGETUHALF(1,RtV)<<16) | fGETUHALF(0,RsV);})
 
 Q6INSN(A2_combine_lh,"Rd32=combine(Rt.L32,Rs.H32)",ATTRIBS(),
-"Combine two halfs into a register", {RdV = (fGETUHALF(0,RtV)<<16) | fGETUHALF(1,RsV);})
+"Combine two halves into a register", {RdV = (fGETUHALF(0,RtV)<<16) | fGETUHALF(1,RsV);})
 
 Q6INSN(A2_combine_ll,"Rd32=combine(Rt.L32,Rs.L32)",ATTRIBS(),
-"Combine two halfs into a register", {RdV = (fGETUHALF(0,RtV)<<16) | fGETUHALF(0,RsV);})
+"Combine two halves into a register", {RdV = (fGETUHALF(0,RtV)<<16) | fGETUHALF(0,RsV);})
 
 Q6INSN(A2_tfril,"Rx.L32=#u16",ATTRIBS(),
 "Set low 16-bits, leave upper 16 unchanged",{ fSETHALF(0,RxV,uiV);})
diff --git a/target/hexagon/imported/macros.def b/target/hexagon/imported/macros.def
index e23f91562e..4bbcfdd5e1 100755
--- a/target/hexagon/imported/macros.def
+++ b/target/hexagon/imported/macros.def
@@ -902,7 +902,7 @@ DEF_MACRO(
 )
 
 DEF_MACRO(
-    fEA_GPI, /* Calculate EA with Global Poitner + Immediate */
+    fEA_GPI, /* Calculate EA with Global Pointer + Immediate */
     do { EA=fREAD_GP()+IMM; fGP_DOCHKPAGECROSS(fREAD_GP(),EA); } while (0),
     ()
 )
diff --git a/target/hexagon/imported/mmvec/ext.idef b/target/hexagon/imported/mmvec/ext.idef
index ead32c243b..98daabfb07 100644
--- a/target/hexagon/imported/mmvec/ext.idef
+++ b/target/hexagon/imported/mmvec/ext.idef
@@ -17,7 +17,7 @@
 
 /******************************************************************************
  *
- *     HOYA: MULTI MEDIA INSTRUCITONS
+ *     HOYA: MULTI MEDIA INSTRUCTIONS
  *
  ******************************************************************************/
 
@@ -295,7 +295,7 @@ MMVEC_COND_EACH_EA(vS32Ub,"Unaligned Vector Store",ATTRIBS(ATTR_VMEMU,A_STORE,A_
 
 MMVEC_EACH_EA(vS32b_new,"Aligned Vector Store New",ATTRIBS(ATTR_VMEM,A_STORE,A_CVI_NEW,A_DOTNEWVALUE,A_RESTRICT_SLOT0ONLY),,"vmem","=Os8.new",fSTOREMMV(EA,fNEWVREG(OsN)))
 
-// V65 store relase, zero byte store
+// V65 store release, zero byte store
 MMVEC_EACH_EA(vS32b_srls,"Aligned Vector Scatter Release",ATTRIBS(ATTR_VMEM,A_STORE,A_CVI_SCATTER_RELEASE,A_CVI_NEW,A_RESTRICT_SLOT0ONLY),,"vmem",":scatter_release",fSTORERELEASE(EA,0))
 
 
@@ -2045,11 +2045,11 @@ VxV.uw[0] = RtV;)
 
 
 
-ITERATOR_INSN_MPY_SLOT_LATE(32,lvsplatw, "Vd32=vsplat(Rt32)", "Replicates scalar accross words in vector", VdV.uw[i] = RtV)
+ITERATOR_INSN_MPY_SLOT_LATE(32,lvsplatw, "Vd32=vsplat(Rt32)", "Replicates scalar across words in vector", VdV.uw[i] = RtV)
 
-ITERATOR_INSN_MPY_SLOT_LATE(16,lvsplath, "Vd32.h=vsplat(Rt32)", "Replicates scalar accross halves in vector", VdV.uh[i] = RtV)
+ITERATOR_INSN_MPY_SLOT_LATE(16,lvsplath, "Vd32.h=vsplat(Rt32)", "Replicates scalar across halves in vector", VdV.uh[i] = RtV)
 
-ITERATOR_INSN_MPY_SLOT_LATE(8,lvsplatb, "Vd32.b=vsplat(Rt32)", "Replicates scalar accross bytes in vector", VdV.ub[i] = RtV)
+ITERATOR_INSN_MPY_SLOT_LATE(8,lvsplatb, "Vd32.b=vsplat(Rt32)", "Replicates scalar across bytes in vector", VdV.ub[i] = RtV)
 
 
 ITERATOR_INSN_ANY_SLOT(32,vassign,"Vd32=Vu32","Copy a vector",VdV.w[i]=VuV.w[i])
diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c
index 708339198e..c00254e4d5 100644
--- a/target/hexagon/translate.c
+++ b/target/hexagon/translate.c
@@ -23,7 +23,6 @@
 #include "exec/helper-gen.h"
 #include "exec/helper-proto.h"
 #include "exec/translation-block.h"
-#include "exec/cpu_ldst.h"
 #include "exec/log.h"
 #include "internal.h"
 #include "attribs.h"
diff --git a/target/hppa/cpu.h b/target/hppa/cpu.h
index fa13694dab..730f35231a 100644
--- a/target/hppa/cpu.h
+++ b/target/hppa/cpu.h
@@ -211,8 +211,14 @@ typedef struct CPUArchState {
     target_ureg shadow[7];   /* shadow registers */
 
     /* ??? The number of entries isn't specified by the architecture.  */
+#ifdef TARGET_HPPA64
+#define HPPA_BTLB_FIXED         0       /* BTLBs are not supported in 64-bit machines */
+#else
+#define HPPA_BTLB_FIXED         16
+#endif
+#define HPPA_BTLB_VARIABLE      0
 #define HPPA_TLB_ENTRIES        256
-#define HPPA_BTLB_ENTRIES       0
+#define HPPA_BTLB_ENTRIES       (HPPA_BTLB_FIXED + HPPA_BTLB_VARIABLE)
 
     /* ??? Implement a unified itlb/dtlb for the moment.  */
     /* ??? We should use a more intelligent data structure.  */
@@ -344,7 +350,8 @@ bool hppa_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
 void hppa_cpu_do_interrupt(CPUState *cpu);
 bool hppa_cpu_exec_interrupt(CPUState *cpu, int int_req);
 int hppa_get_physical_address(CPUHPPAState *env, vaddr addr, int mmu_idx,
-                              int type, hwaddr *pphys, int *pprot);
+                              int type, hwaddr *pphys, int *pprot,
+                              hppa_tlb_entry **tlb_entry);
 extern const MemoryRegionOps hppa_io_eir_ops;
 extern const VMStateDescription vmstate_hppa_cpu;
 void hppa_cpu_alarm_timer(void *);
diff --git a/target/hppa/helper.h b/target/hppa/helper.h
index c7e35ce8c7..647f043c85 100644
--- a/target/hppa/helper.h
+++ b/target/hppa/helper.h
@@ -95,4 +95,5 @@ DEF_HELPER_FLAGS_2(ptlb, TCG_CALL_NO_RWG, void, env, tl)
 DEF_HELPER_FLAGS_1(ptlbe, TCG_CALL_NO_RWG, void, env)
 DEF_HELPER_FLAGS_2(lpa, TCG_CALL_NO_WG, tr, env, tl)
 DEF_HELPER_FLAGS_1(change_prot_id, TCG_CALL_NO_RWG, void, env)
+DEF_HELPER_1(diag_btlb, void, env)
 #endif
diff --git a/target/hppa/insns.decode b/target/hppa/insns.decode
index 27341d27b2..aebe03ccfd 100644
--- a/target/hppa/insns.decode
+++ b/target/hppa/insns.decode
@@ -528,4 +528,4 @@ fdiv_d          001110 ..... ..... 011 ..... ... .....  @f0e_d_3
 xmpyu           001110 ..... ..... 010 .0111 .00 t:5    r1=%ra64 r2=%rb64
 
 # diag
-diag            000101 ----- ----- ---- ---- ---- ----
+diag            000101 i:26
diff --git a/target/hppa/int_helper.c b/target/hppa/int_helper.c
index bebc732c97..3ab9934a1d 100644
--- a/target/hppa/int_helper.c
+++ b/target/hppa/int_helper.c
@@ -154,7 +154,7 @@ void hppa_cpu_do_interrupt(CPUState *cs)
 
                     vaddr = hppa_form_gva_psw(old_psw, iasq_f, vaddr);
                     t = hppa_get_physical_address(env, vaddr, MMU_KERNEL_IDX,
-                                                  0, &paddr, &prot);
+                                                  0, &paddr, &prot, NULL);
                     if (t >= 0) {
                         /* We can't re-load the instruction.  */
                         env->cr[CR_IIR] = 0;
diff --git a/target/hppa/mem_helper.c b/target/hppa/mem_helper.c
index 46c3dcaf15..520fd311f8 100644
--- a/target/hppa/mem_helper.c
+++ b/target/hppa/mem_helper.c
@@ -41,16 +41,24 @@ static hppa_tlb_entry *hppa_find_tlb(CPUHPPAState *env, vaddr addr)
     return NULL;
 }
 
-static void hppa_flush_tlb_ent(CPUHPPAState *env, hppa_tlb_entry *ent)
+static void hppa_flush_tlb_ent(CPUHPPAState *env, hppa_tlb_entry *ent,
+                               bool force_flush_btlb)
 {
     CPUState *cs = env_cpu(env);
-    unsigned i, n = 1 << (2 * ent->page_size);
-    uint64_t addr = ent->va_b;
+
+    if (!ent->entry_valid) {
+        return;
+    }
 
     trace_hppa_tlb_flush_ent(env, ent, ent->va_b, ent->va_e, ent->pa);
 
-    for (i = 0; i < n; ++i, addr += TARGET_PAGE_SIZE) {
-        tlb_flush_page_by_mmuidx(cs, addr, HPPA_MMU_FLUSH_MASK);
+    tlb_flush_range_by_mmuidx(cs, ent->va_b,
+                                ent->va_e - ent->va_b + 1,
+                                HPPA_MMU_FLUSH_MASK, TARGET_LONG_BITS);
+
+    /* never clear BTLBs, unless forced to do so. */
+    if (ent < &env->tlb[HPPA_BTLB_ENTRIES] && !force_flush_btlb) {
+        return;
     }
 
     memset(ent, 0, sizeof(*ent));
@@ -60,23 +68,35 @@ static void hppa_flush_tlb_ent(CPUHPPAState *env, hppa_tlb_entry *ent)
 static hppa_tlb_entry *hppa_alloc_tlb_ent(CPUHPPAState *env)
 {
     hppa_tlb_entry *ent;
-    uint32_t i = env->tlb_last;
+    uint32_t i;
+
+    if (env->tlb_last < HPPA_BTLB_ENTRIES || env->tlb_last >= ARRAY_SIZE(env->tlb)) {
+        i = HPPA_BTLB_ENTRIES;
+        env->tlb_last = HPPA_BTLB_ENTRIES + 1;
+    } else {
+        i = env->tlb_last;
+        env->tlb_last++;
+    }
 
-    env->tlb_last = (i == ARRAY_SIZE(env->tlb) - 1 ? 0 : i + 1);
     ent = &env->tlb[i];
 
-    hppa_flush_tlb_ent(env, ent);
+    hppa_flush_tlb_ent(env, ent, false);
     return ent;
 }
 
 int hppa_get_physical_address(CPUHPPAState *env, vaddr addr, int mmu_idx,
-                              int type, hwaddr *pphys, int *pprot)
+                              int type, hwaddr *pphys, int *pprot,
+                              hppa_tlb_entry **tlb_entry)
 {
     hwaddr phys;
     int prot, r_prot, w_prot, x_prot, priv;
     hppa_tlb_entry *ent;
     int ret = -1;
 
+    if (tlb_entry) {
+        *tlb_entry = NULL;
+    }
+
     /* Virtual translation disabled.  Direct map virtual to physical.  */
     if (mmu_idx == MMU_PHYS_IDX) {
         phys = addr;
@@ -93,8 +113,12 @@ int hppa_get_physical_address(CPUHPPAState *env, vaddr addr, int mmu_idx,
         goto egress;
     }
 
+    if (tlb_entry) {
+        *tlb_entry = ent;
+    }
+
     /* We now know the physical address.  */
-    phys = ent->pa + (addr & ~TARGET_PAGE_MASK);
+    phys = ent->pa + (addr - ent->va_b);
 
     /* Map TLB access_rights field to QEMU protection.  */
     priv = MMU_IDX_TO_PRIV(mmu_idx);
@@ -193,7 +217,7 @@ hwaddr hppa_cpu_get_phys_page_debug(CPUState *cs, vaddr addr)
     }
 
     excp = hppa_get_physical_address(&cpu->env, addr, MMU_KERNEL_IDX, 0,
-                                     &phys, &prot);
+                                     &phys, &prot, NULL);
 
     /* Since we're translating for debugging, the only error that is a
        hard error is no translation at all.  Otherwise, while a real cpu
@@ -207,6 +231,7 @@ bool hppa_cpu_tlb_fill(CPUState *cs, vaddr addr, int size,
 {
     HPPACPU *cpu = HPPA_CPU(cs);
     CPUHPPAState *env = &cpu->env;
+    hppa_tlb_entry *ent;
     int prot, excp, a_prot;
     hwaddr phys;
 
@@ -223,7 +248,7 @@ bool hppa_cpu_tlb_fill(CPUState *cs, vaddr addr, int size,
     }
 
     excp = hppa_get_physical_address(env, addr, mmu_idx,
-                                     a_prot, &phys, &prot);
+                                     a_prot, &phys, &prot, &ent);
     if (unlikely(excp >= 0)) {
         if (probe) {
             return false;
@@ -243,7 +268,7 @@ bool hppa_cpu_tlb_fill(CPUState *cs, vaddr addr, int size,
                                 phys & TARGET_PAGE_MASK, size, type, mmu_idx);
     /* Success!  Store the translation into the QEMU TLB.  */
     tlb_set_page(cs, addr & TARGET_PAGE_MASK, phys & TARGET_PAGE_MASK,
-                 prot, mmu_idx, TARGET_PAGE_SIZE);
+                 prot, mmu_idx, TARGET_PAGE_SIZE << (ent ? 2 * ent->page_size : 0));
     return true;
 }
 
@@ -254,11 +279,11 @@ void HELPER(itlba)(CPUHPPAState *env, target_ulong addr, target_ureg reg)
     int i;
 
     /* Zap any old entries covering ADDR; notice empty entries on the way.  */
-    for (i = 0; i < ARRAY_SIZE(env->tlb); ++i) {
+    for (i = HPPA_BTLB_ENTRIES; i < ARRAY_SIZE(env->tlb); ++i) {
         hppa_tlb_entry *ent = &env->tlb[i];
         if (ent->va_b <= addr && addr <= ent->va_e) {
             if (ent->entry_valid) {
-                hppa_flush_tlb_ent(env, ent);
+                hppa_flush_tlb_ent(env, ent, false);
             }
             if (!empty) {
                 empty = ent;
@@ -278,16 +303,8 @@ void HELPER(itlba)(CPUHPPAState *env, target_ulong addr, target_ureg reg)
     trace_hppa_tlb_itlba(env, empty, empty->va_b, empty->va_e, empty->pa);
 }
 
-/* Insert (Insn/Data) TLB Protection.  Note this is PA 1.1 only.  */
-void HELPER(itlbp)(CPUHPPAState *env, target_ulong addr, target_ureg reg)
+static void set_access_bits(CPUHPPAState *env, hppa_tlb_entry *ent, target_ureg reg)
 {
-    hppa_tlb_entry *ent = hppa_find_tlb(env, addr);
-
-    if (unlikely(ent == NULL)) {
-        qemu_log_mask(LOG_GUEST_ERROR, "ITLBP not following ITLBA\n");
-        return;
-    }
-
     ent->access_id = extract32(reg, 1, 18);
     ent->u = extract32(reg, 19, 1);
     ent->ar_pl2 = extract32(reg, 20, 2);
@@ -301,6 +318,19 @@ void HELPER(itlbp)(CPUHPPAState *env, target_ulong addr, target_ureg reg)
                          ent->ar_pl1, ent->ar_type, ent->b, ent->d, ent->t);
 }
 
+/* Insert (Insn/Data) TLB Protection.  Note this is PA 1.1 only.  */
+void HELPER(itlbp)(CPUHPPAState *env, target_ulong addr, target_ureg reg)
+{
+    hppa_tlb_entry *ent = hppa_find_tlb(env, addr);
+
+    if (unlikely(ent == NULL)) {
+        qemu_log_mask(LOG_GUEST_ERROR, "ITLBP not following ITLBA\n");
+        return;
+    }
+
+    set_access_bits(env, ent, reg);
+}
+
 /* Purge (Insn/Data) TLB.  This is explicitly page-based, and is
    synchronous across all processors.  */
 static void ptlb_work(CPUState *cpu, run_on_cpu_data data)
@@ -310,7 +340,7 @@ static void ptlb_work(CPUState *cpu, run_on_cpu_data data)
     hppa_tlb_entry *ent = hppa_find_tlb(env, addr);
 
     if (ent && ent->entry_valid) {
-        hppa_flush_tlb_ent(env, ent);
+        hppa_flush_tlb_ent(env, ent, false);
     }
 }
 
@@ -334,7 +364,10 @@ void HELPER(ptlb)(CPUHPPAState *env, target_ulong addr)
 void HELPER(ptlbe)(CPUHPPAState *env)
 {
     trace_hppa_tlb_ptlbe(env);
-    memset(env->tlb, 0, sizeof(env->tlb));
+    qemu_log_mask(CPU_LOG_MMU, "FLUSH ALL TLB ENTRIES\n");
+    memset(&env->tlb[HPPA_BTLB_ENTRIES], 0,
+        sizeof(env->tlb) - HPPA_BTLB_ENTRIES * sizeof(env->tlb[0]));
+    env->tlb_last = HPPA_BTLB_ENTRIES;
     tlb_flush_by_mmuidx(env_cpu(env), HPPA_MMU_FLUSH_MASK);
 }
 
@@ -356,7 +389,7 @@ target_ureg HELPER(lpa)(CPUHPPAState *env, target_ulong addr)
     int prot, excp;
 
     excp = hppa_get_physical_address(env, addr, MMU_KERNEL_IDX, 0,
-                                     &phys, &prot);
+                                     &phys, &prot, NULL);
     if (excp >= 0) {
         if (env->psw & PSW_Q) {
             /* ??? Needs tweaking for hppa64.  */
@@ -379,3 +412,95 @@ int hppa_artype_for_page(CPUHPPAState *env, target_ulong vaddr)
     hppa_tlb_entry *ent = hppa_find_tlb(env, vaddr);
     return ent ? ent->ar_type : -1;
 }
+
+/*
+ * diag_btlb() emulates the PDC PDC_BLOCK_TLB firmware call to
+ * allow operating systems to modify the Block TLB (BTLB) entries.
+ * For implementation details see page 1-13 in
+ * https://parisc.wiki.kernel.org/images-parisc/e/ef/Pdc11-v0.96-Ch1-procs.pdf
+ */
+void HELPER(diag_btlb)(CPUHPPAState *env)
+{
+    unsigned int phys_page, len, slot;
+    int mmu_idx = cpu_mmu_index(env, 0);
+    uintptr_t ra = GETPC();
+    hppa_tlb_entry *btlb;
+    uint64_t virt_page;
+    uint32_t *vaddr;
+
+#ifdef TARGET_HPPA64
+    /* BTLBs are not supported on 64-bit CPUs */
+    env->gr[28] = -1; /* nonexistent procedure */
+    return;
+#endif
+    env->gr[28] = 0; /* PDC_OK */
+
+    switch (env->gr[25]) {
+    case 0:
+        /* return BTLB parameters */
+        qemu_log_mask(CPU_LOG_MMU, "PDC_BLOCK_TLB: PDC_BTLB_INFO\n");
+        vaddr = probe_access(env, env->gr[24], 4 * sizeof(target_ulong),
+                             MMU_DATA_STORE, mmu_idx, ra);
+        if (vaddr == NULL) {
+            env->gr[28] = -10; /* invalid argument */
+        } else {
+            vaddr[0] = cpu_to_be32(1);
+            vaddr[1] = cpu_to_be32(16 * 1024);
+            vaddr[2] = cpu_to_be32(HPPA_BTLB_FIXED);
+            vaddr[3] = cpu_to_be32(HPPA_BTLB_VARIABLE);
+        }
+        break;
+    case 1:
+        /* insert BTLB entry */
+        virt_page = env->gr[24];        /* upper 32 bits */
+        virt_page <<= 32;
+        virt_page |= env->gr[23];       /* lower 32 bits */
+        phys_page = env->gr[22];
+        len = env->gr[21];
+        slot = env->gr[19];
+        qemu_log_mask(CPU_LOG_MMU, "PDC_BLOCK_TLB: PDC_BTLB_INSERT "
+                    "0x%08llx-0x%08llx: vpage 0x%llx for phys page 0x%04x len %d "
+                    "into slot %d\n",
+                    (long long) virt_page << TARGET_PAGE_BITS,
+                    (long long) (virt_page + len) << TARGET_PAGE_BITS,
+                    (long long) virt_page, phys_page, len, slot);
+        if (slot < HPPA_BTLB_ENTRIES) {
+            btlb = &env->tlb[slot];
+            /* force flush of possibly existing BTLB entry */
+            hppa_flush_tlb_ent(env, btlb, true);
+            /* create new BTLB entry */
+            btlb->va_b = virt_page << TARGET_PAGE_BITS;
+            btlb->va_e = btlb->va_b + len * TARGET_PAGE_SIZE - 1;
+            btlb->pa = phys_page << TARGET_PAGE_BITS;
+            set_access_bits(env, btlb, env->gr[20]);
+            btlb->t = 0;
+            btlb->d = 1;
+        } else {
+            env->gr[28] = -10; /* invalid argument */
+        }
+        break;
+    case 2:
+        /* Purge BTLB entry */
+        slot = env->gr[22];
+        qemu_log_mask(CPU_LOG_MMU, "PDC_BLOCK_TLB: PDC_BTLB_PURGE slot %d\n",
+                                    slot);
+        if (slot < HPPA_BTLB_ENTRIES) {
+            btlb = &env->tlb[slot];
+            hppa_flush_tlb_ent(env, btlb, true);
+        } else {
+            env->gr[28] = -10; /* invalid argument */
+        }
+        break;
+    case 3:
+        /* Purge all BTLB entries */
+        qemu_log_mask(CPU_LOG_MMU, "PDC_BLOCK_TLB: PDC_BTLB_PURGE_ALL\n");
+        for (slot = 0; slot < HPPA_BTLB_ENTRIES; slot++) {
+            btlb = &env->tlb[slot];
+            hppa_flush_tlb_ent(env, btlb, true);
+        }
+        break;
+    default:
+        env->gr[28] = -2; /* nonexistent option */
+        break;
+    }
+}
diff --git a/target/hppa/op_helper.c b/target/hppa/op_helper.c
index f25a5a72aa..837e2b3117 100644
--- a/target/hppa/op_helper.c
+++ b/target/hppa/op_helper.c
@@ -179,7 +179,8 @@ target_ureg HELPER(probe)(CPUHPPAState *env, target_ulong addr,
         return 0;
     }
 
-    excp = hppa_get_physical_address(env, addr, level, 0, &phys, &prot);
+    excp = hppa_get_physical_address(env, addr, level, 0, &phys,
+                                     &prot, NULL);
     if (excp >= 0) {
         if (env->psw & PSW_Q) {
             /* ??? Needs tweaking for hppa64.  */
diff --git a/target/hppa/translate.c b/target/hppa/translate.c
index e3af668252..650bbcfe95 100644
--- a/target/hppa/translate.c
+++ b/target/hppa/translate.c
@@ -23,7 +23,6 @@
 #include "qemu/host-utils.h"
 #include "exec/exec-all.h"
 #include "tcg/tcg-op.h"
-#include "exec/cpu_ldst.h"
 #include "exec/helper-proto.h"
 #include "exec/helper-gen.h"
 #include "exec/translator.h"
@@ -4043,9 +4042,18 @@ static bool trans_fmpyfadd_d(DisasContext *ctx, arg_fmpyfadd_d *a)
 
 static bool trans_diag(DisasContext *ctx, arg_diag *a)
 {
-    qemu_log_mask(LOG_UNIMP, "DIAG opcode ignored\n");
-    cond_free(&ctx->null_cond);
-    return true;
+    nullify_over(ctx);
+    CHECK_MOST_PRIVILEGED(EXCP_PRIV_OPR);
+#ifndef CONFIG_USER_ONLY
+    if (a->i == 0x100) {
+        /* emulate PDC BTLB, called by SeaBIOS-hppa */
+        gen_helper_diag_btlb(cpu_env);
+    } else
+#endif
+    {
+        qemu_log_mask(LOG_UNIMP, "DIAG opcode 0x%04x ignored\n", a->i);
+    }
+    return nullify_end(ctx);
 }
 
 static void hppa_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
diff --git a/target/i386/cpu-sysemu.c b/target/i386/cpu-sysemu.c
index 28115edf44..2375e48178 100644
--- a/target/i386/cpu-sysemu.c
+++ b/target/i386/cpu-sysemu.c
@@ -19,9 +19,9 @@
 
 #include "qemu/osdep.h"
 #include "cpu.h"
+#include "sysemu/kvm.h"
 #include "sysemu/xen.h"
 #include "sysemu/whpx.h"
-#include "kvm/kvm_i386.h"
 #include "qapi/error.h"
 #include "qapi/qapi-visit-run-state.h"
 #include "qapi/qmp/qdict.h"
@@ -253,7 +253,7 @@ APICCommonClass *apic_get_class(Error **errp)
 
     /* TODO: in-kernel irqchip for hvf */
     if (kvm_enabled()) {
-        if (!kvm_apic_in_kernel()) {
+        if (!kvm_irqchip_in_kernel()) {
             error_setg(errp, "KVM does not support userspace APIC");
             return NULL;
         }
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 00f913b638..2589c8e929 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -980,7 +980,7 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = {
         .feat_names = {
             NULL, NULL, NULL, NULL,
             "avx-vnni-int8", "avx-ne-convert", NULL, NULL,
-            NULL, NULL, NULL, NULL,
+            "amx-complex", NULL, NULL, NULL,
             NULL, NULL, "prefetchiti", NULL,
             NULL, NULL, NULL, NULL,
             NULL, NULL, NULL, NULL,
@@ -5340,7 +5340,7 @@ static const char *x86_cpu_feature_name(FeatureWord w, int bitnr)
     return name;
 }
 
-/* Compatibily hack to maintain legacy +-feat semantic,
+/* Compatibility hack to maintain legacy +-feat semantic,
  * where +-feat overwrites any feature set by
  * feat=on|feat even if the later is parsed after +-feat
  * (i.e. "-x2apic,x2apic=on" will result in x2apic disabled)
@@ -6154,6 +6154,8 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
     case 7:
         /* Structured Extended Feature Flags Enumeration Leaf */
         if (count == 0) {
+            uint32_t eax_0_unused, ebx_0, ecx_0, edx_0_unused;
+
             /* Maximum ECX value for sub-leaves */
             *eax = env->cpuid_level_func7;
             *ebx = env->features[FEAT_7_0_EBX]; /* Feature flags */
@@ -6168,17 +6170,15 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
              * support enabling SGX and/or SGX flexible launch control,
              * then we need to update the VM's CPUID values accordingly.
              */
-            if ((*ebx & CPUID_7_0_EBX_SGX) &&
-                (!kvm_enabled() ||
-                 !(kvm_arch_get_supported_cpuid(cs->kvm_state, 0x7, 0, R_EBX) &
-                    CPUID_7_0_EBX_SGX))) {
+            x86_cpu_get_supported_cpuid(0x7, 0,
+                                        &eax_0_unused, &ebx_0,
+                                        &ecx_0, &edx_0_unused);
+            if ((*ebx & CPUID_7_0_EBX_SGX) && !(ebx_0 & CPUID_7_0_EBX_SGX)) {
                 *ebx &= ~CPUID_7_0_EBX_SGX;
             }
 
-            if ((*ecx & CPUID_7_0_ECX_SGX_LC) &&
-                (!(*ebx & CPUID_7_0_EBX_SGX) || !kvm_enabled() ||
-                 !(kvm_arch_get_supported_cpuid(cs->kvm_state, 0x7, 0, R_ECX) &
-                    CPUID_7_0_ECX_SGX_LC))) {
+            if ((*ecx & CPUID_7_0_ECX_SGX_LC)
+                    && (!(*ebx & CPUID_7_0_EBX_SGX) || !(ecx_0 & CPUID_7_0_ECX_SGX_LC))) {
                 *ecx &= ~CPUID_7_0_ECX_SGX_LC;
             }
         } else if (count == 1) {
@@ -6207,7 +6207,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
         break;
     case 0xA:
         /* Architectural Performance Monitoring Leaf */
-        if (accel_uses_host_cpuid() && cpu->enable_pmu) {
+        if (cpu->enable_pmu) {
             x86_cpu_get_supported_cpuid(0xA, count, eax, ebx, ecx, edx);
         } else {
             *eax = 0;
@@ -6247,8 +6247,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
         *ebx &= 0xffff; /* The count doesn't need to be reliable. */
         break;
     case 0x1C:
-        if (accel_uses_host_cpuid() && cpu->enable_pmu &&
-            (env->features[FEAT_7_0_EDX] & CPUID_7_0_EDX_ARCH_LBR)) {
+        if (cpu->enable_pmu && (env->features[FEAT_7_0_EDX] & CPUID_7_0_EDX_ARCH_LBR)) {
             x86_cpu_get_supported_cpuid(0x1C, 0, eax, ebx, ecx, edx);
             *edx = 0;
         }
@@ -6304,7 +6303,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
              * The initial value of xcr0 and ebx == 0, On host without kvm
              * commit 412a3c41(e.g., CentOS 6), the ebx's value always == 0
              * even through guest update xcr0, this will crash some legacy guest
-             * (e.g., CentOS 6), So set ebx == ecx to workaroud it.
+             * (e.g., CentOS 6), So set ebx == ecx to workaround it.
              */
             *ebx = kvm_enabled() ? *ecx : xsave_area_size(env->xcr0, false);
         } else if (count == 1) {
@@ -6322,9 +6321,8 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
             } else {
                 *ecx &= ~XSTATE_ARCH_LBR_MASK;
             }
-        } else if (count == 0xf &&
-                   accel_uses_host_cpuid() && cpu->enable_pmu &&
-                   (env->features[FEAT_7_0_EDX] & CPUID_7_0_EDX_ARCH_LBR)) {
+        } else if (count == 0xf && cpu->enable_pmu
+                   && (env->features[FEAT_7_0_EDX] & CPUID_7_0_EDX_ARCH_LBR)) {
             x86_cpu_get_supported_cpuid(0xD, count, eax, ebx, ecx, edx);
         } else if (count < ARRAY_SIZE(x86_ext_save_areas)) {
             const ExtSaveArea *esa = &x86_ext_save_areas[count];
@@ -7121,8 +7119,8 @@ void x86_cpu_expand_features(X86CPU *cpu, Error **errp)
         env->cpuid_xlevel2 = env->cpuid_min_xlevel2;
     }
 
-    if (kvm_enabled()) {
-        kvm_hyperv_expand_features(cpu, errp);
+    if (kvm_enabled() && !kvm_hyperv_expand_features(cpu, errp)) {
+        return;
     }
 }
 
@@ -7152,14 +7150,14 @@ static void x86_cpu_filter_features(X86CPU *cpu, bool verbose)
         mark_unavailable_features(cpu, w, unavailable_features, prefix);
     }
 
-    if ((env->features[FEAT_7_0_EBX] & CPUID_7_0_EBX_INTEL_PT) &&
-        kvm_enabled()) {
-        KVMState *s = CPU(cpu)->kvm_state;
-        uint32_t eax_0 = kvm_arch_get_supported_cpuid(s, 0x14, 0, R_EAX);
-        uint32_t ebx_0 = kvm_arch_get_supported_cpuid(s, 0x14, 0, R_EBX);
-        uint32_t ecx_0 = kvm_arch_get_supported_cpuid(s, 0x14, 0, R_ECX);
-        uint32_t eax_1 = kvm_arch_get_supported_cpuid(s, 0x14, 1, R_EAX);
-        uint32_t ebx_1 = kvm_arch_get_supported_cpuid(s, 0x14, 1, R_EBX);
+    if (env->features[FEAT_7_0_EBX] & CPUID_7_0_EBX_INTEL_PT) {
+        uint32_t eax_0, ebx_0, ecx_0, edx_0_unused;
+        uint32_t eax_1, ebx_1, ecx_1_unused, edx_1_unused;
+
+        x86_cpu_get_supported_cpuid(0x14, 0,
+                                    &eax_0, &ebx_0, &ecx_0, &edx_0_unused);
+        x86_cpu_get_supported_cpuid(0x14, 1,
+                                    &eax_1, &ebx_1, &ecx_1_unused, &edx_1_unused);
 
         if (!eax_0 ||
            ((ebx_0 & INTEL_PT_MINIMAL_EBX) != INTEL_PT_MINIMAL_EBX) ||
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index a6000e93bd..fe06a1b286 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -728,7 +728,7 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w,
 #define CPUID_EXT2_3DNOWEXT (1U << 30)
 #define CPUID_EXT2_3DNOW   (1U << 31)
 
-/* CPUID[8000_0001].EDX bits that are aliase of CPUID[1].EDX bits on AMD CPUs */
+/* CPUID[8000_0001].EDX bits that are aliases of CPUID[1].EDX bits on AMD CPUs */
 #define CPUID_EXT2_AMD_ALIASES (CPUID_EXT2_FPU | CPUID_EXT2_VME | \
                                 CPUID_EXT2_DE | CPUID_EXT2_PSE | \
                                 CPUID_EXT2_TSC | CPUID_EXT2_MSR | \
@@ -930,6 +930,8 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w,
 #define CPUID_7_1_EDX_AVX_VNNI_INT8     (1U << 4)
 /* AVX NE CONVERT Instructions */
 #define CPUID_7_1_EDX_AVX_NE_CONVERT    (1U << 5)
+/* AMX COMPLEX Instructions */
+#define CPUID_7_1_EDX_AMX_COMPLEX       (1U << 8)
 /* PREFETCHIT0/1 Instructions */
 #define CPUID_7_1_EDX_PREFETCHITI       (1U << 14)
 
@@ -2069,7 +2071,7 @@ hwaddr x86_cpu_get_phys_page_attrs_debug(CPUState *cpu, vaddr addr,
                                          MemTxAttrs *attrs);
 int cpu_get_pic_interrupt(CPUX86State *s);
 
-/* MSDOS compatibility mode FPU exception support */
+/* MS-DOS compatibility mode FPU exception support */
 void x86_register_ferr_irq(qemu_irq irq);
 void fpu_check_raise_ferr_irq(CPUX86State *s);
 void cpu_set_ignne(void);
diff --git a/target/i386/hax/hax-accel-ops.c b/target/i386/hax/hax-accel-ops.c
deleted file mode 100644
index 5031096760..0000000000
--- a/target/i386/hax/hax-accel-ops.c
+++ /dev/null
@@ -1,105 +0,0 @@
-/*
- * QEMU HAX support
- *
- * Copyright IBM, Corp. 2008
- *           Red Hat, Inc. 2008
- *
- * Authors:
- *  Anthony Liguori   <aliguori@us.ibm.com>
- *  Glauber Costa     <gcosta@redhat.com>
- *
- * Copyright (c) 2011 Intel Corporation
- *  Written by:
- *  Jiang Yunhong<yunhong.jiang@intel.com>
- *  Xin Xiaohui<xiaohui.xin@intel.com>
- *  Zhang Xiantao<xiantao.zhang@intel.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- *
- */
-
-#include "qemu/osdep.h"
-#include "qemu/error-report.h"
-#include "qemu/main-loop.h"
-#include "sysemu/runstate.h"
-#include "sysemu/cpus.h"
-#include "qemu/guest-random.h"
-
-#include "hax-accel-ops.h"
-
-static void *hax_cpu_thread_fn(void *arg)
-{
-    CPUState *cpu = arg;
-    int r;
-
-    rcu_register_thread();
-    qemu_mutex_lock_iothread();
-    qemu_thread_get_self(cpu->thread);
-
-    cpu->thread_id = qemu_get_thread_id();
-    current_cpu = cpu;
-    hax_init_vcpu(cpu);
-    cpu_thread_signal_created(cpu);
-    qemu_guest_random_seed_thread_part2(cpu->random_seed);
-
-    do {
-        if (cpu_can_run(cpu)) {
-            r = hax_smp_cpu_exec(cpu);
-            if (r == EXCP_DEBUG) {
-                cpu_handle_guest_debug(cpu);
-            }
-        }
-
-        qemu_wait_io_event(cpu);
-    } while (!cpu->unplug || cpu_can_run(cpu));
-    hax_vcpu_destroy(cpu);
-    cpu_thread_signal_destroyed(cpu);
-    rcu_unregister_thread();
-    return NULL;
-}
-
-static void hax_start_vcpu_thread(CPUState *cpu)
-{
-    char thread_name[VCPU_THREAD_NAME_SIZE];
-
-    cpu->thread = g_new0(QemuThread, 1);
-    cpu->halt_cond = g_new0(QemuCond, 1);
-    qemu_cond_init(cpu->halt_cond);
-
-    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HAX",
-             cpu->cpu_index);
-    qemu_thread_create(cpu->thread, thread_name, hax_cpu_thread_fn,
-                       cpu, QEMU_THREAD_JOINABLE);
-    assert(cpu->accel);
-#ifdef _WIN32
-    cpu->accel->hThread = qemu_thread_get_handle(cpu->thread);
-#endif
-}
-
-static void hax_accel_ops_class_init(ObjectClass *oc, void *data)
-{
-    AccelOpsClass *ops = ACCEL_OPS_CLASS(oc);
-
-    ops->create_vcpu_thread = hax_start_vcpu_thread;
-    ops->kick_vcpu_thread = hax_kick_vcpu_thread;
-
-    ops->synchronize_post_reset = hax_cpu_synchronize_post_reset;
-    ops->synchronize_post_init = hax_cpu_synchronize_post_init;
-    ops->synchronize_state = hax_cpu_synchronize_state;
-    ops->synchronize_pre_loadvm = hax_cpu_synchronize_pre_loadvm;
-}
-
-static const TypeInfo hax_accel_ops_type = {
-    .name = ACCEL_OPS_NAME("hax"),
-
-    .parent = TYPE_ACCEL_OPS,
-    .class_init = hax_accel_ops_class_init,
-    .abstract = true,
-};
-
-static void hax_accel_ops_register_types(void)
-{
-    type_register_static(&hax_accel_ops_type);
-}
-type_init(hax_accel_ops_register_types);
diff --git a/target/i386/hax/hax-accel-ops.h b/target/i386/hax/hax-accel-ops.h
deleted file mode 100644
index 9e357e7b40..0000000000
--- a/target/i386/hax/hax-accel-ops.h
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- * Accelerator CPUS Interface
- *
- * Copyright 2020 SUSE LLC
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- */
-
-#ifndef TARGET_I386_HAX_ACCEL_OPS_H
-#define TARGET_I386_HAX_ACCEL_OPS_H
-
-#include "sysemu/cpus.h"
-
-#include "hax-interface.h"
-#include "hax-i386.h"
-
-int hax_init_vcpu(CPUState *cpu);
-int hax_smp_cpu_exec(CPUState *cpu);
-int hax_populate_ram(uint64_t va, uint64_t size);
-
-void hax_cpu_synchronize_state(CPUState *cpu);
-void hax_cpu_synchronize_post_reset(CPUState *cpu);
-void hax_cpu_synchronize_post_init(CPUState *cpu);
-void hax_cpu_synchronize_pre_loadvm(CPUState *cpu);
-
-int hax_vcpu_destroy(CPUState *cpu);
-void hax_raise_event(CPUState *cpu);
-void hax_reset_vcpu_state(void *opaque);
-
-#endif /* TARGET_I386_HAX_ACCEL_OPS_H */
diff --git a/target/i386/hax/hax-all.c b/target/i386/hax/hax-all.c
deleted file mode 100644
index 18d78e5b6b..0000000000
--- a/target/i386/hax/hax-all.c
+++ /dev/null
@@ -1,1141 +0,0 @@
-/*
- * QEMU HAX support
- *
- * Copyright IBM, Corp. 2008
- *           Red Hat, Inc. 2008
- *
- * Authors:
- *  Anthony Liguori   <aliguori@us.ibm.com>
- *  Glauber Costa     <gcosta@redhat.com>
- *
- * Copyright (c) 2011 Intel Corporation
- *  Written by:
- *  Jiang Yunhong<yunhong.jiang@intel.com>
- *  Xin Xiaohui<xiaohui.xin@intel.com>
- *  Zhang Xiantao<xiantao.zhang@intel.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- *
- */
-
-/*
- * HAX common code for both windows and darwin
- */
-
-#include "qemu/osdep.h"
-#include "cpu.h"
-#include "exec/address-spaces.h"
-
-#include "qemu/accel.h"
-#include "sysemu/reset.h"
-#include "sysemu/runstate.h"
-#include "hw/boards.h"
-
-#include "hax-accel-ops.h"
-
-#define DEBUG_HAX 0
-
-#define DPRINTF(fmt, ...) \
-    do { \
-        if (DEBUG_HAX) { \
-            fprintf(stdout, fmt, ## __VA_ARGS__); \
-        } \
-    } while (0)
-
-/* Current version */
-const uint32_t hax_cur_version = 0x4; /* API v4: unmapping and MMIO moves */
-/* Minimum HAX kernel version */
-const uint32_t hax_min_version = 0x4; /* API v4: supports unmapping */
-
-bool hax_allowed;
-
-struct hax_state hax_global;
-
-static void hax_vcpu_sync_state(CPUArchState *env, int modified);
-static int hax_arch_get_registers(CPUArchState *env);
-
-int valid_hax_tunnel_size(uint16_t size)
-{
-    return size >= sizeof(struct hax_tunnel);
-}
-
-hax_fd hax_vcpu_get_fd(CPUArchState *env)
-{
-    AccelCPUState *vcpu = env_cpu(env)->accel;
-    if (!vcpu) {
-        return HAX_INVALID_FD;
-    }
-    return vcpu->fd;
-}
-
-static int hax_get_capability(struct hax_state *hax)
-{
-    int ret;
-    struct hax_capabilityinfo capinfo, *cap = &capinfo;
-
-    ret = hax_capability(hax, cap);
-    if (ret) {
-        return ret;
-    }
-
-    if ((cap->wstatus & HAX_CAP_WORKSTATUS_MASK) == HAX_CAP_STATUS_NOTWORKING) {
-        if (cap->winfo & HAX_CAP_FAILREASON_VT) {
-            DPRINTF
-                ("VTX feature is not enabled, HAX driver will not work.\n");
-        } else if (cap->winfo & HAX_CAP_FAILREASON_NX) {
-            DPRINTF
-                ("NX feature is not enabled, HAX driver will not work.\n");
-        }
-        return -ENXIO;
-
-    }
-
-    if (!(cap->winfo & HAX_CAP_UG)) {
-        fprintf(stderr, "UG mode is not supported by the hardware.\n");
-        return -ENOTSUP;
-    }
-
-    hax->supports_64bit_ramblock = !!(cap->winfo & HAX_CAP_64BIT_RAMBLOCK);
-
-    if (cap->wstatus & HAX_CAP_MEMQUOTA) {
-        if (cap->mem_quota < hax->mem_quota) {
-            fprintf(stderr, "The VM memory needed exceeds the driver limit.\n");
-            return -ENOSPC;
-        }
-    }
-    return 0;
-}
-
-static int hax_version_support(struct hax_state *hax)
-{
-    int ret;
-    struct hax_module_version version;
-
-    ret = hax_mod_version(hax, &version);
-    if (ret < 0) {
-        return 0;
-    }
-
-    if (hax_min_version > version.cur_version) {
-        fprintf(stderr, "Incompatible HAX module version %d,",
-                version.cur_version);
-        fprintf(stderr, "requires minimum version %d\n", hax_min_version);
-        return 0;
-    }
-    if (hax_cur_version < version.compat_version) {
-        fprintf(stderr, "Incompatible QEMU HAX API version %x,",
-                hax_cur_version);
-        fprintf(stderr, "requires minimum HAX API version %x\n",
-                version.compat_version);
-        return 0;
-    }
-
-    return 1;
-}
-
-int hax_vcpu_create(int id)
-{
-    AccelCPUState *vcpu = NULL;
-    int ret;
-
-    if (!hax_global.vm) {
-        fprintf(stderr, "vcpu %x created failed, vm is null\n", id);
-        return -1;
-    }
-
-    if (hax_global.vm->vcpus[id]) {
-        fprintf(stderr, "vcpu %x allocated already\n", id);
-        return 0;
-    }
-
-    vcpu = g_new0(AccelCPUState, 1);
-
-    ret = hax_host_create_vcpu(hax_global.vm->fd, id);
-    if (ret) {
-        fprintf(stderr, "Failed to create vcpu %x\n", id);
-        goto error;
-    }
-
-    vcpu->vcpu_id = id;
-    vcpu->fd = hax_host_open_vcpu(hax_global.vm->id, id);
-    if (hax_invalid_fd(vcpu->fd)) {
-        fprintf(stderr, "Failed to open the vcpu\n");
-        ret = -ENODEV;
-        goto error;
-    }
-
-    hax_global.vm->vcpus[id] = vcpu;
-
-    ret = hax_host_setup_vcpu_channel(vcpu);
-    if (ret) {
-        fprintf(stderr, "Invalid hax tunnel size\n");
-        ret = -EINVAL;
-        goto error;
-    }
-    return 0;
-
-  error:
-    /* vcpu and tunnel will be closed automatically */
-    if (vcpu && !hax_invalid_fd(vcpu->fd)) {
-        hax_close_fd(vcpu->fd);
-    }
-
-    hax_global.vm->vcpus[id] = NULL;
-    g_free(vcpu);
-    return -1;
-}
-
-int hax_vcpu_destroy(CPUState *cpu)
-{
-    AccelCPUState *vcpu = cpu->accel;
-
-    if (!hax_global.vm) {
-        fprintf(stderr, "vcpu %x destroy failed, vm is null\n", vcpu->vcpu_id);
-        return -1;
-    }
-
-    if (!vcpu) {
-        return 0;
-    }
-
-    /*
-     * 1. The hax_tunnel is also destroyed when vcpu is destroyed
-     * 2. close fd will cause hax module vcpu be cleaned
-     */
-    hax_close_fd(vcpu->fd);
-    hax_global.vm->vcpus[vcpu->vcpu_id] = NULL;
-#ifdef _WIN32
-    CloseHandle(vcpu->hThread);
-#endif
-    g_free(vcpu);
-    cpu->accel = NULL;
-    return 0;
-}
-
-int hax_init_vcpu(CPUState *cpu)
-{
-    int ret;
-
-    ret = hax_vcpu_create(cpu->cpu_index);
-    if (ret < 0) {
-        fprintf(stderr, "Failed to create HAX vcpu\n");
-        exit(-1);
-    }
-
-    cpu->accel = hax_global.vm->vcpus[cpu->cpu_index];
-    cpu->vcpu_dirty = true;
-    qemu_register_reset(hax_reset_vcpu_state, cpu->env_ptr);
-
-    return ret;
-}
-
-struct hax_vm *hax_vm_create(struct hax_state *hax, int max_cpus)
-{
-    struct hax_vm *vm;
-    int vm_id = 0, ret, i;
-
-    if (hax_invalid_fd(hax->fd)) {
-        return NULL;
-    }
-
-    if (hax->vm) {
-        return hax->vm;
-    }
-
-    if (max_cpus > HAX_MAX_VCPU) {
-        fprintf(stderr, "Maximum VCPU number QEMU supported is %d\n", HAX_MAX_VCPU);
-        return NULL;
-    }
-
-    vm = g_new0(struct hax_vm, 1);
-
-    ret = hax_host_create_vm(hax, &vm_id);
-    if (ret) {
-        fprintf(stderr, "Failed to create vm %x\n", ret);
-        goto error;
-    }
-    vm->id = vm_id;
-    vm->fd = hax_host_open_vm(hax, vm_id);
-    if (hax_invalid_fd(vm->fd)) {
-        fprintf(stderr, "Failed to open vm %d\n", vm_id);
-        goto error;
-    }
-
-    vm->numvcpus = max_cpus;
-    vm->vcpus = g_new0(AccelCPUState *, vm->numvcpus);
-    for (i = 0; i < vm->numvcpus; i++) {
-        vm->vcpus[i] = NULL;
-    }
-
-    hax->vm = vm;
-    return vm;
-
-  error:
-    g_free(vm);
-    hax->vm = NULL;
-    return NULL;
-}
-
-int hax_vm_destroy(struct hax_vm *vm)
-{
-    int i;
-
-    for (i = 0; i < vm->numvcpus; i++)
-        if (vm->vcpus[i]) {
-            fprintf(stderr, "VCPU should be cleaned before vm clean\n");
-            return -1;
-        }
-    hax_close_fd(vm->fd);
-    vm->numvcpus = 0;
-    g_free(vm->vcpus);
-    g_free(vm);
-    hax_global.vm = NULL;
-    return 0;
-}
-
-static int hax_init(ram_addr_t ram_size, int max_cpus)
-{
-    struct hax_state *hax = NULL;
-    struct hax_qemu_version qversion;
-    int ret;
-
-    hax = &hax_global;
-
-    memset(hax, 0, sizeof(struct hax_state));
-    hax->mem_quota = ram_size;
-
-    hax->fd = hax_mod_open();
-    if (hax_invalid_fd(hax->fd)) {
-        hax->fd = 0;
-        ret = -ENODEV;
-        goto error;
-    }
-
-    ret = hax_get_capability(hax);
-
-    if (ret) {
-        if (ret != -ENOSPC) {
-            ret = -EINVAL;
-        }
-        goto error;
-    }
-
-    if (!hax_version_support(hax)) {
-        ret = -EINVAL;
-        goto error;
-    }
-
-    hax->vm = hax_vm_create(hax, max_cpus);
-    if (!hax->vm) {
-        fprintf(stderr, "Failed to create HAX VM\n");
-        ret = -EINVAL;
-        goto error;
-    }
-
-    hax_memory_init();
-
-    qversion.cur_version = hax_cur_version;
-    qversion.min_version = hax_min_version;
-    hax_notify_qemu_version(hax->vm->fd, &qversion);
-
-    return ret;
-  error:
-    if (hax->vm) {
-        hax_vm_destroy(hax->vm);
-    }
-    if (hax->fd) {
-        hax_mod_close(hax);
-    }
-
-    return ret;
-}
-
-static int hax_accel_init(MachineState *ms)
-{
-    int ret = hax_init(ms->ram_size, (int)ms->smp.max_cpus);
-
-    if (ret && (ret != -ENOSPC)) {
-        fprintf(stderr, "No accelerator found.\n");
-    } else {
-        fprintf(stdout, "HAX is %s and emulator runs in %s mode.\n",
-                !ret ? "working" : "not working",
-                !ret ? "fast virt" : "emulation");
-        fprintf(stdout,
-                "NOTE: HAX is deprecated and will be removed in a future release.\n"
-                "      Use 'whpx' (on Windows) or 'hvf' (on macOS) instead.\n");
-    }
-    return ret;
-}
-
-static int hax_handle_fastmmio(CPUArchState *env, struct hax_fastmmio *hft)
-{
-    if (hft->direction < 2) {
-        cpu_physical_memory_rw(hft->gpa, &hft->value, hft->size,
-                               hft->direction);
-    } else {
-        /*
-         * HAX API v4 supports transferring data between two MMIO addresses,
-         * hft->gpa and hft->gpa2 (instructions such as MOVS require this):
-         *  hft->direction == 2: gpa ==> gpa2
-         */
-        uint64_t value;
-        cpu_physical_memory_read(hft->gpa, &value, hft->size);
-        cpu_physical_memory_write(hft->gpa2, &value, hft->size);
-    }
-
-    return 0;
-}
-
-static int hax_handle_io(CPUArchState *env, uint32_t df, uint16_t port,
-                         int direction, int size, int count, void *buffer)
-{
-    uint8_t *ptr;
-    int i;
-    MemTxAttrs attrs = { 0 };
-
-    if (!df) {
-        ptr = buffer;
-    } else {
-        ptr = buffer + size * count - size;
-    }
-    for (i = 0; i < count; i++) {
-        address_space_rw(&address_space_io, port, attrs,
-                         ptr, size, direction == HAX_EXIT_IO_OUT);
-        if (!df) {
-            ptr += size;
-        } else {
-            ptr -= size;
-        }
-    }
-
-    return 0;
-}
-
-static int hax_vcpu_interrupt(CPUArchState *env)
-{
-    CPUState *cpu = env_cpu(env);
-    AccelCPUState *vcpu = cpu->accel;
-    struct hax_tunnel *ht = vcpu->tunnel;
-
-    /*
-     * Try to inject an interrupt if the guest can accept it
-     * Unlike KVM, HAX kernel check for the eflags, instead of qemu
-     */
-    if (ht->ready_for_interrupt_injection &&
-        (cpu->interrupt_request & CPU_INTERRUPT_HARD)) {
-        int irq;
-
-        irq = cpu_get_pic_interrupt(env);
-        if (irq >= 0) {
-            hax_inject_interrupt(env, irq);
-            cpu->interrupt_request &= ~CPU_INTERRUPT_HARD;
-        }
-    }
-
-    /* If we have an interrupt but the guest is not ready to receive an
-     * interrupt, request an interrupt window exit.  This will
-     * cause a return to userspace as soon as the guest is ready to
-     * receive interrupts. */
-    if ((cpu->interrupt_request & CPU_INTERRUPT_HARD)) {
-        ht->request_interrupt_window = 1;
-    } else {
-        ht->request_interrupt_window = 0;
-    }
-    return 0;
-}
-
-void hax_raise_event(CPUState *cpu)
-{
-    AccelCPUState *vcpu = cpu->accel;
-
-    if (!vcpu) {
-        return;
-    }
-    vcpu->tunnel->user_event_pending = 1;
-}
-
-/*
- * Ask hax kernel module to run the CPU for us till:
- * 1. Guest crash or shutdown
- * 2. Need QEMU's emulation like guest execute MMIO instruction
- * 3. Guest execute HLT
- * 4. QEMU have Signal/event pending
- * 5. An unknown VMX exit happens
- */
-static int hax_vcpu_hax_exec(CPUArchState *env)
-{
-    int ret = 0;
-    CPUState *cpu = env_cpu(env);
-    X86CPU *x86_cpu = X86_CPU(cpu);
-    AccelCPUState *vcpu = cpu->accel;
-    struct hax_tunnel *ht = vcpu->tunnel;
-
-    if (!hax_enabled()) {
-        DPRINTF("Trying to vcpu execute at eip:" TARGET_FMT_lx "\n", env->eip);
-        return 0;
-    }
-
-    if (cpu->interrupt_request & CPU_INTERRUPT_POLL) {
-        cpu->interrupt_request &= ~CPU_INTERRUPT_POLL;
-        apic_poll_irq(x86_cpu->apic_state);
-    }
-
-    /* After a vcpu is halted (either because it is an AP and has just been
-     * reset, or because it has executed the HLT instruction), it will not be
-     * run (hax_vcpu_run()) until it is unhalted. The next few if blocks check
-     * for events that may change the halted state of this vcpu:
-     *  a) Maskable interrupt, when RFLAGS.IF is 1;
-     *     Note: env->eflags may not reflect the current RFLAGS state, because
-     *           it is not updated after each hax_vcpu_run(). We cannot afford
-     *           to fail to recognize any unhalt-by-maskable-interrupt event
-     *           (in which case the vcpu will halt forever), and yet we cannot
-     *           afford the overhead of hax_vcpu_sync_state(). The current
-     *           solution is to err on the side of caution and have the HLT
-     *           handler (see case HAX_EXIT_HLT below) unconditionally set the
-     *           IF_MASK bit in env->eflags, which, in effect, disables the
-     *           RFLAGS.IF check.
-     *  b) NMI;
-     *  c) INIT signal;
-     *  d) SIPI signal.
-     */
-    if (((cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
-         (env->eflags & IF_MASK)) ||
-        (cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
-        cpu->halted = 0;
-    }
-
-    if (cpu->interrupt_request & CPU_INTERRUPT_INIT) {
-        DPRINTF("\nhax_vcpu_hax_exec: handling INIT for %d\n",
-                cpu->cpu_index);
-        do_cpu_init(x86_cpu);
-        hax_vcpu_sync_state(env, 1);
-    }
-
-    if (cpu->interrupt_request & CPU_INTERRUPT_SIPI) {
-        DPRINTF("hax_vcpu_hax_exec: handling SIPI for %d\n",
-                cpu->cpu_index);
-        hax_vcpu_sync_state(env, 0);
-        do_cpu_sipi(x86_cpu);
-        hax_vcpu_sync_state(env, 1);
-    }
-
-    if (cpu->halted) {
-        /* If this vcpu is halted, we must not ask HAXM to run it. Instead, we
-         * break out of hax_smp_cpu_exec() as if this vcpu had executed HLT.
-         * That way, this vcpu thread will be trapped in qemu_wait_io_event(),
-         * until the vcpu is unhalted.
-         */
-        cpu->exception_index = EXCP_HLT;
-        return 0;
-    }
-
-    do {
-        int hax_ret;
-
-        if (cpu->exit_request) {
-            ret = 1;
-            break;
-        }
-
-        hax_vcpu_interrupt(env);
-
-        qemu_mutex_unlock_iothread();
-        cpu_exec_start(cpu);
-        hax_ret = hax_vcpu_run(vcpu);
-        cpu_exec_end(cpu);
-        qemu_mutex_lock_iothread();
-
-        /* Simply continue the vcpu_run if system call interrupted */
-        if (hax_ret == -EINTR || hax_ret == -EAGAIN) {
-            DPRINTF("io window interrupted\n");
-            continue;
-        }
-
-        if (hax_ret < 0) {
-            fprintf(stderr, "vcpu run failed for vcpu  %x\n", vcpu->vcpu_id);
-            abort();
-        }
-        switch (ht->_exit_status) {
-        case HAX_EXIT_IO:
-            ret = hax_handle_io(env, ht->pio._df, ht->pio._port,
-                            ht->pio._direction,
-                            ht->pio._size, ht->pio._count, vcpu->iobuf);
-            break;
-        case HAX_EXIT_FAST_MMIO:
-            ret = hax_handle_fastmmio(env, (struct hax_fastmmio *) vcpu->iobuf);
-            break;
-        /* Guest state changed, currently only for shutdown */
-        case HAX_EXIT_STATECHANGE:
-            fprintf(stdout, "VCPU shutdown request\n");
-            qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
-            hax_vcpu_sync_state(env, 0);
-            ret = 1;
-            break;
-        case HAX_EXIT_UNKNOWN_VMEXIT:
-            fprintf(stderr, "Unknown VMX exit %x from guest\n",
-                    ht->_exit_reason);
-            qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
-            hax_vcpu_sync_state(env, 0);
-            cpu_dump_state(cpu, stderr, 0);
-            ret = -1;
-            break;
-        case HAX_EXIT_HLT:
-            if (!(cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
-                !(cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
-                /* hlt instruction with interrupt disabled is shutdown */
-                env->eflags |= IF_MASK;
-                cpu->halted = 1;
-                cpu->exception_index = EXCP_HLT;
-                ret = 1;
-            }
-            break;
-        /* these situations will continue to hax module */
-        case HAX_EXIT_INTERRUPT:
-        case HAX_EXIT_PAUSED:
-            break;
-        case HAX_EXIT_MMIO:
-            /* Should not happen on UG system */
-            fprintf(stderr, "HAX: unsupported MMIO emulation\n");
-            ret = -1;
-            break;
-        case HAX_EXIT_REAL:
-            /* Should not happen on UG system */
-            fprintf(stderr, "HAX: unimplemented real mode emulation\n");
-            ret = -1;
-            break;
-        default:
-            fprintf(stderr, "Unknown exit %x from HAX\n", ht->_exit_status);
-            qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
-            hax_vcpu_sync_state(env, 0);
-            cpu_dump_state(cpu, stderr, 0);
-            ret = 1;
-            break;
-        }
-    } while (!ret);
-
-    if (cpu->exit_request) {
-        cpu->exit_request = 0;
-        cpu->exception_index = EXCP_INTERRUPT;
-    }
-    return ret < 0;
-}
-
-static void do_hax_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
-{
-    CPUArchState *env = cpu->env_ptr;
-
-    hax_arch_get_registers(env);
-    cpu->vcpu_dirty = true;
-}
-
-void hax_cpu_synchronize_state(CPUState *cpu)
-{
-    if (!cpu->vcpu_dirty) {
-        run_on_cpu(cpu, do_hax_cpu_synchronize_state, RUN_ON_CPU_NULL);
-    }
-}
-
-static void do_hax_cpu_synchronize_post_reset(CPUState *cpu,
-                                              run_on_cpu_data arg)
-{
-    CPUArchState *env = cpu->env_ptr;
-
-    hax_vcpu_sync_state(env, 1);
-    cpu->vcpu_dirty = false;
-}
-
-void hax_cpu_synchronize_post_reset(CPUState *cpu)
-{
-    run_on_cpu(cpu, do_hax_cpu_synchronize_post_reset, RUN_ON_CPU_NULL);
-}
-
-static void do_hax_cpu_synchronize_post_init(CPUState *cpu, run_on_cpu_data arg)
-{
-    CPUArchState *env = cpu->env_ptr;
-
-    hax_vcpu_sync_state(env, 1);
-    cpu->vcpu_dirty = false;
-}
-
-void hax_cpu_synchronize_post_init(CPUState *cpu)
-{
-    run_on_cpu(cpu, do_hax_cpu_synchronize_post_init, RUN_ON_CPU_NULL);
-}
-
-static void do_hax_cpu_synchronize_pre_loadvm(CPUState *cpu, run_on_cpu_data arg)
-{
-    cpu->vcpu_dirty = true;
-}
-
-void hax_cpu_synchronize_pre_loadvm(CPUState *cpu)
-{
-    run_on_cpu(cpu, do_hax_cpu_synchronize_pre_loadvm, RUN_ON_CPU_NULL);
-}
-
-int hax_smp_cpu_exec(CPUState *cpu)
-{
-    CPUArchState *env = cpu->env_ptr;
-    int fatal;
-    int ret;
-
-    while (1) {
-        if (cpu->exception_index >= EXCP_INTERRUPT) {
-            ret = cpu->exception_index;
-            cpu->exception_index = -1;
-            break;
-        }
-
-        fatal = hax_vcpu_hax_exec(env);
-
-        if (fatal) {
-            fprintf(stderr, "Unsupported HAX vcpu return\n");
-            abort();
-        }
-    }
-
-    return ret;
-}
-
-static void set_v8086_seg(struct segment_desc_t *lhs, const SegmentCache *rhs)
-{
-    memset(lhs, 0, sizeof(struct segment_desc_t));
-    lhs->selector = rhs->selector;
-    lhs->base = rhs->base;
-    lhs->limit = rhs->limit;
-    lhs->type = 3;
-    lhs->present = 1;
-    lhs->dpl = 3;
-    lhs->operand_size = 0;
-    lhs->desc = 1;
-    lhs->long_mode = 0;
-    lhs->granularity = 0;
-    lhs->available = 0;
-}
-
-static void get_seg(SegmentCache *lhs, const struct segment_desc_t *rhs)
-{
-    lhs->selector = rhs->selector;
-    lhs->base = rhs->base;
-    lhs->limit = rhs->limit;
-    lhs->flags = (rhs->type << DESC_TYPE_SHIFT)
-        | (rhs->present * DESC_P_MASK)
-        | (rhs->dpl << DESC_DPL_SHIFT)
-        | (rhs->operand_size << DESC_B_SHIFT)
-        | (rhs->desc * DESC_S_MASK)
-        | (rhs->long_mode << DESC_L_SHIFT)
-        | (rhs->granularity * DESC_G_MASK) | (rhs->available * DESC_AVL_MASK);
-}
-
-static void set_seg(struct segment_desc_t *lhs, const SegmentCache *rhs)
-{
-    unsigned flags = rhs->flags;
-
-    memset(lhs, 0, sizeof(struct segment_desc_t));
-    lhs->selector = rhs->selector;
-    lhs->base = rhs->base;
-    lhs->limit = rhs->limit;
-    lhs->type = (flags >> DESC_TYPE_SHIFT) & 15;
-    lhs->present = (flags & DESC_P_MASK) != 0;
-    lhs->dpl = rhs->selector & 3;
-    lhs->operand_size = (flags >> DESC_B_SHIFT) & 1;
-    lhs->desc = (flags & DESC_S_MASK) != 0;
-    lhs->long_mode = (flags >> DESC_L_SHIFT) & 1;
-    lhs->granularity = (flags & DESC_G_MASK) != 0;
-    lhs->available = (flags & DESC_AVL_MASK) != 0;
-}
-
-static void hax_getput_reg(uint64_t *hax_reg, target_ulong *qemu_reg, int set)
-{
-    target_ulong reg = *hax_reg;
-
-    if (set) {
-        *hax_reg = *qemu_reg;
-    } else {
-        *qemu_reg = reg;
-    }
-}
-
-/* The sregs has been synced with HAX kernel already before this call */
-static int hax_get_segments(CPUArchState *env, struct vcpu_state_t *sregs)
-{
-    get_seg(&env->segs[R_CS], &sregs->_cs);
-    get_seg(&env->segs[R_DS], &sregs->_ds);
-    get_seg(&env->segs[R_ES], &sregs->_es);
-    get_seg(&env->segs[R_FS], &sregs->_fs);
-    get_seg(&env->segs[R_GS], &sregs->_gs);
-    get_seg(&env->segs[R_SS], &sregs->_ss);
-
-    get_seg(&env->tr, &sregs->_tr);
-    get_seg(&env->ldt, &sregs->_ldt);
-    env->idt.limit = sregs->_idt.limit;
-    env->idt.base = sregs->_idt.base;
-    env->gdt.limit = sregs->_gdt.limit;
-    env->gdt.base = sregs->_gdt.base;
-    return 0;
-}
-
-static int hax_set_segments(CPUArchState *env, struct vcpu_state_t *sregs)
-{
-    if ((env->eflags & VM_MASK)) {
-        set_v8086_seg(&sregs->_cs, &env->segs[R_CS]);
-        set_v8086_seg(&sregs->_ds, &env->segs[R_DS]);
-        set_v8086_seg(&sregs->_es, &env->segs[R_ES]);
-        set_v8086_seg(&sregs->_fs, &env->segs[R_FS]);
-        set_v8086_seg(&sregs->_gs, &env->segs[R_GS]);
-        set_v8086_seg(&sregs->_ss, &env->segs[R_SS]);
-    } else {
-        set_seg(&sregs->_cs, &env->segs[R_CS]);
-        set_seg(&sregs->_ds, &env->segs[R_DS]);
-        set_seg(&sregs->_es, &env->segs[R_ES]);
-        set_seg(&sregs->_fs, &env->segs[R_FS]);
-        set_seg(&sregs->_gs, &env->segs[R_GS]);
-        set_seg(&sregs->_ss, &env->segs[R_SS]);
-
-        if (env->cr[0] & CR0_PE_MASK) {
-            /* force ss cpl to cs cpl */
-            sregs->_ss.selector = (sregs->_ss.selector & ~3) |
-                                  (sregs->_cs.selector & 3);
-            sregs->_ss.dpl = sregs->_ss.selector & 3;
-        }
-    }
-
-    set_seg(&sregs->_tr, &env->tr);
-    set_seg(&sregs->_ldt, &env->ldt);
-    sregs->_idt.limit = env->idt.limit;
-    sregs->_idt.base = env->idt.base;
-    sregs->_gdt.limit = env->gdt.limit;
-    sregs->_gdt.base = env->gdt.base;
-    return 0;
-}
-
-static int hax_sync_vcpu_register(CPUArchState *env, int set)
-{
-    struct vcpu_state_t regs;
-    int ret;
-    memset(&regs, 0, sizeof(struct vcpu_state_t));
-
-    if (!set) {
-        ret = hax_sync_vcpu_state(env, &regs, 0);
-        if (ret < 0) {
-            return -1;
-        }
-    }
-
-    /* generic register */
-    hax_getput_reg(&regs._rax, &env->regs[R_EAX], set);
-    hax_getput_reg(&regs._rbx, &env->regs[R_EBX], set);
-    hax_getput_reg(&regs._rcx, &env->regs[R_ECX], set);
-    hax_getput_reg(&regs._rdx, &env->regs[R_EDX], set);
-    hax_getput_reg(&regs._rsi, &env->regs[R_ESI], set);
-    hax_getput_reg(&regs._rdi, &env->regs[R_EDI], set);
-    hax_getput_reg(&regs._rsp, &env->regs[R_ESP], set);
-    hax_getput_reg(&regs._rbp, &env->regs[R_EBP], set);
-#ifdef TARGET_X86_64
-    hax_getput_reg(&regs._r8, &env->regs[8], set);
-    hax_getput_reg(&regs._r9, &env->regs[9], set);
-    hax_getput_reg(&regs._r10, &env->regs[10], set);
-    hax_getput_reg(&regs._r11, &env->regs[11], set);
-    hax_getput_reg(&regs._r12, &env->regs[12], set);
-    hax_getput_reg(&regs._r13, &env->regs[13], set);
-    hax_getput_reg(&regs._r14, &env->regs[14], set);
-    hax_getput_reg(&regs._r15, &env->regs[15], set);
-#endif
-    hax_getput_reg(&regs._rflags, &env->eflags, set);
-    hax_getput_reg(&regs._rip, &env->eip, set);
-
-    if (set) {
-        regs._cr0 = env->cr[0];
-        regs._cr2 = env->cr[2];
-        regs._cr3 = env->cr[3];
-        regs._cr4 = env->cr[4];
-        hax_set_segments(env, &regs);
-    } else {
-        env->cr[0] = regs._cr0;
-        env->cr[2] = regs._cr2;
-        env->cr[3] = regs._cr3;
-        env->cr[4] = regs._cr4;
-        hax_get_segments(env, &regs);
-    }
-
-    if (set) {
-        ret = hax_sync_vcpu_state(env, &regs, 1);
-        if (ret < 0) {
-            return -1;
-        }
-    }
-    return 0;
-}
-
-static void hax_msr_entry_set(struct vmx_msr *item, uint32_t index,
-                              uint64_t value)
-{
-    item->entry = index;
-    item->value = value;
-}
-
-static int hax_get_msrs(CPUArchState *env)
-{
-    struct hax_msr_data md;
-    struct vmx_msr *msrs = md.entries;
-    int ret, i, n;
-
-    n = 0;
-    msrs[n++].entry = MSR_IA32_SYSENTER_CS;
-    msrs[n++].entry = MSR_IA32_SYSENTER_ESP;
-    msrs[n++].entry = MSR_IA32_SYSENTER_EIP;
-    msrs[n++].entry = MSR_IA32_TSC;
-#ifdef TARGET_X86_64
-    msrs[n++].entry = MSR_EFER;
-    msrs[n++].entry = MSR_STAR;
-    msrs[n++].entry = MSR_LSTAR;
-    msrs[n++].entry = MSR_CSTAR;
-    msrs[n++].entry = MSR_FMASK;
-    msrs[n++].entry = MSR_KERNELGSBASE;
-#endif
-    md.nr_msr = n;
-    ret = hax_sync_msr(env, &md, 0);
-    if (ret < 0) {
-        return ret;
-    }
-
-    for (i = 0; i < md.done; i++) {
-        switch (msrs[i].entry) {
-        case MSR_IA32_SYSENTER_CS:
-            env->sysenter_cs = msrs[i].value;
-            break;
-        case MSR_IA32_SYSENTER_ESP:
-            env->sysenter_esp = msrs[i].value;
-            break;
-        case MSR_IA32_SYSENTER_EIP:
-            env->sysenter_eip = msrs[i].value;
-            break;
-        case MSR_IA32_TSC:
-            env->tsc = msrs[i].value;
-            break;
-#ifdef TARGET_X86_64
-        case MSR_EFER:
-            env->efer = msrs[i].value;
-            break;
-        case MSR_STAR:
-            env->star = msrs[i].value;
-            break;
-        case MSR_LSTAR:
-            env->lstar = msrs[i].value;
-            break;
-        case MSR_CSTAR:
-            env->cstar = msrs[i].value;
-            break;
-        case MSR_FMASK:
-            env->fmask = msrs[i].value;
-            break;
-        case MSR_KERNELGSBASE:
-            env->kernelgsbase = msrs[i].value;
-            break;
-#endif
-        }
-    }
-
-    return 0;
-}
-
-static int hax_set_msrs(CPUArchState *env)
-{
-    struct hax_msr_data md;
-    struct vmx_msr *msrs;
-    msrs = md.entries;
-    int n = 0;
-
-    memset(&md, 0, sizeof(struct hax_msr_data));
-    hax_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_CS, env->sysenter_cs);
-    hax_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_ESP, env->sysenter_esp);
-    hax_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_EIP, env->sysenter_eip);
-    hax_msr_entry_set(&msrs[n++], MSR_IA32_TSC, env->tsc);
-#ifdef TARGET_X86_64
-    hax_msr_entry_set(&msrs[n++], MSR_EFER, env->efer);
-    hax_msr_entry_set(&msrs[n++], MSR_STAR, env->star);
-    hax_msr_entry_set(&msrs[n++], MSR_LSTAR, env->lstar);
-    hax_msr_entry_set(&msrs[n++], MSR_CSTAR, env->cstar);
-    hax_msr_entry_set(&msrs[n++], MSR_FMASK, env->fmask);
-    hax_msr_entry_set(&msrs[n++], MSR_KERNELGSBASE, env->kernelgsbase);
-#endif
-    md.nr_msr = n;
-    md.done = 0;
-
-    return hax_sync_msr(env, &md, 1);
-}
-
-static int hax_get_fpu(CPUArchState *env)
-{
-    struct fx_layout fpu;
-    int i, ret;
-
-    ret = hax_sync_fpu(env, &fpu, 0);
-    if (ret < 0) {
-        return ret;
-    }
-
-    env->fpstt = (fpu.fsw >> 11) & 7;
-    env->fpus = fpu.fsw;
-    env->fpuc = fpu.fcw;
-    for (i = 0; i < 8; ++i) {
-        env->fptags[i] = !((fpu.ftw >> i) & 1);
-    }
-    memcpy(env->fpregs, fpu.st_mm, sizeof(env->fpregs));
-
-    for (i = 0; i < 8; i++) {
-        env->xmm_regs[i].ZMM_Q(0) = ldq_p(&fpu.mmx_1[i][0]);
-        env->xmm_regs[i].ZMM_Q(1) = ldq_p(&fpu.mmx_1[i][8]);
-        if (CPU_NB_REGS > 8) {
-            env->xmm_regs[i + 8].ZMM_Q(0) = ldq_p(&fpu.mmx_2[i][0]);
-            env->xmm_regs[i + 8].ZMM_Q(1) = ldq_p(&fpu.mmx_2[i][8]);
-        }
-    }
-    env->mxcsr = fpu.mxcsr;
-
-    return 0;
-}
-
-static int hax_set_fpu(CPUArchState *env)
-{
-    struct fx_layout fpu;
-    int i;
-
-    memset(&fpu, 0, sizeof(fpu));
-    fpu.fsw = env->fpus & ~(7 << 11);
-    fpu.fsw |= (env->fpstt & 7) << 11;
-    fpu.fcw = env->fpuc;
-
-    for (i = 0; i < 8; ++i) {
-        fpu.ftw |= (!env->fptags[i]) << i;
-    }
-
-    memcpy(fpu.st_mm, env->fpregs, sizeof(env->fpregs));
-    for (i = 0; i < 8; i++) {
-        stq_p(&fpu.mmx_1[i][0], env->xmm_regs[i].ZMM_Q(0));
-        stq_p(&fpu.mmx_1[i][8], env->xmm_regs[i].ZMM_Q(1));
-        if (CPU_NB_REGS > 8) {
-            stq_p(&fpu.mmx_2[i][0], env->xmm_regs[i + 8].ZMM_Q(0));
-            stq_p(&fpu.mmx_2[i][8], env->xmm_regs[i + 8].ZMM_Q(1));
-        }
-    }
-
-    fpu.mxcsr = env->mxcsr;
-
-    return hax_sync_fpu(env, &fpu, 1);
-}
-
-static int hax_arch_get_registers(CPUArchState *env)
-{
-    int ret;
-
-    ret = hax_sync_vcpu_register(env, 0);
-    if (ret < 0) {
-        return ret;
-    }
-
-    ret = hax_get_fpu(env);
-    if (ret < 0) {
-        return ret;
-    }
-
-    ret = hax_get_msrs(env);
-    if (ret < 0) {
-        return ret;
-    }
-
-    x86_update_hflags(env);
-    return 0;
-}
-
-static int hax_arch_set_registers(CPUArchState *env)
-{
-    int ret;
-    ret = hax_sync_vcpu_register(env, 1);
-
-    if (ret < 0) {
-        fprintf(stderr, "Failed to sync vcpu reg\n");
-        return ret;
-    }
-    ret = hax_set_fpu(env);
-    if (ret < 0) {
-        fprintf(stderr, "FPU failed\n");
-        return ret;
-    }
-    ret = hax_set_msrs(env);
-    if (ret < 0) {
-        fprintf(stderr, "MSR failed\n");
-        return ret;
-    }
-
-    return 0;
-}
-
-static void hax_vcpu_sync_state(CPUArchState *env, int modified)
-{
-    if (hax_enabled()) {
-        if (modified) {
-            hax_arch_set_registers(env);
-        } else {
-            hax_arch_get_registers(env);
-        }
-    }
-}
-
-/*
- * much simpler than kvm, at least in first stage because:
- * We don't need consider the device pass-through, we don't need
- * consider the framebuffer, and we may even remove the bios at all
- */
-int hax_sync_vcpus(void)
-{
-    if (hax_enabled()) {
-        CPUState *cpu;
-
-        cpu = first_cpu;
-        if (!cpu) {
-            return 0;
-        }
-
-        for (; cpu != NULL; cpu = CPU_NEXT(cpu)) {
-            int ret;
-
-            ret = hax_arch_set_registers(cpu->env_ptr);
-            if (ret < 0) {
-                return ret;
-            }
-        }
-    }
-
-    return 0;
-}
-
-void hax_reset_vcpu_state(void *opaque)
-{
-    CPUState *cpu;
-    for (cpu = first_cpu; cpu != NULL; cpu = CPU_NEXT(cpu)) {
-        cpu->accel->tunnel->user_event_pending = 0;
-        cpu->accel->tunnel->ready_for_interrupt_injection = 0;
-    }
-}
-
-static void hax_accel_class_init(ObjectClass *oc, void *data)
-{
-    AccelClass *ac = ACCEL_CLASS(oc);
-    ac->name = "HAX";
-    ac->init_machine = hax_accel_init;
-    ac->allowed = &hax_allowed;
-}
-
-static const TypeInfo hax_accel_type = {
-    .name = ACCEL_CLASS_NAME("hax"),
-    .parent = TYPE_ACCEL,
-    .class_init = hax_accel_class_init,
-};
-
-static void hax_type_init(void)
-{
-    type_register_static(&hax_accel_type);
-}
-
-type_init(hax_type_init);
diff --git a/target/i386/hax/hax-i386.h b/target/i386/hax/hax-i386.h
deleted file mode 100644
index 87153f40ab..0000000000
--- a/target/i386/hax/hax-i386.h
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- * QEMU HAXM support
- *
- * Copyright (c) 2011 Intel Corporation
- *  Written by:
- *  Jiang Yunhong<yunhong.jiang@intel.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- *
- */
-
-#ifndef HAX_I386_H
-#define HAX_I386_H
-
-#include "cpu.h"
-#include "sysemu/hax.h"
-
-#ifdef CONFIG_POSIX
-typedef int hax_fd;
-#endif
-
-#ifdef CONFIG_WIN32
-typedef HANDLE hax_fd;
-#endif
-
-extern struct hax_state hax_global;
-
-struct AccelCPUState {
-#ifdef _WIN32
-    HANDLE hThread;
-#endif
-    hax_fd fd;
-    int vcpu_id;
-    struct hax_tunnel *tunnel;
-    unsigned char *iobuf;
-};
-
-struct hax_state {
-    hax_fd fd; /* the global hax device interface */
-    uint32_t version;
-    struct hax_vm *vm;
-    uint64_t mem_quota;
-    bool supports_64bit_ramblock;
-};
-
-#define HAX_MAX_VCPU 0x10
-
-struct hax_vm {
-    hax_fd fd;
-    int id;
-    int numvcpus;
-    AccelCPUState **vcpus;
-};
-
-/* Functions exported to host specific mode */
-hax_fd hax_vcpu_get_fd(CPUArchState *env);
-int valid_hax_tunnel_size(uint16_t size);
-
-/* Host specific functions */
-int hax_mod_version(struct hax_state *hax, struct hax_module_version *version);
-int hax_inject_interrupt(CPUArchState *env, int vector);
-struct hax_vm *hax_vm_create(struct hax_state *hax, int max_cpus);
-int hax_vcpu_run(AccelCPUState *vcpu);
-int hax_vcpu_create(int id);
-void hax_kick_vcpu_thread(CPUState *cpu);
-
-int hax_sync_vcpu_state(CPUArchState *env, struct vcpu_state_t *state,
-                        int set);
-int hax_sync_msr(CPUArchState *env, struct hax_msr_data *msrs, int set);
-int hax_sync_fpu(CPUArchState *env, struct fx_layout *fl, int set);
-
-int hax_vm_destroy(struct hax_vm *vm);
-int hax_capability(struct hax_state *hax, struct hax_capabilityinfo *cap);
-int hax_notify_qemu_version(hax_fd vm_fd, struct hax_qemu_version *qversion);
-int hax_set_ram(uint64_t start_pa, uint32_t size, uint64_t host_va, int flags);
-
-/* Common host function */
-int hax_host_create_vm(struct hax_state *hax, int *vm_id);
-hax_fd hax_host_open_vm(struct hax_state *hax, int vm_id);
-int hax_host_create_vcpu(hax_fd vm_fd, int vcpuid);
-hax_fd hax_host_open_vcpu(int vmid, int vcpuid);
-int hax_host_setup_vcpu_channel(AccelCPUState *vcpu);
-hax_fd hax_mod_open(void);
-void hax_memory_init(void);
-
-
-#ifdef CONFIG_POSIX
-#include "hax-posix.h"
-#endif
-
-#ifdef CONFIG_WIN32
-#include "hax-windows.h"
-#endif
-
-#include "hax-interface.h"
-
-#endif
diff --git a/target/i386/hax/hax-interface.h b/target/i386/hax/hax-interface.h
deleted file mode 100644
index 537ae084e9..0000000000
--- a/target/i386/hax/hax-interface.h
+++ /dev/null
@@ -1,369 +0,0 @@
-/*
- * QEMU HAXM support
- *
- * Copyright (c) 2011 Intel Corporation
- *  Written by:
- *  Jiang Yunhong<yunhong.jiang@intel.com>
- *  Xin Xiaohui<xiaohui.xin@intel.com>
- *  Zhang Xiantao<xiantao.zhang@intel.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- *
- */
-
-/* Interface with HAX kernel module */
-
-#ifndef HAX_INTERFACE_H
-#define HAX_INTERFACE_H
-
-/* fx_layout has 3 formats table 3-56, 512bytes */
-struct fx_layout {
-    uint16_t fcw;
-    uint16_t fsw;
-    uint8_t ftw;
-    uint8_t res1;
-    uint16_t fop;
-    union {
-        struct {
-            uint32_t fip;
-            uint16_t fcs;
-            uint16_t res2;
-        };
-        uint64_t fpu_ip;
-    };
-    union {
-        struct {
-            uint32_t fdp;
-            uint16_t fds;
-            uint16_t res3;
-        };
-        uint64_t fpu_dp;
-    };
-    uint32_t mxcsr;
-    uint32_t mxcsr_mask;
-    uint8_t st_mm[8][16];
-    uint8_t mmx_1[8][16];
-    uint8_t mmx_2[8][16];
-    uint8_t pad[96];
-} __attribute__ ((aligned(8)));
-
-struct vmx_msr {
-    uint64_t entry;
-    uint64_t value;
-} __attribute__ ((__packed__));
-
-/*
- * Fixed array is not good, but it makes Mac support a bit easier by avoiding
- * memory map or copyin staff.
- */
-#define HAX_MAX_MSR_ARRAY 0x20
-struct hax_msr_data {
-    uint16_t nr_msr;
-    uint16_t done;
-    uint16_t pad[2];
-    struct vmx_msr entries[HAX_MAX_MSR_ARRAY];
-} __attribute__ ((__packed__));
-
-union interruptibility_state_t {
-    uint32_t raw;
-    struct {
-        uint32_t sti_blocking:1;
-        uint32_t movss_blocking:1;
-        uint32_t smi_blocking:1;
-        uint32_t nmi_blocking:1;
-        uint32_t reserved:28;
-    };
-    uint64_t pad;
-};
-
-typedef union interruptibility_state_t interruptibility_state_t;
-
-/* Segment descriptor */
-struct segment_desc_t {
-    uint16_t selector;
-    uint16_t _dummy;
-    uint32_t limit;
-    uint64_t base;
-    union {
-        struct {
-            uint32_t type:4;
-            uint32_t desc:1;
-            uint32_t dpl:2;
-            uint32_t present:1;
-            uint32_t:4;
-            uint32_t available:1;
-            uint32_t long_mode:1;
-            uint32_t operand_size:1;
-            uint32_t granularity:1;
-            uint32_t null:1;
-            uint32_t:15;
-        };
-        uint32_t ar;
-    };
-    uint32_t ipad;
-};
-
-typedef struct segment_desc_t segment_desc_t;
-
-struct vcpu_state_t {
-    union {
-        uint64_t _regs[16];
-        struct {
-            union {
-                struct {
-                    uint8_t _al, _ah;
-                };
-                uint16_t _ax;
-                uint32_t _eax;
-                uint64_t _rax;
-            };
-            union {
-                struct {
-                    uint8_t _cl, _ch;
-                };
-                uint16_t _cx;
-                uint32_t _ecx;
-                uint64_t _rcx;
-            };
-            union {
-                struct {
-                    uint8_t _dl, _dh;
-                };
-                uint16_t _dx;
-                uint32_t _edx;
-                uint64_t _rdx;
-            };
-            union {
-                struct {
-                    uint8_t _bl, _bh;
-                };
-                uint16_t _bx;
-                uint32_t _ebx;
-                uint64_t _rbx;
-            };
-            union {
-                uint16_t _sp;
-                uint32_t _esp;
-                uint64_t _rsp;
-            };
-            union {
-                uint16_t _bp;
-                uint32_t _ebp;
-                uint64_t _rbp;
-            };
-            union {
-                uint16_t _si;
-                uint32_t _esi;
-                uint64_t _rsi;
-            };
-            union {
-                uint16_t _di;
-                uint32_t _edi;
-                uint64_t _rdi;
-            };
-
-            uint64_t _r8;
-            uint64_t _r9;
-            uint64_t _r10;
-            uint64_t _r11;
-            uint64_t _r12;
-            uint64_t _r13;
-            uint64_t _r14;
-            uint64_t _r15;
-        };
-    };
-
-    union {
-        uint32_t _eip;
-        uint64_t _rip;
-    };
-
-    union {
-        uint32_t _eflags;
-        uint64_t _rflags;
-    };
-
-    segment_desc_t _cs;
-    segment_desc_t _ss;
-    segment_desc_t _ds;
-    segment_desc_t _es;
-    segment_desc_t _fs;
-    segment_desc_t _gs;
-    segment_desc_t _ldt;
-    segment_desc_t _tr;
-
-    segment_desc_t _gdt;
-    segment_desc_t _idt;
-
-    uint64_t _cr0;
-    uint64_t _cr2;
-    uint64_t _cr3;
-    uint64_t _cr4;
-
-    uint64_t _dr0;
-    uint64_t _dr1;
-    uint64_t _dr2;
-    uint64_t _dr3;
-    uint64_t _dr6;
-    uint64_t _dr7;
-    uint64_t _pde;
-
-    uint32_t _efer;
-
-    uint32_t _sysenter_cs;
-    uint64_t _sysenter_eip;
-    uint64_t _sysenter_esp;
-
-    uint32_t _activity_state;
-    uint32_t pad;
-    interruptibility_state_t _interruptibility_state;
-};
-
-/* HAX exit status */
-enum exit_status {
-    /* IO port request */
-    HAX_EXIT_IO = 1,
-    /* MMIO instruction emulation */
-    HAX_EXIT_MMIO,
-    /* QEMU emulation mode request, currently means guest enter non-PG mode */
-    HAX_EXIT_REAL,
-    /*
-     * Interrupt window open, qemu can inject interrupt now
-     * Also used when signal pending since at that time qemu usually need
-     * check interrupt
-     */
-    HAX_EXIT_INTERRUPT,
-    /* Unknown vmexit, mostly trigger reboot */
-    HAX_EXIT_UNKNOWN_VMEXIT,
-    /* HALT from guest */
-    HAX_EXIT_HLT,
-    /* Reboot request, like because of tripple fault in guest */
-    HAX_EXIT_STATECHANGE,
-    /* the vcpu is now only paused when destroy, so simply return to hax */
-    HAX_EXIT_PAUSED,
-    HAX_EXIT_FAST_MMIO,
-};
-
-/*
- * The interface definition:
- * 1. vcpu_run execute will return 0 on success, otherwise mean failed
- * 2. exit_status return the exit reason, as stated in enum exit_status
- * 3. exit_reason is the vmx exit reason
- */
-struct hax_tunnel {
-    uint32_t _exit_reason;
-    uint32_t _exit_flag;
-    uint32_t _exit_status;
-    uint32_t user_event_pending;
-    int ready_for_interrupt_injection;
-    int request_interrupt_window;
-    union {
-        struct {
-            /* 0: read, 1: write */
-#define HAX_EXIT_IO_IN  1
-#define HAX_EXIT_IO_OUT 0
-            uint8_t _direction;
-            uint8_t _df;
-            uint16_t _size;
-            uint16_t _port;
-            uint16_t _count;
-            uint8_t _flags;
-            uint8_t _pad0;
-            uint16_t _pad1;
-            uint32_t _pad2;
-            uint64_t _vaddr;
-        } pio;
-        struct {
-            uint64_t gla;
-        } mmio;
-        struct {
-        } state;
-    };
-} __attribute__ ((__packed__));
-
-struct hax_module_version {
-    uint32_t compat_version;
-    uint32_t cur_version;
-} __attribute__ ((__packed__));
-
-/* This interface is support only after API version 2 */
-struct hax_qemu_version {
-    /* Current API version in QEMU */
-    uint32_t cur_version;
-    /* The minimum API version supported by QEMU */
-    uint32_t min_version;
-} __attribute__ ((__packed__));
-
-/* The mac specfic interface to qemu, mostly is ioctl related */
-struct hax_tunnel_info {
-    uint64_t va;
-    uint64_t io_va;
-    uint16_t size;
-    uint16_t pad[3];
-} __attribute__ ((__packed__));
-
-struct hax_alloc_ram_info {
-    uint32_t size;
-    uint32_t pad;
-    uint64_t va;
-} __attribute__ ((__packed__));
-
-struct hax_ramblock_info {
-    uint64_t start_va;
-    uint64_t size;
-    uint64_t reserved;
-} __attribute__ ((__packed__));
-
-#define HAX_RAM_INFO_ROM     0x01 /* Read-Only */
-#define HAX_RAM_INFO_INVALID 0x80 /* Unmapped, usually used for MMIO */
-struct hax_set_ram_info {
-    uint64_t pa_start;
-    uint32_t size;
-    uint8_t flags;
-    uint8_t pad[3];
-    uint64_t va;
-} __attribute__ ((__packed__));
-
-#define HAX_CAP_STATUS_WORKING     0x1
-#define HAX_CAP_STATUS_NOTWORKING  0x0
-#define HAX_CAP_WORKSTATUS_MASK    0x1
-
-#define HAX_CAP_FAILREASON_VT      0x1
-#define HAX_CAP_FAILREASON_NX      0x2
-
-#define HAX_CAP_MEMQUOTA           0x2
-#define HAX_CAP_UG                 0x4
-#define HAX_CAP_64BIT_RAMBLOCK     0x8
-
-struct hax_capabilityinfo {
-    /* bit 0: 1 - working
-     *        0 - not working, possibly because NT/NX disabled
-     * bit 1: 1 - memory limitation working
-     *        0 - no memory limitation
-     */
-    uint16_t wstatus;
-    /* valid when not working
-     * bit 0: VT not enabeld
-     * bit 1: NX not enabled*/
-    uint16_t winfo;
-    uint32_t pad;
-    uint64_t mem_quota;
-} __attribute__ ((__packed__));
-
-struct hax_fastmmio {
-    uint64_t gpa;
-    union {
-        uint64_t value;
-        uint64_t gpa2;  /* since HAX API v4 */
-    };
-    uint8_t size;
-    uint8_t direction;
-    uint16_t reg_index;
-    uint32_t pad0;
-    uint64_t _cr0;
-    uint64_t _cr2;
-    uint64_t _cr3;
-    uint64_t _cr4;
-} __attribute__ ((__packed__));
-#endif
diff --git a/target/i386/hax/hax-mem.c b/target/i386/hax/hax-mem.c
deleted file mode 100644
index bb5ffbc9ac..0000000000
--- a/target/i386/hax/hax-mem.c
+++ /dev/null
@@ -1,323 +0,0 @@
-/*
- * HAX memory mapping operations
- *
- * Copyright (c) 2015-16 Intel Corporation
- * Copyright 2016 Google, Inc.
- *
- * This work is licensed under the terms of the GNU GPL, version 2.  See
- * the COPYING file in the top-level directory.
- */
-
-#include "qemu/osdep.h"
-#include "cpu.h"
-#include "exec/address-spaces.h"
-#include "qemu/error-report.h"
-
-#include "hax-accel-ops.h"
-#include "qemu/queue.h"
-
-#define DEBUG_HAX_MEM 0
-
-#define DPRINTF(fmt, ...) \
-    do { \
-        if (DEBUG_HAX_MEM) { \
-            fprintf(stdout, fmt, ## __VA_ARGS__); \
-        } \
-    } while (0)
-
-/**
- * HAXMapping: describes a pending guest physical memory mapping
- *
- * @start_pa: a guest physical address marking the start of the region; must be
- *            page-aligned
- * @size: a guest physical address marking the end of the region; must be
- *          page-aligned
- * @host_va: the host virtual address of the start of the mapping
- * @flags: mapping parameters e.g. HAX_RAM_INFO_ROM or HAX_RAM_INFO_INVALID
- * @entry: additional fields for linking #HAXMapping instances together
- */
-typedef struct HAXMapping {
-    uint64_t start_pa;
-    uint32_t size;
-    uint64_t host_va;
-    int flags;
-    QTAILQ_ENTRY(HAXMapping) entry;
-} HAXMapping;
-
-/*
- * A doubly-linked list (actually a tail queue) of the pending page mappings
- * for the ongoing memory transaction.
- *
- * It is used to optimize the number of page mapping updates done through the
- * kernel module. For example, it's effective when a driver is digging an MMIO
- * hole inside an existing memory mapping. It will get a deletion of the whole
- * region, then the addition of the 2 remaining RAM areas around the hole and
- * finally the memory transaction commit. During the commit, it will effectively
- * send to the kernel only the removal of the pages from the MMIO hole after
- * having computed locally the result of the deletion and additions.
- */
-static QTAILQ_HEAD(, HAXMapping) mappings =
-    QTAILQ_HEAD_INITIALIZER(mappings);
-
-/**
- * hax_mapping_dump_list: dumps @mappings to stdout (for debugging)
- */
-static void hax_mapping_dump_list(void)
-{
-    HAXMapping *entry;
-
-    DPRINTF("%s updates:\n", __func__);
-    QTAILQ_FOREACH(entry, &mappings, entry) {
-        DPRINTF("\t%c 0x%016" PRIx64 "->0x%016" PRIx64 " VA 0x%016" PRIx64
-                "%s\n", entry->flags & HAX_RAM_INFO_INVALID ? '-' : '+',
-                entry->start_pa, entry->start_pa + entry->size, entry->host_va,
-                entry->flags & HAX_RAM_INFO_ROM ? " ROM" : "");
-    }
-}
-
-static void hax_insert_mapping_before(HAXMapping *next, uint64_t start_pa,
-                                      uint32_t size, uint64_t host_va,
-                                      uint8_t flags)
-{
-    HAXMapping *entry;
-
-    entry = g_malloc0(sizeof(*entry));
-    entry->start_pa = start_pa;
-    entry->size = size;
-    entry->host_va = host_va;
-    entry->flags = flags;
-    if (!next) {
-        QTAILQ_INSERT_TAIL(&mappings, entry, entry);
-    } else {
-        QTAILQ_INSERT_BEFORE(next, entry, entry);
-    }
-}
-
-static bool hax_mapping_is_opposite(HAXMapping *entry, uint64_t host_va,
-                                    uint8_t flags)
-{
-    /* removed then added without change for the read-only flag */
-    bool nop_flags = (entry->flags ^ flags) == HAX_RAM_INFO_INVALID;
-
-    return (entry->host_va == host_va) && nop_flags;
-}
-
-static void hax_update_mapping(uint64_t start_pa, uint32_t size,
-                               uint64_t host_va, uint8_t flags)
-{
-    uint64_t end_pa = start_pa + size;
-    HAXMapping *entry, *next;
-
-    QTAILQ_FOREACH_SAFE(entry, &mappings, entry, next) {
-        uint32_t chunk_sz;
-        if (start_pa >= entry->start_pa + entry->size) {
-            continue;
-        }
-        if (start_pa < entry->start_pa) {
-            chunk_sz = end_pa <= entry->start_pa ? size
-                                                 : entry->start_pa - start_pa;
-            hax_insert_mapping_before(entry, start_pa, chunk_sz,
-                                      host_va, flags);
-            start_pa += chunk_sz;
-            host_va += chunk_sz;
-            size -= chunk_sz;
-        } else if (start_pa > entry->start_pa) {
-            /* split the existing chunk at start_pa */
-            chunk_sz = start_pa - entry->start_pa;
-            hax_insert_mapping_before(entry, entry->start_pa, chunk_sz,
-                                      entry->host_va, entry->flags);
-            entry->start_pa += chunk_sz;
-            entry->host_va += chunk_sz;
-            entry->size -= chunk_sz;
-        }
-        /* now start_pa == entry->start_pa */
-        chunk_sz = MIN(size, entry->size);
-        if (chunk_sz) {
-            bool nop = hax_mapping_is_opposite(entry, host_va, flags);
-            bool partial = chunk_sz < entry->size;
-            if (partial) {
-                /* remove the beginning of the existing chunk */
-                entry->start_pa += chunk_sz;
-                entry->host_va += chunk_sz;
-                entry->size -= chunk_sz;
-                if (!nop) {
-                    hax_insert_mapping_before(entry, start_pa, chunk_sz,
-                                              host_va, flags);
-                }
-            } else { /* affects the full mapping entry */
-                if (nop) { /* no change to this mapping, remove it */
-                    QTAILQ_REMOVE(&mappings, entry, entry);
-                    g_free(entry);
-                } else { /* update mapping properties */
-                    entry->host_va = host_va;
-                    entry->flags = flags;
-                }
-            }
-            start_pa += chunk_sz;
-            host_va += chunk_sz;
-            size -= chunk_sz;
-        }
-        if (!size) { /* we are done */
-            break;
-        }
-    }
-    if (size) { /* add the leftover */
-        hax_insert_mapping_before(NULL, start_pa, size, host_va, flags);
-    }
-}
-
-static void hax_process_section(MemoryRegionSection *section, uint8_t flags)
-{
-    MemoryRegion *mr = section->mr;
-    hwaddr start_pa = section->offset_within_address_space;
-    ram_addr_t size = int128_get64(section->size);
-    unsigned int delta;
-    uint64_t host_va;
-    uint32_t max_mapping_size;
-
-    /* We only care about RAM and ROM regions */
-    if (!memory_region_is_ram(mr)) {
-        if (memory_region_is_romd(mr)) {
-            /* HAXM kernel module does not support ROMD yet  */
-            warn_report("Ignoring ROMD region 0x%016" PRIx64 "->0x%016" PRIx64,
-                        start_pa, start_pa + size);
-        }
-        return;
-    }
-
-    /* Adjust start_pa and size so that they are page-aligned. (Cf
-     * kvm_set_phys_mem() in kvm-all.c).
-     */
-    delta = qemu_real_host_page_size() - (start_pa & ~qemu_real_host_page_mask());
-    delta &= ~qemu_real_host_page_mask();
-    if (delta > size) {
-        return;
-    }
-    start_pa += delta;
-    size -= delta;
-    size &= qemu_real_host_page_mask();
-    if (!size || (start_pa & ~qemu_real_host_page_mask())) {
-        return;
-    }
-
-    host_va = (uintptr_t)memory_region_get_ram_ptr(mr)
-            + section->offset_within_region + delta;
-    if (memory_region_is_rom(section->mr)) {
-        flags |= HAX_RAM_INFO_ROM;
-    }
-
-    /*
-     * The kernel module interface uses 32-bit sizes:
-     * https://github.com/intel/haxm/blob/master/API.md#hax_vm_ioctl_set_ram
-     *
-     * If the mapping size is longer than 32 bits, we can't process it in one
-     * call into the kernel. Instead, we split the mapping into smaller ones,
-     * and call hax_update_mapping() on each.
-     */
-    max_mapping_size = UINT32_MAX & qemu_real_host_page_mask();
-    while (size > max_mapping_size) {
-        hax_update_mapping(start_pa, max_mapping_size, host_va, flags);
-        start_pa += max_mapping_size;
-        size -= max_mapping_size;
-        host_va += max_mapping_size;
-    }
-    /* Now size <= max_mapping_size */
-    hax_update_mapping(start_pa, (uint32_t)size, host_va, flags);
-}
-
-static void hax_region_add(MemoryListener *listener,
-                           MemoryRegionSection *section)
-{
-    memory_region_ref(section->mr);
-    hax_process_section(section, 0);
-}
-
-static void hax_region_del(MemoryListener *listener,
-                           MemoryRegionSection *section)
-{
-    hax_process_section(section, HAX_RAM_INFO_INVALID);
-    memory_region_unref(section->mr);
-}
-
-static void hax_transaction_begin(MemoryListener *listener)
-{
-    g_assert(QTAILQ_EMPTY(&mappings));
-}
-
-static void hax_transaction_commit(MemoryListener *listener)
-{
-    if (!QTAILQ_EMPTY(&mappings)) {
-        HAXMapping *entry, *next;
-
-        if (DEBUG_HAX_MEM) {
-            hax_mapping_dump_list();
-        }
-        QTAILQ_FOREACH_SAFE(entry, &mappings, entry, next) {
-            if (entry->flags & HAX_RAM_INFO_INVALID) {
-                /* for unmapping, put the values expected by the kernel */
-                entry->flags = HAX_RAM_INFO_INVALID;
-                entry->host_va = 0;
-            }
-            if (hax_set_ram(entry->start_pa, entry->size,
-                            entry->host_va, entry->flags)) {
-                fprintf(stderr, "%s: Failed mapping @0x%016" PRIx64 "+0x%"
-                        PRIx32 " flags %02x\n", __func__, entry->start_pa,
-                        entry->size, entry->flags);
-            }
-            QTAILQ_REMOVE(&mappings, entry, entry);
-            g_free(entry);
-        }
-    }
-}
-
-/* currently we fake the dirty bitmap sync, always dirty */
-static void hax_log_sync(MemoryListener *listener,
-                         MemoryRegionSection *section)
-{
-    MemoryRegion *mr = section->mr;
-
-    if (!memory_region_is_ram(mr)) {
-        /* Skip MMIO regions */
-        return;
-    }
-
-    memory_region_set_dirty(mr, 0, int128_get64(section->size));
-}
-
-static MemoryListener hax_memory_listener = {
-    .name = "hax",
-    .begin = hax_transaction_begin,
-    .commit = hax_transaction_commit,
-    .region_add = hax_region_add,
-    .region_del = hax_region_del,
-    .log_sync = hax_log_sync,
-    .priority = MEMORY_LISTENER_PRIORITY_ACCEL,
-};
-
-static void hax_ram_block_added(RAMBlockNotifier *n, void *host, size_t size,
-                                size_t max_size)
-{
-    /*
-     * We must register each RAM block with the HAXM kernel module, or
-     * hax_set_ram() will fail for any mapping into the RAM block:
-     * https://github.com/intel/haxm/blob/master/API.md#hax_vm_ioctl_alloc_ram
-     *
-     * Old versions of the HAXM kernel module (< 6.2.0) used to preallocate all
-     * host physical pages for the RAM block as part of this registration
-     * process, hence the name hax_populate_ram().
-     */
-    if (hax_populate_ram((uint64_t)(uintptr_t)host, max_size) < 0) {
-        fprintf(stderr, "HAX failed to populate RAM\n");
-        abort();
-    }
-}
-
-static struct RAMBlockNotifier hax_ram_notifier = {
-    .ram_block_added = hax_ram_block_added,
-};
-
-void hax_memory_init(void)
-{
-    ram_block_notifier_add(&hax_ram_notifier);
-    memory_listener_register(&hax_memory_listener, &address_space_memory);
-}
diff --git a/target/i386/hax/hax-posix.c b/target/i386/hax/hax-posix.c
deleted file mode 100644
index a057a5bd94..0000000000
--- a/target/i386/hax/hax-posix.c
+++ /dev/null
@@ -1,305 +0,0 @@
-/*
- * QEMU HAXM support
- *
- * Copyright (c) 2011 Intel Corporation
- *  Written by:
- *  Jiang Yunhong<yunhong.jiang@intel.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- *
- */
-
-/* HAX module interface - darwin version */
-#include "qemu/osdep.h"
-#include <sys/ioctl.h>
-
-#include "sysemu/cpus.h"
-#include "hax-accel-ops.h"
-
-hax_fd hax_mod_open(void)
-{
-    int fd = open("/dev/HAX", O_RDWR);
-    if (fd == -1) {
-        fprintf(stderr, "Failed to open the hax module\n");
-    }
-
-    qemu_set_cloexec(fd);
-
-    return fd;
-}
-
-int hax_populate_ram(uint64_t va, uint64_t size)
-{
-    int ret;
-
-    if (!hax_global.vm || !hax_global.vm->fd) {
-        fprintf(stderr, "Allocate memory before vm create?\n");
-        return -EINVAL;
-    }
-
-    if (hax_global.supports_64bit_ramblock) {
-        struct hax_ramblock_info ramblock = {
-            .start_va = va,
-            .size = size,
-            .reserved = 0
-        };
-
-        ret = ioctl(hax_global.vm->fd, HAX_VM_IOCTL_ADD_RAMBLOCK, &ramblock);
-    } else {
-        struct hax_alloc_ram_info info = {
-            .size = (uint32_t)size,
-            .pad = 0,
-            .va = va
-        };
-
-        ret = ioctl(hax_global.vm->fd, HAX_VM_IOCTL_ALLOC_RAM, &info);
-    }
-    if (ret < 0) {
-        fprintf(stderr, "Failed to register RAM block: ret=%d, va=0x%" PRIx64
-                ", size=0x%" PRIx64 ", method=%s\n", ret, va, size,
-                hax_global.supports_64bit_ramblock ? "new" : "legacy");
-        return ret;
-    }
-    return 0;
-}
-
-int hax_set_ram(uint64_t start_pa, uint32_t size, uint64_t host_va, int flags)
-{
-    struct hax_set_ram_info info;
-    int ret;
-
-    info.pa_start = start_pa;
-    info.size = size;
-    info.va = host_va;
-    info.flags = (uint8_t) flags;
-
-    ret = ioctl(hax_global.vm->fd, HAX_VM_IOCTL_SET_RAM, &info);
-    if (ret < 0) {
-        return -errno;
-    }
-    return 0;
-}
-
-int hax_capability(struct hax_state *hax, struct hax_capabilityinfo *cap)
-{
-    int ret;
-
-    ret = ioctl(hax->fd, HAX_IOCTL_CAPABILITY, cap);
-    if (ret == -1) {
-        fprintf(stderr, "Failed to get HAX capability\n");
-        return -errno;
-    }
-
-    return 0;
-}
-
-int hax_mod_version(struct hax_state *hax, struct hax_module_version *version)
-{
-    int ret;
-
-    ret = ioctl(hax->fd, HAX_IOCTL_VERSION, version);
-    if (ret == -1) {
-        fprintf(stderr, "Failed to get HAX version\n");
-        return -errno;
-    }
-
-    return 0;
-}
-
-static char *hax_vm_devfs_string(int vm_id)
-{
-    return g_strdup_printf("/dev/hax_vm/vm%02d", vm_id);
-}
-
-static char *hax_vcpu_devfs_string(int vm_id, int vcpu_id)
-{
-    return g_strdup_printf("/dev/hax_vm%02d/vcpu%02d", vm_id, vcpu_id);
-}
-
-int hax_host_create_vm(struct hax_state *hax, int *vmid)
-{
-    int ret;
-    int vm_id = 0;
-
-    if (hax_invalid_fd(hax->fd)) {
-        return -EINVAL;
-    }
-
-    if (hax->vm) {
-        return 0;
-    }
-
-    ret = ioctl(hax->fd, HAX_IOCTL_CREATE_VM, &vm_id);
-    *vmid = vm_id;
-    return ret;
-}
-
-hax_fd hax_host_open_vm(struct hax_state *hax, int vm_id)
-{
-    hax_fd fd;
-    char *vm_name = NULL;
-
-    vm_name = hax_vm_devfs_string(vm_id);
-    if (!vm_name) {
-        return -1;
-    }
-
-    fd = open(vm_name, O_RDWR);
-    g_free(vm_name);
-
-    qemu_set_cloexec(fd);
-
-    return fd;
-}
-
-int hax_notify_qemu_version(hax_fd vm_fd, struct hax_qemu_version *qversion)
-{
-    int ret;
-
-    if (hax_invalid_fd(vm_fd)) {
-        return -EINVAL;
-    }
-
-    ret = ioctl(vm_fd, HAX_VM_IOCTL_NOTIFY_QEMU_VERSION, qversion);
-
-    if (ret < 0) {
-        fprintf(stderr, "Failed to notify qemu API version\n");
-        return ret;
-    }
-    return 0;
-}
-
-/* Simply assume the size should be bigger than the hax_tunnel,
- * since the hax_tunnel can be extended later with compatibility considered
- */
-int hax_host_create_vcpu(hax_fd vm_fd, int vcpuid)
-{
-    int ret;
-
-    ret = ioctl(vm_fd, HAX_VM_IOCTL_VCPU_CREATE, &vcpuid);
-    if (ret < 0) {
-        fprintf(stderr, "Failed to create vcpu %x\n", vcpuid);
-    }
-
-    return ret;
-}
-
-hax_fd hax_host_open_vcpu(int vmid, int vcpuid)
-{
-    char *devfs_path = NULL;
-    hax_fd fd;
-
-    devfs_path = hax_vcpu_devfs_string(vmid, vcpuid);
-    if (!devfs_path) {
-        fprintf(stderr, "Failed to get the devfs\n");
-        return -EINVAL;
-    }
-
-    fd = open(devfs_path, O_RDWR);
-    g_free(devfs_path);
-    if (fd < 0) {
-        fprintf(stderr, "Failed to open the vcpu devfs\n");
-    }
-    qemu_set_cloexec(fd);
-    return fd;
-}
-
-int hax_host_setup_vcpu_channel(AccelCPUState *vcpu)
-{
-    int ret;
-    struct hax_tunnel_info info;
-
-    ret = ioctl(vcpu->fd, HAX_VCPU_IOCTL_SETUP_TUNNEL, &info);
-    if (ret) {
-        fprintf(stderr, "Failed to setup the hax tunnel\n");
-        return ret;
-    }
-
-    if (!valid_hax_tunnel_size(info.size)) {
-        fprintf(stderr, "Invalid hax tunnel size %x\n", info.size);
-        ret = -EINVAL;
-        return ret;
-    }
-
-    vcpu->tunnel = (struct hax_tunnel *) (intptr_t) (info.va);
-    vcpu->iobuf = (unsigned char *) (intptr_t) (info.io_va);
-    return 0;
-}
-
-int hax_vcpu_run(AccelCPUState *vcpu)
-{
-    return ioctl(vcpu->fd, HAX_VCPU_IOCTL_RUN, NULL);
-}
-
-int hax_sync_fpu(CPUArchState *env, struct fx_layout *fl, int set)
-{
-    int ret, fd;
-
-    fd = hax_vcpu_get_fd(env);
-    if (fd <= 0) {
-        return -1;
-    }
-
-    if (set) {
-        ret = ioctl(fd, HAX_VCPU_IOCTL_SET_FPU, fl);
-    } else {
-        ret = ioctl(fd, HAX_VCPU_IOCTL_GET_FPU, fl);
-    }
-    return ret;
-}
-
-int hax_sync_msr(CPUArchState *env, struct hax_msr_data *msrs, int set)
-{
-    int ret, fd;
-
-    fd = hax_vcpu_get_fd(env);
-    if (fd <= 0) {
-        return -1;
-    }
-    if (set) {
-        ret = ioctl(fd, HAX_VCPU_IOCTL_SET_MSRS, msrs);
-    } else {
-        ret = ioctl(fd, HAX_VCPU_IOCTL_GET_MSRS, msrs);
-    }
-    return ret;
-}
-
-int hax_sync_vcpu_state(CPUArchState *env, struct vcpu_state_t *state, int set)
-{
-    int ret, fd;
-
-    fd = hax_vcpu_get_fd(env);
-    if (fd <= 0) {
-        return -1;
-    }
-
-    if (set) {
-        ret = ioctl(fd, HAX_VCPU_SET_REGS, state);
-    } else {
-        ret = ioctl(fd, HAX_VCPU_GET_REGS, state);
-    }
-    return ret;
-}
-
-int hax_inject_interrupt(CPUArchState *env, int vector)
-{
-    int fd;
-
-    fd = hax_vcpu_get_fd(env);
-    if (fd <= 0) {
-        return -1;
-    }
-
-    return ioctl(fd, HAX_VCPU_IOCTL_INTERRUPT, &vector);
-}
-
-void hax_kick_vcpu_thread(CPUState *cpu)
-{
-    /*
-     * FIXME: race condition with the exit_request check in
-     * hax_vcpu_hax_exec
-     */
-    cpu->exit_request = 1;
-    cpus_kick_thread(cpu);
-}
diff --git a/target/i386/hax/hax-posix.h b/target/i386/hax/hax-posix.h
deleted file mode 100644
index fb7c64426d..0000000000
--- a/target/i386/hax/hax-posix.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * QEMU HAXM support
- *
- * Copyright (c) 2011 Intel Corporation
- *  Written by:
- *  Jiang Yunhong<yunhong.jiang@intel.com>
- *  Xin Xiaohui<xiaohui.xin@intel.com>
- *  Zhang Xiantao<xiantao.zhang@intel.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- *
- */
-
-#ifndef TARGET_I386_HAX_POSIX_H
-#define TARGET_I386_HAX_POSIX_H
-
-#include <sys/ioctl.h>
-
-#define HAX_INVALID_FD  (-1)
-static inline int hax_invalid_fd(hax_fd fd)
-{
-    return fd <= 0;
-}
-
-static inline void hax_mod_close(struct hax_state *hax)
-{
-    close(hax->fd);
-}
-
-static inline void hax_close_fd(hax_fd fd)
-{
-    close(fd);
-}
-
-/* HAX model level ioctl */
-#define HAX_IOCTL_VERSION _IOWR(0, 0x20, struct hax_module_version)
-#define HAX_IOCTL_CREATE_VM _IOWR(0, 0x21, uint32_t)
-#define HAX_IOCTL_DESTROY_VM _IOW(0, 0x22, uint32_t)
-#define HAX_IOCTL_CAPABILITY _IOR(0, 0x23, struct hax_capabilityinfo)
-
-#define HAX_VM_IOCTL_VCPU_CREATE _IOWR(0, 0x80, uint32_t)
-#define HAX_VM_IOCTL_ALLOC_RAM _IOWR(0, 0x81, struct hax_alloc_ram_info)
-#define HAX_VM_IOCTL_SET_RAM _IOWR(0, 0x82, struct hax_set_ram_info)
-#define HAX_VM_IOCTL_VCPU_DESTROY _IOW(0, 0x83, uint32_t)
-#define HAX_VM_IOCTL_NOTIFY_QEMU_VERSION _IOW(0, 0x84, struct hax_qemu_version)
-#define HAX_VM_IOCTL_ADD_RAMBLOCK _IOW(0, 0x85, struct hax_ramblock_info)
-
-#define HAX_VCPU_IOCTL_RUN  _IO(0, 0xc0)
-#define HAX_VCPU_IOCTL_SET_MSRS _IOWR(0, 0xc1, struct hax_msr_data)
-#define HAX_VCPU_IOCTL_GET_MSRS _IOWR(0, 0xc2, struct hax_msr_data)
-
-#define HAX_VCPU_IOCTL_SET_FPU  _IOW(0, 0xc3, struct fx_layout)
-#define HAX_VCPU_IOCTL_GET_FPU  _IOR(0, 0xc4, struct fx_layout)
-
-#define HAX_VCPU_IOCTL_SETUP_TUNNEL _IOWR(0, 0xc5, struct hax_tunnel_info)
-#define HAX_VCPU_IOCTL_INTERRUPT _IOWR(0, 0xc6, uint32_t)
-#define HAX_VCPU_SET_REGS       _IOWR(0, 0xc7, struct vcpu_state_t)
-#define HAX_VCPU_GET_REGS       _IOWR(0, 0xc8, struct vcpu_state_t)
-
-#endif /* TARGET_I386_HAX_POSIX_H */
diff --git a/target/i386/hax/hax-windows.c b/target/i386/hax/hax-windows.c
deleted file mode 100644
index 4bf6cc08d2..0000000000
--- a/target/i386/hax/hax-windows.c
+++ /dev/null
@@ -1,485 +0,0 @@
-/*
- * QEMU HAXM support
- *
- * Copyright (c) 2011 Intel Corporation
- *  Written by:
- *  Jiang Yunhong<yunhong.jiang@intel.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- *
- */
-
-#include "qemu/osdep.h"
-#include "cpu.h"
-#include "hax-accel-ops.h"
-
-/*
- * return 0 when success, -1 when driver not loaded,
- * other negative value for other failure
- */
-static int hax_open_device(hax_fd *fd)
-{
-    uint32_t errNum = 0;
-    HANDLE hDevice;
-
-    if (!fd) {
-        return -2;
-    }
-
-    hDevice = CreateFile("\\\\.\\HAX",
-                         GENERIC_READ | GENERIC_WRITE,
-                         0, NULL, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL);
-
-    if (hDevice == INVALID_HANDLE_VALUE) {
-        fprintf(stderr, "Failed to open the HAX device!\n");
-        errNum = GetLastError();
-        if (errNum == ERROR_FILE_NOT_FOUND) {
-            return -1;
-        }
-        return -2;
-    }
-    *fd = hDevice;
-    return 0;
-}
-
-/* hax_fd hax_mod_open */
- hax_fd hax_mod_open(void)
-{
-    int ret;
-    hax_fd fd = NULL;
-
-    ret = hax_open_device(&fd);
-    if (ret != 0) {
-        fprintf(stderr, "Open HAX device failed\n");
-    }
-
-    return fd;
-}
-
-int hax_populate_ram(uint64_t va, uint64_t size)
-{
-    int ret;
-    HANDLE hDeviceVM;
-    DWORD dSize = 0;
-
-    if (!hax_global.vm || !hax_global.vm->fd) {
-        fprintf(stderr, "Allocate memory before vm create?\n");
-        return -EINVAL;
-    }
-
-    hDeviceVM = hax_global.vm->fd;
-    if (hax_global.supports_64bit_ramblock) {
-        struct hax_ramblock_info ramblock = {
-            .start_va = va,
-            .size = size,
-            .reserved = 0
-        };
-
-        ret = DeviceIoControl(hDeviceVM,
-                              HAX_VM_IOCTL_ADD_RAMBLOCK,
-                              &ramblock, sizeof(ramblock), NULL, 0, &dSize,
-                              (LPOVERLAPPED) NULL);
-    } else {
-        struct hax_alloc_ram_info info = {
-            .size = (uint32_t) size,
-            .pad = 0,
-            .va = va
-        };
-
-        ret = DeviceIoControl(hDeviceVM,
-                              HAX_VM_IOCTL_ALLOC_RAM,
-                              &info, sizeof(info), NULL, 0, &dSize,
-                              (LPOVERLAPPED) NULL);
-    }
-
-    if (!ret) {
-        fprintf(stderr, "Failed to register RAM block: va=0x%" PRIx64
-                ", size=0x%" PRIx64 ", method=%s\n", va, size,
-                hax_global.supports_64bit_ramblock ? "new" : "legacy");
-        return ret;
-    }
-
-    return 0;
-}
-
-int hax_set_ram(uint64_t start_pa, uint32_t size, uint64_t host_va, int flags)
-{
-    struct hax_set_ram_info info;
-    HANDLE hDeviceVM = hax_global.vm->fd;
-    DWORD dSize = 0;
-    int ret;
-
-    info.pa_start = start_pa;
-    info.size = size;
-    info.va = host_va;
-    info.flags = (uint8_t) flags;
-
-    ret = DeviceIoControl(hDeviceVM, HAX_VM_IOCTL_SET_RAM,
-                          &info, sizeof(info), NULL, 0, &dSize,
-                          (LPOVERLAPPED) NULL);
-
-    if (!ret) {
-        return -EFAULT;
-    } else {
-        return 0;
-    }
-}
-
-int hax_capability(struct hax_state *hax, struct hax_capabilityinfo *cap)
-{
-    int ret;
-    HANDLE hDevice = hax->fd;        /* handle to hax module */
-    DWORD dSize = 0;
-    DWORD err = 0;
-
-    if (hax_invalid_fd(hDevice)) {
-        fprintf(stderr, "Invalid fd for hax device!\n");
-        return -ENODEV;
-    }
-
-    ret = DeviceIoControl(hDevice, HAX_IOCTL_CAPABILITY, NULL, 0, cap,
-                          sizeof(*cap), &dSize, (LPOVERLAPPED) NULL);
-
-    if (!ret) {
-        err = GetLastError();
-        if (err == ERROR_INSUFFICIENT_BUFFER || err == ERROR_MORE_DATA) {
-            fprintf(stderr, "hax capability is too long to hold.\n");
-        }
-        fprintf(stderr, "Failed to get Hax capability:%luu\n", err);
-        return -EFAULT;
-    } else {
-        return 0;
-    }
-}
-
-int hax_mod_version(struct hax_state *hax, struct hax_module_version *version)
-{
-    int ret;
-    HANDLE hDevice = hax->fd; /* handle to hax module */
-    DWORD dSize = 0;
-    DWORD err = 0;
-
-    if (hax_invalid_fd(hDevice)) {
-        fprintf(stderr, "Invalid fd for hax device!\n");
-        return -ENODEV;
-    }
-
-    ret = DeviceIoControl(hDevice,
-                          HAX_IOCTL_VERSION,
-                          NULL, 0,
-                          version, sizeof(*version), &dSize,
-                          (LPOVERLAPPED) NULL);
-
-    if (!ret) {
-        err = GetLastError();
-        if (err == ERROR_INSUFFICIENT_BUFFER || err == ERROR_MORE_DATA) {
-            fprintf(stderr, "hax module verion is too long to hold.\n");
-        }
-        fprintf(stderr, "Failed to get Hax module version:%lu\n", err);
-        return -EFAULT;
-    } else {
-        return 0;
-    }
-}
-
-static char *hax_vm_devfs_string(int vm_id)
-{
-    return g_strdup_printf("\\\\.\\hax_vm%02d", vm_id);
-}
-
-static char *hax_vcpu_devfs_string(int vm_id, int vcpu_id)
-{
-    return g_strdup_printf("\\\\.\\hax_vm%02d_vcpu%02d", vm_id, vcpu_id);
-}
-
-int hax_host_create_vm(struct hax_state *hax, int *vmid)
-{
-    int ret;
-    int vm_id = 0;
-    DWORD dSize = 0;
-
-    if (hax_invalid_fd(hax->fd)) {
-        return -EINVAL;
-    }
-
-    if (hax->vm) {
-        return 0;
-    }
-
-    ret = DeviceIoControl(hax->fd,
-                          HAX_IOCTL_CREATE_VM,
-                          NULL, 0, &vm_id, sizeof(vm_id), &dSize,
-                          (LPOVERLAPPED) NULL);
-    if (!ret) {
-        fprintf(stderr, "Failed to create VM. Error code: %lu\n",
-                GetLastError());
-        return -1;
-    }
-    *vmid = vm_id;
-    return 0;
-}
-
-hax_fd hax_host_open_vm(struct hax_state *hax, int vm_id)
-{
-    char *vm_name = NULL;
-    hax_fd hDeviceVM;
-
-    vm_name = hax_vm_devfs_string(vm_id);
-    if (!vm_name) {
-        fprintf(stderr, "Failed to open VM. VM name is null\n");
-        return INVALID_HANDLE_VALUE;
-    }
-
-    hDeviceVM = CreateFile(vm_name,
-                           GENERIC_READ | GENERIC_WRITE,
-                           0, NULL, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL);
-    if (hDeviceVM == INVALID_HANDLE_VALUE) {
-        fprintf(stderr, "Open the vm device error:%s, ec:%lu\n",
-                vm_name, GetLastError());
-    }
-
-    g_free(vm_name);
-    return hDeviceVM;
-}
-
-int hax_notify_qemu_version(hax_fd vm_fd, struct hax_qemu_version *qversion)
-{
-    int ret;
-    DWORD dSize = 0;
-    if (hax_invalid_fd(vm_fd)) {
-        return -EINVAL;
-    }
-    ret = DeviceIoControl(vm_fd,
-                          HAX_VM_IOCTL_NOTIFY_QEMU_VERSION,
-                          qversion, sizeof(struct hax_qemu_version),
-                          NULL, 0, &dSize, (LPOVERLAPPED) NULL);
-    if (!ret) {
-        fprintf(stderr, "Failed to notify qemu API version\n");
-        return -1;
-    }
-    return 0;
-}
-
-int hax_host_create_vcpu(hax_fd vm_fd, int vcpuid)
-{
-    int ret;
-    DWORD dSize = 0;
-
-    ret = DeviceIoControl(vm_fd,
-                          HAX_VM_IOCTL_VCPU_CREATE,
-                          &vcpuid, sizeof(vcpuid), NULL, 0, &dSize,
-                          (LPOVERLAPPED) NULL);
-    if (!ret) {
-        fprintf(stderr, "Failed to create vcpu %x\n", vcpuid);
-        return -1;
-    }
-
-    return 0;
-}
-
-hax_fd hax_host_open_vcpu(int vmid, int vcpuid)
-{
-    char *devfs_path = NULL;
-    hax_fd hDeviceVCPU;
-
-    devfs_path = hax_vcpu_devfs_string(vmid, vcpuid);
-    if (!devfs_path) {
-        fprintf(stderr, "Failed to get the devfs\n");
-        return INVALID_HANDLE_VALUE;
-    }
-
-    hDeviceVCPU = CreateFile(devfs_path,
-                             GENERIC_READ | GENERIC_WRITE,
-                             0, NULL, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL,
-                             NULL);
-
-    if (hDeviceVCPU == INVALID_HANDLE_VALUE) {
-        fprintf(stderr, "Failed to open the vcpu devfs\n");
-    }
-    g_free(devfs_path);
-    return hDeviceVCPU;
-}
-
-int hax_host_setup_vcpu_channel(AccelCPUState *vcpu)
-{
-    hax_fd hDeviceVCPU = vcpu->fd;
-    int ret;
-    struct hax_tunnel_info info;
-    DWORD dSize = 0;
-
-    ret = DeviceIoControl(hDeviceVCPU,
-                          HAX_VCPU_IOCTL_SETUP_TUNNEL,
-                          NULL, 0, &info, sizeof(info), &dSize,
-                          (LPOVERLAPPED) NULL);
-    if (!ret) {
-        fprintf(stderr, "Failed to setup the hax tunnel\n");
-        return -1;
-    }
-
-    if (!valid_hax_tunnel_size(info.size)) {
-        fprintf(stderr, "Invalid hax tunnel size %x\n", info.size);
-        ret = -EINVAL;
-        return ret;
-    }
-    vcpu->tunnel = (struct hax_tunnel *) (intptr_t) (info.va);
-    vcpu->iobuf = (unsigned char *) (intptr_t) (info.io_va);
-    return 0;
-}
-
-int hax_vcpu_run(AccelCPUState *vcpu)
-{
-    int ret;
-    HANDLE hDeviceVCPU = vcpu->fd;
-    DWORD dSize = 0;
-
-    ret = DeviceIoControl(hDeviceVCPU,
-                          HAX_VCPU_IOCTL_RUN,
-                          NULL, 0, NULL, 0, &dSize, (LPOVERLAPPED) NULL);
-    if (!ret) {
-        return -EFAULT;
-    } else {
-        return 0;
-    }
-}
-
-int hax_sync_fpu(CPUArchState *env, struct fx_layout *fl, int set)
-{
-    int ret;
-    hax_fd fd;
-    HANDLE hDeviceVCPU;
-    DWORD dSize = 0;
-
-    fd = hax_vcpu_get_fd(env);
-    if (hax_invalid_fd(fd)) {
-        return -1;
-    }
-
-    hDeviceVCPU = fd;
-
-    if (set) {
-        ret = DeviceIoControl(hDeviceVCPU,
-                              HAX_VCPU_IOCTL_SET_FPU,
-                              fl, sizeof(*fl), NULL, 0, &dSize,
-                              (LPOVERLAPPED) NULL);
-    } else {
-        ret = DeviceIoControl(hDeviceVCPU,
-                              HAX_VCPU_IOCTL_GET_FPU,
-                              NULL, 0, fl, sizeof(*fl), &dSize,
-                              (LPOVERLAPPED) NULL);
-    }
-    if (!ret) {
-        return -EFAULT;
-    } else {
-        return 0;
-    }
-}
-
-int hax_sync_msr(CPUArchState *env, struct hax_msr_data *msrs, int set)
-{
-    int ret;
-    hax_fd fd;
-    HANDLE hDeviceVCPU;
-    DWORD dSize = 0;
-
-    fd = hax_vcpu_get_fd(env);
-    if (hax_invalid_fd(fd)) {
-        return -1;
-    }
-    hDeviceVCPU = fd;
-
-    if (set) {
-        ret = DeviceIoControl(hDeviceVCPU,
-                              HAX_VCPU_IOCTL_SET_MSRS,
-                              msrs, sizeof(*msrs),
-                              msrs, sizeof(*msrs), &dSize, (LPOVERLAPPED) NULL);
-    } else {
-        ret = DeviceIoControl(hDeviceVCPU,
-                              HAX_VCPU_IOCTL_GET_MSRS,
-                              msrs, sizeof(*msrs),
-                              msrs, sizeof(*msrs), &dSize, (LPOVERLAPPED) NULL);
-    }
-    if (!ret) {
-        return -EFAULT;
-    } else {
-        return 0;
-    }
-}
-
-int hax_sync_vcpu_state(CPUArchState *env, struct vcpu_state_t *state, int set)
-{
-    int ret;
-    hax_fd fd;
-    HANDLE hDeviceVCPU;
-    DWORD dSize;
-
-    fd = hax_vcpu_get_fd(env);
-    if (hax_invalid_fd(fd)) {
-        return -1;
-    }
-
-    hDeviceVCPU = fd;
-
-    if (set) {
-        ret = DeviceIoControl(hDeviceVCPU,
-                              HAX_VCPU_SET_REGS,
-                              state, sizeof(*state),
-                              NULL, 0, &dSize, (LPOVERLAPPED) NULL);
-    } else {
-        ret = DeviceIoControl(hDeviceVCPU,
-                              HAX_VCPU_GET_REGS,
-                              NULL, 0,
-                              state, sizeof(*state), &dSize,
-                              (LPOVERLAPPED) NULL);
-    }
-    if (!ret) {
-        return -EFAULT;
-    } else {
-        return 0;
-    }
-}
-
-int hax_inject_interrupt(CPUArchState *env, int vector)
-{
-    int ret;
-    hax_fd fd;
-    HANDLE hDeviceVCPU;
-    DWORD dSize;
-
-    fd = hax_vcpu_get_fd(env);
-    if (hax_invalid_fd(fd)) {
-        return -1;
-    }
-
-    hDeviceVCPU = fd;
-
-    ret = DeviceIoControl(hDeviceVCPU,
-                          HAX_VCPU_IOCTL_INTERRUPT,
-                          &vector, sizeof(vector), NULL, 0, &dSize,
-                          (LPOVERLAPPED) NULL);
-    if (!ret) {
-        return -EFAULT;
-    } else {
-        return 0;
-    }
-}
-
-static void CALLBACK dummy_apc_func(ULONG_PTR unused)
-{
-}
-
-void hax_kick_vcpu_thread(CPUState *cpu)
-{
-    /*
-     * FIXME: race condition with the exit_request check in
-     * hax_vcpu_hax_exec
-     */
-    cpu->exit_request = 1;
-    if (!qemu_cpu_is_self(cpu)) {
-        if (!QueueUserAPC(dummy_apc_func, cpu->accel->hThread, 0)) {
-            fprintf(stderr, "%s: QueueUserAPC failed with error %lu\n",
-                    __func__, GetLastError());
-            exit(1);
-        }
-    }
-}
diff --git a/target/i386/hax/hax-windows.h b/target/i386/hax/hax-windows.h
deleted file mode 100644
index b1f5d4f32f..0000000000
--- a/target/i386/hax/hax-windows.h
+++ /dev/null
@@ -1,88 +0,0 @@
-/*
- * QEMU HAXM support
- *
- * Copyright IBM, Corp. 2008
- *
- * Authors:
- *  Anthony Liguori   <aliguori@us.ibm.com>
- *
- * Copyright (c) 2011 Intel Corporation
- *  Written by:
- *  Jiang Yunhong<yunhong.jiang@intel.com>
- *  Xin Xiaohui<xiaohui.xin@intel.com>
- *  Zhang Xiantao<xiantao.zhang@intel.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- *
- */
-
-#ifndef TARGET_I386_HAX_WINDOWS_H
-#define TARGET_I386_HAX_WINDOWS_H
-
-#include <winioctl.h>
-#include <windef.h>
-
-#include "hax-accel-ops.h"
-
-#define HAX_INVALID_FD INVALID_HANDLE_VALUE
-
-static inline void hax_mod_close(struct hax_state *hax)
-{
-    CloseHandle(hax->fd);
-}
-
-static inline void hax_close_fd(hax_fd fd)
-{
-    CloseHandle(fd);
-}
-
-static inline int hax_invalid_fd(hax_fd fd)
-{
-    return (fd == INVALID_HANDLE_VALUE);
-}
-
-#define HAX_DEVICE_TYPE 0x4000
-
-#define HAX_IOCTL_VERSION       CTL_CODE(HAX_DEVICE_TYPE, 0x900, \
-                                         METHOD_BUFFERED, FILE_ANY_ACCESS)
-#define HAX_IOCTL_CREATE_VM     CTL_CODE(HAX_DEVICE_TYPE, 0x901, \
-                                         METHOD_BUFFERED, FILE_ANY_ACCESS)
-#define HAX_IOCTL_CAPABILITY    CTL_CODE(HAX_DEVICE_TYPE, 0x910, \
-                                         METHOD_BUFFERED, FILE_ANY_ACCESS)
-
-#define HAX_VM_IOCTL_VCPU_CREATE   CTL_CODE(HAX_DEVICE_TYPE, 0x902, \
-                                            METHOD_BUFFERED, FILE_ANY_ACCESS)
-#define HAX_VM_IOCTL_ALLOC_RAM     CTL_CODE(HAX_DEVICE_TYPE, 0x903, \
-                                            METHOD_BUFFERED, FILE_ANY_ACCESS)
-#define HAX_VM_IOCTL_SET_RAM       CTL_CODE(HAX_DEVICE_TYPE, 0x904, \
-                                            METHOD_BUFFERED, FILE_ANY_ACCESS)
-#define HAX_VM_IOCTL_VCPU_DESTROY  CTL_CODE(HAX_DEVICE_TYPE, 0x905, \
-                                            METHOD_BUFFERED, FILE_ANY_ACCESS)
-#define HAX_VM_IOCTL_ADD_RAMBLOCK  CTL_CODE(HAX_DEVICE_TYPE, 0x913, \
-                                            METHOD_BUFFERED, FILE_ANY_ACCESS)
-
-#define HAX_VCPU_IOCTL_RUN      CTL_CODE(HAX_DEVICE_TYPE, 0x906, \
-                                         METHOD_BUFFERED, FILE_ANY_ACCESS)
-#define HAX_VCPU_IOCTL_SET_MSRS CTL_CODE(HAX_DEVICE_TYPE, 0x907, \
-                                         METHOD_BUFFERED, FILE_ANY_ACCESS)
-#define HAX_VCPU_IOCTL_GET_MSRS CTL_CODE(HAX_DEVICE_TYPE, 0x908, \
-                                         METHOD_BUFFERED, FILE_ANY_ACCESS)
-#define HAX_VCPU_IOCTL_SET_FPU  CTL_CODE(HAX_DEVICE_TYPE, 0x909, \
-                                         METHOD_BUFFERED, FILE_ANY_ACCESS)
-#define HAX_VCPU_IOCTL_GET_FPU  CTL_CODE(HAX_DEVICE_TYPE, 0x90a, \
-                                         METHOD_BUFFERED, FILE_ANY_ACCESS)
-
-#define HAX_VCPU_IOCTL_SETUP_TUNNEL  CTL_CODE(HAX_DEVICE_TYPE, 0x90b, \
-                                              METHOD_BUFFERED, FILE_ANY_ACCESS)
-#define HAX_VCPU_IOCTL_INTERRUPT     CTL_CODE(HAX_DEVICE_TYPE, 0x90c, \
-                                              METHOD_BUFFERED, FILE_ANY_ACCESS)
-#define HAX_VCPU_SET_REGS            CTL_CODE(HAX_DEVICE_TYPE, 0x90d, \
-                                              METHOD_BUFFERED, FILE_ANY_ACCESS)
-#define HAX_VCPU_GET_REGS            CTL_CODE(HAX_DEVICE_TYPE, 0x90e, \
-                                              METHOD_BUFFERED, FILE_ANY_ACCESS)
-
-#define HAX_VM_IOCTL_NOTIFY_QEMU_VERSION CTL_CODE(HAX_DEVICE_TYPE, 0x910, \
-                                                  METHOD_BUFFERED,        \
-                                                  FILE_ANY_ACCESS)
-#endif /* TARGET_I386_HAX_WINDOWS_H */
diff --git a/target/i386/hax/meson.build b/target/i386/hax/meson.build
deleted file mode 100644
index 6ac314aa35..0000000000
--- a/target/i386/hax/meson.build
+++ /dev/null
@@ -1,7 +0,0 @@
-i386_system_ss.add(when: 'CONFIG_HAX', if_true: files(
-  'hax-all.c',
-  'hax-mem.c',
-  'hax-accel-ops.c',
-))
-i386_system_ss.add(when: ['CONFIG_HAX', 'CONFIG_POSIX'], if_true: files('hax-posix.c'))
-i386_system_ss.add(when: ['CONFIG_HAX', 'CONFIG_WIN32'], if_true: files('hax-windows.c'))
diff --git a/target/i386/helper.c b/target/i386/helper.c
index 89aa696c6d..2070dd0dda 100644
--- a/target/i386/helper.c
+++ b/target/i386/helper.c
@@ -22,10 +22,10 @@
 #include "cpu.h"
 #include "exec/exec-all.h"
 #include "sysemu/runstate.h"
-#include "kvm/kvm_i386.h"
 #ifndef CONFIG_USER_ONLY
 #include "sysemu/hw_accel.h"
 #include "monitor/monitor.h"
+#include "kvm/kvm_i386.h"
 #endif
 #include "qemu/log.h"
 #ifdef CONFIG_TCG
diff --git a/target/i386/kvm/kvm-stub.c b/target/i386/kvm/kvm-stub.c
deleted file mode 100644
index e052f1c7b0..0000000000
--- a/target/i386/kvm/kvm-stub.c
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * QEMU KVM x86 specific function stubs
- *
- * Copyright Linaro Limited 2012
- *
- * Author: Peter Maydell <peter.maydell@linaro.org>
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- *
- */
-#include "qemu/osdep.h"
-#include "cpu.h"
-#include "kvm_i386.h"
-
-#ifndef __OPTIMIZE__
-bool kvm_has_smm(void)
-{
-    return 1;
-}
-
-bool kvm_enable_x2apic(void)
-{
-    return false;
-}
-
-/* This function is only called inside conditionals which we
- * rely on the compiler to optimize out when CONFIG_KVM is not
- * defined.
- */
-uint32_t kvm_arch_get_supported_cpuid(KVMState *env, uint32_t function,
-                                      uint32_t index, int reg)
-{
-    abort();
-}
-#endif
-
-bool kvm_hv_vpindex_settable(void)
-{
-    return false;
-}
-
-bool kvm_hyperv_expand_features(X86CPU *cpu, Error **errp)
-{
-    abort();
-}
-
-void kvm_set_max_apic_id(uint32_t max_apic_id)
-{
-    return;
-}
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index 639a242ad8..af101fcdf6 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -154,9 +154,9 @@ static KVMMSRHandlers msr_handlers[KVM_MSR_FILTER_MAX_RANGES];
 static RateLimit bus_lock_ratelimit_ctrl;
 static int kvm_get_one_msr(X86CPU *cpu, int index, uint64_t *value);
 
-int kvm_has_pit_state2(void)
+bool kvm_has_pit_state2(void)
 {
-    return has_pit_state2;
+    return !!has_pit_state2;
 }
 
 bool kvm_has_smm(void)
@@ -4729,7 +4729,7 @@ int kvm_arch_put_registers(CPUState *cpu, int level)
     /*
      * Put MSR_IA32_FEATURE_CONTROL first, this ensures the VM gets out of VMX
      * root operation upon vCPU reset. kvm_put_msr_feature_control() should also
-     * preceed kvm_put_nested_state() when 'real' nested state is set.
+     * precede kvm_put_nested_state() when 'real' nested state is set.
      */
     if (level >= KVM_PUT_RESET_STATE) {
         ret = kvm_put_msr_feature_control(x86_cpu);
@@ -5653,7 +5653,7 @@ int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
             }
 
             /*
-             * Handled untranslated compatibilty format interrupt with
+             * Handled untranslated compatibility format interrupt with
              * extended destination ID in the low bits 11-5. */
             dst.address = kvm_swizzle_msi_ext_dest_id(dst.address);
 
diff --git a/target/i386/kvm/kvm_i386.h b/target/i386/kvm/kvm_i386.h
index e24753abfe..55d4e68c34 100644
--- a/target/i386/kvm/kvm_i386.h
+++ b/target/i386/kvm/kvm_i386.h
@@ -13,8 +13,6 @@
 
 #include "sysemu/kvm.h"
 
-#define kvm_apic_in_kernel() (kvm_irqchip_in_kernel())
-
 #ifdef CONFIG_KVM
 
 #define kvm_pit_in_kernel() \
@@ -33,30 +31,40 @@
 #endif  /* CONFIG_KVM */
 
 bool kvm_has_smm(void);
+bool kvm_enable_x2apic(void);
+bool kvm_hv_vpindex_settable(void);
+bool kvm_has_pit_state2(void);
+
+bool kvm_enable_sgx_provisioning(KVMState *s);
+bool kvm_hyperv_expand_features(X86CPU *cpu, Error **errp);
+
+void kvm_arch_reset_vcpu(X86CPU *cs);
+void kvm_arch_after_reset_vcpu(X86CPU *cpu);
+void kvm_arch_do_init_vcpu(X86CPU *cs);
+uint32_t kvm_arch_get_supported_cpuid(KVMState *env, uint32_t function,
+                                      uint32_t index, int reg);
+uint64_t kvm_arch_get_supported_msr_feature(KVMState *s, uint32_t index);
+
+void kvm_set_max_apic_id(uint32_t max_apic_id);
+void kvm_request_xsave_components(X86CPU *cpu, uint64_t mask);
+
+#ifdef CONFIG_KVM
+
 bool kvm_has_adjust_clock(void);
 bool kvm_has_adjust_clock_stable(void);
 bool kvm_has_exception_payload(void);
 void kvm_synchronize_all_tsc(void);
-void kvm_arch_reset_vcpu(X86CPU *cs);
-void kvm_arch_after_reset_vcpu(X86CPU *cpu);
-void kvm_arch_do_init_vcpu(X86CPU *cs);
 
+void kvm_get_apic_state(DeviceState *d, struct kvm_lapic_state *kapic);
 void kvm_put_apicbase(X86CPU *cpu, uint64_t value);
 
-bool kvm_enable_x2apic(void);
 bool kvm_has_x2apic_api(void);
 bool kvm_has_waitpkg(void);
 
-bool kvm_hv_vpindex_settable(void);
-bool kvm_hyperv_expand_features(X86CPU *cpu, Error **errp);
-
 uint64_t kvm_swizzle_msi_ext_dest_id(uint64_t address);
 void kvm_update_msi_routes_all(void *private, bool global,
                                uint32_t index, uint32_t mask);
 
-bool kvm_enable_sgx_provisioning(KVMState *s);
-void kvm_request_xsave_components(X86CPU *cpu, uint64_t mask);
-
 typedef bool QEMURDMSRHandler(X86CPU *cpu, uint32_t msr, uint64_t *val);
 typedef bool QEMUWRMSRHandler(X86CPU *cpu, uint32_t msr, uint64_t val);
 typedef struct kvm_msr_handlers {
@@ -68,6 +76,8 @@ typedef struct kvm_msr_handlers {
 bool kvm_filter_msr(KVMState *s, uint32_t msr, QEMURDMSRHandler *rdmsr,
                     QEMUWRMSRHandler *wrmsr);
 
-void kvm_set_max_apic_id(uint32_t max_apic_id);
+#endif /* CONFIG_KVM */
+
+void kvm_pc_setup_irq_routing(bool pci_enabled);
 
 #endif
diff --git a/target/i386/kvm/meson.build b/target/i386/kvm/meson.build
index 40fbde96ca..5d9174bbb5 100644
--- a/target/i386/kvm/meson.build
+++ b/target/i386/kvm/meson.build
@@ -1,5 +1,3 @@
-i386_ss.add(when: 'CONFIG_KVM', if_false: files('kvm-stub.c'))
-
 i386_softmmu_kvm_ss = ss.source_set()
 
 i386_softmmu_kvm_ss.add(files(
diff --git a/target/i386/kvm/xen-emu.c b/target/i386/kvm/xen-emu.c
index a8146115f0..76348f9d5d 100644
--- a/target/i386/kvm/xen-emu.c
+++ b/target/i386/kvm/xen-emu.c
@@ -1033,7 +1033,7 @@ static int do_set_periodic_timer(CPUState *target, uint64_t period_ns)
 #define MILLISECS(_ms)  ((int64_t)((_ms) * 1000000ULL))
 #define MICROSECS(_us)  ((int64_t)((_us) * 1000ULL))
 #define STIME_MAX ((time_t)((int64_t)~0ull >> 1))
-/* Chosen so (NOW() + delta) wont overflow without an uptime of 200 years */
+/* Chosen so (NOW() + delta) won't overflow without an uptime of 200 years */
 #define STIME_DELTA_MAX ((int64_t)((uint64_t)~0ull >> 2))
 
 static int vcpuop_set_periodic_timer(CPUState *cs, CPUState *target,
diff --git a/target/i386/machine.c b/target/i386/machine.c
index c7ac8084b2..a1041ef828 100644
--- a/target/i386/machine.c
+++ b/target/i386/machine.c
@@ -282,12 +282,12 @@ static int cpu_pre_save(void *opaque)
      * hypervisor, its exception payload (CR2/DR6 on #PF/#DB)
      * should not be set yet in the respective vCPU register.
      * Thus, in case an exception is pending, it is
-     * important to save the exception payload seperately.
+     * important to save the exception payload separately.
      *
      * Therefore, if an exception is not in a pending state
      * or vCPU is not in guest-mode, it is not important to
      * distinguish between a pending and injected exception
-     * and we don't need to store seperately the exception payload.
+     * and we don't need to store separately the exception payload.
      *
      * In order to preserve better backwards-compatible migration,
      * convert a pending exception to an injected exception in
diff --git a/target/i386/meson.build b/target/i386/meson.build
index 1effe1ed9a..6f1036d469 100644
--- a/target/i386/meson.build
+++ b/target/i386/meson.build
@@ -25,7 +25,6 @@ i386_system_ss.add(when: 'CONFIG_SEV', if_true: files('sev.c'), if_false: files(
 i386_user_ss = ss.source_set()
 
 subdir('kvm')
-subdir('hax')
 subdir('whpx')
 subdir('nvmm')
 subdir('hvf')
diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h
index a0e425733f..33908c0691 100644
--- a/target/i386/ops_sse.h
+++ b/target/i386/ops_sse.h
@@ -20,6 +20,7 @@
 
 #include "crypto/aes.h"
 #include "crypto/aes-round.h"
+#include "crypto/clmul.h"
 
 #if SHIFT == 0
 #define Reg MMXReg
@@ -2122,41 +2123,18 @@ target_ulong helper_crc32(uint32_t crc1, target_ulong msg, uint32_t len)
 
 #endif
 
-#if SHIFT == 1
-static void clmulq(uint64_t *dest_l, uint64_t *dest_h,
-                          uint64_t a, uint64_t b)
-{
-    uint64_t al, ah, resh, resl;
-
-    ah = 0;
-    al = a;
-    resh = resl = 0;
-
-    while (b) {
-        if (b & 1) {
-            resl ^= al;
-            resh ^= ah;
-        }
-        ah = (ah << 1) | (al >> 63);
-        al <<= 1;
-        b >>= 1;
-    }
-
-    *dest_l = resl;
-    *dest_h = resh;
-}
-#endif
-
 void glue(helper_pclmulqdq, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg *s,
                                     uint32_t ctrl)
 {
-    uint64_t a, b;
-    int i;
+    int a_idx = (ctrl & 1) != 0;
+    int b_idx = (ctrl & 16) != 0;
 
-    for (i = 0; i < 1 << SHIFT; i += 2) {
-        a = v->Q(((ctrl & 1) != 0) + i);
-        b = s->Q(((ctrl & 16) != 0) + i);
-        clmulq(&d->Q(i), &d->Q(i + 1), a, b);
+    for (int i = 0; i < SHIFT; i++) {
+        uint64_t a = v->Q(2 * i + a_idx);
+        uint64_t b = s->Q(2 * i + b_idx);
+        Int128 *r = (Int128 *)&d->ZMM_X(i);
+
+        *r = clmul_64(a, b);
     }
 }
 
diff --git a/target/i386/sev.h b/target/i386/sev.h
index 7b1528248a..e7499c95b1 100644
--- a/target/i386/sev.h
+++ b/target/i386/sev.h
@@ -46,9 +46,9 @@ bool sev_es_enabled(void);
 #define sev_es_enabled() 0
 #endif
 
-extern uint32_t sev_get_cbit_position(void);
-extern uint32_t sev_get_reduced_phys_bits(void);
-extern bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp);
+uint32_t sev_get_cbit_position(void);
+uint32_t sev_get_reduced_phys_bits(void);
+bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp);
 
 int sev_encrypt_flash(uint8_t *ptr, uint64_t len, Error **errp);
 int sev_inject_launch_secret(const char *hdr, const char *secret,
diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc
index 8f93a239dd..0db19cda3b 100644
--- a/target/i386/tcg/decode-new.c.inc
+++ b/target/i386/tcg/decode-new.c.inc
@@ -337,7 +337,7 @@ static const X86OpEntry opcodes_0F38_00toEF[240] = {
     [0x07] = X86_OP_ENTRY3(PHSUBSW,   V,x,  H,x,   W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
 
     [0x10] = X86_OP_ENTRY2(PBLENDVB,  V,x,         W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
-    [0x13] = X86_OP_ENTRY2(VCVTPH2PS, V,x,         W,ph, vex11 cpuid(F16C) p_66),
+    [0x13] = X86_OP_ENTRY2(VCVTPH2PS, V,x,         W,xh, vex11 cpuid(F16C) p_66),
     [0x14] = X86_OP_ENTRY2(BLENDVPS,  V,x,         W,x,  vex4 cpuid(SSE41) p_66),
     [0x15] = X86_OP_ENTRY2(BLENDVPD,  V,x,         W,x,  vex4 cpuid(SSE41) p_66),
     /* Listed incorrectly as type 4 */
@@ -565,7 +565,7 @@ static const X86OpEntry opcodes_0F3A[256] = {
     [0x15] = X86_OP_ENTRY3(PEXTRW,     E,w,  V,dq, I,b,  vex5 cpuid(SSE41) zext0 p_66),
     [0x16] = X86_OP_ENTRY3(PEXTR,      E,y,  V,dq, I,b,  vex5 cpuid(SSE41) p_66),
     [0x17] = X86_OP_ENTRY3(VEXTRACTPS, E,d,  V,dq, I,b,  vex5 cpuid(SSE41) p_66),
-    [0x1d] = X86_OP_ENTRY3(VCVTPS2PH,  W,ph, V,x,  I,b,  vex11 cpuid(F16C) p_66),
+    [0x1d] = X86_OP_ENTRY3(VCVTPS2PH,  W,xh, V,x,  I,b,  vex11 cpuid(F16C) p_66),
 
     [0x20] = X86_OP_ENTRY4(PINSRB,     V,dq, H,dq, E,b,  vex5 cpuid(SSE41) zext2 p_66),
     [0x21] = X86_OP_GROUP0(VINSERTPS),
@@ -805,10 +805,20 @@ static void decode_sse_unary(DisasContext *s, CPUX86State *env, X86OpEntry *entr
     case 0x51: entry->gen = gen_VSQRT; break;
     case 0x52: entry->gen = gen_VRSQRT; break;
     case 0x53: entry->gen = gen_VRCP; break;
-    case 0x5A: entry->gen = gen_VCVTfp2fp; break;
     }
 }
 
+static void decode_0F5A(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
+{
+    static const X86OpEntry opcodes_0F5A[4] = {
+        X86_OP_ENTRY2(VCVTPS2PD,  V,x,       W,xh, vex2),      /* VCVTPS2PD */
+        X86_OP_ENTRY2(VCVTPD2PS,  V,x,       W,x,  vex2),      /* VCVTPD2PS */
+        X86_OP_ENTRY3(VCVTSS2SD,  V,x,  H,x, W,x,  vex2_rep3), /* VCVTSS2SD */
+        X86_OP_ENTRY3(VCVTSD2SS,  V,x,  H,x, W,x,  vex2_rep3), /* VCVTSD2SS */
+    };
+    *entry = *decode_by_prefix(s, opcodes_0F5A);
+}
+
 static void decode_0F5B(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
 {
     static const X86OpEntry opcodes_0F5B[4] = {
@@ -891,7 +901,7 @@ static const X86OpEntry opcodes_0F[256] = {
 
     [0x58] = X86_OP_ENTRY3(VADD,       V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2),
     [0x59] = X86_OP_ENTRY3(VMUL,       V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2),
-    [0x5a] = X86_OP_GROUP3(sse_unary,  V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2), /* CVTPS2PD */
+    [0x5a] = X86_OP_GROUP0(0F5A),
     [0x5b] = X86_OP_GROUP0(0F5B),
     [0x5c] = X86_OP_ENTRY3(VSUB,       V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2),
     [0x5d] = X86_OP_ENTRY3(VMIN,       V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2),
@@ -1104,7 +1114,7 @@ static bool decode_op_size(DisasContext *s, X86OpEntry *e, X86OpSize size, MemOp
         *ot = s->vex_l ? MO_256 : MO_128;
         return true;
 
-    case X86_SIZE_ph: /* SSE/AVX packed half precision */
+    case X86_SIZE_xh: /* SSE/AVX packed half register */
         *ot = s->vex_l ? MO_128 : MO_64;
         return true;
 
diff --git a/target/i386/tcg/decode-new.h b/target/i386/tcg/decode-new.h
index cb6b8bcf67..a542ec1681 100644
--- a/target/i386/tcg/decode-new.h
+++ b/target/i386/tcg/decode-new.h
@@ -92,7 +92,7 @@ typedef enum X86OpSize {
     /* Custom */
     X86_SIZE_d64,
     X86_SIZE_f64,
-    X86_SIZE_ph, /* SSE/AVX packed half precision */
+    X86_SIZE_xh, /* SSE/AVX packed half register */
 } X86OpSize;
 
 typedef enum X86CPUIDFeature {
diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc
index 4fe8dec427..45a3e55cbf 100644
--- a/target/i386/tcg/emit.c.inc
+++ b/target/i386/tcg/emit.c.inc
@@ -1914,12 +1914,22 @@ static void gen_VCOMI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
     set_cc_op(s, CC_OP_EFLAGS);
 }
 
-static void gen_VCVTfp2fp(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VCVTPD2PS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
 {
-    gen_unary_fp_sse(s, env, decode,
-                     gen_helper_cvtpd2ps_xmm, gen_helper_cvtps2pd_xmm,
-                     gen_helper_cvtpd2ps_ymm, gen_helper_cvtps2pd_ymm,
-                     gen_helper_cvtsd2ss, gen_helper_cvtss2sd);
+    if (s->vex_l) {
+        gen_helper_cvtpd2ps_ymm(cpu_env, OP_PTR0, OP_PTR2);
+    } else {
+        gen_helper_cvtpd2ps_xmm(cpu_env, OP_PTR0, OP_PTR2);
+    }
+}
+
+static void gen_VCVTPS2PD(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    if (s->vex_l) {
+        gen_helper_cvtps2pd_ymm(cpu_env, OP_PTR0, OP_PTR2);
+    } else {
+        gen_helper_cvtps2pd_xmm(cpu_env, OP_PTR0, OP_PTR2);
+    }
 }
 
 static void gen_VCVTPS2PH(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
@@ -1936,6 +1946,16 @@ static void gen_VCVTPS2PH(DisasContext *s, CPUX86State *env, X86DecodedInsn *dec
     }
 }
 
+static void gen_VCVTSD2SS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    gen_helper_cvtsd2ss(cpu_env, OP_PTR0, OP_PTR1, OP_PTR2);
+}
+
+static void gen_VCVTSS2SD(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    gen_helper_cvtss2sd(cpu_env, OP_PTR0, OP_PTR1, OP_PTR2);
+}
+
 static void gen_VCVTSI2Sx(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
 {
     int vec_len = vector_len(s, decode);
diff --git a/target/i386/tcg/fpu_helper.c b/target/i386/tcg/fpu_helper.c
index 6f3741b635..4430d3d380 100644
--- a/target/i386/tcg/fpu_helper.c
+++ b/target/i386/tcg/fpu_helper.c
@@ -21,6 +21,7 @@
 #include <math.h>
 #include "cpu.h"
 #include "tcg-cpu.h"
+#include "exec/cpu_ldst.h"
 #include "exec/helper-proto.h"
 #include "fpu/softfloat.h"
 #include "fpu/softfloat-macros.h"
diff --git a/target/i386/tcg/sysemu/excp_helper.c b/target/i386/tcg/sysemu/excp_helper.c
index b5f0abffa3..226689a4f2 100644
--- a/target/i386/tcg/sysemu/excp_helper.c
+++ b/target/i386/tcg/sysemu/excp_helper.c
@@ -19,6 +19,7 @@
 
 #include "qemu/osdep.h"
 #include "cpu.h"
+#include "exec/cpu_ldst.h"
 #include "exec/exec-all.h"
 #include "tcg/helper-tcg.h"
 
diff --git a/target/i386/tcg/sysemu/fpu_helper.c b/target/i386/tcg/sysemu/fpu_helper.c
index 1c3610da3b..93506cdd94 100644
--- a/target/i386/tcg/sysemu/fpu_helper.c
+++ b/target/i386/tcg/sysemu/fpu_helper.c
@@ -18,6 +18,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/main-loop.h"
 #include "cpu.h"
 #include "hw/irq.h"
 
@@ -31,7 +32,9 @@ void x86_register_ferr_irq(qemu_irq irq)
 void fpu_check_raise_ferr_irq(CPUX86State *env)
 {
     if (ferr_irq && !(env->hflags2 & HF2_IGNNE_MASK)) {
+        qemu_mutex_lock_iothread();
         qemu_irq_raise(ferr_irq);
+        qemu_mutex_unlock_iothread();
         return;
     }
 }
@@ -45,6 +48,9 @@ void cpu_clear_ignne(void)
 void cpu_set_ignne(void)
 {
     CPUX86State *env = &X86_CPU(first_cpu)->env;
+
+    assert(qemu_mutex_iothread_locked());
+
     env->hflags2 |= HF2_IGNNE_MASK;
     /*
      * We get here in response to a write to port F0h.  The chipset should
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index 6f72782fd9..ec717a4e9e 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -1074,7 +1074,7 @@ static CCPrepare gen_prepare_eflags_z(DisasContext *s, TCGv reg)
 }
 
 /* perform a conditional store into register 'reg' according to jump opcode
-   value 'b'. In the fast case, T0 is guaranted not to be used. */
+   value 'b'. In the fast case, T0 is guaranteed not to be used. */
 static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
 {
     int inv, jcc_op, cond;
@@ -1207,7 +1207,7 @@ static inline void gen_compute_eflags_c(DisasContext *s, TCGv reg)
 }
 
 /* generate a conditional jump to label 'l1' according to jump opcode
-   value 'b'. In the fast case, T0 is guaranted not to be used. */
+   value 'b'. In the fast case, T0 is guaranteed not to be used. */
 static inline void gen_jcc1_noeob(DisasContext *s, int b, TCGLabel *l1)
 {
     CCPrepare cc = gen_prepare_cc(s, b, s->T0);
@@ -1224,7 +1224,7 @@ static inline void gen_jcc1_noeob(DisasContext *s, int b, TCGLabel *l1)
 }
 
 /* Generate a conditional jump to label 'l1' according to jump opcode
-   value 'b'. In the fast case, T0 is guaranted not to be used.
+   value 'b'. In the fast case, T0 is guaranteed not to be used.
    A translation block must end soon.  */
 static inline void gen_jcc1(DisasContext *s, int b, TCGLabel *l1)
 {
@@ -4638,7 +4638,11 @@ static bool disas_insn(DisasContext *s, CPUState *cpu)
                 case 0x0a: /* grp d9/2 */
                     switch (rm) {
                     case 0: /* fnop */
-                        /* check exceptions (FreeBSD FPU probe) */
+                        /*
+                         * check exceptions (FreeBSD FPU probe)
+                         * needs to be treated as I/O because of ferr_irq
+                         */
+                        translator_io_start(&s->base);
                         gen_helper_fwait(cpu_env);
                         update_fip = false;
                         break;
@@ -5370,7 +5374,7 @@ static bool disas_insn(DisasContext *s, CPUState *cpu)
         if (s->prefix & PREFIX_LOCK) {
             switch (op) {
             case 0: /* bt */
-                /* Needs no atomic ops; we surpressed the normal
+                /* Needs no atomic ops; we suppressed the normal
                    memory load for LOCK above so do it now.  */
                 gen_op_ld_v(s, ot, s->T0, s->A0);
                 break;
@@ -5567,6 +5571,8 @@ static bool disas_insn(DisasContext *s, CPUState *cpu)
             (HF_MP_MASK | HF_TS_MASK)) {
             gen_exception(s, EXCP07_PREX);
         } else {
+            /* needs to be treated as I/O because of ferr_irq */
+            translator_io_start(&s->base);
             gen_helper_fwait(cpu_env);
         }
         break;
diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c
index 27fc6e1f33..fc7f70fbe5 100644
--- a/target/loongarch/cpu.c
+++ b/target/loongarch/cpu.c
@@ -11,6 +11,7 @@
 #include "qapi/error.h"
 #include "qemu/module.h"
 #include "sysemu/qtest.h"
+#include "exec/cpu_ldst.h"
 #include "exec/exec-all.h"
 #include "cpu.h"
 #include "internals.h"
@@ -18,6 +19,7 @@
 #include "cpu-csr.h"
 #include "sysemu/reset.h"
 #include "tcg/tcg.h"
+#include "vec.h"
 
 const char * const regnames[32] = {
     "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
@@ -53,6 +55,7 @@ static const char * const excp_names[] = {
     [EXCCODE_DBP] = "Debug breakpoint",
     [EXCCODE_BCE] = "Bound Check Exception",
     [EXCCODE_SXD] = "128 bit vector instructions Disable exception",
+    [EXCCODE_ASXD] = "256 bit vector instructions Disable exception",
 };
 
 const char *loongarch_exception_name(int32_t exception)
@@ -188,6 +191,7 @@ static void loongarch_cpu_do_interrupt(CPUState *cs)
     case EXCCODE_FPD:
     case EXCCODE_FPE:
     case EXCCODE_SXD:
+    case EXCCODE_ASXD:
         env->CSR_BADV = env->pc;
         QEMU_FALLTHROUGH;
     case EXCCODE_BCE:
@@ -389,6 +393,7 @@ static void loongarch_la464_initfn(Object *obj)
     data = FIELD_DP32(data, CPUCFG2, FP_DP, 1);
     data = FIELD_DP32(data, CPUCFG2, FP_VER, 1);
     data = FIELD_DP32(data, CPUCFG2, LSX, 1),
+    data = FIELD_DP32(data, CPUCFG2, LASX, 1),
     data = FIELD_DP32(data, CPUCFG2, LLFTP, 1);
     data = FIELD_DP32(data, CPUCFG2, LLFTP_VER, 1);
     data = FIELD_DP32(data, CPUCFG2, LSPW, 1);
diff --git a/target/loongarch/cpu.h b/target/loongarch/cpu.h
index 4d7201995a..f125a8e49b 100644
--- a/target/loongarch/cpu.h
+++ b/target/loongarch/cpu.h
@@ -251,18 +251,20 @@ FIELD(TLB_MISC, ASID, 1, 10)
 FIELD(TLB_MISC, VPPN, 13, 35)
 FIELD(TLB_MISC, PS, 48, 6)
 
-#define LSX_LEN   (128)
+#define LSX_LEN    (128)
+#define LASX_LEN   (256)
+
 typedef union VReg {
-    int8_t   B[LSX_LEN / 8];
-    int16_t  H[LSX_LEN / 16];
-    int32_t  W[LSX_LEN / 32];
-    int64_t  D[LSX_LEN / 64];
-    uint8_t  UB[LSX_LEN / 8];
-    uint16_t UH[LSX_LEN / 16];
-    uint32_t UW[LSX_LEN / 32];
-    uint64_t UD[LSX_LEN / 64];
-    Int128   Q[LSX_LEN / 128];
-}VReg;
+    int8_t   B[LASX_LEN / 8];
+    int16_t  H[LASX_LEN / 16];
+    int32_t  W[LASX_LEN / 32];
+    int64_t  D[LASX_LEN / 64];
+    uint8_t  UB[LASX_LEN / 8];
+    uint16_t UH[LASX_LEN / 16];
+    uint32_t UW[LASX_LEN / 32];
+    uint64_t UD[LASX_LEN / 64];
+    Int128   Q[LASX_LEN / 128];
+} VReg;
 
 typedef union fpr_t fpr_t;
 union fpr_t {
@@ -460,6 +462,7 @@ static inline void set_pc(CPULoongArchState *env, uint64_t value)
 #define HW_FLAGS_CRMD_PG    R_CSR_CRMD_PG_MASK   /* 0x10 */
 #define HW_FLAGS_EUEN_FPE   0x04
 #define HW_FLAGS_EUEN_SXE   0x08
+#define HW_FLAGS_EUEN_ASXE  0x10
 #define HW_FLAGS_VA32       0x20
 
 static inline void cpu_get_tb_cpu_state(CPULoongArchState *env, vaddr *pc,
@@ -470,6 +473,7 @@ static inline void cpu_get_tb_cpu_state(CPULoongArchState *env, vaddr *pc,
     *flags = env->CSR_CRMD & (R_CSR_CRMD_PLV_MASK | R_CSR_CRMD_PG_MASK);
     *flags |= FIELD_EX64(env->CSR_EUEN, CSR_EUEN, FPE) * HW_FLAGS_EUEN_FPE;
     *flags |= FIELD_EX64(env->CSR_EUEN, CSR_EUEN, SXE) * HW_FLAGS_EUEN_SXE;
+    *flags |= FIELD_EX64(env->CSR_EUEN, CSR_EUEN, ASXE) * HW_FLAGS_EUEN_ASXE;
     *flags |= is_va32(env) * HW_FLAGS_VA32;
 }
 
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index 5c402d944d..c8a29eac2b 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -1695,3 +1695,927 @@ INSN_LSX(vstelm_d,         vr_ii)
 INSN_LSX(vstelm_w,         vr_ii)
 INSN_LSX(vstelm_h,         vr_ii)
 INSN_LSX(vstelm_b,         vr_ii)
+
+#define INSN_LASX(insn, type)                               \
+static bool trans_##insn(DisasContext *ctx, arg_##type * a) \
+{                                                           \
+    output_##type ## _x(ctx, a, #insn);                     \
+    return true;                                            \
+}
+
+static void output_cv_x(DisasContext *ctx, arg_cv *a, const char *mnemonic)
+{
+    output(ctx, mnemonic, "fcc%d, x%d", a->cd, a->vj);
+}
+
+static void output_v_i_x(DisasContext *ctx, arg_v_i *a, const char *mnemonic)
+{
+    output(ctx, mnemonic, "x%d, 0x%x", a->vd, a->imm);
+}
+
+static void output_vvvv_x(DisasContext *ctx, arg_vvvv *a, const char *mnemonic)
+{
+    output(ctx, mnemonic, "x%d, x%d, x%d, x%d", a->vd, a->vj, a->vk, a->va);
+}
+
+static void output_vvv_x(DisasContext *ctx, arg_vvv * a, const char *mnemonic)
+{
+    output(ctx, mnemonic, "x%d, x%d, x%d", a->vd, a->vj, a->vk);
+}
+
+static void output_vr_x(DisasContext *ctx, arg_vr *a, const char *mnemonic)
+{
+    output(ctx, mnemonic, "x%d, r%d", a->vd, a->rj);
+}
+
+static void output_vv_i_x(DisasContext *ctx, arg_vv_i *a, const char *mnemonic)
+{
+    output(ctx, mnemonic, "x%d, x%d, 0x%x", a->vd, a->vj, a->imm);
+}
+
+static void output_vv_x(DisasContext *ctx, arg_vv *a, const char *mnemonic)
+{
+    output(ctx, mnemonic, "x%d, x%d", a->vd, a->vj);
+}
+
+static void output_vr_i_x(DisasContext *ctx, arg_vr_i *a, const char *mnemonic)
+{
+    output(ctx, mnemonic, "x%d, r%d, 0x%x", a->vd, a->rj, a->imm);
+}
+
+static void output_rv_i_x(DisasContext *ctx, arg_rv_i *a, const char *mnemonic)
+{
+    output(ctx, mnemonic, "r%d, x%d, 0x%x", a->rd, a->vj, a->imm);
+}
+
+static void output_vvr_x(DisasContext *ctx, arg_vvr *a, const char *mnemonic)
+{
+    output(ctx, mnemonic, "x%d, x%d, r%d", a->vd, a->vj, a->rk);
+}
+
+static void output_vrr_x(DisasContext *ctx, arg_vrr *a, const char *mnemonic)
+{
+    output(ctx, mnemonic, "x%d, r%d, r%d", a->vd, a->rj, a->rk);
+}
+
+static void output_vr_ii_x(DisasContext *ctx, arg_vr_ii *a, const char *mnemonic)
+{
+    output(ctx, mnemonic, "x%d, r%d, 0x%x, 0x%x", a->vd, a->rj, a->imm, a->imm2);
+}
+
+INSN_LASX(xvadd_b,           vvv)
+INSN_LASX(xvadd_h,           vvv)
+INSN_LASX(xvadd_w,           vvv)
+INSN_LASX(xvadd_d,           vvv)
+INSN_LASX(xvadd_q,           vvv)
+INSN_LASX(xvsub_b,           vvv)
+INSN_LASX(xvsub_h,           vvv)
+INSN_LASX(xvsub_w,           vvv)
+INSN_LASX(xvsub_d,           vvv)
+INSN_LASX(xvsub_q,           vvv)
+
+INSN_LASX(xvaddi_bu,         vv_i)
+INSN_LASX(xvaddi_hu,         vv_i)
+INSN_LASX(xvaddi_wu,         vv_i)
+INSN_LASX(xvaddi_du,         vv_i)
+INSN_LASX(xvsubi_bu,         vv_i)
+INSN_LASX(xvsubi_hu,         vv_i)
+INSN_LASX(xvsubi_wu,         vv_i)
+INSN_LASX(xvsubi_du,         vv_i)
+
+INSN_LASX(xvneg_b,           vv)
+INSN_LASX(xvneg_h,           vv)
+INSN_LASX(xvneg_w,           vv)
+INSN_LASX(xvneg_d,           vv)
+
+INSN_LASX(xvsadd_b,          vvv)
+INSN_LASX(xvsadd_h,          vvv)
+INSN_LASX(xvsadd_w,          vvv)
+INSN_LASX(xvsadd_d,          vvv)
+INSN_LASX(xvsadd_bu,         vvv)
+INSN_LASX(xvsadd_hu,         vvv)
+INSN_LASX(xvsadd_wu,         vvv)
+INSN_LASX(xvsadd_du,         vvv)
+INSN_LASX(xvssub_b,          vvv)
+INSN_LASX(xvssub_h,          vvv)
+INSN_LASX(xvssub_w,          vvv)
+INSN_LASX(xvssub_d,          vvv)
+INSN_LASX(xvssub_bu,         vvv)
+INSN_LASX(xvssub_hu,         vvv)
+INSN_LASX(xvssub_wu,         vvv)
+INSN_LASX(xvssub_du,         vvv)
+
+INSN_LASX(xvhaddw_h_b,       vvv)
+INSN_LASX(xvhaddw_w_h,       vvv)
+INSN_LASX(xvhaddw_d_w,       vvv)
+INSN_LASX(xvhaddw_q_d,       vvv)
+INSN_LASX(xvhaddw_hu_bu,     vvv)
+INSN_LASX(xvhaddw_wu_hu,     vvv)
+INSN_LASX(xvhaddw_du_wu,     vvv)
+INSN_LASX(xvhaddw_qu_du,     vvv)
+INSN_LASX(xvhsubw_h_b,       vvv)
+INSN_LASX(xvhsubw_w_h,       vvv)
+INSN_LASX(xvhsubw_d_w,       vvv)
+INSN_LASX(xvhsubw_q_d,       vvv)
+INSN_LASX(xvhsubw_hu_bu,     vvv)
+INSN_LASX(xvhsubw_wu_hu,     vvv)
+INSN_LASX(xvhsubw_du_wu,     vvv)
+INSN_LASX(xvhsubw_qu_du,     vvv)
+
+INSN_LASX(xvaddwev_h_b,      vvv)
+INSN_LASX(xvaddwev_w_h,      vvv)
+INSN_LASX(xvaddwev_d_w,      vvv)
+INSN_LASX(xvaddwev_q_d,      vvv)
+INSN_LASX(xvaddwod_h_b,      vvv)
+INSN_LASX(xvaddwod_w_h,      vvv)
+INSN_LASX(xvaddwod_d_w,      vvv)
+INSN_LASX(xvaddwod_q_d,      vvv)
+INSN_LASX(xvsubwev_h_b,      vvv)
+INSN_LASX(xvsubwev_w_h,      vvv)
+INSN_LASX(xvsubwev_d_w,      vvv)
+INSN_LASX(xvsubwev_q_d,      vvv)
+INSN_LASX(xvsubwod_h_b,      vvv)
+INSN_LASX(xvsubwod_w_h,      vvv)
+INSN_LASX(xvsubwod_d_w,      vvv)
+INSN_LASX(xvsubwod_q_d,      vvv)
+
+INSN_LASX(xvaddwev_h_bu,     vvv)
+INSN_LASX(xvaddwev_w_hu,     vvv)
+INSN_LASX(xvaddwev_d_wu,     vvv)
+INSN_LASX(xvaddwev_q_du,     vvv)
+INSN_LASX(xvaddwod_h_bu,     vvv)
+INSN_LASX(xvaddwod_w_hu,     vvv)
+INSN_LASX(xvaddwod_d_wu,     vvv)
+INSN_LASX(xvaddwod_q_du,     vvv)
+INSN_LASX(xvsubwev_h_bu,     vvv)
+INSN_LASX(xvsubwev_w_hu,     vvv)
+INSN_LASX(xvsubwev_d_wu,     vvv)
+INSN_LASX(xvsubwev_q_du,     vvv)
+INSN_LASX(xvsubwod_h_bu,     vvv)
+INSN_LASX(xvsubwod_w_hu,     vvv)
+INSN_LASX(xvsubwod_d_wu,     vvv)
+INSN_LASX(xvsubwod_q_du,     vvv)
+
+INSN_LASX(xvaddwev_h_bu_b,   vvv)
+INSN_LASX(xvaddwev_w_hu_h,   vvv)
+INSN_LASX(xvaddwev_d_wu_w,   vvv)
+INSN_LASX(xvaddwev_q_du_d,   vvv)
+INSN_LASX(xvaddwod_h_bu_b,   vvv)
+INSN_LASX(xvaddwod_w_hu_h,   vvv)
+INSN_LASX(xvaddwod_d_wu_w,   vvv)
+INSN_LASX(xvaddwod_q_du_d,   vvv)
+
+INSN_LASX(xvavg_b,           vvv)
+INSN_LASX(xvavg_h,           vvv)
+INSN_LASX(xvavg_w,           vvv)
+INSN_LASX(xvavg_d,           vvv)
+INSN_LASX(xvavg_bu,          vvv)
+INSN_LASX(xvavg_hu,          vvv)
+INSN_LASX(xvavg_wu,          vvv)
+INSN_LASX(xvavg_du,          vvv)
+INSN_LASX(xvavgr_b,          vvv)
+INSN_LASX(xvavgr_h,          vvv)
+INSN_LASX(xvavgr_w,          vvv)
+INSN_LASX(xvavgr_d,          vvv)
+INSN_LASX(xvavgr_bu,         vvv)
+INSN_LASX(xvavgr_hu,         vvv)
+INSN_LASX(xvavgr_wu,         vvv)
+INSN_LASX(xvavgr_du,         vvv)
+
+INSN_LASX(xvabsd_b,          vvv)
+INSN_LASX(xvabsd_h,          vvv)
+INSN_LASX(xvabsd_w,          vvv)
+INSN_LASX(xvabsd_d,          vvv)
+INSN_LASX(xvabsd_bu,         vvv)
+INSN_LASX(xvabsd_hu,         vvv)
+INSN_LASX(xvabsd_wu,         vvv)
+INSN_LASX(xvabsd_du,         vvv)
+
+INSN_LASX(xvadda_b,          vvv)
+INSN_LASX(xvadda_h,          vvv)
+INSN_LASX(xvadda_w,          vvv)
+INSN_LASX(xvadda_d,          vvv)
+
+INSN_LASX(xvmax_b,           vvv)
+INSN_LASX(xvmax_h,           vvv)
+INSN_LASX(xvmax_w,           vvv)
+INSN_LASX(xvmax_d,           vvv)
+INSN_LASX(xvmin_b,           vvv)
+INSN_LASX(xvmin_h,           vvv)
+INSN_LASX(xvmin_w,           vvv)
+INSN_LASX(xvmin_d,           vvv)
+INSN_LASX(xvmax_bu,          vvv)
+INSN_LASX(xvmax_hu,          vvv)
+INSN_LASX(xvmax_wu,          vvv)
+INSN_LASX(xvmax_du,          vvv)
+INSN_LASX(xvmin_bu,          vvv)
+INSN_LASX(xvmin_hu,          vvv)
+INSN_LASX(xvmin_wu,          vvv)
+INSN_LASX(xvmin_du,          vvv)
+
+INSN_LASX(xvmaxi_b,          vv_i)
+INSN_LASX(xvmaxi_h,          vv_i)
+INSN_LASX(xvmaxi_w,          vv_i)
+INSN_LASX(xvmaxi_d,          vv_i)
+INSN_LASX(xvmini_b,          vv_i)
+INSN_LASX(xvmini_h,          vv_i)
+INSN_LASX(xvmini_w,          vv_i)
+INSN_LASX(xvmini_d,          vv_i)
+INSN_LASX(xvmaxi_bu,         vv_i)
+INSN_LASX(xvmaxi_hu,         vv_i)
+INSN_LASX(xvmaxi_wu,         vv_i)
+INSN_LASX(xvmaxi_du,         vv_i)
+INSN_LASX(xvmini_bu,         vv_i)
+INSN_LASX(xvmini_hu,         vv_i)
+INSN_LASX(xvmini_wu,         vv_i)
+INSN_LASX(xvmini_du,         vv_i)
+
+INSN_LASX(xvmul_b,           vvv)
+INSN_LASX(xvmul_h,           vvv)
+INSN_LASX(xvmul_w,           vvv)
+INSN_LASX(xvmul_d,           vvv)
+INSN_LASX(xvmuh_b,           vvv)
+INSN_LASX(xvmuh_h,           vvv)
+INSN_LASX(xvmuh_w,           vvv)
+INSN_LASX(xvmuh_d,           vvv)
+INSN_LASX(xvmuh_bu,          vvv)
+INSN_LASX(xvmuh_hu,          vvv)
+INSN_LASX(xvmuh_wu,          vvv)
+INSN_LASX(xvmuh_du,          vvv)
+
+INSN_LASX(xvmulwev_h_b,      vvv)
+INSN_LASX(xvmulwev_w_h,      vvv)
+INSN_LASX(xvmulwev_d_w,      vvv)
+INSN_LASX(xvmulwev_q_d,      vvv)
+INSN_LASX(xvmulwod_h_b,      vvv)
+INSN_LASX(xvmulwod_w_h,      vvv)
+INSN_LASX(xvmulwod_d_w,      vvv)
+INSN_LASX(xvmulwod_q_d,      vvv)
+INSN_LASX(xvmulwev_h_bu,     vvv)
+INSN_LASX(xvmulwev_w_hu,     vvv)
+INSN_LASX(xvmulwev_d_wu,     vvv)
+INSN_LASX(xvmulwev_q_du,     vvv)
+INSN_LASX(xvmulwod_h_bu,     vvv)
+INSN_LASX(xvmulwod_w_hu,     vvv)
+INSN_LASX(xvmulwod_d_wu,     vvv)
+INSN_LASX(xvmulwod_q_du,     vvv)
+INSN_LASX(xvmulwev_h_bu_b,   vvv)
+INSN_LASX(xvmulwev_w_hu_h,   vvv)
+INSN_LASX(xvmulwev_d_wu_w,   vvv)
+INSN_LASX(xvmulwev_q_du_d,   vvv)
+INSN_LASX(xvmulwod_h_bu_b,   vvv)
+INSN_LASX(xvmulwod_w_hu_h,   vvv)
+INSN_LASX(xvmulwod_d_wu_w,   vvv)
+INSN_LASX(xvmulwod_q_du_d,   vvv)
+
+INSN_LASX(xvmadd_b,          vvv)
+INSN_LASX(xvmadd_h,          vvv)
+INSN_LASX(xvmadd_w,          vvv)
+INSN_LASX(xvmadd_d,          vvv)
+INSN_LASX(xvmsub_b,          vvv)
+INSN_LASX(xvmsub_h,          vvv)
+INSN_LASX(xvmsub_w,          vvv)
+INSN_LASX(xvmsub_d,          vvv)
+
+INSN_LASX(xvmaddwev_h_b,     vvv)
+INSN_LASX(xvmaddwev_w_h,     vvv)
+INSN_LASX(xvmaddwev_d_w,     vvv)
+INSN_LASX(xvmaddwev_q_d,     vvv)
+INSN_LASX(xvmaddwod_h_b,     vvv)
+INSN_LASX(xvmaddwod_w_h,     vvv)
+INSN_LASX(xvmaddwod_d_w,     vvv)
+INSN_LASX(xvmaddwod_q_d,     vvv)
+INSN_LASX(xvmaddwev_h_bu,    vvv)
+INSN_LASX(xvmaddwev_w_hu,    vvv)
+INSN_LASX(xvmaddwev_d_wu,    vvv)
+INSN_LASX(xvmaddwev_q_du,    vvv)
+INSN_LASX(xvmaddwod_h_bu,    vvv)
+INSN_LASX(xvmaddwod_w_hu,    vvv)
+INSN_LASX(xvmaddwod_d_wu,    vvv)
+INSN_LASX(xvmaddwod_q_du,    vvv)
+INSN_LASX(xvmaddwev_h_bu_b,  vvv)
+INSN_LASX(xvmaddwev_w_hu_h,  vvv)
+INSN_LASX(xvmaddwev_d_wu_w,  vvv)
+INSN_LASX(xvmaddwev_q_du_d,  vvv)
+INSN_LASX(xvmaddwod_h_bu_b,  vvv)
+INSN_LASX(xvmaddwod_w_hu_h,  vvv)
+INSN_LASX(xvmaddwod_d_wu_w,  vvv)
+INSN_LASX(xvmaddwod_q_du_d,  vvv)
+
+INSN_LASX(xvdiv_b,           vvv)
+INSN_LASX(xvdiv_h,           vvv)
+INSN_LASX(xvdiv_w,           vvv)
+INSN_LASX(xvdiv_d,           vvv)
+INSN_LASX(xvdiv_bu,          vvv)
+INSN_LASX(xvdiv_hu,          vvv)
+INSN_LASX(xvdiv_wu,          vvv)
+INSN_LASX(xvdiv_du,          vvv)
+INSN_LASX(xvmod_b,           vvv)
+INSN_LASX(xvmod_h,           vvv)
+INSN_LASX(xvmod_w,           vvv)
+INSN_LASX(xvmod_d,           vvv)
+INSN_LASX(xvmod_bu,          vvv)
+INSN_LASX(xvmod_hu,          vvv)
+INSN_LASX(xvmod_wu,          vvv)
+INSN_LASX(xvmod_du,          vvv)
+
+INSN_LASX(xvsat_b,           vv_i)
+INSN_LASX(xvsat_h,           vv_i)
+INSN_LASX(xvsat_w,           vv_i)
+INSN_LASX(xvsat_d,           vv_i)
+INSN_LASX(xvsat_bu,          vv_i)
+INSN_LASX(xvsat_hu,          vv_i)
+INSN_LASX(xvsat_wu,          vv_i)
+INSN_LASX(xvsat_du,          vv_i)
+
+INSN_LASX(xvexth_h_b,        vv)
+INSN_LASX(xvexth_w_h,        vv)
+INSN_LASX(xvexth_d_w,        vv)
+INSN_LASX(xvexth_q_d,        vv)
+INSN_LASX(xvexth_hu_bu,      vv)
+INSN_LASX(xvexth_wu_hu,      vv)
+INSN_LASX(xvexth_du_wu,      vv)
+INSN_LASX(xvexth_qu_du,      vv)
+
+INSN_LASX(vext2xv_h_b,       vv)
+INSN_LASX(vext2xv_w_b,       vv)
+INSN_LASX(vext2xv_d_b,       vv)
+INSN_LASX(vext2xv_w_h,       vv)
+INSN_LASX(vext2xv_d_h,       vv)
+INSN_LASX(vext2xv_d_w,       vv)
+INSN_LASX(vext2xv_hu_bu,     vv)
+INSN_LASX(vext2xv_wu_bu,     vv)
+INSN_LASX(vext2xv_du_bu,     vv)
+INSN_LASX(vext2xv_wu_hu,     vv)
+INSN_LASX(vext2xv_du_hu,     vv)
+INSN_LASX(vext2xv_du_wu,     vv)
+
+INSN_LASX(xvsigncov_b,       vvv)
+INSN_LASX(xvsigncov_h,       vvv)
+INSN_LASX(xvsigncov_w,       vvv)
+INSN_LASX(xvsigncov_d,       vvv)
+
+INSN_LASX(xvmskltz_b,        vv)
+INSN_LASX(xvmskltz_h,        vv)
+INSN_LASX(xvmskltz_w,        vv)
+INSN_LASX(xvmskltz_d,        vv)
+INSN_LASX(xvmskgez_b,        vv)
+INSN_LASX(xvmsknz_b,         vv)
+
+INSN_LASX(xvldi,             v_i)
+
+INSN_LASX(xvand_v,           vvv)
+INSN_LASX(xvor_v,            vvv)
+INSN_LASX(xvxor_v,           vvv)
+INSN_LASX(xvnor_v,           vvv)
+INSN_LASX(xvandn_v,          vvv)
+INSN_LASX(xvorn_v,           vvv)
+
+INSN_LASX(xvandi_b,          vv_i)
+INSN_LASX(xvori_b,           vv_i)
+INSN_LASX(xvxori_b,          vv_i)
+INSN_LASX(xvnori_b,          vv_i)
+
+INSN_LASX(xvsll_b,           vvv)
+INSN_LASX(xvsll_h,           vvv)
+INSN_LASX(xvsll_w,           vvv)
+INSN_LASX(xvsll_d,           vvv)
+INSN_LASX(xvslli_b,          vv_i)
+INSN_LASX(xvslli_h,          vv_i)
+INSN_LASX(xvslli_w,          vv_i)
+INSN_LASX(xvslli_d,          vv_i)
+
+INSN_LASX(xvsrl_b,           vvv)
+INSN_LASX(xvsrl_h,           vvv)
+INSN_LASX(xvsrl_w,           vvv)
+INSN_LASX(xvsrl_d,           vvv)
+INSN_LASX(xvsrli_b,          vv_i)
+INSN_LASX(xvsrli_h,          vv_i)
+INSN_LASX(xvsrli_w,          vv_i)
+INSN_LASX(xvsrli_d,          vv_i)
+
+INSN_LASX(xvsra_b,           vvv)
+INSN_LASX(xvsra_h,           vvv)
+INSN_LASX(xvsra_w,           vvv)
+INSN_LASX(xvsra_d,           vvv)
+INSN_LASX(xvsrai_b,          vv_i)
+INSN_LASX(xvsrai_h,          vv_i)
+INSN_LASX(xvsrai_w,          vv_i)
+INSN_LASX(xvsrai_d,          vv_i)
+
+INSN_LASX(xvrotr_b,          vvv)
+INSN_LASX(xvrotr_h,          vvv)
+INSN_LASX(xvrotr_w,          vvv)
+INSN_LASX(xvrotr_d,          vvv)
+INSN_LASX(xvrotri_b,         vv_i)
+INSN_LASX(xvrotri_h,         vv_i)
+INSN_LASX(xvrotri_w,         vv_i)
+INSN_LASX(xvrotri_d,         vv_i)
+
+INSN_LASX(xvsllwil_h_b,      vv_i)
+INSN_LASX(xvsllwil_w_h,      vv_i)
+INSN_LASX(xvsllwil_d_w,      vv_i)
+INSN_LASX(xvextl_q_d,        vv)
+INSN_LASX(xvsllwil_hu_bu,    vv_i)
+INSN_LASX(xvsllwil_wu_hu,    vv_i)
+INSN_LASX(xvsllwil_du_wu,    vv_i)
+INSN_LASX(xvextl_qu_du,      vv)
+
+INSN_LASX(xvsrlr_b,          vvv)
+INSN_LASX(xvsrlr_h,          vvv)
+INSN_LASX(xvsrlr_w,          vvv)
+INSN_LASX(xvsrlr_d,          vvv)
+INSN_LASX(xvsrlri_b,         vv_i)
+INSN_LASX(xvsrlri_h,         vv_i)
+INSN_LASX(xvsrlri_w,         vv_i)
+INSN_LASX(xvsrlri_d,         vv_i)
+
+INSN_LASX(xvsrar_b,          vvv)
+INSN_LASX(xvsrar_h,          vvv)
+INSN_LASX(xvsrar_w,          vvv)
+INSN_LASX(xvsrar_d,          vvv)
+INSN_LASX(xvsrari_b,         vv_i)
+INSN_LASX(xvsrari_h,         vv_i)
+INSN_LASX(xvsrari_w,         vv_i)
+INSN_LASX(xvsrari_d,         vv_i)
+
+INSN_LASX(xvsrln_b_h,        vvv)
+INSN_LASX(xvsrln_h_w,        vvv)
+INSN_LASX(xvsrln_w_d,        vvv)
+INSN_LASX(xvsran_b_h,        vvv)
+INSN_LASX(xvsran_h_w,        vvv)
+INSN_LASX(xvsran_w_d,        vvv)
+
+INSN_LASX(xvsrlni_b_h,       vv_i)
+INSN_LASX(xvsrlni_h_w,       vv_i)
+INSN_LASX(xvsrlni_w_d,       vv_i)
+INSN_LASX(xvsrlni_d_q,       vv_i)
+INSN_LASX(xvsrani_b_h,       vv_i)
+INSN_LASX(xvsrani_h_w,       vv_i)
+INSN_LASX(xvsrani_w_d,       vv_i)
+INSN_LASX(xvsrani_d_q,       vv_i)
+
+INSN_LASX(xvsrlrn_b_h,       vvv)
+INSN_LASX(xvsrlrn_h_w,       vvv)
+INSN_LASX(xvsrlrn_w_d,       vvv)
+INSN_LASX(xvsrarn_b_h,       vvv)
+INSN_LASX(xvsrarn_h_w,       vvv)
+INSN_LASX(xvsrarn_w_d,       vvv)
+
+INSN_LASX(xvsrlrni_b_h,      vv_i)
+INSN_LASX(xvsrlrni_h_w,      vv_i)
+INSN_LASX(xvsrlrni_w_d,      vv_i)
+INSN_LASX(xvsrlrni_d_q,      vv_i)
+INSN_LASX(xvsrarni_b_h,      vv_i)
+INSN_LASX(xvsrarni_h_w,      vv_i)
+INSN_LASX(xvsrarni_w_d,      vv_i)
+INSN_LASX(xvsrarni_d_q,      vv_i)
+
+INSN_LASX(xvssrln_b_h,       vvv)
+INSN_LASX(xvssrln_h_w,       vvv)
+INSN_LASX(xvssrln_w_d,       vvv)
+INSN_LASX(xvssran_b_h,       vvv)
+INSN_LASX(xvssran_h_w,       vvv)
+INSN_LASX(xvssran_w_d,       vvv)
+INSN_LASX(xvssrln_bu_h,      vvv)
+INSN_LASX(xvssrln_hu_w,      vvv)
+INSN_LASX(xvssrln_wu_d,      vvv)
+INSN_LASX(xvssran_bu_h,      vvv)
+INSN_LASX(xvssran_hu_w,      vvv)
+INSN_LASX(xvssran_wu_d,      vvv)
+
+INSN_LASX(xvssrlni_b_h,      vv_i)
+INSN_LASX(xvssrlni_h_w,      vv_i)
+INSN_LASX(xvssrlni_w_d,      vv_i)
+INSN_LASX(xvssrlni_d_q,      vv_i)
+INSN_LASX(xvssrani_b_h,      vv_i)
+INSN_LASX(xvssrani_h_w,      vv_i)
+INSN_LASX(xvssrani_w_d,      vv_i)
+INSN_LASX(xvssrani_d_q,      vv_i)
+INSN_LASX(xvssrlni_bu_h,     vv_i)
+INSN_LASX(xvssrlni_hu_w,     vv_i)
+INSN_LASX(xvssrlni_wu_d,     vv_i)
+INSN_LASX(xvssrlni_du_q,     vv_i)
+INSN_LASX(xvssrani_bu_h,     vv_i)
+INSN_LASX(xvssrani_hu_w,     vv_i)
+INSN_LASX(xvssrani_wu_d,     vv_i)
+INSN_LASX(xvssrani_du_q,     vv_i)
+
+INSN_LASX(xvssrlrn_b_h,      vvv)
+INSN_LASX(xvssrlrn_h_w,      vvv)
+INSN_LASX(xvssrlrn_w_d,      vvv)
+INSN_LASX(xvssrarn_b_h,      vvv)
+INSN_LASX(xvssrarn_h_w,      vvv)
+INSN_LASX(xvssrarn_w_d,      vvv)
+INSN_LASX(xvssrlrn_bu_h,     vvv)
+INSN_LASX(xvssrlrn_hu_w,     vvv)
+INSN_LASX(xvssrlrn_wu_d,     vvv)
+INSN_LASX(xvssrarn_bu_h,     vvv)
+INSN_LASX(xvssrarn_hu_w,     vvv)
+INSN_LASX(xvssrarn_wu_d,     vvv)
+
+INSN_LASX(xvssrlrni_b_h,     vv_i)
+INSN_LASX(xvssrlrni_h_w,     vv_i)
+INSN_LASX(xvssrlrni_w_d,     vv_i)
+INSN_LASX(xvssrlrni_d_q,     vv_i)
+INSN_LASX(xvssrlrni_bu_h,    vv_i)
+INSN_LASX(xvssrlrni_hu_w,    vv_i)
+INSN_LASX(xvssrlrni_wu_d,    vv_i)
+INSN_LASX(xvssrlrni_du_q,    vv_i)
+INSN_LASX(xvssrarni_b_h,     vv_i)
+INSN_LASX(xvssrarni_h_w,     vv_i)
+INSN_LASX(xvssrarni_w_d,     vv_i)
+INSN_LASX(xvssrarni_d_q,     vv_i)
+INSN_LASX(xvssrarni_bu_h,    vv_i)
+INSN_LASX(xvssrarni_hu_w,    vv_i)
+INSN_LASX(xvssrarni_wu_d,    vv_i)
+INSN_LASX(xvssrarni_du_q,    vv_i)
+
+INSN_LASX(xvclo_b,           vv)
+INSN_LASX(xvclo_h,           vv)
+INSN_LASX(xvclo_w,           vv)
+INSN_LASX(xvclo_d,           vv)
+INSN_LASX(xvclz_b,           vv)
+INSN_LASX(xvclz_h,           vv)
+INSN_LASX(xvclz_w,           vv)
+INSN_LASX(xvclz_d,           vv)
+
+INSN_LASX(xvpcnt_b,          vv)
+INSN_LASX(xvpcnt_h,          vv)
+INSN_LASX(xvpcnt_w,          vv)
+INSN_LASX(xvpcnt_d,          vv)
+
+INSN_LASX(xvbitclr_b,        vvv)
+INSN_LASX(xvbitclr_h,        vvv)
+INSN_LASX(xvbitclr_w,        vvv)
+INSN_LASX(xvbitclr_d,        vvv)
+INSN_LASX(xvbitclri_b,       vv_i)
+INSN_LASX(xvbitclri_h,       vv_i)
+INSN_LASX(xvbitclri_w,       vv_i)
+INSN_LASX(xvbitclri_d,       vv_i)
+INSN_LASX(xvbitset_b,        vvv)
+INSN_LASX(xvbitset_h,        vvv)
+INSN_LASX(xvbitset_w,        vvv)
+INSN_LASX(xvbitset_d,        vvv)
+INSN_LASX(xvbitseti_b,       vv_i)
+INSN_LASX(xvbitseti_h,       vv_i)
+INSN_LASX(xvbitseti_w,       vv_i)
+INSN_LASX(xvbitseti_d,       vv_i)
+INSN_LASX(xvbitrev_b,        vvv)
+INSN_LASX(xvbitrev_h,        vvv)
+INSN_LASX(xvbitrev_w,        vvv)
+INSN_LASX(xvbitrev_d,        vvv)
+INSN_LASX(xvbitrevi_b,       vv_i)
+INSN_LASX(xvbitrevi_h,       vv_i)
+INSN_LASX(xvbitrevi_w,       vv_i)
+INSN_LASX(xvbitrevi_d,       vv_i)
+
+INSN_LASX(xvfrstp_b,         vvv)
+INSN_LASX(xvfrstp_h,         vvv)
+INSN_LASX(xvfrstpi_b,        vv_i)
+INSN_LASX(xvfrstpi_h,        vv_i)
+
+INSN_LASX(xvfadd_s,          vvv)
+INSN_LASX(xvfadd_d,          vvv)
+INSN_LASX(xvfsub_s,          vvv)
+INSN_LASX(xvfsub_d,          vvv)
+INSN_LASX(xvfmul_s,          vvv)
+INSN_LASX(xvfmul_d,          vvv)
+INSN_LASX(xvfdiv_s,          vvv)
+INSN_LASX(xvfdiv_d,          vvv)
+
+INSN_LASX(xvfmadd_s,         vvvv)
+INSN_LASX(xvfmadd_d,         vvvv)
+INSN_LASX(xvfmsub_s,         vvvv)
+INSN_LASX(xvfmsub_d,         vvvv)
+INSN_LASX(xvfnmadd_s,        vvvv)
+INSN_LASX(xvfnmadd_d,        vvvv)
+INSN_LASX(xvfnmsub_s,        vvvv)
+INSN_LASX(xvfnmsub_d,        vvvv)
+
+INSN_LASX(xvfmax_s,          vvv)
+INSN_LASX(xvfmax_d,          vvv)
+INSN_LASX(xvfmin_s,          vvv)
+INSN_LASX(xvfmin_d,          vvv)
+
+INSN_LASX(xvfmaxa_s,         vvv)
+INSN_LASX(xvfmaxa_d,         vvv)
+INSN_LASX(xvfmina_s,         vvv)
+INSN_LASX(xvfmina_d,         vvv)
+
+INSN_LASX(xvflogb_s,         vv)
+INSN_LASX(xvflogb_d,         vv)
+
+INSN_LASX(xvfclass_s,        vv)
+INSN_LASX(xvfclass_d,        vv)
+
+INSN_LASX(xvfsqrt_s,         vv)
+INSN_LASX(xvfsqrt_d,         vv)
+INSN_LASX(xvfrecip_s,        vv)
+INSN_LASX(xvfrecip_d,        vv)
+INSN_LASX(xvfrsqrt_s,        vv)
+INSN_LASX(xvfrsqrt_d,        vv)
+
+INSN_LASX(xvfcvtl_s_h,       vv)
+INSN_LASX(xvfcvth_s_h,       vv)
+INSN_LASX(xvfcvtl_d_s,       vv)
+INSN_LASX(xvfcvth_d_s,       vv)
+INSN_LASX(xvfcvt_h_s,        vvv)
+INSN_LASX(xvfcvt_s_d,        vvv)
+
+INSN_LASX(xvfrint_s,         vv)
+INSN_LASX(xvfrint_d,         vv)
+INSN_LASX(xvfrintrm_s,       vv)
+INSN_LASX(xvfrintrm_d,       vv)
+INSN_LASX(xvfrintrp_s,       vv)
+INSN_LASX(xvfrintrp_d,       vv)
+INSN_LASX(xvfrintrz_s,       vv)
+INSN_LASX(xvfrintrz_d,       vv)
+INSN_LASX(xvfrintrne_s,      vv)
+INSN_LASX(xvfrintrne_d,      vv)
+
+INSN_LASX(xvftint_w_s,       vv)
+INSN_LASX(xvftint_l_d,       vv)
+INSN_LASX(xvftintrm_w_s,     vv)
+INSN_LASX(xvftintrm_l_d,     vv)
+INSN_LASX(xvftintrp_w_s,     vv)
+INSN_LASX(xvftintrp_l_d,     vv)
+INSN_LASX(xvftintrz_w_s,     vv)
+INSN_LASX(xvftintrz_l_d,     vv)
+INSN_LASX(xvftintrne_w_s,    vv)
+INSN_LASX(xvftintrne_l_d,    vv)
+INSN_LASX(xvftint_wu_s,      vv)
+INSN_LASX(xvftint_lu_d,      vv)
+INSN_LASX(xvftintrz_wu_s,    vv)
+INSN_LASX(xvftintrz_lu_d,    vv)
+INSN_LASX(xvftint_w_d,       vvv)
+INSN_LASX(xvftintrm_w_d,     vvv)
+INSN_LASX(xvftintrp_w_d,     vvv)
+INSN_LASX(xvftintrz_w_d,     vvv)
+INSN_LASX(xvftintrne_w_d,    vvv)
+INSN_LASX(xvftintl_l_s,      vv)
+INSN_LASX(xvftinth_l_s,      vv)
+INSN_LASX(xvftintrml_l_s,    vv)
+INSN_LASX(xvftintrmh_l_s,    vv)
+INSN_LASX(xvftintrpl_l_s,    vv)
+INSN_LASX(xvftintrph_l_s,    vv)
+INSN_LASX(xvftintrzl_l_s,    vv)
+INSN_LASX(xvftintrzh_l_s,    vv)
+INSN_LASX(xvftintrnel_l_s,   vv)
+INSN_LASX(xvftintrneh_l_s,   vv)
+
+INSN_LASX(xvffint_s_w,       vv)
+INSN_LASX(xvffint_s_wu,      vv)
+INSN_LASX(xvffint_d_l,       vv)
+INSN_LASX(xvffint_d_lu,      vv)
+INSN_LASX(xvffintl_d_w,      vv)
+INSN_LASX(xvffinth_d_w,      vv)
+INSN_LASX(xvffint_s_l,       vvv)
+
+INSN_LASX(xvseq_b,           vvv)
+INSN_LASX(xvseq_h,           vvv)
+INSN_LASX(xvseq_w,           vvv)
+INSN_LASX(xvseq_d,           vvv)
+INSN_LASX(xvseqi_b,          vv_i)
+INSN_LASX(xvseqi_h,          vv_i)
+INSN_LASX(xvseqi_w,          vv_i)
+INSN_LASX(xvseqi_d,          vv_i)
+
+INSN_LASX(xvsle_b,           vvv)
+INSN_LASX(xvsle_h,           vvv)
+INSN_LASX(xvsle_w,           vvv)
+INSN_LASX(xvsle_d,           vvv)
+INSN_LASX(xvslei_b,          vv_i)
+INSN_LASX(xvslei_h,          vv_i)
+INSN_LASX(xvslei_w,          vv_i)
+INSN_LASX(xvslei_d,          vv_i)
+INSN_LASX(xvsle_bu,          vvv)
+INSN_LASX(xvsle_hu,          vvv)
+INSN_LASX(xvsle_wu,          vvv)
+INSN_LASX(xvsle_du,          vvv)
+INSN_LASX(xvslei_bu,         vv_i)
+INSN_LASX(xvslei_hu,         vv_i)
+INSN_LASX(xvslei_wu,         vv_i)
+INSN_LASX(xvslei_du,         vv_i)
+
+INSN_LASX(xvslt_b,           vvv)
+INSN_LASX(xvslt_h,           vvv)
+INSN_LASX(xvslt_w,           vvv)
+INSN_LASX(xvslt_d,           vvv)
+INSN_LASX(xvslti_b,          vv_i)
+INSN_LASX(xvslti_h,          vv_i)
+INSN_LASX(xvslti_w,          vv_i)
+INSN_LASX(xvslti_d,          vv_i)
+INSN_LASX(xvslt_bu,          vvv)
+INSN_LASX(xvslt_hu,          vvv)
+INSN_LASX(xvslt_wu,          vvv)
+INSN_LASX(xvslt_du,          vvv)
+INSN_LASX(xvslti_bu,         vv_i)
+INSN_LASX(xvslti_hu,         vv_i)
+INSN_LASX(xvslti_wu,         vv_i)
+INSN_LASX(xvslti_du,         vv_i)
+
+#define output_xvfcmp(C, PREFIX, SUFFIX)                                    \
+{                                                                           \
+    (C)->info->fprintf_func((C)->info->stream, "%08x  %s%s\tx%d, x%d, x%d", \
+                            (C)->insn, PREFIX, SUFFIX, a->vd,               \
+                            a->vj, a->vk);                                  \
+}
+static bool output_xxx_fcond(DisasContext *ctx, arg_vvv_fcond * a,
+                             const char *suffix)
+{
+    bool ret = true;
+    switch (a->fcond) {
+    case 0x0:
+        output_xvfcmp(ctx, "xvfcmp_caf_", suffix);
+        break;
+    case 0x1:
+        output_xvfcmp(ctx, "xvfcmp_saf_", suffix);
+        break;
+    case 0x2:
+        output_xvfcmp(ctx, "xvfcmp_clt_", suffix);
+        break;
+    case 0x3:
+        output_xvfcmp(ctx, "xvfcmp_slt_", suffix);
+        break;
+    case 0x4:
+        output_xvfcmp(ctx, "xvfcmp_ceq_", suffix);
+        break;
+    case 0x5:
+        output_xvfcmp(ctx, "xvfcmp_seq_", suffix);
+        break;
+    case 0x6:
+        output_xvfcmp(ctx, "xvfcmp_cle_", suffix);
+        break;
+    case 0x7:
+        output_xvfcmp(ctx, "xvfcmp_sle_", suffix);
+        break;
+    case 0x8:
+        output_xvfcmp(ctx, "xvfcmp_cun_", suffix);
+        break;
+    case 0x9:
+        output_xvfcmp(ctx, "xvfcmp_sun_", suffix);
+        break;
+    case 0xA:
+        output_xvfcmp(ctx, "xvfcmp_cult_", suffix);
+        break;
+    case 0xB:
+        output_xvfcmp(ctx, "xvfcmp_sult_", suffix);
+        break;
+    case 0xC:
+        output_xvfcmp(ctx, "xvfcmp_cueq_", suffix);
+        break;
+    case 0xD:
+        output_xvfcmp(ctx, "xvfcmp_sueq_", suffix);
+        break;
+    case 0xE:
+        output_xvfcmp(ctx, "xvfcmp_cule_", suffix);
+        break;
+    case 0xF:
+        output_xvfcmp(ctx, "xvfcmp_sule_", suffix);
+        break;
+    case 0x10:
+        output_xvfcmp(ctx, "xvfcmp_cne_", suffix);
+        break;
+    case 0x11:
+        output_xvfcmp(ctx, "xvfcmp_sne_", suffix);
+        break;
+    case 0x14:
+        output_xvfcmp(ctx, "xvfcmp_cor_", suffix);
+        break;
+    case 0x15:
+        output_xvfcmp(ctx, "xvfcmp_sor_", suffix);
+        break;
+    case 0x18:
+        output_xvfcmp(ctx, "xvfcmp_cune_", suffix);
+        break;
+    case 0x19:
+        output_xvfcmp(ctx, "xvfcmp_sune_", suffix);
+        break;
+    default:
+        ret = false;
+    }
+    return ret;
+}
+
+#define LASX_FCMP_INSN(suffix)                            \
+static bool trans_xvfcmp_cond_##suffix(DisasContext *ctx, \
+                                       arg_vvv_fcond * a) \
+{                                                         \
+    return output_xxx_fcond(ctx, a, #suffix);             \
+}
+
+LASX_FCMP_INSN(s)
+LASX_FCMP_INSN(d)
+
+INSN_LASX(xvbitsel_v,        vvvv)
+INSN_LASX(xvbitseli_b,       vv_i)
+
+INSN_LASX(xvseteqz_v,        cv)
+INSN_LASX(xvsetnez_v,        cv)
+INSN_LASX(xvsetanyeqz_b,     cv)
+INSN_LASX(xvsetanyeqz_h,     cv)
+INSN_LASX(xvsetanyeqz_w,     cv)
+INSN_LASX(xvsetanyeqz_d,     cv)
+INSN_LASX(xvsetallnez_b,     cv)
+INSN_LASX(xvsetallnez_h,     cv)
+INSN_LASX(xvsetallnez_w,     cv)
+INSN_LASX(xvsetallnez_d,     cv)
+
+INSN_LASX(xvinsgr2vr_w,      vr_i)
+INSN_LASX(xvinsgr2vr_d,      vr_i)
+INSN_LASX(xvpickve2gr_w,     rv_i)
+INSN_LASX(xvpickve2gr_d,     rv_i)
+INSN_LASX(xvpickve2gr_wu,    rv_i)
+INSN_LASX(xvpickve2gr_du,    rv_i)
+
+INSN_LASX(xvreplgr2vr_b,     vr)
+INSN_LASX(xvreplgr2vr_h,     vr)
+INSN_LASX(xvreplgr2vr_w,     vr)
+INSN_LASX(xvreplgr2vr_d,     vr)
+
+INSN_LASX(xvreplve_b,        vvr)
+INSN_LASX(xvreplve_h,        vvr)
+INSN_LASX(xvreplve_w,        vvr)
+INSN_LASX(xvreplve_d,        vvr)
+INSN_LASX(xvrepl128vei_b,    vv_i)
+INSN_LASX(xvrepl128vei_h,    vv_i)
+INSN_LASX(xvrepl128vei_w,    vv_i)
+INSN_LASX(xvrepl128vei_d,    vv_i)
+
+INSN_LASX(xvreplve0_b,       vv)
+INSN_LASX(xvreplve0_h,       vv)
+INSN_LASX(xvreplve0_w,       vv)
+INSN_LASX(xvreplve0_d,       vv)
+INSN_LASX(xvreplve0_q,       vv)
+
+INSN_LASX(xvinsve0_w,        vv_i)
+INSN_LASX(xvinsve0_d,        vv_i)
+
+INSN_LASX(xvpickve_w,        vv_i)
+INSN_LASX(xvpickve_d,        vv_i)
+
+INSN_LASX(xvbsll_v,          vv_i)
+INSN_LASX(xvbsrl_v,          vv_i)
+
+INSN_LASX(xvpackev_b,        vvv)
+INSN_LASX(xvpackev_h,        vvv)
+INSN_LASX(xvpackev_w,        vvv)
+INSN_LASX(xvpackev_d,        vvv)
+INSN_LASX(xvpackod_b,        vvv)
+INSN_LASX(xvpackod_h,        vvv)
+INSN_LASX(xvpackod_w,        vvv)
+INSN_LASX(xvpackod_d,        vvv)
+
+INSN_LASX(xvpickev_b,        vvv)
+INSN_LASX(xvpickev_h,        vvv)
+INSN_LASX(xvpickev_w,        vvv)
+INSN_LASX(xvpickev_d,        vvv)
+INSN_LASX(xvpickod_b,        vvv)
+INSN_LASX(xvpickod_h,        vvv)
+INSN_LASX(xvpickod_w,        vvv)
+INSN_LASX(xvpickod_d,        vvv)
+
+INSN_LASX(xvilvl_b,          vvv)
+INSN_LASX(xvilvl_h,          vvv)
+INSN_LASX(xvilvl_w,          vvv)
+INSN_LASX(xvilvl_d,          vvv)
+INSN_LASX(xvilvh_b,          vvv)
+INSN_LASX(xvilvh_h,          vvv)
+INSN_LASX(xvilvh_w,          vvv)
+INSN_LASX(xvilvh_d,          vvv)
+
+INSN_LASX(xvshuf_b,          vvvv)
+INSN_LASX(xvshuf_h,          vvv)
+INSN_LASX(xvshuf_w,          vvv)
+INSN_LASX(xvshuf_d,          vvv)
+
+INSN_LASX(xvperm_w,          vvv)
+
+INSN_LASX(xvshuf4i_b,        vv_i)
+INSN_LASX(xvshuf4i_h,        vv_i)
+INSN_LASX(xvshuf4i_w,        vv_i)
+INSN_LASX(xvshuf4i_d,        vv_i)
+
+INSN_LASX(xvpermi_w,         vv_i)
+INSN_LASX(xvpermi_d,         vv_i)
+INSN_LASX(xvpermi_q,         vv_i)
+
+INSN_LASX(xvextrins_d,       vv_i)
+INSN_LASX(xvextrins_w,       vv_i)
+INSN_LASX(xvextrins_h,       vv_i)
+INSN_LASX(xvextrins_b,       vv_i)
+
+INSN_LASX(xvld,              vr_i)
+INSN_LASX(xvst,              vr_i)
+INSN_LASX(xvldx,             vrr)
+INSN_LASX(xvstx,             vrr)
+
+INSN_LASX(xvldrepl_d,        vr_i)
+INSN_LASX(xvldrepl_w,        vr_i)
+INSN_LASX(xvldrepl_h,        vr_i)
+INSN_LASX(xvldrepl_b,        vr_i)
+INSN_LASX(xvstelm_d,         vr_ii)
+INSN_LASX(xvstelm_w,         vr_ii)
+INSN_LASX(xvstelm_h,         vr_ii)
+INSN_LASX(xvstelm_b,         vr_ii)
diff --git a/target/loongarch/gdbstub.c b/target/loongarch/gdbstub.c
index b09804b62f..5fc2f19e96 100644
--- a/target/loongarch/gdbstub.c
+++ b/target/loongarch/gdbstub.c
@@ -11,6 +11,7 @@
 #include "internals.h"
 #include "exec/gdbstub.h"
 #include "gdbstub/helpers.h"
+#include "vec.h"
 
 uint64_t read_fcc(CPULoongArchState *env)
 {
diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
index ffb1e0b0bf..b3b64a0215 100644
--- a/target/loongarch/helper.h
+++ b/target/loongarch/helper.h
@@ -133,22 +133,22 @@ DEF_HELPER_1(idle, void, env)
 #endif
 
 /* LoongArch LSX  */
-DEF_HELPER_4(vhaddw_h_b, void, env, i32, i32, i32)
-DEF_HELPER_4(vhaddw_w_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vhaddw_d_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vhaddw_q_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vhaddw_hu_bu, void, env, i32, i32, i32)
-DEF_HELPER_4(vhaddw_wu_hu, void, env, i32, i32, i32)
-DEF_HELPER_4(vhaddw_du_wu, void, env, i32, i32, i32)
-DEF_HELPER_4(vhaddw_qu_du, void, env, i32, i32, i32)
-DEF_HELPER_4(vhsubw_h_b, void, env, i32, i32, i32)
-DEF_HELPER_4(vhsubw_w_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vhsubw_d_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vhsubw_q_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vhsubw_hu_bu, void, env, i32, i32, i32)
-DEF_HELPER_4(vhsubw_wu_hu, void, env, i32, i32, i32)
-DEF_HELPER_4(vhsubw_du_wu, void, env, i32, i32, i32)
-DEF_HELPER_4(vhsubw_qu_du, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_4(vhaddw_h_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vhaddw_w_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vhaddw_d_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vhaddw_q_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vhaddw_hu_bu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vhaddw_wu_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vhaddw_du_wu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vhaddw_qu_du, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vhsubw_h_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vhsubw_w_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vhsubw_d_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vhsubw_q_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vhsubw_hu_bu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vhsubw_wu_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vhsubw_du_wu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vhsubw_qu_du, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 
 DEF_HELPER_FLAGS_4(vaddwev_h_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(vaddwev_w_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
@@ -305,22 +305,22 @@ DEF_HELPER_FLAGS_4(vmaddwod_h_bu_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(vmaddwod_w_hu_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(vmaddwod_d_wu_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 
-DEF_HELPER_4(vdiv_b, void, env, i32, i32, i32)
-DEF_HELPER_4(vdiv_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vdiv_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vdiv_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vdiv_bu, void, env, i32, i32, i32)
-DEF_HELPER_4(vdiv_hu, void, env, i32, i32, i32)
-DEF_HELPER_4(vdiv_wu, void, env, i32, i32, i32)
-DEF_HELPER_4(vdiv_du, void, env, i32, i32, i32)
-DEF_HELPER_4(vmod_b, void, env, i32, i32, i32)
-DEF_HELPER_4(vmod_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vmod_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vmod_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vmod_bu, void, env, i32, i32, i32)
-DEF_HELPER_4(vmod_hu, void, env, i32, i32, i32)
-DEF_HELPER_4(vmod_wu, void, env, i32, i32, i32)
-DEF_HELPER_4(vmod_du, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_4(vdiv_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vdiv_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vdiv_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vdiv_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vdiv_bu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vdiv_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vdiv_wu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vdiv_du, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vmod_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vmod_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vmod_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vmod_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vmod_bu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vmod_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vmod_wu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vmod_du, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 
 DEF_HELPER_FLAGS_4(vsat_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
 DEF_HELPER_FLAGS_4(vsat_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
@@ -331,161 +331,174 @@ DEF_HELPER_FLAGS_4(vsat_hu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
 DEF_HELPER_FLAGS_4(vsat_wu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
 DEF_HELPER_FLAGS_4(vsat_du, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
 
-DEF_HELPER_3(vexth_h_b, void, env, i32, i32)
-DEF_HELPER_3(vexth_w_h, void, env, i32, i32)
-DEF_HELPER_3(vexth_d_w, void, env, i32, i32)
-DEF_HELPER_3(vexth_q_d, void, env, i32, i32)
-DEF_HELPER_3(vexth_hu_bu, void, env, i32, i32)
-DEF_HELPER_3(vexth_wu_hu, void, env, i32, i32)
-DEF_HELPER_3(vexth_du_wu, void, env, i32, i32)
-DEF_HELPER_3(vexth_qu_du, void, env, i32, i32)
+DEF_HELPER_FLAGS_3(vexth_h_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(vexth_w_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(vexth_d_w, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(vexth_q_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(vexth_hu_bu, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(vexth_wu_hu, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(vexth_du_wu, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(vexth_qu_du, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(vext2xv_h_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(vext2xv_w_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(vext2xv_d_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(vext2xv_w_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(vext2xv_d_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(vext2xv_d_w, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(vext2xv_hu_bu, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(vext2xv_wu_bu, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(vext2xv_du_bu, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(vext2xv_wu_hu, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(vext2xv_du_hu, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(vext2xv_du_wu, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
 
 DEF_HELPER_FLAGS_4(vsigncov_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(vsigncov_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(vsigncov_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(vsigncov_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 
-DEF_HELPER_3(vmskltz_b, void, env, i32, i32)
-DEF_HELPER_3(vmskltz_h, void, env, i32, i32)
-DEF_HELPER_3(vmskltz_w, void, env, i32, i32)
-DEF_HELPER_3(vmskltz_d, void, env, i32, i32)
-DEF_HELPER_3(vmskgez_b, void, env, i32, i32)
-DEF_HELPER_3(vmsknz_b, void, env, i32,i32)
+DEF_HELPER_FLAGS_3(vmskltz_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(vmskltz_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(vmskltz_w, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(vmskltz_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(vmskgez_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(vmsknz_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
 
 DEF_HELPER_FLAGS_4(vnori_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
 
-DEF_HELPER_4(vsllwil_h_b, void, env, i32, i32, i32)
-DEF_HELPER_4(vsllwil_w_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vsllwil_d_w, void, env, i32, i32, i32)
-DEF_HELPER_3(vextl_q_d, void, env, i32, i32)
-DEF_HELPER_4(vsllwil_hu_bu, void, env, i32, i32, i32)
-DEF_HELPER_4(vsllwil_wu_hu, void, env, i32, i32, i32)
-DEF_HELPER_4(vsllwil_du_wu, void, env, i32, i32, i32)
-DEF_HELPER_3(vextl_qu_du, void, env, i32, i32)
+DEF_HELPER_FLAGS_4(vsllwil_h_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vsllwil_w_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vsllwil_d_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_3(vextl_q_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vsllwil_hu_bu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vsllwil_wu_hu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vsllwil_du_wu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_3(vextl_qu_du, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
 
-DEF_HELPER_4(vsrlr_b, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrlr_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrlr_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrlr_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrlri_b, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrlri_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrlri_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrlri_d, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_4(vsrlr_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vsrlr_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vsrlr_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vsrlr_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vsrlri_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vsrlri_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vsrlri_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vsrlri_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
 
-DEF_HELPER_4(vsrar_b, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrar_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrar_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrar_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrari_b, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrari_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrari_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrari_d, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_4(vsrar_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vsrar_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vsrar_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vsrar_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vsrari_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vsrari_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vsrari_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vsrari_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
 
-DEF_HELPER_4(vsrln_b_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrln_h_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrln_w_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vsran_b_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vsran_h_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vsran_w_d, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_4(vsrln_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vsrln_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vsrln_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vsran_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vsran_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vsran_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 
-DEF_HELPER_4(vsrlni_b_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrlni_h_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrlni_w_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrlni_d_q, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrani_b_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrani_h_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrani_w_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrani_d_q, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_4(vsrlni_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vsrlni_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vsrlni_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vsrlni_d_q, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vsrani_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vsrani_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vsrani_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vsrani_d_q, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
 
-DEF_HELPER_4(vsrlrn_b_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrlrn_h_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrlrn_w_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrarn_b_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrarn_h_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrarn_w_d, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_4(vsrlrn_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vsrlrn_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vsrlrn_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vsrarn_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vsrarn_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vsrarn_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 
-DEF_HELPER_4(vsrlrni_b_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrlrni_h_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrlrni_w_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrlrni_d_q, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrarni_b_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrarni_h_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrarni_w_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrarni_d_q, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_4(vsrlrni_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vsrlrni_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vsrlrni_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vsrlrni_d_q, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vsrarni_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vsrarni_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vsrarni_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vsrarni_d_q, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
 
-DEF_HELPER_4(vssrln_b_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrln_h_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrln_w_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vssran_b_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vssran_h_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vssran_w_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrln_bu_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrln_hu_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrln_wu_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vssran_bu_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vssran_hu_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vssran_wu_d, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_4(vssrln_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vssrln_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vssrln_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vssran_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vssran_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vssran_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vssrln_bu_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vssrln_hu_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vssrln_wu_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vssran_bu_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vssran_hu_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vssran_wu_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 
-DEF_HELPER_4(vssrlni_b_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrlni_h_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrlni_w_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrlni_d_q, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrani_b_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrani_h_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrani_w_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrani_d_q, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrlni_bu_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrlni_hu_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrlni_wu_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrlni_du_q, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrani_bu_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrani_hu_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrani_wu_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrani_du_q, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_4(vssrlni_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vssrlni_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vssrlni_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vssrlni_d_q, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vssrani_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vssrani_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vssrani_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vssrani_d_q, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vssrlni_bu_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vssrlni_hu_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vssrlni_wu_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vssrlni_du_q, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vssrani_bu_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vssrani_hu_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vssrani_wu_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vssrani_du_q, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
 
-DEF_HELPER_4(vssrlrn_b_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrlrn_h_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrlrn_w_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrarn_b_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrarn_h_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrarn_w_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrlrn_bu_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrlrn_hu_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrlrn_wu_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrarn_bu_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrarn_hu_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrarn_wu_d, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_4(vssrlrn_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vssrlrn_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vssrlrn_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vssrarn_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vssrarn_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vssrarn_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vssrlrn_bu_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vssrlrn_hu_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vssrlrn_wu_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vssrarn_bu_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vssrarn_hu_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vssrarn_wu_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 
-DEF_HELPER_4(vssrlrni_b_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrlrni_h_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrlrni_w_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrlrni_d_q, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrarni_b_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrarni_h_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrarni_w_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrarni_d_q, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrlrni_bu_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrlrni_hu_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrlrni_wu_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrlrni_du_q, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrarni_bu_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrarni_hu_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrarni_wu_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrarni_du_q, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_4(vssrlrni_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vssrlrni_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vssrlrni_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vssrlrni_d_q, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vssrarni_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vssrarni_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vssrarni_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vssrarni_d_q, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vssrlrni_bu_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vssrlrni_hu_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vssrlrni_wu_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vssrlrni_du_q, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vssrarni_bu_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vssrarni_hu_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vssrarni_wu_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vssrarni_du_q, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
 
-DEF_HELPER_3(vclo_b, void, env, i32, i32)
-DEF_HELPER_3(vclo_h, void, env, i32, i32)
-DEF_HELPER_3(vclo_w, void, env, i32, i32)
-DEF_HELPER_3(vclo_d, void, env, i32, i32)
-DEF_HELPER_3(vclz_b, void, env, i32, i32)
-DEF_HELPER_3(vclz_h, void, env, i32, i32)
-DEF_HELPER_3(vclz_w, void, env, i32, i32)
-DEF_HELPER_3(vclz_d, void, env, i32, i32)
+DEF_HELPER_FLAGS_3(vclo_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(vclo_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(vclo_w, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(vclo_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(vclz_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(vclz_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(vclz_w, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(vclz_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
 
-DEF_HELPER_3(vpcnt_b, void, env, i32, i32)
-DEF_HELPER_3(vpcnt_h, void, env, i32, i32)
-DEF_HELPER_3(vpcnt_w, void, env, i32, i32)
-DEF_HELPER_3(vpcnt_d, void, env, i32, i32)
+DEF_HELPER_FLAGS_3(vpcnt_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(vpcnt_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(vpcnt_w, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(vpcnt_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
 
 DEF_HELPER_FLAGS_4(vbitclr_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(vbitclr_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
@@ -514,107 +527,107 @@ DEF_HELPER_FLAGS_4(vbitrevi_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
 DEF_HELPER_FLAGS_4(vbitrevi_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
 DEF_HELPER_FLAGS_4(vbitrevi_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
 
-DEF_HELPER_4(vfrstp_b, void, env, i32, i32, i32)
-DEF_HELPER_4(vfrstp_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vfrstpi_b, void, env, i32, i32, i32)
-DEF_HELPER_4(vfrstpi_h, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_4(vfrstp_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vfrstp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vfrstpi_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vfrstpi_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
 
-DEF_HELPER_4(vfadd_s, void, env, i32, i32, i32)
-DEF_HELPER_4(vfadd_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vfsub_s, void, env, i32, i32, i32)
-DEF_HELPER_4(vfsub_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vfmul_s, void, env, i32, i32, i32)
-DEF_HELPER_4(vfmul_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vfdiv_s, void, env, i32, i32, i32)
-DEF_HELPER_4(vfdiv_d, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_5(vfadd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(vfadd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(vfsub_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(vfsub_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(vfmul_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(vfmul_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(vfdiv_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(vfdiv_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
 
-DEF_HELPER_5(vfmadd_s, void, env, i32, i32, i32, i32)
-DEF_HELPER_5(vfmadd_d, void, env, i32, i32, i32, i32)
-DEF_HELPER_5(vfmsub_s, void, env, i32, i32, i32, i32)
-DEF_HELPER_5(vfmsub_d, void, env, i32, i32, i32, i32)
-DEF_HELPER_5(vfnmadd_s, void, env, i32, i32, i32, i32)
-DEF_HELPER_5(vfnmadd_d, void, env, i32, i32, i32, i32)
-DEF_HELPER_5(vfnmsub_s, void, env, i32, i32, i32, i32)
-DEF_HELPER_5(vfnmsub_d, void, env, i32, i32, i32, i32)
+DEF_HELPER_FLAGS_6(vfmadd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_6(vfmadd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_6(vfmsub_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_6(vfmsub_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_6(vfnmadd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_6(vfnmadd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_6(vfnmsub_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_6(vfnmsub_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, env, i32)
 
-DEF_HELPER_4(vfmax_s, void, env, i32, i32, i32)
-DEF_HELPER_4(vfmax_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vfmin_s, void, env, i32, i32, i32)
-DEF_HELPER_4(vfmin_d, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_5(vfmax_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(vfmax_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(vfmin_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(vfmin_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
 
-DEF_HELPER_4(vfmaxa_s, void, env, i32, i32, i32)
-DEF_HELPER_4(vfmaxa_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vfmina_s, void, env, i32, i32, i32)
-DEF_HELPER_4(vfmina_d, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_5(vfmaxa_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(vfmaxa_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(vfmina_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(vfmina_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
 
-DEF_HELPER_3(vflogb_s, void, env, i32, i32)
-DEF_HELPER_3(vflogb_d, void, env, i32, i32)
+DEF_HELPER_FLAGS_4(vflogb_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vflogb_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
 
-DEF_HELPER_3(vfclass_s, void, env, i32, i32)
-DEF_HELPER_3(vfclass_d, void, env, i32, i32)
+DEF_HELPER_FLAGS_4(vfclass_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vfclass_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
 
-DEF_HELPER_3(vfsqrt_s, void, env, i32, i32)
-DEF_HELPER_3(vfsqrt_d, void, env, i32, i32)
-DEF_HELPER_3(vfrecip_s, void, env, i32, i32)
-DEF_HELPER_3(vfrecip_d, void, env, i32, i32)
-DEF_HELPER_3(vfrsqrt_s, void, env, i32, i32)
-DEF_HELPER_3(vfrsqrt_d, void, env, i32, i32)
+DEF_HELPER_FLAGS_4(vfsqrt_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vfsqrt_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vfrecip_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vfrecip_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vfrsqrt_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vfrsqrt_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
 
-DEF_HELPER_3(vfcvtl_s_h, void, env, i32, i32)
-DEF_HELPER_3(vfcvth_s_h, void, env, i32, i32)
-DEF_HELPER_3(vfcvtl_d_s, void, env, i32, i32)
-DEF_HELPER_3(vfcvth_d_s, void, env, i32, i32)
-DEF_HELPER_4(vfcvt_h_s, void, env, i32, i32, i32)
-DEF_HELPER_4(vfcvt_s_d, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_4(vfcvtl_s_h, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vfcvth_s_h, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vfcvtl_d_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vfcvth_d_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(vfcvt_h_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(vfcvt_s_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
 
-DEF_HELPER_3(vfrintrne_s, void, env, i32, i32)
-DEF_HELPER_3(vfrintrne_d, void, env, i32, i32)
-DEF_HELPER_3(vfrintrz_s, void, env, i32, i32)
-DEF_HELPER_3(vfrintrz_d, void, env, i32, i32)
-DEF_HELPER_3(vfrintrp_s, void, env, i32, i32)
-DEF_HELPER_3(vfrintrp_d, void, env, i32, i32)
-DEF_HELPER_3(vfrintrm_s, void, env, i32, i32)
-DEF_HELPER_3(vfrintrm_d, void, env, i32, i32)
-DEF_HELPER_3(vfrint_s, void, env, i32, i32)
-DEF_HELPER_3(vfrint_d, void, env, i32, i32)
+DEF_HELPER_FLAGS_4(vfrintrne_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vfrintrne_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vfrintrz_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vfrintrz_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vfrintrp_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vfrintrp_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vfrintrm_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vfrintrm_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vfrint_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vfrint_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
 
-DEF_HELPER_3(vftintrne_w_s, void, env, i32, i32)
-DEF_HELPER_3(vftintrne_l_d, void, env, i32, i32)
-DEF_HELPER_3(vftintrz_w_s, void, env, i32, i32)
-DEF_HELPER_3(vftintrz_l_d, void, env, i32, i32)
-DEF_HELPER_3(vftintrp_w_s, void, env, i32, i32)
-DEF_HELPER_3(vftintrp_l_d, void, env, i32, i32)
-DEF_HELPER_3(vftintrm_w_s, void, env, i32, i32)
-DEF_HELPER_3(vftintrm_l_d, void, env, i32, i32)
-DEF_HELPER_3(vftint_w_s, void, env, i32, i32)
-DEF_HELPER_3(vftint_l_d, void, env, i32, i32)
-DEF_HELPER_3(vftintrz_wu_s, void, env, i32, i32)
-DEF_HELPER_3(vftintrz_lu_d, void, env, i32, i32)
-DEF_HELPER_3(vftint_wu_s, void, env, i32, i32)
-DEF_HELPER_3(vftint_lu_d, void, env, i32, i32)
-DEF_HELPER_4(vftintrne_w_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vftintrz_w_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vftintrp_w_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vftintrm_w_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vftint_w_d, void, env, i32, i32, i32)
-DEF_HELPER_3(vftintrnel_l_s, void, env, i32, i32)
-DEF_HELPER_3(vftintrneh_l_s, void, env, i32, i32)
-DEF_HELPER_3(vftintrzl_l_s, void, env, i32, i32)
-DEF_HELPER_3(vftintrzh_l_s, void, env, i32, i32)
-DEF_HELPER_3(vftintrpl_l_s, void, env, i32, i32)
-DEF_HELPER_3(vftintrph_l_s, void, env, i32, i32)
-DEF_HELPER_3(vftintrml_l_s, void, env, i32, i32)
-DEF_HELPER_3(vftintrmh_l_s, void, env, i32, i32)
-DEF_HELPER_3(vftintl_l_s, void, env, i32, i32)
-DEF_HELPER_3(vftinth_l_s, void, env, i32, i32)
+DEF_HELPER_FLAGS_4(vftintrne_w_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vftintrne_l_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vftintrz_w_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vftintrz_l_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vftintrp_w_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vftintrp_l_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vftintrm_w_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vftintrm_l_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vftint_w_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vftint_l_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vftintrz_wu_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vftintrz_lu_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vftint_wu_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vftint_lu_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(vftintrne_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(vftintrz_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(vftintrp_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(vftintrm_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(vftint_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vftintrnel_l_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vftintrneh_l_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vftintrzl_l_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vftintrzh_l_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vftintrpl_l_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vftintrph_l_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vftintrml_l_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vftintrmh_l_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vftintl_l_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vftinth_l_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
 
-DEF_HELPER_3(vffint_s_w, void, env, i32, i32)
-DEF_HELPER_3(vffint_d_l, void, env, i32, i32)
-DEF_HELPER_3(vffint_s_wu, void, env, i32, i32)
-DEF_HELPER_3(vffint_d_lu, void, env, i32, i32)
-DEF_HELPER_3(vffintl_d_w, void, env, i32, i32)
-DEF_HELPER_3(vffinth_d_w, void, env, i32, i32)
-DEF_HELPER_4(vffint_s_l, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_4(vffint_s_w, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vffint_d_l, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vffint_s_wu, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vffint_d_lu, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vffintl_d_w, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vffinth_d_w, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(vffint_s_l, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
 
 DEF_HELPER_FLAGS_4(vseqi_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
 DEF_HELPER_FLAGS_4(vseqi_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
@@ -639,61 +652,69 @@ DEF_HELPER_FLAGS_4(vslti_hu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
 DEF_HELPER_FLAGS_4(vslti_wu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
 DEF_HELPER_FLAGS_4(vslti_du, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
 
-DEF_HELPER_5(vfcmp_c_s, void, env, i32, i32, i32, i32)
-DEF_HELPER_5(vfcmp_s_s, void, env, i32, i32, i32, i32)
-DEF_HELPER_5(vfcmp_c_d, void, env, i32, i32, i32, i32)
-DEF_HELPER_5(vfcmp_s_d, void, env, i32, i32, i32, i32)
+DEF_HELPER_6(vfcmp_c_s, void, env, i32, i32, i32, i32, i32)
+DEF_HELPER_6(vfcmp_s_s, void, env, i32, i32, i32, i32, i32)
+DEF_HELPER_6(vfcmp_c_d, void, env, i32, i32, i32, i32, i32)
+DEF_HELPER_6(vfcmp_s_d, void, env, i32, i32, i32, i32, i32)
 
 DEF_HELPER_FLAGS_4(vbitseli_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
 
-DEF_HELPER_3(vsetanyeqz_b, void, env, i32, i32)
-DEF_HELPER_3(vsetanyeqz_h, void, env, i32, i32)
-DEF_HELPER_3(vsetanyeqz_w, void, env, i32, i32)
-DEF_HELPER_3(vsetanyeqz_d, void, env, i32, i32)
-DEF_HELPER_3(vsetallnez_b, void, env, i32, i32)
-DEF_HELPER_3(vsetallnez_h, void, env, i32, i32)
-DEF_HELPER_3(vsetallnez_w, void, env, i32, i32)
-DEF_HELPER_3(vsetallnez_d, void, env, i32, i32)
+DEF_HELPER_4(vsetanyeqz_b, void, env, i32, i32, i32)
+DEF_HELPER_4(vsetanyeqz_h, void, env, i32, i32, i32)
+DEF_HELPER_4(vsetanyeqz_w, void, env, i32, i32, i32)
+DEF_HELPER_4(vsetanyeqz_d, void, env, i32, i32, i32)
+DEF_HELPER_4(vsetallnez_b, void, env, i32, i32, i32)
+DEF_HELPER_4(vsetallnez_h, void, env, i32, i32, i32)
+DEF_HELPER_4(vsetallnez_w, void, env, i32, i32, i32)
+DEF_HELPER_4(vsetallnez_d, void, env, i32, i32, i32)
 
-DEF_HELPER_4(vpackev_b, void, env, i32, i32, i32)
-DEF_HELPER_4(vpackev_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vpackev_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vpackev_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vpackod_b, void, env, i32, i32, i32)
-DEF_HELPER_4(vpackod_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vpackod_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vpackod_d, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_4(xvinsve0_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(xvinsve0_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(xvpickve_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(xvpickve_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
 
-DEF_HELPER_4(vpickev_b, void, env, i32, i32, i32)
-DEF_HELPER_4(vpickev_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vpickev_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vpickev_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vpickod_b, void, env, i32, i32, i32)
-DEF_HELPER_4(vpickod_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vpickod_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vpickod_d, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_4(vpackev_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vpackev_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vpackev_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vpackev_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vpackod_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vpackod_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vpackod_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vpackod_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 
-DEF_HELPER_4(vilvl_b, void, env, i32, i32, i32)
-DEF_HELPER_4(vilvl_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vilvl_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vilvl_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vilvh_b, void, env, i32, i32, i32)
-DEF_HELPER_4(vilvh_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vilvh_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vilvh_d, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_4(vpickev_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vpickev_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vpickev_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vpickev_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vpickod_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vpickod_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vpickod_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vpickod_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 
-DEF_HELPER_5(vshuf_b, void, env, i32, i32, i32, i32)
-DEF_HELPER_4(vshuf_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vshuf_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vshuf_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vshuf4i_b, void, env, i32, i32, i32)
-DEF_HELPER_4(vshuf4i_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vshuf4i_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vshuf4i_d, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_4(vilvl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vilvl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vilvl_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vilvl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vilvh_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vilvh_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vilvh_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vilvh_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 
-DEF_HELPER_4(vpermi_w, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_5(vshuf_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vshuf_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vshuf_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vshuf_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vshuf4i_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vshuf4i_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vshuf4i_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vshuf4i_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
 
-DEF_HELPER_4(vextrins_b, void, env, i32, i32, i32)
-DEF_HELPER_4(vextrins_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vextrins_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vextrins_d, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_4(vperm_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vpermi_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vpermi_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vpermi_q, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+
+DEF_HELPER_FLAGS_4(vextrins_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vextrins_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vextrins_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vextrins_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
similarity index 61%
rename from target/loongarch/insn_trans/trans_lsx.c.inc
rename to target/loongarch/insn_trans/trans_vec.c.inc
index 5fbf2718f7..c647137372 100644
--- a/target/loongarch/insn_trans/trans_lsx.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -1,78 +1,254 @@
 /* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
- * LSX translate functions
+ * LoongArch vector translate functions
  * Copyright (c) 2022-2023 Loongson Technology Corporation Limited
  */
 
 #ifndef CONFIG_USER_ONLY
-#define CHECK_SXE do { \
-    if ((ctx->base.tb->flags & HW_FLAGS_EUEN_SXE) == 0) { \
-        generate_exception(ctx, EXCCODE_SXD); \
-        return true; \
-    } \
-} while (0)
+
+static bool check_vec(DisasContext *ctx, uint32_t oprsz)
+{
+    if ((oprsz == 16) && ((ctx->base.tb->flags & HW_FLAGS_EUEN_SXE) == 0)) {
+        generate_exception(ctx, EXCCODE_SXD);
+        return false;
+    }
+
+    if ((oprsz == 32) && ((ctx->base.tb->flags & HW_FLAGS_EUEN_ASXE) == 0)) {
+        generate_exception(ctx, EXCCODE_ASXD);
+        return false;
+    }
+
+    return true;
+}
+
 #else
-#define CHECK_SXE
+
+static bool check_vec(DisasContext *ctx, uint32_t oprsz)
+{
+    return true;
+}
+
 #endif
 
+static bool gen_vvvv_ptr_vl(DisasContext *ctx, arg_vvvv *a, uint32_t oprsz,
+                            gen_helper_gvec_4_ptr *fn)
+{
+    if (!check_vec(ctx, oprsz)) {
+        return true;
+    }
+
+    tcg_gen_gvec_4_ptr(vec_full_offset(a->vd),
+                       vec_full_offset(a->vj),
+                       vec_full_offset(a->vk),
+                       vec_full_offset(a->va),
+                       cpu_env,
+                       oprsz, ctx->vl / 8, 0, fn);
+    return true;
+}
+
+static bool gen_vvvv_ptr(DisasContext *ctx, arg_vvvv *a,
+                         gen_helper_gvec_4_ptr *fn)
+{
+    return gen_vvvv_ptr_vl(ctx, a, 16, fn);
+}
+
+static bool gen_xxxx_ptr(DisasContext *ctx, arg_vvvv *a,
+                         gen_helper_gvec_4_ptr *fn)
+{
+    return gen_vvvv_ptr_vl(ctx, a, 32, fn);
+}
+
+static bool gen_vvvv_vl(DisasContext *ctx, arg_vvvv *a, uint32_t oprsz,
+                        gen_helper_gvec_4 *fn)
+{
+    if (!check_vec(ctx, oprsz)) {
+        return true;
+    }
+
+    tcg_gen_gvec_4_ool(vec_full_offset(a->vd),
+                       vec_full_offset(a->vj),
+                       vec_full_offset(a->vk),
+                       vec_full_offset(a->va),
+                       oprsz, ctx->vl / 8, 0, fn);
+    return true;
+}
+
 static bool gen_vvvv(DisasContext *ctx, arg_vvvv *a,
-                     void (*func)(TCGv_ptr, TCGv_i32, TCGv_i32,
-                                  TCGv_i32, TCGv_i32))
+                     gen_helper_gvec_4 *fn)
 {
-    TCGv_i32 vd = tcg_constant_i32(a->vd);
-    TCGv_i32 vj = tcg_constant_i32(a->vj);
-    TCGv_i32 vk = tcg_constant_i32(a->vk);
-    TCGv_i32 va = tcg_constant_i32(a->va);
+    return gen_vvvv_vl(ctx, a, 16, fn);
+}
 
-    CHECK_SXE;
-    func(cpu_env, vd, vj, vk, va);
+static bool gen_xxxx(DisasContext *ctx, arg_vvvv *a,
+                     gen_helper_gvec_4 *fn)
+{
+    return gen_vvvv_vl(ctx, a, 32, fn);
+}
+
+static bool gen_vvv_ptr_vl(DisasContext *ctx, arg_vvv *a, uint32_t oprsz,
+                           gen_helper_gvec_3_ptr *fn)
+{
+    if (!check_vec(ctx, oprsz)) {
+        return true;
+    }
+    tcg_gen_gvec_3_ptr(vec_full_offset(a->vd),
+                       vec_full_offset(a->vj),
+                       vec_full_offset(a->vk),
+                       cpu_env,
+                       oprsz, ctx->vl / 8, 0, fn);
     return true;
 }
 
-static bool gen_vvv(DisasContext *ctx, arg_vvv *a,
-                    void (*func)(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32))
+static bool gen_vvv_ptr(DisasContext *ctx, arg_vvv *a,
+                        gen_helper_gvec_3_ptr *fn)
 {
-    TCGv_i32 vd = tcg_constant_i32(a->vd);
-    TCGv_i32 vj = tcg_constant_i32(a->vj);
-    TCGv_i32 vk = tcg_constant_i32(a->vk);
+    return gen_vvv_ptr_vl(ctx, a, 16, fn);
+}
 
-    CHECK_SXE;
+static bool gen_xxx_ptr(DisasContext *ctx, arg_vvv *a,
+                        gen_helper_gvec_3_ptr *fn)
+{
+    return gen_vvv_ptr_vl(ctx, a, 32, fn);
+}
 
-    func(cpu_env, vd, vj, vk);
+static bool gen_vvv_vl(DisasContext *ctx, arg_vvv *a, uint32_t oprsz,
+                       gen_helper_gvec_3 *fn)
+{
+    if (!check_vec(ctx, oprsz)) {
+        return true;
+    }
+
+    tcg_gen_gvec_3_ool(vec_full_offset(a->vd),
+                       vec_full_offset(a->vj),
+                       vec_full_offset(a->vk),
+                       oprsz, ctx->vl / 8, 0, fn);
     return true;
 }
 
-static bool gen_vv(DisasContext *ctx, arg_vv *a,
-                   void (*func)(TCGv_ptr, TCGv_i32, TCGv_i32))
+static bool gen_vvv(DisasContext *ctx, arg_vvv *a, gen_helper_gvec_3 *fn)
 {
-    TCGv_i32 vd = tcg_constant_i32(a->vd);
-    TCGv_i32 vj = tcg_constant_i32(a->vj);
+    return gen_vvv_vl(ctx, a, 16, fn);
+}
 
-    CHECK_SXE;
-    func(cpu_env, vd, vj);
+static bool gen_xxx(DisasContext *ctx, arg_vvv *a, gen_helper_gvec_3 *fn)
+{
+    return gen_vvv_vl(ctx, a, 32, fn);
+}
+
+static bool gen_vv_ptr_vl(DisasContext *ctx, arg_vv *a, uint32_t oprsz,
+                          gen_helper_gvec_2_ptr *fn)
+{
+    if (!check_vec(ctx, oprsz)) {
+        return true;
+    }
+
+    tcg_gen_gvec_2_ptr(vec_full_offset(a->vd),
+                       vec_full_offset(a->vj),
+                       cpu_env,
+                       oprsz, ctx->vl / 8, 0, fn);
     return true;
 }
 
-static bool gen_vv_i(DisasContext *ctx, arg_vv_i *a,
-                     void (*func)(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32))
+static bool gen_vv_ptr(DisasContext *ctx, arg_vv *a,
+                       gen_helper_gvec_2_ptr *fn)
 {
-    TCGv_i32 vd = tcg_constant_i32(a->vd);
-    TCGv_i32 vj = tcg_constant_i32(a->vj);
-    TCGv_i32 imm = tcg_constant_i32(a->imm);
+    return gen_vv_ptr_vl(ctx, a, 16, fn);
+}
 
-    CHECK_SXE;
-    func(cpu_env, vd, vj, imm);
+static bool gen_xx_ptr(DisasContext *ctx, arg_vv *a,
+                       gen_helper_gvec_2_ptr *fn)
+{
+    return gen_vv_ptr_vl(ctx, a, 32, fn);
+}
+
+static bool gen_vv_vl(DisasContext *ctx, arg_vv *a, uint32_t oprsz,
+                      gen_helper_gvec_2 *fn)
+{
+    if (!check_vec(ctx, oprsz)) {
+        return true;
+    }
+
+    tcg_gen_gvec_2_ool(vec_full_offset(a->vd),
+                       vec_full_offset(a->vj),
+                       oprsz, ctx->vl / 8, 0, fn);
+    return true;
+}
+
+static bool gen_vv(DisasContext *ctx, arg_vv *a, gen_helper_gvec_2 *fn)
+{
+    return gen_vv_vl(ctx, a, 16, fn);
+}
+
+static bool gen_xx(DisasContext *ctx, arg_vv *a, gen_helper_gvec_2 *fn)
+{
+    return gen_vv_vl(ctx, a, 32, fn);
+}
+
+static bool gen_vv_i_vl(DisasContext *ctx, arg_vv_i *a, uint32_t oprsz,
+                        gen_helper_gvec_2i *fn)
+{
+    if (!check_vec(ctx, oprsz)) {
+        return true;
+    }
+
+    tcg_gen_gvec_2i_ool(vec_full_offset(a->vd),
+                        vec_full_offset(a->vj),
+                        tcg_constant_i64(a->imm),
+                        oprsz, ctx->vl / 8, 0, fn);
+    return true;
+}
+
+static bool gen_vv_i(DisasContext *ctx, arg_vv_i *a, gen_helper_gvec_2i *fn)
+{
+    return gen_vv_i_vl(ctx, a, 16, fn);
+}
+
+static bool gen_xx_i(DisasContext *ctx, arg_vv_i *a, gen_helper_gvec_2i *fn)
+{
+    return gen_vv_i_vl(ctx, a, 32, fn);
+}
+
+static bool gen_cv_vl(DisasContext *ctx, arg_cv *a, uint32_t sz,
+                      void (*func)(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32))
+{
+    if (!check_vec(ctx, sz)) {
+        return true;
+    }
+
+    TCGv_i32 vj = tcg_constant_i32(a->vj);
+    TCGv_i32 cd = tcg_constant_i32(a->cd);
+    TCGv_i32 oprsz = tcg_constant_i32(sz);
+
+    func(cpu_env, oprsz, cd, vj);
     return true;
 }
 
 static bool gen_cv(DisasContext *ctx, arg_cv *a,
-                    void (*func)(TCGv_ptr, TCGv_i32, TCGv_i32))
+                   void (*func)(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32))
 {
-    TCGv_i32 vj = tcg_constant_i32(a->vj);
-    TCGv_i32 cd = tcg_constant_i32(a->cd);
+    return gen_cv_vl(ctx, a, 16, func);
+}
 
-    CHECK_SXE;
-    func(cpu_env, cd, vj);
+static bool gen_cx(DisasContext *ctx, arg_cv *a,
+                   void (*func)(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32))
+{
+    return gen_cv_vl(ctx, a, 32, func);
+}
+
+static bool gvec_vvv_vl(DisasContext *ctx, arg_vvv *a,
+                        uint32_t oprsz, MemOp mop,
+                        void (*func)(unsigned, uint32_t, uint32_t,
+                                     uint32_t, uint32_t, uint32_t))
+{
+    uint32_t vd_ofs = vec_full_offset(a->vd);
+    uint32_t vj_ofs = vec_full_offset(a->vj);
+    uint32_t vk_ofs = vec_full_offset(a->vk);
+
+    if (!check_vec(ctx, oprsz)) {
+        return true;
+    }
+
+    func(mop, vd_ofs, vj_ofs, vk_ofs, oprsz, ctx->vl / 8);
     return true;
 }
 
@@ -80,30 +256,60 @@ static bool gvec_vvv(DisasContext *ctx, arg_vvv *a, MemOp mop,
                      void (*func)(unsigned, uint32_t, uint32_t,
                                   uint32_t, uint32_t, uint32_t))
 {
-    uint32_t vd_ofs, vj_ofs, vk_ofs;
+    return gvec_vvv_vl(ctx, a, 16, mop, func);
+}
 
-    CHECK_SXE;
+static bool gvec_xxx(DisasContext *ctx, arg_vvv *a, MemOp mop,
+                     void (*func)(unsigned, uint32_t, uint32_t,
+                                  uint32_t, uint32_t, uint32_t))
+{
+    return gvec_vvv_vl(ctx, a, 32, mop, func);
+}
 
-    vd_ofs = vec_full_offset(a->vd);
-    vj_ofs = vec_full_offset(a->vj);
-    vk_ofs = vec_full_offset(a->vk);
+static bool gvec_vv_vl(DisasContext *ctx, arg_vv *a,
+                       uint32_t oprsz, MemOp mop,
+                       void (*func)(unsigned, uint32_t, uint32_t,
+                                    uint32_t, uint32_t))
+{
+    uint32_t vd_ofs = vec_full_offset(a->vd);
+    uint32_t vj_ofs = vec_full_offset(a->vj);
 
-    func(mop, vd_ofs, vj_ofs, vk_ofs, 16, ctx->vl/8);
+    if (!check_vec(ctx, oprsz)) {
+        return true;
+    }
+
+    func(mop, vd_ofs, vj_ofs, oprsz, ctx->vl / 8);
     return true;
 }
 
+
 static bool gvec_vv(DisasContext *ctx, arg_vv *a, MemOp mop,
                     void (*func)(unsigned, uint32_t, uint32_t,
                                  uint32_t, uint32_t))
 {
-    uint32_t vd_ofs, vj_ofs;
+    return gvec_vv_vl(ctx, a, 16, mop, func);
+}
 
-    CHECK_SXE;
+static bool gvec_xx(DisasContext *ctx, arg_vv *a, MemOp mop,
+                    void (*func)(unsigned, uint32_t, uint32_t,
+                                 uint32_t, uint32_t))
+{
+    return gvec_vv_vl(ctx, a, 32, mop, func);
+}
 
-    vd_ofs = vec_full_offset(a->vd);
-    vj_ofs = vec_full_offset(a->vj);
+static bool gvec_vv_i_vl(DisasContext *ctx, arg_vv_i *a,
+                         uint32_t oprsz, MemOp mop,
+                         void (*func)(unsigned, uint32_t, uint32_t,
+                                      int64_t, uint32_t, uint32_t))
+{
+    uint32_t vd_ofs = vec_full_offset(a->vd);
+    uint32_t vj_ofs = vec_full_offset(a->vj);
 
-    func(mop, vd_ofs, vj_ofs, 16, ctx->vl/8);
+    if (!check_vec(ctx, oprsz)) {
+        return true;
+    }
+
+    func(mop, vd_ofs, vj_ofs, a->imm, oprsz, ctx->vl / 8);
     return true;
 }
 
@@ -111,73 +317,108 @@ static bool gvec_vv_i(DisasContext *ctx, arg_vv_i *a, MemOp mop,
                       void (*func)(unsigned, uint32_t, uint32_t,
                                    int64_t, uint32_t, uint32_t))
 {
-    uint32_t vd_ofs, vj_ofs;
+    return gvec_vv_i_vl(ctx, a, 16, mop, func);
+}
 
-    CHECK_SXE;
+static bool gvec_xx_i(DisasContext *ctx, arg_vv_i *a, MemOp mop,
+                      void (*func)(unsigned, uint32_t, uint32_t,
+                                   int64_t, uint32_t, uint32_t))
+{
+    return gvec_vv_i_vl(ctx,a, 32, mop, func);
+}
 
-    vd_ofs = vec_full_offset(a->vd);
-    vj_ofs = vec_full_offset(a->vj);
+static bool gvec_subi_vl(DisasContext *ctx, arg_vv_i *a,
+                         uint32_t oprsz, MemOp mop)
+{
+    uint32_t vd_ofs = vec_full_offset(a->vd);
+    uint32_t vj_ofs = vec_full_offset(a->vj);
 
-    func(mop, vd_ofs, vj_ofs, a->imm , 16, ctx->vl/8);
+    if (!check_vec(ctx, oprsz)) {
+        return true;
+    }
+
+    tcg_gen_gvec_addi(mop, vd_ofs, vj_ofs, -a->imm, oprsz, ctx->vl / 8);
     return true;
 }
 
 static bool gvec_subi(DisasContext *ctx, arg_vv_i *a, MemOp mop)
 {
-    uint32_t vd_ofs, vj_ofs;
+    return gvec_subi_vl(ctx, a, 16, mop);
+}
 
-    CHECK_SXE;
-
-    vd_ofs = vec_full_offset(a->vd);
-    vj_ofs = vec_full_offset(a->vj);
-
-    tcg_gen_gvec_addi(mop, vd_ofs, vj_ofs, -a->imm, 16, ctx->vl/8);
-    return true;
+static bool gvec_xsubi(DisasContext *ctx, arg_vv_i *a, MemOp mop)
+{
+    return gvec_subi_vl(ctx, a, 32, mop);
 }
 
 TRANS(vadd_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_add)
 TRANS(vadd_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_add)
 TRANS(vadd_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_add)
 TRANS(vadd_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_add)
+TRANS(xvadd_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_add)
+TRANS(xvadd_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_add)
+TRANS(xvadd_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_add)
+TRANS(xvadd_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_add)
 
-#define VADDSUB_Q(NAME)                                        \
-static bool trans_v## NAME ##_q(DisasContext *ctx, arg_vvv *a) \
-{                                                              \
-    TCGv_i64 rh, rl, ah, al, bh, bl;                           \
-                                                               \
-    if (!avail_LSX(ctx)) {                                     \
-        return false;                                          \
-    }                                                          \
-                                                               \
-    CHECK_SXE;                                                 \
-                                                               \
-    rh = tcg_temp_new_i64();                                   \
-    rl = tcg_temp_new_i64();                                   \
-    ah = tcg_temp_new_i64();                                   \
-    al = tcg_temp_new_i64();                                   \
-    bh = tcg_temp_new_i64();                                   \
-    bl = tcg_temp_new_i64();                                   \
-                                                               \
-    get_vreg64(ah, a->vj, 1);                                  \
-    get_vreg64(al, a->vj, 0);                                  \
-    get_vreg64(bh, a->vk, 1);                                  \
-    get_vreg64(bl, a->vk, 0);                                  \
-                                                               \
-    tcg_gen_## NAME ##2_i64(rl, rh, al, ah, bl, bh);           \
-                                                               \
-    set_vreg64(rh, a->vd, 1);                                  \
-    set_vreg64(rl, a->vd, 0);                                  \
-                                                               \
-    return true;                                               \
+static bool gen_vaddsub_q_vl(DisasContext *ctx, arg_vvv *a, uint32_t oprsz,
+                             void (*func)(TCGv_i64, TCGv_i64, TCGv_i64,
+                                          TCGv_i64, TCGv_i64, TCGv_i64))
+{
+    int i;
+    TCGv_i64 rh, rl, ah, al, bh, bl;
+
+    if (!check_vec(ctx, oprsz)) {
+        return true;
+    }
+
+    rh = tcg_temp_new_i64();
+    rl = tcg_temp_new_i64();
+    ah = tcg_temp_new_i64();
+    al = tcg_temp_new_i64();
+    bh = tcg_temp_new_i64();
+    bl = tcg_temp_new_i64();
+
+    for (i = 0; i < oprsz / 16; i++) {
+        get_vreg64(ah, a->vj, 1 + i * 2);
+        get_vreg64(al, a->vj, i * 2);
+        get_vreg64(bh, a->vk, 1 + i * 2);
+        get_vreg64(bl, a->vk, i * 2);
+
+        func(rl, rh, al, ah, bl, bh);
+
+        set_vreg64(rh, a->vd, 1 + i * 2);
+        set_vreg64(rl, a->vd, i * 2);
+    }
+    return true;
 }
 
-VADDSUB_Q(add)
-VADDSUB_Q(sub)
+static bool gen_vaddsub_q(DisasContext *ctx, arg_vvv *a,
+                          void (*func)(TCGv_i64, TCGv_i64, TCGv_i64,
+                                       TCGv_i64, TCGv_i64, TCGv_i64))
+{
+    return gen_vaddsub_q_vl(ctx, a, 16, func);
+}
+
+static bool gen_xvaddsub_q(DisasContext *ctx, arg_vvv *a,
+                           void (*func)(TCGv_i64, TCGv_i64, TCGv_i64,
+                                        TCGv_i64, TCGv_i64, TCGv_i64))
+{
+    return gen_vaddsub_q_vl(ctx, a, 32, func);
+}
 
 TRANS(vsub_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_sub)
 TRANS(vsub_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_sub)
 TRANS(vsub_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_sub)
 TRANS(vsub_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_sub)
+TRANS(xvsub_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_sub)
+TRANS(xvsub_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_sub)
+TRANS(xvsub_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_sub)
+TRANS(xvsub_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_sub)
+
+TRANS(vadd_q, LSX, gen_vaddsub_q, tcg_gen_add2_i64)
+TRANS(vsub_q, LSX, gen_vaddsub_q, tcg_gen_sub2_i64)
+TRANS(xvadd_q, LASX, gen_xvaddsub_q, tcg_gen_add2_i64)
+TRANS(xvsub_q, LASX, gen_xvaddsub_q, tcg_gen_sub2_i64)
 
 TRANS(vaddi_bu, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_addi)
 TRANS(vaddi_hu, LSX, gvec_vv_i, MO_16, tcg_gen_gvec_addi)
@@ -187,11 +428,23 @@ TRANS(vsubi_bu, LSX, gvec_subi, MO_8)
 TRANS(vsubi_hu, LSX, gvec_subi, MO_16)
 TRANS(vsubi_wu, LSX, gvec_subi, MO_32)
 TRANS(vsubi_du, LSX, gvec_subi, MO_64)
+TRANS(xvaddi_bu, LASX, gvec_xx_i, MO_8, tcg_gen_gvec_addi)
+TRANS(xvaddi_hu, LASX, gvec_xx_i, MO_16, tcg_gen_gvec_addi)
+TRANS(xvaddi_wu, LASX, gvec_xx_i, MO_32, tcg_gen_gvec_addi)
+TRANS(xvaddi_du, LASX, gvec_xx_i, MO_64, tcg_gen_gvec_addi)
+TRANS(xvsubi_bu, LASX, gvec_xsubi, MO_8)
+TRANS(xvsubi_hu, LASX, gvec_xsubi, MO_16)
+TRANS(xvsubi_wu, LASX, gvec_xsubi, MO_32)
+TRANS(xvsubi_du, LASX, gvec_xsubi, MO_64)
 
 TRANS(vneg_b, LSX, gvec_vv, MO_8, tcg_gen_gvec_neg)
 TRANS(vneg_h, LSX, gvec_vv, MO_16, tcg_gen_gvec_neg)
 TRANS(vneg_w, LSX, gvec_vv, MO_32, tcg_gen_gvec_neg)
 TRANS(vneg_d, LSX, gvec_vv, MO_64, tcg_gen_gvec_neg)
+TRANS(xvneg_b, LASX, gvec_xx, MO_8, tcg_gen_gvec_neg)
+TRANS(xvneg_h, LASX, gvec_xx, MO_16, tcg_gen_gvec_neg)
+TRANS(xvneg_w, LASX, gvec_xx, MO_32, tcg_gen_gvec_neg)
+TRANS(xvneg_d, LASX, gvec_xx, MO_64, tcg_gen_gvec_neg)
 
 TRANS(vsadd_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_ssadd)
 TRANS(vsadd_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_ssadd)
@@ -210,6 +463,23 @@ TRANS(vssub_hu, LSX, gvec_vvv, MO_16, tcg_gen_gvec_ussub)
 TRANS(vssub_wu, LSX, gvec_vvv, MO_32, tcg_gen_gvec_ussub)
 TRANS(vssub_du, LSX, gvec_vvv, MO_64, tcg_gen_gvec_ussub)
 
+TRANS(xvsadd_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_ssadd)
+TRANS(xvsadd_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_ssadd)
+TRANS(xvsadd_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_ssadd)
+TRANS(xvsadd_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_ssadd)
+TRANS(xvsadd_bu, LASX, gvec_xxx, MO_8, tcg_gen_gvec_usadd)
+TRANS(xvsadd_hu, LASX, gvec_xxx, MO_16, tcg_gen_gvec_usadd)
+TRANS(xvsadd_wu, LASX, gvec_xxx, MO_32, tcg_gen_gvec_usadd)
+TRANS(xvsadd_du, LASX, gvec_xxx, MO_64, tcg_gen_gvec_usadd)
+TRANS(xvssub_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_sssub)
+TRANS(xvssub_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_sssub)
+TRANS(xvssub_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_sssub)
+TRANS(xvssub_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_sssub)
+TRANS(xvssub_bu, LASX, gvec_xxx, MO_8, tcg_gen_gvec_ussub)
+TRANS(xvssub_hu, LASX, gvec_xxx, MO_16, tcg_gen_gvec_ussub)
+TRANS(xvssub_wu, LASX, gvec_xxx, MO_32, tcg_gen_gvec_ussub)
+TRANS(xvssub_du, LASX, gvec_xxx, MO_64, tcg_gen_gvec_ussub)
+
 TRANS(vhaddw_h_b, LSX, gen_vvv, gen_helper_vhaddw_h_b)
 TRANS(vhaddw_w_h, LSX, gen_vvv, gen_helper_vhaddw_w_h)
 TRANS(vhaddw_d_w, LSX, gen_vvv, gen_helper_vhaddw_d_w)
@@ -227,6 +497,23 @@ TRANS(vhsubw_wu_hu, LSX, gen_vvv, gen_helper_vhsubw_wu_hu)
 TRANS(vhsubw_du_wu, LSX, gen_vvv, gen_helper_vhsubw_du_wu)
 TRANS(vhsubw_qu_du, LSX, gen_vvv, gen_helper_vhsubw_qu_du)
 
+TRANS(xvhaddw_h_b, LASX, gen_xxx, gen_helper_vhaddw_h_b)
+TRANS(xvhaddw_w_h, LASX, gen_xxx, gen_helper_vhaddw_w_h)
+TRANS(xvhaddw_d_w, LASX, gen_xxx, gen_helper_vhaddw_d_w)
+TRANS(xvhaddw_q_d, LASX, gen_xxx, gen_helper_vhaddw_q_d)
+TRANS(xvhaddw_hu_bu, LASX, gen_xxx, gen_helper_vhaddw_hu_bu)
+TRANS(xvhaddw_wu_hu, LASX, gen_xxx, gen_helper_vhaddw_wu_hu)
+TRANS(xvhaddw_du_wu, LASX, gen_xxx, gen_helper_vhaddw_du_wu)
+TRANS(xvhaddw_qu_du, LASX, gen_xxx, gen_helper_vhaddw_qu_du)
+TRANS(xvhsubw_h_b, LASX, gen_xxx, gen_helper_vhsubw_h_b)
+TRANS(xvhsubw_w_h, LASX, gen_xxx, gen_helper_vhsubw_w_h)
+TRANS(xvhsubw_d_w, LASX, gen_xxx, gen_helper_vhsubw_d_w)
+TRANS(xvhsubw_q_d, LASX, gen_xxx, gen_helper_vhsubw_q_d)
+TRANS(xvhsubw_hu_bu, LASX, gen_xxx, gen_helper_vhsubw_hu_bu)
+TRANS(xvhsubw_wu_hu, LASX, gen_xxx, gen_helper_vhsubw_wu_hu)
+TRANS(xvhsubw_du_wu, LASX, gen_xxx, gen_helper_vhsubw_du_wu)
+TRANS(xvhsubw_qu_du, LASX, gen_xxx, gen_helper_vhsubw_qu_du)
+
 static void gen_vaddwev_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
 {
     TCGv_vec t1, t2;
@@ -309,6 +596,10 @@ TRANS(vaddwev_h_b, LSX, gvec_vvv, MO_8, do_vaddwev_s)
 TRANS(vaddwev_w_h, LSX, gvec_vvv, MO_16, do_vaddwev_s)
 TRANS(vaddwev_d_w, LSX, gvec_vvv, MO_32, do_vaddwev_s)
 TRANS(vaddwev_q_d, LSX, gvec_vvv, MO_64, do_vaddwev_s)
+TRANS(xvaddwev_h_b, LASX, gvec_xxx, MO_8, do_vaddwev_s)
+TRANS(xvaddwev_w_h, LASX, gvec_xxx, MO_16, do_vaddwev_s)
+TRANS(xvaddwev_d_w, LASX, gvec_xxx, MO_32, do_vaddwev_s)
+TRANS(xvaddwev_q_d, LASX, gvec_xxx, MO_64, do_vaddwev_s)
 
 static void gen_vaddwod_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
 {
@@ -388,6 +679,11 @@ TRANS(vaddwod_h_b, LSX, gvec_vvv, MO_8, do_vaddwod_s)
 TRANS(vaddwod_w_h, LSX, gvec_vvv, MO_16, do_vaddwod_s)
 TRANS(vaddwod_d_w, LSX, gvec_vvv, MO_32, do_vaddwod_s)
 TRANS(vaddwod_q_d, LSX, gvec_vvv, MO_64, do_vaddwod_s)
+TRANS(xvaddwod_h_b, LASX, gvec_xxx, MO_8, do_vaddwod_s)
+TRANS(xvaddwod_w_h, LASX, gvec_xxx, MO_16, do_vaddwod_s)
+TRANS(xvaddwod_d_w, LASX, gvec_xxx, MO_32, do_vaddwod_s)
+TRANS(xvaddwod_q_d, LASX, gvec_xxx, MO_64, do_vaddwod_s)
+
 
 static void gen_vsubwev_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
 {
@@ -471,6 +767,10 @@ TRANS(vsubwev_h_b, LSX, gvec_vvv, MO_8, do_vsubwev_s)
 TRANS(vsubwev_w_h, LSX, gvec_vvv, MO_16, do_vsubwev_s)
 TRANS(vsubwev_d_w, LSX, gvec_vvv, MO_32, do_vsubwev_s)
 TRANS(vsubwev_q_d, LSX, gvec_vvv, MO_64, do_vsubwev_s)
+TRANS(xvsubwev_h_b, LASX, gvec_xxx, MO_8, do_vsubwev_s)
+TRANS(xvsubwev_w_h, LASX, gvec_xxx, MO_16, do_vsubwev_s)
+TRANS(xvsubwev_d_w, LASX, gvec_xxx, MO_32, do_vsubwev_s)
+TRANS(xvsubwev_q_d, LASX, gvec_xxx, MO_64, do_vsubwev_s)
 
 static void gen_vsubwod_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
 {
@@ -550,6 +850,10 @@ TRANS(vsubwod_h_b, LSX, gvec_vvv, MO_8, do_vsubwod_s)
 TRANS(vsubwod_w_h, LSX, gvec_vvv, MO_16, do_vsubwod_s)
 TRANS(vsubwod_d_w, LSX, gvec_vvv, MO_32, do_vsubwod_s)
 TRANS(vsubwod_q_d, LSX, gvec_vvv, MO_64, do_vsubwod_s)
+TRANS(xvsubwod_h_b, LASX, gvec_xxx, MO_8, do_vsubwod_s)
+TRANS(xvsubwod_w_h, LASX, gvec_xxx, MO_16, do_vsubwod_s)
+TRANS(xvsubwod_d_w, LASX, gvec_xxx, MO_32, do_vsubwod_s)
+TRANS(xvsubwod_q_d, LASX, gvec_xxx, MO_64, do_vsubwod_s)
 
 static void gen_vaddwev_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
 {
@@ -625,6 +929,10 @@ TRANS(vaddwev_h_bu, LSX, gvec_vvv, MO_8, do_vaddwev_u)
 TRANS(vaddwev_w_hu, LSX, gvec_vvv, MO_16, do_vaddwev_u)
 TRANS(vaddwev_d_wu, LSX, gvec_vvv, MO_32, do_vaddwev_u)
 TRANS(vaddwev_q_du, LSX, gvec_vvv, MO_64, do_vaddwev_u)
+TRANS(xvaddwev_h_bu, LASX, gvec_xxx, MO_8, do_vaddwev_u)
+TRANS(xvaddwev_w_hu, LASX, gvec_xxx, MO_16, do_vaddwev_u)
+TRANS(xvaddwev_d_wu, LASX, gvec_xxx, MO_32, do_vaddwev_u)
+TRANS(xvaddwev_q_du, LASX, gvec_xxx, MO_64, do_vaddwev_u)
 
 static void gen_vaddwod_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
 {
@@ -704,6 +1012,10 @@ TRANS(vaddwod_h_bu, LSX, gvec_vvv, MO_8, do_vaddwod_u)
 TRANS(vaddwod_w_hu, LSX, gvec_vvv, MO_16, do_vaddwod_u)
 TRANS(vaddwod_d_wu, LSX, gvec_vvv, MO_32, do_vaddwod_u)
 TRANS(vaddwod_q_du, LSX, gvec_vvv, MO_64, do_vaddwod_u)
+TRANS(xvaddwod_h_bu, LASX, gvec_xxx, MO_8, do_vaddwod_u)
+TRANS(xvaddwod_w_hu, LASX, gvec_xxx, MO_16, do_vaddwod_u)
+TRANS(xvaddwod_d_wu, LASX, gvec_xxx, MO_32, do_vaddwod_u)
+TRANS(xvaddwod_q_du, LASX, gvec_xxx, MO_64, do_vaddwod_u)
 
 static void gen_vsubwev_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
 {
@@ -779,6 +1091,10 @@ TRANS(vsubwev_h_bu, LSX, gvec_vvv, MO_8, do_vsubwev_u)
 TRANS(vsubwev_w_hu, LSX, gvec_vvv, MO_16, do_vsubwev_u)
 TRANS(vsubwev_d_wu, LSX, gvec_vvv, MO_32, do_vsubwev_u)
 TRANS(vsubwev_q_du, LSX, gvec_vvv, MO_64, do_vsubwev_u)
+TRANS(xvsubwev_h_bu, LASX, gvec_xxx, MO_8, do_vsubwev_u)
+TRANS(xvsubwev_w_hu, LASX, gvec_xxx, MO_16, do_vsubwev_u)
+TRANS(xvsubwev_d_wu, LASX, gvec_xxx, MO_32, do_vsubwev_u)
+TRANS(xvsubwev_q_du, LASX, gvec_xxx, MO_64, do_vsubwev_u)
 
 static void gen_vsubwod_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
 {
@@ -858,6 +1174,10 @@ TRANS(vsubwod_h_bu, LSX, gvec_vvv, MO_8, do_vsubwod_u)
 TRANS(vsubwod_w_hu, LSX, gvec_vvv, MO_16, do_vsubwod_u)
 TRANS(vsubwod_d_wu, LSX, gvec_vvv, MO_32, do_vsubwod_u)
 TRANS(vsubwod_q_du, LSX, gvec_vvv, MO_64, do_vsubwod_u)
+TRANS(xvsubwod_h_bu, LASX, gvec_xxx, MO_8, do_vsubwod_u)
+TRANS(xvsubwod_w_hu, LASX, gvec_xxx, MO_16, do_vsubwod_u)
+TRANS(xvsubwod_d_wu, LASX, gvec_xxx, MO_32, do_vsubwod_u)
+TRANS(xvsubwod_q_du, LASX, gvec_xxx, MO_64, do_vsubwod_u)
 
 static void gen_vaddwev_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
 {
@@ -941,6 +1261,10 @@ TRANS(vaddwev_h_bu_b, LSX, gvec_vvv, MO_8, do_vaddwev_u_s)
 TRANS(vaddwev_w_hu_h, LSX, gvec_vvv, MO_16, do_vaddwev_u_s)
 TRANS(vaddwev_d_wu_w, LSX, gvec_vvv, MO_32, do_vaddwev_u_s)
 TRANS(vaddwev_q_du_d, LSX, gvec_vvv, MO_64, do_vaddwev_u_s)
+TRANS(xvaddwev_h_bu_b, LASX, gvec_xxx, MO_8, do_vaddwev_u_s)
+TRANS(xvaddwev_w_hu_h, LASX, gvec_xxx, MO_16, do_vaddwev_u_s)
+TRANS(xvaddwev_d_wu_w, LASX, gvec_xxx, MO_32, do_vaddwev_u_s)
+TRANS(xvaddwev_q_du_d, LASX, gvec_xxx, MO_64, do_vaddwev_u_s)
 
 static void gen_vaddwod_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
 {
@@ -1021,6 +1345,10 @@ TRANS(vaddwod_h_bu_b, LSX, gvec_vvv, MO_8, do_vaddwod_u_s)
 TRANS(vaddwod_w_hu_h, LSX, gvec_vvv, MO_16, do_vaddwod_u_s)
 TRANS(vaddwod_d_wu_w, LSX, gvec_vvv, MO_32, do_vaddwod_u_s)
 TRANS(vaddwod_q_du_d, LSX, gvec_vvv, MO_64, do_vaddwod_u_s)
+TRANS(xvaddwod_h_bu_b, LSX, gvec_xxx, MO_8, do_vaddwod_u_s)
+TRANS(xvaddwod_w_hu_h, LSX, gvec_xxx, MO_16, do_vaddwod_u_s)
+TRANS(xvaddwod_d_wu_w, LSX, gvec_xxx, MO_32, do_vaddwod_u_s)
+TRANS(xvaddwod_q_du_d, LSX, gvec_xxx, MO_64, do_vaddwod_u_s)
 
 static void do_vavg(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b,
                     void (*gen_shr_vec)(unsigned, TCGv_vec,
@@ -1137,6 +1465,14 @@ TRANS(vavg_bu, LSX, gvec_vvv, MO_8, do_vavg_u)
 TRANS(vavg_hu, LSX, gvec_vvv, MO_16, do_vavg_u)
 TRANS(vavg_wu, LSX, gvec_vvv, MO_32, do_vavg_u)
 TRANS(vavg_du, LSX, gvec_vvv, MO_64, do_vavg_u)
+TRANS(xvavg_b, LASX, gvec_xxx, MO_8, do_vavg_s)
+TRANS(xvavg_h, LASX, gvec_xxx, MO_16, do_vavg_s)
+TRANS(xvavg_w, LASX, gvec_xxx, MO_32, do_vavg_s)
+TRANS(xvavg_d, LASX, gvec_xxx, MO_64, do_vavg_s)
+TRANS(xvavg_bu, LASX, gvec_xxx, MO_8, do_vavg_u)
+TRANS(xvavg_hu, LASX, gvec_xxx, MO_16, do_vavg_u)
+TRANS(xvavg_wu, LASX, gvec_xxx, MO_32, do_vavg_u)
+TRANS(xvavg_du, LASX, gvec_xxx, MO_64, do_vavg_u)
 
 static void do_vavgr_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
                        uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
@@ -1218,6 +1554,14 @@ TRANS(vavgr_bu, LSX, gvec_vvv, MO_8, do_vavgr_u)
 TRANS(vavgr_hu, LSX, gvec_vvv, MO_16, do_vavgr_u)
 TRANS(vavgr_wu, LSX, gvec_vvv, MO_32, do_vavgr_u)
 TRANS(vavgr_du, LSX, gvec_vvv, MO_64, do_vavgr_u)
+TRANS(xvavgr_b, LASX, gvec_xxx, MO_8, do_vavgr_s)
+TRANS(xvavgr_h, LASX, gvec_xxx, MO_16, do_vavgr_s)
+TRANS(xvavgr_w, LASX, gvec_xxx, MO_32, do_vavgr_s)
+TRANS(xvavgr_d, LASX, gvec_xxx, MO_64, do_vavgr_s)
+TRANS(xvavgr_bu, LASX, gvec_xxx, MO_8, do_vavgr_u)
+TRANS(xvavgr_hu, LASX, gvec_xxx, MO_16, do_vavgr_u)
+TRANS(xvavgr_wu, LASX, gvec_xxx, MO_32, do_vavgr_u)
+TRANS(xvavgr_du, LASX, gvec_xxx, MO_64, do_vavgr_u)
 
 static void gen_vabsd_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
 {
@@ -1313,6 +1657,14 @@ TRANS(vabsd_bu, LSX, gvec_vvv, MO_8, do_vabsd_u)
 TRANS(vabsd_hu, LSX, gvec_vvv, MO_16, do_vabsd_u)
 TRANS(vabsd_wu, LSX, gvec_vvv, MO_32, do_vabsd_u)
 TRANS(vabsd_du, LSX, gvec_vvv, MO_64, do_vabsd_u)
+TRANS(xvabsd_b, LASX, gvec_xxx, MO_8, do_vabsd_s)
+TRANS(xvabsd_h, LASX, gvec_xxx, MO_16, do_vabsd_s)
+TRANS(xvabsd_w, LASX, gvec_xxx, MO_32, do_vabsd_s)
+TRANS(xvabsd_d, LASX, gvec_xxx, MO_64, do_vabsd_s)
+TRANS(xvabsd_bu, LASX, gvec_xxx, MO_8, do_vabsd_u)
+TRANS(xvabsd_hu, LASX, gvec_xxx, MO_16, do_vabsd_u)
+TRANS(xvabsd_wu, LASX, gvec_xxx, MO_32, do_vabsd_u)
+TRANS(xvabsd_du, LASX, gvec_xxx, MO_64, do_vabsd_u)
 
 static void gen_vadda(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
 {
@@ -1366,6 +1718,10 @@ TRANS(vadda_b, LSX, gvec_vvv, MO_8, do_vadda)
 TRANS(vadda_h, LSX, gvec_vvv, MO_16, do_vadda)
 TRANS(vadda_w, LSX, gvec_vvv, MO_32, do_vadda)
 TRANS(vadda_d, LSX, gvec_vvv, MO_64, do_vadda)
+TRANS(xvadda_b, LASX, gvec_xxx, MO_8, do_vadda)
+TRANS(xvadda_h, LASX, gvec_xxx, MO_16, do_vadda)
+TRANS(xvadda_w, LASX, gvec_xxx, MO_32, do_vadda)
+TRANS(xvadda_d, LASX, gvec_xxx, MO_64, do_vadda)
 
 TRANS(vmax_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_smax)
 TRANS(vmax_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_smax)
@@ -1375,6 +1731,14 @@ TRANS(vmax_bu, LSX, gvec_vvv, MO_8, tcg_gen_gvec_umax)
 TRANS(vmax_hu, LSX, gvec_vvv, MO_16, tcg_gen_gvec_umax)
 TRANS(vmax_wu, LSX, gvec_vvv, MO_32, tcg_gen_gvec_umax)
 TRANS(vmax_du, LSX, gvec_vvv, MO_64, tcg_gen_gvec_umax)
+TRANS(xvmax_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_smax)
+TRANS(xvmax_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_smax)
+TRANS(xvmax_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_smax)
+TRANS(xvmax_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_smax)
+TRANS(xvmax_bu, LASX, gvec_xxx, MO_8, tcg_gen_gvec_umax)
+TRANS(xvmax_hu, LASX, gvec_xxx, MO_16, tcg_gen_gvec_umax)
+TRANS(xvmax_wu, LASX, gvec_xxx, MO_32, tcg_gen_gvec_umax)
+TRANS(xvmax_du, LASX, gvec_xxx, MO_64, tcg_gen_gvec_umax)
 
 TRANS(vmin_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_smin)
 TRANS(vmin_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_smin)
@@ -1384,6 +1748,14 @@ TRANS(vmin_bu, LSX, gvec_vvv, MO_8, tcg_gen_gvec_umin)
 TRANS(vmin_hu, LSX, gvec_vvv, MO_16, tcg_gen_gvec_umin)
 TRANS(vmin_wu, LSX, gvec_vvv, MO_32, tcg_gen_gvec_umin)
 TRANS(vmin_du, LSX, gvec_vvv, MO_64, tcg_gen_gvec_umin)
+TRANS(xvmin_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_smin)
+TRANS(xvmin_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_smin)
+TRANS(xvmin_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_smin)
+TRANS(xvmin_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_smin)
+TRANS(xvmin_bu, LASX, gvec_xxx, MO_8, tcg_gen_gvec_umin)
+TRANS(xvmin_hu, LASX, gvec_xxx, MO_16, tcg_gen_gvec_umin)
+TRANS(xvmin_wu, LASX, gvec_xxx, MO_32, tcg_gen_gvec_umin)
+TRANS(xvmin_du, LASX, gvec_xxx, MO_64, tcg_gen_gvec_umin)
 
 static void gen_vmini_s(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
 {
@@ -1485,6 +1857,14 @@ TRANS(vmini_bu, LSX, gvec_vv_i, MO_8, do_vmini_u)
 TRANS(vmini_hu, LSX, gvec_vv_i, MO_16, do_vmini_u)
 TRANS(vmini_wu, LSX, gvec_vv_i, MO_32, do_vmini_u)
 TRANS(vmini_du, LSX, gvec_vv_i, MO_64, do_vmini_u)
+TRANS(xvmini_b, LASX, gvec_xx_i, MO_8, do_vmini_s)
+TRANS(xvmini_h, LASX, gvec_xx_i, MO_16, do_vmini_s)
+TRANS(xvmini_w, LASX, gvec_xx_i, MO_32, do_vmini_s)
+TRANS(xvmini_d, LASX, gvec_xx_i, MO_64, do_vmini_s)
+TRANS(xvmini_bu, LASX, gvec_xx_i, MO_8, do_vmini_u)
+TRANS(xvmini_hu, LASX, gvec_xx_i, MO_16, do_vmini_u)
+TRANS(xvmini_wu, LASX, gvec_xx_i, MO_32, do_vmini_u)
+TRANS(xvmini_du, LASX, gvec_xx_i, MO_64, do_vmini_u)
 
 static void do_vmaxi_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
                        int64_t imm, uint32_t oprsz, uint32_t maxsz)
@@ -1566,11 +1946,23 @@ TRANS(vmaxi_bu, LSX, gvec_vv_i, MO_8, do_vmaxi_u)
 TRANS(vmaxi_hu, LSX, gvec_vv_i, MO_16, do_vmaxi_u)
 TRANS(vmaxi_wu, LSX, gvec_vv_i, MO_32, do_vmaxi_u)
 TRANS(vmaxi_du, LSX, gvec_vv_i, MO_64, do_vmaxi_u)
+TRANS(xvmaxi_b, LASX, gvec_xx_i, MO_8, do_vmaxi_s)
+TRANS(xvmaxi_h, LASX, gvec_xx_i, MO_16, do_vmaxi_s)
+TRANS(xvmaxi_w, LASX, gvec_xx_i, MO_32, do_vmaxi_s)
+TRANS(xvmaxi_d, LASX, gvec_xx_i, MO_64, do_vmaxi_s)
+TRANS(xvmaxi_bu, LASX, gvec_xx_i, MO_8, do_vmaxi_u)
+TRANS(xvmaxi_hu, LASX, gvec_xx_i, MO_16, do_vmaxi_u)
+TRANS(xvmaxi_wu, LASX, gvec_xx_i, MO_32, do_vmaxi_u)
+TRANS(xvmaxi_du, LASX, gvec_xx_i, MO_64, do_vmaxi_u)
 
 TRANS(vmul_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_mul)
 TRANS(vmul_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_mul)
 TRANS(vmul_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_mul)
 TRANS(vmul_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_mul)
+TRANS(xvmul_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_mul)
+TRANS(xvmul_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_mul)
+TRANS(xvmul_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_mul)
+TRANS(xvmul_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_mul)
 
 static void gen_vmuh_w(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
 {
@@ -1615,6 +2007,10 @@ TRANS(vmuh_b, LSX, gvec_vvv, MO_8, do_vmuh_s)
 TRANS(vmuh_h, LSX, gvec_vvv, MO_16, do_vmuh_s)
 TRANS(vmuh_w, LSX, gvec_vvv, MO_32, do_vmuh_s)
 TRANS(vmuh_d, LSX, gvec_vvv, MO_64, do_vmuh_s)
+TRANS(xvmuh_b, LASX, gvec_xxx, MO_8, do_vmuh_s)
+TRANS(xvmuh_h, LASX, gvec_xxx, MO_16, do_vmuh_s)
+TRANS(xvmuh_w, LASX, gvec_xxx, MO_32, do_vmuh_s)
+TRANS(xvmuh_d, LASX, gvec_xxx, MO_64, do_vmuh_s)
 
 static void gen_vmuh_wu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
 {
@@ -1659,6 +2055,10 @@ TRANS(vmuh_bu, LSX, gvec_vvv, MO_8,  do_vmuh_u)
 TRANS(vmuh_hu, LSX, gvec_vvv, MO_16, do_vmuh_u)
 TRANS(vmuh_wu, LSX, gvec_vvv, MO_32, do_vmuh_u)
 TRANS(vmuh_du, LSX, gvec_vvv, MO_64, do_vmuh_u)
+TRANS(xvmuh_bu, LASX, gvec_xxx, MO_8,  do_vmuh_u)
+TRANS(xvmuh_hu, LASX, gvec_xxx, MO_16, do_vmuh_u)
+TRANS(xvmuh_wu, LASX, gvec_xxx, MO_32, do_vmuh_u)
+TRANS(xvmuh_du, LASX, gvec_xxx, MO_64, do_vmuh_u)
 
 static void gen_vmulwev_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
 {
@@ -1731,6 +2131,9 @@ static void do_vmulwev_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
 TRANS(vmulwev_h_b, LSX, gvec_vvv, MO_8, do_vmulwev_s)
 TRANS(vmulwev_w_h, LSX, gvec_vvv, MO_16, do_vmulwev_s)
 TRANS(vmulwev_d_w, LSX, gvec_vvv, MO_32, do_vmulwev_s)
+TRANS(xvmulwev_h_b, LASX, gvec_xxx, MO_8, do_vmulwev_s)
+TRANS(xvmulwev_w_h, LASX, gvec_xxx, MO_16, do_vmulwev_s)
+TRANS(xvmulwev_d_w, LASX, gvec_xxx, MO_32, do_vmulwev_s)
 
 static void tcg_gen_mulus2_i64(TCGv_i64 rl, TCGv_i64 rh,
                                TCGv_i64 arg1, TCGv_i64 arg2)
@@ -1738,37 +2141,62 @@ static void tcg_gen_mulus2_i64(TCGv_i64 rl, TCGv_i64 rh,
     tcg_gen_mulsu2_i64(rl, rh, arg2, arg1);
 }
 
-#define VMUL_Q(NAME, FN, idx1, idx2)                      \
-static bool trans_## NAME (DisasContext *ctx, arg_vvv *a) \
-{                                                         \
-    TCGv_i64 rh, rl, arg1, arg2;                          \
-                                                          \
-    if (!avail_LSX(ctx)) {                                \
-        return false;                                     \
-    }                                                     \
-                                                          \
-    rh = tcg_temp_new_i64();                              \
-    rl = tcg_temp_new_i64();                              \
-    arg1 = tcg_temp_new_i64();                            \
-    arg2 = tcg_temp_new_i64();                            \
-                                                          \
-    get_vreg64(arg1, a->vj, idx1);                        \
-    get_vreg64(arg2, a->vk, idx2);                        \
-                                                          \
-    tcg_gen_## FN ##_i64(rl, rh, arg1, arg2);             \
-                                                          \
-    set_vreg64(rh, a->vd, 1);                             \
-    set_vreg64(rl, a->vd, 0);                             \
-                                                          \
-    return true;                                          \
+static bool gen_vmul_q_vl(DisasContext *ctx,
+                          arg_vvv *a, uint32_t oprsz, int idx1, int idx2,
+                          void (*func)(TCGv_i64, TCGv_i64,
+                                       TCGv_i64, TCGv_i64))
+{
+    TCGv_i64 rh, rl, arg1, arg2;
+    int i;
+
+    if (!check_vec(ctx, oprsz)) {
+        return true;
+    }
+
+    rh = tcg_temp_new_i64();
+    rl = tcg_temp_new_i64();
+    arg1 = tcg_temp_new_i64();
+    arg2 = tcg_temp_new_i64();
+
+    for (i = 0; i < oprsz / 16; i++) {
+        get_vreg64(arg1, a->vj, 2 * i + idx1);
+        get_vreg64(arg2, a->vk, 2 * i + idx2);
+
+        func(rl, rh, arg1, arg2);
+
+        set_vreg64(rh, a->vd, 2 * i + 1);
+        set_vreg64(rl, a->vd, 2 * i);
+    }
+
+    return true;
 }
 
-VMUL_Q(vmulwev_q_d, muls2, 0, 0)
-VMUL_Q(vmulwod_q_d, muls2, 1, 1)
-VMUL_Q(vmulwev_q_du, mulu2, 0, 0)
-VMUL_Q(vmulwod_q_du, mulu2, 1, 1)
-VMUL_Q(vmulwev_q_du_d, mulus2, 0, 0)
-VMUL_Q(vmulwod_q_du_d, mulus2, 1, 1)
+static bool gen_vmul_q(DisasContext *ctx, arg_vvv *a, int idx1, int idx2,
+                       void (*func)(TCGv_i64, TCGv_i64,
+                                    TCGv_i64, TCGv_i64))
+{
+    return gen_vmul_q_vl(ctx, a, 16, idx1, idx2, func);
+}
+
+static bool gen_xvmul_q(DisasContext *ctx, arg_vvv *a, int idx1, int idx2,
+                        void (*func)(TCGv_i64, TCGv_i64,
+                                     TCGv_i64, TCGv_i64))
+{
+    return gen_vmul_q_vl(ctx, a, 32, idx1, idx2, func);
+}
+
+TRANS(vmulwev_q_d, LSX, gen_vmul_q, 0, 0, tcg_gen_muls2_i64)
+TRANS(vmulwod_q_d, LSX, gen_vmul_q, 1, 1, tcg_gen_muls2_i64)
+TRANS(vmulwev_q_du, LSX, gen_vmul_q, 0, 0, tcg_gen_mulu2_i64)
+TRANS(vmulwod_q_du, LSX, gen_vmul_q, 1, 1, tcg_gen_mulu2_i64)
+TRANS(vmulwev_q_du_d, LSX, gen_vmul_q, 0, 0, tcg_gen_mulus2_i64)
+TRANS(vmulwod_q_du_d, LSX, gen_vmul_q, 1, 1, tcg_gen_mulus2_i64)
+TRANS(xvmulwev_q_d, LASX, gen_xvmul_q, 0, 0, tcg_gen_muls2_i64)
+TRANS(xvmulwod_q_d, LASX, gen_xvmul_q, 1, 1, tcg_gen_muls2_i64)
+TRANS(xvmulwev_q_du, LASX, gen_xvmul_q, 0, 0, tcg_gen_mulu2_i64)
+TRANS(xvmulwod_q_du, LASX, gen_xvmul_q, 1, 1, tcg_gen_mulu2_i64)
+TRANS(xvmulwev_q_du_d, LASX, gen_xvmul_q, 0, 0, tcg_gen_mulus2_i64)
+TRANS(xvmulwod_q_du_d, LASX, gen_xvmul_q, 1, 1, tcg_gen_mulus2_i64)
 
 static void gen_vmulwod_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
 {
@@ -1839,6 +2267,9 @@ static void do_vmulwod_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
 TRANS(vmulwod_h_b, LSX, gvec_vvv, MO_8, do_vmulwod_s)
 TRANS(vmulwod_w_h, LSX, gvec_vvv, MO_16, do_vmulwod_s)
 TRANS(vmulwod_d_w, LSX, gvec_vvv, MO_32, do_vmulwod_s)
+TRANS(xvmulwod_h_b, LASX, gvec_xxx, MO_8, do_vmulwod_s)
+TRANS(xvmulwod_w_h, LASX, gvec_xxx, MO_16, do_vmulwod_s)
+TRANS(xvmulwod_d_w, LASX, gvec_xxx, MO_32, do_vmulwod_s)
 
 static void gen_vmulwev_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
 {
@@ -1909,6 +2340,9 @@ static void do_vmulwev_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
 TRANS(vmulwev_h_bu, LSX, gvec_vvv, MO_8, do_vmulwev_u)
 TRANS(vmulwev_w_hu, LSX, gvec_vvv, MO_16, do_vmulwev_u)
 TRANS(vmulwev_d_wu, LSX, gvec_vvv, MO_32, do_vmulwev_u)
+TRANS(xvmulwev_h_bu, LASX, gvec_xxx, MO_8, do_vmulwev_u)
+TRANS(xvmulwev_w_hu, LASX, gvec_xxx, MO_16, do_vmulwev_u)
+TRANS(xvmulwev_d_wu, LASX, gvec_xxx, MO_32, do_vmulwev_u)
 
 static void gen_vmulwod_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
 {
@@ -1979,6 +2413,9 @@ static void do_vmulwod_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
 TRANS(vmulwod_h_bu, LSX, gvec_vvv, MO_8, do_vmulwod_u)
 TRANS(vmulwod_w_hu, LSX, gvec_vvv, MO_16, do_vmulwod_u)
 TRANS(vmulwod_d_wu, LSX, gvec_vvv, MO_32, do_vmulwod_u)
+TRANS(xvmulwod_h_bu, LASX, gvec_xxx, MO_8, do_vmulwod_u)
+TRANS(xvmulwod_w_hu, LASX, gvec_xxx, MO_16, do_vmulwod_u)
+TRANS(xvmulwod_d_wu, LASX, gvec_xxx, MO_32, do_vmulwod_u)
 
 static void gen_vmulwev_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
 {
@@ -2051,6 +2488,9 @@ static void do_vmulwev_u_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
 TRANS(vmulwev_h_bu_b, LSX, gvec_vvv, MO_8, do_vmulwev_u_s)
 TRANS(vmulwev_w_hu_h, LSX, gvec_vvv, MO_16, do_vmulwev_u_s)
 TRANS(vmulwev_d_wu_w, LSX, gvec_vvv, MO_32, do_vmulwev_u_s)
+TRANS(xvmulwev_h_bu_b, LASX, gvec_xxx, MO_8, do_vmulwev_u_s)
+TRANS(xvmulwev_w_hu_h, LASX, gvec_xxx, MO_16, do_vmulwev_u_s)
+TRANS(xvmulwev_d_wu_w, LASX, gvec_xxx, MO_32, do_vmulwev_u_s)
 
 static void gen_vmulwod_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
 {
@@ -2120,6 +2560,9 @@ static void do_vmulwod_u_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
 TRANS(vmulwod_h_bu_b, LSX, gvec_vvv, MO_8, do_vmulwod_u_s)
 TRANS(vmulwod_w_hu_h, LSX, gvec_vvv, MO_16, do_vmulwod_u_s)
 TRANS(vmulwod_d_wu_w, LSX, gvec_vvv, MO_32, do_vmulwod_u_s)
+TRANS(xvmulwod_h_bu_b, LASX, gvec_xxx, MO_8, do_vmulwod_u_s)
+TRANS(xvmulwod_w_hu_h, LASX, gvec_xxx, MO_16, do_vmulwod_u_s)
+TRANS(xvmulwod_d_wu_w, LASX, gvec_xxx, MO_32, do_vmulwod_u_s)
 
 static void gen_vmadd(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
 {
@@ -2194,6 +2637,10 @@ TRANS(vmadd_b, LSX, gvec_vvv, MO_8, do_vmadd)
 TRANS(vmadd_h, LSX, gvec_vvv, MO_16, do_vmadd)
 TRANS(vmadd_w, LSX, gvec_vvv, MO_32, do_vmadd)
 TRANS(vmadd_d, LSX, gvec_vvv, MO_64, do_vmadd)
+TRANS(xvmadd_b, LASX, gvec_xxx, MO_8, do_vmadd)
+TRANS(xvmadd_h, LASX, gvec_xxx, MO_16, do_vmadd)
+TRANS(xvmadd_w, LASX, gvec_xxx, MO_32, do_vmadd)
+TRANS(xvmadd_d, LASX, gvec_xxx, MO_64, do_vmadd)
 
 static void gen_vmsub(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
 {
@@ -2268,6 +2715,10 @@ TRANS(vmsub_b, LSX, gvec_vvv, MO_8, do_vmsub)
 TRANS(vmsub_h, LSX, gvec_vvv, MO_16, do_vmsub)
 TRANS(vmsub_w, LSX, gvec_vvv, MO_32, do_vmsub)
 TRANS(vmsub_d, LSX, gvec_vvv, MO_64, do_vmsub)
+TRANS(xvmsub_b, LASX, gvec_xxx, MO_8, do_vmsub)
+TRANS(xvmsub_h, LASX, gvec_xxx, MO_16, do_vmsub)
+TRANS(xvmsub_w, LASX, gvec_xxx, MO_32, do_vmsub)
+TRANS(xvmsub_d, LASX, gvec_xxx, MO_64, do_vmsub)
 
 static void gen_vmaddwev_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
 {
@@ -2342,43 +2793,69 @@ static void do_vmaddwev_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
 TRANS(vmaddwev_h_b, LSX, gvec_vvv, MO_8, do_vmaddwev_s)
 TRANS(vmaddwev_w_h, LSX, gvec_vvv, MO_16, do_vmaddwev_s)
 TRANS(vmaddwev_d_w, LSX, gvec_vvv, MO_32, do_vmaddwev_s)
+TRANS(xvmaddwev_h_b, LASX, gvec_xxx, MO_8, do_vmaddwev_s)
+TRANS(xvmaddwev_w_h, LASX, gvec_xxx, MO_16, do_vmaddwev_s)
+TRANS(xvmaddwev_d_w, LASX, gvec_xxx, MO_32, do_vmaddwev_s)
 
-#define VMADD_Q(NAME, FN, idx1, idx2)                     \
-static bool trans_## NAME (DisasContext *ctx, arg_vvv *a) \
-{                                                         \
-    TCGv_i64 rh, rl, arg1, arg2, th, tl;                  \
-                                                          \
-    if (!avail_LSX(ctx)) {                                \
-        return false;                                     \
-    }                                                     \
-                                                          \
-    rh = tcg_temp_new_i64();                              \
-    rl = tcg_temp_new_i64();                              \
-    arg1 = tcg_temp_new_i64();                            \
-    arg2 = tcg_temp_new_i64();                            \
-    th = tcg_temp_new_i64();                              \
-    tl = tcg_temp_new_i64();                              \
-                                                          \
-    get_vreg64(arg1, a->vj, idx1);                        \
-    get_vreg64(arg2, a->vk, idx2);                        \
-    get_vreg64(rh, a->vd, 1);                             \
-    get_vreg64(rl, a->vd, 0);                             \
-                                                          \
-    tcg_gen_## FN ##_i64(tl, th, arg1, arg2);             \
-    tcg_gen_add2_i64(rl, rh, rl, rh, tl, th);             \
-                                                          \
-    set_vreg64(rh, a->vd, 1);                             \
-    set_vreg64(rl, a->vd, 0);                             \
-                                                          \
-    return true;                                          \
+static bool gen_vmadd_q_vl(DisasContext * ctx,
+                           arg_vvv *a, uint32_t oprsz, int idx1, int idx2,
+                           void (*func)(TCGv_i64, TCGv_i64,
+                                        TCGv_i64, TCGv_i64))
+{
+    TCGv_i64 rh, rl, arg1, arg2, th, tl;
+    int i;
+
+    if (!check_vec(ctx, oprsz)) {
+        return true;
+    }
+
+    rh = tcg_temp_new_i64();
+    rl = tcg_temp_new_i64();
+    arg1 = tcg_temp_new_i64();
+    arg2 = tcg_temp_new_i64();
+    th = tcg_temp_new_i64();
+    tl = tcg_temp_new_i64();
+
+    for (i = 0; i < oprsz / 16; i++) {
+        get_vreg64(arg1, a->vj, 2 * i + idx1);
+        get_vreg64(arg2, a->vk, 2 * i + idx2);
+        get_vreg64(rh, a->vd, 2 * i + 1);
+        get_vreg64(rl, a->vd, 2 * i);
+
+        func(tl, th, arg1, arg2);
+        tcg_gen_add2_i64(rl, rh, rl, rh, tl, th);
+
+        set_vreg64(rh, a->vd, 2 * i + 1);
+        set_vreg64(rl, a->vd, 2 * i);
+    }
+
+    return true;
 }
 
-VMADD_Q(vmaddwev_q_d, muls2, 0, 0)
-VMADD_Q(vmaddwod_q_d, muls2, 1, 1)
-VMADD_Q(vmaddwev_q_du, mulu2, 0, 0)
-VMADD_Q(vmaddwod_q_du, mulu2, 1, 1)
-VMADD_Q(vmaddwev_q_du_d, mulus2, 0, 0)
-VMADD_Q(vmaddwod_q_du_d, mulus2, 1, 1)
+static bool gen_vmadd_q(DisasContext *ctx, arg_vvv *a, int idx1, int idx2,
+                        void (*func)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64))
+{
+    return gen_vmadd_q_vl(ctx, a, 16, idx1, idx2, func);
+}
+
+static bool gen_xvmadd_q(DisasContext *ctx, arg_vvv *a, int idx1, int idx2,
+                         void (*func)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64))
+{
+    return gen_vmadd_q_vl(ctx, a, 32, idx1, idx2, func);
+}
+
+TRANS(vmaddwev_q_d, LSX, gen_vmadd_q, 0, 0, tcg_gen_muls2_i64)
+TRANS(vmaddwod_q_d, LSX, gen_vmadd_q, 1, 1, tcg_gen_muls2_i64)
+TRANS(vmaddwev_q_du, LSX, gen_vmadd_q, 0, 0, tcg_gen_mulu2_i64)
+TRANS(vmaddwod_q_du, LSX, gen_vmadd_q, 1, 1, tcg_gen_mulu2_i64)
+TRANS(vmaddwev_q_du_d, LSX, gen_vmadd_q, 0, 0, tcg_gen_mulus2_i64)
+TRANS(vmaddwod_q_du_d, LSX, gen_vmadd_q, 1, 1, tcg_gen_mulus2_i64)
+TRANS(xvmaddwev_q_d, LASX, gen_xvmadd_q, 0, 0, tcg_gen_muls2_i64)
+TRANS(xvmaddwod_q_d, LASX, gen_xvmadd_q, 1, 1, tcg_gen_muls2_i64)
+TRANS(xvmaddwev_q_du, LASX, gen_xvmadd_q, 0, 0, tcg_gen_mulu2_i64)
+TRANS(xvmaddwod_q_du, LASX, gen_xvmadd_q, 1, 1, tcg_gen_mulu2_i64)
+TRANS(xvmaddwev_q_du_d, LASX, gen_xvmadd_q, 0, 0, tcg_gen_mulus2_i64)
+TRANS(xvmaddwod_q_du_d, LASX, gen_xvmadd_q, 1, 1, tcg_gen_mulus2_i64)
 
 static void gen_vmaddwod_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
 {
@@ -2450,6 +2927,9 @@ static void do_vmaddwod_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
 TRANS(vmaddwod_h_b, LSX, gvec_vvv, MO_8, do_vmaddwod_s)
 TRANS(vmaddwod_w_h, LSX, gvec_vvv, MO_16, do_vmaddwod_s)
 TRANS(vmaddwod_d_w, LSX, gvec_vvv, MO_32, do_vmaddwod_s)
+TRANS(xvmaddwod_h_b, LASX, gvec_xxx, MO_8, do_vmaddwod_s)
+TRANS(xvmaddwod_w_h, LASX, gvec_xxx, MO_16, do_vmaddwod_s)
+TRANS(xvmaddwod_d_w, LASX, gvec_xxx, MO_32, do_vmaddwod_s)
 
 static void gen_vmaddwev_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
 {
@@ -2520,6 +3000,9 @@ static void do_vmaddwev_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
 TRANS(vmaddwev_h_bu, LSX, gvec_vvv, MO_8, do_vmaddwev_u)
 TRANS(vmaddwev_w_hu, LSX, gvec_vvv, MO_16, do_vmaddwev_u)
 TRANS(vmaddwev_d_wu, LSX, gvec_vvv, MO_32, do_vmaddwev_u)
+TRANS(xvmaddwev_h_bu, LASX, gvec_xxx, MO_8, do_vmaddwev_u)
+TRANS(xvmaddwev_w_hu, LASX, gvec_xxx, MO_16, do_vmaddwev_u)
+TRANS(xvmaddwev_d_wu, LASX, gvec_xxx, MO_32, do_vmaddwev_u)
 
 static void gen_vmaddwod_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
 {
@@ -2591,6 +3074,9 @@ static void do_vmaddwod_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
 TRANS(vmaddwod_h_bu, LSX, gvec_vvv, MO_8, do_vmaddwod_u)
 TRANS(vmaddwod_w_hu, LSX, gvec_vvv, MO_16, do_vmaddwod_u)
 TRANS(vmaddwod_d_wu, LSX, gvec_vvv, MO_32, do_vmaddwod_u)
+TRANS(xvmaddwod_h_bu, LASX, gvec_xxx, MO_8, do_vmaddwod_u)
+TRANS(xvmaddwod_w_hu, LASX, gvec_xxx, MO_16, do_vmaddwod_u)
+TRANS(xvmaddwod_d_wu, LASX, gvec_xxx, MO_32, do_vmaddwod_u)
 
 static void gen_vmaddwev_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
 {
@@ -2664,6 +3150,9 @@ static void do_vmaddwev_u_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
 TRANS(vmaddwev_h_bu_b, LSX, gvec_vvv, MO_8, do_vmaddwev_u_s)
 TRANS(vmaddwev_w_hu_h, LSX, gvec_vvv, MO_16, do_vmaddwev_u_s)
 TRANS(vmaddwev_d_wu_w, LSX, gvec_vvv, MO_32, do_vmaddwev_u_s)
+TRANS(xvmaddwev_h_bu_b, LASX, gvec_xxx, MO_8, do_vmaddwev_u_s)
+TRANS(xvmaddwev_w_hu_h, LASX, gvec_xxx, MO_16, do_vmaddwev_u_s)
+TRANS(xvmaddwev_d_wu_w, LASX, gvec_xxx, MO_32, do_vmaddwev_u_s)
 
 static void gen_vmaddwod_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
 {
@@ -2736,6 +3225,9 @@ static void do_vmaddwod_u_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
 TRANS(vmaddwod_h_bu_b, LSX, gvec_vvv, MO_8, do_vmaddwod_u_s)
 TRANS(vmaddwod_w_hu_h, LSX, gvec_vvv, MO_16, do_vmaddwod_u_s)
 TRANS(vmaddwod_d_wu_w, LSX, gvec_vvv, MO_32, do_vmaddwod_u_s)
+TRANS(xvmaddwod_h_bu_b, LASX, gvec_xxx, MO_8, do_vmaddwod_u_s)
+TRANS(xvmaddwod_w_hu_h, LASX, gvec_xxx, MO_16, do_vmaddwod_u_s)
+TRANS(xvmaddwod_d_wu_w, LASX, gvec_xxx, MO_32, do_vmaddwod_u_s)
 
 TRANS(vdiv_b, LSX, gen_vvv, gen_helper_vdiv_b)
 TRANS(vdiv_h, LSX, gen_vvv, gen_helper_vdiv_h)
@@ -2753,6 +3245,22 @@ TRANS(vmod_bu, LSX, gen_vvv, gen_helper_vmod_bu)
 TRANS(vmod_hu, LSX, gen_vvv, gen_helper_vmod_hu)
 TRANS(vmod_wu, LSX, gen_vvv, gen_helper_vmod_wu)
 TRANS(vmod_du, LSX, gen_vvv, gen_helper_vmod_du)
+TRANS(xvdiv_b, LASX, gen_xxx, gen_helper_vdiv_b)
+TRANS(xvdiv_h, LASX, gen_xxx, gen_helper_vdiv_h)
+TRANS(xvdiv_w, LASX, gen_xxx, gen_helper_vdiv_w)
+TRANS(xvdiv_d, LASX, gen_xxx, gen_helper_vdiv_d)
+TRANS(xvdiv_bu, LASX, gen_xxx, gen_helper_vdiv_bu)
+TRANS(xvdiv_hu, LASX, gen_xxx, gen_helper_vdiv_hu)
+TRANS(xvdiv_wu, LASX, gen_xxx, gen_helper_vdiv_wu)
+TRANS(xvdiv_du, LASX, gen_xxx, gen_helper_vdiv_du)
+TRANS(xvmod_b, LASX, gen_xxx, gen_helper_vmod_b)
+TRANS(xvmod_h, LASX, gen_xxx, gen_helper_vmod_h)
+TRANS(xvmod_w, LASX, gen_xxx, gen_helper_vmod_w)
+TRANS(xvmod_d, LASX, gen_xxx, gen_helper_vmod_d)
+TRANS(xvmod_bu, LASX, gen_xxx, gen_helper_vmod_bu)
+TRANS(xvmod_hu, LASX, gen_xxx, gen_helper_vmod_hu)
+TRANS(xvmod_wu, LASX, gen_xxx, gen_helper_vmod_wu)
+TRANS(xvmod_du, LASX, gen_xxx, gen_helper_vmod_du)
 
 static void gen_vsat_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec max)
 {
@@ -2805,6 +3313,10 @@ TRANS(vsat_b, LSX, gvec_vv_i, MO_8, do_vsat_s)
 TRANS(vsat_h, LSX, gvec_vv_i, MO_16, do_vsat_s)
 TRANS(vsat_w, LSX, gvec_vv_i, MO_32, do_vsat_s)
 TRANS(vsat_d, LSX, gvec_vv_i, MO_64, do_vsat_s)
+TRANS(xvsat_b, LASX, gvec_xx_i, MO_8, do_vsat_s)
+TRANS(xvsat_h, LASX, gvec_xx_i, MO_16, do_vsat_s)
+TRANS(xvsat_w, LASX, gvec_xx_i, MO_32, do_vsat_s)
+TRANS(xvsat_d, LASX, gvec_xx_i, MO_64, do_vsat_s)
 
 static void gen_vsat_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec max)
 {
@@ -2854,6 +3366,10 @@ TRANS(vsat_bu, LSX, gvec_vv_i, MO_8, do_vsat_u)
 TRANS(vsat_hu, LSX, gvec_vv_i, MO_16, do_vsat_u)
 TRANS(vsat_wu, LSX, gvec_vv_i, MO_32, do_vsat_u)
 TRANS(vsat_du, LSX, gvec_vv_i, MO_64, do_vsat_u)
+TRANS(xvsat_bu, LASX, gvec_xx_i, MO_8, do_vsat_u)
+TRANS(xvsat_hu, LASX, gvec_xx_i, MO_16, do_vsat_u)
+TRANS(xvsat_wu, LASX, gvec_xx_i, MO_32, do_vsat_u)
+TRANS(xvsat_du, LASX, gvec_xx_i, MO_64, do_vsat_u)
 
 TRANS(vexth_h_b, LSX, gen_vv, gen_helper_vexth_h_b)
 TRANS(vexth_w_h, LSX, gen_vv, gen_helper_vexth_w_h)
@@ -2863,6 +3379,27 @@ TRANS(vexth_hu_bu, LSX, gen_vv, gen_helper_vexth_hu_bu)
 TRANS(vexth_wu_hu, LSX, gen_vv, gen_helper_vexth_wu_hu)
 TRANS(vexth_du_wu, LSX, gen_vv, gen_helper_vexth_du_wu)
 TRANS(vexth_qu_du, LSX, gen_vv, gen_helper_vexth_qu_du)
+TRANS(xvexth_h_b, LASX, gen_xx, gen_helper_vexth_h_b)
+TRANS(xvexth_w_h, LASX, gen_xx, gen_helper_vexth_w_h)
+TRANS(xvexth_d_w, LASX, gen_xx, gen_helper_vexth_d_w)
+TRANS(xvexth_q_d, LASX, gen_xx, gen_helper_vexth_q_d)
+TRANS(xvexth_hu_bu, LASX, gen_xx, gen_helper_vexth_hu_bu)
+TRANS(xvexth_wu_hu, LASX, gen_xx, gen_helper_vexth_wu_hu)
+TRANS(xvexth_du_wu, LASX, gen_xx, gen_helper_vexth_du_wu)
+TRANS(xvexth_qu_du, LASX, gen_xx, gen_helper_vexth_qu_du)
+
+TRANS(vext2xv_h_b, LASX, gen_xx, gen_helper_vext2xv_h_b)
+TRANS(vext2xv_w_b, LASX, gen_xx, gen_helper_vext2xv_w_b)
+TRANS(vext2xv_d_b, LASX, gen_xx, gen_helper_vext2xv_d_b)
+TRANS(vext2xv_w_h, LASX, gen_xx, gen_helper_vext2xv_w_h)
+TRANS(vext2xv_d_h, LASX, gen_xx, gen_helper_vext2xv_d_h)
+TRANS(vext2xv_d_w, LASX, gen_xx, gen_helper_vext2xv_d_w)
+TRANS(vext2xv_hu_bu, LASX, gen_xx, gen_helper_vext2xv_hu_bu)
+TRANS(vext2xv_wu_bu, LASX, gen_xx, gen_helper_vext2xv_wu_bu)
+TRANS(vext2xv_du_bu, LASX, gen_xx, gen_helper_vext2xv_du_bu)
+TRANS(vext2xv_wu_hu, LASX, gen_xx, gen_helper_vext2xv_wu_hu)
+TRANS(vext2xv_du_hu, LASX, gen_xx, gen_helper_vext2xv_du_hu)
+TRANS(vext2xv_du_wu, LASX, gen_xx, gen_helper_vext2xv_du_wu)
 
 static void gen_vsigncov(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
 {
@@ -2916,6 +3453,10 @@ TRANS(vsigncov_b, LSX, gvec_vvv, MO_8, do_vsigncov)
 TRANS(vsigncov_h, LSX, gvec_vvv, MO_16, do_vsigncov)
 TRANS(vsigncov_w, LSX, gvec_vvv, MO_32, do_vsigncov)
 TRANS(vsigncov_d, LSX, gvec_vvv, MO_64, do_vsigncov)
+TRANS(xvsigncov_b, LASX, gvec_xxx, MO_8, do_vsigncov)
+TRANS(xvsigncov_h, LASX, gvec_xxx, MO_16, do_vsigncov)
+TRANS(xvsigncov_w, LASX, gvec_xxx, MO_32, do_vsigncov)
+TRANS(xvsigncov_d, LASX, gvec_xxx, MO_64, do_vsigncov)
 
 TRANS(vmskltz_b, LSX, gen_vv, gen_helper_vmskltz_b)
 TRANS(vmskltz_h, LSX, gen_vv, gen_helper_vmskltz_h)
@@ -2923,6 +3464,12 @@ TRANS(vmskltz_w, LSX, gen_vv, gen_helper_vmskltz_w)
 TRANS(vmskltz_d, LSX, gen_vv, gen_helper_vmskltz_d)
 TRANS(vmskgez_b, LSX, gen_vv, gen_helper_vmskgez_b)
 TRANS(vmsknz_b, LSX, gen_vv, gen_helper_vmsknz_b)
+TRANS(xvmskltz_b, LASX, gen_xx, gen_helper_vmskltz_b)
+TRANS(xvmskltz_h, LASX, gen_xx, gen_helper_vmskltz_h)
+TRANS(xvmskltz_w, LASX, gen_xx, gen_helper_vmskltz_w)
+TRANS(xvmskltz_d, LASX, gen_xx, gen_helper_vmskltz_d)
+TRANS(xvmskgez_b, LASX, gen_xx, gen_helper_vmskgez_b)
+TRANS(xvmsknz_b, LASX, gen_xx, gen_helper_vmsknz_b)
 
 #define EXPAND_BYTE(bit)  ((uint64_t)(bit ? 0xff : 0))
 
@@ -3040,17 +3587,15 @@ static uint64_t vldi_get_value(DisasContext *ctx, uint32_t imm)
     return data;
 }
 
-static bool trans_vldi(DisasContext *ctx, arg_vldi *a)
+static bool gen_vldi(DisasContext *ctx, arg_vldi *a, uint32_t oprsz)
 {
     int sel, vece;
     uint64_t value;
 
-    if (!avail_LSX(ctx)) {
-        return false;
+    if (!check_vec(ctx, oprsz)) {
+        return true;
     }
 
-    CHECK_SXE;
-
     sel = (a->imm >> 12) & 0x1;
 
     if (sel) {
@@ -3061,37 +3606,29 @@ static bool trans_vldi(DisasContext *ctx, arg_vldi *a)
         vece = (a->imm >> 10) & 0x3;
     }
 
-    tcg_gen_gvec_dup_i64(vece, vec_full_offset(a->vd), 16, ctx->vl/8,
+    tcg_gen_gvec_dup_i64(vece, vec_full_offset(a->vd), oprsz, ctx->vl/8,
                          tcg_constant_i64(value));
     return true;
 }
 
-TRANS(vand_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_and)
-TRANS(vor_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_or)
-TRANS(vxor_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_xor)
-TRANS(vnor_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_nor)
+TRANS(vldi, LSX, gen_vldi, 16)
+TRANS(xvldi, LASX, gen_vldi, 32)
 
-static bool trans_vandn_v(DisasContext *ctx, arg_vvv *a)
+static bool gen_vandn_v(DisasContext *ctx, arg_vvv *a, uint32_t oprsz)
 {
     uint32_t vd_ofs, vj_ofs, vk_ofs;
 
-    if (!avail_LSX(ctx)) {
-        return false;
+    if (!check_vec(ctx, oprsz)) {
+        return true;
     }
 
-    CHECK_SXE;
-
     vd_ofs = vec_full_offset(a->vd);
     vj_ofs = vec_full_offset(a->vj);
     vk_ofs = vec_full_offset(a->vk);
 
-    tcg_gen_gvec_andc(MO_64, vd_ofs, vk_ofs, vj_ofs, 16, ctx->vl/8);
+    tcg_gen_gvec_andc(MO_64, vd_ofs, vk_ofs, vj_ofs, oprsz, ctx->vl / 8);
     return true;
 }
-TRANS(vorn_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_orc)
-TRANS(vandi_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_andi)
-TRANS(vori_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_ori)
-TRANS(vxori_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_xori)
 
 static void gen_vnori(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
 {
@@ -3124,7 +3661,26 @@ static void do_vnori_b(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
     tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op);
 }
 
+TRANS(vand_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_and)
+TRANS(vor_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_or)
+TRANS(vxor_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_xor)
+TRANS(vnor_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_nor)
+TRANS(vandn_v, LSX, gen_vandn_v, 16)
+TRANS(vorn_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_orc)
+TRANS(vandi_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_andi)
+TRANS(vori_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_ori)
+TRANS(vxori_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_xori)
 TRANS(vnori_b, LSX, gvec_vv_i, MO_8, do_vnori_b)
+TRANS(xvand_v, LASX, gvec_xxx, MO_64, tcg_gen_gvec_and)
+TRANS(xvor_v, LASX, gvec_xxx, MO_64, tcg_gen_gvec_or)
+TRANS(xvxor_v, LASX, gvec_xxx, MO_64, tcg_gen_gvec_xor)
+TRANS(xvnor_v, LASX, gvec_xxx, MO_64, tcg_gen_gvec_nor)
+TRANS(xvandn_v, LASX, gen_vandn_v, 32)
+TRANS(xvorn_v, LASX, gvec_xxx, MO_64, tcg_gen_gvec_orc)
+TRANS(xvandi_b, LASX, gvec_xx_i, MO_8, tcg_gen_gvec_andi)
+TRANS(xvori_b, LASX, gvec_xx_i, MO_8, tcg_gen_gvec_ori)
+TRANS(xvxori_b, LASX, gvec_xx_i, MO_8, tcg_gen_gvec_xori)
+TRANS(xvnori_b, LASX, gvec_xx_i, MO_8, do_vnori_b)
 
 TRANS(vsll_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_shlv)
 TRANS(vsll_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_shlv)
@@ -3134,6 +3690,14 @@ TRANS(vslli_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_shli)
 TRANS(vslli_h, LSX, gvec_vv_i, MO_16, tcg_gen_gvec_shli)
 TRANS(vslli_w, LSX, gvec_vv_i, MO_32, tcg_gen_gvec_shli)
 TRANS(vslli_d, LSX, gvec_vv_i, MO_64, tcg_gen_gvec_shli)
+TRANS(xvsll_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_shlv)
+TRANS(xvsll_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_shlv)
+TRANS(xvsll_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_shlv)
+TRANS(xvsll_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_shlv)
+TRANS(xvslli_b, LASX, gvec_xx_i, MO_8, tcg_gen_gvec_shli)
+TRANS(xvslli_h, LASX, gvec_xx_i, MO_16, tcg_gen_gvec_shli)
+TRANS(xvslli_w, LASX, gvec_xx_i, MO_32, tcg_gen_gvec_shli)
+TRANS(xvslli_d, LASX, gvec_xx_i, MO_64, tcg_gen_gvec_shli)
 
 TRANS(vsrl_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_shrv)
 TRANS(vsrl_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_shrv)
@@ -3143,6 +3707,14 @@ TRANS(vsrli_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_shri)
 TRANS(vsrli_h, LSX, gvec_vv_i, MO_16, tcg_gen_gvec_shri)
 TRANS(vsrli_w, LSX, gvec_vv_i, MO_32, tcg_gen_gvec_shri)
 TRANS(vsrli_d, LSX, gvec_vv_i, MO_64, tcg_gen_gvec_shri)
+TRANS(xvsrl_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_shrv)
+TRANS(xvsrl_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_shrv)
+TRANS(xvsrl_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_shrv)
+TRANS(xvsrl_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_shrv)
+TRANS(xvsrli_b, LASX, gvec_xx_i, MO_8, tcg_gen_gvec_shri)
+TRANS(xvsrli_h, LASX, gvec_xx_i, MO_16, tcg_gen_gvec_shri)
+TRANS(xvsrli_w, LASX, gvec_xx_i, MO_32, tcg_gen_gvec_shri)
+TRANS(xvsrli_d, LASX, gvec_xx_i, MO_64, tcg_gen_gvec_shri)
 
 TRANS(vsra_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_sarv)
 TRANS(vsra_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_sarv)
@@ -3152,6 +3724,14 @@ TRANS(vsrai_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_sari)
 TRANS(vsrai_h, LSX, gvec_vv_i, MO_16, tcg_gen_gvec_sari)
 TRANS(vsrai_w, LSX, gvec_vv_i, MO_32, tcg_gen_gvec_sari)
 TRANS(vsrai_d, LSX, gvec_vv_i, MO_64, tcg_gen_gvec_sari)
+TRANS(xvsra_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_sarv)
+TRANS(xvsra_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_sarv)
+TRANS(xvsra_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_sarv)
+TRANS(xvsra_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_sarv)
+TRANS(xvsrai_b, LASX, gvec_xx_i, MO_8, tcg_gen_gvec_sari)
+TRANS(xvsrai_h, LASX, gvec_xx_i, MO_16, tcg_gen_gvec_sari)
+TRANS(xvsrai_w, LASX, gvec_xx_i, MO_32, tcg_gen_gvec_sari)
+TRANS(xvsrai_d, LASX, gvec_xx_i, MO_64, tcg_gen_gvec_sari)
 
 TRANS(vrotr_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_rotrv)
 TRANS(vrotr_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_rotrv)
@@ -3161,6 +3741,14 @@ TRANS(vrotri_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_rotri)
 TRANS(vrotri_h, LSX, gvec_vv_i, MO_16, tcg_gen_gvec_rotri)
 TRANS(vrotri_w, LSX, gvec_vv_i, MO_32, tcg_gen_gvec_rotri)
 TRANS(vrotri_d, LSX, gvec_vv_i, MO_64, tcg_gen_gvec_rotri)
+TRANS(xvrotr_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_rotrv)
+TRANS(xvrotr_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_rotrv)
+TRANS(xvrotr_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_rotrv)
+TRANS(xvrotr_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_rotrv)
+TRANS(xvrotri_b, LASX, gvec_xx_i, MO_8, tcg_gen_gvec_rotri)
+TRANS(xvrotri_h, LASX, gvec_xx_i, MO_16, tcg_gen_gvec_rotri)
+TRANS(xvrotri_w, LASX, gvec_xx_i, MO_32, tcg_gen_gvec_rotri)
+TRANS(xvrotri_d, LASX, gvec_xx_i, MO_64, tcg_gen_gvec_rotri)
 
 TRANS(vsllwil_h_b, LSX, gen_vv_i, gen_helper_vsllwil_h_b)
 TRANS(vsllwil_w_h, LSX, gen_vv_i, gen_helper_vsllwil_w_h)
@@ -3170,6 +3758,14 @@ TRANS(vsllwil_hu_bu, LSX, gen_vv_i, gen_helper_vsllwil_hu_bu)
 TRANS(vsllwil_wu_hu, LSX, gen_vv_i, gen_helper_vsllwil_wu_hu)
 TRANS(vsllwil_du_wu, LSX, gen_vv_i, gen_helper_vsllwil_du_wu)
 TRANS(vextl_qu_du, LSX, gen_vv, gen_helper_vextl_qu_du)
+TRANS(xvsllwil_h_b, LASX, gen_xx_i, gen_helper_vsllwil_h_b)
+TRANS(xvsllwil_w_h, LASX, gen_xx_i, gen_helper_vsllwil_w_h)
+TRANS(xvsllwil_d_w, LASX, gen_xx_i, gen_helper_vsllwil_d_w)
+TRANS(xvextl_q_d, LASX, gen_xx, gen_helper_vextl_q_d)
+TRANS(xvsllwil_hu_bu, LASX, gen_xx_i, gen_helper_vsllwil_hu_bu)
+TRANS(xvsllwil_wu_hu, LASX, gen_xx_i, gen_helper_vsllwil_wu_hu)
+TRANS(xvsllwil_du_wu, LASX, gen_xx_i, gen_helper_vsllwil_du_wu)
+TRANS(xvextl_qu_du, LASX, gen_xx, gen_helper_vextl_qu_du)
 
 TRANS(vsrlr_b, LSX, gen_vvv, gen_helper_vsrlr_b)
 TRANS(vsrlr_h, LSX, gen_vvv, gen_helper_vsrlr_h)
@@ -3179,6 +3775,14 @@ TRANS(vsrlri_b, LSX, gen_vv_i, gen_helper_vsrlri_b)
 TRANS(vsrlri_h, LSX, gen_vv_i, gen_helper_vsrlri_h)
 TRANS(vsrlri_w, LSX, gen_vv_i, gen_helper_vsrlri_w)
 TRANS(vsrlri_d, LSX, gen_vv_i, gen_helper_vsrlri_d)
+TRANS(xvsrlr_b, LASX, gen_xxx, gen_helper_vsrlr_b)
+TRANS(xvsrlr_h, LASX, gen_xxx, gen_helper_vsrlr_h)
+TRANS(xvsrlr_w, LASX, gen_xxx, gen_helper_vsrlr_w)
+TRANS(xvsrlr_d, LASX, gen_xxx, gen_helper_vsrlr_d)
+TRANS(xvsrlri_b, LASX, gen_xx_i, gen_helper_vsrlri_b)
+TRANS(xvsrlri_h, LASX, gen_xx_i, gen_helper_vsrlri_h)
+TRANS(xvsrlri_w, LASX, gen_xx_i, gen_helper_vsrlri_w)
+TRANS(xvsrlri_d, LASX, gen_xx_i, gen_helper_vsrlri_d)
 
 TRANS(vsrar_b, LSX, gen_vvv, gen_helper_vsrar_b)
 TRANS(vsrar_h, LSX, gen_vvv, gen_helper_vsrar_h)
@@ -3188,6 +3792,14 @@ TRANS(vsrari_b, LSX, gen_vv_i, gen_helper_vsrari_b)
 TRANS(vsrari_h, LSX, gen_vv_i, gen_helper_vsrari_h)
 TRANS(vsrari_w, LSX, gen_vv_i, gen_helper_vsrari_w)
 TRANS(vsrari_d, LSX, gen_vv_i, gen_helper_vsrari_d)
+TRANS(xvsrar_b, LASX, gen_xxx, gen_helper_vsrar_b)
+TRANS(xvsrar_h, LASX, gen_xxx, gen_helper_vsrar_h)
+TRANS(xvsrar_w, LASX, gen_xxx, gen_helper_vsrar_w)
+TRANS(xvsrar_d, LASX, gen_xxx, gen_helper_vsrar_d)
+TRANS(xvsrari_b, LASX, gen_xx_i, gen_helper_vsrari_b)
+TRANS(xvsrari_h, LASX, gen_xx_i, gen_helper_vsrari_h)
+TRANS(xvsrari_w, LASX, gen_xx_i, gen_helper_vsrari_w)
+TRANS(xvsrari_d, LASX, gen_xx_i, gen_helper_vsrari_d)
 
 TRANS(vsrln_b_h, LSX, gen_vvv, gen_helper_vsrln_b_h)
 TRANS(vsrln_h_w, LSX, gen_vvv, gen_helper_vsrln_h_w)
@@ -3195,6 +3807,12 @@ TRANS(vsrln_w_d, LSX, gen_vvv, gen_helper_vsrln_w_d)
 TRANS(vsran_b_h, LSX, gen_vvv, gen_helper_vsran_b_h)
 TRANS(vsran_h_w, LSX, gen_vvv, gen_helper_vsran_h_w)
 TRANS(vsran_w_d, LSX, gen_vvv, gen_helper_vsran_w_d)
+TRANS(xvsrln_b_h, LASX, gen_xxx, gen_helper_vsrln_b_h)
+TRANS(xvsrln_h_w, LASX, gen_xxx, gen_helper_vsrln_h_w)
+TRANS(xvsrln_w_d, LASX, gen_xxx, gen_helper_vsrln_w_d)
+TRANS(xvsran_b_h, LASX, gen_xxx, gen_helper_vsran_b_h)
+TRANS(xvsran_h_w, LASX, gen_xxx, gen_helper_vsran_h_w)
+TRANS(xvsran_w_d, LASX, gen_xxx, gen_helper_vsran_w_d)
 
 TRANS(vsrlni_b_h, LSX, gen_vv_i, gen_helper_vsrlni_b_h)
 TRANS(vsrlni_h_w, LSX, gen_vv_i, gen_helper_vsrlni_h_w)
@@ -3204,6 +3822,14 @@ TRANS(vsrani_b_h, LSX, gen_vv_i, gen_helper_vsrani_b_h)
 TRANS(vsrani_h_w, LSX, gen_vv_i, gen_helper_vsrani_h_w)
 TRANS(vsrani_w_d, LSX, gen_vv_i, gen_helper_vsrani_w_d)
 TRANS(vsrani_d_q, LSX, gen_vv_i, gen_helper_vsrani_d_q)
+TRANS(xvsrlni_b_h, LASX, gen_xx_i, gen_helper_vsrlni_b_h)
+TRANS(xvsrlni_h_w, LASX, gen_xx_i, gen_helper_vsrlni_h_w)
+TRANS(xvsrlni_w_d, LASX, gen_xx_i, gen_helper_vsrlni_w_d)
+TRANS(xvsrlni_d_q, LASX, gen_xx_i, gen_helper_vsrlni_d_q)
+TRANS(xvsrani_b_h, LASX, gen_xx_i, gen_helper_vsrani_b_h)
+TRANS(xvsrani_h_w, LASX, gen_xx_i, gen_helper_vsrani_h_w)
+TRANS(xvsrani_w_d, LASX, gen_xx_i, gen_helper_vsrani_w_d)
+TRANS(xvsrani_d_q, LASX, gen_xx_i, gen_helper_vsrani_d_q)
 
 TRANS(vsrlrn_b_h, LSX, gen_vvv, gen_helper_vsrlrn_b_h)
 TRANS(vsrlrn_h_w, LSX, gen_vvv, gen_helper_vsrlrn_h_w)
@@ -3211,6 +3837,12 @@ TRANS(vsrlrn_w_d, LSX, gen_vvv, gen_helper_vsrlrn_w_d)
 TRANS(vsrarn_b_h, LSX, gen_vvv, gen_helper_vsrarn_b_h)
 TRANS(vsrarn_h_w, LSX, gen_vvv, gen_helper_vsrarn_h_w)
 TRANS(vsrarn_w_d, LSX, gen_vvv, gen_helper_vsrarn_w_d)
+TRANS(xvsrlrn_b_h, LASX, gen_xxx, gen_helper_vsrlrn_b_h)
+TRANS(xvsrlrn_h_w, LASX, gen_xxx, gen_helper_vsrlrn_h_w)
+TRANS(xvsrlrn_w_d, LASX, gen_xxx, gen_helper_vsrlrn_w_d)
+TRANS(xvsrarn_b_h, LASX, gen_xxx, gen_helper_vsrarn_b_h)
+TRANS(xvsrarn_h_w, LASX, gen_xxx, gen_helper_vsrarn_h_w)
+TRANS(xvsrarn_w_d, LASX, gen_xxx, gen_helper_vsrarn_w_d)
 
 TRANS(vsrlrni_b_h, LSX, gen_vv_i, gen_helper_vsrlrni_b_h)
 TRANS(vsrlrni_h_w, LSX, gen_vv_i, gen_helper_vsrlrni_h_w)
@@ -3220,6 +3852,14 @@ TRANS(vsrarni_b_h, LSX, gen_vv_i, gen_helper_vsrarni_b_h)
 TRANS(vsrarni_h_w, LSX, gen_vv_i, gen_helper_vsrarni_h_w)
 TRANS(vsrarni_w_d, LSX, gen_vv_i, gen_helper_vsrarni_w_d)
 TRANS(vsrarni_d_q, LSX, gen_vv_i, gen_helper_vsrarni_d_q)
+TRANS(xvsrlrni_b_h, LASX, gen_xx_i, gen_helper_vsrlrni_b_h)
+TRANS(xvsrlrni_h_w, LASX, gen_xx_i, gen_helper_vsrlrni_h_w)
+TRANS(xvsrlrni_w_d, LASX, gen_xx_i, gen_helper_vsrlrni_w_d)
+TRANS(xvsrlrni_d_q, LASX, gen_xx_i, gen_helper_vsrlrni_d_q)
+TRANS(xvsrarni_b_h, LASX, gen_xx_i, gen_helper_vsrarni_b_h)
+TRANS(xvsrarni_h_w, LASX, gen_xx_i, gen_helper_vsrarni_h_w)
+TRANS(xvsrarni_w_d, LASX, gen_xx_i, gen_helper_vsrarni_w_d)
+TRANS(xvsrarni_d_q, LASX, gen_xx_i, gen_helper_vsrarni_d_q)
 
 TRANS(vssrln_b_h, LSX, gen_vvv, gen_helper_vssrln_b_h)
 TRANS(vssrln_h_w, LSX, gen_vvv, gen_helper_vssrln_h_w)
@@ -3233,6 +3873,18 @@ TRANS(vssrln_wu_d, LSX, gen_vvv, gen_helper_vssrln_wu_d)
 TRANS(vssran_bu_h, LSX, gen_vvv, gen_helper_vssran_bu_h)
 TRANS(vssran_hu_w, LSX, gen_vvv, gen_helper_vssran_hu_w)
 TRANS(vssran_wu_d, LSX, gen_vvv, gen_helper_vssran_wu_d)
+TRANS(xvssrln_b_h, LASX, gen_xxx, gen_helper_vssrln_b_h)
+TRANS(xvssrln_h_w, LASX, gen_xxx, gen_helper_vssrln_h_w)
+TRANS(xvssrln_w_d, LASX, gen_xxx, gen_helper_vssrln_w_d)
+TRANS(xvssran_b_h, LASX, gen_xxx, gen_helper_vssran_b_h)
+TRANS(xvssran_h_w, LASX, gen_xxx, gen_helper_vssran_h_w)
+TRANS(xvssran_w_d, LASX, gen_xxx, gen_helper_vssran_w_d)
+TRANS(xvssrln_bu_h, LASX, gen_xxx, gen_helper_vssrln_bu_h)
+TRANS(xvssrln_hu_w, LASX, gen_xxx, gen_helper_vssrln_hu_w)
+TRANS(xvssrln_wu_d, LASX, gen_xxx, gen_helper_vssrln_wu_d)
+TRANS(xvssran_bu_h, LASX, gen_xxx, gen_helper_vssran_bu_h)
+TRANS(xvssran_hu_w, LASX, gen_xxx, gen_helper_vssran_hu_w)
+TRANS(xvssran_wu_d, LASX, gen_xxx, gen_helper_vssran_wu_d)
 
 TRANS(vssrlni_b_h, LSX, gen_vv_i, gen_helper_vssrlni_b_h)
 TRANS(vssrlni_h_w, LSX, gen_vv_i, gen_helper_vssrlni_h_w)
@@ -3250,6 +3902,22 @@ TRANS(vssrani_bu_h, LSX, gen_vv_i, gen_helper_vssrani_bu_h)
 TRANS(vssrani_hu_w, LSX, gen_vv_i, gen_helper_vssrani_hu_w)
 TRANS(vssrani_wu_d, LSX, gen_vv_i, gen_helper_vssrani_wu_d)
 TRANS(vssrani_du_q, LSX, gen_vv_i, gen_helper_vssrani_du_q)
+TRANS(xvssrlni_b_h, LASX, gen_xx_i, gen_helper_vssrlni_b_h)
+TRANS(xvssrlni_h_w, LASX, gen_xx_i, gen_helper_vssrlni_h_w)
+TRANS(xvssrlni_w_d, LASX, gen_xx_i, gen_helper_vssrlni_w_d)
+TRANS(xvssrlni_d_q, LASX, gen_xx_i, gen_helper_vssrlni_d_q)
+TRANS(xvssrani_b_h, LASX, gen_xx_i, gen_helper_vssrani_b_h)
+TRANS(xvssrani_h_w, LASX, gen_xx_i, gen_helper_vssrani_h_w)
+TRANS(xvssrani_w_d, LASX, gen_xx_i, gen_helper_vssrani_w_d)
+TRANS(xvssrani_d_q, LASX, gen_xx_i, gen_helper_vssrani_d_q)
+TRANS(xvssrlni_bu_h, LASX, gen_xx_i, gen_helper_vssrlni_bu_h)
+TRANS(xvssrlni_hu_w, LASX, gen_xx_i, gen_helper_vssrlni_hu_w)
+TRANS(xvssrlni_wu_d, LASX, gen_xx_i, gen_helper_vssrlni_wu_d)
+TRANS(xvssrlni_du_q, LASX, gen_xx_i, gen_helper_vssrlni_du_q)
+TRANS(xvssrani_bu_h, LASX, gen_xx_i, gen_helper_vssrani_bu_h)
+TRANS(xvssrani_hu_w, LASX, gen_xx_i, gen_helper_vssrani_hu_w)
+TRANS(xvssrani_wu_d, LASX, gen_xx_i, gen_helper_vssrani_wu_d)
+TRANS(xvssrani_du_q, LASX, gen_xx_i, gen_helper_vssrani_du_q)
 
 TRANS(vssrlrn_b_h, LSX, gen_vvv, gen_helper_vssrlrn_b_h)
 TRANS(vssrlrn_h_w, LSX, gen_vvv, gen_helper_vssrlrn_h_w)
@@ -3263,6 +3931,18 @@ TRANS(vssrlrn_wu_d, LSX, gen_vvv, gen_helper_vssrlrn_wu_d)
 TRANS(vssrarn_bu_h, LSX, gen_vvv, gen_helper_vssrarn_bu_h)
 TRANS(vssrarn_hu_w, LSX, gen_vvv, gen_helper_vssrarn_hu_w)
 TRANS(vssrarn_wu_d, LSX, gen_vvv, gen_helper_vssrarn_wu_d)
+TRANS(xvssrlrn_b_h, LASX, gen_xxx, gen_helper_vssrlrn_b_h)
+TRANS(xvssrlrn_h_w, LASX, gen_xxx, gen_helper_vssrlrn_h_w)
+TRANS(xvssrlrn_w_d, LASX, gen_xxx, gen_helper_vssrlrn_w_d)
+TRANS(xvssrarn_b_h, LASX, gen_xxx, gen_helper_vssrarn_b_h)
+TRANS(xvssrarn_h_w, LASX, gen_xxx, gen_helper_vssrarn_h_w)
+TRANS(xvssrarn_w_d, LASX, gen_xxx, gen_helper_vssrarn_w_d)
+TRANS(xvssrlrn_bu_h, LASX, gen_xxx, gen_helper_vssrlrn_bu_h)
+TRANS(xvssrlrn_hu_w, LASX, gen_xxx, gen_helper_vssrlrn_hu_w)
+TRANS(xvssrlrn_wu_d, LASX, gen_xxx, gen_helper_vssrlrn_wu_d)
+TRANS(xvssrarn_bu_h, LASX, gen_xxx, gen_helper_vssrarn_bu_h)
+TRANS(xvssrarn_hu_w, LASX, gen_xxx, gen_helper_vssrarn_hu_w)
+TRANS(xvssrarn_wu_d, LASX, gen_xxx, gen_helper_vssrarn_wu_d)
 
 TRANS(vssrlrni_b_h, LSX, gen_vv_i, gen_helper_vssrlrni_b_h)
 TRANS(vssrlrni_h_w, LSX, gen_vv_i, gen_helper_vssrlrni_h_w)
@@ -3280,6 +3960,22 @@ TRANS(vssrarni_bu_h, LSX, gen_vv_i, gen_helper_vssrarni_bu_h)
 TRANS(vssrarni_hu_w, LSX, gen_vv_i, gen_helper_vssrarni_hu_w)
 TRANS(vssrarni_wu_d, LSX, gen_vv_i, gen_helper_vssrarni_wu_d)
 TRANS(vssrarni_du_q, LSX, gen_vv_i, gen_helper_vssrarni_du_q)
+TRANS(xvssrlrni_b_h, LASX, gen_xx_i, gen_helper_vssrlrni_b_h)
+TRANS(xvssrlrni_h_w, LASX, gen_xx_i, gen_helper_vssrlrni_h_w)
+TRANS(xvssrlrni_w_d, LASX, gen_xx_i, gen_helper_vssrlrni_w_d)
+TRANS(xvssrlrni_d_q, LASX, gen_xx_i, gen_helper_vssrlrni_d_q)
+TRANS(xvssrarni_b_h, LASX, gen_xx_i, gen_helper_vssrarni_b_h)
+TRANS(xvssrarni_h_w, LASX, gen_xx_i, gen_helper_vssrarni_h_w)
+TRANS(xvssrarni_w_d, LASX, gen_xx_i, gen_helper_vssrarni_w_d)
+TRANS(xvssrarni_d_q, LASX, gen_xx_i, gen_helper_vssrarni_d_q)
+TRANS(xvssrlrni_bu_h, LASX, gen_xx_i, gen_helper_vssrlrni_bu_h)
+TRANS(xvssrlrni_hu_w, LASX, gen_xx_i, gen_helper_vssrlrni_hu_w)
+TRANS(xvssrlrni_wu_d, LASX, gen_xx_i, gen_helper_vssrlrni_wu_d)
+TRANS(xvssrlrni_du_q, LASX, gen_xx_i, gen_helper_vssrlrni_du_q)
+TRANS(xvssrarni_bu_h, LASX, gen_xx_i, gen_helper_vssrarni_bu_h)
+TRANS(xvssrarni_hu_w, LASX, gen_xx_i, gen_helper_vssrarni_hu_w)
+TRANS(xvssrarni_wu_d, LASX, gen_xx_i, gen_helper_vssrarni_wu_d)
+TRANS(xvssrarni_du_q, LASX, gen_xx_i, gen_helper_vssrarni_du_q)
 
 TRANS(vclo_b, LSX, gen_vv, gen_helper_vclo_b)
 TRANS(vclo_h, LSX, gen_vv, gen_helper_vclo_h)
@@ -3289,11 +3985,23 @@ TRANS(vclz_b, LSX, gen_vv, gen_helper_vclz_b)
 TRANS(vclz_h, LSX, gen_vv, gen_helper_vclz_h)
 TRANS(vclz_w, LSX, gen_vv, gen_helper_vclz_w)
 TRANS(vclz_d, LSX, gen_vv, gen_helper_vclz_d)
+TRANS(xvclo_b, LASX, gen_xx, gen_helper_vclo_b)
+TRANS(xvclo_h, LASX, gen_xx, gen_helper_vclo_h)
+TRANS(xvclo_w, LASX, gen_xx, gen_helper_vclo_w)
+TRANS(xvclo_d, LASX, gen_xx, gen_helper_vclo_d)
+TRANS(xvclz_b, LASX, gen_xx, gen_helper_vclz_b)
+TRANS(xvclz_h, LASX, gen_xx, gen_helper_vclz_h)
+TRANS(xvclz_w, LASX, gen_xx, gen_helper_vclz_w)
+TRANS(xvclz_d, LASX, gen_xx, gen_helper_vclz_d)
 
 TRANS(vpcnt_b, LSX, gen_vv, gen_helper_vpcnt_b)
 TRANS(vpcnt_h, LSX, gen_vv, gen_helper_vpcnt_h)
 TRANS(vpcnt_w, LSX, gen_vv, gen_helper_vpcnt_w)
 TRANS(vpcnt_d, LSX, gen_vv, gen_helper_vpcnt_d)
+TRANS(xvpcnt_b, LASX, gen_xx, gen_helper_vpcnt_b)
+TRANS(xvpcnt_h, LASX, gen_xx, gen_helper_vpcnt_h)
+TRANS(xvpcnt_w, LASX, gen_xx, gen_helper_vpcnt_w)
+TRANS(xvpcnt_d, LASX, gen_xx, gen_helper_vpcnt_d)
 
 static void do_vbit(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b,
                     void (*func)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec))
@@ -3365,6 +4073,10 @@ TRANS(vbitclr_b, LSX, gvec_vvv, MO_8, do_vbitclr)
 TRANS(vbitclr_h, LSX, gvec_vvv, MO_16, do_vbitclr)
 TRANS(vbitclr_w, LSX, gvec_vvv, MO_32, do_vbitclr)
 TRANS(vbitclr_d, LSX, gvec_vvv, MO_64, do_vbitclr)
+TRANS(xvbitclr_b, LASX, gvec_xxx, MO_8, do_vbitclr)
+TRANS(xvbitclr_h, LASX, gvec_xxx, MO_16, do_vbitclr)
+TRANS(xvbitclr_w, LASX, gvec_xxx, MO_32, do_vbitclr)
+TRANS(xvbitclr_d, LASX, gvec_xxx, MO_64, do_vbitclr)
 
 static void do_vbiti(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm,
                      void (*func)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec))
@@ -3435,6 +4147,10 @@ TRANS(vbitclri_b, LSX, gvec_vv_i, MO_8, do_vbitclri)
 TRANS(vbitclri_h, LSX, gvec_vv_i, MO_16, do_vbitclri)
 TRANS(vbitclri_w, LSX, gvec_vv_i, MO_32, do_vbitclri)
 TRANS(vbitclri_d, LSX, gvec_vv_i, MO_64, do_vbitclri)
+TRANS(xvbitclri_b, LASX, gvec_xx_i, MO_8, do_vbitclri)
+TRANS(xvbitclri_h, LASX, gvec_xx_i, MO_16, do_vbitclri)
+TRANS(xvbitclri_w, LASX, gvec_xx_i, MO_32, do_vbitclri)
+TRANS(xvbitclri_d, LASX, gvec_xx_i, MO_64, do_vbitclri)
 
 static void do_vbitset(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
                        uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
@@ -3476,6 +4192,10 @@ TRANS(vbitset_b, LSX, gvec_vvv, MO_8, do_vbitset)
 TRANS(vbitset_h, LSX, gvec_vvv, MO_16, do_vbitset)
 TRANS(vbitset_w, LSX, gvec_vvv, MO_32, do_vbitset)
 TRANS(vbitset_d, LSX, gvec_vvv, MO_64, do_vbitset)
+TRANS(xvbitset_b, LASX, gvec_xxx, MO_8, do_vbitset)
+TRANS(xvbitset_h, LASX, gvec_xxx, MO_16, do_vbitset)
+TRANS(xvbitset_w, LASX, gvec_xxx, MO_32, do_vbitset)
+TRANS(xvbitset_d, LASX, gvec_xxx, MO_64, do_vbitset)
 
 static void do_vbitseti(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
                         int64_t imm, uint32_t oprsz, uint32_t maxsz)
@@ -3517,6 +4237,10 @@ TRANS(vbitseti_b, LSX, gvec_vv_i, MO_8, do_vbitseti)
 TRANS(vbitseti_h, LSX, gvec_vv_i, MO_16, do_vbitseti)
 TRANS(vbitseti_w, LSX, gvec_vv_i, MO_32, do_vbitseti)
 TRANS(vbitseti_d, LSX, gvec_vv_i, MO_64, do_vbitseti)
+TRANS(xvbitseti_b, LASX, gvec_xx_i, MO_8, do_vbitseti)
+TRANS(xvbitseti_h, LASX, gvec_xx_i, MO_16, do_vbitseti)
+TRANS(xvbitseti_w, LASX, gvec_xx_i, MO_32, do_vbitseti)
+TRANS(xvbitseti_d, LASX, gvec_xx_i, MO_64, do_vbitseti)
 
 static void do_vbitrev(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
                        uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
@@ -3558,6 +4282,10 @@ TRANS(vbitrev_b, LSX, gvec_vvv, MO_8, do_vbitrev)
 TRANS(vbitrev_h, LSX, gvec_vvv, MO_16, do_vbitrev)
 TRANS(vbitrev_w, LSX, gvec_vvv, MO_32, do_vbitrev)
 TRANS(vbitrev_d, LSX, gvec_vvv, MO_64, do_vbitrev)
+TRANS(xvbitrev_b, LASX, gvec_xxx, MO_8, do_vbitrev)
+TRANS(xvbitrev_h, LASX, gvec_xxx, MO_16, do_vbitrev)
+TRANS(xvbitrev_w, LASX, gvec_xxx, MO_32, do_vbitrev)
+TRANS(xvbitrev_d, LASX, gvec_xxx, MO_64, do_vbitrev)
 
 static void do_vbitrevi(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
                         int64_t imm, uint32_t oprsz, uint32_t maxsz)
@@ -3599,356 +4327,409 @@ TRANS(vbitrevi_b, LSX, gvec_vv_i, MO_8, do_vbitrevi)
 TRANS(vbitrevi_h, LSX, gvec_vv_i, MO_16, do_vbitrevi)
 TRANS(vbitrevi_w, LSX, gvec_vv_i, MO_32, do_vbitrevi)
 TRANS(vbitrevi_d, LSX, gvec_vv_i, MO_64, do_vbitrevi)
+TRANS(xvbitrevi_b, LASX, gvec_xx_i, MO_8, do_vbitrevi)
+TRANS(xvbitrevi_h, LASX, gvec_xx_i, MO_16, do_vbitrevi)
+TRANS(xvbitrevi_w, LASX, gvec_xx_i, MO_32, do_vbitrevi)
+TRANS(xvbitrevi_d, LASX, gvec_xx_i, MO_64, do_vbitrevi)
 
 TRANS(vfrstp_b, LSX, gen_vvv, gen_helper_vfrstp_b)
 TRANS(vfrstp_h, LSX, gen_vvv, gen_helper_vfrstp_h)
 TRANS(vfrstpi_b, LSX, gen_vv_i, gen_helper_vfrstpi_b)
 TRANS(vfrstpi_h, LSX, gen_vv_i, gen_helper_vfrstpi_h)
+TRANS(xvfrstp_b, LASX, gen_xxx, gen_helper_vfrstp_b)
+TRANS(xvfrstp_h, LASX, gen_xxx, gen_helper_vfrstp_h)
+TRANS(xvfrstpi_b, LASX, gen_xx_i, gen_helper_vfrstpi_b)
+TRANS(xvfrstpi_h, LASX, gen_xx_i, gen_helper_vfrstpi_h)
 
-TRANS(vfadd_s, LSX, gen_vvv, gen_helper_vfadd_s)
-TRANS(vfadd_d, LSX, gen_vvv, gen_helper_vfadd_d)
-TRANS(vfsub_s, LSX, gen_vvv, gen_helper_vfsub_s)
-TRANS(vfsub_d, LSX, gen_vvv, gen_helper_vfsub_d)
-TRANS(vfmul_s, LSX, gen_vvv, gen_helper_vfmul_s)
-TRANS(vfmul_d, LSX, gen_vvv, gen_helper_vfmul_d)
-TRANS(vfdiv_s, LSX, gen_vvv, gen_helper_vfdiv_s)
-TRANS(vfdiv_d, LSX, gen_vvv, gen_helper_vfdiv_d)
+TRANS(vfadd_s, LSX, gen_vvv_ptr, gen_helper_vfadd_s)
+TRANS(vfadd_d, LSX, gen_vvv_ptr, gen_helper_vfadd_d)
+TRANS(vfsub_s, LSX, gen_vvv_ptr, gen_helper_vfsub_s)
+TRANS(vfsub_d, LSX, gen_vvv_ptr, gen_helper_vfsub_d)
+TRANS(vfmul_s, LSX, gen_vvv_ptr, gen_helper_vfmul_s)
+TRANS(vfmul_d, LSX, gen_vvv_ptr, gen_helper_vfmul_d)
+TRANS(vfdiv_s, LSX, gen_vvv_ptr, gen_helper_vfdiv_s)
+TRANS(vfdiv_d, LSX, gen_vvv_ptr, gen_helper_vfdiv_d)
+TRANS(xvfadd_s, LASX, gen_xxx_ptr, gen_helper_vfadd_s)
+TRANS(xvfadd_d, LASX, gen_xxx_ptr, gen_helper_vfadd_d)
+TRANS(xvfsub_s, LASX, gen_xxx_ptr, gen_helper_vfsub_s)
+TRANS(xvfsub_d, LASX, gen_xxx_ptr, gen_helper_vfsub_d)
+TRANS(xvfmul_s, LASX, gen_xxx_ptr, gen_helper_vfmul_s)
+TRANS(xvfmul_d, LASX, gen_xxx_ptr, gen_helper_vfmul_d)
+TRANS(xvfdiv_s, LASX, gen_xxx_ptr, gen_helper_vfdiv_s)
+TRANS(xvfdiv_d, LASX, gen_xxx_ptr, gen_helper_vfdiv_d)
 
-TRANS(vfmadd_s, LSX, gen_vvvv, gen_helper_vfmadd_s)
-TRANS(vfmadd_d, LSX, gen_vvvv, gen_helper_vfmadd_d)
-TRANS(vfmsub_s, LSX, gen_vvvv, gen_helper_vfmsub_s)
-TRANS(vfmsub_d, LSX, gen_vvvv, gen_helper_vfmsub_d)
-TRANS(vfnmadd_s, LSX, gen_vvvv, gen_helper_vfnmadd_s)
-TRANS(vfnmadd_d, LSX, gen_vvvv, gen_helper_vfnmadd_d)
-TRANS(vfnmsub_s, LSX, gen_vvvv, gen_helper_vfnmsub_s)
-TRANS(vfnmsub_d, LSX, gen_vvvv, gen_helper_vfnmsub_d)
+TRANS(vfmadd_s, LSX, gen_vvvv_ptr, gen_helper_vfmadd_s)
+TRANS(vfmadd_d, LSX, gen_vvvv_ptr, gen_helper_vfmadd_d)
+TRANS(vfmsub_s, LSX, gen_vvvv_ptr, gen_helper_vfmsub_s)
+TRANS(vfmsub_d, LSX, gen_vvvv_ptr, gen_helper_vfmsub_d)
+TRANS(vfnmadd_s, LSX, gen_vvvv_ptr, gen_helper_vfnmadd_s)
+TRANS(vfnmadd_d, LSX, gen_vvvv_ptr, gen_helper_vfnmadd_d)
+TRANS(vfnmsub_s, LSX, gen_vvvv_ptr, gen_helper_vfnmsub_s)
+TRANS(vfnmsub_d, LSX, gen_vvvv_ptr, gen_helper_vfnmsub_d)
+TRANS(xvfmadd_s, LASX, gen_xxxx_ptr, gen_helper_vfmadd_s)
+TRANS(xvfmadd_d, LASX, gen_xxxx_ptr, gen_helper_vfmadd_d)
+TRANS(xvfmsub_s, LASX, gen_xxxx_ptr, gen_helper_vfmsub_s)
+TRANS(xvfmsub_d, LASX, gen_xxxx_ptr, gen_helper_vfmsub_d)
+TRANS(xvfnmadd_s, LASX, gen_xxxx_ptr, gen_helper_vfnmadd_s)
+TRANS(xvfnmadd_d, LASX, gen_xxxx_ptr, gen_helper_vfnmadd_d)
+TRANS(xvfnmsub_s, LASX, gen_xxxx_ptr, gen_helper_vfnmsub_s)
+TRANS(xvfnmsub_d, LASX, gen_xxxx_ptr, gen_helper_vfnmsub_d)
 
-TRANS(vfmax_s, LSX, gen_vvv, gen_helper_vfmax_s)
-TRANS(vfmax_d, LSX, gen_vvv, gen_helper_vfmax_d)
-TRANS(vfmin_s, LSX, gen_vvv, gen_helper_vfmin_s)
-TRANS(vfmin_d, LSX, gen_vvv, gen_helper_vfmin_d)
+TRANS(vfmax_s, LSX, gen_vvv_ptr, gen_helper_vfmax_s)
+TRANS(vfmax_d, LSX, gen_vvv_ptr, gen_helper_vfmax_d)
+TRANS(vfmin_s, LSX, gen_vvv_ptr, gen_helper_vfmin_s)
+TRANS(vfmin_d, LSX, gen_vvv_ptr, gen_helper_vfmin_d)
+TRANS(xvfmax_s, LASX, gen_xxx_ptr, gen_helper_vfmax_s)
+TRANS(xvfmax_d, LASX, gen_xxx_ptr, gen_helper_vfmax_d)
+TRANS(xvfmin_s, LASX, gen_xxx_ptr, gen_helper_vfmin_s)
+TRANS(xvfmin_d, LASX, gen_xxx_ptr, gen_helper_vfmin_d)
 
-TRANS(vfmaxa_s, LSX, gen_vvv, gen_helper_vfmaxa_s)
-TRANS(vfmaxa_d, LSX, gen_vvv, gen_helper_vfmaxa_d)
-TRANS(vfmina_s, LSX, gen_vvv, gen_helper_vfmina_s)
-TRANS(vfmina_d, LSX, gen_vvv, gen_helper_vfmina_d)
+TRANS(vfmaxa_s, LSX, gen_vvv_ptr, gen_helper_vfmaxa_s)
+TRANS(vfmaxa_d, LSX, gen_vvv_ptr, gen_helper_vfmaxa_d)
+TRANS(vfmina_s, LSX, gen_vvv_ptr, gen_helper_vfmina_s)
+TRANS(vfmina_d, LSX, gen_vvv_ptr, gen_helper_vfmina_d)
+TRANS(xvfmaxa_s, LASX, gen_xxx_ptr, gen_helper_vfmaxa_s)
+TRANS(xvfmaxa_d, LASX, gen_xxx_ptr, gen_helper_vfmaxa_d)
+TRANS(xvfmina_s, LASX, gen_xxx_ptr, gen_helper_vfmina_s)
+TRANS(xvfmina_d, LASX, gen_xxx_ptr, gen_helper_vfmina_d)
 
-TRANS(vflogb_s, LSX, gen_vv, gen_helper_vflogb_s)
-TRANS(vflogb_d, LSX, gen_vv, gen_helper_vflogb_d)
+TRANS(vflogb_s, LSX, gen_vv_ptr, gen_helper_vflogb_s)
+TRANS(vflogb_d, LSX, gen_vv_ptr, gen_helper_vflogb_d)
+TRANS(xvflogb_s, LASX, gen_xx_ptr, gen_helper_vflogb_s)
+TRANS(xvflogb_d, LASX, gen_xx_ptr, gen_helper_vflogb_d)
 
-TRANS(vfclass_s, LSX, gen_vv, gen_helper_vfclass_s)
-TRANS(vfclass_d, LSX, gen_vv, gen_helper_vfclass_d)
+TRANS(vfclass_s, LSX, gen_vv_ptr, gen_helper_vfclass_s)
+TRANS(vfclass_d, LSX, gen_vv_ptr, gen_helper_vfclass_d)
+TRANS(xvfclass_s, LASX, gen_xx_ptr, gen_helper_vfclass_s)
+TRANS(xvfclass_d, LASX, gen_xx_ptr, gen_helper_vfclass_d)
 
-TRANS(vfsqrt_s, LSX, gen_vv, gen_helper_vfsqrt_s)
-TRANS(vfsqrt_d, LSX, gen_vv, gen_helper_vfsqrt_d)
-TRANS(vfrecip_s, LSX, gen_vv, gen_helper_vfrecip_s)
-TRANS(vfrecip_d, LSX, gen_vv, gen_helper_vfrecip_d)
-TRANS(vfrsqrt_s, LSX, gen_vv, gen_helper_vfrsqrt_s)
-TRANS(vfrsqrt_d, LSX, gen_vv, gen_helper_vfrsqrt_d)
+TRANS(vfsqrt_s, LSX, gen_vv_ptr, gen_helper_vfsqrt_s)
+TRANS(vfsqrt_d, LSX, gen_vv_ptr, gen_helper_vfsqrt_d)
+TRANS(vfrecip_s, LSX, gen_vv_ptr, gen_helper_vfrecip_s)
+TRANS(vfrecip_d, LSX, gen_vv_ptr, gen_helper_vfrecip_d)
+TRANS(vfrsqrt_s, LSX, gen_vv_ptr, gen_helper_vfrsqrt_s)
+TRANS(vfrsqrt_d, LSX, gen_vv_ptr, gen_helper_vfrsqrt_d)
+TRANS(xvfsqrt_s, LASX, gen_xx_ptr, gen_helper_vfsqrt_s)
+TRANS(xvfsqrt_d, LASX, gen_xx_ptr, gen_helper_vfsqrt_d)
+TRANS(xvfrecip_s, LASX, gen_xx_ptr, gen_helper_vfrecip_s)
+TRANS(xvfrecip_d, LASX, gen_xx_ptr, gen_helper_vfrecip_d)
+TRANS(xvfrsqrt_s, LASX, gen_xx_ptr, gen_helper_vfrsqrt_s)
+TRANS(xvfrsqrt_d, LASX, gen_xx_ptr, gen_helper_vfrsqrt_d)
 
-TRANS(vfcvtl_s_h, LSX, gen_vv, gen_helper_vfcvtl_s_h)
-TRANS(vfcvth_s_h, LSX, gen_vv, gen_helper_vfcvth_s_h)
-TRANS(vfcvtl_d_s, LSX, gen_vv, gen_helper_vfcvtl_d_s)
-TRANS(vfcvth_d_s, LSX, gen_vv, gen_helper_vfcvth_d_s)
-TRANS(vfcvt_h_s, LSX, gen_vvv, gen_helper_vfcvt_h_s)
-TRANS(vfcvt_s_d, LSX, gen_vvv, gen_helper_vfcvt_s_d)
+TRANS(vfcvtl_s_h, LSX, gen_vv_ptr, gen_helper_vfcvtl_s_h)
+TRANS(vfcvth_s_h, LSX, gen_vv_ptr, gen_helper_vfcvth_s_h)
+TRANS(vfcvtl_d_s, LSX, gen_vv_ptr, gen_helper_vfcvtl_d_s)
+TRANS(vfcvth_d_s, LSX, gen_vv_ptr, gen_helper_vfcvth_d_s)
+TRANS(vfcvt_h_s, LSX, gen_vvv_ptr, gen_helper_vfcvt_h_s)
+TRANS(vfcvt_s_d, LSX, gen_vvv_ptr, gen_helper_vfcvt_s_d)
+TRANS(xvfcvtl_s_h, LASX, gen_xx_ptr, gen_helper_vfcvtl_s_h)
+TRANS(xvfcvth_s_h, LASX, gen_xx_ptr, gen_helper_vfcvth_s_h)
+TRANS(xvfcvtl_d_s, LASX, gen_xx_ptr, gen_helper_vfcvtl_d_s)
+TRANS(xvfcvth_d_s, LASX, gen_xx_ptr, gen_helper_vfcvth_d_s)
+TRANS(xvfcvt_h_s, LASX, gen_xxx_ptr, gen_helper_vfcvt_h_s)
+TRANS(xvfcvt_s_d, LASX, gen_xxx_ptr, gen_helper_vfcvt_s_d)
 
-TRANS(vfrintrne_s, LSX, gen_vv, gen_helper_vfrintrne_s)
-TRANS(vfrintrne_d, LSX, gen_vv, gen_helper_vfrintrne_d)
-TRANS(vfrintrz_s, LSX, gen_vv, gen_helper_vfrintrz_s)
-TRANS(vfrintrz_d, LSX, gen_vv, gen_helper_vfrintrz_d)
-TRANS(vfrintrp_s, LSX, gen_vv, gen_helper_vfrintrp_s)
-TRANS(vfrintrp_d, LSX, gen_vv, gen_helper_vfrintrp_d)
-TRANS(vfrintrm_s, LSX, gen_vv, gen_helper_vfrintrm_s)
-TRANS(vfrintrm_d, LSX, gen_vv, gen_helper_vfrintrm_d)
-TRANS(vfrint_s, LSX, gen_vv, gen_helper_vfrint_s)
-TRANS(vfrint_d, LSX, gen_vv, gen_helper_vfrint_d)
+TRANS(vfrintrne_s, LSX, gen_vv_ptr, gen_helper_vfrintrne_s)
+TRANS(vfrintrne_d, LSX, gen_vv_ptr, gen_helper_vfrintrne_d)
+TRANS(vfrintrz_s, LSX, gen_vv_ptr, gen_helper_vfrintrz_s)
+TRANS(vfrintrz_d, LSX, gen_vv_ptr, gen_helper_vfrintrz_d)
+TRANS(vfrintrp_s, LSX, gen_vv_ptr, gen_helper_vfrintrp_s)
+TRANS(vfrintrp_d, LSX, gen_vv_ptr, gen_helper_vfrintrp_d)
+TRANS(vfrintrm_s, LSX, gen_vv_ptr, gen_helper_vfrintrm_s)
+TRANS(vfrintrm_d, LSX, gen_vv_ptr, gen_helper_vfrintrm_d)
+TRANS(vfrint_s, LSX, gen_vv_ptr, gen_helper_vfrint_s)
+TRANS(vfrint_d, LSX, gen_vv_ptr, gen_helper_vfrint_d)
+TRANS(xvfrintrne_s, LASX, gen_xx_ptr, gen_helper_vfrintrne_s)
+TRANS(xvfrintrne_d, LASX, gen_xx_ptr, gen_helper_vfrintrne_d)
+TRANS(xvfrintrz_s, LASX, gen_xx_ptr, gen_helper_vfrintrz_s)
+TRANS(xvfrintrz_d, LASX, gen_xx_ptr, gen_helper_vfrintrz_d)
+TRANS(xvfrintrp_s, LASX, gen_xx_ptr, gen_helper_vfrintrp_s)
+TRANS(xvfrintrp_d, LASX, gen_xx_ptr, gen_helper_vfrintrp_d)
+TRANS(xvfrintrm_s, LASX, gen_xx_ptr, gen_helper_vfrintrm_s)
+TRANS(xvfrintrm_d, LASX, gen_xx_ptr, gen_helper_vfrintrm_d)
+TRANS(xvfrint_s, LASX, gen_xx_ptr, gen_helper_vfrint_s)
+TRANS(xvfrint_d, LASX, gen_xx_ptr, gen_helper_vfrint_d)
 
-TRANS(vftintrne_w_s, LSX, gen_vv, gen_helper_vftintrne_w_s)
-TRANS(vftintrne_l_d, LSX, gen_vv, gen_helper_vftintrne_l_d)
-TRANS(vftintrz_w_s, LSX, gen_vv, gen_helper_vftintrz_w_s)
-TRANS(vftintrz_l_d, LSX, gen_vv, gen_helper_vftintrz_l_d)
-TRANS(vftintrp_w_s, LSX, gen_vv, gen_helper_vftintrp_w_s)
-TRANS(vftintrp_l_d, LSX, gen_vv, gen_helper_vftintrp_l_d)
-TRANS(vftintrm_w_s, LSX, gen_vv, gen_helper_vftintrm_w_s)
-TRANS(vftintrm_l_d, LSX, gen_vv, gen_helper_vftintrm_l_d)
-TRANS(vftint_w_s, LSX, gen_vv, gen_helper_vftint_w_s)
-TRANS(vftint_l_d, LSX, gen_vv, gen_helper_vftint_l_d)
-TRANS(vftintrz_wu_s, LSX, gen_vv, gen_helper_vftintrz_wu_s)
-TRANS(vftintrz_lu_d, LSX, gen_vv, gen_helper_vftintrz_lu_d)
-TRANS(vftint_wu_s, LSX, gen_vv, gen_helper_vftint_wu_s)
-TRANS(vftint_lu_d, LSX, gen_vv, gen_helper_vftint_lu_d)
-TRANS(vftintrne_w_d, LSX, gen_vvv, gen_helper_vftintrne_w_d)
-TRANS(vftintrz_w_d, LSX, gen_vvv, gen_helper_vftintrz_w_d)
-TRANS(vftintrp_w_d, LSX, gen_vvv, gen_helper_vftintrp_w_d)
-TRANS(vftintrm_w_d, LSX, gen_vvv, gen_helper_vftintrm_w_d)
-TRANS(vftint_w_d, LSX, gen_vvv, gen_helper_vftint_w_d)
-TRANS(vftintrnel_l_s, LSX, gen_vv, gen_helper_vftintrnel_l_s)
-TRANS(vftintrneh_l_s, LSX, gen_vv, gen_helper_vftintrneh_l_s)
-TRANS(vftintrzl_l_s, LSX, gen_vv, gen_helper_vftintrzl_l_s)
-TRANS(vftintrzh_l_s, LSX, gen_vv, gen_helper_vftintrzh_l_s)
-TRANS(vftintrpl_l_s, LSX, gen_vv, gen_helper_vftintrpl_l_s)
-TRANS(vftintrph_l_s, LSX, gen_vv, gen_helper_vftintrph_l_s)
-TRANS(vftintrml_l_s, LSX, gen_vv, gen_helper_vftintrml_l_s)
-TRANS(vftintrmh_l_s, LSX, gen_vv, gen_helper_vftintrmh_l_s)
-TRANS(vftintl_l_s, LSX, gen_vv, gen_helper_vftintl_l_s)
-TRANS(vftinth_l_s, LSX, gen_vv, gen_helper_vftinth_l_s)
+TRANS(vftintrne_w_s, LSX, gen_vv_ptr, gen_helper_vftintrne_w_s)
+TRANS(vftintrne_l_d, LSX, gen_vv_ptr, gen_helper_vftintrne_l_d)
+TRANS(vftintrz_w_s, LSX, gen_vv_ptr, gen_helper_vftintrz_w_s)
+TRANS(vftintrz_l_d, LSX, gen_vv_ptr, gen_helper_vftintrz_l_d)
+TRANS(vftintrp_w_s, LSX, gen_vv_ptr, gen_helper_vftintrp_w_s)
+TRANS(vftintrp_l_d, LSX, gen_vv_ptr, gen_helper_vftintrp_l_d)
+TRANS(vftintrm_w_s, LSX, gen_vv_ptr, gen_helper_vftintrm_w_s)
+TRANS(vftintrm_l_d, LSX, gen_vv_ptr, gen_helper_vftintrm_l_d)
+TRANS(vftint_w_s, LSX, gen_vv_ptr, gen_helper_vftint_w_s)
+TRANS(vftint_l_d, LSX, gen_vv_ptr, gen_helper_vftint_l_d)
+TRANS(vftintrz_wu_s, LSX, gen_vv_ptr, gen_helper_vftintrz_wu_s)
+TRANS(vftintrz_lu_d, LSX, gen_vv_ptr, gen_helper_vftintrz_lu_d)
+TRANS(vftint_wu_s, LSX, gen_vv_ptr, gen_helper_vftint_wu_s)
+TRANS(vftint_lu_d, LSX, gen_vv_ptr, gen_helper_vftint_lu_d)
+TRANS(vftintrne_w_d, LSX, gen_vvv_ptr, gen_helper_vftintrne_w_d)
+TRANS(vftintrz_w_d, LSX, gen_vvv_ptr, gen_helper_vftintrz_w_d)
+TRANS(vftintrp_w_d, LSX, gen_vvv_ptr, gen_helper_vftintrp_w_d)
+TRANS(vftintrm_w_d, LSX, gen_vvv_ptr, gen_helper_vftintrm_w_d)
+TRANS(vftint_w_d, LSX, gen_vvv_ptr, gen_helper_vftint_w_d)
+TRANS(vftintrnel_l_s, LSX, gen_vv_ptr, gen_helper_vftintrnel_l_s)
+TRANS(vftintrneh_l_s, LSX, gen_vv_ptr, gen_helper_vftintrneh_l_s)
+TRANS(vftintrzl_l_s, LSX, gen_vv_ptr, gen_helper_vftintrzl_l_s)
+TRANS(vftintrzh_l_s, LSX, gen_vv_ptr, gen_helper_vftintrzh_l_s)
+TRANS(vftintrpl_l_s, LSX, gen_vv_ptr, gen_helper_vftintrpl_l_s)
+TRANS(vftintrph_l_s, LSX, gen_vv_ptr, gen_helper_vftintrph_l_s)
+TRANS(vftintrml_l_s, LSX, gen_vv_ptr, gen_helper_vftintrml_l_s)
+TRANS(vftintrmh_l_s, LSX, gen_vv_ptr, gen_helper_vftintrmh_l_s)
+TRANS(vftintl_l_s, LSX, gen_vv_ptr, gen_helper_vftintl_l_s)
+TRANS(vftinth_l_s, LSX, gen_vv_ptr, gen_helper_vftinth_l_s)
+TRANS(xvftintrne_w_s, LASX, gen_xx_ptr, gen_helper_vftintrne_w_s)
+TRANS(xvftintrne_l_d, LASX, gen_xx_ptr, gen_helper_vftintrne_l_d)
+TRANS(xvftintrz_w_s, LASX, gen_xx_ptr, gen_helper_vftintrz_w_s)
+TRANS(xvftintrz_l_d, LASX, gen_xx_ptr, gen_helper_vftintrz_l_d)
+TRANS(xvftintrp_w_s, LASX, gen_xx_ptr, gen_helper_vftintrp_w_s)
+TRANS(xvftintrp_l_d, LASX, gen_xx_ptr, gen_helper_vftintrp_l_d)
+TRANS(xvftintrm_w_s, LASX, gen_xx_ptr, gen_helper_vftintrm_w_s)
+TRANS(xvftintrm_l_d, LASX, gen_xx_ptr, gen_helper_vftintrm_l_d)
+TRANS(xvftint_w_s, LASX, gen_xx_ptr, gen_helper_vftint_w_s)
+TRANS(xvftint_l_d, LASX, gen_xx_ptr, gen_helper_vftint_l_d)
+TRANS(xvftintrz_wu_s, LASX, gen_xx_ptr, gen_helper_vftintrz_wu_s)
+TRANS(xvftintrz_lu_d, LASX, gen_xx_ptr, gen_helper_vftintrz_lu_d)
+TRANS(xvftint_wu_s, LASX, gen_xx_ptr, gen_helper_vftint_wu_s)
+TRANS(xvftint_lu_d, LASX, gen_xx_ptr, gen_helper_vftint_lu_d)
+TRANS(xvftintrne_w_d, LASX, gen_xxx_ptr, gen_helper_vftintrne_w_d)
+TRANS(xvftintrz_w_d, LASX, gen_xxx_ptr, gen_helper_vftintrz_w_d)
+TRANS(xvftintrp_w_d, LASX, gen_xxx_ptr, gen_helper_vftintrp_w_d)
+TRANS(xvftintrm_w_d, LASX, gen_xxx_ptr, gen_helper_vftintrm_w_d)
+TRANS(xvftint_w_d, LASX, gen_xxx_ptr, gen_helper_vftint_w_d)
+TRANS(xvftintrnel_l_s, LASX, gen_xx_ptr, gen_helper_vftintrnel_l_s)
+TRANS(xvftintrneh_l_s, LASX, gen_xx_ptr, gen_helper_vftintrneh_l_s)
+TRANS(xvftintrzl_l_s, LASX, gen_xx_ptr, gen_helper_vftintrzl_l_s)
+TRANS(xvftintrzh_l_s, LASX, gen_xx_ptr, gen_helper_vftintrzh_l_s)
+TRANS(xvftintrpl_l_s, LASX, gen_xx_ptr, gen_helper_vftintrpl_l_s)
+TRANS(xvftintrph_l_s, LASX, gen_xx_ptr, gen_helper_vftintrph_l_s)
+TRANS(xvftintrml_l_s, LASX, gen_xx_ptr, gen_helper_vftintrml_l_s)
+TRANS(xvftintrmh_l_s, LASX, gen_xx_ptr, gen_helper_vftintrmh_l_s)
+TRANS(xvftintl_l_s, LASX, gen_xx_ptr, gen_helper_vftintl_l_s)
+TRANS(xvftinth_l_s, LASX, gen_xx_ptr, gen_helper_vftinth_l_s)
 
-TRANS(vffint_s_w, LSX, gen_vv, gen_helper_vffint_s_w)
-TRANS(vffint_d_l, LSX, gen_vv, gen_helper_vffint_d_l)
-TRANS(vffint_s_wu, LSX, gen_vv, gen_helper_vffint_s_wu)
-TRANS(vffint_d_lu, LSX, gen_vv, gen_helper_vffint_d_lu)
-TRANS(vffintl_d_w, LSX, gen_vv, gen_helper_vffintl_d_w)
-TRANS(vffinth_d_w, LSX, gen_vv, gen_helper_vffinth_d_w)
-TRANS(vffint_s_l, LSX, gen_vvv, gen_helper_vffint_s_l)
+TRANS(vffint_s_w, LSX, gen_vv_ptr, gen_helper_vffint_s_w)
+TRANS(vffint_d_l, LSX, gen_vv_ptr, gen_helper_vffint_d_l)
+TRANS(vffint_s_wu, LSX, gen_vv_ptr, gen_helper_vffint_s_wu)
+TRANS(vffint_d_lu, LSX, gen_vv_ptr, gen_helper_vffint_d_lu)
+TRANS(vffintl_d_w, LSX, gen_vv_ptr, gen_helper_vffintl_d_w)
+TRANS(vffinth_d_w, LSX, gen_vv_ptr, gen_helper_vffinth_d_w)
+TRANS(vffint_s_l, LSX, gen_vvv_ptr, gen_helper_vffint_s_l)
+TRANS(xvffint_s_w, LASX, gen_xx_ptr, gen_helper_vffint_s_w)
+TRANS(xvffint_d_l, LASX, gen_xx_ptr, gen_helper_vffint_d_l)
+TRANS(xvffint_s_wu, LASX, gen_xx_ptr, gen_helper_vffint_s_wu)
+TRANS(xvffint_d_lu, LASX, gen_xx_ptr, gen_helper_vffint_d_lu)
+TRANS(xvffintl_d_w, LASX, gen_xx_ptr, gen_helper_vffintl_d_w)
+TRANS(xvffinth_d_w, LASX, gen_xx_ptr, gen_helper_vffinth_d_w)
+TRANS(xvffint_s_l, LASX, gen_xxx_ptr, gen_helper_vffint_s_l)
 
-static bool do_cmp(DisasContext *ctx, arg_vvv *a, MemOp mop, TCGCond cond)
+static bool do_cmp_vl(DisasContext *ctx, arg_vvv *a,
+                      uint32_t oprsz, MemOp mop, TCGCond cond)
 {
     uint32_t vd_ofs, vj_ofs, vk_ofs;
 
-    CHECK_SXE;
+    if (!check_vec(ctx, oprsz)) {
+        return true;
+    }
 
     vd_ofs = vec_full_offset(a->vd);
     vj_ofs = vec_full_offset(a->vj);
     vk_ofs = vec_full_offset(a->vk);
 
-    tcg_gen_gvec_cmp(cond, mop, vd_ofs, vj_ofs, vk_ofs, 16, ctx->vl/8);
+    tcg_gen_gvec_cmp(cond, mop, vd_ofs, vj_ofs, vk_ofs, oprsz, ctx->vl / 8);
     return true;
 }
 
-static void do_cmpi_vec(TCGCond cond,
-                        unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
+static bool do_cmp(DisasContext *ctx, arg_vvv *a,
+                   MemOp mop, TCGCond cond)
 {
-    tcg_gen_cmp_vec(cond, vece, t, a, tcg_constant_vec_matching(t, vece, imm));
+    return do_cmp_vl(ctx, a, 16, mop, cond);
 }
 
-static void gen_vseqi_s_vec(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
+static bool do_xcmp(DisasContext *ctx, arg_vvv *a,
+                    MemOp mop, TCGCond cond)
 {
-    do_cmpi_vec(TCG_COND_EQ, vece, t, a, imm);
+    return do_cmp_vl(ctx, a, 32, mop, cond);
 }
 
-static void gen_vslei_s_vec(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
+static bool do_cmpi_vl(DisasContext *ctx, arg_vv_i *a,
+                       uint32_t oprsz, MemOp mop, TCGCond cond)
 {
-    do_cmpi_vec(TCG_COND_LE, vece, t, a, imm);
+    uint32_t vd_ofs, vj_ofs;
+
+    if (!check_vec(ctx, oprsz)) {
+        return true;
+    }
+
+    vd_ofs = vec_full_offset(a->vd);
+    vj_ofs = vec_full_offset(a->vj);
+
+    tcg_gen_gvec_cmpi(cond, mop, vd_ofs, vj_ofs, a->imm, oprsz, ctx->vl / 8);
+    return true;
 }
 
-static void gen_vslti_s_vec(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
+static bool do_cmpi(DisasContext *ctx, arg_vv_i *a,
+                    MemOp mop, TCGCond cond)
 {
-    do_cmpi_vec(TCG_COND_LT, vece, t, a, imm);
+    return do_cmpi_vl(ctx, a, 16, mop, cond);
 }
 
-static void gen_vslei_u_vec(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
+static bool do_xcmpi(DisasContext *ctx, arg_vv_i *a,
+                     MemOp mop, TCGCond cond)
 {
-    do_cmpi_vec(TCG_COND_LEU, vece, t, a, imm);
+    return do_cmpi_vl(ctx, a, 32, mop, cond);
 }
 
-static void gen_vslti_u_vec(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
-{
-    do_cmpi_vec(TCG_COND_LTU, vece, t, a, imm);
-}
-
-#define DO_CMPI_S(NAME)                                                \
-static bool do_## NAME ##_s(DisasContext *ctx, arg_vv_i *a, MemOp mop) \
-{                                                                      \
-    uint32_t vd_ofs, vj_ofs;                                           \
-                                                                       \
-    CHECK_SXE;                                                         \
-                                                                       \
-    static const TCGOpcode vecop_list[] = {                            \
-        INDEX_op_cmp_vec, 0                                            \
-    };                                                                 \
-    static const GVecGen2i op[4] = {                                   \
-        {                                                              \
-            .fniv = gen_## NAME ##_s_vec,                              \
-            .fnoi = gen_helper_## NAME ##_b,                           \
-            .opt_opc = vecop_list,                                     \
-            .vece = MO_8                                               \
-        },                                                             \
-        {                                                              \
-            .fniv = gen_## NAME ##_s_vec,                              \
-            .fnoi = gen_helper_## NAME ##_h,                           \
-            .opt_opc = vecop_list,                                     \
-            .vece = MO_16                                              \
-        },                                                             \
-        {                                                              \
-            .fniv = gen_## NAME ##_s_vec,                              \
-            .fnoi = gen_helper_## NAME ##_w,                           \
-            .opt_opc = vecop_list,                                     \
-            .vece = MO_32                                              \
-        },                                                             \
-        {                                                              \
-            .fniv = gen_## NAME ##_s_vec,                              \
-            .fnoi = gen_helper_## NAME ##_d,                           \
-            .opt_opc = vecop_list,                                     \
-            .vece = MO_64                                              \
-        }                                                              \
-    };                                                                 \
-                                                                       \
-    vd_ofs = vec_full_offset(a->vd);                                   \
-    vj_ofs = vec_full_offset(a->vj);                                   \
-                                                                       \
-    tcg_gen_gvec_2i(vd_ofs, vj_ofs, 16, ctx->vl/8, a->imm, &op[mop]);  \
-                                                                       \
-    return true;                                                       \
-}
-
-DO_CMPI_S(vseqi)
-DO_CMPI_S(vslei)
-DO_CMPI_S(vslti)
-
-#define DO_CMPI_U(NAME)                                                \
-static bool do_## NAME ##_u(DisasContext *ctx, arg_vv_i *a, MemOp mop) \
-{                                                                      \
-    uint32_t vd_ofs, vj_ofs;                                           \
-                                                                       \
-    CHECK_SXE;                                                         \
-                                                                       \
-    static const TCGOpcode vecop_list[] = {                            \
-        INDEX_op_cmp_vec, 0                                            \
-    };                                                                 \
-    static const GVecGen2i op[4] = {                                   \
-        {                                                              \
-            .fniv = gen_## NAME ##_u_vec,                              \
-            .fnoi = gen_helper_## NAME ##_bu,                          \
-            .opt_opc = vecop_list,                                     \
-            .vece = MO_8                                               \
-        },                                                             \
-        {                                                              \
-            .fniv = gen_## NAME ##_u_vec,                              \
-            .fnoi = gen_helper_## NAME ##_hu,                          \
-            .opt_opc = vecop_list,                                     \
-            .vece = MO_16                                              \
-        },                                                             \
-        {                                                              \
-            .fniv = gen_## NAME ##_u_vec,                              \
-            .fnoi = gen_helper_## NAME ##_wu,                          \
-            .opt_opc = vecop_list,                                     \
-            .vece = MO_32                                              \
-        },                                                             \
-        {                                                              \
-            .fniv = gen_## NAME ##_u_vec,                              \
-            .fnoi = gen_helper_## NAME ##_du,                          \
-            .opt_opc = vecop_list,                                     \
-            .vece = MO_64                                              \
-        }                                                              \
-    };                                                                 \
-                                                                       \
-    vd_ofs = vec_full_offset(a->vd);                                   \
-    vj_ofs = vec_full_offset(a->vj);                                   \
-                                                                       \
-    tcg_gen_gvec_2i(vd_ofs, vj_ofs, 16, ctx->vl/8, a->imm, &op[mop]);  \
-                                                                       \
-    return true;                                                       \
-}
-
-DO_CMPI_U(vslei)
-DO_CMPI_U(vslti)
-
 TRANS(vseq_b, LSX, do_cmp, MO_8, TCG_COND_EQ)
 TRANS(vseq_h, LSX, do_cmp, MO_16, TCG_COND_EQ)
 TRANS(vseq_w, LSX, do_cmp, MO_32, TCG_COND_EQ)
 TRANS(vseq_d, LSX, do_cmp, MO_64, TCG_COND_EQ)
-TRANS(vseqi_b, LSX, do_vseqi_s, MO_8)
-TRANS(vseqi_h, LSX, do_vseqi_s, MO_16)
-TRANS(vseqi_w, LSX, do_vseqi_s, MO_32)
-TRANS(vseqi_d, LSX, do_vseqi_s, MO_64)
+TRANS(vseqi_b, LSX, do_cmpi, MO_8, TCG_COND_EQ)
+TRANS(vseqi_h, LSX, do_cmpi, MO_16, TCG_COND_EQ)
+TRANS(vseqi_w, LSX, do_cmpi, MO_32, TCG_COND_EQ)
+TRANS(vseqi_d, LSX, do_cmpi, MO_64, TCG_COND_EQ)
+TRANS(xvseq_b, LASX, do_xcmp, MO_8, TCG_COND_EQ)
+TRANS(xvseq_h, LASX, do_xcmp, MO_16, TCG_COND_EQ)
+TRANS(xvseq_w, LASX, do_xcmp, MO_32, TCG_COND_EQ)
+TRANS(xvseq_d, LASX, do_xcmp, MO_64, TCG_COND_EQ)
+TRANS(xvseqi_b, LASX, do_xcmpi, MO_8, TCG_COND_EQ)
+TRANS(xvseqi_h, LASX, do_xcmpi, MO_16, TCG_COND_EQ)
+TRANS(xvseqi_w, LASX, do_xcmpi, MO_32, TCG_COND_EQ)
+TRANS(xvseqi_d, LASX, do_xcmpi, MO_64, TCG_COND_EQ)
 
 TRANS(vsle_b, LSX, do_cmp, MO_8, TCG_COND_LE)
 TRANS(vsle_h, LSX, do_cmp, MO_16, TCG_COND_LE)
 TRANS(vsle_w, LSX, do_cmp, MO_32, TCG_COND_LE)
 TRANS(vsle_d, LSX, do_cmp, MO_64, TCG_COND_LE)
-TRANS(vslei_b, LSX, do_vslei_s, MO_8)
-TRANS(vslei_h, LSX, do_vslei_s, MO_16)
-TRANS(vslei_w, LSX, do_vslei_s, MO_32)
-TRANS(vslei_d, LSX, do_vslei_s, MO_64)
+TRANS(vslei_b, LSX, do_cmpi, MO_8, TCG_COND_LE)
+TRANS(vslei_h, LSX, do_cmpi, MO_16, TCG_COND_LE)
+TRANS(vslei_w, LSX, do_cmpi, MO_32, TCG_COND_LE)
+TRANS(vslei_d, LSX, do_cmpi, MO_64, TCG_COND_LE)
 TRANS(vsle_bu, LSX, do_cmp, MO_8, TCG_COND_LEU)
 TRANS(vsle_hu, LSX, do_cmp, MO_16, TCG_COND_LEU)
 TRANS(vsle_wu, LSX, do_cmp, MO_32, TCG_COND_LEU)
 TRANS(vsle_du, LSX, do_cmp, MO_64, TCG_COND_LEU)
-TRANS(vslei_bu, LSX, do_vslei_u, MO_8)
-TRANS(vslei_hu, LSX, do_vslei_u, MO_16)
-TRANS(vslei_wu, LSX, do_vslei_u, MO_32)
-TRANS(vslei_du, LSX, do_vslei_u, MO_64)
+TRANS(vslei_bu, LSX, do_cmpi, MO_8, TCG_COND_LEU)
+TRANS(vslei_hu, LSX, do_cmpi, MO_16, TCG_COND_LEU)
+TRANS(vslei_wu, LSX, do_cmpi, MO_32, TCG_COND_LEU)
+TRANS(vslei_du, LSX, do_cmpi, MO_64, TCG_COND_LEU)
+TRANS(xvsle_b, LASX, do_xcmp, MO_8, TCG_COND_LE)
+TRANS(xvsle_h, LASX, do_xcmp, MO_16, TCG_COND_LE)
+TRANS(xvsle_w, LASX, do_xcmp, MO_32, TCG_COND_LE)
+TRANS(xvsle_d, LASX, do_xcmp, MO_64, TCG_COND_LE)
+TRANS(xvslei_b, LASX, do_xcmpi, MO_8, TCG_COND_LE)
+TRANS(xvslei_h, LASX, do_xcmpi, MO_16, TCG_COND_LE)
+TRANS(xvslei_w, LASX, do_xcmpi, MO_32, TCG_COND_LE)
+TRANS(xvslei_d, LASX, do_xcmpi, MO_64, TCG_COND_LE)
+TRANS(xvsle_bu, LASX, do_xcmp, MO_8, TCG_COND_LEU)
+TRANS(xvsle_hu, LASX, do_xcmp, MO_16, TCG_COND_LEU)
+TRANS(xvsle_wu, LASX, do_xcmp, MO_32, TCG_COND_LEU)
+TRANS(xvsle_du, LASX, do_xcmp, MO_64, TCG_COND_LEU)
+TRANS(xvslei_bu, LASX, do_xcmpi, MO_8, TCG_COND_LEU)
+TRANS(xvslei_hu, LASX, do_xcmpi, MO_16, TCG_COND_LEU)
+TRANS(xvslei_wu, LASX, do_xcmpi, MO_32, TCG_COND_LEU)
+TRANS(xvslei_du, LASX, do_xcmpi, MO_64, TCG_COND_LEU)
 
 TRANS(vslt_b, LSX, do_cmp, MO_8, TCG_COND_LT)
 TRANS(vslt_h, LSX, do_cmp, MO_16, TCG_COND_LT)
 TRANS(vslt_w, LSX, do_cmp, MO_32, TCG_COND_LT)
 TRANS(vslt_d, LSX, do_cmp, MO_64, TCG_COND_LT)
-TRANS(vslti_b, LSX, do_vslti_s, MO_8)
-TRANS(vslti_h, LSX, do_vslti_s, MO_16)
-TRANS(vslti_w, LSX, do_vslti_s, MO_32)
-TRANS(vslti_d, LSX, do_vslti_s, MO_64)
+TRANS(vslti_b, LSX, do_cmpi, MO_8, TCG_COND_LT)
+TRANS(vslti_h, LSX, do_cmpi, MO_16, TCG_COND_LT)
+TRANS(vslti_w, LSX, do_cmpi, MO_32, TCG_COND_LT)
+TRANS(vslti_d, LSX, do_cmpi, MO_64, TCG_COND_LT)
 TRANS(vslt_bu, LSX, do_cmp, MO_8, TCG_COND_LTU)
 TRANS(vslt_hu, LSX, do_cmp, MO_16, TCG_COND_LTU)
 TRANS(vslt_wu, LSX, do_cmp, MO_32, TCG_COND_LTU)
 TRANS(vslt_du, LSX, do_cmp, MO_64, TCG_COND_LTU)
-TRANS(vslti_bu, LSX, do_vslti_u, MO_8)
-TRANS(vslti_hu, LSX, do_vslti_u, MO_16)
-TRANS(vslti_wu, LSX, do_vslti_u, MO_32)
-TRANS(vslti_du, LSX, do_vslti_u, MO_64)
+TRANS(vslti_bu, LSX, do_cmpi, MO_8, TCG_COND_LTU)
+TRANS(vslti_hu, LSX, do_cmpi, MO_16, TCG_COND_LTU)
+TRANS(vslti_wu, LSX, do_cmpi, MO_32, TCG_COND_LTU)
+TRANS(vslti_du, LSX, do_cmpi, MO_64, TCG_COND_LTU)
+TRANS(xvslt_b, LASX, do_xcmp, MO_8, TCG_COND_LT)
+TRANS(xvslt_h, LASX, do_xcmp, MO_16, TCG_COND_LT)
+TRANS(xvslt_w, LASX, do_xcmp, MO_32, TCG_COND_LT)
+TRANS(xvslt_d, LASX, do_xcmp, MO_64, TCG_COND_LT)
+TRANS(xvslti_b, LASX, do_xcmpi, MO_8, TCG_COND_LT)
+TRANS(xvslti_h, LASX, do_xcmpi, MO_16, TCG_COND_LT)
+TRANS(xvslti_w, LASX, do_xcmpi, MO_32, TCG_COND_LT)
+TRANS(xvslti_d, LASX, do_xcmpi, MO_64, TCG_COND_LT)
+TRANS(xvslt_bu, LASX, do_xcmp, MO_8, TCG_COND_LTU)
+TRANS(xvslt_hu, LASX, do_xcmp, MO_16, TCG_COND_LTU)
+TRANS(xvslt_wu, LASX, do_xcmp, MO_32, TCG_COND_LTU)
+TRANS(xvslt_du, LASX, do_xcmp, MO_64, TCG_COND_LTU)
+TRANS(xvslti_bu, LASX, do_xcmpi, MO_8, TCG_COND_LTU)
+TRANS(xvslti_hu, LASX, do_xcmpi, MO_16, TCG_COND_LTU)
+TRANS(xvslti_wu, LASX, do_xcmpi, MO_32, TCG_COND_LTU)
+TRANS(xvslti_du, LASX, do_xcmpi, MO_64, TCG_COND_LTU)
 
-static bool trans_vfcmp_cond_s(DisasContext *ctx, arg_vvv_fcond *a)
+static bool do_vfcmp_cond_s(DisasContext *ctx, arg_vvv_fcond *a, uint32_t sz)
 {
     uint32_t flags;
-    void (*fn)(TCGv_env, TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32);
+    void (*fn)(TCGv_env, TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32);
     TCGv_i32 vd = tcg_constant_i32(a->vd);
     TCGv_i32 vj = tcg_constant_i32(a->vj);
     TCGv_i32 vk = tcg_constant_i32(a->vk);
+    TCGv_i32 oprsz = tcg_constant_i32(sz);
 
-    if (!avail_LSX(ctx)) {
-        return false;
+    if (!check_vec(ctx, sz)) {
+        return true;
     }
 
-    CHECK_SXE;
-
     fn = (a->fcond & 1 ? gen_helper_vfcmp_s_s : gen_helper_vfcmp_c_s);
     flags = get_fcmp_flags(a->fcond >> 1);
-    fn(cpu_env, vd, vj, vk,  tcg_constant_i32(flags));
+    fn(cpu_env, oprsz, vd, vj, vk, tcg_constant_i32(flags));
 
     return true;
 }
 
-static bool trans_vfcmp_cond_d(DisasContext *ctx, arg_vvv_fcond *a)
+static bool do_vfcmp_cond_d(DisasContext *ctx, arg_vvv_fcond *a, uint32_t sz)
 {
     uint32_t flags;
-    void (*fn)(TCGv_env, TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32);
+    void (*fn)(TCGv_env, TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32);
     TCGv_i32 vd = tcg_constant_i32(a->vd);
     TCGv_i32 vj = tcg_constant_i32(a->vj);
     TCGv_i32 vk = tcg_constant_i32(a->vk);
+    TCGv_i32 oprsz = tcg_constant_i32(sz);
 
-    if (!avail_LSX(ctx)) {
-        return false;
+    if (!check_vec(ctx, sz)) {
+        return true;
     }
 
-    CHECK_SXE;
-
     fn = (a->fcond & 1 ? gen_helper_vfcmp_s_d : gen_helper_vfcmp_c_d);
     flags = get_fcmp_flags(a->fcond >> 1);
-    fn(cpu_env, vd, vj, vk, tcg_constant_i32(flags));
+    fn(cpu_env, oprsz, vd, vj, vk, tcg_constant_i32(flags));
 
     return true;
 }
 
-static bool trans_vbitsel_v(DisasContext *ctx, arg_vvvv *a)
-{
-    if (!avail_LSX(ctx)) {
-        return false;
-    }
+TRANS(vfcmp_cond_s, LSX, do_vfcmp_cond_s, 16)
+TRANS(vfcmp_cond_d, LSX, do_vfcmp_cond_d, 16)
+TRANS(xvfcmp_cond_s, LASX, do_vfcmp_cond_s, 32)
+TRANS(xvfcmp_cond_d, LASX, do_vfcmp_cond_d, 32)
 
-    CHECK_SXE;
+static bool do_vbitsel_v(DisasContext *ctx, arg_vvvv *a, uint32_t oprsz)
+{
+    if (!check_vec(ctx, oprsz)) {
+        return true;
+    }
 
     tcg_gen_gvec_bitsel(MO_64, vec_full_offset(a->vd), vec_full_offset(a->va),
                         vec_full_offset(a->vk), vec_full_offset(a->vj),
-                        16, ctx->vl/8);
+                        oprsz, ctx->vl / 8);
     return true;
 }
 
+TRANS(vbitsel_v, LSX, do_vbitsel_v, 16)
+TRANS(xvbitsel_v, LASX, do_vbitsel_v, 32)
+
 static void gen_vbitseli(unsigned vece, TCGv_vec a, TCGv_vec b, int64_t imm)
 {
     tcg_gen_bitsel_vec(vece, a, a, tcg_constant_vec_matching(a, vece, imm), b);
 }
 
-static bool trans_vbitseli_b(DisasContext *ctx, arg_vv_i *a)
+static bool do_vbitseli_b(DisasContext *ctx, arg_vv_i *a, uint32_t oprsz)
 {
     static const GVecGen2i op = {
        .fniv = gen_vbitseli,
@@ -3957,17 +4738,18 @@ static bool trans_vbitseli_b(DisasContext *ctx, arg_vv_i *a)
        .load_dest = true
     };
 
-    if (!avail_LSX(ctx)) {
-        return false;
+    if (!check_vec(ctx, oprsz)) {
+        return true;
     }
 
-    CHECK_SXE;
-
     tcg_gen_gvec_2i(vec_full_offset(a->vd), vec_full_offset(a->vj),
-                    16, ctx->vl/8, a->imm, &op);
+                    oprsz, ctx->vl / 8, a->imm , &op);
     return true;
 }
 
+TRANS(vbitseli_b, LSX, do_vbitseli_b, 16)
+TRANS(xvbitseli_b, LASX, do_vbitseli_b, 32)
+
 #define VSET(NAME, COND)                                                       \
 static bool trans_## NAME (DisasContext *ctx, arg_cv *a)                       \
 {                                                                              \
@@ -3984,7 +4766,10 @@ static bool trans_## NAME (DisasContext *ctx, arg_cv *a)                       \
         return false;                                                          \
     }                                                                          \
                                                                                \
-    CHECK_SXE;                                                                 \
+    if (!check_vec(ctx, 16)) {                                                 \
+        return true;                                                           \
+    }                                                                          \
+                                                                               \
     tcg_gen_or_i64(t1, al, ah);                                                \
     tcg_gen_setcondi_i64(COND, t1, t1, 0);                                     \
     tcg_gen_st8_tl(t1, cpu_env, offsetof(CPULoongArchState, cf[a->cd & 0x7])); \
@@ -4004,193 +4789,156 @@ TRANS(vsetallnez_h, LSX, gen_cv, gen_helper_vsetallnez_h)
 TRANS(vsetallnez_w, LSX, gen_cv, gen_helper_vsetallnez_w)
 TRANS(vsetallnez_d, LSX, gen_cv, gen_helper_vsetallnez_d)
 
-static bool trans_vinsgr2vr_b(DisasContext *ctx, arg_vr_i *a)
+#define XVSET(NAME, COND)                                                      \
+static bool trans_## NAME(DisasContext *ctx, arg_cv * a)                       \
+{                                                                              \
+    TCGv_i64 t1, t2, d[4];                                                     \
+                                                                               \
+    d[0] = tcg_temp_new_i64();                                                 \
+    d[1] = tcg_temp_new_i64();                                                 \
+    d[2] = tcg_temp_new_i64();                                                 \
+    d[3] = tcg_temp_new_i64();                                                 \
+    t1 = tcg_temp_new_i64();                                                   \
+    t2 = tcg_temp_new_i64();                                                   \
+                                                                               \
+    get_vreg64(d[0], a->vj, 0);                                                \
+    get_vreg64(d[1], a->vj, 1);                                                \
+    get_vreg64(d[2], a->vj, 2);                                                \
+    get_vreg64(d[3], a->vj, 3);                                                \
+                                                                               \
+    if (!avail_LASX(ctx)) {                                                    \
+        return false;                                                          \
+    }                                                                          \
+                                                                               \
+    if (!check_vec(ctx, 32)) {                                                 \
+        return true;                                                           \
+    }                                                                          \
+                                                                               \
+    tcg_gen_or_i64(t1, d[0], d[1]);                                            \
+    tcg_gen_or_i64(t2, d[2], d[3]);                                            \
+    tcg_gen_or_i64(t1, t2, t1);                                                \
+    tcg_gen_setcondi_i64(COND, t1, t1, 0);                                     \
+    tcg_gen_st8_tl(t1, cpu_env, offsetof(CPULoongArchState, cf[a->cd & 0x7])); \
+                                                                               \
+    return true;                                                               \
+}
+
+XVSET(xvseteqz_v, TCG_COND_EQ)
+XVSET(xvsetnez_v, TCG_COND_NE)
+
+TRANS(xvsetanyeqz_b, LASX, gen_cx, gen_helper_vsetanyeqz_b)
+TRANS(xvsetanyeqz_h, LASX, gen_cx, gen_helper_vsetanyeqz_h)
+TRANS(xvsetanyeqz_w, LASX, gen_cx, gen_helper_vsetanyeqz_w)
+TRANS(xvsetanyeqz_d, LASX, gen_cx, gen_helper_vsetanyeqz_d)
+TRANS(xvsetallnez_b, LASX, gen_cx, gen_helper_vsetallnez_b)
+TRANS(xvsetallnez_h, LASX, gen_cx, gen_helper_vsetallnez_h)
+TRANS(xvsetallnez_w, LASX, gen_cx, gen_helper_vsetallnez_w)
+TRANS(xvsetallnez_d, LASX, gen_cx, gen_helper_vsetallnez_d)
+
+static bool gen_g2v_vl(DisasContext *ctx, arg_vr_i *a, uint32_t oprsz, MemOp mop,
+                       void (*func)(TCGv, TCGv_ptr, tcg_target_long))
 {
     TCGv src = gpr_src(ctx, a->rj, EXT_NONE);
 
-    if (!avail_LSX(ctx)) {
-        return false;
+    if (!check_vec(ctx, oprsz)) {
+        return true;
     }
 
-    CHECK_SXE;
-    tcg_gen_st8_i64(src, cpu_env,
-                    offsetof(CPULoongArchState, fpr[a->vd].vreg.B(a->imm)));
+    func(src, cpu_env, vec_reg_offset(a->vd, a->imm, mop));
+
     return true;
 }
 
-static bool trans_vinsgr2vr_h(DisasContext *ctx, arg_vr_i *a)
+static bool gen_g2v(DisasContext *ctx, arg_vr_i *a, MemOp mop,
+                    void (*func)(TCGv, TCGv_ptr, tcg_target_long))
+{
+    return gen_g2v_vl(ctx, a, 16, mop, func);
+}
+
+static bool gen_g2x(DisasContext *ctx, arg_vr_i *a, MemOp mop,
+                    void (*func)(TCGv, TCGv_ptr, tcg_target_long))
+{
+    return gen_g2v_vl(ctx, a, 32, mop, func);
+}
+
+TRANS(vinsgr2vr_b, LSX, gen_g2v, MO_8, tcg_gen_st8_i64)
+TRANS(vinsgr2vr_h, LSX, gen_g2v, MO_16, tcg_gen_st16_i64)
+TRANS(vinsgr2vr_w, LSX, gen_g2v, MO_32, tcg_gen_st32_i64)
+TRANS(vinsgr2vr_d, LSX, gen_g2v, MO_64, tcg_gen_st_i64)
+TRANS(xvinsgr2vr_w, LASX, gen_g2x, MO_32, tcg_gen_st32_i64)
+TRANS(xvinsgr2vr_d, LASX, gen_g2x, MO_64, tcg_gen_st_i64)
+
+static bool gen_v2g_vl(DisasContext *ctx, arg_rv_i *a, uint32_t oprsz, MemOp mop,
+                       void (*func)(TCGv, TCGv_ptr, tcg_target_long))
+{
+    TCGv dst = gpr_dst(ctx, a->rd, EXT_NONE);
+
+    if (!check_vec(ctx, oprsz)) {
+        return true;
+    }
+
+    func(dst, cpu_env, vec_reg_offset(a->vj, a->imm, mop));
+
+    return true;
+}
+
+static bool gen_v2g(DisasContext *ctx, arg_rv_i *a, MemOp mop,
+                    void (*func)(TCGv, TCGv_ptr, tcg_target_long))
+{
+    return gen_v2g_vl(ctx, a, 16, mop, func);
+}
+
+static bool gen_x2g(DisasContext *ctx, arg_rv_i *a, MemOp mop,
+                    void (*func)(TCGv, TCGv_ptr, tcg_target_long))
+{
+    return gen_v2g_vl(ctx, a, 32, mop, func);
+}
+
+TRANS(vpickve2gr_b, LSX, gen_v2g, MO_8, tcg_gen_ld8s_i64)
+TRANS(vpickve2gr_h, LSX, gen_v2g, MO_16, tcg_gen_ld16s_i64)
+TRANS(vpickve2gr_w, LSX, gen_v2g, MO_32, tcg_gen_ld32s_i64)
+TRANS(vpickve2gr_d, LSX, gen_v2g, MO_64, tcg_gen_ld_i64)
+TRANS(vpickve2gr_bu, LSX, gen_v2g, MO_8, tcg_gen_ld8u_i64)
+TRANS(vpickve2gr_hu, LSX, gen_v2g, MO_16, tcg_gen_ld16u_i64)
+TRANS(vpickve2gr_wu, LSX, gen_v2g, MO_32, tcg_gen_ld32u_i64)
+TRANS(vpickve2gr_du, LSX, gen_v2g, MO_64, tcg_gen_ld_i64)
+TRANS(xvpickve2gr_w, LASX, gen_x2g, MO_32, tcg_gen_ld32s_i64)
+TRANS(xvpickve2gr_d, LASX, gen_x2g, MO_64, tcg_gen_ld_i64)
+TRANS(xvpickve2gr_wu, LASX, gen_x2g, MO_32, tcg_gen_ld32u_i64)
+TRANS(xvpickve2gr_du, LASX, gen_x2g, MO_64, tcg_gen_ld_i64)
+
+static bool gvec_dup_vl(DisasContext *ctx, arg_vr *a,
+                        uint32_t oprsz, MemOp mop)
 {
     TCGv src = gpr_src(ctx, a->rj, EXT_NONE);
 
-    if (!avail_LSX(ctx)) {
-        return false;
+    if (!check_vec(ctx, oprsz)) {
+        return true;
     }
 
-    CHECK_SXE;
-    tcg_gen_st16_i64(src, cpu_env,
-                    offsetof(CPULoongArchState, fpr[a->vd].vreg.H(a->imm)));
-    return true;
-}
-
-static bool trans_vinsgr2vr_w(DisasContext *ctx, arg_vr_i *a)
-{
-    TCGv src = gpr_src(ctx, a->rj, EXT_NONE);
-
-    if (!avail_LSX(ctx)) {
-        return false;
-    }
-
-    CHECK_SXE;
-    tcg_gen_st32_i64(src, cpu_env,
-                     offsetof(CPULoongArchState, fpr[a->vd].vreg.W(a->imm)));
-    return true;
-}
-
-static bool trans_vinsgr2vr_d(DisasContext *ctx, arg_vr_i *a)
-{
-    TCGv src = gpr_src(ctx, a->rj, EXT_NONE);
-
-    if (!avail_LSX(ctx)) {
-        return false;
-    }
-
-    CHECK_SXE;
-    tcg_gen_st_i64(src, cpu_env,
-                   offsetof(CPULoongArchState, fpr[a->vd].vreg.D(a->imm)));
-    return true;
-}
-
-static bool trans_vpickve2gr_b(DisasContext *ctx, arg_rv_i *a)
-{
-    TCGv dst = gpr_dst(ctx, a->rd, EXT_NONE);
-
-    if (!avail_LSX(ctx)) {
-        return false;
-    }
-
-    CHECK_SXE;
-    tcg_gen_ld8s_i64(dst, cpu_env,
-                     offsetof(CPULoongArchState, fpr[a->vj].vreg.B(a->imm)));
-    return true;
-}
-
-static bool trans_vpickve2gr_h(DisasContext *ctx, arg_rv_i *a)
-{
-    TCGv dst = gpr_dst(ctx, a->rd, EXT_NONE);
-
-    if (!avail_LSX(ctx)) {
-        return false;
-    }
-
-    CHECK_SXE;
-    tcg_gen_ld16s_i64(dst, cpu_env,
-                      offsetof(CPULoongArchState, fpr[a->vj].vreg.H(a->imm)));
-    return true;
-}
-
-static bool trans_vpickve2gr_w(DisasContext *ctx, arg_rv_i *a)
-{
-    TCGv dst = gpr_dst(ctx, a->rd, EXT_NONE);
-
-    if (!avail_LSX(ctx)) {
-        return false;
-    }
-
-    CHECK_SXE;
-    tcg_gen_ld32s_i64(dst, cpu_env,
-                      offsetof(CPULoongArchState, fpr[a->vj].vreg.W(a->imm)));
-    return true;
-}
-
-static bool trans_vpickve2gr_d(DisasContext *ctx, arg_rv_i *a)
-{
-    TCGv dst = gpr_dst(ctx, a->rd, EXT_NONE);
-
-    if (!avail_LSX(ctx)) {
-        return false;
-    }
-
-    CHECK_SXE;
-    tcg_gen_ld_i64(dst, cpu_env,
-                   offsetof(CPULoongArchState, fpr[a->vj].vreg.D(a->imm)));
-    return true;
-}
-
-static bool trans_vpickve2gr_bu(DisasContext *ctx, arg_rv_i *a)
-{
-    TCGv dst = gpr_dst(ctx, a->rd, EXT_NONE);
-
-    if (!avail_LSX(ctx)) {
-        return false;
-    }
-
-    CHECK_SXE;
-    tcg_gen_ld8u_i64(dst, cpu_env,
-                     offsetof(CPULoongArchState, fpr[a->vj].vreg.B(a->imm)));
-    return true;
-}
-
-static bool trans_vpickve2gr_hu(DisasContext *ctx, arg_rv_i *a)
-{
-    TCGv dst = gpr_dst(ctx, a->rd, EXT_NONE);
-
-    if (!avail_LSX(ctx)) {
-        return false;
-    }
-
-    CHECK_SXE;
-    tcg_gen_ld16u_i64(dst, cpu_env,
-                      offsetof(CPULoongArchState, fpr[a->vj].vreg.H(a->imm)));
-    return true;
-}
-
-static bool trans_vpickve2gr_wu(DisasContext *ctx, arg_rv_i *a)
-{
-    TCGv dst = gpr_dst(ctx, a->rd, EXT_NONE);
-
-    if (!avail_LSX(ctx)) {
-        return false;
-    }
-
-    CHECK_SXE;
-    tcg_gen_ld32u_i64(dst, cpu_env,
-                      offsetof(CPULoongArchState, fpr[a->vj].vreg.W(a->imm)));
-    return true;
-}
-
-static bool trans_vpickve2gr_du(DisasContext *ctx, arg_rv_i *a)
-{
-    TCGv dst = gpr_dst(ctx, a->rd, EXT_NONE);
-
-    if (!avail_LSX(ctx)) {
-        return false;
-    }
-
-    CHECK_SXE;
-    tcg_gen_ld_i64(dst, cpu_env,
-                   offsetof(CPULoongArchState, fpr[a->vj].vreg.D(a->imm)));
+    tcg_gen_gvec_dup_i64(mop, vec_full_offset(a->vd),
+                         oprsz, ctx->vl/8, src);
     return true;
 }
 
 static bool gvec_dup(DisasContext *ctx, arg_vr *a, MemOp mop)
 {
-    TCGv src = gpr_src(ctx, a->rj, EXT_NONE);
+    return gvec_dup_vl(ctx, a, 16, mop);
+}
 
-    if (!avail_LSX(ctx)) {
-        return false;
-    }
-
-    CHECK_SXE;
-
-    tcg_gen_gvec_dup_i64(mop, vec_full_offset(a->vd),
-                         16, ctx->vl/8, src);
-    return true;
+static bool gvec_dupx(DisasContext *ctx, arg_vr *a, MemOp mop)
+{
+    return gvec_dup_vl(ctx, a, 32, mop);
 }
 
 TRANS(vreplgr2vr_b, LSX, gvec_dup, MO_8)
 TRANS(vreplgr2vr_h, LSX, gvec_dup, MO_16)
 TRANS(vreplgr2vr_w, LSX, gvec_dup, MO_32)
 TRANS(vreplgr2vr_d, LSX, gvec_dup, MO_64)
+TRANS(xvreplgr2vr_b, LASX, gvec_dupx, MO_8)
+TRANS(xvreplgr2vr_h, LASX, gvec_dupx, MO_16)
+TRANS(xvreplgr2vr_w, LASX, gvec_dupx, MO_32)
+TRANS(xvreplgr2vr_d, LASX, gvec_dupx, MO_64)
 
 static bool trans_vreplvei_b(DisasContext *ctx, arg_vv_i *a)
 {
@@ -4198,7 +4946,10 @@ static bool trans_vreplvei_b(DisasContext *ctx, arg_vv_i *a)
         return false;
     }
 
-    CHECK_SXE;
+    if (!check_vec(ctx, 16)) {
+        return true;
+    }
+
     tcg_gen_gvec_dup_mem(MO_8,vec_full_offset(a->vd),
                          offsetof(CPULoongArchState,
                                   fpr[a->vj].vreg.B((a->imm))),
@@ -4212,7 +4963,10 @@ static bool trans_vreplvei_h(DisasContext *ctx, arg_vv_i *a)
         return false;
     }
 
-    CHECK_SXE;
+    if (!check_vec(ctx, 16)) {
+        return true;
+    }
+
     tcg_gen_gvec_dup_mem(MO_16, vec_full_offset(a->vd),
                          offsetof(CPULoongArchState,
                                   fpr[a->vj].vreg.H((a->imm))),
@@ -4225,7 +4979,10 @@ static bool trans_vreplvei_w(DisasContext *ctx, arg_vv_i *a)
         return false;
     }
 
-    CHECK_SXE;
+    if (!check_vec(ctx, 16)) {
+        return true;
+    }
+
     tcg_gen_gvec_dup_mem(MO_32, vec_full_offset(a->vd),
                          offsetof(CPULoongArchState,
                                   fpr[a->vj].vreg.W((a->imm))),
@@ -4238,7 +4995,10 @@ static bool trans_vreplvei_d(DisasContext *ctx, arg_vv_i *a)
         return false;
     }
 
-    CHECK_SXE;
+    if (!check_vec(ctx, 16)) {
+        return true;
+    }
+
     tcg_gen_gvec_dup_mem(MO_64, vec_full_offset(a->vd),
                          offsetof(CPULoongArchState,
                                   fpr[a->vj].vreg.D((a->imm))),
@@ -4246,106 +5006,169 @@ static bool trans_vreplvei_d(DisasContext *ctx, arg_vv_i *a)
     return true;
 }
 
-static bool gen_vreplve(DisasContext *ctx, arg_vvr *a, int vece, int bit,
-                        void (*func)(TCGv_i64, TCGv_ptr, tcg_target_long))
+static bool gen_vreplve_vl(DisasContext *ctx, arg_vvr *a,
+                           uint32_t oprsz, int vece, int bit,
+                           void (*func)(TCGv_i64, TCGv_ptr, tcg_target_long))
 {
+    int i;
     TCGv_i64 t0 = tcg_temp_new_i64();
     TCGv_ptr t1 = tcg_temp_new_ptr();
     TCGv_i64 t2 = tcg_temp_new_i64();
 
-    if (!avail_LSX(ctx)) {
-        return false;
+    if (!check_vec(ctx, oprsz)) {
+        return true;
     }
 
-    CHECK_SXE;
-
-    tcg_gen_andi_i64(t0, gpr_src(ctx, a->rk, EXT_NONE), (LSX_LEN/bit) -1);
+    tcg_gen_andi_i64(t0, gpr_src(ctx, a->rk, EXT_NONE), (LSX_LEN / bit) - 1);
     tcg_gen_shli_i64(t0, t0, vece);
     if (HOST_BIG_ENDIAN) {
-        tcg_gen_xori_i64(t0, t0, vece << ((LSX_LEN/bit) -1));
+        tcg_gen_xori_i64(t0, t0, vece << ((LSX_LEN / bit) - 1));
     }
 
     tcg_gen_trunc_i64_ptr(t1, t0);
     tcg_gen_add_ptr(t1, t1, cpu_env);
-    func(t2, t1, vec_full_offset(a->vj));
-    tcg_gen_gvec_dup_i64(vece, vec_full_offset(a->vd), 16, ctx->vl/8, t2);
+
+    for (i = 0; i < oprsz; i += 16) {
+        func(t2, t1, vec_full_offset(a->vj) + i);
+        tcg_gen_gvec_dup_i64(vece, vec_full_offset(a->vd) + i, 16, 16, t2);
+    }
 
     return true;
 }
 
+static bool gen_vreplve(DisasContext *ctx, arg_vvr *a, int vece, int bit,
+                        void (*func)(TCGv_i64, TCGv_ptr, tcg_target_long))
+{
+    return gen_vreplve_vl(ctx, a, 16, vece, bit, func);
+}
+
+static bool gen_xvreplve(DisasContext *ctx, arg_vvr *a, int vece, int bit,
+                         void (*func)(TCGv_i64, TCGv_ptr, tcg_target_long))
+{
+    return gen_vreplve_vl(ctx, a, 32, vece, bit, func);
+}
+
 TRANS(vreplve_b, LSX, gen_vreplve, MO_8,  8, tcg_gen_ld8u_i64)
 TRANS(vreplve_h, LSX, gen_vreplve, MO_16, 16, tcg_gen_ld16u_i64)
 TRANS(vreplve_w, LSX, gen_vreplve, MO_32, 32, tcg_gen_ld32u_i64)
 TRANS(vreplve_d, LSX, gen_vreplve, MO_64, 64, tcg_gen_ld_i64)
+TRANS(xvreplve_b, LASX, gen_xvreplve, MO_8,  8, tcg_gen_ld8u_i64)
+TRANS(xvreplve_h, LASX, gen_xvreplve, MO_16, 16, tcg_gen_ld16u_i64)
+TRANS(xvreplve_w, LASX, gen_xvreplve, MO_32, 32, tcg_gen_ld32u_i64)
+TRANS(xvreplve_d, LASX, gen_xvreplve, MO_64, 64, tcg_gen_ld_i64)
 
-static bool trans_vbsll_v(DisasContext *ctx, arg_vv_i *a)
+static bool gen_xvrepl128(DisasContext *ctx, arg_vv_i *a, MemOp mop)
 {
-    int ofs;
-    TCGv_i64 desthigh, destlow, high, low;
+    int i;
 
-    if (!avail_LSX(ctx)) {
-        return false;
+    if (!check_vec(ctx, 32)) {
+        return true;
     }
 
-    CHECK_SXE;
+    for (i = 0; i < 32; i += 16) {
+        tcg_gen_gvec_dup_mem(mop, vec_full_offset(a->vd) + i,
+                             vec_reg_offset(a->vj, a->imm, mop) + i, 16, 16);
 
-    desthigh = tcg_temp_new_i64();
-    destlow = tcg_temp_new_i64();
-    high = tcg_temp_new_i64();
-    low = tcg_temp_new_i64();
+    }
+    return true;
+}
 
-    get_vreg64(low, a->vj, 0);
+TRANS(xvrepl128vei_b, LASX, gen_xvrepl128, MO_8)
+TRANS(xvrepl128vei_h, LASX, gen_xvrepl128, MO_16)
+TRANS(xvrepl128vei_w, LASX, gen_xvrepl128, MO_32)
+TRANS(xvrepl128vei_d, LASX, gen_xvrepl128, MO_64)
 
-    ofs = ((a->imm) & 0xf) * 8;
-    if (ofs < 64) {
-        get_vreg64(high, a->vj, 1);
-        tcg_gen_extract2_i64(desthigh, low, high, 64 - ofs);
-        tcg_gen_shli_i64(destlow, low, ofs);
-    } else {
-        tcg_gen_shli_i64(desthigh, low, ofs - 64);
-        destlow = tcg_constant_i64(0);
+static bool gen_xvreplve0(DisasContext *ctx, arg_vv *a, MemOp mop)
+{
+    if (!check_vec(ctx, 32)) {
+        return true;
     }
 
-    set_vreg64(desthigh, a->vd, 1);
-    set_vreg64(destlow, a->vd, 0);
+    tcg_gen_gvec_dup_mem(mop, vec_full_offset(a->vd),
+                         vec_full_offset(a->vj), 32, 32);
+    return true;
+}
+
+TRANS(xvreplve0_b, LASX, gen_xvreplve0, MO_8)
+TRANS(xvreplve0_h, LASX, gen_xvreplve0, MO_16)
+TRANS(xvreplve0_w, LASX, gen_xvreplve0, MO_32)
+TRANS(xvreplve0_d, LASX, gen_xvreplve0, MO_64)
+TRANS(xvreplve0_q, LASX, gen_xvreplve0, MO_128)
+
+TRANS(xvinsve0_w, LASX, gen_xx_i, gen_helper_xvinsve0_w)
+TRANS(xvinsve0_d, LASX, gen_xx_i, gen_helper_xvinsve0_d)
+
+TRANS(xvpickve_w, LASX, gen_xx_i, gen_helper_xvpickve_w)
+TRANS(xvpickve_d, LASX, gen_xx_i, gen_helper_xvpickve_d)
+
+static bool do_vbsll_v(DisasContext *ctx, arg_vv_i *a, uint32_t oprsz)
+{
+    int i, ofs;
+
+    if (!check_vec(ctx, oprsz)) {
+        return true;
+    }
+
+    for (i = 0; i < oprsz / 16; i++) {
+        TCGv desthigh = tcg_temp_new_i64();
+        TCGv destlow = tcg_temp_new_i64();
+        TCGv high = tcg_temp_new_i64();
+        TCGv low = tcg_temp_new_i64();
+
+        get_vreg64(low, a->vj, 2 * i);
+
+        ofs = ((a->imm) & 0xf) * 8;
+        if (ofs < 64) {
+            get_vreg64(high, a->vj, 2 * i + 1);
+            tcg_gen_extract2_i64(desthigh, low, high, 64 - ofs);
+            tcg_gen_shli_i64(destlow, low, ofs);
+        } else {
+            tcg_gen_shli_i64(desthigh, low, ofs - 64);
+            destlow = tcg_constant_i64(0);
+        }
+        set_vreg64(desthigh, a->vd, 2 * i + 1);
+        set_vreg64(destlow, a->vd, 2 * i);
+    }
 
     return true;
 }
 
-static bool trans_vbsrl_v(DisasContext *ctx, arg_vv_i *a)
+static bool do_vbsrl_v(DisasContext *ctx, arg_vv_i *a, uint32_t oprsz)
 {
-    TCGv_i64 desthigh, destlow, high, low;
-    int ofs;
+    int i, ofs;
 
-    if (!avail_LSX(ctx)) {
-        return false;
+    if (!check_vec(ctx, 32)) {
+        return true;
     }
 
-    CHECK_SXE;
+    for (i = 0; i < oprsz / 16; i++) {
+        TCGv desthigh = tcg_temp_new_i64();
+        TCGv destlow = tcg_temp_new_i64();
+        TCGv high = tcg_temp_new_i64();
+        TCGv low = tcg_temp_new_i64();
+        get_vreg64(high, a->vj, 2 * i + 1);
 
-    desthigh = tcg_temp_new_i64();
-    destlow = tcg_temp_new_i64();
-    high = tcg_temp_new_i64();
-    low = tcg_temp_new_i64();
-
-    get_vreg64(high, a->vj, 1);
-
-    ofs = ((a->imm) & 0xf) * 8;
-    if (ofs < 64) {
-        get_vreg64(low, a->vj, 0);
-        tcg_gen_extract2_i64(destlow, low, high, ofs);
-        tcg_gen_shri_i64(desthigh, high, ofs);
-    } else {
-        tcg_gen_shri_i64(destlow, high, ofs - 64);
-        desthigh = tcg_constant_i64(0);
+        ofs = ((a->imm) & 0xf) * 8;
+        if (ofs < 64) {
+            get_vreg64(low, a->vj, 2 * i);
+            tcg_gen_extract2_i64(destlow, low, high, ofs);
+            tcg_gen_shri_i64(desthigh, high, ofs);
+        } else {
+            tcg_gen_shri_i64(destlow, high, ofs - 64);
+            desthigh = tcg_constant_i64(0);
+        }
+        set_vreg64(desthigh, a->vd, 2 * i + 1);
+        set_vreg64(destlow, a->vd, 2 * i);
     }
 
-    set_vreg64(desthigh, a->vd, 1);
-    set_vreg64(destlow, a->vd, 0);
-
     return true;
 }
 
+TRANS(vbsll_v, LSX, do_vbsll_v, 16)
+TRANS(vbsrl_v, LSX, do_vbsrl_v, 16)
+TRANS(xvbsll_v, LASX, do_vbsll_v, 32)
+TRANS(xvbsrl_v, LASX, do_vbsrl_v, 32)
+
 TRANS(vpackev_b, LSX, gen_vvv, gen_helper_vpackev_b)
 TRANS(vpackev_h, LSX, gen_vvv, gen_helper_vpackev_h)
 TRANS(vpackev_w, LSX, gen_vvv, gen_helper_vpackev_w)
@@ -4354,6 +5177,14 @@ TRANS(vpackod_b, LSX, gen_vvv, gen_helper_vpackod_b)
 TRANS(vpackod_h, LSX, gen_vvv, gen_helper_vpackod_h)
 TRANS(vpackod_w, LSX, gen_vvv, gen_helper_vpackod_w)
 TRANS(vpackod_d, LSX, gen_vvv, gen_helper_vpackod_d)
+TRANS(xvpackev_b, LASX, gen_xxx, gen_helper_vpackev_b)
+TRANS(xvpackev_h, LASX, gen_xxx, gen_helper_vpackev_h)
+TRANS(xvpackev_w, LASX, gen_xxx, gen_helper_vpackev_w)
+TRANS(xvpackev_d, LASX, gen_xxx, gen_helper_vpackev_d)
+TRANS(xvpackod_b, LASX, gen_xxx, gen_helper_vpackod_b)
+TRANS(xvpackod_h, LASX, gen_xxx, gen_helper_vpackod_h)
+TRANS(xvpackod_w, LASX, gen_xxx, gen_helper_vpackod_w)
+TRANS(xvpackod_d, LASX, gen_xxx, gen_helper_vpackod_d)
 
 TRANS(vpickev_b, LSX, gen_vvv, gen_helper_vpickev_b)
 TRANS(vpickev_h, LSX, gen_vvv, gen_helper_vpickev_h)
@@ -4363,6 +5194,14 @@ TRANS(vpickod_b, LSX, gen_vvv, gen_helper_vpickod_b)
 TRANS(vpickod_h, LSX, gen_vvv, gen_helper_vpickod_h)
 TRANS(vpickod_w, LSX, gen_vvv, gen_helper_vpickod_w)
 TRANS(vpickod_d, LSX, gen_vvv, gen_helper_vpickod_d)
+TRANS(xvpickev_b, LASX, gen_xxx, gen_helper_vpickev_b)
+TRANS(xvpickev_h, LASX, gen_xxx, gen_helper_vpickev_h)
+TRANS(xvpickev_w, LASX, gen_xxx, gen_helper_vpickev_w)
+TRANS(xvpickev_d, LASX, gen_xxx, gen_helper_vpickev_d)
+TRANS(xvpickod_b, LASX, gen_xxx, gen_helper_vpickod_b)
+TRANS(xvpickod_h, LASX, gen_xxx, gen_helper_vpickod_h)
+TRANS(xvpickod_w, LASX, gen_xxx, gen_helper_vpickod_w)
+TRANS(xvpickod_d, LASX, gen_xxx, gen_helper_vpickod_d)
 
 TRANS(vilvl_b, LSX, gen_vvv, gen_helper_vilvl_b)
 TRANS(vilvl_h, LSX, gen_vvv, gen_helper_vilvl_h)
@@ -4372,22 +5211,46 @@ TRANS(vilvh_b, LSX, gen_vvv, gen_helper_vilvh_b)
 TRANS(vilvh_h, LSX, gen_vvv, gen_helper_vilvh_h)
 TRANS(vilvh_w, LSX, gen_vvv, gen_helper_vilvh_w)
 TRANS(vilvh_d, LSX, gen_vvv, gen_helper_vilvh_d)
+TRANS(xvilvl_b, LASX, gen_xxx, gen_helper_vilvl_b)
+TRANS(xvilvl_h, LASX, gen_xxx, gen_helper_vilvl_h)
+TRANS(xvilvl_w, LASX, gen_xxx, gen_helper_vilvl_w)
+TRANS(xvilvl_d, LASX, gen_xxx, gen_helper_vilvl_d)
+TRANS(xvilvh_b, LASX, gen_xxx, gen_helper_vilvh_b)
+TRANS(xvilvh_h, LASX, gen_xxx, gen_helper_vilvh_h)
+TRANS(xvilvh_w, LASX, gen_xxx, gen_helper_vilvh_w)
+TRANS(xvilvh_d, LASX, gen_xxx, gen_helper_vilvh_d)
 
 TRANS(vshuf_b, LSX, gen_vvvv, gen_helper_vshuf_b)
 TRANS(vshuf_h, LSX, gen_vvv, gen_helper_vshuf_h)
 TRANS(vshuf_w, LSX, gen_vvv, gen_helper_vshuf_w)
 TRANS(vshuf_d, LSX, gen_vvv, gen_helper_vshuf_d)
+TRANS(xvshuf_b, LASX, gen_xxxx, gen_helper_vshuf_b)
+TRANS(xvshuf_h, LASX, gen_xxx, gen_helper_vshuf_h)
+TRANS(xvshuf_w, LASX, gen_xxx, gen_helper_vshuf_w)
+TRANS(xvshuf_d, LASX, gen_xxx, gen_helper_vshuf_d)
 TRANS(vshuf4i_b, LSX, gen_vv_i, gen_helper_vshuf4i_b)
 TRANS(vshuf4i_h, LSX, gen_vv_i, gen_helper_vshuf4i_h)
 TRANS(vshuf4i_w, LSX, gen_vv_i, gen_helper_vshuf4i_w)
 TRANS(vshuf4i_d, LSX, gen_vv_i, gen_helper_vshuf4i_d)
+TRANS(xvshuf4i_b, LASX, gen_xx_i, gen_helper_vshuf4i_b)
+TRANS(xvshuf4i_h, LASX, gen_xx_i, gen_helper_vshuf4i_h)
+TRANS(xvshuf4i_w, LASX, gen_xx_i, gen_helper_vshuf4i_w)
+TRANS(xvshuf4i_d, LASX, gen_xx_i, gen_helper_vshuf4i_d)
 
+TRANS(xvperm_w, LASX, gen_xxx, gen_helper_vperm_w)
 TRANS(vpermi_w, LSX, gen_vv_i, gen_helper_vpermi_w)
+TRANS(xvpermi_w, LASX, gen_xx_i, gen_helper_vpermi_w)
+TRANS(xvpermi_d, LASX, gen_xx_i, gen_helper_vpermi_d)
+TRANS(xvpermi_q, LASX, gen_xx_i, gen_helper_vpermi_q)
 
 TRANS(vextrins_b, LSX, gen_vv_i, gen_helper_vextrins_b)
 TRANS(vextrins_h, LSX, gen_vv_i, gen_helper_vextrins_h)
 TRANS(vextrins_w, LSX, gen_vv_i, gen_helper_vextrins_w)
 TRANS(vextrins_d, LSX, gen_vv_i, gen_helper_vextrins_d)
+TRANS(xvextrins_b, LASX, gen_xx_i, gen_helper_vextrins_b)
+TRANS(xvextrins_h, LASX, gen_xx_i, gen_helper_vextrins_h)
+TRANS(xvextrins_w, LASX, gen_xx_i, gen_helper_vextrins_w)
+TRANS(xvextrins_d, LASX, gen_xx_i, gen_helper_vextrins_d)
 
 static bool trans_vld(DisasContext *ctx, arg_vr_i *a)
 {
@@ -4399,7 +5262,9 @@ static bool trans_vld(DisasContext *ctx, arg_vr_i *a)
         return false;
     }
 
-    CHECK_SXE;
+    if (!check_vec(ctx, 16)) {
+        return true;
+    }
 
     addr = gpr_src(ctx, a->rj, EXT_NONE);
     val = tcg_temp_new_i128();
@@ -4426,7 +5291,9 @@ static bool trans_vst(DisasContext *ctx, arg_vr_i *a)
         return false;
     }
 
-    CHECK_SXE;
+    if (!check_vec(ctx, 16)) {
+        return true;
+    }
 
     addr = gpr_src(ctx, a->rj, EXT_NONE);
     val = tcg_temp_new_i128();
@@ -4453,7 +5320,9 @@ static bool trans_vldx(DisasContext *ctx, arg_vrr *a)
         return false;
     }
 
-    CHECK_SXE;
+    if (!check_vec(ctx, 16)) {
+        return true;
+    }
 
     src1 = gpr_src(ctx, a->rj, EXT_NONE);
     src2 = gpr_src(ctx, a->rk, EXT_NONE);
@@ -4480,7 +5349,9 @@ static bool trans_vstx(DisasContext *ctx, arg_vrr *a)
         return false;
     }
 
-    CHECK_SXE;
+    if (!check_vec(ctx, 16)) {
+        return true;
+    }
 
     src1 = gpr_src(ctx, a->rj, EXT_NONE);
     src2 = gpr_src(ctx, a->rk, EXT_NONE);
@@ -4497,59 +5368,155 @@ static bool trans_vstx(DisasContext *ctx, arg_vrr *a)
     return true;
 }
 
-#define VLDREPL(NAME, MO)                                                 \
-static bool trans_## NAME (DisasContext *ctx, arg_vr_i *a)                \
-{                                                                         \
-    TCGv addr;                                                            \
-    TCGv_i64 val;                                                         \
-                                                                          \
-    if (!avail_LSX(ctx)) {                                                \
-        return false;                                                     \
-    }                                                                     \
-                                                                          \
-    CHECK_SXE;                                                            \
-                                                                          \
-    addr = gpr_src(ctx, a->rj, EXT_NONE);                                 \
-    val = tcg_temp_new_i64();                                             \
-                                                                          \
-    addr = make_address_i(ctx, addr, a->imm);                             \
-                                                                          \
-    tcg_gen_qemu_ld_i64(val, addr, ctx->mem_idx, MO);                     \
-    tcg_gen_gvec_dup_i64(MO, vec_full_offset(a->vd), 16, ctx->vl/8, val); \
-                                                                          \
-    return true;                                                          \
+static bool do_vldrepl_vl(DisasContext *ctx, arg_vr_i *a,
+                          uint32_t oprsz, MemOp mop)
+{
+    TCGv addr;
+    TCGv_i64 val;
+
+    if (!check_vec(ctx, oprsz)) {
+        return true;
+    }
+
+    addr = gpr_src(ctx, a->rj, EXT_NONE);
+    val = tcg_temp_new_i64();
+
+    addr = make_address_i(ctx, addr, a->imm);
+
+    tcg_gen_qemu_ld_i64(val, addr, ctx->mem_idx, mop);
+    tcg_gen_gvec_dup_i64(mop, vec_full_offset(a->vd), oprsz, ctx->vl / 8, val);
+
+    return true;
 }
 
-VLDREPL(vldrepl_b, MO_8)
-VLDREPL(vldrepl_h, MO_16)
-VLDREPL(vldrepl_w, MO_32)
-VLDREPL(vldrepl_d, MO_64)
-
-#define VSTELM(NAME, MO, E)                                                  \
-static bool trans_## NAME (DisasContext *ctx, arg_vr_ii *a)                  \
-{                                                                            \
-    TCGv addr;                                                               \
-    TCGv_i64 val;                                                            \
-                                                                             \
-    if (!avail_LSX(ctx)) {                                                   \
-        return false;                                                        \
-    }                                                                        \
-                                                                             \
-    CHECK_SXE;                                                               \
-                                                                             \
-    addr = gpr_src(ctx, a->rj, EXT_NONE);                                    \
-    val = tcg_temp_new_i64();                                                \
-                                                                             \
-    addr = make_address_i(ctx, addr, a->imm);                                \
-                                                                             \
-    tcg_gen_ld_i64(val, cpu_env,                                             \
-                   offsetof(CPULoongArchState, fpr[a->vd].vreg.E(a->imm2))); \
-    tcg_gen_qemu_st_i64(val, addr, ctx->mem_idx, MO);                        \
-                                                                             \
-    return true;                                                             \
+static bool do_vldrepl(DisasContext *ctx, arg_vr_i *a, MemOp mop)
+{
+    return do_vldrepl_vl(ctx, a, 16, mop);
 }
 
-VSTELM(vstelm_b, MO_8, B)
-VSTELM(vstelm_h, MO_16, H)
-VSTELM(vstelm_w, MO_32, W)
-VSTELM(vstelm_d, MO_64, D)
+static bool do_xvldrepl(DisasContext *ctx, arg_vr_i *a, MemOp mop)
+{
+    return do_vldrepl_vl(ctx, a, 32, mop);
+}
+
+TRANS(vldrepl_b, LSX, do_vldrepl, MO_8)
+TRANS(vldrepl_h, LSX, do_vldrepl, MO_16)
+TRANS(vldrepl_w, LSX, do_vldrepl, MO_32)
+TRANS(vldrepl_d, LSX, do_vldrepl, MO_64)
+TRANS(xvldrepl_b, LASX, do_xvldrepl, MO_8)
+TRANS(xvldrepl_h, LASX, do_xvldrepl, MO_16)
+TRANS(xvldrepl_w, LASX, do_xvldrepl, MO_32)
+TRANS(xvldrepl_d, LASX, do_xvldrepl, MO_64)
+
+static bool do_vstelm_vl(DisasContext *ctx,
+                         arg_vr_ii *a, uint32_t oprsz, MemOp mop)
+{
+    TCGv addr;
+    TCGv_i64 val;
+
+    if (!check_vec(ctx, oprsz)) {
+        return true;
+    }
+
+    addr = gpr_src(ctx, a->rj, EXT_NONE);
+    val = tcg_temp_new_i64();
+
+    addr = make_address_i(ctx, addr, a->imm);
+    tcg_gen_ld_i64(val, cpu_env, vec_reg_offset(a->vd, a->imm2, mop));
+    tcg_gen_qemu_st_i64(val, addr, ctx->mem_idx, mop);
+    return true;
+}
+
+static bool do_vstelm(DisasContext *ctx, arg_vr_ii *a, MemOp mop)
+{
+    return do_vstelm_vl(ctx, a, 16, mop);
+}
+
+static bool do_xvstelm(DisasContext *ctx, arg_vr_ii *a, MemOp mop)
+{
+    return do_vstelm_vl(ctx, a, 32, mop);
+}
+
+TRANS(vstelm_b, LSX, do_vstelm, MO_8)
+TRANS(vstelm_h, LSX, do_vstelm, MO_16)
+TRANS(vstelm_w, LSX, do_vstelm, MO_32)
+TRANS(vstelm_d, LSX, do_vstelm, MO_64)
+TRANS(xvstelm_b, LASX, do_xvstelm, MO_8)
+TRANS(xvstelm_h, LASX, do_xvstelm, MO_16)
+TRANS(xvstelm_w, LASX, do_xvstelm, MO_32)
+TRANS(xvstelm_d, LASX, do_xvstelm, MO_64)
+
+static bool gen_lasx_memory(DisasContext *ctx, arg_vr_i *a,
+                            void (*func)(DisasContext *, int, TCGv))
+{
+    TCGv addr = gpr_src(ctx, a->rj, EXT_NONE);
+    TCGv temp = NULL;
+
+    if (!check_vec(ctx, 32)) {
+        return true;
+    }
+
+    if (a->imm) {
+        temp = tcg_temp_new();
+        tcg_gen_addi_tl(temp, addr, a->imm);
+        addr = temp;
+    }
+
+    func(ctx, a->vd, addr);
+    return true;
+}
+
+static void gen_xvld(DisasContext *ctx, int vreg, TCGv addr)
+{
+    int i;
+    TCGv temp = tcg_temp_new();
+    TCGv dest = tcg_temp_new();
+
+    tcg_gen_qemu_ld_i64(dest, addr, ctx->mem_idx, MO_TEUQ);
+    set_vreg64(dest, vreg, 0);
+
+    for (i = 1; i < 4; i++) {
+        tcg_gen_addi_tl(temp, addr, 8 * i);
+        tcg_gen_qemu_ld_i64(dest, temp, ctx->mem_idx, MO_TEUQ);
+        set_vreg64(dest, vreg, i);
+    }
+}
+
+static void gen_xvst(DisasContext * ctx, int vreg, TCGv addr)
+{
+    int i;
+    TCGv temp = tcg_temp_new();
+    TCGv dest = tcg_temp_new();
+
+    get_vreg64(dest, vreg, 0);
+    tcg_gen_qemu_st_i64(dest, addr, ctx->mem_idx, MO_TEUQ);
+
+    for (i = 1; i < 4; i++) {
+        tcg_gen_addi_tl(temp, addr, 8 * i);
+        get_vreg64(dest, vreg, i);
+        tcg_gen_qemu_st_i64(dest, temp, ctx->mem_idx, MO_TEUQ);
+    }
+}
+
+TRANS(xvld, LASX, gen_lasx_memory, gen_xvld)
+TRANS(xvst, LASX, gen_lasx_memory, gen_xvst)
+
+static bool gen_lasx_memoryx(DisasContext *ctx, arg_vrr *a,
+                             void (*func)(DisasContext*, int, TCGv))
+{
+    TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE);
+    TCGv src2 = gpr_src(ctx, a->rk, EXT_NONE);
+    TCGv addr = tcg_temp_new();
+
+    if (!check_vec(ctx, 32)) {
+        return true;
+    }
+
+    tcg_gen_add_tl(addr, src1, src2);
+    func(ctx, a->vd, addr);
+
+    return true;
+}
+
+TRANS(xvldx, LASX, gen_lasx_memoryx, gen_xvld)
+TRANS(xvstx, LASX, gen_lasx_memoryx, gen_xvst)
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index c9c3bc2c73..64b308f9fb 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -550,6 +550,10 @@ dbcl             0000 00000010 10101 ...............      @i15
 @vr_i8i2      .... ........ imm2:2 ........ rj:5 vd:5    &vr_ii imm=%i8s2
 @vr_i8i3       .... ....... imm2:3 ........ rj:5 vd:5    &vr_ii imm=%i8s1
 @vr_i8i4          .... ...... imm2:4 imm:s8 rj:5 vd:5    &vr_ii
+@vr_i8i2x     .... ........ imm2:2 ........ rj:5 vd:5    &vr_ii imm=%i8s3
+@vr_i8i3x      .... ....... imm2:3 ........ rj:5 vd:5    &vr_ii imm=%i8s2
+@vr_i8i4x       .... ...... imm2:4 ........ rj:5 vd:5    &vr_ii imm=%i8s1
+@vr_i8i5x          .... ..... imm2:5 imm:s8 rj:5 vd:5    &vr_ii
 @vrr               .... ........ ..... rk:5 rj:5 vd:5    &vrr
 @v_i13                   .... ........ .. imm:13 vd:5    &v_i
 
@@ -1296,3 +1300,781 @@ vstelm_d         0011 00010001 0 . ........ ..... .....   @vr_i8i1
 vstelm_w         0011 00010010 .. ........ ..... .....    @vr_i8i2
 vstelm_h         0011 0001010 ... ........ ..... .....    @vr_i8i3
 vstelm_b         0011 000110 .... ........ ..... .....    @vr_i8i4
+
+#
+# LoongArch LASX instructions
+#
+xvadd_b          0111 01000000 10100 ..... ..... .....    @vvv
+xvadd_h          0111 01000000 10101 ..... ..... .....    @vvv
+xvadd_w          0111 01000000 10110 ..... ..... .....    @vvv
+xvadd_d          0111 01000000 10111 ..... ..... .....    @vvv
+xvadd_q          0111 01010010 11010 ..... ..... .....    @vvv
+xvsub_b          0111 01000000 11000 ..... ..... .....    @vvv
+xvsub_h          0111 01000000 11001 ..... ..... .....    @vvv
+xvsub_w          0111 01000000 11010 ..... ..... .....    @vvv
+xvsub_d          0111 01000000 11011 ..... ..... .....    @vvv
+xvsub_q          0111 01010010 11011 ..... ..... .....    @vvv
+
+xvaddi_bu        0111 01101000 10100 ..... ..... .....    @vv_ui5
+xvaddi_hu        0111 01101000 10101 ..... ..... .....    @vv_ui5
+xvaddi_wu        0111 01101000 10110 ..... ..... .....    @vv_ui5
+xvaddi_du        0111 01101000 10111 ..... ..... .....    @vv_ui5
+xvsubi_bu        0111 01101000 11000 ..... ..... .....    @vv_ui5
+xvsubi_hu        0111 01101000 11001 ..... ..... .....    @vv_ui5
+xvsubi_wu        0111 01101000 11010 ..... ..... .....    @vv_ui5
+xvsubi_du        0111 01101000 11011 ..... ..... .....    @vv_ui5
+
+xvneg_b          0111 01101001 11000 01100 ..... .....    @vv
+xvneg_h          0111 01101001 11000 01101 ..... .....    @vv
+xvneg_w          0111 01101001 11000 01110 ..... .....    @vv
+xvneg_d          0111 01101001 11000 01111 ..... .....    @vv
+
+xvsadd_b         0111 01000100 01100 ..... ..... .....    @vvv
+xvsadd_h         0111 01000100 01101 ..... ..... .....    @vvv
+xvsadd_w         0111 01000100 01110 ..... ..... .....    @vvv
+xvsadd_d         0111 01000100 01111 ..... ..... .....    @vvv
+xvsadd_bu        0111 01000100 10100 ..... ..... .....    @vvv
+xvsadd_hu        0111 01000100 10101 ..... ..... .....    @vvv
+xvsadd_wu        0111 01000100 10110 ..... ..... .....    @vvv
+xvsadd_du        0111 01000100 10111 ..... ..... .....    @vvv
+
+xvssub_b         0111 01000100 10000 ..... ..... .....    @vvv
+xvssub_h         0111 01000100 10001 ..... ..... .....    @vvv
+xvssub_w         0111 01000100 10010 ..... ..... .....    @vvv
+xvssub_d         0111 01000100 10011 ..... ..... .....    @vvv
+xvssub_bu        0111 01000100 11000 ..... ..... .....    @vvv
+xvssub_hu        0111 01000100 11001 ..... ..... .....    @vvv
+xvssub_wu        0111 01000100 11010 ..... ..... .....    @vvv
+xvssub_du        0111 01000100 11011 ..... ..... .....    @vvv
+
+xvhaddw_h_b      0111 01000101 01000 ..... ..... .....    @vvv
+xvhaddw_w_h      0111 01000101 01001 ..... ..... .....    @vvv
+xvhaddw_d_w      0111 01000101 01010 ..... ..... .....    @vvv
+xvhaddw_q_d      0111 01000101 01011 ..... ..... .....    @vvv
+xvhaddw_hu_bu    0111 01000101 10000 ..... ..... .....    @vvv
+xvhaddw_wu_hu    0111 01000101 10001 ..... ..... .....    @vvv
+xvhaddw_du_wu    0111 01000101 10010 ..... ..... .....    @vvv
+xvhaddw_qu_du    0111 01000101 10011 ..... ..... .....    @vvv
+
+xvhsubw_h_b      0111 01000101 01100 ..... ..... .....    @vvv
+xvhsubw_w_h      0111 01000101 01101 ..... ..... .....    @vvv
+xvhsubw_d_w      0111 01000101 01110 ..... ..... .....    @vvv
+xvhsubw_q_d      0111 01000101 01111 ..... ..... .....    @vvv
+xvhsubw_hu_bu    0111 01000101 10100 ..... ..... .....    @vvv
+xvhsubw_wu_hu    0111 01000101 10101 ..... ..... .....    @vvv
+xvhsubw_du_wu    0111 01000101 10110 ..... ..... .....    @vvv
+xvhsubw_qu_du    0111 01000101 10111 ..... ..... .....    @vvv
+
+xvaddwev_h_b     0111 01000001 11100 ..... ..... .....    @vvv
+xvaddwev_w_h     0111 01000001 11101 ..... ..... .....    @vvv
+xvaddwev_d_w     0111 01000001 11110 ..... ..... .....    @vvv
+xvaddwev_q_d     0111 01000001 11111 ..... ..... .....    @vvv
+xvaddwod_h_b     0111 01000010 00100 ..... ..... .....    @vvv
+xvaddwod_w_h     0111 01000010 00101 ..... ..... .....    @vvv
+xvaddwod_d_w     0111 01000010 00110 ..... ..... .....    @vvv
+xvaddwod_q_d     0111 01000010 00111 ..... ..... .....    @vvv
+
+xvsubwev_h_b     0111 01000010 00000 ..... ..... .....    @vvv
+xvsubwev_w_h     0111 01000010 00001 ..... ..... .....    @vvv
+xvsubwev_d_w     0111 01000010 00010 ..... ..... .....    @vvv
+xvsubwev_q_d     0111 01000010 00011 ..... ..... .....    @vvv
+xvsubwod_h_b     0111 01000010 01000 ..... ..... .....    @vvv
+xvsubwod_w_h     0111 01000010 01001 ..... ..... .....    @vvv
+xvsubwod_d_w     0111 01000010 01010 ..... ..... .....    @vvv
+xvsubwod_q_d     0111 01000010 01011 ..... ..... .....    @vvv
+
+xvaddwev_h_bu    0111 01000010 11100 ..... ..... .....    @vvv
+xvaddwev_w_hu    0111 01000010 11101 ..... ..... .....    @vvv
+xvaddwev_d_wu    0111 01000010 11110 ..... ..... .....    @vvv
+xvaddwev_q_du    0111 01000010 11111 ..... ..... .....    @vvv
+xvaddwod_h_bu    0111 01000011 00100 ..... ..... .....    @vvv
+xvaddwod_w_hu    0111 01000011 00101 ..... ..... .....    @vvv
+xvaddwod_d_wu    0111 01000011 00110 ..... ..... .....    @vvv
+xvaddwod_q_du    0111 01000011 00111 ..... ..... .....    @vvv
+
+xvsubwev_h_bu    0111 01000011 00000 ..... ..... .....    @vvv
+xvsubwev_w_hu    0111 01000011 00001 ..... ..... .....    @vvv
+xvsubwev_d_wu    0111 01000011 00010 ..... ..... .....    @vvv
+xvsubwev_q_du    0111 01000011 00011 ..... ..... .....    @vvv
+xvsubwod_h_bu    0111 01000011 01000 ..... ..... .....    @vvv
+xvsubwod_w_hu    0111 01000011 01001 ..... ..... .....    @vvv
+xvsubwod_d_wu    0111 01000011 01010 ..... ..... .....    @vvv
+xvsubwod_q_du    0111 01000011 01011 ..... ..... .....    @vvv
+
+xvaddwev_h_bu_b  0111 01000011 11100 ..... ..... .....    @vvv
+xvaddwev_w_hu_h  0111 01000011 11101 ..... ..... .....    @vvv
+xvaddwev_d_wu_w  0111 01000011 11110 ..... ..... .....    @vvv
+xvaddwev_q_du_d  0111 01000011 11111 ..... ..... .....    @vvv
+xvaddwod_h_bu_b  0111 01000100 00000 ..... ..... .....    @vvv
+xvaddwod_w_hu_h  0111 01000100 00001 ..... ..... .....    @vvv
+xvaddwod_d_wu_w  0111 01000100 00010 ..... ..... .....    @vvv
+xvaddwod_q_du_d  0111 01000100 00011 ..... ..... .....    @vvv
+
+xvavg_b          0111 01000110 01000 ..... ..... .....    @vvv
+xvavg_h          0111 01000110 01001 ..... ..... .....    @vvv
+xvavg_w          0111 01000110 01010 ..... ..... .....    @vvv
+xvavg_d          0111 01000110 01011 ..... ..... .....    @vvv
+xvavg_bu         0111 01000110 01100 ..... ..... .....    @vvv
+xvavg_hu         0111 01000110 01101 ..... ..... .....    @vvv
+xvavg_wu         0111 01000110 01110 ..... ..... .....    @vvv
+xvavg_du         0111 01000110 01111 ..... ..... .....    @vvv
+xvavgr_b         0111 01000110 10000 ..... ..... .....    @vvv
+xvavgr_h         0111 01000110 10001 ..... ..... .....    @vvv
+xvavgr_w         0111 01000110 10010 ..... ..... .....    @vvv
+xvavgr_d         0111 01000110 10011 ..... ..... .....    @vvv
+xvavgr_bu        0111 01000110 10100 ..... ..... .....    @vvv
+xvavgr_hu        0111 01000110 10101 ..... ..... .....    @vvv
+xvavgr_wu        0111 01000110 10110 ..... ..... .....    @vvv
+xvavgr_du        0111 01000110 10111 ..... ..... .....    @vvv
+
+xvabsd_b         0111 01000110 00000 ..... ..... .....    @vvv
+xvabsd_h         0111 01000110 00001 ..... ..... .....    @vvv
+xvabsd_w         0111 01000110 00010 ..... ..... .....    @vvv
+xvabsd_d         0111 01000110 00011 ..... ..... .....    @vvv
+xvabsd_bu        0111 01000110 00100 ..... ..... .....    @vvv
+xvabsd_hu        0111 01000110 00101 ..... ..... .....    @vvv
+xvabsd_wu        0111 01000110 00110 ..... ..... .....    @vvv
+xvabsd_du        0111 01000110 00111 ..... ..... .....    @vvv
+
+xvadda_b         0111 01000101 11000 ..... ..... .....    @vvv
+xvadda_h         0111 01000101 11001 ..... ..... .....    @vvv
+xvadda_w         0111 01000101 11010 ..... ..... .....    @vvv
+xvadda_d         0111 01000101 11011 ..... ..... .....    @vvv
+
+xvmax_b          0111 01000111 00000 ..... ..... .....    @vvv
+xvmax_h          0111 01000111 00001 ..... ..... .....    @vvv
+xvmax_w          0111 01000111 00010 ..... ..... .....    @vvv
+xvmax_d          0111 01000111 00011 ..... ..... .....    @vvv
+xvmax_bu         0111 01000111 01000 ..... ..... .....    @vvv
+xvmax_hu         0111 01000111 01001 ..... ..... .....    @vvv
+xvmax_wu         0111 01000111 01010 ..... ..... .....    @vvv
+xvmax_du         0111 01000111 01011 ..... ..... .....    @vvv
+
+xvmaxi_b         0111 01101001 00000 ..... ..... .....    @vv_i5
+xvmaxi_h         0111 01101001 00001 ..... ..... .....    @vv_i5
+xvmaxi_w         0111 01101001 00010 ..... ..... .....    @vv_i5
+xvmaxi_d         0111 01101001 00011 ..... ..... .....    @vv_i5
+xvmaxi_bu        0111 01101001 01000 ..... ..... .....    @vv_ui5
+xvmaxi_hu        0111 01101001 01001 ..... ..... .....    @vv_ui5
+xvmaxi_wu        0111 01101001 01010 ..... ..... .....    @vv_ui5
+xvmaxi_du        0111 01101001 01011 ..... ..... .....    @vv_ui5
+
+xvmin_b          0111 01000111 00100 ..... ..... .....    @vvv
+xvmin_h          0111 01000111 00101 ..... ..... .....    @vvv
+xvmin_w          0111 01000111 00110 ..... ..... .....    @vvv
+xvmin_d          0111 01000111 00111 ..... ..... .....    @vvv
+xvmin_bu         0111 01000111 01100 ..... ..... .....    @vvv
+xvmin_hu         0111 01000111 01101 ..... ..... .....    @vvv
+xvmin_wu         0111 01000111 01110 ..... ..... .....    @vvv
+xvmin_du         0111 01000111 01111 ..... ..... .....    @vvv
+
+xvmini_b         0111 01101001 00100 ..... ..... .....    @vv_i5
+xvmini_h         0111 01101001 00101 ..... ..... .....    @vv_i5
+xvmini_w         0111 01101001 00110 ..... ..... .....    @vv_i5
+xvmini_d         0111 01101001 00111 ..... ..... .....    @vv_i5
+xvmini_bu        0111 01101001 01100 ..... ..... .....    @vv_ui5
+xvmini_hu        0111 01101001 01101 ..... ..... .....    @vv_ui5
+xvmini_wu        0111 01101001 01110 ..... ..... .....    @vv_ui5
+xvmini_du        0111 01101001 01111 ..... ..... .....    @vv_ui5
+
+xvmul_b          0111 01001000 01000 ..... ..... .....    @vvv
+xvmul_h          0111 01001000 01001 ..... ..... .....    @vvv
+xvmul_w          0111 01001000 01010 ..... ..... .....    @vvv
+xvmul_d          0111 01001000 01011 ..... ..... .....    @vvv
+xvmuh_b          0111 01001000 01100 ..... ..... .....    @vvv
+xvmuh_h          0111 01001000 01101 ..... ..... .....    @vvv
+xvmuh_w          0111 01001000 01110 ..... ..... .....    @vvv
+xvmuh_d          0111 01001000 01111 ..... ..... .....    @vvv
+xvmuh_bu         0111 01001000 10000 ..... ..... .....    @vvv
+xvmuh_hu         0111 01001000 10001 ..... ..... .....    @vvv
+xvmuh_wu         0111 01001000 10010 ..... ..... .....    @vvv
+xvmuh_du         0111 01001000 10011 ..... ..... .....    @vvv
+
+xvmulwev_h_b     0111 01001001 00000 ..... ..... .....    @vvv
+xvmulwev_w_h     0111 01001001 00001 ..... ..... .....    @vvv
+xvmulwev_d_w     0111 01001001 00010 ..... ..... .....    @vvv
+xvmulwev_q_d     0111 01001001 00011 ..... ..... .....    @vvv
+xvmulwod_h_b     0111 01001001 00100 ..... ..... .....    @vvv
+xvmulwod_w_h     0111 01001001 00101 ..... ..... .....    @vvv
+xvmulwod_d_w     0111 01001001 00110 ..... ..... .....    @vvv
+xvmulwod_q_d     0111 01001001 00111 ..... ..... .....    @vvv
+xvmulwev_h_bu    0111 01001001 10000 ..... ..... .....    @vvv
+xvmulwev_w_hu    0111 01001001 10001 ..... ..... .....    @vvv
+xvmulwev_d_wu    0111 01001001 10010 ..... ..... .....    @vvv
+xvmulwev_q_du    0111 01001001 10011 ..... ..... .....    @vvv
+xvmulwod_h_bu    0111 01001001 10100 ..... ..... .....    @vvv
+xvmulwod_w_hu    0111 01001001 10101 ..... ..... .....    @vvv
+xvmulwod_d_wu    0111 01001001 10110 ..... ..... .....    @vvv
+xvmulwod_q_du    0111 01001001 10111 ..... ..... .....    @vvv
+xvmulwev_h_bu_b  0111 01001010 00000 ..... ..... .....    @vvv
+xvmulwev_w_hu_h  0111 01001010 00001 ..... ..... .....    @vvv
+xvmulwev_d_wu_w  0111 01001010 00010 ..... ..... .....    @vvv
+xvmulwev_q_du_d  0111 01001010 00011 ..... ..... .....    @vvv
+xvmulwod_h_bu_b  0111 01001010 00100 ..... ..... .....    @vvv
+xvmulwod_w_hu_h  0111 01001010 00101 ..... ..... .....    @vvv
+xvmulwod_d_wu_w  0111 01001010 00110 ..... ..... .....    @vvv
+xvmulwod_q_du_d  0111 01001010 00111 ..... ..... .....    @vvv
+
+xvmadd_b         0111 01001010 10000 ..... ..... .....    @vvv
+xvmadd_h         0111 01001010 10001 ..... ..... .....    @vvv
+xvmadd_w         0111 01001010 10010 ..... ..... .....    @vvv
+xvmadd_d         0111 01001010 10011 ..... ..... .....    @vvv
+xvmsub_b         0111 01001010 10100 ..... ..... .....    @vvv
+xvmsub_h         0111 01001010 10101 ..... ..... .....    @vvv
+xvmsub_w         0111 01001010 10110 ..... ..... .....    @vvv
+xvmsub_d         0111 01001010 10111 ..... ..... .....    @vvv
+
+xvmaddwev_h_b    0111 01001010 11000 ..... ..... .....    @vvv
+xvmaddwev_w_h    0111 01001010 11001 ..... ..... .....    @vvv
+xvmaddwev_d_w    0111 01001010 11010 ..... ..... .....    @vvv
+xvmaddwev_q_d    0111 01001010 11011 ..... ..... .....    @vvv
+xvmaddwod_h_b    0111 01001010 11100 ..... ..... .....    @vvv
+xvmaddwod_w_h    0111 01001010 11101 ..... ..... .....    @vvv
+xvmaddwod_d_w    0111 01001010 11110 ..... ..... .....    @vvv
+xvmaddwod_q_d    0111 01001010 11111 ..... ..... .....    @vvv
+xvmaddwev_h_bu   0111 01001011 01000 ..... ..... .....    @vvv
+xvmaddwev_w_hu   0111 01001011 01001 ..... ..... .....    @vvv
+xvmaddwev_d_wu   0111 01001011 01010 ..... ..... .....    @vvv
+xvmaddwev_q_du   0111 01001011 01011 ..... ..... .....    @vvv
+xvmaddwod_h_bu   0111 01001011 01100 ..... ..... .....    @vvv
+xvmaddwod_w_hu   0111 01001011 01101 ..... ..... .....    @vvv
+xvmaddwod_d_wu   0111 01001011 01110 ..... ..... .....    @vvv
+xvmaddwod_q_du   0111 01001011 01111 ..... ..... .....    @vvv
+xvmaddwev_h_bu_b 0111 01001011 11000 ..... ..... .....    @vvv
+xvmaddwev_w_hu_h 0111 01001011 11001 ..... ..... .....    @vvv
+xvmaddwev_d_wu_w 0111 01001011 11010 ..... ..... .....    @vvv
+xvmaddwev_q_du_d 0111 01001011 11011 ..... ..... .....    @vvv
+xvmaddwod_h_bu_b 0111 01001011 11100 ..... ..... .....    @vvv
+xvmaddwod_w_hu_h 0111 01001011 11101 ..... ..... .....    @vvv
+xvmaddwod_d_wu_w 0111 01001011 11110 ..... ..... .....    @vvv
+xvmaddwod_q_du_d 0111 01001011 11111 ..... ..... .....    @vvv
+
+xvdiv_b          0111 01001110 00000 ..... ..... .....    @vvv
+xvdiv_h          0111 01001110 00001 ..... ..... .....    @vvv
+xvdiv_w          0111 01001110 00010 ..... ..... .....    @vvv
+xvdiv_d          0111 01001110 00011 ..... ..... .....    @vvv
+xvmod_b          0111 01001110 00100 ..... ..... .....    @vvv
+xvmod_h          0111 01001110 00101 ..... ..... .....    @vvv
+xvmod_w          0111 01001110 00110 ..... ..... .....    @vvv
+xvmod_d          0111 01001110 00111 ..... ..... .....    @vvv
+xvdiv_bu         0111 01001110 01000 ..... ..... .....    @vvv
+xvdiv_hu         0111 01001110 01001 ..... ..... .....    @vvv
+xvdiv_wu         0111 01001110 01010 ..... ..... .....    @vvv
+xvdiv_du         0111 01001110 01011 ..... ..... .....    @vvv
+xvmod_bu         0111 01001110 01100 ..... ..... .....    @vvv
+xvmod_hu         0111 01001110 01101 ..... ..... .....    @vvv
+xvmod_wu         0111 01001110 01110 ..... ..... .....    @vvv
+xvmod_du         0111 01001110 01111 ..... ..... .....    @vvv
+
+xvsat_b          0111 01110010 01000 01 ... ..... .....   @vv_ui3
+xvsat_h          0111 01110010 01000 1 .... ..... .....   @vv_ui4
+xvsat_w          0111 01110010 01001 ..... ..... .....    @vv_ui5
+xvsat_d          0111 01110010 0101 ...... ..... .....    @vv_ui6
+xvsat_bu         0111 01110010 10000 01 ... ..... .....   @vv_ui3
+xvsat_hu         0111 01110010 10000 1 .... ..... .....   @vv_ui4
+xvsat_wu         0111 01110010 10001 ..... ..... .....    @vv_ui5
+xvsat_du         0111 01110010 1001 ...... ..... .....    @vv_ui6
+
+xvexth_h_b       0111 01101001 11101 11000 ..... .....    @vv
+xvexth_w_h       0111 01101001 11101 11001 ..... .....    @vv
+xvexth_d_w       0111 01101001 11101 11010 ..... .....    @vv
+xvexth_q_d       0111 01101001 11101 11011 ..... .....    @vv
+xvexth_hu_bu     0111 01101001 11101 11100 ..... .....    @vv
+xvexth_wu_hu     0111 01101001 11101 11101 ..... .....    @vv
+xvexth_du_wu     0111 01101001 11101 11110 ..... .....    @vv
+xvexth_qu_du     0111 01101001 11101 11111 ..... .....    @vv
+
+vext2xv_h_b      0111 01101001 11110 00100 ..... .....    @vv
+vext2xv_w_b      0111 01101001 11110 00101 ..... .....    @vv
+vext2xv_d_b      0111 01101001 11110 00110 ..... .....    @vv
+vext2xv_w_h      0111 01101001 11110 00111 ..... .....    @vv
+vext2xv_d_h      0111 01101001 11110 01000 ..... .....    @vv
+vext2xv_d_w      0111 01101001 11110 01001 ..... .....    @vv
+vext2xv_hu_bu    0111 01101001 11110 01010 ..... .....    @vv
+vext2xv_wu_bu    0111 01101001 11110 01011 ..... .....    @vv
+vext2xv_du_bu    0111 01101001 11110 01100 ..... .....    @vv
+vext2xv_wu_hu    0111 01101001 11110 01101 ..... .....    @vv
+vext2xv_du_hu    0111 01101001 11110 01110 ..... .....    @vv
+vext2xv_du_wu    0111 01101001 11110 01111 ..... .....    @vv
+
+xvsigncov_b      0111 01010010 11100 ..... ..... .....    @vvv
+xvsigncov_h      0111 01010010 11101 ..... ..... .....    @vvv
+xvsigncov_w      0111 01010010 11110 ..... ..... .....    @vvv
+xvsigncov_d      0111 01010010 11111 ..... ..... .....    @vvv
+
+xvmskltz_b       0111 01101001 11000 10000 ..... .....    @vv
+xvmskltz_h       0111 01101001 11000 10001 ..... .....    @vv
+xvmskltz_w       0111 01101001 11000 10010 ..... .....    @vv
+xvmskltz_d       0111 01101001 11000 10011 ..... .....    @vv
+xvmskgez_b       0111 01101001 11000 10100 ..... .....    @vv
+xvmsknz_b        0111 01101001 11000 11000 ..... .....    @vv
+
+xvldi            0111 01111110 00 ............. .....     @v_i13
+
+xvand_v          0111 01010010 01100 ..... ..... .....    @vvv
+xvor_v           0111 01010010 01101 ..... ..... .....    @vvv
+xvxor_v          0111 01010010 01110 ..... ..... .....    @vvv
+xvnor_v          0111 01010010 01111 ..... ..... .....    @vvv
+xvandn_v         0111 01010010 10000 ..... ..... .....    @vvv
+xvorn_v          0111 01010010 10001 ..... ..... .....    @vvv
+
+xvandi_b         0111 01111101 00 ........ ..... .....    @vv_ui8
+xvori_b          0111 01111101 01 ........ ..... .....    @vv_ui8
+xvxori_b         0111 01111101 10 ........ ..... .....    @vv_ui8
+xvnori_b         0111 01111101 11 ........ ..... .....    @vv_ui8
+
+xvsll_b          0111 01001110 10000 ..... ..... .....    @vvv
+xvsll_h          0111 01001110 10001 ..... ..... .....    @vvv
+xvsll_w          0111 01001110 10010 ..... ..... .....    @vvv
+xvsll_d          0111 01001110 10011 ..... ..... .....    @vvv
+xvslli_b         0111 01110010 11000 01 ... ..... .....   @vv_ui3
+xvslli_h         0111 01110010 11000 1 .... ..... .....   @vv_ui4
+xvslli_w         0111 01110010 11001 ..... ..... .....    @vv_ui5
+xvslli_d         0111 01110010 1101 ...... ..... .....    @vv_ui6
+xvsrl_b          0111 01001110 10100 ..... ..... .....    @vvv
+xvsrl_h          0111 01001110 10101 ..... ..... .....    @vvv
+xvsrl_w          0111 01001110 10110 ..... ..... .....    @vvv
+xvsrl_d          0111 01001110 10111 ..... ..... .....    @vvv
+xvsrli_b         0111 01110011 00000 01 ... ..... .....   @vv_ui3
+xvsrli_h         0111 01110011 00000 1 .... ..... .....   @vv_ui4
+xvsrli_w         0111 01110011 00001 ..... ..... .....    @vv_ui5
+xvsrli_d         0111 01110011 0001 ...... ..... .....    @vv_ui6
+xvsra_b          0111 01001110 11000 ..... ..... .....    @vvv
+xvsra_h          0111 01001110 11001 ..... ..... .....    @vvv
+xvsra_w          0111 01001110 11010 ..... ..... .....    @vvv
+xvsra_d          0111 01001110 11011 ..... ..... .....    @vvv
+xvsrai_b         0111 01110011 01000 01 ... ..... .....   @vv_ui3
+xvsrai_h         0111 01110011 01000 1 .... ..... .....   @vv_ui4
+xvsrai_w         0111 01110011 01001 ..... ..... .....    @vv_ui5
+xvsrai_d         0111 01110011 0101 ...... ..... .....    @vv_ui6
+xvrotr_b         0111 01001110 11100 ..... ..... .....    @vvv
+xvrotr_h         0111 01001110 11101 ..... ..... .....    @vvv
+xvrotr_w         0111 01001110 11110 ..... ..... .....    @vvv
+xvrotr_d         0111 01001110 11111 ..... ..... .....    @vvv
+xvrotri_b        0111 01101010 00000 01 ... ..... .....   @vv_ui3
+xvrotri_h        0111 01101010 00000 1 .... ..... .....   @vv_ui4
+xvrotri_w        0111 01101010 00001 ..... ..... .....    @vv_ui5
+xvrotri_d        0111 01101010 0001 ...... ..... .....    @vv_ui6
+
+xvsllwil_h_b     0111 01110000 10000 01 ... ..... .....   @vv_ui3
+xvsllwil_w_h     0111 01110000 10000 1 .... ..... .....   @vv_ui4
+xvsllwil_d_w     0111 01110000 10001 ..... ..... .....    @vv_ui5
+xvextl_q_d       0111 01110000 10010 00000 ..... .....    @vv
+xvsllwil_hu_bu   0111 01110000 11000 01 ... ..... .....   @vv_ui3
+xvsllwil_wu_hu   0111 01110000 11000 1 .... ..... .....   @vv_ui4
+xvsllwil_du_wu   0111 01110000 11001 ..... ..... .....    @vv_ui5
+xvextl_qu_du     0111 01110000 11010 00000 ..... .....    @vv
+
+xvsrlr_b         0111 01001111 00000 ..... ..... .....    @vvv
+xvsrlr_h         0111 01001111 00001 ..... ..... .....    @vvv
+xvsrlr_w         0111 01001111 00010 ..... ..... .....    @vvv
+xvsrlr_d         0111 01001111 00011 ..... ..... .....    @vvv
+xvsrlri_b        0111 01101010 01000 01 ... ..... .....   @vv_ui3
+xvsrlri_h        0111 01101010 01000 1 .... ..... .....   @vv_ui4
+xvsrlri_w        0111 01101010 01001 ..... ..... .....    @vv_ui5
+xvsrlri_d        0111 01101010 0101 ...... ..... .....    @vv_ui6
+xvsrar_b         0111 01001111 00100 ..... ..... .....    @vvv
+xvsrar_h         0111 01001111 00101 ..... ..... .....    @vvv
+xvsrar_w         0111 01001111 00110 ..... ..... .....    @vvv
+xvsrar_d         0111 01001111 00111 ..... ..... .....    @vvv
+xvsrari_b        0111 01101010 10000 01 ... ..... .....   @vv_ui3
+xvsrari_h        0111 01101010 10000 1 .... ..... .....   @vv_ui4
+xvsrari_w        0111 01101010 10001 ..... ..... .....    @vv_ui5
+xvsrari_d        0111 01101010 1001 ...... ..... .....    @vv_ui6
+
+xvsrln_b_h       0111 01001111 01001 ..... ..... .....    @vvv
+xvsrln_h_w       0111 01001111 01010 ..... ..... .....    @vvv
+xvsrln_w_d       0111 01001111 01011 ..... ..... .....    @vvv
+xvsran_b_h       0111 01001111 01101 ..... ..... .....    @vvv
+xvsran_h_w       0111 01001111 01110 ..... ..... .....    @vvv
+xvsran_w_d       0111 01001111 01111 ..... ..... .....    @vvv
+
+xvsrlni_b_h      0111 01110100 00000 1 .... ..... .....   @vv_ui4
+xvsrlni_h_w      0111 01110100 00001 ..... ..... .....    @vv_ui5
+xvsrlni_w_d      0111 01110100 0001 ...... ..... .....    @vv_ui6
+xvsrlni_d_q      0111 01110100 001 ....... ..... .....    @vv_ui7
+xvsrani_b_h      0111 01110101 10000 1 .... ..... .....   @vv_ui4
+xvsrani_h_w      0111 01110101 10001 ..... ..... .....    @vv_ui5
+xvsrani_w_d      0111 01110101 1001 ...... ..... .....    @vv_ui6
+xvsrani_d_q      0111 01110101 101 ....... ..... .....    @vv_ui7
+
+xvsrlrn_b_h      0111 01001111 10001 ..... ..... .....    @vvv
+xvsrlrn_h_w      0111 01001111 10010 ..... ..... .....    @vvv
+xvsrlrn_w_d      0111 01001111 10011 ..... ..... .....    @vvv
+xvsrarn_b_h      0111 01001111 10101 ..... ..... .....    @vvv
+xvsrarn_h_w      0111 01001111 10110 ..... ..... .....    @vvv
+xvsrarn_w_d      0111 01001111 10111 ..... ..... .....    @vvv
+
+xvsrlrni_b_h     0111 01110100 01000 1 .... ..... .....   @vv_ui4
+xvsrlrni_h_w     0111 01110100 01001 ..... ..... .....    @vv_ui5
+xvsrlrni_w_d     0111 01110100 0101 ...... ..... .....    @vv_ui6
+xvsrlrni_d_q     0111 01110100 011 ....... ..... .....    @vv_ui7
+xvsrarni_b_h     0111 01110101 11000 1 .... ..... .....   @vv_ui4
+xvsrarni_h_w     0111 01110101 11001 ..... ..... .....    @vv_ui5
+xvsrarni_w_d     0111 01110101 1101 ...... ..... .....    @vv_ui6
+xvsrarni_d_q     0111 01110101 111 ....... ..... .....    @vv_ui7
+
+xvssrln_b_h      0111 01001111 11001 ..... ..... .....    @vvv
+xvssrln_h_w      0111 01001111 11010 ..... ..... .....    @vvv
+xvssrln_w_d      0111 01001111 11011 ..... ..... .....    @vvv
+xvssran_b_h      0111 01001111 11101 ..... ..... .....    @vvv
+xvssran_h_w      0111 01001111 11110 ..... ..... .....    @vvv
+xvssran_w_d      0111 01001111 11111 ..... ..... .....    @vvv
+xvssrln_bu_h     0111 01010000 01001 ..... ..... .....    @vvv
+xvssrln_hu_w     0111 01010000 01010 ..... ..... .....    @vvv
+xvssrln_wu_d     0111 01010000 01011 ..... ..... .....    @vvv
+xvssran_bu_h     0111 01010000 01101 ..... ..... .....    @vvv
+xvssran_hu_w     0111 01010000 01110 ..... ..... .....    @vvv
+xvssran_wu_d     0111 01010000 01111 ..... ..... .....    @vvv
+
+xvssrlni_b_h     0111 01110100 10000 1 .... ..... .....   @vv_ui4
+xvssrlni_h_w     0111 01110100 10001 ..... ..... .....    @vv_ui5
+xvssrlni_w_d     0111 01110100 1001 ...... ..... .....    @vv_ui6
+xvssrlni_d_q     0111 01110100 101 ....... ..... .....    @vv_ui7
+xvssrani_b_h     0111 01110110 00000 1 .... ..... .....   @vv_ui4
+xvssrani_h_w     0111 01110110 00001 ..... ..... .....    @vv_ui5
+xvssrani_w_d     0111 01110110 0001 ...... ..... .....    @vv_ui6
+xvssrani_d_q     0111 01110110 001 ....... ..... .....    @vv_ui7
+xvssrlni_bu_h    0111 01110100 11000 1 .... ..... .....   @vv_ui4
+xvssrlni_hu_w    0111 01110100 11001 ..... ..... .....    @vv_ui5
+xvssrlni_wu_d    0111 01110100 1101 ...... ..... .....    @vv_ui6
+xvssrlni_du_q    0111 01110100 111 ....... ..... .....    @vv_ui7
+xvssrani_bu_h    0111 01110110 01000 1 .... ..... .....   @vv_ui4
+xvssrani_hu_w    0111 01110110 01001 ..... ..... .....    @vv_ui5
+xvssrani_wu_d    0111 01110110 0101 ...... ..... .....    @vv_ui6
+xvssrani_du_q    0111 01110110 011 ....... ..... .....    @vv_ui7
+
+xvssrlrn_b_h     0111 01010000 00001 ..... ..... .....    @vvv
+xvssrlrn_h_w     0111 01010000 00010 ..... ..... .....    @vvv
+xvssrlrn_w_d     0111 01010000 00011 ..... ..... .....    @vvv
+xvssrarn_b_h     0111 01010000 00101 ..... ..... .....    @vvv
+xvssrarn_h_w     0111 01010000 00110 ..... ..... .....    @vvv
+xvssrarn_w_d     0111 01010000 00111 ..... ..... .....    @vvv
+xvssrlrn_bu_h    0111 01010000 10001 ..... ..... .....    @vvv
+xvssrlrn_hu_w    0111 01010000 10010 ..... ..... .....    @vvv
+xvssrlrn_wu_d    0111 01010000 10011 ..... ..... .....    @vvv
+xvssrarn_bu_h    0111 01010000 10101 ..... ..... .....    @vvv
+xvssrarn_hu_w    0111 01010000 10110 ..... ..... .....    @vvv
+xvssrarn_wu_d    0111 01010000 10111 ..... ..... .....    @vvv
+
+xvssrlrni_b_h    0111 01110101 00000 1 .... ..... .....   @vv_ui4
+xvssrlrni_h_w    0111 01110101 00001 ..... ..... .....    @vv_ui5
+xvssrlrni_w_d    0111 01110101 0001 ...... ..... .....    @vv_ui6
+xvssrlrni_d_q    0111 01110101 001 ....... ..... .....    @vv_ui7
+xvssrarni_b_h    0111 01110110 10000 1 .... ..... .....   @vv_ui4
+xvssrarni_h_w    0111 01110110 10001 ..... ..... .....    @vv_ui5
+xvssrarni_w_d    0111 01110110 1001 ...... ..... .....    @vv_ui6
+xvssrarni_d_q    0111 01110110 101 ....... ..... .....    @vv_ui7
+xvssrlrni_bu_h   0111 01110101 01000 1 .... ..... .....   @vv_ui4
+xvssrlrni_hu_w   0111 01110101 01001 ..... ..... .....    @vv_ui5
+xvssrlrni_wu_d   0111 01110101 0101 ...... ..... .....    @vv_ui6
+xvssrlrni_du_q   0111 01110101 011 ....... ..... .....    @vv_ui7
+xvssrarni_bu_h   0111 01110110 11000 1 .... ..... .....   @vv_ui4
+xvssrarni_hu_w   0111 01110110 11001 ..... ..... .....    @vv_ui5
+xvssrarni_wu_d   0111 01110110 1101 ...... ..... .....    @vv_ui6
+xvssrarni_du_q   0111 01110110 111 ....... ..... .....    @vv_ui7
+
+xvclo_b          0111 01101001 11000 00000 ..... .....    @vv
+xvclo_h          0111 01101001 11000 00001 ..... .....    @vv
+xvclo_w          0111 01101001 11000 00010 ..... .....    @vv
+xvclo_d          0111 01101001 11000 00011 ..... .....    @vv
+xvclz_b          0111 01101001 11000 00100 ..... .....    @vv
+xvclz_h          0111 01101001 11000 00101 ..... .....    @vv
+xvclz_w          0111 01101001 11000 00110 ..... .....    @vv
+xvclz_d          0111 01101001 11000 00111 ..... .....    @vv
+
+xvpcnt_b         0111 01101001 11000 01000 ..... .....    @vv
+xvpcnt_h         0111 01101001 11000 01001 ..... .....    @vv
+xvpcnt_w         0111 01101001 11000 01010 ..... .....    @vv
+xvpcnt_d         0111 01101001 11000 01011 ..... .....    @vv
+
+xvbitclr_b       0111 01010000 11000 ..... ..... .....    @vvv
+xvbitclr_h       0111 01010000 11001 ..... ..... .....    @vvv
+xvbitclr_w       0111 01010000 11010 ..... ..... .....    @vvv
+xvbitclr_d       0111 01010000 11011 ..... ..... .....    @vvv
+xvbitclri_b      0111 01110001 00000 01 ... ..... .....   @vv_ui3
+xvbitclri_h      0111 01110001 00000 1 .... ..... .....   @vv_ui4
+xvbitclri_w      0111 01110001 00001 ..... ..... .....    @vv_ui5
+xvbitclri_d      0111 01110001 0001 ...... ..... .....    @vv_ui6
+
+xvbitset_b       0111 01010000 11100 ..... ..... .....    @vvv
+xvbitset_h       0111 01010000 11101 ..... ..... .....    @vvv
+xvbitset_w       0111 01010000 11110 ..... ..... .....    @vvv
+xvbitset_d       0111 01010000 11111 ..... ..... .....    @vvv
+xvbitseti_b      0111 01110001 01000 01 ... ..... .....   @vv_ui3
+xvbitseti_h      0111 01110001 01000 1 .... ..... .....   @vv_ui4
+xvbitseti_w      0111 01110001 01001 ..... ..... .....    @vv_ui5
+xvbitseti_d      0111 01110001 0101 ...... ..... .....    @vv_ui6
+
+xvbitrev_b       0111 01010001 00000 ..... ..... .....    @vvv
+xvbitrev_h       0111 01010001 00001 ..... ..... .....    @vvv
+xvbitrev_w       0111 01010001 00010 ..... ..... .....    @vvv
+xvbitrev_d       0111 01010001 00011 ..... ..... .....    @vvv
+xvbitrevi_b      0111 01110001 10000 01 ... ..... .....   @vv_ui3
+xvbitrevi_h      0111 01110001 10000 1 .... ..... .....   @vv_ui4
+xvbitrevi_w      0111 01110001 10001 ..... ..... .....    @vv_ui5
+xvbitrevi_d      0111 01110001 1001 ...... ..... .....    @vv_ui6
+
+xvfrstp_b        0111 01010010 10110 ..... ..... .....    @vvv
+xvfrstp_h        0111 01010010 10111 ..... ..... .....    @vvv
+xvfrstpi_b       0111 01101001 10100 ..... ..... .....    @vv_ui5
+xvfrstpi_h       0111 01101001 10101 ..... ..... .....    @vv_ui5
+
+xvfadd_s         0111 01010011 00001 ..... ..... .....    @vvv
+xvfadd_d         0111 01010011 00010 ..... ..... .....    @vvv
+xvfsub_s         0111 01010011 00101 ..... ..... .....    @vvv
+xvfsub_d         0111 01010011 00110 ..... ..... .....    @vvv
+xvfmul_s         0111 01010011 10001 ..... ..... .....    @vvv
+xvfmul_d         0111 01010011 10010 ..... ..... .....    @vvv
+xvfdiv_s         0111 01010011 10101 ..... ..... .....    @vvv
+xvfdiv_d         0111 01010011 10110 ..... ..... .....    @vvv
+
+xvfmadd_s        0000 10100001 ..... ..... ..... .....    @vvvv
+xvfmadd_d        0000 10100010 ..... ..... ..... .....    @vvvv
+xvfmsub_s        0000 10100101 ..... ..... ..... .....    @vvvv
+xvfmsub_d        0000 10100110 ..... ..... ..... .....    @vvvv
+xvfnmadd_s       0000 10101001 ..... ..... ..... .....    @vvvv
+xvfnmadd_d       0000 10101010 ..... ..... ..... .....    @vvvv
+xvfnmsub_s       0000 10101101 ..... ..... ..... .....    @vvvv
+xvfnmsub_d       0000 10101110 ..... ..... ..... .....    @vvvv
+
+xvfmax_s         0111 01010011 11001 ..... ..... .....    @vvv
+xvfmax_d         0111 01010011 11010 ..... ..... .....    @vvv
+xvfmin_s         0111 01010011 11101 ..... ..... .....    @vvv
+xvfmin_d         0111 01010011 11110 ..... ..... .....    @vvv
+
+xvfmaxa_s        0111 01010100 00001 ..... ..... .....    @vvv
+xvfmaxa_d        0111 01010100 00010 ..... ..... .....    @vvv
+xvfmina_s        0111 01010100 00101 ..... ..... .....    @vvv
+xvfmina_d        0111 01010100 00110 ..... ..... .....    @vvv
+
+xvflogb_s        0111 01101001 11001 10001 ..... .....    @vv
+xvflogb_d        0111 01101001 11001 10010 ..... .....    @vv
+
+xvfclass_s       0111 01101001 11001 10101 ..... .....    @vv
+xvfclass_d       0111 01101001 11001 10110 ..... .....    @vv
+
+xvfsqrt_s        0111 01101001 11001 11001 ..... .....    @vv
+xvfsqrt_d        0111 01101001 11001 11010 ..... .....    @vv
+xvfrecip_s       0111 01101001 11001 11101 ..... .....    @vv
+xvfrecip_d       0111 01101001 11001 11110 ..... .....    @vv
+xvfrsqrt_s       0111 01101001 11010 00001 ..... .....    @vv
+xvfrsqrt_d       0111 01101001 11010 00010 ..... .....    @vv
+
+xvfcvtl_s_h      0111 01101001 11011 11010 ..... .....    @vv
+xvfcvth_s_h      0111 01101001 11011 11011 ..... .....    @vv
+xvfcvtl_d_s      0111 01101001 11011 11100 ..... .....    @vv
+xvfcvth_d_s      0111 01101001 11011 11101 ..... .....    @vv
+xvfcvt_h_s       0111 01010100 01100 ..... ..... .....    @vvv
+xvfcvt_s_d       0111 01010100 01101 ..... ..... .....    @vvv
+
+xvfrintrne_s     0111 01101001 11010 11101 ..... .....    @vv
+xvfrintrne_d     0111 01101001 11010 11110 ..... .....    @vv
+xvfrintrz_s      0111 01101001 11010 11001 ..... .....    @vv
+xvfrintrz_d      0111 01101001 11010 11010 ..... .....    @vv
+xvfrintrp_s      0111 01101001 11010 10101 ..... .....    @vv
+xvfrintrp_d      0111 01101001 11010 10110 ..... .....    @vv
+xvfrintrm_s      0111 01101001 11010 10001 ..... .....    @vv
+xvfrintrm_d      0111 01101001 11010 10010 ..... .....    @vv
+xvfrint_s        0111 01101001 11010 01101 ..... .....    @vv
+xvfrint_d        0111 01101001 11010 01110 ..... .....    @vv
+
+xvftintrne_w_s   0111 01101001 11100 10100 ..... .....    @vv
+xvftintrne_l_d   0111 01101001 11100 10101 ..... .....    @vv
+xvftintrz_w_s    0111 01101001 11100 10010 ..... .....    @vv
+xvftintrz_l_d    0111 01101001 11100 10011 ..... .....    @vv
+xvftintrp_w_s    0111 01101001 11100 10000 ..... .....    @vv
+xvftintrp_l_d    0111 01101001 11100 10001 ..... .....    @vv
+xvftintrm_w_s    0111 01101001 11100 01110 ..... .....    @vv
+xvftintrm_l_d    0111 01101001 11100 01111 ..... .....    @vv
+xvftint_w_s      0111 01101001 11100 01100 ..... .....    @vv
+xvftint_l_d      0111 01101001 11100 01101 ..... .....    @vv
+xvftintrz_wu_s   0111 01101001 11100 11100 ..... .....    @vv
+xvftintrz_lu_d   0111 01101001 11100 11101 ..... .....    @vv
+xvftint_wu_s     0111 01101001 11100 10110 ..... .....    @vv
+xvftint_lu_d     0111 01101001 11100 10111 ..... .....    @vv
+
+xvftintrne_w_d   0111 01010100 10111 ..... ..... .....    @vvv
+xvftintrz_w_d    0111 01010100 10110 ..... ..... .....    @vvv
+xvftintrp_w_d    0111 01010100 10101 ..... ..... .....    @vvv
+xvftintrm_w_d    0111 01010100 10100 ..... ..... .....    @vvv
+xvftint_w_d      0111 01010100 10011 ..... ..... .....    @vvv
+
+xvftintrnel_l_s  0111 01101001 11101 01000 ..... .....    @vv
+xvftintrneh_l_s  0111 01101001 11101 01001 ..... .....    @vv
+xvftintrzl_l_s   0111 01101001 11101 00110 ..... .....    @vv
+xvftintrzh_l_s   0111 01101001 11101 00111 ..... .....    @vv
+xvftintrpl_l_s   0111 01101001 11101 00100 ..... .....    @vv
+xvftintrph_l_s   0111 01101001 11101 00101 ..... .....    @vv
+xvftintrml_l_s   0111 01101001 11101 00010 ..... .....    @vv
+xvftintrmh_l_s   0111 01101001 11101 00011 ..... .....    @vv
+xvftintl_l_s     0111 01101001 11101 00000 ..... .....    @vv
+xvftinth_l_s     0111 01101001 11101 00001 ..... .....    @vv
+
+xvffint_s_w      0111 01101001 11100 00000 ..... .....    @vv
+xvffint_d_l      0111 01101001 11100 00010 ..... .....    @vv
+xvffint_s_wu     0111 01101001 11100 00001 ..... .....    @vv
+xvffint_d_lu     0111 01101001 11100 00011 ..... .....    @vv
+xvffintl_d_w     0111 01101001 11100 00100 ..... .....    @vv
+xvffinth_d_w     0111 01101001 11100 00101 ..... .....    @vv
+xvffint_s_l      0111 01010100 10000 ..... ..... .....    @vvv
+
+xvseq_b          0111 01000000 00000 ..... ..... .....    @vvv
+xvseq_h          0111 01000000 00001 ..... ..... .....    @vvv
+xvseq_w          0111 01000000 00010 ..... ..... .....    @vvv
+xvseq_d          0111 01000000 00011 ..... ..... .....    @vvv
+xvseqi_b         0111 01101000 00000 ..... ..... .....    @vv_i5
+xvseqi_h         0111 01101000 00001 ..... ..... .....    @vv_i5
+xvseqi_w         0111 01101000 00010 ..... ..... .....    @vv_i5
+xvseqi_d         0111 01101000 00011 ..... ..... .....    @vv_i5
+
+xvsle_b          0111 01000000 00100 ..... ..... .....    @vvv
+xvsle_h          0111 01000000 00101 ..... ..... .....    @vvv
+xvsle_w          0111 01000000 00110 ..... ..... .....    @vvv
+xvsle_d          0111 01000000 00111 ..... ..... .....    @vvv
+xvslei_b         0111 01101000 00100 ..... ..... .....    @vv_i5
+xvslei_h         0111 01101000 00101 ..... ..... .....    @vv_i5
+xvslei_w         0111 01101000 00110 ..... ..... .....    @vv_i5
+xvslei_d         0111 01101000 00111 ..... ..... .....    @vv_i5
+xvsle_bu         0111 01000000 01000 ..... ..... .....    @vvv
+xvsle_hu         0111 01000000 01001 ..... ..... .....    @vvv
+xvsle_wu         0111 01000000 01010 ..... ..... .....    @vvv
+xvsle_du         0111 01000000 01011 ..... ..... .....    @vvv
+xvslei_bu        0111 01101000 01000 ..... ..... .....    @vv_ui5
+xvslei_hu        0111 01101000 01001 ..... ..... .....    @vv_ui5
+xvslei_wu        0111 01101000 01010 ..... ..... .....    @vv_ui5
+xvslei_du        0111 01101000 01011 ..... ..... .....    @vv_ui5
+
+xvslt_b          0111 01000000 01100 ..... ..... .....    @vvv
+xvslt_h          0111 01000000 01101 ..... ..... .....    @vvv
+xvslt_w          0111 01000000 01110 ..... ..... .....    @vvv
+xvslt_d          0111 01000000 01111 ..... ..... .....    @vvv
+xvslti_b         0111 01101000 01100 ..... ..... .....    @vv_i5
+xvslti_h         0111 01101000 01101 ..... ..... .....    @vv_i5
+xvslti_w         0111 01101000 01110 ..... ..... .....    @vv_i5
+xvslti_d         0111 01101000 01111 ..... ..... .....    @vv_i5
+xvslt_bu         0111 01000000 10000 ..... ..... .....    @vvv
+xvslt_hu         0111 01000000 10001 ..... ..... .....    @vvv
+xvslt_wu         0111 01000000 10010 ..... ..... .....    @vvv
+xvslt_du         0111 01000000 10011 ..... ..... .....    @vvv
+xvslti_bu        0111 01101000 10000 ..... ..... .....    @vv_ui5
+xvslti_hu        0111 01101000 10001 ..... ..... .....    @vv_ui5
+xvslti_wu        0111 01101000 10010 ..... ..... .....    @vv_ui5
+xvslti_du        0111 01101000 10011 ..... ..... .....    @vv_ui5
+
+xvfcmp_cond_s    0000 11001001 ..... ..... ..... .....    @vvv_fcond
+xvfcmp_cond_d    0000 11001010 ..... ..... ..... .....    @vvv_fcond
+
+xvbitsel_v       0000 11010010 ..... ..... ..... .....    @vvvv
+
+xvbitseli_b      0111 01111100 01 ........ ..... .....    @vv_ui8
+
+xvseteqz_v       0111 01101001 11001 00110 ..... 00 ...   @cv
+xvsetnez_v       0111 01101001 11001 00111 ..... 00 ...   @cv
+xvsetanyeqz_b    0111 01101001 11001 01000 ..... 00 ...   @cv
+xvsetanyeqz_h    0111 01101001 11001 01001 ..... 00 ...   @cv
+xvsetanyeqz_w    0111 01101001 11001 01010 ..... 00 ...   @cv
+xvsetanyeqz_d    0111 01101001 11001 01011 ..... 00 ...   @cv
+xvsetallnez_b    0111 01101001 11001 01100 ..... 00 ...   @cv
+xvsetallnez_h    0111 01101001 11001 01101 ..... 00 ...   @cv
+xvsetallnez_w    0111 01101001 11001 01110 ..... 00 ...   @cv
+xvsetallnez_d    0111 01101001 11001 01111 ..... 00 ...   @cv
+
+xvinsgr2vr_w     0111 01101110 10111 10 ... ..... .....   @vr_ui3
+xvinsgr2vr_d     0111 01101110 10111 110 .. ..... .....   @vr_ui2
+xvpickve2gr_w    0111 01101110 11111 10 ... ..... .....   @rv_ui3
+xvpickve2gr_d    0111 01101110 11111 110 .. ..... .....   @rv_ui2
+xvpickve2gr_wu   0111 01101111 00111 10 ... ..... .....   @rv_ui3
+xvpickve2gr_du   0111 01101111 00111 110 .. ..... .....   @rv_ui2
+
+xvreplgr2vr_b    0111 01101001 11110 00000 ..... .....    @vr
+xvreplgr2vr_h    0111 01101001 11110 00001 ..... .....    @vr
+xvreplgr2vr_w    0111 01101001 11110 00010 ..... .....    @vr
+xvreplgr2vr_d    0111 01101001 11110 00011 ..... .....    @vr
+
+xvreplve_b       0111 01010010 00100 ..... ..... .....    @vvr
+xvreplve_h       0111 01010010 00101 ..... ..... .....    @vvr
+xvreplve_w       0111 01010010 00110 ..... ..... .....    @vvr
+xvreplve_d       0111 01010010 00111 ..... ..... .....    @vvr
+
+xvrepl128vei_b   0111 01101111 01111 0 .... ..... .....   @vv_ui4
+xvrepl128vei_h   0111 01101111 01111 10 ... ..... .....   @vv_ui3
+xvrepl128vei_w   0111 01101111 01111 110 .. ..... .....   @vv_ui2
+xvrepl128vei_d   0111 01101111 01111 1110 . ..... .....   @vv_ui1
+
+xvreplve0_b      0111 01110000 01110 00000 ..... .....    @vv
+xvreplve0_h      0111 01110000 01111 00000 ..... .....    @vv
+xvreplve0_w      0111 01110000 01111 10000 ..... .....    @vv
+xvreplve0_d      0111 01110000 01111 11000 ..... .....    @vv
+xvreplve0_q      0111 01110000 01111 11100 ..... .....    @vv
+
+xvinsve0_w       0111 01101111 11111 10 ... ..... .....   @vv_ui3
+xvinsve0_d       0111 01101111 11111 110 .. ..... .....   @vv_ui2
+
+xvpickve_w       0111 01110000 00111 10 ... ..... .....   @vv_ui3
+xvpickve_d       0111 01110000 00111 110 .. ..... .....   @vv_ui2
+
+xvbsll_v         0111 01101000 11100 ..... ..... .....    @vv_ui5
+xvbsrl_v         0111 01101000 11101 ..... ..... .....    @vv_ui5
+
+xvpackev_b       0111 01010001 01100 ..... ..... .....    @vvv
+xvpackev_h       0111 01010001 01101 ..... ..... .....    @vvv
+xvpackev_w       0111 01010001 01110 ..... ..... .....    @vvv
+xvpackev_d       0111 01010001 01111 ..... ..... .....    @vvv
+xvpackod_b       0111 01010001 10000 ..... ..... .....    @vvv
+xvpackod_h       0111 01010001 10001 ..... ..... .....    @vvv
+xvpackod_w       0111 01010001 10010 ..... ..... .....    @vvv
+xvpackod_d       0111 01010001 10011 ..... ..... .....    @vvv
+
+xvpickev_b       0111 01010001 11100 ..... ..... .....    @vvv
+xvpickev_h       0111 01010001 11101 ..... ..... .....    @vvv
+xvpickev_w       0111 01010001 11110 ..... ..... .....    @vvv
+xvpickev_d       0111 01010001 11111 ..... ..... .....    @vvv
+xvpickod_b       0111 01010010 00000 ..... ..... .....    @vvv
+xvpickod_h       0111 01010010 00001 ..... ..... .....    @vvv
+xvpickod_w       0111 01010010 00010 ..... ..... .....    @vvv
+xvpickod_d       0111 01010010 00011 ..... ..... .....    @vvv
+
+xvilvl_b         0111 01010001 10100 ..... ..... .....    @vvv
+xvilvl_h         0111 01010001 10101 ..... ..... .....    @vvv
+xvilvl_w         0111 01010001 10110 ..... ..... .....    @vvv
+xvilvl_d         0111 01010001 10111 ..... ..... .....    @vvv
+xvilvh_b         0111 01010001 11000 ..... ..... .....    @vvv
+xvilvh_h         0111 01010001 11001 ..... ..... .....    @vvv
+xvilvh_w         0111 01010001 11010 ..... ..... .....    @vvv
+xvilvh_d         0111 01010001 11011 ..... ..... .....    @vvv
+
+xvshuf_b         0000 11010110 ..... ..... ..... .....    @vvvv
+xvshuf_h         0111 01010111 10101 ..... ..... .....    @vvv
+xvshuf_w         0111 01010111 10110 ..... ..... .....    @vvv
+xvshuf_d         0111 01010111 10111 ..... ..... .....    @vvv
+
+xvperm_w         0111 01010111 11010 ..... ..... .....    @vvv
+
+xvshuf4i_b       0111 01111001 00 ........ ..... .....    @vv_ui8
+xvshuf4i_h       0111 01111001 01 ........ ..... .....    @vv_ui8
+xvshuf4i_w       0111 01111001 10 ........ ..... .....    @vv_ui8
+xvshuf4i_d       0111 01111001 11 ........ ..... .....    @vv_ui8
+
+xvpermi_w        0111 01111110 01 ........ ..... .....    @vv_ui8
+xvpermi_d        0111 01111110 10 ........ ..... .....    @vv_ui8
+xvpermi_q        0111 01111110 11 ........ ..... .....    @vv_ui8
+
+xvextrins_d      0111 01111000 00 ........ ..... .....    @vv_ui8
+xvextrins_w      0111 01111000 01 ........ ..... .....    @vv_ui8
+xvextrins_h      0111 01111000 10 ........ ..... .....    @vv_ui8
+xvextrins_b      0111 01111000 11 ........ ..... .....    @vv_ui8
+
+xvld             0010 110010 ............ ..... .....     @vr_i12
+xvst             0010 110011 ............ ..... .....     @vr_i12
+xvldx            0011 10000100 10000 ..... ..... .....    @vrr
+xvstx            0011 10000100 11000 ..... ..... .....    @vrr
+
+xvldrepl_d       0011 00100001 0 ......... ..... .....    @vr_i9
+xvldrepl_w       0011 00100010 .......... ..... .....     @vr_i10
+xvldrepl_h       0011 0010010 ........... ..... .....     @vr_i11
+xvldrepl_b       0011 001010 ............ ..... .....     @vr_i12
+xvstelm_d        0011 00110001 .. ........ ..... .....    @vr_i8i2x
+xvstelm_w        0011 0011001 ... ........ ..... .....    @vr_i8i3x
+xvstelm_h        0011 001101 .... ........ ..... .....    @vr_i8i4x
+xvstelm_b        0011 00111 ..... ........ ..... .....    @vr_i8i5x
diff --git a/target/loongarch/internals.h b/target/loongarch/internals.h
index 7b0f29c942..c492863cc5 100644
--- a/target/loongarch/internals.h
+++ b/target/loongarch/internals.h
@@ -21,28 +21,6 @@
 /* Global bit for huge page */
 #define LOONGARCH_HGLOBAL_SHIFT     12
 
-#if  HOST_BIG_ENDIAN
-#define B(x)  B[15 - (x)]
-#define H(x)  H[7 - (x)]
-#define W(x)  W[3 - (x)]
-#define D(x)  D[1 - (x)]
-#define UB(x) UB[15 - (x)]
-#define UH(x) UH[7 - (x)]
-#define UW(x) UW[3 - (x)]
-#define UD(x) UD[1 -(x)]
-#define Q(x)  Q[x]
-#else
-#define B(x)  B[x]
-#define H(x)  H[x]
-#define W(x)  W[x]
-#define D(x)  D[x]
-#define UB(x) UB[x]
-#define UH(x) UH[x]
-#define UW(x) UW[x]
-#define UD(x) UD[x]
-#define Q(x)  Q[x]
-#endif
-
 void loongarch_translate_init(void);
 
 void loongarch_cpu_dump_state(CPUState *cpu, FILE *f, int flags);
diff --git a/target/loongarch/iocsr_helper.c b/target/loongarch/iocsr_helper.c
index dda9845d6c..6cd01d5f09 100644
--- a/target/loongarch/iocsr_helper.c
+++ b/target/loongarch/iocsr_helper.c
@@ -6,7 +6,6 @@
  */
 
 #include "qemu/osdep.h"
-#include "qemu/main-loop.h"
 #include "cpu.h"
 #include "qemu/host-utils.h"
 #include "exec/helper-proto.h"
diff --git a/target/loongarch/lsx_helper.c b/target/loongarch/lsx_helper.c
deleted file mode 100644
index 9571f0aef0..0000000000
--- a/target/loongarch/lsx_helper.c
+++ /dev/null
@@ -1,3004 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * QEMU LoongArch LSX helper functions.
- *
- * Copyright (c) 2022-2023 Loongson Technology Corporation Limited
- */
-
-#include "qemu/osdep.h"
-#include "cpu.h"
-#include "exec/exec-all.h"
-#include "exec/helper-proto.h"
-#include "fpu/softfloat.h"
-#include "internals.h"
-#include "tcg/tcg.h"
-
-#define DO_ADD(a, b)  (a + b)
-#define DO_SUB(a, b)  (a - b)
-
-#define DO_ODD_EVEN(NAME, BIT, E1, E2, DO_OP)                        \
-void HELPER(NAME)(CPULoongArchState *env,                            \
-                  uint32_t vd, uint32_t vj, uint32_t vk)             \
-{                                                                    \
-    int i;                                                           \
-    VReg *Vd = &(env->fpr[vd].vreg);                                 \
-    VReg *Vj = &(env->fpr[vj].vreg);                                 \
-    VReg *Vk = &(env->fpr[vk].vreg);                                 \
-    typedef __typeof(Vd->E1(0)) TD;                                  \
-                                                                     \
-    for (i = 0; i < LSX_LEN/BIT; i++) {                              \
-        Vd->E1(i) = DO_OP((TD)Vj->E2(2 * i + 1), (TD)Vk->E2(2 * i)); \
-    }                                                                \
-}
-
-DO_ODD_EVEN(vhaddw_h_b, 16, H, B, DO_ADD)
-DO_ODD_EVEN(vhaddw_w_h, 32, W, H, DO_ADD)
-DO_ODD_EVEN(vhaddw_d_w, 64, D, W, DO_ADD)
-
-void HELPER(vhaddw_q_d)(CPULoongArchState *env,
-                        uint32_t vd, uint32_t vj, uint32_t vk)
-{
-    VReg *Vd = &(env->fpr[vd].vreg);
-    VReg *Vj = &(env->fpr[vj].vreg);
-    VReg *Vk = &(env->fpr[vk].vreg);
-
-    Vd->Q(0) = int128_add(int128_makes64(Vj->D(1)), int128_makes64(Vk->D(0)));
-}
-
-DO_ODD_EVEN(vhsubw_h_b, 16, H, B, DO_SUB)
-DO_ODD_EVEN(vhsubw_w_h, 32, W, H, DO_SUB)
-DO_ODD_EVEN(vhsubw_d_w, 64, D, W, DO_SUB)
-
-void HELPER(vhsubw_q_d)(CPULoongArchState *env,
-                        uint32_t vd, uint32_t vj, uint32_t vk)
-{
-    VReg *Vd = &(env->fpr[vd].vreg);
-    VReg *Vj = &(env->fpr[vj].vreg);
-    VReg *Vk = &(env->fpr[vk].vreg);
-
-    Vd->Q(0) = int128_sub(int128_makes64(Vj->D(1)), int128_makes64(Vk->D(0)));
-}
-
-DO_ODD_EVEN(vhaddw_hu_bu, 16, UH, UB, DO_ADD)
-DO_ODD_EVEN(vhaddw_wu_hu, 32, UW, UH, DO_ADD)
-DO_ODD_EVEN(vhaddw_du_wu, 64, UD, UW, DO_ADD)
-
-void HELPER(vhaddw_qu_du)(CPULoongArchState *env,
-                          uint32_t vd, uint32_t vj, uint32_t vk)
-{
-    VReg *Vd = &(env->fpr[vd].vreg);
-    VReg *Vj = &(env->fpr[vj].vreg);
-    VReg *Vk = &(env->fpr[vk].vreg);
-
-    Vd->Q(0) = int128_add(int128_make64((uint64_t)Vj->D(1)),
-                          int128_make64((uint64_t)Vk->D(0)));
-}
-
-DO_ODD_EVEN(vhsubw_hu_bu, 16, UH, UB, DO_SUB)
-DO_ODD_EVEN(vhsubw_wu_hu, 32, UW, UH, DO_SUB)
-DO_ODD_EVEN(vhsubw_du_wu, 64, UD, UW, DO_SUB)
-
-void HELPER(vhsubw_qu_du)(CPULoongArchState *env,
-                          uint32_t vd, uint32_t vj, uint32_t vk)
-{
-    VReg *Vd = &(env->fpr[vd].vreg);
-    VReg *Vj = &(env->fpr[vj].vreg);
-    VReg *Vk = &(env->fpr[vk].vreg);
-
-    Vd->Q(0) = int128_sub(int128_make64((uint64_t)Vj->D(1)),
-                          int128_make64((uint64_t)Vk->D(0)));
-}
-
-#define DO_EVEN(NAME, BIT, E1, E2, DO_OP)                        \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v)      \
-{                                                                \
-    int i;                                                       \
-    VReg *Vd = (VReg *)vd;                                       \
-    VReg *Vj = (VReg *)vj;                                       \
-    VReg *Vk = (VReg *)vk;                                       \
-    typedef __typeof(Vd->E1(0)) TD;                              \
-    for (i = 0; i < LSX_LEN/BIT; i++) {                          \
-        Vd->E1(i) = DO_OP((TD)Vj->E2(2 * i) ,(TD)Vk->E2(2 * i)); \
-    }                                                            \
-}
-
-#define DO_ODD(NAME, BIT, E1, E2, DO_OP)                                 \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v)              \
-{                                                                        \
-    int i;                                                               \
-    VReg *Vd = (VReg *)vd;                                               \
-    VReg *Vj = (VReg *)vj;                                               \
-    VReg *Vk = (VReg *)vk;                                               \
-    typedef __typeof(Vd->E1(0)) TD;                                      \
-    for (i = 0; i < LSX_LEN/BIT; i++) {                                  \
-        Vd->E1(i) = DO_OP((TD)Vj->E2(2 * i + 1), (TD)Vk->E2(2 * i + 1)); \
-    }                                                                    \
-}
-
-void HELPER(vaddwev_q_d)(void *vd, void *vj, void *vk, uint32_t v)
-{
-    VReg *Vd = (VReg *)vd;
-    VReg *Vj = (VReg *)vj;
-    VReg *Vk = (VReg *)vk;
-
-    Vd->Q(0) = int128_add(int128_makes64(Vj->D(0)), int128_makes64(Vk->D(0)));
-}
-
-DO_EVEN(vaddwev_h_b, 16, H, B, DO_ADD)
-DO_EVEN(vaddwev_w_h, 32, W, H, DO_ADD)
-DO_EVEN(vaddwev_d_w, 64, D, W, DO_ADD)
-
-void HELPER(vaddwod_q_d)(void *vd, void *vj, void *vk, uint32_t v)
-{
-    VReg *Vd = (VReg *)vd;
-    VReg *Vj = (VReg *)vj;
-    VReg *Vk = (VReg *)vk;
-
-    Vd->Q(0) = int128_add(int128_makes64(Vj->D(1)), int128_makes64(Vk->D(1)));
-}
-
-DO_ODD(vaddwod_h_b, 16, H, B, DO_ADD)
-DO_ODD(vaddwod_w_h, 32, W, H, DO_ADD)
-DO_ODD(vaddwod_d_w, 64, D, W, DO_ADD)
-
-void HELPER(vsubwev_q_d)(void *vd, void *vj, void *vk, uint32_t v)
-{
-    VReg *Vd = (VReg *)vd;
-    VReg *Vj = (VReg *)vj;
-    VReg *Vk = (VReg *)vk;
-
-    Vd->Q(0) = int128_sub(int128_makes64(Vj->D(0)), int128_makes64(Vk->D(0)));
-}
-
-DO_EVEN(vsubwev_h_b, 16, H, B, DO_SUB)
-DO_EVEN(vsubwev_w_h, 32, W, H, DO_SUB)
-DO_EVEN(vsubwev_d_w, 64, D, W, DO_SUB)
-
-void HELPER(vsubwod_q_d)(void *vd, void *vj, void *vk, uint32_t v)
-{
-    VReg *Vd = (VReg *)vd;
-    VReg *Vj = (VReg *)vj;
-    VReg *Vk = (VReg *)vk;
-
-    Vd->Q(0) = int128_sub(int128_makes64(Vj->D(1)), int128_makes64(Vk->D(1)));
-}
-
-DO_ODD(vsubwod_h_b, 16, H, B, DO_SUB)
-DO_ODD(vsubwod_w_h, 32, W, H, DO_SUB)
-DO_ODD(vsubwod_d_w, 64, D, W, DO_SUB)
-
-void HELPER(vaddwev_q_du)(void *vd, void *vj, void *vk, uint32_t v)
-{
-    VReg *Vd = (VReg *)vd;
-    VReg *Vj = (VReg *)vj;
-    VReg *Vk = (VReg *)vk;
-
-    Vd->Q(0) = int128_add(int128_make64((uint64_t)Vj->D(0)),
-                          int128_make64((uint64_t)Vk->D(0)));
-}
-
-DO_EVEN(vaddwev_h_bu, 16, UH, UB, DO_ADD)
-DO_EVEN(vaddwev_w_hu, 32, UW, UH, DO_ADD)
-DO_EVEN(vaddwev_d_wu, 64, UD, UW, DO_ADD)
-
-void HELPER(vaddwod_q_du)(void *vd, void *vj, void *vk, uint32_t v)
-{
-    VReg *Vd = (VReg *)vd;
-    VReg *Vj = (VReg *)vj;
-    VReg *Vk = (VReg *)vk;
-
-    Vd->Q(0) = int128_add(int128_make64((uint64_t)Vj->D(1)),
-                          int128_make64((uint64_t)Vk->D(1)));
-}
-
-DO_ODD(vaddwod_h_bu, 16, UH, UB, DO_ADD)
-DO_ODD(vaddwod_w_hu, 32, UW, UH, DO_ADD)
-DO_ODD(vaddwod_d_wu, 64, UD, UW, DO_ADD)
-
-void HELPER(vsubwev_q_du)(void *vd, void *vj, void *vk, uint32_t v)
-{
-    VReg *Vd = (VReg *)vd;
-    VReg *Vj = (VReg *)vj;
-    VReg *Vk = (VReg *)vk;
-
-    Vd->Q(0) = int128_sub(int128_make64((uint64_t)Vj->D(0)),
-                          int128_make64((uint64_t)Vk->D(0)));
-}
-
-DO_EVEN(vsubwev_h_bu, 16, UH, UB, DO_SUB)
-DO_EVEN(vsubwev_w_hu, 32, UW, UH, DO_SUB)
-DO_EVEN(vsubwev_d_wu, 64, UD, UW, DO_SUB)
-
-void HELPER(vsubwod_q_du)(void *vd, void *vj, void *vk, uint32_t v)
-{
-    VReg *Vd = (VReg *)vd;
-    VReg *Vj = (VReg *)vj;
-    VReg *Vk = (VReg *)vk;
-
-    Vd->Q(0) = int128_sub(int128_make64((uint64_t)Vj->D(1)),
-                          int128_make64((uint64_t)Vk->D(1)));
-}
-
-DO_ODD(vsubwod_h_bu, 16, UH, UB, DO_SUB)
-DO_ODD(vsubwod_w_hu, 32, UW, UH, DO_SUB)
-DO_ODD(vsubwod_d_wu, 64, UD, UW, DO_SUB)
-
-#define DO_EVEN_U_S(NAME, BIT, ES1, EU1, ES2, EU2, DO_OP)             \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v)           \
-{                                                                     \
-    int i;                                                            \
-    VReg *Vd = (VReg *)vd;                                            \
-    VReg *Vj = (VReg *)vj;                                            \
-    VReg *Vk = (VReg *)vk;                                            \
-    typedef __typeof(Vd->ES1(0)) TDS;                                 \
-    typedef __typeof(Vd->EU1(0)) TDU;                                 \
-    for (i = 0; i < LSX_LEN/BIT; i++) {                               \
-        Vd->ES1(i) = DO_OP((TDU)Vj->EU2(2 * i) ,(TDS)Vk->ES2(2 * i)); \
-    }                                                                 \
-}
-
-#define DO_ODD_U_S(NAME, BIT, ES1, EU1, ES2, EU2, DO_OP)                      \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v)                   \
-{                                                                             \
-    int i;                                                                    \
-    VReg *Vd = (VReg *)vd;                                                    \
-    VReg *Vj = (VReg *)vj;                                                    \
-    VReg *Vk = (VReg *)vk;                                                    \
-    typedef __typeof(Vd->ES1(0)) TDS;                                         \
-    typedef __typeof(Vd->EU1(0)) TDU;                                         \
-    for (i = 0; i < LSX_LEN/BIT; i++) {                                       \
-        Vd->ES1(i) = DO_OP((TDU)Vj->EU2(2 * i + 1), (TDS)Vk->ES2(2 * i + 1)); \
-    }                                                                         \
-}
-
-void HELPER(vaddwev_q_du_d)(void *vd, void *vj, void *vk, uint32_t v)
-{
-    VReg *Vd = (VReg *)vd;
-    VReg *Vj = (VReg *)vj;
-    VReg *Vk = (VReg *)vk;
-
-    Vd->Q(0) = int128_add(int128_make64((uint64_t)Vj->D(0)),
-                          int128_makes64(Vk->D(0)));
-}
-
-DO_EVEN_U_S(vaddwev_h_bu_b, 16, H, UH, B, UB, DO_ADD)
-DO_EVEN_U_S(vaddwev_w_hu_h, 32, W, UW, H, UH, DO_ADD)
-DO_EVEN_U_S(vaddwev_d_wu_w, 64, D, UD, W, UW, DO_ADD)
-
-void HELPER(vaddwod_q_du_d)(void *vd, void *vj, void *vk, uint32_t v)
-{
-    VReg *Vd = (VReg *)vd;
-    VReg *Vj = (VReg *)vj;
-    VReg *Vk = (VReg *)vk;
-
-    Vd->Q(0) = int128_add(int128_make64((uint64_t)Vj->D(1)),
-                          int128_makes64(Vk->D(1)));
-}
-
-DO_ODD_U_S(vaddwod_h_bu_b, 16, H, UH, B, UB, DO_ADD)
-DO_ODD_U_S(vaddwod_w_hu_h, 32, W, UW, H, UH, DO_ADD)
-DO_ODD_U_S(vaddwod_d_wu_w, 64, D, UD, W, UW, DO_ADD)
-
-#define DO_VAVG(a, b)  ((a >> 1) + (b >> 1) + (a & b & 1))
-#define DO_VAVGR(a, b) ((a >> 1) + (b >> 1) + ((a | b) & 1))
-
-#define DO_3OP(NAME, BIT, E, DO_OP)                         \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \
-{                                                           \
-    int i;                                                  \
-    VReg *Vd = (VReg *)vd;                                  \
-    VReg *Vj = (VReg *)vj;                                  \
-    VReg *Vk = (VReg *)vk;                                  \
-    for (i = 0; i < LSX_LEN/BIT; i++) {                     \
-        Vd->E(i) = DO_OP(Vj->E(i), Vk->E(i));               \
-    }                                                       \
-}
-
-DO_3OP(vavg_b, 8, B, DO_VAVG)
-DO_3OP(vavg_h, 16, H, DO_VAVG)
-DO_3OP(vavg_w, 32, W, DO_VAVG)
-DO_3OP(vavg_d, 64, D, DO_VAVG)
-DO_3OP(vavgr_b, 8, B, DO_VAVGR)
-DO_3OP(vavgr_h, 16, H, DO_VAVGR)
-DO_3OP(vavgr_w, 32, W, DO_VAVGR)
-DO_3OP(vavgr_d, 64, D, DO_VAVGR)
-DO_3OP(vavg_bu, 8, UB, DO_VAVG)
-DO_3OP(vavg_hu, 16, UH, DO_VAVG)
-DO_3OP(vavg_wu, 32, UW, DO_VAVG)
-DO_3OP(vavg_du, 64, UD, DO_VAVG)
-DO_3OP(vavgr_bu, 8, UB, DO_VAVGR)
-DO_3OP(vavgr_hu, 16, UH, DO_VAVGR)
-DO_3OP(vavgr_wu, 32, UW, DO_VAVGR)
-DO_3OP(vavgr_du, 64, UD, DO_VAVGR)
-
-#define DO_VABSD(a, b)  ((a > b) ? (a -b) : (b-a))
-
-DO_3OP(vabsd_b, 8, B, DO_VABSD)
-DO_3OP(vabsd_h, 16, H, DO_VABSD)
-DO_3OP(vabsd_w, 32, W, DO_VABSD)
-DO_3OP(vabsd_d, 64, D, DO_VABSD)
-DO_3OP(vabsd_bu, 8, UB, DO_VABSD)
-DO_3OP(vabsd_hu, 16, UH, DO_VABSD)
-DO_3OP(vabsd_wu, 32, UW, DO_VABSD)
-DO_3OP(vabsd_du, 64, UD, DO_VABSD)
-
-#define DO_VABS(a)  ((a < 0) ? (-a) : (a))
-
-#define DO_VADDA(NAME, BIT, E, DO_OP)                       \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \
-{                                                           \
-    int i;                                                  \
-    VReg *Vd = (VReg *)vd;                                  \
-    VReg *Vj = (VReg *)vj;                                  \
-    VReg *Vk = (VReg *)vk;                                  \
-    for (i = 0; i < LSX_LEN/BIT; i++) {                     \
-        Vd->E(i) = DO_OP(Vj->E(i)) + DO_OP(Vk->E(i));       \
-    }                                                       \
-}
-
-DO_VADDA(vadda_b, 8, B, DO_VABS)
-DO_VADDA(vadda_h, 16, H, DO_VABS)
-DO_VADDA(vadda_w, 32, W, DO_VABS)
-DO_VADDA(vadda_d, 64, D, DO_VABS)
-
-#define DO_MIN(a, b) (a < b ? a : b)
-#define DO_MAX(a, b) (a > b ? a : b)
-
-#define VMINMAXI(NAME, BIT, E, DO_OP)                           \
-void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t v) \
-{                                                               \
-    int i;                                                      \
-    VReg *Vd = (VReg *)vd;                                      \
-    VReg *Vj = (VReg *)vj;                                      \
-    typedef __typeof(Vd->E(0)) TD;                              \
-                                                                \
-    for (i = 0; i < LSX_LEN/BIT; i++) {                         \
-        Vd->E(i) = DO_OP(Vj->E(i), (TD)imm);                    \
-    }                                                           \
-}
-
-VMINMAXI(vmini_b, 8, B, DO_MIN)
-VMINMAXI(vmini_h, 16, H, DO_MIN)
-VMINMAXI(vmini_w, 32, W, DO_MIN)
-VMINMAXI(vmini_d, 64, D, DO_MIN)
-VMINMAXI(vmaxi_b, 8, B, DO_MAX)
-VMINMAXI(vmaxi_h, 16, H, DO_MAX)
-VMINMAXI(vmaxi_w, 32, W, DO_MAX)
-VMINMAXI(vmaxi_d, 64, D, DO_MAX)
-VMINMAXI(vmini_bu, 8, UB, DO_MIN)
-VMINMAXI(vmini_hu, 16, UH, DO_MIN)
-VMINMAXI(vmini_wu, 32, UW, DO_MIN)
-VMINMAXI(vmini_du, 64, UD, DO_MIN)
-VMINMAXI(vmaxi_bu, 8, UB, DO_MAX)
-VMINMAXI(vmaxi_hu, 16, UH, DO_MAX)
-VMINMAXI(vmaxi_wu, 32, UW, DO_MAX)
-VMINMAXI(vmaxi_du, 64, UD, DO_MAX)
-
-#define DO_VMUH(NAME, BIT, E1, E2, DO_OP)                   \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \
-{                                                           \
-    int i;                                                  \
-    VReg *Vd = (VReg *)vd;                                  \
-    VReg *Vj = (VReg *)vj;                                  \
-    VReg *Vk = (VReg *)vk;                                  \
-    typedef __typeof(Vd->E1(0)) T;                          \
-                                                            \
-    for (i = 0; i < LSX_LEN/BIT; i++) {                     \
-        Vd->E2(i) = ((T)Vj->E2(i)) * ((T)Vk->E2(i)) >> BIT; \
-    }                                                       \
-}
-
-void HELPER(vmuh_d)(void *vd, void *vj, void *vk, uint32_t v)
-{
-    uint64_t l, h1, h2;
-    VReg *Vd = (VReg *)vd;
-    VReg *Vj = (VReg *)vj;
-    VReg *Vk = (VReg *)vk;
-
-    muls64(&l, &h1, Vj->D(0), Vk->D(0));
-    muls64(&l, &h2, Vj->D(1), Vk->D(1));
-
-    Vd->D(0) = h1;
-    Vd->D(1) = h2;
-}
-
-DO_VMUH(vmuh_b, 8, H, B, DO_MUH)
-DO_VMUH(vmuh_h, 16, W, H, DO_MUH)
-DO_VMUH(vmuh_w, 32, D, W, DO_MUH)
-
-void HELPER(vmuh_du)(void *vd, void *vj, void *vk, uint32_t v)
-{
-    uint64_t l, h1, h2;
-    VReg *Vd = (VReg *)vd;
-    VReg *Vj = (VReg *)vj;
-    VReg *Vk = (VReg *)vk;
-
-    mulu64(&l, &h1, Vj->D(0), Vk->D(0));
-    mulu64(&l, &h2, Vj->D(1), Vk->D(1));
-
-    Vd->D(0) = h1;
-    Vd->D(1) = h2;
-}
-
-DO_VMUH(vmuh_bu, 8, UH, UB, DO_MUH)
-DO_VMUH(vmuh_hu, 16, UW, UH, DO_MUH)
-DO_VMUH(vmuh_wu, 32, UD, UW, DO_MUH)
-
-#define DO_MUL(a, b) (a * b)
-
-DO_EVEN(vmulwev_h_b, 16, H, B, DO_MUL)
-DO_EVEN(vmulwev_w_h, 32, W, H, DO_MUL)
-DO_EVEN(vmulwev_d_w, 64, D, W, DO_MUL)
-
-DO_ODD(vmulwod_h_b, 16, H, B, DO_MUL)
-DO_ODD(vmulwod_w_h, 32, W, H, DO_MUL)
-DO_ODD(vmulwod_d_w, 64, D, W, DO_MUL)
-
-DO_EVEN(vmulwev_h_bu, 16, UH, UB, DO_MUL)
-DO_EVEN(vmulwev_w_hu, 32, UW, UH, DO_MUL)
-DO_EVEN(vmulwev_d_wu, 64, UD, UW, DO_MUL)
-
-DO_ODD(vmulwod_h_bu, 16, UH, UB, DO_MUL)
-DO_ODD(vmulwod_w_hu, 32, UW, UH, DO_MUL)
-DO_ODD(vmulwod_d_wu, 64, UD, UW, DO_MUL)
-
-DO_EVEN_U_S(vmulwev_h_bu_b, 16, H, UH, B, UB, DO_MUL)
-DO_EVEN_U_S(vmulwev_w_hu_h, 32, W, UW, H, UH, DO_MUL)
-DO_EVEN_U_S(vmulwev_d_wu_w, 64, D, UD, W, UW, DO_MUL)
-
-DO_ODD_U_S(vmulwod_h_bu_b, 16, H, UH, B, UB, DO_MUL)
-DO_ODD_U_S(vmulwod_w_hu_h, 32, W, UW, H, UH, DO_MUL)
-DO_ODD_U_S(vmulwod_d_wu_w, 64, D, UD, W, UW, DO_MUL)
-
-#define DO_MADD(a, b, c)  (a + b * c)
-#define DO_MSUB(a, b, c)  (a - b * c)
-
-#define VMADDSUB(NAME, BIT, E, DO_OP)                       \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \
-{                                                           \
-    int i;                                                  \
-    VReg *Vd = (VReg *)vd;                                  \
-    VReg *Vj = (VReg *)vj;                                  \
-    VReg *Vk = (VReg *)vk;                                  \
-    for (i = 0; i < LSX_LEN/BIT; i++) {                     \
-        Vd->E(i) = DO_OP(Vd->E(i), Vj->E(i) ,Vk->E(i));     \
-    }                                                       \
-}
-
-VMADDSUB(vmadd_b, 8, B, DO_MADD)
-VMADDSUB(vmadd_h, 16, H, DO_MADD)
-VMADDSUB(vmadd_w, 32, W, DO_MADD)
-VMADDSUB(vmadd_d, 64, D, DO_MADD)
-VMADDSUB(vmsub_b, 8, B, DO_MSUB)
-VMADDSUB(vmsub_h, 16, H, DO_MSUB)
-VMADDSUB(vmsub_w, 32, W, DO_MSUB)
-VMADDSUB(vmsub_d, 64, D, DO_MSUB)
-
-#define VMADDWEV(NAME, BIT, E1, E2, DO_OP)                        \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v)       \
-{                                                                 \
-    int i;                                                        \
-    VReg *Vd = (VReg *)vd;                                        \
-    VReg *Vj = (VReg *)vj;                                        \
-    VReg *Vk = (VReg *)vk;                                        \
-    typedef __typeof(Vd->E1(0)) TD;                               \
-                                                                  \
-    for (i = 0; i < LSX_LEN/BIT; i++) {                           \
-        Vd->E1(i) += DO_OP((TD)Vj->E2(2 * i), (TD)Vk->E2(2 * i)); \
-    }                                                             \
-}
-
-VMADDWEV(vmaddwev_h_b, 16, H, B, DO_MUL)
-VMADDWEV(vmaddwev_w_h, 32, W, H, DO_MUL)
-VMADDWEV(vmaddwev_d_w, 64, D, W, DO_MUL)
-VMADDWEV(vmaddwev_h_bu, 16, UH, UB, DO_MUL)
-VMADDWEV(vmaddwev_w_hu, 32, UW, UH, DO_MUL)
-VMADDWEV(vmaddwev_d_wu, 64, UD, UW, DO_MUL)
-
-#define VMADDWOD(NAME, BIT, E1, E2, DO_OP)                  \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \
-{                                                           \
-    int i;                                                  \
-    VReg *Vd = (VReg *)vd;                                  \
-    VReg *Vj = (VReg *)vj;                                  \
-    VReg *Vk = (VReg *)vk;                                  \
-    typedef __typeof(Vd->E1(0)) TD;                         \
-                                                            \
-    for (i = 0; i < LSX_LEN/BIT; i++) {                     \
-        Vd->E1(i) += DO_OP((TD)Vj->E2(2 * i + 1),           \
-                           (TD)Vk->E2(2 * i + 1));          \
-    }                                                       \
-}
-
-VMADDWOD(vmaddwod_h_b, 16, H, B, DO_MUL)
-VMADDWOD(vmaddwod_w_h, 32, W, H, DO_MUL)
-VMADDWOD(vmaddwod_d_w, 64, D, W, DO_MUL)
-VMADDWOD(vmaddwod_h_bu, 16,  UH, UB, DO_MUL)
-VMADDWOD(vmaddwod_w_hu, 32,  UW, UH, DO_MUL)
-VMADDWOD(vmaddwod_d_wu, 64,  UD, UW, DO_MUL)
-
-#define VMADDWEV_U_S(NAME, BIT, ES1, EU1, ES2, EU2, DO_OP)  \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \
-{                                                           \
-    int i;                                                  \
-    VReg *Vd = (VReg *)vd;                                  \
-    VReg *Vj = (VReg *)vj;                                  \
-    VReg *Vk = (VReg *)vk;                                  \
-    typedef __typeof(Vd->ES1(0)) TS1;                       \
-    typedef __typeof(Vd->EU1(0)) TU1;                       \
-                                                            \
-    for (i = 0; i < LSX_LEN/BIT; i++) {                     \
-        Vd->ES1(i) += DO_OP((TU1)Vj->EU2(2 * i),            \
-                            (TS1)Vk->ES2(2 * i));           \
-    }                                                       \
-}
-
-VMADDWEV_U_S(vmaddwev_h_bu_b, 16, H, UH, B, UB, DO_MUL)
-VMADDWEV_U_S(vmaddwev_w_hu_h, 32, W, UW, H, UH, DO_MUL)
-VMADDWEV_U_S(vmaddwev_d_wu_w, 64, D, UD, W, UW, DO_MUL)
-
-#define VMADDWOD_U_S(NAME, BIT, ES1, EU1, ES2, EU2, DO_OP)  \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \
-{                                                           \
-    int i;                                                  \
-    VReg *Vd = (VReg *)vd;                                  \
-    VReg *Vj = (VReg *)vj;                                  \
-    VReg *Vk = (VReg *)vk;                                  \
-    typedef __typeof(Vd->ES1(0)) TS1;                       \
-    typedef __typeof(Vd->EU1(0)) TU1;                       \
-                                                            \
-    for (i = 0; i < LSX_LEN/BIT; i++) {                     \
-        Vd->ES1(i) += DO_OP((TU1)Vj->EU2(2 * i + 1),         \
-                            (TS1)Vk->ES2(2 * i + 1));        \
-    }                                                       \
-}
-
-VMADDWOD_U_S(vmaddwod_h_bu_b, 16, H, UH, B, UB, DO_MUL)
-VMADDWOD_U_S(vmaddwod_w_hu_h, 32, W, UW, H, UH, DO_MUL)
-VMADDWOD_U_S(vmaddwod_d_wu_w, 64, D, UD, W, UW, DO_MUL)
-
-#define DO_DIVU(N, M) (unlikely(M == 0) ? 0 : N / M)
-#define DO_REMU(N, M) (unlikely(M == 0) ? 0 : N % M)
-#define DO_DIV(N, M)  (unlikely(M == 0) ? 0 :\
-        unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M)
-#define DO_REM(N, M)  (unlikely(M == 0) ? 0 :\
-        unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 0 : N % M)
-
-#define VDIV(NAME, BIT, E, DO_OP)                           \
-void HELPER(NAME)(CPULoongArchState *env,                   \
-                  uint32_t vd, uint32_t vj, uint32_t vk)    \
-{                                                           \
-    int i;                                                  \
-    VReg *Vd = &(env->fpr[vd].vreg);                        \
-    VReg *Vj = &(env->fpr[vj].vreg);                        \
-    VReg *Vk = &(env->fpr[vk].vreg);                        \
-    for (i = 0; i < LSX_LEN/BIT; i++) {                     \
-        Vd->E(i) = DO_OP(Vj->E(i), Vk->E(i));               \
-    }                                                       \
-}
-
-VDIV(vdiv_b, 8, B, DO_DIV)
-VDIV(vdiv_h, 16, H, DO_DIV)
-VDIV(vdiv_w, 32, W, DO_DIV)
-VDIV(vdiv_d, 64, D, DO_DIV)
-VDIV(vdiv_bu, 8, UB, DO_DIVU)
-VDIV(vdiv_hu, 16, UH, DO_DIVU)
-VDIV(vdiv_wu, 32, UW, DO_DIVU)
-VDIV(vdiv_du, 64, UD, DO_DIVU)
-VDIV(vmod_b, 8, B, DO_REM)
-VDIV(vmod_h, 16, H, DO_REM)
-VDIV(vmod_w, 32, W, DO_REM)
-VDIV(vmod_d, 64, D, DO_REM)
-VDIV(vmod_bu, 8, UB, DO_REMU)
-VDIV(vmod_hu, 16, UH, DO_REMU)
-VDIV(vmod_wu, 32, UW, DO_REMU)
-VDIV(vmod_du, 64, UD, DO_REMU)
-
-#define VSAT_S(NAME, BIT, E)                                    \
-void HELPER(NAME)(void *vd, void *vj, uint64_t max, uint32_t v) \
-{                                                               \
-    int i;                                                      \
-    VReg *Vd = (VReg *)vd;                                      \
-    VReg *Vj = (VReg *)vj;                                      \
-    typedef __typeof(Vd->E(0)) TD;                              \
-                                                                \
-    for (i = 0; i < LSX_LEN/BIT; i++) {                         \
-        Vd->E(i) = Vj->E(i) > (TD)max ? (TD)max :               \
-                   Vj->E(i) < (TD)~max ? (TD)~max: Vj->E(i);    \
-    }                                                           \
-}
-
-VSAT_S(vsat_b, 8, B)
-VSAT_S(vsat_h, 16, H)
-VSAT_S(vsat_w, 32, W)
-VSAT_S(vsat_d, 64, D)
-
-#define VSAT_U(NAME, BIT, E)                                    \
-void HELPER(NAME)(void *vd, void *vj, uint64_t max, uint32_t v) \
-{                                                               \
-    int i;                                                      \
-    VReg *Vd = (VReg *)vd;                                      \
-    VReg *Vj = (VReg *)vj;                                      \
-    typedef __typeof(Vd->E(0)) TD;                              \
-                                                                \
-    for (i = 0; i < LSX_LEN/BIT; i++) {                         \
-        Vd->E(i) = Vj->E(i) > (TD)max ? (TD)max : Vj->E(i);     \
-    }                                                           \
-}
-
-VSAT_U(vsat_bu, 8, UB)
-VSAT_U(vsat_hu, 16, UH)
-VSAT_U(vsat_wu, 32, UW)
-VSAT_U(vsat_du, 64, UD)
-
-#define VEXTH(NAME, BIT, E1, E2)                                    \
-void HELPER(NAME)(CPULoongArchState *env, uint32_t vd, uint32_t vj) \
-{                                                                   \
-    int i;                                                          \
-    VReg *Vd = &(env->fpr[vd].vreg);                                \
-    VReg *Vj = &(env->fpr[vj].vreg);                                \
-                                                                    \
-    for (i = 0; i < LSX_LEN/BIT; i++) {                             \
-        Vd->E1(i) = Vj->E2(i + LSX_LEN/BIT);                        \
-    }                                                               \
-}
-
-void HELPER(vexth_q_d)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
-{
-    VReg *Vd = &(env->fpr[vd].vreg);
-    VReg *Vj = &(env->fpr[vj].vreg);
-
-    Vd->Q(0) = int128_makes64(Vj->D(1));
-}
-
-void HELPER(vexth_qu_du)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
-{
-    VReg *Vd = &(env->fpr[vd].vreg);
-    VReg *Vj = &(env->fpr[vj].vreg);
-
-    Vd->Q(0) = int128_make64((uint64_t)Vj->D(1));
-}
-
-VEXTH(vexth_h_b, 16, H, B)
-VEXTH(vexth_w_h, 32, W, H)
-VEXTH(vexth_d_w, 64, D, W)
-VEXTH(vexth_hu_bu, 16, UH, UB)
-VEXTH(vexth_wu_hu, 32, UW, UH)
-VEXTH(vexth_du_wu, 64, UD, UW)
-
-#define DO_SIGNCOV(a, b)  (a == 0 ? 0 : a < 0 ? -b : b)
-
-DO_3OP(vsigncov_b, 8, B, DO_SIGNCOV)
-DO_3OP(vsigncov_h, 16, H, DO_SIGNCOV)
-DO_3OP(vsigncov_w, 32, W, DO_SIGNCOV)
-DO_3OP(vsigncov_d, 64, D, DO_SIGNCOV)
-
-static uint64_t do_vmskltz_b(int64_t val)
-{
-    uint64_t m = 0x8080808080808080ULL;
-    uint64_t c =  val & m;
-    c |= c << 7;
-    c |= c << 14;
-    c |= c << 28;
-    return c >> 56;
-}
-
-void HELPER(vmskltz_b)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
-{
-    uint16_t temp = 0;
-    VReg *Vd = &(env->fpr[vd].vreg);
-    VReg *Vj = &(env->fpr[vj].vreg);
-
-    temp = do_vmskltz_b(Vj->D(0));
-    temp |= (do_vmskltz_b(Vj->D(1)) << 8);
-    Vd->D(0) = temp;
-    Vd->D(1) = 0;
-}
-
-static uint64_t do_vmskltz_h(int64_t val)
-{
-    uint64_t m = 0x8000800080008000ULL;
-    uint64_t c =  val & m;
-    c |= c << 15;
-    c |= c << 30;
-    return c >> 60;
-}
-
-void HELPER(vmskltz_h)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
-{
-    uint16_t temp = 0;
-    VReg *Vd = &(env->fpr[vd].vreg);
-    VReg *Vj = &(env->fpr[vj].vreg);
-
-    temp = do_vmskltz_h(Vj->D(0));
-    temp |= (do_vmskltz_h(Vj->D(1)) << 4);
-    Vd->D(0) = temp;
-    Vd->D(1) = 0;
-}
-
-static uint64_t do_vmskltz_w(int64_t val)
-{
-    uint64_t m = 0x8000000080000000ULL;
-    uint64_t c =  val & m;
-    c |= c << 31;
-    return c >> 62;
-}
-
-void HELPER(vmskltz_w)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
-{
-    uint16_t temp = 0;
-    VReg *Vd = &(env->fpr[vd].vreg);
-    VReg *Vj = &(env->fpr[vj].vreg);
-
-    temp = do_vmskltz_w(Vj->D(0));
-    temp |= (do_vmskltz_w(Vj->D(1)) << 2);
-    Vd->D(0) = temp;
-    Vd->D(1) = 0;
-}
-
-static uint64_t do_vmskltz_d(int64_t val)
-{
-    return (uint64_t)val >> 63;
-}
-void HELPER(vmskltz_d)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
-{
-    uint16_t temp = 0;
-    VReg *Vd = &(env->fpr[vd].vreg);
-    VReg *Vj = &(env->fpr[vj].vreg);
-
-    temp = do_vmskltz_d(Vj->D(0));
-    temp |= (do_vmskltz_d(Vj->D(1)) << 1);
-    Vd->D(0) = temp;
-    Vd->D(1) = 0;
-}
-
-void HELPER(vmskgez_b)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
-{
-    uint16_t temp = 0;
-    VReg *Vd = &(env->fpr[vd].vreg);
-    VReg *Vj = &(env->fpr[vj].vreg);
-
-    temp =  do_vmskltz_b(Vj->D(0));
-    temp |= (do_vmskltz_b(Vj->D(1)) << 8);
-    Vd->D(0) = (uint16_t)(~temp);
-    Vd->D(1) = 0;
-}
-
-static uint64_t do_vmskez_b(uint64_t a)
-{
-    uint64_t m = 0x7f7f7f7f7f7f7f7fULL;
-    uint64_t c = ~(((a & m) + m) | a | m);
-    c |= c << 7;
-    c |= c << 14;
-    c |= c << 28;
-    return c >> 56;
-}
-
-void HELPER(vmsknz_b)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
-{
-    uint16_t temp = 0;
-    VReg *Vd = &(env->fpr[vd].vreg);
-    VReg *Vj = &(env->fpr[vj].vreg);
-
-    temp = do_vmskez_b(Vj->D(0));
-    temp |= (do_vmskez_b(Vj->D(1)) << 8);
-    Vd->D(0) = (uint16_t)(~temp);
-    Vd->D(1) = 0;
-}
-
-void HELPER(vnori_b)(void *vd, void *vj, uint64_t imm, uint32_t v)
-{
-    int i;
-    VReg *Vd = (VReg *)vd;
-    VReg *Vj = (VReg *)vj;
-
-    for (i = 0; i < LSX_LEN/8; i++) {
-        Vd->B(i) = ~(Vj->B(i) | (uint8_t)imm);
-    }
-}
-
-#define VSLLWIL(NAME, BIT, E1, E2)                        \
-void HELPER(NAME)(CPULoongArchState *env,                 \
-                  uint32_t vd, uint32_t vj, uint32_t imm) \
-{                                                         \
-    int i;                                                \
-    VReg temp;                                            \
-    VReg *Vd = &(env->fpr[vd].vreg);                      \
-    VReg *Vj = &(env->fpr[vj].vreg);                      \
-    typedef __typeof(temp.E1(0)) TD;                      \
-                                                          \
-    temp.D(0) = 0;                                        \
-    temp.D(1) = 0;                                        \
-    for (i = 0; i < LSX_LEN/BIT; i++) {                   \
-        temp.E1(i) = (TD)Vj->E2(i) << (imm % BIT);        \
-    }                                                     \
-    *Vd = temp;                                           \
-}
-
-void HELPER(vextl_q_d)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
-{
-    VReg *Vd = &(env->fpr[vd].vreg);
-    VReg *Vj = &(env->fpr[vj].vreg);
-
-    Vd->Q(0) = int128_makes64(Vj->D(0));
-}
-
-void HELPER(vextl_qu_du)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
-{
-    VReg *Vd = &(env->fpr[vd].vreg);
-    VReg *Vj = &(env->fpr[vj].vreg);
-
-    Vd->Q(0) = int128_make64(Vj->D(0));
-}
-
-VSLLWIL(vsllwil_h_b, 16, H, B)
-VSLLWIL(vsllwil_w_h, 32, W, H)
-VSLLWIL(vsllwil_d_w, 64, D, W)
-VSLLWIL(vsllwil_hu_bu, 16, UH, UB)
-VSLLWIL(vsllwil_wu_hu, 32, UW, UH)
-VSLLWIL(vsllwil_du_wu, 64, UD, UW)
-
-#define do_vsrlr(E, T)                                  \
-static T do_vsrlr_ ##E(T s1, int sh)                    \
-{                                                       \
-    if (sh == 0) {                                      \
-        return s1;                                      \
-    } else {                                            \
-        return  (s1 >> sh)  + ((s1 >> (sh - 1)) & 0x1); \
-    }                                                   \
-}
-
-do_vsrlr(B, uint8_t)
-do_vsrlr(H, uint16_t)
-do_vsrlr(W, uint32_t)
-do_vsrlr(D, uint64_t)
-
-#define VSRLR(NAME, BIT, T, E)                                  \
-void HELPER(NAME)(CPULoongArchState *env,                       \
-                  uint32_t vd, uint32_t vj, uint32_t vk)        \
-{                                                               \
-    int i;                                                      \
-    VReg *Vd = &(env->fpr[vd].vreg);                            \
-    VReg *Vj = &(env->fpr[vj].vreg);                            \
-    VReg *Vk = &(env->fpr[vk].vreg);                            \
-                                                                \
-    for (i = 0; i < LSX_LEN/BIT; i++) {                         \
-        Vd->E(i) = do_vsrlr_ ## E(Vj->E(i), ((T)Vk->E(i))%BIT); \
-    }                                                           \
-}
-
-VSRLR(vsrlr_b, 8,  uint8_t, B)
-VSRLR(vsrlr_h, 16, uint16_t, H)
-VSRLR(vsrlr_w, 32, uint32_t, W)
-VSRLR(vsrlr_d, 64, uint64_t, D)
-
-#define VSRLRI(NAME, BIT, E)                              \
-void HELPER(NAME)(CPULoongArchState *env,                 \
-                  uint32_t vd, uint32_t vj, uint32_t imm) \
-{                                                         \
-    int i;                                                \
-    VReg *Vd = &(env->fpr[vd].vreg);                      \
-    VReg *Vj = &(env->fpr[vj].vreg);                      \
-                                                          \
-    for (i = 0; i < LSX_LEN/BIT; i++) {                   \
-        Vd->E(i) = do_vsrlr_ ## E(Vj->E(i), imm);         \
-    }                                                     \
-}
-
-VSRLRI(vsrlri_b, 8, B)
-VSRLRI(vsrlri_h, 16, H)
-VSRLRI(vsrlri_w, 32, W)
-VSRLRI(vsrlri_d, 64, D)
-
-#define do_vsrar(E, T)                                  \
-static T do_vsrar_ ##E(T s1, int sh)                    \
-{                                                       \
-    if (sh == 0) {                                      \
-        return s1;                                      \
-    } else {                                            \
-        return  (s1 >> sh)  + ((s1 >> (sh - 1)) & 0x1); \
-    }                                                   \
-}
-
-do_vsrar(B, int8_t)
-do_vsrar(H, int16_t)
-do_vsrar(W, int32_t)
-do_vsrar(D, int64_t)
-
-#define VSRAR(NAME, BIT, T, E)                                  \
-void HELPER(NAME)(CPULoongArchState *env,                       \
-                  uint32_t vd, uint32_t vj, uint32_t vk)        \
-{                                                               \
-    int i;                                                      \
-    VReg *Vd = &(env->fpr[vd].vreg);                            \
-    VReg *Vj = &(env->fpr[vj].vreg);                            \
-    VReg *Vk = &(env->fpr[vk].vreg);                            \
-                                                                \
-    for (i = 0; i < LSX_LEN/BIT; i++) {                         \
-        Vd->E(i) = do_vsrar_ ## E(Vj->E(i), ((T)Vk->E(i))%BIT); \
-    }                                                           \
-}
-
-VSRAR(vsrar_b, 8,  uint8_t, B)
-VSRAR(vsrar_h, 16, uint16_t, H)
-VSRAR(vsrar_w, 32, uint32_t, W)
-VSRAR(vsrar_d, 64, uint64_t, D)
-
-#define VSRARI(NAME, BIT, E)                              \
-void HELPER(NAME)(CPULoongArchState *env,                 \
-                  uint32_t vd, uint32_t vj, uint32_t imm) \
-{                                                         \
-    int i;                                                \
-    VReg *Vd = &(env->fpr[vd].vreg);                      \
-    VReg *Vj = &(env->fpr[vj].vreg);                      \
-                                                          \
-    for (i = 0; i < LSX_LEN/BIT; i++) {                   \
-        Vd->E(i) = do_vsrar_ ## E(Vj->E(i), imm);         \
-    }                                                     \
-}
-
-VSRARI(vsrari_b, 8, B)
-VSRARI(vsrari_h, 16, H)
-VSRARI(vsrari_w, 32, W)
-VSRARI(vsrari_d, 64, D)
-
-#define R_SHIFT(a, b) (a >> b)
-
-#define VSRLN(NAME, BIT, T, E1, E2)                             \
-void HELPER(NAME)(CPULoongArchState *env,                       \
-                  uint32_t vd, uint32_t vj, uint32_t vk)        \
-{                                                               \
-    int i;                                                      \
-    VReg *Vd = &(env->fpr[vd].vreg);                            \
-    VReg *Vj = &(env->fpr[vj].vreg);                            \
-    VReg *Vk = &(env->fpr[vk].vreg);                            \
-                                                                \
-    for (i = 0; i < LSX_LEN/BIT; i++) {                         \
-        Vd->E1(i) = R_SHIFT((T)Vj->E2(i),((T)Vk->E2(i)) % BIT); \
-    }                                                           \
-    Vd->D(1) = 0;                                               \
-}
-
-VSRLN(vsrln_b_h, 16, uint16_t, B, H)
-VSRLN(vsrln_h_w, 32, uint32_t, H, W)
-VSRLN(vsrln_w_d, 64, uint64_t, W, D)
-
-#define VSRAN(NAME, BIT, T, E1, E2)                           \
-void HELPER(NAME)(CPULoongArchState *env,                     \
-                  uint32_t vd, uint32_t vj, uint32_t vk)      \
-{                                                             \
-    int i;                                                    \
-    VReg *Vd = &(env->fpr[vd].vreg);                          \
-    VReg *Vj = &(env->fpr[vj].vreg);                          \
-    VReg *Vk = &(env->fpr[vk].vreg);                          \
-                                                              \
-    for (i = 0; i < LSX_LEN/BIT; i++) {                       \
-        Vd->E1(i) = R_SHIFT(Vj->E2(i), ((T)Vk->E2(i)) % BIT); \
-    }                                                         \
-    Vd->D(1) = 0;                                             \
-}
-
-VSRAN(vsran_b_h, 16, uint16_t, B, H)
-VSRAN(vsran_h_w, 32, uint32_t, H, W)
-VSRAN(vsran_w_d, 64, uint64_t, W, D)
-
-#define VSRLNI(NAME, BIT, T, E1, E2)                         \
-void HELPER(NAME)(CPULoongArchState *env,                    \
-                  uint32_t vd, uint32_t vj, uint32_t imm)    \
-{                                                            \
-    int i, max;                                              \
-    VReg temp;                                               \
-    VReg *Vd = &(env->fpr[vd].vreg);                         \
-    VReg *Vj = &(env->fpr[vj].vreg);                         \
-                                                             \
-    temp.D(0) = 0;                                           \
-    temp.D(1) = 0;                                           \
-    max = LSX_LEN/BIT;                                       \
-    for (i = 0; i < max; i++) {                              \
-        temp.E1(i) = R_SHIFT((T)Vj->E2(i), imm);             \
-        temp.E1(i + max) = R_SHIFT((T)Vd->E2(i), imm);       \
-    }                                                        \
-    *Vd = temp;                                              \
-}
-
-void HELPER(vsrlni_d_q)(CPULoongArchState *env,
-                        uint32_t vd, uint32_t vj, uint32_t imm)
-{
-    VReg temp;
-    VReg *Vd = &(env->fpr[vd].vreg);
-    VReg *Vj = &(env->fpr[vj].vreg);
-
-    temp.D(0) = 0;
-    temp.D(1) = 0;
-    temp.D(0) = int128_getlo(int128_urshift(Vj->Q(0), imm % 128));
-    temp.D(1) = int128_getlo(int128_urshift(Vd->Q(0), imm % 128));
-    *Vd = temp;
-}
-
-VSRLNI(vsrlni_b_h, 16, uint16_t, B, H)
-VSRLNI(vsrlni_h_w, 32, uint32_t, H, W)
-VSRLNI(vsrlni_w_d, 64, uint64_t, W, D)
-
-#define VSRANI(NAME, BIT, E1, E2)                         \
-void HELPER(NAME)(CPULoongArchState *env,                 \
-                  uint32_t vd, uint32_t vj, uint32_t imm) \
-{                                                         \
-    int i, max;                                           \
-    VReg temp;                                            \
-    VReg *Vd = &(env->fpr[vd].vreg);                      \
-    VReg *Vj = &(env->fpr[vj].vreg);                      \
-                                                          \
-    temp.D(0) = 0;                                        \
-    temp.D(1) = 0;                                        \
-    max = LSX_LEN/BIT;                                    \
-    for (i = 0; i < max; i++) {                           \
-        temp.E1(i) = R_SHIFT(Vj->E2(i), imm);             \
-        temp.E1(i + max) = R_SHIFT(Vd->E2(i), imm);       \
-    }                                                     \
-    *Vd = temp;                                           \
-}
-
-void HELPER(vsrani_d_q)(CPULoongArchState *env,
-                        uint32_t vd, uint32_t vj, uint32_t imm)
-{
-    VReg temp;
-    VReg *Vd = &(env->fpr[vd].vreg);
-    VReg *Vj = &(env->fpr[vj].vreg);
-
-    temp.D(0) = 0;
-    temp.D(1) = 0;
-    temp.D(0) = int128_getlo(int128_rshift(Vj->Q(0), imm % 128));
-    temp.D(1) = int128_getlo(int128_rshift(Vd->Q(0), imm % 128));
-    *Vd = temp;
-}
-
-VSRANI(vsrani_b_h, 16, B, H)
-VSRANI(vsrani_h_w, 32, H, W)
-VSRANI(vsrani_w_d, 64, W, D)
-
-#define VSRLRN(NAME, BIT, T, E1, E2)                                \
-void HELPER(NAME)(CPULoongArchState *env,                           \
-                  uint32_t vd, uint32_t vj, uint32_t vk)            \
-{                                                                   \
-    int i;                                                          \
-    VReg *Vd = &(env->fpr[vd].vreg);                                \
-    VReg *Vj = &(env->fpr[vj].vreg);                                \
-    VReg *Vk = &(env->fpr[vk].vreg);                                \
-                                                                    \
-    for (i = 0; i < LSX_LEN/BIT; i++) {                             \
-        Vd->E1(i) = do_vsrlr_ ## E2(Vj->E2(i), ((T)Vk->E2(i))%BIT); \
-    }                                                               \
-    Vd->D(1) = 0;                                                   \
-}
-
-VSRLRN(vsrlrn_b_h, 16, uint16_t, B, H)
-VSRLRN(vsrlrn_h_w, 32, uint32_t, H, W)
-VSRLRN(vsrlrn_w_d, 64, uint64_t, W, D)
-
-#define VSRARN(NAME, BIT, T, E1, E2)                                \
-void HELPER(NAME)(CPULoongArchState *env,                           \
-                  uint32_t vd, uint32_t vj, uint32_t vk)            \
-{                                                                   \
-    int i;                                                          \
-    VReg *Vd = &(env->fpr[vd].vreg);                                \
-    VReg *Vj = &(env->fpr[vj].vreg);                                \
-    VReg *Vk = &(env->fpr[vk].vreg);                                \
-                                                                    \
-    for (i = 0; i < LSX_LEN/BIT; i++) {                             \
-        Vd->E1(i) = do_vsrar_ ## E2(Vj->E2(i), ((T)Vk->E2(i))%BIT); \
-    }                                                               \
-    Vd->D(1) = 0;                                                   \
-}
-
-VSRARN(vsrarn_b_h, 16, uint8_t,  B, H)
-VSRARN(vsrarn_h_w, 32, uint16_t, H, W)
-VSRARN(vsrarn_w_d, 64, uint32_t, W, D)
-
-#define VSRLRNI(NAME, BIT, E1, E2)                          \
-void HELPER(NAME)(CPULoongArchState *env,                   \
-                  uint32_t vd, uint32_t vj, uint32_t imm)   \
-{                                                           \
-    int i, max;                                             \
-    VReg temp;                                              \
-    VReg *Vd = &(env->fpr[vd].vreg);                        \
-    VReg *Vj = &(env->fpr[vj].vreg);                        \
-                                                            \
-    temp.D(0) = 0;                                          \
-    temp.D(1) = 0;                                          \
-    max = LSX_LEN/BIT;                                      \
-    for (i = 0; i < max; i++) {                             \
-        temp.E1(i) = do_vsrlr_ ## E2(Vj->E2(i), imm);       \
-        temp.E1(i + max) = do_vsrlr_ ## E2(Vd->E2(i), imm); \
-    }                                                       \
-    *Vd = temp;                                             \
-}
-
-void HELPER(vsrlrni_d_q)(CPULoongArchState *env,
-                         uint32_t vd, uint32_t vj, uint32_t imm)
-{
-    VReg temp;
-    VReg *Vd = &(env->fpr[vd].vreg);
-    VReg *Vj = &(env->fpr[vj].vreg);
-    Int128 r1, r2;
-
-    if (imm == 0) {
-        temp.D(0) = int128_getlo(Vj->Q(0));
-        temp.D(1) = int128_getlo(Vd->Q(0));
-    } else {
-        r1 = int128_and(int128_urshift(Vj->Q(0), (imm -1)), int128_one());
-        r2 = int128_and(int128_urshift(Vd->Q(0), (imm -1)), int128_one());
-
-       temp.D(0) = int128_getlo(int128_add(int128_urshift(Vj->Q(0), imm), r1));
-       temp.D(1) = int128_getlo(int128_add(int128_urshift(Vd->Q(0), imm), r2));
-    }
-    *Vd = temp;
-}
-
-VSRLRNI(vsrlrni_b_h, 16, B, H)
-VSRLRNI(vsrlrni_h_w, 32, H, W)
-VSRLRNI(vsrlrni_w_d, 64, W, D)
-
-#define VSRARNI(NAME, BIT, E1, E2)                          \
-void HELPER(NAME)(CPULoongArchState *env,                   \
-                  uint32_t vd, uint32_t vj, uint32_t imm)   \
-{                                                           \
-    int i, max;                                             \
-    VReg temp;                                              \
-    VReg *Vd = &(env->fpr[vd].vreg);                        \
-    VReg *Vj = &(env->fpr[vj].vreg);                        \
-                                                            \
-    temp.D(0) = 0;                                          \
-    temp.D(1) = 0;                                          \
-    max = LSX_LEN/BIT;                                      \
-    for (i = 0; i < max; i++) {                             \
-        temp.E1(i) = do_vsrar_ ## E2(Vj->E2(i), imm);       \
-        temp.E1(i + max) = do_vsrar_ ## E2(Vd->E2(i), imm); \
-    }                                                       \
-    *Vd = temp;                                             \
-}
-
-void HELPER(vsrarni_d_q)(CPULoongArchState *env,
-                         uint32_t vd, uint32_t vj, uint32_t imm)
-{
-    VReg temp;
-    VReg *Vd = &(env->fpr[vd].vreg);
-    VReg *Vj = &(env->fpr[vj].vreg);
-    Int128 r1, r2;
-
-    if (imm == 0) {
-        temp.D(0) = int128_getlo(Vj->Q(0));
-        temp.D(1) = int128_getlo(Vd->Q(0));
-    } else {
-        r1 = int128_and(int128_rshift(Vj->Q(0), (imm -1)), int128_one());
-        r2 = int128_and(int128_rshift(Vd->Q(0), (imm -1)), int128_one());
-
-       temp.D(0) = int128_getlo(int128_add(int128_rshift(Vj->Q(0), imm), r1));
-       temp.D(1) = int128_getlo(int128_add(int128_rshift(Vd->Q(0), imm), r2));
-    }
-    *Vd = temp;
-}
-
-VSRARNI(vsrarni_b_h, 16, B, H)
-VSRARNI(vsrarni_h_w, 32, H, W)
-VSRARNI(vsrarni_w_d, 64, W, D)
-
-#define SSRLNS(NAME, T1, T2, T3)                    \
-static T1 do_ssrlns_ ## NAME(T2 e2, int sa, int sh) \
-{                                                   \
-        T1 shft_res;                                \
-        if (sa == 0) {                              \
-            shft_res = e2;                          \
-        } else {                                    \
-            shft_res = (((T1)e2) >> sa);            \
-        }                                           \
-        T3 mask;                                    \
-        mask = (1ull << sh) -1;                     \
-        if (shft_res > mask) {                      \
-            return mask;                            \
-        } else {                                    \
-            return  shft_res;                       \
-        }                                           \
-}
-
-SSRLNS(B, uint16_t, int16_t, uint8_t)
-SSRLNS(H, uint32_t, int32_t, uint16_t)
-SSRLNS(W, uint64_t, int64_t, uint32_t)
-
-#define VSSRLN(NAME, BIT, T, E1, E2)                                          \
-void HELPER(NAME)(CPULoongArchState *env,                                     \
-                  uint32_t vd, uint32_t vj, uint32_t vk)                      \
-{                                                                             \
-    int i;                                                                    \
-    VReg *Vd = &(env->fpr[vd].vreg);                                          \
-    VReg *Vj = &(env->fpr[vj].vreg);                                          \
-    VReg *Vk = &(env->fpr[vk].vreg);                                          \
-                                                                              \
-    for (i = 0; i < LSX_LEN/BIT; i++) {                                       \
-        Vd->E1(i) = do_ssrlns_ ## E1(Vj->E2(i), (T)Vk->E2(i)% BIT, BIT/2 -1); \
-    }                                                                         \
-    Vd->D(1) = 0;                                                             \
-}
-
-VSSRLN(vssrln_b_h, 16, uint16_t, B, H)
-VSSRLN(vssrln_h_w, 32, uint32_t, H, W)
-VSSRLN(vssrln_w_d, 64, uint64_t, W, D)
-
-#define SSRANS(E, T1, T2)                        \
-static T1 do_ssrans_ ## E(T1 e2, int sa, int sh) \
-{                                                \
-        T1 shft_res;                             \
-        if (sa == 0) {                           \
-            shft_res = e2;                       \
-        } else {                                 \
-            shft_res = e2 >> sa;                 \
-        }                                        \
-        T2 mask;                                 \
-        mask = (1ll << sh) -1;                   \
-        if (shft_res > mask) {                   \
-            return  mask;                        \
-        } else if (shft_res < -(mask +1)) {      \
-            return  ~mask;                       \
-        } else {                                 \
-            return shft_res;                     \
-        }                                        \
-}
-
-SSRANS(B, int16_t, int8_t)
-SSRANS(H, int32_t, int16_t)
-SSRANS(W, int64_t, int32_t)
-
-#define VSSRAN(NAME, BIT, T, E1, E2)                                         \
-void HELPER(NAME)(CPULoongArchState *env,                                    \
-                  uint32_t vd, uint32_t vj, uint32_t vk)                     \
-{                                                                            \
-    int i;                                                                   \
-    VReg *Vd = &(env->fpr[vd].vreg);                                         \
-    VReg *Vj = &(env->fpr[vj].vreg);                                         \
-    VReg *Vk = &(env->fpr[vk].vreg);                                         \
-                                                                             \
-    for (i = 0; i < LSX_LEN/BIT; i++) {                                      \
-        Vd->E1(i) = do_ssrans_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2 -1); \
-    }                                                                        \
-    Vd->D(1) = 0;                                                            \
-}
-
-VSSRAN(vssran_b_h, 16, uint16_t, B, H)
-VSSRAN(vssran_h_w, 32, uint32_t, H, W)
-VSSRAN(vssran_w_d, 64, uint64_t, W, D)
-
-#define SSRLNU(E, T1, T2, T3)                    \
-static T1 do_ssrlnu_ ## E(T3 e2, int sa, int sh) \
-{                                                \
-        T1 shft_res;                             \
-        if (sa == 0) {                           \
-            shft_res = e2;                       \
-        } else {                                 \
-            shft_res = (((T1)e2) >> sa);         \
-        }                                        \
-        T2 mask;                                 \
-        mask = (1ull << sh) -1;                  \
-        if (shft_res > mask) {                   \
-            return mask;                         \
-        } else {                                 \
-            return shft_res;                     \
-        }                                        \
-}
-
-SSRLNU(B, uint16_t, uint8_t,  int16_t)
-SSRLNU(H, uint32_t, uint16_t, int32_t)
-SSRLNU(W, uint64_t, uint32_t, int64_t)
-
-#define VSSRLNU(NAME, BIT, T, E1, E2)                                     \
-void HELPER(NAME)(CPULoongArchState *env,                                 \
-                  uint32_t vd, uint32_t vj, uint32_t vk)                  \
-{                                                                         \
-    int i;                                                                \
-    VReg *Vd = &(env->fpr[vd].vreg);                                      \
-    VReg *Vj = &(env->fpr[vj].vreg);                                      \
-    VReg *Vk = &(env->fpr[vk].vreg);                                      \
-                                                                          \
-    for (i = 0; i < LSX_LEN/BIT; i++) {                                   \
-        Vd->E1(i) = do_ssrlnu_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2); \
-    }                                                                     \
-    Vd->D(1) = 0;                                                         \
-}
-
-VSSRLNU(vssrln_bu_h, 16, uint16_t, B, H)
-VSSRLNU(vssrln_hu_w, 32, uint32_t, H, W)
-VSSRLNU(vssrln_wu_d, 64, uint64_t, W, D)
-
-#define SSRANU(E, T1, T2, T3)                    \
-static T1 do_ssranu_ ## E(T3 e2, int sa, int sh) \
-{                                                \
-        T1 shft_res;                             \
-        if (sa == 0) {                           \
-            shft_res = e2;                       \
-        } else {                                 \
-            shft_res = e2 >> sa;                 \
-        }                                        \
-        if (e2 < 0) {                            \
-            shft_res = 0;                        \
-        }                                        \
-        T2 mask;                                 \
-        mask = (1ull << sh) -1;                  \
-        if (shft_res > mask) {                   \
-            return mask;                         \
-        } else {                                 \
-            return shft_res;                     \
-        }                                        \
-}
-
-SSRANU(B, uint16_t, uint8_t,  int16_t)
-SSRANU(H, uint32_t, uint16_t, int32_t)
-SSRANU(W, uint64_t, uint32_t, int64_t)
-
-#define VSSRANU(NAME, BIT, T, E1, E2)                                     \
-void HELPER(NAME)(CPULoongArchState *env,                                 \
-                  uint32_t vd, uint32_t vj, uint32_t vk)                  \
-{                                                                         \
-    int i;                                                                \
-    VReg *Vd = &(env->fpr[vd].vreg);                                      \
-    VReg *Vj = &(env->fpr[vj].vreg);                                      \
-    VReg *Vk = &(env->fpr[vk].vreg);                                      \
-                                                                          \
-    for (i = 0; i < LSX_LEN/BIT; i++) {                                   \
-        Vd->E1(i) = do_ssranu_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2); \
-    }                                                                     \
-    Vd->D(1) = 0;                                                         \
-}
-
-VSSRANU(vssran_bu_h, 16, uint16_t, B, H)
-VSSRANU(vssran_hu_w, 32, uint32_t, H, W)
-VSSRANU(vssran_wu_d, 64, uint64_t, W, D)
-
-#define VSSRLNI(NAME, BIT, E1, E2)                                            \
-void HELPER(NAME)(CPULoongArchState *env,                                     \
-                  uint32_t vd, uint32_t vj, uint32_t imm)                     \
-{                                                                             \
-    int i;                                                                    \
-    VReg temp;                                                                \
-    VReg *Vd = &(env->fpr[vd].vreg);                                          \
-    VReg *Vj = &(env->fpr[vj].vreg);                                          \
-                                                                              \
-    for (i = 0; i < LSX_LEN/BIT; i++) {                                       \
-        temp.E1(i) = do_ssrlns_ ## E1(Vj->E2(i), imm, BIT/2 -1);              \
-        temp.E1(i + LSX_LEN/BIT) = do_ssrlns_ ## E1(Vd->E2(i), imm, BIT/2 -1);\
-    }                                                                         \
-    *Vd = temp;                                                               \
-}
-
-void HELPER(vssrlni_d_q)(CPULoongArchState *env,
-                         uint32_t vd, uint32_t vj, uint32_t imm)
-{
-    Int128 shft_res1, shft_res2, mask;
-    VReg *Vd = &(env->fpr[vd].vreg);
-    VReg *Vj = &(env->fpr[vj].vreg);
-
-    if (imm == 0) {
-        shft_res1 = Vj->Q(0);
-        shft_res2 = Vd->Q(0);
-    } else {
-        shft_res1 = int128_urshift(Vj->Q(0), imm);
-        shft_res2 = int128_urshift(Vd->Q(0), imm);
-    }
-    mask = int128_sub(int128_lshift(int128_one(), 63), int128_one());
-
-    if (int128_ult(mask, shft_res1)) {
-        Vd->D(0) = int128_getlo(mask);
-    }else {
-        Vd->D(0) = int128_getlo(shft_res1);
-    }
-
-    if (int128_ult(mask, shft_res2)) {
-        Vd->D(1) = int128_getlo(mask);
-    }else {
-        Vd->D(1) = int128_getlo(shft_res2);
-    }
-}
-
-VSSRLNI(vssrlni_b_h, 16, B, H)
-VSSRLNI(vssrlni_h_w, 32, H, W)
-VSSRLNI(vssrlni_w_d, 64, W, D)
-
-#define VSSRANI(NAME, BIT, E1, E2)                                             \
-void HELPER(NAME)(CPULoongArchState *env,                                      \
-                  uint32_t vd, uint32_t vj, uint32_t imm)                      \
-{                                                                              \
-    int i;                                                                     \
-    VReg temp;                                                                 \
-    VReg *Vd = &(env->fpr[vd].vreg);                                           \
-    VReg *Vj = &(env->fpr[vj].vreg);                                           \
-                                                                               \
-    for (i = 0; i < LSX_LEN/BIT; i++) {                                        \
-        temp.E1(i) = do_ssrans_ ## E1(Vj->E2(i), imm, BIT/2 -1);               \
-        temp.E1(i + LSX_LEN/BIT) = do_ssrans_ ## E1(Vd->E2(i), imm, BIT/2 -1); \
-    }                                                                          \
-    *Vd = temp;                                                                \
-}
-
-void HELPER(vssrani_d_q)(CPULoongArchState *env,
-                         uint32_t vd, uint32_t vj, uint32_t imm)
-{
-    Int128 shft_res1, shft_res2, mask, min;
-    VReg *Vd = &(env->fpr[vd].vreg);
-    VReg *Vj = &(env->fpr[vj].vreg);
-
-    if (imm == 0) {
-        shft_res1 = Vj->Q(0);
-        shft_res2 = Vd->Q(0);
-    } else {
-        shft_res1 = int128_rshift(Vj->Q(0), imm);
-        shft_res2 = int128_rshift(Vd->Q(0), imm);
-    }
-    mask = int128_sub(int128_lshift(int128_one(), 63), int128_one());
-    min  = int128_lshift(int128_one(), 63);
-
-    if (int128_gt(shft_res1,  mask)) {
-        Vd->D(0) = int128_getlo(mask);
-    } else if (int128_lt(shft_res1, int128_neg(min))) {
-        Vd->D(0) = int128_getlo(min);
-    } else {
-        Vd->D(0) = int128_getlo(shft_res1);
-    }
-
-    if (int128_gt(shft_res2, mask)) {
-        Vd->D(1) = int128_getlo(mask);
-    } else if (int128_lt(shft_res2, int128_neg(min))) {
-        Vd->D(1) = int128_getlo(min);
-    } else {
-        Vd->D(1) = int128_getlo(shft_res2);
-    }
-}
-
-VSSRANI(vssrani_b_h, 16, B, H)
-VSSRANI(vssrani_h_w, 32, H, W)
-VSSRANI(vssrani_w_d, 64, W, D)
-
-#define VSSRLNUI(NAME, BIT, E1, E2)                                         \
-void HELPER(NAME)(CPULoongArchState *env,                                   \
-                  uint32_t vd, uint32_t vj, uint32_t imm)                   \
-{                                                                           \
-    int i;                                                                  \
-    VReg temp;                                                              \
-    VReg *Vd = &(env->fpr[vd].vreg);                                        \
-    VReg *Vj = &(env->fpr[vj].vreg);                                        \
-                                                                            \
-    for (i = 0; i < LSX_LEN/BIT; i++) {                                     \
-        temp.E1(i) = do_ssrlnu_ ## E1(Vj->E2(i), imm, BIT/2);               \
-        temp.E1(i + LSX_LEN/BIT) = do_ssrlnu_ ## E1(Vd->E2(i), imm, BIT/2); \
-    }                                                                       \
-    *Vd = temp;                                                             \
-}
-
-void HELPER(vssrlni_du_q)(CPULoongArchState *env,
-                         uint32_t vd, uint32_t vj, uint32_t imm)
-{
-    Int128 shft_res1, shft_res2, mask;
-    VReg *Vd = &(env->fpr[vd].vreg);
-    VReg *Vj = &(env->fpr[vj].vreg);
-
-    if (imm == 0) {
-        shft_res1 = Vj->Q(0);
-        shft_res2 = Vd->Q(0);
-    } else {
-        shft_res1 = int128_urshift(Vj->Q(0), imm);
-        shft_res2 = int128_urshift(Vd->Q(0), imm);
-    }
-    mask = int128_sub(int128_lshift(int128_one(), 64), int128_one());
-
-    if (int128_ult(mask, shft_res1)) {
-        Vd->D(0) = int128_getlo(mask);
-    }else {
-        Vd->D(0) = int128_getlo(shft_res1);
-    }
-
-    if (int128_ult(mask, shft_res2)) {
-        Vd->D(1) = int128_getlo(mask);
-    }else {
-        Vd->D(1) = int128_getlo(shft_res2);
-    }
-}
-
-VSSRLNUI(vssrlni_bu_h, 16, B, H)
-VSSRLNUI(vssrlni_hu_w, 32, H, W)
-VSSRLNUI(vssrlni_wu_d, 64, W, D)
-
-#define VSSRANUI(NAME, BIT, E1, E2)                                         \
-void HELPER(NAME)(CPULoongArchState *env,                                   \
-                  uint32_t vd, uint32_t vj, uint32_t imm)                   \
-{                                                                           \
-    int i;                                                                  \
-    VReg temp;                                                              \
-    VReg *Vd = &(env->fpr[vd].vreg);                                        \
-    VReg *Vj = &(env->fpr[vj].vreg);                                        \
-                                                                            \
-    for (i = 0; i < LSX_LEN/BIT; i++) {                                     \
-        temp.E1(i) = do_ssranu_ ## E1(Vj->E2(i), imm, BIT/2);               \
-        temp.E1(i + LSX_LEN/BIT) = do_ssranu_ ## E1(Vd->E2(i), imm, BIT/2); \
-    }                                                                       \
-    *Vd = temp;                                                             \
-}
-
-void HELPER(vssrani_du_q)(CPULoongArchState *env,
-                         uint32_t vd, uint32_t vj, uint32_t imm)
-{
-    Int128 shft_res1, shft_res2, mask;
-    VReg *Vd = &(env->fpr[vd].vreg);
-    VReg *Vj = &(env->fpr[vj].vreg);
-
-    if (imm == 0) {
-        shft_res1 = Vj->Q(0);
-        shft_res2 = Vd->Q(0);
-    } else {
-        shft_res1 = int128_rshift(Vj->Q(0), imm);
-        shft_res2 = int128_rshift(Vd->Q(0), imm);
-    }
-
-    if (int128_lt(Vj->Q(0), int128_zero())) {
-        shft_res1 = int128_zero();
-    }
-
-    if (int128_lt(Vd->Q(0), int128_zero())) {
-        shft_res2 = int128_zero();
-    }
-
-    mask = int128_sub(int128_lshift(int128_one(), 64), int128_one());
-
-    if (int128_ult(mask, shft_res1)) {
-        Vd->D(0) = int128_getlo(mask);
-    }else {
-        Vd->D(0) = int128_getlo(shft_res1);
-    }
-
-    if (int128_ult(mask, shft_res2)) {
-        Vd->D(1) = int128_getlo(mask);
-    }else {
-        Vd->D(1) = int128_getlo(shft_res2);
-    }
-}
-
-VSSRANUI(vssrani_bu_h, 16, B, H)
-VSSRANUI(vssrani_hu_w, 32, H, W)
-VSSRANUI(vssrani_wu_d, 64, W, D)
-
-#define SSRLRNS(E1, E2, T1, T2, T3)                \
-static T1 do_ssrlrns_ ## E1(T2 e2, int sa, int sh) \
-{                                                  \
-    T1 shft_res;                                   \
-                                                   \
-    shft_res = do_vsrlr_ ## E2(e2, sa);            \
-    T1 mask;                                       \
-    mask = (1ull << sh) -1;                        \
-    if (shft_res > mask) {                         \
-        return mask;                               \
-    } else {                                       \
-        return  shft_res;                          \
-    }                                              \
-}
-
-SSRLRNS(B, H, uint16_t, int16_t, uint8_t)
-SSRLRNS(H, W, uint32_t, int32_t, uint16_t)
-SSRLRNS(W, D, uint64_t, int64_t, uint32_t)
-
-#define VSSRLRN(NAME, BIT, T, E1, E2)                                         \
-void HELPER(NAME)(CPULoongArchState *env,                                     \
-                  uint32_t vd, uint32_t vj, uint32_t vk)                      \
-{                                                                             \
-    int i;                                                                    \
-    VReg *Vd = &(env->fpr[vd].vreg);                                          \
-    VReg *Vj = &(env->fpr[vj].vreg);                                          \
-    VReg *Vk = &(env->fpr[vk].vreg);                                          \
-                                                                              \
-    for (i = 0; i < LSX_LEN/BIT; i++) {                                       \
-        Vd->E1(i) = do_ssrlrns_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2 -1); \
-    }                                                                         \
-    Vd->D(1) = 0;                                                             \
-}
-
-VSSRLRN(vssrlrn_b_h, 16, uint16_t, B, H)
-VSSRLRN(vssrlrn_h_w, 32, uint32_t, H, W)
-VSSRLRN(vssrlrn_w_d, 64, uint64_t, W, D)
-
-#define SSRARNS(E1, E2, T1, T2)                    \
-static T1 do_ssrarns_ ## E1(T1 e2, int sa, int sh) \
-{                                                  \
-    T1 shft_res;                                   \
-                                                   \
-    shft_res = do_vsrar_ ## E2(e2, sa);            \
-    T2 mask;                                       \
-    mask = (1ll << sh) -1;                         \
-    if (shft_res > mask) {                         \
-        return  mask;                              \
-    } else if (shft_res < -(mask +1)) {            \
-        return  ~mask;                             \
-    } else {                                       \
-        return shft_res;                           \
-    }                                              \
-}
-
-SSRARNS(B, H, int16_t, int8_t)
-SSRARNS(H, W, int32_t, int16_t)
-SSRARNS(W, D, int64_t, int32_t)
-
-#define VSSRARN(NAME, BIT, T, E1, E2)                                         \
-void HELPER(NAME)(CPULoongArchState *env,                                     \
-                  uint32_t vd, uint32_t vj, uint32_t vk)                      \
-{                                                                             \
-    int i;                                                                    \
-    VReg *Vd = &(env->fpr[vd].vreg);                                          \
-    VReg *Vj = &(env->fpr[vj].vreg);                                          \
-    VReg *Vk = &(env->fpr[vk].vreg);                                          \
-                                                                              \
-    for (i = 0; i < LSX_LEN/BIT; i++) {                                       \
-        Vd->E1(i) = do_ssrarns_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2 -1); \
-    }                                                                         \
-    Vd->D(1) = 0;                                                             \
-}
-
-VSSRARN(vssrarn_b_h, 16, uint16_t, B, H)
-VSSRARN(vssrarn_h_w, 32, uint32_t, H, W)
-VSSRARN(vssrarn_w_d, 64, uint64_t, W, D)
-
-#define SSRLRNU(E1, E2, T1, T2, T3)                \
-static T1 do_ssrlrnu_ ## E1(T3 e2, int sa, int sh) \
-{                                                  \
-    T1 shft_res;                                   \
-                                                   \
-    shft_res = do_vsrlr_ ## E2(e2, sa);            \
-                                                   \
-    T2 mask;                                       \
-    mask = (1ull << sh) -1;                        \
-    if (shft_res > mask) {                         \
-        return mask;                               \
-    } else {                                       \
-        return shft_res;                           \
-    }                                              \
-}
-
-SSRLRNU(B, H, uint16_t, uint8_t, int16_t)
-SSRLRNU(H, W, uint32_t, uint16_t, int32_t)
-SSRLRNU(W, D, uint64_t, uint32_t, int64_t)
-
-#define VSSRLRNU(NAME, BIT, T, E1, E2)                                     \
-void HELPER(NAME)(CPULoongArchState *env,                                  \
-                  uint32_t vd, uint32_t vj, uint32_t vk)                   \
-{                                                                          \
-    int i;                                                                 \
-    VReg *Vd = &(env->fpr[vd].vreg);                                       \
-    VReg *Vj = &(env->fpr[vj].vreg);                                       \
-    VReg *Vk = &(env->fpr[vk].vreg);                                       \
-                                                                           \
-    for (i = 0; i < LSX_LEN/BIT; i++) {                                    \
-        Vd->E1(i) = do_ssrlrnu_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2); \
-    }                                                                      \
-    Vd->D(1) = 0;                                                          \
-}
-
-VSSRLRNU(vssrlrn_bu_h, 16, uint16_t, B, H)
-VSSRLRNU(vssrlrn_hu_w, 32, uint32_t, H, W)
-VSSRLRNU(vssrlrn_wu_d, 64, uint64_t, W, D)
-
-#define SSRARNU(E1, E2, T1, T2, T3)                \
-static T1 do_ssrarnu_ ## E1(T3 e2, int sa, int sh) \
-{                                                  \
-    T1 shft_res;                                   \
-                                                   \
-    if (e2 < 0) {                                  \
-        shft_res = 0;                              \
-    } else {                                       \
-        shft_res = do_vsrar_ ## E2(e2, sa);        \
-    }                                              \
-    T2 mask;                                       \
-    mask = (1ull << sh) -1;                        \
-    if (shft_res > mask) {                         \
-        return mask;                               \
-    } else {                                       \
-        return shft_res;                           \
-    }                                              \
-}
-
-SSRARNU(B, H, uint16_t, uint8_t, int16_t)
-SSRARNU(H, W, uint32_t, uint16_t, int32_t)
-SSRARNU(W, D, uint64_t, uint32_t, int64_t)
-
-#define VSSRARNU(NAME, BIT, T, E1, E2)                                     \
-void HELPER(NAME)(CPULoongArchState *env,                                  \
-                  uint32_t vd, uint32_t vj, uint32_t vk)                   \
-{                                                                          \
-    int i;                                                                 \
-    VReg *Vd = &(env->fpr[vd].vreg);                                       \
-    VReg *Vj = &(env->fpr[vj].vreg);                                       \
-    VReg *Vk = &(env->fpr[vk].vreg);                                       \
-                                                                           \
-    for (i = 0; i < LSX_LEN/BIT; i++) {                                    \
-        Vd->E1(i) = do_ssrarnu_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2); \
-    }                                                                      \
-    Vd->D(1) = 0;                                                          \
-}
-
-VSSRARNU(vssrarn_bu_h, 16, uint16_t, B, H)
-VSSRARNU(vssrarn_hu_w, 32, uint32_t, H, W)
-VSSRARNU(vssrarn_wu_d, 64, uint64_t, W, D)
-
-#define VSSRLRNI(NAME, BIT, E1, E2)                                            \
-void HELPER(NAME)(CPULoongArchState *env,                                      \
-                  uint32_t vd, uint32_t vj, uint32_t imm)                      \
-{                                                                              \
-    int i;                                                                     \
-    VReg temp;                                                                 \
-    VReg *Vd = &(env->fpr[vd].vreg);                                           \
-    VReg *Vj = &(env->fpr[vj].vreg);                                           \
-                                                                               \
-    for (i = 0; i < LSX_LEN/BIT; i++) {                                        \
-        temp.E1(i) = do_ssrlrns_ ## E1(Vj->E2(i), imm, BIT/2 -1);              \
-        temp.E1(i + LSX_LEN/BIT) = do_ssrlrns_ ## E1(Vd->E2(i), imm, BIT/2 -1);\
-    }                                                                          \
-    *Vd = temp;                                                                \
-}
-
-#define VSSRLRNI_Q(NAME, sh)                                               \
-void HELPER(NAME)(CPULoongArchState *env,                                  \
-                          uint32_t vd, uint32_t vj, uint32_t imm)          \
-{                                                                          \
-    Int128 shft_res1, shft_res2, mask, r1, r2;                             \
-    VReg *Vd = &(env->fpr[vd].vreg);                                       \
-    VReg *Vj = &(env->fpr[vj].vreg);                                       \
-                                                                           \
-    if (imm == 0) {                                                        \
-        shft_res1 = Vj->Q(0);                                              \
-        shft_res2 = Vd->Q(0);                                              \
-    } else {                                                               \
-        r1 = int128_and(int128_urshift(Vj->Q(0), (imm -1)), int128_one()); \
-        r2 = int128_and(int128_urshift(Vd->Q(0), (imm -1)), int128_one()); \
-                                                                           \
-        shft_res1 = (int128_add(int128_urshift(Vj->Q(0), imm), r1));       \
-        shft_res2 = (int128_add(int128_urshift(Vd->Q(0), imm), r2));       \
-    }                                                                      \
-                                                                           \
-    mask = int128_sub(int128_lshift(int128_one(), sh), int128_one());      \
-                                                                           \
-    if (int128_ult(mask, shft_res1)) {                                     \
-        Vd->D(0) = int128_getlo(mask);                                     \
-    }else {                                                                \
-        Vd->D(0) = int128_getlo(shft_res1);                                \
-    }                                                                      \
-                                                                           \
-    if (int128_ult(mask, shft_res2)) {                                     \
-        Vd->D(1) = int128_getlo(mask);                                     \
-    }else {                                                                \
-        Vd->D(1) = int128_getlo(shft_res2);                                \
-    }                                                                      \
-}
-
-VSSRLRNI(vssrlrni_b_h, 16, B, H)
-VSSRLRNI(vssrlrni_h_w, 32, H, W)
-VSSRLRNI(vssrlrni_w_d, 64, W, D)
-VSSRLRNI_Q(vssrlrni_d_q, 63)
-
-#define VSSRARNI(NAME, BIT, E1, E2)                                             \
-void HELPER(NAME)(CPULoongArchState *env,                                       \
-                  uint32_t vd, uint32_t vj, uint32_t imm)                       \
-{                                                                               \
-    int i;                                                                      \
-    VReg temp;                                                                  \
-    VReg *Vd = &(env->fpr[vd].vreg);                                            \
-    VReg *Vj = &(env->fpr[vj].vreg);                                            \
-                                                                                \
-    for (i = 0; i < LSX_LEN/BIT; i++) {                                         \
-        temp.E1(i) = do_ssrarns_ ## E1(Vj->E2(i), imm, BIT/2 -1);               \
-        temp.E1(i + LSX_LEN/BIT) = do_ssrarns_ ## E1(Vd->E2(i), imm, BIT/2 -1); \
-    }                                                                           \
-    *Vd = temp;                                                                 \
-}
-
-void HELPER(vssrarni_d_q)(CPULoongArchState *env,
-                          uint32_t vd, uint32_t vj, uint32_t imm)
-{
-    Int128 shft_res1, shft_res2, mask1, mask2, r1, r2;
-    VReg *Vd = &(env->fpr[vd].vreg);
-    VReg *Vj = &(env->fpr[vj].vreg);
-
-    if (imm == 0) {
-        shft_res1 = Vj->Q(0);
-        shft_res2 = Vd->Q(0);
-    } else {
-        r1 = int128_and(int128_rshift(Vj->Q(0), (imm -1)), int128_one());
-        r2 = int128_and(int128_rshift(Vd->Q(0), (imm -1)), int128_one());
-
-        shft_res1 = int128_add(int128_rshift(Vj->Q(0), imm), r1);
-        shft_res2 = int128_add(int128_rshift(Vd->Q(0), imm), r2);
-    }
-
-    mask1 = int128_sub(int128_lshift(int128_one(), 63), int128_one());
-    mask2  = int128_lshift(int128_one(), 63);
-
-    if (int128_gt(shft_res1,  mask1)) {
-        Vd->D(0) = int128_getlo(mask1);
-    } else if (int128_lt(shft_res1, int128_neg(mask2))) {
-        Vd->D(0) = int128_getlo(mask2);
-    } else {
-        Vd->D(0) = int128_getlo(shft_res1);
-    }
-
-    if (int128_gt(shft_res2, mask1)) {
-        Vd->D(1) = int128_getlo(mask1);
-    } else if (int128_lt(shft_res2, int128_neg(mask2))) {
-        Vd->D(1) = int128_getlo(mask2);
-    } else {
-        Vd->D(1) = int128_getlo(shft_res2);
-    }
-}
-
-VSSRARNI(vssrarni_b_h, 16, B, H)
-VSSRARNI(vssrarni_h_w, 32, H, W)
-VSSRARNI(vssrarni_w_d, 64, W, D)
-
-#define VSSRLRNUI(NAME, BIT, E1, E2)                                         \
-void HELPER(NAME)(CPULoongArchState *env,                                    \
-                  uint32_t vd, uint32_t vj, uint32_t imm)                    \
-{                                                                            \
-    int i;                                                                   \
-    VReg temp;                                                               \
-    VReg *Vd = &(env->fpr[vd].vreg);                                         \
-    VReg *Vj = &(env->fpr[vj].vreg);                                         \
-                                                                             \
-    for (i = 0; i < LSX_LEN/BIT; i++) {                                      \
-        temp.E1(i) = do_ssrlrnu_ ## E1(Vj->E2(i), imm, BIT/2);               \
-        temp.E1(i + LSX_LEN/BIT) = do_ssrlrnu_ ## E1(Vd->E2(i), imm, BIT/2); \
-    }                                                                        \
-    *Vd = temp;                                                              \
-}
-
-VSSRLRNUI(vssrlrni_bu_h, 16, B, H)
-VSSRLRNUI(vssrlrni_hu_w, 32, H, W)
-VSSRLRNUI(vssrlrni_wu_d, 64, W, D)
-VSSRLRNI_Q(vssrlrni_du_q, 64)
-
-#define VSSRARNUI(NAME, BIT, E1, E2)                                         \
-void HELPER(NAME)(CPULoongArchState *env,                                    \
-                  uint32_t vd, uint32_t vj, uint32_t imm)                    \
-{                                                                            \
-    int i;                                                                   \
-    VReg temp;                                                               \
-    VReg *Vd = &(env->fpr[vd].vreg);                                         \
-    VReg *Vj = &(env->fpr[vj].vreg);                                         \
-                                                                             \
-    for (i = 0; i < LSX_LEN/BIT; i++) {                                      \
-        temp.E1(i) = do_ssrarnu_ ## E1(Vj->E2(i), imm, BIT/2);               \
-        temp.E1(i + LSX_LEN/BIT) = do_ssrarnu_ ## E1(Vd->E2(i), imm, BIT/2); \
-    }                                                                        \
-    *Vd = temp;                                                              \
-}
-
-void HELPER(vssrarni_du_q)(CPULoongArchState *env,
-                           uint32_t vd, uint32_t vj, uint32_t imm)
-{
-    Int128 shft_res1, shft_res2, mask1, mask2, r1, r2;
-    VReg *Vd = &(env->fpr[vd].vreg);
-    VReg *Vj = &(env->fpr[vj].vreg);
-
-    if (imm == 0) {
-        shft_res1 = Vj->Q(0);
-        shft_res2 = Vd->Q(0);
-    } else {
-        r1 = int128_and(int128_rshift(Vj->Q(0), (imm -1)), int128_one());
-        r2 = int128_and(int128_rshift(Vd->Q(0), (imm -1)), int128_one());
-
-        shft_res1 = int128_add(int128_rshift(Vj->Q(0), imm), r1);
-        shft_res2 = int128_add(int128_rshift(Vd->Q(0), imm), r2);
-    }
-
-    if (int128_lt(Vj->Q(0), int128_zero())) {
-        shft_res1 = int128_zero();
-    }
-    if (int128_lt(Vd->Q(0), int128_zero())) {
-        shft_res2 = int128_zero();
-    }
-
-    mask1 = int128_sub(int128_lshift(int128_one(), 64), int128_one());
-    mask2  = int128_lshift(int128_one(), 64);
-
-    if (int128_gt(shft_res1,  mask1)) {
-        Vd->D(0) = int128_getlo(mask1);
-    } else if (int128_lt(shft_res1, int128_neg(mask2))) {
-        Vd->D(0) = int128_getlo(mask2);
-    } else {
-        Vd->D(0) = int128_getlo(shft_res1);
-    }
-
-    if (int128_gt(shft_res2, mask1)) {
-        Vd->D(1) = int128_getlo(mask1);
-    } else if (int128_lt(shft_res2, int128_neg(mask2))) {
-        Vd->D(1) = int128_getlo(mask2);
-    } else {
-        Vd->D(1) = int128_getlo(shft_res2);
-    }
-}
-
-VSSRARNUI(vssrarni_bu_h, 16, B, H)
-VSSRARNUI(vssrarni_hu_w, 32, H, W)
-VSSRARNUI(vssrarni_wu_d, 64, W, D)
-
-#define DO_2OP(NAME, BIT, E, DO_OP)                                 \
-void HELPER(NAME)(CPULoongArchState *env, uint32_t vd, uint32_t vj) \
-{                                                                   \
-    int i;                                                          \
-    VReg *Vd = &(env->fpr[vd].vreg);                                \
-    VReg *Vj = &(env->fpr[vj].vreg);                                \
-                                                                    \
-    for (i = 0; i < LSX_LEN/BIT; i++)                               \
-    {                                                               \
-        Vd->E(i) = DO_OP(Vj->E(i));                                 \
-    }                                                               \
-}
-
-#define DO_CLO_B(N)  (clz32(~N & 0xff) - 24)
-#define DO_CLO_H(N)  (clz32(~N & 0xffff) - 16)
-#define DO_CLO_W(N)  (clz32(~N))
-#define DO_CLO_D(N)  (clz64(~N))
-#define DO_CLZ_B(N)  (clz32(N) - 24)
-#define DO_CLZ_H(N)  (clz32(N) - 16)
-#define DO_CLZ_W(N)  (clz32(N))
-#define DO_CLZ_D(N)  (clz64(N))
-
-DO_2OP(vclo_b, 8, UB, DO_CLO_B)
-DO_2OP(vclo_h, 16, UH, DO_CLO_H)
-DO_2OP(vclo_w, 32, UW, DO_CLO_W)
-DO_2OP(vclo_d, 64, UD, DO_CLO_D)
-DO_2OP(vclz_b, 8, UB, DO_CLZ_B)
-DO_2OP(vclz_h, 16, UH, DO_CLZ_H)
-DO_2OP(vclz_w, 32, UW, DO_CLZ_W)
-DO_2OP(vclz_d, 64, UD, DO_CLZ_D)
-
-#define VPCNT(NAME, BIT, E, FN)                                     \
-void HELPER(NAME)(CPULoongArchState *env, uint32_t vd, uint32_t vj) \
-{                                                                   \
-    int i;                                                          \
-    VReg *Vd = &(env->fpr[vd].vreg);                                \
-    VReg *Vj = &(env->fpr[vj].vreg);                                \
-                                                                    \
-    for (i = 0; i < LSX_LEN/BIT; i++)                               \
-    {                                                               \
-        Vd->E(i) = FN(Vj->E(i));                                    \
-    }                                                               \
-}
-
-VPCNT(vpcnt_b, 8, UB, ctpop8)
-VPCNT(vpcnt_h, 16, UH, ctpop16)
-VPCNT(vpcnt_w, 32, UW, ctpop32)
-VPCNT(vpcnt_d, 64, UD, ctpop64)
-
-#define DO_BITCLR(a, bit) (a & ~(1ull << bit))
-#define DO_BITSET(a, bit) (a | 1ull << bit)
-#define DO_BITREV(a, bit) (a ^ (1ull << bit))
-
-#define DO_BIT(NAME, BIT, E, DO_OP)                         \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \
-{                                                           \
-    int i;                                                  \
-    VReg *Vd = (VReg *)vd;                                  \
-    VReg *Vj = (VReg *)vj;                                  \
-    VReg *Vk = (VReg *)vk;                                  \
-                                                            \
-    for (i = 0; i < LSX_LEN/BIT; i++) {                     \
-        Vd->E(i) = DO_OP(Vj->E(i), Vk->E(i)%BIT);           \
-    }                                                       \
-}
-
-DO_BIT(vbitclr_b, 8, UB, DO_BITCLR)
-DO_BIT(vbitclr_h, 16, UH, DO_BITCLR)
-DO_BIT(vbitclr_w, 32, UW, DO_BITCLR)
-DO_BIT(vbitclr_d, 64, UD, DO_BITCLR)
-DO_BIT(vbitset_b, 8, UB, DO_BITSET)
-DO_BIT(vbitset_h, 16, UH, DO_BITSET)
-DO_BIT(vbitset_w, 32, UW, DO_BITSET)
-DO_BIT(vbitset_d, 64, UD, DO_BITSET)
-DO_BIT(vbitrev_b, 8, UB, DO_BITREV)
-DO_BIT(vbitrev_h, 16, UH, DO_BITREV)
-DO_BIT(vbitrev_w, 32, UW, DO_BITREV)
-DO_BIT(vbitrev_d, 64, UD, DO_BITREV)
-
-#define DO_BITI(NAME, BIT, E, DO_OP)                            \
-void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t v) \
-{                                                               \
-    int i;                                                      \
-    VReg *Vd = (VReg *)vd;                                      \
-    VReg *Vj = (VReg *)vj;                                      \
-                                                                \
-    for (i = 0; i < LSX_LEN/BIT; i++) {                         \
-        Vd->E(i) = DO_OP(Vj->E(i), imm);                        \
-    }                                                           \
-}
-
-DO_BITI(vbitclri_b, 8, UB, DO_BITCLR)
-DO_BITI(vbitclri_h, 16, UH, DO_BITCLR)
-DO_BITI(vbitclri_w, 32, UW, DO_BITCLR)
-DO_BITI(vbitclri_d, 64, UD, DO_BITCLR)
-DO_BITI(vbitseti_b, 8, UB, DO_BITSET)
-DO_BITI(vbitseti_h, 16, UH, DO_BITSET)
-DO_BITI(vbitseti_w, 32, UW, DO_BITSET)
-DO_BITI(vbitseti_d, 64, UD, DO_BITSET)
-DO_BITI(vbitrevi_b, 8, UB, DO_BITREV)
-DO_BITI(vbitrevi_h, 16, UH, DO_BITREV)
-DO_BITI(vbitrevi_w, 32, UW, DO_BITREV)
-DO_BITI(vbitrevi_d, 64, UD, DO_BITREV)
-
-#define VFRSTP(NAME, BIT, MASK, E)                       \
-void HELPER(NAME)(CPULoongArchState *env,                \
-                  uint32_t vd, uint32_t vj, uint32_t vk) \
-{                                                        \
-    int i, m;                                            \
-    VReg *Vd = &(env->fpr[vd].vreg);                     \
-    VReg *Vj = &(env->fpr[vj].vreg);                     \
-    VReg *Vk = &(env->fpr[vk].vreg);                     \
-                                                         \
-    for (i = 0; i < LSX_LEN/BIT; i++) {                  \
-        if (Vj->E(i) < 0) {                              \
-            break;                                       \
-        }                                                \
-    }                                                    \
-    m = Vk->E(0) & MASK;                                 \
-    Vd->E(m) = i;                                        \
-}
-
-VFRSTP(vfrstp_b, 8, 0xf, B)
-VFRSTP(vfrstp_h, 16, 0x7, H)
-
-#define VFRSTPI(NAME, BIT, E)                             \
-void HELPER(NAME)(CPULoongArchState *env,                 \
-                  uint32_t vd, uint32_t vj, uint32_t imm) \
-{                                                         \
-    int i, m;                                             \
-    VReg *Vd = &(env->fpr[vd].vreg);                      \
-    VReg *Vj = &(env->fpr[vj].vreg);                      \
-                                                          \
-    for (i = 0; i < LSX_LEN/BIT; i++) {                   \
-        if (Vj->E(i) < 0) {                               \
-            break;                                        \
-        }                                                 \
-    }                                                     \
-    m = imm % (LSX_LEN/BIT);                              \
-    Vd->E(m) = i;                                         \
-}
-
-VFRSTPI(vfrstpi_b, 8,  B)
-VFRSTPI(vfrstpi_h, 16, H)
-
-static void vec_update_fcsr0_mask(CPULoongArchState *env,
-                                  uintptr_t pc, int mask)
-{
-    int flags = get_float_exception_flags(&env->fp_status);
-
-    set_float_exception_flags(0, &env->fp_status);
-
-    flags &= ~mask;
-
-    if (flags) {
-        flags = ieee_ex_to_loongarch(flags);
-        UPDATE_FP_CAUSE(env->fcsr0, flags);
-    }
-
-    if (GET_FP_ENABLES(env->fcsr0) & flags) {
-        do_raise_exception(env, EXCCODE_FPE, pc);
-    } else {
-        UPDATE_FP_FLAGS(env->fcsr0, flags);
-    }
-}
-
-static void vec_update_fcsr0(CPULoongArchState *env, uintptr_t pc)
-{
-    vec_update_fcsr0_mask(env, pc, 0);
-}
-
-static inline void vec_clear_cause(CPULoongArchState *env)
-{
-    SET_FP_CAUSE(env->fcsr0, 0);
-}
-
-#define DO_3OP_F(NAME, BIT, E, FN)                          \
-void HELPER(NAME)(CPULoongArchState *env,                   \
-                  uint32_t vd, uint32_t vj, uint32_t vk)    \
-{                                                           \
-    int i;                                                  \
-    VReg *Vd = &(env->fpr[vd].vreg);                        \
-    VReg *Vj = &(env->fpr[vj].vreg);                        \
-    VReg *Vk = &(env->fpr[vk].vreg);                        \
-                                                            \
-    vec_clear_cause(env);                                   \
-    for (i = 0; i < LSX_LEN/BIT; i++) {                     \
-        Vd->E(i) = FN(Vj->E(i), Vk->E(i), &env->fp_status); \
-        vec_update_fcsr0(env, GETPC());                     \
-    }                                                       \
-}
-
-DO_3OP_F(vfadd_s, 32, UW, float32_add)
-DO_3OP_F(vfadd_d, 64, UD, float64_add)
-DO_3OP_F(vfsub_s, 32, UW, float32_sub)
-DO_3OP_F(vfsub_d, 64, UD, float64_sub)
-DO_3OP_F(vfmul_s, 32, UW, float32_mul)
-DO_3OP_F(vfmul_d, 64, UD, float64_mul)
-DO_3OP_F(vfdiv_s, 32, UW, float32_div)
-DO_3OP_F(vfdiv_d, 64, UD, float64_div)
-DO_3OP_F(vfmax_s, 32, UW, float32_maxnum)
-DO_3OP_F(vfmax_d, 64, UD, float64_maxnum)
-DO_3OP_F(vfmin_s, 32, UW, float32_minnum)
-DO_3OP_F(vfmin_d, 64, UD, float64_minnum)
-DO_3OP_F(vfmaxa_s, 32, UW, float32_maxnummag)
-DO_3OP_F(vfmaxa_d, 64, UD, float64_maxnummag)
-DO_3OP_F(vfmina_s, 32, UW, float32_minnummag)
-DO_3OP_F(vfmina_d, 64, UD, float64_minnummag)
-
-#define DO_4OP_F(NAME, BIT, E, FN, flags)                                    \
-void HELPER(NAME)(CPULoongArchState *env,                                    \
-                  uint32_t vd, uint32_t vj, uint32_t vk, uint32_t va)        \
-{                                                                            \
-    int i;                                                                   \
-    VReg *Vd = &(env->fpr[vd].vreg);                                         \
-    VReg *Vj = &(env->fpr[vj].vreg);                                         \
-    VReg *Vk = &(env->fpr[vk].vreg);                                         \
-    VReg *Va = &(env->fpr[va].vreg);                                         \
-                                                                             \
-    vec_clear_cause(env);                                                    \
-    for (i = 0; i < LSX_LEN/BIT; i++) {                                      \
-        Vd->E(i) = FN(Vj->E(i), Vk->E(i), Va->E(i), flags, &env->fp_status); \
-        vec_update_fcsr0(env, GETPC());                                      \
-    }                                                                        \
-}
-
-DO_4OP_F(vfmadd_s, 32, UW, float32_muladd, 0)
-DO_4OP_F(vfmadd_d, 64, UD, float64_muladd, 0)
-DO_4OP_F(vfmsub_s, 32, UW, float32_muladd, float_muladd_negate_c)
-DO_4OP_F(vfmsub_d, 64, UD, float64_muladd, float_muladd_negate_c)
-DO_4OP_F(vfnmadd_s, 32, UW, float32_muladd, float_muladd_negate_result)
-DO_4OP_F(vfnmadd_d, 64, UD, float64_muladd, float_muladd_negate_result)
-DO_4OP_F(vfnmsub_s, 32, UW, float32_muladd,
-         float_muladd_negate_c | float_muladd_negate_result)
-DO_4OP_F(vfnmsub_d, 64, UD, float64_muladd,
-         float_muladd_negate_c | float_muladd_negate_result)
-
-#define DO_2OP_F(NAME, BIT, E, FN)                                  \
-void HELPER(NAME)(CPULoongArchState *env, uint32_t vd, uint32_t vj) \
-{                                                                   \
-    int i;                                                          \
-    VReg *Vd = &(env->fpr[vd].vreg);                                \
-    VReg *Vj = &(env->fpr[vj].vreg);                                \
-                                                                    \
-    vec_clear_cause(env);                                           \
-    for (i = 0; i < LSX_LEN/BIT; i++) {                             \
-        Vd->E(i) = FN(env, Vj->E(i));                               \
-    }                                                               \
-}
-
-#define FLOGB(BIT, T)                                            \
-static T do_flogb_## BIT(CPULoongArchState *env, T fj)           \
-{                                                                \
-    T fp, fd;                                                    \
-    float_status *status = &env->fp_status;                      \
-    FloatRoundMode old_mode = get_float_rounding_mode(status);   \
-                                                                 \
-    set_float_rounding_mode(float_round_down, status);           \
-    fp = float ## BIT ##_log2(fj, status);                       \
-    fd = float ## BIT ##_round_to_int(fp, status);               \
-    set_float_rounding_mode(old_mode, status);                   \
-    vec_update_fcsr0_mask(env, GETPC(), float_flag_inexact);     \
-    return fd;                                                   \
-}
-
-FLOGB(32, uint32_t)
-FLOGB(64, uint64_t)
-
-#define FCLASS(NAME, BIT, E, FN)                                    \
-void HELPER(NAME)(CPULoongArchState *env, uint32_t vd, uint32_t vj) \
-{                                                                   \
-    int i;                                                          \
-    VReg *Vd = &(env->fpr[vd].vreg);                                \
-    VReg *Vj = &(env->fpr[vj].vreg);                                \
-                                                                    \
-    for (i = 0; i < LSX_LEN/BIT; i++) {                             \
-        Vd->E(i) = FN(env, Vj->E(i));                               \
-    }                                                               \
-}
-
-FCLASS(vfclass_s, 32, UW, helper_fclass_s)
-FCLASS(vfclass_d, 64, UD, helper_fclass_d)
-
-#define FSQRT(BIT, T)                                  \
-static T do_fsqrt_## BIT(CPULoongArchState *env, T fj) \
-{                                                      \
-    T fd;                                              \
-    fd = float ## BIT ##_sqrt(fj, &env->fp_status);    \
-    vec_update_fcsr0(env, GETPC());                    \
-    return fd;                                         \
-}
-
-FSQRT(32, uint32_t)
-FSQRT(64, uint64_t)
-
-#define FRECIP(BIT, T)                                                  \
-static T do_frecip_## BIT(CPULoongArchState *env, T fj)                 \
-{                                                                       \
-    T fd;                                                               \
-    fd = float ## BIT ##_div(float ## BIT ##_one, fj, &env->fp_status); \
-    vec_update_fcsr0(env, GETPC());                                     \
-    return fd;                                                          \
-}
-
-FRECIP(32, uint32_t)
-FRECIP(64, uint64_t)
-
-#define FRSQRT(BIT, T)                                                  \
-static T do_frsqrt_## BIT(CPULoongArchState *env, T fj)                 \
-{                                                                       \
-    T fd, fp;                                                           \
-    fp = float ## BIT ##_sqrt(fj, &env->fp_status);                     \
-    fd = float ## BIT ##_div(float ## BIT ##_one, fp, &env->fp_status); \
-    vec_update_fcsr0(env, GETPC());                                     \
-    return fd;                                                          \
-}
-
-FRSQRT(32, uint32_t)
-FRSQRT(64, uint64_t)
-
-DO_2OP_F(vflogb_s, 32, UW, do_flogb_32)
-DO_2OP_F(vflogb_d, 64, UD, do_flogb_64)
-DO_2OP_F(vfsqrt_s, 32, UW, do_fsqrt_32)
-DO_2OP_F(vfsqrt_d, 64, UD, do_fsqrt_64)
-DO_2OP_F(vfrecip_s, 32, UW, do_frecip_32)
-DO_2OP_F(vfrecip_d, 64, UD, do_frecip_64)
-DO_2OP_F(vfrsqrt_s, 32, UW, do_frsqrt_32)
-DO_2OP_F(vfrsqrt_d, 64, UD, do_frsqrt_64)
-
-static uint32_t float16_cvt_float32(uint16_t h, float_status *status)
-{
-    return float16_to_float32(h, true, status);
-}
-static uint64_t float32_cvt_float64(uint32_t s, float_status *status)
-{
-    return float32_to_float64(s, status);
-}
-
-static uint16_t float32_cvt_float16(uint32_t s, float_status *status)
-{
-    return float32_to_float16(s, true, status);
-}
-static uint32_t float64_cvt_float32(uint64_t d, float_status *status)
-{
-    return float64_to_float32(d, status);
-}
-
-void HELPER(vfcvtl_s_h)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
-{
-    int i;
-    VReg temp;
-    VReg *Vd = &(env->fpr[vd].vreg);
-    VReg *Vj = &(env->fpr[vj].vreg);
-
-    vec_clear_cause(env);
-    for (i = 0; i < LSX_LEN/32; i++) {
-        temp.UW(i) = float16_cvt_float32(Vj->UH(i), &env->fp_status);
-        vec_update_fcsr0(env, GETPC());
-    }
-    *Vd = temp;
-}
-
-void HELPER(vfcvtl_d_s)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
-{
-    int i;
-    VReg temp;
-    VReg *Vd = &(env->fpr[vd].vreg);
-    VReg *Vj = &(env->fpr[vj].vreg);
-
-    vec_clear_cause(env);
-    for (i = 0; i < LSX_LEN/64; i++) {
-        temp.UD(i) = float32_cvt_float64(Vj->UW(i), &env->fp_status);
-        vec_update_fcsr0(env, GETPC());
-    }
-    *Vd = temp;
-}
-
-void HELPER(vfcvth_s_h)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
-{
-    int i;
-    VReg temp;
-    VReg *Vd = &(env->fpr[vd].vreg);
-    VReg *Vj = &(env->fpr[vj].vreg);
-
-    vec_clear_cause(env);
-    for (i = 0; i < LSX_LEN/32; i++) {
-        temp.UW(i) = float16_cvt_float32(Vj->UH(i + 4), &env->fp_status);
-        vec_update_fcsr0(env, GETPC());
-    }
-    *Vd = temp;
-}
-
-void HELPER(vfcvth_d_s)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
-{
-    int i;
-    VReg temp;
-    VReg *Vd = &(env->fpr[vd].vreg);
-    VReg *Vj = &(env->fpr[vj].vreg);
-
-    vec_clear_cause(env);
-    for (i = 0; i < LSX_LEN/64; i++) {
-        temp.UD(i) = float32_cvt_float64(Vj->UW(i + 2), &env->fp_status);
-        vec_update_fcsr0(env, GETPC());
-    }
-    *Vd = temp;
-}
-
-void HELPER(vfcvt_h_s)(CPULoongArchState *env,
-                       uint32_t vd, uint32_t vj, uint32_t vk)
-{
-    int i;
-    VReg temp;
-    VReg *Vd = &(env->fpr[vd].vreg);
-    VReg *Vj = &(env->fpr[vj].vreg);
-    VReg *Vk = &(env->fpr[vk].vreg);
-
-    vec_clear_cause(env);
-    for(i = 0; i < LSX_LEN/32; i++) {
-        temp.UH(i + 4) = float32_cvt_float16(Vj->UW(i), &env->fp_status);
-        temp.UH(i)  = float32_cvt_float16(Vk->UW(i), &env->fp_status);
-        vec_update_fcsr0(env, GETPC());
-    }
-    *Vd = temp;
-}
-
-void HELPER(vfcvt_s_d)(CPULoongArchState *env,
-                       uint32_t vd, uint32_t vj, uint32_t vk)
-{
-    int i;
-    VReg temp;
-    VReg *Vd = &(env->fpr[vd].vreg);
-    VReg *Vj = &(env->fpr[vj].vreg);
-    VReg *Vk = &(env->fpr[vk].vreg);
-
-    vec_clear_cause(env);
-    for(i = 0; i < LSX_LEN/64; i++) {
-        temp.UW(i + 2) = float64_cvt_float32(Vj->UD(i), &env->fp_status);
-        temp.UW(i)  = float64_cvt_float32(Vk->UD(i), &env->fp_status);
-        vec_update_fcsr0(env, GETPC());
-    }
-    *Vd = temp;
-}
-
-void HELPER(vfrint_s)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
-{
-    int i;
-    VReg *Vd = &(env->fpr[vd].vreg);
-    VReg *Vj = &(env->fpr[vj].vreg);
-
-    vec_clear_cause(env);
-    for (i = 0; i < 4; i++) {
-        Vd->W(i) = float32_round_to_int(Vj->UW(i), &env->fp_status);
-        vec_update_fcsr0(env, GETPC());
-    }
-}
-
-void HELPER(vfrint_d)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
-{
-    int i;
-    VReg *Vd = &(env->fpr[vd].vreg);
-    VReg *Vj = &(env->fpr[vj].vreg);
-
-    vec_clear_cause(env);
-    for (i = 0; i < 2; i++) {
-        Vd->D(i) = float64_round_to_int(Vj->UD(i), &env->fp_status);
-        vec_update_fcsr0(env, GETPC());
-    }
-}
-
-#define FCVT_2OP(NAME, BIT, E, MODE)                                        \
-void HELPER(NAME)(CPULoongArchState *env, uint32_t vd, uint32_t vj)         \
-{                                                                           \
-    int i;                                                                  \
-    VReg *Vd = &(env->fpr[vd].vreg);                                        \
-    VReg *Vj = &(env->fpr[vj].vreg);                                        \
-                                                                            \
-    vec_clear_cause(env);                                                   \
-    for (i = 0; i < LSX_LEN/BIT; i++) {                                     \
-        FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status); \
-        set_float_rounding_mode(MODE, &env->fp_status);                     \
-        Vd->E(i) = float## BIT ## _round_to_int(Vj->E(i), &env->fp_status); \
-        set_float_rounding_mode(old_mode, &env->fp_status);                 \
-        vec_update_fcsr0(env, GETPC());                                     \
-    }                                                                       \
-}
-
-FCVT_2OP(vfrintrne_s, 32, UW, float_round_nearest_even)
-FCVT_2OP(vfrintrne_d, 64, UD, float_round_nearest_even)
-FCVT_2OP(vfrintrz_s, 32, UW, float_round_to_zero)
-FCVT_2OP(vfrintrz_d, 64, UD, float_round_to_zero)
-FCVT_2OP(vfrintrp_s, 32, UW, float_round_up)
-FCVT_2OP(vfrintrp_d, 64, UD, float_round_up)
-FCVT_2OP(vfrintrm_s, 32, UW, float_round_down)
-FCVT_2OP(vfrintrm_d, 64, UD, float_round_down)
-
-#define FTINT(NAME, FMT1, FMT2, T1, T2,  MODE)                          \
-static T2 do_ftint ## NAME(CPULoongArchState *env, T1 fj)               \
-{                                                                       \
-    T2 fd;                                                              \
-    FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status); \
-                                                                        \
-    set_float_rounding_mode(MODE, &env->fp_status);                     \
-    fd = do_## FMT1 ##_to_## FMT2(env, fj);                             \
-    set_float_rounding_mode(old_mode, &env->fp_status);                 \
-    return fd;                                                          \
-}
-
-#define DO_FTINT(FMT1, FMT2, T1, T2)                                         \
-static T2 do_## FMT1 ##_to_## FMT2(CPULoongArchState *env, T1 fj)            \
-{                                                                            \
-    T2 fd;                                                                   \
-                                                                             \
-    fd = FMT1 ##_to_## FMT2(fj, &env->fp_status);                            \
-    if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) { \
-        if (FMT1 ##_is_any_nan(fj)) {                                        \
-            fd = 0;                                                          \
-        }                                                                    \
-    }                                                                        \
-    vec_update_fcsr0(env, GETPC());                                          \
-    return fd;                                                               \
-}
-
-DO_FTINT(float32, int32, uint32_t, uint32_t)
-DO_FTINT(float64, int64, uint64_t, uint64_t)
-DO_FTINT(float32, uint32, uint32_t, uint32_t)
-DO_FTINT(float64, uint64, uint64_t, uint64_t)
-DO_FTINT(float64, int32, uint64_t, uint32_t)
-DO_FTINT(float32, int64, uint32_t, uint64_t)
-
-FTINT(rne_w_s, float32, int32, uint32_t, uint32_t, float_round_nearest_even)
-FTINT(rne_l_d, float64, int64, uint64_t, uint64_t, float_round_nearest_even)
-FTINT(rp_w_s, float32, int32, uint32_t, uint32_t, float_round_up)
-FTINT(rp_l_d, float64, int64, uint64_t, uint64_t, float_round_up)
-FTINT(rz_w_s, float32, int32, uint32_t, uint32_t, float_round_to_zero)
-FTINT(rz_l_d, float64, int64, uint64_t, uint64_t, float_round_to_zero)
-FTINT(rm_w_s, float32, int32, uint32_t, uint32_t, float_round_down)
-FTINT(rm_l_d, float64, int64, uint64_t, uint64_t, float_round_down)
-
-DO_2OP_F(vftintrne_w_s, 32, UW, do_ftintrne_w_s)
-DO_2OP_F(vftintrne_l_d, 64, UD, do_ftintrne_l_d)
-DO_2OP_F(vftintrp_w_s, 32, UW, do_ftintrp_w_s)
-DO_2OP_F(vftintrp_l_d, 64, UD, do_ftintrp_l_d)
-DO_2OP_F(vftintrz_w_s, 32, UW, do_ftintrz_w_s)
-DO_2OP_F(vftintrz_l_d, 64, UD, do_ftintrz_l_d)
-DO_2OP_F(vftintrm_w_s, 32, UW, do_ftintrm_w_s)
-DO_2OP_F(vftintrm_l_d, 64, UD, do_ftintrm_l_d)
-DO_2OP_F(vftint_w_s, 32, UW, do_float32_to_int32)
-DO_2OP_F(vftint_l_d, 64, UD, do_float64_to_int64)
-
-FTINT(rz_wu_s, float32, uint32, uint32_t, uint32_t, float_round_to_zero)
-FTINT(rz_lu_d, float64, uint64, uint64_t, uint64_t, float_round_to_zero)
-
-DO_2OP_F(vftintrz_wu_s, 32, UW, do_ftintrz_wu_s)
-DO_2OP_F(vftintrz_lu_d, 64, UD, do_ftintrz_lu_d)
-DO_2OP_F(vftint_wu_s, 32, UW, do_float32_to_uint32)
-DO_2OP_F(vftint_lu_d, 64, UD, do_float64_to_uint64)
-
-FTINT(rm_w_d, float64, int32, uint64_t, uint32_t, float_round_down)
-FTINT(rp_w_d, float64, int32, uint64_t, uint32_t, float_round_up)
-FTINT(rz_w_d, float64, int32, uint64_t, uint32_t, float_round_to_zero)
-FTINT(rne_w_d, float64, int32, uint64_t, uint32_t, float_round_nearest_even)
-
-#define FTINT_W_D(NAME, FN)                              \
-void HELPER(NAME)(CPULoongArchState *env,                \
-                  uint32_t vd, uint32_t vj, uint32_t vk) \
-{                                                        \
-    int i;                                               \
-    VReg temp;                                           \
-    VReg *Vd = &(env->fpr[vd].vreg);                     \
-    VReg *Vj = &(env->fpr[vj].vreg);                     \
-    VReg *Vk = &(env->fpr[vk].vreg);                     \
-                                                         \
-    vec_clear_cause(env);                                \
-    for (i = 0; i < 2; i++) {                            \
-        temp.W(i + 2) = FN(env, Vj->UD(i));              \
-        temp.W(i) = FN(env, Vk->UD(i));                  \
-    }                                                    \
-    *Vd = temp;                                          \
-}
-
-FTINT_W_D(vftint_w_d, do_float64_to_int32)
-FTINT_W_D(vftintrm_w_d, do_ftintrm_w_d)
-FTINT_W_D(vftintrp_w_d, do_ftintrp_w_d)
-FTINT_W_D(vftintrz_w_d, do_ftintrz_w_d)
-FTINT_W_D(vftintrne_w_d, do_ftintrne_w_d)
-
-FTINT(rml_l_s, float32, int64, uint32_t, uint64_t, float_round_down)
-FTINT(rpl_l_s, float32, int64, uint32_t, uint64_t, float_round_up)
-FTINT(rzl_l_s, float32, int64, uint32_t, uint64_t, float_round_to_zero)
-FTINT(rnel_l_s, float32, int64, uint32_t, uint64_t, float_round_nearest_even)
-FTINT(rmh_l_s, float32, int64, uint32_t, uint64_t, float_round_down)
-FTINT(rph_l_s, float32, int64, uint32_t, uint64_t, float_round_up)
-FTINT(rzh_l_s, float32, int64, uint32_t, uint64_t, float_round_to_zero)
-FTINT(rneh_l_s, float32, int64, uint32_t, uint64_t, float_round_nearest_even)
-
-#define FTINTL_L_S(NAME, FN)                                        \
-void HELPER(NAME)(CPULoongArchState *env, uint32_t vd, uint32_t vj) \
-{                                                                   \
-    int i;                                                          \
-    VReg temp;                                                      \
-    VReg *Vd = &(env->fpr[vd].vreg);                                \
-    VReg *Vj = &(env->fpr[vj].vreg);                                \
-                                                                    \
-    vec_clear_cause(env);                                           \
-    for (i = 0; i < 2; i++) {                                       \
-        temp.D(i) = FN(env, Vj->UW(i));                             \
-    }                                                               \
-    *Vd = temp;                                                     \
-}
-
-FTINTL_L_S(vftintl_l_s, do_float32_to_int64)
-FTINTL_L_S(vftintrml_l_s, do_ftintrml_l_s)
-FTINTL_L_S(vftintrpl_l_s, do_ftintrpl_l_s)
-FTINTL_L_S(vftintrzl_l_s, do_ftintrzl_l_s)
-FTINTL_L_S(vftintrnel_l_s, do_ftintrnel_l_s)
-
-#define FTINTH_L_S(NAME, FN)                                        \
-void HELPER(NAME)(CPULoongArchState *env, uint32_t vd, uint32_t vj) \
-{                                                                   \
-    int i;                                                          \
-    VReg temp;                                                      \
-    VReg *Vd = &(env->fpr[vd].vreg);                                \
-    VReg *Vj = &(env->fpr[vj].vreg);                                \
-                                                                    \
-    vec_clear_cause(env);                                           \
-    for (i = 0; i < 2; i++) {                                       \
-        temp.D(i) = FN(env, Vj->UW(i + 2));                         \
-    }                                                               \
-    *Vd = temp;                                                     \
-}
-
-FTINTH_L_S(vftinth_l_s, do_float32_to_int64)
-FTINTH_L_S(vftintrmh_l_s, do_ftintrmh_l_s)
-FTINTH_L_S(vftintrph_l_s, do_ftintrph_l_s)
-FTINTH_L_S(vftintrzh_l_s, do_ftintrzh_l_s)
-FTINTH_L_S(vftintrneh_l_s, do_ftintrneh_l_s)
-
-#define FFINT(NAME, FMT1, FMT2, T1, T2)                    \
-static T2 do_ffint_ ## NAME(CPULoongArchState *env, T1 fj) \
-{                                                          \
-    T2 fd;                                                 \
-                                                           \
-    fd = FMT1 ##_to_## FMT2(fj, &env->fp_status);          \
-    vec_update_fcsr0(env, GETPC());                        \
-    return fd;                                             \
-}
-
-FFINT(s_w, int32, float32, int32_t, uint32_t)
-FFINT(d_l, int64, float64, int64_t, uint64_t)
-FFINT(s_wu, uint32, float32, uint32_t, uint32_t)
-FFINT(d_lu, uint64, float64, uint64_t, uint64_t)
-
-DO_2OP_F(vffint_s_w, 32, W, do_ffint_s_w)
-DO_2OP_F(vffint_d_l, 64, D, do_ffint_d_l)
-DO_2OP_F(vffint_s_wu, 32, UW, do_ffint_s_wu)
-DO_2OP_F(vffint_d_lu, 64, UD, do_ffint_d_lu)
-
-void HELPER(vffintl_d_w)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
-{
-    int i;
-    VReg temp;
-    VReg *Vd = &(env->fpr[vd].vreg);
-    VReg *Vj = &(env->fpr[vj].vreg);
-
-    vec_clear_cause(env);
-    for (i = 0; i < 2; i++) {
-        temp.D(i) = int32_to_float64(Vj->W(i), &env->fp_status);
-        vec_update_fcsr0(env, GETPC());
-    }
-    *Vd = temp;
-}
-
-void HELPER(vffinth_d_w)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
-{
-    int i;
-    VReg temp;
-    VReg *Vd = &(env->fpr[vd].vreg);
-    VReg *Vj = &(env->fpr[vj].vreg);
-
-    vec_clear_cause(env);
-    for (i = 0; i < 2; i++) {
-        temp.D(i) = int32_to_float64(Vj->W(i + 2), &env->fp_status);
-        vec_update_fcsr0(env, GETPC());
-    }
-    *Vd = temp;
-}
-
-void HELPER(vffint_s_l)(CPULoongArchState *env,
-                        uint32_t vd, uint32_t vj, uint32_t vk)
-{
-    int i;
-    VReg temp;
-    VReg *Vd = &(env->fpr[vd].vreg);
-    VReg *Vj = &(env->fpr[vj].vreg);
-    VReg *Vk = &(env->fpr[vk].vreg);
-
-    vec_clear_cause(env);
-    for (i = 0; i < 2; i++) {
-        temp.W(i + 2) = int64_to_float32(Vj->D(i), &env->fp_status);
-        temp.W(i) = int64_to_float32(Vk->D(i), &env->fp_status);
-        vec_update_fcsr0(env, GETPC());
-    }
-    *Vd = temp;
-}
-
-#define VSEQ(a, b) (a == b ? -1 : 0)
-#define VSLE(a, b) (a <= b ? -1 : 0)
-#define VSLT(a, b) (a < b ? -1 : 0)
-
-#define VCMPI(NAME, BIT, E, DO_OP)                              \
-void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t v) \
-{                                                               \
-    int i;                                                      \
-    VReg *Vd = (VReg *)vd;                                      \
-    VReg *Vj = (VReg *)vj;                                      \
-    typedef __typeof(Vd->E(0)) TD;                              \
-                                                                \
-    for (i = 0; i < LSX_LEN/BIT; i++) {                         \
-        Vd->E(i) = DO_OP(Vj->E(i), (TD)imm);                    \
-    }                                                           \
-}
-
-VCMPI(vseqi_b, 8, B, VSEQ)
-VCMPI(vseqi_h, 16, H, VSEQ)
-VCMPI(vseqi_w, 32, W, VSEQ)
-VCMPI(vseqi_d, 64, D, VSEQ)
-VCMPI(vslei_b, 8, B, VSLE)
-VCMPI(vslei_h, 16, H, VSLE)
-VCMPI(vslei_w, 32, W, VSLE)
-VCMPI(vslei_d, 64, D, VSLE)
-VCMPI(vslei_bu, 8, UB, VSLE)
-VCMPI(vslei_hu, 16, UH, VSLE)
-VCMPI(vslei_wu, 32, UW, VSLE)
-VCMPI(vslei_du, 64, UD, VSLE)
-VCMPI(vslti_b, 8, B, VSLT)
-VCMPI(vslti_h, 16, H, VSLT)
-VCMPI(vslti_w, 32, W, VSLT)
-VCMPI(vslti_d, 64, D, VSLT)
-VCMPI(vslti_bu, 8, UB, VSLT)
-VCMPI(vslti_hu, 16, UH, VSLT)
-VCMPI(vslti_wu, 32, UW, VSLT)
-VCMPI(vslti_du, 64, UD, VSLT)
-
-static uint64_t vfcmp_common(CPULoongArchState *env,
-                             FloatRelation cmp, uint32_t flags)
-{
-    uint64_t ret = 0;
-
-    switch (cmp) {
-    case float_relation_less:
-        ret = (flags & FCMP_LT);
-        break;
-    case float_relation_equal:
-        ret = (flags & FCMP_EQ);
-        break;
-    case float_relation_greater:
-        ret = (flags & FCMP_GT);
-        break;
-    case float_relation_unordered:
-        ret = (flags & FCMP_UN);
-        break;
-    default:
-        g_assert_not_reached();
-    }
-
-    if (ret) {
-        ret = -1;
-    }
-
-    return ret;
-}
-
-#define VFCMP(NAME, BIT, E, FN)                                          \
-void HELPER(NAME)(CPULoongArchState *env,                                \
-                  uint32_t vd, uint32_t vj, uint32_t vk, uint32_t flags) \
-{                                                                        \
-    int i;                                                               \
-    VReg t;                                                              \
-    VReg *Vd = &(env->fpr[vd].vreg);                                     \
-    VReg *Vj = &(env->fpr[vj].vreg);                                     \
-    VReg *Vk = &(env->fpr[vk].vreg);                                     \
-                                                                         \
-    vec_clear_cause(env);                                                \
-    for (i = 0; i < LSX_LEN/BIT ; i++) {                                 \
-        FloatRelation cmp;                                               \
-        cmp = FN(Vj->E(i), Vk->E(i), &env->fp_status);                   \
-        t.E(i) = vfcmp_common(env, cmp, flags);                          \
-        vec_update_fcsr0(env, GETPC());                                  \
-    }                                                                    \
-    *Vd = t;                                                             \
-}
-
-VFCMP(vfcmp_c_s, 32, UW, float32_compare_quiet)
-VFCMP(vfcmp_s_s, 32, UW, float32_compare)
-VFCMP(vfcmp_c_d, 64, UD, float64_compare_quiet)
-VFCMP(vfcmp_s_d, 64, UD, float64_compare)
-
-void HELPER(vbitseli_b)(void *vd, void *vj,  uint64_t imm, uint32_t v)
-{
-    int i;
-    VReg *Vd = (VReg *)vd;
-    VReg *Vj = (VReg *)vj;
-
-    for (i = 0; i < 16; i++) {
-        Vd->B(i) = (~Vd->B(i) & Vj->B(i)) | (Vd->B(i) & imm);
-    }
-}
-
-/* Copy from target/arm/tcg/sve_helper.c */
-static inline bool do_match2(uint64_t n, uint64_t m0, uint64_t m1, int esz)
-{
-    uint64_t bits = 8 << esz;
-    uint64_t ones = dup_const(esz, 1);
-    uint64_t signs = ones << (bits - 1);
-    uint64_t cmp0, cmp1;
-
-    cmp1 = dup_const(esz, n);
-    cmp0 = cmp1 ^ m0;
-    cmp1 = cmp1 ^ m1;
-    cmp0 = (cmp0 - ones) & ~cmp0;
-    cmp1 = (cmp1 - ones) & ~cmp1;
-    return (cmp0 | cmp1) & signs;
-}
-
-#define SETANYEQZ(NAME, MO)                                         \
-void HELPER(NAME)(CPULoongArchState *env, uint32_t cd, uint32_t vj) \
-{                                                                   \
-    VReg *Vj = &(env->fpr[vj].vreg);                                \
-                                                                    \
-    env->cf[cd & 0x7] = do_match2(0, Vj->D(0), Vj->D(1), MO);       \
-}
-SETANYEQZ(vsetanyeqz_b, MO_8)
-SETANYEQZ(vsetanyeqz_h, MO_16)
-SETANYEQZ(vsetanyeqz_w, MO_32)
-SETANYEQZ(vsetanyeqz_d, MO_64)
-
-#define SETALLNEZ(NAME, MO)                                         \
-void HELPER(NAME)(CPULoongArchState *env, uint32_t cd, uint32_t vj) \
-{                                                                   \
-    VReg *Vj = &(env->fpr[vj].vreg);                                \
-                                                                    \
-    env->cf[cd & 0x7]= !do_match2(0, Vj->D(0), Vj->D(1), MO);       \
-}
-SETALLNEZ(vsetallnez_b, MO_8)
-SETALLNEZ(vsetallnez_h, MO_16)
-SETALLNEZ(vsetallnez_w, MO_32)
-SETALLNEZ(vsetallnez_d, MO_64)
-
-#define VPACKEV(NAME, BIT, E)                            \
-void HELPER(NAME)(CPULoongArchState *env,                \
-                  uint32_t vd, uint32_t vj, uint32_t vk) \
-{                                                        \
-    int i;                                               \
-    VReg temp;                                           \
-    VReg *Vd = &(env->fpr[vd].vreg);                     \
-    VReg *Vj = &(env->fpr[vj].vreg);                     \
-    VReg *Vk = &(env->fpr[vk].vreg);                     \
-                                                         \
-    for (i = 0; i < LSX_LEN/BIT; i++) {                  \
-        temp.E(2 * i + 1) = Vj->E(2 * i);                \
-        temp.E(2 *i) = Vk->E(2 * i);                     \
-    }                                                    \
-    *Vd = temp;                                          \
-}
-
-VPACKEV(vpackev_b, 16, B)
-VPACKEV(vpackev_h, 32, H)
-VPACKEV(vpackev_w, 64, W)
-VPACKEV(vpackev_d, 128, D)
-
-#define VPACKOD(NAME, BIT, E)                            \
-void HELPER(NAME)(CPULoongArchState *env,                \
-                  uint32_t vd, uint32_t vj, uint32_t vk) \
-{                                                        \
-    int i;                                               \
-    VReg temp;                                           \
-    VReg *Vd = &(env->fpr[vd].vreg);                     \
-    VReg *Vj = &(env->fpr[vj].vreg);                     \
-    VReg *Vk = &(env->fpr[vk].vreg);                     \
-                                                         \
-    for (i = 0; i < LSX_LEN/BIT; i++) {                  \
-        temp.E(2 * i + 1) = Vj->E(2 * i + 1);            \
-        temp.E(2 * i) = Vk->E(2 * i + 1);                \
-    }                                                    \
-    *Vd = temp;                                          \
-}
-
-VPACKOD(vpackod_b, 16, B)
-VPACKOD(vpackod_h, 32, H)
-VPACKOD(vpackod_w, 64, W)
-VPACKOD(vpackod_d, 128, D)
-
-#define VPICKEV(NAME, BIT, E)                            \
-void HELPER(NAME)(CPULoongArchState *env,                \
-                  uint32_t vd, uint32_t vj, uint32_t vk) \
-{                                                        \
-    int i;                                               \
-    VReg temp;                                           \
-    VReg *Vd = &(env->fpr[vd].vreg);                     \
-    VReg *Vj = &(env->fpr[vj].vreg);                     \
-    VReg *Vk = &(env->fpr[vk].vreg);                     \
-                                                         \
-    for (i = 0; i < LSX_LEN/BIT; i++) {                  \
-        temp.E(i + LSX_LEN/BIT) = Vj->E(2 * i);          \
-        temp.E(i) = Vk->E(2 * i);                        \
-    }                                                    \
-    *Vd = temp;                                          \
-}
-
-VPICKEV(vpickev_b, 16, B)
-VPICKEV(vpickev_h, 32, H)
-VPICKEV(vpickev_w, 64, W)
-VPICKEV(vpickev_d, 128, D)
-
-#define VPICKOD(NAME, BIT, E)                            \
-void HELPER(NAME)(CPULoongArchState *env,                \
-                  uint32_t vd, uint32_t vj, uint32_t vk) \
-{                                                        \
-    int i;                                               \
-    VReg temp;                                           \
-    VReg *Vd = &(env->fpr[vd].vreg);                     \
-    VReg *Vj = &(env->fpr[vj].vreg);                     \
-    VReg *Vk = &(env->fpr[vk].vreg);                     \
-                                                         \
-    for (i = 0; i < LSX_LEN/BIT; i++) {                  \
-        temp.E(i + LSX_LEN/BIT) = Vj->E(2 * i + 1);      \
-        temp.E(i) = Vk->E(2 * i + 1);                    \
-    }                                                    \
-    *Vd = temp;                                          \
-}
-
-VPICKOD(vpickod_b, 16, B)
-VPICKOD(vpickod_h, 32, H)
-VPICKOD(vpickod_w, 64, W)
-VPICKOD(vpickod_d, 128, D)
-
-#define VILVL(NAME, BIT, E)                              \
-void HELPER(NAME)(CPULoongArchState *env,                \
-                  uint32_t vd, uint32_t vj, uint32_t vk) \
-{                                                        \
-    int i;                                               \
-    VReg temp;                                           \
-    VReg *Vd = &(env->fpr[vd].vreg);                     \
-    VReg *Vj = &(env->fpr[vj].vreg);                     \
-    VReg *Vk = &(env->fpr[vk].vreg);                     \
-                                                         \
-    for (i = 0; i < LSX_LEN/BIT; i++) {                  \
-        temp.E(2 * i + 1) = Vj->E(i);                    \
-        temp.E(2 * i) = Vk->E(i);                        \
-    }                                                    \
-    *Vd = temp;                                          \
-}
-
-VILVL(vilvl_b, 16, B)
-VILVL(vilvl_h, 32, H)
-VILVL(vilvl_w, 64, W)
-VILVL(vilvl_d, 128, D)
-
-#define VILVH(NAME, BIT, E)                              \
-void HELPER(NAME)(CPULoongArchState *env,                \
-                  uint32_t vd, uint32_t vj, uint32_t vk) \
-{                                                        \
-    int i;                                               \
-    VReg temp;                                           \
-    VReg *Vd = &(env->fpr[vd].vreg);                     \
-    VReg *Vj = &(env->fpr[vj].vreg);                     \
-    VReg *Vk = &(env->fpr[vk].vreg);                     \
-                                                         \
-    for (i = 0; i < LSX_LEN/BIT; i++) {                  \
-        temp.E(2 * i + 1) = Vj->E(i + LSX_LEN/BIT);      \
-        temp.E(2 * i) = Vk->E(i + LSX_LEN/BIT);          \
-    }                                                    \
-    *Vd = temp;                                          \
-}
-
-VILVH(vilvh_b, 16, B)
-VILVH(vilvh_h, 32, H)
-VILVH(vilvh_w, 64, W)
-VILVH(vilvh_d, 128, D)
-
-void HELPER(vshuf_b)(CPULoongArchState *env,
-                     uint32_t vd, uint32_t vj, uint32_t vk, uint32_t va)
-{
-    int i, m;
-    VReg temp;
-    VReg *Vd = &(env->fpr[vd].vreg);
-    VReg *Vj = &(env->fpr[vj].vreg);
-    VReg *Vk = &(env->fpr[vk].vreg);
-    VReg *Va = &(env->fpr[va].vreg);
-
-    m = LSX_LEN/8;
-    for (i = 0; i < m ; i++) {
-        uint64_t k = (uint8_t)Va->B(i) % (2 * m);
-        temp.B(i) = k < m ? Vk->B(k) : Vj->B(k - m);
-    }
-    *Vd = temp;
-}
-
-#define VSHUF(NAME, BIT, E)                              \
-void HELPER(NAME)(CPULoongArchState *env,                \
-                  uint32_t vd, uint32_t vj, uint32_t vk) \
-{                                                        \
-    int i, m;                                            \
-    VReg temp;                                           \
-    VReg *Vd = &(env->fpr[vd].vreg);                     \
-    VReg *Vj = &(env->fpr[vj].vreg);                     \
-    VReg *Vk = &(env->fpr[vk].vreg);                     \
-                                                         \
-    m = LSX_LEN/BIT;                                     \
-    for (i = 0; i < m; i++) {                            \
-        uint64_t k  = ((uint8_t) Vd->E(i)) % (2 * m);    \
-        temp.E(i) = k < m ? Vk->E(k) : Vj->E(k - m);     \
-    }                                                    \
-    *Vd = temp;                                          \
-}
-
-VSHUF(vshuf_h, 16, H)
-VSHUF(vshuf_w, 32, W)
-VSHUF(vshuf_d, 64, D)
-
-#define VSHUF4I(NAME, BIT, E)                             \
-void HELPER(NAME)(CPULoongArchState *env,                 \
-                  uint32_t vd, uint32_t vj, uint32_t imm) \
-{                                                         \
-    int i;                                                \
-    VReg temp;                                            \
-    VReg *Vd = &(env->fpr[vd].vreg);                      \
-    VReg *Vj = &(env->fpr[vj].vreg);                      \
-                                                          \
-    for (i = 0; i < LSX_LEN/BIT; i++) {                   \
-         temp.E(i) = Vj->E(((i) & 0xfc) + (((imm) >>      \
-                           (2 * ((i) & 0x03))) & 0x03));  \
-    }                                                     \
-    *Vd = temp;                                           \
-}
-
-VSHUF4I(vshuf4i_b, 8, B)
-VSHUF4I(vshuf4i_h, 16, H)
-VSHUF4I(vshuf4i_w, 32, W)
-
-void HELPER(vshuf4i_d)(CPULoongArchState *env,
-                       uint32_t vd, uint32_t vj, uint32_t imm)
-{
-    VReg *Vd = &(env->fpr[vd].vreg);
-    VReg *Vj = &(env->fpr[vj].vreg);
-
-    VReg temp;
-    temp.D(0) = (imm & 2 ? Vj : Vd)->D(imm & 1);
-    temp.D(1) = (imm & 8 ? Vj : Vd)->D((imm >> 2) & 1);
-    *Vd = temp;
-}
-
-void HELPER(vpermi_w)(CPULoongArchState *env,
-                      uint32_t vd, uint32_t vj, uint32_t imm)
-{
-    VReg temp;
-    VReg *Vd = &(env->fpr[vd].vreg);
-    VReg *Vj = &(env->fpr[vj].vreg);
-
-    temp.W(0) = Vj->W(imm & 0x3);
-    temp.W(1) = Vj->W((imm >> 2) & 0x3);
-    temp.W(2) = Vd->W((imm >> 4) & 0x3);
-    temp.W(3) = Vd->W((imm >> 6) & 0x3);
-    *Vd = temp;
-}
-
-#define VEXTRINS(NAME, BIT, E, MASK)                      \
-void HELPER(NAME)(CPULoongArchState *env,                 \
-                  uint32_t vd, uint32_t vj, uint32_t imm) \
-{                                                         \
-    int ins, extr;                                        \
-    VReg *Vd = &(env->fpr[vd].vreg);                      \
-    VReg *Vj = &(env->fpr[vj].vreg);                      \
-                                                          \
-    ins = (imm >> 4) & MASK;                              \
-    extr = imm & MASK;                                    \
-    Vd->E(ins) = Vj->E(extr);                             \
-}
-
-VEXTRINS(vextrins_b, 8, B, 0xf)
-VEXTRINS(vextrins_h, 16, H, 0x7)
-VEXTRINS(vextrins_w, 32, W, 0x3)
-VEXTRINS(vextrins_d, 64, D, 0x1)
diff --git a/target/loongarch/machine.c b/target/loongarch/machine.c
index d8ac99c9a4..1c4e01d076 100644
--- a/target/loongarch/machine.c
+++ b/target/loongarch/machine.c
@@ -8,7 +8,7 @@
 #include "qemu/osdep.h"
 #include "cpu.h"
 #include "migration/cpu.h"
-#include "internals.h"
+#include "vec.h"
 
 static const VMStateDescription vmstate_fpu_reg = {
     .name = "fpu_reg",
@@ -76,6 +76,39 @@ static const VMStateDescription vmstate_lsx = {
     },
 };
 
+static const VMStateDescription vmstate_lasxh_reg = {
+    .name = "lasxh_reg",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT64(UD(2), VReg),
+        VMSTATE_UINT64(UD(3), VReg),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+#define VMSTATE_LASXH_REGS(_field, _state, _start)          \
+    VMSTATE_STRUCT_SUB_ARRAY(_field, _state, _start, 32, 0, \
+                             vmstate_lasxh_reg, fpr_t)
+
+static bool lasx_needed(void *opaque)
+{
+    LoongArchCPU *cpu = opaque;
+
+    return FIELD_EX64(cpu->env.cpucfg[2], CPUCFG2, LASX);
+}
+
+static const VMStateDescription vmstate_lasx = {
+    .name = "cpu/lasx",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = lasx_needed,
+    .fields = (VMStateField[]) {
+        VMSTATE_LASXH_REGS(env.fpr, LoongArchCPU, 0),
+        VMSTATE_END_OF_LIST()
+    },
+};
+
 /* TLB state */
 const VMStateDescription vmstate_tlb = {
     .name = "cpu/tlb",
@@ -163,6 +196,7 @@ const VMStateDescription vmstate_loongarch_cpu = {
     .subsections = (const VMStateDescription*[]) {
         &vmstate_fpu,
         &vmstate_lsx,
+        &vmstate_lasx,
         NULL
     }
 };
diff --git a/target/loongarch/meson.build b/target/loongarch/meson.build
index b7a27df5a9..7fbf045a5d 100644
--- a/target/loongarch/meson.build
+++ b/target/loongarch/meson.build
@@ -11,7 +11,7 @@ loongarch_tcg_ss.add(files(
   'op_helper.c',
   'translate.c',
   'gdbstub.c',
-  'lsx_helper.c',
+  'vec_helper.c',
 ))
 loongarch_tcg_ss.add(zlib)
 
diff --git a/target/loongarch/op_helper.c b/target/loongarch/op_helper.c
index cf84f20aba..fe79c62fa4 100644
--- a/target/loongarch/op_helper.c
+++ b/target/loongarch/op_helper.c
@@ -7,7 +7,6 @@
 
 #include "qemu/osdep.h"
 #include "qemu/log.h"
-#include "qemu/main-loop.h"
 #include "cpu.h"
 #include "qemu/host-utils.h"
 #include "exec/helper-proto.h"
diff --git a/target/loongarch/translate.c b/target/loongarch/translate.c
index fd393ed76d..f6038fc567 100644
--- a/target/loongarch/translate.c
+++ b/target/loongarch/translate.c
@@ -18,6 +18,7 @@
 #include "fpu/softfloat.h"
 #include "translate.h"
 #include "internals.h"
+#include "vec.h"
 
 /* Global register indices */
 TCGv cpu_gpr[32], cpu_pc;
@@ -36,6 +37,18 @@ static inline int vec_full_offset(int regno)
     return  offsetof(CPULoongArchState, fpr[regno]);
 }
 
+static inline int vec_reg_offset(int regno, int index, MemOp mop)
+{
+    const uint8_t size = 1 << mop;
+    int offs = index * size;
+
+    if (HOST_BIG_ENDIAN && size < 8 ) {
+        offs ^= (8 - size);
+    }
+
+    return offs + vec_full_offset(regno);
+}
+
 static inline void get_vreg64(TCGv_i64 dest, int regno, int index)
 {
     tcg_gen_ld_i64(dest, cpu_env,
@@ -123,6 +136,10 @@ static void loongarch_tr_init_disas_context(DisasContextBase *dcbase,
         ctx->vl = LSX_LEN;
     }
 
+    if (FIELD_EX64(env->cpucfg[2], CPUCFG2, LASX)) {
+        ctx->vl = LASX_LEN;
+    }
+
     ctx->la64 = is_la64(env);
     ctx->va32 = (ctx->base.tb->flags & HW_FLAGS_VA32) != 0;
 
@@ -261,7 +278,7 @@ static uint64_t make_address_pc(DisasContext *ctx, uint64_t addr)
 #include "insn_trans/trans_fmemory.c.inc"
 #include "insn_trans/trans_branch.c.inc"
 #include "insn_trans/trans_privileged.c.inc"
-#include "insn_trans/trans_lsx.c.inc"
+#include "insn_trans/trans_vec.c.inc"
 
 static void loongarch_tr_translate_insn(DisasContextBase *dcbase, CPUState *cs)
 {
diff --git a/target/loongarch/translate.h b/target/loongarch/translate.h
index 89b49a859e..195f53573a 100644
--- a/target/loongarch/translate.h
+++ b/target/loongarch/translate.h
@@ -23,6 +23,7 @@
 #define avail_LSPW(C)  (FIELD_EX32((C)->cpucfg2, CPUCFG2, LSPW))
 #define avail_LAM(C)   (FIELD_EX32((C)->cpucfg2, CPUCFG2, LAM))
 #define avail_LSX(C)   (FIELD_EX32((C)->cpucfg2, CPUCFG2, LSX))
+#define avail_LASX(C)  (FIELD_EX32((C)->cpucfg2, CPUCFG2, LASX))
 #define avail_IOCSR(C) (FIELD_EX32((C)->cpucfg1, CPUCFG1, IOCSR))
 
 /*
diff --git a/target/loongarch/vec.h b/target/loongarch/vec.h
new file mode 100644
index 0000000000..3c9adf8427
--- /dev/null
+++ b/target/loongarch/vec.h
@@ -0,0 +1,75 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * QEMU LoongArch vector utilitites
+ *
+ * Copyright (c) 2023 Loongson Technology Corporation Limited
+ */
+
+#ifndef LOONGARCH_VEC_H
+#define LOONGARCH_VEC_H
+
+#if HOST_BIG_ENDIAN
+#define B(x)  B[(x) ^ 15]
+#define H(x)  H[(x) ^ 7]
+#define W(x)  W[(x) ^ 3]
+#define D(x)  D[(x) ^ 1]
+#define UB(x) UB[(x) ^ 15]
+#define UH(x) UH[(x) ^ 7]
+#define UW(x) UW[(x) ^ 3]
+#define UD(x) UD[(x) ^ 1]
+#define Q(x)  Q[x]
+#else
+#define B(x)  B[x]
+#define H(x)  H[x]
+#define W(x)  W[x]
+#define D(x)  D[x]
+#define UB(x) UB[x]
+#define UH(x) UH[x]
+#define UW(x) UW[x]
+#define UD(x) UD[x]
+#define Q(x)  Q[x]
+#endif /* HOST_BIG_ENDIAN */
+
+#define DO_ADD(a, b)  (a + b)
+#define DO_SUB(a, b)  (a - b)
+#define DO_VAVG(a, b)  ((a >> 1) + (b >> 1) + (a & b & 1))
+#define DO_VAVGR(a, b) ((a >> 1) + (b >> 1) + ((a | b) & 1))
+#define DO_VABSD(a, b)  ((a > b) ? (a -b) : (b-a))
+#define DO_VABS(a)  ((a < 0) ? (-a) : (a))
+#define DO_MIN(a, b) (a < b ? a : b)
+#define DO_MAX(a, b) (a > b ? a : b)
+#define DO_MUL(a, b) (a * b)
+#define DO_MADD(a, b, c)  (a + b * c)
+#define DO_MSUB(a, b, c)  (a - b * c)
+
+#define DO_DIVU(N, M) (unlikely(M == 0) ? 0 : N / M)
+#define DO_REMU(N, M) (unlikely(M == 0) ? 0 : N % M)
+#define DO_DIV(N, M)  (unlikely(M == 0) ? 0 :\
+        unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M)
+#define DO_REM(N, M)  (unlikely(M == 0) ? 0 :\
+        unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 0 : N % M)
+
+#define DO_SIGNCOV(a, b)  (a == 0 ? 0 : a < 0 ? -b : b)
+
+#define R_SHIFT(a, b) (a >> b)
+
+#define DO_CLO_B(N)  (clz32(~N & 0xff) - 24)
+#define DO_CLO_H(N)  (clz32(~N & 0xffff) - 16)
+#define DO_CLO_W(N)  (clz32(~N))
+#define DO_CLO_D(N)  (clz64(~N))
+#define DO_CLZ_B(N)  (clz32(N) - 24)
+#define DO_CLZ_H(N)  (clz32(N) - 16)
+#define DO_CLZ_W(N)  (clz32(N))
+#define DO_CLZ_D(N)  (clz64(N))
+
+#define DO_BITCLR(a, bit) (a & ~(1ull << bit))
+#define DO_BITSET(a, bit) (a | 1ull << bit)
+#define DO_BITREV(a, bit) (a ^ (1ull << bit))
+
+#define VSEQ(a, b) (a == b ? -1 : 0)
+#define VSLE(a, b) (a <= b ? -1 : 0)
+#define VSLT(a, b) (a < b ? -1 : 0)
+
+#define SHF_POS(i, imm) (((i) & 0xfc) + (((imm) >> (2 * ((i) & 0x03))) & 0x03))
+
+#endif /* LOONGARCH_VEC_H */
diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c
new file mode 100644
index 0000000000..3faf52cbc4
--- /dev/null
+++ b/target/loongarch/vec_helper.c
@@ -0,0 +1,3494 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * QEMU LoongArch vector helper functions.
+ *
+ * Copyright (c) 2022-2023 Loongson Technology Corporation Limited
+ */
+
+#include "qemu/osdep.h"
+#include "cpu.h"
+#include "exec/exec-all.h"
+#include "exec/helper-proto.h"
+#include "fpu/softfloat.h"
+#include "internals.h"
+#include "tcg/tcg.h"
+#include "vec.h"
+#include "tcg/tcg-gvec-desc.h"
+
+#define DO_ODD_EVEN(NAME, BIT, E1, E2, DO_OP)                        \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc)       \
+{                                                                    \
+    int i;                                                           \
+    VReg *Vd = (VReg *)vd;                                           \
+    VReg *Vj = (VReg *)vj;                                           \
+    VReg *Vk = (VReg *)vk;                                           \
+    typedef __typeof(Vd->E1(0)) TD;                                  \
+    int oprsz = simd_oprsz(desc);                                    \
+                                                                     \
+    for (i = 0; i < oprsz / (BIT / 8); i++) {                        \
+        Vd->E1(i) = DO_OP((TD)Vj->E2(2 * i + 1), (TD)Vk->E2(2 * i)); \
+    }                                                                \
+}
+
+DO_ODD_EVEN(vhaddw_h_b, 16, H, B, DO_ADD)
+DO_ODD_EVEN(vhaddw_w_h, 32, W, H, DO_ADD)
+DO_ODD_EVEN(vhaddw_d_w, 64, D, W, DO_ADD)
+
+void HELPER(vhaddw_q_d)(void *vd, void *vj, void *vk, uint32_t desc)
+{
+    int i;
+    VReg *Vd = (VReg *)vd;
+    VReg *Vj = (VReg *)vj;
+    VReg *Vk = (VReg *)vk;
+    int oprsz = simd_oprsz(desc);
+
+    for (i = 0; i < oprsz / 16 ; i++) {
+        Vd->Q(i) = int128_add(int128_makes64(Vj->D(2 * i + 1)),
+                              int128_makes64(Vk->D(2 * i)));
+    }
+}
+
+DO_ODD_EVEN(vhsubw_h_b, 16, H, B, DO_SUB)
+DO_ODD_EVEN(vhsubw_w_h, 32, W, H, DO_SUB)
+DO_ODD_EVEN(vhsubw_d_w, 64, D, W, DO_SUB)
+
+void HELPER(vhsubw_q_d)(void *vd, void *vj, void *vk, uint32_t desc)
+{
+    int i;
+    VReg *Vd = (VReg *)vd;
+    VReg *Vj = (VReg *)vj;
+    VReg *Vk = (VReg *)vk;
+    int oprsz = simd_oprsz(desc);
+
+    for (i = 0; i < oprsz / 16; i++) {
+        Vd->Q(i) = int128_sub(int128_makes64(Vj->D(2 * i + 1)),
+                              int128_makes64(Vk->D(2 * i)));
+    }
+}
+
+DO_ODD_EVEN(vhaddw_hu_bu, 16, UH, UB, DO_ADD)
+DO_ODD_EVEN(vhaddw_wu_hu, 32, UW, UH, DO_ADD)
+DO_ODD_EVEN(vhaddw_du_wu, 64, UD, UW, DO_ADD)
+
+void HELPER(vhaddw_qu_du)(void *vd, void *vj, void *vk, uint32_t desc)
+{
+    int i;
+    VReg *Vd = (VReg *)vd;
+    VReg *Vj = (VReg *)vj;
+    VReg *Vk = (VReg *)vk;
+    int oprsz = simd_oprsz(desc);
+
+    for (i = 0; i < oprsz / 16; i ++) {
+        Vd->Q(i) = int128_add(int128_make64(Vj->UD(2 * i + 1)),
+                              int128_make64(Vk->UD(2 * i)));
+    }
+}
+
+DO_ODD_EVEN(vhsubw_hu_bu, 16, UH, UB, DO_SUB)
+DO_ODD_EVEN(vhsubw_wu_hu, 32, UW, UH, DO_SUB)
+DO_ODD_EVEN(vhsubw_du_wu, 64, UD, UW, DO_SUB)
+
+void HELPER(vhsubw_qu_du)(void *vd, void *vj, void *vk, uint32_t desc)
+{
+    int i;
+    VReg *Vd = (VReg *)vd;
+    VReg *Vj = (VReg *)vj;
+    VReg *Vk = (VReg *)vk;
+    int oprsz = simd_oprsz(desc);
+
+    for (i = 0; i < oprsz / 16; i++) {
+        Vd->Q(i) = int128_sub(int128_make64(Vj->UD(2 * i + 1)),
+                              int128_make64(Vk->UD(2 * i)));
+    }
+}
+
+#define DO_EVEN(NAME, BIT, E1, E2, DO_OP)                        \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc)   \
+{                                                                \
+    int i;                                                       \
+    VReg *Vd = (VReg *)vd;                                       \
+    VReg *Vj = (VReg *)vj;                                       \
+    VReg *Vk = (VReg *)vk;                                       \
+    typedef __typeof(Vd->E1(0)) TD;                              \
+    int oprsz = simd_oprsz(desc);                                \
+                                                                 \
+    for (i = 0; i < oprsz / (BIT / 8); i++) {                    \
+        Vd->E1(i) = DO_OP((TD)Vj->E2(2 * i) ,(TD)Vk->E2(2 * i)); \
+    }                                                            \
+}
+
+#define DO_ODD(NAME, BIT, E1, E2, DO_OP)                                 \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc)           \
+{                                                                        \
+    int i;                                                               \
+    VReg *Vd = (VReg *)vd;                                               \
+    VReg *Vj = (VReg *)vj;                                               \
+    VReg *Vk = (VReg *)vk;                                               \
+    typedef __typeof(Vd->E1(0)) TD;                                      \
+    int oprsz = simd_oprsz(desc);                                        \
+                                                                         \
+    for (i = 0; i < oprsz / (BIT / 8); i++) {                            \
+        Vd->E1(i) = DO_OP((TD)Vj->E2(2 * i + 1), (TD)Vk->E2(2 * i + 1)); \
+    }                                                                    \
+}
+
+void HELPER(vaddwev_q_d)(void *vd, void *vj, void *vk, uint32_t desc)
+{
+    int i;
+    VReg *Vd = (VReg *)vd;
+    VReg *Vj = (VReg *)vj;
+    VReg *Vk = (VReg *)vk;
+    int oprsz = simd_oprsz(desc);
+
+    for (i = 0; i < oprsz / 16; i++) {
+        Vd->Q(i) = int128_add(int128_makes64(Vj->D(2 * i)),
+                              int128_makes64(Vk->D(2 * i)));
+    }
+}
+
+DO_EVEN(vaddwev_h_b, 16, H, B, DO_ADD)
+DO_EVEN(vaddwev_w_h, 32, W, H, DO_ADD)
+DO_EVEN(vaddwev_d_w, 64, D, W, DO_ADD)
+
+void HELPER(vaddwod_q_d)(void *vd, void *vj, void *vk, uint32_t desc)
+{
+    int i;
+    VReg *Vd = (VReg *)vd;
+    VReg *Vj = (VReg *)vj;
+    VReg *Vk = (VReg *)vk;
+    int oprsz = simd_oprsz(desc);
+
+    for (i = 0; i < oprsz / 16; i++) {
+        Vd->Q(i) = int128_add(int128_makes64(Vj->D(2 * i +1)),
+                              int128_makes64(Vk->D(2 * i +1)));
+    }
+}
+
+DO_ODD(vaddwod_h_b, 16, H, B, DO_ADD)
+DO_ODD(vaddwod_w_h, 32, W, H, DO_ADD)
+DO_ODD(vaddwod_d_w, 64, D, W, DO_ADD)
+
+void HELPER(vsubwev_q_d)(void *vd, void *vj, void *vk, uint32_t desc)
+{
+    int i;
+    VReg *Vd = (VReg *)vd;
+    VReg *Vj = (VReg *)vj;
+    VReg *Vk = (VReg *)vk;
+    int oprsz = simd_oprsz(desc);
+
+    for (i = 0; i < oprsz / 16; i++) {
+        Vd->Q(i) = int128_sub(int128_makes64(Vj->D(2 * i)),
+                              int128_makes64(Vk->D(2 * i)));
+    }
+}
+
+DO_EVEN(vsubwev_h_b, 16, H, B, DO_SUB)
+DO_EVEN(vsubwev_w_h, 32, W, H, DO_SUB)
+DO_EVEN(vsubwev_d_w, 64, D, W, DO_SUB)
+
+void HELPER(vsubwod_q_d)(void *vd, void *vj, void *vk, uint32_t desc)
+{
+    int i;
+    VReg *Vd = (VReg *)vd;
+    VReg *Vj = (VReg *)vj;
+    VReg *Vk = (VReg *)vk;
+    int oprsz = simd_oprsz(desc);
+
+    for (i = 0; i < oprsz / 16; i++) {
+        Vd->Q(i) = int128_sub(int128_makes64(Vj->D(2 * i + 1)),
+                              int128_makes64(Vk->D(2 * i + 1)));
+    }
+}
+
+DO_ODD(vsubwod_h_b, 16, H, B, DO_SUB)
+DO_ODD(vsubwod_w_h, 32, W, H, DO_SUB)
+DO_ODD(vsubwod_d_w, 64, D, W, DO_SUB)
+
+void HELPER(vaddwev_q_du)(void *vd, void *vj, void *vk, uint32_t desc)
+{
+    int i;
+    VReg *Vd = (VReg *)vd;
+    VReg *Vj = (VReg *)vj;
+    VReg *Vk = (VReg *)vk;
+    int oprsz = simd_oprsz(desc);
+
+    for (i = 0; i < oprsz / 16; i++) {
+        Vd->Q(i) = int128_add(int128_make64(Vj->UD(2 * i)),
+                              int128_make64(Vk->UD(2 * i)));
+    }
+}
+
+DO_EVEN(vaddwev_h_bu, 16, UH, UB, DO_ADD)
+DO_EVEN(vaddwev_w_hu, 32, UW, UH, DO_ADD)
+DO_EVEN(vaddwev_d_wu, 64, UD, UW, DO_ADD)
+
+void HELPER(vaddwod_q_du)(void *vd, void *vj, void *vk, uint32_t desc)
+{
+    int i;
+    VReg *Vd = (VReg *)vd;
+    VReg *Vj = (VReg *)vj;
+    VReg *Vk = (VReg *)vk;
+    int oprsz = simd_oprsz(desc);
+
+    for (i = 0; i < oprsz / 16; i++) {
+        Vd->Q(i) = int128_add(int128_make64(Vj->UD(2 * i + 1)),
+                              int128_make64(Vk->UD(2 * i + 1)));
+    }
+}
+
+DO_ODD(vaddwod_h_bu, 16, UH, UB, DO_ADD)
+DO_ODD(vaddwod_w_hu, 32, UW, UH, DO_ADD)
+DO_ODD(vaddwod_d_wu, 64, UD, UW, DO_ADD)
+
+void HELPER(vsubwev_q_du)(void *vd, void *vj, void *vk, uint32_t desc)
+{
+    int i;
+    VReg *Vd = (VReg *)vd;
+    VReg *Vj = (VReg *)vj;
+    VReg *Vk = (VReg *)vk;
+    int oprsz = simd_oprsz(desc);
+
+    for (i = 0; i < oprsz / 16; i++) {
+        Vd->Q(i) = int128_sub(int128_make64(Vj->UD(2 * i)),
+                              int128_make64(Vk->UD(2 * i)));
+    }
+}
+
+DO_EVEN(vsubwev_h_bu, 16, UH, UB, DO_SUB)
+DO_EVEN(vsubwev_w_hu, 32, UW, UH, DO_SUB)
+DO_EVEN(vsubwev_d_wu, 64, UD, UW, DO_SUB)
+
+void HELPER(vsubwod_q_du)(void *vd, void *vj, void *vk, uint32_t desc)
+{
+    int i;
+    VReg *Vd = (VReg *)vd;
+    VReg *Vj = (VReg *)vj;
+    VReg *Vk = (VReg *)vk;
+    int oprsz = simd_oprsz(desc);
+
+    for (i = 0; i < oprsz / 16; i++) {
+        Vd->Q(i) = int128_sub(int128_make64(Vj->UD(2 * i + 1)),
+                              int128_make64(Vk->UD(2 * i + 1)));
+    }
+}
+
+DO_ODD(vsubwod_h_bu, 16, UH, UB, DO_SUB)
+DO_ODD(vsubwod_w_hu, 32, UW, UH, DO_SUB)
+DO_ODD(vsubwod_d_wu, 64, UD, UW, DO_SUB)
+
+#define DO_EVEN_U_S(NAME, BIT, ES1, EU1, ES2, EU2, DO_OP)             \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc)        \
+{                                                                     \
+    int i;                                                            \
+    VReg *Vd = (VReg *)vd;                                            \
+    VReg *Vj = (VReg *)vj;                                            \
+    VReg *Vk = (VReg *)vk;                                            \
+    typedef __typeof(Vd->ES1(0)) TDS;                                 \
+    typedef __typeof(Vd->EU1(0)) TDU;                                 \
+    int oprsz = simd_oprsz(desc);                                     \
+                                                                      \
+    for (i = 0; i < oprsz / (BIT / 8); i++) {                         \
+        Vd->ES1(i) = DO_OP((TDU)Vj->EU2(2 * i) ,(TDS)Vk->ES2(2 * i)); \
+    }                                                                 \
+}
+
+#define DO_ODD_U_S(NAME, BIT, ES1, EU1, ES2, EU2, DO_OP)                      \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc)                \
+{                                                                             \
+    int i;                                                                    \
+    VReg *Vd = (VReg *)vd;                                                    \
+    VReg *Vj = (VReg *)vj;                                                    \
+    VReg *Vk = (VReg *)vk;                                                    \
+    typedef __typeof(Vd->ES1(0)) TDS;                                         \
+    typedef __typeof(Vd->EU1(0)) TDU;                                         \
+    int oprsz = simd_oprsz(desc);                                             \
+                                                                              \
+    for (i = 0; i < oprsz / (BIT / 8); i++) {                                 \
+        Vd->ES1(i) = DO_OP((TDU)Vj->EU2(2 * i + 1), (TDS)Vk->ES2(2 * i + 1)); \
+    }                                                                         \
+}
+
+void HELPER(vaddwev_q_du_d)(void *vd, void *vj, void *vk, uint32_t desc)
+{
+    int i;
+    VReg *Vd = (VReg *)vd;
+    VReg *Vj = (VReg *)vj;
+    VReg *Vk = (VReg *)vk;
+    int oprsz = simd_oprsz(desc);
+
+    for (i = 0; i < oprsz / 16; i++) {
+        Vd->Q(i) = int128_add(int128_make64(Vj->UD(2 * i)),
+                              int128_makes64(Vk->D(2 * i)));
+    }
+}
+
+DO_EVEN_U_S(vaddwev_h_bu_b, 16, H, UH, B, UB, DO_ADD)
+DO_EVEN_U_S(vaddwev_w_hu_h, 32, W, UW, H, UH, DO_ADD)
+DO_EVEN_U_S(vaddwev_d_wu_w, 64, D, UD, W, UW, DO_ADD)
+
+void HELPER(vaddwod_q_du_d)(void *vd, void *vj, void *vk, uint32_t desc)
+{
+    int i;
+    VReg *Vd = (VReg *)vd;
+    VReg *Vj = (VReg *)vj;
+    VReg *Vk = (VReg *)vk;
+    int oprsz = simd_oprsz(desc);
+
+    for (i = 0; i < oprsz / 16; i++) {
+        Vd->Q(i) = int128_add(int128_make64(Vj->UD(2 * i + 1)),
+                              int128_makes64(Vk->D(2 * i + 1)));
+    }
+}
+
+DO_ODD_U_S(vaddwod_h_bu_b, 16, H, UH, B, UB, DO_ADD)
+DO_ODD_U_S(vaddwod_w_hu_h, 32, W, UW, H, UH, DO_ADD)
+DO_ODD_U_S(vaddwod_d_wu_w, 64, D, UD, W, UW, DO_ADD)
+
+#define DO_3OP(NAME, BIT, E, DO_OP)                            \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{                                                              \
+    int i;                                                     \
+    VReg *Vd = (VReg *)vd;                                     \
+    VReg *Vj = (VReg *)vj;                                     \
+    VReg *Vk = (VReg *)vk;                                     \
+    int oprsz = simd_oprsz(desc);                              \
+                                                               \
+    for (i = 0; i < oprsz / (BIT / 8); i++) {                  \
+        Vd->E(i) = DO_OP(Vj->E(i), Vk->E(i));                  \
+    }                                                          \
+}
+
+DO_3OP(vavg_b, 8, B, DO_VAVG)
+DO_3OP(vavg_h, 16, H, DO_VAVG)
+DO_3OP(vavg_w, 32, W, DO_VAVG)
+DO_3OP(vavg_d, 64, D, DO_VAVG)
+DO_3OP(vavgr_b, 8, B, DO_VAVGR)
+DO_3OP(vavgr_h, 16, H, DO_VAVGR)
+DO_3OP(vavgr_w, 32, W, DO_VAVGR)
+DO_3OP(vavgr_d, 64, D, DO_VAVGR)
+DO_3OP(vavg_bu, 8, UB, DO_VAVG)
+DO_3OP(vavg_hu, 16, UH, DO_VAVG)
+DO_3OP(vavg_wu, 32, UW, DO_VAVG)
+DO_3OP(vavg_du, 64, UD, DO_VAVG)
+DO_3OP(vavgr_bu, 8, UB, DO_VAVGR)
+DO_3OP(vavgr_hu, 16, UH, DO_VAVGR)
+DO_3OP(vavgr_wu, 32, UW, DO_VAVGR)
+DO_3OP(vavgr_du, 64, UD, DO_VAVGR)
+
+DO_3OP(vabsd_b, 8, B, DO_VABSD)
+DO_3OP(vabsd_h, 16, H, DO_VABSD)
+DO_3OP(vabsd_w, 32, W, DO_VABSD)
+DO_3OP(vabsd_d, 64, D, DO_VABSD)
+DO_3OP(vabsd_bu, 8, UB, DO_VABSD)
+DO_3OP(vabsd_hu, 16, UH, DO_VABSD)
+DO_3OP(vabsd_wu, 32, UW, DO_VABSD)
+DO_3OP(vabsd_du, 64, UD, DO_VABSD)
+
+#define DO_VADDA(NAME, BIT, E)                                 \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{                                                              \
+    int i;                                                     \
+    VReg *Vd = (VReg *)vd;                                     \
+    VReg *Vj = (VReg *)vj;                                     \
+    VReg *Vk = (VReg *)vk;                                     \
+    int oprsz = simd_oprsz(desc);                              \
+                                                               \
+    for (i = 0; i < oprsz / (BIT / 8); i++) {                  \
+        Vd->E(i) = DO_VABS(Vj->E(i)) + DO_VABS(Vk->E(i));      \
+    }                                                          \
+}
+
+DO_VADDA(vadda_b, 8, B)
+DO_VADDA(vadda_h, 16, H)
+DO_VADDA(vadda_w, 32, W)
+DO_VADDA(vadda_d, 64, D)
+
+#define VMINMAXI(NAME, BIT, E, DO_OP)                              \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
+{                                                                  \
+    int i;                                                         \
+    VReg *Vd = (VReg *)vd;                                         \
+    VReg *Vj = (VReg *)vj;                                         \
+    typedef __typeof(Vd->E(0)) TD;                                 \
+    int oprsz = simd_oprsz(desc);                                  \
+                                                                   \
+    for (i = 0; i < oprsz / (BIT / 8); i++) {                      \
+        Vd->E(i) = DO_OP(Vj->E(i), (TD)imm);                       \
+    }                                                              \
+}
+
+VMINMAXI(vmini_b, 8, B, DO_MIN)
+VMINMAXI(vmini_h, 16, H, DO_MIN)
+VMINMAXI(vmini_w, 32, W, DO_MIN)
+VMINMAXI(vmini_d, 64, D, DO_MIN)
+VMINMAXI(vmaxi_b, 8, B, DO_MAX)
+VMINMAXI(vmaxi_h, 16, H, DO_MAX)
+VMINMAXI(vmaxi_w, 32, W, DO_MAX)
+VMINMAXI(vmaxi_d, 64, D, DO_MAX)
+VMINMAXI(vmini_bu, 8, UB, DO_MIN)
+VMINMAXI(vmini_hu, 16, UH, DO_MIN)
+VMINMAXI(vmini_wu, 32, UW, DO_MIN)
+VMINMAXI(vmini_du, 64, UD, DO_MIN)
+VMINMAXI(vmaxi_bu, 8, UB, DO_MAX)
+VMINMAXI(vmaxi_hu, 16, UH, DO_MAX)
+VMINMAXI(vmaxi_wu, 32, UW, DO_MAX)
+VMINMAXI(vmaxi_du, 64, UD, DO_MAX)
+
+#define DO_VMUH(NAME, BIT, E1, E2, DO_OP)                      \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{                                                              \
+    int i;                                                     \
+    VReg *Vd = (VReg *)vd;                                     \
+    VReg *Vj = (VReg *)vj;                                     \
+    VReg *Vk = (VReg *)vk;                                     \
+    typedef __typeof(Vd->E1(0)) T;                             \
+    int oprsz = simd_oprsz(desc);                              \
+                                                               \
+    for (i = 0; i < oprsz / (BIT / 8); i++) {                  \
+        Vd->E2(i) = ((T)Vj->E2(i)) * ((T)Vk->E2(i)) >> BIT;    \
+    }                                                          \
+}
+
+void HELPER(vmuh_d)(void *vd, void *vj, void *vk, uint32_t desc)
+{
+    int i;
+    uint64_t l, h;
+    VReg *Vd = (VReg *)vd;
+    VReg *Vj = (VReg *)vj;
+    VReg *Vk = (VReg *)vk;
+    int oprsz = simd_oprsz(desc);
+
+    for (i = 0; i < oprsz / 8; i++) {
+        muls64(&l, &h, Vj->D(i), Vk->D(i));
+        Vd->D(i) = h;
+    }
+}
+
+DO_VMUH(vmuh_b, 8, H, B, DO_MUH)
+DO_VMUH(vmuh_h, 16, W, H, DO_MUH)
+DO_VMUH(vmuh_w, 32, D, W, DO_MUH)
+
+void HELPER(vmuh_du)(void *vd, void *vj, void *vk, uint32_t desc)
+{
+    int i;
+    uint64_t l, h;
+    VReg *Vd = (VReg *)vd;
+    VReg *Vj = (VReg *)vj;
+    VReg *Vk = (VReg *)vk;
+    int oprsz = simd_oprsz(desc);
+
+    for (i = 0; i < oprsz / 8; i++) {
+        mulu64(&l, &h, Vj->D(i), Vk->D(i));
+        Vd->D(i) = h;
+    }
+}
+
+DO_VMUH(vmuh_bu, 8, UH, UB, DO_MUH)
+DO_VMUH(vmuh_hu, 16, UW, UH, DO_MUH)
+DO_VMUH(vmuh_wu, 32, UD, UW, DO_MUH)
+
+DO_EVEN(vmulwev_h_b, 16, H, B, DO_MUL)
+DO_EVEN(vmulwev_w_h, 32, W, H, DO_MUL)
+DO_EVEN(vmulwev_d_w, 64, D, W, DO_MUL)
+
+DO_ODD(vmulwod_h_b, 16, H, B, DO_MUL)
+DO_ODD(vmulwod_w_h, 32, W, H, DO_MUL)
+DO_ODD(vmulwod_d_w, 64, D, W, DO_MUL)
+
+DO_EVEN(vmulwev_h_bu, 16, UH, UB, DO_MUL)
+DO_EVEN(vmulwev_w_hu, 32, UW, UH, DO_MUL)
+DO_EVEN(vmulwev_d_wu, 64, UD, UW, DO_MUL)
+
+DO_ODD(vmulwod_h_bu, 16, UH, UB, DO_MUL)
+DO_ODD(vmulwod_w_hu, 32, UW, UH, DO_MUL)
+DO_ODD(vmulwod_d_wu, 64, UD, UW, DO_MUL)
+
+DO_EVEN_U_S(vmulwev_h_bu_b, 16, H, UH, B, UB, DO_MUL)
+DO_EVEN_U_S(vmulwev_w_hu_h, 32, W, UW, H, UH, DO_MUL)
+DO_EVEN_U_S(vmulwev_d_wu_w, 64, D, UD, W, UW, DO_MUL)
+
+DO_ODD_U_S(vmulwod_h_bu_b, 16, H, UH, B, UB, DO_MUL)
+DO_ODD_U_S(vmulwod_w_hu_h, 32, W, UW, H, UH, DO_MUL)
+DO_ODD_U_S(vmulwod_d_wu_w, 64, D, UD, W, UW, DO_MUL)
+
+#define VMADDSUB(NAME, BIT, E, DO_OP)                          \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{                                                              \
+    int i;                                                     \
+    VReg *Vd = (VReg *)vd;                                     \
+    VReg *Vj = (VReg *)vj;                                     \
+    VReg *Vk = (VReg *)vk;                                     \
+    int oprsz = simd_oprsz(desc);                              \
+                                                               \
+    for (i = 0; i < oprsz / (BIT / 8); i++) {                  \
+        Vd->E(i) = DO_OP(Vd->E(i), Vj->E(i) ,Vk->E(i));        \
+    }                                                          \
+}
+
+VMADDSUB(vmadd_b, 8, B, DO_MADD)
+VMADDSUB(vmadd_h, 16, H, DO_MADD)
+VMADDSUB(vmadd_w, 32, W, DO_MADD)
+VMADDSUB(vmadd_d, 64, D, DO_MADD)
+VMADDSUB(vmsub_b, 8, B, DO_MSUB)
+VMADDSUB(vmsub_h, 16, H, DO_MSUB)
+VMADDSUB(vmsub_w, 32, W, DO_MSUB)
+VMADDSUB(vmsub_d, 64, D, DO_MSUB)
+
+#define VMADDWEV(NAME, BIT, E1, E2, DO_OP)                        \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc)    \
+{                                                                 \
+    int i;                                                        \
+    VReg *Vd = (VReg *)vd;                                        \
+    VReg *Vj = (VReg *)vj;                                        \
+    VReg *Vk = (VReg *)vk;                                        \
+    typedef __typeof(Vd->E1(0)) TD;                               \
+    int oprsz = simd_oprsz(desc);                                 \
+                                                                  \
+    for (i = 0; i < oprsz / (BIT / 8); i++) {                     \
+        Vd->E1(i) += DO_OP((TD)Vj->E2(2 * i), (TD)Vk->E2(2 * i)); \
+    }                                                             \
+}
+
+VMADDWEV(vmaddwev_h_b, 16, H, B, DO_MUL)
+VMADDWEV(vmaddwev_w_h, 32, W, H, DO_MUL)
+VMADDWEV(vmaddwev_d_w, 64, D, W, DO_MUL)
+VMADDWEV(vmaddwev_h_bu, 16, UH, UB, DO_MUL)
+VMADDWEV(vmaddwev_w_hu, 32, UW, UH, DO_MUL)
+VMADDWEV(vmaddwev_d_wu, 64, UD, UW, DO_MUL)
+
+#define VMADDWOD(NAME, BIT, E1, E2, DO_OP)                     \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{                                                              \
+    int i;                                                     \
+    VReg *Vd = (VReg *)vd;                                     \
+    VReg *Vj = (VReg *)vj;                                     \
+    VReg *Vk = (VReg *)vk;                                     \
+    typedef __typeof(Vd->E1(0)) TD;                            \
+    int oprsz = simd_oprsz(desc);                              \
+                                                               \
+    for (i = 0; i < oprsz / (BIT / 8); i++) {                  \
+        Vd->E1(i) += DO_OP((TD)Vj->E2(2 * i + 1),              \
+                           (TD)Vk->E2(2 * i + 1));             \
+    }                                                          \
+}
+
+VMADDWOD(vmaddwod_h_b, 16, H, B, DO_MUL)
+VMADDWOD(vmaddwod_w_h, 32, W, H, DO_MUL)
+VMADDWOD(vmaddwod_d_w, 64, D, W, DO_MUL)
+VMADDWOD(vmaddwod_h_bu, 16,  UH, UB, DO_MUL)
+VMADDWOD(vmaddwod_w_hu, 32,  UW, UH, DO_MUL)
+VMADDWOD(vmaddwod_d_wu, 64,  UD, UW, DO_MUL)
+
+#define VMADDWEV_U_S(NAME, BIT, ES1, EU1, ES2, EU2, DO_OP)     \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{                                                              \
+    int i;                                                     \
+    VReg *Vd = (VReg *)vd;                                     \
+    VReg *Vj = (VReg *)vj;                                     \
+    VReg *Vk = (VReg *)vk;                                     \
+    typedef __typeof(Vd->ES1(0)) TS1;                          \
+    typedef __typeof(Vd->EU1(0)) TU1;                          \
+    int oprsz = simd_oprsz(desc);                              \
+                                                               \
+    for (i = 0; i < oprsz / (BIT / 8); i++) {                  \
+        Vd->ES1(i) += DO_OP((TU1)Vj->EU2(2 * i),               \
+                            (TS1)Vk->ES2(2 * i));              \
+    }                                                          \
+}
+
+VMADDWEV_U_S(vmaddwev_h_bu_b, 16, H, UH, B, UB, DO_MUL)
+VMADDWEV_U_S(vmaddwev_w_hu_h, 32, W, UW, H, UH, DO_MUL)
+VMADDWEV_U_S(vmaddwev_d_wu_w, 64, D, UD, W, UW, DO_MUL)
+
+#define VMADDWOD_U_S(NAME, BIT, ES1, EU1, ES2, EU2, DO_OP)     \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{                                                              \
+    int i;                                                     \
+    VReg *Vd = (VReg *)vd;                                     \
+    VReg *Vj = (VReg *)vj;                                     \
+    VReg *Vk = (VReg *)vk;                                     \
+    typedef __typeof(Vd->ES1(0)) TS1;                          \
+    typedef __typeof(Vd->EU1(0)) TU1;                          \
+    int oprsz = simd_oprsz(desc);                              \
+                                                               \
+    for (i = 0; i < oprsz / (BIT / 8); i++) {                  \
+        Vd->ES1(i) += DO_OP((TU1)Vj->EU2(2 * i + 1),           \
+                            (TS1)Vk->ES2(2 * i + 1));          \
+    }                                                          \
+}
+
+VMADDWOD_U_S(vmaddwod_h_bu_b, 16, H, UH, B, UB, DO_MUL)
+VMADDWOD_U_S(vmaddwod_w_hu_h, 32, W, UW, H, UH, DO_MUL)
+VMADDWOD_U_S(vmaddwod_d_wu_w, 64, D, UD, W, UW, DO_MUL)
+
+#define VDIV(NAME, BIT, E, DO_OP)                              \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{                                                              \
+    int i;                                                     \
+    VReg *Vd = (VReg *)vd;                                     \
+    VReg *Vj = (VReg *)vj;                                     \
+    VReg *Vk = (VReg *)vk;                                     \
+    int oprsz = simd_oprsz(desc);                              \
+                                                               \
+    for (i = 0; i < oprsz / (BIT / 8); i++) {                  \
+        Vd->E(i) = DO_OP(Vj->E(i), Vk->E(i));                  \
+    }                                                          \
+}
+
+VDIV(vdiv_b, 8, B, DO_DIV)
+VDIV(vdiv_h, 16, H, DO_DIV)
+VDIV(vdiv_w, 32, W, DO_DIV)
+VDIV(vdiv_d, 64, D, DO_DIV)
+VDIV(vdiv_bu, 8, UB, DO_DIVU)
+VDIV(vdiv_hu, 16, UH, DO_DIVU)
+VDIV(vdiv_wu, 32, UW, DO_DIVU)
+VDIV(vdiv_du, 64, UD, DO_DIVU)
+VDIV(vmod_b, 8, B, DO_REM)
+VDIV(vmod_h, 16, H, DO_REM)
+VDIV(vmod_w, 32, W, DO_REM)
+VDIV(vmod_d, 64, D, DO_REM)
+VDIV(vmod_bu, 8, UB, DO_REMU)
+VDIV(vmod_hu, 16, UH, DO_REMU)
+VDIV(vmod_wu, 32, UW, DO_REMU)
+VDIV(vmod_du, 64, UD, DO_REMU)
+
+#define VSAT_S(NAME, BIT, E)                                       \
+void HELPER(NAME)(void *vd, void *vj, uint64_t max, uint32_t desc) \
+{                                                                  \
+    int i;                                                         \
+    VReg *Vd = (VReg *)vd;                                         \
+    VReg *Vj = (VReg *)vj;                                         \
+    typedef __typeof(Vd->E(0)) TD;                                 \
+    int oprsz = simd_oprsz(desc);                                  \
+                                                                   \
+    for (i = 0; i < oprsz / (BIT / 8); i++) {                      \
+        Vd->E(i) = Vj->E(i) > (TD)max ? (TD)max :                  \
+                   Vj->E(i) < (TD)~max ? (TD)~max: Vj->E(i);       \
+    }                                                              \
+}
+
+VSAT_S(vsat_b, 8, B)
+VSAT_S(vsat_h, 16, H)
+VSAT_S(vsat_w, 32, W)
+VSAT_S(vsat_d, 64, D)
+
+#define VSAT_U(NAME, BIT, E)                                       \
+void HELPER(NAME)(void *vd, void *vj, uint64_t max, uint32_t desc) \
+{                                                                  \
+    int i;                                                         \
+    VReg *Vd = (VReg *)vd;                                         \
+    VReg *Vj = (VReg *)vj;                                         \
+    typedef __typeof(Vd->E(0)) TD;                                 \
+    int oprsz = simd_oprsz(desc);                                  \
+                                                                   \
+    for (i = 0; i < oprsz / (BIT / 8); i++) {                      \
+        Vd->E(i) = Vj->E(i) > (TD)max ? (TD)max : Vj->E(i);        \
+    }                                                              \
+}
+
+VSAT_U(vsat_bu, 8, UB)
+VSAT_U(vsat_hu, 16, UH)
+VSAT_U(vsat_wu, 32, UW)
+VSAT_U(vsat_du, 64, UD)
+
+#define VEXTH(NAME, BIT, E1, E2)                                 \
+void HELPER(NAME)(void *vd, void *vj, uint32_t desc)             \
+{                                                                \
+    int i, j, ofs;                                               \
+    VReg *Vd = (VReg *)vd;                                       \
+    VReg *Vj = (VReg *)vj;                                       \
+    int oprsz = simd_oprsz(desc);                                \
+                                                                 \
+    ofs = LSX_LEN / BIT;                                         \
+    for (i = 0; i < oprsz / 16; i++) {                           \
+        for (j = 0; j < ofs; j++) {                              \
+            Vd->E1(j + i * ofs) = Vj->E2(j + ofs + ofs * 2 * i); \
+        }                                                        \
+    }                                                            \
+}
+
+void HELPER(vexth_q_d)(void *vd, void *vj, uint32_t desc)
+{
+    int i;
+    VReg *Vd = (VReg *)vd;
+    VReg *Vj = (VReg *)vj;
+    int oprsz = simd_oprsz(desc);
+
+    for (i = 0; i < oprsz / 16; i++) {
+        Vd->Q(i) = int128_makes64(Vj->D(2 * i + 1));
+    }
+}
+
+void HELPER(vexth_qu_du)(void *vd, void *vj, uint32_t desc)
+{
+    int i;
+    VReg *Vd = (VReg *)vd;
+    VReg *Vj = (VReg *)vj;
+    int oprsz = simd_oprsz(desc);
+
+    for (i = 0; i < oprsz / 16; i++) {
+        Vd->Q(i) = int128_make64(Vj->UD(2 * i + 1));
+    }
+}
+
+VEXTH(vexth_h_b, 16, H, B)
+VEXTH(vexth_w_h, 32, W, H)
+VEXTH(vexth_d_w, 64, D, W)
+VEXTH(vexth_hu_bu, 16, UH, UB)
+VEXTH(vexth_wu_hu, 32, UW, UH)
+VEXTH(vexth_du_wu, 64, UD, UW)
+
+#define VEXT2XV(NAME, BIT, E1, E2)                   \
+void HELPER(NAME)(void *vd, void *vj, uint32_t desc) \
+{                                                    \
+    int i;                                           \
+    VReg temp = {};                                  \
+    VReg *Vd = (VReg *)vd;                           \
+    VReg *Vj = (VReg *)vj;                           \
+    int oprsz = simd_oprsz(desc);                    \
+                                                     \
+    for (i = 0; i < oprsz / (BIT / 8); i++) {        \
+        temp.E1(i) = Vj->E2(i);                      \
+    }                                                \
+    *Vd = temp;                                      \
+}
+
+VEXT2XV(vext2xv_h_b, 16, H, B)
+VEXT2XV(vext2xv_w_b, 32, W, B)
+VEXT2XV(vext2xv_d_b, 64, D, B)
+VEXT2XV(vext2xv_w_h, 32, W, H)
+VEXT2XV(vext2xv_d_h, 64, D, H)
+VEXT2XV(vext2xv_d_w, 64, D, W)
+VEXT2XV(vext2xv_hu_bu, 16, UH, UB)
+VEXT2XV(vext2xv_wu_bu, 32, UW, UB)
+VEXT2XV(vext2xv_du_bu, 64, UD, UB)
+VEXT2XV(vext2xv_wu_hu, 32, UW, UH)
+VEXT2XV(vext2xv_du_hu, 64, UD, UH)
+VEXT2XV(vext2xv_du_wu, 64, UD, UW)
+
+DO_3OP(vsigncov_b, 8, B, DO_SIGNCOV)
+DO_3OP(vsigncov_h, 16, H, DO_SIGNCOV)
+DO_3OP(vsigncov_w, 32, W, DO_SIGNCOV)
+DO_3OP(vsigncov_d, 64, D, DO_SIGNCOV)
+
+static uint64_t do_vmskltz_b(int64_t val)
+{
+    uint64_t m = 0x8080808080808080ULL;
+    uint64_t c =  val & m;
+    c |= c << 7;
+    c |= c << 14;
+    c |= c << 28;
+    return c >> 56;
+}
+
+void HELPER(vmskltz_b)(void *vd, void *vj, uint32_t desc)
+{
+    int i;
+    uint16_t temp = 0;
+    VReg *Vd = (VReg *)vd;
+    VReg *Vj = (VReg *)vj;
+    int oprsz = simd_oprsz(desc);
+
+    for (i = 0; i < oprsz / 16; i++) {
+        temp = 0;
+        temp = do_vmskltz_b(Vj->D(2 * i));
+        temp |= (do_vmskltz_b(Vj->D(2 * i  + 1)) << 8);
+        Vd->D(2 * i) = temp;
+        Vd->D(2 * i + 1) = 0;
+    }
+}
+
+static uint64_t do_vmskltz_h(int64_t val)
+{
+    uint64_t m = 0x8000800080008000ULL;
+    uint64_t c =  val & m;
+    c |= c << 15;
+    c |= c << 30;
+    return c >> 60;
+}
+
+void HELPER(vmskltz_h)(void *vd, void *vj, uint32_t desc)
+{
+    int i;
+    uint16_t temp = 0;
+    VReg *Vd = (VReg *)vd;
+    VReg *Vj = (VReg *)vj;
+    int oprsz = simd_oprsz(desc);
+
+    for (i = 0; i < oprsz / 16; i++) {
+        temp = 0;
+        temp = do_vmskltz_h(Vj->D(2 * i));
+        temp |= (do_vmskltz_h(Vj->D(2 * i + 1)) << 4);
+        Vd->D(2 * i) = temp;
+        Vd->D(2 * i + 1) = 0;
+    }
+}
+
+static uint64_t do_vmskltz_w(int64_t val)
+{
+    uint64_t m = 0x8000000080000000ULL;
+    uint64_t c =  val & m;
+    c |= c << 31;
+    return c >> 62;
+}
+
+void HELPER(vmskltz_w)(void *vd, void *vj, uint32_t desc)
+{
+    int i;
+    uint16_t temp = 0;
+    VReg *Vd = (VReg *)vd;
+    VReg *Vj = (VReg *)vj;
+    int oprsz = simd_oprsz(desc);
+
+    for (i = 0; i < oprsz / 16; i++) {
+        temp = 0;
+        temp = do_vmskltz_w(Vj->D(2 * i));
+        temp |= (do_vmskltz_w(Vj->D(2 * i + 1)) << 2);
+        Vd->D(2 * i) = temp;
+        Vd->D(2 * i + 1) = 0;
+    }
+}
+
+static uint64_t do_vmskltz_d(int64_t val)
+{
+    return (uint64_t)val >> 63;
+}
+void HELPER(vmskltz_d)(void *vd, void *vj, uint32_t desc)
+{
+    int i;
+    uint16_t temp = 0;
+    VReg *Vd = (VReg *)vd;
+    VReg *Vj = (VReg *)vj;
+    int oprsz = simd_oprsz(desc);
+
+    for (i = 0; i < oprsz / 16; i++) {
+        temp = 0;
+        temp = do_vmskltz_d(Vj->D(2 * i));
+        temp |= (do_vmskltz_d(Vj->D(2 * i + 1)) << 1);
+        Vd->D(2 * i) = temp;
+        Vd->D(2 * i + 1) = 0;
+    }
+}
+
+void HELPER(vmskgez_b)(void *vd, void *vj, uint32_t desc)
+{
+    int i;
+    uint16_t temp = 0;
+    VReg *Vd = (VReg *)vd;
+    VReg *Vj = (VReg *)vj;
+    int oprsz = simd_oprsz(desc);
+
+    for (i = 0; i < oprsz / 16; i++) {
+        temp = 0;
+        temp =  do_vmskltz_b(Vj->D(2 * i));
+        temp |= (do_vmskltz_b(Vj->D(2 * i + 1)) << 8);
+        Vd->D(2 * i) = (uint16_t)(~temp);
+        Vd->D(2 * i + 1) = 0;
+    }
+}
+
+static uint64_t do_vmskez_b(uint64_t a)
+{
+    uint64_t m = 0x7f7f7f7f7f7f7f7fULL;
+    uint64_t c = ~(((a & m) + m) | a | m);
+    c |= c << 7;
+    c |= c << 14;
+    c |= c << 28;
+    return c >> 56;
+}
+
+void HELPER(vmsknz_b)(void *vd, void *vj, uint32_t desc)
+{
+    int i;
+    uint16_t temp = 0;
+    VReg *Vd = (VReg *)vd;
+    VReg *Vj = (VReg *)vj;
+    int oprsz = simd_oprsz(desc);
+
+    for (i = 0; i < oprsz / 16; i++) {
+        temp = 0;
+        temp = do_vmskez_b(Vj->D(2 * i));
+        temp |= (do_vmskez_b(Vj->D(2 * i + 1)) << 8);
+        Vd->D(2 * i) = (uint16_t)(~temp);
+        Vd->D(2 * i + 1) = 0;
+    }
+}
+
+void HELPER(vnori_b)(void *vd, void *vj, uint64_t imm, uint32_t desc)
+{
+    int i;
+    VReg *Vd = (VReg *)vd;
+    VReg *Vj = (VReg *)vj;
+
+    for (i = 0; i < simd_oprsz(desc); i++) {
+        Vd->B(i) = ~(Vj->B(i) | (uint8_t)imm);
+    }
+}
+
+#define VSLLWIL(NAME, BIT, E1, E2)                                             \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc)             \
+{                                                                              \
+    int i, j, ofs;                                                             \
+    VReg temp = {};                                                            \
+    VReg *Vd = (VReg *)vd;                                                     \
+    VReg *Vj = (VReg *)vj;                                                     \
+    int oprsz = simd_oprsz(desc);                                              \
+    typedef __typeof(temp.E1(0)) TD;                                           \
+                                                                               \
+    ofs = LSX_LEN / BIT;                                                       \
+    for (i = 0; i < oprsz / 16; i++) {                                         \
+        for (j = 0; j < ofs; j++) {                                            \
+            temp.E1(j + ofs * i) = (TD)Vj->E2(j + ofs * 2 * i) << (imm % BIT); \
+        }                                                                      \
+    }                                                                          \
+    *Vd = temp;                                                                \
+}
+
+
+void HELPER(vextl_q_d)(void *vd, void *vj, uint32_t desc)
+{
+    int i;
+    VReg *Vd = (VReg *)vd;
+    VReg *Vj = (VReg *)vj;
+    int oprsz = simd_oprsz(desc);
+
+    for (i = 0; i < oprsz / 16; i++) {
+        Vd->Q(i) = int128_makes64(Vj->D(2 * i));
+    }
+}
+
+void HELPER(vextl_qu_du)(void *vd, void *vj, uint32_t desc)
+{
+    int i;
+    VReg *Vd = (VReg *)vd;
+    VReg *Vj = (VReg *)vj;
+    int oprsz = simd_oprsz(desc);
+
+    for (i = 0; i < oprsz / 16; i++) {
+        Vd->Q(i) = int128_make64(Vj->UD(2 * i));
+    }
+}
+
+VSLLWIL(vsllwil_h_b, 16, H, B)
+VSLLWIL(vsllwil_w_h, 32, W, H)
+VSLLWIL(vsllwil_d_w, 64, D, W)
+VSLLWIL(vsllwil_hu_bu, 16, UH, UB)
+VSLLWIL(vsllwil_wu_hu, 32, UW, UH)
+VSLLWIL(vsllwil_du_wu, 64, UD, UW)
+
+#define do_vsrlr(E, T)                                  \
+static T do_vsrlr_ ##E(T s1, int sh)                    \
+{                                                       \
+    if (sh == 0) {                                      \
+        return s1;                                      \
+    } else {                                            \
+        return  (s1 >> sh)  + ((s1 >> (sh - 1)) & 0x1); \
+    }                                                   \
+}
+
+do_vsrlr(B, uint8_t)
+do_vsrlr(H, uint16_t)
+do_vsrlr(W, uint32_t)
+do_vsrlr(D, uint64_t)
+
+#define VSRLR(NAME, BIT, T, E)                                  \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc)  \
+{                                                               \
+    int i;                                                      \
+    VReg *Vd = (VReg *)vd;                                      \
+    VReg *Vj = (VReg *)vj;                                      \
+    VReg *Vk = (VReg *)vk;                                      \
+    int oprsz = simd_oprsz(desc);                               \
+                                                                \
+    for (i = 0; i < oprsz / (BIT / 8); i++) {                   \
+        Vd->E(i) = do_vsrlr_ ## E(Vj->E(i), ((T)Vk->E(i))%BIT); \
+    }                                                           \
+}
+
+VSRLR(vsrlr_b, 8,  uint8_t, B)
+VSRLR(vsrlr_h, 16, uint16_t, H)
+VSRLR(vsrlr_w, 32, uint32_t, W)
+VSRLR(vsrlr_d, 64, uint64_t, D)
+
+#define VSRLRI(NAME, BIT, E)                                       \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
+{                                                                  \
+    int i;                                                         \
+    VReg *Vd = (VReg *)vd;                                         \
+    VReg *Vj = (VReg *)vj;                                         \
+    int oprsz = simd_oprsz(desc);                                  \
+                                                                   \
+    for (i = 0; i < oprsz / (BIT / 8); i++) {                      \
+        Vd->E(i) = do_vsrlr_ ## E(Vj->E(i), imm);                  \
+    }                                                              \
+}
+
+VSRLRI(vsrlri_b, 8, B)
+VSRLRI(vsrlri_h, 16, H)
+VSRLRI(vsrlri_w, 32, W)
+VSRLRI(vsrlri_d, 64, D)
+
+#define do_vsrar(E, T)                                  \
+static T do_vsrar_ ##E(T s1, int sh)                    \
+{                                                       \
+    if (sh == 0) {                                      \
+        return s1;                                      \
+    } else {                                            \
+        return  (s1 >> sh)  + ((s1 >> (sh - 1)) & 0x1); \
+    }                                                   \
+}
+
+do_vsrar(B, int8_t)
+do_vsrar(H, int16_t)
+do_vsrar(W, int32_t)
+do_vsrar(D, int64_t)
+
+#define VSRAR(NAME, BIT, T, E)                                  \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc)  \
+{                                                               \
+    int i;                                                      \
+    VReg *Vd = (VReg *)vd;                                      \
+    VReg *Vj = (VReg *)vj;                                      \
+    VReg *Vk = (VReg *)vk;                                      \
+    int oprsz = simd_oprsz(desc);                               \
+                                                                \
+    for (i = 0; i < oprsz / (BIT / 8); i++) {                   \
+        Vd->E(i) = do_vsrar_ ## E(Vj->E(i), ((T)Vk->E(i))%BIT); \
+    }                                                           \
+}
+
+VSRAR(vsrar_b, 8,  uint8_t, B)
+VSRAR(vsrar_h, 16, uint16_t, H)
+VSRAR(vsrar_w, 32, uint32_t, W)
+VSRAR(vsrar_d, 64, uint64_t, D)
+
+#define VSRARI(NAME, BIT, E)                                       \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
+{                                                                  \
+    int i;                                                         \
+    VReg *Vd = (VReg *)vd;                                         \
+    VReg *Vj = (VReg *)vj;                                         \
+    int oprsz = simd_oprsz(desc);                                  \
+                                                                   \
+    for (i = 0; i < oprsz / (BIT / 8); i++) {                      \
+        Vd->E(i) = do_vsrar_ ## E(Vj->E(i), imm);                  \
+    }                                                              \
+}
+
+VSRARI(vsrari_b, 8, B)
+VSRARI(vsrari_h, 16, H)
+VSRARI(vsrari_w, 32, W)
+VSRARI(vsrari_d, 64, D)
+
+#define VSRLN(NAME, BIT, E1, E2)                                          \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc)            \
+{                                                                         \
+    int i, j, ofs;                                                        \
+    VReg *Vd = (VReg *)vd;                                                \
+    VReg *Vj = (VReg *)vj;                                                \
+    VReg *Vk = (VReg *)vk;                                                \
+    int oprsz = simd_oprsz(desc);                                         \
+                                                                          \
+    ofs = LSX_LEN / BIT;                                                  \
+    for (i = 0; i < oprsz / 16; i++) {                                    \
+        for (j = 0; j < ofs; j++) {                                       \
+            Vd->E1(j + ofs * 2 * i) = R_SHIFT(Vj->E2(j + ofs * i),        \
+                                              Vk->E2(j + ofs * i) % BIT); \
+        }                                                                 \
+        Vd->D(2 * i + 1) = 0;                                             \
+    }                                                                     \
+}
+
+VSRLN(vsrln_b_h, 16, B, UH)
+VSRLN(vsrln_h_w, 32, H, UW)
+VSRLN(vsrln_w_d, 64, W, UD)
+
+#define VSRAN(NAME, BIT, E1, E2, E3)                                      \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc)            \
+{                                                                         \
+    int i, j, ofs;                                                        \
+    VReg *Vd = (VReg *)vd;                                                \
+    VReg *Vj = (VReg *)vj;                                                \
+    VReg *Vk = (VReg *)vk;                                                \
+    int oprsz = simd_oprsz(desc);                                         \
+                                                                          \
+    ofs = LSX_LEN / BIT;                                                  \
+    for (i = 0; i < oprsz / 16; i++) {                                    \
+        for (j = 0; j < ofs; j++) {                                       \
+            Vd->E1(j + ofs * 2 * i) = R_SHIFT(Vj->E2(j + ofs * i),        \
+                                              Vk->E3(j + ofs * i) % BIT); \
+        }                                                                 \
+        Vd->D(2 * i + 1) = 0;                                             \
+    }                                                                     \
+}
+
+VSRAN(vsran_b_h, 16, B, H, UH)
+VSRAN(vsran_h_w, 32, H, W, UW)
+VSRAN(vsran_w_d, 64, W, D, UD)
+
+#define VSRLNI(NAME, BIT, E1, E2)                                         \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc)        \
+{                                                                         \
+    int i, j, ofs;                                                        \
+    VReg temp = {};                                                       \
+    VReg *Vd = (VReg *)vd;                                                \
+    VReg *Vj = (VReg *)vj;                                                \
+    int oprsz = simd_oprsz(desc);                                         \
+                                                                          \
+    ofs = LSX_LEN / BIT;                                                  \
+    for (i = 0; i < oprsz / 16; i++) {                                    \
+        for (j = 0; j < ofs; j++) {                                       \
+            temp.E1(j + ofs * 2 * i) = R_SHIFT(Vj->E2(j + ofs * i), imm); \
+            temp.E1(j + ofs * (2 * i + 1)) = R_SHIFT(Vd->E2(j + ofs * i), \
+                                                     imm);                \
+        }                                                                 \
+    }                                                                     \
+    *Vd = temp;                                                           \
+}
+
+void HELPER(vsrlni_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
+{
+    int i;
+    VReg temp = {};
+    VReg *Vd = (VReg *)vd;
+    VReg *Vj = (VReg *)vj;
+
+    for (i = 0; i < 2; i++) {
+        temp.D(2 * i) = int128_getlo(int128_urshift(Vj->Q(i), imm % 128));
+        temp.D(2 * i +1) = int128_getlo(int128_urshift(Vd->Q(i), imm % 128));
+    }
+    *Vd = temp;
+}
+
+VSRLNI(vsrlni_b_h, 16, B, UH)
+VSRLNI(vsrlni_h_w, 32, H, UW)
+VSRLNI(vsrlni_w_d, 64, W, UD)
+
+#define VSRANI(NAME, BIT, E1, E2)                                         \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc)        \
+{                                                                         \
+    int i, j, ofs;                                                        \
+    VReg temp = {};                                                       \
+    VReg *Vd = (VReg *)vd;                                                \
+    VReg *Vj = (VReg *)vj;                                                \
+    int oprsz = simd_oprsz(desc);                                         \
+                                                                          \
+    ofs = LSX_LEN / BIT;                                                  \
+    for (i = 0; i < oprsz / 16; i++) {                                    \
+        for (j = 0; j < ofs; j++) {                                       \
+            temp.E1(j + ofs * 2 * i) = R_SHIFT(Vj->E2(j + ofs * i), imm); \
+            temp.E1(j + ofs * (2 * i + 1)) = R_SHIFT(Vd->E2(j + ofs * i), \
+                                                     imm);                \
+        }                                                                 \
+    }                                                                     \
+    *Vd = temp;                                                           \
+}
+
+void HELPER(vsrani_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
+{
+    int i;
+    VReg temp = {};
+    VReg *Vd = (VReg *)vd;
+    VReg *Vj = (VReg *)vj;
+
+    for (i = 0; i < 2; i++) {
+        temp.D(2 * i) = int128_getlo(int128_rshift(Vj->Q(i), imm % 128));
+        temp.D(2 * i + 1) = int128_getlo(int128_rshift(Vd->Q(i), imm % 128));
+    }
+    *Vd = temp;
+}
+
+VSRANI(vsrani_b_h, 16, B, H)
+VSRANI(vsrani_h_w, 32, H, W)
+VSRANI(vsrani_w_d, 64, W, D)
+
+#define VSRLRN(NAME, BIT, E1, E2, E3)                                      \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc)             \
+{                                                                          \
+    int i, j, ofs;                                                         \
+    VReg *Vd = (VReg *)vd;                                                 \
+    VReg *Vj = (VReg *)vj;                                                 \
+    VReg *Vk = (VReg *)vk;                                                 \
+    int oprsz = simd_oprsz(desc);                                          \
+                                                                           \
+    ofs = LSX_LEN / BIT;                                                   \
+    for (i = 0; i < oprsz / 16; i++) {                                     \
+        for (j = 0; j < ofs; j++) {                                        \
+            Vd->E1(j + ofs * 2 * i) = do_vsrlr_ ##E2(Vj->E2(j + ofs * i),  \
+                                               Vk->E3(j + ofs * i) % BIT); \
+        }                                                                  \
+        Vd->D(2 * i + 1) = 0;                                              \
+    }                                                                      \
+}
+
+VSRLRN(vsrlrn_b_h, 16, B, H, UH)
+VSRLRN(vsrlrn_h_w, 32, H, W, UW)
+VSRLRN(vsrlrn_w_d, 64, W, D, UD)
+
+#define VSRARN(NAME, BIT, E1, E2, E3)                                       \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc)              \
+{                                                                           \
+    int i, j, ofs;                                                          \
+    VReg *Vd = (VReg *)vd;                                                  \
+    VReg *Vj = (VReg *)vj;                                                  \
+    VReg *Vk = (VReg *)vk;                                                  \
+    int oprsz = simd_oprsz(desc);                                           \
+                                                                            \
+    ofs = LSX_LEN / BIT;                                                    \
+    for (i = 0; i < oprsz / 16; i++) {                                      \
+        for (j = 0; j < ofs; j++) {                                         \
+            Vd->E1(j + ofs * 2 * i) = do_vsrar_ ## E2(Vj->E2(j + ofs * i),  \
+                                                Vk->E3(j + ofs * i) % BIT); \
+        }                                                                   \
+        Vd->D(2 * i + 1) = 0;                                               \
+    }                                                                       \
+}
+
+VSRARN(vsrarn_b_h, 16, B, H, UH)
+VSRARN(vsrarn_h_w, 32, H, W, UW)
+VSRARN(vsrarn_w_d, 64, W, D, UD)
+
+#define VSRLRNI(NAME, BIT, E1, E2)                                                \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc)                \
+{                                                                                 \
+    int i, j, ofs;                                                                \
+    VReg temp = {};                                                               \
+    VReg *Vd = (VReg *)vd;                                                        \
+    VReg *Vj = (VReg *)vj;                                                        \
+    int oprsz = simd_oprsz(desc);                                                 \
+                                                                                  \
+    ofs = LSX_LEN / BIT;                                                          \
+    for (i = 0; i < oprsz / 16; i++) {                                            \
+        for (j = 0; j < ofs; j++) {                                               \
+            temp.E1(j + ofs * 2 * i) = do_vsrlr_ ## E2(Vj->E2(j + ofs * i), imm); \
+            temp.E1(j + ofs * (2 * i + 1)) = do_vsrlr_ ## E2(Vd->E2(j + ofs * i), \
+                                                                 imm);            \
+        }                                                                         \
+    }                                                                             \
+    *Vd = temp;                                                                   \
+}
+
+void HELPER(vsrlrni_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
+{
+    int i;
+    VReg temp = {};
+    VReg *Vd = (VReg *)vd;
+    VReg *Vj = (VReg *)vj;
+    Int128 r[4];
+    int oprsz = simd_oprsz(desc);
+
+    for (i = 0; i < oprsz / 16; i++) {
+        if (imm == 0) {
+            temp.D(2 * i) = int128_getlo(Vj->Q(i));
+            temp.D(2 * i + 1) = int128_getlo(Vd->Q(i));
+        } else {
+            r[2 * i] = int128_and(int128_urshift(Vj->Q(i), (imm - 1)),
+                                  int128_one());
+            r[2 * i + 1] = int128_and(int128_urshift(Vd->Q(i), (imm - 1)),
+                                      int128_one());
+            temp.D(2 * i) = int128_getlo(int128_add(int128_urshift(Vj->Q(i),
+                                                    imm), r[2 * i]));
+            temp.D(2 * i + 1) = int128_getlo(int128_add(int128_urshift(Vd->Q(i),
+                                                        imm), r[ 2 * i + 1]));
+        }
+    }
+    *Vd = temp;
+}
+
+VSRLRNI(vsrlrni_b_h, 16, B, H)
+VSRLRNI(vsrlrni_h_w, 32, H, W)
+VSRLRNI(vsrlrni_w_d, 64, W, D)
+
+#define VSRARNI(NAME, BIT, E1, E2)                                                \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc)                \
+{                                                                                 \
+    int i, j, ofs;                                                                \
+    VReg temp = {};                                                               \
+    VReg *Vd = (VReg *)vd;                                                        \
+    VReg *Vj = (VReg *)vj;                                                        \
+    int oprsz = simd_oprsz(desc);                                                 \
+                                                                                  \
+    ofs = LSX_LEN / BIT;                                                          \
+    for (i = 0; i < oprsz / 16; i++) {                                            \
+        for (j = 0; j < ofs; j++) {                                               \
+            temp.E1(j + ofs * 2 * i) = do_vsrar_ ## E2(Vj->E2(j + ofs * i), imm); \
+            temp.E1(j + ofs * (2 * i + 1)) = do_vsrar_ ## E2(Vd->E2(j + ofs * i), \
+                                                             imm);                \
+        }                                                                         \
+    }                                                                             \
+    *Vd = temp;                                                                   \
+}
+
+void HELPER(vsrarni_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
+{
+    int i;
+    VReg temp = {};
+    VReg *Vd = (VReg *)vd;
+    VReg *Vj = (VReg *)vj;
+    Int128 r[4];
+    int oprsz = simd_oprsz(desc);
+
+    for (i = 0; i < oprsz / 16; i++) {
+        if (imm == 0) {
+            temp.D(2 * i) = int128_getlo(Vj->Q(i));
+            temp.D(2 * i + 1) = int128_getlo(Vd->Q(i));
+        } else {
+            r[2 * i] = int128_and(int128_rshift(Vj->Q(i), (imm - 1)),
+                                  int128_one());
+            r[2 * i + 1] = int128_and(int128_rshift(Vd->Q(i), (imm - 1)),
+                                      int128_one());
+            temp.D(2 * i) = int128_getlo(int128_add(int128_rshift(Vj->Q(i),
+                                                    imm), r[2 * i]));
+            temp.D(2 * i + 1) = int128_getlo(int128_add(int128_rshift(Vd->Q(i),
+                                                        imm), r[2 * i + 1]));
+        }
+    }
+    *Vd = temp;
+}
+
+VSRARNI(vsrarni_b_h, 16, B, H)
+VSRARNI(vsrarni_h_w, 32, H, W)
+VSRARNI(vsrarni_w_d, 64, W, D)
+
+#define SSRLNS(NAME, T1, T2, T3)                    \
+static T1 do_ssrlns_ ## NAME(T2 e2, int sa, int sh) \
+{                                                   \
+        T1 shft_res;                                \
+        if (sa == 0) {                              \
+            shft_res = e2;                          \
+        } else {                                    \
+            shft_res = (((T1)e2) >> sa);            \
+        }                                           \
+        T3 mask;                                    \
+        mask = (1ull << sh) -1;                     \
+        if (shft_res > mask) {                      \
+            return mask;                            \
+        } else {                                    \
+            return  shft_res;                       \
+        }                                           \
+}
+
+SSRLNS(B, uint16_t, int16_t, uint8_t)
+SSRLNS(H, uint32_t, int32_t, uint16_t)
+SSRLNS(W, uint64_t, int64_t, uint32_t)
+
+#define VSSRLN(NAME, BIT, E1, E2, E3)                                       \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc)              \
+{                                                                           \
+    int i, j, ofs;                                                          \
+    VReg *Vd = (VReg *)vd;                                                  \
+    VReg *Vj = (VReg *)vj;                                                  \
+    VReg *Vk = (VReg *)vk;                                                  \
+    int oprsz = simd_oprsz(desc);                                           \
+                                                                            \
+    ofs = LSX_LEN / BIT;                                                    \
+    for (i = 0; i < oprsz / 16; i++) {                                      \
+        for (j = 0; j < ofs; j++) {                                         \
+            Vd->E1(j + ofs * 2 * i) = do_ssrlns_ ## E1(Vj->E2(j + ofs * i), \
+                                                Vk->E3(j + ofs * i) % BIT,  \
+                                                BIT / 2 - 1);               \
+        }                                                                   \
+        Vd->D(2 * i + 1) = 0;                                               \
+    }                                                                       \
+}
+
+VSSRLN(vssrln_b_h, 16, B, H, UH)
+VSSRLN(vssrln_h_w, 32, H, W, UW)
+VSSRLN(vssrln_w_d, 64, W, D, UD)
+
+#define SSRANS(E, T1, T2)                        \
+static T1 do_ssrans_ ## E(T1 e2, int sa, int sh) \
+{                                                \
+        T1 shft_res;                             \
+        if (sa == 0) {                           \
+            shft_res = e2;                       \
+        } else {                                 \
+            shft_res = e2 >> sa;                 \
+        }                                        \
+        T2 mask;                                 \
+        mask = (1ll << sh) - 1;                  \
+        if (shft_res > mask) {                   \
+            return  mask;                        \
+        } else if (shft_res < -(mask + 1)) {     \
+            return  ~mask;                       \
+        } else {                                 \
+            return shft_res;                     \
+        }                                        \
+}
+
+SSRANS(B, int16_t, int8_t)
+SSRANS(H, int32_t, int16_t)
+SSRANS(W, int64_t, int32_t)
+
+#define VSSRAN(NAME, BIT, E1, E2, E3)                                       \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc)              \
+{                                                                           \
+    int i, j, ofs;                                                          \
+    VReg *Vd = (VReg *)vd;                                                  \
+    VReg *Vj = (VReg *)vj;                                                  \
+    VReg *Vk = (VReg *)vk;                                                  \
+    int oprsz = simd_oprsz(desc);                                           \
+                                                                            \
+    ofs = LSX_LEN / BIT;                                                    \
+    for (i = 0; i < oprsz / 16; i++) {                                      \
+        for (j = 0; j < ofs; j++) {                                         \
+            Vd->E1(j + ofs * 2 * i) = do_ssrans_ ## E1(Vj->E2(j + ofs * i), \
+                                                Vk->E3(j + ofs * i) % BIT,  \
+                                                BIT / 2 - 1);               \
+        }                                                                   \
+        Vd->D(2 * i + 1) = 0;                                               \
+    }                                                                       \
+}
+
+VSSRAN(vssran_b_h, 16, B, H, UH)
+VSSRAN(vssran_h_w, 32, H, W, UW)
+VSSRAN(vssran_w_d, 64, W, D, UD)
+
+#define SSRLNU(E, T1, T2, T3)                    \
+static T1 do_ssrlnu_ ## E(T3 e2, int sa, int sh) \
+{                                                \
+        T1 shft_res;                             \
+        if (sa == 0) {                           \
+            shft_res = e2;                       \
+        } else {                                 \
+            shft_res = (((T1)e2) >> sa);         \
+        }                                        \
+        T2 mask;                                 \
+        mask = (1ull << sh) - 1;                 \
+        if (shft_res > mask) {                   \
+            return mask;                         \
+        } else {                                 \
+            return shft_res;                     \
+        }                                        \
+}
+
+SSRLNU(B, uint16_t, uint8_t,  int16_t)
+SSRLNU(H, uint32_t, uint16_t, int32_t)
+SSRLNU(W, uint64_t, uint32_t, int64_t)
+
+#define VSSRLNU(NAME, BIT, E1, E2, E3)                                      \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc)              \
+{                                                                           \
+    int i, j, ofs;                                                          \
+    VReg *Vd = (VReg *)vd;                                                  \
+    VReg *Vj = (VReg *)vj;                                                  \
+    VReg *Vk = (VReg *)vk;                                                  \
+    int oprsz = simd_oprsz(desc);                                           \
+                                                                            \
+    ofs = LSX_LEN / BIT;                                                    \
+    for (i = 0; i < oprsz / 16; i++) {                                      \
+        for (j = 0; j < ofs; j++) {                                         \
+            Vd->E1(j + ofs * 2 * i) = do_ssrlnu_ ## E1(Vj->E2(j + ofs * i), \
+                                                Vk->E3(j + ofs * i) % BIT,  \
+                                                BIT / 2);                   \
+        }                                                                   \
+        Vd->D(2 * i + 1) = 0;                                               \
+    }                                                                       \
+}
+
+VSSRLNU(vssrln_bu_h, 16, B, H, UH)
+VSSRLNU(vssrln_hu_w, 32, H, W, UW)
+VSSRLNU(vssrln_wu_d, 64, W, D, UD)
+
+#define SSRANU(E, T1, T2, T3)                    \
+static T1 do_ssranu_ ## E(T3 e2, int sa, int sh) \
+{                                                \
+        T1 shft_res;                             \
+        if (sa == 0) {                           \
+            shft_res = e2;                       \
+        } else {                                 \
+            shft_res = e2 >> sa;                 \
+        }                                        \
+        if (e2 < 0) {                            \
+            shft_res = 0;                        \
+        }                                        \
+        T2 mask;                                 \
+        mask = (1ull << sh) - 1;                 \
+        if (shft_res > mask) {                   \
+            return mask;                         \
+        } else {                                 \
+            return shft_res;                     \
+        }                                        \
+}
+
+SSRANU(B, uint16_t, uint8_t,  int16_t)
+SSRANU(H, uint32_t, uint16_t, int32_t)
+SSRANU(W, uint64_t, uint32_t, int64_t)
+
+#define VSSRANU(NAME, BIT, E1, E2, E3)                                         \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc)                 \
+{                                                                              \
+    int i, j, ofs;                                                             \
+    VReg *Vd = (VReg *)vd;                                                     \
+    VReg *Vj = (VReg *)vj;                                                     \
+    VReg *Vk = (VReg *)vk;                                                     \
+    int oprsz = simd_oprsz(desc);                                              \
+                                                                               \
+    ofs = LSX_LEN / BIT;                                                       \
+    for (i = 0; i < oprsz / 16; i++) {                                         \
+        for (j = 0; j < ofs; j++) {                                            \
+            Vd->E1(j + ofs * 2 * i) = do_ssranu_ ## E1(Vj->E2(j + ofs * i),    \
+                                                    Vk->E3(j + ofs * i) % BIT, \
+                                                    BIT / 2);                  \
+        }                                                                      \
+        Vd->D(2 * i + 1) = 0;                                                  \
+    }                                                                          \
+}
+
+VSSRANU(vssran_bu_h, 16, B, H, UH)
+VSSRANU(vssran_hu_w, 32, H, W, UW)
+VSSRANU(vssran_wu_d, 64, W, D, UD)
+
+#define VSSRLNI(NAME, BIT, E1, E2)                                                 \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc)                 \
+{                                                                                  \
+    int i, j, ofs;                                                                 \
+    VReg temp = {};                                                                \
+    VReg *Vd = (VReg *)vd;                                                         \
+    VReg *Vj = (VReg *)vj;                                                         \
+    int oprsz = simd_oprsz(desc);                                                  \
+                                                                                   \
+    ofs = LSX_LEN / BIT;                                                           \
+    for (i = 0; i < oprsz / 16; i++) {                                             \
+        for (j = 0; j < ofs; j++) {                                                \
+            temp.E1(j + ofs * 2 * i) = do_ssrlns_ ## E1(Vj->E2(j + ofs * i),       \
+                                                     imm, BIT / 2 - 1);            \
+            temp.E1(j + ofs * (2 * i + 1)) = do_ssrlns_ ## E1(Vd->E2(j + ofs * i), \
+                                                           imm, BIT / 2 - 1);      \
+        }                                                                          \
+    }                                                                              \
+    *Vd = temp;                                                                    \
+}
+
+static void do_vssrlni_q(VReg *Vd, VReg *Vj,
+                         uint64_t imm, int idx, Int128 mask)
+{
+    Int128 shft_res1, shft_res2;
+
+    if (imm == 0) {
+        shft_res1 = Vj->Q(idx);
+        shft_res2 = Vd->Q(idx);
+    } else {
+        shft_res1 = int128_urshift(Vj->Q(idx), imm);
+        shft_res2 = int128_urshift(Vd->Q(idx), imm);
+    }
+
+    if (int128_ult(mask, shft_res1)) {
+        Vd->D(idx * 2) = int128_getlo(mask);
+    }else {
+        Vd->D(idx * 2) = int128_getlo(shft_res1);
+    }
+
+    if (int128_ult(mask, shft_res2)) {
+        Vd->D(idx * 2 + 1) = int128_getlo(mask);
+    }else {
+        Vd->D(idx * 2 + 1) = int128_getlo(shft_res2);
+    }
+}
+
+void HELPER(vssrlni_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
+{
+    int i;
+    Int128 mask;
+    VReg *Vd = (VReg *)vd;
+    VReg *Vj = (VReg *)vj;
+    int oprsz = simd_oprsz(desc);
+
+    mask = int128_sub(int128_lshift(int128_one(), 63), int128_one());
+
+    for (i = 0; i < oprsz / 16; i++) {
+        do_vssrlni_q(Vd, Vj, imm, i, mask);
+    }
+}
+
+VSSRLNI(vssrlni_b_h, 16, B, H)
+VSSRLNI(vssrlni_h_w, 32, H, W)
+VSSRLNI(vssrlni_w_d, 64, W, D)
+
+#define VSSRANI(NAME, BIT, E1, E2)                                                 \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc)                 \
+{                                                                                  \
+    int i, j, ofs;                                                                 \
+    VReg temp = {};                                                                \
+    VReg *Vd = (VReg *)vd;                                                         \
+    VReg *Vj = (VReg *)vj;                                                         \
+    int oprsz = simd_oprsz(desc);                                                  \
+                                                                                   \
+    ofs = LSX_LEN / BIT;                                                           \
+    for (i = 0; i < oprsz / 16; i++) {                                             \
+        for (j = 0; j < ofs; j++) {                                                \
+            temp.E1(j + ofs * 2 * i) = do_ssrans_ ## E1(Vj->E2(j + ofs * i),       \
+                                                        imm, BIT / 2 - 1);         \
+            temp.E1(j + ofs * (2 * i + 1)) = do_ssrans_ ## E1(Vd->E2(j + ofs * i), \
+                                                              imm, BIT / 2 - 1);   \
+        }                                                                          \
+    }                                                                              \
+    *Vd = temp;                                                                    \
+}
+
+static void do_vssrani_d_q(VReg *Vd, VReg *Vj,
+                           uint64_t imm, int idx, Int128 mask, Int128 min)
+{
+    Int128 shft_res1, shft_res2;
+
+    if (imm == 0) {
+        shft_res1 = Vj->Q(idx);
+        shft_res2 = Vd->Q(idx);
+    } else {
+        shft_res1 = int128_rshift(Vj->Q(idx), imm);
+        shft_res2 = int128_rshift(Vd->Q(idx), imm);
+    }
+
+    if (int128_gt(shft_res1, mask)) {
+        Vd->D(idx * 2) = int128_getlo(mask);
+    } else if (int128_lt(shft_res1, int128_neg(min))) {
+        Vd->D(idx * 2) = int128_getlo(min);
+    } else {
+        Vd->D(idx * 2) = int128_getlo(shft_res1);
+    }
+
+    if (int128_gt(shft_res2, mask)) {
+        Vd->D(idx * 2 + 1) = int128_getlo(mask);
+    } else if (int128_lt(shft_res2, int128_neg(min))) {
+        Vd->D(idx * 2 + 1) = int128_getlo(min);
+    } else {
+        Vd->D(idx * 2 + 1) = int128_getlo(shft_res2);
+    }
+}
+
+void HELPER(vssrani_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
+{
+    int i;
+    Int128 mask, min;
+    VReg *Vd = (VReg *)vd;
+    VReg *Vj = (VReg *)vj;
+    int oprsz = simd_oprsz(desc);
+
+    mask = int128_sub(int128_lshift(int128_one(), 63), int128_one());
+    min  = int128_lshift(int128_one(), 63);
+
+    for (i = 0; i < oprsz / 16; i++) {
+        do_vssrani_d_q(Vd, Vj, imm, i, mask, min);
+    }
+}
+
+
+VSSRANI(vssrani_b_h, 16, B, H)
+VSSRANI(vssrani_h_w, 32, H, W)
+VSSRANI(vssrani_w_d, 64, W, D)
+
+#define VSSRLNUI(NAME, BIT, E1, E2)                                                \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc)                 \
+{                                                                                  \
+    int i, j, ofs;                                                                 \
+    VReg temp = {};                                                                \
+    VReg *Vd = (VReg *)vd;                                                         \
+    VReg *Vj = (VReg *)vj;                                                         \
+    int oprsz = simd_oprsz(desc);                                                  \
+                                                                                   \
+    ofs = LSX_LEN / BIT;                                                           \
+    for (i = 0; i < oprsz / 16; i++) {                                             \
+        for (j = 0; j < ofs; j++) {                                                \
+            temp.E1(j + ofs * 2 * i) = do_ssrlnu_ ## E1(Vj->E2(j + ofs * i),       \
+                                                        imm, BIT / 2);             \
+            temp.E1(j + ofs * (2 * i + 1)) = do_ssrlnu_ ## E1(Vd->E2(j + ofs * i), \
+                                                              imm, BIT / 2);       \
+        }                                                                          \
+    }                                                                              \
+    *Vd = temp;                                                                    \
+}
+
+void HELPER(vssrlni_du_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
+{
+    int i;
+    Int128 mask;
+    VReg *Vd = (VReg *)vd;
+    VReg *Vj = (VReg *)vj;
+    int oprsz = simd_oprsz(desc);
+
+    mask = int128_sub(int128_lshift(int128_one(), 64), int128_one());
+
+    for (i = 0; i < oprsz / 16; i++) {
+        do_vssrlni_q(Vd, Vj, imm, i, mask);
+    }
+}
+
+VSSRLNUI(vssrlni_bu_h, 16, B, H)
+VSSRLNUI(vssrlni_hu_w, 32, H, W)
+VSSRLNUI(vssrlni_wu_d, 64, W, D)
+
+#define VSSRANUI(NAME, BIT, E1, E2)                                                \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc)                 \
+{                                                                                  \
+    int i, j, ofs;                                                                 \
+    VReg temp = {};                                                                \
+    VReg *Vd = (VReg *)vd;                                                         \
+    VReg *Vj = (VReg *)vj;                                                         \
+    int oprsz = simd_oprsz(desc);                                                  \
+                                                                                   \
+    ofs = LSX_LEN / BIT;                                                           \
+    for (i = 0; i < oprsz / 16; i++) {                                             \
+        for (j = 0; j < ofs; j++) {                                                \
+            temp.E1(j + ofs * 2 * i) = do_ssranu_ ## E1(Vj->E2(j + ofs * i),       \
+                                                        imm, BIT / 2);             \
+            temp.E1(j + ofs * (2 * i + 1)) = do_ssranu_ ## E1(Vd->E2(j + ofs * i), \
+                                                              imm, BIT / 2);       \
+        }                                                                          \
+    }                                                                              \
+    *Vd = temp;                                                                    \
+}
+
+static void do_vssrani_du_q(VReg *Vd, VReg *Vj,
+                            uint64_t imm, int idx, Int128 mask)
+{
+    Int128 shft_res1, shft_res2;
+
+    if (imm == 0) {
+        shft_res1 = Vj->Q(idx);
+        shft_res2 = Vd->Q(idx);
+    } else {
+        shft_res1 = int128_rshift(Vj->Q(idx), imm);
+        shft_res2 = int128_rshift(Vd->Q(idx), imm);
+    }
+
+    if (int128_lt(Vj->Q(idx), int128_zero())) {
+        shft_res1 = int128_zero();
+    }
+
+    if (int128_lt(Vd->Q(idx), int128_zero())) {
+        shft_res2 = int128_zero();
+    }
+    if (int128_ult(mask, shft_res1)) {
+        Vd->D(idx * 2) = int128_getlo(mask);
+    }else {
+        Vd->D(idx * 2) = int128_getlo(shft_res1);
+    }
+
+    if (int128_ult(mask, shft_res2)) {
+        Vd->D(idx * 2 + 1) = int128_getlo(mask);
+    }else {
+        Vd->D(idx * 2 + 1) = int128_getlo(shft_res2);
+    }
+
+}
+
+void HELPER(vssrani_du_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
+{
+    int i;
+    Int128 mask;
+    VReg *Vd = (VReg *)vd;
+    VReg *Vj = (VReg *)vj;
+    int oprsz = simd_oprsz(desc);
+
+    mask = int128_sub(int128_lshift(int128_one(), 64), int128_one());
+
+    for (i = 0; i < oprsz / 16; i++) {
+        do_vssrani_du_q(Vd, Vj, imm, i, mask);
+    }
+}
+
+VSSRANUI(vssrani_bu_h, 16, B, H)
+VSSRANUI(vssrani_hu_w, 32, H, W)
+VSSRANUI(vssrani_wu_d, 64, W, D)
+
+#define SSRLRNS(E1, E2, T1, T2, T3)                \
+static T1 do_ssrlrns_ ## E1(T2 e2, int sa, int sh) \
+{                                                  \
+    T1 shft_res;                                   \
+                                                   \
+    shft_res = do_vsrlr_ ## E2(e2, sa);            \
+    T1 mask;                                       \
+    mask = (1ull << sh) - 1;                       \
+    if (shft_res > mask) {                         \
+        return mask;                               \
+    } else {                                       \
+        return  shft_res;                          \
+    }                                              \
+}
+
+SSRLRNS(B, H, uint16_t, int16_t, uint8_t)
+SSRLRNS(H, W, uint32_t, int32_t, uint16_t)
+SSRLRNS(W, D, uint64_t, int64_t, uint32_t)
+
+#define VSSRLRN(NAME, BIT, E1, E2, E3)                                         \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc)                 \
+{                                                                              \
+    int i, j, ofs;                                                             \
+    VReg *Vd = (VReg *)vd;                                                     \
+    VReg *Vj = (VReg *)vj;                                                     \
+    VReg *Vk = (VReg *)vk;                                                     \
+    int oprsz = simd_oprsz(desc);                                              \
+                                                                               \
+    ofs = LSX_LEN / BIT;                                                       \
+    for (i = 0; i < oprsz / 16; i++) {                                         \
+        for (j = 0; j < ofs; j++) {                                            \
+            Vd->E1(j + ofs * 2 * i) = do_ssrlrns_ ## E1(Vj->E2(j + ofs * i),   \
+                                                    Vk->E3(j + ofs * i) % BIT, \
+                                                    BIT / 2 - 1);              \
+        }                                                                      \
+        Vd->D(2 * i + 1) = 0;                                                  \
+    }                                                                          \
+}
+
+VSSRLRN(vssrlrn_b_h, 16, B, H, UH)
+VSSRLRN(vssrlrn_h_w, 32, H, W, UW)
+VSSRLRN(vssrlrn_w_d, 64, W, D, UD)
+
+#define SSRARNS(E1, E2, T1, T2)                    \
+static T1 do_ssrarns_ ## E1(T1 e2, int sa, int sh) \
+{                                                  \
+    T1 shft_res;                                   \
+                                                   \
+    shft_res = do_vsrar_ ## E2(e2, sa);            \
+    T2 mask;                                       \
+    mask = (1ll << sh) - 1;                        \
+    if (shft_res > mask) {                         \
+        return  mask;                              \
+    } else if (shft_res < -(mask +1)) {            \
+        return  ~mask;                             \
+    } else {                                       \
+        return shft_res;                           \
+    }                                              \
+}
+
+SSRARNS(B, H, int16_t, int8_t)
+SSRARNS(H, W, int32_t, int16_t)
+SSRARNS(W, D, int64_t, int32_t)
+
+#define VSSRARN(NAME, BIT, E1, E2, E3)                                         \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc)                 \
+{                                                                              \
+    int i, j, ofs;                                                             \
+    VReg *Vd = (VReg *)vd;                                                     \
+    VReg *Vj = (VReg *)vj;                                                     \
+    VReg *Vk = (VReg *)vk;                                                     \
+    int oprsz = simd_oprsz(desc);                                              \
+                                                                               \
+    ofs = LSX_LEN / BIT;                                                       \
+    for (i = 0; i < oprsz / 16; i++) {                                         \
+        for (j = 0; j < ofs; j++) {                                            \
+            Vd->E1(j + ofs * 2 * i) = do_ssrarns_ ## E1(Vj->E2(j + ofs * i),   \
+                                                    Vk->E3(j + ofs * i) % BIT, \
+                                                    BIT/ 2 - 1);               \
+        }                                                                      \
+        Vd->D(2 * i + 1) = 0;                                                  \
+    }                                                                          \
+}
+
+VSSRARN(vssrarn_b_h, 16, B, H, UH)
+VSSRARN(vssrarn_h_w, 32, H, W, UW)
+VSSRARN(vssrarn_w_d, 64, W, D, UD)
+
+#define SSRLRNU(E1, E2, T1, T2, T3)                \
+static T1 do_ssrlrnu_ ## E1(T3 e2, int sa, int sh) \
+{                                                  \
+    T1 shft_res;                                   \
+                                                   \
+    shft_res = do_vsrlr_ ## E2(e2, sa);            \
+                                                   \
+    T2 mask;                                       \
+    mask = (1ull << sh) - 1;                       \
+    if (shft_res > mask) {                         \
+        return mask;                               \
+    } else {                                       \
+        return shft_res;                           \
+    }                                              \
+}
+
+SSRLRNU(B, H, uint16_t, uint8_t, int16_t)
+SSRLRNU(H, W, uint32_t, uint16_t, int32_t)
+SSRLRNU(W, D, uint64_t, uint32_t, int64_t)
+
+#define VSSRLRNU(NAME, BIT, E1, E2, E3)                                        \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc)                 \
+{                                                                              \
+    int i, j, ofs;                                                             \
+    VReg *Vd = (VReg *)vd;                                                     \
+    VReg *Vj = (VReg *)vj;                                                     \
+    VReg *Vk = (VReg *)vk;                                                     \
+    int oprsz = simd_oprsz(desc);                                              \
+                                                                               \
+    ofs = LSX_LEN / BIT;                                                       \
+    for (i = 0; i < oprsz / 16; i++) {                                         \
+        for (j = 0; j < ofs; j++) {                                            \
+            Vd->E1(j + ofs * 2 * i) = do_ssrlrnu_ ## E1(Vj->E2(j + ofs * i),   \
+                                                    Vk->E3(j + ofs * i) % BIT, \
+                                                    BIT / 2);                  \
+        }                                                                      \
+        Vd->D(2 * i + 1) = 0;                                                  \
+    }                                                                          \
+}
+
+VSSRLRNU(vssrlrn_bu_h, 16, B, H, UH)
+VSSRLRNU(vssrlrn_hu_w, 32, H, W, UW)
+VSSRLRNU(vssrlrn_wu_d, 64, W, D, UD)
+
+#define SSRARNU(E1, E2, T1, T2, T3)                \
+static T1 do_ssrarnu_ ## E1(T3 e2, int sa, int sh) \
+{                                                  \
+    T1 shft_res;                                   \
+                                                   \
+    if (e2 < 0) {                                  \
+        shft_res = 0;                              \
+    } else {                                       \
+        shft_res = do_vsrar_ ## E2(e2, sa);        \
+    }                                              \
+    T2 mask;                                       \
+    mask = (1ull << sh) - 1;                       \
+    if (shft_res > mask) {                         \
+        return mask;                               \
+    } else {                                       \
+        return shft_res;                           \
+    }                                              \
+}
+
+SSRARNU(B, H, uint16_t, uint8_t, int16_t)
+SSRARNU(H, W, uint32_t, uint16_t, int32_t)
+SSRARNU(W, D, uint64_t, uint32_t, int64_t)
+
+#define VSSRARNU(NAME, BIT, E1, E2, E3)                                      \
+void HELPER(NAME)(void *vd, void *vj, void  *vk, uint32_t desc)              \
+{                                                                            \
+    int i, j, ofs;                                                           \
+    VReg *Vd = (VReg *)vd;                                                   \
+    VReg *Vj = (VReg *)vj;                                                   \
+    VReg *Vk = (VReg *)vk;                                                   \
+    int oprsz = simd_oprsz(desc);                                            \
+                                                                             \
+    ofs = LSX_LEN / BIT;                                                     \
+    for (i = 0; i < oprsz / 16; i++) {                                       \
+        for (j = 0; j < ofs; j++) {                                          \
+            Vd->E1(j + ofs * 2 * i) = do_ssrarnu_ ## E1(Vj->E2(j + ofs * i), \
+                                                Vk->E3(j + ofs * i) % BIT,   \
+                                                BIT / 2);                    \
+        }                                                                    \
+        Vd->D(2 * i + 1) = 0;                                                \
+    }                                                                        \
+}
+
+VSSRARNU(vssrarn_bu_h, 16, B, H, UH)
+VSSRARNU(vssrarn_hu_w, 32, H, W, UW)
+VSSRARNU(vssrarn_wu_d, 64, W, D, UD)
+
+#define VSSRLRNI(NAME, BIT, E1, E2)                                                 \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc)                  \
+{                                                                                   \
+    int i, j, ofs;                                                                  \
+    VReg temp = {};                                                                 \
+    VReg *Vd = (VReg *)vd;                                                          \
+    VReg *Vj = (VReg *)vj;                                                          \
+    int oprsz = simd_oprsz(desc);                                                   \
+                                                                                    \
+    ofs = LSX_LEN / BIT;                                                            \
+    for (i = 0; i < oprsz / 16; i++) {                                              \
+        for (j = 0; j < ofs; j++) {                                                 \
+            temp.E1(j + ofs * 2 * i) = do_ssrlrns_ ## E1(Vj->E2(j + ofs * i),       \
+                                                         imm, BIT / 2 - 1);         \
+            temp.E1(j + ofs * (2 * i + 1)) = do_ssrlrns_ ## E1(Vd->E2(j + ofs * i), \
+                                                               imm, BIT / 2 - 1);   \
+        }                                                                           \
+    }                                                                               \
+    *Vd = temp;                                                                     \
+}
+
+static void do_vssrlrni_q(VReg *Vd, VReg * Vj,
+                          uint64_t imm, int idx, Int128 mask)
+{
+    Int128 shft_res1, shft_res2, r1, r2;
+    if (imm == 0) {
+        shft_res1 = Vj->Q(idx);
+        shft_res2 = Vd->Q(idx);
+    } else {
+        r1 = int128_and(int128_urshift(Vj->Q(idx), (imm - 1)), int128_one());
+        r2 = int128_and(int128_urshift(Vd->Q(idx), (imm - 1)), int128_one());
+        shft_res1 = (int128_add(int128_urshift(Vj->Q(idx), imm), r1));
+        shft_res2 = (int128_add(int128_urshift(Vd->Q(idx), imm), r2));
+    }
+
+    if (int128_ult(mask, shft_res1)) {
+        Vd->D(idx * 2) = int128_getlo(mask);
+    }else {
+        Vd->D(idx * 2) = int128_getlo(shft_res1);
+    }
+
+    if (int128_ult(mask, shft_res2)) {
+        Vd->D(idx * 2 + 1) = int128_getlo(mask);
+    }else {
+        Vd->D(idx * 2 + 1) = int128_getlo(shft_res2);
+    }
+}
+
+void HELPER(vssrlrni_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
+{
+    int i;
+    Int128 mask;
+    VReg *Vd = (VReg *)vd;
+    VReg *Vj = (VReg *)vj;
+    int oprsz = simd_oprsz(desc);
+
+    mask = int128_sub(int128_lshift(int128_one(), 63), int128_one());
+
+    for (i = 0; i < oprsz / 16; i++) {
+        do_vssrlrni_q(Vd, Vj, imm, i, mask);
+    }
+}
+
+VSSRLRNI(vssrlrni_b_h, 16, B, H)
+VSSRLRNI(vssrlrni_h_w, 32, H, W)
+VSSRLRNI(vssrlrni_w_d, 64, W, D)
+
+#define VSSRARNI(NAME, BIT, E1, E2)                                                 \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc)                  \
+{                                                                                   \
+    int i, j, ofs;                                                                  \
+    VReg temp = {};                                                                 \
+    VReg *Vd = (VReg *)vd;                                                          \
+    VReg *Vj = (VReg *)vj;                                                          \
+    int oprsz = simd_oprsz(desc);                                                   \
+                                                                                    \
+    ofs = LSX_LEN / BIT;                                                            \
+    for (i = 0; i < oprsz / 16; i++) {                                              \
+        for (j = 0; j < ofs; j++) {                                                 \
+            temp.E1(j + ofs * 2 * i) = do_ssrarns_ ## E1(Vj->E2(j + ofs * i),       \
+                                                         imm, BIT / 2 - 1);         \
+            temp.E1(j + ofs * (2 * i + 1)) = do_ssrarns_ ## E1(Vd->E2(j + ofs * i), \
+                                                               imm, BIT / 2 - 1);   \
+        }                                                                           \
+    }                                                                               \
+    *Vd = temp;                                                                     \
+}
+
+static void do_vssrarni_d_q(VReg *Vd, VReg *Vj,
+                           uint64_t imm, int idx, Int128 mask1, Int128 mask2)
+{
+    Int128 shft_res1, shft_res2, r1, r2;
+
+    if (imm == 0) {
+        shft_res1 = Vj->Q(idx);
+        shft_res2 = Vd->Q(idx);
+    } else {
+        r1 = int128_and(int128_rshift(Vj->Q(idx), (imm - 1)), int128_one());
+        r2 = int128_and(int128_rshift(Vd->Q(idx), (imm - 1)), int128_one());
+        shft_res1 = int128_add(int128_rshift(Vj->Q(idx), imm), r1);
+        shft_res2 = int128_add(int128_rshift(Vd->Q(idx), imm), r2);
+    }
+    if (int128_gt(shft_res1, mask1)) {
+        Vd->D(idx * 2) = int128_getlo(mask1);
+    } else if (int128_lt(shft_res1, int128_neg(mask2))) {
+        Vd->D(idx * 2) = int128_getlo(mask2);
+    } else {
+        Vd->D(idx * 2) = int128_getlo(shft_res1);
+    }
+
+    if (int128_gt(shft_res2, mask1)) {
+        Vd->D(idx * 2 + 1) = int128_getlo(mask1);
+    } else if (int128_lt(shft_res2, int128_neg(mask2))) {
+        Vd->D(idx * 2 + 1) = int128_getlo(mask2);
+    } else {
+        Vd->D(idx * 2 + 1) = int128_getlo(shft_res2);
+    }
+}
+
+void HELPER(vssrarni_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
+{
+    int i;
+    Int128 mask1, mask2;
+    VReg *Vd = (VReg *)vd;
+    VReg *Vj = (VReg *)vj;
+    int oprsz = simd_oprsz(desc);
+
+    mask1 = int128_sub(int128_lshift(int128_one(), 63), int128_one());
+    mask2  = int128_lshift(int128_one(), 63);
+
+    for (i = 0; i < oprsz / 16; i++) {
+        do_vssrarni_d_q(Vd, Vj, imm, i, mask1, mask2);
+    }
+}
+
+VSSRARNI(vssrarni_b_h, 16, B, H)
+VSSRARNI(vssrarni_h_w, 32, H, W)
+VSSRARNI(vssrarni_w_d, 64, W, D)
+
+#define VSSRLRNUI(NAME, BIT, E1, E2)                                                \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc)                  \
+{                                                                                   \
+    int i, j, ofs;                                                                  \
+    VReg temp = {};                                                                 \
+    VReg *Vd = (VReg *)vd;                                                          \
+    VReg *Vj = (VReg *)vj;                                                          \
+    int oprsz = simd_oprsz(desc);                                                   \
+                                                                                    \
+    ofs = LSX_LEN / BIT;                                                            \
+    for (i = 0; i < oprsz / 16; i++) {                                              \
+        for (j = 0; j < ofs; j++) {                                                 \
+            temp.E1(j + ofs * 2 * i) = do_ssrlrnu_ ## E1(Vj->E2(j + ofs * i),       \
+                                                         imm, BIT / 2);             \
+            temp.E1(j + ofs * (2 * i + 1)) = do_ssrlrnu_ ## E1(Vd->E2(j + ofs * i), \
+                                                               imm, BIT / 2);       \
+        }                                                                           \
+    }                                                                               \
+    *Vd = temp;                                                                     \
+}
+
+void HELPER(vssrlrni_du_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
+{
+    int i;
+    Int128 mask;
+    VReg *Vd = (VReg *)vd;
+    VReg *Vj = (VReg *)vj;
+    int oprsz = simd_oprsz(desc);
+
+    mask = int128_sub(int128_lshift(int128_one(), 64), int128_one());
+
+    for (i = 0; i < oprsz / 16; i++) {
+        do_vssrlrni_q(Vd, Vj, imm, i, mask);
+    }
+}
+
+VSSRLRNUI(vssrlrni_bu_h, 16, B, H)
+VSSRLRNUI(vssrlrni_hu_w, 32, H, W)
+VSSRLRNUI(vssrlrni_wu_d, 64, W, D)
+
+#define VSSRARNUI(NAME, BIT, E1, E2)                                                \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc)                  \
+{                                                                                   \
+    int i, j, ofs;                                                                  \
+    VReg temp = {};                                                                 \
+    VReg *Vd = (VReg *)vd;                                                          \
+    VReg *Vj = (VReg *)vj;                                                          \
+    int oprsz = simd_oprsz(desc);                                                   \
+                                                                                    \
+    ofs = LSX_LEN / BIT;                                                            \
+    for (i = 0; i < oprsz / 16; i++) {                                              \
+        for (j = 0; j < ofs; j++) {                                                 \
+            temp.E1(j + ofs * 2 * i) = do_ssrarnu_ ## E1(Vj->E2(j + ofs * i),       \
+                                                         imm, BIT / 2);             \
+            temp.E1(j + ofs * (2 * i + 1)) = do_ssrarnu_ ## E1(Vd->E2(j + ofs * i), \
+                                                               imm, BIT / 2);       \
+        }                                                                           \
+    }                                                                               \
+    *Vd = temp;                                                                     \
+}
+
+static void do_vssrarni_du_q(VReg *Vd, VReg *Vj,
+                             uint64_t imm, int idx, Int128 mask1, Int128 mask2)
+{
+    Int128 shft_res1, shft_res2, r1, r2;
+
+    if (imm == 0) {
+        shft_res1 = Vj->Q(idx);
+        shft_res2 = Vd->Q(idx);
+    } else {
+        r1 = int128_and(int128_rshift(Vj->Q(idx), (imm - 1)), int128_one());
+        r2 = int128_and(int128_rshift(Vd->Q(idx), (imm - 1)), int128_one());
+        shft_res1 = int128_add(int128_rshift(Vj->Q(idx), imm), r1);
+        shft_res2 = int128_add(int128_rshift(Vd->Q(idx), imm), r2);
+    }
+
+    if (int128_lt(Vj->Q(idx), int128_zero())) {
+        shft_res1 = int128_zero();
+    }
+    if (int128_lt(Vd->Q(idx), int128_zero())) {
+        shft_res2 = int128_zero();
+    }
+
+    if (int128_gt(shft_res1,  mask1)) {
+        Vd->D(idx * 2) = int128_getlo(mask1);
+    } else if (int128_lt(shft_res1, int128_neg(mask2))) {
+        Vd->D(idx * 2) = int128_getlo(mask2);
+    } else {
+        Vd->D(idx * 2) = int128_getlo(shft_res1);
+    }
+
+    if (int128_gt(shft_res2, mask1)) {
+        Vd->D(idx * 2 + 1) = int128_getlo(mask1);
+    } else if (int128_lt(shft_res2, int128_neg(mask2))) {
+        Vd->D(idx * 2 + 1) = int128_getlo(mask2);
+    } else {
+        Vd->D(idx * 2 + 1) = int128_getlo(shft_res2);
+    }
+}
+
+void HELPER(vssrarni_du_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
+{
+    int i;
+    Int128 mask1, mask2;
+    VReg *Vd = (VReg *)vd;
+    VReg *Vj = (VReg *)vj;
+    int oprsz = simd_oprsz(desc);
+
+    mask1 = int128_sub(int128_lshift(int128_one(), 64), int128_one());
+    mask2  = int128_lshift(int128_one(), 64);
+
+    for (i = 0; i < oprsz / 16; i++) {
+        do_vssrarni_du_q(Vd, Vj, imm, i, mask1, mask2);
+    }
+}
+
+VSSRARNUI(vssrarni_bu_h, 16, B, H)
+VSSRARNUI(vssrarni_hu_w, 32, H, W)
+VSSRARNUI(vssrarni_wu_d, 64, W, D)
+
+#define DO_2OP(NAME, BIT, E, DO_OP)                  \
+void HELPER(NAME)(void *vd, void *vj, uint32_t desc) \
+{                                                    \
+    int i;                                           \
+    VReg *Vd = (VReg *)vd;                           \
+    VReg *Vj = (VReg *)vj;                           \
+    int oprsz = simd_oprsz(desc);                    \
+                                                     \
+    for (i = 0; i < oprsz / (BIT / 8); i++)          \
+    {                                                \
+        Vd->E(i) = DO_OP(Vj->E(i));                  \
+    }                                                \
+}
+
+DO_2OP(vclo_b, 8, UB, DO_CLO_B)
+DO_2OP(vclo_h, 16, UH, DO_CLO_H)
+DO_2OP(vclo_w, 32, UW, DO_CLO_W)
+DO_2OP(vclo_d, 64, UD, DO_CLO_D)
+DO_2OP(vclz_b, 8, UB, DO_CLZ_B)
+DO_2OP(vclz_h, 16, UH, DO_CLZ_H)
+DO_2OP(vclz_w, 32, UW, DO_CLZ_W)
+DO_2OP(vclz_d, 64, UD, DO_CLZ_D)
+
+#define VPCNT(NAME, BIT, E, FN)                      \
+void HELPER(NAME)(void *vd, void *vj, uint32_t desc) \
+{                                                    \
+    int i;                                           \
+    VReg *Vd = (VReg *)vd;                           \
+    VReg *Vj = (VReg *)vj;                           \
+    int oprsz = simd_oprsz(desc);                    \
+                                                     \
+    for (i = 0; i < oprsz / (BIT / 8); i++)          \
+    {                                                \
+        Vd->E(i) = FN(Vj->E(i));                     \
+    }                                                \
+}
+
+VPCNT(vpcnt_b, 8, UB, ctpop8)
+VPCNT(vpcnt_h, 16, UH, ctpop16)
+VPCNT(vpcnt_w, 32, UW, ctpop32)
+VPCNT(vpcnt_d, 64, UD, ctpop64)
+
+#define DO_BIT(NAME, BIT, E, DO_OP)                            \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{                                                              \
+    int i;                                                     \
+    VReg *Vd = (VReg *)vd;                                     \
+    VReg *Vj = (VReg *)vj;                                     \
+    VReg *Vk = (VReg *)vk;                                     \
+    int oprsz = simd_oprsz(desc);                              \
+                                                               \
+    for (i = 0; i < oprsz / (BIT / 8); i++) {                  \
+        Vd->E(i) = DO_OP(Vj->E(i), Vk->E(i)%BIT);              \
+    }                                                          \
+}
+
+DO_BIT(vbitclr_b, 8, UB, DO_BITCLR)
+DO_BIT(vbitclr_h, 16, UH, DO_BITCLR)
+DO_BIT(vbitclr_w, 32, UW, DO_BITCLR)
+DO_BIT(vbitclr_d, 64, UD, DO_BITCLR)
+DO_BIT(vbitset_b, 8, UB, DO_BITSET)
+DO_BIT(vbitset_h, 16, UH, DO_BITSET)
+DO_BIT(vbitset_w, 32, UW, DO_BITSET)
+DO_BIT(vbitset_d, 64, UD, DO_BITSET)
+DO_BIT(vbitrev_b, 8, UB, DO_BITREV)
+DO_BIT(vbitrev_h, 16, UH, DO_BITREV)
+DO_BIT(vbitrev_w, 32, UW, DO_BITREV)
+DO_BIT(vbitrev_d, 64, UD, DO_BITREV)
+
+#define DO_BITI(NAME, BIT, E, DO_OP)                               \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
+{                                                                  \
+    int i;                                                         \
+    VReg *Vd = (VReg *)vd;                                         \
+    VReg *Vj = (VReg *)vj;                                         \
+    int oprsz = simd_oprsz(desc);                                  \
+                                                                   \
+    for (i = 0; i < oprsz / (BIT / 8); i++) {                      \
+        Vd->E(i) = DO_OP(Vj->E(i), imm);                           \
+    }                                                              \
+}
+
+DO_BITI(vbitclri_b, 8, UB, DO_BITCLR)
+DO_BITI(vbitclri_h, 16, UH, DO_BITCLR)
+DO_BITI(vbitclri_w, 32, UW, DO_BITCLR)
+DO_BITI(vbitclri_d, 64, UD, DO_BITCLR)
+DO_BITI(vbitseti_b, 8, UB, DO_BITSET)
+DO_BITI(vbitseti_h, 16, UH, DO_BITSET)
+DO_BITI(vbitseti_w, 32, UW, DO_BITSET)
+DO_BITI(vbitseti_d, 64, UD, DO_BITSET)
+DO_BITI(vbitrevi_b, 8, UB, DO_BITREV)
+DO_BITI(vbitrevi_h, 16, UH, DO_BITREV)
+DO_BITI(vbitrevi_w, 32, UW, DO_BITREV)
+DO_BITI(vbitrevi_d, 64, UD, DO_BITREV)
+
+#define VFRSTP(NAME, BIT, MASK, E)                             \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{                                                              \
+    int i, j, m, ofs;                                          \
+    VReg *Vd = (VReg *)vd;                                     \
+    VReg *Vj = (VReg *)vj;                                     \
+    VReg *Vk = (VReg *)vk;                                     \
+    int oprsz = simd_oprsz(desc);                              \
+                                                               \
+    ofs = LSX_LEN / BIT;                                       \
+    for (i = 0; i < oprsz / 16; i++) {                         \
+        m = Vk->E(i * ofs) & MASK;                             \
+        for (j = 0; j < ofs; j++) {                            \
+            if (Vj->E(j + ofs * i) < 0) {                      \
+                break;                                         \
+            }                                                  \
+        }                                                      \
+        Vd->E(m + i * ofs) = j;                                \
+    }                                                          \
+}
+
+VFRSTP(vfrstp_b, 8, 0xf, B)
+VFRSTP(vfrstp_h, 16, 0x7, H)
+
+#define VFRSTPI(NAME, BIT, E)                                      \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
+{                                                                  \
+    int i, j, m, ofs;                                              \
+    VReg *Vd = (VReg *)vd;                                         \
+    VReg *Vj = (VReg *)vj;                                         \
+    int oprsz = simd_oprsz(desc);                                  \
+                                                                   \
+    ofs = LSX_LEN / BIT;                                           \
+    m = imm % ofs;                                                 \
+    for (i = 0; i < oprsz / 16; i++) {                             \
+        for (j = 0; j < ofs; j++) {                                \
+            if (Vj->E(j + ofs * i) < 0) {                          \
+                break;                                             \
+            }                                                      \
+        }                                                          \
+        Vd->E(m + i * ofs) = j;                                    \
+    }                                                              \
+}
+
+VFRSTPI(vfrstpi_b, 8,  B)
+VFRSTPI(vfrstpi_h, 16, H)
+
+static void vec_update_fcsr0_mask(CPULoongArchState *env,
+                                  uintptr_t pc, int mask)
+{
+    int flags = get_float_exception_flags(&env->fp_status);
+
+    set_float_exception_flags(0, &env->fp_status);
+
+    flags &= ~mask;
+
+    if (flags) {
+        flags = ieee_ex_to_loongarch(flags);
+        UPDATE_FP_CAUSE(env->fcsr0, flags);
+    }
+
+    if (GET_FP_ENABLES(env->fcsr0) & flags) {
+        do_raise_exception(env, EXCCODE_FPE, pc);
+    } else {
+        UPDATE_FP_FLAGS(env->fcsr0, flags);
+    }
+}
+
+static void vec_update_fcsr0(CPULoongArchState *env, uintptr_t pc)
+{
+    vec_update_fcsr0_mask(env, pc, 0);
+}
+
+static inline void vec_clear_cause(CPULoongArchState *env)
+{
+    SET_FP_CAUSE(env->fcsr0, 0);
+}
+
+#define DO_3OP_F(NAME, BIT, E, FN)                          \
+void HELPER(NAME)(void *vd, void *vj, void *vk,             \
+                  CPULoongArchState *env, uint32_t desc)    \
+{                                                           \
+    int i;                                                  \
+    VReg *Vd = (VReg *)vd;                                  \
+    VReg *Vj = (VReg *)vj;                                  \
+    VReg *Vk = (VReg *)vk;                                  \
+    int oprsz = simd_oprsz(desc);                           \
+                                                            \
+    vec_clear_cause(env);                                   \
+    for (i = 0; i < oprsz / (BIT / 8); i++) {               \
+        Vd->E(i) = FN(Vj->E(i), Vk->E(i), &env->fp_status); \
+        vec_update_fcsr0(env, GETPC());                     \
+    }                                                       \
+}
+
+DO_3OP_F(vfadd_s, 32, UW, float32_add)
+DO_3OP_F(vfadd_d, 64, UD, float64_add)
+DO_3OP_F(vfsub_s, 32, UW, float32_sub)
+DO_3OP_F(vfsub_d, 64, UD, float64_sub)
+DO_3OP_F(vfmul_s, 32, UW, float32_mul)
+DO_3OP_F(vfmul_d, 64, UD, float64_mul)
+DO_3OP_F(vfdiv_s, 32, UW, float32_div)
+DO_3OP_F(vfdiv_d, 64, UD, float64_div)
+DO_3OP_F(vfmax_s, 32, UW, float32_maxnum)
+DO_3OP_F(vfmax_d, 64, UD, float64_maxnum)
+DO_3OP_F(vfmin_s, 32, UW, float32_minnum)
+DO_3OP_F(vfmin_d, 64, UD, float64_minnum)
+DO_3OP_F(vfmaxa_s, 32, UW, float32_maxnummag)
+DO_3OP_F(vfmaxa_d, 64, UD, float64_maxnummag)
+DO_3OP_F(vfmina_s, 32, UW, float32_minnummag)
+DO_3OP_F(vfmina_d, 64, UD, float64_minnummag)
+
+#define DO_4OP_F(NAME, BIT, E, FN, flags)                                    \
+void HELPER(NAME)(void *vd, void *vj, void *vk, void *va,                    \
+                  CPULoongArchState *env, uint32_t desc)                     \
+{                                                                            \
+    int i;                                                                   \
+    VReg *Vd = (VReg *)vd;                                                   \
+    VReg *Vj = (VReg *)vj;                                                   \
+    VReg *Vk = (VReg *)vk;                                                   \
+    VReg *Va = (VReg *)va;                                                   \
+    int oprsz = simd_oprsz(desc);                                            \
+                                                                             \
+    vec_clear_cause(env);                                                    \
+    for (i = 0; i < oprsz / (BIT / 8); i++) {                                \
+        Vd->E(i) = FN(Vj->E(i), Vk->E(i), Va->E(i), flags, &env->fp_status); \
+        vec_update_fcsr0(env, GETPC());                                      \
+    }                                                                        \
+}
+
+DO_4OP_F(vfmadd_s, 32, UW, float32_muladd, 0)
+DO_4OP_F(vfmadd_d, 64, UD, float64_muladd, 0)
+DO_4OP_F(vfmsub_s, 32, UW, float32_muladd, float_muladd_negate_c)
+DO_4OP_F(vfmsub_d, 64, UD, float64_muladd, float_muladd_negate_c)
+DO_4OP_F(vfnmadd_s, 32, UW, float32_muladd, float_muladd_negate_result)
+DO_4OP_F(vfnmadd_d, 64, UD, float64_muladd, float_muladd_negate_result)
+DO_4OP_F(vfnmsub_s, 32, UW, float32_muladd,
+         float_muladd_negate_c | float_muladd_negate_result)
+DO_4OP_F(vfnmsub_d, 64, UD, float64_muladd,
+         float_muladd_negate_c | float_muladd_negate_result)
+
+#define DO_2OP_F(NAME, BIT, E, FN)                       \
+void HELPER(NAME)(void *vd, void *vj,                    \
+                  CPULoongArchState *env, uint32_t desc) \
+{                                                        \
+    int i;                                               \
+    VReg *Vd = (VReg *)vd;                               \
+    VReg *Vj = (VReg *)vj;                               \
+    int oprsz = simd_oprsz(desc);                        \
+                                                         \
+    vec_clear_cause(env);                                \
+    for (i = 0; i < oprsz / (BIT / 8); i++) {            \
+        Vd->E(i) = FN(env, Vj->E(i));                    \
+    }                                                    \
+}
+
+#define FLOGB(BIT, T)                                            \
+static T do_flogb_## BIT(CPULoongArchState *env, T fj)           \
+{                                                                \
+    T fp, fd;                                                    \
+    float_status *status = &env->fp_status;                      \
+    FloatRoundMode old_mode = get_float_rounding_mode(status);   \
+                                                                 \
+    set_float_rounding_mode(float_round_down, status);           \
+    fp = float ## BIT ##_log2(fj, status);                       \
+    fd = float ## BIT ##_round_to_int(fp, status);               \
+    set_float_rounding_mode(old_mode, status);                   \
+    vec_update_fcsr0_mask(env, GETPC(), float_flag_inexact);     \
+    return fd;                                                   \
+}
+
+FLOGB(32, uint32_t)
+FLOGB(64, uint64_t)
+
+#define FCLASS(NAME, BIT, E, FN)                         \
+void HELPER(NAME)(void *vd, void *vj,                    \
+                  CPULoongArchState *env, uint32_t desc) \
+{                                                        \
+    int i;                                               \
+    VReg *Vd = (VReg *)vd;                               \
+    VReg *Vj = (VReg *)vj;                               \
+    int oprsz = simd_oprsz(desc);                        \
+                                                         \
+    for (i = 0; i < oprsz / (BIT / 8); i++) {            \
+        Vd->E(i) = FN(env, Vj->E(i));                    \
+    }                                                    \
+}
+
+FCLASS(vfclass_s, 32, UW, helper_fclass_s)
+FCLASS(vfclass_d, 64, UD, helper_fclass_d)
+
+#define FSQRT(BIT, T)                                  \
+static T do_fsqrt_## BIT(CPULoongArchState *env, T fj) \
+{                                                      \
+    T fd;                                              \
+    fd = float ## BIT ##_sqrt(fj, &env->fp_status);    \
+    vec_update_fcsr0(env, GETPC());                    \
+    return fd;                                         \
+}
+
+FSQRT(32, uint32_t)
+FSQRT(64, uint64_t)
+
+#define FRECIP(BIT, T)                                                  \
+static T do_frecip_## BIT(CPULoongArchState *env, T fj)                 \
+{                                                                       \
+    T fd;                                                               \
+    fd = float ## BIT ##_div(float ## BIT ##_one, fj, &env->fp_status); \
+    vec_update_fcsr0(env, GETPC());                                     \
+    return fd;                                                          \
+}
+
+FRECIP(32, uint32_t)
+FRECIP(64, uint64_t)
+
+#define FRSQRT(BIT, T)                                                  \
+static T do_frsqrt_## BIT(CPULoongArchState *env, T fj)                 \
+{                                                                       \
+    T fd, fp;                                                           \
+    fp = float ## BIT ##_sqrt(fj, &env->fp_status);                     \
+    fd = float ## BIT ##_div(float ## BIT ##_one, fp, &env->fp_status); \
+    vec_update_fcsr0(env, GETPC());                                     \
+    return fd;                                                          \
+}
+
+FRSQRT(32, uint32_t)
+FRSQRT(64, uint64_t)
+
+DO_2OP_F(vflogb_s, 32, UW, do_flogb_32)
+DO_2OP_F(vflogb_d, 64, UD, do_flogb_64)
+DO_2OP_F(vfsqrt_s, 32, UW, do_fsqrt_32)
+DO_2OP_F(vfsqrt_d, 64, UD, do_fsqrt_64)
+DO_2OP_F(vfrecip_s, 32, UW, do_frecip_32)
+DO_2OP_F(vfrecip_d, 64, UD, do_frecip_64)
+DO_2OP_F(vfrsqrt_s, 32, UW, do_frsqrt_32)
+DO_2OP_F(vfrsqrt_d, 64, UD, do_frsqrt_64)
+
+static uint32_t float16_cvt_float32(uint16_t h, float_status *status)
+{
+    return float16_to_float32(h, true, status);
+}
+static uint64_t float32_cvt_float64(uint32_t s, float_status *status)
+{
+    return float32_to_float64(s, status);
+}
+
+static uint16_t float32_cvt_float16(uint32_t s, float_status *status)
+{
+    return float32_to_float16(s, true, status);
+}
+static uint32_t float64_cvt_float32(uint64_t d, float_status *status)
+{
+    return float64_to_float32(d, status);
+}
+
+void HELPER(vfcvtl_s_h)(void *vd, void *vj,
+                        CPULoongArchState *env, uint32_t desc)
+{
+    int i, j, ofs;
+    VReg temp = {};
+    VReg *Vd = (VReg *)vd;
+    VReg *Vj = (VReg *)vj;
+    int oprsz = simd_oprsz(desc);
+
+    ofs = LSX_LEN / 32;
+    vec_clear_cause(env);
+    for (i = 0; i < oprsz / 16; i++) {
+        for (j = 0; j < ofs; j++) {
+            temp.UW(j + ofs * i) =float16_cvt_float32(Vj->UH(j + ofs * 2 * i),
+                                                      &env->fp_status);
+        }
+        vec_update_fcsr0(env, GETPC());
+    }
+    *Vd = temp;
+}
+
+void HELPER(vfcvtl_d_s)(void *vd, void *vj,
+                        CPULoongArchState *env, uint32_t desc)
+{
+    int i, j, ofs;
+    VReg temp = {};
+    VReg *Vd = (VReg *)vd;
+    VReg *Vj = (VReg *)vj;
+    int oprsz = simd_oprsz(desc);
+
+    ofs = LSX_LEN / 64;
+    vec_clear_cause(env);
+    for (i = 0; i < oprsz / 16; i++) {
+        for (j = 0; j < ofs; j++) {
+            temp.UD(j + ofs * i) = float32_cvt_float64(Vj->UW(j + ofs * 2 * i),
+                                                       &env->fp_status);
+        }
+        vec_update_fcsr0(env, GETPC());
+    }
+    *Vd = temp;
+}
+
+void HELPER(vfcvth_s_h)(void *vd, void *vj,
+                        CPULoongArchState *env, uint32_t desc)
+{
+    int i, j, ofs;
+    VReg temp = {};
+    VReg *Vd = (VReg *)vd;
+    VReg *Vj = (VReg *)vj;
+    int oprsz = simd_oprsz(desc);
+
+    ofs = LSX_LEN / 32;
+    vec_clear_cause(env);
+    for (i = 0; i < oprsz / 16; i++) {
+        for (j = 0; j < ofs; j++) {
+            temp.UW(j + ofs * i) = float16_cvt_float32(Vj->UH(j + ofs * (2 * i + 1)),
+                                                       &env->fp_status);
+        }
+        vec_update_fcsr0(env, GETPC());
+    }
+    *Vd = temp;
+}
+
+void HELPER(vfcvth_d_s)(void *vd, void *vj,
+                        CPULoongArchState *env, uint32_t desc)
+{
+    int i, j, ofs;
+    VReg temp = {};
+    VReg *Vd = (VReg *)vd;
+    VReg *Vj = (VReg *)vj;
+    int oprsz = simd_oprsz(desc);
+
+    ofs = LSX_LEN / 64;
+    vec_clear_cause(env);
+    for (i = 0; i < oprsz / 16; i++) {
+        for (j = 0; j < ofs; j++) {
+            temp.UD(j + ofs * i) = float32_cvt_float64(Vj->UW(j + ofs * (2 * i + 1)),
+                                                        &env->fp_status);
+        }
+        vec_update_fcsr0(env, GETPC());
+    }
+    *Vd = temp;
+}
+
+void HELPER(vfcvt_h_s)(void *vd, void *vj, void *vk,
+                       CPULoongArchState *env, uint32_t desc)
+{
+    int i, j, ofs;
+    VReg temp = {};
+    VReg *Vd = (VReg *)vd;
+    VReg *Vj = (VReg *)vj;
+    VReg *Vk = (VReg *)vk;
+    int oprsz = simd_oprsz(desc);
+
+    ofs = LSX_LEN / 32;
+    vec_clear_cause(env);
+    for(i = 0; i < oprsz / 16; i++) {
+        for (j = 0; j < ofs; j++) {
+            temp.UH(j + ofs * (2 * i + 1)) = float32_cvt_float16(Vj->UW(j + ofs * i),
+                                                                 &env->fp_status);
+            temp.UH(j + ofs * 2 * i) = float32_cvt_float16(Vk->UW(j + ofs * i),
+                                                           &env->fp_status);
+        }
+        vec_update_fcsr0(env, GETPC());
+    }
+    *Vd = temp;
+}
+
+void HELPER(vfcvt_s_d)(void *vd, void *vj, void *vk,
+                       CPULoongArchState *env, uint32_t desc)
+{
+    int i, j, ofs;
+    VReg temp = {};
+    VReg *Vd = (VReg *)vd;
+    VReg *Vj = (VReg *)vj;
+    VReg *Vk = (VReg *)vk;
+    int oprsz = simd_oprsz(desc);
+
+    ofs = LSX_LEN / 64;
+    vec_clear_cause(env);
+    for(i = 0; i < oprsz / 16; i++) {
+        for (j = 0; j < ofs; j++) {
+            temp.UW(j + ofs * (2 * i + 1)) = float64_cvt_float32(Vj->UD(j + ofs * i),
+                                                                 &env->fp_status);
+            temp.UW(j + ofs * 2 * i) = float64_cvt_float32(Vk->UD(j + ofs * i),
+                                                           &env->fp_status);
+        }
+        vec_update_fcsr0(env, GETPC());
+    }
+    *Vd = temp;
+}
+
+void HELPER(vfrint_s)(void *vd, void *vj,
+                      CPULoongArchState *env, uint32_t desc)
+{
+    int i;
+    VReg *Vd = (VReg *)vd;
+    VReg *Vj = (VReg *)vj;
+    int oprsz = simd_oprsz(desc);
+
+    vec_clear_cause(env);
+    for (i = 0; i < oprsz / 4; i++) {
+        Vd->W(i) = float32_round_to_int(Vj->UW(i), &env->fp_status);
+        vec_update_fcsr0(env, GETPC());
+    }
+}
+
+void HELPER(vfrint_d)(void *vd, void *vj,
+                      CPULoongArchState *env, uint32_t desc)
+{
+    int i;
+    VReg *Vd = (VReg *)vd;
+    VReg *Vj = (VReg *)vj;
+    int oprsz = simd_oprsz(desc);
+
+    vec_clear_cause(env);
+    for (i = 0; i < oprsz / 8; i++) {
+        Vd->D(i) = float64_round_to_int(Vj->UD(i), &env->fp_status);
+        vec_update_fcsr0(env, GETPC());
+    }
+}
+
+#define FCVT_2OP(NAME, BIT, E, MODE)                                        \
+void HELPER(NAME)(void *vd, void *vj,                                       \
+                  CPULoongArchState *env, uint32_t desc)                    \
+{                                                                           \
+    int i;                                                                  \
+    VReg *Vd = (VReg *)vd;                                                  \
+    VReg *Vj = (VReg *)vj;                                                  \
+    int oprsz = simd_oprsz(desc);                                           \
+                                                                            \
+    vec_clear_cause(env);                                                   \
+    for (i = 0; i < oprsz / (BIT / 8); i++) {                               \
+        FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status); \
+        set_float_rounding_mode(MODE, &env->fp_status);                     \
+        Vd->E(i) = float## BIT ## _round_to_int(Vj->E(i), &env->fp_status); \
+        set_float_rounding_mode(old_mode, &env->fp_status);                 \
+        vec_update_fcsr0(env, GETPC());                                     \
+    }                                                                       \
+}
+
+FCVT_2OP(vfrintrne_s, 32, UW, float_round_nearest_even)
+FCVT_2OP(vfrintrne_d, 64, UD, float_round_nearest_even)
+FCVT_2OP(vfrintrz_s, 32, UW, float_round_to_zero)
+FCVT_2OP(vfrintrz_d, 64, UD, float_round_to_zero)
+FCVT_2OP(vfrintrp_s, 32, UW, float_round_up)
+FCVT_2OP(vfrintrp_d, 64, UD, float_round_up)
+FCVT_2OP(vfrintrm_s, 32, UW, float_round_down)
+FCVT_2OP(vfrintrm_d, 64, UD, float_round_down)
+
+#define FTINT(NAME, FMT1, FMT2, T1, T2,  MODE)                          \
+static T2 do_ftint ## NAME(CPULoongArchState *env, T1 fj)               \
+{                                                                       \
+    T2 fd;                                                              \
+    FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status); \
+                                                                        \
+    set_float_rounding_mode(MODE, &env->fp_status);                     \
+    fd = do_## FMT1 ##_to_## FMT2(env, fj);                             \
+    set_float_rounding_mode(old_mode, &env->fp_status);                 \
+    return fd;                                                          \
+}
+
+#define DO_FTINT(FMT1, FMT2, T1, T2)                                         \
+static T2 do_## FMT1 ##_to_## FMT2(CPULoongArchState *env, T1 fj)            \
+{                                                                            \
+    T2 fd;                                                                   \
+                                                                             \
+    fd = FMT1 ##_to_## FMT2(fj, &env->fp_status);                            \
+    if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) { \
+        if (FMT1 ##_is_any_nan(fj)) {                                        \
+            fd = 0;                                                          \
+        }                                                                    \
+    }                                                                        \
+    vec_update_fcsr0(env, GETPC());                                          \
+    return fd;                                                               \
+}
+
+DO_FTINT(float32, int32, uint32_t, uint32_t)
+DO_FTINT(float64, int64, uint64_t, uint64_t)
+DO_FTINT(float32, uint32, uint32_t, uint32_t)
+DO_FTINT(float64, uint64, uint64_t, uint64_t)
+DO_FTINT(float64, int32, uint64_t, uint32_t)
+DO_FTINT(float32, int64, uint32_t, uint64_t)
+
+FTINT(rne_w_s, float32, int32, uint32_t, uint32_t, float_round_nearest_even)
+FTINT(rne_l_d, float64, int64, uint64_t, uint64_t, float_round_nearest_even)
+FTINT(rp_w_s, float32, int32, uint32_t, uint32_t, float_round_up)
+FTINT(rp_l_d, float64, int64, uint64_t, uint64_t, float_round_up)
+FTINT(rz_w_s, float32, int32, uint32_t, uint32_t, float_round_to_zero)
+FTINT(rz_l_d, float64, int64, uint64_t, uint64_t, float_round_to_zero)
+FTINT(rm_w_s, float32, int32, uint32_t, uint32_t, float_round_down)
+FTINT(rm_l_d, float64, int64, uint64_t, uint64_t, float_round_down)
+
+DO_2OP_F(vftintrne_w_s, 32, UW, do_ftintrne_w_s)
+DO_2OP_F(vftintrne_l_d, 64, UD, do_ftintrne_l_d)
+DO_2OP_F(vftintrp_w_s, 32, UW, do_ftintrp_w_s)
+DO_2OP_F(vftintrp_l_d, 64, UD, do_ftintrp_l_d)
+DO_2OP_F(vftintrz_w_s, 32, UW, do_ftintrz_w_s)
+DO_2OP_F(vftintrz_l_d, 64, UD, do_ftintrz_l_d)
+DO_2OP_F(vftintrm_w_s, 32, UW, do_ftintrm_w_s)
+DO_2OP_F(vftintrm_l_d, 64, UD, do_ftintrm_l_d)
+DO_2OP_F(vftint_w_s, 32, UW, do_float32_to_int32)
+DO_2OP_F(vftint_l_d, 64, UD, do_float64_to_int64)
+
+FTINT(rz_wu_s, float32, uint32, uint32_t, uint32_t, float_round_to_zero)
+FTINT(rz_lu_d, float64, uint64, uint64_t, uint64_t, float_round_to_zero)
+
+DO_2OP_F(vftintrz_wu_s, 32, UW, do_ftintrz_wu_s)
+DO_2OP_F(vftintrz_lu_d, 64, UD, do_ftintrz_lu_d)
+DO_2OP_F(vftint_wu_s, 32, UW, do_float32_to_uint32)
+DO_2OP_F(vftint_lu_d, 64, UD, do_float64_to_uint64)
+
+FTINT(rm_w_d, float64, int32, uint64_t, uint32_t, float_round_down)
+FTINT(rp_w_d, float64, int32, uint64_t, uint32_t, float_round_up)
+FTINT(rz_w_d, float64, int32, uint64_t, uint32_t, float_round_to_zero)
+FTINT(rne_w_d, float64, int32, uint64_t, uint32_t, float_round_nearest_even)
+
+#define FTINT_W_D(NAME, FN)                                               \
+void HELPER(NAME)(void *vd, void *vj, void *vk,                           \
+                  CPULoongArchState *env, uint32_t desc)                  \
+{                                                                         \
+    int i, j, ofs;                                                        \
+    VReg temp = {};                                                       \
+    VReg *Vd = (VReg *)vd;                                                \
+    VReg *Vj = (VReg *)vj;                                                \
+    VReg *Vk = (VReg *)vk;                                                \
+    int oprsz = simd_oprsz(desc);                                         \
+                                                                          \
+    ofs = LSX_LEN / 64;                                                   \
+    vec_clear_cause(env);                                                 \
+    for (i = 0; i < oprsz / 16; i++) {                                    \
+        for (j = 0; j < ofs; j++) {                                       \
+            temp.W(j + ofs * (2 * i + 1)) = FN(env, Vj->UD(j + ofs * i)); \
+            temp.W(j + ofs * 2 * i) = FN(env, Vk->UD(j + ofs * i));       \
+        }                                                                 \
+    }                                                                     \
+    *Vd = temp;                                                           \
+}
+
+FTINT_W_D(vftint_w_d, do_float64_to_int32)
+FTINT_W_D(vftintrm_w_d, do_ftintrm_w_d)
+FTINT_W_D(vftintrp_w_d, do_ftintrp_w_d)
+FTINT_W_D(vftintrz_w_d, do_ftintrz_w_d)
+FTINT_W_D(vftintrne_w_d, do_ftintrne_w_d)
+
+FTINT(rml_l_s, float32, int64, uint32_t, uint64_t, float_round_down)
+FTINT(rpl_l_s, float32, int64, uint32_t, uint64_t, float_round_up)
+FTINT(rzl_l_s, float32, int64, uint32_t, uint64_t, float_round_to_zero)
+FTINT(rnel_l_s, float32, int64, uint32_t, uint64_t, float_round_nearest_even)
+FTINT(rmh_l_s, float32, int64, uint32_t, uint64_t, float_round_down)
+FTINT(rph_l_s, float32, int64, uint32_t, uint64_t, float_round_up)
+FTINT(rzh_l_s, float32, int64, uint32_t, uint64_t, float_round_to_zero)
+FTINT(rneh_l_s, float32, int64, uint32_t, uint64_t, float_round_nearest_even)
+
+#define FTINTL_L_S(NAME, FN)                                        \
+void HELPER(NAME)(void *vd, void *vj,                               \
+                  CPULoongArchState *env, uint32_t desc)            \
+{                                                                   \
+    int i, j, ofs;                                                  \
+    VReg temp;                                                      \
+    VReg *Vd = (VReg *)vd;                                          \
+    VReg *Vj = (VReg *)vj;                                          \
+    int oprsz = simd_oprsz(desc);                                   \
+                                                                    \
+    ofs = LSX_LEN / 64;                                             \
+    vec_clear_cause(env);                                           \
+    for (i = 0; i < oprsz / 16; i++) {                              \
+        for (j = 0; j < ofs; j++) {                                 \
+            temp.D(j + ofs * i) = FN(env, Vj->UW(j + ofs * 2 * i)); \
+        }                                                           \
+    }                                                               \
+    *Vd = temp;                                                     \
+}
+
+FTINTL_L_S(vftintl_l_s, do_float32_to_int64)
+FTINTL_L_S(vftintrml_l_s, do_ftintrml_l_s)
+FTINTL_L_S(vftintrpl_l_s, do_ftintrpl_l_s)
+FTINTL_L_S(vftintrzl_l_s, do_ftintrzl_l_s)
+FTINTL_L_S(vftintrnel_l_s, do_ftintrnel_l_s)
+
+#define FTINTH_L_S(NAME, FN)                                              \
+void HELPER(NAME)(void *vd, void *vj,                                     \
+                  CPULoongArchState *env, uint32_t desc)                  \
+{                                                                         \
+    int i, j, ofs;                                                        \
+    VReg temp = {};                                                       \
+    VReg *Vd = (VReg *)vd;                                                \
+    VReg *Vj = (VReg *)vj;                                                \
+    int oprsz = simd_oprsz(desc);                                         \
+                                                                          \
+    ofs = LSX_LEN / 64;                                                   \
+    vec_clear_cause(env);                                                 \
+    for (i = 0; i < oprsz / 16; i++) {                                    \
+        for (j = 0; j < ofs; j++) {                                       \
+            temp.D(j + ofs * i) = FN(env, Vj->UW(j + ofs * (2 * i + 1))); \
+        }                                                                 \
+    }                                                                     \
+    *Vd = temp;                                                           \
+}
+
+FTINTH_L_S(vftinth_l_s, do_float32_to_int64)
+FTINTH_L_S(vftintrmh_l_s, do_ftintrmh_l_s)
+FTINTH_L_S(vftintrph_l_s, do_ftintrph_l_s)
+FTINTH_L_S(vftintrzh_l_s, do_ftintrzh_l_s)
+FTINTH_L_S(vftintrneh_l_s, do_ftintrneh_l_s)
+
+#define FFINT(NAME, FMT1, FMT2, T1, T2)                    \
+static T2 do_ffint_ ## NAME(CPULoongArchState *env, T1 fj) \
+{                                                          \
+    T2 fd;                                                 \
+                                                           \
+    fd = FMT1 ##_to_## FMT2(fj, &env->fp_status);          \
+    vec_update_fcsr0(env, GETPC());                        \
+    return fd;                                             \
+}
+
+FFINT(s_w, int32, float32, int32_t, uint32_t)
+FFINT(d_l, int64, float64, int64_t, uint64_t)
+FFINT(s_wu, uint32, float32, uint32_t, uint32_t)
+FFINT(d_lu, uint64, float64, uint64_t, uint64_t)
+
+DO_2OP_F(vffint_s_w, 32, W, do_ffint_s_w)
+DO_2OP_F(vffint_d_l, 64, D, do_ffint_d_l)
+DO_2OP_F(vffint_s_wu, 32, UW, do_ffint_s_wu)
+DO_2OP_F(vffint_d_lu, 64, UD, do_ffint_d_lu)
+
+void HELPER(vffintl_d_w)(void *vd, void *vj,
+                         CPULoongArchState *env, uint32_t desc)
+{
+    int i, j, ofs;
+    VReg temp = {};
+    VReg *Vd = (VReg *)vd;
+    VReg *Vj = (VReg *)vj;
+    int oprsz = simd_oprsz(desc);
+
+    ofs = LSX_LEN / 64;
+    vec_clear_cause(env);
+    for (i = 0; i < oprsz / 16; i++) {
+        for (j = 0; j < ofs; j++) {
+            temp.D(j + ofs * i) = int32_to_float64(Vj->W(j + ofs * 2 * i),
+                                                   &env->fp_status);
+        }
+        vec_update_fcsr0(env, GETPC());
+    }
+    *Vd = temp;
+}
+
+void HELPER(vffinth_d_w)(void *vd, void *vj,
+                         CPULoongArchState *env, uint32_t desc)
+{
+    int i, j, ofs;
+    VReg temp = {};
+    VReg *Vd = (VReg *)vd;
+    VReg *Vj = (VReg *)vj;
+    int oprsz = simd_oprsz(desc);
+
+    ofs = LSX_LEN / 64;
+    vec_clear_cause(env);
+    for (i = 0; i < oprsz /16; i++) {
+        for (j = 0; j < ofs; j++) {
+            temp.D(j + ofs * i) = int32_to_float64(Vj->W(j + ofs * (2 * i + 1)),
+                                                   &env->fp_status);
+        }
+        vec_update_fcsr0(env, GETPC());
+    }
+    *Vd = temp;
+}
+
+void HELPER(vffint_s_l)(void *vd, void *vj, void *vk,
+                        CPULoongArchState *env, uint32_t desc)
+{
+    int i, j, ofs;
+    VReg temp = {};
+    VReg *Vd = (VReg *)vd;
+    VReg *Vj = (VReg *)vj;
+    VReg *Vk = (VReg *)vk;
+    int oprsz = simd_oprsz(desc);
+
+    ofs = LSX_LEN / 64;
+    vec_clear_cause(env);
+    for (i = 0; i < oprsz / 16; i++) {
+        for (j = 0; j < ofs; j++) {
+            temp.W(j + ofs * (2 * i + 1)) = int64_to_float32(Vj->D(j + ofs * i),
+                                                             &env->fp_status);
+            temp.W(j + ofs * 2 * i) = int64_to_float32(Vk->D(j + ofs * i),
+                                                       &env->fp_status);
+        }
+        vec_update_fcsr0(env, GETPC());
+    }
+    *Vd = temp;
+}
+
+#define VCMPI(NAME, BIT, E, DO_OP)                                 \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
+{                                                                  \
+    int i;                                                         \
+    VReg *Vd = (VReg *)vd;                                         \
+    VReg *Vj = (VReg *)vj;                                         \
+    typedef __typeof(Vd->E(0)) TD;                                 \
+    int oprsz = simd_oprsz(desc);                                  \
+                                                                   \
+    for (i = 0; i < oprsz / (BIT / 8); i++) {                      \
+        Vd->E(i) = DO_OP(Vj->E(i), (TD)imm);                       \
+    }                                                              \
+}
+
+VCMPI(vseqi_b, 8, B, VSEQ)
+VCMPI(vseqi_h, 16, H, VSEQ)
+VCMPI(vseqi_w, 32, W, VSEQ)
+VCMPI(vseqi_d, 64, D, VSEQ)
+VCMPI(vslei_b, 8, B, VSLE)
+VCMPI(vslei_h, 16, H, VSLE)
+VCMPI(vslei_w, 32, W, VSLE)
+VCMPI(vslei_d, 64, D, VSLE)
+VCMPI(vslei_bu, 8, UB, VSLE)
+VCMPI(vslei_hu, 16, UH, VSLE)
+VCMPI(vslei_wu, 32, UW, VSLE)
+VCMPI(vslei_du, 64, UD, VSLE)
+VCMPI(vslti_b, 8, B, VSLT)
+VCMPI(vslti_h, 16, H, VSLT)
+VCMPI(vslti_w, 32, W, VSLT)
+VCMPI(vslti_d, 64, D, VSLT)
+VCMPI(vslti_bu, 8, UB, VSLT)
+VCMPI(vslti_hu, 16, UH, VSLT)
+VCMPI(vslti_wu, 32, UW, VSLT)
+VCMPI(vslti_du, 64, UD, VSLT)
+
+static uint64_t vfcmp_common(CPULoongArchState *env,
+                             FloatRelation cmp, uint32_t flags)
+{
+    uint64_t ret = 0;
+
+    switch (cmp) {
+    case float_relation_less:
+        ret = (flags & FCMP_LT);
+        break;
+    case float_relation_equal:
+        ret = (flags & FCMP_EQ);
+        break;
+    case float_relation_greater:
+        ret = (flags & FCMP_GT);
+        break;
+    case float_relation_unordered:
+        ret = (flags & FCMP_UN);
+        break;
+    default:
+        g_assert_not_reached();
+    }
+
+    if (ret) {
+        ret = -1;
+    }
+
+    return ret;
+}
+
+#define VFCMP(NAME, BIT, E, FN)                                          \
+void HELPER(NAME)(CPULoongArchState *env, uint32_t oprsz,                \
+                  uint32_t vd, uint32_t vj, uint32_t vk, uint32_t flags) \
+{                                                                        \
+    int i;                                                               \
+    VReg t;                                                              \
+    VReg *Vd = &(env->fpr[vd].vreg);                                     \
+    VReg *Vj = &(env->fpr[vj].vreg);                                     \
+    VReg *Vk = &(env->fpr[vk].vreg);                                     \
+                                                                         \
+    vec_clear_cause(env);                                                \
+    for (i = 0; i < oprsz / (BIT / 8); i++) {                            \
+        FloatRelation cmp;                                               \
+        cmp = FN(Vj->E(i), Vk->E(i), &env->fp_status);                   \
+        t.E(i) = vfcmp_common(env, cmp, flags);                          \
+        vec_update_fcsr0(env, GETPC());                                  \
+    }                                                                    \
+    *Vd = t;                                                             \
+}
+
+VFCMP(vfcmp_c_s, 32, UW, float32_compare_quiet)
+VFCMP(vfcmp_s_s, 32, UW, float32_compare)
+VFCMP(vfcmp_c_d, 64, UD, float64_compare_quiet)
+VFCMP(vfcmp_s_d, 64, UD, float64_compare)
+
+void HELPER(vbitseli_b)(void *vd, void *vj,  uint64_t imm, uint32_t desc)
+{
+    int i;
+    VReg *Vd = (VReg *)vd;
+    VReg *Vj = (VReg *)vj;
+
+    for (i = 0; i < simd_oprsz(desc); i++) {
+        Vd->B(i) = (~Vd->B(i) & Vj->B(i)) | (Vd->B(i) & imm);
+    }
+}
+
+/* Copy from target/arm/tcg/sve_helper.c */
+static inline bool do_match2(uint64_t n, uint64_t m0, uint64_t m1, int esz)
+{
+    int bits = 8 << esz;
+    uint64_t ones = dup_const(esz, 1);
+    uint64_t signs = ones << (bits - 1);
+    uint64_t cmp0, cmp1;
+
+    cmp1 = dup_const(esz, n);
+    cmp0 = cmp1 ^ m0;
+    cmp1 = cmp1 ^ m1;
+    cmp0 = (cmp0 - ones) & ~cmp0;
+    cmp1 = (cmp1 - ones) & ~cmp1;
+    return (cmp0 | cmp1) & signs;
+}
+
+#define SETANYEQZ(NAME, MO)                                       \
+void HELPER(NAME)(CPULoongArchState *env,                         \
+                  uint32_t oprsz, uint32_t cd, uint32_t vj)       \
+{                                                                 \
+    VReg *Vj = &(env->fpr[vj].vreg);                              \
+                                                                  \
+    env->cf[cd & 0x7] = do_match2(0, Vj->D(0), Vj->D(1), MO);     \
+    if (oprsz == 32) {                                            \
+        env->cf[cd & 0x7] = env->cf[cd & 0x7] ||                  \
+                            do_match2(0, Vj->D(2), Vj->D(3), MO); \
+    }                                                             \
+}
+
+SETANYEQZ(vsetanyeqz_b, MO_8)
+SETANYEQZ(vsetanyeqz_h, MO_16)
+SETANYEQZ(vsetanyeqz_w, MO_32)
+SETANYEQZ(vsetanyeqz_d, MO_64)
+
+#define SETALLNEZ(NAME, MO)                                        \
+void HELPER(NAME)(CPULoongArchState *env,                          \
+                  uint32_t oprsz, uint32_t cd, uint32_t vj)        \
+{                                                                  \
+    VReg *Vj = &(env->fpr[vj].vreg);                               \
+                                                                   \
+    env->cf[cd & 0x7]= !do_match2(0, Vj->D(0), Vj->D(1), MO);      \
+    if (oprsz == 32) {                                             \
+        env->cf[cd & 0x7] = env->cf[cd & 0x7] &&                   \
+                            !do_match2(0, Vj->D(2), Vj->D(3), MO); \
+    }                                                              \
+}
+
+SETALLNEZ(vsetallnez_b, MO_8)
+SETALLNEZ(vsetallnez_h, MO_16)
+SETALLNEZ(vsetallnez_w, MO_32)
+SETALLNEZ(vsetallnez_d, MO_64)
+
+#define XVINSVE0(NAME, E, MASK)                                    \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
+{                                                                  \
+    VReg *Vd = (VReg *)vd;                                         \
+    VReg *Vj = (VReg *)vj;                                         \
+    Vd->E(imm & MASK) = Vj->E(0);                                  \
+}
+
+XVINSVE0(xvinsve0_w, W, 0x7)
+XVINSVE0(xvinsve0_d, D, 0x3)
+
+#define XVPICKVE(NAME, E, BIT, MASK)                               \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
+{                                                                  \
+    int i;                                                         \
+    VReg *Vd = (VReg *)vd;                                         \
+    VReg *Vj = (VReg *)vj;                                         \
+    int oprsz = simd_oprsz(desc);                                  \
+                                                                   \
+    Vd->E(0) = Vj->E(imm & MASK);                                  \
+    for (i = 1; i < oprsz / (BIT / 8); i++) {                      \
+        Vd->E(i) = 0;                                              \
+    }                                                              \
+}
+
+XVPICKVE(xvpickve_w, W, 32, 0x7)
+XVPICKVE(xvpickve_d, D, 64, 0x3)
+
+#define VPACKEV(NAME, BIT, E)                                  \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{                                                              \
+    int i;                                                     \
+    VReg temp = {};                                            \
+    VReg *Vd = (VReg *)vd;                                     \
+    VReg *Vj = (VReg *)vj;                                     \
+    VReg *Vk = (VReg *)vk;                                     \
+    int oprsz = simd_oprsz(desc);                              \
+                                                               \
+    for (i = 0; i < oprsz / (BIT / 8); i++) {                  \
+        temp.E(2 * i + 1) = Vj->E(2 * i);                      \
+        temp.E(2 *i) = Vk->E(2 * i);                           \
+    }                                                          \
+    *Vd = temp;                                                \
+}
+
+VPACKEV(vpackev_b, 16, B)
+VPACKEV(vpackev_h, 32, H)
+VPACKEV(vpackev_w, 64, W)
+VPACKEV(vpackev_d, 128, D)
+
+#define VPACKOD(NAME, BIT, E)                                  \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{                                                              \
+    int i;                                                     \
+    VReg temp = {};                                            \
+    VReg *Vd = (VReg *)vd;                                     \
+    VReg *Vj = (VReg *)vj;                                     \
+    VReg *Vk = (VReg *)vk;                                     \
+    int oprsz = simd_oprsz(desc);                              \
+                                                               \
+    for (i = 0; i < oprsz / (BIT / 8); i++) {                 \
+        temp.E(2 * i + 1) = Vj->E(2 * i + 1);                  \
+        temp.E(2 * i) = Vk->E(2 * i + 1);                      \
+    }                                                          \
+    *Vd = temp;                                                \
+}
+
+VPACKOD(vpackod_b, 16, B)
+VPACKOD(vpackod_h, 32, H)
+VPACKOD(vpackod_w, 64, W)
+VPACKOD(vpackod_d, 128, D)
+
+#define VPICKEV(NAME, BIT, E)                                         \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc)        \
+{                                                                     \
+    int i, j, ofs;                                                    \
+    VReg temp = {};                                                   \
+    VReg *Vd = (VReg *)vd;                                            \
+    VReg *Vj = (VReg *)vj;                                            \
+    VReg *Vk = (VReg *)vk;                                            \
+    int oprsz = simd_oprsz(desc);                                     \
+                                                                      \
+    ofs = LSX_LEN / BIT;                                              \
+    for (i = 0; i < oprsz / 16; i++) {                                \
+        for (j = 0; j < ofs; j++) {                                   \
+            temp.E(j + ofs * (2 * i + 1)) = Vj->E(2 * (j + ofs * i)); \
+            temp.E(j + ofs * 2 * i) = Vk->E(2 * (j + ofs * i));       \
+        }                                                             \
+    }                                                                 \
+    *Vd = temp;                                                       \
+}
+
+VPICKEV(vpickev_b, 16, B)
+VPICKEV(vpickev_h, 32, H)
+VPICKEV(vpickev_w, 64, W)
+VPICKEV(vpickev_d, 128, D)
+
+#define VPICKOD(NAME, BIT, E)                                             \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc)            \
+{                                                                         \
+    int i, j, ofs;                                                        \
+    VReg temp = {};                                                       \
+    VReg *Vd = (VReg *)vd;                                                \
+    VReg *Vj = (VReg *)vj;                                                \
+    VReg *Vk = (VReg *)vk;                                                \
+    int oprsz = simd_oprsz(desc);                                         \
+                                                                          \
+    ofs = LSX_LEN / BIT;                                                  \
+    for (i = 0; i < oprsz / 16; i++) {                                    \
+        for (j = 0; j < ofs; j++) {                                       \
+            temp.E(j + ofs * (2 * i + 1)) = Vj->E(2 * (j + ofs * i) + 1); \
+            temp.E(j + ofs * 2 * i) = Vk->E(2 * (j + ofs * i) + 1);       \
+        }                                                                 \
+    }                                                                     \
+    *Vd = temp;                                                           \
+}
+
+VPICKOD(vpickod_b, 16, B)
+VPICKOD(vpickod_h, 32, H)
+VPICKOD(vpickod_w, 64, W)
+VPICKOD(vpickod_d, 128, D)
+
+#define VILVL(NAME, BIT, E)                                         \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc)      \
+{                                                                   \
+    int i, j, ofs;                                                  \
+    VReg temp = {};                                                 \
+    VReg *Vd = (VReg *)vd;                                          \
+    VReg *Vj = (VReg *)vj;                                          \
+    VReg *Vk = (VReg *)vk;                                          \
+    int oprsz = simd_oprsz(desc);                                   \
+                                                                    \
+    ofs = LSX_LEN / BIT;                                            \
+    for (i = 0; i < oprsz / 16; i++) {                              \
+        for (j = 0; j < ofs; j++) {                                 \
+            temp.E(2 * (j + ofs * i) + 1) = Vj->E(j + ofs * 2 * i); \
+            temp.E(2 * (j + ofs * i)) = Vk->E(j + ofs * 2 * i);     \
+        }                                                           \
+    }                                                               \
+    *Vd = temp;                                                     \
+}
+
+VILVL(vilvl_b, 16, B)
+VILVL(vilvl_h, 32, H)
+VILVL(vilvl_w, 64, W)
+VILVL(vilvl_d, 128, D)
+
+#define VILVH(NAME, BIT, E)                                               \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc)            \
+{                                                                         \
+    int i, j, ofs;                                                        \
+    VReg temp = {};                                                       \
+    VReg *Vd = (VReg *)vd;                                                \
+    VReg *Vj = (VReg *)vj;                                                \
+    VReg *Vk = (VReg *)vk;                                                \
+    int oprsz = simd_oprsz(desc);                                         \
+                                                                          \
+    ofs = LSX_LEN / BIT;                                                  \
+    for (i = 0; i < oprsz / 16; i++) {                                    \
+        for (j = 0; j < ofs; j++) {                                       \
+            temp.E(2 * (j + ofs * i) + 1) = Vj->E(j + ofs * (2 * i + 1)); \
+            temp.E(2 * (j + ofs * i)) = Vk->E(j + ofs * (2 * i + 1));     \
+        }                                                                 \
+    }                                                                     \
+    *Vd = temp;                                                           \
+}
+
+VILVH(vilvh_b, 16, B)
+VILVH(vilvh_h, 32, H)
+VILVH(vilvh_w, 64, W)
+VILVH(vilvh_d, 128, D)
+
+void HELPER(vshuf_b)(void *vd, void *vj, void *vk, void *va, uint32_t desc)
+{
+    int i, j, m;
+    VReg temp = {};
+    VReg *Vd = (VReg *)vd;
+    VReg *Vj = (VReg *)vj;
+    VReg *Vk = (VReg *)vk;
+    VReg *Va = (VReg *)va;
+    int oprsz = simd_oprsz(desc);
+
+    m = LSX_LEN / 8;
+    for (i = 0; i < (oprsz / 16) * m; i++) {
+        j = i < m ? 0 : 1;
+        uint64_t k = (uint8_t)Va->B(i) % (2 * m);
+        temp.B(i) = k < m ? Vk->B(k + j * m): Vj->B(k + (j - 1) * m);
+    }
+    *Vd = temp;
+}
+
+#define VSHUF(NAME, BIT, E)                                            \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc)         \
+{                                                                      \
+    int i, j, m;                                                       \
+    VReg temp = {};                                                    \
+    VReg *Vd = (VReg *)vd;                                             \
+    VReg *Vj = (VReg *)vj;                                             \
+    VReg *Vk = (VReg *)vk;                                             \
+    int oprsz = simd_oprsz(desc);                                      \
+                                                                       \
+    m = LSX_LEN / BIT;                                                 \
+    for (i = 0; i < (oprsz / 16) * m; i++) {                           \
+        j = i < m ? 0 : 1;                                             \
+        uint64_t k  = ((uint8_t)Vd->E(i)) % (2 * m);                   \
+        temp.E(i) = k < m ? Vk->E(k + j * m) : Vj->E(k + (j - 1) * m); \
+    }                                                                  \
+    *Vd = temp;                                                        \
+}
+
+VSHUF(vshuf_h, 16, H)
+VSHUF(vshuf_w, 32, W)
+VSHUF(vshuf_d, 64, D)
+
+#define VSHUF4I(NAME, BIT, E)                                               \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc)          \
+{                                                                           \
+    int i, j, max;                                                          \
+    VReg temp = {};                                                         \
+    VReg *Vd = (VReg *)vd;                                                  \
+    VReg *Vj = (VReg *)vj;                                                  \
+    int oprsz = simd_oprsz(desc);                                           \
+                                                                            \
+    max = LSX_LEN / BIT;                                                    \
+    for (i = 0; i < oprsz / (BIT / 8); i++) {                               \
+        j = i < max ? 1 : 2;                                                \
+        temp.E(i) = Vj->E(SHF_POS(i - ((j -1)* max), imm) + (j - 1) * max); \
+    }                                                                       \
+    *Vd = temp;                                                             \
+}
+
+VSHUF4I(vshuf4i_b, 8, B)
+VSHUF4I(vshuf4i_h, 16, H)
+VSHUF4I(vshuf4i_w, 32, W)
+
+void HELPER(vshuf4i_d)(void *vd, void *vj, uint64_t imm, uint32_t desc)
+{
+    int i;
+    VReg temp = {};
+    VReg *Vd = (VReg *)vd;
+    VReg *Vj = (VReg *)vj;
+    int oprsz = simd_oprsz(desc);
+
+    for (i = 0; i < oprsz / 16; i++) {
+        temp.D(2 * i) = (imm & 2 ? Vj : Vd)->D((imm & 1) + 2 * i);
+        temp.D(2 * i + 1) = (imm & 8 ? Vj : Vd)->D(((imm >> 2) & 1) + 2 * i);
+    }
+    *Vd = temp;
+}
+
+void HELPER(vperm_w)(void *vd, void *vj, void *vk, uint32_t desc)
+{
+    int i, m;
+    VReg temp = {};
+    VReg *Vd = (VReg *)vd;
+    VReg *Vj = (VReg *)vj;
+    VReg *Vk = (VReg *)vk;
+
+    m = LASX_LEN / 32;
+    for (i = 0; i < m ; i++) {
+        uint64_t k = (uint8_t)Vk->W(i) % 8;
+        temp.W(i) = Vj->W(k);
+    }
+    *Vd = temp;
+}
+
+void HELPER(vpermi_w)(void *vd, void *vj, uint64_t imm, uint32_t desc)
+{
+    int i;
+    VReg temp = {};
+    VReg *Vd = (VReg *)vd;
+    VReg *Vj = (VReg *)vj;
+    int oprsz = simd_oprsz(desc);
+
+    for (i = 0; i < oprsz / 16; i++) {
+        temp.W(4 * i) = Vj->W((imm & 0x3) + 4 * i);
+        temp.W(4 * i + 1) = Vj->W(((imm >> 2) & 0x3) + 4 * i);
+        temp.W(4 * i + 2) = Vd->W(((imm >> 4) & 0x3) + 4 * i);
+        temp.W(4 * i + 3) = Vd->W(((imm >> 6) & 0x3) + 4 * i);
+    }
+    *Vd = temp;
+}
+
+void HELPER(vpermi_d)(void *vd, void *vj, uint64_t imm, uint32_t desc)
+{
+    VReg temp = {};
+    VReg *Vd = (VReg *)vd;
+    VReg *Vj = (VReg *)vj;
+
+    temp.D(0) = Vj->D(imm & 0x3);
+    temp.D(1) = Vj->D((imm >> 2) & 0x3);
+    temp.D(2) = Vj->D((imm >> 4) & 0x3);
+    temp.D(3) = Vj->D((imm >> 6) & 0x3);
+    *Vd = temp;
+}
+
+void HELPER(vpermi_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
+{
+    int i;
+    VReg temp;
+    VReg *Vd = (VReg *)vd;
+    VReg *Vj = (VReg *)vj;
+
+    for (i = 0; i < 2; i++, imm >>= 4) {
+        temp.Q(i) = (imm & 2 ? Vd: Vj)->Q(imm & 1);
+    }
+    *Vd = temp;
+}
+
+#define VEXTRINS(NAME, BIT, E, MASK)                               \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
+{                                                                  \
+    int i, ins, extr, max;                                         \
+    VReg *Vd = (VReg *)vd;                                         \
+    VReg *Vj = (VReg *)vj;                                         \
+    int oprsz = simd_oprsz(desc);                                  \
+                                                                   \
+    max = LSX_LEN / BIT;                                           \
+    ins = (imm >> 4) & MASK;                                       \
+    extr = imm & MASK;                                             \
+    for (i = 0; i < oprsz / 16; i++) {                             \
+        Vd->E(ins + i * max) = Vj->E(extr + i * max);              \
+    }                                                              \
+}
+
+VEXTRINS(vextrins_b, 8, B, 0xf)
+VEXTRINS(vextrins_h, 16, H, 0x7)
+VEXTRINS(vextrins_w, 32, W, 0x3)
+VEXTRINS(vextrins_d, 64, D, 0x1)
diff --git a/target/m68k/translate.c b/target/m68k/translate.c
index 15b3701b8f..9e224fe796 100644
--- a/target/m68k/translate.c
+++ b/target/m68k/translate.c
@@ -25,7 +25,6 @@
 #include "tcg/tcg-op.h"
 #include "qemu/log.h"
 #include "qemu/qemu-print.h"
-#include "exec/cpu_ldst.h"
 #include "exec/translator.h"
 
 #include "exec/helper-proto.h"
diff --git a/target/microblaze/translate.c b/target/microblaze/translate.c
index 7e7f837c63..d02c16296a 100644
--- a/target/microblaze/translate.c
+++ b/target/microblaze/translate.c
@@ -24,7 +24,6 @@
 #include "exec/exec-all.h"
 #include "tcg/tcg-op.h"
 #include "exec/helper-proto.h"
-#include "exec/cpu_ldst.h"
 #include "exec/helper-gen.h"
 #include "exec/translator.h"
 #include "qemu/qemu-print.h"
diff --git a/target/mips/cpu-defs.c.inc b/target/mips/cpu-defs.c.inc
index 03185d9aa0..c0c389c59a 100644
--- a/target/mips/cpu-defs.c.inc
+++ b/target/mips/cpu-defs.c.inc
@@ -1045,7 +1045,7 @@ static void mvp_init(CPUMIPSState *env)
         return;
     }
 
-    /* MVPConf1 implemented, TLB sharable, no gating storage support,
+    /* MVPConf1 implemented, TLB shareable, no gating storage support,
        programmable cache partitioning implemented, number of allocatable
        and shareable TLB entries, MVP has allocatable TCs, 2 VPEs
        implemented, 5 TCs implemented. */
diff --git a/target/mips/cpu.h b/target/mips/cpu.h
index f81bd06f5e..6d6af1f2a8 100644
--- a/target/mips/cpu.h
+++ b/target/mips/cpu.h
@@ -1224,8 +1224,8 @@ void mips_cpu_list(void);
 
 #define cpu_list mips_cpu_list
 
-extern void cpu_wrdsp(uint32_t rs, uint32_t mask_num, CPUMIPSState *env);
-extern uint32_t cpu_rddsp(uint32_t mask_num, CPUMIPSState *env);
+void cpu_wrdsp(uint32_t rs, uint32_t mask_num, CPUMIPSState *env);
+uint32_t cpu_rddsp(uint32_t mask_num, CPUMIPSState *env);
 
 /*
  * MMU modes definitions. We carefully match the indices with our
diff --git a/target/mips/tcg/fpu_helper.c b/target/mips/tcg/fpu_helper.c
index 8ce56ed7c8..45d593de48 100644
--- a/target/mips/tcg/fpu_helper.c
+++ b/target/mips/tcg/fpu_helper.c
@@ -25,7 +25,6 @@
 #include "internal.h"
 #include "exec/helper-proto.h"
 #include "exec/exec-all.h"
-#include "exec/cpu_ldst.h"
 #include "fpu/softfloat.h"
 #include "fpu_helper.h"
 
diff --git a/target/mips/tcg/ldst_helper.c b/target/mips/tcg/ldst_helper.c
index c1a8380e34..97056d00a2 100644
--- a/target/mips/tcg/ldst_helper.c
+++ b/target/mips/tcg/ldst_helper.c
@@ -24,6 +24,7 @@
 #include "cpu.h"
 #include "exec/helper-proto.h"
 #include "exec/exec-all.h"
+#include "exec/cpu_ldst.h"
 #include "exec/memop.h"
 #include "internal.h"
 
diff --git a/target/mips/tcg/msa_helper.c b/target/mips/tcg/msa_helper.c
index 29b31d70fe..c314a74397 100644
--- a/target/mips/tcg/msa_helper.c
+++ b/target/mips/tcg/msa_helper.c
@@ -22,6 +22,7 @@
 #include "internal.h"
 #include "tcg/tcg.h"
 #include "exec/exec-all.h"
+#include "exec/cpu_ldst.h"
 #include "exec/helper-proto.h"
 #include "exec/memop.h"
 #include "fpu/softfloat.h"
@@ -802,9 +803,9 @@ void helper_msa_bset_d(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
  * | HADD_S.H      | Vector Signed Horizontal Add (halfword)                  |
  * | HADD_S.W      | Vector Signed Horizontal Add (word)                      |
  * | HADD_S.D      | Vector Signed Horizontal Add (doubleword)                |
- * | HADD_U.H      | Vector Unigned Horizontal Add (halfword)                 |
- * | HADD_U.W      | Vector Unigned Horizontal Add (word)                     |
- * | HADD_U.D      | Vector Unigned Horizontal Add (doubleword)               |
+ * | HADD_U.H      | Vector Unsigned Horizontal Add (halfword)                |
+ * | HADD_U.W      | Vector Unsigned Horizontal Add (word)                    |
+ * | HADD_U.D      | Vector Unsigned Horizontal Add (doubleword)              |
  * +---------------+----------------------------------------------------------+
  */
 
@@ -3451,9 +3452,9 @@ void helper_msa_mulv_d(CPUMIPSState *env,
  * | HSUB_S.H      | Vector Signed Horizontal Subtract (halfword)             |
  * | HSUB_S.W      | Vector Signed Horizontal Subtract (word)                 |
  * | HSUB_S.D      | Vector Signed Horizontal Subtract (doubleword)           |
- * | HSUB_U.H      | Vector Unigned Horizontal Subtract (halfword)            |
- * | HSUB_U.W      | Vector Unigned Horizontal Subtract (word)                |
- * | HSUB_U.D      | Vector Unigned Horizontal Subtract (doubleword)          |
+ * | HSUB_U.H      | Vector Unsigned Horizontal Subtract (halfword)           |
+ * | HSUB_U.W      | Vector Unsigned Horizontal Subtract (word)               |
+ * | HSUB_U.D      | Vector Unsigned Horizontal Subtract (doubleword)         |
  * | SUBS_S.B      | Vector Signed Saturated Subtract (of Signed) (byte)      |
  * | SUBS_S.H      | Vector Signed Saturated Subtract (of Signed) (halfword)  |
  * | SUBS_S.W      | Vector Signed Saturated Subtract (of Signed) (word)      |
diff --git a/target/mips/tcg/mxu_translate.c b/target/mips/tcg/mxu_translate.c
index e662acd5df..cfcd8ac9bc 100644
--- a/target/mips/tcg/mxu_translate.c
+++ b/target/mips/tcg/mxu_translate.c
@@ -2977,14 +2977,14 @@ static void gen_mxu_Q8ADD(DisasContext *ctx)
  *    to another one in XRc, with zero extending
  *    to 16-bit and put results as packed 16-bit data
  *    into XRa and XRd.
- *    aptn2 manages action add or subract of pairs of data.
+ *    aptn2 manages action add or subtract of pairs of data.
  *
  *  Q8ACCE XRa, XRb, XRc, XRd, aptn2
  *    Add/subtract quadruple of 8-bit packed in XRb
  *    to another one in XRc, with zero extending
  *    to 16-bit and accumulate results as packed 16-bit data
  *    into XRa and XRd.
- *    aptn2 manages action add or subract of pairs of data.
+ *    aptn2 manages action add or subtract of pairs of data.
  */
 static void gen_mxu_q8adde(DisasContext *ctx, bool accumulate)
 {
@@ -4056,7 +4056,7 @@ static void gen_mxu_s32sfl(DisasContext *ctx)
 
 /*
  *  Q8SAD XRa, XRd, XRb, XRc
- *    Typical SAD opration for motion estimation.
+ *    Typical SAD operation for motion estimation.
  */
 static void gen_mxu_q8sad(DisasContext *ctx)
 {
diff --git a/target/mips/tcg/sysemu/lcsr_helper.c b/target/mips/tcg/sysemu/lcsr_helper.c
index 942143d209..25e03572fe 100644
--- a/target/mips/tcg/sysemu/lcsr_helper.c
+++ b/target/mips/tcg/sysemu/lcsr_helper.c
@@ -7,13 +7,8 @@
  */
 
 #include "qemu/osdep.h"
-#include "qemu/main-loop.h"
 #include "cpu.h"
-#include "internal.h"
-#include "qemu/host-utils.h"
 #include "exec/helper-proto.h"
-#include "exec/exec-all.h"
-#include "exec/cpu_ldst.h"
 
 #define GET_MEMTXATTRS(cas) \
         ((MemTxAttrs){.requester_id = env_cpu(cas)->cpu_index})
diff --git a/target/nios2/op_helper.c b/target/nios2/op_helper.c
index 0aaf33ffc2..5017457c5e 100644
--- a/target/nios2/op_helper.c
+++ b/target/nios2/op_helper.c
@@ -22,7 +22,6 @@
 #include "cpu.h"
 #include "exec/helper-proto.h"
 #include "exec/exec-all.h"
-#include "qemu/main-loop.h"
 
 void helper_raise_exception(CPUNios2State *env, uint32_t index)
 {
diff --git a/target/nios2/translate.c b/target/nios2/translate.c
index 4264c7ec6b..dfc546d3bb 100644
--- a/target/nios2/translate.c
+++ b/target/nios2/translate.c
@@ -29,7 +29,6 @@
 #include "exec/helper-proto.h"
 #include "exec/helper-gen.h"
 #include "exec/log.h"
-#include "exec/cpu_ldst.h"
 #include "exec/translator.h"
 #include "qemu/qemu-print.h"
 #include "semihosting/semihost.h"
diff --git a/target/openrisc/translate.c b/target/openrisc/translate.c
index 7c6f80daf1..d65758449f 100644
--- a/target/openrisc/translate.c
+++ b/target/openrisc/translate.c
@@ -26,7 +26,6 @@
 #include "qemu/log.h"
 #include "qemu/bitops.h"
 #include "qemu/qemu-print.h"
-#include "exec/cpu_ldst.h"
 #include "exec/translator.h"
 
 #include "exec/helper-proto.h"
diff --git a/target/ppc/compat.c b/target/ppc/compat.c
index 7949a24f5a..ebef2cccec 100644
--- a/target/ppc/compat.c
+++ b/target/ppc/compat.c
@@ -229,6 +229,25 @@ int ppc_set_compat_all(uint32_t compat_pvr, Error **errp)
     return 0;
 }
 
+/* To be used when the machine is not running */
+int ppc_init_compat_all(uint32_t compat_pvr, Error **errp)
+{
+    CPUState *cs;
+
+    CPU_FOREACH(cs) {
+        PowerPCCPU *cpu = POWERPC_CPU(cs);
+        int ret;
+
+        ret = ppc_set_compat(cpu, compat_pvr, errp);
+
+        if (ret < 0) {
+            return ret;
+        }
+    }
+
+    return 0;
+}
+
 int ppc_compat_max_vthreads(PowerPCCPU *cpu)
 {
     const CompatInfo *compat = compat_by_pvr(cpu->compat_pvr);
diff --git a/target/ppc/cpu-models.h b/target/ppc/cpu-models.h
index 572b5e553a..0229ef3a9a 100644
--- a/target/ppc/cpu-models.h
+++ b/target/ppc/cpu-models.h
@@ -44,7 +44,7 @@ enum {
     /* PowerPC 405 cores */
     CPU_POWERPC_405D2              = 0x20010000,
     CPU_POWERPC_405D4              = 0x41810000,
-    /* PowerPC 405 microcontrolers */
+    /* PowerPC 405 microcontrollers */
     /* XXX: missing 0x200108a0 */
     CPU_POWERPC_405CRa             = 0x40110041,
     CPU_POWERPC_405CRb             = 0x401100C5,
@@ -74,7 +74,7 @@ enum {
 #define CPU_POWERPC_440              CPU_POWERPC_440GXf
     /* PowerPC 440 cores */
     CPU_POWERPC_440_XILINX         = 0x7ff21910,
-    /* PowerPC 440 microcontrolers */
+    /* PowerPC 440 microcontrollers */
     CPU_POWERPC_440EPa             = 0x42221850,
     CPU_POWERPC_440EPb             = 0x422218D3,
     CPU_POWERPC_440GPb             = 0x40120440,
diff --git a/target/ppc/cpu.c b/target/ppc/cpu.c
index 424f2e1741..e3ad8e0c27 100644
--- a/target/ppc/cpu.c
+++ b/target/ppc/cpu.c
@@ -59,6 +59,7 @@ void ppc_store_vscr(CPUPPCState *env, uint32_t vscr)
     env->vscr_sat.u64[0] = vscr & (1u << VSCR_SAT);
     env->vscr_sat.u64[1] = 0;
     set_flush_to_zero((vscr >> VSCR_NJ) & 1, &env->vec_status);
+    set_flush_inputs_to_zero((vscr >> VSCR_NJ) & 1, &env->vec_status);
 }
 
 uint32_t ppc_get_vscr(CPUPPCState *env)
@@ -102,6 +103,92 @@ void ppc_store_lpcr(PowerPCCPU *cpu, target_ulong val)
 
     ppc_maybe_interrupt(env);
 }
+
+#if defined(TARGET_PPC64)
+void ppc_update_ciabr(CPUPPCState *env)
+{
+    CPUState *cs = env_cpu(env);
+    target_ulong ciabr = env->spr[SPR_CIABR];
+    target_ulong ciea, priv;
+
+    ciea = ciabr & PPC_BITMASK(0, 61);
+    priv = ciabr & PPC_BITMASK(62, 63);
+
+    if (env->ciabr_breakpoint) {
+        cpu_breakpoint_remove_by_ref(cs, env->ciabr_breakpoint);
+        env->ciabr_breakpoint = NULL;
+    }
+
+    if (priv) {
+        cpu_breakpoint_insert(cs, ciea, BP_CPU, &env->ciabr_breakpoint);
+    }
+}
+
+void ppc_store_ciabr(CPUPPCState *env, target_ulong val)
+{
+    env->spr[SPR_CIABR] = val;
+    ppc_update_ciabr(env);
+}
+
+void ppc_update_daw0(CPUPPCState *env)
+{
+    CPUState *cs = env_cpu(env);
+    target_ulong deaw = env->spr[SPR_DAWR0] & PPC_BITMASK(0, 60);
+    uint32_t dawrx = env->spr[SPR_DAWRX0];
+    int mrd = extract32(dawrx, PPC_BIT_NR(48), 54 - 48);
+    bool dw = extract32(dawrx, PPC_BIT_NR(57), 1);
+    bool dr = extract32(dawrx, PPC_BIT_NR(58), 1);
+    bool hv = extract32(dawrx, PPC_BIT_NR(61), 1);
+    bool sv = extract32(dawrx, PPC_BIT_NR(62), 1);
+    bool pr = extract32(dawrx, PPC_BIT_NR(62), 1);
+    vaddr len;
+    int flags;
+
+    if (env->dawr0_watchpoint) {
+        cpu_watchpoint_remove_by_ref(cs, env->dawr0_watchpoint);
+        env->dawr0_watchpoint = NULL;
+    }
+
+    if (!dr && !dw) {
+        return;
+    }
+
+    if (!hv && !sv && !pr) {
+        return;
+    }
+
+    len = (mrd + 1) * 8;
+    flags = BP_CPU | BP_STOP_BEFORE_ACCESS;
+    if (dr) {
+        flags |= BP_MEM_READ;
+    }
+    if (dw) {
+        flags |= BP_MEM_WRITE;
+    }
+
+    cpu_watchpoint_insert(cs, deaw, len, flags, &env->dawr0_watchpoint);
+}
+
+void ppc_store_dawr0(CPUPPCState *env, target_ulong val)
+{
+    env->spr[SPR_DAWR0] = val;
+    ppc_update_daw0(env);
+}
+
+void ppc_store_dawrx0(CPUPPCState *env, uint32_t val)
+{
+    int hrammc = extract32(val, PPC_BIT_NR(56), 1);
+
+    if (hrammc) {
+        /* This might be done with a second watchpoint at the xor of DEAW[0] */
+        qemu_log_mask(LOG_UNIMP, "%s: DAWRX0[HRAMMC] is unimplemented\n",
+                      __func__);
+    }
+
+    env->spr[SPR_DAWRX0] = val;
+    ppc_update_daw0(env);
+}
+#endif
 #endif
 
 static inline void fpscr_set_rounding_mode(CPUPPCState *env)
diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index 25fac9577a..d703a5f3c6 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -428,7 +428,7 @@ FIELD(MSR, LE, MSR_LE, 1)
 
 /* PMU bits */
 #define MMCR0_FC     PPC_BIT(32)         /* Freeze Counters  */
-#define MMCR0_PMAO   PPC_BIT(56)         /* Perf Monitor Alert Ocurred */
+#define MMCR0_PMAO   PPC_BIT(56)         /* Perf Monitor Alert Occurred */
 #define MMCR0_PMAE   PPC_BIT(37)         /* Perf Monitor Alert Enable */
 #define MMCR0_EBE    PPC_BIT(43)         /* Perf Monitor EBB Enable */
 #define MMCR0_FCECE  PPC_BIT(38)         /* FC on Enabled Cond or Event */
@@ -1121,7 +1121,9 @@ struct CPUArchState {
     target_ulong reserve_addr;   /* Reservation address */
     target_ulong reserve_length; /* Reservation larx op size (bytes) */
     target_ulong reserve_val;    /* Reservation value */
+#if defined(TARGET_PPC64)
     target_ulong reserve_val2;
+#endif
 
     /* These are used in supervisor mode only */
     target_ulong msr;      /* machine state register */
@@ -1137,6 +1139,8 @@ struct CPUArchState {
     /* MMU context, only relevant for full system emulation */
 #if defined(TARGET_PPC64)
     ppc_slb_t slb[MAX_SLB_ENTRIES]; /* PowerPC 64 SLB area */
+    struct CPUBreakpoint *ciabr_breakpoint;
+    struct CPUWatchpoint *dawr0_watchpoint;
 #endif
     target_ulong sr[32];   /* segment registers */
     uint32_t nb_BATs;      /* number of BATs */
@@ -1403,6 +1407,11 @@ void ppc_translate_init(void);
 #if !defined(CONFIG_USER_ONLY)
 void ppc_store_sdr1(CPUPPCState *env, target_ulong value);
 void ppc_store_lpcr(PowerPCCPU *cpu, target_ulong val);
+void ppc_update_ciabr(CPUPPCState *env);
+void ppc_store_ciabr(CPUPPCState *env, target_ulong value);
+void ppc_update_daw0(CPUPPCState *env);
+void ppc_store_dawr0(CPUPPCState *env, target_ulong value);
+void ppc_store_dawrx0(CPUPPCState *env, uint32_t value);
 #endif /* !defined(CONFIG_USER_ONLY) */
 void ppc_store_msr(CPUPPCState *env, target_ulong value);
 
@@ -1495,6 +1504,7 @@ int ppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr, Error **errp);
 
 #if !defined(CONFIG_USER_ONLY)
 int ppc_set_compat_all(uint32_t compat_pvr, Error **errp);
+int ppc_init_compat_all(uint32_t compat_pvr, Error **errp);
 #endif
 int ppc_compat_max_vthreads(PowerPCCPU *cpu);
 void ppc_compat_add_property(Object *obj, const char *name,
@@ -1897,7 +1907,9 @@ void ppc_compat_add_property(Object *obj, const char *name,
 #define SPR_PSSCR             (0x357)
 #define SPR_440_INV0          (0x370)
 #define SPR_440_INV1          (0x371)
+#define SPR_TRIG1             (0x371)
 #define SPR_440_INV2          (0x372)
+#define SPR_TRIG2             (0x372)
 #define SPR_440_INV3          (0x373)
 #define SPR_440_ITV0          (0x374)
 #define SPR_440_ITV1          (0x375)
diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c
index 02b7aad9b0..c62bf0e437 100644
--- a/target/ppc/cpu_init.c
+++ b/target/ppc/cpu_init.c
@@ -5117,17 +5117,17 @@ static void register_book3s_207_dbg_sprs(CPUPPCState *env)
     spr_register_kvm_hv(env, SPR_DAWR0, "DAWR0",
                         SPR_NOACCESS, SPR_NOACCESS,
                         SPR_NOACCESS, SPR_NOACCESS,
-                        &spr_read_generic, &spr_write_generic,
+                        &spr_read_generic, &spr_write_dawr0,
                         KVM_REG_PPC_DAWR, 0x00000000);
     spr_register_kvm_hv(env, SPR_DAWRX0, "DAWRX0",
                         SPR_NOACCESS, SPR_NOACCESS,
                         SPR_NOACCESS, SPR_NOACCESS,
-                        &spr_read_generic, &spr_write_generic32,
+                        &spr_read_generic, &spr_write_dawrx0,
                         KVM_REG_PPC_DAWRX, 0x00000000);
     spr_register_kvm_hv(env, SPR_CIABR, "CIABR",
                         SPR_NOACCESS, SPR_NOACCESS,
                         SPR_NOACCESS, SPR_NOACCESS,
-                        &spr_read_generic, &spr_write_generic,
+                        &spr_read_generic, &spr_write_ciabr,
                         KVM_REG_PPC_CIABR, 0x00000000);
 }
 
@@ -5347,7 +5347,7 @@ static void register_970_lpar_sprs(CPUPPCState *env)
 static void register_power5p_lpar_sprs(CPUPPCState *env)
 {
 #if !defined(CONFIG_USER_ONLY)
-    /* Logical partitionning */
+    /* Logical partitioning */
     spr_register_kvm_hv(env, SPR_LPCR, "LPCR",
                         SPR_NOACCESS, SPR_NOACCESS,
                         SPR_NOACCESS, SPR_NOACCESS,
@@ -5660,6 +5660,16 @@ static void register_power_common_book4_sprs(CPUPPCState *env)
                  SPR_NOACCESS, SPR_NOACCESS,
                  &spr_read_tfmr, &spr_write_tfmr,
                  0x00000000);
+    spr_register_hv(env, SPR_TRIG1, "TRIG1",
+                 SPR_NOACCESS, SPR_NOACCESS,
+                 &spr_access_nop, &spr_write_generic,
+                 &spr_access_nop, &spr_write_generic,
+                 0x00000000);
+    spr_register_hv(env, SPR_TRIG2, "TRIG2",
+                 SPR_NOACCESS, SPR_NOACCESS,
+                 &spr_access_nop, &spr_write_generic,
+                 &spr_access_nop, &spr_write_generic,
+                 0x00000000);
 #endif
 }
 
@@ -5750,7 +5760,7 @@ static void register_power9_mmu_sprs(CPUPPCState *env)
 static void register_power10_hash_sprs(CPUPPCState *env)
 {
     /*
-     * it's the OS responsability to generate a random value for the registers
+     * it's the OS responsibility to generate a random value for the registers
      * in each process' context. So, initialize it with 0 here.
      */
     uint64_t hashkeyr_initial_value = 0, hashpkeyr_initial_value = 0;
@@ -7149,6 +7159,8 @@ static void ppc_cpu_reset_hold(Object *obj)
     env->nip = env->hreset_vector | env->excp_prefix;
 
     if (tcg_enabled()) {
+        cpu_breakpoint_remove_all(s, BP_CPU);
+        cpu_watchpoint_remove_all(s, BP_CPU);
         if (env->mmu_model != POWERPC_MMU_REAL) {
             ppc_tlb_invalidate_all(env);
         }
@@ -7336,6 +7348,9 @@ static const struct TCGCPUOps ppc_tcg_ops = {
   .cpu_exec_exit = ppc_cpu_exec_exit,
   .do_unaligned_access = ppc_cpu_do_unaligned_access,
   .do_transaction_failed = ppc_cpu_do_transaction_failed,
+  .debug_excp_handler = ppc_cpu_debug_excp_handler,
+  .debug_check_breakpoint = ppc_cpu_debug_check_breakpoint,
+  .debug_check_watchpoint = ppc_cpu_debug_check_watchpoint,
 #endif /* !CONFIG_USER_ONLY */
 };
 #endif /* CONFIG_TCG */
diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c
index 9aa8e46566..99099cb1f6 100644
--- a/target/ppc/excp_helper.c
+++ b/target/ppc/excp_helper.c
@@ -455,7 +455,7 @@ static void powerpc_excp_40x(PowerPCCPU *cpu, int excp)
 
     /*
      * new interrupt handler msr preserves existing ME unless
-     * explicitly overriden.
+     * explicitly overridden.
      */
     new_msr = env->msr & (((target_ulong)1 << MSR_ME));
 
@@ -578,7 +578,7 @@ static void powerpc_excp_6xx(PowerPCCPU *cpu, int excp)
 
     /*
      * new interrupt handler msr preserves existing ME unless
-     * explicitly overriden
+     * explicitly overridden
      */
     new_msr = env->msr & ((target_ulong)1 << MSR_ME);
 
@@ -739,7 +739,7 @@ static void powerpc_excp_7xx(PowerPCCPU *cpu, int excp)
 
     /*
      * new interrupt handler msr preserves existing ME unless
-     * explicitly overriden
+     * explicitly overridden
      */
     new_msr = env->msr & ((target_ulong)1 << MSR_ME);
 
@@ -843,6 +843,7 @@ static void powerpc_excp_7xx(PowerPCCPU *cpu, int excp)
             PPCVirtualHypervisorClass *vhc =
                 PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
             vhc->hypercall(cpu->vhyp, cpu);
+            powerpc_reset_excp_state(cpu);
             return;
         }
 
@@ -910,7 +911,7 @@ static void powerpc_excp_74xx(PowerPCCPU *cpu, int excp)
 
     /*
      * new interrupt handler msr preserves existing ME unless
-     * explicitly overriden
+     * explicitly overridden
      */
     new_msr = env->msr & ((target_ulong)1 << MSR_ME);
 
@@ -1014,6 +1015,7 @@ static void powerpc_excp_74xx(PowerPCCPU *cpu, int excp)
             PPCVirtualHypervisorClass *vhc =
                 PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
             vhc->hypercall(cpu->vhyp, cpu);
+            powerpc_reset_excp_state(cpu);
             return;
         }
 
@@ -1073,7 +1075,7 @@ static void powerpc_excp_booke(PowerPCCPU *cpu, int excp)
 
     /*
      * new interrupt handler msr preserves existing ME unless
-     * explicitly overriden
+     * explicitly overridden
      */
     new_msr = env->msr & ((target_ulong)1 << MSR_ME);
 
@@ -1286,7 +1288,7 @@ static bool books_vhyp_handles_hcall(PowerPCCPU *cpu)
 /*
  * When running a nested KVM HV guest under vhyp, HV exceptions are not
  * delivered to the guest (because there is no concept of HV support), but
- * rather they are sent tothe vhyp to exit from the L2 back to the L1 and
+ * rather they are sent to the vhyp to exit from the L2 back to the L1 and
  * return from the H_ENTER_NESTED hypercall.
  */
 static bool books_vhyp_handles_hv_excp(PowerPCCPU *cpu)
@@ -1375,7 +1377,7 @@ static void powerpc_excp_books(PowerPCCPU *cpu, int excp)
 
     /*
      * new interrupt handler msr preserves existing HV and ME unless
-     * explicitly overriden
+     * explicitly overridden
      */
     new_msr = env->msr & (((target_ulong)1 << MSR_ME) | MSR_HVB);
 
@@ -1526,6 +1528,7 @@ static void powerpc_excp_books(PowerPCCPU *cpu, int excp)
             PPCVirtualHypervisorClass *vhc =
                 PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
             vhc->hypercall(cpu->vhyp, cpu);
+            powerpc_reset_excp_state(cpu);
             return;
         }
         if (env->insns_flags2 & PPC2_ISA310) {
@@ -1571,9 +1574,11 @@ static void powerpc_excp_books(PowerPCCPU *cpu, int excp)
             }
         }
         break;
+    case POWERPC_EXCP_TRACE:     /* Trace exception                          */
+        msr |= env->error_code;
+        /* fall through */
     case POWERPC_EXCP_DSEG:      /* Data segment exception                   */
     case POWERPC_EXCP_ISEG:      /* Instruction segment exception            */
-    case POWERPC_EXCP_TRACE:     /* Trace exception                          */
     case POWERPC_EXCP_SDOOR:     /* Doorbell interrupt                       */
     case POWERPC_EXCP_PERFM:     /* Performance monitor interrupt            */
         break;
@@ -3168,6 +3173,18 @@ void helper_book3s_msgsndp(CPUPPCState *env, target_ulong rb)
 }
 #endif /* TARGET_PPC64 */
 
+/* Single-step tracing */
+void helper_book3s_trace(CPUPPCState *env, target_ulong prev_ip)
+{
+    uint32_t error_code = 0;
+    if (env->insns_flags2 & PPC2_ISA207S) {
+        /* Load/store reporting, SRR1[35, 36] and SDAR, are not implemented. */
+        env->spr[SPR_POWER_SIAR] = prev_ip;
+        error_code = PPC_BIT(33);
+    }
+    raise_exception_err(env, POWERPC_EXCP_TRACE, error_code);
+}
+
 void ppc_cpu_do_unaligned_access(CPUState *cs, vaddr vaddr,
                                  MMUAccessType access_type,
                                  int mmu_idx, uintptr_t retaddr)
@@ -3243,5 +3260,97 @@ void ppc_cpu_do_transaction_failed(CPUState *cs, hwaddr physaddr,
     cs->exception_index = POWERPC_EXCP_MCHECK;
     cpu_loop_exit_restore(cs, retaddr);
 }
+
+void ppc_cpu_debug_excp_handler(CPUState *cs)
+{
+#if defined(TARGET_PPC64)
+    CPUPPCState *env = cs->env_ptr;
+
+    if (env->insns_flags2 & PPC2_ISA207S) {
+        if (cs->watchpoint_hit) {
+            if (cs->watchpoint_hit->flags & BP_CPU) {
+                env->spr[SPR_DAR] = cs->watchpoint_hit->hitaddr;
+                env->spr[SPR_DSISR] = PPC_BIT(41);
+                cs->watchpoint_hit = NULL;
+                raise_exception(env, POWERPC_EXCP_DSI);
+            }
+            cs->watchpoint_hit = NULL;
+        } else if (cpu_breakpoint_test(cs, env->nip, BP_CPU)) {
+            raise_exception_err(env, POWERPC_EXCP_TRACE,
+                                PPC_BIT(33) | PPC_BIT(43));
+        }
+    }
+#endif
+}
+
+bool ppc_cpu_debug_check_breakpoint(CPUState *cs)
+{
+#if defined(TARGET_PPC64)
+    CPUPPCState *env = cs->env_ptr;
+
+    if (env->insns_flags2 & PPC2_ISA207S) {
+        target_ulong priv;
+
+        priv = env->spr[SPR_CIABR] & PPC_BITMASK(62, 63);
+        switch (priv) {
+        case 0x1: /* problem */
+            return env->msr & ((target_ulong)1 << MSR_PR);
+        case 0x2: /* supervisor */
+            return (!(env->msr & ((target_ulong)1 << MSR_PR)) &&
+                    !(env->msr & ((target_ulong)1 << MSR_HV)));
+        case 0x3: /* hypervisor */
+            return (!(env->msr & ((target_ulong)1 << MSR_PR)) &&
+                     (env->msr & ((target_ulong)1 << MSR_HV)));
+        default:
+            g_assert_not_reached();
+        }
+    }
+#endif
+
+    return false;
+}
+
+bool ppc_cpu_debug_check_watchpoint(CPUState *cs, CPUWatchpoint *wp)
+{
+#if defined(TARGET_PPC64)
+    CPUPPCState *env = cs->env_ptr;
+
+    if (env->insns_flags2 & PPC2_ISA207S) {
+        if (wp == env->dawr0_watchpoint) {
+            uint32_t dawrx = env->spr[SPR_DAWRX0];
+            bool wt = extract32(dawrx, PPC_BIT_NR(59), 1);
+            bool wti = extract32(dawrx, PPC_BIT_NR(60), 1);
+            bool hv = extract32(dawrx, PPC_BIT_NR(61), 1);
+            bool sv = extract32(dawrx, PPC_BIT_NR(62), 1);
+            bool pr = extract32(dawrx, PPC_BIT_NR(62), 1);
+
+            if ((env->msr & ((target_ulong)1 << MSR_PR)) && !pr) {
+                return false;
+            } else if ((env->msr & ((target_ulong)1 << MSR_HV)) && !hv) {
+                return false;
+            } else if (!sv) {
+                return false;
+            }
+
+            if (!wti) {
+                if (env->msr & ((target_ulong)1 << MSR_DR)) {
+                    if (!wt) {
+                        return false;
+                    }
+                } else {
+                    if (wt) {
+                        return false;
+                    }
+                }
+            }
+
+            return true;
+        }
+    }
+#endif
+
+    return false;
+}
+
 #endif /* CONFIG_TCG */
 #endif /* !CONFIG_USER_ONLY */
diff --git a/target/ppc/helper.h b/target/ppc/helper.h
index abec6fe341..86f97ee1e7 100644
--- a/target/ppc/helper.h
+++ b/target/ppc/helper.h
@@ -25,6 +25,9 @@ DEF_HELPER_1(hrfid, void, env)
 DEF_HELPER_2(rfebb, void, env, tl)
 DEF_HELPER_2(store_lpcr, void, env, tl)
 DEF_HELPER_2(store_pcr, void, env, tl)
+DEF_HELPER_2(store_ciabr, void, env, tl)
+DEF_HELPER_2(store_dawr0, void, env, tl)
+DEF_HELPER_2(store_dawrx0, void, env, tl)
 DEF_HELPER_2(store_mmcr0, void, env, tl)
 DEF_HELPER_2(store_mmcr1, void, env, tl)
 DEF_HELPER_3(store_pmc, void, env, i32, i64)
@@ -32,6 +35,7 @@ DEF_HELPER_2(read_pmc, tl, env, i32)
 DEF_HELPER_2(insns_inc, void, env, i32)
 DEF_HELPER_1(handle_pmc5_overflow, void, env)
 #endif
+DEF_HELPER_2(book3s_trace, void, env, tl)
 DEF_HELPER_1(check_tlb_flush_local, void, env)
 DEF_HELPER_1(check_tlb_flush_global, void, env)
 #endif
diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c
index 834da80fe3..6fd00684a5 100644
--- a/target/ppc/int_helper.c
+++ b/target/ppc/int_helper.c
@@ -21,11 +21,11 @@
 #include "cpu.h"
 #include "internal.h"
 #include "qemu/host-utils.h"
-#include "qemu/main-loop.h"
 #include "qemu/log.h"
 #include "exec/helper-proto.h"
 #include "crypto/aes.h"
 #include "crypto/aes-round.h"
+#include "crypto/clmul.h"
 #include "fpu/softfloat.h"
 #include "qapi/error.h"
 #include "qemu/guest-random.h"
@@ -1425,46 +1425,39 @@ void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
 #undef VBPERMQ_INDEX
 #undef VBPERMQ_DW
 
-#define PMSUM(name, srcfld, trgfld, trgtyp)                   \
-void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)  \
-{                                                             \
-    int i, j;                                                 \
-    trgtyp prod[sizeof(ppc_avr_t) / sizeof(a->srcfld[0])];    \
-                                                              \
-    VECTOR_FOR_INORDER_I(i, srcfld) {                         \
-        prod[i] = 0;                                          \
-        for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) {      \
-            if (a->srcfld[i] & (1ull << j)) {                 \
-                prod[i] ^= ((trgtyp)b->srcfld[i] << j);       \
-            }                                                 \
-        }                                                     \
-    }                                                         \
-                                                              \
-    VECTOR_FOR_INORDER_I(i, trgfld) {                         \
-        r->trgfld[i] = prod[2 * i] ^ prod[2 * i + 1];         \
-    }                                                         \
+/*
+ * There is no carry across the two doublewords, so their order does
+ * not matter.  Nor is there partial overlap between registers.
+ */
+void helper_vpmsumb(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
+{
+    for (int i = 0; i < 2; ++i) {
+        uint64_t aa = a->u64[i], bb = b->u64[i];
+        r->u64[i] = clmul_8x4_even(aa, bb) ^ clmul_8x4_odd(aa, bb);
+    }
 }
 
-PMSUM(vpmsumb, u8, u16, uint16_t)
-PMSUM(vpmsumh, u16, u32, uint32_t)
-PMSUM(vpmsumw, u32, u64, uint64_t)
+void helper_vpmsumh(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
+{
+    for (int i = 0; i < 2; ++i) {
+        uint64_t aa = a->u64[i], bb = b->u64[i];
+        r->u64[i] = clmul_16x2_even(aa, bb) ^ clmul_16x2_odd(aa, bb);
+    }
+}
+
+void helper_vpmsumw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
+{
+    for (int i = 0; i < 2; ++i) {
+        uint64_t aa = a->u64[i], bb = b->u64[i];
+        r->u64[i] = clmul_32(aa, bb) ^ clmul_32(aa >> 32, bb >> 32);
+    }
+}
 
 void helper_VPMSUMD(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
 {
-    int i, j;
-    Int128 tmp, prod[2] = {int128_zero(), int128_zero()};
-
-    for (j = 0; j < 64; j++) {
-        for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
-            if (a->VsrD(i) & (1ull << j)) {
-                tmp = int128_make64(b->VsrD(i));
-                tmp = int128_lshift(tmp, j);
-                prod[i] = int128_xor(prod[i], tmp);
-            }
-        }
-    }
-
-    r->s128 = int128_xor(prod[0], prod[1]);
+    Int128 e = clmul_64(a->u64[0], b->u64[0]);
+    Int128 o = clmul_64(a->u64[1], b->u64[1]);
+    r->s128 = int128_xor(e, o);
 }
 
 #if HOST_BIG_ENDIAN
diff --git a/target/ppc/internal.h b/target/ppc/internal.h
index 57acb3212c..15803bc313 100644
--- a/target/ppc/internal.h
+++ b/target/ppc/internal.h
@@ -301,6 +301,9 @@ void ppc_cpu_do_transaction_failed(CPUState *cs, hwaddr physaddr,
                                    MMUAccessType access_type,
                                    int mmu_idx, MemTxAttrs attrs,
                                    MemTxResult response, uintptr_t retaddr);
+void ppc_cpu_debug_excp_handler(CPUState *cs);
+bool ppc_cpu_debug_check_breakpoint(CPUState *cs);
+bool ppc_cpu_debug_check_watchpoint(CPUState *cs, CPUWatchpoint *wp);
 #endif
 
 FIELD(GER_MSK, XMSK, 0, 4)
diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
index 7698501743..51112bd367 100644
--- a/target/ppc/kvm.c
+++ b/target/ppc/kvm.c
@@ -1320,7 +1320,7 @@ int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
         return 0;
     }
 
-    if (!kvm_enabled() || !cap_interrupt_unset) {
+    if (!cap_interrupt_unset) {
         return 0;
     }
 
diff --git a/target/ppc/machine.c b/target/ppc/machine.c
index 134b16c625..68cbdffecd 100644
--- a/target/ppc/machine.c
+++ b/target/ppc/machine.c
@@ -7,9 +7,9 @@
 #include "mmu-hash64.h"
 #include "migration/cpu.h"
 #include "qapi/error.h"
-#include "qemu/main-loop.h"
 #include "kvm_ppc.h"
 #include "power8-pmu.h"
+#include "sysemu/replay.h"
 
 static void post_load_update_msr(CPUPPCState *env)
 {
@@ -209,6 +209,14 @@ static int cpu_pre_save(void *opaque)
     /* Used to retain migration compatibility for pre 6.0 for 601 machines. */
     env->hflags_compat_nmsr = 0;
 
+    if (tcg_enabled()) {
+        /*
+         * TCG does not maintain the DECR spr (unlike KVM) so have to save
+         * it here.
+         */
+        env->spr[SPR_DECR] = cpu_ppc_load_decr(env);
+    }
+
     return 0;
 }
 
@@ -314,6 +322,17 @@ static int cpu_post_load(void *opaque, int version_id)
     post_load_update_msr(env);
 
     if (tcg_enabled()) {
+        /* Re-set breaks based on regs */
+#if defined(TARGET_PPC64)
+        ppc_update_ciabr(env);
+        ppc_update_daw0(env);
+#endif
+        /*
+         * TCG needs to re-start the decrementer timer and/or raise the
+         * interrupt. This works for level-triggered decrementer. Edge
+         * triggered types (including HDEC) would need to carry more state.
+         */
+        cpu_ppc_store_decr(env, env->spr[SPR_DECR]);
         pmu_mmcr01_updated(env);
     }
 
@@ -671,6 +690,27 @@ static const VMStateDescription vmstate_compat = {
     }
 };
 
+static bool reservation_needed(void *opaque)
+{
+    return (replay_mode != REPLAY_MODE_NONE);
+}
+
+static const VMStateDescription vmstate_reservation = {
+    .name = "cpu/reservation",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = reservation_needed,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINTTL(env.reserve_addr, PowerPCCPU),
+        VMSTATE_UINTTL(env.reserve_length, PowerPCCPU),
+        VMSTATE_UINTTL(env.reserve_val, PowerPCCPU),
+#if defined(TARGET_PPC64)
+        VMSTATE_UINTTL(env.reserve_val2, PowerPCCPU),
+#endif
+        VMSTATE_END_OF_LIST()
+    }
+};
+
 const VMStateDescription vmstate_ppc_cpu = {
     .name = "cpu",
     .version_id = 5,
@@ -692,8 +732,7 @@ const VMStateDescription vmstate_ppc_cpu = {
         VMSTATE_UINTTL_ARRAY(env.spr, PowerPCCPU, 1024),
         VMSTATE_UINT64(env.spe_acc, PowerPCCPU),
 
-        /* Reservation */
-        VMSTATE_UINTTL(env.reserve_addr, PowerPCCPU),
+        VMSTATE_UNUSED(sizeof(target_ulong)), /* was env.reserve_addr */
 
         /* Supervisor mode architected state */
         VMSTATE_UINTTL(env.msr, PowerPCCPU),
@@ -722,6 +761,7 @@ const VMStateDescription vmstate_ppc_cpu = {
         &vmstate_tlbemb,
         &vmstate_tlbmas,
         &vmstate_compat,
+        &vmstate_reservation,
         NULL
     }
 };
diff --git a/target/ppc/mem_helper.c b/target/ppc/mem_helper.c
index 46eae65819..c7535481d6 100644
--- a/target/ppc/mem_helper.c
+++ b/target/ppc/mem_helper.c
@@ -21,7 +21,6 @@
 #include "cpu.h"
 #include "exec/exec-all.h"
 #include "qemu/host-utils.h"
-#include "qemu/main-loop.h"
 #include "exec/helper-proto.h"
 #include "helper_regs.h"
 #include "exec/cpu_ldst.h"
diff --git a/target/ppc/misc_helper.c b/target/ppc/misc_helper.c
index 692d058665..a05bdf78c9 100644
--- a/target/ppc/misc_helper.c
+++ b/target/ppc/misc_helper.c
@@ -199,6 +199,21 @@ void helper_store_pcr(CPUPPCState *env, target_ulong value)
     env->spr[SPR_PCR] = value & pcc->pcr_mask;
 }
 
+void helper_store_ciabr(CPUPPCState *env, target_ulong value)
+{
+    ppc_store_ciabr(env, value);
+}
+
+void helper_store_dawr0(CPUPPCState *env, target_ulong value)
+{
+    ppc_store_dawr0(env, value);
+}
+
+void helper_store_dawrx0(CPUPPCState *env, target_ulong value)
+{
+    ppc_store_dawrx0(env, value);
+}
+
 /*
  * DPDES register is shared. Each bit reflects the state of the
  * doorbell interrupt of a thread of the same core.
diff --git a/target/ppc/mmu-radix64.c b/target/ppc/mmu-radix64.c
index 920084bd8f..5823e039e6 100644
--- a/target/ppc/mmu-radix64.c
+++ b/target/ppc/mmu-radix64.c
@@ -219,27 +219,25 @@ static bool ppc_radix64_check_prot(PowerPCCPU *cpu, MMUAccessType access_type,
     return false;
 }
 
-static void ppc_radix64_set_rc(PowerPCCPU *cpu, MMUAccessType access_type,
-                               uint64_t pte, hwaddr pte_addr, int *prot)
+static int ppc_radix64_check_rc(MMUAccessType access_type, uint64_t pte)
 {
-    CPUState *cs = CPU(cpu);
-    uint64_t npte;
+    switch (access_type) {
+    case MMU_DATA_STORE:
+        if (!(pte & R_PTE_C)) {
+            break;
+        }
+        /* fall through */
+    case MMU_INST_FETCH:
+    case MMU_DATA_LOAD:
+        if (!(pte & R_PTE_R)) {
+            break;
+        }
 
-    npte = pte | R_PTE_R; /* Always set reference bit */
-
-    if (access_type == MMU_DATA_STORE) { /* Store/Write */
-        npte |= R_PTE_C; /* Set change bit */
-    } else {
-        /*
-         * Treat the page as read-only for now, so that a later write
-         * will pass through this function again to set the C bit.
-         */
-        *prot &= ~PAGE_WRITE;
+        /* R/C bits are already set appropriately for this access */
+        return 0;
     }
 
-    if (pte ^ npte) { /* If pte has changed then write it back */
-        stq_phys(cs->as, pte_addr, npte);
-    }
+    return 1;
 }
 
 static bool ppc_radix64_is_valid_level(int level, int psize, uint64_t nls)
@@ -380,7 +378,8 @@ static int ppc_radix64_partition_scoped_xlate(PowerPCCPU *cpu,
                                               ppc_v3_pate_t pate,
                                               hwaddr *h_raddr, int *h_prot,
                                               int *h_page_size, bool pde_addr,
-                                              int mmu_idx, bool guest_visible)
+                                              int mmu_idx, uint64_t lpid,
+                                              bool guest_visible)
 {
     MMUAccessType access_type = orig_access_type;
     int fault_cause = 0;
@@ -418,7 +417,24 @@ static int ppc_radix64_partition_scoped_xlate(PowerPCCPU *cpu,
     }
 
     if (guest_visible) {
-        ppc_radix64_set_rc(cpu, access_type, pte, pte_addr, h_prot);
+        if (ppc_radix64_check_rc(access_type, pte)) {
+            /*
+             * Per ISA 3.1 Book III, 7.5.3 and 7.5.5, failure to set R/C during
+             * partition-scoped translation when effLPID = 0 results in normal
+             * (non-Hypervisor) Data and Instruction Storage Interrupts
+             * respectively.
+             *
+             * ISA 3.0 is ambiguous about this, but tests on POWER9 hardware
+             * seem to exhibit the same behavior.
+             */
+            if (lpid > 0) {
+                ppc_radix64_raise_hsi(cpu, access_type, eaddr, g_raddr,
+                                      DSISR_ATOMIC_RC);
+            } else {
+                ppc_radix64_raise_si(cpu, access_type, eaddr, DSISR_ATOMIC_RC);
+            }
+            return 1;
+        }
     }
 
     return 0;
@@ -447,7 +463,8 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU *cpu,
                                             vaddr eaddr, uint64_t pid,
                                             ppc_v3_pate_t pate, hwaddr *g_raddr,
                                             int *g_prot, int *g_page_size,
-                                            int mmu_idx, bool guest_visible)
+                                            int mmu_idx, uint64_t lpid,
+                                            bool guest_visible)
 {
     CPUState *cs = CPU(cpu);
     CPUPPCState *env = &cpu->env;
@@ -497,7 +514,7 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU *cpu,
         ret = ppc_radix64_partition_scoped_xlate(cpu, access_type, eaddr,
                                                  prtbe_addr, pate, &h_raddr,
                                                  &h_prot, &h_page_size, true,
-                                                 5, guest_visible);
+                                                 5, lpid, guest_visible);
         if (ret) {
             return ret;
         }
@@ -539,7 +556,8 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU *cpu,
             ret = ppc_radix64_partition_scoped_xlate(cpu, access_type, eaddr,
                                                      pte_addr, pate, &h_raddr,
                                                      &h_prot, &h_page_size,
-                                                     true, 5, guest_visible);
+                                                     true, 5, lpid,
+                                                     guest_visible);
             if (ret) {
                 return ret;
             }
@@ -580,7 +598,11 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU *cpu,
     }
 
     if (guest_visible) {
-        ppc_radix64_set_rc(cpu, access_type, pte, pte_addr, g_prot);
+        /* R/C bits not appropriately set for access */
+        if (ppc_radix64_check_rc(access_type, pte)) {
+            ppc_radix64_raise_si(cpu, access_type, eaddr, DSISR_ATOMIC_RC);
+            return 1;
+        }
     }
 
     return 0;
@@ -695,7 +717,8 @@ static bool ppc_radix64_xlate_impl(PowerPCCPU *cpu, vaddr eaddr,
     if (relocation) {
         int ret = ppc_radix64_process_scoped_xlate(cpu, access_type, eaddr, pid,
                                                    pate, &g_raddr, &prot,
-                                                   &psize, mmu_idx, guest_visible);
+                                                   &psize, mmu_idx, lpid,
+                                                   guest_visible);
         if (ret) {
             return false;
         }
@@ -719,7 +742,8 @@ static bool ppc_radix64_xlate_impl(PowerPCCPU *cpu, vaddr eaddr,
             ret = ppc_radix64_partition_scoped_xlate(cpu, access_type, eaddr,
                                                      g_raddr, pate, raddr,
                                                      &prot, &psize, false,
-                                                     mmu_idx, guest_visible);
+                                                     mmu_idx, lpid,
+                                                     guest_visible);
             if (ret) {
                 return false;
             }
diff --git a/target/ppc/mmu_common.c b/target/ppc/mmu_common.c
index 8c000e250d..6ca5d12207 100644
--- a/target/ppc/mmu_common.c
+++ b/target/ppc/mmu_common.c
@@ -28,7 +28,6 @@
 #include "exec/log.h"
 #include "helper_regs.h"
 #include "qemu/error-report.h"
-#include "qemu/main-loop.h"
 #include "qemu/qemu-print.h"
 #include "internal.h"
 #include "mmu-book3s-v3.h"
diff --git a/target/ppc/mmu_helper.c b/target/ppc/mmu_helper.c
index d3ea7588f9..f87d35379a 100644
--- a/target/ppc/mmu_helper.c
+++ b/target/ppc/mmu_helper.c
@@ -28,7 +28,6 @@
 #include "exec/log.h"
 #include "helper_regs.h"
 #include "qemu/error-report.h"
-#include "qemu/main-loop.h"
 #include "qemu/qemu-print.h"
 #include "internal.h"
 #include "mmu-book3s-v3.h"
diff --git a/target/ppc/power8-pmu-regs.c.inc b/target/ppc/power8-pmu-regs.c.inc
index c82feedaff..75513db894 100644
--- a/target/ppc/power8-pmu-regs.c.inc
+++ b/target/ppc/power8-pmu-regs.c.inc
@@ -16,7 +16,7 @@
  * Checks whether the Group A SPR (MMCR0, MMCR2, MMCRA, and the
  * PMCs) has problem state read access.
  *
- * Read acccess is granted for all PMCC values but 0b01, where a
+ * Read access is granted for all PMCC values but 0b01, where a
  * Facility Unavailable Interrupt will occur.
  */
 static bool spr_groupA_read_allowed(DisasContext *ctx)
@@ -33,7 +33,7 @@ static bool spr_groupA_read_allowed(DisasContext *ctx)
  * Checks whether the Group A SPR (MMCR0, MMCR2, MMCRA, and the
  * PMCs) has problem state write access.
  *
- * Write acccess is granted for PMCC values 0b10 and 0b11. Userspace
+ * Write access is granted for PMCC values 0b10 and 0b11. Userspace
  * writing with PMCC 0b00 will generate a Hypervisor Emulation
  * Assistance Interrupt. Userspace writing with PMCC 0b01 will
  * generate a Facility Unavailable Interrupt.
diff --git a/target/ppc/power8-pmu.c b/target/ppc/power8-pmu.c
index 7bb4bf81f7..cbc5889d91 100644
--- a/target/ppc/power8-pmu.c
+++ b/target/ppc/power8-pmu.c
@@ -16,7 +16,7 @@
 #include "exec/exec-all.h"
 #include "exec/helper-proto.h"
 #include "qemu/error-report.h"
-#include "qemu/main-loop.h"
+#include "qemu/timer.h"
 #include "hw/ppc/ppc.h"
 #include "power8-pmu.h"
 
diff --git a/target/ppc/spr_common.h b/target/ppc/spr_common.h
index 5995070eaf..8a9d6cd994 100644
--- a/target/ppc/spr_common.h
+++ b/target/ppc/spr_common.h
@@ -159,6 +159,9 @@ void spr_read_mas73(DisasContext *ctx, int gprn, int sprn);
 #ifdef TARGET_PPC64
 void spr_read_cfar(DisasContext *ctx, int gprn, int sprn);
 void spr_write_cfar(DisasContext *ctx, int sprn, int gprn);
+void spr_write_ciabr(DisasContext *ctx, int sprn, int gprn);
+void spr_write_dawr0(DisasContext *ctx, int sprn, int gprn);
+void spr_write_dawrx0(DisasContext *ctx, int sprn, int gprn);
 void spr_write_ureg(DisasContext *ctx, int sprn, int gprn);
 void spr_read_purr(DisasContext *ctx, int gprn, int sprn);
 void spr_write_purr(DisasContext *ctx, int sprn, int gprn);
diff --git a/target/ppc/translate.c b/target/ppc/translate.c
index 74796ec7ba..5c28afbbb8 100644
--- a/target/ppc/translate.c
+++ b/target/ppc/translate.c
@@ -26,8 +26,6 @@
 #include "tcg/tcg-op.h"
 #include "tcg/tcg-op-gvec.h"
 #include "qemu/host-utils.h"
-#include "qemu/main-loop.h"
-#include "exec/cpu_ldst.h"
 
 #include "exec/helper-proto.h"
 #include "exec/helper-gen.h"
@@ -77,7 +75,9 @@ static TCGv cpu_xer, cpu_so, cpu_ov, cpu_ca, cpu_ov32, cpu_ca32;
 static TCGv cpu_reserve;
 static TCGv cpu_reserve_length;
 static TCGv cpu_reserve_val;
+#if defined(TARGET_PPC64)
 static TCGv cpu_reserve_val2;
+#endif
 static TCGv cpu_fpscr;
 static TCGv_i32 cpu_access_type;
 
@@ -151,9 +151,11 @@ void ppc_translate_init(void)
     cpu_reserve_val = tcg_global_mem_new(cpu_env,
                                          offsetof(CPUPPCState, reserve_val),
                                          "reserve_val");
+#if defined(TARGET_PPC64)
     cpu_reserve_val2 = tcg_global_mem_new(cpu_env,
                                           offsetof(CPUPPCState, reserve_val2),
                                           "reserve_val2");
+#endif
 
     cpu_fpscr = tcg_global_mem_new(cpu_env,
                                    offsetof(CPUPPCState, fpscr), "fpscr");
@@ -338,8 +340,9 @@ static void gen_ppc_maybe_interrupt(DisasContext *ctx)
  * The exception can be either POWERPC_EXCP_TRACE (on most PowerPCs) or
  * POWERPC_EXCP_DEBUG (on BookE).
  */
-static uint32_t gen_prep_dbgex(DisasContext *ctx)
+static void gen_debug_exception(DisasContext *ctx, bool rfi_type)
 {
+#if !defined(CONFIG_USER_ONLY)
     if (ctx->flags & POWERPC_FLAG_DE) {
         target_ulong dbsr = 0;
         if (ctx->singlestep_enabled & CPU_SINGLE_STEP) {
@@ -352,16 +355,18 @@ static uint32_t gen_prep_dbgex(DisasContext *ctx)
         gen_load_spr(t0, SPR_BOOKE_DBSR);
         tcg_gen_ori_tl(t0, t0, dbsr);
         gen_store_spr(SPR_BOOKE_DBSR, t0);
-        return POWERPC_EXCP_DEBUG;
+        gen_helper_raise_exception(cpu_env,
+                                   tcg_constant_i32(POWERPC_EXCP_DEBUG));
+        ctx->base.is_jmp = DISAS_NORETURN;
     } else {
-        return POWERPC_EXCP_TRACE;
+        if (!rfi_type) { /* BookS does not single step rfi type instructions */
+            TCGv t0 = tcg_temp_new();
+            tcg_gen_movi_tl(t0, ctx->cia);
+            gen_helper_book3s_trace(cpu_env, t0);
+            ctx->base.is_jmp = DISAS_NORETURN;
+        }
     }
-}
-
-static void gen_debug_exception(DisasContext *ctx)
-{
-    gen_helper_raise_exception(cpu_env, tcg_constant_i32(gen_prep_dbgex(ctx)));
-    ctx->base.is_jmp = DISAS_NORETURN;
+#endif
 }
 
 static inline void gen_inval_exception(DisasContext *ctx, uint32_t error)
@@ -556,8 +561,9 @@ void spr_write_lr(DisasContext *ctx, int sprn, int gprn)
     tcg_gen_mov_tl(cpu_lr, cpu_gpr[gprn]);
 }
 
-/* CFAR */
 #if defined(TARGET_PPC64) && !defined(CONFIG_USER_ONLY)
+/* Debug facilities */
+/* CFAR */
 void spr_read_cfar(DisasContext *ctx, int gprn, int sprn)
 {
     tcg_gen_mov_tl(cpu_gpr[gprn], cpu_cfar);
@@ -567,6 +573,26 @@ void spr_write_cfar(DisasContext *ctx, int sprn, int gprn)
 {
     tcg_gen_mov_tl(cpu_cfar, cpu_gpr[gprn]);
 }
+
+/* Breakpoint */
+void spr_write_ciabr(DisasContext *ctx, int sprn, int gprn)
+{
+    translator_io_start(&ctx->base);
+    gen_helper_store_ciabr(cpu_env, cpu_gpr[gprn]);
+}
+
+/* Watchpoint */
+void spr_write_dawr0(DisasContext *ctx, int sprn, int gprn)
+{
+    translator_io_start(&ctx->base);
+    gen_helper_store_dawr0(cpu_env, cpu_gpr[gprn]);
+}
+
+void spr_write_dawrx0(DisasContext *ctx, int sprn, int gprn)
+{
+    translator_io_start(&ctx->base);
+    gen_helper_store_dawrx0(cpu_env, cpu_gpr[gprn]);
+}
 #endif /* defined(TARGET_PPC64) && !defined(CONFIG_USER_ONLY) */
 
 /* CTR */
@@ -4184,7 +4210,7 @@ static inline bool use_goto_tb(DisasContext *ctx, target_ulong dest)
 static void gen_lookup_and_goto_ptr(DisasContext *ctx)
 {
     if (unlikely(ctx->singlestep_enabled)) {
-        gen_debug_exception(ctx);
+        gen_debug_exception(ctx, false);
     } else {
         /*
          * tcg_gen_lookup_and_goto_ptr will exit the TB if
@@ -7132,7 +7158,7 @@ static int test_opcode_table(opc_handler_t **table, int len)
                 tmp = test_opcode_table(ind_table(table[i]),
                     PPC_CPU_INDIRECT_OPCODES_LEN);
                 if (tmp == 0) {
-                    free(table[i]);
+                    g_free(table[i]);
                     table[i] = &invalid_handler;
                 } else {
                     count++;
@@ -7410,8 +7436,9 @@ static void ppc_tr_tb_stop(DisasContextBase *dcbase, CPUState *cs)
     }
 
     /* Honor single stepping. */
-    if (unlikely(ctx->singlestep_enabled & CPU_SINGLE_STEP)
-        && (nip <= 0x100 || nip > 0xf00)) {
+    if (unlikely(ctx->singlestep_enabled & CPU_SINGLE_STEP)) {
+        bool rfi_type = false;
+
         switch (is_jmp) {
         case DISAS_TOO_MANY:
         case DISAS_EXIT_UPDATE:
@@ -7420,12 +7447,19 @@ static void ppc_tr_tb_stop(DisasContextBase *dcbase, CPUState *cs)
             break;
         case DISAS_EXIT:
         case DISAS_CHAIN:
+            /*
+             * This is a heuristic, to put it kindly. The rfi class of
+             * instructions are among the few outside branches that change
+             * NIP without taking an interrupt. Single step trace interrupts
+             * do not fire on completion of these instructions.
+             */
+            rfi_type = true;
             break;
         default:
             g_assert_not_reached();
         }
 
-        gen_debug_exception(ctx);
+        gen_debug_exception(ctx, rfi_type);
         return;
     }
 
diff --git a/target/ppc/translate/fixedpoint-impl.c.inc b/target/ppc/translate/fixedpoint-impl.c.inc
index 4ce02fd3a4..7ff7e1ec46 100644
--- a/target/ppc/translate/fixedpoint-impl.c.inc
+++ b/target/ppc/translate/fixedpoint-impl.c.inc
@@ -71,7 +71,7 @@ static bool do_ldst_quad(DisasContext *ctx, arg_D *a, bool store, bool prefixed)
 {
 #if defined(TARGET_PPC64)
     TCGv ea;
-    TCGv_i64 low_addr_gpr, high_addr_gpr;
+    TCGv_i64 lo, hi;
     TCGv_i128 t16;
 
     REQUIRE_INSNS_FLAGS(ctx, 64BX);
@@ -94,21 +94,21 @@ static bool do_ldst_quad(DisasContext *ctx, arg_D *a, bool store, bool prefixed)
     gen_set_access_type(ctx, ACCESS_INT);
     ea = do_ea_calc(ctx, a->ra, tcg_constant_tl(a->si));
 
-    if (prefixed || !ctx->le_mode) {
-        low_addr_gpr = cpu_gpr[a->rt];
-        high_addr_gpr = cpu_gpr[a->rt + 1];
+    if (ctx->le_mode && prefixed) {
+        lo = cpu_gpr[a->rt];
+        hi = cpu_gpr[a->rt + 1];
     } else {
-        low_addr_gpr = cpu_gpr[a->rt + 1];
-        high_addr_gpr = cpu_gpr[a->rt];
+        lo = cpu_gpr[a->rt + 1];
+        hi = cpu_gpr[a->rt];
     }
     t16 = tcg_temp_new_i128();
 
     if (store) {
-        tcg_gen_concat_i64_i128(t16, low_addr_gpr, high_addr_gpr);
+        tcg_gen_concat_i64_i128(t16, lo, hi);
         tcg_gen_qemu_st_i128(t16, ea, ctx->mem_idx, DEF_MEMOP(MO_128));
     } else {
         tcg_gen_qemu_ld_i128(t16, ea, ctx->mem_idx, DEF_MEMOP(MO_128));
-        tcg_gen_extr_i128_i64(low_addr_gpr, high_addr_gpr, t16);
+        tcg_gen_extr_i128_i64(lo, hi, t16);
     }
 #else
     qemu_build_not_reached();
diff --git a/target/ppc/translate/vmx-impl.c.inc b/target/ppc/translate/vmx-impl.c.inc
index 6d7669aabd..5cdf53a9df 100644
--- a/target/ppc/translate/vmx-impl.c.inc
+++ b/target/ppc/translate/vmx-impl.c.inc
@@ -119,7 +119,7 @@ static void gen_stve##name(DisasContext *ctx)                           \
     }
 
 GEN_VR_LDX(lvx, 0x07, 0x03);
-/* As we don't emulate the cache, lvxl is stricly equivalent to lvx */
+/* As we don't emulate the cache, lvxl is strictly equivalent to lvx */
 GEN_VR_LDX(lvxl, 0x07, 0x0B);
 
 GEN_VR_LVE(bx, 0x07, 0x00, 1);
@@ -127,7 +127,7 @@ GEN_VR_LVE(hx, 0x07, 0x01, 2);
 GEN_VR_LVE(wx, 0x07, 0x02, 4);
 
 GEN_VR_STX(svx, 0x07, 0x07);
-/* As we don't emulate the cache, stvxl is stricly equivalent to stvx */
+/* As we don't emulate the cache, stvxl is strictly equivalent to stvx */
 GEN_VR_STX(svxl, 0x07, 0x0F);
 
 GEN_VR_STVE(bx, 0x07, 0x04, 1);
@@ -1526,7 +1526,7 @@ static void gen_vprtyb_vec(unsigned vece, TCGv_vec t, TCGv_vec b)
 {
     int i;
     TCGv_vec tmp = tcg_temp_new_vec_matching(b);
-    /* MO_32 is 2, so 2 iteractions for MO_32 and 3 for MO_64 */
+    /* MO_32 is 2, so 2 iterations for MO_32 and 3 for MO_64 */
     for (i = 0; i < vece; i++) {
         tcg_gen_shri_vec(vece, tmp, b, (4 << (vece - i)));
         tcg_gen_xor_vec(vece, b, tmp, b);
diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index 6b93b04453..f227c7664e 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -87,7 +87,9 @@ static const struct isa_ext_data isa_edata_arr[] = {
     ISA_EXT_DATA_ENTRY(zicond, PRIV_VERSION_1_12_0, ext_zicond),
     ISA_EXT_DATA_ENTRY(zicsr, PRIV_VERSION_1_10_0, ext_icsr),
     ISA_EXT_DATA_ENTRY(zifencei, PRIV_VERSION_1_10_0, ext_ifencei),
+    ISA_EXT_DATA_ENTRY(zihintntl, PRIV_VERSION_1_10_0, ext_zihintntl),
     ISA_EXT_DATA_ENTRY(zihintpause, PRIV_VERSION_1_10_0, ext_zihintpause),
+    ISA_EXT_DATA_ENTRY(zmmul, PRIV_VERSION_1_12_0, ext_zmmul),
     ISA_EXT_DATA_ENTRY(zawrs, PRIV_VERSION_1_12_0, ext_zawrs),
     ISA_EXT_DATA_ENTRY(zfa, PRIV_VERSION_1_12_0, ext_zfa),
     ISA_EXT_DATA_ENTRY(zfbfmin, PRIV_VERSION_1_12_0, ext_zfbfmin),
@@ -119,6 +121,8 @@ static const struct isa_ext_data isa_edata_arr[] = {
     ISA_EXT_DATA_ENTRY(zksed, PRIV_VERSION_1_12_0, ext_zksed),
     ISA_EXT_DATA_ENTRY(zksh, PRIV_VERSION_1_12_0, ext_zksh),
     ISA_EXT_DATA_ENTRY(zkt, PRIV_VERSION_1_12_0, ext_zkt),
+    ISA_EXT_DATA_ENTRY(zvbb, PRIV_VERSION_1_12_0, ext_zvbb),
+    ISA_EXT_DATA_ENTRY(zvbc, PRIV_VERSION_1_12_0, ext_zvbc),
     ISA_EXT_DATA_ENTRY(zve32f, PRIV_VERSION_1_10_0, ext_zve32f),
     ISA_EXT_DATA_ENTRY(zve64f, PRIV_VERSION_1_10_0, ext_zve64f),
     ISA_EXT_DATA_ENTRY(zve64d, PRIV_VERSION_1_10_0, ext_zve64d),
@@ -126,9 +130,16 @@ static const struct isa_ext_data isa_edata_arr[] = {
     ISA_EXT_DATA_ENTRY(zvfbfwma, PRIV_VERSION_1_12_0, ext_zvfbfwma),
     ISA_EXT_DATA_ENTRY(zvfh, PRIV_VERSION_1_12_0, ext_zvfh),
     ISA_EXT_DATA_ENTRY(zvfhmin, PRIV_VERSION_1_12_0, ext_zvfhmin),
+    ISA_EXT_DATA_ENTRY(zvkg, PRIV_VERSION_1_12_0, ext_zvkg),
+    ISA_EXT_DATA_ENTRY(zvkned, PRIV_VERSION_1_12_0, ext_zvkned),
+    ISA_EXT_DATA_ENTRY(zvknha, PRIV_VERSION_1_12_0, ext_zvknha),
+    ISA_EXT_DATA_ENTRY(zvknhb, PRIV_VERSION_1_12_0, ext_zvknhb),
+    ISA_EXT_DATA_ENTRY(zvksed, PRIV_VERSION_1_12_0, ext_zvksed),
+    ISA_EXT_DATA_ENTRY(zvksh, PRIV_VERSION_1_12_0, ext_zvksh),
     ISA_EXT_DATA_ENTRY(zhinx, PRIV_VERSION_1_12_0, ext_zhinx),
     ISA_EXT_DATA_ENTRY(zhinxmin, PRIV_VERSION_1_12_0, ext_zhinxmin),
     ISA_EXT_DATA_ENTRY(smaia, PRIV_VERSION_1_12_0, ext_smaia),
+    ISA_EXT_DATA_ENTRY(smepmp, PRIV_VERSION_1_12_0, epmp),
     ISA_EXT_DATA_ENTRY(smstateen, PRIV_VERSION_1_12_0, ext_smstateen),
     ISA_EXT_DATA_ENTRY(ssaia, PRIV_VERSION_1_12_0, ext_ssaia),
     ISA_EXT_DATA_ENTRY(sscofpmf, PRIV_VERSION_1_12_0, ext_sscofpmf),
@@ -298,6 +309,17 @@ static uint8_t satp_mode_from_str(const char *satp_mode_str)
 
 uint8_t satp_mode_max_from_map(uint32_t map)
 {
+    /*
+     * 'map = 0' will make us return (31 - 32), which C will
+     * happily overflow to UINT_MAX. There's no good result to
+     * return if 'map = 0' (e.g. returning 0 will be ambiguous
+     * with the result for 'map = 1').
+     *
+     * Assert out if map = 0. Callers will have to deal with
+     * it outside of this function.
+     */
+    g_assert(map > 0);
+
     /* map here has at least one bit set, so no problem with clz */
     return 31 - __builtin_clz(map);
 }
@@ -875,9 +897,9 @@ static void riscv_cpu_reset_hold(Object *obj)
     env->two_stage_lookup = false;
 
     env->menvcfg = (cpu->cfg.ext_svpbmt ? MENVCFG_PBMTE : 0) |
-                   (cpu->cfg.ext_svadu ? MENVCFG_HADE : 0);
+                   (cpu->cfg.ext_svadu ? MENVCFG_ADUE : 0);
     env->henvcfg = (cpu->cfg.ext_svpbmt ? HENVCFG_PBMTE : 0) |
-                   (cpu->cfg.ext_svadu ? HENVCFG_HADE : 0);
+                   (cpu->cfg.ext_svadu ? HENVCFG_ADUE : 0);
 
     /* Initialized default priorities of local interrupts. */
     for (i = 0; i < ARRAY_SIZE(env->miprio); i++) {
@@ -904,7 +926,7 @@ static void riscv_cpu_reset_hold(Object *obj)
 
 #ifndef CONFIG_USER_ONLY
     if (cpu->cfg.debug) {
-        riscv_trigger_init(env);
+        riscv_trigger_reset_hold(env);
     }
 
     if (kvm_enabled()) {
@@ -1269,6 +1291,25 @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp)
         return;
     }
 
+    /*
+     * In principle Zve*x would also suffice here, were they supported
+     * in qemu
+     */
+    if ((cpu->cfg.ext_zvbb || cpu->cfg.ext_zvkg || cpu->cfg.ext_zvkned ||
+         cpu->cfg.ext_zvknha || cpu->cfg.ext_zvksed || cpu->cfg.ext_zvksh) &&
+        !cpu->cfg.ext_zve32f) {
+        error_setg(errp,
+                   "Vector crypto extensions require V or Zve* extensions");
+        return;
+    }
+
+    if ((cpu->cfg.ext_zvbc || cpu->cfg.ext_zvknhb) && !cpu->cfg.ext_zve64f) {
+        error_setg(
+            errp,
+            "Zvbc and Zvknhb extensions require V or Zve64{f,d} extensions");
+        return;
+    }
+
     if (cpu->cfg.ext_zk) {
         cpu->cfg.ext_zkn = true;
         cpu->cfg.ext_zkr = true;
@@ -1303,9 +1344,15 @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp)
 static void riscv_cpu_satp_mode_finalize(RISCVCPU *cpu, Error **errp)
 {
     bool rv32 = riscv_cpu_mxl(&cpu->env) == MXL_RV32;
-    uint8_t satp_mode_map_max;
-    uint8_t satp_mode_supported_max =
-                        satp_mode_max_from_map(cpu->cfg.satp_mode.supported);
+    uint8_t satp_mode_map_max, satp_mode_supported_max;
+
+    /* The CPU wants the OS to decide which satp mode to use */
+    if (cpu->cfg.satp_mode.supported == 0) {
+        return;
+    }
+
+    satp_mode_supported_max =
+                    satp_mode_max_from_map(cpu->cfg.satp_mode.supported);
 
     if (cpu->cfg.satp_mode.map == 0) {
         if (cpu->cfg.satp_mode.init == 0) {
@@ -1395,6 +1442,11 @@ static void riscv_cpu_realize_tcg(DeviceState *dev, Error **errp)
     CPURISCVState *env = &cpu->env;
     Error *local_err = NULL;
 
+    if (object_dynamic_cast(OBJECT(dev), TYPE_RISCV_CPU_HOST)) {
+        error_setg(errp, "'host' CPU is not compatible with TCG acceleration");
+        return;
+    }
+
     riscv_cpu_validate_misa_mxl(cpu, &local_err);
     if (local_err != NULL) {
         error_propagate(errp, local_err);
@@ -1473,6 +1525,12 @@ static void riscv_cpu_realize(DeviceState *dev, Error **errp)
 
     riscv_cpu_register_gdb_regs_for_features(cs);
 
+#ifndef CONFIG_USER_ONLY
+    if (cpu->cfg.debug) {
+        riscv_trigger_realize(&cpu->env);
+    }
+#endif
+
     qemu_init_vcpu(cs);
     cpu_reset(cs);
 
@@ -1756,6 +1814,7 @@ static Property riscv_cpu_extensions[] = {
     DEFINE_PROP_BOOL("sscofpmf", RISCVCPU, cfg.ext_sscofpmf, false),
     DEFINE_PROP_BOOL("Zifencei", RISCVCPU, cfg.ext_ifencei, true),
     DEFINE_PROP_BOOL("Zicsr", RISCVCPU, cfg.ext_icsr, true),
+    DEFINE_PROP_BOOL("Zihintntl", RISCVCPU, cfg.ext_zihintntl, true),
     DEFINE_PROP_BOOL("Zihintpause", RISCVCPU, cfg.ext_zihintpause, true),
     DEFINE_PROP_BOOL("Zawrs", RISCVCPU, cfg.ext_zawrs, true),
     DEFINE_PROP_BOOL("Zfa", RISCVCPU, cfg.ext_zfa, true),
@@ -1816,6 +1875,7 @@ static Property riscv_cpu_extensions[] = {
     DEFINE_PROP_BOOL("zcf", RISCVCPU, cfg.ext_zcf, false),
     DEFINE_PROP_BOOL("zcmp", RISCVCPU, cfg.ext_zcmp, false),
     DEFINE_PROP_BOOL("zcmt", RISCVCPU, cfg.ext_zcmt, false),
+    DEFINE_PROP_BOOL("zicond", RISCVCPU, cfg.ext_zicond, false),
 
     /* Vendor-specific custom extensions */
     DEFINE_PROP_BOOL("xtheadba", RISCVCPU, cfg.ext_xtheadba, false),
@@ -1832,7 +1892,6 @@ static Property riscv_cpu_extensions[] = {
     DEFINE_PROP_BOOL("xventanacondops", RISCVCPU, cfg.ext_XVentanaCondOps, false),
 
     /* These are experimental so mark with 'x-' */
-    DEFINE_PROP_BOOL("x-zicond", RISCVCPU, cfg.ext_zicond, false),
 
     /* ePMP 0.9.3 */
     DEFINE_PROP_BOOL("x-epmp", RISCVCPU, cfg.epmp, false),
@@ -1846,6 +1905,16 @@ static Property riscv_cpu_extensions[] = {
     DEFINE_PROP_BOOL("x-zvfbfmin", RISCVCPU, cfg.ext_zvfbfmin, false),
     DEFINE_PROP_BOOL("x-zvfbfwma", RISCVCPU, cfg.ext_zvfbfwma, false),
 
+    /* Vector cryptography extensions */
+    DEFINE_PROP_BOOL("x-zvbb", RISCVCPU, cfg.ext_zvbb, false),
+    DEFINE_PROP_BOOL("x-zvbc", RISCVCPU, cfg.ext_zvbc, false),
+    DEFINE_PROP_BOOL("x-zvkg", RISCVCPU, cfg.ext_zvkg, false),
+    DEFINE_PROP_BOOL("x-zvkned", RISCVCPU, cfg.ext_zvkned, false),
+    DEFINE_PROP_BOOL("x-zvknha", RISCVCPU, cfg.ext_zvknha, false),
+    DEFINE_PROP_BOOL("x-zvknhb", RISCVCPU, cfg.ext_zvknhb, false),
+    DEFINE_PROP_BOOL("x-zvksed", RISCVCPU, cfg.ext_zvksed, false),
+    DEFINE_PROP_BOOL("x-zvksh", RISCVCPU, cfg.ext_zvksh, false),
+
     DEFINE_PROP_END_OF_LIST(),
 };
 
diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index 6ea22e0eea..6316cbcc23 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -349,7 +349,7 @@ struct CPUArchState {
     target_ulong upmmask;
     target_ulong upmbase;
 
-    /* CSRs for execution enviornment configuration */
+    /* CSRs for execution environment configuration */
     uint64_t menvcfg;
     uint64_t mstateen[SMSTATEEN_MAX_COUNT];
     uint64_t hstateen[SMSTATEEN_MAX_COUNT];
diff --git a/target/riscv/cpu_bits.h b/target/riscv/cpu_bits.h
index 59f0ffd9e1..3d6ffaabc7 100644
--- a/target/riscv/cpu_bits.h
+++ b/target/riscv/cpu_bits.h
@@ -656,7 +656,7 @@ typedef enum {
 /* Leaf page shift amount */
 #define PGSHIFT             12
 
-/* Default Reset Vector adress */
+/* Default Reset Vector address */
 #define DEFAULT_RSTVEC      0x1000
 
 /* Exception causes */
@@ -740,17 +740,17 @@ typedef enum RISCVException {
 #define PM_CURRENT      0x00000002ULL
 #define PM_INSN         0x00000004ULL
 
-/* Execution enviornment configuration bits */
+/* Execution environment configuration bits */
 #define MENVCFG_FIOM                       BIT(0)
 #define MENVCFG_CBIE                       (3UL << 4)
 #define MENVCFG_CBCFE                      BIT(6)
 #define MENVCFG_CBZE                       BIT(7)
-#define MENVCFG_HADE                       (1ULL << 61)
+#define MENVCFG_ADUE                       (1ULL << 61)
 #define MENVCFG_PBMTE                      (1ULL << 62)
 #define MENVCFG_STCE                       (1ULL << 63)
 
 /* For RV32 */
-#define MENVCFGH_HADE                      BIT(29)
+#define MENVCFGH_ADUE                      BIT(29)
 #define MENVCFGH_PBMTE                     BIT(30)
 #define MENVCFGH_STCE                      BIT(31)
 
@@ -763,12 +763,12 @@ typedef enum RISCVException {
 #define HENVCFG_CBIE                       MENVCFG_CBIE
 #define HENVCFG_CBCFE                      MENVCFG_CBCFE
 #define HENVCFG_CBZE                       MENVCFG_CBZE
-#define HENVCFG_HADE                       MENVCFG_HADE
+#define HENVCFG_ADUE                       MENVCFG_ADUE
 #define HENVCFG_PBMTE                      MENVCFG_PBMTE
 #define HENVCFG_STCE                       MENVCFG_STCE
 
 /* For RV32 */
-#define HENVCFGH_HADE                       MENVCFGH_HADE
+#define HENVCFGH_ADUE                       MENVCFGH_ADUE
 #define HENVCFGH_PBMTE                      MENVCFGH_PBMTE
 #define HENVCFGH_STCE                       MENVCFGH_STCE
 
diff --git a/target/riscv/cpu_cfg.h b/target/riscv/cpu_cfg.h
index 2bd9510ba3..0e6a0f245c 100644
--- a/target/riscv/cpu_cfg.h
+++ b/target/riscv/cpu_cfg.h
@@ -66,6 +66,7 @@ struct RISCVCPUConfig {
     bool ext_icbom;
     bool ext_icboz;
     bool ext_zicond;
+    bool ext_zihintntl;
     bool ext_zihintpause;
     bool ext_smstateen;
     bool ext_sstc;
@@ -85,6 +86,14 @@ struct RISCVCPUConfig {
     bool ext_zve32f;
     bool ext_zve64f;
     bool ext_zve64d;
+    bool ext_zvbb;
+    bool ext_zvbc;
+    bool ext_zvkg;
+    bool ext_zvkned;
+    bool ext_zvknha;
+    bool ext_zvknhb;
+    bool ext_zvksed;
+    bool ext_zvksh;
     bool ext_zmmul;
     bool ext_zvfbfmin;
     bool ext_zvfbfwma;
diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
index 9f611d89bb..3a02079290 100644
--- a/target/riscv/cpu_helper.c
+++ b/target/riscv/cpu_helper.c
@@ -861,11 +861,11 @@ static int get_physical_address(CPURISCVState *env, hwaddr *physical,
     }
 
     bool pbmte = env->menvcfg & MENVCFG_PBMTE;
-    bool hade = env->menvcfg & MENVCFG_HADE;
+    bool adue = env->menvcfg & MENVCFG_ADUE;
 
     if (first_stage && two_stage && env->virt_enabled) {
         pbmte = pbmte && (env->henvcfg & HENVCFG_PBMTE);
-        hade = hade && (env->henvcfg & HENVCFG_HADE);
+        adue = adue && (env->henvcfg & HENVCFG_ADUE);
     }
 
     int ptshift = (levels - 1) * ptidxbits;
@@ -1026,7 +1026,7 @@ restart:
 
     /* Page table updates need to be atomic with MTTCG enabled */
     if (updated_pte != pte && !is_debug) {
-        if (!hade) {
+        if (!adue) {
             return TRANSLATE_FAIL;
         }
 
diff --git a/target/riscv/crypto_helper.c b/target/riscv/crypto_helper.c
index 99d85a6188..bb084e00ef 100644
--- a/target/riscv/crypto_helper.c
+++ b/target/riscv/crypto_helper.c
@@ -25,29 +25,6 @@
 #include "crypto/aes-round.h"
 #include "crypto/sm4.h"
 
-#define AES_XTIME(a) \
-    ((a << 1) ^ ((a & 0x80) ? 0x1b : 0))
-
-#define AES_GFMUL(a, b) (( \
-    (((b) & 0x1) ? (a) : 0) ^ \
-    (((b) & 0x2) ? AES_XTIME(a) : 0) ^ \
-    (((b) & 0x4) ? AES_XTIME(AES_XTIME(a)) : 0) ^ \
-    (((b) & 0x8) ? AES_XTIME(AES_XTIME(AES_XTIME(a))) : 0)) & 0xFF)
-
-static inline uint32_t aes_mixcolumn_byte(uint8_t x, bool fwd)
-{
-    uint32_t u;
-
-    if (fwd) {
-        u = (AES_GFMUL(x, 3) << 24) | (x << 16) | (x << 8) |
-            (AES_GFMUL(x, 2) << 0);
-    } else {
-        u = (AES_GFMUL(x, 0xb) << 24) | (AES_GFMUL(x, 0xd) << 16) |
-            (AES_GFMUL(x, 0x9) << 8) | (AES_GFMUL(x, 0xe) << 0);
-    }
-    return u;
-}
-
 #define sext32_xlen(x) (target_ulong)(int32_t)(x)
 
 static inline target_ulong aes32_operation(target_ulong shamt,
@@ -55,23 +32,20 @@ static inline target_ulong aes32_operation(target_ulong shamt,
                                            bool enc, bool mix)
 {
     uint8_t si = rs2 >> shamt;
-    uint8_t so;
     uint32_t mixed;
     target_ulong res;
 
     if (enc) {
-        so = AES_sbox[si];
         if (mix) {
-            mixed = aes_mixcolumn_byte(so, true);
+            mixed = be32_to_cpu(AES_Te0[si]);
         } else {
-            mixed = so;
+            mixed = AES_sbox[si];
         }
     } else {
-        so = AES_isbox[si];
         if (mix) {
-            mixed = aes_mixcolumn_byte(so, false);
+            mixed = be32_to_cpu(AES_Td0[si]);
         } else {
-            mixed = so;
+            mixed = AES_isbox[si];
         }
     }
     mixed = rol32(mixed, shamt);
@@ -174,24 +148,17 @@ target_ulong HELPER(aes64ks1i)(target_ulong rs1, target_ulong rnum)
 
     uint8_t enc_rnum = rnum;
     uint32_t temp = (RS1 >> 32) & 0xFFFFFFFF;
-    uint8_t rcon_ = 0;
-    target_ulong result;
+    AESState t, rc = {};
 
     if (enc_rnum != 0xA) {
         temp = ror32(temp, 8); /* Rotate right by 8 */
-        rcon_ = round_consts[enc_rnum];
+        rc.w[0] = rc.w[1] = round_consts[enc_rnum];
     }
 
-    temp = ((uint32_t)AES_sbox[(temp >> 24) & 0xFF] << 24) |
-           ((uint32_t)AES_sbox[(temp >> 16) & 0xFF] << 16) |
-           ((uint32_t)AES_sbox[(temp >> 8) & 0xFF] << 8) |
-           ((uint32_t)AES_sbox[(temp >> 0) & 0xFF] << 0);
+    t.w[0] = t.w[1] = t.w[2] = t.w[3] = temp;
+    aesenc_SB_SR_AK(&t, &t, &rc, false);
 
-    temp ^= rcon_;
-
-    result = ((uint64_t)temp << 32) | temp;
-
-    return result;
+    return t.d[0];
 }
 
 target_ulong HELPER(aes64im)(target_ulong rs1)
diff --git a/target/riscv/csr.c b/target/riscv/csr.c
index ea7585329e..85a31dc420 100644
--- a/target/riscv/csr.c
+++ b/target/riscv/csr.c
@@ -23,7 +23,6 @@
 #include "cpu.h"
 #include "pmu.h"
 #include "time_helper.h"
-#include "qemu/main-loop.h"
 #include "exec/exec-all.h"
 #include "exec/tb-flush.h"
 #include "sysemu/cpu-timers.h"
@@ -1685,7 +1684,7 @@ static int rmw_iprio(target_ulong xlen,
 static int rmw_xireg(CPURISCVState *env, int csrno, target_ulong *val,
                      target_ulong new_val, target_ulong wr_mask)
 {
-    bool virt;
+    bool virt, isel_reserved;
     uint8_t *iprio;
     int ret = -EINVAL;
     target_ulong priv, isel, vgein;
@@ -1695,6 +1694,7 @@ static int rmw_xireg(CPURISCVState *env, int csrno, target_ulong *val,
 
     /* Decode register details from CSR number */
     virt = false;
+    isel_reserved = false;
     switch (csrno) {
     case CSR_MIREG:
         iprio = env->miprio;
@@ -1739,11 +1739,13 @@ static int rmw_xireg(CPURISCVState *env, int csrno, target_ulong *val,
                                                   riscv_cpu_mxl_bits(env)),
                                     val, new_val, wr_mask);
         }
+    } else {
+        isel_reserved = true;
     }
 
 done:
     if (ret) {
-        return (env->virt_enabled && virt) ?
+        return (env->virt_enabled && virt && !isel_reserved) ?
                RISCV_EXCP_VIRT_INSTRUCTION_FAULT : RISCV_EXCP_ILLEGAL_INST;
     }
     return RISCV_EXCP_NONE;
@@ -1834,8 +1836,11 @@ static RISCVException write_mcountinhibit(CPURISCVState *env, int csrno,
 {
     int cidx;
     PMUCTRState *counter;
+    RISCVCPU *cpu = env_archcpu(env);
 
-    env->mcountinhibit = val;
+    /* WARL register - disable unavailable counters; TM bit is always 0 */
+    env->mcountinhibit =
+        val & (cpu->pmu_avail_ctrs | COUNTEREN_CY | COUNTEREN_IR);
 
     /* Check if any other counter is also monitoring cycles/instructions */
     for (cidx = 0; cidx < RV_MAX_MHPMCOUNTERS; cidx++) {
@@ -1858,7 +1863,11 @@ static RISCVException read_mcounteren(CPURISCVState *env, int csrno,
 static RISCVException write_mcounteren(CPURISCVState *env, int csrno,
                                        target_ulong val)
 {
-    env->mcounteren = val;
+    RISCVCPU *cpu = env_archcpu(env);
+
+    /* WARL register - disable unavailable counters */
+    env->mcounteren = val & (cpu->pmu_avail_ctrs | COUNTEREN_CY | COUNTEREN_TM |
+                             COUNTEREN_IR);
     return RISCV_EXCP_NONE;
 }
 
@@ -1951,7 +1960,7 @@ static RISCVException write_menvcfg(CPURISCVState *env, int csrno,
     if (riscv_cpu_mxl(env) == MXL_RV64) {
         mask |= (cfg->ext_svpbmt ? MENVCFG_PBMTE : 0) |
                 (cfg->ext_sstc ? MENVCFG_STCE : 0) |
-                (cfg->ext_svadu ? MENVCFG_HADE : 0);
+                (cfg->ext_svadu ? MENVCFG_ADUE : 0);
     }
     env->menvcfg = (env->menvcfg & ~mask) | (val & mask);
 
@@ -1971,7 +1980,7 @@ static RISCVException write_menvcfgh(CPURISCVState *env, int csrno,
     const RISCVCPUConfig *cfg = riscv_cpu_cfg(env);
     uint64_t mask = (cfg->ext_svpbmt ? MENVCFG_PBMTE : 0) |
                     (cfg->ext_sstc ? MENVCFG_STCE : 0) |
-                    (cfg->ext_svadu ? MENVCFG_HADE : 0);
+                    (cfg->ext_svadu ? MENVCFG_ADUE : 0);
     uint64_t valh = (uint64_t)val << 32;
 
     env->menvcfg = (env->menvcfg & ~mask) | (valh & mask);
@@ -2023,7 +2032,7 @@ static RISCVException read_henvcfg(CPURISCVState *env, int csrno,
      * henvcfg.stce is read_only 0 when menvcfg.stce = 0
      * henvcfg.hade is read_only 0 when menvcfg.hade = 0
      */
-    *val = env->henvcfg & (~(HENVCFG_PBMTE | HENVCFG_STCE | HENVCFG_HADE) |
+    *val = env->henvcfg & (~(HENVCFG_PBMTE | HENVCFG_STCE | HENVCFG_ADUE) |
                            env->menvcfg);
     return RISCV_EXCP_NONE;
 }
@@ -2040,7 +2049,7 @@ static RISCVException write_henvcfg(CPURISCVState *env, int csrno,
     }
 
     if (riscv_cpu_mxl(env) == MXL_RV64) {
-        mask |= env->menvcfg & (HENVCFG_PBMTE | HENVCFG_STCE | HENVCFG_HADE);
+        mask |= env->menvcfg & (HENVCFG_PBMTE | HENVCFG_STCE | HENVCFG_ADUE);
     }
 
     env->henvcfg = (env->henvcfg & ~mask) | (val & mask);
@@ -2058,7 +2067,7 @@ static RISCVException read_henvcfgh(CPURISCVState *env, int csrno,
         return ret;
     }
 
-    *val = (env->henvcfg & (~(HENVCFG_PBMTE | HENVCFG_STCE | HENVCFG_HADE) |
+    *val = (env->henvcfg & (~(HENVCFG_PBMTE | HENVCFG_STCE | HENVCFG_ADUE) |
                             env->menvcfg)) >> 32;
     return RISCV_EXCP_NONE;
 }
@@ -2067,7 +2076,7 @@ static RISCVException write_henvcfgh(CPURISCVState *env, int csrno,
                                      target_ulong val)
 {
     uint64_t mask = env->menvcfg & (HENVCFG_PBMTE | HENVCFG_STCE |
-                                    HENVCFG_HADE);
+                                    HENVCFG_ADUE);
     uint64_t valh = (uint64_t)val << 32;
     RISCVException ret;
 
@@ -3216,7 +3225,7 @@ static int write_hvipriox(CPURISCVState *env, int first_index,
                RISCV_EXCP_VIRT_INSTRUCTION_FAULT : RISCV_EXCP_ILLEGAL_INST;
     }
 
-    /* Fill-up priority arrary */
+    /* Fill-up priority array */
     for (i = 0; i < num_irqs; i++) {
         if (riscv_cpu_hviprio_index2irq(first_index + i, &irq, &rdzero)) {
             continue;
@@ -3885,7 +3894,7 @@ static inline RISCVException riscv_csrrw_check(CPURISCVState *env,
     if (riscv_has_ext(env, RVH) && env->priv == PRV_S &&
         !env->virt_enabled) {
         /*
-         * We are in HS mode. Add 1 to the effective privledge level to
+         * We are in HS mode. Add 1 to the effective privilege level to
          * allow us to access the Hypervisor CSRs.
          */
         effective_priv++;
@@ -3908,21 +3917,27 @@ static RISCVException riscv_csrrw_do64(CPURISCVState *env, int csrno,
                                        target_ulong write_mask)
 {
     RISCVException ret;
-    target_ulong old_value;
+    target_ulong old_value = 0;
 
     /* execute combined read/write operation if it exists */
     if (csr_ops[csrno].op) {
         return csr_ops[csrno].op(env, csrno, ret_value, new_value, write_mask);
     }
 
-    /* if no accessor exists then return failure */
-    if (!csr_ops[csrno].read) {
-        return RISCV_EXCP_ILLEGAL_INST;
-    }
-    /* read old value */
-    ret = csr_ops[csrno].read(env, csrno, &old_value);
-    if (ret != RISCV_EXCP_NONE) {
-        return ret;
+    /*
+     * ret_value == NULL means that rd=x0 and we're coming from helper_csrw()
+     * and we can't throw side effects caused by CSR reads.
+     */
+    if (ret_value) {
+        /* if no accessor exists then return failure */
+        if (!csr_ops[csrno].read) {
+            return RISCV_EXCP_ILLEGAL_INST;
+        }
+        /* read old value */
+        ret = csr_ops[csrno].read(env, csrno, &old_value);
+        if (ret != RISCV_EXCP_NONE) {
+            return ret;
+        }
     }
 
     /* write value if writable and write mask set, otherwise drop writes */
diff --git a/target/riscv/debug.c b/target/riscv/debug.c
index 75ee1c4971..4945d1a1f2 100644
--- a/target/riscv/debug.c
+++ b/target/riscv/debug.c
@@ -574,7 +574,7 @@ static void riscv_itrigger_update_count(CPURISCVState *env)
     int count, executed;
     /*
      * Record last icount, so that we can evaluate the executed instructions
-     * since last priviledge mode change or timer expire.
+     * since last privilege mode change or timer expire.
      */
     int64_t last_icount = env->last_icount, current_icount;
     current_icount = env->last_icount = icount_get_raw();
@@ -588,14 +588,14 @@ static void riscv_itrigger_update_count(CPURISCVState *env)
             continue;
         }
         /*
-         * Only when priviledge is changed or itrigger timer expires,
+         * Only when privilege is changed or itrigger timer expires,
          * the count field in itrigger tdata1 register is updated.
          * And the count field in itrigger only contains remaining value.
          */
         if (check_itrigger_priv(env, i)) {
             /*
-             * If itrigger enabled in this priviledge mode, the number of
-             * executed instructions since last priviledge change
+             * If itrigger enabled in this privilege mode, the number of
+             * executed instructions since last privilege change
              * should be reduced from current itrigger count.
              */
             executed = current_icount - last_icount;
@@ -605,7 +605,7 @@ static void riscv_itrigger_update_count(CPURISCVState *env)
             }
         } else {
             /*
-             * If itrigger is not enabled in this priviledge mode,
+             * If itrigger is not enabled in this privilege mode,
              * the number of executed instructions will be discard and
              * the count field in itrigger will not change.
              */
@@ -903,7 +903,17 @@ bool riscv_cpu_debug_check_watchpoint(CPUState *cs, CPUWatchpoint *wp)
     return false;
 }
 
-void riscv_trigger_init(CPURISCVState *env)
+void riscv_trigger_realize(CPURISCVState *env)
+{
+    int i;
+
+    for (i = 0; i < RV_MAX_TRIGGERS; i++) {
+        env->itrigger_timer[i] = timer_new_ns(QEMU_CLOCK_VIRTUAL,
+                                              riscv_itrigger_timer_cb, env);
+    }
+}
+
+void riscv_trigger_reset_hold(CPURISCVState *env)
 {
     target_ulong tdata1 = build_tdata1(env, TRIGGER_TYPE_AD_MATCH, 0, 0);
     int i;
@@ -928,7 +938,6 @@ void riscv_trigger_init(CPURISCVState *env)
         env->tdata3[i] = 0;
         env->cpu_breakpoint[i] = NULL;
         env->cpu_watchpoint[i] = NULL;
-        env->itrigger_timer[i] = timer_new_ns(QEMU_CLOCK_VIRTUAL,
-                                              riscv_itrigger_timer_cb, env);
+        timer_del(env->itrigger_timer[i]);
     }
 }
diff --git a/target/riscv/debug.h b/target/riscv/debug.h
index c471748d5a..5794aa6ee5 100644
--- a/target/riscv/debug.h
+++ b/target/riscv/debug.h
@@ -143,7 +143,8 @@ void riscv_cpu_debug_excp_handler(CPUState *cs);
 bool riscv_cpu_debug_check_breakpoint(CPUState *cs);
 bool riscv_cpu_debug_check_watchpoint(CPUState *cs, CPUWatchpoint *wp);
 
-void riscv_trigger_init(CPURISCVState *env);
+void riscv_trigger_realize(CPURISCVState *env);
+void riscv_trigger_reset_hold(CPURISCVState *env);
 
 bool riscv_itrigger_enabled(CPURISCVState *env);
 void riscv_itrigger_update_priv(CPURISCVState *env);
diff --git a/target/riscv/helper.h b/target/riscv/helper.h
index c95adaf08a..8a63523851 100644
--- a/target/riscv/helper.h
+++ b/target/riscv/helper.h
@@ -1182,3 +1182,101 @@ DEF_HELPER_5(vfwcvtbf16_f_f_v, void, ptr, ptr, ptr, env, i32)
 
 DEF_HELPER_6(vfwmaccbf16_vv, void, ptr, ptr, ptr, ptr, env, i32)
 DEF_HELPER_6(vfwmaccbf16_vf, void, ptr, ptr, i64, ptr, env, i32)
+
+/* Vector crypto functions */
+DEF_HELPER_6(vclmul_vv, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vclmul_vx, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vclmulh_vv, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vclmulh_vx, void, ptr, ptr, tl, ptr, env, i32)
+
+DEF_HELPER_6(vror_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vror_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vror_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vror_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
+
+DEF_HELPER_6(vror_vx_b, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vror_vx_h, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vror_vx_w, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vror_vx_d, void, ptr, ptr, tl, ptr, env, i32)
+
+DEF_HELPER_6(vrol_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vrol_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vrol_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vrol_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
+
+DEF_HELPER_6(vrol_vx_b, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vrol_vx_h, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vrol_vx_w, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vrol_vx_d, void, ptr, ptr, tl, ptr, env, i32)
+
+DEF_HELPER_5(vrev8_v_b, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vrev8_v_h, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vrev8_v_w, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vrev8_v_d, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vbrev8_v_b, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vbrev8_v_h, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vbrev8_v_w, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vbrev8_v_d, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vbrev_v_b, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vbrev_v_h, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vbrev_v_w, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vbrev_v_d, void, ptr, ptr, ptr, env, i32)
+
+DEF_HELPER_5(vclz_v_b, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vclz_v_h, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vclz_v_w, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vclz_v_d, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vctz_v_b, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vctz_v_h, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vctz_v_w, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vctz_v_d, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vcpop_v_b, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vcpop_v_h, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vcpop_v_w, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vcpop_v_d, void, ptr, ptr, ptr, env, i32)
+
+DEF_HELPER_6(vwsll_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vwsll_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vwsll_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vwsll_vx_b, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vwsll_vx_h, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vwsll_vx_w, void, ptr, ptr, tl, ptr, env, i32)
+
+DEF_HELPER_6(vandn_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vandn_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vandn_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vandn_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vandn_vx_b, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vandn_vx_h, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vandn_vx_w, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vandn_vx_d, void, ptr, ptr, tl, ptr, env, i32)
+
+DEF_HELPER_2(egs_check, void, i32, env)
+
+DEF_HELPER_4(vaesef_vv, void, ptr, ptr, env, i32)
+DEF_HELPER_4(vaesef_vs, void, ptr, ptr, env, i32)
+DEF_HELPER_4(vaesdf_vv, void, ptr, ptr, env, i32)
+DEF_HELPER_4(vaesdf_vs, void, ptr, ptr, env, i32)
+DEF_HELPER_4(vaesem_vv, void, ptr, ptr, env, i32)
+DEF_HELPER_4(vaesem_vs, void, ptr, ptr, env, i32)
+DEF_HELPER_4(vaesdm_vv, void, ptr, ptr, env, i32)
+DEF_HELPER_4(vaesdm_vs, void, ptr, ptr, env, i32)
+DEF_HELPER_4(vaesz_vs, void, ptr, ptr, env, i32)
+DEF_HELPER_5(vaeskf1_vi, void, ptr, ptr, i32, env, i32)
+DEF_HELPER_5(vaeskf2_vi, void, ptr, ptr, i32, env, i32)
+
+DEF_HELPER_5(vsha2ms_vv, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vsha2ch32_vv, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vsha2ch64_vv, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vsha2cl32_vv, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vsha2cl64_vv, void, ptr, ptr, ptr, env, i32)
+
+DEF_HELPER_5(vsm3me_vv, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vsm3c_vi, void, ptr, ptr, i32, env, i32)
+
+DEF_HELPER_5(vghsh_vv, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_4(vgmul_vv, void, ptr, ptr, env, i32)
+
+DEF_HELPER_5(vsm4k_vi, void, ptr, ptr, i32, env, i32)
+DEF_HELPER_4(vsm4r_vv, void, ptr, ptr, env, i32)
+DEF_HELPER_4(vsm4r_vs, void, ptr, ptr, env, i32)
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
index e341fa9213..33597fe2bb 100644
--- a/target/riscv/insn32.decode
+++ b/target/riscv/insn32.decode
@@ -37,6 +37,7 @@
 %imm_u    12:s20                 !function=ex_shift_12
 %imm_bs   30:2                   !function=ex_shift_3
 %imm_rnum 20:4
+%imm_z6   26:1 15:5
 
 # Argument sets:
 &empty
@@ -74,6 +75,7 @@
 @r_rm    .......   ..... ..... ... ..... ....... %rs2 %rs1 %rm %rd
 @r2_rm   .......   ..... ..... ... ..... ....... %rs1 %rm %rd
 @r2      .......   ..... ..... ... ..... ....... &r2 %rs1 %rd
+@r2_vm_1 ...... . ..... ..... ... ..... ....... &rmr vm=1 %rs2 %rd
 @r2_nfvm ... ... vm:1 ..... ..... ... ..... ....... &r2nfvm %nf %rs1 %rd
 @r2_vm   ...... vm:1 ..... ..... ... ..... ....... &rmr %rs2 %rd
 @r1_vm   ...... vm:1 ..... ..... ... ..... ....... %rd
@@ -82,6 +84,7 @@
 @r_vm    ...... vm:1 ..... ..... ... ..... ....... &rmrr %rs2 %rs1 %rd
 @r_vm_1  ...... . ..... ..... ... ..... .......    &rmrr vm=1 %rs2 %rs1 %rd
 @r_vm_0  ...... . ..... ..... ... ..... .......    &rmrr vm=0 %rs2 %rs1 %rd
+@r2_zimm6  ..... . vm:1 ..... ..... ... ..... .......  &rmrr %rs2 rs1=%imm_z6 %rd
 @r2_zimm11 . zimm:11  ..... ... ..... ....... %rs1 %rd
 @r2_zimm10 .. zimm:10  ..... ... ..... ....... %rs1 %rd
 @r2_s    .......   ..... ..... ... ..... ....... %rs2 %rs1
@@ -946,3 +949,58 @@ vfwcvtbf16_f_f_v  010010 . ..... 01101 001 ..... 1010111 @r2_vm
 # *** Zvfbfwma Standard Extension ***
 vfwmaccbf16_vv    111011 . ..... ..... 001 ..... 1010111 @r_vm
 vfwmaccbf16_vf    111011 . ..... ..... 101 ..... 1010111 @r_vm
+
+# *** Zvbc vector crypto extension ***
+vclmul_vv   001100 . ..... ..... 010 ..... 1010111 @r_vm
+vclmul_vx   001100 . ..... ..... 110 ..... 1010111 @r_vm
+vclmulh_vv  001101 . ..... ..... 010 ..... 1010111 @r_vm
+vclmulh_vx  001101 . ..... ..... 110 ..... 1010111 @r_vm
+
+# *** Zvbb vector crypto extension ***
+vrol_vv     010101 . ..... ..... 000 ..... 1010111 @r_vm
+vrol_vx     010101 . ..... ..... 100 ..... 1010111 @r_vm
+vror_vv     010100 . ..... ..... 000 ..... 1010111 @r_vm
+vror_vx     010100 . ..... ..... 100 ..... 1010111 @r_vm
+vror_vi     01010. . ..... ..... 011 ..... 1010111 @r2_zimm6
+vbrev8_v    010010 . ..... 01000 010 ..... 1010111 @r2_vm
+vrev8_v     010010 . ..... 01001 010 ..... 1010111 @r2_vm
+vandn_vv    000001 . ..... ..... 000 ..... 1010111 @r_vm
+vandn_vx    000001 . ..... ..... 100 ..... 1010111 @r_vm
+vbrev_v     010010 . ..... 01010 010 ..... 1010111 @r2_vm
+vclz_v      010010 . ..... 01100 010 ..... 1010111 @r2_vm
+vctz_v      010010 . ..... 01101 010 ..... 1010111 @r2_vm
+vcpop_v     010010 . ..... 01110 010 ..... 1010111 @r2_vm
+vwsll_vv    110101 . ..... ..... 000 ..... 1010111 @r_vm
+vwsll_vx    110101 . ..... ..... 100 ..... 1010111 @r_vm
+vwsll_vi    110101 . ..... ..... 011 ..... 1010111 @r_vm
+
+# *** Zvkned vector crypto extension ***
+vaesef_vv   101000 1 ..... 00011 010 ..... 1110111 @r2_vm_1
+vaesef_vs   101001 1 ..... 00011 010 ..... 1110111 @r2_vm_1
+vaesdf_vv   101000 1 ..... 00001 010 ..... 1110111 @r2_vm_1
+vaesdf_vs   101001 1 ..... 00001 010 ..... 1110111 @r2_vm_1
+vaesem_vv   101000 1 ..... 00010 010 ..... 1110111 @r2_vm_1
+vaesem_vs   101001 1 ..... 00010 010 ..... 1110111 @r2_vm_1
+vaesdm_vv   101000 1 ..... 00000 010 ..... 1110111 @r2_vm_1
+vaesdm_vs   101001 1 ..... 00000 010 ..... 1110111 @r2_vm_1
+vaesz_vs    101001 1 ..... 00111 010 ..... 1110111 @r2_vm_1
+vaeskf1_vi  100010 1 ..... ..... 010 ..... 1110111 @r_vm_1
+vaeskf2_vi  101010 1 ..... ..... 010 ..... 1110111 @r_vm_1
+
+# *** Zvknh vector crypto extension ***
+vsha2ms_vv  101101 1 ..... ..... 010 ..... 1110111 @r_vm_1
+vsha2ch_vv  101110 1 ..... ..... 010 ..... 1110111 @r_vm_1
+vsha2cl_vv  101111 1 ..... ..... 010 ..... 1110111 @r_vm_1
+
+# *** Zvksh vector crypto extension ***
+vsm3me_vv   100000 1 ..... ..... 010 ..... 1110111 @r_vm_1
+vsm3c_vi    101011 1 ..... ..... 010 ..... 1110111 @r_vm_1
+
+# *** Zvkg vector crypto extension ***
+vghsh_vv    101100 1 ..... ..... 010 ..... 1110111 @r_vm_1
+vgmul_vv    101000 1 ..... 10001 010 ..... 1110111 @r2_vm_1
+
+# *** Zvksed vector crypto extension ***
+vsm4k_vi    100001 1 ..... ..... 010 ..... 1110111 @r_vm_1
+vsm4r_vv    101000 1 ..... 10000 010 ..... 1110111 @r2_vm_1
+vsm4r_vs    101001 1 ..... 10000 010 ..... 1110111 @r2_vm_1
diff --git a/target/riscv/insn_trans/trans_rvf.c.inc b/target/riscv/insn_trans/trans_rvf.c.inc
index a0da7391c7..e7ab84cd9a 100644
--- a/target/riscv/insn_trans/trans_rvf.c.inc
+++ b/target/riscv/insn_trans/trans_rvf.c.inc
@@ -300,7 +300,7 @@ static bool trans_fsgnjn_s(DisasContext *ctx, arg_fsgnjn_s *a)
         tcg_gen_and_i64(dest, mask, rs1);
         tcg_gen_or_i64(dest, dest, rs2);
     }
-    /* signed-extended intead of nanboxing for result if enable zfinx */
+    /* signed-extended instead of nanboxing for result if enable zfinx */
     if (ctx->cfg_ptr->ext_zfinx) {
         tcg_gen_ext32s_i64(dest, dest);
     }
@@ -345,7 +345,7 @@ static bool trans_fsgnjx_s(DisasContext *ctx, arg_fsgnjx_s *a)
         tcg_gen_andi_i64(dest, rs2, MAKE_64BIT_MASK(31, 1));
         tcg_gen_xor_i64(dest, rs1, dest);
     }
-    /* signed-extended intead of nanboxing for result if enable zfinx */
+    /* signed-extended instead of nanboxing for result if enable zfinx */
     if (ctx->cfg_ptr->ext_zfinx) {
         tcg_gen_ext32s_i64(dest, dest);
     }
diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc
index c2f7527f53..63404f61fc 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -617,7 +617,6 @@ static bool ldst_us_trans(uint32_t vd, uint32_t rs1, uint32_t data,
     TCGv_i32 desc;
 
     TCGLabel *over = gen_new_label();
-    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
     tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
 
     dest = tcg_temp_new_ptr();
@@ -786,7 +785,6 @@ static bool ldst_stride_trans(uint32_t vd, uint32_t rs1, uint32_t rs2,
     TCGv_i32 desc;
 
     TCGLabel *over = gen_new_label();
-    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
     tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
 
     dest = tcg_temp_new_ptr();
@@ -893,7 +891,6 @@ static bool ldst_index_trans(uint32_t vd, uint32_t rs1, uint32_t vs2,
     TCGv_i32 desc;
 
     TCGLabel *over = gen_new_label();
-    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
     tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
 
     dest = tcg_temp_new_ptr();
@@ -1034,7 +1031,6 @@ static bool ldff_trans(uint32_t vd, uint32_t rs1, uint32_t data,
     TCGv_i32 desc;
 
     TCGLabel *over = gen_new_label();
-    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
     tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
 
     dest = tcg_temp_new_ptr();
@@ -1187,11 +1183,7 @@ do_opivv_gvec(DisasContext *s, arg_rmrr *a, GVecGen3Fn *gvec_fn,
               gen_helper_gvec_4_ptr *fn)
 {
     TCGLabel *over = gen_new_label();
-    if (!opivv_check(s, a)) {
-        return false;
-    }
 
-    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
     tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
 
     if (a->vm && s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) {
@@ -1223,6 +1215,9 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a)             \
         gen_helper_##NAME##_b, gen_helper_##NAME##_h,              \
         gen_helper_##NAME##_w, gen_helper_##NAME##_d,              \
     };                                                             \
+    if (!opivv_check(s, a)) {                                      \
+        return false;                                              \
+    }                                                              \
     return do_opivv_gvec(s, a, tcg_gen_gvec_##SUF, fns[s->sew]);   \
 }
 
@@ -1241,7 +1236,6 @@ static bool opivx_trans(uint32_t vd, uint32_t rs1, uint32_t vs2, uint32_t vm,
     uint32_t data = 0;
 
     TCGLabel *over = gen_new_label();
-    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
     tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
 
     dest = tcg_temp_new_ptr();
@@ -1282,10 +1276,6 @@ static inline bool
 do_opivx_gvec(DisasContext *s, arg_rmrr *a, GVecGen2sFn *gvec_fn,
               gen_helper_opivx *fn)
 {
-    if (!opivx_check(s, a)) {
-        return false;
-    }
-
     if (a->vm && s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) {
         TCGv_i64 src1 = tcg_temp_new_i64();
 
@@ -1307,6 +1297,9 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a)             \
         gen_helper_##NAME##_b, gen_helper_##NAME##_h,              \
         gen_helper_##NAME##_w, gen_helper_##NAME##_d,              \
     };                                                             \
+    if (!opivx_check(s, a)) {                                      \
+        return false;                                              \
+    }                                                              \
     return do_opivx_gvec(s, a, tcg_gen_gvec_##SUF, fns[s->sew]);   \
 }
 
@@ -1405,7 +1398,6 @@ static bool opivi_trans(uint32_t vd, uint32_t imm, uint32_t vs2, uint32_t vm,
     uint32_t data = 0;
 
     TCGLabel *over = gen_new_label();
-    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
     tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
 
     dest = tcg_temp_new_ptr();
@@ -1439,10 +1431,6 @@ static inline bool
 do_opivi_gvec(DisasContext *s, arg_rmrr *a, GVecGen2iFn *gvec_fn,
               gen_helper_opivx *fn, imm_mode_t imm_mode)
 {
-    if (!opivx_check(s, a)) {
-        return false;
-    }
-
     if (a->vm && s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) {
         gvec_fn(s->sew, vreg_ofs(s, a->rd), vreg_ofs(s, a->rs2),
                 extract_imm(s, a->rs1, imm_mode), MAXSZ(s), MAXSZ(s));
@@ -1460,6 +1448,9 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a)             \
         gen_helper_##OPIVX##_b, gen_helper_##OPIVX##_h,            \
         gen_helper_##OPIVX##_w, gen_helper_##OPIVX##_d,            \
     };                                                             \
+    if (!opivx_check(s, a)) {                                      \
+        return false;                                              \
+    }                                                              \
     return do_opivi_gvec(s, a, tcg_gen_gvec_##SUF,                 \
                          fns[s->sew], IMM_MODE);                   \
 }
@@ -1492,7 +1483,6 @@ static bool do_opivv_widen(DisasContext *s, arg_rmrr *a,
     if (checkfn(s, a)) {
         uint32_t data = 0;
         TCGLabel *over = gen_new_label();
-        tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
         tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
 
         data = FIELD_DP32(data, VDATA, VM, a->vm);
@@ -1536,30 +1526,24 @@ static bool opivx_widen_check(DisasContext *s, arg_rmrr *a)
            vext_check_ds(s, a->rd, a->rs2, a->vm);
 }
 
-static bool do_opivx_widen(DisasContext *s, arg_rmrr *a,
-                           gen_helper_opivx *fn)
-{
-    if (opivx_widen_check(s, a)) {
-        return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fn, s);
-    }
-    return false;
+#define GEN_OPIVX_WIDEN_TRANS(NAME, CHECK) \
+static bool trans_##NAME(DisasContext *s, arg_rmrr *a)                    \
+{                                                                         \
+    if (CHECK(s, a)) {                                                    \
+        static gen_helper_opivx * const fns[3] = {                        \
+            gen_helper_##NAME##_b,                                        \
+            gen_helper_##NAME##_h,                                        \
+            gen_helper_##NAME##_w                                         \
+        };                                                                \
+        return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fns[s->sew], s); \
+    }                                                                     \
+    return false;                                                         \
 }
 
-#define GEN_OPIVX_WIDEN_TRANS(NAME) \
-static bool trans_##NAME(DisasContext *s, arg_rmrr *a)       \
-{                                                            \
-    static gen_helper_opivx * const fns[3] = {               \
-        gen_helper_##NAME##_b,                               \
-        gen_helper_##NAME##_h,                               \
-        gen_helper_##NAME##_w                                \
-    };                                                       \
-    return do_opivx_widen(s, a, fns[s->sew]);                \
-}
-
-GEN_OPIVX_WIDEN_TRANS(vwaddu_vx)
-GEN_OPIVX_WIDEN_TRANS(vwadd_vx)
-GEN_OPIVX_WIDEN_TRANS(vwsubu_vx)
-GEN_OPIVX_WIDEN_TRANS(vwsub_vx)
+GEN_OPIVX_WIDEN_TRANS(vwaddu_vx, opivx_widen_check)
+GEN_OPIVX_WIDEN_TRANS(vwadd_vx, opivx_widen_check)
+GEN_OPIVX_WIDEN_TRANS(vwsubu_vx, opivx_widen_check)
+GEN_OPIVX_WIDEN_TRANS(vwsub_vx, opivx_widen_check)
 
 /* WIDEN OPIVV with WIDEN */
 static bool opiwv_widen_check(DisasContext *s, arg_rmrr *a)
@@ -1575,7 +1559,6 @@ static bool do_opiwv_widen(DisasContext *s, arg_rmrr *a,
     if (opiwv_widen_check(s, a)) {
         uint32_t data = 0;
         TCGLabel *over = gen_new_label();
-        tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
         tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
 
         data = FIELD_DP32(data, VDATA, VM, a->vm);
@@ -1643,38 +1626,39 @@ GEN_OPIWX_WIDEN_TRANS(vwadd_wx)
 GEN_OPIWX_WIDEN_TRANS(vwsubu_wx)
 GEN_OPIWX_WIDEN_TRANS(vwsub_wx)
 
+static bool opivv_trans(uint32_t vd, uint32_t vs1, uint32_t vs2, uint32_t vm,
+                        gen_helper_gvec_4_ptr *fn, DisasContext *s)
+{
+    uint32_t data = 0;
+    TCGLabel *over = gen_new_label();
+    tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
+
+    data = FIELD_DP32(data, VDATA, VM, vm);
+    data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
+    data = FIELD_DP32(data, VDATA, VTA, s->vta);
+    data = FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s);
+    data = FIELD_DP32(data, VDATA, VMA, s->vma);
+    tcg_gen_gvec_4_ptr(vreg_ofs(s, vd), vreg_ofs(s, 0), vreg_ofs(s, vs1),
+                       vreg_ofs(s, vs2), cpu_env, s->cfg_ptr->vlen / 8,
+                       s->cfg_ptr->vlen / 8, data, fn);
+    mark_vs_dirty(s);
+    gen_set_label(over);
+    return true;
+}
+
 /* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */
 /* OPIVV without GVEC IR */
-#define GEN_OPIVV_TRANS(NAME, CHECK)                               \
-static bool trans_##NAME(DisasContext *s, arg_rmrr *a)             \
-{                                                                  \
-    if (CHECK(s, a)) {                                             \
-        uint32_t data = 0;                                         \
-        static gen_helper_gvec_4_ptr * const fns[4] = {            \
-            gen_helper_##NAME##_b, gen_helper_##NAME##_h,          \
-            gen_helper_##NAME##_w, gen_helper_##NAME##_d,          \
-        };                                                         \
-        TCGLabel *over = gen_new_label();                          \
-        tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);          \
-        tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \
-                                                                   \
-        data = FIELD_DP32(data, VDATA, VM, a->vm);                 \
-        data = FIELD_DP32(data, VDATA, LMUL, s->lmul);             \
-        data = FIELD_DP32(data, VDATA, VTA, s->vta);               \
-        data =                                                     \
-            FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s);\
-        data = FIELD_DP32(data, VDATA, VMA, s->vma);               \
-        tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),     \
-                           vreg_ofs(s, a->rs1),                    \
-                           vreg_ofs(s, a->rs2), cpu_env,           \
-                           s->cfg_ptr->vlen / 8,                   \
-                           s->cfg_ptr->vlen / 8, data,             \
-                           fns[s->sew]);                           \
-        mark_vs_dirty(s);                                          \
-        gen_set_label(over);                                       \
-        return true;                                               \
-    }                                                              \
-    return false;                                                  \
+#define GEN_OPIVV_TRANS(NAME, CHECK)                                     \
+static bool trans_##NAME(DisasContext *s, arg_rmrr *a)                   \
+{                                                                        \
+    if (CHECK(s, a)) {                                                   \
+        static gen_helper_gvec_4_ptr * const fns[4] = {                  \
+            gen_helper_##NAME##_b, gen_helper_##NAME##_h,                \
+            gen_helper_##NAME##_w, gen_helper_##NAME##_d,                \
+        };                                                               \
+        return opivv_trans(a->rd, a->rs1, a->rs2, a->vm, fns[s->sew], s);\
+    }                                                                    \
+    return false;                                                        \
 }
 
 /*
@@ -1783,10 +1767,6 @@ static inline bool
 do_opivx_gvec_shift(DisasContext *s, arg_rmrr *a, GVecGen2sFn32 *gvec_fn,
                     gen_helper_opivx *fn)
 {
-    if (!opivx_check(s, a)) {
-        return false;
-    }
-
     if (a->vm && s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) {
         TCGv_i32 src1 = tcg_temp_new_i32();
 
@@ -1808,7 +1788,9 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a)                    \
         gen_helper_##NAME##_b, gen_helper_##NAME##_h,                     \
         gen_helper_##NAME##_w, gen_helper_##NAME##_d,                     \
     };                                                                    \
-                                                                          \
+    if (!opivx_check(s, a)) {                                             \
+        return false;                                                     \
+    }                                                                     \
     return do_opivx_gvec_shift(s, a, tcg_gen_gvec_##SUF, fns[s->sew]);    \
 }
 
@@ -1840,7 +1822,6 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a)             \
             gen_helper_##NAME##_w,                                 \
         };                                                         \
         TCGLabel *over = gen_new_label();                          \
-        tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);          \
         tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \
                                                                    \
         data = FIELD_DP32(data, VDATA, VM, a->vm);                 \
@@ -2010,9 +1991,9 @@ GEN_OPIVX_TRANS(vrem_vx, opivx_check)
 GEN_OPIVV_WIDEN_TRANS(vwmul_vv, opivv_widen_check)
 GEN_OPIVV_WIDEN_TRANS(vwmulu_vv, opivv_widen_check)
 GEN_OPIVV_WIDEN_TRANS(vwmulsu_vv, opivv_widen_check)
-GEN_OPIVX_WIDEN_TRANS(vwmul_vx)
-GEN_OPIVX_WIDEN_TRANS(vwmulu_vx)
-GEN_OPIVX_WIDEN_TRANS(vwmulsu_vx)
+GEN_OPIVX_WIDEN_TRANS(vwmul_vx, opivx_widen_check)
+GEN_OPIVX_WIDEN_TRANS(vwmulu_vx, opivx_widen_check)
+GEN_OPIVX_WIDEN_TRANS(vwmulsu_vx, opivx_widen_check)
 
 /* Vector Single-Width Integer Multiply-Add Instructions */
 GEN_OPIVV_TRANS(vmacc_vv, opivv_check)
@@ -2028,10 +2009,10 @@ GEN_OPIVX_TRANS(vnmsub_vx, opivx_check)
 GEN_OPIVV_WIDEN_TRANS(vwmaccu_vv, opivv_widen_check)
 GEN_OPIVV_WIDEN_TRANS(vwmacc_vv, opivv_widen_check)
 GEN_OPIVV_WIDEN_TRANS(vwmaccsu_vv, opivv_widen_check)
-GEN_OPIVX_WIDEN_TRANS(vwmaccu_vx)
-GEN_OPIVX_WIDEN_TRANS(vwmacc_vx)
-GEN_OPIVX_WIDEN_TRANS(vwmaccsu_vx)
-GEN_OPIVX_WIDEN_TRANS(vwmaccus_vx)
+GEN_OPIVX_WIDEN_TRANS(vwmaccu_vx, opivx_widen_check)
+GEN_OPIVX_WIDEN_TRANS(vwmacc_vx, opivx_widen_check)
+GEN_OPIVX_WIDEN_TRANS(vwmaccsu_vx, opivx_widen_check)
+GEN_OPIVX_WIDEN_TRANS(vwmaccus_vx, opivx_widen_check)
 
 /* Vector Integer Merge and Move Instructions */
 static bool trans_vmv_v_v(DisasContext *s, arg_vmv_v_v *a)
@@ -2052,7 +2033,6 @@ static bool trans_vmv_v_v(DisasContext *s, arg_vmv_v_v *a)
                 gen_helper_vmv_v_v_w, gen_helper_vmv_v_v_d,
             };
             TCGLabel *over = gen_new_label();
-            tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
             tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
 
             tcg_gen_gvec_2_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, a->rs1),
@@ -2076,7 +2056,6 @@ static bool trans_vmv_v_x(DisasContext *s, arg_vmv_v_x *a)
         vext_check_ss(s, a->rd, 0, 1)) {
         TCGv s1;
         TCGLabel *over = gen_new_label();
-        tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
         tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
 
         s1 = get_gpr(s, a->rs1, EXT_SIGN);
@@ -2138,7 +2117,6 @@ static bool trans_vmv_v_i(DisasContext *s, arg_vmv_v_i *a)
                 gen_helper_vmv_v_x_w, gen_helper_vmv_v_x_d,
             };
             TCGLabel *over = gen_new_label();
-            tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
             tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
 
             s1 = tcg_constant_i64(simm);
@@ -2240,7 +2218,7 @@ GEN_OPIWI_NARROW_TRANS(vnclip_wi, IMM_ZX, vnclip_wx)
  *
  * If SEW < FLEN, check whether input fp register is a valid
  * NaN-boxed value, in which case the least-significant SEW bits
- * of the f regsiter are used, else the canonical NaN value is used.
+ * of the f register are used, else the canonical NaN value is used.
  */
 static void do_nanbox(DisasContext *s, TCGv_i64 out, TCGv_i64 in)
 {
@@ -2286,7 +2264,6 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a)             \
         };                                                         \
         TCGLabel *over = gen_new_label();                          \
         gen_set_rm(s, RISCV_FRM_DYN);                              \
-        tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);          \
         tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \
                                                                    \
         data = FIELD_DP32(data, VDATA, VM, a->vm);                 \
@@ -2321,7 +2298,6 @@ static bool opfvf_trans(uint32_t vd, uint32_t rs1, uint32_t vs2,
     TCGv_i64 t1;
 
     TCGLabel *over = gen_new_label();
-    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
     tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
 
     dest = tcg_temp_new_ptr();
@@ -2406,7 +2382,6 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a)           \
         };                                                       \
         TCGLabel *over = gen_new_label();                        \
         gen_set_rm(s, RISCV_FRM_DYN);                            \
-        tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);        \
         tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);\
                                                                  \
         data = FIELD_DP32(data, VDATA, VM, a->vm);               \
@@ -2481,7 +2456,6 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a)             \
         };                                                         \
         TCGLabel *over = gen_new_label();                          \
         gen_set_rm(s, RISCV_FRM_DYN);                              \
-        tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);          \
         tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \
                                                                    \
         data = FIELD_DP32(data, VDATA, VM, a->vm);                 \
@@ -2599,7 +2573,6 @@ static bool do_opfv(DisasContext *s, arg_rmr *a,
         uint32_t data = 0;
         TCGLabel *over = gen_new_label();
         gen_set_rm_chkfrm(s, rm);
-        tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
         tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
 
         data = FIELD_DP32(data, VDATA, VM, a->vm);
@@ -2711,7 +2684,6 @@ static bool trans_vfmv_v_f(DisasContext *s, arg_vfmv_v_f *a)
                 gen_helper_vmv_v_x_d,
             };
             TCGLabel *over = gen_new_label();
-            tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
             tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
 
             t1 = tcg_temp_new_i64();
@@ -2790,7 +2762,6 @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a)              \
         };                                                         \
         TCGLabel *over = gen_new_label();                          \
         gen_set_rm_chkfrm(s, FRM);                                 \
-        tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);          \
         tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \
                                                                    \
         data = FIELD_DP32(data, VDATA, VM, a->vm);                 \
@@ -2842,7 +2813,6 @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a)              \
         };                                                         \
         TCGLabel *over = gen_new_label();                          \
         gen_set_rm(s, RISCV_FRM_DYN);                              \
-        tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);          \
         tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \
                                                                    \
         data = FIELD_DP32(data, VDATA, VM, a->vm);                 \
@@ -2910,7 +2880,6 @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a)              \
         };                                                         \
         TCGLabel *over = gen_new_label();                          \
         gen_set_rm_chkfrm(s, FRM);                                 \
-        tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);          \
         tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \
                                                                    \
         data = FIELD_DP32(data, VDATA, VM, a->vm);                 \
@@ -2960,7 +2929,6 @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a)              \
         };                                                         \
         TCGLabel *over = gen_new_label();                          \
         gen_set_rm_chkfrm(s, FRM);                                 \
-        tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);          \
         tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \
                                                                    \
         data = FIELD_DP32(data, VDATA, VM, a->vm);                 \
@@ -3051,7 +3019,6 @@ static bool trans_##NAME(DisasContext *s, arg_r *a)                \
         uint32_t data = 0;                                         \
         gen_helper_gvec_4_ptr *fn = gen_helper_##NAME;             \
         TCGLabel *over = gen_new_label();                          \
-        tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);          \
         tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \
                                                                    \
         data = FIELD_DP32(data, VDATA, LMUL, s->lmul);             \
@@ -3222,7 +3189,6 @@ static bool trans_vid_v(DisasContext *s, arg_vid_v *a)
         require_vm(a->vm, a->rd)) {
         uint32_t data = 0;
         TCGLabel *over = gen_new_label();
-        tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
         tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
 
         data = FIELD_DP32(data, VDATA, VM, a->vm);
@@ -3282,7 +3248,7 @@ static void load_element(TCGv_i64 dest, TCGv_ptr base,
     }
 }
 
-/* offset of the idx element with base regsiter r */
+/* offset of the idx element with base register r */
 static uint32_t endian_ofs(DisasContext *s, int r, int idx)
 {
 #if HOST_BIG_ENDIAN
@@ -3409,7 +3375,6 @@ static bool trans_vmv_s_x(DisasContext *s, arg_vmv_s_x *a)
         TCGv s1;
         TCGLabel *over = gen_new_label();
 
-        tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
         tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
 
         t1 = tcg_temp_new_i64();
@@ -3466,8 +3431,7 @@ static bool trans_vfmv_s_f(DisasContext *s, arg_vfmv_s_f *a)
         TCGv_i64 t1;
         TCGLabel *over = gen_new_label();
 
-        /* if vl == 0 or vstart >= vl, skip vector register write back */
-        tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
+        /* if vstart >= vl, skip vector register write back */
         tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
 
         /* NaN-box f[rs1] */
@@ -3718,7 +3682,6 @@ static bool int_ext_op(DisasContext *s, arg_rmr *a, uint8_t seq)
     uint32_t data = 0;
     gen_helper_gvec_3_ptr *fn;
     TCGLabel *over = gen_new_label();
-    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
     tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
 
     static gen_helper_gvec_3_ptr * const fns[6][4] = {
diff --git a/target/riscv/insn_trans/trans_rvvk.c.inc b/target/riscv/insn_trans/trans_rvvk.c.inc
new file mode 100644
index 0000000000..c00c70dfc6
--- /dev/null
+++ b/target/riscv/insn_trans/trans_rvvk.c.inc
@@ -0,0 +1,606 @@
+/*
+ * RISC-V translation routines for the vector crypto extension.
+ *
+ * Copyright (C) 2023 SiFive, Inc.
+ * Written by Codethink Ltd and SiFive.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * Zvbc
+ */
+
+#define GEN_VV_MASKED_TRANS(NAME, CHECK)                     \
+    static bool trans_##NAME(DisasContext *s, arg_rmrr *a)   \
+    {                                                        \
+        if (CHECK(s, a)) {                                   \
+            return opivv_trans(a->rd, a->rs1, a->rs2, a->vm, \
+                               gen_helper_##NAME, s);        \
+        }                                                    \
+        return false;                                        \
+    }
+
+static bool vclmul_vv_check(DisasContext *s, arg_rmrr *a)
+{
+    return opivv_check(s, a) &&
+           s->cfg_ptr->ext_zvbc == true &&
+           s->sew == MO_64;
+}
+
+GEN_VV_MASKED_TRANS(vclmul_vv, vclmul_vv_check)
+GEN_VV_MASKED_TRANS(vclmulh_vv, vclmul_vv_check)
+
+#define GEN_VX_MASKED_TRANS(NAME, CHECK)                     \
+    static bool trans_##NAME(DisasContext *s, arg_rmrr *a)   \
+    {                                                        \
+        if (CHECK(s, a)) {                                   \
+            return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, \
+                               gen_helper_##NAME, s);        \
+        }                                                    \
+        return false;                                        \
+    }
+
+static bool vclmul_vx_check(DisasContext *s, arg_rmrr *a)
+{
+    return opivx_check(s, a) &&
+           s->cfg_ptr->ext_zvbc == true &&
+           s->sew == MO_64;
+}
+
+GEN_VX_MASKED_TRANS(vclmul_vx, vclmul_vx_check)
+GEN_VX_MASKED_TRANS(vclmulh_vx, vclmul_vx_check)
+
+/*
+ * Zvbb
+ */
+
+#define GEN_OPIVI_GVEC_TRANS_CHECK(NAME, IMM_MODE, OPIVX, SUF, CHECK)   \
+    static bool trans_##NAME(DisasContext *s, arg_rmrr *a)              \
+    {                                                                   \
+        if (CHECK(s, a)) {                                              \
+            static gen_helper_opivx *const fns[4] = {                   \
+                gen_helper_##OPIVX##_b,                                 \
+                gen_helper_##OPIVX##_h,                                 \
+                gen_helper_##OPIVX##_w,                                 \
+                gen_helper_##OPIVX##_d,                                 \
+            };                                                          \
+            return do_opivi_gvec(s, a, tcg_gen_gvec_##SUF, fns[s->sew], \
+                                 IMM_MODE);                             \
+        }                                                               \
+        return false;                                                   \
+    }
+
+#define GEN_OPIVV_GVEC_TRANS_CHECK(NAME, SUF, CHECK)                     \
+    static bool trans_##NAME(DisasContext *s, arg_rmrr *a)               \
+    {                                                                    \
+        if (CHECK(s, a)) {                                               \
+            static gen_helper_gvec_4_ptr *const fns[4] = {               \
+                gen_helper_##NAME##_b,                                   \
+                gen_helper_##NAME##_h,                                   \
+                gen_helper_##NAME##_w,                                   \
+                gen_helper_##NAME##_d,                                   \
+            };                                                           \
+            return do_opivv_gvec(s, a, tcg_gen_gvec_##SUF, fns[s->sew]); \
+        }                                                                \
+        return false;                                                    \
+    }
+
+#define GEN_OPIVX_GVEC_SHIFT_TRANS_CHECK(NAME, SUF, CHECK)       \
+    static bool trans_##NAME(DisasContext *s, arg_rmrr *a)       \
+    {                                                            \
+        if (CHECK(s, a)) {                                       \
+            static gen_helper_opivx *const fns[4] = {            \
+                gen_helper_##NAME##_b,                           \
+                gen_helper_##NAME##_h,                           \
+                gen_helper_##NAME##_w,                           \
+                gen_helper_##NAME##_d,                           \
+            };                                                   \
+            return do_opivx_gvec_shift(s, a, tcg_gen_gvec_##SUF, \
+                                       fns[s->sew]);             \
+        }                                                        \
+        return false;                                            \
+    }
+
+static bool zvbb_vv_check(DisasContext *s, arg_rmrr *a)
+{
+    return opivv_check(s, a) && s->cfg_ptr->ext_zvbb == true;
+}
+
+static bool zvbb_vx_check(DisasContext *s, arg_rmrr *a)
+{
+    return opivx_check(s, a) && s->cfg_ptr->ext_zvbb == true;
+}
+
+/* vrol.v[vx] */
+GEN_OPIVV_GVEC_TRANS_CHECK(vrol_vv, rotlv, zvbb_vv_check)
+GEN_OPIVX_GVEC_SHIFT_TRANS_CHECK(vrol_vx, rotls, zvbb_vx_check)
+
+/* vror.v[vxi] */
+GEN_OPIVV_GVEC_TRANS_CHECK(vror_vv, rotrv, zvbb_vv_check)
+GEN_OPIVX_GVEC_SHIFT_TRANS_CHECK(vror_vx, rotrs, zvbb_vx_check)
+GEN_OPIVI_GVEC_TRANS_CHECK(vror_vi, IMM_TRUNC_SEW, vror_vx, rotri, zvbb_vx_check)
+
+#define GEN_OPIVX_GVEC_TRANS_CHECK(NAME, SUF, CHECK)                     \
+    static bool trans_##NAME(DisasContext *s, arg_rmrr *a)               \
+    {                                                                    \
+        if (CHECK(s, a)) {                                               \
+            static gen_helper_opivx *const fns[4] = {                    \
+                gen_helper_##NAME##_b,                                   \
+                gen_helper_##NAME##_h,                                   \
+                gen_helper_##NAME##_w,                                   \
+                gen_helper_##NAME##_d,                                   \
+            };                                                           \
+            return do_opivx_gvec(s, a, tcg_gen_gvec_##SUF, fns[s->sew]); \
+        }                                                                \
+        return false;                                                    \
+    }
+
+/* vandn.v[vx] */
+GEN_OPIVV_GVEC_TRANS_CHECK(vandn_vv, andc, zvbb_vv_check)
+GEN_OPIVX_GVEC_TRANS_CHECK(vandn_vx, andcs, zvbb_vx_check)
+
+#define GEN_OPIV_TRANS(NAME, CHECK)                                        \
+    static bool trans_##NAME(DisasContext *s, arg_rmr *a)                  \
+    {                                                                      \
+        if (CHECK(s, a)) {                                                 \
+            uint32_t data = 0;                                             \
+            static gen_helper_gvec_3_ptr *const fns[4] = {                 \
+                gen_helper_##NAME##_b,                                     \
+                gen_helper_##NAME##_h,                                     \
+                gen_helper_##NAME##_w,                                     \
+                gen_helper_##NAME##_d,                                     \
+            };                                                             \
+            TCGLabel *over = gen_new_label();                              \
+            tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);     \
+                                                                           \
+            data = FIELD_DP32(data, VDATA, VM, a->vm);                     \
+            data = FIELD_DP32(data, VDATA, LMUL, s->lmul);                 \
+            data = FIELD_DP32(data, VDATA, VTA, s->vta);                   \
+            data = FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s); \
+            data = FIELD_DP32(data, VDATA, VMA, s->vma);                   \
+            tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),         \
+                               vreg_ofs(s, a->rs2), cpu_env,               \
+                               s->cfg_ptr->vlen / 8, s->cfg_ptr->vlen / 8, \
+                               data, fns[s->sew]);                         \
+            mark_vs_dirty(s);                                              \
+            gen_set_label(over);                                           \
+            return true;                                                   \
+        }                                                                  \
+        return false;                                                      \
+    }
+
+static bool zvbb_opiv_check(DisasContext *s, arg_rmr *a)
+{
+    return s->cfg_ptr->ext_zvbb == true &&
+           require_rvv(s) &&
+           vext_check_isa_ill(s) &&
+           vext_check_ss(s, a->rd, a->rs2, a->vm);
+}
+
+GEN_OPIV_TRANS(vbrev8_v, zvbb_opiv_check)
+GEN_OPIV_TRANS(vrev8_v, zvbb_opiv_check)
+GEN_OPIV_TRANS(vbrev_v, zvbb_opiv_check)
+GEN_OPIV_TRANS(vclz_v, zvbb_opiv_check)
+GEN_OPIV_TRANS(vctz_v, zvbb_opiv_check)
+GEN_OPIV_TRANS(vcpop_v, zvbb_opiv_check)
+
+static bool vwsll_vv_check(DisasContext *s, arg_rmrr *a)
+{
+    return s->cfg_ptr->ext_zvbb && opivv_widen_check(s, a);
+}
+
+static bool vwsll_vx_check(DisasContext *s, arg_rmrr *a)
+{
+    return s->cfg_ptr->ext_zvbb && opivx_widen_check(s, a);
+}
+
+/* OPIVI without GVEC IR */
+#define GEN_OPIVI_WIDEN_TRANS(NAME, IMM_MODE, OPIVX, CHECK)                  \
+    static bool trans_##NAME(DisasContext *s, arg_rmrr *a)                   \
+    {                                                                        \
+        if (CHECK(s, a)) {                                                   \
+            static gen_helper_opivx *const fns[3] = {                        \
+                gen_helper_##OPIVX##_b,                                      \
+                gen_helper_##OPIVX##_h,                                      \
+                gen_helper_##OPIVX##_w,                                      \
+            };                                                               \
+            return opivi_trans(a->rd, a->rs1, a->rs2, a->vm, fns[s->sew], s, \
+                               IMM_MODE);                                    \
+        }                                                                    \
+        return false;                                                        \
+    }
+
+GEN_OPIVV_WIDEN_TRANS(vwsll_vv, vwsll_vv_check)
+GEN_OPIVX_WIDEN_TRANS(vwsll_vx, vwsll_vx_check)
+GEN_OPIVI_WIDEN_TRANS(vwsll_vi, IMM_ZX, vwsll_vx, vwsll_vx_check)
+
+/*
+ * Zvkned
+ */
+
+#define ZVKNED_EGS 4
+
+#define GEN_V_UNMASKED_TRANS(NAME, CHECK, EGS)                                \
+    static bool trans_##NAME(DisasContext *s, arg_##NAME *a)                  \
+    {                                                                         \
+        if (CHECK(s, a)) {                                                    \
+            TCGv_ptr rd_v, rs2_v;                                             \
+            TCGv_i32 desc, egs;                                               \
+            uint32_t data = 0;                                                \
+            TCGLabel *over = gen_new_label();                                 \
+                                                                              \
+            if (!s->vstart_eq_zero || !s->vl_eq_vlmax) {                      \
+                /* save opcode for unwinding in case we throw an exception */ \
+                decode_save_opc(s);                                           \
+                egs = tcg_constant_i32(EGS);                                  \
+                gen_helper_egs_check(egs, cpu_env);                           \
+                tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);    \
+            }                                                                 \
+                                                                              \
+            data = FIELD_DP32(data, VDATA, VM, a->vm);                        \
+            data = FIELD_DP32(data, VDATA, LMUL, s->lmul);                    \
+            data = FIELD_DP32(data, VDATA, VTA, s->vta);                      \
+            data = FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s);    \
+            data = FIELD_DP32(data, VDATA, VMA, s->vma);                      \
+            rd_v = tcg_temp_new_ptr();                                        \
+            rs2_v = tcg_temp_new_ptr();                                       \
+            desc = tcg_constant_i32(                                          \
+                simd_desc(s->cfg_ptr->vlen / 8, s->cfg_ptr->vlen / 8, data)); \
+            tcg_gen_addi_ptr(rd_v, cpu_env, vreg_ofs(s, a->rd));              \
+            tcg_gen_addi_ptr(rs2_v, cpu_env, vreg_ofs(s, a->rs2));            \
+            gen_helper_##NAME(rd_v, rs2_v, cpu_env, desc);                    \
+            mark_vs_dirty(s);                                                 \
+            gen_set_label(over);                                              \
+            return true;                                                      \
+        }                                                                     \
+        return false;                                                         \
+    }
+
+static bool vaes_check_vv(DisasContext *s, arg_rmr *a)
+{
+    int egw_bytes = ZVKNED_EGS << s->sew;
+    return s->cfg_ptr->ext_zvkned == true &&
+           require_rvv(s) &&
+           vext_check_isa_ill(s) &&
+           MAXSZ(s) >= egw_bytes &&
+           require_align(a->rd, s->lmul) &&
+           require_align(a->rs2, s->lmul) &&
+           s->sew == MO_32;
+}
+
+static bool vaes_check_overlap(DisasContext *s, int vd, int vs2)
+{
+    int8_t op_size = s->lmul <= 0 ? 1 : 1 << s->lmul;
+    return !is_overlapped(vd, op_size, vs2, 1);
+}
+
+static bool vaes_check_vs(DisasContext *s, arg_rmr *a)
+{
+    int egw_bytes = ZVKNED_EGS << s->sew;
+    return vaes_check_overlap(s, a->rd, a->rs2) &&
+           MAXSZ(s) >= egw_bytes &&
+           s->cfg_ptr->ext_zvkned == true &&
+           require_rvv(s) &&
+           vext_check_isa_ill(s) &&
+           require_align(a->rd, s->lmul) &&
+           s->sew == MO_32;
+}
+
+GEN_V_UNMASKED_TRANS(vaesef_vv, vaes_check_vv, ZVKNED_EGS)
+GEN_V_UNMASKED_TRANS(vaesef_vs, vaes_check_vs, ZVKNED_EGS)
+GEN_V_UNMASKED_TRANS(vaesdf_vv, vaes_check_vv, ZVKNED_EGS)
+GEN_V_UNMASKED_TRANS(vaesdf_vs, vaes_check_vs, ZVKNED_EGS)
+GEN_V_UNMASKED_TRANS(vaesdm_vv, vaes_check_vv, ZVKNED_EGS)
+GEN_V_UNMASKED_TRANS(vaesdm_vs, vaes_check_vs, ZVKNED_EGS)
+GEN_V_UNMASKED_TRANS(vaesz_vs, vaes_check_vs, ZVKNED_EGS)
+GEN_V_UNMASKED_TRANS(vaesem_vv, vaes_check_vv, ZVKNED_EGS)
+GEN_V_UNMASKED_TRANS(vaesem_vs, vaes_check_vs, ZVKNED_EGS)
+
+#define GEN_VI_UNMASKED_TRANS(NAME, CHECK, EGS)                               \
+    static bool trans_##NAME(DisasContext *s, arg_##NAME *a)                  \
+    {                                                                         \
+        if (CHECK(s, a)) {                                                    \
+            TCGv_ptr rd_v, rs2_v;                                             \
+            TCGv_i32 uimm_v, desc, egs;                                       \
+            uint32_t data = 0;                                                \
+            TCGLabel *over = gen_new_label();                                 \
+                                                                              \
+            if (!s->vstart_eq_zero || !s->vl_eq_vlmax) {                      \
+                /* save opcode for unwinding in case we throw an exception */ \
+                decode_save_opc(s);                                           \
+                egs = tcg_constant_i32(EGS);                                  \
+                gen_helper_egs_check(egs, cpu_env);                           \
+                tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);    \
+            }                                                                 \
+                                                                              \
+            data = FIELD_DP32(data, VDATA, VM, a->vm);                        \
+            data = FIELD_DP32(data, VDATA, LMUL, s->lmul);                    \
+            data = FIELD_DP32(data, VDATA, VTA, s->vta);                      \
+            data = FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s);    \
+            data = FIELD_DP32(data, VDATA, VMA, s->vma);                      \
+                                                                              \
+            rd_v = tcg_temp_new_ptr();                                        \
+            rs2_v = tcg_temp_new_ptr();                                       \
+            uimm_v = tcg_constant_i32(a->rs1);                                \
+            desc = tcg_constant_i32(                                          \
+                simd_desc(s->cfg_ptr->vlen / 8, s->cfg_ptr->vlen / 8, data)); \
+            tcg_gen_addi_ptr(rd_v, cpu_env, vreg_ofs(s, a->rd));              \
+            tcg_gen_addi_ptr(rs2_v, cpu_env, vreg_ofs(s, a->rs2));            \
+            gen_helper_##NAME(rd_v, rs2_v, uimm_v, cpu_env, desc);            \
+            mark_vs_dirty(s);                                                 \
+            gen_set_label(over);                                              \
+            return true;                                                      \
+        }                                                                     \
+        return false;                                                         \
+    }
+
+static bool vaeskf1_check(DisasContext *s, arg_vaeskf1_vi *a)
+{
+    int egw_bytes = ZVKNED_EGS << s->sew;
+    return s->cfg_ptr->ext_zvkned == true &&
+           require_rvv(s) &&
+           vext_check_isa_ill(s) &&
+           MAXSZ(s) >= egw_bytes &&
+           s->sew == MO_32 &&
+           require_align(a->rd, s->lmul) &&
+           require_align(a->rs2, s->lmul);
+}
+
+static bool vaeskf2_check(DisasContext *s, arg_vaeskf2_vi *a)
+{
+    int egw_bytes = ZVKNED_EGS << s->sew;
+    return s->cfg_ptr->ext_zvkned == true &&
+           require_rvv(s) &&
+           vext_check_isa_ill(s) &&
+           MAXSZ(s) >= egw_bytes &&
+           s->sew == MO_32 &&
+           require_align(a->rd, s->lmul) &&
+           require_align(a->rs2, s->lmul);
+}
+
+GEN_VI_UNMASKED_TRANS(vaeskf1_vi, vaeskf1_check, ZVKNED_EGS)
+GEN_VI_UNMASKED_TRANS(vaeskf2_vi, vaeskf2_check, ZVKNED_EGS)
+
+/*
+ * Zvknh
+ */
+
+#define ZVKNH_EGS 4
+
+#define GEN_VV_UNMASKED_TRANS(NAME, CHECK, EGS)                               \
+    static bool trans_##NAME(DisasContext *s, arg_rmrr *a)                    \
+    {                                                                         \
+        if (CHECK(s, a)) {                                                    \
+            uint32_t data = 0;                                                \
+            TCGLabel *over = gen_new_label();                                 \
+            TCGv_i32 egs;                                                     \
+                                                                              \
+            if (!s->vstart_eq_zero || !s->vl_eq_vlmax) {                      \
+                /* save opcode for unwinding in case we throw an exception */ \
+                decode_save_opc(s);                                           \
+                egs = tcg_constant_i32(EGS);                                  \
+                gen_helper_egs_check(egs, cpu_env);                           \
+                tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);    \
+            }                                                                 \
+                                                                              \
+            data = FIELD_DP32(data, VDATA, VM, a->vm);                        \
+            data = FIELD_DP32(data, VDATA, LMUL, s->lmul);                    \
+            data = FIELD_DP32(data, VDATA, VTA, s->vta);                      \
+            data = FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s);    \
+            data = FIELD_DP32(data, VDATA, VMA, s->vma);                      \
+                                                                              \
+            tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, a->rs1),       \
+                               vreg_ofs(s, a->rs2), cpu_env,                  \
+                               s->cfg_ptr->vlen / 8, s->cfg_ptr->vlen / 8,    \
+                               data, gen_helper_##NAME);                      \
+                                                                              \
+            mark_vs_dirty(s);                                                 \
+            gen_set_label(over);                                              \
+            return true;                                                      \
+        }                                                                     \
+        return false;                                                         \
+    }
+
+static bool vsha_check_sew(DisasContext *s)
+{
+    return (s->cfg_ptr->ext_zvknha == true && s->sew == MO_32) ||
+           (s->cfg_ptr->ext_zvknhb == true &&
+            (s->sew == MO_32 || s->sew == MO_64));
+}
+
+static bool vsha_check(DisasContext *s, arg_rmrr *a)
+{
+    int egw_bytes = ZVKNH_EGS << s->sew;
+    int mult = 1 << MAX(s->lmul, 0);
+    return opivv_check(s, a) &&
+           vsha_check_sew(s) &&
+           MAXSZ(s) >= egw_bytes &&
+           !is_overlapped(a->rd, mult, a->rs1, mult) &&
+           !is_overlapped(a->rd, mult, a->rs2, mult) &&
+           s->lmul >= 0;
+}
+
+GEN_VV_UNMASKED_TRANS(vsha2ms_vv, vsha_check, ZVKNH_EGS)
+
+static bool trans_vsha2cl_vv(DisasContext *s, arg_rmrr *a)
+{
+    if (vsha_check(s, a)) {
+        uint32_t data = 0;
+        TCGLabel *over = gen_new_label();
+        TCGv_i32 egs;
+
+        if (!s->vstart_eq_zero || !s->vl_eq_vlmax) {
+            /* save opcode for unwinding in case we throw an exception */
+            decode_save_opc(s);
+            egs = tcg_constant_i32(ZVKNH_EGS);
+            gen_helper_egs_check(egs, cpu_env);
+            tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
+        }
+
+        data = FIELD_DP32(data, VDATA, VM, a->vm);
+        data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
+        data = FIELD_DP32(data, VDATA, VTA, s->vta);
+        data = FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s);
+        data = FIELD_DP32(data, VDATA, VMA, s->vma);
+
+        tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, a->rs1),
+            vreg_ofs(s, a->rs2), cpu_env, s->cfg_ptr->vlen / 8,
+            s->cfg_ptr->vlen / 8, data,
+            s->sew == MO_32 ?
+                gen_helper_vsha2cl32_vv : gen_helper_vsha2cl64_vv);
+
+        mark_vs_dirty(s);
+        gen_set_label(over);
+        return true;
+    }
+    return false;
+}
+
+static bool trans_vsha2ch_vv(DisasContext *s, arg_rmrr *a)
+{
+    if (vsha_check(s, a)) {
+        uint32_t data = 0;
+        TCGLabel *over = gen_new_label();
+        TCGv_i32 egs;
+
+        if (!s->vstart_eq_zero || !s->vl_eq_vlmax) {
+            /* save opcode for unwinding in case we throw an exception */
+            decode_save_opc(s);
+            egs = tcg_constant_i32(ZVKNH_EGS);
+            gen_helper_egs_check(egs, cpu_env);
+            tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
+        }
+
+        data = FIELD_DP32(data, VDATA, VM, a->vm);
+        data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
+        data = FIELD_DP32(data, VDATA, VTA, s->vta);
+        data = FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s);
+        data = FIELD_DP32(data, VDATA, VMA, s->vma);
+
+        tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, a->rs1),
+            vreg_ofs(s, a->rs2), cpu_env, s->cfg_ptr->vlen / 8,
+            s->cfg_ptr->vlen / 8, data,
+            s->sew == MO_32 ?
+                gen_helper_vsha2ch32_vv : gen_helper_vsha2ch64_vv);
+
+        mark_vs_dirty(s);
+        gen_set_label(over);
+        return true;
+    }
+    return false;
+}
+
+/*
+ * Zvksh
+ */
+
+#define ZVKSH_EGS 8
+
+static inline bool vsm3_check(DisasContext *s, arg_rmrr *a)
+{
+    int egw_bytes = ZVKSH_EGS << s->sew;
+    int mult = 1 << MAX(s->lmul, 0);
+    return s->cfg_ptr->ext_zvksh == true &&
+           require_rvv(s) &&
+           vext_check_isa_ill(s) &&
+           !is_overlapped(a->rd, mult, a->rs2, mult) &&
+           MAXSZ(s) >= egw_bytes &&
+           s->sew == MO_32;
+}
+
+static inline bool vsm3me_check(DisasContext *s, arg_rmrr *a)
+{
+    return vsm3_check(s, a) && vext_check_sss(s, a->rd, a->rs1, a->rs2, a->vm);
+}
+
+static inline bool vsm3c_check(DisasContext *s, arg_rmrr *a)
+{
+    return vsm3_check(s, a) && vext_check_ss(s, a->rd, a->rs2, a->vm);
+}
+
+GEN_VV_UNMASKED_TRANS(vsm3me_vv, vsm3me_check, ZVKSH_EGS)
+GEN_VI_UNMASKED_TRANS(vsm3c_vi, vsm3c_check, ZVKSH_EGS)
+
+/*
+ * Zvkg
+ */
+
+#define ZVKG_EGS 4
+
+static bool vgmul_check(DisasContext *s, arg_rmr *a)
+{
+    int egw_bytes = ZVKG_EGS << s->sew;
+    return s->cfg_ptr->ext_zvkg == true &&
+           vext_check_isa_ill(s) &&
+           require_rvv(s) &&
+           MAXSZ(s) >= egw_bytes &&
+           vext_check_ss(s, a->rd, a->rs2, a->vm) &&
+           s->sew == MO_32;
+}
+
+GEN_V_UNMASKED_TRANS(vgmul_vv, vgmul_check, ZVKG_EGS)
+
+static bool vghsh_check(DisasContext *s, arg_rmrr *a)
+{
+    int egw_bytes = ZVKG_EGS << s->sew;
+    return s->cfg_ptr->ext_zvkg == true &&
+           opivv_check(s, a) &&
+           MAXSZ(s) >= egw_bytes &&
+           s->sew == MO_32;
+}
+
+GEN_VV_UNMASKED_TRANS(vghsh_vv, vghsh_check, ZVKG_EGS)
+
+/*
+ * Zvksed
+ */
+
+#define ZVKSED_EGS 4
+
+static bool zvksed_check(DisasContext *s)
+{
+    int egw_bytes = ZVKSED_EGS << s->sew;
+    return s->cfg_ptr->ext_zvksed == true &&
+           require_rvv(s) &&
+           vext_check_isa_ill(s) &&
+           MAXSZ(s) >= egw_bytes &&
+           s->sew == MO_32;
+}
+
+static bool vsm4k_vi_check(DisasContext *s, arg_rmrr *a)
+{
+    return zvksed_check(s) &&
+           require_align(a->rd, s->lmul) &&
+           require_align(a->rs2, s->lmul);
+}
+
+GEN_VI_UNMASKED_TRANS(vsm4k_vi, vsm4k_vi_check, ZVKSED_EGS)
+
+static bool vsm4r_vv_check(DisasContext *s, arg_rmr *a)
+{
+    return zvksed_check(s) &&
+           require_align(a->rd, s->lmul) &&
+           require_align(a->rs2, s->lmul);
+}
+
+GEN_V_UNMASKED_TRANS(vsm4r_vv, vsm4r_vv_check, ZVKSED_EGS)
+
+static bool vsm4r_vs_check(DisasContext *s, arg_rmr *a)
+{
+    return zvksed_check(s) &&
+           !is_overlapped(a->rd, 1 << MAX(s->lmul, 0), a->rs2, 1) &&
+           require_align(a->rd, s->lmul);
+}
+
+GEN_V_UNMASKED_TRANS(vsm4r_vs, vsm4r_vs_check, ZVKSED_EGS)
diff --git a/target/riscv/insn_trans/trans_rvzfa.c.inc b/target/riscv/insn_trans/trans_rvzfa.c.inc
index 2c715af3e5..0fdd2698f6 100644
--- a/target/riscv/insn_trans/trans_rvzfa.c.inc
+++ b/target/riscv/insn_trans/trans_rvzfa.c.inc
@@ -470,7 +470,7 @@ bool trans_fleq_d(DisasContext *ctx, arg_fleq_d *a)
     TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
     TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2);
 
-    gen_helper_fltq_s(dest, cpu_env, src1, src2);
+    gen_helper_fleq_d(dest, cpu_env, src1, src2);
     gen_set_gpr(ctx, a->rd, dest);
     return true;
 }
@@ -485,7 +485,7 @@ bool trans_fltq_d(DisasContext *ctx, arg_fltq_d *a)
     TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
     TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2);
 
-    gen_helper_fltq_s(dest, cpu_env, src1, src2);
+    gen_helper_fltq_d(dest, cpu_env, src1, src2);
     gen_set_gpr(ctx, a->rd, dest);
     return true;
 }
diff --git a/target/riscv/insn_trans/trans_rvzfh.c.inc b/target/riscv/insn_trans/trans_rvzfh.c.inc
index 8b1e2519bb..4b01812fd8 100644
--- a/target/riscv/insn_trans/trans_rvzfh.c.inc
+++ b/target/riscv/insn_trans/trans_rvzfh.c.inc
@@ -305,7 +305,7 @@ static bool trans_fsgnjn_h(DisasContext *ctx, arg_fsgnjn_h *a)
         tcg_gen_and_i64(dest, mask, rs1);
         tcg_gen_or_i64(dest, dest, rs2);
     }
-    /* signed-extended intead of nanboxing for result if enable zfinx */
+    /* signed-extended instead of nanboxing for result if enable zfinx */
     if (ctx->cfg_ptr->ext_zfinx) {
         tcg_gen_ext16s_i64(dest, dest);
     }
@@ -349,7 +349,7 @@ static bool trans_fsgnjx_h(DisasContext *ctx, arg_fsgnjx_h *a)
         tcg_gen_andi_i64(dest, rs2, MAKE_64BIT_MASK(15, 1));
         tcg_gen_xor_i64(dest, rs1, dest);
     }
-    /* signed-extended intead of nanboxing for result if enable zfinx */
+    /* signed-extended instead of nanboxing for result if enable zfinx */
     if (ctx->cfg_ptr->ext_zfinx) {
         tcg_gen_ext16s_i64(dest, dest);
     }
diff --git a/target/riscv/kvm.c b/target/riscv/kvm.c
index dbcf26f27d..c01cfb03f4 100644
--- a/target/riscv/kvm.c
+++ b/target/riscv/kvm.c
@@ -36,6 +36,7 @@
 #include "exec/address-spaces.h"
 #include "hw/boards.h"
 #include "hw/irq.h"
+#include "hw/intc/riscv_imsic.h"
 #include "qemu/log.h"
 #include "hw/loader.h"
 #include "kvm_riscv.h"
@@ -43,6 +44,12 @@
 #include "chardev/char-fe.h"
 #include "migration/migration.h"
 #include "sysemu/runstate.h"
+#include "hw/riscv/numa.h"
+
+void riscv_kvm_aplic_request(void *opaque, int irq, int level)
+{
+    kvm_set_irq(kvm_state, irq, !!level);
+}
 
 static uint64_t kvm_riscv_reg_id(CPURISCVState *env, uint64_t type,
                                  uint64_t idx)
@@ -926,7 +933,15 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
 
 int kvm_arch_irqchip_create(KVMState *s)
 {
-    return 0;
+    if (kvm_kernel_irqchip_split()) {
+        error_report("-machine kernel_irqchip=split is not supported on RISC-V.");
+        exit(1);
+    }
+
+    /*
+     * We can create the VAIA using the newer device control API.
+     */
+    return kvm_check_extension(s, KVM_CAP_DEVICE_CTRL);
 }
 
 int kvm_arch_process_async_events(CPUState *cs)
@@ -1027,6 +1042,190 @@ bool kvm_arch_cpu_check_are_resettable(void)
     return true;
 }
 
+static int aia_mode;
+
+static const char *kvm_aia_mode_str(uint64_t mode)
+{
+    switch (mode) {
+    case KVM_DEV_RISCV_AIA_MODE_EMUL:
+        return "emul";
+    case KVM_DEV_RISCV_AIA_MODE_HWACCEL:
+        return "hwaccel";
+    case KVM_DEV_RISCV_AIA_MODE_AUTO:
+    default:
+        return "auto";
+    };
+}
+
+static char *riscv_get_kvm_aia(Object *obj, Error **errp)
+{
+    return g_strdup(kvm_aia_mode_str(aia_mode));
+}
+
+static void riscv_set_kvm_aia(Object *obj, const char *val, Error **errp)
+{
+    if (!strcmp(val, "emul")) {
+        aia_mode = KVM_DEV_RISCV_AIA_MODE_EMUL;
+    } else if (!strcmp(val, "hwaccel")) {
+        aia_mode = KVM_DEV_RISCV_AIA_MODE_HWACCEL;
+    } else if (!strcmp(val, "auto")) {
+        aia_mode = KVM_DEV_RISCV_AIA_MODE_AUTO;
+    } else {
+        error_setg(errp, "Invalid KVM AIA mode");
+        error_append_hint(errp, "Valid values are emul, hwaccel, and auto.\n");
+    }
+}
+
 void kvm_arch_accel_class_init(ObjectClass *oc)
 {
+    object_class_property_add_str(oc, "riscv-aia", riscv_get_kvm_aia,
+                                  riscv_set_kvm_aia);
+    object_class_property_set_description(oc, "riscv-aia",
+                                          "Set KVM AIA mode. Valid values are "
+                                          "emul, hwaccel, and auto. Default "
+                                          "is auto.");
+    object_property_set_default_str(object_class_property_find(oc, "riscv-aia"),
+                                    "auto");
+}
+
+void kvm_riscv_aia_create(MachineState *machine, uint64_t group_shift,
+                          uint64_t aia_irq_num, uint64_t aia_msi_num,
+                          uint64_t aplic_base, uint64_t imsic_base,
+                          uint64_t guest_num)
+{
+    int ret, i;
+    int aia_fd = -1;
+    uint64_t default_aia_mode;
+    uint64_t socket_count = riscv_socket_count(machine);
+    uint64_t max_hart_per_socket = 0;
+    uint64_t socket, base_hart, hart_count, socket_imsic_base, imsic_addr;
+    uint64_t socket_bits, hart_bits, guest_bits;
+
+    aia_fd = kvm_create_device(kvm_state, KVM_DEV_TYPE_RISCV_AIA, false);
+
+    if (aia_fd < 0) {
+        error_report("Unable to create in-kernel irqchip");
+        exit(1);
+    }
+
+    ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_CONFIG,
+                            KVM_DEV_RISCV_AIA_CONFIG_MODE,
+                            &default_aia_mode, false, NULL);
+    if (ret < 0) {
+        error_report("KVM AIA: failed to get current KVM AIA mode");
+        exit(1);
+    }
+    qemu_log("KVM AIA: default mode is %s\n",
+             kvm_aia_mode_str(default_aia_mode));
+
+    if (default_aia_mode != aia_mode) {
+        ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_CONFIG,
+                                KVM_DEV_RISCV_AIA_CONFIG_MODE,
+                                &aia_mode, true, NULL);
+        if (ret < 0)
+            warn_report("KVM AIA: failed to set KVM AIA mode");
+        else
+            qemu_log("KVM AIA: set current mode to %s\n",
+                     kvm_aia_mode_str(aia_mode));
+    }
+
+    ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_CONFIG,
+                            KVM_DEV_RISCV_AIA_CONFIG_SRCS,
+                            &aia_irq_num, true, NULL);
+    if (ret < 0) {
+        error_report("KVM AIA: failed to set number of input irq lines");
+        exit(1);
+    }
+
+    ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_CONFIG,
+                            KVM_DEV_RISCV_AIA_CONFIG_IDS,
+                            &aia_msi_num, true, NULL);
+    if (ret < 0) {
+        error_report("KVM AIA: failed to set number of msi");
+        exit(1);
+    }
+
+    socket_bits = find_last_bit(&socket_count, BITS_PER_LONG) + 1;
+    ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_CONFIG,
+                            KVM_DEV_RISCV_AIA_CONFIG_GROUP_BITS,
+                            &socket_bits, true, NULL);
+    if (ret < 0) {
+        error_report("KVM AIA: failed to set group_bits");
+        exit(1);
+    }
+
+    ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_CONFIG,
+                            KVM_DEV_RISCV_AIA_CONFIG_GROUP_SHIFT,
+                            &group_shift, true, NULL);
+    if (ret < 0) {
+        error_report("KVM AIA: failed to set group_shift");
+        exit(1);
+    }
+
+    guest_bits = guest_num == 0 ? 0 :
+                 find_last_bit(&guest_num, BITS_PER_LONG) + 1;
+    ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_CONFIG,
+                            KVM_DEV_RISCV_AIA_CONFIG_GUEST_BITS,
+                            &guest_bits, true, NULL);
+    if (ret < 0) {
+        error_report("KVM AIA: failed to set guest_bits");
+        exit(1);
+    }
+
+    ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_ADDR,
+                            KVM_DEV_RISCV_AIA_ADDR_APLIC,
+                            &aplic_base, true, NULL);
+    if (ret < 0) {
+        error_report("KVM AIA: failed to set the base address of APLIC");
+        exit(1);
+    }
+
+    for (socket = 0; socket < socket_count; socket++) {
+        socket_imsic_base = imsic_base + socket * (1U << group_shift);
+        hart_count = riscv_socket_hart_count(machine, socket);
+        base_hart = riscv_socket_first_hartid(machine, socket);
+
+        if (max_hart_per_socket < hart_count) {
+            max_hart_per_socket = hart_count;
+        }
+
+        for (i = 0; i < hart_count; i++) {
+            imsic_addr = socket_imsic_base + i * IMSIC_HART_SIZE(guest_bits);
+            ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_ADDR,
+                                    KVM_DEV_RISCV_AIA_ADDR_IMSIC(i + base_hart),
+                                    &imsic_addr, true, NULL);
+            if (ret < 0) {
+                error_report("KVM AIA: failed to set the IMSIC address for hart %d", i);
+                exit(1);
+            }
+        }
+    }
+
+    hart_bits = find_last_bit(&max_hart_per_socket, BITS_PER_LONG) + 1;
+    ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_CONFIG,
+                            KVM_DEV_RISCV_AIA_CONFIG_HART_BITS,
+                            &hart_bits, true, NULL);
+    if (ret < 0) {
+        error_report("KVM AIA: failed to set hart_bits");
+        exit(1);
+    }
+
+    if (kvm_has_gsi_routing()) {
+        for (uint64_t idx = 0; idx < aia_irq_num + 1; ++idx) {
+            /* KVM AIA only has one APLIC instance */
+            kvm_irqchip_add_irq_route(kvm_state, idx, 0, idx);
+        }
+        kvm_gsi_routing_allowed = true;
+        kvm_irqchip_commit_routes(kvm_state);
+    }
+
+    ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_CTRL,
+                            KVM_DEV_RISCV_AIA_CTRL_INIT,
+                            NULL, true, NULL);
+    if (ret < 0) {
+        error_report("KVM AIA: initialized fail");
+        exit(1);
+    }
+
+    kvm_msi_via_irqfd_allowed = kvm_irqfds_enabled();
 }
diff --git a/target/riscv/kvm_riscv.h b/target/riscv/kvm_riscv.h
index e3ba935808..de8c209ebc 100644
--- a/target/riscv/kvm_riscv.h
+++ b/target/riscv/kvm_riscv.h
@@ -22,5 +22,10 @@
 void kvm_riscv_init_user_properties(Object *cpu_obj);
 void kvm_riscv_reset_vcpu(RISCVCPU *cpu);
 void kvm_riscv_set_irq(RISCVCPU *cpu, int irq, int level);
+void kvm_riscv_aia_create(MachineState *machine, uint64_t group_shift,
+                          uint64_t aia_irq_num, uint64_t aia_msi_num,
+                          uint64_t aplic_base, uint64_t imsic_base,
+                          uint64_t guest_num);
+void riscv_kvm_aplic_request(void *opaque, int irq, int level);
 
 #endif
diff --git a/target/riscv/m128_helper.c b/target/riscv/m128_helper.c
index e6a4f6120a..ec14aaa901 100644
--- a/target/riscv/m128_helper.c
+++ b/target/riscv/m128_helper.c
@@ -19,7 +19,6 @@
 
 #include "qemu/osdep.h"
 #include "cpu.h"
-#include "qemu/main-loop.h"
 #include "exec/exec-all.h"
 #include "exec/helper-proto.h"
 
diff --git a/target/riscv/meson.build b/target/riscv/meson.build
index 7f56c5f88d..660078bda1 100644
--- a/target/riscv/meson.build
+++ b/target/riscv/meson.build
@@ -16,11 +16,13 @@ riscv_ss.add(files(
   'gdbstub.c',
   'op_helper.c',
   'vector_helper.c',
+  'vector_internals.c',
   'bitmanip_helper.c',
   'translate.c',
   'm128_helper.c',
   'crypto_helper.c',
-  'zce_helper.c'
+  'zce_helper.c',
+  'vcrypto_helper.c'
 ))
 riscv_ss.add(when: 'CONFIG_KVM', if_true: files('kvm.c'), if_false: files('kvm-stub.c'))
 
diff --git a/target/riscv/monitor.c b/target/riscv/monitor.c
index f36ddfa967..f5b1ffe6c3 100644
--- a/target/riscv/monitor.c
+++ b/target/riscv/monitor.c
@@ -55,7 +55,7 @@ static void print_pte_header(Monitor *mon)
 static void print_pte(Monitor *mon, int va_bits, target_ulong vaddr,
                       hwaddr paddr, target_ulong size, int attr)
 {
-    /* santity check on vaddr */
+    /* sanity check on vaddr */
     if (vaddr >= (1UL << va_bits)) {
         return;
     }
diff --git a/target/riscv/op_helper.c b/target/riscv/op_helper.c
index 9cdb9cdd06..5355225d56 100644
--- a/target/riscv/op_helper.c
+++ b/target/riscv/op_helper.c
@@ -21,8 +21,8 @@
 #include "qemu/osdep.h"
 #include "cpu.h"
 #include "internals.h"
-#include "qemu/main-loop.h"
 #include "exec/exec-all.h"
+#include "exec/cpu_ldst.h"
 #include "exec/helper-proto.h"
 
 /* Exceptions processing helpers */
diff --git a/target/riscv/pmp.c b/target/riscv/pmp.c
index 9d8db493e6..5e60c26031 100644
--- a/target/riscv/pmp.c
+++ b/target/riscv/pmp.c
@@ -44,6 +44,10 @@ static inline uint8_t pmp_get_a_field(uint8_t cfg)
  */
 static inline int pmp_is_locked(CPURISCVState *env, uint32_t pmp_index)
 {
+    /* mseccfg.RLB is set */
+    if (MSECCFG_RLB_ISSET(env)) {
+        return 0;
+    }
 
     if (env->pmp_state.pmp[pmp_index].cfg_reg & PMP_LOCK) {
         return 1;
diff --git a/target/riscv/pmu.c b/target/riscv/pmu.c
index db06b3882f..36f6307d28 100644
--- a/target/riscv/pmu.c
+++ b/target/riscv/pmu.c
@@ -17,6 +17,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/log.h"
 #include "cpu.h"
 #include "pmu.h"
 #include "sysemu/cpu-timers.h"
diff --git a/target/riscv/pmu.h b/target/riscv/pmu.h
index 0c819ca983..2bfb71ba87 100644
--- a/target/riscv/pmu.h
+++ b/target/riscv/pmu.h
@@ -16,10 +16,7 @@
  * this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
-#include "qemu/log.h"
 #include "cpu.h"
-#include "qemu/main-loop.h"
-#include "exec/exec-all.h"
 
 bool riscv_pmu_ctr_monitor_instructions(CPURISCVState *env,
                                         uint32_t target_ctr);
diff --git a/target/riscv/translate.c b/target/riscv/translate.c
index 697df1be9e..7dbf173adb 100644
--- a/target/riscv/translate.c
+++ b/target/riscv/translate.c
@@ -1094,6 +1094,7 @@ static uint32_t opcode_at(DisasContextBase *dcbase, target_ulong pc)
 #include "insn_trans/trans_rvzfa.c.inc"
 #include "insn_trans/trans_rvzfh.c.inc"
 #include "insn_trans/trans_rvk.c.inc"
+#include "insn_trans/trans_rvvk.c.inc"
 #include "insn_trans/trans_privileged.c.inc"
 #include "insn_trans/trans_svinval.c.inc"
 #include "insn_trans/trans_rvbf16.c.inc"
diff --git a/target/riscv/vcrypto_helper.c b/target/riscv/vcrypto_helper.c
new file mode 100644
index 0000000000..e2d719b13b
--- /dev/null
+++ b/target/riscv/vcrypto_helper.c
@@ -0,0 +1,970 @@
+/*
+ * RISC-V Vector Crypto Extension Helpers for QEMU.
+ *
+ * Copyright (C) 2023 SiFive, Inc.
+ * Written by Codethink Ltd and SiFive.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/host-utils.h"
+#include "qemu/bitops.h"
+#include "qemu/bswap.h"
+#include "cpu.h"
+#include "crypto/aes.h"
+#include "crypto/aes-round.h"
+#include "crypto/sm4.h"
+#include "exec/memop.h"
+#include "exec/exec-all.h"
+#include "exec/helper-proto.h"
+#include "internals.h"
+#include "vector_internals.h"
+
+static uint64_t clmul64(uint64_t y, uint64_t x)
+{
+    uint64_t result = 0;
+    for (int j = 63; j >= 0; j--) {
+        if ((y >> j) & 1) {
+            result ^= (x << j);
+        }
+    }
+    return result;
+}
+
+static uint64_t clmulh64(uint64_t y, uint64_t x)
+{
+    uint64_t result = 0;
+    for (int j = 63; j >= 1; j--) {
+        if ((y >> j) & 1) {
+            result ^= (x >> (64 - j));
+        }
+    }
+    return result;
+}
+
+RVVCALL(OPIVV2, vclmul_vv, OP_UUU_D, H8, H8, H8, clmul64)
+GEN_VEXT_VV(vclmul_vv, 8)
+RVVCALL(OPIVX2, vclmul_vx, OP_UUU_D, H8, H8, clmul64)
+GEN_VEXT_VX(vclmul_vx, 8)
+RVVCALL(OPIVV2, vclmulh_vv, OP_UUU_D, H8, H8, H8, clmulh64)
+GEN_VEXT_VV(vclmulh_vv, 8)
+RVVCALL(OPIVX2, vclmulh_vx, OP_UUU_D, H8, H8, clmulh64)
+GEN_VEXT_VX(vclmulh_vx, 8)
+
+RVVCALL(OPIVV2, vror_vv_b, OP_UUU_B, H1, H1, H1, ror8)
+RVVCALL(OPIVV2, vror_vv_h, OP_UUU_H, H2, H2, H2, ror16)
+RVVCALL(OPIVV2, vror_vv_w, OP_UUU_W, H4, H4, H4, ror32)
+RVVCALL(OPIVV2, vror_vv_d, OP_UUU_D, H8, H8, H8, ror64)
+GEN_VEXT_VV(vror_vv_b, 1)
+GEN_VEXT_VV(vror_vv_h, 2)
+GEN_VEXT_VV(vror_vv_w, 4)
+GEN_VEXT_VV(vror_vv_d, 8)
+
+RVVCALL(OPIVX2, vror_vx_b, OP_UUU_B, H1, H1, ror8)
+RVVCALL(OPIVX2, vror_vx_h, OP_UUU_H, H2, H2, ror16)
+RVVCALL(OPIVX2, vror_vx_w, OP_UUU_W, H4, H4, ror32)
+RVVCALL(OPIVX2, vror_vx_d, OP_UUU_D, H8, H8, ror64)
+GEN_VEXT_VX(vror_vx_b, 1)
+GEN_VEXT_VX(vror_vx_h, 2)
+GEN_VEXT_VX(vror_vx_w, 4)
+GEN_VEXT_VX(vror_vx_d, 8)
+
+RVVCALL(OPIVV2, vrol_vv_b, OP_UUU_B, H1, H1, H1, rol8)
+RVVCALL(OPIVV2, vrol_vv_h, OP_UUU_H, H2, H2, H2, rol16)
+RVVCALL(OPIVV2, vrol_vv_w, OP_UUU_W, H4, H4, H4, rol32)
+RVVCALL(OPIVV2, vrol_vv_d, OP_UUU_D, H8, H8, H8, rol64)
+GEN_VEXT_VV(vrol_vv_b, 1)
+GEN_VEXT_VV(vrol_vv_h, 2)
+GEN_VEXT_VV(vrol_vv_w, 4)
+GEN_VEXT_VV(vrol_vv_d, 8)
+
+RVVCALL(OPIVX2, vrol_vx_b, OP_UUU_B, H1, H1, rol8)
+RVVCALL(OPIVX2, vrol_vx_h, OP_UUU_H, H2, H2, rol16)
+RVVCALL(OPIVX2, vrol_vx_w, OP_UUU_W, H4, H4, rol32)
+RVVCALL(OPIVX2, vrol_vx_d, OP_UUU_D, H8, H8, rol64)
+GEN_VEXT_VX(vrol_vx_b, 1)
+GEN_VEXT_VX(vrol_vx_h, 2)
+GEN_VEXT_VX(vrol_vx_w, 4)
+GEN_VEXT_VX(vrol_vx_d, 8)
+
+static uint64_t brev8(uint64_t val)
+{
+    val = ((val & 0x5555555555555555ull) << 1) |
+          ((val & 0xAAAAAAAAAAAAAAAAull) >> 1);
+    val = ((val & 0x3333333333333333ull) << 2) |
+          ((val & 0xCCCCCCCCCCCCCCCCull) >> 2);
+    val = ((val & 0x0F0F0F0F0F0F0F0Full) << 4) |
+          ((val & 0xF0F0F0F0F0F0F0F0ull) >> 4);
+
+    return val;
+}
+
+RVVCALL(OPIVV1, vbrev8_v_b, OP_UU_B, H1, H1, brev8)
+RVVCALL(OPIVV1, vbrev8_v_h, OP_UU_H, H2, H2, brev8)
+RVVCALL(OPIVV1, vbrev8_v_w, OP_UU_W, H4, H4, brev8)
+RVVCALL(OPIVV1, vbrev8_v_d, OP_UU_D, H8, H8, brev8)
+GEN_VEXT_V(vbrev8_v_b, 1)
+GEN_VEXT_V(vbrev8_v_h, 2)
+GEN_VEXT_V(vbrev8_v_w, 4)
+GEN_VEXT_V(vbrev8_v_d, 8)
+
+#define DO_IDENTITY(a) (a)
+RVVCALL(OPIVV1, vrev8_v_b, OP_UU_B, H1, H1, DO_IDENTITY)
+RVVCALL(OPIVV1, vrev8_v_h, OP_UU_H, H2, H2, bswap16)
+RVVCALL(OPIVV1, vrev8_v_w, OP_UU_W, H4, H4, bswap32)
+RVVCALL(OPIVV1, vrev8_v_d, OP_UU_D, H8, H8, bswap64)
+GEN_VEXT_V(vrev8_v_b, 1)
+GEN_VEXT_V(vrev8_v_h, 2)
+GEN_VEXT_V(vrev8_v_w, 4)
+GEN_VEXT_V(vrev8_v_d, 8)
+
+#define DO_ANDN(a, b) ((a) & ~(b))
+RVVCALL(OPIVV2, vandn_vv_b, OP_UUU_B, H1, H1, H1, DO_ANDN)
+RVVCALL(OPIVV2, vandn_vv_h, OP_UUU_H, H2, H2, H2, DO_ANDN)
+RVVCALL(OPIVV2, vandn_vv_w, OP_UUU_W, H4, H4, H4, DO_ANDN)
+RVVCALL(OPIVV2, vandn_vv_d, OP_UUU_D, H8, H8, H8, DO_ANDN)
+GEN_VEXT_VV(vandn_vv_b, 1)
+GEN_VEXT_VV(vandn_vv_h, 2)
+GEN_VEXT_VV(vandn_vv_w, 4)
+GEN_VEXT_VV(vandn_vv_d, 8)
+
+RVVCALL(OPIVX2, vandn_vx_b, OP_UUU_B, H1, H1, DO_ANDN)
+RVVCALL(OPIVX2, vandn_vx_h, OP_UUU_H, H2, H2, DO_ANDN)
+RVVCALL(OPIVX2, vandn_vx_w, OP_UUU_W, H4, H4, DO_ANDN)
+RVVCALL(OPIVX2, vandn_vx_d, OP_UUU_D, H8, H8, DO_ANDN)
+GEN_VEXT_VX(vandn_vx_b, 1)
+GEN_VEXT_VX(vandn_vx_h, 2)
+GEN_VEXT_VX(vandn_vx_w, 4)
+GEN_VEXT_VX(vandn_vx_d, 8)
+
+RVVCALL(OPIVV1, vbrev_v_b, OP_UU_B, H1, H1, revbit8)
+RVVCALL(OPIVV1, vbrev_v_h, OP_UU_H, H2, H2, revbit16)
+RVVCALL(OPIVV1, vbrev_v_w, OP_UU_W, H4, H4, revbit32)
+RVVCALL(OPIVV1, vbrev_v_d, OP_UU_D, H8, H8, revbit64)
+GEN_VEXT_V(vbrev_v_b, 1)
+GEN_VEXT_V(vbrev_v_h, 2)
+GEN_VEXT_V(vbrev_v_w, 4)
+GEN_VEXT_V(vbrev_v_d, 8)
+
+RVVCALL(OPIVV1, vclz_v_b, OP_UU_B, H1, H1, clz8)
+RVVCALL(OPIVV1, vclz_v_h, OP_UU_H, H2, H2, clz16)
+RVVCALL(OPIVV1, vclz_v_w, OP_UU_W, H4, H4, clz32)
+RVVCALL(OPIVV1, vclz_v_d, OP_UU_D, H8, H8, clz64)
+GEN_VEXT_V(vclz_v_b, 1)
+GEN_VEXT_V(vclz_v_h, 2)
+GEN_VEXT_V(vclz_v_w, 4)
+GEN_VEXT_V(vclz_v_d, 8)
+
+RVVCALL(OPIVV1, vctz_v_b, OP_UU_B, H1, H1, ctz8)
+RVVCALL(OPIVV1, vctz_v_h, OP_UU_H, H2, H2, ctz16)
+RVVCALL(OPIVV1, vctz_v_w, OP_UU_W, H4, H4, ctz32)
+RVVCALL(OPIVV1, vctz_v_d, OP_UU_D, H8, H8, ctz64)
+GEN_VEXT_V(vctz_v_b, 1)
+GEN_VEXT_V(vctz_v_h, 2)
+GEN_VEXT_V(vctz_v_w, 4)
+GEN_VEXT_V(vctz_v_d, 8)
+
+RVVCALL(OPIVV1, vcpop_v_b, OP_UU_B, H1, H1, ctpop8)
+RVVCALL(OPIVV1, vcpop_v_h, OP_UU_H, H2, H2, ctpop16)
+RVVCALL(OPIVV1, vcpop_v_w, OP_UU_W, H4, H4, ctpop32)
+RVVCALL(OPIVV1, vcpop_v_d, OP_UU_D, H8, H8, ctpop64)
+GEN_VEXT_V(vcpop_v_b, 1)
+GEN_VEXT_V(vcpop_v_h, 2)
+GEN_VEXT_V(vcpop_v_w, 4)
+GEN_VEXT_V(vcpop_v_d, 8)
+
+#define DO_SLL(N, M) (N << (M & (sizeof(N) * 8 - 1)))
+RVVCALL(OPIVV2, vwsll_vv_b, WOP_UUU_B, H2, H1, H1, DO_SLL)
+RVVCALL(OPIVV2, vwsll_vv_h, WOP_UUU_H, H4, H2, H2, DO_SLL)
+RVVCALL(OPIVV2, vwsll_vv_w, WOP_UUU_W, H8, H4, H4, DO_SLL)
+GEN_VEXT_VV(vwsll_vv_b, 2)
+GEN_VEXT_VV(vwsll_vv_h, 4)
+GEN_VEXT_VV(vwsll_vv_w, 8)
+
+RVVCALL(OPIVX2, vwsll_vx_b, WOP_UUU_B, H2, H1, DO_SLL)
+RVVCALL(OPIVX2, vwsll_vx_h, WOP_UUU_H, H4, H2, DO_SLL)
+RVVCALL(OPIVX2, vwsll_vx_w, WOP_UUU_W, H8, H4, DO_SLL)
+GEN_VEXT_VX(vwsll_vx_b, 2)
+GEN_VEXT_VX(vwsll_vx_h, 4)
+GEN_VEXT_VX(vwsll_vx_w, 8)
+
+void HELPER(egs_check)(uint32_t egs, CPURISCVState *env)
+{
+    uint32_t vl = env->vl;
+    uint32_t vstart = env->vstart;
+
+    if (vl % egs != 0 || vstart % egs != 0) {
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+    }
+}
+
+static inline void xor_round_key(AESState *round_state, AESState *round_key)
+{
+    round_state->v = round_state->v ^ round_key->v;
+}
+
+#define GEN_ZVKNED_HELPER_VV(NAME, ...)                                   \
+    void HELPER(NAME)(void *vd, void *vs2, CPURISCVState *env,            \
+                      uint32_t desc)                                      \
+    {                                                                     \
+        uint32_t vl = env->vl;                                            \
+        uint32_t total_elems = vext_get_total_elems(env, desc, 4);        \
+        uint32_t vta = vext_vta(desc);                                    \
+                                                                          \
+        for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) {        \
+            AESState round_key;                                           \
+            round_key.d[0] = *((uint64_t *)vs2 + H8(i * 2 + 0));          \
+            round_key.d[1] = *((uint64_t *)vs2 + H8(i * 2 + 1));          \
+            AESState round_state;                                         \
+            round_state.d[0] = *((uint64_t *)vd + H8(i * 2 + 0));         \
+            round_state.d[1] = *((uint64_t *)vd + H8(i * 2 + 1));         \
+            __VA_ARGS__;                                                  \
+            *((uint64_t *)vd + H8(i * 2 + 0)) = round_state.d[0];         \
+            *((uint64_t *)vd + H8(i * 2 + 1)) = round_state.d[1];         \
+        }                                                                 \
+        env->vstart = 0;                                                  \
+        /* set tail elements to 1s */                                     \
+        vext_set_elems_1s(vd, vta, vl * 4, total_elems * 4);              \
+    }
+
+#define GEN_ZVKNED_HELPER_VS(NAME, ...)                                   \
+    void HELPER(NAME)(void *vd, void *vs2, CPURISCVState *env,            \
+                      uint32_t desc)                                      \
+    {                                                                     \
+        uint32_t vl = env->vl;                                            \
+        uint32_t total_elems = vext_get_total_elems(env, desc, 4);        \
+        uint32_t vta = vext_vta(desc);                                    \
+                                                                          \
+        for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) {        \
+            AESState round_key;                                           \
+            round_key.d[0] = *((uint64_t *)vs2 + H8(0));                  \
+            round_key.d[1] = *((uint64_t *)vs2 + H8(1));                  \
+            AESState round_state;                                         \
+            round_state.d[0] = *((uint64_t *)vd + H8(i * 2 + 0));         \
+            round_state.d[1] = *((uint64_t *)vd + H8(i * 2 + 1));         \
+            __VA_ARGS__;                                                  \
+            *((uint64_t *)vd + H8(i * 2 + 0)) = round_state.d[0];         \
+            *((uint64_t *)vd + H8(i * 2 + 1)) = round_state.d[1];         \
+        }                                                                 \
+        env->vstart = 0;                                                  \
+        /* set tail elements to 1s */                                     \
+        vext_set_elems_1s(vd, vta, vl * 4, total_elems * 4);              \
+    }
+
+GEN_ZVKNED_HELPER_VV(vaesef_vv, aesenc_SB_SR_AK(&round_state,
+                                                &round_state,
+                                                &round_key,
+                                                false);)
+GEN_ZVKNED_HELPER_VS(vaesef_vs, aesenc_SB_SR_AK(&round_state,
+                                                &round_state,
+                                                &round_key,
+                                                false);)
+GEN_ZVKNED_HELPER_VV(vaesdf_vv, aesdec_ISB_ISR_AK(&round_state,
+                                                  &round_state,
+                                                  &round_key,
+                                                  false);)
+GEN_ZVKNED_HELPER_VS(vaesdf_vs, aesdec_ISB_ISR_AK(&round_state,
+                                                  &round_state,
+                                                  &round_key,
+                                                  false);)
+GEN_ZVKNED_HELPER_VV(vaesem_vv, aesenc_SB_SR_MC_AK(&round_state,
+                                                   &round_state,
+                                                   &round_key,
+                                                   false);)
+GEN_ZVKNED_HELPER_VS(vaesem_vs, aesenc_SB_SR_MC_AK(&round_state,
+                                                   &round_state,
+                                                   &round_key,
+                                                   false);)
+GEN_ZVKNED_HELPER_VV(vaesdm_vv, aesdec_ISB_ISR_AK_IMC(&round_state,
+                                                      &round_state,
+                                                      &round_key,
+                                                      false);)
+GEN_ZVKNED_HELPER_VS(vaesdm_vs, aesdec_ISB_ISR_AK_IMC(&round_state,
+                                                      &round_state,
+                                                      &round_key,
+                                                      false);)
+GEN_ZVKNED_HELPER_VS(vaesz_vs, xor_round_key(&round_state, &round_key);)
+
+void HELPER(vaeskf1_vi)(void *vd_vptr, void *vs2_vptr, uint32_t uimm,
+                        CPURISCVState *env, uint32_t desc)
+{
+    uint32_t *vd = vd_vptr;
+    uint32_t *vs2 = vs2_vptr;
+    uint32_t vl = env->vl;
+    uint32_t total_elems = vext_get_total_elems(env, desc, 4);
+    uint32_t vta = vext_vta(desc);
+
+    uimm &= 0b1111;
+    if (uimm > 10 || uimm == 0) {
+        uimm ^= 0b1000;
+    }
+
+    for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) {
+        uint32_t rk[8], tmp;
+        static const uint32_t rcon[] = {
+            0x00000001, 0x00000002, 0x00000004, 0x00000008, 0x00000010,
+            0x00000020, 0x00000040, 0x00000080, 0x0000001B, 0x00000036,
+        };
+
+        rk[0] = vs2[i * 4 + H4(0)];
+        rk[1] = vs2[i * 4 + H4(1)];
+        rk[2] = vs2[i * 4 + H4(2)];
+        rk[3] = vs2[i * 4 + H4(3)];
+        tmp = ror32(rk[3], 8);
+
+        rk[4] = rk[0] ^ (((uint32_t)AES_sbox[(tmp >> 24) & 0xff] << 24) |
+                         ((uint32_t)AES_sbox[(tmp >> 16) & 0xff] << 16) |
+                         ((uint32_t)AES_sbox[(tmp >> 8) & 0xff] << 8) |
+                         ((uint32_t)AES_sbox[(tmp >> 0) & 0xff] << 0))
+                      ^ rcon[uimm - 1];
+        rk[5] = rk[1] ^ rk[4];
+        rk[6] = rk[2] ^ rk[5];
+        rk[7] = rk[3] ^ rk[6];
+
+        vd[i * 4 + H4(0)] = rk[4];
+        vd[i * 4 + H4(1)] = rk[5];
+        vd[i * 4 + H4(2)] = rk[6];
+        vd[i * 4 + H4(3)] = rk[7];
+    }
+    env->vstart = 0;
+    /* set tail elements to 1s */
+    vext_set_elems_1s(vd, vta, vl * 4, total_elems * 4);
+}
+
+void HELPER(vaeskf2_vi)(void *vd_vptr, void *vs2_vptr, uint32_t uimm,
+                        CPURISCVState *env, uint32_t desc)
+{
+    uint32_t *vd = vd_vptr;
+    uint32_t *vs2 = vs2_vptr;
+    uint32_t vl = env->vl;
+    uint32_t total_elems = vext_get_total_elems(env, desc, 4);
+    uint32_t vta = vext_vta(desc);
+
+    uimm &= 0b1111;
+    if (uimm > 14 || uimm < 2) {
+        uimm ^= 0b1000;
+    }
+
+    for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) {
+        uint32_t rk[12], tmp;
+        static const uint32_t rcon[] = {
+            0x00000001, 0x00000002, 0x00000004, 0x00000008, 0x00000010,
+            0x00000020, 0x00000040, 0x00000080, 0x0000001B, 0x00000036,
+        };
+
+        rk[0] = vd[i * 4 + H4(0)];
+        rk[1] = vd[i * 4 + H4(1)];
+        rk[2] = vd[i * 4 + H4(2)];
+        rk[3] = vd[i * 4 + H4(3)];
+        rk[4] = vs2[i * 4 + H4(0)];
+        rk[5] = vs2[i * 4 + H4(1)];
+        rk[6] = vs2[i * 4 + H4(2)];
+        rk[7] = vs2[i * 4 + H4(3)];
+
+        if (uimm % 2 == 0) {
+            tmp = ror32(rk[7], 8);
+            rk[8] = rk[0] ^ (((uint32_t)AES_sbox[(tmp >> 24) & 0xff] << 24) |
+                             ((uint32_t)AES_sbox[(tmp >> 16) & 0xff] << 16) |
+                             ((uint32_t)AES_sbox[(tmp >> 8) & 0xff] << 8) |
+                             ((uint32_t)AES_sbox[(tmp >> 0) & 0xff] << 0))
+                          ^ rcon[(uimm - 1) / 2];
+        } else {
+            rk[8] = rk[0] ^ (((uint32_t)AES_sbox[(rk[7] >> 24) & 0xff] << 24) |
+                             ((uint32_t)AES_sbox[(rk[7] >> 16) & 0xff] << 16) |
+                             ((uint32_t)AES_sbox[(rk[7] >> 8) & 0xff] << 8) |
+                             ((uint32_t)AES_sbox[(rk[7] >> 0) & 0xff] << 0));
+        }
+        rk[9] = rk[1] ^ rk[8];
+        rk[10] = rk[2] ^ rk[9];
+        rk[11] = rk[3] ^ rk[10];
+
+        vd[i * 4 + H4(0)] = rk[8];
+        vd[i * 4 + H4(1)] = rk[9];
+        vd[i * 4 + H4(2)] = rk[10];
+        vd[i * 4 + H4(3)] = rk[11];
+    }
+    env->vstart = 0;
+    /* set tail elements to 1s */
+    vext_set_elems_1s(vd, vta, vl * 4, total_elems * 4);
+}
+
+static inline uint32_t sig0_sha256(uint32_t x)
+{
+    return ror32(x, 7) ^ ror32(x, 18) ^ (x >> 3);
+}
+
+static inline uint32_t sig1_sha256(uint32_t x)
+{
+    return ror32(x, 17) ^ ror32(x, 19) ^ (x >> 10);
+}
+
+static inline uint64_t sig0_sha512(uint64_t x)
+{
+    return ror64(x, 1) ^ ror64(x, 8) ^ (x >> 7);
+}
+
+static inline uint64_t sig1_sha512(uint64_t x)
+{
+    return ror64(x, 19) ^ ror64(x, 61) ^ (x >> 6);
+}
+
+static inline void vsha2ms_e32(uint32_t *vd, uint32_t *vs1, uint32_t *vs2)
+{
+    uint32_t res[4];
+    res[0] = sig1_sha256(vs1[H4(2)]) + vs2[H4(1)] + sig0_sha256(vd[H4(1)]) +
+             vd[H4(0)];
+    res[1] = sig1_sha256(vs1[H4(3)]) + vs2[H4(2)] + sig0_sha256(vd[H4(2)]) +
+             vd[H4(1)];
+    res[2] =
+        sig1_sha256(res[0]) + vs2[H4(3)] + sig0_sha256(vd[H4(3)]) + vd[H4(2)];
+    res[3] =
+        sig1_sha256(res[1]) + vs1[H4(0)] + sig0_sha256(vs2[H4(0)]) + vd[H4(3)];
+    vd[H4(3)] = res[3];
+    vd[H4(2)] = res[2];
+    vd[H4(1)] = res[1];
+    vd[H4(0)] = res[0];
+}
+
+static inline void vsha2ms_e64(uint64_t *vd, uint64_t *vs1, uint64_t *vs2)
+{
+    uint64_t res[4];
+    res[0] = sig1_sha512(vs1[2]) + vs2[1] + sig0_sha512(vd[1]) + vd[0];
+    res[1] = sig1_sha512(vs1[3]) + vs2[2] + sig0_sha512(vd[2]) + vd[1];
+    res[2] = sig1_sha512(res[0]) + vs2[3] + sig0_sha512(vd[3]) + vd[2];
+    res[3] = sig1_sha512(res[1]) + vs1[0] + sig0_sha512(vs2[0]) + vd[3];
+    vd[3] = res[3];
+    vd[2] = res[2];
+    vd[1] = res[1];
+    vd[0] = res[0];
+}
+
+void HELPER(vsha2ms_vv)(void *vd, void *vs1, void *vs2, CPURISCVState *env,
+                        uint32_t desc)
+{
+    uint32_t sew = FIELD_EX64(env->vtype, VTYPE, VSEW);
+    uint32_t esz = sew == MO_32 ? 4 : 8;
+    uint32_t total_elems;
+    uint32_t vta = vext_vta(desc);
+
+    for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) {
+        if (sew == MO_32) {
+            vsha2ms_e32(((uint32_t *)vd) + i * 4, ((uint32_t *)vs1) + i * 4,
+                        ((uint32_t *)vs2) + i * 4);
+        } else {
+            /* If not 32 then SEW should be 64 */
+            vsha2ms_e64(((uint64_t *)vd) + i * 4, ((uint64_t *)vs1) + i * 4,
+                        ((uint64_t *)vs2) + i * 4);
+        }
+    }
+    /* set tail elements to 1s */
+    total_elems = vext_get_total_elems(env, desc, esz);
+    vext_set_elems_1s(vd, vta, env->vl * esz, total_elems * esz);
+    env->vstart = 0;
+}
+
+static inline uint64_t sum0_64(uint64_t x)
+{
+    return ror64(x, 28) ^ ror64(x, 34) ^ ror64(x, 39);
+}
+
+static inline uint32_t sum0_32(uint32_t x)
+{
+    return ror32(x, 2) ^ ror32(x, 13) ^ ror32(x, 22);
+}
+
+static inline uint64_t sum1_64(uint64_t x)
+{
+    return ror64(x, 14) ^ ror64(x, 18) ^ ror64(x, 41);
+}
+
+static inline uint32_t sum1_32(uint32_t x)
+{
+    return ror32(x, 6) ^ ror32(x, 11) ^ ror32(x, 25);
+}
+
+#define ch(x, y, z) ((x & y) ^ ((~x) & z))
+
+#define maj(x, y, z) ((x & y) ^ (x & z) ^ (y & z))
+
+static void vsha2c_64(uint64_t *vs2, uint64_t *vd, uint64_t *vs1)
+{
+    uint64_t a = vs2[3], b = vs2[2], e = vs2[1], f = vs2[0];
+    uint64_t c = vd[3], d = vd[2], g = vd[1], h = vd[0];
+    uint64_t W0 = vs1[0], W1 = vs1[1];
+    uint64_t T1 = h + sum1_64(e) + ch(e, f, g) + W0;
+    uint64_t T2 = sum0_64(a) + maj(a, b, c);
+
+    h = g;
+    g = f;
+    f = e;
+    e = d + T1;
+    d = c;
+    c = b;
+    b = a;
+    a = T1 + T2;
+
+    T1 = h + sum1_64(e) + ch(e, f, g) + W1;
+    T2 = sum0_64(a) + maj(a, b, c);
+    h = g;
+    g = f;
+    f = e;
+    e = d + T1;
+    d = c;
+    c = b;
+    b = a;
+    a = T1 + T2;
+
+    vd[0] = f;
+    vd[1] = e;
+    vd[2] = b;
+    vd[3] = a;
+}
+
+static void vsha2c_32(uint32_t *vs2, uint32_t *vd, uint32_t *vs1)
+{
+    uint32_t a = vs2[H4(3)], b = vs2[H4(2)], e = vs2[H4(1)], f = vs2[H4(0)];
+    uint32_t c = vd[H4(3)], d = vd[H4(2)], g = vd[H4(1)], h = vd[H4(0)];
+    uint32_t W0 = vs1[H4(0)], W1 = vs1[H4(1)];
+    uint32_t T1 = h + sum1_32(e) + ch(e, f, g) + W0;
+    uint32_t T2 = sum0_32(a) + maj(a, b, c);
+
+    h = g;
+    g = f;
+    f = e;
+    e = d + T1;
+    d = c;
+    c = b;
+    b = a;
+    a = T1 + T2;
+
+    T1 = h + sum1_32(e) + ch(e, f, g) + W1;
+    T2 = sum0_32(a) + maj(a, b, c);
+    h = g;
+    g = f;
+    f = e;
+    e = d + T1;
+    d = c;
+    c = b;
+    b = a;
+    a = T1 + T2;
+
+    vd[H4(0)] = f;
+    vd[H4(1)] = e;
+    vd[H4(2)] = b;
+    vd[H4(3)] = a;
+}
+
+void HELPER(vsha2ch32_vv)(void *vd, void *vs1, void *vs2, CPURISCVState *env,
+                          uint32_t desc)
+{
+    const uint32_t esz = 4;
+    uint32_t total_elems;
+    uint32_t vta = vext_vta(desc);
+
+    for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) {
+        vsha2c_32(((uint32_t *)vs2) + 4 * i, ((uint32_t *)vd) + 4 * i,
+                  ((uint32_t *)vs1) + 4 * i + 2);
+    }
+
+    /* set tail elements to 1s */
+    total_elems = vext_get_total_elems(env, desc, esz);
+    vext_set_elems_1s(vd, vta, env->vl * esz, total_elems * esz);
+    env->vstart = 0;
+}
+
+void HELPER(vsha2ch64_vv)(void *vd, void *vs1, void *vs2, CPURISCVState *env,
+                          uint32_t desc)
+{
+    const uint32_t esz = 8;
+    uint32_t total_elems;
+    uint32_t vta = vext_vta(desc);
+
+    for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) {
+        vsha2c_64(((uint64_t *)vs2) + 4 * i, ((uint64_t *)vd) + 4 * i,
+                  ((uint64_t *)vs1) + 4 * i + 2);
+    }
+
+    /* set tail elements to 1s */
+    total_elems = vext_get_total_elems(env, desc, esz);
+    vext_set_elems_1s(vd, vta, env->vl * esz, total_elems * esz);
+    env->vstart = 0;
+}
+
+void HELPER(vsha2cl32_vv)(void *vd, void *vs1, void *vs2, CPURISCVState *env,
+                          uint32_t desc)
+{
+    const uint32_t esz = 4;
+    uint32_t total_elems;
+    uint32_t vta = vext_vta(desc);
+
+    for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) {
+        vsha2c_32(((uint32_t *)vs2) + 4 * i, ((uint32_t *)vd) + 4 * i,
+                  (((uint32_t *)vs1) + 4 * i));
+    }
+
+    /* set tail elements to 1s */
+    total_elems = vext_get_total_elems(env, desc, esz);
+    vext_set_elems_1s(vd, vta, env->vl * esz, total_elems * esz);
+    env->vstart = 0;
+}
+
+void HELPER(vsha2cl64_vv)(void *vd, void *vs1, void *vs2, CPURISCVState *env,
+                          uint32_t desc)
+{
+    uint32_t esz = 8;
+    uint32_t total_elems;
+    uint32_t vta = vext_vta(desc);
+
+    for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) {
+        vsha2c_64(((uint64_t *)vs2) + 4 * i, ((uint64_t *)vd) + 4 * i,
+                  (((uint64_t *)vs1) + 4 * i));
+    }
+
+    /* set tail elements to 1s */
+    total_elems = vext_get_total_elems(env, desc, esz);
+    vext_set_elems_1s(vd, vta, env->vl * esz, total_elems * esz);
+    env->vstart = 0;
+}
+
+static inline uint32_t p1(uint32_t x)
+{
+    return x ^ rol32(x, 15) ^ rol32(x, 23);
+}
+
+static inline uint32_t zvksh_w(uint32_t m16, uint32_t m9, uint32_t m3,
+                               uint32_t m13, uint32_t m6)
+{
+    return p1(m16 ^ m9 ^ rol32(m3, 15)) ^ rol32(m13, 7) ^ m6;
+}
+
+void HELPER(vsm3me_vv)(void *vd_vptr, void *vs1_vptr, void *vs2_vptr,
+                       CPURISCVState *env, uint32_t desc)
+{
+    uint32_t esz = memop_size(FIELD_EX64(env->vtype, VTYPE, VSEW));
+    uint32_t total_elems = vext_get_total_elems(env, desc, esz);
+    uint32_t vta = vext_vta(desc);
+    uint32_t *vd = vd_vptr;
+    uint32_t *vs1 = vs1_vptr;
+    uint32_t *vs2 = vs2_vptr;
+
+    for (int i = env->vstart / 8; i < env->vl / 8; i++) {
+        uint32_t w[24];
+        for (int j = 0; j < 8; j++) {
+            w[j] = bswap32(vs1[H4((i * 8) + j)]);
+            w[j + 8] = bswap32(vs2[H4((i * 8) + j)]);
+        }
+        for (int j = 0; j < 8; j++) {
+            w[j + 16] =
+                zvksh_w(w[j], w[j + 7], w[j + 13], w[j + 3], w[j + 10]);
+        }
+        for (int j = 0; j < 8; j++) {
+            vd[(i * 8) + j] = bswap32(w[H4(j + 16)]);
+        }
+    }
+    vext_set_elems_1s(vd_vptr, vta, env->vl * esz, total_elems * esz);
+    env->vstart = 0;
+}
+
+static inline uint32_t ff1(uint32_t x, uint32_t y, uint32_t z)
+{
+    return x ^ y ^ z;
+}
+
+static inline uint32_t ff2(uint32_t x, uint32_t y, uint32_t z)
+{
+    return (x & y) | (x & z) | (y & z);
+}
+
+static inline uint32_t ff_j(uint32_t x, uint32_t y, uint32_t z, uint32_t j)
+{
+    return (j <= 15) ? ff1(x, y, z) : ff2(x, y, z);
+}
+
+static inline uint32_t gg1(uint32_t x, uint32_t y, uint32_t z)
+{
+    return x ^ y ^ z;
+}
+
+static inline uint32_t gg2(uint32_t x, uint32_t y, uint32_t z)
+{
+    return (x & y) | (~x & z);
+}
+
+static inline uint32_t gg_j(uint32_t x, uint32_t y, uint32_t z, uint32_t j)
+{
+    return (j <= 15) ? gg1(x, y, z) : gg2(x, y, z);
+}
+
+static inline uint32_t t_j(uint32_t j)
+{
+    return (j <= 15) ? 0x79cc4519 : 0x7a879d8a;
+}
+
+static inline uint32_t p_0(uint32_t x)
+{
+    return x ^ rol32(x, 9) ^ rol32(x, 17);
+}
+
+static void sm3c(uint32_t *vd, uint32_t *vs1, uint32_t *vs2, uint32_t uimm)
+{
+    uint32_t x0, x1;
+    uint32_t j;
+    uint32_t ss1, ss2, tt1, tt2;
+    x0 = vs2[0] ^ vs2[4];
+    x1 = vs2[1] ^ vs2[5];
+    j = 2 * uimm;
+    ss1 = rol32(rol32(vs1[0], 12) + vs1[4] + rol32(t_j(j), j % 32), 7);
+    ss2 = ss1 ^ rol32(vs1[0], 12);
+    tt1 = ff_j(vs1[0], vs1[1], vs1[2], j) + vs1[3] + ss2 + x0;
+    tt2 = gg_j(vs1[4], vs1[5], vs1[6], j) + vs1[7] + ss1 + vs2[0];
+    vs1[3] = vs1[2];
+    vd[3] = rol32(vs1[1], 9);
+    vs1[1] = vs1[0];
+    vd[1] = tt1;
+    vs1[7] = vs1[6];
+    vd[7] = rol32(vs1[5], 19);
+    vs1[5] = vs1[4];
+    vd[5] = p_0(tt2);
+    j = 2 * uimm + 1;
+    ss1 = rol32(rol32(vd[1], 12) + vd[5] + rol32(t_j(j), j % 32), 7);
+    ss2 = ss1 ^ rol32(vd[1], 12);
+    tt1 = ff_j(vd[1], vs1[1], vd[3], j) + vs1[3] + ss2 + x1;
+    tt2 = gg_j(vd[5], vs1[5], vd[7], j) + vs1[7] + ss1 + vs2[1];
+    vd[2] = rol32(vs1[1], 9);
+    vd[0] = tt1;
+    vd[6] = rol32(vs1[5], 19);
+    vd[4] = p_0(tt2);
+}
+
+void HELPER(vsm3c_vi)(void *vd_vptr, void *vs2_vptr, uint32_t uimm,
+                      CPURISCVState *env, uint32_t desc)
+{
+    uint32_t esz = memop_size(FIELD_EX64(env->vtype, VTYPE, VSEW));
+    uint32_t total_elems = vext_get_total_elems(env, desc, esz);
+    uint32_t vta = vext_vta(desc);
+    uint32_t *vd = vd_vptr;
+    uint32_t *vs2 = vs2_vptr;
+    uint32_t v1[8], v2[8], v3[8];
+
+    for (int i = env->vstart / 8; i < env->vl / 8; i++) {
+        for (int k = 0; k < 8; k++) {
+            v2[k] = bswap32(vd[H4(i * 8 + k)]);
+            v3[k] = bswap32(vs2[H4(i * 8 + k)]);
+        }
+        sm3c(v1, v2, v3, uimm);
+        for (int k = 0; k < 8; k++) {
+            vd[i * 8 + k] = bswap32(v1[H4(k)]);
+        }
+    }
+    vext_set_elems_1s(vd_vptr, vta, env->vl * esz, total_elems * esz);
+    env->vstart = 0;
+}
+
+void HELPER(vghsh_vv)(void *vd_vptr, void *vs1_vptr, void *vs2_vptr,
+                      CPURISCVState *env, uint32_t desc)
+{
+    uint64_t *vd = vd_vptr;
+    uint64_t *vs1 = vs1_vptr;
+    uint64_t *vs2 = vs2_vptr;
+    uint32_t vta = vext_vta(desc);
+    uint32_t total_elems = vext_get_total_elems(env, desc, 4);
+
+    for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) {
+        uint64_t Y[2] = {vd[i * 2 + 0], vd[i * 2 + 1]};
+        uint64_t H[2] = {brev8(vs2[i * 2 + 0]), brev8(vs2[i * 2 + 1])};
+        uint64_t X[2] = {vs1[i * 2 + 0], vs1[i * 2 + 1]};
+        uint64_t Z[2] = {0, 0};
+
+        uint64_t S[2] = {brev8(Y[0] ^ X[0]), brev8(Y[1] ^ X[1])};
+
+        for (int j = 0; j < 128; j++) {
+            if ((S[j / 64] >> (j % 64)) & 1) {
+                Z[0] ^= H[0];
+                Z[1] ^= H[1];
+            }
+            bool reduce = ((H[1] >> 63) & 1);
+            H[1] = H[1] << 1 | H[0] >> 63;
+            H[0] = H[0] << 1;
+            if (reduce) {
+                H[0] ^= 0x87;
+            }
+        }
+
+        vd[i * 2 + 0] = brev8(Z[0]);
+        vd[i * 2 + 1] = brev8(Z[1]);
+    }
+    /* set tail elements to 1s */
+    vext_set_elems_1s(vd, vta, env->vl * 4, total_elems * 4);
+    env->vstart = 0;
+}
+
+void HELPER(vgmul_vv)(void *vd_vptr, void *vs2_vptr, CPURISCVState *env,
+                      uint32_t desc)
+{
+    uint64_t *vd = vd_vptr;
+    uint64_t *vs2 = vs2_vptr;
+    uint32_t vta = vext_vta(desc);
+    uint32_t total_elems = vext_get_total_elems(env, desc, 4);
+
+    for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) {
+        uint64_t Y[2] = {brev8(vd[i * 2 + 0]), brev8(vd[i * 2 + 1])};
+        uint64_t H[2] = {brev8(vs2[i * 2 + 0]), brev8(vs2[i * 2 + 1])};
+        uint64_t Z[2] = {0, 0};
+
+        for (int j = 0; j < 128; j++) {
+            if ((Y[j / 64] >> (j % 64)) & 1) {
+                Z[0] ^= H[0];
+                Z[1] ^= H[1];
+            }
+            bool reduce = ((H[1] >> 63) & 1);
+            H[1] = H[1] << 1 | H[0] >> 63;
+            H[0] = H[0] << 1;
+            if (reduce) {
+                H[0] ^= 0x87;
+            }
+        }
+
+        vd[i * 2 + 0] = brev8(Z[0]);
+        vd[i * 2 + 1] = brev8(Z[1]);
+    }
+    /* set tail elements to 1s */
+    vext_set_elems_1s(vd, vta, env->vl * 4, total_elems * 4);
+    env->vstart = 0;
+}
+
+void HELPER(vsm4k_vi)(void *vd, void *vs2, uint32_t uimm5, CPURISCVState *env,
+                      uint32_t desc)
+{
+    const uint32_t egs = 4;
+    uint32_t rnd = uimm5 & 0x7;
+    uint32_t group_start = env->vstart / egs;
+    uint32_t group_end = env->vl / egs;
+    uint32_t esz = sizeof(uint32_t);
+    uint32_t total_elems = vext_get_total_elems(env, desc, esz);
+
+    for (uint32_t i = group_start; i < group_end; ++i) {
+        uint32_t vstart = i * egs;
+        uint32_t vend = (i + 1) * egs;
+        uint32_t rk[4] = {0};
+        uint32_t tmp[8] = {0};
+
+        for (uint32_t j = vstart; j < vend; ++j) {
+            rk[j - vstart] = *((uint32_t *)vs2 + H4(j));
+        }
+
+        for (uint32_t j = 0; j < egs; ++j) {
+            tmp[j] = rk[j];
+        }
+
+        for (uint32_t j = 0; j < egs; ++j) {
+            uint32_t b, s;
+            b = tmp[j + 1] ^ tmp[j + 2] ^ tmp[j + 3] ^ sm4_ck[rnd * 4 + j];
+
+            s = sm4_subword(b);
+
+            tmp[j + 4] = tmp[j] ^ (s ^ rol32(s, 13) ^ rol32(s, 23));
+        }
+
+        for (uint32_t j = vstart; j < vend; ++j) {
+            *((uint32_t *)vd + H4(j)) = tmp[egs + (j - vstart)];
+        }
+    }
+
+    env->vstart = 0;
+    /* set tail elements to 1s */
+    vext_set_elems_1s(vd, vext_vta(desc), env->vl * esz, total_elems * esz);
+}
+
+static void do_sm4_round(uint32_t *rk, uint32_t *buf)
+{
+    const uint32_t egs = 4;
+    uint32_t s, b;
+
+    for (uint32_t j = egs; j < egs * 2; ++j) {
+        b = buf[j - 3] ^ buf[j - 2] ^ buf[j - 1] ^ rk[j - 4];
+
+        s = sm4_subword(b);
+
+        buf[j] = buf[j - 4] ^ (s ^ rol32(s, 2) ^ rol32(s, 10) ^ rol32(s, 18) ^
+                               rol32(s, 24));
+    }
+}
+
+void HELPER(vsm4r_vv)(void *vd, void *vs2, CPURISCVState *env, uint32_t desc)
+{
+    const uint32_t egs = 4;
+    uint32_t group_start = env->vstart / egs;
+    uint32_t group_end = env->vl / egs;
+    uint32_t esz = sizeof(uint32_t);
+    uint32_t total_elems = vext_get_total_elems(env, desc, esz);
+
+    for (uint32_t i = group_start; i < group_end; ++i) {
+        uint32_t vstart = i * egs;
+        uint32_t vend = (i + 1) * egs;
+        uint32_t rk[4] = {0};
+        uint32_t tmp[8] = {0};
+
+        for (uint32_t j = vstart; j < vend; ++j) {
+            rk[j - vstart] = *((uint32_t *)vs2 + H4(j));
+        }
+
+        for (uint32_t j = vstart; j < vend; ++j) {
+            tmp[j - vstart] = *((uint32_t *)vd + H4(j));
+        }
+
+        do_sm4_round(rk, tmp);
+
+        for (uint32_t j = vstart; j < vend; ++j) {
+            *((uint32_t *)vd + H4(j)) = tmp[egs + (j - vstart)];
+        }
+    }
+
+    env->vstart = 0;
+    /* set tail elements to 1s */
+    vext_set_elems_1s(vd, vext_vta(desc), env->vl * esz, total_elems * esz);
+}
+
+void HELPER(vsm4r_vs)(void *vd, void *vs2, CPURISCVState *env, uint32_t desc)
+{
+    const uint32_t egs = 4;
+    uint32_t group_start = env->vstart / egs;
+    uint32_t group_end = env->vl / egs;
+    uint32_t esz = sizeof(uint32_t);
+    uint32_t total_elems = vext_get_total_elems(env, desc, esz);
+
+    for (uint32_t i = group_start; i < group_end; ++i) {
+        uint32_t vstart = i * egs;
+        uint32_t vend = (i + 1) * egs;
+        uint32_t rk[4] = {0};
+        uint32_t tmp[8] = {0};
+
+        for (uint32_t j = 0; j < egs; ++j) {
+            rk[j] = *((uint32_t *)vs2 + H4(j));
+        }
+
+        for (uint32_t j = vstart; j < vend; ++j) {
+            tmp[j - vstart] = *((uint32_t *)vd + H4(j));
+        }
+
+        do_sm4_round(rk, tmp);
+
+        for (uint32_t j = vstart; j < vend; ++j) {
+            *((uint32_t *)vd + H4(j)) = tmp[egs + (j - vstart)];
+        }
+    }
+
+    env->vstart = 0;
+    /* set tail elements to 1s */
+    vext_set_elems_1s(vd, vext_vta(desc), env->vl * esz, total_elems * esz);
+}
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index bf7e0029a1..3fb05cc3d6 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -22,10 +22,12 @@
 #include "cpu.h"
 #include "exec/memop.h"
 #include "exec/exec-all.h"
+#include "exec/cpu_ldst.h"
 #include "exec/helper-proto.h"
 #include "fpu/softfloat.h"
 #include "tcg/tcg-gvec-desc.h"
 #include "internals.h"
+#include "vector_internals.h"
 #include <math.h>
 
 target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
@@ -72,68 +74,6 @@ target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
     return vl;
 }
 
-/*
- * Note that vector data is stored in host-endian 64-bit chunks,
- * so addressing units smaller than that needs a host-endian fixup.
- */
-#if HOST_BIG_ENDIAN
-#define H1(x)   ((x) ^ 7)
-#define H1_2(x) ((x) ^ 6)
-#define H1_4(x) ((x) ^ 4)
-#define H2(x)   ((x) ^ 3)
-#define H4(x)   ((x) ^ 1)
-#define H8(x)   ((x))
-#else
-#define H1(x)   (x)
-#define H1_2(x) (x)
-#define H1_4(x) (x)
-#define H2(x)   (x)
-#define H4(x)   (x)
-#define H8(x)   (x)
-#endif
-
-static inline uint32_t vext_nf(uint32_t desc)
-{
-    return FIELD_EX32(simd_data(desc), VDATA, NF);
-}
-
-static inline uint32_t vext_vm(uint32_t desc)
-{
-    return FIELD_EX32(simd_data(desc), VDATA, VM);
-}
-
-/*
- * Encode LMUL to lmul as following:
- *     LMUL    vlmul    lmul
- *      1       000       0
- *      2       001       1
- *      4       010       2
- *      8       011       3
- *      -       100       -
- *     1/8      101      -3
- *     1/4      110      -2
- *     1/2      111      -1
- */
-static inline int32_t vext_lmul(uint32_t desc)
-{
-    return sextract32(FIELD_EX32(simd_data(desc), VDATA, LMUL), 0, 3);
-}
-
-static inline uint32_t vext_vta(uint32_t desc)
-{
-    return FIELD_EX32(simd_data(desc), VDATA, VTA);
-}
-
-static inline uint32_t vext_vma(uint32_t desc)
-{
-    return FIELD_EX32(simd_data(desc), VDATA, VMA);
-}
-
-static inline uint32_t vext_vta_all_1s(uint32_t desc)
-{
-    return FIELD_EX32(simd_data(desc), VDATA, VTA_ALL_1S);
-}
-
 /*
  * Get the maximum number of elements can be operated.
  *
@@ -152,21 +92,6 @@ static inline uint32_t vext_max_elems(uint32_t desc, uint32_t log2_esz)
     return scale < 0 ? vlenb >> -scale : vlenb << scale;
 }
 
-/*
- * Get number of total elements, including prestart, body and tail elements.
- * Note that when LMUL < 1, the tail includes the elements past VLMAX that
- * are held in the same vector register.
- */
-static inline uint32_t vext_get_total_elems(CPURISCVState *env, uint32_t desc,
-                                            uint32_t esz)
-{
-    uint32_t vlenb = simd_maxsz(desc);
-    uint32_t sew = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW);
-    int8_t emul = ctzl(esz) - ctzl(sew) + vext_lmul(desc) < 0 ? 0 :
-                  ctzl(esz) - ctzl(sew) + vext_lmul(desc);
-    return (vlenb << emul) / esz;
-}
-
 static inline target_ulong adjust_addr(CPURISCVState *env, target_ulong addr)
 {
     return (addr & ~env->cur_pmmask) | env->cur_pmbase;
@@ -199,20 +124,6 @@ static void probe_pages(CPURISCVState *env, target_ulong addr,
     }
 }
 
-/* set agnostic elements to 1s */
-static void vext_set_elems_1s(void *base, uint32_t is_agnostic, uint32_t cnt,
-                              uint32_t tot)
-{
-    if (is_agnostic == 0) {
-        /* policy undisturbed */
-        return;
-    }
-    if (tot - cnt == 0) {
-        return;
-    }
-    memset(base + cnt, -1, tot - cnt);
-}
-
 static inline void vext_set_elem_mask(void *v0, int index,
                                       uint8_t value)
 {
@@ -222,18 +133,6 @@ static inline void vext_set_elem_mask(void *v0, int index,
     ((uint64_t *)v0)[idx] = deposit64(old, pos, 1, value);
 }
 
-/*
- * Earlier designs (pre-0.9) had a varying number of bits
- * per mask value (MLEN). In the 0.9 design, MLEN=1.
- * (Section 4.5)
- */
-static inline int vext_elem_mask(void *v0, int index)
-{
-    int idx = index / 64;
-    int pos = index  % 64;
-    return (((uint64_t *)v0)[idx] >> pos) & 1;
-}
-
 /* elements operations for load and store */
 typedef void vext_ldst_elem_fn(CPURISCVState *env, abi_ptr addr,
                                uint32_t idx, void *vd, uintptr_t retaddr);
@@ -583,7 +482,7 @@ vext_ldff(void *vd, void *v0, target_ulong base,
                                          cpu_mmu_index(env, false));
                 if (host) {
 #ifdef CONFIG_USER_ONLY
-                    if (page_check_range(addr, offset, PAGE_READ)) {
+                    if (!page_check_range(addr, offset, PAGE_READ)) {
                         vl = i;
                         goto ProbeSuccess;
                     }
@@ -728,25 +627,15 @@ GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b)
  * Vector Integer Arithmetic Instructions
  */
 
-/* expand macro args before macro */
-#define RVVCALL(macro, ...)  macro(__VA_ARGS__)
-
 /* (TD, T1, T2, TX1, TX2) */
 #define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t
 #define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t
 #define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t
 #define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t
-#define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t
-#define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t
-#define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t
-#define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t
 #define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t
 #define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t
 #define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t
 #define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t
-#define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
-#define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
-#define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
@@ -763,16 +652,6 @@ GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b)
 #define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t
 #define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t
 
-/* operation of two vector elements */
-typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i);
-
-#define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)    \
-static void do_##NAME(void *vd, void *vs1, void *vs2, int i)    \
-{                                                               \
-    TX1 s1 = *((T1 *)vs1 + HS1(i));                             \
-    TX2 s2 = *((T2 *)vs2 + HS2(i));                             \
-    *((TD *)vd + HD(i)) = OP(s2, s1);                           \
-}
 #define DO_SUB(N, M) (N - M)
 #define DO_RSUB(N, M) (M - N)
 
@@ -785,40 +664,6 @@ RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB)
 RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB)
 RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB)
 
-static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2,
-                       CPURISCVState *env, uint32_t desc,
-                       opivv2_fn *fn, uint32_t esz)
-{
-    uint32_t vm = vext_vm(desc);
-    uint32_t vl = env->vl;
-    uint32_t total_elems = vext_get_total_elems(env, desc, esz);
-    uint32_t vta = vext_vta(desc);
-    uint32_t vma = vext_vma(desc);
-    uint32_t i;
-
-    for (i = env->vstart; i < vl; i++) {
-        if (!vm && !vext_elem_mask(v0, i)) {
-            /* set masked-off elements to 1s */
-            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);
-            continue;
-        }
-        fn(vd, vs1, vs2, i);
-    }
-    env->vstart = 0;
-    /* set tail elements to 1s */
-    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
-}
-
-/* generate the helpers for OPIVV */
-#define GEN_VEXT_VV(NAME, ESZ)                            \
-void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
-                  void *vs2, CPURISCVState *env,          \
-                  uint32_t desc)                          \
-{                                                         \
-    do_vext_vv(vd, v0, vs1, vs2, env, desc,               \
-               do_##NAME, ESZ);                           \
-}
-
 GEN_VEXT_VV(vadd_vv_b, 1)
 GEN_VEXT_VV(vadd_vv_h, 2)
 GEN_VEXT_VV(vadd_vv_w, 4)
@@ -828,18 +673,6 @@ GEN_VEXT_VV(vsub_vv_h, 2)
 GEN_VEXT_VV(vsub_vv_w, 4)
 GEN_VEXT_VV(vsub_vv_d, 8)
 
-typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i);
-
-/*
- * (T1)s1 gives the real operator type.
- * (TX1)(T1)s1 expands the operator type of widen or narrow operations.
- */
-#define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)             \
-static void do_##NAME(void *vd, target_long s1, void *vs2, int i)   \
-{                                                                   \
-    TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
-    *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1);                      \
-}
 
 RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD)
 RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD)
@@ -854,40 +687,6 @@ RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB)
 RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB)
 RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB)
 
-static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2,
-                       CPURISCVState *env, uint32_t desc,
-                       opivx2_fn fn, uint32_t esz)
-{
-    uint32_t vm = vext_vm(desc);
-    uint32_t vl = env->vl;
-    uint32_t total_elems = vext_get_total_elems(env, desc, esz);
-    uint32_t vta = vext_vta(desc);
-    uint32_t vma = vext_vma(desc);
-    uint32_t i;
-
-    for (i = env->vstart; i < vl; i++) {
-        if (!vm && !vext_elem_mask(v0, i)) {
-            /* set masked-off elements to 1s */
-            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);
-            continue;
-        }
-        fn(vd, s1, vs2, i);
-    }
-    env->vstart = 0;
-    /* set tail elements to 1s */
-    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
-}
-
-/* generate the helpers for OPIVX */
-#define GEN_VEXT_VX(NAME, ESZ)                            \
-void HELPER(NAME)(void *vd, void *v0, target_ulong s1,    \
-                  void *vs2, CPURISCVState *env,          \
-                  uint32_t desc)                          \
-{                                                         \
-    do_vext_vx(vd, v0, s1, vs2, env, desc,                \
-               do_##NAME, ESZ);                           \
-}
-
 GEN_VEXT_VX(vadd_vx_b, 1)
 GEN_VEXT_VX(vadd_vx_h, 2)
 GEN_VEXT_VX(vadd_vx_w, 4)
@@ -3636,11 +3435,6 @@ GEN_VEXT_VF(vfwnmsac_vf_h, 4)
 GEN_VEXT_VF(vfwnmsac_vf_w, 8)
 
 /* Vector Floating-Point Square-Root Instruction */
-/* (TD, T2, TX2) */
-#define OP_UU_H uint16_t, uint16_t, uint16_t
-#define OP_UU_W uint32_t, uint32_t, uint32_t
-#define OP_UU_D uint64_t, uint64_t, uint64_t
-
 #define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP)         \
 static void do_##NAME(void *vd, void *vs2, int i,      \
                       CPURISCVState *env)              \
@@ -4337,40 +4131,6 @@ GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32)
 GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64)
 
 /* Vector Floating-Point Classify Instruction */
-#define OPIVV1(NAME, TD, T2, TX2, HD, HS2, OP)         \
-static void do_##NAME(void *vd, void *vs2, int i)      \
-{                                                      \
-    TX2 s2 = *((T2 *)vs2 + HS2(i));                    \
-    *((TD *)vd + HD(i)) = OP(s2);                      \
-}
-
-#define GEN_VEXT_V(NAME, ESZ)                          \
-void HELPER(NAME)(void *vd, void *v0, void *vs2,       \
-                  CPURISCVState *env, uint32_t desc)   \
-{                                                      \
-    uint32_t vm = vext_vm(desc);                       \
-    uint32_t vl = env->vl;                             \
-    uint32_t total_elems =                             \
-        vext_get_total_elems(env, desc, ESZ);          \
-    uint32_t vta = vext_vta(desc);                     \
-    uint32_t vma = vext_vma(desc);                     \
-    uint32_t i;                                        \
-                                                       \
-    for (i = env->vstart; i < vl; i++) {               \
-        if (!vm && !vext_elem_mask(v0, i)) {           \
-            /* set masked-off elements to 1s */        \
-            vext_set_elems_1s(vd, vma, i * ESZ,        \
-                              (i + 1) * ESZ);          \
-            continue;                                  \
-        }                                              \
-        do_##NAME(vd, vs2, i);                         \
-    }                                                  \
-    env->vstart = 0;                                   \
-    /* set tail elements to 1s */                      \
-    vext_set_elems_1s(vd, vta, vl * ESZ,               \
-                      total_elems * ESZ);              \
-}
-
 target_ulong fclass_h(uint64_t frs1)
 {
     float16 f = frs1;
diff --git a/target/riscv/vector_internals.c b/target/riscv/vector_internals.c
new file mode 100644
index 0000000000..9cf5c17cde
--- /dev/null
+++ b/target/riscv/vector_internals.c
@@ -0,0 +1,81 @@
+/*
+ * RISC-V Vector Extension Internals
+ *
+ * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "vector_internals.h"
+
+/* set agnostic elements to 1s */
+void vext_set_elems_1s(void *base, uint32_t is_agnostic, uint32_t cnt,
+                       uint32_t tot)
+{
+    if (is_agnostic == 0) {
+        /* policy undisturbed */
+        return;
+    }
+    if (tot - cnt == 0) {
+        return ;
+    }
+    memset(base + cnt, -1, tot - cnt);
+}
+
+void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2,
+                CPURISCVState *env, uint32_t desc,
+                opivv2_fn *fn, uint32_t esz)
+{
+    uint32_t vm = vext_vm(desc);
+    uint32_t vl = env->vl;
+    uint32_t total_elems = vext_get_total_elems(env, desc, esz);
+    uint32_t vta = vext_vta(desc);
+    uint32_t vma = vext_vma(desc);
+    uint32_t i;
+
+    for (i = env->vstart; i < vl; i++) {
+        if (!vm && !vext_elem_mask(v0, i)) {
+            /* set masked-off elements to 1s */
+            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);
+            continue;
+        }
+        fn(vd, vs1, vs2, i);
+    }
+    env->vstart = 0;
+    /* set tail elements to 1s */
+    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
+}
+
+void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2,
+                CPURISCVState *env, uint32_t desc,
+                opivx2_fn fn, uint32_t esz)
+{
+    uint32_t vm = vext_vm(desc);
+    uint32_t vl = env->vl;
+    uint32_t total_elems = vext_get_total_elems(env, desc, esz);
+    uint32_t vta = vext_vta(desc);
+    uint32_t vma = vext_vma(desc);
+    uint32_t i;
+
+    for (i = env->vstart; i < vl; i++) {
+        if (!vm && !vext_elem_mask(v0, i)) {
+            /* set masked-off elements to 1s */
+            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);
+            continue;
+        }
+        fn(vd, s1, vs2, i);
+    }
+    env->vstart = 0;
+    /* set tail elements to 1s */
+    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
+}
diff --git a/target/riscv/vector_internals.h b/target/riscv/vector_internals.h
new file mode 100644
index 0000000000..8133111e5f
--- /dev/null
+++ b/target/riscv/vector_internals.h
@@ -0,0 +1,228 @@
+/*
+ * RISC-V Vector Extension Internals
+ *
+ * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef TARGET_RISCV_VECTOR_INTERNALS_H
+#define TARGET_RISCV_VECTOR_INTERNALS_H
+
+#include "qemu/osdep.h"
+#include "qemu/bitops.h"
+#include "cpu.h"
+#include "tcg/tcg-gvec-desc.h"
+#include "internals.h"
+
+static inline uint32_t vext_nf(uint32_t desc)
+{
+    return FIELD_EX32(simd_data(desc), VDATA, NF);
+}
+
+/*
+ * Note that vector data is stored in host-endian 64-bit chunks,
+ * so addressing units smaller than that needs a host-endian fixup.
+ */
+#if HOST_BIG_ENDIAN
+#define H1(x)   ((x) ^ 7)
+#define H1_2(x) ((x) ^ 6)
+#define H1_4(x) ((x) ^ 4)
+#define H2(x)   ((x) ^ 3)
+#define H4(x)   ((x) ^ 1)
+#define H8(x)   ((x))
+#else
+#define H1(x)   (x)
+#define H1_2(x) (x)
+#define H1_4(x) (x)
+#define H2(x)   (x)
+#define H4(x)   (x)
+#define H8(x)   (x)
+#endif
+
+/*
+ * Encode LMUL to lmul as following:
+ *     LMUL    vlmul    lmul
+ *      1       000       0
+ *      2       001       1
+ *      4       010       2
+ *      8       011       3
+ *      -       100       -
+ *     1/8      101      -3
+ *     1/4      110      -2
+ *     1/2      111      -1
+ */
+static inline int32_t vext_lmul(uint32_t desc)
+{
+    return sextract32(FIELD_EX32(simd_data(desc), VDATA, LMUL), 0, 3);
+}
+
+static inline uint32_t vext_vm(uint32_t desc)
+{
+    return FIELD_EX32(simd_data(desc), VDATA, VM);
+}
+
+static inline uint32_t vext_vma(uint32_t desc)
+{
+    return FIELD_EX32(simd_data(desc), VDATA, VMA);
+}
+
+static inline uint32_t vext_vta(uint32_t desc)
+{
+    return FIELD_EX32(simd_data(desc), VDATA, VTA);
+}
+
+static inline uint32_t vext_vta_all_1s(uint32_t desc)
+{
+    return FIELD_EX32(simd_data(desc), VDATA, VTA_ALL_1S);
+}
+
+/*
+ * Earlier designs (pre-0.9) had a varying number of bits
+ * per mask value (MLEN). In the 0.9 design, MLEN=1.
+ * (Section 4.5)
+ */
+static inline int vext_elem_mask(void *v0, int index)
+{
+    int idx = index / 64;
+    int pos = index  % 64;
+    return (((uint64_t *)v0)[idx] >> pos) & 1;
+}
+
+/*
+ * Get number of total elements, including prestart, body and tail elements.
+ * Note that when LMUL < 1, the tail includes the elements past VLMAX that
+ * are held in the same vector register.
+ */
+static inline uint32_t vext_get_total_elems(CPURISCVState *env, uint32_t desc,
+                                            uint32_t esz)
+{
+    uint32_t vlenb = simd_maxsz(desc);
+    uint32_t sew = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW);
+    int8_t emul = ctzl(esz) - ctzl(sew) + vext_lmul(desc) < 0 ? 0 :
+                  ctzl(esz) - ctzl(sew) + vext_lmul(desc);
+    return (vlenb << emul) / esz;
+}
+
+/* set agnostic elements to 1s */
+void vext_set_elems_1s(void *base, uint32_t is_agnostic, uint32_t cnt,
+                       uint32_t tot);
+
+/* expand macro args before macro */
+#define RVVCALL(macro, ...)  macro(__VA_ARGS__)
+
+/* (TD, T2, TX2) */
+#define OP_UU_B uint8_t, uint8_t, uint8_t
+#define OP_UU_H uint16_t, uint16_t, uint16_t
+#define OP_UU_W uint32_t, uint32_t, uint32_t
+#define OP_UU_D uint64_t, uint64_t, uint64_t
+
+/* (TD, T1, T2, TX1, TX2) */
+#define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t
+#define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t
+#define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t
+#define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t
+
+#define OPIVV1(NAME, TD, T2, TX2, HD, HS2, OP)         \
+static void do_##NAME(void *vd, void *vs2, int i)      \
+{                                                      \
+    TX2 s2 = *((T2 *)vs2 + HS2(i));                    \
+    *((TD *)vd + HD(i)) = OP(s2);                      \
+}
+
+#define GEN_VEXT_V(NAME, ESZ)                          \
+void HELPER(NAME)(void *vd, void *v0, void *vs2,       \
+                  CPURISCVState *env, uint32_t desc)   \
+{                                                      \
+    uint32_t vm = vext_vm(desc);                       \
+    uint32_t vl = env->vl;                             \
+    uint32_t total_elems =                             \
+        vext_get_total_elems(env, desc, ESZ);          \
+    uint32_t vta = vext_vta(desc);                     \
+    uint32_t vma = vext_vma(desc);                     \
+    uint32_t i;                                        \
+                                                       \
+    for (i = env->vstart; i < vl; i++) {               \
+        if (!vm && !vext_elem_mask(v0, i)) {           \
+            /* set masked-off elements to 1s */        \
+            vext_set_elems_1s(vd, vma, i * ESZ,        \
+                              (i + 1) * ESZ);          \
+            continue;                                  \
+        }                                              \
+        do_##NAME(vd, vs2, i);                         \
+    }                                                  \
+    env->vstart = 0;                                   \
+    /* set tail elements to 1s */                      \
+    vext_set_elems_1s(vd, vta, vl * ESZ,               \
+                      total_elems * ESZ);              \
+}
+
+/* operation of two vector elements */
+typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i);
+
+#define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)    \
+static void do_##NAME(void *vd, void *vs1, void *vs2, int i)    \
+{                                                               \
+    TX1 s1 = *((T1 *)vs1 + HS1(i));                             \
+    TX2 s2 = *((T2 *)vs2 + HS2(i));                             \
+    *((TD *)vd + HD(i)) = OP(s2, s1);                           \
+}
+
+void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2,
+                CPURISCVState *env, uint32_t desc,
+                opivv2_fn *fn, uint32_t esz);
+
+/* generate the helpers for OPIVV */
+#define GEN_VEXT_VV(NAME, ESZ)                            \
+void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
+                  void *vs2, CPURISCVState *env,          \
+                  uint32_t desc)                          \
+{                                                         \
+    do_vext_vv(vd, v0, vs1, vs2, env, desc,               \
+               do_##NAME, ESZ);                           \
+}
+
+typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i);
+
+/*
+ * (T1)s1 gives the real operator type.
+ * (TX1)(T1)s1 expands the operator type of widen or narrow operations.
+ */
+#define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)             \
+static void do_##NAME(void *vd, target_long s1, void *vs2, int i)   \
+{                                                                   \
+    TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
+    *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1);                      \
+}
+
+void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2,
+                CPURISCVState *env, uint32_t desc,
+                opivx2_fn fn, uint32_t esz);
+
+/* generate the helpers for OPIVX */
+#define GEN_VEXT_VX(NAME, ESZ)                            \
+void HELPER(NAME)(void *vd, void *v0, target_ulong s1,    \
+                  void *vs2, CPURISCVState *env,          \
+                  uint32_t desc)                          \
+{                                                         \
+    do_vext_vx(vd, v0, s1, vs2, env, desc,                \
+               do_##NAME, ESZ);                           \
+}
+
+/* Three of the widening shortening macros: */
+/* (TD, T1, T2, TX1, TX2) */
+#define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
+#define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
+#define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
+
+#endif /* TARGET_RISCV_VECTOR_INTERNALS_H */
diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h
index eb5b65b7d3..304029e57c 100644
--- a/target/s390x/cpu.h
+++ b/target/s390x/cpu.h
@@ -36,6 +36,8 @@
 /* The z/Architecture has a strong memory model with some store-after-load re-ordering */
 #define TCG_GUEST_DEFAULT_MO      (TCG_MO_ALL & ~TCG_MO_ST_LD)
 
+#define TARGET_HAS_PRECISE_SMC
+
 #define TARGET_INSN_START_EXTRA_WORDS 2
 
 #define MMU_USER_IDX 0
diff --git a/target/s390x/cpu_features.h b/target/s390x/cpu_features.h
index 87463f064d..a9bd68a2e1 100644
--- a/target/s390x/cpu_features.h
+++ b/target/s390x/cpu_features.h
@@ -43,6 +43,7 @@ typedef enum {
     S390_FEAT_TYPE_KDSA,
     S390_FEAT_TYPE_SORTL,
     S390_FEAT_TYPE_DFLTCC,
+    S390_FEAT_TYPE_UV_FEAT_GUEST,
 } S390FeatType;
 
 /* Definition of a CPU feature */
diff --git a/target/s390x/cpu_features_def.h.inc b/target/s390x/cpu_features_def.h.inc
index e3cfe63735..e68da9b8ff 100644
--- a/target/s390x/cpu_features_def.h.inc
+++ b/target/s390x/cpu_features_def.h.inc
@@ -379,3 +379,7 @@ DEF_FEAT(DEFLATE_GHDT, "dfltcc-gdht", DFLTCC, 1, "DFLTCC GDHT")
 DEF_FEAT(DEFLATE_CMPR, "dfltcc-cmpr", DFLTCC, 2, "DFLTCC CMPR")
 DEF_FEAT(DEFLATE_XPND, "dfltcc-xpnd", DFLTCC, 4, "DFLTCC XPND")
 DEF_FEAT(DEFLATE_F0, "dfltcc-f0", DFLTCC, 192, "DFLTCC format 0 parameter-block")
+
+/* Features exposed via the UV-CALL instruction */
+DEF_FEAT(UV_FEAT_AP, "appv", UV_FEAT_GUEST, 4, "AP instructions installed for secure guests")
+DEF_FEAT(UV_FEAT_AP_INTR, "appvi", UV_FEAT_GUEST, 5, "AP instructions interruption support for secure guests")
diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c
index 91ce896491..98f14c09c2 100644
--- a/target/s390x/cpu_models.c
+++ b/target/s390x/cpu_models.c
@@ -483,6 +483,8 @@ static void check_consistency(const S390CPUModel *model)
         { S390_FEAT_DIAG_318, S390_FEAT_EXTENDED_LENGTH_SCCB },
         { S390_FEAT_NNPA, S390_FEAT_VECTOR },
         { S390_FEAT_RDP, S390_FEAT_LOCAL_TLB_CLEARING },
+        { S390_FEAT_UV_FEAT_AP, S390_FEAT_AP },
+        { S390_FEAT_UV_FEAT_AP_INTR, S390_FEAT_UV_FEAT_AP },
     };
     int i;
 
diff --git a/target/s390x/gen-features.c b/target/s390x/gen-features.c
index 1e3b7c0dc9..2b2bfc3736 100644
--- a/target/s390x/gen-features.c
+++ b/target/s390x/gen-features.c
@@ -576,6 +576,8 @@ static uint16_t full_GEN16_GA1[] = {
     S390_FEAT_RDP,
     S390_FEAT_PAI,
     S390_FEAT_PAIE,
+    S390_FEAT_UV_FEAT_AP,
+    S390_FEAT_UV_FEAT_AP_INTR,
 };
 
 
diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c
index 852fbd0df7..bc5c56a305 100644
--- a/target/s390x/kvm/kvm.c
+++ b/target/s390x/kvm/kvm.c
@@ -240,7 +240,7 @@ static void kvm_s390_enable_cmma(void)
     trace_kvm_enable_cmma(rc);
 }
 
-static void kvm_s390_set_attr(uint64_t attr)
+static void kvm_s390_set_crypto_attr(uint64_t attr)
 {
     struct kvm_device_attr attribute = {
         .group = KVM_S390_VM_CRYPTO,
@@ -265,7 +265,7 @@ static void kvm_s390_init_aes_kw(void)
     }
 
     if (kvm_vm_check_attr(kvm_state, KVM_S390_VM_CRYPTO, attr)) {
-            kvm_s390_set_attr(attr);
+            kvm_s390_set_crypto_attr(attr);
     }
 }
 
@@ -279,7 +279,7 @@ static void kvm_s390_init_dea_kw(void)
     }
 
     if (kvm_vm_check_attr(kvm_state, KVM_S390_VM_CRYPTO, attr)) {
-            kvm_s390_set_attr(attr);
+            kvm_s390_set_crypto_attr(attr);
     }
 }
 
@@ -2288,6 +2288,53 @@ static int configure_cpu_subfunc(const S390FeatBitmap features)
     return kvm_vm_ioctl(kvm_state, KVM_SET_DEVICE_ATTR, &attr);
 }
 
+static bool ap_available(void)
+{
+    return kvm_vm_check_attr(kvm_state, KVM_S390_VM_CRYPTO,
+                             KVM_S390_VM_CRYPTO_ENABLE_APIE);
+}
+
+static bool ap_enabled(const S390FeatBitmap features)
+{
+    return test_bit(S390_FEAT_AP, features);
+}
+
+static bool uv_feat_supported(void)
+{
+    return kvm_vm_check_attr(kvm_state, KVM_S390_VM_CPU_MODEL,
+                             KVM_S390_VM_CPU_PROCESSOR_UV_FEAT_GUEST);
+}
+
+static int query_uv_feat_guest(S390FeatBitmap features)
+{
+    struct kvm_s390_vm_cpu_uv_feat prop = {};
+    struct kvm_device_attr attr = {
+        .group = KVM_S390_VM_CPU_MODEL,
+        .attr = KVM_S390_VM_CPU_MACHINE_UV_FEAT_GUEST,
+        .addr = (uint64_t) &prop,
+    };
+    int rc;
+
+    /* AP support check is currently the only user of the UV feature test */
+    if (!(uv_feat_supported() && ap_available())) {
+        return 0;
+    }
+
+    rc = kvm_vm_ioctl(kvm_state, KVM_GET_DEVICE_ATTR, &attr);
+    if (rc) {
+        return  rc;
+    }
+
+    if (prop.ap) {
+        set_bit(S390_FEAT_UV_FEAT_AP, features);
+    }
+    if (prop.ap_intr) {
+        set_bit(S390_FEAT_UV_FEAT_AP_INTR, features);
+    }
+
+    return 0;
+}
+
 static int kvm_to_feat[][2] = {
     { KVM_S390_VM_CPU_FEAT_ESOP, S390_FEAT_ESOP },
     { KVM_S390_VM_CPU_FEAT_SIEF2, S390_FEAT_SIE_F2 },
@@ -2467,8 +2514,7 @@ void kvm_s390_get_host_cpu_model(S390CPUModel *model, Error **errp)
         return;
     }
     /* for now, we can only provide the AP feature with HW support */
-    if (kvm_vm_check_attr(kvm_state, KVM_S390_VM_CRYPTO,
-        KVM_S390_VM_CRYPTO_ENABLE_APIE)) {
+    if (ap_available()) {
         set_bit(S390_FEAT_AP, model->features);
     }
 
@@ -2483,18 +2529,45 @@ void kvm_s390_get_host_cpu_model(S390CPUModel *model, Error **errp)
         set_bit(S390_FEAT_DIAG_318, model->features);
     }
 
+    /* Test for Ultravisor features that influence secure guest behavior */
+    query_uv_feat_guest(model->features);
+
     /* strip of features that are not part of the maximum model */
     bitmap_and(model->features, model->features, model->def->full_feat,
                S390_FEAT_MAX);
 }
 
+static int configure_uv_feat_guest(const S390FeatBitmap features)
+{
+    struct kvm_s390_vm_cpu_uv_feat uv_feat = {};
+    struct kvm_device_attr attribute = {
+        .group = KVM_S390_VM_CPU_MODEL,
+        .attr = KVM_S390_VM_CPU_PROCESSOR_UV_FEAT_GUEST,
+        .addr = (__u64) &uv_feat,
+    };
+
+    /* AP support check is currently the only user of the UV feature test */
+    if (!(uv_feat_supported() && ap_enabled(features))) {
+        return 0;
+    }
+
+    if (test_bit(S390_FEAT_UV_FEAT_AP, features)) {
+        uv_feat.ap = 1;
+    }
+    if (test_bit(S390_FEAT_UV_FEAT_AP_INTR, features)) {
+        uv_feat.ap_intr = 1;
+    }
+
+    return kvm_vm_ioctl(kvm_state, KVM_SET_DEVICE_ATTR, &attribute);
+}
+
 static void kvm_s390_configure_apie(bool interpret)
 {
     uint64_t attr = interpret ? KVM_S390_VM_CRYPTO_ENABLE_APIE :
                                 KVM_S390_VM_CRYPTO_DISABLE_APIE;
 
     if (kvm_vm_check_attr(kvm_state, KVM_S390_VM_CRYPTO, attr)) {
-        kvm_s390_set_attr(attr);
+        kvm_s390_set_crypto_attr(attr);
     }
 }
 
@@ -2548,9 +2621,16 @@ void kvm_s390_apply_cpu_model(const S390CPUModel *model, Error **errp)
         kvm_s390_enable_cmma();
     }
 
-    if (test_bit(S390_FEAT_AP, model->features)) {
+    if (ap_enabled(model->features)) {
         kvm_s390_configure_apie(true);
     }
+
+    /* configure UV-features for the guest indicated via query / test_bit */
+    rc = configure_uv_feat_guest(model->features);
+    if (rc) {
+        error_setg(errp, "KVM: Error configuring CPU UV features %d", rc);
+        return;
+    }
 }
 
 void kvm_s390_restart_interrupt(S390CPU *cpu)
diff --git a/target/s390x/kvm/trace-events b/target/s390x/kvm/trace-events
index 818f1a37a1..cdf2c4f8f2 100644
--- a/target/s390x/kvm/trace-events
+++ b/target/s390x/kvm/trace-events
@@ -1,4 +1,4 @@
-# See docs/devel/tracing.txt for syntax documentation.
+# See docs/devel/tracing.rst for syntax documentation.
 
 # kvm.c
 kvm_enable_cmma(int rc) "CMMA: enabling with result code %d"
diff --git a/target/s390x/tcg/crypto_helper.c b/target/s390x/tcg/crypto_helper.c
index 762b277884..93aabd236f 100644
--- a/target/s390x/tcg/crypto_helper.c
+++ b/target/s390x/tcg/crypto_helper.c
@@ -13,7 +13,6 @@
  */
 
 #include "qemu/osdep.h"
-#include "qemu/main-loop.h"
 #include "qemu/guest-random.h"
 #include "s390x-internal.h"
 #include "tcg_s390x.h"
diff --git a/target/s390x/tcg/excp_helper.c b/target/s390x/tcg/excp_helper.c
index b7116d0577..b875bf14e5 100644
--- a/target/s390x/tcg/excp_helper.c
+++ b/target/s390x/tcg/excp_helper.c
@@ -23,7 +23,6 @@
 #include "cpu.h"
 #include "exec/helper-proto.h"
 #include "exec/exec-all.h"
-#include "exec/cpu_ldst.h"
 #include "s390x-internal.h"
 #include "tcg_s390x.h"
 #ifndef CONFIG_USER_ONLY
diff --git a/target/s390x/tcg/fpu_helper.c b/target/s390x/tcg/fpu_helper.c
index c329b31261..d8bd5748fa 100644
--- a/target/s390x/tcg/fpu_helper.c
+++ b/target/s390x/tcg/fpu_helper.c
@@ -23,7 +23,6 @@
 #include "s390x-internal.h"
 #include "tcg_s390x.h"
 #include "exec/exec-all.h"
-#include "exec/cpu_ldst.h"
 #include "exec/helper-proto.h"
 #include "fpu/softfloat.h"
 
diff --git a/target/s390x/tcg/misc_helper.c b/target/s390x/tcg/misc_helper.c
index 576157b1f3..e85658ce22 100644
--- a/target/s390x/tcg/misc_helper.c
+++ b/target/s390x/tcg/misc_helper.c
@@ -20,7 +20,6 @@
 
 #include "qemu/osdep.h"
 #include "qemu/cutils.h"
-#include "qemu/main-loop.h"
 #include "cpu.h"
 #include "s390x-internal.h"
 #include "qemu/host-utils.h"
diff --git a/target/s390x/tcg/vec_int_helper.c b/target/s390x/tcg/vec_int_helper.c
index 53ab5c5eb3..b18d8a6d16 100644
--- a/target/s390x/tcg/vec_int_helper.c
+++ b/target/s390x/tcg/vec_int_helper.c
@@ -14,19 +14,13 @@
 #include "vec.h"
 #include "exec/helper-proto.h"
 #include "tcg/tcg-gvec-desc.h"
+#include "crypto/clmul.h"
 
 static bool s390_vec_is_zero(const S390Vector *v)
 {
     return !v->doubleword[0] && !v->doubleword[1];
 }
 
-static void s390_vec_xor(S390Vector *res, const S390Vector *a,
-                         const S390Vector *b)
-{
-    res->doubleword[0] = a->doubleword[0] ^ b->doubleword[0];
-    res->doubleword[1] = a->doubleword[1] ^ b->doubleword[1];
-}
-
 static void s390_vec_and(S390Vector *res, const S390Vector *a,
                          const S390Vector *b)
 {
@@ -164,117 +158,105 @@ DEF_VCTZ(8)
 DEF_VCTZ(16)
 
 /* like binary multiplication, but XOR instead of addition */
-#define DEF_GALOIS_MULTIPLY(BITS, TBITS)                                       \
-static uint##TBITS##_t galois_multiply##BITS(uint##TBITS##_t a,                \
-                                             uint##TBITS##_t b)                \
-{                                                                              \
-    uint##TBITS##_t res = 0;                                                   \
-                                                                               \
-    while (b) {                                                                \
-        if (b & 0x1) {                                                         \
-            res = res ^ a;                                                     \
-        }                                                                      \
-        a = a << 1;                                                            \
-        b = b >> 1;                                                            \
-    }                                                                          \
-    return res;                                                                \
-}
-DEF_GALOIS_MULTIPLY(8, 16)
-DEF_GALOIS_MULTIPLY(16, 32)
-DEF_GALOIS_MULTIPLY(32, 64)
 
-static S390Vector galois_multiply64(uint64_t a, uint64_t b)
+/*
+ * There is no carry across the two doublewords, so their order does
+ * not matter.  Nor is there partial overlap between registers.
+ */
+static inline uint64_t do_gfma8(uint64_t n, uint64_t m, uint64_t a)
 {
-    S390Vector res = {};
-    S390Vector va = {
-        .doubleword[1] = a,
-    };
-    S390Vector vb = {
-        .doubleword[1] = b,
-    };
-
-    while (!s390_vec_is_zero(&vb)) {
-        if (vb.doubleword[1] & 0x1) {
-            s390_vec_xor(&res, &res, &va);
-        }
-        s390_vec_shl(&va, &va, 1);
-        s390_vec_shr(&vb, &vb, 1);
-    }
-    return res;
+    return clmul_8x4_even(n, m) ^ clmul_8x4_odd(n, m) ^ a;
 }
 
-#define DEF_VGFM(BITS, TBITS)                                                  \
-void HELPER(gvec_vgfm##BITS)(void *v1, const void *v2, const void *v3,         \
-                             uint32_t desc)                                    \
-{                                                                              \
-    int i;                                                                     \
-                                                                               \
-    for (i = 0; i < (128 / TBITS); i++) {                                      \
-        uint##BITS##_t a = s390_vec_read_element##BITS(v2, i * 2);             \
-        uint##BITS##_t b = s390_vec_read_element##BITS(v3, i * 2);             \
-        uint##TBITS##_t d = galois_multiply##BITS(a, b);                       \
-                                                                               \
-        a = s390_vec_read_element##BITS(v2, i * 2 + 1);                        \
-        b = s390_vec_read_element##BITS(v3, i * 2 + 1);                        \
-        d = d ^ galois_multiply32(a, b);                                       \
-        s390_vec_write_element##TBITS(v1, i, d);                               \
-    }                                                                          \
+void HELPER(gvec_vgfm8)(void *v1, const void *v2, const void *v3, uint32_t d)
+{
+    uint64_t *q1 = v1;
+    const uint64_t *q2 = v2, *q3 = v3;
+
+    q1[0] = do_gfma8(q2[0], q3[0], 0);
+    q1[1] = do_gfma8(q2[1], q3[1], 0);
+}
+
+void HELPER(gvec_vgfma8)(void *v1, const void *v2, const void *v3,
+                         const void *v4, uint32_t desc)
+{
+    uint64_t *q1 = v1;
+    const uint64_t *q2 = v2, *q3 = v3, *q4 = v4;
+
+    q1[0] = do_gfma8(q2[0], q3[0], q4[0]);
+    q1[1] = do_gfma8(q2[1], q3[1], q4[1]);
+}
+
+static inline uint64_t do_gfma16(uint64_t n, uint64_t m, uint64_t a)
+{
+    return clmul_16x2_even(n, m) ^ clmul_16x2_odd(n, m) ^ a;
+}
+
+void HELPER(gvec_vgfm16)(void *v1, const void *v2, const void *v3, uint32_t d)
+{
+    uint64_t *q1 = v1;
+    const uint64_t *q2 = v2, *q3 = v3;
+
+    q1[0] = do_gfma16(q2[0], q3[0], 0);
+    q1[1] = do_gfma16(q2[1], q3[1], 0);
+}
+
+void HELPER(gvec_vgfma16)(void *v1, const void *v2, const void *v3,
+                         const void *v4, uint32_t d)
+{
+    uint64_t *q1 = v1;
+    const uint64_t *q2 = v2, *q3 = v3, *q4 = v4;
+
+    q1[0] = do_gfma16(q2[0], q3[0], q4[0]);
+    q1[1] = do_gfma16(q2[1], q3[1], q4[1]);
+}
+
+static inline uint64_t do_gfma32(uint64_t n, uint64_t m, uint64_t a)
+{
+    return clmul_32(n, m) ^ clmul_32(n >> 32, m >> 32) ^ a;
+}
+
+void HELPER(gvec_vgfm32)(void *v1, const void *v2, const void *v3, uint32_t d)
+{
+    uint64_t *q1 = v1;
+    const uint64_t *q2 = v2, *q3 = v3;
+
+    q1[0] = do_gfma32(q2[0], q3[0], 0);
+    q1[1] = do_gfma32(q2[1], q3[1], 0);
+}
+
+void HELPER(gvec_vgfma32)(void *v1, const void *v2, const void *v3,
+                         const void *v4, uint32_t d)
+{
+    uint64_t *q1 = v1;
+    const uint64_t *q2 = v2, *q3 = v3, *q4 = v4;
+
+    q1[0] = do_gfma32(q2[0], q3[0], q4[0]);
+    q1[1] = do_gfma32(q2[1], q3[1], q4[1]);
 }
-DEF_VGFM(8, 16)
-DEF_VGFM(16, 32)
-DEF_VGFM(32, 64)
 
 void HELPER(gvec_vgfm64)(void *v1, const void *v2, const void *v3,
                          uint32_t desc)
 {
-    S390Vector tmp1, tmp2;
-    uint64_t a, b;
+    uint64_t *q1 = v1;
+    const uint64_t *q2 = v2, *q3 = v3;
+    Int128 r;
 
-    a = s390_vec_read_element64(v2, 0);
-    b = s390_vec_read_element64(v3, 0);
-    tmp1 = galois_multiply64(a, b);
-    a = s390_vec_read_element64(v2, 1);
-    b = s390_vec_read_element64(v3, 1);
-    tmp2 = galois_multiply64(a, b);
-    s390_vec_xor(v1, &tmp1, &tmp2);
+    r = int128_xor(clmul_64(q2[0], q3[0]), clmul_64(q2[1], q3[1]));
+    q1[0] = int128_gethi(r);
+    q1[1] = int128_getlo(r);
 }
 
-#define DEF_VGFMA(BITS, TBITS)                                                 \
-void HELPER(gvec_vgfma##BITS)(void *v1, const void *v2, const void *v3,        \
-                              const void *v4, uint32_t desc)                   \
-{                                                                              \
-    int i;                                                                     \
-                                                                               \
-    for (i = 0; i < (128 / TBITS); i++) {                                      \
-        uint##BITS##_t a = s390_vec_read_element##BITS(v2, i * 2);             \
-        uint##BITS##_t b = s390_vec_read_element##BITS(v3, i * 2);             \
-        uint##TBITS##_t d = galois_multiply##BITS(a, b);                       \
-                                                                               \
-        a = s390_vec_read_element##BITS(v2, i * 2 + 1);                        \
-        b = s390_vec_read_element##BITS(v3, i * 2 + 1);                        \
-        d = d ^ galois_multiply32(a, b);                                       \
-        d = d ^ s390_vec_read_element##TBITS(v4, i);                           \
-        s390_vec_write_element##TBITS(v1, i, d);                               \
-    }                                                                          \
-}
-DEF_VGFMA(8, 16)
-DEF_VGFMA(16, 32)
-DEF_VGFMA(32, 64)
-
 void HELPER(gvec_vgfma64)(void *v1, const void *v2, const void *v3,
                           const void *v4, uint32_t desc)
 {
-    S390Vector tmp1, tmp2;
-    uint64_t a, b;
+    uint64_t *q1 = v1;
+    const uint64_t *q2 = v2, *q3 = v3, *q4 = v4;
+    Int128 r;
 
-    a = s390_vec_read_element64(v2, 0);
-    b = s390_vec_read_element64(v3, 0);
-    tmp1 = galois_multiply64(a, b);
-    a = s390_vec_read_element64(v2, 1);
-    b = s390_vec_read_element64(v3, 1);
-    tmp2 = galois_multiply64(a, b);
-    s390_vec_xor(&tmp1, &tmp1, &tmp2);
-    s390_vec_xor(v1, &tmp1, v4);
+    r = int128_xor(clmul_64(q2[0], q3[0]), clmul_64(q2[1], q3[1]));
+    q1[0] = q4[0] ^ int128_gethi(r);
+    q1[1] = q4[1] ^ int128_getlo(r);
 }
 
 #define DEF_VMAL(BITS)                                                         \
diff --git a/target/sh4/translate.c b/target/sh4/translate.c
index 49c87d7a01..c1e590feb3 100644
--- a/target/sh4/translate.c
+++ b/target/sh4/translate.c
@@ -22,7 +22,6 @@
 #include "disas/disas.h"
 #include "exec/exec-all.h"
 #include "tcg/tcg-op.h"
-#include "exec/cpu_ldst.h"
 #include "exec/helper-proto.h"
 #include "exec/helper-gen.h"
 #include "exec/translator.h"
diff --git a/target/sparc/translate.c b/target/sparc/translate.c
index fa80a91161..3bf0ab8135 100644
--- a/target/sparc/translate.c
+++ b/target/sparc/translate.c
@@ -25,7 +25,6 @@
 #include "exec/helper-proto.h"
 #include "exec/exec-all.h"
 #include "tcg/tcg-op.h"
-#include "exec/cpu_ldst.h"
 
 #include "exec/helper-gen.h"
 
diff --git a/target/xtensa/dbg_helper.c b/target/xtensa/dbg_helper.c
index ce2a820c60..3e0c9e8e8b 100644
--- a/target/xtensa/dbg_helper.c
+++ b/target/xtensa/dbg_helper.c
@@ -27,7 +27,6 @@
 
 #include "qemu/osdep.h"
 #include "qemu/log.h"
-#include "qemu/main-loop.h"
 #include "cpu.h"
 #include "exec/helper-proto.h"
 #include "qemu/host-utils.h"
diff --git a/target/xtensa/exc_helper.c b/target/xtensa/exc_helper.c
index 43f6a862de..91354884f7 100644
--- a/target/xtensa/exc_helper.c
+++ b/target/xtensa/exc_helper.c
@@ -31,6 +31,7 @@
 #include "cpu.h"
 #include "exec/helper-proto.h"
 #include "qemu/host-utils.h"
+#include "qemu/atomic.h"
 #include "exec/exec-all.h"
 
 void HELPER(exception)(CPUXtensaState *env, uint32_t excp)
diff --git a/target/xtensa/fpu_helper.c b/target/xtensa/fpu_helper.c
index d2a10cc797..381e83ded8 100644
--- a/target/xtensa/fpu_helper.c
+++ b/target/xtensa/fpu_helper.c
@@ -27,7 +27,6 @@
 
 #include "qemu/osdep.h"
 #include "qemu/log.h"
-#include "qemu/main-loop.h"
 #include "cpu.h"
 #include "exec/helper-proto.h"
 #include "qemu/host-utils.h"
diff --git a/target/xtensa/mmu_helper.c b/target/xtensa/mmu_helper.c
index fa66e8e867..12552a3347 100644
--- a/target/xtensa/mmu_helper.c
+++ b/target/xtensa/mmu_helper.c
@@ -27,14 +27,12 @@
 
 #include "qemu/osdep.h"
 #include "qemu/log.h"
-#include "qemu/main-loop.h"
 #include "qemu/qemu-print.h"
 #include "qemu/units.h"
 #include "cpu.h"
 #include "exec/helper-proto.h"
 #include "qemu/host-utils.h"
 #include "exec/exec-all.h"
-#include "exec/cpu_ldst.h"
 
 #define XTENSA_MPU_SEGMENT_MASK 0x0000001f
 #define XTENSA_MPU_ACC_RIGHTS_MASK 0x00000f00
diff --git a/target/xtensa/op_helper.c b/target/xtensa/op_helper.c
index 1af7becc54..7bb8cd6726 100644
--- a/target/xtensa/op_helper.c
+++ b/target/xtensa/op_helper.c
@@ -26,12 +26,11 @@
  */
 
 #include "qemu/osdep.h"
-#include "qemu/main-loop.h"
 #include "cpu.h"
 #include "exec/helper-proto.h"
 #include "qemu/host-utils.h"
 #include "exec/exec-all.h"
-#include "exec/cpu_ldst.h"
+#include "qemu/atomic.h"
 #include "qemu/timer.h"
 
 #ifndef CONFIG_USER_ONLY
diff --git a/target/xtensa/win_helper.c b/target/xtensa/win_helper.c
index 5a1555360a..ec9ff44db0 100644
--- a/target/xtensa/win_helper.c
+++ b/target/xtensa/win_helper.c
@@ -27,7 +27,6 @@
 
 #include "qemu/osdep.h"
 #include "qemu/log.h"
-#include "qemu/main-loop.h"
 #include "cpu.h"
 #include "exec/helper-proto.h"
 #include "qemu/host-utils.h"
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
index 0931a69448..06ea3c7652 100644
--- a/tcg/aarch64/tcg-target.c.inc
+++ b/tcg/aarch64/tcg-target.c.inc
@@ -272,7 +272,7 @@ static bool is_shimm1632(uint32_t v32, int *cmode, int *imm8)
     }
 }
 
-static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
+static bool tcg_target_const_match(int64_t val, TCGType type, int ct, int vece)
 {
     if (ct & TCG_CT_CONST) {
         return 1;
@@ -602,6 +602,10 @@ typedef enum {
     DMB_ISH         = 0xd50338bf,
     DMB_LD          = 0x00000100,
     DMB_ST          = 0x00000200,
+
+    BTI_C           = 0xd503245f,
+    BTI_J           = 0xd503249f,
+    BTI_JC          = 0xd50324df,
 } AArch64Insn;
 
 static inline uint32_t tcg_in32(TCGContext *s)
@@ -843,6 +847,17 @@ static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
               | rn << 5 | (rd & 0x1f));
 }
 
+static void tcg_out_bti(TCGContext *s, AArch64Insn insn)
+{
+    /*
+     * While BTI insns are nops on hosts without FEAT_BTI,
+     * there is no point in emitting them in that case either.
+     */
+    if (cpuinfo & CPUINFO_BTI) {
+        tcg_out32(s, insn);
+    }
+}
+
 /* Register to register move using ORR (shifted register with no shift). */
 static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
 {
@@ -1351,18 +1366,6 @@ static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
     tcg_out_insn(s, 3206, B, offset);
 }
 
-static void tcg_out_goto_long(TCGContext *s, const tcg_insn_unit *target)
-{
-    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
-    if (offset == sextract64(offset, 0, 26)) {
-        tcg_out_insn(s, 3206, B, offset);
-    } else {
-        /* Choose X9 as a call-clobbered non-LR temporary. */
-        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X9, (intptr_t)target);
-        tcg_out_insn(s, 3207, BR, TCG_REG_X9);
-    }
-}
-
 static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *target)
 {
     ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
@@ -1947,12 +1950,28 @@ static const tcg_insn_unit *tb_ret_addr;
 
 static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
 {
+    const tcg_insn_unit *target;
+    ptrdiff_t offset;
+
     /* Reuse the zeroing that exists for goto_ptr.  */
     if (a0 == 0) {
-        tcg_out_goto_long(s, tcg_code_gen_epilogue);
+        target = tcg_code_gen_epilogue;
     } else {
         tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
-        tcg_out_goto_long(s, tb_ret_addr);
+        target = tb_ret_addr;
+    }
+
+    offset = tcg_pcrel_diff(s, target) >> 2;
+    if (offset == sextract64(offset, 0, 26)) {
+        tcg_out_insn(s, 3206, B, offset);
+    } else {
+        /*
+         * Only x16/x17 generate BTI type Jump (2),
+         * other registers generate BTI type Jump|Call (3).
+         */
+        QEMU_BUILD_BUG_ON(TCG_REG_TMP0 != TCG_REG_X16);
+        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, (intptr_t)target);
+        tcg_out_insn(s, 3207, BR, TCG_REG_TMP0);
     }
 }
 
@@ -1970,6 +1989,7 @@ static void tcg_out_goto_tb(TCGContext *s, int which)
     tcg_out32(s, I3206_B);
     tcg_out_insn(s, 3207, BR, TCG_REG_TMP0);
     set_jmp_reset_offset(s, which);
+    tcg_out_bti(s, BTI_J);
 }
 
 void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
@@ -3074,6 +3094,8 @@ static void tcg_target_qemu_prologue(TCGContext *s)
 {
     TCGReg r;
 
+    tcg_out_bti(s, BTI_C);
+
     /* Push (FP, LR) and allocate space for all saved registers.  */
     tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
                  TCG_REG_SP, -PUSH_SIZE, 1, 1);
@@ -3114,10 +3136,12 @@ static void tcg_target_qemu_prologue(TCGContext *s)
      * and fall through to the rest of the epilogue.
      */
     tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
+    tcg_out_bti(s, BTI_J);
     tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0);
 
     /* TB epilogue */
     tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);
+    tcg_out_bti(s, BTI_J);
 
     /* Remove TCG locals stack space.  */
     tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
@@ -3135,6 +3159,11 @@ static void tcg_target_qemu_prologue(TCGContext *s)
     tcg_out_insn(s, 3207, RET, TCG_REG_LR);
 }
 
+static void tcg_out_tb_start(TCGContext *s)
+{
+    tcg_out_bti(s, BTI_J);
+}
+
 static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
 {
     int i;
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
index acb5f23b54..b1d56362a7 100644
--- a/tcg/arm/tcg-target.c.inc
+++ b/tcg/arm/tcg-target.c.inc
@@ -509,7 +509,7 @@ static bool is_shimm1632(uint32_t v32, int *cmode, int *imm8)
  * mov operand2:     values represented with x << (2 * y), x < 0x100
  * add, sub, eor...: ditto
  */
-static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
+static bool tcg_target_const_match(int64_t val, TCGType type, int ct, int vece)
 {
     if (ct & TCG_CT_CONST) {
         return 1;
@@ -2962,6 +2962,11 @@ static void tcg_out_epilogue(TCGContext *s)
                   (1 << TCG_REG_R10) | (1 << TCG_REG_R11) | (1 << TCG_REG_PC));
 }
 
+static void tcg_out_tb_start(TCGContext *s)
+{
+    /* nothing to do */
+}
+
 typedef struct {
     DebugFrameHeader h;
     uint8_t fde_def_cfa[4];
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index 0c3d1e4cef..4e47151241 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -198,7 +198,7 @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
 }
 
 /* test if a constant matches the constraint */
-static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
+static bool tcg_target_const_match(int64_t val, TCGType type, int ct, int vece)
 {
     if (ct & TCG_CT_CONST) {
         return 1;
@@ -4191,6 +4191,11 @@ static void tcg_target_qemu_prologue(TCGContext *s)
     tcg_out_opc(s, OPC_RET, 0, 0, 0);
 }
 
+static void tcg_out_tb_start(TCGContext *s)
+{
+    /* nothing to do */
+}
+
 static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
 {
     memset(p, 0x90, count);
diff --git a/tcg/loongarch64/tcg-insn-defs.c.inc b/tcg/loongarch64/tcg-insn-defs.c.inc
index b5bb0c5e73..ee3b483b02 100644
--- a/tcg/loongarch64/tcg-insn-defs.c.inc
+++ b/tcg/loongarch64/tcg-insn-defs.c.inc
@@ -4,7 +4,7 @@
  *
  * This file is auto-generated by genqemutcgdefs from
  * https://github.com/loongson-community/loongarch-opcodes,
- * from commit 25ca7effe9d88101c1cf96c4005423643386d81f.
+ * from commit 8027da9a8157a8b47fc48ff1def292e09c5668bd.
  * DO NOT EDIT.
  */
 
@@ -74,6 +74,60 @@ typedef enum {
     OPC_ANDI = 0x03400000,
     OPC_ORI = 0x03800000,
     OPC_XORI = 0x03c00000,
+    OPC_VFMADD_S = 0x09100000,
+    OPC_VFMADD_D = 0x09200000,
+    OPC_VFMSUB_S = 0x09500000,
+    OPC_VFMSUB_D = 0x09600000,
+    OPC_VFNMADD_S = 0x09900000,
+    OPC_VFNMADD_D = 0x09a00000,
+    OPC_VFNMSUB_S = 0x09d00000,
+    OPC_VFNMSUB_D = 0x09e00000,
+    OPC_VFCMP_CAF_S = 0x0c500000,
+    OPC_VFCMP_SAF_S = 0x0c508000,
+    OPC_VFCMP_CLT_S = 0x0c510000,
+    OPC_VFCMP_SLT_S = 0x0c518000,
+    OPC_VFCMP_CEQ_S = 0x0c520000,
+    OPC_VFCMP_SEQ_S = 0x0c528000,
+    OPC_VFCMP_CLE_S = 0x0c530000,
+    OPC_VFCMP_SLE_S = 0x0c538000,
+    OPC_VFCMP_CUN_S = 0x0c540000,
+    OPC_VFCMP_SUN_S = 0x0c548000,
+    OPC_VFCMP_CULT_S = 0x0c550000,
+    OPC_VFCMP_SULT_S = 0x0c558000,
+    OPC_VFCMP_CUEQ_S = 0x0c560000,
+    OPC_VFCMP_SUEQ_S = 0x0c568000,
+    OPC_VFCMP_CULE_S = 0x0c570000,
+    OPC_VFCMP_SULE_S = 0x0c578000,
+    OPC_VFCMP_CNE_S = 0x0c580000,
+    OPC_VFCMP_SNE_S = 0x0c588000,
+    OPC_VFCMP_COR_S = 0x0c5a0000,
+    OPC_VFCMP_SOR_S = 0x0c5a8000,
+    OPC_VFCMP_CUNE_S = 0x0c5c0000,
+    OPC_VFCMP_SUNE_S = 0x0c5c8000,
+    OPC_VFCMP_CAF_D = 0x0c600000,
+    OPC_VFCMP_SAF_D = 0x0c608000,
+    OPC_VFCMP_CLT_D = 0x0c610000,
+    OPC_VFCMP_SLT_D = 0x0c618000,
+    OPC_VFCMP_CEQ_D = 0x0c620000,
+    OPC_VFCMP_SEQ_D = 0x0c628000,
+    OPC_VFCMP_CLE_D = 0x0c630000,
+    OPC_VFCMP_SLE_D = 0x0c638000,
+    OPC_VFCMP_CUN_D = 0x0c640000,
+    OPC_VFCMP_SUN_D = 0x0c648000,
+    OPC_VFCMP_CULT_D = 0x0c650000,
+    OPC_VFCMP_SULT_D = 0x0c658000,
+    OPC_VFCMP_CUEQ_D = 0x0c660000,
+    OPC_VFCMP_SUEQ_D = 0x0c668000,
+    OPC_VFCMP_CULE_D = 0x0c670000,
+    OPC_VFCMP_SULE_D = 0x0c678000,
+    OPC_VFCMP_CNE_D = 0x0c680000,
+    OPC_VFCMP_SNE_D = 0x0c688000,
+    OPC_VFCMP_COR_D = 0x0c6a0000,
+    OPC_VFCMP_SOR_D = 0x0c6a8000,
+    OPC_VFCMP_CUNE_D = 0x0c6c0000,
+    OPC_VFCMP_SUNE_D = 0x0c6c8000,
+    OPC_VBITSEL_V = 0x0d100000,
+    OPC_VSHUF_B = 0x0d500000,
     OPC_ADDU16I_D = 0x10000000,
     OPC_LU12I_W = 0x14000000,
     OPC_CU32I_D = 0x16000000,
@@ -92,6 +146,16 @@ typedef enum {
     OPC_LD_BU = 0x2a000000,
     OPC_LD_HU = 0x2a400000,
     OPC_LD_WU = 0x2a800000,
+    OPC_VLD = 0x2c000000,
+    OPC_VST = 0x2c400000,
+    OPC_VLDREPL_D = 0x30100000,
+    OPC_VLDREPL_W = 0x30200000,
+    OPC_VLDREPL_H = 0x30400000,
+    OPC_VLDREPL_B = 0x30800000,
+    OPC_VSTELM_D = 0x31100000,
+    OPC_VSTELM_W = 0x31200000,
+    OPC_VSTELM_H = 0x31400000,
+    OPC_VSTELM_B = 0x31800000,
     OPC_LDX_B = 0x38000000,
     OPC_LDX_H = 0x38040000,
     OPC_LDX_W = 0x38080000,
@@ -103,6 +167,8 @@ typedef enum {
     OPC_LDX_BU = 0x38200000,
     OPC_LDX_HU = 0x38240000,
     OPC_LDX_WU = 0x38280000,
+    OPC_VLDX = 0x38400000,
+    OPC_VSTX = 0x38440000,
     OPC_DBAR = 0x38720000,
     OPC_JIRL = 0x4c000000,
     OPC_B = 0x50000000,
@@ -113,6 +179,652 @@ typedef enum {
     OPC_BLE = 0x64000000,
     OPC_BGTU = 0x68000000,
     OPC_BLEU = 0x6c000000,
+    OPC_VSEQ_B = 0x70000000,
+    OPC_VSEQ_H = 0x70008000,
+    OPC_VSEQ_W = 0x70010000,
+    OPC_VSEQ_D = 0x70018000,
+    OPC_VSLE_B = 0x70020000,
+    OPC_VSLE_H = 0x70028000,
+    OPC_VSLE_W = 0x70030000,
+    OPC_VSLE_D = 0x70038000,
+    OPC_VSLE_BU = 0x70040000,
+    OPC_VSLE_HU = 0x70048000,
+    OPC_VSLE_WU = 0x70050000,
+    OPC_VSLE_DU = 0x70058000,
+    OPC_VSLT_B = 0x70060000,
+    OPC_VSLT_H = 0x70068000,
+    OPC_VSLT_W = 0x70070000,
+    OPC_VSLT_D = 0x70078000,
+    OPC_VSLT_BU = 0x70080000,
+    OPC_VSLT_HU = 0x70088000,
+    OPC_VSLT_WU = 0x70090000,
+    OPC_VSLT_DU = 0x70098000,
+    OPC_VADD_B = 0x700a0000,
+    OPC_VADD_H = 0x700a8000,
+    OPC_VADD_W = 0x700b0000,
+    OPC_VADD_D = 0x700b8000,
+    OPC_VSUB_B = 0x700c0000,
+    OPC_VSUB_H = 0x700c8000,
+    OPC_VSUB_W = 0x700d0000,
+    OPC_VSUB_D = 0x700d8000,
+    OPC_VADDWEV_H_B = 0x701e0000,
+    OPC_VADDWEV_W_H = 0x701e8000,
+    OPC_VADDWEV_D_W = 0x701f0000,
+    OPC_VADDWEV_Q_D = 0x701f8000,
+    OPC_VSUBWEV_H_B = 0x70200000,
+    OPC_VSUBWEV_W_H = 0x70208000,
+    OPC_VSUBWEV_D_W = 0x70210000,
+    OPC_VSUBWEV_Q_D = 0x70218000,
+    OPC_VADDWOD_H_B = 0x70220000,
+    OPC_VADDWOD_W_H = 0x70228000,
+    OPC_VADDWOD_D_W = 0x70230000,
+    OPC_VADDWOD_Q_D = 0x70238000,
+    OPC_VSUBWOD_H_B = 0x70240000,
+    OPC_VSUBWOD_W_H = 0x70248000,
+    OPC_VSUBWOD_D_W = 0x70250000,
+    OPC_VSUBWOD_Q_D = 0x70258000,
+    OPC_VADDWEV_H_BU = 0x702e0000,
+    OPC_VADDWEV_W_HU = 0x702e8000,
+    OPC_VADDWEV_D_WU = 0x702f0000,
+    OPC_VADDWEV_Q_DU = 0x702f8000,
+    OPC_VSUBWEV_H_BU = 0x70300000,
+    OPC_VSUBWEV_W_HU = 0x70308000,
+    OPC_VSUBWEV_D_WU = 0x70310000,
+    OPC_VSUBWEV_Q_DU = 0x70318000,
+    OPC_VADDWOD_H_BU = 0x70320000,
+    OPC_VADDWOD_W_HU = 0x70328000,
+    OPC_VADDWOD_D_WU = 0x70330000,
+    OPC_VADDWOD_Q_DU = 0x70338000,
+    OPC_VSUBWOD_H_BU = 0x70340000,
+    OPC_VSUBWOD_W_HU = 0x70348000,
+    OPC_VSUBWOD_D_WU = 0x70350000,
+    OPC_VSUBWOD_Q_DU = 0x70358000,
+    OPC_VADDWEV_H_BU_B = 0x703e0000,
+    OPC_VADDWEV_W_HU_H = 0x703e8000,
+    OPC_VADDWEV_D_WU_W = 0x703f0000,
+    OPC_VADDWEV_Q_DU_D = 0x703f8000,
+    OPC_VADDWOD_H_BU_B = 0x70400000,
+    OPC_VADDWOD_W_HU_H = 0x70408000,
+    OPC_VADDWOD_D_WU_W = 0x70410000,
+    OPC_VADDWOD_Q_DU_D = 0x70418000,
+    OPC_VSADD_B = 0x70460000,
+    OPC_VSADD_H = 0x70468000,
+    OPC_VSADD_W = 0x70470000,
+    OPC_VSADD_D = 0x70478000,
+    OPC_VSSUB_B = 0x70480000,
+    OPC_VSSUB_H = 0x70488000,
+    OPC_VSSUB_W = 0x70490000,
+    OPC_VSSUB_D = 0x70498000,
+    OPC_VSADD_BU = 0x704a0000,
+    OPC_VSADD_HU = 0x704a8000,
+    OPC_VSADD_WU = 0x704b0000,
+    OPC_VSADD_DU = 0x704b8000,
+    OPC_VSSUB_BU = 0x704c0000,
+    OPC_VSSUB_HU = 0x704c8000,
+    OPC_VSSUB_WU = 0x704d0000,
+    OPC_VSSUB_DU = 0x704d8000,
+    OPC_VHADDW_H_B = 0x70540000,
+    OPC_VHADDW_W_H = 0x70548000,
+    OPC_VHADDW_D_W = 0x70550000,
+    OPC_VHADDW_Q_D = 0x70558000,
+    OPC_VHSUBW_H_B = 0x70560000,
+    OPC_VHSUBW_W_H = 0x70568000,
+    OPC_VHSUBW_D_W = 0x70570000,
+    OPC_VHSUBW_Q_D = 0x70578000,
+    OPC_VHADDW_HU_BU = 0x70580000,
+    OPC_VHADDW_WU_HU = 0x70588000,
+    OPC_VHADDW_DU_WU = 0x70590000,
+    OPC_VHADDW_QU_DU = 0x70598000,
+    OPC_VHSUBW_HU_BU = 0x705a0000,
+    OPC_VHSUBW_WU_HU = 0x705a8000,
+    OPC_VHSUBW_DU_WU = 0x705b0000,
+    OPC_VHSUBW_QU_DU = 0x705b8000,
+    OPC_VADDA_B = 0x705c0000,
+    OPC_VADDA_H = 0x705c8000,
+    OPC_VADDA_W = 0x705d0000,
+    OPC_VADDA_D = 0x705d8000,
+    OPC_VABSD_B = 0x70600000,
+    OPC_VABSD_H = 0x70608000,
+    OPC_VABSD_W = 0x70610000,
+    OPC_VABSD_D = 0x70618000,
+    OPC_VABSD_BU = 0x70620000,
+    OPC_VABSD_HU = 0x70628000,
+    OPC_VABSD_WU = 0x70630000,
+    OPC_VABSD_DU = 0x70638000,
+    OPC_VAVG_B = 0x70640000,
+    OPC_VAVG_H = 0x70648000,
+    OPC_VAVG_W = 0x70650000,
+    OPC_VAVG_D = 0x70658000,
+    OPC_VAVG_BU = 0x70660000,
+    OPC_VAVG_HU = 0x70668000,
+    OPC_VAVG_WU = 0x70670000,
+    OPC_VAVG_DU = 0x70678000,
+    OPC_VAVGR_B = 0x70680000,
+    OPC_VAVGR_H = 0x70688000,
+    OPC_VAVGR_W = 0x70690000,
+    OPC_VAVGR_D = 0x70698000,
+    OPC_VAVGR_BU = 0x706a0000,
+    OPC_VAVGR_HU = 0x706a8000,
+    OPC_VAVGR_WU = 0x706b0000,
+    OPC_VAVGR_DU = 0x706b8000,
+    OPC_VMAX_B = 0x70700000,
+    OPC_VMAX_H = 0x70708000,
+    OPC_VMAX_W = 0x70710000,
+    OPC_VMAX_D = 0x70718000,
+    OPC_VMIN_B = 0x70720000,
+    OPC_VMIN_H = 0x70728000,
+    OPC_VMIN_W = 0x70730000,
+    OPC_VMIN_D = 0x70738000,
+    OPC_VMAX_BU = 0x70740000,
+    OPC_VMAX_HU = 0x70748000,
+    OPC_VMAX_WU = 0x70750000,
+    OPC_VMAX_DU = 0x70758000,
+    OPC_VMIN_BU = 0x70760000,
+    OPC_VMIN_HU = 0x70768000,
+    OPC_VMIN_WU = 0x70770000,
+    OPC_VMIN_DU = 0x70778000,
+    OPC_VMUL_B = 0x70840000,
+    OPC_VMUL_H = 0x70848000,
+    OPC_VMUL_W = 0x70850000,
+    OPC_VMUL_D = 0x70858000,
+    OPC_VMUH_B = 0x70860000,
+    OPC_VMUH_H = 0x70868000,
+    OPC_VMUH_W = 0x70870000,
+    OPC_VMUH_D = 0x70878000,
+    OPC_VMUH_BU = 0x70880000,
+    OPC_VMUH_HU = 0x70888000,
+    OPC_VMUH_WU = 0x70890000,
+    OPC_VMUH_DU = 0x70898000,
+    OPC_VMULWEV_H_B = 0x70900000,
+    OPC_VMULWEV_W_H = 0x70908000,
+    OPC_VMULWEV_D_W = 0x70910000,
+    OPC_VMULWEV_Q_D = 0x70918000,
+    OPC_VMULWOD_H_B = 0x70920000,
+    OPC_VMULWOD_W_H = 0x70928000,
+    OPC_VMULWOD_D_W = 0x70930000,
+    OPC_VMULWOD_Q_D = 0x70938000,
+    OPC_VMULWEV_H_BU = 0x70980000,
+    OPC_VMULWEV_W_HU = 0x70988000,
+    OPC_VMULWEV_D_WU = 0x70990000,
+    OPC_VMULWEV_Q_DU = 0x70998000,
+    OPC_VMULWOD_H_BU = 0x709a0000,
+    OPC_VMULWOD_W_HU = 0x709a8000,
+    OPC_VMULWOD_D_WU = 0x709b0000,
+    OPC_VMULWOD_Q_DU = 0x709b8000,
+    OPC_VMULWEV_H_BU_B = 0x70a00000,
+    OPC_VMULWEV_W_HU_H = 0x70a08000,
+    OPC_VMULWEV_D_WU_W = 0x70a10000,
+    OPC_VMULWEV_Q_DU_D = 0x70a18000,
+    OPC_VMULWOD_H_BU_B = 0x70a20000,
+    OPC_VMULWOD_W_HU_H = 0x70a28000,
+    OPC_VMULWOD_D_WU_W = 0x70a30000,
+    OPC_VMULWOD_Q_DU_D = 0x70a38000,
+    OPC_VMADD_B = 0x70a80000,
+    OPC_VMADD_H = 0x70a88000,
+    OPC_VMADD_W = 0x70a90000,
+    OPC_VMADD_D = 0x70a98000,
+    OPC_VMSUB_B = 0x70aa0000,
+    OPC_VMSUB_H = 0x70aa8000,
+    OPC_VMSUB_W = 0x70ab0000,
+    OPC_VMSUB_D = 0x70ab8000,
+    OPC_VMADDWEV_H_B = 0x70ac0000,
+    OPC_VMADDWEV_W_H = 0x70ac8000,
+    OPC_VMADDWEV_D_W = 0x70ad0000,
+    OPC_VMADDWEV_Q_D = 0x70ad8000,
+    OPC_VMADDWOD_H_B = 0x70ae0000,
+    OPC_VMADDWOD_W_H = 0x70ae8000,
+    OPC_VMADDWOD_D_W = 0x70af0000,
+    OPC_VMADDWOD_Q_D = 0x70af8000,
+    OPC_VMADDWEV_H_BU = 0x70b40000,
+    OPC_VMADDWEV_W_HU = 0x70b48000,
+    OPC_VMADDWEV_D_WU = 0x70b50000,
+    OPC_VMADDWEV_Q_DU = 0x70b58000,
+    OPC_VMADDWOD_H_BU = 0x70b60000,
+    OPC_VMADDWOD_W_HU = 0x70b68000,
+    OPC_VMADDWOD_D_WU = 0x70b70000,
+    OPC_VMADDWOD_Q_DU = 0x70b78000,
+    OPC_VMADDWEV_H_BU_B = 0x70bc0000,
+    OPC_VMADDWEV_W_HU_H = 0x70bc8000,
+    OPC_VMADDWEV_D_WU_W = 0x70bd0000,
+    OPC_VMADDWEV_Q_DU_D = 0x70bd8000,
+    OPC_VMADDWOD_H_BU_B = 0x70be0000,
+    OPC_VMADDWOD_W_HU_H = 0x70be8000,
+    OPC_VMADDWOD_D_WU_W = 0x70bf0000,
+    OPC_VMADDWOD_Q_DU_D = 0x70bf8000,
+    OPC_VDIV_B = 0x70e00000,
+    OPC_VDIV_H = 0x70e08000,
+    OPC_VDIV_W = 0x70e10000,
+    OPC_VDIV_D = 0x70e18000,
+    OPC_VMOD_B = 0x70e20000,
+    OPC_VMOD_H = 0x70e28000,
+    OPC_VMOD_W = 0x70e30000,
+    OPC_VMOD_D = 0x70e38000,
+    OPC_VDIV_BU = 0x70e40000,
+    OPC_VDIV_HU = 0x70e48000,
+    OPC_VDIV_WU = 0x70e50000,
+    OPC_VDIV_DU = 0x70e58000,
+    OPC_VMOD_BU = 0x70e60000,
+    OPC_VMOD_HU = 0x70e68000,
+    OPC_VMOD_WU = 0x70e70000,
+    OPC_VMOD_DU = 0x70e78000,
+    OPC_VSLL_B = 0x70e80000,
+    OPC_VSLL_H = 0x70e88000,
+    OPC_VSLL_W = 0x70e90000,
+    OPC_VSLL_D = 0x70e98000,
+    OPC_VSRL_B = 0x70ea0000,
+    OPC_VSRL_H = 0x70ea8000,
+    OPC_VSRL_W = 0x70eb0000,
+    OPC_VSRL_D = 0x70eb8000,
+    OPC_VSRA_B = 0x70ec0000,
+    OPC_VSRA_H = 0x70ec8000,
+    OPC_VSRA_W = 0x70ed0000,
+    OPC_VSRA_D = 0x70ed8000,
+    OPC_VROTR_B = 0x70ee0000,
+    OPC_VROTR_H = 0x70ee8000,
+    OPC_VROTR_W = 0x70ef0000,
+    OPC_VROTR_D = 0x70ef8000,
+    OPC_VSRLR_B = 0x70f00000,
+    OPC_VSRLR_H = 0x70f08000,
+    OPC_VSRLR_W = 0x70f10000,
+    OPC_VSRLR_D = 0x70f18000,
+    OPC_VSRAR_B = 0x70f20000,
+    OPC_VSRAR_H = 0x70f28000,
+    OPC_VSRAR_W = 0x70f30000,
+    OPC_VSRAR_D = 0x70f38000,
+    OPC_VSRLN_B_H = 0x70f48000,
+    OPC_VSRLN_H_W = 0x70f50000,
+    OPC_VSRLN_W_D = 0x70f58000,
+    OPC_VSRAN_B_H = 0x70f68000,
+    OPC_VSRAN_H_W = 0x70f70000,
+    OPC_VSRAN_W_D = 0x70f78000,
+    OPC_VSRLRN_B_H = 0x70f88000,
+    OPC_VSRLRN_H_W = 0x70f90000,
+    OPC_VSRLRN_W_D = 0x70f98000,
+    OPC_VSRARN_B_H = 0x70fa8000,
+    OPC_VSRARN_H_W = 0x70fb0000,
+    OPC_VSRARN_W_D = 0x70fb8000,
+    OPC_VSSRLN_B_H = 0x70fc8000,
+    OPC_VSSRLN_H_W = 0x70fd0000,
+    OPC_VSSRLN_W_D = 0x70fd8000,
+    OPC_VSSRAN_B_H = 0x70fe8000,
+    OPC_VSSRAN_H_W = 0x70ff0000,
+    OPC_VSSRAN_W_D = 0x70ff8000,
+    OPC_VSSRLRN_B_H = 0x71008000,
+    OPC_VSSRLRN_H_W = 0x71010000,
+    OPC_VSSRLRN_W_D = 0x71018000,
+    OPC_VSSRARN_B_H = 0x71028000,
+    OPC_VSSRARN_H_W = 0x71030000,
+    OPC_VSSRARN_W_D = 0x71038000,
+    OPC_VSSRLN_BU_H = 0x71048000,
+    OPC_VSSRLN_HU_W = 0x71050000,
+    OPC_VSSRLN_WU_D = 0x71058000,
+    OPC_VSSRAN_BU_H = 0x71068000,
+    OPC_VSSRAN_HU_W = 0x71070000,
+    OPC_VSSRAN_WU_D = 0x71078000,
+    OPC_VSSRLRN_BU_H = 0x71088000,
+    OPC_VSSRLRN_HU_W = 0x71090000,
+    OPC_VSSRLRN_WU_D = 0x71098000,
+    OPC_VSSRARN_BU_H = 0x710a8000,
+    OPC_VSSRARN_HU_W = 0x710b0000,
+    OPC_VSSRARN_WU_D = 0x710b8000,
+    OPC_VBITCLR_B = 0x710c0000,
+    OPC_VBITCLR_H = 0x710c8000,
+    OPC_VBITCLR_W = 0x710d0000,
+    OPC_VBITCLR_D = 0x710d8000,
+    OPC_VBITSET_B = 0x710e0000,
+    OPC_VBITSET_H = 0x710e8000,
+    OPC_VBITSET_W = 0x710f0000,
+    OPC_VBITSET_D = 0x710f8000,
+    OPC_VBITREV_B = 0x71100000,
+    OPC_VBITREV_H = 0x71108000,
+    OPC_VBITREV_W = 0x71110000,
+    OPC_VBITREV_D = 0x71118000,
+    OPC_VPACKEV_B = 0x71160000,
+    OPC_VPACKEV_H = 0x71168000,
+    OPC_VPACKEV_W = 0x71170000,
+    OPC_VPACKEV_D = 0x71178000,
+    OPC_VPACKOD_B = 0x71180000,
+    OPC_VPACKOD_H = 0x71188000,
+    OPC_VPACKOD_W = 0x71190000,
+    OPC_VPACKOD_D = 0x71198000,
+    OPC_VILVL_B = 0x711a0000,
+    OPC_VILVL_H = 0x711a8000,
+    OPC_VILVL_W = 0x711b0000,
+    OPC_VILVL_D = 0x711b8000,
+    OPC_VILVH_B = 0x711c0000,
+    OPC_VILVH_H = 0x711c8000,
+    OPC_VILVH_W = 0x711d0000,
+    OPC_VILVH_D = 0x711d8000,
+    OPC_VPICKEV_B = 0x711e0000,
+    OPC_VPICKEV_H = 0x711e8000,
+    OPC_VPICKEV_W = 0x711f0000,
+    OPC_VPICKEV_D = 0x711f8000,
+    OPC_VPICKOD_B = 0x71200000,
+    OPC_VPICKOD_H = 0x71208000,
+    OPC_VPICKOD_W = 0x71210000,
+    OPC_VPICKOD_D = 0x71218000,
+    OPC_VREPLVE_B = 0x71220000,
+    OPC_VREPLVE_H = 0x71228000,
+    OPC_VREPLVE_W = 0x71230000,
+    OPC_VREPLVE_D = 0x71238000,
+    OPC_VAND_V = 0x71260000,
+    OPC_VOR_V = 0x71268000,
+    OPC_VXOR_V = 0x71270000,
+    OPC_VNOR_V = 0x71278000,
+    OPC_VANDN_V = 0x71280000,
+    OPC_VORN_V = 0x71288000,
+    OPC_VFRSTP_B = 0x712b0000,
+    OPC_VFRSTP_H = 0x712b8000,
+    OPC_VADD_Q = 0x712d0000,
+    OPC_VSUB_Q = 0x712d8000,
+    OPC_VSIGNCOV_B = 0x712e0000,
+    OPC_VSIGNCOV_H = 0x712e8000,
+    OPC_VSIGNCOV_W = 0x712f0000,
+    OPC_VSIGNCOV_D = 0x712f8000,
+    OPC_VFADD_S = 0x71308000,
+    OPC_VFADD_D = 0x71310000,
+    OPC_VFSUB_S = 0x71328000,
+    OPC_VFSUB_D = 0x71330000,
+    OPC_VFMUL_S = 0x71388000,
+    OPC_VFMUL_D = 0x71390000,
+    OPC_VFDIV_S = 0x713a8000,
+    OPC_VFDIV_D = 0x713b0000,
+    OPC_VFMAX_S = 0x713c8000,
+    OPC_VFMAX_D = 0x713d0000,
+    OPC_VFMIN_S = 0x713e8000,
+    OPC_VFMIN_D = 0x713f0000,
+    OPC_VFMAXA_S = 0x71408000,
+    OPC_VFMAXA_D = 0x71410000,
+    OPC_VFMINA_S = 0x71428000,
+    OPC_VFMINA_D = 0x71430000,
+    OPC_VFCVT_H_S = 0x71460000,
+    OPC_VFCVT_S_D = 0x71468000,
+    OPC_VFFINT_S_L = 0x71480000,
+    OPC_VFTINT_W_D = 0x71498000,
+    OPC_VFTINTRM_W_D = 0x714a0000,
+    OPC_VFTINTRP_W_D = 0x714a8000,
+    OPC_VFTINTRZ_W_D = 0x714b0000,
+    OPC_VFTINTRNE_W_D = 0x714b8000,
+    OPC_VSHUF_H = 0x717a8000,
+    OPC_VSHUF_W = 0x717b0000,
+    OPC_VSHUF_D = 0x717b8000,
+    OPC_VSEQI_B = 0x72800000,
+    OPC_VSEQI_H = 0x72808000,
+    OPC_VSEQI_W = 0x72810000,
+    OPC_VSEQI_D = 0x72818000,
+    OPC_VSLEI_B = 0x72820000,
+    OPC_VSLEI_H = 0x72828000,
+    OPC_VSLEI_W = 0x72830000,
+    OPC_VSLEI_D = 0x72838000,
+    OPC_VSLEI_BU = 0x72840000,
+    OPC_VSLEI_HU = 0x72848000,
+    OPC_VSLEI_WU = 0x72850000,
+    OPC_VSLEI_DU = 0x72858000,
+    OPC_VSLTI_B = 0x72860000,
+    OPC_VSLTI_H = 0x72868000,
+    OPC_VSLTI_W = 0x72870000,
+    OPC_VSLTI_D = 0x72878000,
+    OPC_VSLTI_BU = 0x72880000,
+    OPC_VSLTI_HU = 0x72888000,
+    OPC_VSLTI_WU = 0x72890000,
+    OPC_VSLTI_DU = 0x72898000,
+    OPC_VADDI_BU = 0x728a0000,
+    OPC_VADDI_HU = 0x728a8000,
+    OPC_VADDI_WU = 0x728b0000,
+    OPC_VADDI_DU = 0x728b8000,
+    OPC_VSUBI_BU = 0x728c0000,
+    OPC_VSUBI_HU = 0x728c8000,
+    OPC_VSUBI_WU = 0x728d0000,
+    OPC_VSUBI_DU = 0x728d8000,
+    OPC_VBSLL_V = 0x728e0000,
+    OPC_VBSRL_V = 0x728e8000,
+    OPC_VMAXI_B = 0x72900000,
+    OPC_VMAXI_H = 0x72908000,
+    OPC_VMAXI_W = 0x72910000,
+    OPC_VMAXI_D = 0x72918000,
+    OPC_VMINI_B = 0x72920000,
+    OPC_VMINI_H = 0x72928000,
+    OPC_VMINI_W = 0x72930000,
+    OPC_VMINI_D = 0x72938000,
+    OPC_VMAXI_BU = 0x72940000,
+    OPC_VMAXI_HU = 0x72948000,
+    OPC_VMAXI_WU = 0x72950000,
+    OPC_VMAXI_DU = 0x72958000,
+    OPC_VMINI_BU = 0x72960000,
+    OPC_VMINI_HU = 0x72968000,
+    OPC_VMINI_WU = 0x72970000,
+    OPC_VMINI_DU = 0x72978000,
+    OPC_VFRSTPI_B = 0x729a0000,
+    OPC_VFRSTPI_H = 0x729a8000,
+    OPC_VCLO_B = 0x729c0000,
+    OPC_VCLO_H = 0x729c0400,
+    OPC_VCLO_W = 0x729c0800,
+    OPC_VCLO_D = 0x729c0c00,
+    OPC_VCLZ_B = 0x729c1000,
+    OPC_VCLZ_H = 0x729c1400,
+    OPC_VCLZ_W = 0x729c1800,
+    OPC_VCLZ_D = 0x729c1c00,
+    OPC_VPCNT_B = 0x729c2000,
+    OPC_VPCNT_H = 0x729c2400,
+    OPC_VPCNT_W = 0x729c2800,
+    OPC_VPCNT_D = 0x729c2c00,
+    OPC_VNEG_B = 0x729c3000,
+    OPC_VNEG_H = 0x729c3400,
+    OPC_VNEG_W = 0x729c3800,
+    OPC_VNEG_D = 0x729c3c00,
+    OPC_VMSKLTZ_B = 0x729c4000,
+    OPC_VMSKLTZ_H = 0x729c4400,
+    OPC_VMSKLTZ_W = 0x729c4800,
+    OPC_VMSKLTZ_D = 0x729c4c00,
+    OPC_VMSKGEZ_B = 0x729c5000,
+    OPC_VMSKNZ_B = 0x729c6000,
+    OPC_VSETEQZ_V = 0x729c9800,
+    OPC_VSETNEZ_V = 0x729c9c00,
+    OPC_VSETANYEQZ_B = 0x729ca000,
+    OPC_VSETANYEQZ_H = 0x729ca400,
+    OPC_VSETANYEQZ_W = 0x729ca800,
+    OPC_VSETANYEQZ_D = 0x729cac00,
+    OPC_VSETALLNEZ_B = 0x729cb000,
+    OPC_VSETALLNEZ_H = 0x729cb400,
+    OPC_VSETALLNEZ_W = 0x729cb800,
+    OPC_VSETALLNEZ_D = 0x729cbc00,
+    OPC_VFLOGB_S = 0x729cc400,
+    OPC_VFLOGB_D = 0x729cc800,
+    OPC_VFCLASS_S = 0x729cd400,
+    OPC_VFCLASS_D = 0x729cd800,
+    OPC_VFSQRT_S = 0x729ce400,
+    OPC_VFSQRT_D = 0x729ce800,
+    OPC_VFRECIP_S = 0x729cf400,
+    OPC_VFRECIP_D = 0x729cf800,
+    OPC_VFRSQRT_S = 0x729d0400,
+    OPC_VFRSQRT_D = 0x729d0800,
+    OPC_VFRINT_S = 0x729d3400,
+    OPC_VFRINT_D = 0x729d3800,
+    OPC_VFRINTRM_S = 0x729d4400,
+    OPC_VFRINTRM_D = 0x729d4800,
+    OPC_VFRINTRP_S = 0x729d5400,
+    OPC_VFRINTRP_D = 0x729d5800,
+    OPC_VFRINTRZ_S = 0x729d6400,
+    OPC_VFRINTRZ_D = 0x729d6800,
+    OPC_VFRINTRNE_S = 0x729d7400,
+    OPC_VFRINTRNE_D = 0x729d7800,
+    OPC_VFCVTL_S_H = 0x729de800,
+    OPC_VFCVTH_S_H = 0x729dec00,
+    OPC_VFCVTL_D_S = 0x729df000,
+    OPC_VFCVTH_D_S = 0x729df400,
+    OPC_VFFINT_S_W = 0x729e0000,
+    OPC_VFFINT_S_WU = 0x729e0400,
+    OPC_VFFINT_D_L = 0x729e0800,
+    OPC_VFFINT_D_LU = 0x729e0c00,
+    OPC_VFFINTL_D_W = 0x729e1000,
+    OPC_VFFINTH_D_W = 0x729e1400,
+    OPC_VFTINT_W_S = 0x729e3000,
+    OPC_VFTINT_L_D = 0x729e3400,
+    OPC_VFTINTRM_W_S = 0x729e3800,
+    OPC_VFTINTRM_L_D = 0x729e3c00,
+    OPC_VFTINTRP_W_S = 0x729e4000,
+    OPC_VFTINTRP_L_D = 0x729e4400,
+    OPC_VFTINTRZ_W_S = 0x729e4800,
+    OPC_VFTINTRZ_L_D = 0x729e4c00,
+    OPC_VFTINTRNE_W_S = 0x729e5000,
+    OPC_VFTINTRNE_L_D = 0x729e5400,
+    OPC_VFTINT_WU_S = 0x729e5800,
+    OPC_VFTINT_LU_D = 0x729e5c00,
+    OPC_VFTINTRZ_WU_S = 0x729e7000,
+    OPC_VFTINTRZ_LU_D = 0x729e7400,
+    OPC_VFTINTL_L_S = 0x729e8000,
+    OPC_VFTINTH_L_S = 0x729e8400,
+    OPC_VFTINTRML_L_S = 0x729e8800,
+    OPC_VFTINTRMH_L_S = 0x729e8c00,
+    OPC_VFTINTRPL_L_S = 0x729e9000,
+    OPC_VFTINTRPH_L_S = 0x729e9400,
+    OPC_VFTINTRZL_L_S = 0x729e9800,
+    OPC_VFTINTRZH_L_S = 0x729e9c00,
+    OPC_VFTINTRNEL_L_S = 0x729ea000,
+    OPC_VFTINTRNEH_L_S = 0x729ea400,
+    OPC_VEXTH_H_B = 0x729ee000,
+    OPC_VEXTH_W_H = 0x729ee400,
+    OPC_VEXTH_D_W = 0x729ee800,
+    OPC_VEXTH_Q_D = 0x729eec00,
+    OPC_VEXTH_HU_BU = 0x729ef000,
+    OPC_VEXTH_WU_HU = 0x729ef400,
+    OPC_VEXTH_DU_WU = 0x729ef800,
+    OPC_VEXTH_QU_DU = 0x729efc00,
+    OPC_VREPLGR2VR_B = 0x729f0000,
+    OPC_VREPLGR2VR_H = 0x729f0400,
+    OPC_VREPLGR2VR_W = 0x729f0800,
+    OPC_VREPLGR2VR_D = 0x729f0c00,
+    OPC_VROTRI_B = 0x72a02000,
+    OPC_VROTRI_H = 0x72a04000,
+    OPC_VROTRI_W = 0x72a08000,
+    OPC_VROTRI_D = 0x72a10000,
+    OPC_VSRLRI_B = 0x72a42000,
+    OPC_VSRLRI_H = 0x72a44000,
+    OPC_VSRLRI_W = 0x72a48000,
+    OPC_VSRLRI_D = 0x72a50000,
+    OPC_VSRARI_B = 0x72a82000,
+    OPC_VSRARI_H = 0x72a84000,
+    OPC_VSRARI_W = 0x72a88000,
+    OPC_VSRARI_D = 0x72a90000,
+    OPC_VINSGR2VR_B = 0x72eb8000,
+    OPC_VINSGR2VR_H = 0x72ebc000,
+    OPC_VINSGR2VR_W = 0x72ebe000,
+    OPC_VINSGR2VR_D = 0x72ebf000,
+    OPC_VPICKVE2GR_B = 0x72ef8000,
+    OPC_VPICKVE2GR_H = 0x72efc000,
+    OPC_VPICKVE2GR_W = 0x72efe000,
+    OPC_VPICKVE2GR_D = 0x72eff000,
+    OPC_VPICKVE2GR_BU = 0x72f38000,
+    OPC_VPICKVE2GR_HU = 0x72f3c000,
+    OPC_VPICKVE2GR_WU = 0x72f3e000,
+    OPC_VPICKVE2GR_DU = 0x72f3f000,
+    OPC_VREPLVEI_B = 0x72f78000,
+    OPC_VREPLVEI_H = 0x72f7c000,
+    OPC_VREPLVEI_W = 0x72f7e000,
+    OPC_VREPLVEI_D = 0x72f7f000,
+    OPC_VSLLWIL_H_B = 0x73082000,
+    OPC_VSLLWIL_W_H = 0x73084000,
+    OPC_VSLLWIL_D_W = 0x73088000,
+    OPC_VEXTL_Q_D = 0x73090000,
+    OPC_VSLLWIL_HU_BU = 0x730c2000,
+    OPC_VSLLWIL_WU_HU = 0x730c4000,
+    OPC_VSLLWIL_DU_WU = 0x730c8000,
+    OPC_VEXTL_QU_DU = 0x730d0000,
+    OPC_VBITCLRI_B = 0x73102000,
+    OPC_VBITCLRI_H = 0x73104000,
+    OPC_VBITCLRI_W = 0x73108000,
+    OPC_VBITCLRI_D = 0x73110000,
+    OPC_VBITSETI_B = 0x73142000,
+    OPC_VBITSETI_H = 0x73144000,
+    OPC_VBITSETI_W = 0x73148000,
+    OPC_VBITSETI_D = 0x73150000,
+    OPC_VBITREVI_B = 0x73182000,
+    OPC_VBITREVI_H = 0x73184000,
+    OPC_VBITREVI_W = 0x73188000,
+    OPC_VBITREVI_D = 0x73190000,
+    OPC_VSAT_B = 0x73242000,
+    OPC_VSAT_H = 0x73244000,
+    OPC_VSAT_W = 0x73248000,
+    OPC_VSAT_D = 0x73250000,
+    OPC_VSAT_BU = 0x73282000,
+    OPC_VSAT_HU = 0x73284000,
+    OPC_VSAT_WU = 0x73288000,
+    OPC_VSAT_DU = 0x73290000,
+    OPC_VSLLI_B = 0x732c2000,
+    OPC_VSLLI_H = 0x732c4000,
+    OPC_VSLLI_W = 0x732c8000,
+    OPC_VSLLI_D = 0x732d0000,
+    OPC_VSRLI_B = 0x73302000,
+    OPC_VSRLI_H = 0x73304000,
+    OPC_VSRLI_W = 0x73308000,
+    OPC_VSRLI_D = 0x73310000,
+    OPC_VSRAI_B = 0x73342000,
+    OPC_VSRAI_H = 0x73344000,
+    OPC_VSRAI_W = 0x73348000,
+    OPC_VSRAI_D = 0x73350000,
+    OPC_VSRLNI_B_H = 0x73404000,
+    OPC_VSRLNI_H_W = 0x73408000,
+    OPC_VSRLNI_W_D = 0x73410000,
+    OPC_VSRLNI_D_Q = 0x73420000,
+    OPC_VSRLRNI_B_H = 0x73444000,
+    OPC_VSRLRNI_H_W = 0x73448000,
+    OPC_VSRLRNI_W_D = 0x73450000,
+    OPC_VSRLRNI_D_Q = 0x73460000,
+    OPC_VSSRLNI_B_H = 0x73484000,
+    OPC_VSSRLNI_H_W = 0x73488000,
+    OPC_VSSRLNI_W_D = 0x73490000,
+    OPC_VSSRLNI_D_Q = 0x734a0000,
+    OPC_VSSRLNI_BU_H = 0x734c4000,
+    OPC_VSSRLNI_HU_W = 0x734c8000,
+    OPC_VSSRLNI_WU_D = 0x734d0000,
+    OPC_VSSRLNI_DU_Q = 0x734e0000,
+    OPC_VSSRLRNI_B_H = 0x73504000,
+    OPC_VSSRLRNI_H_W = 0x73508000,
+    OPC_VSSRLRNI_W_D = 0x73510000,
+    OPC_VSSRLRNI_D_Q = 0x73520000,
+    OPC_VSSRLRNI_BU_H = 0x73544000,
+    OPC_VSSRLRNI_HU_W = 0x73548000,
+    OPC_VSSRLRNI_WU_D = 0x73550000,
+    OPC_VSSRLRNI_DU_Q = 0x73560000,
+    OPC_VSRANI_B_H = 0x73584000,
+    OPC_VSRANI_H_W = 0x73588000,
+    OPC_VSRANI_W_D = 0x73590000,
+    OPC_VSRANI_D_Q = 0x735a0000,
+    OPC_VSRARNI_B_H = 0x735c4000,
+    OPC_VSRARNI_H_W = 0x735c8000,
+    OPC_VSRARNI_W_D = 0x735d0000,
+    OPC_VSRARNI_D_Q = 0x735e0000,
+    OPC_VSSRANI_B_H = 0x73604000,
+    OPC_VSSRANI_H_W = 0x73608000,
+    OPC_VSSRANI_W_D = 0x73610000,
+    OPC_VSSRANI_D_Q = 0x73620000,
+    OPC_VSSRANI_BU_H = 0x73644000,
+    OPC_VSSRANI_HU_W = 0x73648000,
+    OPC_VSSRANI_WU_D = 0x73650000,
+    OPC_VSSRANI_DU_Q = 0x73660000,
+    OPC_VSSRARNI_B_H = 0x73684000,
+    OPC_VSSRARNI_H_W = 0x73688000,
+    OPC_VSSRARNI_W_D = 0x73690000,
+    OPC_VSSRARNI_D_Q = 0x736a0000,
+    OPC_VSSRARNI_BU_H = 0x736c4000,
+    OPC_VSSRARNI_HU_W = 0x736c8000,
+    OPC_VSSRARNI_WU_D = 0x736d0000,
+    OPC_VSSRARNI_DU_Q = 0x736e0000,
+    OPC_VEXTRINS_D = 0x73800000,
+    OPC_VEXTRINS_W = 0x73840000,
+    OPC_VEXTRINS_H = 0x73880000,
+    OPC_VEXTRINS_B = 0x738c0000,
+    OPC_VSHUF4I_B = 0x73900000,
+    OPC_VSHUF4I_H = 0x73940000,
+    OPC_VSHUF4I_W = 0x73980000,
+    OPC_VSHUF4I_D = 0x739c0000,
+    OPC_VBITSELI_B = 0x73c40000,
+    OPC_VANDI_B = 0x73d00000,
+    OPC_VORI_B = 0x73d40000,
+    OPC_VXORI_B = 0x73d80000,
+    OPC_VNORI_B = 0x73dc0000,
+    OPC_VLDI = 0x73e00000,
+    OPC_VPERMI_W = 0x73e40000,
 } LoongArchInsn;
 
 static int32_t __attribute__((unused))
@@ -133,6 +845,13 @@ encode_djk_slots(LoongArchInsn opc, uint32_t d, uint32_t j, uint32_t k)
     return opc | d | j << 5 | k << 10;
 }
 
+static int32_t __attribute__((unused))
+encode_djka_slots(LoongArchInsn opc, uint32_t d, uint32_t j, uint32_t k,
+                  uint32_t a)
+{
+    return opc | d | j << 5 | k << 10 | a << 15;
+}
+
 static int32_t __attribute__((unused))
 encode_djkm_slots(LoongArchInsn opc, uint32_t d, uint32_t j, uint32_t k,
                   uint32_t m)
@@ -140,12 +859,27 @@ encode_djkm_slots(LoongArchInsn opc, uint32_t d, uint32_t j, uint32_t k,
     return opc | d | j << 5 | k << 10 | m << 16;
 }
 
+static int32_t __attribute__((unused))
+encode_djkn_slots(LoongArchInsn opc, uint32_t d, uint32_t j, uint32_t k,
+                  uint32_t n)
+{
+    return opc | d | j << 5 | k << 10 | n << 18;
+}
+
 static int32_t __attribute__((unused))
 encode_dk_slots(LoongArchInsn opc, uint32_t d, uint32_t k)
 {
     return opc | d | k << 10;
 }
 
+static int32_t __attribute__((unused))
+encode_cdvj_insn(LoongArchInsn opc, TCGReg cd, TCGReg vj)
+{
+    tcg_debug_assert(cd >= 0 && cd <= 0x7);
+    tcg_debug_assert(vj >= 0x20 && vj <= 0x3f);
+    return encode_dj_slots(opc, cd, vj & 0x1f);
+}
+
 static int32_t __attribute__((unused))
 encode_dj_insn(LoongArchInsn opc, TCGReg d, TCGReg j)
 {
@@ -238,6 +972,42 @@ encode_dsj20_insn(LoongArchInsn opc, TCGReg d, int32_t sj20)
     return encode_dj_slots(opc, d, sj20 & 0xfffff);
 }
 
+static int32_t __attribute__((unused))
+encode_dvjuk1_insn(LoongArchInsn opc, TCGReg d, TCGReg vj, uint32_t uk1)
+{
+    tcg_debug_assert(d >= 0 && d <= 0x1f);
+    tcg_debug_assert(vj >= 0x20 && vj <= 0x3f);
+    tcg_debug_assert(uk1 <= 0x1);
+    return encode_djk_slots(opc, d, vj & 0x1f, uk1);
+}
+
+static int32_t __attribute__((unused))
+encode_dvjuk2_insn(LoongArchInsn opc, TCGReg d, TCGReg vj, uint32_t uk2)
+{
+    tcg_debug_assert(d >= 0 && d <= 0x1f);
+    tcg_debug_assert(vj >= 0x20 && vj <= 0x3f);
+    tcg_debug_assert(uk2 <= 0x3);
+    return encode_djk_slots(opc, d, vj & 0x1f, uk2);
+}
+
+static int32_t __attribute__((unused))
+encode_dvjuk3_insn(LoongArchInsn opc, TCGReg d, TCGReg vj, uint32_t uk3)
+{
+    tcg_debug_assert(d >= 0 && d <= 0x1f);
+    tcg_debug_assert(vj >= 0x20 && vj <= 0x3f);
+    tcg_debug_assert(uk3 <= 0x7);
+    return encode_djk_slots(opc, d, vj & 0x1f, uk3);
+}
+
+static int32_t __attribute__((unused))
+encode_dvjuk4_insn(LoongArchInsn opc, TCGReg d, TCGReg vj, uint32_t uk4)
+{
+    tcg_debug_assert(d >= 0 && d <= 0x1f);
+    tcg_debug_assert(vj >= 0x20 && vj <= 0x3f);
+    tcg_debug_assert(uk4 <= 0xf);
+    return encode_djk_slots(opc, d, vj & 0x1f, uk4);
+}
+
 static int32_t __attribute__((unused))
 encode_sd10k16_insn(LoongArchInsn opc, int32_t sd10k16)
 {
@@ -252,6 +1022,265 @@ encode_ud15_insn(LoongArchInsn opc, uint32_t ud15)
     return encode_d_slot(opc, ud15);
 }
 
+static int32_t __attribute__((unused))
+encode_vdj_insn(LoongArchInsn opc, TCGReg vd, TCGReg j)
+{
+    tcg_debug_assert(vd >= 0x20 && vd <= 0x3f);
+    tcg_debug_assert(j >= 0 && j <= 0x1f);
+    return encode_dj_slots(opc, vd & 0x1f, j);
+}
+
+static int32_t __attribute__((unused))
+encode_vdjk_insn(LoongArchInsn opc, TCGReg vd, TCGReg j, TCGReg k)
+{
+    tcg_debug_assert(vd >= 0x20 && vd <= 0x3f);
+    tcg_debug_assert(j >= 0 && j <= 0x1f);
+    tcg_debug_assert(k >= 0 && k <= 0x1f);
+    return encode_djk_slots(opc, vd & 0x1f, j, k);
+}
+
+static int32_t __attribute__((unused))
+encode_vdjsk10_insn(LoongArchInsn opc, TCGReg vd, TCGReg j, int32_t sk10)
+{
+    tcg_debug_assert(vd >= 0x20 && vd <= 0x3f);
+    tcg_debug_assert(j >= 0 && j <= 0x1f);
+    tcg_debug_assert(sk10 >= -0x200 && sk10 <= 0x1ff);
+    return encode_djk_slots(opc, vd & 0x1f, j, sk10 & 0x3ff);
+}
+
+static int32_t __attribute__((unused))
+encode_vdjsk11_insn(LoongArchInsn opc, TCGReg vd, TCGReg j, int32_t sk11)
+{
+    tcg_debug_assert(vd >= 0x20 && vd <= 0x3f);
+    tcg_debug_assert(j >= 0 && j <= 0x1f);
+    tcg_debug_assert(sk11 >= -0x400 && sk11 <= 0x3ff);
+    return encode_djk_slots(opc, vd & 0x1f, j, sk11 & 0x7ff);
+}
+
+static int32_t __attribute__((unused))
+encode_vdjsk12_insn(LoongArchInsn opc, TCGReg vd, TCGReg j, int32_t sk12)
+{
+    tcg_debug_assert(vd >= 0x20 && vd <= 0x3f);
+    tcg_debug_assert(j >= 0 && j <= 0x1f);
+    tcg_debug_assert(sk12 >= -0x800 && sk12 <= 0x7ff);
+    return encode_djk_slots(opc, vd & 0x1f, j, sk12 & 0xfff);
+}
+
+static int32_t __attribute__((unused))
+encode_vdjsk8un1_insn(LoongArchInsn opc, TCGReg vd, TCGReg j, int32_t sk8,
+                      uint32_t un1)
+{
+    tcg_debug_assert(vd >= 0x20 && vd <= 0x3f);
+    tcg_debug_assert(j >= 0 && j <= 0x1f);
+    tcg_debug_assert(sk8 >= -0x80 && sk8 <= 0x7f);
+    tcg_debug_assert(un1 <= 0x1);
+    return encode_djkn_slots(opc, vd & 0x1f, j, sk8 & 0xff, un1);
+}
+
+static int32_t __attribute__((unused))
+encode_vdjsk8un2_insn(LoongArchInsn opc, TCGReg vd, TCGReg j, int32_t sk8,
+                      uint32_t un2)
+{
+    tcg_debug_assert(vd >= 0x20 && vd <= 0x3f);
+    tcg_debug_assert(j >= 0 && j <= 0x1f);
+    tcg_debug_assert(sk8 >= -0x80 && sk8 <= 0x7f);
+    tcg_debug_assert(un2 <= 0x3);
+    return encode_djkn_slots(opc, vd & 0x1f, j, sk8 & 0xff, un2);
+}
+
+static int32_t __attribute__((unused))
+encode_vdjsk8un3_insn(LoongArchInsn opc, TCGReg vd, TCGReg j, int32_t sk8,
+                      uint32_t un3)
+{
+    tcg_debug_assert(vd >= 0x20 && vd <= 0x3f);
+    tcg_debug_assert(j >= 0 && j <= 0x1f);
+    tcg_debug_assert(sk8 >= -0x80 && sk8 <= 0x7f);
+    tcg_debug_assert(un3 <= 0x7);
+    return encode_djkn_slots(opc, vd & 0x1f, j, sk8 & 0xff, un3);
+}
+
+static int32_t __attribute__((unused))
+encode_vdjsk8un4_insn(LoongArchInsn opc, TCGReg vd, TCGReg j, int32_t sk8,
+                      uint32_t un4)
+{
+    tcg_debug_assert(vd >= 0x20 && vd <= 0x3f);
+    tcg_debug_assert(j >= 0 && j <= 0x1f);
+    tcg_debug_assert(sk8 >= -0x80 && sk8 <= 0x7f);
+    tcg_debug_assert(un4 <= 0xf);
+    return encode_djkn_slots(opc, vd & 0x1f, j, sk8 & 0xff, un4);
+}
+
+static int32_t __attribute__((unused))
+encode_vdjsk9_insn(LoongArchInsn opc, TCGReg vd, TCGReg j, int32_t sk9)
+{
+    tcg_debug_assert(vd >= 0x20 && vd <= 0x3f);
+    tcg_debug_assert(j >= 0 && j <= 0x1f);
+    tcg_debug_assert(sk9 >= -0x100 && sk9 <= 0xff);
+    return encode_djk_slots(opc, vd & 0x1f, j, sk9 & 0x1ff);
+}
+
+static int32_t __attribute__((unused))
+encode_vdjuk1_insn(LoongArchInsn opc, TCGReg vd, TCGReg j, uint32_t uk1)
+{
+    tcg_debug_assert(vd >= 0x20 && vd <= 0x3f);
+    tcg_debug_assert(j >= 0 && j <= 0x1f);
+    tcg_debug_assert(uk1 <= 0x1);
+    return encode_djk_slots(opc, vd & 0x1f, j, uk1);
+}
+
+static int32_t __attribute__((unused))
+encode_vdjuk2_insn(LoongArchInsn opc, TCGReg vd, TCGReg j, uint32_t uk2)
+{
+    tcg_debug_assert(vd >= 0x20 && vd <= 0x3f);
+    tcg_debug_assert(j >= 0 && j <= 0x1f);
+    tcg_debug_assert(uk2 <= 0x3);
+    return encode_djk_slots(opc, vd & 0x1f, j, uk2);
+}
+
+static int32_t __attribute__((unused))
+encode_vdjuk3_insn(LoongArchInsn opc, TCGReg vd, TCGReg j, uint32_t uk3)
+{
+    tcg_debug_assert(vd >= 0x20 && vd <= 0x3f);
+    tcg_debug_assert(j >= 0 && j <= 0x1f);
+    tcg_debug_assert(uk3 <= 0x7);
+    return encode_djk_slots(opc, vd & 0x1f, j, uk3);
+}
+
+static int32_t __attribute__((unused))
+encode_vdjuk4_insn(LoongArchInsn opc, TCGReg vd, TCGReg j, uint32_t uk4)
+{
+    tcg_debug_assert(vd >= 0x20 && vd <= 0x3f);
+    tcg_debug_assert(j >= 0 && j <= 0x1f);
+    tcg_debug_assert(uk4 <= 0xf);
+    return encode_djk_slots(opc, vd & 0x1f, j, uk4);
+}
+
+static int32_t __attribute__((unused))
+encode_vdsj13_insn(LoongArchInsn opc, TCGReg vd, int32_t sj13)
+{
+    tcg_debug_assert(vd >= 0x20 && vd <= 0x3f);
+    tcg_debug_assert(sj13 >= -0x1000 && sj13 <= 0xfff);
+    return encode_dj_slots(opc, vd & 0x1f, sj13 & 0x1fff);
+}
+
+static int32_t __attribute__((unused))
+encode_vdvj_insn(LoongArchInsn opc, TCGReg vd, TCGReg vj)
+{
+    tcg_debug_assert(vd >= 0x20 && vd <= 0x3f);
+    tcg_debug_assert(vj >= 0x20 && vj <= 0x3f);
+    return encode_dj_slots(opc, vd & 0x1f, vj & 0x1f);
+}
+
+static int32_t __attribute__((unused))
+encode_vdvjk_insn(LoongArchInsn opc, TCGReg vd, TCGReg vj, TCGReg k)
+{
+    tcg_debug_assert(vd >= 0x20 && vd <= 0x3f);
+    tcg_debug_assert(vj >= 0x20 && vj <= 0x3f);
+    tcg_debug_assert(k >= 0 && k <= 0x1f);
+    return encode_djk_slots(opc, vd & 0x1f, vj & 0x1f, k);
+}
+
+static int32_t __attribute__((unused))
+encode_vdvjsk5_insn(LoongArchInsn opc, TCGReg vd, TCGReg vj, int32_t sk5)
+{
+    tcg_debug_assert(vd >= 0x20 && vd <= 0x3f);
+    tcg_debug_assert(vj >= 0x20 && vj <= 0x3f);
+    tcg_debug_assert(sk5 >= -0x10 && sk5 <= 0xf);
+    return encode_djk_slots(opc, vd & 0x1f, vj & 0x1f, sk5 & 0x1f);
+}
+
+static int32_t __attribute__((unused))
+encode_vdvjuk1_insn(LoongArchInsn opc, TCGReg vd, TCGReg vj, uint32_t uk1)
+{
+    tcg_debug_assert(vd >= 0x20 && vd <= 0x3f);
+    tcg_debug_assert(vj >= 0x20 && vj <= 0x3f);
+    tcg_debug_assert(uk1 <= 0x1);
+    return encode_djk_slots(opc, vd & 0x1f, vj & 0x1f, uk1);
+}
+
+static int32_t __attribute__((unused))
+encode_vdvjuk2_insn(LoongArchInsn opc, TCGReg vd, TCGReg vj, uint32_t uk2)
+{
+    tcg_debug_assert(vd >= 0x20 && vd <= 0x3f);
+    tcg_debug_assert(vj >= 0x20 && vj <= 0x3f);
+    tcg_debug_assert(uk2 <= 0x3);
+    return encode_djk_slots(opc, vd & 0x1f, vj & 0x1f, uk2);
+}
+
+static int32_t __attribute__((unused))
+encode_vdvjuk3_insn(LoongArchInsn opc, TCGReg vd, TCGReg vj, uint32_t uk3)
+{
+    tcg_debug_assert(vd >= 0x20 && vd <= 0x3f);
+    tcg_debug_assert(vj >= 0x20 && vj <= 0x3f);
+    tcg_debug_assert(uk3 <= 0x7);
+    return encode_djk_slots(opc, vd & 0x1f, vj & 0x1f, uk3);
+}
+
+static int32_t __attribute__((unused))
+encode_vdvjuk4_insn(LoongArchInsn opc, TCGReg vd, TCGReg vj, uint32_t uk4)
+{
+    tcg_debug_assert(vd >= 0x20 && vd <= 0x3f);
+    tcg_debug_assert(vj >= 0x20 && vj <= 0x3f);
+    tcg_debug_assert(uk4 <= 0xf);
+    return encode_djk_slots(opc, vd & 0x1f, vj & 0x1f, uk4);
+}
+
+static int32_t __attribute__((unused))
+encode_vdvjuk5_insn(LoongArchInsn opc, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_debug_assert(vd >= 0x20 && vd <= 0x3f);
+    tcg_debug_assert(vj >= 0x20 && vj <= 0x3f);
+    tcg_debug_assert(uk5 <= 0x1f);
+    return encode_djk_slots(opc, vd & 0x1f, vj & 0x1f, uk5);
+}
+
+static int32_t __attribute__((unused))
+encode_vdvjuk6_insn(LoongArchInsn opc, TCGReg vd, TCGReg vj, uint32_t uk6)
+{
+    tcg_debug_assert(vd >= 0x20 && vd <= 0x3f);
+    tcg_debug_assert(vj >= 0x20 && vj <= 0x3f);
+    tcg_debug_assert(uk6 <= 0x3f);
+    return encode_djk_slots(opc, vd & 0x1f, vj & 0x1f, uk6);
+}
+
+static int32_t __attribute__((unused))
+encode_vdvjuk7_insn(LoongArchInsn opc, TCGReg vd, TCGReg vj, uint32_t uk7)
+{
+    tcg_debug_assert(vd >= 0x20 && vd <= 0x3f);
+    tcg_debug_assert(vj >= 0x20 && vj <= 0x3f);
+    tcg_debug_assert(uk7 <= 0x7f);
+    return encode_djk_slots(opc, vd & 0x1f, vj & 0x1f, uk7);
+}
+
+static int32_t __attribute__((unused))
+encode_vdvjuk8_insn(LoongArchInsn opc, TCGReg vd, TCGReg vj, uint32_t uk8)
+{
+    tcg_debug_assert(vd >= 0x20 && vd <= 0x3f);
+    tcg_debug_assert(vj >= 0x20 && vj <= 0x3f);
+    tcg_debug_assert(uk8 <= 0xff);
+    return encode_djk_slots(opc, vd & 0x1f, vj & 0x1f, uk8);
+}
+
+static int32_t __attribute__((unused))
+encode_vdvjvk_insn(LoongArchInsn opc, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_debug_assert(vd >= 0x20 && vd <= 0x3f);
+    tcg_debug_assert(vj >= 0x20 && vj <= 0x3f);
+    tcg_debug_assert(vk >= 0x20 && vk <= 0x3f);
+    return encode_djk_slots(opc, vd & 0x1f, vj & 0x1f, vk & 0x1f);
+}
+
+static int32_t __attribute__((unused))
+encode_vdvjvkva_insn(LoongArchInsn opc, TCGReg vd, TCGReg vj, TCGReg vk,
+                     TCGReg va)
+{
+    tcg_debug_assert(vd >= 0x20 && vd <= 0x3f);
+    tcg_debug_assert(vj >= 0x20 && vj <= 0x3f);
+    tcg_debug_assert(vk >= 0x20 && vk <= 0x3f);
+    tcg_debug_assert(va >= 0x20 && va <= 0x3f);
+    return encode_djka_slots(opc, vd & 0x1f, vj & 0x1f, vk & 0x1f, va & 0x1f);
+}
+
 /* Emits the `clz.w d, j` instruction.  */
 static void __attribute__((unused))
 tcg_out_opc_clz_w(TCGContext *s, TCGReg d, TCGReg j)
@@ -711,6 +1740,384 @@ tcg_out_opc_xori(TCGContext *s, TCGReg d, TCGReg j, uint32_t uk12)
     tcg_out32(s, encode_djuk12_insn(OPC_XORI, d, j, uk12));
 }
 
+/* Emits the `vfmadd.s vd, vj, vk, va` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfmadd_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk, TCGReg va)
+{
+    tcg_out32(s, encode_vdvjvkva_insn(OPC_VFMADD_S, vd, vj, vk, va));
+}
+
+/* Emits the `vfmadd.d vd, vj, vk, va` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfmadd_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk, TCGReg va)
+{
+    tcg_out32(s, encode_vdvjvkva_insn(OPC_VFMADD_D, vd, vj, vk, va));
+}
+
+/* Emits the `vfmsub.s vd, vj, vk, va` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfmsub_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk, TCGReg va)
+{
+    tcg_out32(s, encode_vdvjvkva_insn(OPC_VFMSUB_S, vd, vj, vk, va));
+}
+
+/* Emits the `vfmsub.d vd, vj, vk, va` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfmsub_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk, TCGReg va)
+{
+    tcg_out32(s, encode_vdvjvkva_insn(OPC_VFMSUB_D, vd, vj, vk, va));
+}
+
+/* Emits the `vfnmadd.s vd, vj, vk, va` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfnmadd_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk, TCGReg va)
+{
+    tcg_out32(s, encode_vdvjvkva_insn(OPC_VFNMADD_S, vd, vj, vk, va));
+}
+
+/* Emits the `vfnmadd.d vd, vj, vk, va` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfnmadd_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk, TCGReg va)
+{
+    tcg_out32(s, encode_vdvjvkva_insn(OPC_VFNMADD_D, vd, vj, vk, va));
+}
+
+/* Emits the `vfnmsub.s vd, vj, vk, va` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfnmsub_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk, TCGReg va)
+{
+    tcg_out32(s, encode_vdvjvkva_insn(OPC_VFNMSUB_S, vd, vj, vk, va));
+}
+
+/* Emits the `vfnmsub.d vd, vj, vk, va` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfnmsub_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk, TCGReg va)
+{
+    tcg_out32(s, encode_vdvjvkva_insn(OPC_VFNMSUB_D, vd, vj, vk, va));
+}
+
+/* Emits the `vfcmp.caf.s vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfcmp_caf_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_CAF_S, vd, vj, vk));
+}
+
+/* Emits the `vfcmp.saf.s vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfcmp_saf_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_SAF_S, vd, vj, vk));
+}
+
+/* Emits the `vfcmp.clt.s vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfcmp_clt_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_CLT_S, vd, vj, vk));
+}
+
+/* Emits the `vfcmp.slt.s vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfcmp_slt_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_SLT_S, vd, vj, vk));
+}
+
+/* Emits the `vfcmp.ceq.s vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfcmp_ceq_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_CEQ_S, vd, vj, vk));
+}
+
+/* Emits the `vfcmp.seq.s vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfcmp_seq_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_SEQ_S, vd, vj, vk));
+}
+
+/* Emits the `vfcmp.cle.s vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfcmp_cle_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_CLE_S, vd, vj, vk));
+}
+
+/* Emits the `vfcmp.sle.s vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfcmp_sle_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_SLE_S, vd, vj, vk));
+}
+
+/* Emits the `vfcmp.cun.s vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfcmp_cun_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_CUN_S, vd, vj, vk));
+}
+
+/* Emits the `vfcmp.sun.s vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfcmp_sun_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_SUN_S, vd, vj, vk));
+}
+
+/* Emits the `vfcmp.cult.s vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfcmp_cult_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_CULT_S, vd, vj, vk));
+}
+
+/* Emits the `vfcmp.sult.s vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfcmp_sult_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_SULT_S, vd, vj, vk));
+}
+
+/* Emits the `vfcmp.cueq.s vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfcmp_cueq_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_CUEQ_S, vd, vj, vk));
+}
+
+/* Emits the `vfcmp.sueq.s vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfcmp_sueq_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_SUEQ_S, vd, vj, vk));
+}
+
+/* Emits the `vfcmp.cule.s vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfcmp_cule_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_CULE_S, vd, vj, vk));
+}
+
+/* Emits the `vfcmp.sule.s vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfcmp_sule_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_SULE_S, vd, vj, vk));
+}
+
+/* Emits the `vfcmp.cne.s vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfcmp_cne_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_CNE_S, vd, vj, vk));
+}
+
+/* Emits the `vfcmp.sne.s vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfcmp_sne_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_SNE_S, vd, vj, vk));
+}
+
+/* Emits the `vfcmp.cor.s vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfcmp_cor_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_COR_S, vd, vj, vk));
+}
+
+/* Emits the `vfcmp.sor.s vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfcmp_sor_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_SOR_S, vd, vj, vk));
+}
+
+/* Emits the `vfcmp.cune.s vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfcmp_cune_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_CUNE_S, vd, vj, vk));
+}
+
+/* Emits the `vfcmp.sune.s vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfcmp_sune_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_SUNE_S, vd, vj, vk));
+}
+
+/* Emits the `vfcmp.caf.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfcmp_caf_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_CAF_D, vd, vj, vk));
+}
+
+/* Emits the `vfcmp.saf.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfcmp_saf_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_SAF_D, vd, vj, vk));
+}
+
+/* Emits the `vfcmp.clt.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfcmp_clt_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_CLT_D, vd, vj, vk));
+}
+
+/* Emits the `vfcmp.slt.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfcmp_slt_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_SLT_D, vd, vj, vk));
+}
+
+/* Emits the `vfcmp.ceq.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfcmp_ceq_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_CEQ_D, vd, vj, vk));
+}
+
+/* Emits the `vfcmp.seq.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfcmp_seq_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_SEQ_D, vd, vj, vk));
+}
+
+/* Emits the `vfcmp.cle.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfcmp_cle_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_CLE_D, vd, vj, vk));
+}
+
+/* Emits the `vfcmp.sle.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfcmp_sle_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_SLE_D, vd, vj, vk));
+}
+
+/* Emits the `vfcmp.cun.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfcmp_cun_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_CUN_D, vd, vj, vk));
+}
+
+/* Emits the `vfcmp.sun.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfcmp_sun_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_SUN_D, vd, vj, vk));
+}
+
+/* Emits the `vfcmp.cult.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfcmp_cult_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_CULT_D, vd, vj, vk));
+}
+
+/* Emits the `vfcmp.sult.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfcmp_sult_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_SULT_D, vd, vj, vk));
+}
+
+/* Emits the `vfcmp.cueq.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfcmp_cueq_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_CUEQ_D, vd, vj, vk));
+}
+
+/* Emits the `vfcmp.sueq.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfcmp_sueq_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_SUEQ_D, vd, vj, vk));
+}
+
+/* Emits the `vfcmp.cule.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfcmp_cule_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_CULE_D, vd, vj, vk));
+}
+
+/* Emits the `vfcmp.sule.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfcmp_sule_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_SULE_D, vd, vj, vk));
+}
+
+/* Emits the `vfcmp.cne.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfcmp_cne_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_CNE_D, vd, vj, vk));
+}
+
+/* Emits the `vfcmp.sne.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfcmp_sne_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_SNE_D, vd, vj, vk));
+}
+
+/* Emits the `vfcmp.cor.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfcmp_cor_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_COR_D, vd, vj, vk));
+}
+
+/* Emits the `vfcmp.sor.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfcmp_sor_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_SOR_D, vd, vj, vk));
+}
+
+/* Emits the `vfcmp.cune.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfcmp_cune_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_CUNE_D, vd, vj, vk));
+}
+
+/* Emits the `vfcmp.sune.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfcmp_sune_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_SUNE_D, vd, vj, vk));
+}
+
+/* Emits the `vbitsel.v vd, vj, vk, va` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vbitsel_v(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk, TCGReg va)
+{
+    tcg_out32(s, encode_vdvjvkva_insn(OPC_VBITSEL_V, vd, vj, vk, va));
+}
+
+/* Emits the `vshuf.b vd, vj, vk, va` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vshuf_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk, TCGReg va)
+{
+    tcg_out32(s, encode_vdvjvkva_insn(OPC_VSHUF_B, vd, vj, vk, va));
+}
+
 /* Emits the `addu16i.d d, j, sk16` instruction.  */
 static void __attribute__((unused))
 tcg_out_opc_addu16i_d(TCGContext *s, TCGReg d, TCGReg j, int32_t sk16)
@@ -837,6 +2244,80 @@ tcg_out_opc_ld_wu(TCGContext *s, TCGReg d, TCGReg j, int32_t sk12)
     tcg_out32(s, encode_djsk12_insn(OPC_LD_WU, d, j, sk12));
 }
 
+/* Emits the `vld vd, j, sk12` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vld(TCGContext *s, TCGReg vd, TCGReg j, int32_t sk12)
+{
+    tcg_out32(s, encode_vdjsk12_insn(OPC_VLD, vd, j, sk12));
+}
+
+/* Emits the `vst vd, j, sk12` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vst(TCGContext *s, TCGReg vd, TCGReg j, int32_t sk12)
+{
+    tcg_out32(s, encode_vdjsk12_insn(OPC_VST, vd, j, sk12));
+}
+
+/* Emits the `vldrepl.d vd, j, sk9` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vldrepl_d(TCGContext *s, TCGReg vd, TCGReg j, int32_t sk9)
+{
+    tcg_out32(s, encode_vdjsk9_insn(OPC_VLDREPL_D, vd, j, sk9));
+}
+
+/* Emits the `vldrepl.w vd, j, sk10` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vldrepl_w(TCGContext *s, TCGReg vd, TCGReg j, int32_t sk10)
+{
+    tcg_out32(s, encode_vdjsk10_insn(OPC_VLDREPL_W, vd, j, sk10));
+}
+
+/* Emits the `vldrepl.h vd, j, sk11` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vldrepl_h(TCGContext *s, TCGReg vd, TCGReg j, int32_t sk11)
+{
+    tcg_out32(s, encode_vdjsk11_insn(OPC_VLDREPL_H, vd, j, sk11));
+}
+
+/* Emits the `vldrepl.b vd, j, sk12` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vldrepl_b(TCGContext *s, TCGReg vd, TCGReg j, int32_t sk12)
+{
+    tcg_out32(s, encode_vdjsk12_insn(OPC_VLDREPL_B, vd, j, sk12));
+}
+
+/* Emits the `vstelm.d vd, j, sk8, un1` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vstelm_d(TCGContext *s, TCGReg vd, TCGReg j, int32_t sk8,
+                     uint32_t un1)
+{
+    tcg_out32(s, encode_vdjsk8un1_insn(OPC_VSTELM_D, vd, j, sk8, un1));
+}
+
+/* Emits the `vstelm.w vd, j, sk8, un2` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vstelm_w(TCGContext *s, TCGReg vd, TCGReg j, int32_t sk8,
+                     uint32_t un2)
+{
+    tcg_out32(s, encode_vdjsk8un2_insn(OPC_VSTELM_W, vd, j, sk8, un2));
+}
+
+/* Emits the `vstelm.h vd, j, sk8, un3` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vstelm_h(TCGContext *s, TCGReg vd, TCGReg j, int32_t sk8,
+                     uint32_t un3)
+{
+    tcg_out32(s, encode_vdjsk8un3_insn(OPC_VSTELM_H, vd, j, sk8, un3));
+}
+
+/* Emits the `vstelm.b vd, j, sk8, un4` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vstelm_b(TCGContext *s, TCGReg vd, TCGReg j, int32_t sk8,
+                     uint32_t un4)
+{
+    tcg_out32(s, encode_vdjsk8un4_insn(OPC_VSTELM_B, vd, j, sk8, un4));
+}
+
 /* Emits the `ldx.b d, j, k` instruction.  */
 static void __attribute__((unused))
 tcg_out_opc_ldx_b(TCGContext *s, TCGReg d, TCGReg j, TCGReg k)
@@ -914,6 +2395,20 @@ tcg_out_opc_ldx_wu(TCGContext *s, TCGReg d, TCGReg j, TCGReg k)
     tcg_out32(s, encode_djk_insn(OPC_LDX_WU, d, j, k));
 }
 
+/* Emits the `vldx vd, j, k` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vldx(TCGContext *s, TCGReg vd, TCGReg j, TCGReg k)
+{
+    tcg_out32(s, encode_vdjk_insn(OPC_VLDX, vd, j, k));
+}
+
+/* Emits the `vstx vd, j, k` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vstx(TCGContext *s, TCGReg vd, TCGReg j, TCGReg k)
+{
+    tcg_out32(s, encode_vdjk_insn(OPC_VSTX, vd, j, k));
+}
+
 /* Emits the `dbar ud15` instruction.  */
 static void __attribute__((unused))
 tcg_out_opc_dbar(TCGContext *s, uint32_t ud15)
@@ -984,4 +2479,4526 @@ tcg_out_opc_bleu(TCGContext *s, TCGReg d, TCGReg j, int32_t sk16)
     tcg_out32(s, encode_djsk16_insn(OPC_BLEU, d, j, sk16));
 }
 
+/* Emits the `vseq.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vseq_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSEQ_B, vd, vj, vk));
+}
+
+/* Emits the `vseq.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vseq_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSEQ_H, vd, vj, vk));
+}
+
+/* Emits the `vseq.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vseq_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSEQ_W, vd, vj, vk));
+}
+
+/* Emits the `vseq.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vseq_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSEQ_D, vd, vj, vk));
+}
+
+/* Emits the `vsle.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsle_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSLE_B, vd, vj, vk));
+}
+
+/* Emits the `vsle.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsle_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSLE_H, vd, vj, vk));
+}
+
+/* Emits the `vsle.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsle_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSLE_W, vd, vj, vk));
+}
+
+/* Emits the `vsle.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsle_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSLE_D, vd, vj, vk));
+}
+
+/* Emits the `vsle.bu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsle_bu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSLE_BU, vd, vj, vk));
+}
+
+/* Emits the `vsle.hu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsle_hu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSLE_HU, vd, vj, vk));
+}
+
+/* Emits the `vsle.wu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsle_wu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSLE_WU, vd, vj, vk));
+}
+
+/* Emits the `vsle.du vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsle_du(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSLE_DU, vd, vj, vk));
+}
+
+/* Emits the `vslt.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vslt_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSLT_B, vd, vj, vk));
+}
+
+/* Emits the `vslt.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vslt_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSLT_H, vd, vj, vk));
+}
+
+/* Emits the `vslt.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vslt_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSLT_W, vd, vj, vk));
+}
+
+/* Emits the `vslt.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vslt_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSLT_D, vd, vj, vk));
+}
+
+/* Emits the `vslt.bu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vslt_bu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSLT_BU, vd, vj, vk));
+}
+
+/* Emits the `vslt.hu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vslt_hu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSLT_HU, vd, vj, vk));
+}
+
+/* Emits the `vslt.wu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vslt_wu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSLT_WU, vd, vj, vk));
+}
+
+/* Emits the `vslt.du vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vslt_du(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSLT_DU, vd, vj, vk));
+}
+
+/* Emits the `vadd.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vadd_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VADD_B, vd, vj, vk));
+}
+
+/* Emits the `vadd.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vadd_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VADD_H, vd, vj, vk));
+}
+
+/* Emits the `vadd.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vadd_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VADD_W, vd, vj, vk));
+}
+
+/* Emits the `vadd.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vadd_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VADD_D, vd, vj, vk));
+}
+
+/* Emits the `vsub.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsub_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSUB_B, vd, vj, vk));
+}
+
+/* Emits the `vsub.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsub_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSUB_H, vd, vj, vk));
+}
+
+/* Emits the `vsub.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsub_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSUB_W, vd, vj, vk));
+}
+
+/* Emits the `vsub.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsub_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSUB_D, vd, vj, vk));
+}
+
+/* Emits the `vaddwev.h.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vaddwev_h_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWEV_H_B, vd, vj, vk));
+}
+
+/* Emits the `vaddwev.w.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vaddwev_w_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWEV_W_H, vd, vj, vk));
+}
+
+/* Emits the `vaddwev.d.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vaddwev_d_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWEV_D_W, vd, vj, vk));
+}
+
+/* Emits the `vaddwev.q.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vaddwev_q_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWEV_Q_D, vd, vj, vk));
+}
+
+/* Emits the `vsubwev.h.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsubwev_h_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSUBWEV_H_B, vd, vj, vk));
+}
+
+/* Emits the `vsubwev.w.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsubwev_w_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSUBWEV_W_H, vd, vj, vk));
+}
+
+/* Emits the `vsubwev.d.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsubwev_d_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSUBWEV_D_W, vd, vj, vk));
+}
+
+/* Emits the `vsubwev.q.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsubwev_q_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSUBWEV_Q_D, vd, vj, vk));
+}
+
+/* Emits the `vaddwod.h.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vaddwod_h_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWOD_H_B, vd, vj, vk));
+}
+
+/* Emits the `vaddwod.w.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vaddwod_w_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWOD_W_H, vd, vj, vk));
+}
+
+/* Emits the `vaddwod.d.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vaddwod_d_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWOD_D_W, vd, vj, vk));
+}
+
+/* Emits the `vaddwod.q.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vaddwod_q_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWOD_Q_D, vd, vj, vk));
+}
+
+/* Emits the `vsubwod.h.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsubwod_h_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSUBWOD_H_B, vd, vj, vk));
+}
+
+/* Emits the `vsubwod.w.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsubwod_w_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSUBWOD_W_H, vd, vj, vk));
+}
+
+/* Emits the `vsubwod.d.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsubwod_d_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSUBWOD_D_W, vd, vj, vk));
+}
+
+/* Emits the `vsubwod.q.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsubwod_q_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSUBWOD_Q_D, vd, vj, vk));
+}
+
+/* Emits the `vaddwev.h.bu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vaddwev_h_bu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWEV_H_BU, vd, vj, vk));
+}
+
+/* Emits the `vaddwev.w.hu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vaddwev_w_hu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWEV_W_HU, vd, vj, vk));
+}
+
+/* Emits the `vaddwev.d.wu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vaddwev_d_wu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWEV_D_WU, vd, vj, vk));
+}
+
+/* Emits the `vaddwev.q.du vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vaddwev_q_du(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWEV_Q_DU, vd, vj, vk));
+}
+
+/* Emits the `vsubwev.h.bu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsubwev_h_bu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSUBWEV_H_BU, vd, vj, vk));
+}
+
+/* Emits the `vsubwev.w.hu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsubwev_w_hu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSUBWEV_W_HU, vd, vj, vk));
+}
+
+/* Emits the `vsubwev.d.wu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsubwev_d_wu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSUBWEV_D_WU, vd, vj, vk));
+}
+
+/* Emits the `vsubwev.q.du vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsubwev_q_du(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSUBWEV_Q_DU, vd, vj, vk));
+}
+
+/* Emits the `vaddwod.h.bu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vaddwod_h_bu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWOD_H_BU, vd, vj, vk));
+}
+
+/* Emits the `vaddwod.w.hu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vaddwod_w_hu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWOD_W_HU, vd, vj, vk));
+}
+
+/* Emits the `vaddwod.d.wu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vaddwod_d_wu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWOD_D_WU, vd, vj, vk));
+}
+
+/* Emits the `vaddwod.q.du vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vaddwod_q_du(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWOD_Q_DU, vd, vj, vk));
+}
+
+/* Emits the `vsubwod.h.bu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsubwod_h_bu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSUBWOD_H_BU, vd, vj, vk));
+}
+
+/* Emits the `vsubwod.w.hu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsubwod_w_hu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSUBWOD_W_HU, vd, vj, vk));
+}
+
+/* Emits the `vsubwod.d.wu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsubwod_d_wu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSUBWOD_D_WU, vd, vj, vk));
+}
+
+/* Emits the `vsubwod.q.du vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsubwod_q_du(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSUBWOD_Q_DU, vd, vj, vk));
+}
+
+/* Emits the `vaddwev.h.bu.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vaddwev_h_bu_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWEV_H_BU_B, vd, vj, vk));
+}
+
+/* Emits the `vaddwev.w.hu.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vaddwev_w_hu_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWEV_W_HU_H, vd, vj, vk));
+}
+
+/* Emits the `vaddwev.d.wu.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vaddwev_d_wu_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWEV_D_WU_W, vd, vj, vk));
+}
+
+/* Emits the `vaddwev.q.du.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vaddwev_q_du_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWEV_Q_DU_D, vd, vj, vk));
+}
+
+/* Emits the `vaddwod.h.bu.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vaddwod_h_bu_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWOD_H_BU_B, vd, vj, vk));
+}
+
+/* Emits the `vaddwod.w.hu.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vaddwod_w_hu_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWOD_W_HU_H, vd, vj, vk));
+}
+
+/* Emits the `vaddwod.d.wu.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vaddwod_d_wu_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWOD_D_WU_W, vd, vj, vk));
+}
+
+/* Emits the `vaddwod.q.du.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vaddwod_q_du_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWOD_Q_DU_D, vd, vj, vk));
+}
+
+/* Emits the `vsadd.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsadd_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSADD_B, vd, vj, vk));
+}
+
+/* Emits the `vsadd.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsadd_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSADD_H, vd, vj, vk));
+}
+
+/* Emits the `vsadd.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsadd_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSADD_W, vd, vj, vk));
+}
+
+/* Emits the `vsadd.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsadd_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSADD_D, vd, vj, vk));
+}
+
+/* Emits the `vssub.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssub_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSSUB_B, vd, vj, vk));
+}
+
+/* Emits the `vssub.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssub_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSSUB_H, vd, vj, vk));
+}
+
+/* Emits the `vssub.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssub_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSSUB_W, vd, vj, vk));
+}
+
+/* Emits the `vssub.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssub_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSSUB_D, vd, vj, vk));
+}
+
+/* Emits the `vsadd.bu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsadd_bu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSADD_BU, vd, vj, vk));
+}
+
+/* Emits the `vsadd.hu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsadd_hu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSADD_HU, vd, vj, vk));
+}
+
+/* Emits the `vsadd.wu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsadd_wu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSADD_WU, vd, vj, vk));
+}
+
+/* Emits the `vsadd.du vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsadd_du(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSADD_DU, vd, vj, vk));
+}
+
+/* Emits the `vssub.bu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssub_bu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSSUB_BU, vd, vj, vk));
+}
+
+/* Emits the `vssub.hu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssub_hu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSSUB_HU, vd, vj, vk));
+}
+
+/* Emits the `vssub.wu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssub_wu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSSUB_WU, vd, vj, vk));
+}
+
+/* Emits the `vssub.du vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssub_du(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSSUB_DU, vd, vj, vk));
+}
+
+/* Emits the `vhaddw.h.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vhaddw_h_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VHADDW_H_B, vd, vj, vk));
+}
+
+/* Emits the `vhaddw.w.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vhaddw_w_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VHADDW_W_H, vd, vj, vk));
+}
+
+/* Emits the `vhaddw.d.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vhaddw_d_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VHADDW_D_W, vd, vj, vk));
+}
+
+/* Emits the `vhaddw.q.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vhaddw_q_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VHADDW_Q_D, vd, vj, vk));
+}
+
+/* Emits the `vhsubw.h.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vhsubw_h_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VHSUBW_H_B, vd, vj, vk));
+}
+
+/* Emits the `vhsubw.w.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vhsubw_w_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VHSUBW_W_H, vd, vj, vk));
+}
+
+/* Emits the `vhsubw.d.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vhsubw_d_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VHSUBW_D_W, vd, vj, vk));
+}
+
+/* Emits the `vhsubw.q.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vhsubw_q_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VHSUBW_Q_D, vd, vj, vk));
+}
+
+/* Emits the `vhaddw.hu.bu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vhaddw_hu_bu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VHADDW_HU_BU, vd, vj, vk));
+}
+
+/* Emits the `vhaddw.wu.hu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vhaddw_wu_hu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VHADDW_WU_HU, vd, vj, vk));
+}
+
+/* Emits the `vhaddw.du.wu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vhaddw_du_wu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VHADDW_DU_WU, vd, vj, vk));
+}
+
+/* Emits the `vhaddw.qu.du vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vhaddw_qu_du(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VHADDW_QU_DU, vd, vj, vk));
+}
+
+/* Emits the `vhsubw.hu.bu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vhsubw_hu_bu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VHSUBW_HU_BU, vd, vj, vk));
+}
+
+/* Emits the `vhsubw.wu.hu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vhsubw_wu_hu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VHSUBW_WU_HU, vd, vj, vk));
+}
+
+/* Emits the `vhsubw.du.wu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vhsubw_du_wu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VHSUBW_DU_WU, vd, vj, vk));
+}
+
+/* Emits the `vhsubw.qu.du vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vhsubw_qu_du(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VHSUBW_QU_DU, vd, vj, vk));
+}
+
+/* Emits the `vadda.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vadda_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VADDA_B, vd, vj, vk));
+}
+
+/* Emits the `vadda.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vadda_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VADDA_H, vd, vj, vk));
+}
+
+/* Emits the `vadda.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vadda_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VADDA_W, vd, vj, vk));
+}
+
+/* Emits the `vadda.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vadda_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VADDA_D, vd, vj, vk));
+}
+
+/* Emits the `vabsd.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vabsd_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VABSD_B, vd, vj, vk));
+}
+
+/* Emits the `vabsd.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vabsd_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VABSD_H, vd, vj, vk));
+}
+
+/* Emits the `vabsd.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vabsd_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VABSD_W, vd, vj, vk));
+}
+
+/* Emits the `vabsd.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vabsd_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VABSD_D, vd, vj, vk));
+}
+
+/* Emits the `vabsd.bu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vabsd_bu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VABSD_BU, vd, vj, vk));
+}
+
+/* Emits the `vabsd.hu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vabsd_hu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VABSD_HU, vd, vj, vk));
+}
+
+/* Emits the `vabsd.wu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vabsd_wu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VABSD_WU, vd, vj, vk));
+}
+
+/* Emits the `vabsd.du vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vabsd_du(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VABSD_DU, vd, vj, vk));
+}
+
+/* Emits the `vavg.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vavg_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VAVG_B, vd, vj, vk));
+}
+
+/* Emits the `vavg.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vavg_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VAVG_H, vd, vj, vk));
+}
+
+/* Emits the `vavg.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vavg_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VAVG_W, vd, vj, vk));
+}
+
+/* Emits the `vavg.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vavg_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VAVG_D, vd, vj, vk));
+}
+
+/* Emits the `vavg.bu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vavg_bu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VAVG_BU, vd, vj, vk));
+}
+
+/* Emits the `vavg.hu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vavg_hu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VAVG_HU, vd, vj, vk));
+}
+
+/* Emits the `vavg.wu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vavg_wu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VAVG_WU, vd, vj, vk));
+}
+
+/* Emits the `vavg.du vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vavg_du(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VAVG_DU, vd, vj, vk));
+}
+
+/* Emits the `vavgr.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vavgr_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VAVGR_B, vd, vj, vk));
+}
+
+/* Emits the `vavgr.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vavgr_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VAVGR_H, vd, vj, vk));
+}
+
+/* Emits the `vavgr.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vavgr_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VAVGR_W, vd, vj, vk));
+}
+
+/* Emits the `vavgr.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vavgr_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VAVGR_D, vd, vj, vk));
+}
+
+/* Emits the `vavgr.bu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vavgr_bu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VAVGR_BU, vd, vj, vk));
+}
+
+/* Emits the `vavgr.hu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vavgr_hu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VAVGR_HU, vd, vj, vk));
+}
+
+/* Emits the `vavgr.wu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vavgr_wu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VAVGR_WU, vd, vj, vk));
+}
+
+/* Emits the `vavgr.du vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vavgr_du(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VAVGR_DU, vd, vj, vk));
+}
+
+/* Emits the `vmax.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmax_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMAX_B, vd, vj, vk));
+}
+
+/* Emits the `vmax.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmax_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMAX_H, vd, vj, vk));
+}
+
+/* Emits the `vmax.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmax_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMAX_W, vd, vj, vk));
+}
+
+/* Emits the `vmax.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmax_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMAX_D, vd, vj, vk));
+}
+
+/* Emits the `vmin.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmin_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMIN_B, vd, vj, vk));
+}
+
+/* Emits the `vmin.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmin_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMIN_H, vd, vj, vk));
+}
+
+/* Emits the `vmin.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmin_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMIN_W, vd, vj, vk));
+}
+
+/* Emits the `vmin.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmin_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMIN_D, vd, vj, vk));
+}
+
+/* Emits the `vmax.bu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmax_bu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMAX_BU, vd, vj, vk));
+}
+
+/* Emits the `vmax.hu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmax_hu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMAX_HU, vd, vj, vk));
+}
+
+/* Emits the `vmax.wu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmax_wu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMAX_WU, vd, vj, vk));
+}
+
+/* Emits the `vmax.du vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmax_du(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMAX_DU, vd, vj, vk));
+}
+
+/* Emits the `vmin.bu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmin_bu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMIN_BU, vd, vj, vk));
+}
+
+/* Emits the `vmin.hu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmin_hu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMIN_HU, vd, vj, vk));
+}
+
+/* Emits the `vmin.wu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmin_wu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMIN_WU, vd, vj, vk));
+}
+
+/* Emits the `vmin.du vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmin_du(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMIN_DU, vd, vj, vk));
+}
+
+/* Emits the `vmul.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmul_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMUL_B, vd, vj, vk));
+}
+
+/* Emits the `vmul.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmul_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMUL_H, vd, vj, vk));
+}
+
+/* Emits the `vmul.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmul_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMUL_W, vd, vj, vk));
+}
+
+/* Emits the `vmul.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmul_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMUL_D, vd, vj, vk));
+}
+
+/* Emits the `vmuh.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmuh_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMUH_B, vd, vj, vk));
+}
+
+/* Emits the `vmuh.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmuh_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMUH_H, vd, vj, vk));
+}
+
+/* Emits the `vmuh.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmuh_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMUH_W, vd, vj, vk));
+}
+
+/* Emits the `vmuh.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmuh_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMUH_D, vd, vj, vk));
+}
+
+/* Emits the `vmuh.bu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmuh_bu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMUH_BU, vd, vj, vk));
+}
+
+/* Emits the `vmuh.hu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmuh_hu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMUH_HU, vd, vj, vk));
+}
+
+/* Emits the `vmuh.wu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmuh_wu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMUH_WU, vd, vj, vk));
+}
+
+/* Emits the `vmuh.du vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmuh_du(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMUH_DU, vd, vj, vk));
+}
+
+/* Emits the `vmulwev.h.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmulwev_h_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWEV_H_B, vd, vj, vk));
+}
+
+/* Emits the `vmulwev.w.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmulwev_w_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWEV_W_H, vd, vj, vk));
+}
+
+/* Emits the `vmulwev.d.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmulwev_d_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWEV_D_W, vd, vj, vk));
+}
+
+/* Emits the `vmulwev.q.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmulwev_q_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWEV_Q_D, vd, vj, vk));
+}
+
+/* Emits the `vmulwod.h.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmulwod_h_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWOD_H_B, vd, vj, vk));
+}
+
+/* Emits the `vmulwod.w.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmulwod_w_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWOD_W_H, vd, vj, vk));
+}
+
+/* Emits the `vmulwod.d.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmulwod_d_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWOD_D_W, vd, vj, vk));
+}
+
+/* Emits the `vmulwod.q.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmulwod_q_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWOD_Q_D, vd, vj, vk));
+}
+
+/* Emits the `vmulwev.h.bu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmulwev_h_bu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWEV_H_BU, vd, vj, vk));
+}
+
+/* Emits the `vmulwev.w.hu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmulwev_w_hu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWEV_W_HU, vd, vj, vk));
+}
+
+/* Emits the `vmulwev.d.wu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmulwev_d_wu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWEV_D_WU, vd, vj, vk));
+}
+
+/* Emits the `vmulwev.q.du vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmulwev_q_du(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWEV_Q_DU, vd, vj, vk));
+}
+
+/* Emits the `vmulwod.h.bu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmulwod_h_bu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWOD_H_BU, vd, vj, vk));
+}
+
+/* Emits the `vmulwod.w.hu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmulwod_w_hu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWOD_W_HU, vd, vj, vk));
+}
+
+/* Emits the `vmulwod.d.wu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmulwod_d_wu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWOD_D_WU, vd, vj, vk));
+}
+
+/* Emits the `vmulwod.q.du vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmulwod_q_du(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWOD_Q_DU, vd, vj, vk));
+}
+
+/* Emits the `vmulwev.h.bu.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmulwev_h_bu_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWEV_H_BU_B, vd, vj, vk));
+}
+
+/* Emits the `vmulwev.w.hu.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmulwev_w_hu_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWEV_W_HU_H, vd, vj, vk));
+}
+
+/* Emits the `vmulwev.d.wu.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmulwev_d_wu_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWEV_D_WU_W, vd, vj, vk));
+}
+
+/* Emits the `vmulwev.q.du.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmulwev_q_du_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWEV_Q_DU_D, vd, vj, vk));
+}
+
+/* Emits the `vmulwod.h.bu.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmulwod_h_bu_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWOD_H_BU_B, vd, vj, vk));
+}
+
+/* Emits the `vmulwod.w.hu.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmulwod_w_hu_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWOD_W_HU_H, vd, vj, vk));
+}
+
+/* Emits the `vmulwod.d.wu.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmulwod_d_wu_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWOD_D_WU_W, vd, vj, vk));
+}
+
+/* Emits the `vmulwod.q.du.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmulwod_q_du_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWOD_Q_DU_D, vd, vj, vk));
+}
+
+/* Emits the `vmadd.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmadd_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMADD_B, vd, vj, vk));
+}
+
+/* Emits the `vmadd.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmadd_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMADD_H, vd, vj, vk));
+}
+
+/* Emits the `vmadd.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmadd_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMADD_W, vd, vj, vk));
+}
+
+/* Emits the `vmadd.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmadd_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMADD_D, vd, vj, vk));
+}
+
+/* Emits the `vmsub.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmsub_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMSUB_B, vd, vj, vk));
+}
+
+/* Emits the `vmsub.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmsub_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMSUB_H, vd, vj, vk));
+}
+
+/* Emits the `vmsub.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmsub_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMSUB_W, vd, vj, vk));
+}
+
+/* Emits the `vmsub.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmsub_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMSUB_D, vd, vj, vk));
+}
+
+/* Emits the `vmaddwev.h.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmaddwev_h_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWEV_H_B, vd, vj, vk));
+}
+
+/* Emits the `vmaddwev.w.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmaddwev_w_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWEV_W_H, vd, vj, vk));
+}
+
+/* Emits the `vmaddwev.d.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmaddwev_d_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWEV_D_W, vd, vj, vk));
+}
+
+/* Emits the `vmaddwev.q.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmaddwev_q_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWEV_Q_D, vd, vj, vk));
+}
+
+/* Emits the `vmaddwod.h.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmaddwod_h_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWOD_H_B, vd, vj, vk));
+}
+
+/* Emits the `vmaddwod.w.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmaddwod_w_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWOD_W_H, vd, vj, vk));
+}
+
+/* Emits the `vmaddwod.d.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmaddwod_d_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWOD_D_W, vd, vj, vk));
+}
+
+/* Emits the `vmaddwod.q.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmaddwod_q_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWOD_Q_D, vd, vj, vk));
+}
+
+/* Emits the `vmaddwev.h.bu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmaddwev_h_bu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWEV_H_BU, vd, vj, vk));
+}
+
+/* Emits the `vmaddwev.w.hu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmaddwev_w_hu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWEV_W_HU, vd, vj, vk));
+}
+
+/* Emits the `vmaddwev.d.wu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmaddwev_d_wu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWEV_D_WU, vd, vj, vk));
+}
+
+/* Emits the `vmaddwev.q.du vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmaddwev_q_du(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWEV_Q_DU, vd, vj, vk));
+}
+
+/* Emits the `vmaddwod.h.bu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmaddwod_h_bu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWOD_H_BU, vd, vj, vk));
+}
+
+/* Emits the `vmaddwod.w.hu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmaddwod_w_hu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWOD_W_HU, vd, vj, vk));
+}
+
+/* Emits the `vmaddwod.d.wu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmaddwod_d_wu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWOD_D_WU, vd, vj, vk));
+}
+
+/* Emits the `vmaddwod.q.du vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmaddwod_q_du(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWOD_Q_DU, vd, vj, vk));
+}
+
+/* Emits the `vmaddwev.h.bu.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmaddwev_h_bu_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWEV_H_BU_B, vd, vj, vk));
+}
+
+/* Emits the `vmaddwev.w.hu.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmaddwev_w_hu_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWEV_W_HU_H, vd, vj, vk));
+}
+
+/* Emits the `vmaddwev.d.wu.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmaddwev_d_wu_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWEV_D_WU_W, vd, vj, vk));
+}
+
+/* Emits the `vmaddwev.q.du.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmaddwev_q_du_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWEV_Q_DU_D, vd, vj, vk));
+}
+
+/* Emits the `vmaddwod.h.bu.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmaddwod_h_bu_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWOD_H_BU_B, vd, vj, vk));
+}
+
+/* Emits the `vmaddwod.w.hu.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmaddwod_w_hu_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWOD_W_HU_H, vd, vj, vk));
+}
+
+/* Emits the `vmaddwod.d.wu.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmaddwod_d_wu_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWOD_D_WU_W, vd, vj, vk));
+}
+
+/* Emits the `vmaddwod.q.du.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmaddwod_q_du_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWOD_Q_DU_D, vd, vj, vk));
+}
+
+/* Emits the `vdiv.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vdiv_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VDIV_B, vd, vj, vk));
+}
+
+/* Emits the `vdiv.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vdiv_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VDIV_H, vd, vj, vk));
+}
+
+/* Emits the `vdiv.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vdiv_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VDIV_W, vd, vj, vk));
+}
+
+/* Emits the `vdiv.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vdiv_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VDIV_D, vd, vj, vk));
+}
+
+/* Emits the `vmod.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmod_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMOD_B, vd, vj, vk));
+}
+
+/* Emits the `vmod.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmod_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMOD_H, vd, vj, vk));
+}
+
+/* Emits the `vmod.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmod_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMOD_W, vd, vj, vk));
+}
+
+/* Emits the `vmod.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmod_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMOD_D, vd, vj, vk));
+}
+
+/* Emits the `vdiv.bu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vdiv_bu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VDIV_BU, vd, vj, vk));
+}
+
+/* Emits the `vdiv.hu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vdiv_hu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VDIV_HU, vd, vj, vk));
+}
+
+/* Emits the `vdiv.wu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vdiv_wu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VDIV_WU, vd, vj, vk));
+}
+
+/* Emits the `vdiv.du vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vdiv_du(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VDIV_DU, vd, vj, vk));
+}
+
+/* Emits the `vmod.bu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmod_bu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMOD_BU, vd, vj, vk));
+}
+
+/* Emits the `vmod.hu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmod_hu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMOD_HU, vd, vj, vk));
+}
+
+/* Emits the `vmod.wu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmod_wu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMOD_WU, vd, vj, vk));
+}
+
+/* Emits the `vmod.du vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmod_du(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMOD_DU, vd, vj, vk));
+}
+
+/* Emits the `vsll.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsll_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSLL_B, vd, vj, vk));
+}
+
+/* Emits the `vsll.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsll_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSLL_H, vd, vj, vk));
+}
+
+/* Emits the `vsll.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsll_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSLL_W, vd, vj, vk));
+}
+
+/* Emits the `vsll.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsll_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSLL_D, vd, vj, vk));
+}
+
+/* Emits the `vsrl.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrl_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSRL_B, vd, vj, vk));
+}
+
+/* Emits the `vsrl.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrl_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSRL_H, vd, vj, vk));
+}
+
+/* Emits the `vsrl.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrl_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSRL_W, vd, vj, vk));
+}
+
+/* Emits the `vsrl.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrl_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSRL_D, vd, vj, vk));
+}
+
+/* Emits the `vsra.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsra_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSRA_B, vd, vj, vk));
+}
+
+/* Emits the `vsra.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsra_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSRA_H, vd, vj, vk));
+}
+
+/* Emits the `vsra.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsra_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSRA_W, vd, vj, vk));
+}
+
+/* Emits the `vsra.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsra_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSRA_D, vd, vj, vk));
+}
+
+/* Emits the `vrotr.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vrotr_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VROTR_B, vd, vj, vk));
+}
+
+/* Emits the `vrotr.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vrotr_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VROTR_H, vd, vj, vk));
+}
+
+/* Emits the `vrotr.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vrotr_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VROTR_W, vd, vj, vk));
+}
+
+/* Emits the `vrotr.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vrotr_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VROTR_D, vd, vj, vk));
+}
+
+/* Emits the `vsrlr.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrlr_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSRLR_B, vd, vj, vk));
+}
+
+/* Emits the `vsrlr.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrlr_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSRLR_H, vd, vj, vk));
+}
+
+/* Emits the `vsrlr.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrlr_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSRLR_W, vd, vj, vk));
+}
+
+/* Emits the `vsrlr.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrlr_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSRLR_D, vd, vj, vk));
+}
+
+/* Emits the `vsrar.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrar_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSRAR_B, vd, vj, vk));
+}
+
+/* Emits the `vsrar.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrar_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSRAR_H, vd, vj, vk));
+}
+
+/* Emits the `vsrar.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrar_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSRAR_W, vd, vj, vk));
+}
+
+/* Emits the `vsrar.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrar_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSRAR_D, vd, vj, vk));
+}
+
+/* Emits the `vsrln.b.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrln_b_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSRLN_B_H, vd, vj, vk));
+}
+
+/* Emits the `vsrln.h.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrln_h_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSRLN_H_W, vd, vj, vk));
+}
+
+/* Emits the `vsrln.w.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrln_w_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSRLN_W_D, vd, vj, vk));
+}
+
+/* Emits the `vsran.b.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsran_b_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSRAN_B_H, vd, vj, vk));
+}
+
+/* Emits the `vsran.h.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsran_h_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSRAN_H_W, vd, vj, vk));
+}
+
+/* Emits the `vsran.w.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsran_w_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSRAN_W_D, vd, vj, vk));
+}
+
+/* Emits the `vsrlrn.b.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrlrn_b_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSRLRN_B_H, vd, vj, vk));
+}
+
+/* Emits the `vsrlrn.h.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrlrn_h_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSRLRN_H_W, vd, vj, vk));
+}
+
+/* Emits the `vsrlrn.w.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrlrn_w_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSRLRN_W_D, vd, vj, vk));
+}
+
+/* Emits the `vsrarn.b.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrarn_b_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSRARN_B_H, vd, vj, vk));
+}
+
+/* Emits the `vsrarn.h.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrarn_h_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSRARN_H_W, vd, vj, vk));
+}
+
+/* Emits the `vsrarn.w.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrarn_w_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSRARN_W_D, vd, vj, vk));
+}
+
+/* Emits the `vssrln.b.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrln_b_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRLN_B_H, vd, vj, vk));
+}
+
+/* Emits the `vssrln.h.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrln_h_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRLN_H_W, vd, vj, vk));
+}
+
+/* Emits the `vssrln.w.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrln_w_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRLN_W_D, vd, vj, vk));
+}
+
+/* Emits the `vssran.b.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssran_b_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRAN_B_H, vd, vj, vk));
+}
+
+/* Emits the `vssran.h.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssran_h_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRAN_H_W, vd, vj, vk));
+}
+
+/* Emits the `vssran.w.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssran_w_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRAN_W_D, vd, vj, vk));
+}
+
+/* Emits the `vssrlrn.b.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrlrn_b_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRLRN_B_H, vd, vj, vk));
+}
+
+/* Emits the `vssrlrn.h.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrlrn_h_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRLRN_H_W, vd, vj, vk));
+}
+
+/* Emits the `vssrlrn.w.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrlrn_w_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRLRN_W_D, vd, vj, vk));
+}
+
+/* Emits the `vssrarn.b.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrarn_b_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRARN_B_H, vd, vj, vk));
+}
+
+/* Emits the `vssrarn.h.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrarn_h_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRARN_H_W, vd, vj, vk));
+}
+
+/* Emits the `vssrarn.w.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrarn_w_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRARN_W_D, vd, vj, vk));
+}
+
+/* Emits the `vssrln.bu.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrln_bu_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRLN_BU_H, vd, vj, vk));
+}
+
+/* Emits the `vssrln.hu.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrln_hu_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRLN_HU_W, vd, vj, vk));
+}
+
+/* Emits the `vssrln.wu.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrln_wu_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRLN_WU_D, vd, vj, vk));
+}
+
+/* Emits the `vssran.bu.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssran_bu_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRAN_BU_H, vd, vj, vk));
+}
+
+/* Emits the `vssran.hu.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssran_hu_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRAN_HU_W, vd, vj, vk));
+}
+
+/* Emits the `vssran.wu.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssran_wu_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRAN_WU_D, vd, vj, vk));
+}
+
+/* Emits the `vssrlrn.bu.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrlrn_bu_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRLRN_BU_H, vd, vj, vk));
+}
+
+/* Emits the `vssrlrn.hu.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrlrn_hu_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRLRN_HU_W, vd, vj, vk));
+}
+
+/* Emits the `vssrlrn.wu.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrlrn_wu_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRLRN_WU_D, vd, vj, vk));
+}
+
+/* Emits the `vssrarn.bu.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrarn_bu_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRARN_BU_H, vd, vj, vk));
+}
+
+/* Emits the `vssrarn.hu.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrarn_hu_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRARN_HU_W, vd, vj, vk));
+}
+
+/* Emits the `vssrarn.wu.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrarn_wu_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRARN_WU_D, vd, vj, vk));
+}
+
+/* Emits the `vbitclr.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vbitclr_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VBITCLR_B, vd, vj, vk));
+}
+
+/* Emits the `vbitclr.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vbitclr_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VBITCLR_H, vd, vj, vk));
+}
+
+/* Emits the `vbitclr.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vbitclr_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VBITCLR_W, vd, vj, vk));
+}
+
+/* Emits the `vbitclr.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vbitclr_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VBITCLR_D, vd, vj, vk));
+}
+
+/* Emits the `vbitset.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vbitset_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VBITSET_B, vd, vj, vk));
+}
+
+/* Emits the `vbitset.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vbitset_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VBITSET_H, vd, vj, vk));
+}
+
+/* Emits the `vbitset.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vbitset_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VBITSET_W, vd, vj, vk));
+}
+
+/* Emits the `vbitset.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vbitset_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VBITSET_D, vd, vj, vk));
+}
+
+/* Emits the `vbitrev.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vbitrev_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VBITREV_B, vd, vj, vk));
+}
+
+/* Emits the `vbitrev.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vbitrev_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VBITREV_H, vd, vj, vk));
+}
+
+/* Emits the `vbitrev.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vbitrev_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VBITREV_W, vd, vj, vk));
+}
+
+/* Emits the `vbitrev.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vbitrev_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VBITREV_D, vd, vj, vk));
+}
+
+/* Emits the `vpackev.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vpackev_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VPACKEV_B, vd, vj, vk));
+}
+
+/* Emits the `vpackev.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vpackev_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VPACKEV_H, vd, vj, vk));
+}
+
+/* Emits the `vpackev.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vpackev_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VPACKEV_W, vd, vj, vk));
+}
+
+/* Emits the `vpackev.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vpackev_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VPACKEV_D, vd, vj, vk));
+}
+
+/* Emits the `vpackod.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vpackod_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VPACKOD_B, vd, vj, vk));
+}
+
+/* Emits the `vpackod.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vpackod_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VPACKOD_H, vd, vj, vk));
+}
+
+/* Emits the `vpackod.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vpackod_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VPACKOD_W, vd, vj, vk));
+}
+
+/* Emits the `vpackod.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vpackod_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VPACKOD_D, vd, vj, vk));
+}
+
+/* Emits the `vilvl.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vilvl_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VILVL_B, vd, vj, vk));
+}
+
+/* Emits the `vilvl.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vilvl_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VILVL_H, vd, vj, vk));
+}
+
+/* Emits the `vilvl.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vilvl_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VILVL_W, vd, vj, vk));
+}
+
+/* Emits the `vilvl.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vilvl_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VILVL_D, vd, vj, vk));
+}
+
+/* Emits the `vilvh.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vilvh_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VILVH_B, vd, vj, vk));
+}
+
+/* Emits the `vilvh.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vilvh_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VILVH_H, vd, vj, vk));
+}
+
+/* Emits the `vilvh.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vilvh_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VILVH_W, vd, vj, vk));
+}
+
+/* Emits the `vilvh.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vilvh_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VILVH_D, vd, vj, vk));
+}
+
+/* Emits the `vpickev.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vpickev_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VPICKEV_B, vd, vj, vk));
+}
+
+/* Emits the `vpickev.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vpickev_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VPICKEV_H, vd, vj, vk));
+}
+
+/* Emits the `vpickev.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vpickev_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VPICKEV_W, vd, vj, vk));
+}
+
+/* Emits the `vpickev.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vpickev_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VPICKEV_D, vd, vj, vk));
+}
+
+/* Emits the `vpickod.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vpickod_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VPICKOD_B, vd, vj, vk));
+}
+
+/* Emits the `vpickod.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vpickod_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VPICKOD_H, vd, vj, vk));
+}
+
+/* Emits the `vpickod.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vpickod_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VPICKOD_W, vd, vj, vk));
+}
+
+/* Emits the `vpickod.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vpickod_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VPICKOD_D, vd, vj, vk));
+}
+
+/* Emits the `vreplve.b vd, vj, k` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vreplve_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg k)
+{
+    tcg_out32(s, encode_vdvjk_insn(OPC_VREPLVE_B, vd, vj, k));
+}
+
+/* Emits the `vreplve.h vd, vj, k` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vreplve_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg k)
+{
+    tcg_out32(s, encode_vdvjk_insn(OPC_VREPLVE_H, vd, vj, k));
+}
+
+/* Emits the `vreplve.w vd, vj, k` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vreplve_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg k)
+{
+    tcg_out32(s, encode_vdvjk_insn(OPC_VREPLVE_W, vd, vj, k));
+}
+
+/* Emits the `vreplve.d vd, vj, k` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vreplve_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg k)
+{
+    tcg_out32(s, encode_vdvjk_insn(OPC_VREPLVE_D, vd, vj, k));
+}
+
+/* Emits the `vand.v vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vand_v(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VAND_V, vd, vj, vk));
+}
+
+/* Emits the `vor.v vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vor_v(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VOR_V, vd, vj, vk));
+}
+
+/* Emits the `vxor.v vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vxor_v(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VXOR_V, vd, vj, vk));
+}
+
+/* Emits the `vnor.v vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vnor_v(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VNOR_V, vd, vj, vk));
+}
+
+/* Emits the `vandn.v vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vandn_v(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VANDN_V, vd, vj, vk));
+}
+
+/* Emits the `vorn.v vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vorn_v(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VORN_V, vd, vj, vk));
+}
+
+/* Emits the `vfrstp.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfrstp_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFRSTP_B, vd, vj, vk));
+}
+
+/* Emits the `vfrstp.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfrstp_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFRSTP_H, vd, vj, vk));
+}
+
+/* Emits the `vadd.q vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vadd_q(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VADD_Q, vd, vj, vk));
+}
+
+/* Emits the `vsub.q vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsub_q(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSUB_Q, vd, vj, vk));
+}
+
+/* Emits the `vsigncov.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsigncov_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSIGNCOV_B, vd, vj, vk));
+}
+
+/* Emits the `vsigncov.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsigncov_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSIGNCOV_H, vd, vj, vk));
+}
+
+/* Emits the `vsigncov.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsigncov_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSIGNCOV_W, vd, vj, vk));
+}
+
+/* Emits the `vsigncov.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsigncov_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSIGNCOV_D, vd, vj, vk));
+}
+
+/* Emits the `vfadd.s vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfadd_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFADD_S, vd, vj, vk));
+}
+
+/* Emits the `vfadd.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfadd_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFADD_D, vd, vj, vk));
+}
+
+/* Emits the `vfsub.s vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfsub_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFSUB_S, vd, vj, vk));
+}
+
+/* Emits the `vfsub.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfsub_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFSUB_D, vd, vj, vk));
+}
+
+/* Emits the `vfmul.s vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfmul_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFMUL_S, vd, vj, vk));
+}
+
+/* Emits the `vfmul.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfmul_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFMUL_D, vd, vj, vk));
+}
+
+/* Emits the `vfdiv.s vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfdiv_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFDIV_S, vd, vj, vk));
+}
+
+/* Emits the `vfdiv.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfdiv_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFDIV_D, vd, vj, vk));
+}
+
+/* Emits the `vfmax.s vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfmax_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFMAX_S, vd, vj, vk));
+}
+
+/* Emits the `vfmax.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfmax_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFMAX_D, vd, vj, vk));
+}
+
+/* Emits the `vfmin.s vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfmin_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFMIN_S, vd, vj, vk));
+}
+
+/* Emits the `vfmin.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfmin_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFMIN_D, vd, vj, vk));
+}
+
+/* Emits the `vfmaxa.s vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfmaxa_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFMAXA_S, vd, vj, vk));
+}
+
+/* Emits the `vfmaxa.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfmaxa_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFMAXA_D, vd, vj, vk));
+}
+
+/* Emits the `vfmina.s vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfmina_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFMINA_S, vd, vj, vk));
+}
+
+/* Emits the `vfmina.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfmina_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFMINA_D, vd, vj, vk));
+}
+
+/* Emits the `vfcvt.h.s vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfcvt_h_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFCVT_H_S, vd, vj, vk));
+}
+
+/* Emits the `vfcvt.s.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfcvt_s_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFCVT_S_D, vd, vj, vk));
+}
+
+/* Emits the `vffint.s.l vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vffint_s_l(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFFINT_S_L, vd, vj, vk));
+}
+
+/* Emits the `vftint.w.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vftint_w_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFTINT_W_D, vd, vj, vk));
+}
+
+/* Emits the `vftintrm.w.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vftintrm_w_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFTINTRM_W_D, vd, vj, vk));
+}
+
+/* Emits the `vftintrp.w.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vftintrp_w_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFTINTRP_W_D, vd, vj, vk));
+}
+
+/* Emits the `vftintrz.w.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vftintrz_w_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFTINTRZ_W_D, vd, vj, vk));
+}
+
+/* Emits the `vftintrne.w.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vftintrne_w_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFTINTRNE_W_D, vd, vj, vk));
+}
+
+/* Emits the `vshuf.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vshuf_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSHUF_H, vd, vj, vk));
+}
+
+/* Emits the `vshuf.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vshuf_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSHUF_W, vd, vj, vk));
+}
+
+/* Emits the `vshuf.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vshuf_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSHUF_D, vd, vj, vk));
+}
+
+/* Emits the `vseqi.b vd, vj, sk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vseqi_b(TCGContext *s, TCGReg vd, TCGReg vj, int32_t sk5)
+{
+    tcg_out32(s, encode_vdvjsk5_insn(OPC_VSEQI_B, vd, vj, sk5));
+}
+
+/* Emits the `vseqi.h vd, vj, sk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vseqi_h(TCGContext *s, TCGReg vd, TCGReg vj, int32_t sk5)
+{
+    tcg_out32(s, encode_vdvjsk5_insn(OPC_VSEQI_H, vd, vj, sk5));
+}
+
+/* Emits the `vseqi.w vd, vj, sk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vseqi_w(TCGContext *s, TCGReg vd, TCGReg vj, int32_t sk5)
+{
+    tcg_out32(s, encode_vdvjsk5_insn(OPC_VSEQI_W, vd, vj, sk5));
+}
+
+/* Emits the `vseqi.d vd, vj, sk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vseqi_d(TCGContext *s, TCGReg vd, TCGReg vj, int32_t sk5)
+{
+    tcg_out32(s, encode_vdvjsk5_insn(OPC_VSEQI_D, vd, vj, sk5));
+}
+
+/* Emits the `vslei.b vd, vj, sk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vslei_b(TCGContext *s, TCGReg vd, TCGReg vj, int32_t sk5)
+{
+    tcg_out32(s, encode_vdvjsk5_insn(OPC_VSLEI_B, vd, vj, sk5));
+}
+
+/* Emits the `vslei.h vd, vj, sk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vslei_h(TCGContext *s, TCGReg vd, TCGReg vj, int32_t sk5)
+{
+    tcg_out32(s, encode_vdvjsk5_insn(OPC_VSLEI_H, vd, vj, sk5));
+}
+
+/* Emits the `vslei.w vd, vj, sk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vslei_w(TCGContext *s, TCGReg vd, TCGReg vj, int32_t sk5)
+{
+    tcg_out32(s, encode_vdvjsk5_insn(OPC_VSLEI_W, vd, vj, sk5));
+}
+
+/* Emits the `vslei.d vd, vj, sk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vslei_d(TCGContext *s, TCGReg vd, TCGReg vj, int32_t sk5)
+{
+    tcg_out32(s, encode_vdvjsk5_insn(OPC_VSLEI_D, vd, vj, sk5));
+}
+
+/* Emits the `vslei.bu vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vslei_bu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VSLEI_BU, vd, vj, uk5));
+}
+
+/* Emits the `vslei.hu vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vslei_hu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VSLEI_HU, vd, vj, uk5));
+}
+
+/* Emits the `vslei.wu vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vslei_wu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VSLEI_WU, vd, vj, uk5));
+}
+
+/* Emits the `vslei.du vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vslei_du(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VSLEI_DU, vd, vj, uk5));
+}
+
+/* Emits the `vslti.b vd, vj, sk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vslti_b(TCGContext *s, TCGReg vd, TCGReg vj, int32_t sk5)
+{
+    tcg_out32(s, encode_vdvjsk5_insn(OPC_VSLTI_B, vd, vj, sk5));
+}
+
+/* Emits the `vslti.h vd, vj, sk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vslti_h(TCGContext *s, TCGReg vd, TCGReg vj, int32_t sk5)
+{
+    tcg_out32(s, encode_vdvjsk5_insn(OPC_VSLTI_H, vd, vj, sk5));
+}
+
+/* Emits the `vslti.w vd, vj, sk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vslti_w(TCGContext *s, TCGReg vd, TCGReg vj, int32_t sk5)
+{
+    tcg_out32(s, encode_vdvjsk5_insn(OPC_VSLTI_W, vd, vj, sk5));
+}
+
+/* Emits the `vslti.d vd, vj, sk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vslti_d(TCGContext *s, TCGReg vd, TCGReg vj, int32_t sk5)
+{
+    tcg_out32(s, encode_vdvjsk5_insn(OPC_VSLTI_D, vd, vj, sk5));
+}
+
+/* Emits the `vslti.bu vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vslti_bu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VSLTI_BU, vd, vj, uk5));
+}
+
+/* Emits the `vslti.hu vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vslti_hu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VSLTI_HU, vd, vj, uk5));
+}
+
+/* Emits the `vslti.wu vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vslti_wu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VSLTI_WU, vd, vj, uk5));
+}
+
+/* Emits the `vslti.du vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vslti_du(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VSLTI_DU, vd, vj, uk5));
+}
+
+/* Emits the `vaddi.bu vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vaddi_bu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VADDI_BU, vd, vj, uk5));
+}
+
+/* Emits the `vaddi.hu vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vaddi_hu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VADDI_HU, vd, vj, uk5));
+}
+
+/* Emits the `vaddi.wu vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vaddi_wu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VADDI_WU, vd, vj, uk5));
+}
+
+/* Emits the `vaddi.du vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vaddi_du(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VADDI_DU, vd, vj, uk5));
+}
+
+/* Emits the `vsubi.bu vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsubi_bu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VSUBI_BU, vd, vj, uk5));
+}
+
+/* Emits the `vsubi.hu vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsubi_hu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VSUBI_HU, vd, vj, uk5));
+}
+
+/* Emits the `vsubi.wu vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsubi_wu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VSUBI_WU, vd, vj, uk5));
+}
+
+/* Emits the `vsubi.du vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsubi_du(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VSUBI_DU, vd, vj, uk5));
+}
+
+/* Emits the `vbsll.v vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vbsll_v(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VBSLL_V, vd, vj, uk5));
+}
+
+/* Emits the `vbsrl.v vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vbsrl_v(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VBSRL_V, vd, vj, uk5));
+}
+
+/* Emits the `vmaxi.b vd, vj, sk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmaxi_b(TCGContext *s, TCGReg vd, TCGReg vj, int32_t sk5)
+{
+    tcg_out32(s, encode_vdvjsk5_insn(OPC_VMAXI_B, vd, vj, sk5));
+}
+
+/* Emits the `vmaxi.h vd, vj, sk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmaxi_h(TCGContext *s, TCGReg vd, TCGReg vj, int32_t sk5)
+{
+    tcg_out32(s, encode_vdvjsk5_insn(OPC_VMAXI_H, vd, vj, sk5));
+}
+
+/* Emits the `vmaxi.w vd, vj, sk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmaxi_w(TCGContext *s, TCGReg vd, TCGReg vj, int32_t sk5)
+{
+    tcg_out32(s, encode_vdvjsk5_insn(OPC_VMAXI_W, vd, vj, sk5));
+}
+
+/* Emits the `vmaxi.d vd, vj, sk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmaxi_d(TCGContext *s, TCGReg vd, TCGReg vj, int32_t sk5)
+{
+    tcg_out32(s, encode_vdvjsk5_insn(OPC_VMAXI_D, vd, vj, sk5));
+}
+
+/* Emits the `vmini.b vd, vj, sk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmini_b(TCGContext *s, TCGReg vd, TCGReg vj, int32_t sk5)
+{
+    tcg_out32(s, encode_vdvjsk5_insn(OPC_VMINI_B, vd, vj, sk5));
+}
+
+/* Emits the `vmini.h vd, vj, sk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmini_h(TCGContext *s, TCGReg vd, TCGReg vj, int32_t sk5)
+{
+    tcg_out32(s, encode_vdvjsk5_insn(OPC_VMINI_H, vd, vj, sk5));
+}
+
+/* Emits the `vmini.w vd, vj, sk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmini_w(TCGContext *s, TCGReg vd, TCGReg vj, int32_t sk5)
+{
+    tcg_out32(s, encode_vdvjsk5_insn(OPC_VMINI_W, vd, vj, sk5));
+}
+
+/* Emits the `vmini.d vd, vj, sk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmini_d(TCGContext *s, TCGReg vd, TCGReg vj, int32_t sk5)
+{
+    tcg_out32(s, encode_vdvjsk5_insn(OPC_VMINI_D, vd, vj, sk5));
+}
+
+/* Emits the `vmaxi.bu vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmaxi_bu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VMAXI_BU, vd, vj, uk5));
+}
+
+/* Emits the `vmaxi.hu vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmaxi_hu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VMAXI_HU, vd, vj, uk5));
+}
+
+/* Emits the `vmaxi.wu vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmaxi_wu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VMAXI_WU, vd, vj, uk5));
+}
+
+/* Emits the `vmaxi.du vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmaxi_du(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VMAXI_DU, vd, vj, uk5));
+}
+
+/* Emits the `vmini.bu vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmini_bu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VMINI_BU, vd, vj, uk5));
+}
+
+/* Emits the `vmini.hu vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmini_hu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VMINI_HU, vd, vj, uk5));
+}
+
+/* Emits the `vmini.wu vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmini_wu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VMINI_WU, vd, vj, uk5));
+}
+
+/* Emits the `vmini.du vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmini_du(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VMINI_DU, vd, vj, uk5));
+}
+
+/* Emits the `vfrstpi.b vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfrstpi_b(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VFRSTPI_B, vd, vj, uk5));
+}
+
+/* Emits the `vfrstpi.h vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfrstpi_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VFRSTPI_H, vd, vj, uk5));
+}
+
+/* Emits the `vclo.b vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vclo_b(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VCLO_B, vd, vj));
+}
+
+/* Emits the `vclo.h vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vclo_h(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VCLO_H, vd, vj));
+}
+
+/* Emits the `vclo.w vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vclo_w(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VCLO_W, vd, vj));
+}
+
+/* Emits the `vclo.d vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vclo_d(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VCLO_D, vd, vj));
+}
+
+/* Emits the `vclz.b vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vclz_b(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VCLZ_B, vd, vj));
+}
+
+/* Emits the `vclz.h vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vclz_h(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VCLZ_H, vd, vj));
+}
+
+/* Emits the `vclz.w vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vclz_w(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VCLZ_W, vd, vj));
+}
+
+/* Emits the `vclz.d vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vclz_d(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VCLZ_D, vd, vj));
+}
+
+/* Emits the `vpcnt.b vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vpcnt_b(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VPCNT_B, vd, vj));
+}
+
+/* Emits the `vpcnt.h vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vpcnt_h(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VPCNT_H, vd, vj));
+}
+
+/* Emits the `vpcnt.w vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vpcnt_w(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VPCNT_W, vd, vj));
+}
+
+/* Emits the `vpcnt.d vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vpcnt_d(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VPCNT_D, vd, vj));
+}
+
+/* Emits the `vneg.b vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vneg_b(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VNEG_B, vd, vj));
+}
+
+/* Emits the `vneg.h vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vneg_h(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VNEG_H, vd, vj));
+}
+
+/* Emits the `vneg.w vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vneg_w(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VNEG_W, vd, vj));
+}
+
+/* Emits the `vneg.d vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vneg_d(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VNEG_D, vd, vj));
+}
+
+/* Emits the `vmskltz.b vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmskltz_b(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VMSKLTZ_B, vd, vj));
+}
+
+/* Emits the `vmskltz.h vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmskltz_h(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VMSKLTZ_H, vd, vj));
+}
+
+/* Emits the `vmskltz.w vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmskltz_w(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VMSKLTZ_W, vd, vj));
+}
+
+/* Emits the `vmskltz.d vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmskltz_d(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VMSKLTZ_D, vd, vj));
+}
+
+/* Emits the `vmskgez.b vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmskgez_b(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VMSKGEZ_B, vd, vj));
+}
+
+/* Emits the `vmsknz.b vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmsknz_b(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VMSKNZ_B, vd, vj));
+}
+
+/* Emits the `vseteqz.v cd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vseteqz_v(TCGContext *s, TCGReg cd, TCGReg vj)
+{
+    tcg_out32(s, encode_cdvj_insn(OPC_VSETEQZ_V, cd, vj));
+}
+
+/* Emits the `vsetnez.v cd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsetnez_v(TCGContext *s, TCGReg cd, TCGReg vj)
+{
+    tcg_out32(s, encode_cdvj_insn(OPC_VSETNEZ_V, cd, vj));
+}
+
+/* Emits the `vsetanyeqz.b cd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsetanyeqz_b(TCGContext *s, TCGReg cd, TCGReg vj)
+{
+    tcg_out32(s, encode_cdvj_insn(OPC_VSETANYEQZ_B, cd, vj));
+}
+
+/* Emits the `vsetanyeqz.h cd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsetanyeqz_h(TCGContext *s, TCGReg cd, TCGReg vj)
+{
+    tcg_out32(s, encode_cdvj_insn(OPC_VSETANYEQZ_H, cd, vj));
+}
+
+/* Emits the `vsetanyeqz.w cd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsetanyeqz_w(TCGContext *s, TCGReg cd, TCGReg vj)
+{
+    tcg_out32(s, encode_cdvj_insn(OPC_VSETANYEQZ_W, cd, vj));
+}
+
+/* Emits the `vsetanyeqz.d cd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsetanyeqz_d(TCGContext *s, TCGReg cd, TCGReg vj)
+{
+    tcg_out32(s, encode_cdvj_insn(OPC_VSETANYEQZ_D, cd, vj));
+}
+
+/* Emits the `vsetallnez.b cd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsetallnez_b(TCGContext *s, TCGReg cd, TCGReg vj)
+{
+    tcg_out32(s, encode_cdvj_insn(OPC_VSETALLNEZ_B, cd, vj));
+}
+
+/* Emits the `vsetallnez.h cd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsetallnez_h(TCGContext *s, TCGReg cd, TCGReg vj)
+{
+    tcg_out32(s, encode_cdvj_insn(OPC_VSETALLNEZ_H, cd, vj));
+}
+
+/* Emits the `vsetallnez.w cd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsetallnez_w(TCGContext *s, TCGReg cd, TCGReg vj)
+{
+    tcg_out32(s, encode_cdvj_insn(OPC_VSETALLNEZ_W, cd, vj));
+}
+
+/* Emits the `vsetallnez.d cd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsetallnez_d(TCGContext *s, TCGReg cd, TCGReg vj)
+{
+    tcg_out32(s, encode_cdvj_insn(OPC_VSETALLNEZ_D, cd, vj));
+}
+
+/* Emits the `vflogb.s vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vflogb_s(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFLOGB_S, vd, vj));
+}
+
+/* Emits the `vflogb.d vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vflogb_d(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFLOGB_D, vd, vj));
+}
+
+/* Emits the `vfclass.s vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfclass_s(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFCLASS_S, vd, vj));
+}
+
+/* Emits the `vfclass.d vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfclass_d(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFCLASS_D, vd, vj));
+}
+
+/* Emits the `vfsqrt.s vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfsqrt_s(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFSQRT_S, vd, vj));
+}
+
+/* Emits the `vfsqrt.d vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfsqrt_d(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFSQRT_D, vd, vj));
+}
+
+/* Emits the `vfrecip.s vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfrecip_s(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFRECIP_S, vd, vj));
+}
+
+/* Emits the `vfrecip.d vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfrecip_d(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFRECIP_D, vd, vj));
+}
+
+/* Emits the `vfrsqrt.s vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfrsqrt_s(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFRSQRT_S, vd, vj));
+}
+
+/* Emits the `vfrsqrt.d vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfrsqrt_d(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFRSQRT_D, vd, vj));
+}
+
+/* Emits the `vfrint.s vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfrint_s(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFRINT_S, vd, vj));
+}
+
+/* Emits the `vfrint.d vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfrint_d(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFRINT_D, vd, vj));
+}
+
+/* Emits the `vfrintrm.s vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfrintrm_s(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFRINTRM_S, vd, vj));
+}
+
+/* Emits the `vfrintrm.d vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfrintrm_d(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFRINTRM_D, vd, vj));
+}
+
+/* Emits the `vfrintrp.s vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfrintrp_s(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFRINTRP_S, vd, vj));
+}
+
+/* Emits the `vfrintrp.d vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfrintrp_d(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFRINTRP_D, vd, vj));
+}
+
+/* Emits the `vfrintrz.s vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfrintrz_s(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFRINTRZ_S, vd, vj));
+}
+
+/* Emits the `vfrintrz.d vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfrintrz_d(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFRINTRZ_D, vd, vj));
+}
+
+/* Emits the `vfrintrne.s vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfrintrne_s(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFRINTRNE_S, vd, vj));
+}
+
+/* Emits the `vfrintrne.d vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfrintrne_d(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFRINTRNE_D, vd, vj));
+}
+
+/* Emits the `vfcvtl.s.h vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfcvtl_s_h(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFCVTL_S_H, vd, vj));
+}
+
+/* Emits the `vfcvth.s.h vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfcvth_s_h(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFCVTH_S_H, vd, vj));
+}
+
+/* Emits the `vfcvtl.d.s vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfcvtl_d_s(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFCVTL_D_S, vd, vj));
+}
+
+/* Emits the `vfcvth.d.s vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfcvth_d_s(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFCVTH_D_S, vd, vj));
+}
+
+/* Emits the `vffint.s.w vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vffint_s_w(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFFINT_S_W, vd, vj));
+}
+
+/* Emits the `vffint.s.wu vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vffint_s_wu(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFFINT_S_WU, vd, vj));
+}
+
+/* Emits the `vffint.d.l vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vffint_d_l(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFFINT_D_L, vd, vj));
+}
+
+/* Emits the `vffint.d.lu vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vffint_d_lu(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFFINT_D_LU, vd, vj));
+}
+
+/* Emits the `vffintl.d.w vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vffintl_d_w(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFFINTL_D_W, vd, vj));
+}
+
+/* Emits the `vffinth.d.w vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vffinth_d_w(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFFINTH_D_W, vd, vj));
+}
+
+/* Emits the `vftint.w.s vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vftint_w_s(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFTINT_W_S, vd, vj));
+}
+
+/* Emits the `vftint.l.d vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vftint_l_d(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFTINT_L_D, vd, vj));
+}
+
+/* Emits the `vftintrm.w.s vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vftintrm_w_s(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFTINTRM_W_S, vd, vj));
+}
+
+/* Emits the `vftintrm.l.d vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vftintrm_l_d(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFTINTRM_L_D, vd, vj));
+}
+
+/* Emits the `vftintrp.w.s vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vftintrp_w_s(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFTINTRP_W_S, vd, vj));
+}
+
+/* Emits the `vftintrp.l.d vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vftintrp_l_d(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFTINTRP_L_D, vd, vj));
+}
+
+/* Emits the `vftintrz.w.s vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vftintrz_w_s(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFTINTRZ_W_S, vd, vj));
+}
+
+/* Emits the `vftintrz.l.d vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vftintrz_l_d(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFTINTRZ_L_D, vd, vj));
+}
+
+/* Emits the `vftintrne.w.s vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vftintrne_w_s(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFTINTRNE_W_S, vd, vj));
+}
+
+/* Emits the `vftintrne.l.d vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vftintrne_l_d(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFTINTRNE_L_D, vd, vj));
+}
+
+/* Emits the `vftint.wu.s vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vftint_wu_s(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFTINT_WU_S, vd, vj));
+}
+
+/* Emits the `vftint.lu.d vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vftint_lu_d(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFTINT_LU_D, vd, vj));
+}
+
+/* Emits the `vftintrz.wu.s vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vftintrz_wu_s(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFTINTRZ_WU_S, vd, vj));
+}
+
+/* Emits the `vftintrz.lu.d vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vftintrz_lu_d(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFTINTRZ_LU_D, vd, vj));
+}
+
+/* Emits the `vftintl.l.s vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vftintl_l_s(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFTINTL_L_S, vd, vj));
+}
+
+/* Emits the `vftinth.l.s vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vftinth_l_s(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFTINTH_L_S, vd, vj));
+}
+
+/* Emits the `vftintrml.l.s vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vftintrml_l_s(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFTINTRML_L_S, vd, vj));
+}
+
+/* Emits the `vftintrmh.l.s vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vftintrmh_l_s(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFTINTRMH_L_S, vd, vj));
+}
+
+/* Emits the `vftintrpl.l.s vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vftintrpl_l_s(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFTINTRPL_L_S, vd, vj));
+}
+
+/* Emits the `vftintrph.l.s vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vftintrph_l_s(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFTINTRPH_L_S, vd, vj));
+}
+
+/* Emits the `vftintrzl.l.s vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vftintrzl_l_s(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFTINTRZL_L_S, vd, vj));
+}
+
+/* Emits the `vftintrzh.l.s vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vftintrzh_l_s(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFTINTRZH_L_S, vd, vj));
+}
+
+/* Emits the `vftintrnel.l.s vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vftintrnel_l_s(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFTINTRNEL_L_S, vd, vj));
+}
+
+/* Emits the `vftintrneh.l.s vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vftintrneh_l_s(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFTINTRNEH_L_S, vd, vj));
+}
+
+/* Emits the `vexth.h.b vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vexth_h_b(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VEXTH_H_B, vd, vj));
+}
+
+/* Emits the `vexth.w.h vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vexth_w_h(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VEXTH_W_H, vd, vj));
+}
+
+/* Emits the `vexth.d.w vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vexth_d_w(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VEXTH_D_W, vd, vj));
+}
+
+/* Emits the `vexth.q.d vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vexth_q_d(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VEXTH_Q_D, vd, vj));
+}
+
+/* Emits the `vexth.hu.bu vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vexth_hu_bu(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VEXTH_HU_BU, vd, vj));
+}
+
+/* Emits the `vexth.wu.hu vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vexth_wu_hu(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VEXTH_WU_HU, vd, vj));
+}
+
+/* Emits the `vexth.du.wu vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vexth_du_wu(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VEXTH_DU_WU, vd, vj));
+}
+
+/* Emits the `vexth.qu.du vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vexth_qu_du(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VEXTH_QU_DU, vd, vj));
+}
+
+/* Emits the `vreplgr2vr.b vd, j` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vreplgr2vr_b(TCGContext *s, TCGReg vd, TCGReg j)
+{
+    tcg_out32(s, encode_vdj_insn(OPC_VREPLGR2VR_B, vd, j));
+}
+
+/* Emits the `vreplgr2vr.h vd, j` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vreplgr2vr_h(TCGContext *s, TCGReg vd, TCGReg j)
+{
+    tcg_out32(s, encode_vdj_insn(OPC_VREPLGR2VR_H, vd, j));
+}
+
+/* Emits the `vreplgr2vr.w vd, j` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vreplgr2vr_w(TCGContext *s, TCGReg vd, TCGReg j)
+{
+    tcg_out32(s, encode_vdj_insn(OPC_VREPLGR2VR_W, vd, j));
+}
+
+/* Emits the `vreplgr2vr.d vd, j` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vreplgr2vr_d(TCGContext *s, TCGReg vd, TCGReg j)
+{
+    tcg_out32(s, encode_vdj_insn(OPC_VREPLGR2VR_D, vd, j));
+}
+
+/* Emits the `vrotri.b vd, vj, uk3` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vrotri_b(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk3)
+{
+    tcg_out32(s, encode_vdvjuk3_insn(OPC_VROTRI_B, vd, vj, uk3));
+}
+
+/* Emits the `vrotri.h vd, vj, uk4` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vrotri_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4)
+{
+    tcg_out32(s, encode_vdvjuk4_insn(OPC_VROTRI_H, vd, vj, uk4));
+}
+
+/* Emits the `vrotri.w vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vrotri_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VROTRI_W, vd, vj, uk5));
+}
+
+/* Emits the `vrotri.d vd, vj, uk6` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vrotri_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk6)
+{
+    tcg_out32(s, encode_vdvjuk6_insn(OPC_VROTRI_D, vd, vj, uk6));
+}
+
+/* Emits the `vsrlri.b vd, vj, uk3` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrlri_b(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk3)
+{
+    tcg_out32(s, encode_vdvjuk3_insn(OPC_VSRLRI_B, vd, vj, uk3));
+}
+
+/* Emits the `vsrlri.h vd, vj, uk4` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrlri_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4)
+{
+    tcg_out32(s, encode_vdvjuk4_insn(OPC_VSRLRI_H, vd, vj, uk4));
+}
+
+/* Emits the `vsrlri.w vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrlri_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VSRLRI_W, vd, vj, uk5));
+}
+
+/* Emits the `vsrlri.d vd, vj, uk6` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrlri_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk6)
+{
+    tcg_out32(s, encode_vdvjuk6_insn(OPC_VSRLRI_D, vd, vj, uk6));
+}
+
+/* Emits the `vsrari.b vd, vj, uk3` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrari_b(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk3)
+{
+    tcg_out32(s, encode_vdvjuk3_insn(OPC_VSRARI_B, vd, vj, uk3));
+}
+
+/* Emits the `vsrari.h vd, vj, uk4` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrari_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4)
+{
+    tcg_out32(s, encode_vdvjuk4_insn(OPC_VSRARI_H, vd, vj, uk4));
+}
+
+/* Emits the `vsrari.w vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrari_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VSRARI_W, vd, vj, uk5));
+}
+
+/* Emits the `vsrari.d vd, vj, uk6` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrari_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk6)
+{
+    tcg_out32(s, encode_vdvjuk6_insn(OPC_VSRARI_D, vd, vj, uk6));
+}
+
+/* Emits the `vinsgr2vr.b vd, j, uk4` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vinsgr2vr_b(TCGContext *s, TCGReg vd, TCGReg j, uint32_t uk4)
+{
+    tcg_out32(s, encode_vdjuk4_insn(OPC_VINSGR2VR_B, vd, j, uk4));
+}
+
+/* Emits the `vinsgr2vr.h vd, j, uk3` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vinsgr2vr_h(TCGContext *s, TCGReg vd, TCGReg j, uint32_t uk3)
+{
+    tcg_out32(s, encode_vdjuk3_insn(OPC_VINSGR2VR_H, vd, j, uk3));
+}
+
+/* Emits the `vinsgr2vr.w vd, j, uk2` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vinsgr2vr_w(TCGContext *s, TCGReg vd, TCGReg j, uint32_t uk2)
+{
+    tcg_out32(s, encode_vdjuk2_insn(OPC_VINSGR2VR_W, vd, j, uk2));
+}
+
+/* Emits the `vinsgr2vr.d vd, j, uk1` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vinsgr2vr_d(TCGContext *s, TCGReg vd, TCGReg j, uint32_t uk1)
+{
+    tcg_out32(s, encode_vdjuk1_insn(OPC_VINSGR2VR_D, vd, j, uk1));
+}
+
+/* Emits the `vpickve2gr.b d, vj, uk4` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vpickve2gr_b(TCGContext *s, TCGReg d, TCGReg vj, uint32_t uk4)
+{
+    tcg_out32(s, encode_dvjuk4_insn(OPC_VPICKVE2GR_B, d, vj, uk4));
+}
+
+/* Emits the `vpickve2gr.h d, vj, uk3` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vpickve2gr_h(TCGContext *s, TCGReg d, TCGReg vj, uint32_t uk3)
+{
+    tcg_out32(s, encode_dvjuk3_insn(OPC_VPICKVE2GR_H, d, vj, uk3));
+}
+
+/* Emits the `vpickve2gr.w d, vj, uk2` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vpickve2gr_w(TCGContext *s, TCGReg d, TCGReg vj, uint32_t uk2)
+{
+    tcg_out32(s, encode_dvjuk2_insn(OPC_VPICKVE2GR_W, d, vj, uk2));
+}
+
+/* Emits the `vpickve2gr.d d, vj, uk1` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vpickve2gr_d(TCGContext *s, TCGReg d, TCGReg vj, uint32_t uk1)
+{
+    tcg_out32(s, encode_dvjuk1_insn(OPC_VPICKVE2GR_D, d, vj, uk1));
+}
+
+/* Emits the `vpickve2gr.bu d, vj, uk4` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vpickve2gr_bu(TCGContext *s, TCGReg d, TCGReg vj, uint32_t uk4)
+{
+    tcg_out32(s, encode_dvjuk4_insn(OPC_VPICKVE2GR_BU, d, vj, uk4));
+}
+
+/* Emits the `vpickve2gr.hu d, vj, uk3` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vpickve2gr_hu(TCGContext *s, TCGReg d, TCGReg vj, uint32_t uk3)
+{
+    tcg_out32(s, encode_dvjuk3_insn(OPC_VPICKVE2GR_HU, d, vj, uk3));
+}
+
+/* Emits the `vpickve2gr.wu d, vj, uk2` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vpickve2gr_wu(TCGContext *s, TCGReg d, TCGReg vj, uint32_t uk2)
+{
+    tcg_out32(s, encode_dvjuk2_insn(OPC_VPICKVE2GR_WU, d, vj, uk2));
+}
+
+/* Emits the `vpickve2gr.du d, vj, uk1` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vpickve2gr_du(TCGContext *s, TCGReg d, TCGReg vj, uint32_t uk1)
+{
+    tcg_out32(s, encode_dvjuk1_insn(OPC_VPICKVE2GR_DU, d, vj, uk1));
+}
+
+/* Emits the `vreplvei.b vd, vj, uk4` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vreplvei_b(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4)
+{
+    tcg_out32(s, encode_vdvjuk4_insn(OPC_VREPLVEI_B, vd, vj, uk4));
+}
+
+/* Emits the `vreplvei.h vd, vj, uk3` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vreplvei_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk3)
+{
+    tcg_out32(s, encode_vdvjuk3_insn(OPC_VREPLVEI_H, vd, vj, uk3));
+}
+
+/* Emits the `vreplvei.w vd, vj, uk2` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vreplvei_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk2)
+{
+    tcg_out32(s, encode_vdvjuk2_insn(OPC_VREPLVEI_W, vd, vj, uk2));
+}
+
+/* Emits the `vreplvei.d vd, vj, uk1` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vreplvei_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk1)
+{
+    tcg_out32(s, encode_vdvjuk1_insn(OPC_VREPLVEI_D, vd, vj, uk1));
+}
+
+/* Emits the `vsllwil.h.b vd, vj, uk3` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsllwil_h_b(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk3)
+{
+    tcg_out32(s, encode_vdvjuk3_insn(OPC_VSLLWIL_H_B, vd, vj, uk3));
+}
+
+/* Emits the `vsllwil.w.h vd, vj, uk4` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsllwil_w_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4)
+{
+    tcg_out32(s, encode_vdvjuk4_insn(OPC_VSLLWIL_W_H, vd, vj, uk4));
+}
+
+/* Emits the `vsllwil.d.w vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsllwil_d_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VSLLWIL_D_W, vd, vj, uk5));
+}
+
+/* Emits the `vextl.q.d vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vextl_q_d(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VEXTL_Q_D, vd, vj));
+}
+
+/* Emits the `vsllwil.hu.bu vd, vj, uk3` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsllwil_hu_bu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk3)
+{
+    tcg_out32(s, encode_vdvjuk3_insn(OPC_VSLLWIL_HU_BU, vd, vj, uk3));
+}
+
+/* Emits the `vsllwil.wu.hu vd, vj, uk4` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsllwil_wu_hu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4)
+{
+    tcg_out32(s, encode_vdvjuk4_insn(OPC_VSLLWIL_WU_HU, vd, vj, uk4));
+}
+
+/* Emits the `vsllwil.du.wu vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsllwil_du_wu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VSLLWIL_DU_WU, vd, vj, uk5));
+}
+
+/* Emits the `vextl.qu.du vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vextl_qu_du(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VEXTL_QU_DU, vd, vj));
+}
+
+/* Emits the `vbitclri.b vd, vj, uk3` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vbitclri_b(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk3)
+{
+    tcg_out32(s, encode_vdvjuk3_insn(OPC_VBITCLRI_B, vd, vj, uk3));
+}
+
+/* Emits the `vbitclri.h vd, vj, uk4` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vbitclri_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4)
+{
+    tcg_out32(s, encode_vdvjuk4_insn(OPC_VBITCLRI_H, vd, vj, uk4));
+}
+
+/* Emits the `vbitclri.w vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vbitclri_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VBITCLRI_W, vd, vj, uk5));
+}
+
+/* Emits the `vbitclri.d vd, vj, uk6` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vbitclri_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk6)
+{
+    tcg_out32(s, encode_vdvjuk6_insn(OPC_VBITCLRI_D, vd, vj, uk6));
+}
+
+/* Emits the `vbitseti.b vd, vj, uk3` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vbitseti_b(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk3)
+{
+    tcg_out32(s, encode_vdvjuk3_insn(OPC_VBITSETI_B, vd, vj, uk3));
+}
+
+/* Emits the `vbitseti.h vd, vj, uk4` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vbitseti_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4)
+{
+    tcg_out32(s, encode_vdvjuk4_insn(OPC_VBITSETI_H, vd, vj, uk4));
+}
+
+/* Emits the `vbitseti.w vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vbitseti_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VBITSETI_W, vd, vj, uk5));
+}
+
+/* Emits the `vbitseti.d vd, vj, uk6` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vbitseti_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk6)
+{
+    tcg_out32(s, encode_vdvjuk6_insn(OPC_VBITSETI_D, vd, vj, uk6));
+}
+
+/* Emits the `vbitrevi.b vd, vj, uk3` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vbitrevi_b(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk3)
+{
+    tcg_out32(s, encode_vdvjuk3_insn(OPC_VBITREVI_B, vd, vj, uk3));
+}
+
+/* Emits the `vbitrevi.h vd, vj, uk4` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vbitrevi_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4)
+{
+    tcg_out32(s, encode_vdvjuk4_insn(OPC_VBITREVI_H, vd, vj, uk4));
+}
+
+/* Emits the `vbitrevi.w vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vbitrevi_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VBITREVI_W, vd, vj, uk5));
+}
+
+/* Emits the `vbitrevi.d vd, vj, uk6` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vbitrevi_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk6)
+{
+    tcg_out32(s, encode_vdvjuk6_insn(OPC_VBITREVI_D, vd, vj, uk6));
+}
+
+/* Emits the `vsat.b vd, vj, uk3` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsat_b(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk3)
+{
+    tcg_out32(s, encode_vdvjuk3_insn(OPC_VSAT_B, vd, vj, uk3));
+}
+
+/* Emits the `vsat.h vd, vj, uk4` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsat_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4)
+{
+    tcg_out32(s, encode_vdvjuk4_insn(OPC_VSAT_H, vd, vj, uk4));
+}
+
+/* Emits the `vsat.w vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsat_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VSAT_W, vd, vj, uk5));
+}
+
+/* Emits the `vsat.d vd, vj, uk6` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsat_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk6)
+{
+    tcg_out32(s, encode_vdvjuk6_insn(OPC_VSAT_D, vd, vj, uk6));
+}
+
+/* Emits the `vsat.bu vd, vj, uk3` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsat_bu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk3)
+{
+    tcg_out32(s, encode_vdvjuk3_insn(OPC_VSAT_BU, vd, vj, uk3));
+}
+
+/* Emits the `vsat.hu vd, vj, uk4` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsat_hu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4)
+{
+    tcg_out32(s, encode_vdvjuk4_insn(OPC_VSAT_HU, vd, vj, uk4));
+}
+
+/* Emits the `vsat.wu vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsat_wu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VSAT_WU, vd, vj, uk5));
+}
+
+/* Emits the `vsat.du vd, vj, uk6` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsat_du(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk6)
+{
+    tcg_out32(s, encode_vdvjuk6_insn(OPC_VSAT_DU, vd, vj, uk6));
+}
+
+/* Emits the `vslli.b vd, vj, uk3` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vslli_b(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk3)
+{
+    tcg_out32(s, encode_vdvjuk3_insn(OPC_VSLLI_B, vd, vj, uk3));
+}
+
+/* Emits the `vslli.h vd, vj, uk4` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vslli_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4)
+{
+    tcg_out32(s, encode_vdvjuk4_insn(OPC_VSLLI_H, vd, vj, uk4));
+}
+
+/* Emits the `vslli.w vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vslli_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VSLLI_W, vd, vj, uk5));
+}
+
+/* Emits the `vslli.d vd, vj, uk6` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vslli_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk6)
+{
+    tcg_out32(s, encode_vdvjuk6_insn(OPC_VSLLI_D, vd, vj, uk6));
+}
+
+/* Emits the `vsrli.b vd, vj, uk3` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrli_b(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk3)
+{
+    tcg_out32(s, encode_vdvjuk3_insn(OPC_VSRLI_B, vd, vj, uk3));
+}
+
+/* Emits the `vsrli.h vd, vj, uk4` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrli_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4)
+{
+    tcg_out32(s, encode_vdvjuk4_insn(OPC_VSRLI_H, vd, vj, uk4));
+}
+
+/* Emits the `vsrli.w vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrli_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VSRLI_W, vd, vj, uk5));
+}
+
+/* Emits the `vsrli.d vd, vj, uk6` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrli_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk6)
+{
+    tcg_out32(s, encode_vdvjuk6_insn(OPC_VSRLI_D, vd, vj, uk6));
+}
+
+/* Emits the `vsrai.b vd, vj, uk3` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrai_b(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk3)
+{
+    tcg_out32(s, encode_vdvjuk3_insn(OPC_VSRAI_B, vd, vj, uk3));
+}
+
+/* Emits the `vsrai.h vd, vj, uk4` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrai_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4)
+{
+    tcg_out32(s, encode_vdvjuk4_insn(OPC_VSRAI_H, vd, vj, uk4));
+}
+
+/* Emits the `vsrai.w vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrai_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VSRAI_W, vd, vj, uk5));
+}
+
+/* Emits the `vsrai.d vd, vj, uk6` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrai_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk6)
+{
+    tcg_out32(s, encode_vdvjuk6_insn(OPC_VSRAI_D, vd, vj, uk6));
+}
+
+/* Emits the `vsrlni.b.h vd, vj, uk4` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrlni_b_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4)
+{
+    tcg_out32(s, encode_vdvjuk4_insn(OPC_VSRLNI_B_H, vd, vj, uk4));
+}
+
+/* Emits the `vsrlni.h.w vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrlni_h_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VSRLNI_H_W, vd, vj, uk5));
+}
+
+/* Emits the `vsrlni.w.d vd, vj, uk6` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrlni_w_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk6)
+{
+    tcg_out32(s, encode_vdvjuk6_insn(OPC_VSRLNI_W_D, vd, vj, uk6));
+}
+
+/* Emits the `vsrlni.d.q vd, vj, uk7` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrlni_d_q(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk7)
+{
+    tcg_out32(s, encode_vdvjuk7_insn(OPC_VSRLNI_D_Q, vd, vj, uk7));
+}
+
+/* Emits the `vsrlrni.b.h vd, vj, uk4` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrlrni_b_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4)
+{
+    tcg_out32(s, encode_vdvjuk4_insn(OPC_VSRLRNI_B_H, vd, vj, uk4));
+}
+
+/* Emits the `vsrlrni.h.w vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrlrni_h_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VSRLRNI_H_W, vd, vj, uk5));
+}
+
+/* Emits the `vsrlrni.w.d vd, vj, uk6` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrlrni_w_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk6)
+{
+    tcg_out32(s, encode_vdvjuk6_insn(OPC_VSRLRNI_W_D, vd, vj, uk6));
+}
+
+/* Emits the `vsrlrni.d.q vd, vj, uk7` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrlrni_d_q(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk7)
+{
+    tcg_out32(s, encode_vdvjuk7_insn(OPC_VSRLRNI_D_Q, vd, vj, uk7));
+}
+
+/* Emits the `vssrlni.b.h vd, vj, uk4` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrlni_b_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4)
+{
+    tcg_out32(s, encode_vdvjuk4_insn(OPC_VSSRLNI_B_H, vd, vj, uk4));
+}
+
+/* Emits the `vssrlni.h.w vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrlni_h_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VSSRLNI_H_W, vd, vj, uk5));
+}
+
+/* Emits the `vssrlni.w.d vd, vj, uk6` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrlni_w_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk6)
+{
+    tcg_out32(s, encode_vdvjuk6_insn(OPC_VSSRLNI_W_D, vd, vj, uk6));
+}
+
+/* Emits the `vssrlni.d.q vd, vj, uk7` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrlni_d_q(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk7)
+{
+    tcg_out32(s, encode_vdvjuk7_insn(OPC_VSSRLNI_D_Q, vd, vj, uk7));
+}
+
+/* Emits the `vssrlni.bu.h vd, vj, uk4` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrlni_bu_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4)
+{
+    tcg_out32(s, encode_vdvjuk4_insn(OPC_VSSRLNI_BU_H, vd, vj, uk4));
+}
+
+/* Emits the `vssrlni.hu.w vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrlni_hu_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VSSRLNI_HU_W, vd, vj, uk5));
+}
+
+/* Emits the `vssrlni.wu.d vd, vj, uk6` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrlni_wu_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk6)
+{
+    tcg_out32(s, encode_vdvjuk6_insn(OPC_VSSRLNI_WU_D, vd, vj, uk6));
+}
+
+/* Emits the `vssrlni.du.q vd, vj, uk7` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrlni_du_q(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk7)
+{
+    tcg_out32(s, encode_vdvjuk7_insn(OPC_VSSRLNI_DU_Q, vd, vj, uk7));
+}
+
+/* Emits the `vssrlrni.b.h vd, vj, uk4` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrlrni_b_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4)
+{
+    tcg_out32(s, encode_vdvjuk4_insn(OPC_VSSRLRNI_B_H, vd, vj, uk4));
+}
+
+/* Emits the `vssrlrni.h.w vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrlrni_h_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VSSRLRNI_H_W, vd, vj, uk5));
+}
+
+/* Emits the `vssrlrni.w.d vd, vj, uk6` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrlrni_w_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk6)
+{
+    tcg_out32(s, encode_vdvjuk6_insn(OPC_VSSRLRNI_W_D, vd, vj, uk6));
+}
+
+/* Emits the `vssrlrni.d.q vd, vj, uk7` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrlrni_d_q(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk7)
+{
+    tcg_out32(s, encode_vdvjuk7_insn(OPC_VSSRLRNI_D_Q, vd, vj, uk7));
+}
+
+/* Emits the `vssrlrni.bu.h vd, vj, uk4` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrlrni_bu_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4)
+{
+    tcg_out32(s, encode_vdvjuk4_insn(OPC_VSSRLRNI_BU_H, vd, vj, uk4));
+}
+
+/* Emits the `vssrlrni.hu.w vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrlrni_hu_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VSSRLRNI_HU_W, vd, vj, uk5));
+}
+
+/* Emits the `vssrlrni.wu.d vd, vj, uk6` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrlrni_wu_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk6)
+{
+    tcg_out32(s, encode_vdvjuk6_insn(OPC_VSSRLRNI_WU_D, vd, vj, uk6));
+}
+
+/* Emits the `vssrlrni.du.q vd, vj, uk7` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrlrni_du_q(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk7)
+{
+    tcg_out32(s, encode_vdvjuk7_insn(OPC_VSSRLRNI_DU_Q, vd, vj, uk7));
+}
+
+/* Emits the `vsrani.b.h vd, vj, uk4` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrani_b_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4)
+{
+    tcg_out32(s, encode_vdvjuk4_insn(OPC_VSRANI_B_H, vd, vj, uk4));
+}
+
+/* Emits the `vsrani.h.w vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrani_h_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VSRANI_H_W, vd, vj, uk5));
+}
+
+/* Emits the `vsrani.w.d vd, vj, uk6` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrani_w_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk6)
+{
+    tcg_out32(s, encode_vdvjuk6_insn(OPC_VSRANI_W_D, vd, vj, uk6));
+}
+
+/* Emits the `vsrani.d.q vd, vj, uk7` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrani_d_q(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk7)
+{
+    tcg_out32(s, encode_vdvjuk7_insn(OPC_VSRANI_D_Q, vd, vj, uk7));
+}
+
+/* Emits the `vsrarni.b.h vd, vj, uk4` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrarni_b_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4)
+{
+    tcg_out32(s, encode_vdvjuk4_insn(OPC_VSRARNI_B_H, vd, vj, uk4));
+}
+
+/* Emits the `vsrarni.h.w vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrarni_h_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VSRARNI_H_W, vd, vj, uk5));
+}
+
+/* Emits the `vsrarni.w.d vd, vj, uk6` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrarni_w_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk6)
+{
+    tcg_out32(s, encode_vdvjuk6_insn(OPC_VSRARNI_W_D, vd, vj, uk6));
+}
+
+/* Emits the `vsrarni.d.q vd, vj, uk7` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrarni_d_q(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk7)
+{
+    tcg_out32(s, encode_vdvjuk7_insn(OPC_VSRARNI_D_Q, vd, vj, uk7));
+}
+
+/* Emits the `vssrani.b.h vd, vj, uk4` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrani_b_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4)
+{
+    tcg_out32(s, encode_vdvjuk4_insn(OPC_VSSRANI_B_H, vd, vj, uk4));
+}
+
+/* Emits the `vssrani.h.w vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrani_h_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VSSRANI_H_W, vd, vj, uk5));
+}
+
+/* Emits the `vssrani.w.d vd, vj, uk6` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrani_w_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk6)
+{
+    tcg_out32(s, encode_vdvjuk6_insn(OPC_VSSRANI_W_D, vd, vj, uk6));
+}
+
+/* Emits the `vssrani.d.q vd, vj, uk7` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrani_d_q(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk7)
+{
+    tcg_out32(s, encode_vdvjuk7_insn(OPC_VSSRANI_D_Q, vd, vj, uk7));
+}
+
+/* Emits the `vssrani.bu.h vd, vj, uk4` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrani_bu_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4)
+{
+    tcg_out32(s, encode_vdvjuk4_insn(OPC_VSSRANI_BU_H, vd, vj, uk4));
+}
+
+/* Emits the `vssrani.hu.w vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrani_hu_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VSSRANI_HU_W, vd, vj, uk5));
+}
+
+/* Emits the `vssrani.wu.d vd, vj, uk6` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrani_wu_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk6)
+{
+    tcg_out32(s, encode_vdvjuk6_insn(OPC_VSSRANI_WU_D, vd, vj, uk6));
+}
+
+/* Emits the `vssrani.du.q vd, vj, uk7` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrani_du_q(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk7)
+{
+    tcg_out32(s, encode_vdvjuk7_insn(OPC_VSSRANI_DU_Q, vd, vj, uk7));
+}
+
+/* Emits the `vssrarni.b.h vd, vj, uk4` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrarni_b_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4)
+{
+    tcg_out32(s, encode_vdvjuk4_insn(OPC_VSSRARNI_B_H, vd, vj, uk4));
+}
+
+/* Emits the `vssrarni.h.w vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrarni_h_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VSSRARNI_H_W, vd, vj, uk5));
+}
+
+/* Emits the `vssrarni.w.d vd, vj, uk6` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrarni_w_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk6)
+{
+    tcg_out32(s, encode_vdvjuk6_insn(OPC_VSSRARNI_W_D, vd, vj, uk6));
+}
+
+/* Emits the `vssrarni.d.q vd, vj, uk7` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrarni_d_q(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk7)
+{
+    tcg_out32(s, encode_vdvjuk7_insn(OPC_VSSRARNI_D_Q, vd, vj, uk7));
+}
+
+/* Emits the `vssrarni.bu.h vd, vj, uk4` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrarni_bu_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4)
+{
+    tcg_out32(s, encode_vdvjuk4_insn(OPC_VSSRARNI_BU_H, vd, vj, uk4));
+}
+
+/* Emits the `vssrarni.hu.w vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrarni_hu_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VSSRARNI_HU_W, vd, vj, uk5));
+}
+
+/* Emits the `vssrarni.wu.d vd, vj, uk6` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrarni_wu_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk6)
+{
+    tcg_out32(s, encode_vdvjuk6_insn(OPC_VSSRARNI_WU_D, vd, vj, uk6));
+}
+
+/* Emits the `vssrarni.du.q vd, vj, uk7` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrarni_du_q(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk7)
+{
+    tcg_out32(s, encode_vdvjuk7_insn(OPC_VSSRARNI_DU_Q, vd, vj, uk7));
+}
+
+/* Emits the `vextrins.d vd, vj, uk8` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vextrins_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk8)
+{
+    tcg_out32(s, encode_vdvjuk8_insn(OPC_VEXTRINS_D, vd, vj, uk8));
+}
+
+/* Emits the `vextrins.w vd, vj, uk8` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vextrins_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk8)
+{
+    tcg_out32(s, encode_vdvjuk8_insn(OPC_VEXTRINS_W, vd, vj, uk8));
+}
+
+/* Emits the `vextrins.h vd, vj, uk8` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vextrins_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk8)
+{
+    tcg_out32(s, encode_vdvjuk8_insn(OPC_VEXTRINS_H, vd, vj, uk8));
+}
+
+/* Emits the `vextrins.b vd, vj, uk8` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vextrins_b(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk8)
+{
+    tcg_out32(s, encode_vdvjuk8_insn(OPC_VEXTRINS_B, vd, vj, uk8));
+}
+
+/* Emits the `vshuf4i.b vd, vj, uk8` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vshuf4i_b(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk8)
+{
+    tcg_out32(s, encode_vdvjuk8_insn(OPC_VSHUF4I_B, vd, vj, uk8));
+}
+
+/* Emits the `vshuf4i.h vd, vj, uk8` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vshuf4i_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk8)
+{
+    tcg_out32(s, encode_vdvjuk8_insn(OPC_VSHUF4I_H, vd, vj, uk8));
+}
+
+/* Emits the `vshuf4i.w vd, vj, uk8` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vshuf4i_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk8)
+{
+    tcg_out32(s, encode_vdvjuk8_insn(OPC_VSHUF4I_W, vd, vj, uk8));
+}
+
+/* Emits the `vshuf4i.d vd, vj, uk8` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vshuf4i_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk8)
+{
+    tcg_out32(s, encode_vdvjuk8_insn(OPC_VSHUF4I_D, vd, vj, uk8));
+}
+
+/* Emits the `vbitseli.b vd, vj, uk8` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vbitseli_b(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk8)
+{
+    tcg_out32(s, encode_vdvjuk8_insn(OPC_VBITSELI_B, vd, vj, uk8));
+}
+
+/* Emits the `vandi.b vd, vj, uk8` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vandi_b(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk8)
+{
+    tcg_out32(s, encode_vdvjuk8_insn(OPC_VANDI_B, vd, vj, uk8));
+}
+
+/* Emits the `vori.b vd, vj, uk8` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vori_b(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk8)
+{
+    tcg_out32(s, encode_vdvjuk8_insn(OPC_VORI_B, vd, vj, uk8));
+}
+
+/* Emits the `vxori.b vd, vj, uk8` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vxori_b(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk8)
+{
+    tcg_out32(s, encode_vdvjuk8_insn(OPC_VXORI_B, vd, vj, uk8));
+}
+
+/* Emits the `vnori.b vd, vj, uk8` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vnori_b(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk8)
+{
+    tcg_out32(s, encode_vdvjuk8_insn(OPC_VNORI_B, vd, vj, uk8));
+}
+
+/* Emits the `vldi vd, sj13` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vldi(TCGContext *s, TCGReg vd, int32_t sj13)
+{
+    tcg_out32(s, encode_vdsj13_insn(OPC_VLDI, vd, sj13));
+}
+
+/* Emits the `vpermi.w vd, vj, uk8` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vpermi_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk8)
+{
+    tcg_out32(s, encode_vdvjuk8_insn(OPC_VPERMI_W, vd, vj, uk8));
+}
+
 /* End of generated code.  */
diff --git a/tcg/loongarch64/tcg-target-con-set.h b/tcg/loongarch64/tcg-target-con-set.h
index c2bde44613..77d62e38e7 100644
--- a/tcg/loongarch64/tcg-target-con-set.h
+++ b/tcg/loongarch64/tcg-target-con-set.h
@@ -17,7 +17,11 @@
 C_O0_I1(r)
 C_O0_I2(rZ, r)
 C_O0_I2(rZ, rZ)
+C_O0_I2(w, r)
+C_O0_I3(r, r, r)
 C_O1_I1(r, r)
+C_O1_I1(w, r)
+C_O1_I1(w, w)
 C_O1_I2(r, r, rC)
 C_O1_I2(r, r, ri)
 C_O1_I2(r, r, rI)
@@ -29,4 +33,9 @@ C_O1_I2(r, 0, rZ)
 C_O1_I2(r, rZ, ri)
 C_O1_I2(r, rZ, rJ)
 C_O1_I2(r, rZ, rZ)
+C_O1_I2(w, w, w)
+C_O1_I2(w, w, wM)
+C_O1_I2(w, w, wA)
+C_O1_I3(w, w, w, w)
 C_O1_I4(r, rZ, rJ, rZ, rZ)
+C_O2_I1(r, r, r)
diff --git a/tcg/loongarch64/tcg-target-con-str.h b/tcg/loongarch64/tcg-target-con-str.h
index 6e9ccca3ad..2ba9c135ac 100644
--- a/tcg/loongarch64/tcg-target-con-str.h
+++ b/tcg/loongarch64/tcg-target-con-str.h
@@ -14,6 +14,7 @@
  * REGS(letter, register_mask)
  */
 REGS('r', ALL_GENERAL_REGS)
+REGS('w', ALL_VECTOR_REGS)
 
 /*
  * Define constraint letters for constants:
@@ -25,3 +26,5 @@ CONST('U', TCG_CT_CONST_U12)
 CONST('Z', TCG_CT_CONST_ZERO)
 CONST('C', TCG_CT_CONST_C12)
 CONST('W', TCG_CT_CONST_WSZ)
+CONST('M', TCG_CT_CONST_VCMP)
+CONST('A', TCG_CT_CONST_VADD)
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index baf5fc3819..b701df50db 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -32,6 +32,8 @@
 #include "../tcg-ldst.c.inc"
 #include <asm/hwcap.h>
 
+bool use_lsx_instructions;
+
 #ifdef CONFIG_DEBUG_TCG
 static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
     "zero",
@@ -65,7 +67,39 @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
     "s5",
     "s6",
     "s7",
-    "s8"
+    "s8",
+    "vr0",
+    "vr1",
+    "vr2",
+    "vr3",
+    "vr4",
+    "vr5",
+    "vr6",
+    "vr7",
+    "vr8",
+    "vr9",
+    "vr10",
+    "vr11",
+    "vr12",
+    "vr13",
+    "vr14",
+    "vr15",
+    "vr16",
+    "vr17",
+    "vr18",
+    "vr19",
+    "vr20",
+    "vr21",
+    "vr22",
+    "vr23",
+    "vr24",
+    "vr25",
+    "vr26",
+    "vr27",
+    "vr28",
+    "vr29",
+    "vr30",
+    "vr31",
 };
 #endif
 
@@ -102,6 +136,15 @@ static const int tcg_target_reg_alloc_order[] = {
     TCG_REG_A2,
     TCG_REG_A1,
     TCG_REG_A0,
+
+    /* Vector registers */
+    TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
+    TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
+    TCG_REG_V8, TCG_REG_V9, TCG_REG_V10, TCG_REG_V11,
+    TCG_REG_V12, TCG_REG_V13, TCG_REG_V14, TCG_REG_V15,
+    TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
+    TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
+    /* V24 - V31 are caller-saved, and skipped.  */
 };
 
 static const int tcg_target_call_iarg_regs[] = {
@@ -133,8 +176,11 @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
 #define TCG_CT_CONST_U12   0x800
 #define TCG_CT_CONST_C12   0x1000
 #define TCG_CT_CONST_WSZ   0x2000
+#define TCG_CT_CONST_VCMP  0x4000
+#define TCG_CT_CONST_VADD  0x8000
 
 #define ALL_GENERAL_REGS   MAKE_64BIT_MASK(0, 32)
+#define ALL_VECTOR_REGS    MAKE_64BIT_MASK(32, 32)
 
 static inline tcg_target_long sextreg(tcg_target_long val, int pos, int len)
 {
@@ -142,7 +188,7 @@ static inline tcg_target_long sextreg(tcg_target_long val, int pos, int len)
 }
 
 /* test if a constant matches the constraint */
-static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
+static bool tcg_target_const_match(int64_t val, TCGType type, int ct, int vece)
 {
     if (ct & TCG_CT_CONST) {
         return true;
@@ -165,6 +211,13 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
     if ((ct & TCG_CT_CONST_WSZ) && val == (type == TCG_TYPE_I32 ? 32 : 64)) {
         return true;
     }
+    int64_t vec_val = sextract64(val, 0, 8 << vece);
+    if ((ct & TCG_CT_CONST_VCMP) && -0x10 <= vec_val && vec_val <= 0x1f) {
+        return true;
+    }
+    if ((ct & TCG_CT_CONST_VADD) && -0x1f <= vec_val && vec_val <= 0x1f) {
+        return true;
+    }
     return false;
 }
 
@@ -1028,6 +1081,48 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
     }
 }
 
+static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg data_lo, TCGReg data_hi,
+                                   TCGReg addr_reg, MemOpIdx oi, bool is_ld)
+{
+    TCGLabelQemuLdst *ldst;
+    HostAddress h;
+
+    ldst = prepare_host_addr(s, &h, addr_reg, oi, is_ld);
+
+    if (h.aa.atom == MO_128) {
+        /*
+         * Use VLDX/VSTX when 128-bit atomicity is required.
+         * If address is aligned to 16-bytes, the 128-bit load/store is atomic.
+         */
+        if (is_ld) {
+            tcg_out_opc_vldx(s, TCG_VEC_TMP0, h.base, h.index);
+            tcg_out_opc_vpickve2gr_d(s, data_lo, TCG_VEC_TMP0, 0);
+            tcg_out_opc_vpickve2gr_d(s, data_hi, TCG_VEC_TMP0, 1);
+        } else {
+            tcg_out_opc_vinsgr2vr_d(s, TCG_VEC_TMP0, data_lo, 0);
+            tcg_out_opc_vinsgr2vr_d(s, TCG_VEC_TMP0, data_hi, 1);
+            tcg_out_opc_vstx(s, TCG_VEC_TMP0, h.base, h.index);
+        }
+    } else {
+        /* Otherwise use a pair of LD/ST. */
+        tcg_out_opc_add_d(s, TCG_REG_TMP0, h.base, h.index);
+        if (is_ld) {
+            tcg_out_opc_ld_d(s, data_lo, TCG_REG_TMP0, 0);
+            tcg_out_opc_ld_d(s, data_hi, TCG_REG_TMP0, 8);
+        } else {
+            tcg_out_opc_st_d(s, data_lo, TCG_REG_TMP0, 0);
+            tcg_out_opc_st_d(s, data_hi, TCG_REG_TMP0, 8);
+        }
+    }
+
+    if (ldst) {
+        ldst->type = TCG_TYPE_I128;
+        ldst->datalo_reg = data_lo;
+        ldst->datahi_reg = data_hi;
+        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
+    }
+}
+
 /*
  * Entry-points
  */
@@ -1092,6 +1187,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
     TCGArg a0 = args[0];
     TCGArg a1 = args[1];
     TCGArg a2 = args[2];
+    TCGArg a3 = args[3];
     int c2 = const_args[2];
 
     switch (opc) {
@@ -1454,6 +1550,10 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
     case INDEX_op_qemu_ld_a64_i64:
         tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I64);
         break;
+    case INDEX_op_qemu_ld_a32_i128:
+    case INDEX_op_qemu_ld_a64_i128:
+        tcg_out_qemu_ldst_i128(s, a0, a1, a2, a3, true);
+        break;
     case INDEX_op_qemu_st_a32_i32:
     case INDEX_op_qemu_st_a64_i32:
         tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I32);
@@ -1462,6 +1562,10 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
     case INDEX_op_qemu_st_a64_i64:
         tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I64);
         break;
+    case INDEX_op_qemu_st_a32_i128:
+    case INDEX_op_qemu_st_a64_i128:
+        tcg_out_qemu_ldst_i128(s, a0, a1, a2, a3, false);
+        break;
 
     case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
     case INDEX_op_mov_i64:
@@ -1486,6 +1590,444 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
     }
 }
 
+static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
+                            TCGReg rd, TCGReg rs)
+{
+    switch (vece) {
+    case MO_8:
+        tcg_out_opc_vreplgr2vr_b(s, rd, rs);
+        break;
+    case MO_16:
+        tcg_out_opc_vreplgr2vr_h(s, rd, rs);
+        break;
+    case MO_32:
+        tcg_out_opc_vreplgr2vr_w(s, rd, rs);
+        break;
+    case MO_64:
+        tcg_out_opc_vreplgr2vr_d(s, rd, rs);
+        break;
+    default:
+        g_assert_not_reached();
+    }
+    return true;
+}
+
+static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
+                             TCGReg r, TCGReg base, intptr_t offset)
+{
+    /* Handle imm overflow and division (vldrepl.d imm is divided by 8) */
+    if (offset < -0x800 || offset > 0x7ff || \
+        (offset & ((1 << vece) - 1)) != 0) {
+        tcg_out_addi(s, TCG_TYPE_I64, TCG_REG_TMP0, base, offset);
+        base = TCG_REG_TMP0;
+        offset = 0;
+    }
+    offset >>= vece;
+
+    switch (vece) {
+    case MO_8:
+        tcg_out_opc_vldrepl_b(s, r, base, offset);
+        break;
+    case MO_16:
+        tcg_out_opc_vldrepl_h(s, r, base, offset);
+        break;
+    case MO_32:
+        tcg_out_opc_vldrepl_w(s, r, base, offset);
+        break;
+    case MO_64:
+        tcg_out_opc_vldrepl_d(s, r, base, offset);
+        break;
+    default:
+        g_assert_not_reached();
+    }
+    return true;
+}
+
+static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
+                             TCGReg rd, int64_t v64)
+{
+    /* Try vldi if imm can fit */
+    int64_t value = sextract64(v64, 0, 8 << vece);
+    if (-0x200 <= value && value <= 0x1FF) {
+        uint32_t imm = (vece << 10) | ((uint32_t)v64 & 0x3FF);
+        tcg_out_opc_vldi(s, rd, imm);
+        return;
+    }
+
+    /* TODO: vldi patterns when imm 12 is set */
+
+    /* Fallback to vreplgr2vr */
+    tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, value);
+    switch (vece) {
+    case MO_8:
+        tcg_out_opc_vreplgr2vr_b(s, rd, TCG_REG_TMP0);
+        break;
+    case MO_16:
+        tcg_out_opc_vreplgr2vr_h(s, rd, TCG_REG_TMP0);
+        break;
+    case MO_32:
+        tcg_out_opc_vreplgr2vr_w(s, rd, TCG_REG_TMP0);
+        break;
+    case MO_64:
+        tcg_out_opc_vreplgr2vr_d(s, rd, TCG_REG_TMP0);
+        break;
+    default:
+        g_assert_not_reached();
+    }
+}
+
+static void tcg_out_addsub_vec(TCGContext *s, unsigned vece, const TCGArg a0,
+                               const TCGArg a1, const TCGArg a2,
+                               bool a2_is_const, bool is_add)
+{
+    static const LoongArchInsn add_vec_insn[4] = {
+        OPC_VADD_B, OPC_VADD_H, OPC_VADD_W, OPC_VADD_D
+    };
+    static const LoongArchInsn add_vec_imm_insn[4] = {
+        OPC_VADDI_BU, OPC_VADDI_HU, OPC_VADDI_WU, OPC_VADDI_DU
+    };
+    static const LoongArchInsn sub_vec_insn[4] = {
+        OPC_VSUB_B, OPC_VSUB_H, OPC_VSUB_W, OPC_VSUB_D
+    };
+    static const LoongArchInsn sub_vec_imm_insn[4] = {
+        OPC_VSUBI_BU, OPC_VSUBI_HU, OPC_VSUBI_WU, OPC_VSUBI_DU
+    };
+
+    if (a2_is_const) {
+        int64_t value = sextract64(a2, 0, 8 << vece);
+        if (!is_add) {
+            value = -value;
+        }
+
+        /* Try vaddi/vsubi */
+        if (0 <= value && value <= 0x1f) {
+            tcg_out32(s, encode_vdvjuk5_insn(add_vec_imm_insn[vece], a0, \
+                                             a1, value));
+            return;
+        } else if (-0x1f <= value && value < 0) {
+            tcg_out32(s, encode_vdvjuk5_insn(sub_vec_imm_insn[vece], a0, \
+                                             a1, -value));
+            return;
+        }
+
+        /* constraint TCG_CT_CONST_VADD ensures unreachable */
+        g_assert_not_reached();
+    }
+
+    if (is_add) {
+        tcg_out32(s, encode_vdvjvk_insn(add_vec_insn[vece], a0, a1, a2));
+    } else {
+        tcg_out32(s, encode_vdvjvk_insn(sub_vec_insn[vece], a0, a1, a2));
+    }
+}
+
+static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
+                           unsigned vecl, unsigned vece,
+                           const TCGArg args[TCG_MAX_OP_ARGS],
+                           const int const_args[TCG_MAX_OP_ARGS])
+{
+    TCGType type = vecl + TCG_TYPE_V64;
+    TCGArg a0, a1, a2, a3;
+    TCGReg temp = TCG_REG_TMP0;
+    TCGReg temp_vec = TCG_VEC_TMP0;
+
+    static const LoongArchInsn cmp_vec_insn[16][4] = {
+        [TCG_COND_EQ] = {OPC_VSEQ_B, OPC_VSEQ_H, OPC_VSEQ_W, OPC_VSEQ_D},
+        [TCG_COND_LE] = {OPC_VSLE_B, OPC_VSLE_H, OPC_VSLE_W, OPC_VSLE_D},
+        [TCG_COND_LEU] = {OPC_VSLE_BU, OPC_VSLE_HU, OPC_VSLE_WU, OPC_VSLE_DU},
+        [TCG_COND_LT] = {OPC_VSLT_B, OPC_VSLT_H, OPC_VSLT_W, OPC_VSLT_D},
+        [TCG_COND_LTU] = {OPC_VSLT_BU, OPC_VSLT_HU, OPC_VSLT_WU, OPC_VSLT_DU},
+    };
+    static const LoongArchInsn cmp_vec_imm_insn[16][4] = {
+        [TCG_COND_EQ] = {OPC_VSEQI_B, OPC_VSEQI_H, OPC_VSEQI_W, OPC_VSEQI_D},
+        [TCG_COND_LE] = {OPC_VSLEI_B, OPC_VSLEI_H, OPC_VSLEI_W, OPC_VSLEI_D},
+        [TCG_COND_LEU] = {OPC_VSLEI_BU, OPC_VSLEI_HU, OPC_VSLEI_WU, OPC_VSLEI_DU},
+        [TCG_COND_LT] = {OPC_VSLTI_B, OPC_VSLTI_H, OPC_VSLTI_W, OPC_VSLTI_D},
+        [TCG_COND_LTU] = {OPC_VSLTI_BU, OPC_VSLTI_HU, OPC_VSLTI_WU, OPC_VSLTI_DU},
+    };
+    LoongArchInsn insn;
+    static const LoongArchInsn neg_vec_insn[4] = {
+        OPC_VNEG_B, OPC_VNEG_H, OPC_VNEG_W, OPC_VNEG_D
+    };
+    static const LoongArchInsn mul_vec_insn[4] = {
+        OPC_VMUL_B, OPC_VMUL_H, OPC_VMUL_W, OPC_VMUL_D
+    };
+    static const LoongArchInsn smin_vec_insn[4] = {
+        OPC_VMIN_B, OPC_VMIN_H, OPC_VMIN_W, OPC_VMIN_D
+    };
+    static const LoongArchInsn umin_vec_insn[4] = {
+        OPC_VMIN_BU, OPC_VMIN_HU, OPC_VMIN_WU, OPC_VMIN_DU
+    };
+    static const LoongArchInsn smax_vec_insn[4] = {
+        OPC_VMAX_B, OPC_VMAX_H, OPC_VMAX_W, OPC_VMAX_D
+    };
+    static const LoongArchInsn umax_vec_insn[4] = {
+        OPC_VMAX_BU, OPC_VMAX_HU, OPC_VMAX_WU, OPC_VMAX_DU
+    };
+    static const LoongArchInsn ssadd_vec_insn[4] = {
+        OPC_VSADD_B, OPC_VSADD_H, OPC_VSADD_W, OPC_VSADD_D
+    };
+    static const LoongArchInsn usadd_vec_insn[4] = {
+        OPC_VSADD_BU, OPC_VSADD_HU, OPC_VSADD_WU, OPC_VSADD_DU
+    };
+    static const LoongArchInsn sssub_vec_insn[4] = {
+        OPC_VSSUB_B, OPC_VSSUB_H, OPC_VSSUB_W, OPC_VSSUB_D
+    };
+    static const LoongArchInsn ussub_vec_insn[4] = {
+        OPC_VSSUB_BU, OPC_VSSUB_HU, OPC_VSSUB_WU, OPC_VSSUB_DU
+    };
+    static const LoongArchInsn shlv_vec_insn[4] = {
+        OPC_VSLL_B, OPC_VSLL_H, OPC_VSLL_W, OPC_VSLL_D
+    };
+    static const LoongArchInsn shrv_vec_insn[4] = {
+        OPC_VSRL_B, OPC_VSRL_H, OPC_VSRL_W, OPC_VSRL_D
+    };
+    static const LoongArchInsn sarv_vec_insn[4] = {
+        OPC_VSRA_B, OPC_VSRA_H, OPC_VSRA_W, OPC_VSRA_D
+    };
+    static const LoongArchInsn shli_vec_insn[4] = {
+        OPC_VSLLI_B, OPC_VSLLI_H, OPC_VSLLI_W, OPC_VSLLI_D
+    };
+    static const LoongArchInsn shri_vec_insn[4] = {
+        OPC_VSRLI_B, OPC_VSRLI_H, OPC_VSRLI_W, OPC_VSRLI_D
+    };
+    static const LoongArchInsn sari_vec_insn[4] = {
+        OPC_VSRAI_B, OPC_VSRAI_H, OPC_VSRAI_W, OPC_VSRAI_D
+    };
+    static const LoongArchInsn rotrv_vec_insn[4] = {
+        OPC_VROTR_B, OPC_VROTR_H, OPC_VROTR_W, OPC_VROTR_D
+    };
+
+    a0 = args[0];
+    a1 = args[1];
+    a2 = args[2];
+    a3 = args[3];
+
+    /* Currently only supports V128 */
+    tcg_debug_assert(type == TCG_TYPE_V128);
+
+    switch (opc) {
+    case INDEX_op_st_vec:
+        /* Try to fit vst imm */
+        if (-0x800 <= a2 && a2 <= 0x7ff) {
+            tcg_out_opc_vst(s, a0, a1, a2);
+        } else {
+            tcg_out_movi(s, TCG_TYPE_I64, temp, a2);
+            tcg_out_opc_vstx(s, a0, a1, temp);
+        }
+        break;
+    case INDEX_op_ld_vec:
+        /* Try to fit vld imm */
+        if (-0x800 <= a2 && a2 <= 0x7ff) {
+            tcg_out_opc_vld(s, a0, a1, a2);
+        } else {
+            tcg_out_movi(s, TCG_TYPE_I64, temp, a2);
+            tcg_out_opc_vldx(s, a0, a1, temp);
+        }
+        break;
+    case INDEX_op_and_vec:
+        tcg_out_opc_vand_v(s, a0, a1, a2);
+        break;
+    case INDEX_op_andc_vec:
+        /*
+         * vandn vd, vj, vk: vd = vk & ~vj
+         * andc_vec vd, vj, vk: vd = vj & ~vk
+         * vk and vk are swapped
+         */
+        tcg_out_opc_vandn_v(s, a0, a2, a1);
+        break;
+    case INDEX_op_or_vec:
+        tcg_out_opc_vor_v(s, a0, a1, a2);
+        break;
+    case INDEX_op_orc_vec:
+        tcg_out_opc_vorn_v(s, a0, a1, a2);
+        break;
+    case INDEX_op_xor_vec:
+        tcg_out_opc_vxor_v(s, a0, a1, a2);
+        break;
+    case INDEX_op_nor_vec:
+        tcg_out_opc_vnor_v(s, a0, a1, a2);
+        break;
+    case INDEX_op_not_vec:
+        tcg_out_opc_vnor_v(s, a0, a1, a1);
+        break;
+    case INDEX_op_cmp_vec:
+        TCGCond cond = args[3];
+        if (const_args[2]) {
+            /*
+             * cmp_vec dest, src, value
+             * Try vseqi/vslei/vslti
+             */
+            int64_t value = sextract64(a2, 0, 8 << vece);
+            if ((cond == TCG_COND_EQ || cond == TCG_COND_LE || \
+                 cond == TCG_COND_LT) && (-0x10 <= value && value <= 0x0f)) {
+                tcg_out32(s, encode_vdvjsk5_insn(cmp_vec_imm_insn[cond][vece], \
+                                                 a0, a1, value));
+                break;
+            } else if ((cond == TCG_COND_LEU || cond == TCG_COND_LTU) &&
+                (0x00 <= value && value <= 0x1f)) {
+                tcg_out32(s, encode_vdvjuk5_insn(cmp_vec_imm_insn[cond][vece], \
+                                                 a0, a1, value));
+                break;
+            }
+
+            /*
+             * Fallback to:
+             * dupi_vec temp, a2
+             * cmp_vec a0, a1, temp, cond
+             */
+            tcg_out_dupi_vec(s, type, vece, temp_vec, a2);
+            a2 = temp_vec;
+        }
+
+        insn = cmp_vec_insn[cond][vece];
+        if (insn == 0) {
+            TCGArg t;
+            t = a1, a1 = a2, a2 = t;
+            cond = tcg_swap_cond(cond);
+            insn = cmp_vec_insn[cond][vece];
+            tcg_debug_assert(insn != 0);
+        }
+        tcg_out32(s, encode_vdvjvk_insn(insn, a0, a1, a2));
+        break;
+    case INDEX_op_add_vec:
+        tcg_out_addsub_vec(s, vece, a0, a1, a2, const_args[2], true);
+        break;
+    case INDEX_op_sub_vec:
+        tcg_out_addsub_vec(s, vece, a0, a1, a2, const_args[2], false);
+        break;
+    case INDEX_op_neg_vec:
+        tcg_out32(s, encode_vdvj_insn(neg_vec_insn[vece], a0, a1));
+        break;
+    case INDEX_op_mul_vec:
+        tcg_out32(s, encode_vdvjvk_insn(mul_vec_insn[vece], a0, a1, a2));
+        break;
+    case INDEX_op_smin_vec:
+        tcg_out32(s, encode_vdvjvk_insn(smin_vec_insn[vece], a0, a1, a2));
+        break;
+    case INDEX_op_smax_vec:
+        tcg_out32(s, encode_vdvjvk_insn(smax_vec_insn[vece], a0, a1, a2));
+        break;
+    case INDEX_op_umin_vec:
+        tcg_out32(s, encode_vdvjvk_insn(umin_vec_insn[vece], a0, a1, a2));
+        break;
+    case INDEX_op_umax_vec:
+        tcg_out32(s, encode_vdvjvk_insn(umax_vec_insn[vece], a0, a1, a2));
+        break;
+    case INDEX_op_ssadd_vec:
+        tcg_out32(s, encode_vdvjvk_insn(ssadd_vec_insn[vece], a0, a1, a2));
+        break;
+    case INDEX_op_usadd_vec:
+        tcg_out32(s, encode_vdvjvk_insn(usadd_vec_insn[vece], a0, a1, a2));
+        break;
+    case INDEX_op_sssub_vec:
+        tcg_out32(s, encode_vdvjvk_insn(sssub_vec_insn[vece], a0, a1, a2));
+        break;
+    case INDEX_op_ussub_vec:
+        tcg_out32(s, encode_vdvjvk_insn(ussub_vec_insn[vece], a0, a1, a2));
+        break;
+    case INDEX_op_shlv_vec:
+        tcg_out32(s, encode_vdvjvk_insn(shlv_vec_insn[vece], a0, a1, a2));
+        break;
+    case INDEX_op_shrv_vec:
+        tcg_out32(s, encode_vdvjvk_insn(shrv_vec_insn[vece], a0, a1, a2));
+        break;
+    case INDEX_op_sarv_vec:
+        tcg_out32(s, encode_vdvjvk_insn(sarv_vec_insn[vece], a0, a1, a2));
+        break;
+    case INDEX_op_shli_vec:
+        tcg_out32(s, encode_vdvjuk3_insn(shli_vec_insn[vece], a0, a1, a2));
+        break;
+    case INDEX_op_shri_vec:
+        tcg_out32(s, encode_vdvjuk3_insn(shri_vec_insn[vece], a0, a1, a2));
+        break;
+    case INDEX_op_sari_vec:
+        tcg_out32(s, encode_vdvjuk3_insn(sari_vec_insn[vece], a0, a1, a2));
+        break;
+    case INDEX_op_rotrv_vec:
+        tcg_out32(s, encode_vdvjvk_insn(rotrv_vec_insn[vece], a0, a1, a2));
+        break;
+    case INDEX_op_rotlv_vec:
+        /* rotlv_vec a1, a2 = rotrv_vec a1, -a2 */
+        tcg_out32(s, encode_vdvj_insn(neg_vec_insn[vece], temp_vec, a2));
+        tcg_out32(s, encode_vdvjvk_insn(rotrv_vec_insn[vece], a0, a1,
+                                        temp_vec));
+        break;
+    case INDEX_op_rotli_vec:
+        /* rotli_vec a1, a2 = rotri_vec a1, -a2 */
+        a2 = extract32(-a2, 0, 3 + vece);
+        switch (vece) {
+        case MO_8:
+            tcg_out_opc_vrotri_b(s, a0, a1, a2);
+            break;
+        case MO_16:
+            tcg_out_opc_vrotri_h(s, a0, a1, a2);
+            break;
+        case MO_32:
+            tcg_out_opc_vrotri_w(s, a0, a1, a2);
+            break;
+        case MO_64:
+            tcg_out_opc_vrotri_d(s, a0, a1, a2);
+            break;
+        default:
+            g_assert_not_reached();
+        }
+        break;
+    case INDEX_op_bitsel_vec:
+        /* vbitsel vd, vj, vk, va = bitsel_vec vd, va, vk, vj */
+        tcg_out_opc_vbitsel_v(s, a0, a3, a2, a1);
+        break;
+    case INDEX_op_dupm_vec:
+        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
+        break;
+    default:
+        g_assert_not_reached();
+    }
+}
+
+int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
+{
+    switch (opc) {
+    case INDEX_op_ld_vec:
+    case INDEX_op_st_vec:
+    case INDEX_op_dup_vec:
+    case INDEX_op_dupm_vec:
+    case INDEX_op_cmp_vec:
+    case INDEX_op_add_vec:
+    case INDEX_op_sub_vec:
+    case INDEX_op_and_vec:
+    case INDEX_op_andc_vec:
+    case INDEX_op_or_vec:
+    case INDEX_op_orc_vec:
+    case INDEX_op_xor_vec:
+    case INDEX_op_nor_vec:
+    case INDEX_op_not_vec:
+    case INDEX_op_neg_vec:
+    case INDEX_op_mul_vec:
+    case INDEX_op_smin_vec:
+    case INDEX_op_smax_vec:
+    case INDEX_op_umin_vec:
+    case INDEX_op_umax_vec:
+    case INDEX_op_ssadd_vec:
+    case INDEX_op_usadd_vec:
+    case INDEX_op_sssub_vec:
+    case INDEX_op_ussub_vec:
+    case INDEX_op_shlv_vec:
+    case INDEX_op_shrv_vec:
+    case INDEX_op_sarv_vec:
+    case INDEX_op_bitsel_vec:
+        return 1;
+    default:
+        return 0;
+    }
+}
+
+void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
+                       TCGArg a0, ...)
+{
+    g_assert_not_reached();
+}
+
 static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
 {
     switch (op) {
@@ -1505,6 +2047,14 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
     case INDEX_op_qemu_st_a64_i64:
         return C_O0_I2(rZ, r);
 
+    case INDEX_op_qemu_ld_a32_i128:
+    case INDEX_op_qemu_ld_a64_i128:
+        return C_O2_I1(r, r, r);
+
+    case INDEX_op_qemu_st_a32_i128:
+    case INDEX_op_qemu_st_a64_i128:
+        return C_O0_I3(r, r, r);
+
     case INDEX_op_brcond_i32:
     case INDEX_op_brcond_i64:
         return C_O0_I2(rZ, rZ);
@@ -1627,6 +2177,54 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
     case INDEX_op_movcond_i64:
         return C_O1_I4(r, rZ, rJ, rZ, rZ);
 
+    case INDEX_op_ld_vec:
+    case INDEX_op_dupm_vec:
+    case INDEX_op_dup_vec:
+        return C_O1_I1(w, r);
+
+    case INDEX_op_st_vec:
+        return C_O0_I2(w, r);
+
+    case INDEX_op_cmp_vec:
+        return C_O1_I2(w, w, wM);
+
+    case INDEX_op_add_vec:
+    case INDEX_op_sub_vec:
+        return C_O1_I2(w, w, wA);
+
+    case INDEX_op_and_vec:
+    case INDEX_op_andc_vec:
+    case INDEX_op_or_vec:
+    case INDEX_op_orc_vec:
+    case INDEX_op_xor_vec:
+    case INDEX_op_nor_vec:
+    case INDEX_op_mul_vec:
+    case INDEX_op_smin_vec:
+    case INDEX_op_smax_vec:
+    case INDEX_op_umin_vec:
+    case INDEX_op_umax_vec:
+    case INDEX_op_ssadd_vec:
+    case INDEX_op_usadd_vec:
+    case INDEX_op_sssub_vec:
+    case INDEX_op_ussub_vec:
+    case INDEX_op_shlv_vec:
+    case INDEX_op_shrv_vec:
+    case INDEX_op_sarv_vec:
+    case INDEX_op_rotrv_vec:
+    case INDEX_op_rotlv_vec:
+        return C_O1_I2(w, w, w);
+
+    case INDEX_op_not_vec:
+    case INDEX_op_neg_vec:
+    case INDEX_op_shli_vec:
+    case INDEX_op_shri_vec:
+    case INDEX_op_sari_vec:
+    case INDEX_op_rotli_vec:
+        return C_O1_I1(w, w);
+
+    case INDEX_op_bitsel_vec:
+        return C_O1_I3(w, w, w, w);
+
     default:
         g_assert_not_reached();
     }
@@ -1698,6 +2296,11 @@ static void tcg_target_qemu_prologue(TCGContext *s)
     tcg_out_opc_jirl(s, TCG_REG_ZERO, TCG_REG_RA, 0);
 }
 
+static void tcg_out_tb_start(TCGContext *s)
+{
+    /* nothing to do */
+}
+
 static void tcg_target_init(TCGContext *s)
 {
     unsigned long hwcap = qemu_getauxval(AT_HWCAP);
@@ -1708,6 +2311,10 @@ static void tcg_target_init(TCGContext *s)
         exit(EXIT_FAILURE);
     }
 
+    if (hwcap & HWCAP_LOONGARCH_LSX) {
+        use_lsx_instructions = 1;
+    }
+
     tcg_target_available_regs[TCG_TYPE_I32] = ALL_GENERAL_REGS;
     tcg_target_available_regs[TCG_TYPE_I64] = ALL_GENERAL_REGS;
 
@@ -1723,6 +2330,18 @@ static void tcg_target_init(TCGContext *s)
     tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S8);
     tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S9);
 
+    if (use_lsx_instructions) {
+        tcg_target_available_regs[TCG_TYPE_V128] = ALL_VECTOR_REGS;
+        tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V24);
+        tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V25);
+        tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V26);
+        tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V27);
+        tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V28);
+        tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V29);
+        tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V30);
+        tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V31);
+    }
+
     s->reserved_regs = 0;
     tcg_regset_set_reg(s->reserved_regs, TCG_REG_ZERO);
     tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP0);
@@ -1731,6 +2350,7 @@ static void tcg_target_init(TCGContext *s)
     tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
     tcg_regset_set_reg(s->reserved_regs, TCG_REG_TP);
     tcg_regset_set_reg(s->reserved_regs, TCG_REG_RESERVED);
+    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP0);
 }
 
 typedef struct {
diff --git a/tcg/loongarch64/tcg-target.h b/tcg/loongarch64/tcg-target.h
index 559be67186..03017672f6 100644
--- a/tcg/loongarch64/tcg-target.h
+++ b/tcg/loongarch64/tcg-target.h
@@ -30,7 +30,7 @@
 #define LOONGARCH_TCG_TARGET_H
 
 #define TCG_TARGET_INSN_UNIT_SIZE 4
-#define TCG_TARGET_NB_REGS 32
+#define TCG_TARGET_NB_REGS 64
 
 #define MAX_CODE_GEN_BUFFER_SIZE  ((size_t)-1)
 
@@ -68,13 +68,25 @@ typedef enum {
     TCG_REG_S7,
     TCG_REG_S8,
 
+    TCG_REG_V0 = 32, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
+    TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
+    TCG_REG_V8, TCG_REG_V9, TCG_REG_V10, TCG_REG_V11,
+    TCG_REG_V12, TCG_REG_V13, TCG_REG_V14, TCG_REG_V15,
+    TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
+    TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
+    TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
+    TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
+
     /* aliases */
     TCG_AREG0    = TCG_REG_S0,
     TCG_REG_TMP0 = TCG_REG_T8,
     TCG_REG_TMP1 = TCG_REG_T7,
     TCG_REG_TMP2 = TCG_REG_T6,
+    TCG_VEC_TMP0 = TCG_REG_V23,
 } TCGReg;
 
+extern bool use_lsx_instructions;
+
 /* used for function call generation */
 #define TCG_REG_CALL_STACK              TCG_REG_SP
 #define TCG_TARGET_STACK_ALIGN          16
@@ -159,7 +171,31 @@ typedef enum {
 #define TCG_TARGET_HAS_muluh_i64        1
 #define TCG_TARGET_HAS_mulsh_i64        1
 
-#define TCG_TARGET_HAS_qemu_ldst_i128   0
+#define TCG_TARGET_HAS_qemu_ldst_i128   use_lsx_instructions
+
+#define TCG_TARGET_HAS_v64              0
+#define TCG_TARGET_HAS_v128             use_lsx_instructions
+#define TCG_TARGET_HAS_v256             0
+
+#define TCG_TARGET_HAS_not_vec          1
+#define TCG_TARGET_HAS_neg_vec          1
+#define TCG_TARGET_HAS_abs_vec          0
+#define TCG_TARGET_HAS_andc_vec         1
+#define TCG_TARGET_HAS_orc_vec          1
+#define TCG_TARGET_HAS_nand_vec         0
+#define TCG_TARGET_HAS_nor_vec          1
+#define TCG_TARGET_HAS_eqv_vec          0
+#define TCG_TARGET_HAS_mul_vec          1
+#define TCG_TARGET_HAS_shi_vec          1
+#define TCG_TARGET_HAS_shs_vec          0
+#define TCG_TARGET_HAS_shv_vec          1
+#define TCG_TARGET_HAS_roti_vec         1
+#define TCG_TARGET_HAS_rots_vec         0
+#define TCG_TARGET_HAS_rotv_vec         1
+#define TCG_TARGET_HAS_sat_vec          1
+#define TCG_TARGET_HAS_minmax_vec       1
+#define TCG_TARGET_HAS_bitsel_vec       1
+#define TCG_TARGET_HAS_cmpsel_vec       0
 
 #define TCG_TARGET_DEFAULT_MO (0)
 
diff --git a/tcg/loongarch64/tcg-target.opc.h b/tcg/loongarch64/tcg-target.opc.h
new file mode 100644
index 0000000000..fd1a40b7fd
--- /dev/null
+++ b/tcg/loongarch64/tcg-target.opc.h
@@ -0,0 +1,12 @@
+/*
+ * Copyright (c) 2023 Jiajie Chen
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version.
+ *
+ * See the COPYING file in the top-level directory for details.
+ *
+ * Target-specific opcodes for host vector expansion.  These will be
+ * emitted by tcg_expand_vec_op.  For those familiar with GCC internals,
+ * consider these to be UNSPEC with names.
+ */
diff --git a/tcg/meson.build b/tcg/meson.build
index c0252c4198..0014dca7d4 100644
--- a/tcg/meson.build
+++ b/tcg/meson.build
@@ -22,7 +22,7 @@ if get_option('tcg_interpreter')
   tcg_ss.add(files('tci.c'))
 endif
 
-tcg_ss = tcg_ss.apply(config_host, strict: false)
+tcg_ss = tcg_ss.apply(config_targetos, strict: false)
 
 libtcg_user = static_library('tcg_user',
                              tcg_ss.sources() + genh,
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
index 9faa8bdf0b..f52bda4828 100644
--- a/tcg/mips/tcg-target.c.inc
+++ b/tcg/mips/tcg-target.c.inc
@@ -190,7 +190,7 @@ static bool is_p2m1(tcg_target_long val)
 }
 
 /* test if a constant matches the constraint */
-static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
+static bool tcg_target_const_match(int64_t val, TCGType type, int ct, int vece)
 {
     if (ct & TCG_CT_CONST) {
         return 1;
@@ -2628,6 +2628,11 @@ static void tcg_target_qemu_prologue(TCGContext *s)
     tcg_out_opc_reg(s, OPC_OR, TCG_TMP3, TCG_TMP3, TCG_TMP1);
 }
 
+static void tcg_out_tb_start(TCGContext *s)
+{
+    /* nothing to do */
+}
+
 static void tcg_target_init(TCGContext *s)
 {
     tcg_target_detect_isa();
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
index 090f11e71c..90d76c2c2c 100644
--- a/tcg/ppc/tcg-target.c.inc
+++ b/tcg/ppc/tcg-target.c.inc
@@ -261,7 +261,7 @@ static bool reloc_pc14(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
 }
 
 /* test if a constant matches the constraint */
-static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
+static bool tcg_target_const_match(int64_t val, TCGType type, int ct, int vece)
 {
     if (ct & TCG_CT_CONST) {
         return 1;
@@ -2527,6 +2527,11 @@ static void tcg_target_qemu_prologue(TCGContext *s)
     tcg_out32(s, BCLR | BO_ALWAYS);
 }
 
+static void tcg_out_tb_start(TCGContext *s)
+{
+    /* nothing to do */
+}
+
 static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg)
 {
     tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, arg);
diff --git a/tcg/region.c b/tcg/region.c
index 2b28ed3556..a078899096 100644
--- a/tcg/region.c
+++ b/tcg/region.c
@@ -33,8 +33,19 @@
 #include "tcg/tcg.h"
 #include "exec/translation-block.h"
 #include "tcg-internal.h"
+#include "host/cpuinfo.h"
 
 
+/*
+ * Local source-level compatibility with Unix.
+ * Used by tcg_region_init below.
+ */
+#if defined(_WIN32)
+#define PROT_READ   1
+#define PROT_WRITE  2
+#define PROT_EXEC   4
+#endif
+
 struct tcg_region_tree {
     QemuMutex lock;
     QTree *tree;
@@ -83,6 +94,18 @@ bool in_code_gen_buffer(const void *p)
     return (size_t)(p - region.start_aligned) <= region.total_size;
 }
 
+#ifndef CONFIG_TCG_INTERPRETER
+static int host_prot_read_exec(void)
+{
+#if defined(CONFIG_LINUX) && defined(HOST_AARCH64) && defined(PROT_BTI)
+    if (cpuinfo & CPUINFO_BTI) {
+        return PROT_READ | PROT_EXEC | PROT_BTI;
+    }
+#endif
+    return PROT_READ | PROT_EXEC;
+}
+#endif
+
 #ifdef CONFIG_DEBUG_TCG
 const void *tcg_splitwx_to_rx(void *rw)
 {
@@ -505,14 +528,6 @@ static int alloc_code_gen_buffer(size_t tb_size, int splitwx, Error **errp)
     return PROT_READ | PROT_WRITE;
 }
 #elif defined(_WIN32)
-/*
- * Local source-level compatibility with Unix.
- * Used by tcg_region_init below.
- */
-#define PROT_READ   1
-#define PROT_WRITE  2
-#define PROT_EXEC   4
-
 static int alloc_code_gen_buffer(size_t size, int splitwx, Error **errp)
 {
     void *buf;
@@ -567,7 +582,7 @@ static int alloc_code_gen_buffer_splitwx_memfd(size_t size, Error **errp)
         goto fail;
     }
 
-    buf_rx = mmap(NULL, size, PROT_READ | PROT_EXEC, MAP_SHARED, fd, 0);
+    buf_rx = mmap(NULL, size, host_prot_read_exec(), MAP_SHARED, fd, 0);
     if (buf_rx == MAP_FAILED) {
         goto fail_rx;
     }
@@ -642,7 +657,7 @@ static int alloc_code_gen_buffer_splitwx_vmremap(size_t size, Error **errp)
         return -1;
     }
 
-    if (mprotect((void *)buf_rx, size, PROT_READ | PROT_EXEC) != 0) {
+    if (mprotect((void *)buf_rx, size, host_prot_read_exec()) != 0) {
         error_setg_errno(errp, errno, "mprotect for jit splitwx");
         munmap((void *)buf_rx, size);
         munmap((void *)buf_rw, size);
@@ -805,7 +820,7 @@ void tcg_region_init(size_t tb_size, int splitwx, unsigned max_cpus)
     need_prot = PROT_READ | PROT_WRITE;
 #ifndef CONFIG_TCG_INTERPRETER
     if (tcg_splitwx_diff == 0) {
-        need_prot |= PROT_EXEC;
+        need_prot |= host_prot_read_exec();
     }
 #endif
     for (size_t i = 0, n = region.n; i < n; i++) {
@@ -820,7 +835,11 @@ void tcg_region_init(size_t tb_size, int splitwx, unsigned max_cpus)
             } else if (need_prot == (PROT_READ | PROT_WRITE)) {
                 rc = qemu_mprotect_rw(start, end - start);
             } else {
+#ifdef CONFIG_POSIX
+                rc = mprotect(start, end - start, need_prot);
+#else
                 g_assert_not_reached();
+#endif
             }
             if (rc) {
                 error_setg_errno(&error_fatal, errno,
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
index 9be81c1b7b..c2bcdea33f 100644
--- a/tcg/riscv/tcg-target.c.inc
+++ b/tcg/riscv/tcg-target.c.inc
@@ -145,7 +145,7 @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
 #define sextreg  sextract64
 
 /* test if a constant matches the constraint */
-static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
+static bool tcg_target_const_match(int64_t val, TCGType type, int ct, int vece)
 {
     if (ct & TCG_CT_CONST) {
         return 1;
@@ -2099,6 +2099,11 @@ static void tcg_target_qemu_prologue(TCGContext *s)
     tcg_out_opc_imm(s, OPC_JALR, TCG_REG_ZERO, TCG_REG_RA, 0);
 }
 
+static void tcg_out_tb_start(TCGContext *s)
+{
+    /* nothing to do */
+}
+
 static volatile sig_atomic_t got_sigill;
 
 static void sigill_handler(int signo, siginfo_t *si, void *data)
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
index ecd8aaf2a1..7552f63a05 100644
--- a/tcg/s390x/tcg-target.c.inc
+++ b/tcg/s390x/tcg-target.c.inc
@@ -540,7 +540,7 @@ static bool risbg_mask(uint64_t c)
 }
 
 /* Test if a constant matches the constraint. */
-static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
+static bool tcg_target_const_match(int64_t val, TCGType type, int ct, int vece)
 {
     if (ct & TCG_CT_CONST) {
         return 1;
@@ -3483,6 +3483,11 @@ static void tcg_target_qemu_prologue(TCGContext *s)
     tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, TCG_REG_R14);
 }
 
+static void tcg_out_tb_start(TCGContext *s)
+{
+    /* nothing to do */
+}
+
 static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
 {
     memset(p, 0x07, count * sizeof(tcg_insn_unit));
diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc
index 81a08bb6c5..01ac26c192 100644
--- a/tcg/sparc64/tcg-target.c.inc
+++ b/tcg/sparc64/tcg-target.c.inc
@@ -322,7 +322,7 @@ static bool patch_reloc(tcg_insn_unit *src_rw, int type,
 }
 
 /* test if a constant matches the constraint */
-static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
+static bool tcg_target_const_match(int64_t val, TCGType type, int ct, int vece)
 {
     if (ct & TCG_CT_CONST) {
         return 1;
@@ -962,6 +962,11 @@ static void tcg_target_qemu_prologue(TCGContext *s)
     tcg_out_movi_s13(s, TCG_REG_O0, 0);
 }
 
+static void tcg_out_tb_start(TCGContext *s)
+{
+    /* nothing to do */
+}
+
 static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
 {
     int i;
diff --git a/tcg/tcg-internal.h b/tcg/tcg-internal.h
index fbe62b31b8..40a69e6e6e 100644
--- a/tcg/tcg-internal.h
+++ b/tcg/tcg-internal.h
@@ -64,8 +64,8 @@ static inline TCGv_i32 TCGV_HIGH(TCGv_i64 t)
     return temp_tcgv_i32(tcgv_i64_temp(t) + !HOST_BIG_ENDIAN);
 }
 #else
-extern TCGv_i32 TCGV_LOW(TCGv_i64) QEMU_ERROR("32-bit code path is reachable");
-extern TCGv_i32 TCGV_HIGH(TCGv_i64) QEMU_ERROR("32-bit code path is reachable");
+TCGv_i32 TCGV_LOW(TCGv_i64) QEMU_ERROR("32-bit code path is reachable");
+TCGv_i32 TCGV_HIGH(TCGv_i64) QEMU_ERROR("32-bit code path is reachable");
 #endif
 
 static inline TCGv_i64 TCGV128_LOW(TCGv_i128 t)
diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c
index e260a07c61..41b1ae18e4 100644
--- a/tcg/tcg-op-gvec.c
+++ b/tcg/tcg-op-gvec.c
@@ -3846,6 +3846,155 @@ void tcg_gen_gvec_cmp(TCGCond cond, unsigned vece, uint32_t dofs,
     }
 }
 
+static void expand_cmps_vec(unsigned vece, uint32_t dofs, uint32_t aofs,
+                            uint32_t oprsz, uint32_t tysz, TCGType type,
+                            TCGCond cond, TCGv_vec c)
+{
+    TCGv_vec t0 = tcg_temp_new_vec(type);
+    TCGv_vec t1 = tcg_temp_new_vec(type);
+    uint32_t i;
+
+    for (i = 0; i < oprsz; i += tysz) {
+        tcg_gen_ld_vec(t1, cpu_env, aofs + i);
+        tcg_gen_cmp_vec(cond, vece, t0, t1, c);
+        tcg_gen_st_vec(t0, cpu_env, dofs + i);
+    }
+}
+
+void tcg_gen_gvec_cmps(TCGCond cond, unsigned vece, uint32_t dofs,
+                       uint32_t aofs, TCGv_i64 c,
+                       uint32_t oprsz, uint32_t maxsz)
+{
+    static const TCGOpcode cmp_list[] = { INDEX_op_cmp_vec, 0 };
+    static gen_helper_gvec_2i * const eq_fn[4] = {
+        gen_helper_gvec_eqs8, gen_helper_gvec_eqs16,
+        gen_helper_gvec_eqs32, gen_helper_gvec_eqs64
+    };
+    static gen_helper_gvec_2i * const lt_fn[4] = {
+        gen_helper_gvec_lts8, gen_helper_gvec_lts16,
+        gen_helper_gvec_lts32, gen_helper_gvec_lts64
+    };
+    static gen_helper_gvec_2i * const le_fn[4] = {
+        gen_helper_gvec_les8, gen_helper_gvec_les16,
+        gen_helper_gvec_les32, gen_helper_gvec_les64
+    };
+    static gen_helper_gvec_2i * const ltu_fn[4] = {
+        gen_helper_gvec_ltus8, gen_helper_gvec_ltus16,
+        gen_helper_gvec_ltus32, gen_helper_gvec_ltus64
+    };
+    static gen_helper_gvec_2i * const leu_fn[4] = {
+        gen_helper_gvec_leus8, gen_helper_gvec_leus16,
+        gen_helper_gvec_leus32, gen_helper_gvec_leus64
+    };
+    static gen_helper_gvec_2i * const * const fns[16] = {
+        [TCG_COND_EQ] = eq_fn,
+        [TCG_COND_LT] = lt_fn,
+        [TCG_COND_LE] = le_fn,
+        [TCG_COND_LTU] = ltu_fn,
+        [TCG_COND_LEU] = leu_fn,
+    };
+
+    TCGType type;
+
+    check_size_align(oprsz, maxsz, dofs | aofs);
+    check_overlap_2(dofs, aofs, maxsz);
+
+    if (cond == TCG_COND_NEVER || cond == TCG_COND_ALWAYS) {
+        do_dup(MO_8, dofs, oprsz, maxsz,
+               NULL, NULL, -(cond == TCG_COND_ALWAYS));
+        return;
+    }
+
+    /*
+     * Implement inline with a vector type, if possible.
+     * Prefer integer when 64-bit host and 64-bit comparison.
+     */
+    type = choose_vector_type(cmp_list, vece, oprsz,
+                              TCG_TARGET_REG_BITS == 64 && vece == MO_64);
+    if (type != 0) {
+        const TCGOpcode *hold_list = tcg_swap_vecop_list(cmp_list);
+        TCGv_vec t_vec = tcg_temp_new_vec(type);
+        uint32_t some;
+
+        tcg_gen_dup_i64_vec(vece, t_vec, c);
+        switch (type) {
+        case TCG_TYPE_V256:
+            some = QEMU_ALIGN_DOWN(oprsz, 32);
+            expand_cmps_vec(vece, dofs, aofs, some, 32,
+                            TCG_TYPE_V256, cond, t_vec);
+            aofs += some;
+            dofs += some;
+            oprsz -= some;
+            maxsz -= some;
+            /* fallthru */
+
+        case TCG_TYPE_V128:
+            some = QEMU_ALIGN_DOWN(oprsz, 16);
+            expand_cmps_vec(vece, dofs, aofs, some, 16,
+                            TCG_TYPE_V128, cond, t_vec);
+            break;
+
+        case TCG_TYPE_V64:
+            some = QEMU_ALIGN_DOWN(oprsz, 8);
+            expand_cmps_vec(vece, dofs, aofs, some, 8,
+                            TCG_TYPE_V64, cond, t_vec);
+            break;
+
+        default:
+            g_assert_not_reached();
+        }
+        tcg_temp_free_vec(t_vec);
+        tcg_swap_vecop_list(hold_list);
+    } else if (vece == MO_64 && check_size_impl(oprsz, 8)) {
+        TCGv_i64 t0 = tcg_temp_ebb_new_i64();
+        uint32_t i;
+
+        for (i = 0; i < oprsz; i += 8) {
+            tcg_gen_ld_i64(t0, cpu_env, aofs + i);
+            tcg_gen_negsetcond_i64(cond, t0, t0, c);
+            tcg_gen_st_i64(t0, cpu_env, dofs + i);
+        }
+        tcg_temp_free_i64(t0);
+    } else if (vece == MO_32 && check_size_impl(oprsz, 4)) {
+        TCGv_i32 t0 = tcg_temp_ebb_new_i32();
+        TCGv_i32 t1 = tcg_temp_ebb_new_i32();
+        uint32_t i;
+
+        tcg_gen_extrl_i64_i32(t1, c);
+        for (i = 0; i < oprsz; i += 8) {
+            tcg_gen_ld_i32(t0, cpu_env, aofs + i);
+            tcg_gen_negsetcond_i32(cond, t0, t0, t1);
+            tcg_gen_st_i32(t0, cpu_env, dofs + i);
+        }
+        tcg_temp_free_i32(t0);
+        tcg_temp_free_i32(t1);
+    } else {
+        gen_helper_gvec_2i * const *fn = fns[cond];
+        bool inv = false;
+
+        if (fn == NULL) {
+            cond = tcg_invert_cond(cond);
+            fn = fns[cond];
+            assert(fn != NULL);
+            inv = true;
+        }
+        tcg_gen_gvec_2i_ool(dofs, aofs, c, oprsz, maxsz, inv, fn[vece]);
+        return;
+    }
+
+    if (oprsz < maxsz) {
+        expand_clr(dofs + oprsz, maxsz - oprsz);
+    }
+}
+
+void tcg_gen_gvec_cmpi(TCGCond cond, unsigned vece, uint32_t dofs,
+                       uint32_t aofs, int64_t c,
+                       uint32_t oprsz, uint32_t maxsz)
+{
+    TCGv_i64 tmp = tcg_constant_i64(c);
+    tcg_gen_gvec_cmps(cond, vece, dofs, aofs, tmp, oprsz, maxsz);
+}
+
 static void tcg_gen_bitsel_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b, TCGv_i64 c)
 {
     TCGv_i64 t = tcg_temp_ebb_new_i64();
diff --git a/tcg/tcg.c b/tcg/tcg.c
index e0bc9caa6b..f63b7fb3e5 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -108,6 +108,7 @@ static void tcg_register_jit_int(const void *buf, size_t size,
     __attribute__((unused));
 
 /* Forward declarations for functions declared and used in tcg-target.c.inc. */
+static void tcg_out_tb_start(TCGContext *s);
 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                        intptr_t arg2);
 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
@@ -171,7 +172,7 @@ static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
                          const TCGHelperInfo *info);
 static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
-static bool tcg_target_const_match(int64_t val, TCGType type, int ct);
+static bool tcg_target_const_match(int64_t val, TCGType type, int ct, int vece);
 #ifdef TCG_TARGET_NEED_LDST_LABELS
 static int tcg_out_ldst_finalize(TCGContext *s);
 #endif
@@ -4695,7 +4696,7 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
         ts = arg_temp(arg);
 
         if (ts->val_type == TEMP_VAL_CONST
-            && tcg_target_const_match(ts->val, ts->type, arg_ct->ct)) {
+            && tcg_target_const_match(ts->val, ts->type, arg_ct->ct, TCGOP_VECE(op))) {
             /* constant is OK for instruction */
             const_args[i] = 1;
             new_args[i] = ts->val;
@@ -6020,6 +6021,8 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
     s->gen_insn_data =
         tcg_malloc(sizeof(uint64_t) * s->gen_tb->icount * start_words);
 
+    tcg_out_tb_start(s);
+
     num_insns = -1;
     QTAILQ_FOREACH(op, &s->ops, link) {
         TCGOpcode opc = op->opc;
diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc
index 253f27f174..461f4b47ff 100644
--- a/tcg/tci/tcg-target.c.inc
+++ b/tcg/tci/tcg-target.c.inc
@@ -913,7 +913,7 @@ static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
 }
 
 /* Test if a constant matches the constraint. */
-static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
+static bool tcg_target_const_match(int64_t val, TCGType type, int ct, int vece)
 {
     return ct & TCG_CT_CONST;
 }
@@ -955,6 +955,11 @@ static inline void tcg_target_qemu_prologue(TCGContext *s)
 {
 }
 
+static void tcg_out_tb_start(TCGContext *s)
+{
+    /* nothing to do */
+}
+
 bool tcg_target_has_memory_bswap(MemOp memop)
 {
     return true;
diff --git a/tests/Makefile.include b/tests/Makefile.include
index 985cda7a94..3898742659 100644
--- a/tests/Makefile.include
+++ b/tests/Makefile.include
@@ -73,7 +73,7 @@ $(TCG_TESTS_TARGETS:%=distclean-tcg-tests-%): distclean-tcg-tests-%:
 build-tcg: $(BUILD_TCG_TARGET_RULES)
 
 .PHONY: check-tcg
-.ninja-goals.check-tcg = all $(if $(CONFIG_PLUGIN),test-plugins)
+.ninja-goals.check-tcg = all
 check-tcg: $(RUN_TCG_TARGET_RULES)
 
 .PHONY: clean-tcg
diff --git a/tests/avocado/acpi-bits.py b/tests/avocado/acpi-bits.py
index 3ed286dcbd..bb3f818689 100644
--- a/tests/avocado/acpi-bits.py
+++ b/tests/avocado/acpi-bits.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 # group: rw quick
-# Exercize QEMU generated ACPI/SMBIOS tables using biosbits,
+# Exercise QEMU generated ACPI/SMBIOS tables using biosbits,
 # https://biosbits.org/
 #
 # This program is free software; you can redistribute it and/or modify
@@ -366,7 +366,7 @@ class AcpiBitsTest(QemuBaseTest): #pylint: disable=too-many-instance-attributes
         super().tearDown()
 
     def test_acpi_smbios_bits(self):
-        """The main test case implementaion."""
+        """The main test case implementation."""
 
         iso_file = os.path.join(self._workDir,
                                 'bits-%d.iso' %self._bitsInternalVer)
diff --git a/tests/avocado/acpi-bits/bits-tests/testacpi.py2 b/tests/avocado/acpi-bits/bits-tests/testacpi.py2
index f818a9cce6..7bf9075c1b 100644
--- a/tests/avocado/acpi-bits/bits-tests/testacpi.py2
+++ b/tests/avocado/acpi-bits/bits-tests/testacpi.py2
@@ -273,8 +273,8 @@ def test_rsdp():
 
     # Checksum the first 20 bytes per ACPI 1.0
     csum = sum(ord(c) for c in data[:20]) % 0x100
-    testsuite.test('ACPI 1.0 table first 20 bytes cummulative checksum must equal 0', csum == 0)
-    testsuite.print_detail("Cummulative checksum = {} (Expected 0)".format(csum))
+    testsuite.test('ACPI 1.0 table first 20 bytes cumulative checksum must equal 0', csum == 0)
+    testsuite.print_detail("Cumulative checksum = {} (Expected 0)".format(csum))
 
     test_table_checksum(data)
     rsdp = acpi.parse_rsdp()
diff --git a/tests/avocado/avocado_qemu/__init__.py b/tests/avocado/avocado_qemu/__init__.py
index 33090903f1..0172a359b7 100644
--- a/tests/avocado/avocado_qemu/__init__.py
+++ b/tests/avocado/avocado_qemu/__init__.py
@@ -137,7 +137,7 @@ def _console_interaction(test, success_message, failure_message,
     assert not keep_sending or send_string
     if vm is None:
         vm = test.vm
-    console = vm.console_socket.makefile(mode='rb', encoding='utf-8')
+    console = vm.console_file
     console_logger = logging.getLogger('console')
     while True:
         if send_string:
diff --git a/tests/avocado/boot_linux_console.py b/tests/avocado/boot_linux_console.py
index 6eab515718..01ee149812 100644
--- a/tests/avocado/boot_linux_console.py
+++ b/tests/avocado/boot_linux_console.py
@@ -116,6 +116,7 @@ class BootLinuxConsole(LinuxKernelTest):
         console_pattern = 'Kernel command line: %s' % kernel_command_line
         self.wait_for_console_pattern(console_pattern)
 
+    @skip('https://gitlab.com/qemu-project/qemu/-/issues/1884')
     def test_mips_malta(self):
         """
         :avocado: tags=arch:mips
@@ -138,6 +139,7 @@ class BootLinuxConsole(LinuxKernelTest):
         console_pattern = 'Kernel command line: %s' % kernel_command_line
         self.wait_for_console_pattern(console_pattern)
 
+    @skip('https://gitlab.com/qemu-project/qemu/-/issues/1884')
     def test_mips64el_malta(self):
         """
         This test requires the ar tool to extract "data.tar.gz" from
@@ -191,6 +193,7 @@ class BootLinuxConsole(LinuxKernelTest):
         console_pattern = 'Kernel command line: %s' % kernel_command_line
         self.wait_for_console_pattern(console_pattern)
 
+    @skip('https://gitlab.com/qemu-project/qemu/-/issues/1884')
     def test_mips_malta_cpio(self):
         """
         :avocado: tags=arch:mips
@@ -232,6 +235,7 @@ class BootLinuxConsole(LinuxKernelTest):
         # Wait for VM to shut down gracefully
         self.vm.wait()
 
+    @skip('https://gitlab.com/qemu-project/qemu/-/issues/1884')
     @skipUnless(os.getenv('AVOCADO_ALLOW_UNTRUSTED_CODE'), 'untrusted code')
     def test_mips64el_malta_5KEc_cpio(self):
         """
@@ -292,6 +296,7 @@ class BootLinuxConsole(LinuxKernelTest):
         console_pattern = 'Kernel command line: %s' % kernel_command_line
         self.wait_for_console_pattern(console_pattern)
 
+    @skip('https://gitlab.com/qemu-project/qemu/-/issues/1884')
     def test_mips_malta32el_nanomips_4k(self):
         """
         :avocado: tags=arch:mipsel
@@ -305,6 +310,7 @@ class BootLinuxConsole(LinuxKernelTest):
         kernel_hash = '477456aafd2a0f1ddc9482727f20fe9575565dd6'
         self.do_test_mips_malta32el_nanomips(kernel_url, kernel_hash)
 
+    @skip('https://gitlab.com/qemu-project/qemu/-/issues/1884')
     def test_mips_malta32el_nanomips_16k_up(self):
         """
         :avocado: tags=arch:mipsel
@@ -318,6 +324,7 @@ class BootLinuxConsole(LinuxKernelTest):
         kernel_hash = 'e882868f944c71c816e832e2303b7874d044a7bc'
         self.do_test_mips_malta32el_nanomips(kernel_url, kernel_hash)
 
+    @skip('https://gitlab.com/qemu-project/qemu/-/issues/1884')
     def test_mips_malta32el_nanomips_64k_dbg(self):
         """
         :avocado: tags=arch:mipsel
diff --git a/tests/avocado/machine_aspeed.py b/tests/avocado/machine_aspeed.py
index 724ee72c02..90f1b7cb77 100644
--- a/tests/avocado/machine_aspeed.py
+++ b/tests/avocado/machine_aspeed.py
@@ -316,8 +316,8 @@ class AST2x00MachineSDK(QemuSystemTest, LinuxSSHMixIn):
         """
 
         image_url = ('https://github.com/AspeedTech-BMC/openbmc/releases/'
-                     'download/v08.01/ast2500-default-obmc.tar.gz')
-        image_hash = ('5375f82b4c43a79427909342a1e18b4e48bd663e38466862145d27bb358796fd')
+                     'download/v08.06/ast2500-default-obmc.tar.gz')
+        image_hash = ('e1755f3cadff69190438c688d52dd0f0d399b70a1e14b1d3d5540fc4851d38ca')
         image_path = self.fetch_asset(image_url, asset_hash=image_hash,
                                       algorithm='sha256')
         archive.extract(image_path, self.workdir)
@@ -334,8 +334,8 @@ class AST2x00MachineSDK(QemuSystemTest, LinuxSSHMixIn):
         """
 
         image_url = ('https://github.com/AspeedTech-BMC/openbmc/releases/'
-                     'download/v08.01/ast2600-default-obmc.tar.gz')
-        image_hash = ('f12ef15e8c1f03a214df3b91c814515c5e2b2f56119021398c1dbdd626817d15')
+                     'download/v08.06/ast2600-a2-obmc.tar.gz')
+        image_hash = ('9083506135f622d5e7351fcf7d4e1c7125cee5ba16141220c0ba88931f3681a4')
         image_path = self.fetch_asset(image_url, asset_hash=image_hash,
                                       algorithm='sha256')
         archive.extract(image_path, self.workdir)
@@ -345,8 +345,8 @@ class AST2x00MachineSDK(QemuSystemTest, LinuxSSHMixIn):
         self.vm.add_args('-device',
                          'ds1338,bus=aspeed.i2c.bus.5,address=0x32');
         self.do_test_arm_aspeed_sdk_start(
-            self.workdir + '/ast2600-default/image-bmc')
-        self.wait_for_console_pattern('nodistro.0 ast2600-default ttyS4')
+            self.workdir + '/ast2600-a2/image-bmc')
+        self.wait_for_console_pattern('nodistro.0 ast2600-a2 ttyS4')
 
         self.ssh_connect('root', '0penBmc', False)
         self.ssh_command('dmesg -c > /dev/null')
diff --git a/tests/avocado/machine_mips_malta.py b/tests/avocado/machine_mips_malta.py
index 92233451c5..3620266589 100644
--- a/tests/avocado/machine_mips_malta.py
+++ b/tests/avocado/machine_mips_malta.py
@@ -11,6 +11,7 @@ import os
 import gzip
 import logging
 
+from avocado import skip
 from avocado import skipIf
 from avocado import skipUnless
 from avocado.utils import archive
@@ -93,6 +94,7 @@ class MaltaMachineFramebuffer(QemuSystemTest):
             cv2.imwrite(debug_png, screendump_bgr)
         self.assertGreaterEqual(tuxlogo_count, cpu_cores_count)
 
+    @skip('https://gitlab.com/qemu-project/qemu/-/issues/1884')
     def test_mips_malta_i6400_framebuffer_logo_1core(self):
         """
         :avocado: tags=arch:mips64el
@@ -101,6 +103,7 @@ class MaltaMachineFramebuffer(QemuSystemTest):
         """
         self.do_test_i6400_framebuffer_logo(1)
 
+    @skip('https://gitlab.com/qemu-project/qemu/-/issues/1884')
     @skipIf(os.getenv('GITLAB_CI'), 'Running on GitLab')
     def test_mips_malta_i6400_framebuffer_logo_7cores(self):
         """
@@ -111,6 +114,7 @@ class MaltaMachineFramebuffer(QemuSystemTest):
         """
         self.do_test_i6400_framebuffer_logo(7)
 
+    @skip('https://gitlab.com/qemu-project/qemu/-/issues/1884')
     @skipIf(os.getenv('GITLAB_CI'), 'Running on GitLab')
     def test_mips_malta_i6400_framebuffer_logo_8cores(self):
         """
@@ -142,6 +146,7 @@ class MaltaMachine(QemuSystemTest):
         wait_for_console_pattern(self, prompt)
         self.vm.shutdown()
 
+    @skip('https://gitlab.com/qemu-project/qemu/-/issues/1884')
     def test_mipsel_malta_yamon(self):
         """
         :avocado: tags=arch:mipsel
@@ -150,6 +155,7 @@ class MaltaMachine(QemuSystemTest):
         """
         self.do_test_yamon()
 
+    @skip('https://gitlab.com/qemu-project/qemu/-/issues/1884')
     def test_mips64el_malta_yamon(self):
         """
         :avocado: tags=arch:mips64el
diff --git a/tests/avocado/replay_kernel.py b/tests/avocado/replay_kernel.py
index 79c607b0e7..f7ccfd2462 100644
--- a/tests/avocado/replay_kernel.py
+++ b/tests/avocado/replay_kernel.py
@@ -98,6 +98,7 @@ class ReplayKernelNormal(ReplayKernelBase):
 
         self.run_rr(kernel_path, kernel_command_line, console_pattern, shift=5)
 
+    @skip('https://gitlab.com/qemu-project/qemu/-/issues/1884')
     def test_mips_malta(self):
         """
         :avocado: tags=arch:mips
@@ -116,6 +117,7 @@ class ReplayKernelNormal(ReplayKernelBase):
 
         self.run_rr(kernel_path, kernel_command_line, console_pattern, shift=5)
 
+    @skip('https://gitlab.com/qemu-project/qemu/-/issues/1884')
     def test_mips64el_malta(self):
         """
         This test requires the ar tool to extract "data.tar.gz" from
@@ -255,8 +257,7 @@ class ReplayKernelNormal(ReplayKernelBase):
         kernel_path = self.fetch_asset(kernel_url, asset_hash=kernel_hash)
 
         kernel_command_line = self.KERNEL_COMMON_COMMAND_LINE + 'console=hvc0'
-        # icount is not good enough for PPC64 for complete boot yet
-        console_pattern = 'Kernel command line: %s' % kernel_command_line
+        console_pattern = 'VFS: Cannot open root device'
         self.run_rr(kernel_path, kernel_command_line, console_pattern)
 
     def test_ppc64_powernv(self):
@@ -432,6 +433,7 @@ class ReplayKernelSlow(ReplayKernelBase):
     # making it very slow.
     timeout = 180
 
+    @skip('https://gitlab.com/qemu-project/qemu/-/issues/1884')
     def test_mips_malta_cpio(self):
         """
         :avocado: tags=arch:mips
@@ -461,6 +463,7 @@ class ReplayKernelSlow(ReplayKernelBase):
         self.run_rr(kernel_path, kernel_command_line, console_pattern, shift=5,
                     args=('-initrd', initrd_path))
 
+    @skip('https://gitlab.com/qemu-project/qemu/-/issues/1884')
     @skipUnless(os.getenv('AVOCADO_ALLOW_UNTRUSTED_CODE'), 'untrusted code')
     def test_mips64el_malta_5KEc_cpio(self):
         """
@@ -503,6 +506,7 @@ class ReplayKernelSlow(ReplayKernelBase):
         console_pattern = 'Kernel command line: %s' % kernel_command_line
         self.run_rr(kernel_path, kernel_command_line, console_pattern, shift=5)
 
+    @skip('https://gitlab.com/qemu-project/qemu/-/issues/1884')
     def test_mips_malta32el_nanomips_4k(self):
         """
         :avocado: tags=arch:mipsel
@@ -517,6 +521,7 @@ class ReplayKernelSlow(ReplayKernelBase):
         kernel_path_xz = self.fetch_asset(kernel_url, asset_hash=kernel_hash)
         self.do_test_mips_malta32el_nanomips(kernel_path_xz)
 
+    @skip('https://gitlab.com/qemu-project/qemu/-/issues/1884')
     def test_mips_malta32el_nanomips_16k_up(self):
         """
         :avocado: tags=arch:mipsel
@@ -531,6 +536,7 @@ class ReplayKernelSlow(ReplayKernelBase):
         kernel_path_xz = self.fetch_asset(kernel_url, asset_hash=kernel_hash)
         self.do_test_mips_malta32el_nanomips(kernel_path_xz)
 
+    @skip('https://gitlab.com/qemu-project/qemu/-/issues/1884')
     def test_mips_malta32el_nanomips_64k_dbg(self):
         """
         :avocado: tags=arch:mipsel
diff --git a/tests/avocado/reverse_debugging.py b/tests/avocado/reverse_debugging.py
index 680c314cfc..fc47874eda 100644
--- a/tests/avocado/reverse_debugging.py
+++ b/tests/avocado/reverse_debugging.py
@@ -150,16 +150,33 @@ class ReverseDebugging(LinuxKernelTest):
             self.check_pc(g, addr)
             logger.info('found position %x' % addr)
 
-        logger.info('seeking to the end (icount %s)' % (last_icount - 1))
-        vm.qmp('replay-break', icount=last_icount - 1)
-        # continue - will return after pausing
-        g.cmd(b'c', b'T02thread:01;')
+        # visit the recorded instruction in forward order
+        logger.info('stepping forward')
+        for addr in steps:
+            self.check_pc(g, addr)
+            self.gdb_step(g)
+            logger.info('found position %x' % addr)
 
+        # set breakpoints for the instructions just stepped over
         logger.info('setting breakpoints')
         for addr in steps:
             # hardware breakpoint at addr with len=1
             g.cmd(b'Z1,%x,1' % addr, b'OK')
 
+        # this may hit a breakpoint if first instructions are executed
+        # again
+        logger.info('continuing execution')
+        vm.qmp('replay-break', icount=last_icount - 1)
+        # continue - will return after pausing
+        # This could stop at the end and get a T02 return, or by
+        # re-executing one of the breakpoints and get a T05 return.
+        g.cmd(b'c')
+        if self.vm_get_icount(vm) == last_icount - 1:
+            logger.info('reached the end (icount %s)' % (last_icount - 1))
+        else:
+            logger.info('hit a breakpoint again at %x (icount %s)' %
+                        (self.get_pc(g), self.vm_get_icount(vm)))
+
         logger.info('running reverse continue to reach %x' % steps[-1])
         # reverse continue - will return after stopping at the breakpoint
         g.cmd(b'bc', b'T05thread:01;')
@@ -216,3 +233,32 @@ class ReverseDebugging_AArch64(ReverseDebugging):
 
         self.reverse_debugging(
             args=('-kernel', kernel_path))
+
+class ReverseDebugging_ppc64(ReverseDebugging):
+    """
+    :avocado: tags=accel:tcg
+    """
+
+    REG_PC = 0x40
+
+    # unidentified gitlab timeout problem
+    @skipIf(os.getenv('GITLAB_CI'), 'Running on GitLab')
+    def test_ppc64_pseries(self):
+        """
+        :avocado: tags=arch:ppc64
+        :avocado: tags=machine:pseries
+        """
+        # SLOF branches back to its entry point, which causes this test
+        # to take the 'hit a breakpoint again' path. That's not a problem,
+        # just slightly different than the other machines.
+        self.endian_is_le = False
+        self.reverse_debugging()
+
+    @skipIf(os.getenv('GITLAB_CI'), 'Running on GitLab')
+    def test_ppc64_powernv(self):
+        """
+        :avocado: tags=arch:ppc64
+        :avocado: tags=machine:powernv
+        """
+        self.endian_is_le = False
+        self.reverse_debugging()
diff --git a/tests/avocado/tuxrun_baselines.py b/tests/avocado/tuxrun_baselines.py
index e12250eabb..610b7e2bfa 100644
--- a/tests/avocado/tuxrun_baselines.py
+++ b/tests/avocado/tuxrun_baselines.py
@@ -352,6 +352,7 @@ class TuxRunBaselineTest(QemuSystemTest):
 
         self.common_tuxrun(csums=sums, drive="virtio-blk-pci")
 
+    @skip('https://gitlab.com/qemu-project/qemu/-/issues/1884')
     def test_mips32(self):
         """
         :avocado: tags=arch:mips
@@ -370,6 +371,7 @@ class TuxRunBaselineTest(QemuSystemTest):
 
         self.common_tuxrun(csums=sums, drive="driver=ide-hd,bus=ide.0,unit=0")
 
+    @skip('https://gitlab.com/qemu-project/qemu/-/issues/1884')
     def test_mips32el(self):
         """
         :avocado: tags=arch:mipsel
@@ -387,6 +389,7 @@ class TuxRunBaselineTest(QemuSystemTest):
 
         self.common_tuxrun(csums=sums, drive="driver=ide-hd,bus=ide.0,unit=0")
 
+    @skip('https://gitlab.com/qemu-project/qemu/-/issues/1884')
     def test_mips64(self):
         """
         :avocado: tags=arch:mips64
@@ -404,6 +407,7 @@ class TuxRunBaselineTest(QemuSystemTest):
 
         self.common_tuxrun(csums=sums, drive="driver=ide-hd,bus=ide.0,unit=0")
 
+    @skip('https://gitlab.com/qemu-project/qemu/-/issues/1884')
     def test_mips64el(self):
         """
         :avocado: tags=arch:mips64el
diff --git a/tests/data/acpi/virt/SSDT.memhp b/tests/data/acpi/virt/SSDT.memhp
index ef93c44464..31ff6ac469 100644
Binary files a/tests/data/acpi/virt/SSDT.memhp and b/tests/data/acpi/virt/SSDT.memhp differ
diff --git a/tests/decode/err_pattern_group_ident2.decode b/tests/decode/err_pattern_group_ident2.decode
index bc859233b1..0abb7513e9 100644
--- a/tests/decode/err_pattern_group_ident2.decode
+++ b/tests/decode/err_pattern_group_ident2.decode
@@ -7,5 +7,5 @@
 {
   top      00000000 00000000 00000000 00000000
   sub1     00000000 00000000 00000000 ........ %sub1
-# comments are suposed to be indented
+# comments are supposed to be indented
 }
diff --git a/tests/docker/common.rc b/tests/docker/common.rc
index 9a33df2832..a611e6adf9 100755
--- a/tests/docker/common.rc
+++ b/tests/docker/common.rc
@@ -12,7 +12,7 @@
 # the top-level directory.
 
 # This might be set by ENV of a docker container... it is always
-# overriden by TARGET_LIST if the user sets it. We special case
+# overridden by TARGET_LIST if the user sets it. We special case
 # "none" to allow for other options like --disable-tcg to restrict the
 # builds we eventually do.
 if test "$DEF_TARGET_LIST" = "none"; then
diff --git a/tests/docker/dockerfiles/alpine.docker b/tests/docker/dockerfiles/alpine.docker
index fa455f1474..d25649cb4f 100644
--- a/tests/docker/dockerfiles/alpine.docker
+++ b/tests/docker/dockerfiles/alpine.docker
@@ -59,6 +59,7 @@ RUN apk update && \
         libtasn1-dev \
         liburing-dev \
         libusb-dev \
+        libxdp-dev \
         linux-pam-dev \
         llvm \
         lttng-ust-dev \
diff --git a/tests/docker/dockerfiles/centos8.docker b/tests/docker/dockerfiles/centos8.docker
index fc1830966f..68bfe606f5 100644
--- a/tests/docker/dockerfiles/centos8.docker
+++ b/tests/docker/dockerfiles/centos8.docker
@@ -75,6 +75,7 @@ RUN dnf distro-sync -y && \
         libubsan \
         liburing-devel \
         libusbx-devel \
+        libxdp-devel \
         libzstd-devel \
         llvm \
         lttng-ust-devel \
diff --git a/tests/docker/dockerfiles/debian-amd64-cross.docker b/tests/docker/dockerfiles/debian-amd64-cross.docker
index b66b9cc191..0991938595 100644
--- a/tests/docker/dockerfiles/debian-amd64-cross.docker
+++ b/tests/docker/dockerfiles/debian-amd64-cross.docker
@@ -1,10 +1,10 @@
 # THIS FILE WAS AUTO-GENERATED
 #
-#  $ lcitool dockerfile --layers all --cross-arch x86_64 debian-11 qemu
+#  $ lcitool dockerfile --layers all --cross-arch x86_64 debian-12 qemu
 #
 # https://gitlab.com/libvirt/libvirt-ci
 
-FROM docker.io/library/debian:11-slim
+FROM docker.io/library/debian:12-slim
 
 RUN export DEBIAN_FRONTEND=noninteractive && \
     apt-get update && \
@@ -47,11 +47,9 @@ RUN export DEBIAN_FRONTEND=noninteractive && \
                       python3-opencv \
                       python3-pillow \
                       python3-pip \
-                      python3-setuptools \
                       python3-sphinx \
                       python3-sphinx-rtd-theme \
                       python3-venv \
-                      python3-wheel \
                       python3-yaml \
                       rpm2cpio \
                       sed \
@@ -67,8 +65,6 @@ RUN export DEBIAN_FRONTEND=noninteractive && \
     sed -Ei 's,^# (en_US\.UTF-8 .*)$,\1,' /etc/locale.gen && \
     dpkg-reconfigure locales
 
-RUN /usr/bin/pip3 install tomli
-
 ENV CCACHE_WRAPPERSDIR "/usr/libexec/ccache-wrappers"
 ENV LANG "en_US.UTF-8"
 ENV MAKE "/usr/bin/make"
@@ -84,7 +80,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && \
                       g++-x86-64-linux-gnu \
                       gcc-x86-64-linux-gnu \
                       libaio-dev:amd64 \
-                      libasan5:amd64 \
+                      libasan6:amd64 \
                       libasound2-dev:amd64 \
                       libattr1-dev:amd64 \
                       libbpf-dev:amd64 \
@@ -146,6 +142,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && \
                       libvdeplug-dev:amd64 \
                       libvirglrenderer-dev:amd64 \
                       libvte-2.91-dev:amd64 \
+                      libxdp-dev:amd64 \
                       libxen-dev:amd64 \
                       libzstd-dev:amd64 \
                       nettle-dev:amd64 \
diff --git a/tests/docker/dockerfiles/debian-amd64.docker b/tests/docker/dockerfiles/debian-amd64.docker
index 02262bc70e..61dbc3ff24 100644
--- a/tests/docker/dockerfiles/debian-amd64.docker
+++ b/tests/docker/dockerfiles/debian-amd64.docker
@@ -1,10 +1,10 @@
 # THIS FILE WAS AUTO-GENERATED
 #
-#  $ lcitool dockerfile --layers all debian-11 qemu
+#  $ lcitool dockerfile --layers all debian-12 qemu
 #
 # https://gitlab.com/libvirt/libvirt-ci
 
-FROM docker.io/library/debian:11-slim
+FROM docker.io/library/debian:12-slim
 
 RUN export DEBIAN_FRONTEND=noninteractive && \
     apt-get update && \
@@ -32,7 +32,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && \
                       git \
                       hostname \
                       libaio-dev \
-                      libasan5 \
+                      libasan6 \
                       libasound2-dev \
                       libattr1-dev \
                       libbpf-dev \
@@ -97,6 +97,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && \
                       libvdeplug-dev \
                       libvirglrenderer-dev \
                       libvte-2.91-dev \
+                      libxdp-dev \
                       libxen-dev \
                       libzstd-dev \
                       llvm \
@@ -115,11 +116,9 @@ RUN export DEBIAN_FRONTEND=noninteractive && \
                       python3-opencv \
                       python3-pillow \
                       python3-pip \
-                      python3-setuptools \
                       python3-sphinx \
                       python3-sphinx-rtd-theme \
                       python3-venv \
-                      python3-wheel \
                       python3-yaml \
                       rpm2cpio \
                       sed \
@@ -145,8 +144,6 @@ RUN export DEBIAN_FRONTEND=noninteractive && \
     ln -s /usr/bin/ccache /usr/libexec/ccache-wrappers/g++ && \
     ln -s /usr/bin/ccache /usr/libexec/ccache-wrappers/gcc
 
-RUN /usr/bin/pip3 install tomli
-
 ENV CCACHE_WRAPPERSDIR "/usr/libexec/ccache-wrappers"
 ENV LANG "en_US.UTF-8"
 ENV MAKE "/usr/bin/make"
diff --git a/tests/docker/dockerfiles/debian-arm64-cross.docker b/tests/docker/dockerfiles/debian-arm64-cross.docker
index a0a968b8c6..74eabb274e 100644
--- a/tests/docker/dockerfiles/debian-arm64-cross.docker
+++ b/tests/docker/dockerfiles/debian-arm64-cross.docker
@@ -1,10 +1,10 @@
 # THIS FILE WAS AUTO-GENERATED
 #
-#  $ lcitool dockerfile --layers all --cross-arch aarch64 debian-11 qemu
+#  $ lcitool dockerfile --layers all --cross-arch aarch64 debian-12 qemu
 #
 # https://gitlab.com/libvirt/libvirt-ci
 
-FROM docker.io/library/debian:11-slim
+FROM docker.io/library/debian:12-slim
 
 RUN export DEBIAN_FRONTEND=noninteractive && \
     apt-get update && \
@@ -47,11 +47,9 @@ RUN export DEBIAN_FRONTEND=noninteractive && \
                       python3-opencv \
                       python3-pillow \
                       python3-pip \
-                      python3-setuptools \
                       python3-sphinx \
                       python3-sphinx-rtd-theme \
                       python3-venv \
-                      python3-wheel \
                       python3-yaml \
                       rpm2cpio \
                       sed \
@@ -67,8 +65,6 @@ RUN export DEBIAN_FRONTEND=noninteractive && \
     sed -Ei 's,^# (en_US\.UTF-8 .*)$,\1,' /etc/locale.gen && \
     dpkg-reconfigure locales
 
-RUN /usr/bin/pip3 install tomli
-
 ENV CCACHE_WRAPPERSDIR "/usr/libexec/ccache-wrappers"
 ENV LANG "en_US.UTF-8"
 ENV MAKE "/usr/bin/make"
@@ -84,7 +80,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && \
                       g++-aarch64-linux-gnu \
                       gcc-aarch64-linux-gnu \
                       libaio-dev:arm64 \
-                      libasan5:arm64 \
+                      libasan6:arm64 \
                       libasound2-dev:arm64 \
                       libattr1-dev:arm64 \
                       libbpf-dev:arm64 \
@@ -145,6 +141,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && \
                       libvdeplug-dev:arm64 \
                       libvirglrenderer-dev:arm64 \
                       libvte-2.91-dev:arm64 \
+                      libxdp-dev:arm64 \
                       libxen-dev:arm64 \
                       libzstd-dev:arm64 \
                       nettle-dev:arm64 \
diff --git a/tests/docker/dockerfiles/debian-armel-cross.docker b/tests/docker/dockerfiles/debian-armel-cross.docker
index f1fc34a28a..75342c09b0 100644
--- a/tests/docker/dockerfiles/debian-armel-cross.docker
+++ b/tests/docker/dockerfiles/debian-armel-cross.docker
@@ -84,7 +84,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && \
                       g++-arm-linux-gnueabi \
                       gcc-arm-linux-gnueabi \
                       libaio-dev:armel \
-                      libasan5:armel \
+                      libasan6:armel \
                       libasound2-dev:armel \
                       libattr1-dev:armel \
                       libbpf-dev:armel \
diff --git a/tests/docker/dockerfiles/debian-armhf-cross.docker b/tests/docker/dockerfiles/debian-armhf-cross.docker
index a278578211..1ebd6ebd00 100644
--- a/tests/docker/dockerfiles/debian-armhf-cross.docker
+++ b/tests/docker/dockerfiles/debian-armhf-cross.docker
@@ -1,10 +1,10 @@
 # THIS FILE WAS AUTO-GENERATED
 #
-#  $ lcitool dockerfile --layers all --cross-arch armv7l debian-11 qemu
+#  $ lcitool dockerfile --layers all --cross-arch armv7l debian-12 qemu
 #
 # https://gitlab.com/libvirt/libvirt-ci
 
-FROM docker.io/library/debian:11-slim
+FROM docker.io/library/debian:12-slim
 
 RUN export DEBIAN_FRONTEND=noninteractive && \
     apt-get update && \
@@ -47,11 +47,9 @@ RUN export DEBIAN_FRONTEND=noninteractive && \
                       python3-opencv \
                       python3-pillow \
                       python3-pip \
-                      python3-setuptools \
                       python3-sphinx \
                       python3-sphinx-rtd-theme \
                       python3-venv \
-                      python3-wheel \
                       python3-yaml \
                       rpm2cpio \
                       sed \
@@ -67,8 +65,6 @@ RUN export DEBIAN_FRONTEND=noninteractive && \
     sed -Ei 's,^# (en_US\.UTF-8 .*)$,\1,' /etc/locale.gen && \
     dpkg-reconfigure locales
 
-RUN /usr/bin/pip3 install tomli
-
 ENV CCACHE_WRAPPERSDIR "/usr/libexec/ccache-wrappers"
 ENV LANG "en_US.UTF-8"
 ENV MAKE "/usr/bin/make"
@@ -84,7 +80,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && \
                       g++-arm-linux-gnueabihf \
                       gcc-arm-linux-gnueabihf \
                       libaio-dev:armhf \
-                      libasan5:armhf \
+                      libasan6:armhf \
                       libasound2-dev:armhf \
                       libattr1-dev:armhf \
                       libbpf-dev:armhf \
@@ -145,6 +141,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && \
                       libvdeplug-dev:armhf \
                       libvirglrenderer-dev:armhf \
                       libvte-2.91-dev:armhf \
+                      libxdp-dev:armhf \
                       libxen-dev:armhf \
                       libzstd-dev:armhf \
                       nettle-dev:armhf \
diff --git a/tests/docker/dockerfiles/debian-loongarch-cross.docker b/tests/docker/dockerfiles/debian-loongarch-cross.docker
index 9d957547b5..b4bf265717 100644
--- a/tests/docker/dockerfiles/debian-loongarch-cross.docker
+++ b/tests/docker/dockerfiles/debian-loongarch-cross.docker
@@ -20,7 +20,7 @@ RUN apt-get update && \
         git \
         python3-minimal
 
-RUN curl -#SL https://github.com/loongson/build-tools/releases/download/2022.05.29/loongarch64-clfs-5.0-cross-tools-gcc-glibc.tar.xz \
+RUN curl -#SL https://github.com/loongson/build-tools/releases/download/2023.08.08/CLFS-loongarch64-8.1-x86_64-cross-tools-gcc-glibc.tar.xz \
     | tar -xJC /opt
 
 ENV PATH $PATH:/opt/cross-tools/bin
diff --git a/tests/docker/dockerfiles/debian-ppc64el-cross.docker b/tests/docker/dockerfiles/debian-ppc64el-cross.docker
index 30e5efa986..59091fed02 100644
--- a/tests/docker/dockerfiles/debian-ppc64el-cross.docker
+++ b/tests/docker/dockerfiles/debian-ppc64el-cross.docker
@@ -1,10 +1,10 @@
 # THIS FILE WAS AUTO-GENERATED
 #
-#  $ lcitool dockerfile --layers all --cross-arch ppc64le debian-11 qemu
+#  $ lcitool dockerfile --layers all --cross-arch ppc64le debian-12 qemu
 #
 # https://gitlab.com/libvirt/libvirt-ci
 
-FROM docker.io/library/debian:11-slim
+FROM docker.io/library/debian:12-slim
 
 RUN export DEBIAN_FRONTEND=noninteractive && \
     apt-get update && \
@@ -47,11 +47,9 @@ RUN export DEBIAN_FRONTEND=noninteractive && \
                       python3-opencv \
                       python3-pillow \
                       python3-pip \
-                      python3-setuptools \
                       python3-sphinx \
                       python3-sphinx-rtd-theme \
                       python3-venv \
-                      python3-wheel \
                       python3-yaml \
                       rpm2cpio \
                       sed \
@@ -67,8 +65,6 @@ RUN export DEBIAN_FRONTEND=noninteractive && \
     sed -Ei 's,^# (en_US\.UTF-8 .*)$,\1,' /etc/locale.gen && \
     dpkg-reconfigure locales
 
-RUN /usr/bin/pip3 install tomli
-
 ENV CCACHE_WRAPPERSDIR "/usr/libexec/ccache-wrappers"
 ENV LANG "en_US.UTF-8"
 ENV MAKE "/usr/bin/make"
@@ -84,7 +80,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && \
                       g++-powerpc64le-linux-gnu \
                       gcc-powerpc64le-linux-gnu \
                       libaio-dev:ppc64el \
-                      libasan5:ppc64el \
+                      libasan6:ppc64el \
                       libasound2-dev:ppc64el \
                       libattr1-dev:ppc64el \
                       libbpf-dev:ppc64el \
@@ -145,6 +141,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && \
                       libvdeplug-dev:ppc64el \
                       libvirglrenderer-dev:ppc64el \
                       libvte-2.91-dev:ppc64el \
+                      libxdp-dev:ppc64el \
                       libzstd-dev:ppc64el \
                       nettle-dev:ppc64el \
                       systemtap-sdt-dev:ppc64el \
diff --git a/tests/docker/dockerfiles/debian-s390x-cross.docker b/tests/docker/dockerfiles/debian-s390x-cross.docker
index ee6db7b526..48b2f28310 100644
--- a/tests/docker/dockerfiles/debian-s390x-cross.docker
+++ b/tests/docker/dockerfiles/debian-s390x-cross.docker
@@ -1,10 +1,10 @@
 # THIS FILE WAS AUTO-GENERATED
 #
-#  $ lcitool dockerfile --layers all --cross-arch s390x debian-11 qemu
+#  $ lcitool dockerfile --layers all --cross-arch s390x debian-12 qemu
 #
 # https://gitlab.com/libvirt/libvirt-ci
 
-FROM docker.io/library/debian:11-slim
+FROM docker.io/library/debian:12-slim
 
 RUN export DEBIAN_FRONTEND=noninteractive && \
     apt-get update && \
@@ -47,11 +47,9 @@ RUN export DEBIAN_FRONTEND=noninteractive && \
                       python3-opencv \
                       python3-pillow \
                       python3-pip \
-                      python3-setuptools \
                       python3-sphinx \
                       python3-sphinx-rtd-theme \
                       python3-venv \
-                      python3-wheel \
                       python3-yaml \
                       rpm2cpio \
                       sed \
@@ -67,8 +65,6 @@ RUN export DEBIAN_FRONTEND=noninteractive && \
     sed -Ei 's,^# (en_US\.UTF-8 .*)$,\1,' /etc/locale.gen && \
     dpkg-reconfigure locales
 
-RUN /usr/bin/pip3 install tomli
-
 ENV CCACHE_WRAPPERSDIR "/usr/libexec/ccache-wrappers"
 ENV LANG "en_US.UTF-8"
 ENV MAKE "/usr/bin/make"
@@ -84,7 +80,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && \
                       g++-s390x-linux-gnu \
                       gcc-s390x-linux-gnu \
                       libaio-dev:s390x \
-                      libasan5:s390x \
+                      libasan6:s390x \
                       libasound2-dev:s390x \
                       libattr1-dev:s390x \
                       libbpf-dev:s390x \
@@ -144,6 +140,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && \
                       libvdeplug-dev:s390x \
                       libvirglrenderer-dev:s390x \
                       libvte-2.91-dev:s390x \
+                      libxdp-dev:s390x \
                       libzstd-dev:s390x \
                       nettle-dev:s390x \
                       systemtap-sdt-dev:s390x \
diff --git a/tests/docker/dockerfiles/fedora.docker b/tests/docker/dockerfiles/fedora.docker
index c5b6c96943..f00e9e267c 100644
--- a/tests/docker/dockerfiles/fedora.docker
+++ b/tests/docker/dockerfiles/fedora.docker
@@ -82,6 +82,7 @@ exec "$@"\n' > /usr/bin/nosync && \
                libubsan \
                liburing-devel \
                libusbx-devel \
+               libxdp-devel \
                libzstd-devel \
                llvm \
                lttng-ust-devel \
diff --git a/tests/docker/dockerfiles/opensuse-leap.docker b/tests/docker/dockerfiles/opensuse-leap.docker
index fef8d5a2e4..ed04b4d6da 100644
--- a/tests/docker/dockerfiles/opensuse-leap.docker
+++ b/tests/docker/dockerfiles/opensuse-leap.docker
@@ -40,7 +40,7 @@ RUN zypper update -y && \
            libSDL2-devel \
            libSDL2_image-devel \
            libaio-devel \
-           libasan6 \
+           libasan8 \
            libattr-devel \
            libbpf-devel \
            libbz2-devel \
diff --git a/tests/docker/dockerfiles/ubuntu2004.docker b/tests/docker/dockerfiles/ubuntu2004.docker
index 4180cd8674..d3e212060c 100644
--- a/tests/docker/dockerfiles/ubuntu2004.docker
+++ b/tests/docker/dockerfiles/ubuntu2004.docker
@@ -32,7 +32,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && \
                       git \
                       hostname \
                       libaio-dev \
-                      libasan5 \
+                      libasan6 \
                       libasound2-dev \
                       libattr1-dev \
                       libbrlapi-dev \
diff --git a/tests/docker/dockerfiles/ubuntu2204.docker b/tests/docker/dockerfiles/ubuntu2204.docker
index 88493f00f6..94c2c16118 100644
--- a/tests/docker/dockerfiles/ubuntu2204.docker
+++ b/tests/docker/dockerfiles/ubuntu2204.docker
@@ -32,7 +32,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && \
                       git \
                       hostname \
                       libaio-dev \
-                      libasan5 \
+                      libasan6 \
                       libasound2-dev \
                       libattr1-dev \
                       libbpf-dev \
diff --git a/tests/lcitool/libvirt-ci b/tests/lcitool/libvirt-ci
index bbd55b4d18..e3ed1e5da1 160000
--- a/tests/lcitool/libvirt-ci
+++ b/tests/lcitool/libvirt-ci
@@ -1 +1 @@
-Subproject commit bbd55b4d18cce8f89b5167675e434a6941315634
+Subproject commit e3ed1e5da101943e53d8d89424e17b22120743f5
diff --git a/tests/lcitool/projects/qemu.yml b/tests/lcitool/projects/qemu.yml
index 584f78cb7f..6f0885170d 100644
--- a/tests/lcitool/projects/qemu.yml
+++ b/tests/lcitool/projects/qemu.yml
@@ -69,6 +69,7 @@ packages:
  - liburing
  - libusbx
  - libvdeplug
+ - libxdp
  - libzstd
  - llvm
  - lttng-ust
diff --git a/tests/lcitool/refresh b/tests/lcitool/refresh
index 4584870ea1..92e7d30982 100755
--- a/tests/lcitool/refresh
+++ b/tests/lcitool/refresh
@@ -93,7 +93,7 @@ def generate_pkglist(vm, target):
 # Netmap still needs to be manually built as it is yet to be packaged
 # into a distro. We also add cscope and gtags which are used in the CI
 # test
-debian11_extras = [
+debian12_extras = [
     "# netmap/cscope/global\n",
     "RUN DEBIAN_FRONTEND=noninteractive eatmydata \\\n",
     "  apt install -y --no-install-recommends \\\n",
@@ -123,8 +123,8 @@ try:
     #
     generate_dockerfile("alpine", "alpine-318")
     generate_dockerfile("centos8", "centos-stream-8")
-    generate_dockerfile("debian-amd64", "debian-11",
-                        trailer="".join(debian11_extras))
+    generate_dockerfile("debian-amd64", "debian-12",
+                        trailer="".join(debian12_extras))
     generate_dockerfile("fedora", "fedora-38")
     generate_dockerfile("opensuse-leap", "opensuse-leap-15")
     generate_dockerfile("ubuntu2004", "ubuntu-2004")
@@ -133,24 +133,25 @@ try:
     #
     # Cross compiling builds
     #
-    generate_dockerfile("debian-amd64-cross", "debian-11",
+    generate_dockerfile("debian-amd64-cross", "debian-12",
                         cross="x86_64",
                         trailer=cross_build("x86_64-linux-gnu-",
                                             "x86_64-softmmu,"
                                             "x86_64-linux-user,"
                                             "i386-softmmu,i386-linux-user"))
 
-    generate_dockerfile("debian-arm64-cross", "debian-11",
+    generate_dockerfile("debian-arm64-cross", "debian-12",
                         cross="aarch64",
                         trailer=cross_build("aarch64-linux-gnu-",
                                             "aarch64-softmmu,aarch64-linux-user"))
 
+    # migration to bookworm stalled: https://lists.debian.org/debian-arm/2023/09/msg00006.html
     generate_dockerfile("debian-armel-cross", "debian-11",
                         cross="armv6l",
                         trailer=cross_build("arm-linux-gnueabi-",
                                             "arm-softmmu,arm-linux-user,armeb-linux-user"))
 
-    generate_dockerfile("debian-armhf-cross", "debian-11",
+    generate_dockerfile("debian-armhf-cross", "debian-12",
                         cross="armv7l",
                         trailer=cross_build("arm-linux-gnueabihf-",
                                             "arm-softmmu,arm-linux-user"))
@@ -165,7 +166,7 @@ try:
                         trailer=cross_build("mipsel-linux-gnu-",
                                             "mipsel-softmmu,mipsel-linux-user"))
 
-    generate_dockerfile("debian-ppc64el-cross", "debian-11",
+    generate_dockerfile("debian-ppc64el-cross", "debian-12",
                         cross="ppc64le",
                         trailer=cross_build("powerpc64le-linux-gnu-",
                                             "ppc64-softmmu,ppc64-linux-user"))
@@ -176,7 +177,7 @@ try:
                         trailer=cross_build("riscv64-linux-gnu-",
                                             "riscv64-softmmu,riscv64-linux-user"))
 
-    generate_dockerfile("debian-s390x-cross", "debian-11",
+    generate_dockerfile("debian-s390x-cross", "debian-12",
                         cross="s390x",
                         trailer=cross_build("s390x-linux-gnu-",
                                             "s390x-softmmu,s390x-linux-user"))
diff --git a/tests/meson.build b/tests/meson.build
index 083f2990bd..debaa4505e 100644
--- a/tests/meson.build
+++ b/tests/meson.build
@@ -68,7 +68,7 @@ test_deps = {
   'test-qht-par': qht_bench,
 }
 
-if have_tools and have_vhost_user and 'CONFIG_LINUX' in config_host
+if have_tools and have_vhost_user and targetos == 'linux'
   executable('vhost-user-bridge',
              sources: files('vhost-user-bridge.c'),
              dependencies: [qemuutil, vhost_user])
@@ -80,10 +80,8 @@ if 'CONFIG_TCG' in config_all
   subdir('fp')
 endif
 
-if get_option('tcg').allowed()
-  if 'CONFIG_PLUGIN' in config_host
-    subdir('plugin')
-  endif
+if get_option('plugins')
+  subdir('plugin')
 endif
 
 subdir('unit')
diff --git a/tests/migration/guestperf-batch.py b/tests/migration/guestperf-batch.py
index ab6bdb9d38..9485eefe49 100755
--- a/tests/migration/guestperf-batch.py
+++ b/tests/migration/guestperf-batch.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 #
-# Migration test batch comparison invokation
+# Migration test batch comparison invocation
 #
 # Copyright (c) 2016 Red Hat, Inc.
 #
diff --git a/tests/migration/guestperf.py b/tests/migration/guestperf.py
index e8cc127fd0..07182f211e 100755
--- a/tests/migration/guestperf.py
+++ b/tests/migration/guestperf.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 #
-# Migration test direct invokation command
+# Migration test direct invocation command
 #
 # Copyright (c) 2016 Red Hat, Inc.
 #
diff --git a/tests/migration/meson.build b/tests/migration/meson.build
index ac71f13290..a91aa61c65 100644
--- a/tests/migration/meson.build
+++ b/tests/migration/meson.build
@@ -1,4 +1,4 @@
-sysprof = dependency('sysprof-capture-4', required: false)
+sysprof = dependency('sysprof-capture-4', method: 'pkg-config', required: false)
 glib_static = dependency('glib-2.0', version: glib_req_ver, required: false,
                          method: 'pkg-config', static: true)
 
diff --git a/tests/plugin/mem.c b/tests/plugin/mem.c
index f3b9f696a0..44e91065ba 100644
--- a/tests/plugin/mem.c
+++ b/tests/plugin/mem.c
@@ -98,7 +98,7 @@ QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id,
             } else if (g_strcmp0(tokens[1], "rw") == 0) {
                 rw = QEMU_PLUGIN_MEM_RW;
             } else {
-                fprintf(stderr, "invaild value for argument track: %s\n", opt);
+                fprintf(stderr, "invalid value for argument track: %s\n", opt);
                 return -1;
             }
         } else if (g_strcmp0(tokens[0], "inline") == 0) {
diff --git a/tests/qapi-schema/bad-if-not.json b/tests/qapi-schema/bad-if-not.json
index 9fdaacc47b..660fc4feb2 100644
--- a/tests/qapi-schema/bad-if-not.json
+++ b/tests/qapi-schema/bad-if-not.json
@@ -1,3 +1,3 @@
-# check 'if not' with empy argument
+# check 'if not' with empty argument
 { 'struct': 'TestIfStruct', 'data': { 'foo': 'int' },
   'if': { 'not': '' } }
diff --git a/tests/qemu-iotests/029 b/tests/qemu-iotests/029
index bd71dd2f22..7f4849b97b 100755
--- a/tests/qemu-iotests/029
+++ b/tests/qemu-iotests/029
@@ -39,7 +39,7 @@ trap "_cleanup; exit \$status" 0 1 2 3 15
 . ./common.filter
 . ./common.pattern
 
-# Any format supporting intenal snapshots
+# Any format supporting internal snapshots
 _supported_fmt qcow2
 _supported_proto generic
 # Internal snapshots are (currently) impossible with refcount_bits=1,
diff --git a/tests/qemu-iotests/040 b/tests/qemu-iotests/040
index 30eb97829e..5601a4873c 100755
--- a/tests/qemu-iotests/040
+++ b/tests/qemu-iotests/040
@@ -845,7 +845,7 @@ class TestCommitWithFilters(iotests.QMPTestCase):
         self.assertIsNone(self.vm.node_info('cow-2'))
         self.assertIsNotNone(self.vm.node_info('cow-1'))
 
-        # 2 has been comitted into 1
+        # 2 has been committed into 1
         self.pattern_files[2] = self.img1
 
     def test_commit_through_filter(self):
@@ -863,7 +863,7 @@ class TestCommitWithFilters(iotests.QMPTestCase):
         self.assertIsNone(self.vm.node_info('bottom-filter'))
         self.assertIsNotNone(self.vm.node_info('cow-0'))
 
-        # 1 has been comitted into 0
+        # 1 has been committed into 0
         self.pattern_files[1] = self.img0
 
     def test_filtered_active_commit_with_filter(self):
@@ -900,7 +900,7 @@ class TestCommitWithFilters(iotests.QMPTestCase):
         drv0 = next(dev for dev in blockdevs if dev['qdev'] == 'drv0')
         self.assertEqual(drv0['inserted']['node-name'], 'cow-2')
 
-        # 3 has been comitted into 2
+        # 3 has been committed into 2
         self.pattern_files[3] = self.img2
 
     def test_filtered_active_commit_without_filter(self):
@@ -916,7 +916,7 @@ class TestCommitWithFilters(iotests.QMPTestCase):
         self.assertIsNone(self.vm.node_info('cow-3'))
         self.assertIsNotNone(self.vm.node_info('cow-2'))
 
-        # 3 has been comitted into 2
+        # 3 has been committed into 2
         self.pattern_files[3] = self.img2
 
 class TestCommitWithOverriddenBacking(iotests.QMPTestCase):
diff --git a/tests/qemu-iotests/046 b/tests/qemu-iotests/046
index 517b162508..4c9ed4d26e 100755
--- a/tests/qemu-iotests/046
+++ b/tests/qemu-iotests/046
@@ -125,7 +125,7 @@ aio_flush
 EOF
 
 # Sequential write, but the next cluster is already allocated
-# and phyiscally in the right position
+# and physically in the right position
 cat  <<EOF
 write -P 89 0x80000 0x1000
 write -P 90 0x96000 0x8000
diff --git a/tests/qemu-iotests/051.pc.out b/tests/qemu-iotests/051.pc.out
index 4d4af5a486..7e10c5fa1b 100644
--- a/tests/qemu-iotests/051.pc.out
+++ b/tests/qemu-iotests/051.pc.out
@@ -169,11 +169,11 @@ QEMU_PROG: -device scsi-hd,drive=disk: Device needs media, but drive is empty
 
 Testing: -drive file=TEST_DIR/t.qcow2,if=none,node-name=disk -object iothread,id=thread0 -device virtio-scsi,iothread=thread0,id=virtio-scsi0 -device scsi-hd,bus=virtio-scsi0.0,drive=disk,share-rw=on -device ide-hd,drive=disk,share-rw=on
 QEMU X.Y.Z monitor - type 'help' for more information
-QEMU_PROG: -device ide-hd,drive=disk,share-rw=on: Cannot change iothread of active block backend
+(qemu) QEMU_PROG: -device ide-hd,drive=disk,share-rw=on: Cannot change iothread of active block backend
 
 Testing: -drive file=TEST_DIR/t.qcow2,if=none,node-name=disk -object iothread,id=thread0 -device virtio-scsi,iothread=thread0,id=virtio-scsi0 -device scsi-hd,bus=virtio-scsi0.0,drive=disk,share-rw=on -device virtio-blk-pci,drive=disk,share-rw=on
 QEMU X.Y.Z monitor - type 'help' for more information
-QEMU_PROG: -device virtio-blk-pci,drive=disk,share-rw=on: Cannot change iothread of active block backend
+(qemu) QEMU_PROG: -device virtio-blk-pci,drive=disk,share-rw=on: Cannot change iothread of active block backend
 
 Testing: -drive file=TEST_DIR/t.qcow2,if=none,node-name=disk -object iothread,id=thread0 -device virtio-scsi,iothread=thread0,id=virtio-scsi0 -device scsi-hd,bus=virtio-scsi0.0,drive=disk,share-rw=on -device lsi53c895a,id=lsi0 -device scsi-hd,bus=lsi0.0,drive=disk,share-rw=on
 QEMU X.Y.Z monitor - type 'help' for more information
@@ -185,7 +185,7 @@ QEMU X.Y.Z monitor - type 'help' for more information
 
 Testing: -drive file=TEST_DIR/t.qcow2,if=none,node-name=disk -object iothread,id=thread0 -device virtio-scsi,iothread=thread0,id=virtio-scsi0 -device scsi-hd,bus=virtio-scsi0.0,drive=disk,share-rw=on -device virtio-blk-pci,drive=disk,iothread=thread0,share-rw=on
 QEMU X.Y.Z monitor - type 'help' for more information
-QEMU_PROG: -device virtio-blk-pci,drive=disk,iothread=thread0,share-rw=on: Cannot change iothread of active block backend
+(qemu) QEMU_PROG: -device virtio-blk-pci,drive=disk,iothread=thread0,share-rw=on: Cannot change iothread of active block backend
 
 Testing: -drive file=TEST_DIR/t.qcow2,if=none,node-name=disk -object iothread,id=thread0 -device virtio-scsi,iothread=thread0,id=virtio-scsi0 -device scsi-hd,bus=virtio-scsi0.0,drive=disk,share-rw=on -device virtio-scsi,id=virtio-scsi1,iothread=thread0 -device scsi-hd,bus=virtio-scsi1.0,drive=disk,share-rw=on
 QEMU X.Y.Z monitor - type 'help' for more information
diff --git a/tests/qemu-iotests/059 b/tests/qemu-iotests/059
index e8be217e1f..2bcb1f7f9c 100755
--- a/tests/qemu-iotests/059
+++ b/tests/qemu-iotests/059
@@ -182,7 +182,7 @@ done
 echo
 echo "=== Testing afl image with a very large capacity ==="
 _use_sample_img afl9.vmdk.bz2
-_img_info | grep -q 'Cannot allocate memory' && _notrun "Insufficent memory, skipped test"
+_img_info | grep -q 'Cannot allocate memory' && _notrun "Insufficient memory, skipped test"
 _img_info
 _cleanup_test_img
 
diff --git a/tests/qemu-iotests/061 b/tests/qemu-iotests/061
index 509ad247cd..53c7d428e3 100755
--- a/tests/qemu-iotests/061
+++ b/tests/qemu-iotests/061
@@ -49,7 +49,7 @@ _supported_os Linux
 # we have explicit tests for various cluster sizes, the remaining tests
 # require the default 64k cluster
 # we don't have explicit tests for zstd qcow2 compression type, as zstd may be
-# not compiled in. And we can't create compat images with comression type
+# not compiled in. And we can't create compat images with compression type
 # extension
 _unsupported_imgopts 'refcount_bits=\([^1]\|.\([^6]\|$\)\)' data_file \
     cluster_size compression_type
diff --git a/tests/qemu-iotests/071 b/tests/qemu-iotests/071
index 27bc7305bf..331f8cfddc 100755
--- a/tests/qemu-iotests/071
+++ b/tests/qemu-iotests/071
@@ -41,7 +41,7 @@ _supported_fmt qcow2
 _supported_proto file fuse
 _require_drivers blkdebug blkverify
 # blkdebug can only inject errors on bs->file, not on the data_file,
-# so thie test does not work with external data files
+# so this test does not work with external data files
 _unsupported_imgopts data_file
 
 do_run_qemu()
diff --git a/tests/qemu-iotests/080.out b/tests/qemu-iotests/080.out
index 45ab01db8e..d8acb3e723 100644
--- a/tests/qemu-iotests/080.out
+++ b/tests/qemu-iotests/080.out
@@ -33,7 +33,7 @@ qemu-io: can't open device TEST_DIR/t.qcow2: Snapshot table offset invalid
 
 == Hitting snapshot table size limit ==
 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
-qemu-img: Could not create snapshot 'test': -27 (File too large)
+qemu-img: Could not create snapshot 'test': File too large
 read 512/512 bytes at offset 0
 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 
@@ -56,8 +56,8 @@ qemu-io: can't open device TEST_DIR/t.qcow2: Backing file name too long
 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
 wrote 512/512 bytes at offset 0
 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-qemu-img: Could not create snapshot 'test': -27 (File too large)
-qemu-img: Could not create snapshot 'test': -11 (Resource temporarily unavailable)
+qemu-img: Could not create snapshot 'test': File too large
+qemu-img: Could not create snapshot 'test': Resource temporarily unavailable
 
 == Invalid snapshot L1 table offset ==
 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
diff --git a/tests/qemu-iotests/109.out b/tests/qemu-iotests/109.out
index e29280015e..2611d6a40f 100644
--- a/tests/qemu-iotests/109.out
+++ b/tests/qemu-iotests/109.out
@@ -44,6 +44,8 @@ read 512/512 bytes at offset 0
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}}
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}}
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "aborting", "id": "src"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "src", "len": 1024, "offset": 1024, "speed": 0, "type": "mirror"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}}
@@ -94,6 +96,8 @@ read 512/512 bytes at offset 0
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}}
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}}
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "aborting", "id": "src"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "src", "len": 197120, "offset": 197120, "speed": 0, "type": "mirror"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}}
@@ -144,6 +148,8 @@ read 512/512 bytes at offset 0
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}}
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}}
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "aborting", "id": "src"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "src", "len": 327680, "offset": 327680, "speed": 0, "type": "mirror"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}}
@@ -194,6 +200,8 @@ read 512/512 bytes at offset 0
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}}
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}}
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "aborting", "id": "src"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "src", "len": 1024, "offset": 1024, "speed": 0, "type": "mirror"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}}
@@ -244,6 +252,8 @@ read 512/512 bytes at offset 0
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}}
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}}
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "aborting", "id": "src"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "src", "len": 65536, "offset": 65536, "speed": 0, "type": "mirror"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}}
@@ -294,6 +304,8 @@ read 512/512 bytes at offset 0
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}}
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}}
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "aborting", "id": "src"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "src", "len": 2560, "offset": 2560, "speed": 0, "type": "mirror"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}}
@@ -343,6 +355,8 @@ read 512/512 bytes at offset 0
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}}
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}}
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "aborting", "id": "src"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "src", "len": 2560, "offset": 2560, "speed": 0, "type": "mirror"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}}
@@ -392,6 +406,8 @@ read 512/512 bytes at offset 0
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}}
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}}
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "aborting", "id": "src"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "src", "len": 31457280, "offset": 31457280, "speed": 0, "type": "mirror"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}}
@@ -441,6 +457,8 @@ read 512/512 bytes at offset 0
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}}
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}}
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "aborting", "id": "src"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "src", "len": 327680, "offset": 327680, "speed": 0, "type": "mirror"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}}
@@ -490,6 +508,8 @@ read 512/512 bytes at offset 0
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}}
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}}
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "aborting", "id": "src"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "src", "len": 2048, "offset": 2048, "speed": 0, "type": "mirror"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}}
@@ -519,6 +539,8 @@ WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}}
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}}
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "aborting", "id": "src"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "src", "len": 512, "offset": 512, "speed": 0, "type": "mirror"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}}
@@ -541,6 +563,8 @@ Images are identical.
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}}
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}}
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "aborting", "id": "src"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "src", "len": 512, "offset": 512, "speed": 0, "type": "mirror"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}}
diff --git a/tests/qemu-iotests/112.out b/tests/qemu-iotests/112.out
index dd3cc4383c..ebf426febc 100644
--- a/tests/qemu-iotests/112.out
+++ b/tests/qemu-iotests/112.out
@@ -32,7 +32,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
 refcount bits: 1
 wrote 512/512 bytes at offset 0
 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-qemu-img: Could not create snapshot 'foo': -22 (Invalid argument)
+qemu-img: Could not create snapshot 'foo': Invalid argument
 Leaked cluster 6 refcount=1 reference=0
 
 1 leaked clusters were found on the image.
@@ -44,7 +44,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
 refcount bits: 2
 wrote 512/512 bytes at offset 0
 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-qemu-img: Could not create snapshot 'baz': -22 (Invalid argument)
+qemu-img: Could not create snapshot 'baz': Invalid argument
 Leaked cluster 7 refcount=1 reference=0
 
 1 leaked clusters were found on the image.
@@ -75,7 +75,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
 refcount bits: 64
 wrote 512/512 bytes at offset 0
 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-qemu-img: Could not create snapshot 'foo': -22 (Invalid argument)
+qemu-img: Could not create snapshot 'foo': Invalid argument
 Leaked cluster 5 refcount=18446744073709551615 reference=1
 Leaked cluster 6 refcount=1 reference=0
 
diff --git a/tests/qemu-iotests/122.out b/tests/qemu-iotests/122.out
index e18766e167..6a1aa3fe2b 100644
--- a/tests/qemu-iotests/122.out
+++ b/tests/qemu-iotests/122.out
@@ -67,12 +67,12 @@ read 65536/65536 bytes at offset 4194304
 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 read 65536/65536 bytes at offset 8388608
 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-[{ "start": 0, "length": 65536, "depth": 0, "present": true, "zero": false, "data": true},
-{ "start": 65536, "length": 4128768, "depth": 0, "present": false, "zero": true, "data": false},
-{ "start": 4194304, "length": 65536, "depth": 0, "present": true, "zero": false, "data": true},
-{ "start": 4259840, "length": 4128768, "depth": 0, "present": false, "zero": true, "data": false},
-{ "start": 8388608, "length": 65536, "depth": 0, "present": true, "zero": false, "data": true},
-{ "start": 8454144, "length": 4128768, "depth": 0, "present": false, "zero": true, "data": false}]
+[{ "start": 0, "length": 65536, "depth": 0, "present": true, "zero": false, "data": true, "compressed": true},
+{ "start": 65536, "length": 4128768, "depth": 0, "present": false, "zero": true, "data": false, "compressed": false},
+{ "start": 4194304, "length": 65536, "depth": 0, "present": true, "zero": false, "data": true, "compressed": true},
+{ "start": 4259840, "length": 4128768, "depth": 0, "present": false, "zero": true, "data": false, "compressed": false},
+{ "start": 8388608, "length": 65536, "depth": 0, "present": true, "zero": false, "data": true, "compressed": true},
+{ "start": 8454144, "length": 4128768, "depth": 0, "present": false, "zero": true, "data": false, "compressed": false}]
 read 65536/65536 bytes at offset 0
 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 read 65536/65536 bytes at offset 4194304
@@ -94,12 +94,12 @@ wrote 1024/1024 bytes at offset 1046528
 1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 wrote 1024/1024 bytes at offset 0
 1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-[{ "start": 0, "length": 65536, "depth": 0, "present": true, "zero": false, "data": true},
-{ "start": 65536, "length": 65536, "depth": 0, "present": false, "zero": true, "data": false},
-{ "start": 131072, "length": 196608, "depth": 0, "present": true, "zero": false, "data": true},
-{ "start": 327680, "length": 655360, "depth": 0, "present": false, "zero": true, "data": false},
-{ "start": 983040, "length": 65536, "depth": 0, "present": true, "zero": false, "data": true},
-{ "start": 1048576, "length": 1046528, "depth": 0, "present": false, "zero": true, "data": false}]
+[{ "start": 0, "length": 65536, "depth": 0, "present": true, "zero": false, "data": true, "compressed": true},
+{ "start": 65536, "length": 65536, "depth": 0, "present": false, "zero": true, "data": false, "compressed": false},
+{ "start": 131072, "length": 196608, "depth": 0, "present": true, "zero": false, "data": true, "compressed": true},
+{ "start": 327680, "length": 655360, "depth": 0, "present": false, "zero": true, "data": false, "compressed": false},
+{ "start": 983040, "length": 65536, "depth": 0, "present": true, "zero": false, "data": true, "compressed": true},
+{ "start": 1048576, "length": 1046528, "depth": 0, "present": false, "zero": true, "data": false, "compressed": false}]
 read 16384/16384 bytes at offset 0
 16 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 read 16384/16384 bytes at offset 16384
@@ -130,14 +130,14 @@ read 3145728/3145728 bytes at offset 0
 3 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 read 63963136/63963136 bytes at offset 3145728
 61 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-[{ "start": 0, "length": 67108864, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET}]
+[{ "start": 0, "length": 67108864, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET}]
 
 convert -c -S 0:
 read 3145728/3145728 bytes at offset 0
 3 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 read 63963136/63963136 bytes at offset 3145728
 61 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-[{ "start": 0, "length": 67108864, "depth": 0, "present": true, "zero": false, "data": true}]
+[{ "start": 0, "length": 67108864, "depth": 0, "present": true, "zero": false, "data": true, "compressed": true}]
 Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=67108864
 wrote 33554432/33554432 bytes at offset 0
 32 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
@@ -152,7 +152,7 @@ read 30408704/30408704 bytes at offset 3145728
 29 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 read 33554432/33554432 bytes at offset 33554432
 32 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-[{ "start": 0, "length": 67108864, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET}]
+[{ "start": 0, "length": 67108864, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET}]
 
 convert -c -S 0 with source backing file:
 read 3145728/3145728 bytes at offset 0
@@ -161,7 +161,7 @@ read 30408704/30408704 bytes at offset 3145728
 29 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 read 33554432/33554432 bytes at offset 33554432
 32 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-[{ "start": 0, "length": 67108864, "depth": 0, "present": true, "zero": false, "data": true}]
+[{ "start": 0, "length": 67108864, "depth": 0, "present": true, "zero": false, "data": true, "compressed": true}]
 
 convert -S 0 -B ...
 read 3145728/3145728 bytes at offset 0
@@ -170,7 +170,7 @@ read 30408704/30408704 bytes at offset 3145728
 29 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 read 33554432/33554432 bytes at offset 33554432
 32 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-[{ "start": 0, "length": 67108864, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET}]
+[{ "start": 0, "length": 67108864, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET}]
 
 convert -c -S 0 -B ...
 read 3145728/3145728 bytes at offset 0
@@ -179,7 +179,7 @@ read 30408704/30408704 bytes at offset 3145728
 29 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 read 33554432/33554432 bytes at offset 33554432
 32 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-[{ "start": 0, "length": 67108864, "depth": 0, "present": true, "zero": false, "data": true}]
+[{ "start": 0, "length": 67108864, "depth": 0, "present": true, "zero": false, "data": true, "compressed": true}]
 
 === Non-zero -S ===
 
@@ -196,32 +196,32 @@ wrote 1024/1024 bytes at offset 66560
 1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 
 convert -S 4k
-[{ "start": 0, "length": 4096, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 4096, "length": 4096, "depth": 0, "present": false, "zero": true, "data": false},
-{ "start": 8192, "length": 4096, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 12288, "length": 4096, "depth": 0, "present": false, "zero": true, "data": false},
-{ "start": 16384, "length": 4096, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 20480, "length": 67088384, "depth": 0, "present": false, "zero": true, "data": false}]
+[{ "start": 0, "length": 4096, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 4096, "length": 4096, "depth": 0, "present": false, "zero": true, "data": false, "compressed": false},
+{ "start": 8192, "length": 4096, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 12288, "length": 4096, "depth": 0, "present": false, "zero": true, "data": false, "compressed": false},
+{ "start": 16384, "length": 4096, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 20480, "length": 67088384, "depth": 0, "present": false, "zero": true, "data": false, "compressed": false}]
 
 convert -c -S 4k
-[{ "start": 0, "length": 1024, "depth": 0, "present": true, "zero": false, "data": true},
-{ "start": 1024, "length": 7168, "depth": 0, "present": false, "zero": true, "data": false},
-{ "start": 8192, "length": 1024, "depth": 0, "present": true, "zero": false, "data": true},
-{ "start": 9216, "length": 8192, "depth": 0, "present": false, "zero": true, "data": false},
-{ "start": 17408, "length": 1024, "depth": 0, "present": true, "zero": false, "data": true},
-{ "start": 18432, "length": 67090432, "depth": 0, "present": false, "zero": true, "data": false}]
+[{ "start": 0, "length": 1024, "depth": 0, "present": true, "zero": false, "data": true, "compressed": true},
+{ "start": 1024, "length": 7168, "depth": 0, "present": false, "zero": true, "data": false, "compressed": false},
+{ "start": 8192, "length": 1024, "depth": 0, "present": true, "zero": false, "data": true, "compressed": true},
+{ "start": 9216, "length": 8192, "depth": 0, "present": false, "zero": true, "data": false, "compressed": false},
+{ "start": 17408, "length": 1024, "depth": 0, "present": true, "zero": false, "data": true, "compressed": true},
+{ "start": 18432, "length": 67090432, "depth": 0, "present": false, "zero": true, "data": false, "compressed": false}]
 
 convert -S 8k
-[{ "start": 0, "length": 24576, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 24576, "length": 67084288, "depth": 0, "present": false, "zero": true, "data": false}]
+[{ "start": 0, "length": 24576, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 24576, "length": 67084288, "depth": 0, "present": false, "zero": true, "data": false, "compressed": false}]
 
 convert -c -S 8k
-[{ "start": 0, "length": 1024, "depth": 0, "present": true, "zero": false, "data": true},
-{ "start": 1024, "length": 7168, "depth": 0, "present": false, "zero": true, "data": false},
-{ "start": 8192, "length": 1024, "depth": 0, "present": true, "zero": false, "data": true},
-{ "start": 9216, "length": 8192, "depth": 0, "present": false, "zero": true, "data": false},
-{ "start": 17408, "length": 1024, "depth": 0, "present": true, "zero": false, "data": true},
-{ "start": 18432, "length": 67090432, "depth": 0, "present": false, "zero": true, "data": false}]
+[{ "start": 0, "length": 1024, "depth": 0, "present": true, "zero": false, "data": true, "compressed": true},
+{ "start": 1024, "length": 7168, "depth": 0, "present": false, "zero": true, "data": false, "compressed": false},
+{ "start": 8192, "length": 1024, "depth": 0, "present": true, "zero": false, "data": true, "compressed": true},
+{ "start": 9216, "length": 8192, "depth": 0, "present": false, "zero": true, "data": false, "compressed": false},
+{ "start": 17408, "length": 1024, "depth": 0, "present": true, "zero": false, "data": true, "compressed": true},
+{ "start": 18432, "length": 67090432, "depth": 0, "present": false, "zero": true, "data": false, "compressed": false}]
 
 === -n to a non-zero image ===
 
@@ -235,18 +235,18 @@ Images are identical.
 
 Formatting 'TEST_DIR/t.IMGFMT.orig', fmt=IMGFMT size=67108864
 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
-[{ "start": 0, "length": 67108864, "depth": 0, "present": true, "zero": true, "data": false}]
+[{ "start": 0, "length": 67108864, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false}]
 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
-[{ "start": 0, "length": 67108864, "depth": 0, "present": false, "zero": true, "data": false}]
+[{ "start": 0, "length": 67108864, "depth": 0, "present": false, "zero": true, "data": false, "compressed": false}]
 
 === -n to an empty image with a backing file ===
 
 Formatting 'TEST_DIR/t.IMGFMT.orig', fmt=IMGFMT size=67108864
 Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=67108864
 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 backing_file=TEST_DIR/t.IMGFMT.base backing_fmt=IMGFMT
-[{ "start": 0, "length": 67108864, "depth": 0, "present": true, "zero": true, "data": false}]
+[{ "start": 0, "length": 67108864, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false}]
 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 backing_file=TEST_DIR/t.IMGFMT.base backing_fmt=IMGFMT
-[{ "start": 0, "length": 67108864, "depth": 0, "present": true, "zero": false, "data": true, "offset": 327680}]
+[{ "start": 0, "length": 67108864, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": 327680}]
 
 === -n -B to an image without a backing file ===
 
diff --git a/tests/qemu-iotests/131 b/tests/qemu-iotests/131
index a847692b4c..3119100e78 100755
--- a/tests/qemu-iotests/131
+++ b/tests/qemu-iotests/131
@@ -44,31 +44,87 @@ _supported_os Linux
 inuse_offset=$((0x2c))
 
 size=$((64 * 1024 * 1024))
-CLUSTER_SIZE=64k
 IMGFMT=parallels
 _make_test_img $size
 
-echo == read empty image ==
-{ $QEMU_IO -c "read -P 0 32k 64k" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
-echo == write more than 1 block in a row ==
-{ $QEMU_IO -c "write -P 0x11 32k 128k" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
-echo == read less than block ==
-{ $QEMU_IO -c "read -P 0x11 32k 32k" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
-echo == read exactly 1 block ==
-{ $QEMU_IO -c "read -P 0x11 64k 64k" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
-echo == read more than 1 block ==
-{ $QEMU_IO -c "read -P 0x11 32k 128k" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
-echo == check that there is no trash after written ==
-{ $QEMU_IO -c "read -P 0 160k 32k" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
-echo == check that there is no trash before written ==
-{ $QEMU_IO -c "read -P 0 0 32k" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
+# get cluster size in sectors from "tracks" header field
+CLUSTER_SIZE_OFFSET=28
+CLUSTER_SIZE=$(peek_file_le $TEST_IMG $CLUSTER_SIZE_OFFSET 4)
+CLUSTER_SIZE=$((CLUSTER_SIZE * 512))
+CLUSTER_HALF_SIZE=$((CLUSTER_SIZE / 2))
+CLUSTER_DBL_SIZE=$((CLUSTER_SIZE * 2))
 
-echo "== Corrupt image =="
+echo == read empty image ==
+{ $QEMU_IO -c "read -P 0 $CLUSTER_HALF_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
+echo == write more than 1 block in a row ==
+{ $QEMU_IO -c "write -P 0x11 $CLUSTER_HALF_SIZE $CLUSTER_DBL_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
+echo == read less than block ==
+{ $QEMU_IO -c "read -P 0x11 $CLUSTER_HALF_SIZE $CLUSTER_HALF_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
+echo == read exactly 1 block ==
+{ $QEMU_IO -c "read -P 0x11 $CLUSTER_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
+echo == read more than 1 block ==
+{ $QEMU_IO -c "read -P 0x11 $CLUSTER_HALF_SIZE $CLUSTER_DBL_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
+echo == check that there is no trash after written ==
+{ $QEMU_IO -c "read -P 0 $((CLUSTER_HALF_SIZE + CLUSTER_DBL_SIZE)) $CLUSTER_HALF_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
+echo == check that there is no trash before written ==
+{ $QEMU_IO -c "read -P 0 0 $CLUSTER_HALF_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
+
+echo "== corrupt image =="
 poke_file "$TEST_IMG" "$inuse_offset" "\x59\x6e\x6f\x74"
-{ $QEMU_IO -c "read -P 0x11 64k 64k" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
-_check_test_img
-_check_test_img -r all
-{ $QEMU_IO -c "read -P 0x11 64k 64k" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
+echo "== read corrupted image with repairing =="
+{ $QEMU_IO -c "read -P 0x11 $CLUSTER_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
+
+echo "== check discard =="
+
+# Clear image
+_make_test_img $size
+
+{ $QEMU_IO -c "write -P 0x11 0 $CLUSTER_DBL_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
+{ $QEMU_IMG map "$TEST_IMG"; } 2>&1 | _filter_qemu_img_map
+{ $QEMU_IO -c "discard 0 $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
+{ $QEMU_IMG map "$TEST_IMG"; } 2>&1 | _filter_qemu_img_map
+{ $QEMU_IO -c "read -P 0 0 $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
+
+echo "== check simple allocation over the discarded hole =="
+
+{ $QEMU_IO -c "write -P 0x11 $CLUSTER_DBL_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
+{ $QEMU_IMG map "$TEST_IMG"; } 2>&1 | _filter_qemu_img_map
+{ $QEMU_IO -c "read -P 0x11 $CLUSTER_DBL_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
+
+echo "== check more complex allocation over the discard hole =="
+
+# Clear image
+_make_test_img $size
+
+{ $QEMU_IO -c "write -P 0x11 $CLUSTER_DBL_SIZE $CLUSTER_DBL_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
+{ $QEMU_IO -c "discard $CLUSTER_DBL_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
+# There is 1 cluster hole. Fill it fully and allocate 1 cluster at the end
+{ $QEMU_IO -c "write -P 0x12 $CLUSTER_HALF_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
+{ $QEMU_IMG map "$TEST_IMG"; } 2>&1 | _filter_qemu_img_map
+{ $QEMU_IO -c "read -P 0x12 $CLUSTER_HALF_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
+{ $QEMU_IO -c "read -P 0 0 $CLUSTER_HALF_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
+{ $QEMU_IO -c "read -P 0 $((CLUSTER_SIZE + CLUSTER_HALF_SIZE)) $CLUSTER_HALF_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
+
+echo "== check write-zeroes =="
+
+# Clear image
+_make_test_img $size
+
+{ $QEMU_IO -c "write -P 0x11 0 $CLUSTER_DBL_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
+{ $QEMU_IO -c "write -z 0 $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
+{ $QEMU_IMG map "$TEST_IMG"; } 2>&1 | _filter_qemu_img_map
+{ $QEMU_IO -c "read -P 0 0 $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
+{ $QEMU_IO -c "read -P 0x11 $CLUSTER_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
+
+echo "== check cluster-partial write-zeroes =="
+
+# Clear image
+_make_test_img $size
+
+{ $QEMU_IO -c "write -P 0x11 0 $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
+{ $QEMU_IO -c "write -z 0 $CLUSTER_HALF_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
+{ $QEMU_IO -c "read -P 0 0 $CLUSTER_HALF_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
+{ $QEMU_IO -c "read -P 0x11 $CLUSTER_HALF_SIZE $CLUSTER_HALF_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
 
 echo "== allocate with backing =="
 # Verify that allocating clusters works fine even when there is a backing image.
@@ -83,7 +139,7 @@ TEST_IMG="$TEST_IMG.base" _make_test_img $size
 
 # Write some data to the base image (which would trigger an assertion failure if
 # interpreted as a QEMUIOVector)
-$QEMU_IO -c 'write -P 42 0 64k' "$TEST_IMG.base" | _filter_qemu_io
+$QEMU_IO -c "write -P 42 0 $CLUSTER_SIZE" "$TEST_IMG.base" | _filter_qemu_io
 
 # Parallels does not seem to support storing a backing filename in the image
 # itself, so we need to build our backing chain on the command line
@@ -99,8 +155,8 @@ QEMU_IO_OPTIONS=$QEMU_IO_OPTIONS_NO_FMT \
 QEMU_IO_OPTIONS=$QEMU_IO_OPTIONS_NO_FMT \
     $QEMU_IO --image-opts "$imgopts" \
     -c 'read -P 1 0 64' \
-    -c "read -P 42 64 $((64 * 1024 - 64))" \
-    -c "read -P 0 64k $((size - 64 * 1024))" \
+    -c "read -P 42 64 $((CLUSTER_SIZE - 64))" \
+    -c "read -P 0 $CLUSTER_SIZE $((size - CLUSTER_SIZE))" \
     | _filter_qemu_io
 
 # success, all done
diff --git a/tests/qemu-iotests/131.out b/tests/qemu-iotests/131.out
index de5ef7a8f5..86a2d2a49b 100644
--- a/tests/qemu-iotests/131.out
+++ b/tests/qemu-iotests/131.out
@@ -1,53 +1,102 @@
 QA output created by 131
 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
 == read empty image ==
-read 65536/65536 bytes at offset 32768
-64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 1048576/1048576 bytes at offset 524288
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 == write more than 1 block in a row ==
-wrote 131072/131072 bytes at offset 32768
-128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 2097152/2097152 bytes at offset 524288
+2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 == read less than block ==
-read 32768/32768 bytes at offset 32768
-32 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 524288/524288 bytes at offset 524288
+512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 == read exactly 1 block ==
-read 65536/65536 bytes at offset 65536
-64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 1048576/1048576 bytes at offset 1048576
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 == read more than 1 block ==
-read 131072/131072 bytes at offset 32768
-128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 2097152/2097152 bytes at offset 524288
+2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 == check that there is no trash after written ==
-read 32768/32768 bytes at offset 163840
-32 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 524288/524288 bytes at offset 2621440
+512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 == check that there is no trash before written ==
-read 32768/32768 bytes at offset 0
-32 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-== Corrupt image ==
-qemu-io: can't open device TEST_DIR/t.parallels: parallels: Image was not closed correctly; cannot be opened read/write
-ERROR image was not closed correctly
-
-1 errors were found on the image.
-Data may be corrupted, or further writes to the image may corrupt it.
+read 524288/524288 bytes at offset 0
+512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+== corrupt image ==
+== read corrupted image with repairing ==
 Repairing image was not closed correctly
-The following inconsistencies were found and repaired:
-
-    0 leaked clusters
-    1 corruptions
-
-Double checking the fixed image now...
-No errors were found on the image.
-read 65536/65536 bytes at offset 65536
-64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 1048576/1048576 bytes at offset 1048576
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+== check discard ==
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
+wrote 2097152/2097152 bytes at offset 0
+2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+Offset          Length          File
+0               0x200000        TEST_DIR/t.IMGFMT
+discard 1048576/1048576 bytes at offset 0
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+Offset          Length          File
+0x100000        0x100000        TEST_DIR/t.IMGFMT
+read 1048576/1048576 bytes at offset 0
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+== check simple allocation over the discarded hole ==
+wrote 1048576/1048576 bytes at offset 2097152
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+Offset          Length          File
+0x100000        0x100000        TEST_DIR/t.IMGFMT
+0x200000        0x100000        TEST_DIR/t.IMGFMT
+read 1048576/1048576 bytes at offset 2097152
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+== check more complex allocation over the discard hole ==
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
+wrote 2097152/2097152 bytes at offset 2097152
+2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+discard 1048576/1048576 bytes at offset 2097152
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 1048576/1048576 bytes at offset 524288
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+Offset          Length          File
+0               0x100000        TEST_DIR/t.IMGFMT
+0x100000        0x100000        TEST_DIR/t.IMGFMT
+0x300000        0x100000        TEST_DIR/t.IMGFMT
+read 1048576/1048576 bytes at offset 524288
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 524288/524288 bytes at offset 0
+512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 524288/524288 bytes at offset 1572864
+512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+== check write-zeroes ==
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
+wrote 2097152/2097152 bytes at offset 0
+2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 1048576/1048576 bytes at offset 0
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+Offset          Length          File
+0x100000        0x100000        TEST_DIR/t.IMGFMT
+read 1048576/1048576 bytes at offset 0
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 1048576/1048576 bytes at offset 1048576
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+== check cluster-partial write-zeroes ==
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
+wrote 1048576/1048576 bytes at offset 0
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 524288/524288 bytes at offset 0
+512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 524288/524288 bytes at offset 0
+512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 524288/524288 bytes at offset 524288
+512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 == allocate with backing ==
 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
 Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=67108864
-wrote 65536/65536 bytes at offset 0
-64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 1048576/1048576 bytes at offset 0
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 wrote 64/64 bytes at offset 0
 64 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 read 64/64 bytes at offset 0
 64 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-read 65472/65472 bytes at offset 64
-63.938 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-read 67043328/67043328 bytes at offset 65536
-63.938 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 1048512/1048512 bytes at offset 64
+1023.938 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 66060288/66060288 bytes at offset 1048576
+63 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 *** done
diff --git a/tests/qemu-iotests/146.out b/tests/qemu-iotests/146.out
index dfd6c77140..a48804154e 100644
--- a/tests/qemu-iotests/146.out
+++ b/tests/qemu-iotests/146.out
@@ -2,414 +2,414 @@ QA output created by 146
 
 === Testing VPC Autodetect ===
 
-[{ "start": 0, "length": 136363130880, "depth": 0, "present": true, "zero": true, "data": false}]
+[{ "start": 0, "length": 136363130880, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false}]
 
 === Testing VPC with current_size force ===
 
-[{ "start": 0, "length": 136365211648, "depth": 0, "present": true, "zero": true, "data": false}]
+[{ "start": 0, "length": 136365211648, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false}]
 
 === Testing VPC with chs force ===
 
-[{ "start": 0, "length": 136363130880, "depth": 0, "present": true, "zero": true, "data": false}]
+[{ "start": 0, "length": 136363130880, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false}]
 
 === Testing Hyper-V Autodetect ===
 
-[{ "start": 0, "length": 136365211648, "depth": 0, "present": true, "zero": true, "data": false}]
+[{ "start": 0, "length": 136365211648, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false}]
 
 === Testing Hyper-V with current_size force ===
 
-[{ "start": 0, "length": 136365211648, "depth": 0, "present": true, "zero": true, "data": false}]
+[{ "start": 0, "length": 136365211648, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false}]
 
 === Testing Hyper-V with chs force ===
 
-[{ "start": 0, "length": 136363130880, "depth": 0, "present": true, "zero": true, "data": false}]
+[{ "start": 0, "length": 136363130880, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false}]
 
 === Testing d2v Autodetect ===
 
-[{ "start": 0, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 2097152, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 4194304, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 6291456, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 8388608, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 10485760, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 12582912, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 14680064, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 16777216, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 18874368, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 20971520, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 23068672, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 25165824, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 27262976, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 29360128, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 31457280, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 33554432, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 35651584, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 37748736, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 39845888, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 41943040, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 44040192, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 46137344, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 48234496, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 50331648, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 52428800, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 54525952, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 56623104, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 58720256, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 60817408, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 62914560, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 65011712, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 67108864, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 69206016, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 71303168, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 73400320, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 75497472, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 77594624, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 79691776, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 81788928, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 83886080, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 85983232, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 88080384, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 90177536, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 92274688, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 94371840, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 96468992, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 98566144, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 100663296, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 102760448, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 104857600, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 106954752, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 109051904, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 111149056, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 113246208, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 115343360, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 117440512, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 119537664, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 121634816, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 123731968, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 125829120, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 127926272, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 130023424, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 132120576, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 134217728, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 136314880, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 138412032, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 140509184, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 142606336, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 144703488, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 146800640, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 148897792, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 150994944, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 153092096, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 155189248, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 157286400, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 159383552, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 161480704, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 163577856, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 165675008, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 167772160, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 169869312, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 171966464, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 174063616, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 176160768, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 178257920, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 180355072, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 182452224, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 184549376, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 186646528, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 188743680, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 190840832, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 192937984, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 195035136, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 197132288, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 199229440, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 201326592, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 203423744, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 205520896, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 207618048, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 209715200, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 211812352, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 213909504, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 216006656, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 218103808, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 220200960, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 222298112, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 224395264, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 226492416, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 228589568, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 230686720, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 232783872, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 234881024, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 236978176, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 239075328, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 241172480, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 243269632, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 245366784, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 247463936, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 249561088, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 251658240, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 253755392, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 255852544, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 257949696, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 260046848, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 262144000, "length": 1310720, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET}]
+[{ "start": 0, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 2097152, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 4194304, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 6291456, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 8388608, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 10485760, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 12582912, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 14680064, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 16777216, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 18874368, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 20971520, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 23068672, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 25165824, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 27262976, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 29360128, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 31457280, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 33554432, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 35651584, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 37748736, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 39845888, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 41943040, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 44040192, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 46137344, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 48234496, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 50331648, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 52428800, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 54525952, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 56623104, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 58720256, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 60817408, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 62914560, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 65011712, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 67108864, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 69206016, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 71303168, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 73400320, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 75497472, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 77594624, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 79691776, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 81788928, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 83886080, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 85983232, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 88080384, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 90177536, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 92274688, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 94371840, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 96468992, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 98566144, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 100663296, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 102760448, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 104857600, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 106954752, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 109051904, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 111149056, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 113246208, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 115343360, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 117440512, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 119537664, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 121634816, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 123731968, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 125829120, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 127926272, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 130023424, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 132120576, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 134217728, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 136314880, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 138412032, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 140509184, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 142606336, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 144703488, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 146800640, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 148897792, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 150994944, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 153092096, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 155189248, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 157286400, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 159383552, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 161480704, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 163577856, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 165675008, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 167772160, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 169869312, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 171966464, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 174063616, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 176160768, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 178257920, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 180355072, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 182452224, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 184549376, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 186646528, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 188743680, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 190840832, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 192937984, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 195035136, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 197132288, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 199229440, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 201326592, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 203423744, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 205520896, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 207618048, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 209715200, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 211812352, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 213909504, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 216006656, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 218103808, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 220200960, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 222298112, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 224395264, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 226492416, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 228589568, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 230686720, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 232783872, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 234881024, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 236978176, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 239075328, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 241172480, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 243269632, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 245366784, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 247463936, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 249561088, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 251658240, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 253755392, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 255852544, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 257949696, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 260046848, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 262144000, "length": 1310720, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET}]
 
 === Testing d2v with current_size force ===
 
-[{ "start": 0, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 2097152, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 4194304, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 6291456, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 8388608, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 10485760, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 12582912, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 14680064, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 16777216, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 18874368, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 20971520, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 23068672, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 25165824, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 27262976, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 29360128, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 31457280, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 33554432, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 35651584, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 37748736, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 39845888, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 41943040, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 44040192, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 46137344, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 48234496, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 50331648, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 52428800, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 54525952, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 56623104, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 58720256, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 60817408, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 62914560, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 65011712, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 67108864, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 69206016, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 71303168, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 73400320, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 75497472, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 77594624, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 79691776, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 81788928, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 83886080, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 85983232, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 88080384, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 90177536, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 92274688, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 94371840, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 96468992, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 98566144, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 100663296, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 102760448, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 104857600, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 106954752, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 109051904, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 111149056, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 113246208, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 115343360, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 117440512, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 119537664, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 121634816, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 123731968, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 125829120, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 127926272, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 130023424, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 132120576, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 134217728, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 136314880, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 138412032, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 140509184, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 142606336, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 144703488, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 146800640, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 148897792, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 150994944, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 153092096, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 155189248, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 157286400, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 159383552, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 161480704, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 163577856, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 165675008, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 167772160, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 169869312, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 171966464, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 174063616, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 176160768, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 178257920, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 180355072, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 182452224, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 184549376, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 186646528, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 188743680, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 190840832, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 192937984, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 195035136, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 197132288, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 199229440, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 201326592, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 203423744, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 205520896, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 207618048, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 209715200, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 211812352, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 213909504, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 216006656, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 218103808, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 220200960, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 222298112, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 224395264, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 226492416, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 228589568, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 230686720, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 232783872, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 234881024, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 236978176, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 239075328, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 241172480, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 243269632, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 245366784, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 247463936, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 249561088, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 251658240, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 253755392, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 255852544, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 257949696, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 260046848, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 262144000, "length": 1310720, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET}]
+[{ "start": 0, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 2097152, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 4194304, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 6291456, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 8388608, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 10485760, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 12582912, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 14680064, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 16777216, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 18874368, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 20971520, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 23068672, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 25165824, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 27262976, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 29360128, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 31457280, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 33554432, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 35651584, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 37748736, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 39845888, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 41943040, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 44040192, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 46137344, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 48234496, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 50331648, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 52428800, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 54525952, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 56623104, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 58720256, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 60817408, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 62914560, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 65011712, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 67108864, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 69206016, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 71303168, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 73400320, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 75497472, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 77594624, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 79691776, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 81788928, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 83886080, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 85983232, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 88080384, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 90177536, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 92274688, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 94371840, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 96468992, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 98566144, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 100663296, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 102760448, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 104857600, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 106954752, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 109051904, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 111149056, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 113246208, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 115343360, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 117440512, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 119537664, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 121634816, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 123731968, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 125829120, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 127926272, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 130023424, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 132120576, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 134217728, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 136314880, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 138412032, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 140509184, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 142606336, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 144703488, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 146800640, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 148897792, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 150994944, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 153092096, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 155189248, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 157286400, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 159383552, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 161480704, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 163577856, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 165675008, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 167772160, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 169869312, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 171966464, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 174063616, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 176160768, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 178257920, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 180355072, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 182452224, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 184549376, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 186646528, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 188743680, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 190840832, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 192937984, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 195035136, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 197132288, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 199229440, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 201326592, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 203423744, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 205520896, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 207618048, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 209715200, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 211812352, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 213909504, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 216006656, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 218103808, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 220200960, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 222298112, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 224395264, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 226492416, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 228589568, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 230686720, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 232783872, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 234881024, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 236978176, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 239075328, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 241172480, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 243269632, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 245366784, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 247463936, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 249561088, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 251658240, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 253755392, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 255852544, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 257949696, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 260046848, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 262144000, "length": 1310720, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET}]
 
 === Testing d2v with chs force ===
 
-[{ "start": 0, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 2097152, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 4194304, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 6291456, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 8388608, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 10485760, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 12582912, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 14680064, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 16777216, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 18874368, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 20971520, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 23068672, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 25165824, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 27262976, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 29360128, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 31457280, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 33554432, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 35651584, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 37748736, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 39845888, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 41943040, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 44040192, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 46137344, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 48234496, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 50331648, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 52428800, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 54525952, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 56623104, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 58720256, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 60817408, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 62914560, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 65011712, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 67108864, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 69206016, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 71303168, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 73400320, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 75497472, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 77594624, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 79691776, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 81788928, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 83886080, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 85983232, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 88080384, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 90177536, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 92274688, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 94371840, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 96468992, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 98566144, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 100663296, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 102760448, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 104857600, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 106954752, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 109051904, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 111149056, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 113246208, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 115343360, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 117440512, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 119537664, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 121634816, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 123731968, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 125829120, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 127926272, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 130023424, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 132120576, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 134217728, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 136314880, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 138412032, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 140509184, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 142606336, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 144703488, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 146800640, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 148897792, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 150994944, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 153092096, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 155189248, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 157286400, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 159383552, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 161480704, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 163577856, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 165675008, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 167772160, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 169869312, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 171966464, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 174063616, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 176160768, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 178257920, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 180355072, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 182452224, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 184549376, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 186646528, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 188743680, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 190840832, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 192937984, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 195035136, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 197132288, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 199229440, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 201326592, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 203423744, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 205520896, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 207618048, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 209715200, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 211812352, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 213909504, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 216006656, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 218103808, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 220200960, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 222298112, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 224395264, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 226492416, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 228589568, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 230686720, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 232783872, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 234881024, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 236978176, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 239075328, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 241172480, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 243269632, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 245366784, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 247463936, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 249561088, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 251658240, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 253755392, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 255852544, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 257949696, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 260046848, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 262144000, "length": 1310720, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET}]
+[{ "start": 0, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 2097152, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 4194304, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 6291456, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 8388608, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 10485760, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 12582912, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 14680064, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 16777216, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 18874368, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 20971520, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 23068672, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 25165824, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 27262976, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 29360128, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 31457280, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 33554432, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 35651584, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 37748736, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 39845888, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 41943040, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 44040192, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 46137344, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 48234496, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 50331648, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 52428800, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 54525952, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 56623104, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 58720256, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 60817408, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 62914560, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 65011712, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 67108864, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 69206016, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 71303168, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 73400320, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 75497472, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 77594624, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 79691776, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 81788928, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 83886080, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 85983232, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 88080384, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 90177536, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 92274688, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 94371840, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 96468992, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 98566144, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 100663296, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 102760448, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 104857600, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 106954752, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 109051904, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 111149056, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 113246208, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 115343360, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 117440512, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 119537664, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 121634816, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 123731968, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 125829120, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 127926272, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 130023424, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 132120576, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 134217728, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 136314880, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 138412032, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 140509184, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 142606336, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 144703488, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 146800640, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 148897792, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 150994944, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 153092096, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 155189248, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 157286400, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 159383552, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 161480704, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 163577856, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 165675008, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 167772160, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 169869312, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 171966464, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 174063616, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 176160768, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 178257920, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 180355072, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 182452224, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 184549376, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 186646528, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 188743680, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 190840832, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 192937984, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 195035136, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 197132288, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 199229440, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 201326592, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 203423744, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 205520896, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 207618048, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 209715200, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 211812352, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 213909504, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 216006656, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 218103808, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 220200960, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 222298112, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 224395264, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 226492416, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 228589568, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 230686720, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 232783872, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 234881024, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 236978176, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 239075328, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 241172480, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 243269632, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 245366784, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 247463936, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 249561088, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 251658240, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 253755392, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 255852544, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 257949696, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 260046848, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 262144000, "length": 1310720, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET}]
 
 === Testing Image create, default ===
 
@@ -417,15 +417,15 @@ Formatting 'TEST_DIR/IMGFMT-create-test.IMGFMT', fmt=IMGFMT size=4294967296
 
 === Read created image, default opts ====
 
-[{ "start": 0, "length": 4295467008, "depth": 0, "present": true, "zero": true, "data": false}]
+[{ "start": 0, "length": 4295467008, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false}]
 
 === Read created image, force_size_calc=chs ====
 
-[{ "start": 0, "length": 4295467008, "depth": 0, "present": true, "zero": true, "data": false}]
+[{ "start": 0, "length": 4295467008, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false}]
 
 === Read created image, force_size_calc=current_size ====
 
-[{ "start": 0, "length": 4295467008, "depth": 0, "present": true, "zero": true, "data": false}]
+[{ "start": 0, "length": 4295467008, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false}]
 
 === Testing Image create, force_size ===
 
@@ -433,13 +433,13 @@ Formatting 'TEST_DIR/IMGFMT-create-test.IMGFMT', fmt=IMGFMT size=4294967296
 
 === Read created image, default opts ====
 
-[{ "start": 0, "length": 4294967296, "depth": 0, "present": true, "zero": true, "data": false}]
+[{ "start": 0, "length": 4294967296, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false}]
 
 === Read created image, force_size_calc=chs ====
 
-[{ "start": 0, "length": 4294967296, "depth": 0, "present": true, "zero": true, "data": false}]
+[{ "start": 0, "length": 4294967296, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false}]
 
 === Read created image, force_size_calc=current_size ====
 
-[{ "start": 0, "length": 4294967296, "depth": 0, "present": true, "zero": true, "data": false}]
+[{ "start": 0, "length": 4294967296, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false}]
 *** done
diff --git a/tests/qemu-iotests/154.out b/tests/qemu-iotests/154.out
index 1fa7ffc475..0199269add 100644
--- a/tests/qemu-iotests/154.out
+++ b/tests/qemu-iotests/154.out
@@ -11,14 +11,14 @@ wrote 2048/2048 bytes at offset 17408
 2 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 wrote 2048/2048 bytes at offset 27648
 2 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-[{ "start": 0, "length": 4096, "depth": 0, "present": true, "zero": true, "data": false},
-{ "start": 4096, "length": 4096, "depth": 1, "present": false, "zero": true, "data": false},
-{ "start": 8192, "length": 4096, "depth": 0, "present": true, "zero": true, "data": false},
-{ "start": 12288, "length": 4096, "depth": 1, "present": false, "zero": true, "data": false},
-{ "start": 16384, "length": 4096, "depth": 0, "present": true, "zero": true, "data": false},
-{ "start": 20480, "length": 4096, "depth": 1, "present": false, "zero": true, "data": false},
-{ "start": 24576, "length": 8192, "depth": 0, "present": true, "zero": true, "data": false},
-{ "start": 32768, "length": 134184960, "depth": 1, "present": false, "zero": true, "data": false}]
+[{ "start": 0, "length": 4096, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false},
+{ "start": 4096, "length": 4096, "depth": 1, "present": false, "zero": true, "data": false, "compressed": false},
+{ "start": 8192, "length": 4096, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false},
+{ "start": 12288, "length": 4096, "depth": 1, "present": false, "zero": true, "data": false, "compressed": false},
+{ "start": 16384, "length": 4096, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false},
+{ "start": 20480, "length": 4096, "depth": 1, "present": false, "zero": true, "data": false, "compressed": false},
+{ "start": 24576, "length": 8192, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false},
+{ "start": 32768, "length": 134184960, "depth": 1, "present": false, "zero": true, "data": false, "compressed": false}]
 
 == backing file contains non-zero data before write_zeroes ==
 Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=134217728
@@ -41,11 +41,11 @@ read 1024/1024 bytes at offset 65536
 1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 read 2048/2048 bytes at offset 67584
 2 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-[{ "start": 0, "length": 32768, "depth": 1, "present": false, "zero": true, "data": false},
-{ "start": 32768, "length": 4096, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 36864, "length": 28672, "depth": 1, "present": false, "zero": true, "data": false},
-{ "start": 65536, "length": 4096, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 69632, "length": 134148096, "depth": 1, "present": false, "zero": true, "data": false}]
+[{ "start": 0, "length": 32768, "depth": 1, "present": false, "zero": true, "data": false, "compressed": false},
+{ "start": 32768, "length": 4096, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 36864, "length": 28672, "depth": 1, "present": false, "zero": true, "data": false, "compressed": false},
+{ "start": 65536, "length": 4096, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 69632, "length": 134148096, "depth": 1, "present": false, "zero": true, "data": false, "compressed": false}]
 
 == backing file contains non-zero data after write_zeroes ==
 Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=134217728
@@ -68,11 +68,11 @@ read 1024/1024 bytes at offset 44032
 1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 read 3072/3072 bytes at offset 40960
 3 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-[{ "start": 0, "length": 32768, "depth": 1, "present": false, "zero": true, "data": false},
-{ "start": 32768, "length": 4096, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 36864, "length": 4096, "depth": 1, "present": false, "zero": true, "data": false},
-{ "start": 40960, "length": 4096, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 45056, "length": 134172672, "depth": 1, "present": false, "zero": true, "data": false}]
+[{ "start": 0, "length": 32768, "depth": 1, "present": false, "zero": true, "data": false, "compressed": false},
+{ "start": 32768, "length": 4096, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 36864, "length": 4096, "depth": 1, "present": false, "zero": true, "data": false, "compressed": false},
+{ "start": 40960, "length": 4096, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 45056, "length": 134172672, "depth": 1, "present": false, "zero": true, "data": false, "compressed": false}]
 
 == write_zeroes covers non-zero data ==
 Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=134217728
@@ -101,15 +101,15 @@ wrote 2048/2048 bytes at offset 29696
 2 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 read 4096/4096 bytes at offset 28672
 4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-[{ "start": 0, "length": 4096, "depth": 1, "present": false, "zero": true, "data": false},
-{ "start": 4096, "length": 4096, "depth": 0, "present": true, "zero": true, "data": false},
-{ "start": 8192, "length": 4096, "depth": 1, "present": false, "zero": true, "data": false},
-{ "start": 12288, "length": 4096, "depth": 0, "present": true, "zero": true, "data": false},
-{ "start": 16384, "length": 4096, "depth": 1, "present": false, "zero": true, "data": false},
-{ "start": 20480, "length": 4096, "depth": 0, "present": true, "zero": true, "data": false},
-{ "start": 24576, "length": 4096, "depth": 1, "present": false, "zero": true, "data": false},
-{ "start": 28672, "length": 4096, "depth": 0, "present": true, "zero": true, "data": false},
-{ "start": 32768, "length": 134184960, "depth": 1, "present": false, "zero": true, "data": false}]
+[{ "start": 0, "length": 4096, "depth": 1, "present": false, "zero": true, "data": false, "compressed": false},
+{ "start": 4096, "length": 4096, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false},
+{ "start": 8192, "length": 4096, "depth": 1, "present": false, "zero": true, "data": false, "compressed": false},
+{ "start": 12288, "length": 4096, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false},
+{ "start": 16384, "length": 4096, "depth": 1, "present": false, "zero": true, "data": false, "compressed": false},
+{ "start": 20480, "length": 4096, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false},
+{ "start": 24576, "length": 4096, "depth": 1, "present": false, "zero": true, "data": false, "compressed": false},
+{ "start": 28672, "length": 4096, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false},
+{ "start": 32768, "length": 134184960, "depth": 1, "present": false, "zero": true, "data": false, "compressed": false}]
 
 == spanning two clusters, non-zero before request ==
 Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=134217728
@@ -142,16 +142,16 @@ read 1024/1024 bytes at offset 67584
 1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 read 5120/5120 bytes at offset 68608
 5 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-[{ "start": 0, "length": 32768, "depth": 1, "present": false, "zero": true, "data": false},
-{ "start": 32768, "length": 4096, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 36864, "length": 4096, "depth": 0, "present": true, "zero": true, "data": false},
-{ "start": 40960, "length": 8192, "depth": 1, "present": false, "zero": true, "data": false},
-{ "start": 49152, "length": 4096, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 53248, "length": 4096, "depth": 0, "present": true, "zero": true, "data": false},
-{ "start": 57344, "length": 8192, "depth": 1, "present": false, "zero": true, "data": false},
-{ "start": 65536, "length": 4096, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 69632, "length": 4096, "depth": 0, "present": true, "zero": true, "data": false},
-{ "start": 73728, "length": 134144000, "depth": 1, "present": false, "zero": true, "data": false}]
+[{ "start": 0, "length": 32768, "depth": 1, "present": false, "zero": true, "data": false, "compressed": false},
+{ "start": 32768, "length": 4096, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 36864, "length": 4096, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false},
+{ "start": 40960, "length": 8192, "depth": 1, "present": false, "zero": true, "data": false, "compressed": false},
+{ "start": 49152, "length": 4096, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 53248, "length": 4096, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false},
+{ "start": 57344, "length": 8192, "depth": 1, "present": false, "zero": true, "data": false, "compressed": false},
+{ "start": 65536, "length": 4096, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 69632, "length": 4096, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false},
+{ "start": 73728, "length": 134144000, "depth": 1, "present": false, "zero": true, "data": false, "compressed": false}]
 
 == spanning two clusters, non-zero after request ==
 Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=134217728
@@ -184,16 +184,16 @@ read 7168/7168 bytes at offset 65536
 7 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 read 1024/1024 bytes at offset 72704
 1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-[{ "start": 0, "length": 32768, "depth": 1, "present": false, "zero": true, "data": false},
-{ "start": 32768, "length": 4096, "depth": 0, "present": true, "zero": true, "data": false},
-{ "start": 36864, "length": 4096, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 40960, "length": 8192, "depth": 1, "present": false, "zero": true, "data": false},
-{ "start": 49152, "length": 4096, "depth": 0, "present": true, "zero": true, "data": false},
-{ "start": 53248, "length": 4096, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 57344, "length": 8192, "depth": 1, "present": false, "zero": true, "data": false},
-{ "start": 65536, "length": 4096, "depth": 0, "present": true, "zero": true, "data": false},
-{ "start": 69632, "length": 4096, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 73728, "length": 134144000, "depth": 1, "present": false, "zero": true, "data": false}]
+[{ "start": 0, "length": 32768, "depth": 1, "present": false, "zero": true, "data": false, "compressed": false},
+{ "start": 32768, "length": 4096, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false},
+{ "start": 36864, "length": 4096, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 40960, "length": 8192, "depth": 1, "present": false, "zero": true, "data": false, "compressed": false},
+{ "start": 49152, "length": 4096, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false},
+{ "start": 53248, "length": 4096, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 57344, "length": 8192, "depth": 1, "present": false, "zero": true, "data": false, "compressed": false},
+{ "start": 65536, "length": 4096, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false},
+{ "start": 69632, "length": 4096, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 73728, "length": 134144000, "depth": 1, "present": false, "zero": true, "data": false, "compressed": false}]
 
 == spanning two clusters, partially overwriting backing file ==
 Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=134217728
@@ -212,8 +212,8 @@ read 1024/1024 bytes at offset 5120
 1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 read 2048/2048 bytes at offset 6144
 2 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-[{ "start": 0, "length": 8192, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 8192, "length": 134209536, "depth": 1, "present": false, "zero": true, "data": false}]
+[{ "start": 0, "length": 8192, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 8192, "length": 134209536, "depth": 1, "present": false, "zero": true, "data": false, "compressed": false}]
 
 == spanning multiple clusters, non-zero in first cluster ==
 Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=134217728
@@ -226,10 +226,10 @@ read 2048/2048 bytes at offset 65536
 2 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 read 10240/10240 bytes at offset 67584
 10 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-[{ "start": 0, "length": 65536, "depth": 1, "present": false, "zero": true, "data": false},
-{ "start": 65536, "length": 4096, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 69632, "length": 8192, "depth": 0, "present": true, "zero": true, "data": false},
-{ "start": 77824, "length": 134139904, "depth": 1, "present": false, "zero": true, "data": false}]
+[{ "start": 0, "length": 65536, "depth": 1, "present": false, "zero": true, "data": false, "compressed": false},
+{ "start": 65536, "length": 4096, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 69632, "length": 8192, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false},
+{ "start": 77824, "length": 134139904, "depth": 1, "present": false, "zero": true, "data": false, "compressed": false}]
 
 == spanning multiple clusters, non-zero in intermediate cluster ==
 Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=134217728
@@ -240,9 +240,9 @@ wrote 7168/7168 bytes at offset 67584
 7 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 read 12288/12288 bytes at offset 65536
 12 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-[{ "start": 0, "length": 65536, "depth": 1, "present": false, "zero": true, "data": false},
-{ "start": 65536, "length": 12288, "depth": 0, "present": true, "zero": true, "data": false},
-{ "start": 77824, "length": 134139904, "depth": 1, "present": false, "zero": true, "data": false}]
+[{ "start": 0, "length": 65536, "depth": 1, "present": false, "zero": true, "data": false, "compressed": false},
+{ "start": 65536, "length": 12288, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false},
+{ "start": 77824, "length": 134139904, "depth": 1, "present": false, "zero": true, "data": false, "compressed": false}]
 
 == spanning multiple clusters, non-zero in final cluster ==
 Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=134217728
@@ -255,10 +255,10 @@ read 10240/10240 bytes at offset 65536
 10 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 read 2048/2048 bytes at offset 75776
 2 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-[{ "start": 0, "length": 65536, "depth": 1, "present": false, "zero": true, "data": false},
-{ "start": 65536, "length": 8192, "depth": 0, "present": true, "zero": true, "data": false},
-{ "start": 73728, "length": 4096, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 77824, "length": 134139904, "depth": 1, "present": false, "zero": true, "data": false}]
+[{ "start": 0, "length": 65536, "depth": 1, "present": false, "zero": true, "data": false, "compressed": false},
+{ "start": 65536, "length": 8192, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false},
+{ "start": 73728, "length": 4096, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 77824, "length": 134139904, "depth": 1, "present": false, "zero": true, "data": false, "compressed": false}]
 
 == spanning multiple clusters, partially overwriting backing file ==
 Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=134217728
@@ -277,88 +277,88 @@ read 2048/2048 bytes at offset 74752
 2 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 read 1024/1024 bytes at offset 76800
 1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-[{ "start": 0, "length": 65536, "depth": 1, "present": false, "zero": true, "data": false},
-{ "start": 65536, "length": 4096, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 69632, "length": 4096, "depth": 0, "present": true, "zero": true, "data": false},
-{ "start": 73728, "length": 4096, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 77824, "length": 134139904, "depth": 1, "present": false, "zero": true, "data": false}]
+[{ "start": 0, "length": 65536, "depth": 1, "present": false, "zero": true, "data": false, "compressed": false},
+{ "start": 65536, "length": 4096, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 69632, "length": 4096, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false},
+{ "start": 73728, "length": 4096, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 77824, "length": 134139904, "depth": 1, "present": false, "zero": true, "data": false, "compressed": false}]
 
 == unaligned image tail cluster, no allocation needed ==
 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134219776
 wrote 512/512 bytes at offset 134217728
 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 2048/2048 bytes allocated at offset 128 MiB
-[{ "start": 0, "length": 134217728, "depth": 0, "present": false, "zero": true, "data": false},
-{ "start": 134217728, "length": 2048, "depth": 0, "present": true, "zero": true, "data": false}]
+[{ "start": 0, "length": 134217728, "depth": 0, "present": false, "zero": true, "data": false, "compressed": false},
+{ "start": 134217728, "length": 2048, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false}]
 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134219776
 wrote 512/512 bytes at offset 134219264
 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 2048/2048 bytes allocated at offset 128 MiB
-[{ "start": 0, "length": 134217728, "depth": 0, "present": false, "zero": true, "data": false},
-{ "start": 134217728, "length": 2048, "depth": 0, "present": true, "zero": true, "data": false}]
+[{ "start": 0, "length": 134217728, "depth": 0, "present": false, "zero": true, "data": false, "compressed": false},
+{ "start": 134217728, "length": 2048, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false}]
 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134219776
 wrote 1024/1024 bytes at offset 134218240
 1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 2048/2048 bytes allocated at offset 128 MiB
-[{ "start": 0, "length": 134217728, "depth": 0, "present": false, "zero": true, "data": false},
-{ "start": 134217728, "length": 2048, "depth": 0, "present": true, "zero": true, "data": false}]
+[{ "start": 0, "length": 134217728, "depth": 0, "present": false, "zero": true, "data": false, "compressed": false},
+{ "start": 134217728, "length": 2048, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false}]
 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134219776
 wrote 2048/2048 bytes at offset 134217728
 2 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 2048/2048 bytes allocated at offset 128 MiB
-[{ "start": 0, "length": 134217728, "depth": 0, "present": false, "zero": true, "data": false},
-{ "start": 134217728, "length": 2048, "depth": 0, "present": true, "zero": true, "data": false}]
+[{ "start": 0, "length": 134217728, "depth": 0, "present": false, "zero": true, "data": false, "compressed": false},
+{ "start": 134217728, "length": 2048, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false}]
 Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=134218752
 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134219776 backing_file=TEST_DIR/t.IMGFMT.base backing_fmt=IMGFMT
 wrote 512/512 bytes at offset 134217728
 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 2048/2048 bytes allocated at offset 128 MiB
-[{ "start": 0, "length": 134217728, "depth": 1, "present": false, "zero": true, "data": false},
-{ "start": 134217728, "length": 2048, "depth": 0, "present": true, "zero": true, "data": false}]
+[{ "start": 0, "length": 134217728, "depth": 1, "present": false, "zero": true, "data": false, "compressed": false},
+{ "start": 134217728, "length": 2048, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false}]
 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134219776 backing_file=TEST_DIR/t.IMGFMT.base backing_fmt=IMGFMT
 wrote 512/512 bytes at offset 134219264
 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 2048/2048 bytes allocated at offset 128 MiB
-[{ "start": 0, "length": 134217728, "depth": 1, "present": false, "zero": true, "data": false},
-{ "start": 134217728, "length": 2048, "depth": 0, "present": true, "zero": true, "data": false}]
+[{ "start": 0, "length": 134217728, "depth": 1, "present": false, "zero": true, "data": false, "compressed": false},
+{ "start": 134217728, "length": 2048, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false}]
 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134219776 backing_file=TEST_DIR/t.IMGFMT.base backing_fmt=IMGFMT
 wrote 1024/1024 bytes at offset 134218240
 1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 2048/2048 bytes allocated at offset 128 MiB
-[{ "start": 0, "length": 134217728, "depth": 1, "present": false, "zero": true, "data": false},
-{ "start": 134217728, "length": 2048, "depth": 0, "present": true, "zero": true, "data": false}]
+[{ "start": 0, "length": 134217728, "depth": 1, "present": false, "zero": true, "data": false, "compressed": false},
+{ "start": 134217728, "length": 2048, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false}]
 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134219776 backing_file=TEST_DIR/t.IMGFMT.base backing_fmt=IMGFMT
 wrote 2048/2048 bytes at offset 134217728
 2 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 2048/2048 bytes allocated at offset 128 MiB
-[{ "start": 0, "length": 134217728, "depth": 1, "present": false, "zero": true, "data": false},
-{ "start": 134217728, "length": 2048, "depth": 0, "present": true, "zero": true, "data": false}]
+[{ "start": 0, "length": 134217728, "depth": 1, "present": false, "zero": true, "data": false, "compressed": false},
+{ "start": 134217728, "length": 2048, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false}]
 wrote 512/512 bytes at offset 134217728
 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134219776 backing_file=TEST_DIR/t.IMGFMT.base backing_fmt=IMGFMT
 wrote 512/512 bytes at offset 134217728
 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 2048/2048 bytes allocated at offset 128 MiB
-[{ "start": 0, "length": 134217728, "depth": 1, "present": false, "zero": true, "data": false},
-{ "start": 134217728, "length": 2048, "depth": 0, "present": true, "zero": true, "data": false}]
+[{ "start": 0, "length": 134217728, "depth": 1, "present": false, "zero": true, "data": false, "compressed": false},
+{ "start": 134217728, "length": 2048, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false}]
 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134219776 backing_file=TEST_DIR/t.IMGFMT.base backing_fmt=IMGFMT
 wrote 512/512 bytes at offset 134219264
 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 2048/2048 bytes allocated at offset 128 MiB
-[{ "start": 0, "length": 134217728, "depth": 1, "present": false, "zero": true, "data": false},
-{ "start": 134217728, "length": 2048, "depth": 0, "present": true, "zero": true, "data": false}]
+[{ "start": 0, "length": 134217728, "depth": 1, "present": false, "zero": true, "data": false, "compressed": false},
+{ "start": 134217728, "length": 2048, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false}]
 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134219776 backing_file=TEST_DIR/t.IMGFMT.base backing_fmt=IMGFMT
 wrote 1024/1024 bytes at offset 134218240
 1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 2048/2048 bytes allocated at offset 128 MiB
-[{ "start": 0, "length": 134217728, "depth": 1, "present": false, "zero": true, "data": false},
-{ "start": 134217728, "length": 2048, "depth": 0, "present": true, "zero": true, "data": false}]
+[{ "start": 0, "length": 134217728, "depth": 1, "present": false, "zero": true, "data": false, "compressed": false},
+{ "start": 134217728, "length": 2048, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false}]
 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134219776 backing_file=TEST_DIR/t.IMGFMT.base backing_fmt=IMGFMT
 wrote 2048/2048 bytes at offset 134217728
 2 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 2048/2048 bytes allocated at offset 128 MiB
-[{ "start": 0, "length": 134217728, "depth": 1, "present": false, "zero": true, "data": false},
-{ "start": 134217728, "length": 2048, "depth": 0, "present": true, "zero": true, "data": false}]
+[{ "start": 0, "length": 134217728, "depth": 1, "present": false, "zero": true, "data": false, "compressed": false},
+{ "start": 134217728, "length": 2048, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false}]
 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134218752
 wrote 1024/1024 bytes at offset 134217728
 1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
@@ -369,15 +369,15 @@ read 512/512 bytes at offset 134217728
 read 512/512 bytes at offset 134218240
 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 1024/1024 bytes allocated at offset 128 MiB
-[{ "start": 0, "length": 134217728, "depth": 0, "present": false, "zero": true, "data": false},
-{ "start": 134217728, "length": 1024, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET}]
+[{ "start": 0, "length": 134217728, "depth": 0, "present": false, "zero": true, "data": false, "compressed": false},
+{ "start": 134217728, "length": 1024, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET}]
 wrote 1024/1024 bytes at offset 134217728
 1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 1024/1024 bytes allocated at offset 128 MiB
 read 1024/1024 bytes at offset 134217728
 1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-[{ "start": 0, "length": 134217728, "depth": 0, "present": false, "zero": true, "data": false},
-{ "start": 134217728, "length": 1024, "depth": 0, "present": true, "zero": true, "data": false, "offset": OFFSET}]
+[{ "start": 0, "length": 134217728, "depth": 0, "present": false, "zero": true, "data": false, "compressed": false},
+{ "start": 134217728, "length": 1024, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false, "offset": OFFSET}]
 
 == unaligned image tail cluster, allocation required ==
 Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=134218752
@@ -390,8 +390,8 @@ read 512/512 bytes at offset 134217728
 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 read 1536/1536 bytes at offset 134218240
 1.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-[{ "start": 0, "length": 134217728, "depth": 1, "present": false, "zero": true, "data": false},
-{ "start": 134217728, "length": 2048, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET}]
+[{ "start": 0, "length": 134217728, "depth": 1, "present": false, "zero": true, "data": false, "compressed": false},
+{ "start": 134217728, "length": 2048, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET}]
 Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=134218752
 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134219776 backing_file=TEST_DIR/t.IMGFMT.base backing_fmt=IMGFMT
 wrote 512/512 bytes at offset 134218240
@@ -412,6 +412,6 @@ read 512/512 bytes at offset 134218240
 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 read 1024/1024 bytes at offset 134218752
 1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-[{ "start": 0, "length": 134217728, "depth": 1, "present": false, "zero": true, "data": false},
-{ "start": 134217728, "length": 2048, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET}]
+[{ "start": 0, "length": 134217728, "depth": 1, "present": false, "zero": true, "data": false, "compressed": false},
+{ "start": 134217728, "length": 2048, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET}]
 *** done
diff --git a/tests/qemu-iotests/179.out b/tests/qemu-iotests/179.out
index 7cf22cd75f..65b909ebc2 100644
--- a/tests/qemu-iotests/179.out
+++ b/tests/qemu-iotests/179.out
@@ -13,11 +13,11 @@ wrote 2097152/2097152 bytes at offset 6291456
 2 MiB (0x200000) bytes not allocated at offset 4 MiB (0x400000)
 2 MiB (0x200000) bytes     allocated at offset 6 MiB (0x600000)
 56 MiB (0x3800000) bytes not allocated at offset 8 MiB (0x800000)
-[{ "start": 0, "length": 2097152, "depth": 0, "present": false, "zero": true, "data": false},
-{ "start": 2097152, "length": 2097152, "depth": 0, "present": true, "zero": true, "data": false},
-{ "start": 4194304, "length": 2097152, "depth": 0, "present": false, "zero": true, "data": false},
-{ "start": 6291456, "length": 2097152, "depth": 0, "present": true, "zero": true, "data": false},
-{ "start": 8388608, "length": 58720256, "depth": 0, "present": false, "zero": true, "data": false}]
+[{ "start": 0, "length": 2097152, "depth": 0, "present": false, "zero": true, "data": false, "compressed": false},
+{ "start": 2097152, "length": 2097152, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false},
+{ "start": 4194304, "length": 2097152, "depth": 0, "present": false, "zero": true, "data": false, "compressed": false},
+{ "start": 6291456, "length": 2097152, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false},
+{ "start": 8388608, "length": 58720256, "depth": 0, "present": false, "zero": true, "data": false, "compressed": false}]
 wrote 2097150/2097150 bytes at offset 10485761
 2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 wrote 2097150/2097150 bytes at offset 14680065
@@ -31,15 +31,15 @@ wrote 2097150/2097150 bytes at offset 14680065
 2 MiB (0x200000) bytes not allocated at offset 12 MiB (0xc00000)
 2 MiB (0x200000) bytes     allocated at offset 14 MiB (0xe00000)
 48 MiB (0x3000000) bytes not allocated at offset 16 MiB (0x1000000)
-[{ "start": 0, "length": 2097152, "depth": 0, "present": false, "zero": true, "data": false},
-{ "start": 2097152, "length": 2097152, "depth": 0, "present": true, "zero": true, "data": false},
-{ "start": 4194304, "length": 2097152, "depth": 0, "present": false, "zero": true, "data": false},
-{ "start": 6291456, "length": 2097152, "depth": 0, "present": true, "zero": true, "data": false},
-{ "start": 8388608, "length": 2097152, "depth": 0, "present": false, "zero": true, "data": false},
-{ "start": 10485760, "length": 2097152, "depth": 0, "present": true, "zero": true, "data": false},
-{ "start": 12582912, "length": 2097152, "depth": 0, "present": false, "zero": true, "data": false},
-{ "start": 14680064, "length": 2097152, "depth": 0, "present": true, "zero": true, "data": false},
-{ "start": 16777216, "length": 50331648, "depth": 0, "present": false, "zero": true, "data": false}]
+[{ "start": 0, "length": 2097152, "depth": 0, "present": false, "zero": true, "data": false, "compressed": false},
+{ "start": 2097152, "length": 2097152, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false},
+{ "start": 4194304, "length": 2097152, "depth": 0, "present": false, "zero": true, "data": false, "compressed": false},
+{ "start": 6291456, "length": 2097152, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false},
+{ "start": 8388608, "length": 2097152, "depth": 0, "present": false, "zero": true, "data": false, "compressed": false},
+{ "start": 10485760, "length": 2097152, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false},
+{ "start": 12582912, "length": 2097152, "depth": 0, "present": false, "zero": true, "data": false, "compressed": false},
+{ "start": 14680064, "length": 2097152, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false},
+{ "start": 16777216, "length": 50331648, "depth": 0, "present": false, "zero": true, "data": false, "compressed": false}]
 wrote 14680064/14680064 bytes at offset 18874368
 14 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 wrote 2097152/2097152 bytes at offset 20971520
@@ -57,21 +57,21 @@ wrote 6291456/6291456 bytes at offset 25165824
 2 MiB (0x200000) bytes not allocated at offset 16 MiB (0x1000000)
 14 MiB (0xe00000) bytes     allocated at offset 18 MiB (0x1200000)
 32 MiB (0x2000000) bytes not allocated at offset 32 MiB (0x2000000)
-[{ "start": 0, "length": 2097152, "depth": 0, "present": false, "zero": true, "data": false},
-{ "start": 2097152, "length": 2097152, "depth": 0, "present": true, "zero": true, "data": false},
-{ "start": 4194304, "length": 2097152, "depth": 0, "present": false, "zero": true, "data": false},
-{ "start": 6291456, "length": 2097152, "depth": 0, "present": true, "zero": true, "data": false},
-{ "start": 8388608, "length": 2097152, "depth": 0, "present": false, "zero": true, "data": false},
-{ "start": 10485760, "length": 2097152, "depth": 0, "present": true, "zero": true, "data": false},
-{ "start": 12582912, "length": 2097152, "depth": 0, "present": false, "zero": true, "data": false},
-{ "start": 14680064, "length": 2097152, "depth": 0, "present": true, "zero": true, "data": false},
-{ "start": 16777216, "length": 2097152, "depth": 0, "present": false, "zero": true, "data": false},
-{ "start": 18874368, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 20971520, "length": 2097152, "depth": 0, "present": true, "zero": true, "data": false},
-{ "start": 23068672, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 25165824, "length": 6291456, "depth": 0, "present": true, "zero": true, "data": false, "offset": OFFSET},
-{ "start": 31457280, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 33554432, "length": 33554432, "depth": 0, "present": false, "zero": true, "data": false}]
+[{ "start": 0, "length": 2097152, "depth": 0, "present": false, "zero": true, "data": false, "compressed": false},
+{ "start": 2097152, "length": 2097152, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false},
+{ "start": 4194304, "length": 2097152, "depth": 0, "present": false, "zero": true, "data": false, "compressed": false},
+{ "start": 6291456, "length": 2097152, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false},
+{ "start": 8388608, "length": 2097152, "depth": 0, "present": false, "zero": true, "data": false, "compressed": false},
+{ "start": 10485760, "length": 2097152, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false},
+{ "start": 12582912, "length": 2097152, "depth": 0, "present": false, "zero": true, "data": false, "compressed": false},
+{ "start": 14680064, "length": 2097152, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false},
+{ "start": 16777216, "length": 2097152, "depth": 0, "present": false, "zero": true, "data": false, "compressed": false},
+{ "start": 18874368, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 20971520, "length": 2097152, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false},
+{ "start": 23068672, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 25165824, "length": 6291456, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false, "offset": OFFSET},
+{ "start": 31457280, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 33554432, "length": 33554432, "depth": 0, "present": false, "zero": true, "data": false, "compressed": false}]
 wrote 2097152/2097152 bytes at offset 27262976
 2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 wrote 2097152/2097152 bytes at offset 29360128
@@ -87,23 +87,23 @@ wrote 2097152/2097152 bytes at offset 29360128
 2 MiB (0x200000) bytes not allocated at offset 16 MiB (0x1000000)
 14 MiB (0xe00000) bytes     allocated at offset 18 MiB (0x1200000)
 32 MiB (0x2000000) bytes not allocated at offset 32 MiB (0x2000000)
-[{ "start": 0, "length": 2097152, "depth": 0, "present": false, "zero": true, "data": false},
-{ "start": 2097152, "length": 2097152, "depth": 0, "present": true, "zero": true, "data": false},
-{ "start": 4194304, "length": 2097152, "depth": 0, "present": false, "zero": true, "data": false},
-{ "start": 6291456, "length": 2097152, "depth": 0, "present": true, "zero": true, "data": false},
-{ "start": 8388608, "length": 2097152, "depth": 0, "present": false, "zero": true, "data": false},
-{ "start": 10485760, "length": 2097152, "depth": 0, "present": true, "zero": true, "data": false},
-{ "start": 12582912, "length": 2097152, "depth": 0, "present": false, "zero": true, "data": false},
-{ "start": 14680064, "length": 2097152, "depth": 0, "present": true, "zero": true, "data": false},
-{ "start": 16777216, "length": 2097152, "depth": 0, "present": false, "zero": true, "data": false},
-{ "start": 18874368, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 20971520, "length": 2097152, "depth": 0, "present": true, "zero": true, "data": false},
-{ "start": 23068672, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 25165824, "length": 2097152, "depth": 0, "present": true, "zero": true, "data": false, "offset": OFFSET},
-{ "start": 27262976, "length": 2097152, "depth": 0, "present": true, "zero": true, "data": false},
-{ "start": 29360128, "length": 2097152, "depth": 0, "present": true, "zero": true, "data": false, "offset": OFFSET},
-{ "start": 31457280, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 33554432, "length": 33554432, "depth": 0, "present": false, "zero": true, "data": false}]
+[{ "start": 0, "length": 2097152, "depth": 0, "present": false, "zero": true, "data": false, "compressed": false},
+{ "start": 2097152, "length": 2097152, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false},
+{ "start": 4194304, "length": 2097152, "depth": 0, "present": false, "zero": true, "data": false, "compressed": false},
+{ "start": 6291456, "length": 2097152, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false},
+{ "start": 8388608, "length": 2097152, "depth": 0, "present": false, "zero": true, "data": false, "compressed": false},
+{ "start": 10485760, "length": 2097152, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false},
+{ "start": 12582912, "length": 2097152, "depth": 0, "present": false, "zero": true, "data": false, "compressed": false},
+{ "start": 14680064, "length": 2097152, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false},
+{ "start": 16777216, "length": 2097152, "depth": 0, "present": false, "zero": true, "data": false, "compressed": false},
+{ "start": 18874368, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 20971520, "length": 2097152, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false},
+{ "start": 23068672, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 25165824, "length": 2097152, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false, "offset": OFFSET},
+{ "start": 27262976, "length": 2097152, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false},
+{ "start": 29360128, "length": 2097152, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false, "offset": OFFSET},
+{ "start": 31457280, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 33554432, "length": 33554432, "depth": 0, "present": false, "zero": true, "data": false, "compressed": false}]
 wrote 8388608/8388608 bytes at offset 33554432
 8 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 wrote 2097152/2097152 bytes at offset 35651584
@@ -121,24 +121,24 @@ wrote 2097152/2097152 bytes at offset 37748736
 2 MiB (0x200000) bytes not allocated at offset 16 MiB (0x1000000)
 22 MiB (0x1600000) bytes     allocated at offset 18 MiB (0x1200000)
 24 MiB (0x1800000) bytes not allocated at offset 40 MiB (0x2800000)
-[{ "start": 0, "length": 2097152, "depth": 0, "present": false, "zero": true, "data": false},
-{ "start": 2097152, "length": 2097152, "depth": 0, "present": true, "zero": true, "data": false},
-{ "start": 4194304, "length": 2097152, "depth": 0, "present": false, "zero": true, "data": false},
-{ "start": 6291456, "length": 2097152, "depth": 0, "present": true, "zero": true, "data": false},
-{ "start": 8388608, "length": 2097152, "depth": 0, "present": false, "zero": true, "data": false},
-{ "start": 10485760, "length": 2097152, "depth": 0, "present": true, "zero": true, "data": false},
-{ "start": 12582912, "length": 2097152, "depth": 0, "present": false, "zero": true, "data": false},
-{ "start": 14680064, "length": 2097152, "depth": 0, "present": true, "zero": true, "data": false},
-{ "start": 16777216, "length": 2097152, "depth": 0, "present": false, "zero": true, "data": false},
-{ "start": 18874368, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 20971520, "length": 2097152, "depth": 0, "present": true, "zero": true, "data": false},
-{ "start": 23068672, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 25165824, "length": 2097152, "depth": 0, "present": true, "zero": true, "data": false, "offset": OFFSET},
-{ "start": 27262976, "length": 2097152, "depth": 0, "present": true, "zero": true, "data": false},
-{ "start": 29360128, "length": 2097152, "depth": 0, "present": true, "zero": true, "data": false, "offset": OFFSET},
-{ "start": 31457280, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 33554432, "length": 8388608, "depth": 0, "present": true, "zero": true, "data": false},
-{ "start": 41943040, "length": 25165824, "depth": 0, "present": false, "zero": true, "data": false}]
+[{ "start": 0, "length": 2097152, "depth": 0, "present": false, "zero": true, "data": false, "compressed": false},
+{ "start": 2097152, "length": 2097152, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false},
+{ "start": 4194304, "length": 2097152, "depth": 0, "present": false, "zero": true, "data": false, "compressed": false},
+{ "start": 6291456, "length": 2097152, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false},
+{ "start": 8388608, "length": 2097152, "depth": 0, "present": false, "zero": true, "data": false, "compressed": false},
+{ "start": 10485760, "length": 2097152, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false},
+{ "start": 12582912, "length": 2097152, "depth": 0, "present": false, "zero": true, "data": false, "compressed": false},
+{ "start": 14680064, "length": 2097152, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false},
+{ "start": 16777216, "length": 2097152, "depth": 0, "present": false, "zero": true, "data": false, "compressed": false},
+{ "start": 18874368, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 20971520, "length": 2097152, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false},
+{ "start": 23068672, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 25165824, "length": 2097152, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false, "offset": OFFSET},
+{ "start": 27262976, "length": 2097152, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false},
+{ "start": 29360128, "length": 2097152, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false, "offset": OFFSET},
+{ "start": 31457280, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 33554432, "length": 8388608, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false},
+{ "start": 41943040, "length": 25165824, "depth": 0, "present": false, "zero": true, "data": false, "compressed": false}]
 wrote 8388608/8388608 bytes at offset 41943040
 8 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 wrote 8388608/8388608 bytes at offset 50331648
@@ -162,31 +162,31 @@ wrote 2097152/2097152 bytes at offset 62914560
 4 MiB (0x400000) bytes not allocated at offset 54 MiB (0x3600000)
 4 MiB (0x400000) bytes     allocated at offset 58 MiB (0x3a00000)
 2 MiB (0x200000) bytes not allocated at offset 62 MiB (0x3e00000)
-[{ "start": 0, "length": 2097152, "depth": 1, "present": false, "zero": true, "data": false},
-{ "start": 2097152, "length": 2097152, "depth": 1, "present": true, "zero": true, "data": false},
-{ "start": 4194304, "length": 2097152, "depth": 1, "present": false, "zero": true, "data": false},
-{ "start": 6291456, "length": 2097152, "depth": 1, "present": true, "zero": true, "data": false},
-{ "start": 8388608, "length": 2097152, "depth": 1, "present": false, "zero": true, "data": false},
-{ "start": 10485760, "length": 2097152, "depth": 1, "present": true, "zero": true, "data": false},
-{ "start": 12582912, "length": 2097152, "depth": 1, "present": false, "zero": true, "data": false},
-{ "start": 14680064, "length": 2097152, "depth": 1, "present": true, "zero": true, "data": false},
-{ "start": 16777216, "length": 2097152, "depth": 1, "present": false, "zero": true, "data": false},
-{ "start": 18874368, "length": 2097152, "depth": 1, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 20971520, "length": 2097152, "depth": 1, "present": true, "zero": true, "data": false},
-{ "start": 23068672, "length": 2097152, "depth": 1, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 25165824, "length": 2097152, "depth": 1, "present": true, "zero": true, "data": false, "offset": OFFSET},
-{ "start": 27262976, "length": 2097152, "depth": 1, "present": true, "zero": true, "data": false},
-{ "start": 29360128, "length": 2097152, "depth": 1, "present": true, "zero": true, "data": false, "offset": OFFSET},
-{ "start": 31457280, "length": 2097152, "depth": 1, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 33554432, "length": 10485760, "depth": 1, "present": true, "zero": true, "data": false},
-{ "start": 44040192, "length": 4194304, "depth": 0, "present": true, "zero": true, "data": false},
-{ "start": 48234496, "length": 2097152, "depth": 1, "present": true, "zero": true, "data": false},
-{ "start": 50331648, "length": 2097152, "depth": 1, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 52428800, "length": 4194304, "depth": 0, "present": true, "zero": true, "data": false},
-{ "start": 56623104, "length": 2097152, "depth": 1, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 58720256, "length": 2097152, "depth": 1, "present": false, "zero": true, "data": false},
-{ "start": 60817408, "length": 4194304, "depth": 0, "present": true, "zero": true, "data": false},
-{ "start": 65011712, "length": 2097152, "depth": 1, "present": false, "zero": true, "data": false}]
+[{ "start": 0, "length": 2097152, "depth": 1, "present": false, "zero": true, "data": false, "compressed": false},
+{ "start": 2097152, "length": 2097152, "depth": 1, "present": true, "zero": true, "data": false, "compressed": false},
+{ "start": 4194304, "length": 2097152, "depth": 1, "present": false, "zero": true, "data": false, "compressed": false},
+{ "start": 6291456, "length": 2097152, "depth": 1, "present": true, "zero": true, "data": false, "compressed": false},
+{ "start": 8388608, "length": 2097152, "depth": 1, "present": false, "zero": true, "data": false, "compressed": false},
+{ "start": 10485760, "length": 2097152, "depth": 1, "present": true, "zero": true, "data": false, "compressed": false},
+{ "start": 12582912, "length": 2097152, "depth": 1, "present": false, "zero": true, "data": false, "compressed": false},
+{ "start": 14680064, "length": 2097152, "depth": 1, "present": true, "zero": true, "data": false, "compressed": false},
+{ "start": 16777216, "length": 2097152, "depth": 1, "present": false, "zero": true, "data": false, "compressed": false},
+{ "start": 18874368, "length": 2097152, "depth": 1, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 20971520, "length": 2097152, "depth": 1, "present": true, "zero": true, "data": false, "compressed": false},
+{ "start": 23068672, "length": 2097152, "depth": 1, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 25165824, "length": 2097152, "depth": 1, "present": true, "zero": true, "data": false, "compressed": false, "offset": OFFSET},
+{ "start": 27262976, "length": 2097152, "depth": 1, "present": true, "zero": true, "data": false, "compressed": false},
+{ "start": 29360128, "length": 2097152, "depth": 1, "present": true, "zero": true, "data": false, "compressed": false, "offset": OFFSET},
+{ "start": 31457280, "length": 2097152, "depth": 1, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 33554432, "length": 10485760, "depth": 1, "present": true, "zero": true, "data": false, "compressed": false},
+{ "start": 44040192, "length": 4194304, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false},
+{ "start": 48234496, "length": 2097152, "depth": 1, "present": true, "zero": true, "data": false, "compressed": false},
+{ "start": 50331648, "length": 2097152, "depth": 1, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 52428800, "length": 4194304, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false},
+{ "start": 56623104, "length": 2097152, "depth": 1, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 58720256, "length": 2097152, "depth": 1, "present": false, "zero": true, "data": false, "compressed": false},
+{ "start": 60817408, "length": 4194304, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false},
+{ "start": 65011712, "length": 2097152, "depth": 1, "present": false, "zero": true, "data": false, "compressed": false}]
 No errors were found on the image.
 No errors were found on the image.
 
diff --git a/tests/qemu-iotests/181 b/tests/qemu-iotests/181
index cb96d09ae5..dc90a10757 100755
--- a/tests/qemu-iotests/181
+++ b/tests/qemu-iotests/181
@@ -109,7 +109,7 @@ if [ ${QEMU_STATUS[$dest]} -lt 0 ]; then
     _notrun 'Postcopy is not supported'
 fi
 
-_send_qemu_cmd $src 'migrate_set_parameter max_bandwidth 4k' "(qemu)"
+_send_qemu_cmd $src 'migrate_set_parameter max-bandwidth 4k' "(qemu)"
 _send_qemu_cmd $src 'migrate_set_capability postcopy-ram on' "(qemu)"
 _send_qemu_cmd $src "migrate -d unix:${MIG_SOCKET}" "(qemu)"
 _send_qemu_cmd $src 'migrate_start_postcopy' "(qemu)"
diff --git a/tests/qemu-iotests/185 b/tests/qemu-iotests/185
index 8b1143dc16..2ae0a85bbf 100755
--- a/tests/qemu-iotests/185
+++ b/tests/qemu-iotests/185
@@ -354,6 +354,8 @@ wait_for_job_and_quit() {
     QEMU_EVENTS= # Ignore all JOB_STATUS_CHANGE events that came before SHUTDOWN
     _wait_event $h 'JOB_STATUS_CHANGE' # standby
     _wait_event $h 'JOB_STATUS_CHANGE' # ready
+    _wait_event $h 'JOB_STATUS_CHANGE' # standby
+    _wait_event $h 'JOB_STATUS_CHANGE' # ready
     _wait_event $h 'JOB_STATUS_CHANGE' # aborting
     # Filter the offset (depends on when exactly `quit` was issued)
     _wait_event $h 'BLOCK_JOB_CANCELLED' \
diff --git a/tests/qemu-iotests/185.out b/tests/qemu-iotests/185.out
index 70e8dd6c87..7292c26bae 100644
--- a/tests/qemu-iotests/185.out
+++ b/tests/qemu-iotests/185.out
@@ -137,6 +137,8 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "mirror"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "mirror"}}
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "mirror"}}
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "mirror"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "aborting", "id": "mirror"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "mirror", "len": 33554432, "offset": (filtered), "speed": 0, "type": "mirror"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "mirror"}}
@@ -160,6 +162,8 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "commit"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "commit"}}
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "commit"}}
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "commit"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "aborting", "id": "commit"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "commit", "len": 33554432, "offset": (filtered), "speed": 0, "type": "commit"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "commit"}}
diff --git a/tests/qemu-iotests/197 b/tests/qemu-iotests/197
index f07a9da136..69849c800e 100755
--- a/tests/qemu-iotests/197
+++ b/tests/qemu-iotests/197
@@ -93,7 +93,7 @@ output=$($QEMU_IO -f qcow2 -C -c "read -P 0 1k $((2*1024*1024*1024 - 512))" \
         "$TEST_WRAP" 2>&1 | _filter_qemu_io)
 case $output in
     *allocate*)
-        _notrun "Insufficent memory to run test" ;;
+        _notrun "Insufficient memory to run test" ;;
     *) printf '%s\n' "$output" ;;
 esac
 $QEMU_IO -f qcow2 -C -c "read -P 0 $((3*1024*1024*1024 + 1024)) 1k" \
@@ -136,18 +136,18 @@ IMGPROTO=file IMGFMT=qcow2 TEST_IMG_FILE="$TEST_WRAP" \
 $QEMU_IO -c "write -P 0xaa 0 64k" "$TEST_IMG" | _filter_qemu_io
 
 # Allocate individual subclusters in the top image, and not the whole cluster
-$QEMU_IO -c "write -P 0xbb 28K 2K" -c "write -P 0xcc 34K 2K" "$TEST_WRAP" \
+$QEMU_IO -f qcow2 -c "write -P 0xbb 28K 2K" -c "write -P 0xcc 34K 2K" "$TEST_WRAP" \
     | _filter_qemu_io
 
 # Only 2 subclusters should be allocated in the top image at this point
-$QEMU_IMG map "$TEST_WRAP" | _filter_qemu_img_map
+$QEMU_IO -f qcow2 -c map "$TEST_WRAP"
 
 # Actual copy-on-read operation
-$QEMU_IO -C -c "read -P 0xaa 30K 4K" "$TEST_WRAP" | _filter_qemu_io
+$QEMU_IO -f qcow2 -C -c "read -P 0xaa 30K 4K" "$TEST_WRAP" | _filter_qemu_io
 
 # And here we should have 4 subclusters allocated right in the middle of the
 # top image. Make sure the whole cluster remains unallocated
-$QEMU_IMG map "$TEST_WRAP" | _filter_qemu_img_map
+$QEMU_IO -f qcow2 -c map "$TEST_WRAP"
 
 _check_test_img
 
diff --git a/tests/qemu-iotests/197.out b/tests/qemu-iotests/197.out
index 8f34a30afe..86c57b51d3 100644
--- a/tests/qemu-iotests/197.out
+++ b/tests/qemu-iotests/197.out
@@ -42,17 +42,15 @@ wrote 2048/2048 bytes at offset 28672
 2 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 wrote 2048/2048 bytes at offset 34816
 2 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-Offset          Length          File
-0               0x7000          TEST_DIR/t.IMGFMT
-0x7000          0x800           TEST_DIR/t.wrap.IMGFMT
-0x7800          0x1000          TEST_DIR/t.IMGFMT
-0x8800          0x800           TEST_DIR/t.wrap.IMGFMT
-0x9000          0x7000          TEST_DIR/t.IMGFMT
+28 KiB (0x7000) bytes not allocated at offset 0 bytes (0x0)
+2 KiB (0x800) bytes     allocated at offset 28 KiB (0x7000)
+4 KiB (0x1000) bytes not allocated at offset 30 KiB (0x7800)
+2 KiB (0x800) bytes     allocated at offset 34 KiB (0x8800)
+28 KiB (0x7000) bytes not allocated at offset 36 KiB (0x9000)
 read 4096/4096 bytes at offset 30720
 4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-Offset          Length          File
-0               0x7000          TEST_DIR/t.IMGFMT
-0x7000          0x2000          TEST_DIR/t.wrap.IMGFMT
-0x9000          0x7000          TEST_DIR/t.IMGFMT
+28 KiB (0x7000) bytes not allocated at offset 0 bytes (0x0)
+8 KiB (0x2000) bytes     allocated at offset 28 KiB (0x7000)
+28 KiB (0x7000) bytes not allocated at offset 36 KiB (0x9000)
 No errors were found on the image.
 *** done
diff --git a/tests/qemu-iotests/209.out b/tests/qemu-iotests/209.out
index 515906ac7a..79c4103a22 100644
--- a/tests/qemu-iotests/209.out
+++ b/tests/qemu-iotests/209.out
@@ -1,4 +1,4 @@
-[{ "start": 0, "length": 524288, "depth": 0, "present": true, "zero": false, "data": true, "offset": 0},
-{ "start": 524288, "length": 524288, "depth": 0, "present": true, "zero": true, "data": false, "offset": 524288}]
+[{ "start": 0, "length": 524288, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": 0},
+{ "start": 524288, "length": 524288, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false, "offset": 524288}]
 
 done.
diff --git a/tests/qemu-iotests/215 b/tests/qemu-iotests/215
index d464596f14..6babbcdc1f 100755
--- a/tests/qemu-iotests/215
+++ b/tests/qemu-iotests/215
@@ -95,7 +95,7 @@ output=$($QEMU_IO \
          2>&1 | _filter_qemu_io)
 case $output in
     *allocate*)
-        _notrun "Insufficent memory to run test" ;;
+        _notrun "Insufficient memory to run test" ;;
     *) printf '%s\n' "$output" ;;
 esac
 $QEMU_IO \
diff --git a/tests/qemu-iotests/221.out b/tests/qemu-iotests/221.out
index 9cdd171a2d..df231c4e3d 100644
--- a/tests/qemu-iotests/221.out
+++ b/tests/qemu-iotests/221.out
@@ -5,14 +5,14 @@ QA output created by 221
 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=65537
 discard 65537/65537 bytes at offset 0
 64.001 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-[{ "start": 0, "length": 66048, "depth": 0, "present": true, "zero": true, "data": false, "offset": OFFSET}]
-[{ "start": 0, "length": 66048, "depth": 0, "present": true, "zero": true, "data": false, "offset": OFFSET}]
+[{ "start": 0, "length": 66048, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false, "offset": OFFSET}]
+[{ "start": 0, "length": 66048, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false, "offset": OFFSET}]
 wrote 1/1 bytes at offset 65536
 1 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-[{ "start": 0, "length": 65536, "depth": 0, "present": true, "zero": true, "data": false, "offset": OFFSET},
-{ "start": 65536, "length": 1, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 65537, "length": 511, "depth": 0, "present": true, "zero": true, "data": false, "offset": OFFSET}]
-[{ "start": 0, "length": 65536, "depth": 0, "present": true, "zero": true, "data": false, "offset": OFFSET},
-{ "start": 65536, "length": 1, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 65537, "length": 511, "depth": 0, "present": true, "zero": true, "data": false, "offset": OFFSET}]
+[{ "start": 0, "length": 65536, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false, "offset": OFFSET},
+{ "start": 65536, "length": 1, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 65537, "length": 511, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false, "offset": OFFSET}]
+[{ "start": 0, "length": 65536, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false, "offset": OFFSET},
+{ "start": 65536, "length": 1, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 65537, "length": 511, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false, "offset": OFFSET}]
 *** done
diff --git a/tests/qemu-iotests/223.out b/tests/qemu-iotests/223.out
index 26fb347c5d..86a37014d0 100644
--- a/tests/qemu-iotests/223.out
+++ b/tests/qemu-iotests/223.out
@@ -120,23 +120,23 @@ read 1048576/1048576 bytes at offset 1048576
 1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 read 2097152/2097152 bytes at offset 2097152
 2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-[{ "start": 0, "length": 4096, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 4096, "length": 1044480, "depth": 0, "present": true, "zero": true, "data": false, "offset": OFFSET},
-{ "start": 1048576, "length": 3145728, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET}]
-[{ "start": 0, "length": 65536, "depth": 0, "present": false, "zero": false, "data": false},
-{ "start": 65536, "length": 2031616, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 2097152, "length": 2097152, "depth": 0, "present": false, "zero": false, "data": false}]
+[{ "start": 0, "length": 4096, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 4096, "length": 1044480, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false, "offset": OFFSET},
+{ "start": 1048576, "length": 3145728, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET}]
+[{ "start": 0, "length": 65536, "depth": 0, "present": false, "zero": false, "data": false, "compressed": false},
+{ "start": 65536, "length": 2031616, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 2097152, "length": 2097152, "depth": 0, "present": false, "zero": false, "data": false, "compressed": false}]
 
 === Contrast to small granularity dirty-bitmap ===
 
-[{ "start": 0, "length": 512, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 512, "length": 512, "depth": 0, "present": false, "zero": false, "data": false},
-{ "start": 1024, "length": 2096128, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 2097152, "length": 2097152, "depth": 0, "present": false, "zero": false, "data": false}]
+[{ "start": 0, "length": 512, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 512, "length": 512, "depth": 0, "present": false, "zero": false, "data": false, "compressed": false},
+{ "start": 1024, "length": 2096128, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 2097152, "length": 2097152, "depth": 0, "present": false, "zero": false, "data": false, "compressed": false}]
 
 === Check bitmap taken from another node ===
 
-[{ "start": 0, "length": 4194304, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET}]
+[{ "start": 0, "length": 4194304, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET}]
 
 === End qemu NBD server ===
 
@@ -239,23 +239,23 @@ read 1048576/1048576 bytes at offset 1048576
 1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 read 2097152/2097152 bytes at offset 2097152
 2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-[{ "start": 0, "length": 4096, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 4096, "length": 1044480, "depth": 0, "present": true, "zero": true, "data": false, "offset": OFFSET},
-{ "start": 1048576, "length": 3145728, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET}]
-[{ "start": 0, "length": 65536, "depth": 0, "present": false, "zero": false, "data": false},
-{ "start": 65536, "length": 2031616, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 2097152, "length": 2097152, "depth": 0, "present": false, "zero": false, "data": false}]
+[{ "start": 0, "length": 4096, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 4096, "length": 1044480, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false, "offset": OFFSET},
+{ "start": 1048576, "length": 3145728, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET}]
+[{ "start": 0, "length": 65536, "depth": 0, "present": false, "zero": false, "data": false, "compressed": false},
+{ "start": 65536, "length": 2031616, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 2097152, "length": 2097152, "depth": 0, "present": false, "zero": false, "data": false, "compressed": false}]
 
 === Contrast to small granularity dirty-bitmap ===
 
-[{ "start": 0, "length": 512, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 512, "length": 512, "depth": 0, "present": false, "zero": false, "data": false},
-{ "start": 1024, "length": 2096128, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 2097152, "length": 2097152, "depth": 0, "present": false, "zero": false, "data": false}]
+[{ "start": 0, "length": 512, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 512, "length": 512, "depth": 0, "present": false, "zero": false, "data": false, "compressed": false},
+{ "start": 1024, "length": 2096128, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 2097152, "length": 2097152, "depth": 0, "present": false, "zero": false, "data": false, "compressed": false}]
 
 === Check bitmap taken from another node ===
 
-[{ "start": 0, "length": 4194304, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET}]
+[{ "start": 0, "length": 4194304, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET}]
 
 === End qemu NBD server ===
 
@@ -281,12 +281,12 @@ read 2097152/2097152 bytes at offset 2097152
 
 === Use qemu-nbd as server ===
 
-[{ "start": 0, "length": 65536, "depth": 0, "present": false, "zero": false, "data": false},
-{ "start": 65536, "length": 2031616, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 2097152, "length": 2097152, "depth": 0, "present": false, "zero": false, "data": false}]
-[{ "start": 0, "length": 512, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 512, "length": 512, "depth": 0, "present": false, "zero": false, "data": false},
-{ "start": 1024, "length": 11321, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET}]
-[{ "start": 12345, "length": 2084807, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 2097152, "length": 2097152, "depth": 0, "present": false, "zero": false, "data": false}]
+[{ "start": 0, "length": 65536, "depth": 0, "present": false, "zero": false, "data": false, "compressed": false},
+{ "start": 65536, "length": 2031616, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 2097152, "length": 2097152, "depth": 0, "present": false, "zero": false, "data": false, "compressed": false}]
+[{ "start": 0, "length": 512, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 512, "length": 512, "depth": 0, "present": false, "zero": false, "data": false, "compressed": false},
+{ "start": 1024, "length": 11321, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET}]
+[{ "start": 12345, "length": 2084807, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 2097152, "length": 2097152, "depth": 0, "present": false, "zero": false, "data": false, "compressed": false}]
 *** done
diff --git a/tests/qemu-iotests/241.out b/tests/qemu-iotests/241.out
index 88e8cfcd7e..7946c286d5 100644
--- a/tests/qemu-iotests/241.out
+++ b/tests/qemu-iotests/241.out
@@ -6,8 +6,8 @@ exports available: 1
  export: ''
   size:  1024
   min block: 1
-[{ "start": 0, "length": 1000, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 1000, "length": 24, "depth": 0, "present": true, "zero": true, "data": false, "offset": OFFSET}]
+[{ "start": 0, "length": 1000, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 1000, "length": 24, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false, "offset": OFFSET}]
 1 KiB (0x400) bytes     allocated at offset 0 bytes (0x0)
 
 === Exporting unaligned raw image, forced server sector alignment ===
@@ -16,7 +16,7 @@ exports available: 1
  export: ''
   size:  1024
   min block: 512
-[{ "start": 0, "length": 1024, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET}]
+[{ "start": 0, "length": 1024, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET}]
 1 KiB (0x400) bytes     allocated at offset 0 bytes (0x0)
 WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed raw.
          Automatically detecting the format is dangerous for raw images, write operations on block 0 will be restricted.
@@ -28,7 +28,7 @@ exports available: 1
  export: ''
   size:  1024
   min block: 1
-[{ "start": 0, "length": 1000, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 1000, "length": 24, "depth": 0, "present": true, "zero": true, "data": false, "offset": OFFSET}]
+[{ "start": 0, "length": 1000, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 1000, "length": 24, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false, "offset": OFFSET}]
 1 KiB (0x400) bytes     allocated at offset 0 bytes (0x0)
 *** done
diff --git a/tests/qemu-iotests/244.out b/tests/qemu-iotests/244.out
index 5e03add054..f46cfe93f1 100644
--- a/tests/qemu-iotests/244.out
+++ b/tests/qemu-iotests/244.out
@@ -41,7 +41,7 @@ write failed: Operation not supported
 No errors were found on the image.
 
 Take an internal snapshot:
-qemu-img: Could not create snapshot 'test': -95 (Operation not supported)
+qemu-img: Could not create snapshot 'test': Operation not supported
 No errors were found on the image.
 
 === Standalone image with external data file (efficient) ===
@@ -57,12 +57,12 @@ wrote 3145728/3145728 bytes at offset 3145728
 3 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 No errors were found on the image.
 
-[{ "start": 0, "length": 1048576, "depth": 0, "present": false, "zero": true, "data": false},
-{ "start": 1048576, "length": 1048576, "depth": 0, "present": true, "zero": false, "data": true, "offset": 1048576},
-{ "start": 2097152, "length": 2097152, "depth": 0, "present": true, "zero": true, "data": false},
-{ "start": 4194304, "length": 1048576, "depth": 0, "present": true, "zero": true, "data": false, "offset": 4194304},
-{ "start": 5242880, "length": 1048576, "depth": 0, "present": true, "zero": true, "data": false},
-{ "start": 6291456, "length": 60817408, "depth": 0, "present": false, "zero": true, "data": false}]
+[{ "start": 0, "length": 1048576, "depth": 0, "present": false, "zero": true, "data": false, "compressed": false},
+{ "start": 1048576, "length": 1048576, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": 1048576},
+{ "start": 2097152, "length": 2097152, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false},
+{ "start": 4194304, "length": 1048576, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false, "offset": 4194304},
+{ "start": 5242880, "length": 1048576, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false},
+{ "start": 6291456, "length": 60817408, "depth": 0, "present": false, "zero": true, "data": false, "compressed": false}]
 
 read 1048576/1048576 bytes at offset 0
 1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
@@ -94,10 +94,10 @@ wrote 3145728/3145728 bytes at offset 3145728
 3 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 No errors were found on the image.
 
-[{ "start": 0, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": 0},
-{ "start": 2097152, "length": 2097152, "depth": 0, "present": true, "zero": true, "data": false},
-{ "start": 4194304, "length": 2097152, "depth": 0, "present": true, "zero": true, "data": false, "offset": 4194304},
-{ "start": 6291456, "length": 60817408, "depth": 0, "present": true, "zero": false, "data": true, "offset": 6291456}]
+[{ "start": 0, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": 0},
+{ "start": 2097152, "length": 2097152, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false},
+{ "start": 4194304, "length": 2097152, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false, "offset": 4194304},
+{ "start": 6291456, "length": 60817408, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": 6291456}]
 
 read 1048576/1048576 bytes at offset 0
 1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
@@ -123,8 +123,8 @@ read 1048576/1048576 bytes at offset 0
 1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 Offset          Length          Mapped to       File
 0               0x100000        0               TEST_DIR/t.qcow2.data
-[{ "start": 0, "length": 1048576, "depth": 0, "present": true, "zero": false, "data": true, "offset": 0},
-{ "start": 1048576, "length": 66060288, "depth": 0, "present": false, "zero": true, "data": false}]
+[{ "start": 0, "length": 1048576, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": 0},
+{ "start": 1048576, "length": 66060288, "depth": 0, "present": false, "zero": true, "data": false, "compressed": false}]
 
 === Copy offloading ===
 
diff --git a/tests/qemu-iotests/252.out b/tests/qemu-iotests/252.out
index c578129c25..b1aa94cb05 100644
--- a/tests/qemu-iotests/252.out
+++ b/tests/qemu-iotests/252.out
@@ -23,8 +23,8 @@ read 131072/131072 bytes at offset 131072
 read 131072/131072 bytes at offset 262144
 128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 
-[{ "start": 0, "length": 262144, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 262144, "length": 131072, "depth": 0, "present": false, "zero": true, "data": false}]
+[{ "start": 0, "length": 262144, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 262144, "length": 131072, "depth": 0, "present": false, "zero": true, "data": false, "compressed": false}]
 
 read 131072/131072 bytes at offset 0
 128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
@@ -33,7 +33,7 @@ read 131072/131072 bytes at offset 131072
 read 131072/131072 bytes at offset 262144
 128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 
-[{ "start": 0, "length": 262144, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 262144, "length": 65536, "depth": 0, "present": true, "zero": true, "data": false},
-{ "start": 327680, "length": 65536, "depth": 1, "present": false, "zero": true, "data": false}]
+[{ "start": 0, "length": 262144, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 262144, "length": 65536, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false},
+{ "start": 327680, "length": 65536, "depth": 1, "present": false, "zero": true, "data": false, "compressed": false}]
 *** done
diff --git a/tests/qemu-iotests/253.out b/tests/qemu-iotests/253.out
index b3dca75a89..b458085adb 100644
--- a/tests/qemu-iotests/253.out
+++ b/tests/qemu-iotests/253.out
@@ -3,16 +3,16 @@ QA output created by 253
 === Check mapping of unaligned raw image ===
 
 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048575
-[{ "start": 0, "length": 4096, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 4096, "length": 1044480, "depth": 0, "present": true, "zero": true, "data": false, "offset": OFFSET}]
-[{ "start": 0, "length": 4096, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 4096, "length": 1044480, "depth": 0, "present": true, "zero": true, "data": false, "offset": OFFSET}]
+[{ "start": 0, "length": 4096, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 4096, "length": 1044480, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false, "offset": OFFSET}]
+[{ "start": 0, "length": 4096, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 4096, "length": 1044480, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false, "offset": OFFSET}]
 wrote 65535/65535 bytes at offset 983040
 63.999 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-[{ "start": 0, "length": 4096, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 4096, "length": 978944, "depth": 0, "present": true, "zero": true, "data": false, "offset": OFFSET},
-{ "start": 983040, "length": 65536, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET}]
-[{ "start": 0, "length": 4096, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 4096, "length": 978944, "depth": 0, "present": true, "zero": true, "data": false, "offset": OFFSET},
-{ "start": 983040, "length": 65536, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET}]
+[{ "start": 0, "length": 4096, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 4096, "length": 978944, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false, "offset": OFFSET},
+{ "start": 983040, "length": 65536, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET}]
+[{ "start": 0, "length": 4096, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 4096, "length": 978944, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false, "offset": OFFSET},
+{ "start": 983040, "length": 65536, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET}]
 *** done
diff --git a/tests/qemu-iotests/274.out b/tests/qemu-iotests/274.out
index acd8b166a6..c2967335ca 100644
--- a/tests/qemu-iotests/274.out
+++ b/tests/qemu-iotests/274.out
@@ -20,18 +20,18 @@ read 1048576/1048576 bytes at offset 1048576
 0/1048576 bytes allocated at offset 1 MiB
 
 === Checking map ===
-[{ "start": 0, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": 327680}]
+[{ "start": 0, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": 327680}]
 
 Offset          Length          Mapped to       File
 0               0x200000        0x50000         TEST_DIR/PID-base
 
-[{ "start": 0, "length": 1048576, "depth": 1, "present": true, "zero": false, "data": true, "offset": 327680}]
+[{ "start": 0, "length": 1048576, "depth": 1, "present": true, "zero": false, "data": true, "compressed": false, "offset": 327680}]
 
 Offset          Length          Mapped to       File
 0               0x100000        0x50000         TEST_DIR/PID-base
 
-[{ "start": 0, "length": 1048576, "depth": 2, "present": true, "zero": false, "data": true, "offset": 327680},
-{ "start": 1048576, "length": 1048576, "depth": 0, "present": false, "zero": true, "data": false}]
+[{ "start": 0, "length": 1048576, "depth": 2, "present": true, "zero": false, "data": true, "compressed": false, "offset": 327680},
+{ "start": 1048576, "length": 1048576, "depth": 0, "present": false, "zero": true, "data": false, "compressed": false}]
 
 Offset          Length          Mapped to       File
 0               0x100000        0x50000         TEST_DIR/PID-base
@@ -186,8 +186,8 @@ read 65536/65536 bytes at offset 5368709120
 1 GiB (0x40000000) bytes not allocated at offset 0 bytes (0x0)
 7 GiB (0x1c0000000) bytes     allocated at offset 1 GiB (0x40000000)
 
-[{ "start": 0, "length": 1073741824, "depth": 1, "present": false, "zero": true, "data": false},
-{ "start": 1073741824, "length": 7516192768, "depth": 0, "present": true, "zero": true, "data": false}]
+[{ "start": 0, "length": 1073741824, "depth": 1, "present": false, "zero": true, "data": false, "compressed": false},
+{ "start": 1073741824, "length": 7516192768, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false}]
 
 === preallocation=metadata ===
 wrote 65536/65536 bytes at offset 33285996544
@@ -201,13 +201,13 @@ read 65536/65536 bytes at offset 33285996544
 30 GiB (0x780000000) bytes not allocated at offset 0 bytes (0x0)
 3 GiB (0xc0000000) bytes     allocated at offset 30 GiB (0x780000000)
 
-[{ "start": 0, "length": 32212254720, "depth": 1, "present": false, "zero": true, "data": false},
-{ "start": 32212254720, "length": 536870912, "depth": 0, "present": true, "zero": true, "data": false, "offset": 327680},
-{ "start": 32749125632, "length": 536870912, "depth": 0, "present": true, "zero": true, "data": false, "offset": 537264128},
-{ "start": 33285996544, "length": 536870912, "depth": 0, "present": true, "zero": true, "data": false, "offset": 1074200576},
-{ "start": 33822867456, "length": 536870912, "depth": 0, "present": true, "zero": true, "data": false, "offset": 1611137024},
-{ "start": 34359738368, "length": 536870912, "depth": 0, "present": true, "zero": true, "data": false, "offset": 2148139008},
-{ "start": 34896609280, "length": 536870912, "depth": 0, "present": true, "zero": true, "data": false, "offset": 2685075456}]
+[{ "start": 0, "length": 32212254720, "depth": 1, "present": false, "zero": true, "data": false, "compressed": false},
+{ "start": 32212254720, "length": 536870912, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false, "offset": 327680},
+{ "start": 32749125632, "length": 536870912, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false, "offset": 537264128},
+{ "start": 33285996544, "length": 536870912, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false, "offset": 1074200576},
+{ "start": 33822867456, "length": 536870912, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false, "offset": 1611137024},
+{ "start": 34359738368, "length": 536870912, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false, "offset": 2148139008},
+{ "start": 34896609280, "length": 536870912, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false, "offset": 2685075456}]
 
 === preallocation=falloc ===
 wrote 65536/65536 bytes at offset 9437184
@@ -221,8 +221,8 @@ read 65536/65536 bytes at offset 9437184
 5 MiB (0x500000) bytes not allocated at offset 0 bytes (0x0)
 10 MiB (0xa00000) bytes     allocated at offset 5 MiB (0x500000)
 
-[{ "start": 0, "length": 5242880, "depth": 1, "present": false, "zero": true, "data": false},
-{ "start": 5242880, "length": 10485760, "depth": 0, "present": true, "zero": false, "data": true, "offset": 327680}]
+[{ "start": 0, "length": 5242880, "depth": 1, "present": false, "zero": true, "data": false, "compressed": false},
+{ "start": 5242880, "length": 10485760, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": 327680}]
 
 === preallocation=full ===
 wrote 65536/65536 bytes at offset 11534336
@@ -236,8 +236,8 @@ read 65536/65536 bytes at offset 11534336
 8 MiB (0x800000) bytes not allocated at offset 0 bytes (0x0)
 4 MiB (0x400000) bytes     allocated at offset 8 MiB (0x800000)
 
-[{ "start": 0, "length": 8388608, "depth": 1, "present": false, "zero": true, "data": false},
-{ "start": 8388608, "length": 4194304, "depth": 0, "present": true, "zero": false, "data": true, "offset": 327680}]
+[{ "start": 0, "length": 8388608, "depth": 1, "present": false, "zero": true, "data": false, "compressed": false},
+{ "start": 8388608, "length": 4194304, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": 327680}]
 
 === preallocation=off ===
 wrote 65536/65536 bytes at offset 259072
@@ -251,9 +251,9 @@ read 65536/65536 bytes at offset 259072
 192 KiB (0x30000) bytes not allocated at offset 0 bytes (0x0)
 320 KiB (0x50000) bytes     allocated at offset 192 KiB (0x30000)
 
-[{ "start": 0, "length": 196608, "depth": 1, "present": false, "zero": true, "data": false},
-{ "start": 196608, "length": 65536, "depth": 0, "present": true, "zero": false, "data": true, "offset": 327680},
-{ "start": 262144, "length": 262144, "depth": 0, "present": true, "zero": true, "data": false}]
+[{ "start": 0, "length": 196608, "depth": 1, "present": false, "zero": true, "data": false, "compressed": false},
+{ "start": 196608, "length": 65536, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": 327680},
+{ "start": 262144, "length": 262144, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false}]
 
 === preallocation=off ===
 wrote 65536/65536 bytes at offset 344064
@@ -267,8 +267,8 @@ read 65536/65536 bytes at offset 344064
 256 KiB (0x40000) bytes not allocated at offset 0 bytes (0x0)
 256 KiB (0x40000) bytes     allocated at offset 256 KiB (0x40000)
 
-[{ "start": 0, "length": 262144, "depth": 1, "present": false, "zero": true, "data": false},
-{ "start": 262144, "length": 262144, "depth": 0, "present": true, "zero": true, "data": false}]
+[{ "start": 0, "length": 262144, "depth": 1, "present": false, "zero": true, "data": false, "compressed": false},
+{ "start": 262144, "length": 262144, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false}]
 
 === preallocation=off ===
 wrote 65536/65536 bytes at offset 446464
@@ -282,6 +282,6 @@ read 65536/65536 bytes at offset 446464
 256 KiB (0x40000) bytes not allocated at offset 0 bytes (0x0)
 244 KiB (0x3d000) bytes     allocated at offset 256 KiB (0x40000)
 
-[{ "start": 0, "length": 262144, "depth": 1, "present": false, "zero": true, "data": false},
-{ "start": 262144, "length": 249856, "depth": 0, "present": true, "zero": true, "data": false}]
+[{ "start": 0, "length": 262144, "depth": 1, "present": false, "zero": true, "data": false, "compressed": false},
+{ "start": 262144, "length": 249856, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false}]
 
diff --git a/tests/qemu-iotests/298 b/tests/qemu-iotests/298
index ad560e2941..9e75ac6975 100755
--- a/tests/qemu-iotests/298
+++ b/tests/qemu-iotests/298
@@ -140,8 +140,8 @@ class TestTruncate(iotests.QMPTestCase):
         stat = os.stat(disk)
         refstat = os.stat(refdisk)
 
-        # Probably we'll want preallocate filter to keep align to cluster when
-        # shrink preallocation, so, ignore small differece
+        # The preallocate filter may keep cluster alignment when shrinking,
+        # so ignore small differences
         self.assertLess(abs(stat.st_size - refstat.st_size), 64 * 1024)
 
         # Preallocate filter may leak some internal clusters (for example, if
diff --git a/tests/qemu-iotests/pylintrc b/tests/qemu-iotests/pylintrc
index f4f823a991..de2e0c2781 100644
--- a/tests/qemu-iotests/pylintrc
+++ b/tests/qemu-iotests/pylintrc
@@ -19,7 +19,7 @@ disable=invalid-name,
         too-many-public-methods,
         # pylint warns about Optional[] etc. as unsubscriptable in 3.9
         unsubscriptable-object,
-        # pylint's static analysis causes false positivies for file_path();
+        # pylint's static analysis causes false positives for file_path();
         # If we really care to make it statically knowable, we'll use mypy.
         unbalanced-tuple-unpacking,
         # Sometimes we need to disable a newly introduced pylint warning.
diff --git a/tests/qemu-iotests/tests/file-io-error b/tests/qemu-iotests/tests/file-io-error
new file mode 100755
index 0000000000..88ee5f670c
--- /dev/null
+++ b/tests/qemu-iotests/tests/file-io-error
@@ -0,0 +1,119 @@
+#!/usr/bin/env bash
+# group: rw
+#
+# Produce an I/O error in file-posix, and hope that it is not catastrophic.
+# Regression test for: https://bugzilla.redhat.com/show_bug.cgi?id=2234374
+#
+# Copyright (C) 2023 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+seq=$(basename "$0")
+echo "QA output created by $seq"
+
+status=1	# failure is the default!
+
+_cleanup()
+{
+    _cleanup_qemu
+    rm -f "$TEST_DIR/fuse-export"
+}
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+# get standard environment, filters and checks
+. ../common.rc
+. ../common.filter
+. ../common.qemu
+
+# Format-agnostic (we do not use any), but we do test the file protocol
+_supported_proto file
+_require_drivers blkdebug null-co
+
+if [ "$IMGOPTSSYNTAX" = "true" ]; then
+    # We need `$QEMU_IO -f file` to work; IMGOPTSSYNTAX uses --image-opts,
+    # breaking -f.
+    _unsupported_fmt $IMGFMT
+fi
+
+# This is a regression test of a bug in which flie-posix would access zone
+# information in case of an I/O error even when there is no zone information,
+# resulting in a division by zero.
+# To reproduce the problem, we need to trigger an I/O error inside of
+# file-posix, which can be done (rootless) by providing a FUSE export that
+# presents only errors when accessed.
+
+_launch_qemu
+_send_qemu_cmd $QEMU_HANDLE \
+    "{'execute': 'qmp_capabilities'}" \
+    'return'
+
+_send_qemu_cmd $QEMU_HANDLE \
+    "{'execute': 'blockdev-add',
+      'arguments': {
+          'driver': 'blkdebug',
+          'node-name': 'node0',
+          'inject-error': [{'event': 'none'}],
+          'image': {
+              'driver': 'null-co'
+          }
+      }}" \
+    'return'
+
+# FUSE mountpoint must exist and be a regular file
+touch "$TEST_DIR/fuse-export"
+
+# The grep -v to filter fusermount's (benign) error when /etc/fuse.conf does
+# not contain user_allow_other and the subsequent check for missing FUSE support
+# have both been taken from iotest 308.
+output=$(_send_qemu_cmd $QEMU_HANDLE \
+    "{'execute': 'block-export-add',
+      'arguments': {
+          'id': 'exp0',
+          'type': 'fuse',
+          'node-name': 'node0',
+          'mountpoint': '$TEST_DIR/fuse-export',
+          'writable': true
+      }}" \
+    'return' \
+    | grep -v 'option allow_other only allowed if')
+
+if echo "$output" | grep -q "Parameter 'type' does not accept value 'fuse'"; then
+    _notrun 'No FUSE support'
+fi
+echo "$output"
+
+echo
+# This should fail, but gracefully, i.e. just print an I/O error, not crash.
+$QEMU_IO -f file -c 'write 0 64M' "$TEST_DIR/fuse-export" | _filter_qemu_io
+echo
+
+_send_qemu_cmd $QEMU_HANDLE \
+    "{'execute': 'block-export-del',
+      'arguments': {'id': 'exp0'}}" \
+    'return'
+
+_send_qemu_cmd $QEMU_HANDLE \
+    '' \
+    'BLOCK_EXPORT_DELETED'
+
+_send_qemu_cmd $QEMU_HANDLE \
+    "{'execute': 'blockdev-del',
+      'arguments': {'node-name': 'node0'}}" \
+    'return'
+
+# success, all done
+echo "*** done"
+rm -f $seq.full
+status=0
diff --git a/tests/qemu-iotests/tests/file-io-error.out b/tests/qemu-iotests/tests/file-io-error.out
new file mode 100644
index 0000000000..0f46455a94
--- /dev/null
+++ b/tests/qemu-iotests/tests/file-io-error.out
@@ -0,0 +1,33 @@
+QA output created by file-io-error
+{'execute': 'qmp_capabilities'}
+{"return": {}}
+{'execute': 'blockdev-add',
+      'arguments': {
+          'driver': 'blkdebug',
+          'node-name': 'node0',
+          'inject-error': [{'event': 'none'}],
+          'image': {
+              'driver': 'null-co'
+          }
+      }}
+{"return": {}}
+{'execute': 'block-export-add',
+      'arguments': {
+          'id': 'exp0',
+          'type': 'fuse',
+          'node-name': 'node0',
+          'mountpoint': 'TEST_DIR/fuse-export',
+          'writable': true
+      }}
+{"return": {}}
+
+write failed: Input/output error
+
+{'execute': 'block-export-del',
+      'arguments': {'id': 'exp0'}}
+{"return": {}}
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "BLOCK_EXPORT_DELETED", "data": {"id": "exp0"}}
+{'execute': 'blockdev-del',
+      'arguments': {'node-name': 'node0'}}
+{"return": {}}
+*** done
diff --git a/tests/qemu-iotests/tests/nbd-qemu-allocation.out b/tests/qemu-iotests/tests/nbd-qemu-allocation.out
index 9d938db24e..138eb09c6d 100644
--- a/tests/qemu-iotests/tests/nbd-qemu-allocation.out
+++ b/tests/qemu-iotests/tests/nbd-qemu-allocation.out
@@ -11,9 +11,9 @@ wrote 2097152/2097152 bytes at offset 1048576
 
 === Check allocation over NBD ===
 
-[{ "start": 0, "length": 1048576, "depth": 1, "present": true, "zero": false, "data": true, "offset": 327680},
-{ "start": 1048576, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": 327680},
-{ "start": 3145728, "length": 1048576, "depth": 1, "present": false, "zero": true, "data": false}]
+[{ "start": 0, "length": 1048576, "depth": 1, "present": true, "zero": false, "data": true, "compressed": false, "offset": 327680},
+{ "start": 1048576, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": 327680},
+{ "start": 3145728, "length": 1048576, "depth": 1, "present": false, "zero": true, "data": false, "compressed": false}]
 exports available: 1
  export: ''
   size:  4194304
@@ -24,9 +24,9 @@ exports available: 1
   available meta contexts: 2
    base:allocation
    qemu:allocation-depth
-[{ "start": 0, "length": 3145728, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 3145728, "length": 1048576, "depth": 0, "present": true, "zero": true, "data": false, "offset": OFFSET}]
-[{ "start": 0, "length": 1048576, "depth": 0, "present": true, "zero": true, "data": true, "offset": OFFSET},
-{ "start": 1048576, "length": 2097152, "depth": 0, "present": false, "zero": false, "data": false},
-{ "start": 3145728, "length": 1048576, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET}]
+[{ "start": 0, "length": 3145728, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 3145728, "length": 1048576, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false, "offset": OFFSET}]
+[{ "start": 0, "length": 1048576, "depth": 0, "present": true, "zero": true, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 1048576, "length": 2097152, "depth": 0, "present": false, "zero": false, "data": false, "compressed": false},
+{ "start": 3145728, "length": 1048576, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET}]
 *** done
diff --git a/tests/qemu-iotests/tests/parallels-checks b/tests/qemu-iotests/tests/parallels-checks
new file mode 100755
index 0000000000..b281246a42
--- /dev/null
+++ b/tests/qemu-iotests/tests/parallels-checks
@@ -0,0 +1,205 @@
+#!/usr/bin/env bash
+# group: rw quick
+#
+# Test qemu-img check for parallels format
+#
+# Copyright (C) 2022 Virtuozzo International GmbH
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+# creator
+owner=alexander.ivanov@virtuozzo.com
+
+seq=`basename $0`
+echo "QA output created by $seq"
+
+status=1	# failure is the default!
+
+_cleanup()
+{
+    _cleanup_test_img
+}
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+# get standard environment, filters and checks
+. ../common.rc
+. ../common.filter
+
+_supported_fmt parallels
+_supported_proto file
+_supported_os Linux
+
+SIZE=$((4 * 1024 * 1024))
+IMGFMT=parallels
+CLUSTER_SIZE_OFFSET=28
+DATA_OFF_OFFSET=48
+BAT_OFFSET=64
+
+_make_test_img $SIZE
+
+CLUSTER_SIZE=$(peek_file_le $TEST_IMG $CLUSTER_SIZE_OFFSET 4)
+CLUSTER_SIZE=$((CLUSTER_SIZE * 512))
+LAST_CLUSTER_OFF=$((SIZE - CLUSTER_SIZE))
+LAST_CLUSTER=$((LAST_CLUSTER_OFF/CLUSTER_SIZE))
+
+echo "== TEST OUT OF IMAGE CHECK =="
+
+echo "== write pattern =="
+{ $QEMU_IO -c "write -P 0x11 0 $SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
+
+echo "== corrupt image =="
+cluster=$(($LAST_CLUSTER + 2))
+poke_file "$TEST_IMG" "$BAT_OFFSET" "\x$cluster\x00\x00\x00"
+
+echo "== read corrupted image with repairing =="
+{ $QEMU_IO -c "read -P 0x00 0 $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
+
+# Clear image
+_make_test_img $SIZE
+
+echo "== TEST LEAK CHECK =="
+
+echo "== write pattern to last cluster =="
+echo "write -P 0x11 $LAST_CLUSTER_OFF $CLUSTER_SIZE"
+{ $QEMU_IO -c "write -P 0x11 $LAST_CLUSTER_OFF $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
+
+file_size=`stat --printf="%s" "$TEST_IMG"`
+echo "file size: $file_size"
+
+echo "== extend image by 1 cluster =="
+fallocate -xl $((file_size + CLUSTER_SIZE)) "$TEST_IMG"
+
+file_size=`stat --printf="%s" "$TEST_IMG"`
+echo "file size: $file_size"
+
+echo "== repair image =="
+_check_test_img -r all
+
+file_size=`stat --printf="%s" "$TEST_IMG"`
+echo "file size: $file_size"
+
+echo "== check last cluster =="
+{ $QEMU_IO -r -c "read -P 0x11 $LAST_CLUSTER_OFF $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
+
+# Clear image
+_make_test_img $SIZE
+
+echo "== TEST DUPLICATION CHECK =="
+
+echo "== write pattern to whole image =="
+{ $QEMU_IO -c "write -P 0x11 0 $SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
+
+echo "== write another pattern to second cluster =="
+{ $QEMU_IO -c "write -P 0x55 $CLUSTER_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
+
+echo "== check second cluster =="
+{ $QEMU_IO -r -c "read -P 0x55 $CLUSTER_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
+
+
+echo "== corrupt image =="
+poke_file "$TEST_IMG" "$(($BAT_OFFSET + 4))" "\x01\x00\x00\x00"
+
+echo "== check second cluster =="
+{ $QEMU_IO -r -c "read -P 0x11 $CLUSTER_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
+
+echo "== repair image =="
+_check_test_img -r all
+
+echo "== check the first cluster =="
+{ $QEMU_IO -r -c "read -P 0x11 0 $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
+
+echo "== check second cluster =="
+{ $QEMU_IO -r -c "read -P 0x11 $CLUSTER_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
+
+echo "== write another pattern to the first clusters =="
+{ $QEMU_IO -c "write -P 0x66 0 $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
+
+echo "== check the first cluster =="
+{ $QEMU_IO -r -c "read -P 0x66 0 $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
+
+echo "== check the second cluster (deduplicated) =="
+{ $QEMU_IO -r -c "read -P 0x11 $CLUSTER_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
+
+# Clear image
+_make_test_img $SIZE
+
+echo "== TEST DUPLICATION SELF-CURE =="
+
+echo "== write pattern to whole image =="
+{ $QEMU_IO -c "write -P 0x11 0 $SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
+
+echo "== write another pattern to second cluster =="
+{ $QEMU_IO -c "write -P 0x55 $CLUSTER_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
+
+echo "== check second cluster =="
+{ $QEMU_IO -r -c "read -P 0x55 $CLUSTER_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
+
+
+echo "== corrupt image =="
+poke_file "$TEST_IMG" "$(($BAT_OFFSET + 4))" "\x01\x00\x00\x00"
+
+echo "== check second cluster =="
+{ $QEMU_IO -r -c "read -P 0x11 $CLUSTER_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
+
+echo "== check the first cluster with self-repair =="
+{ $QEMU_IO -c "read -P 0x11 0 $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
+
+echo "== check second cluster =="
+{ $QEMU_IO -r -c "read -P 0x11 $CLUSTER_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
+
+echo "== write another pattern to the first clusters =="
+{ $QEMU_IO -c "write -P 0x66 0 $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
+
+echo "== check the first cluster =="
+{ $QEMU_IO -r -c "read -P 0x66 0 $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
+
+echo "== check the second cluster (deduplicated) =="
+{ $QEMU_IO -r -c "read -P 0x11 $CLUSTER_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
+
+# Clear image
+_make_test_img $SIZE
+
+echo "== TEST DATA_OFF CHECK =="
+
+echo "== write pattern to first cluster =="
+{ $QEMU_IO -c "write -P 0x55 0 $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
+
+echo "== spoil data_off field =="
+poke_file "$TEST_IMG" "$DATA_OFF_OFFSET" "\xff\xff\xff\xff"
+
+echo "== check first cluster =="
+{ $QEMU_IO -c "read -P 0x55 0 $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
+
+# Clear image
+_make_test_img $SIZE
+
+echo "== TEST DATA_OFF THROUGH REPAIR =="
+
+echo "== write pattern to first cluster =="
+{ $QEMU_IO -c "write -P 0x55 0 $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
+
+echo "== spoil data_off field =="
+poke_file "$TEST_IMG" "$DATA_OFF_OFFSET" "\xff\xff\xff\xff"
+
+echo "== repair image =="
+_check_test_img -r all
+
+echo "== check first cluster =="
+{ $QEMU_IO -r -c "read -P 0x55 0 $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
+
+# success, all done
+echo "*** done"
+rm -f $seq.full
+status=0
diff --git a/tests/qemu-iotests/tests/parallels-checks.out b/tests/qemu-iotests/tests/parallels-checks.out
new file mode 100644
index 0000000000..9793423111
--- /dev/null
+++ b/tests/qemu-iotests/tests/parallels-checks.out
@@ -0,0 +1,132 @@
+QA output created by parallels-checks
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=4194304
+== TEST OUT OF IMAGE CHECK ==
+== write pattern ==
+wrote 4194304/4194304 bytes at offset 0
+4 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+== corrupt image ==
+== read corrupted image with repairing ==
+Repairing cluster 0 is outside image
+read 1048576/1048576 bytes at offset 0
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=4194304
+== TEST LEAK CHECK ==
+== write pattern to last cluster ==
+write -P 0x11 3145728 1048576
+wrote 1048576/1048576 bytes at offset 3145728
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+file size: 2097152
+== extend image by 1 cluster ==
+file size: 3145728
+== repair image ==
+Repairing space leaked at the end of the image 1048576
+The following inconsistencies were found and repaired:
+
+    1 leaked clusters
+    0 corruptions
+
+Double checking the fixed image now...
+No errors were found on the image.
+file size: 2097152
+== check last cluster ==
+read 1048576/1048576 bytes at offset 3145728
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=4194304
+== TEST DUPLICATION CHECK ==
+== write pattern to whole image ==
+wrote 4194304/4194304 bytes at offset 0
+4 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+== write another pattern to second cluster ==
+wrote 1048576/1048576 bytes at offset 1048576
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+== check second cluster ==
+read 1048576/1048576 bytes at offset 1048576
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+== corrupt image ==
+== check second cluster ==
+read 1048576/1048576 bytes at offset 1048576
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+== repair image ==
+Repairing duplicate offset in BAT entry 1
+The following inconsistencies were found and repaired:
+
+    0 leaked clusters
+    1 corruptions
+
+Double checking the fixed image now...
+No errors were found on the image.
+== check the first cluster ==
+read 1048576/1048576 bytes at offset 0
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+== check second cluster ==
+read 1048576/1048576 bytes at offset 1048576
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+== write another pattern to the first clusters ==
+wrote 1048576/1048576 bytes at offset 0
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+== check the first cluster ==
+read 1048576/1048576 bytes at offset 0
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+== check the second cluster (deduplicated) ==
+read 1048576/1048576 bytes at offset 1048576
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=4194304
+== TEST DUPLICATION SELF-CURE ==
+== write pattern to whole image ==
+wrote 4194304/4194304 bytes at offset 0
+4 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+== write another pattern to second cluster ==
+wrote 1048576/1048576 bytes at offset 1048576
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+== check second cluster ==
+read 1048576/1048576 bytes at offset 1048576
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+== corrupt image ==
+== check second cluster ==
+read 1048576/1048576 bytes at offset 1048576
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+== check the first cluster with self-repair ==
+Repairing duplicate offset in BAT entry 1
+read 1048576/1048576 bytes at offset 0
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+== check second cluster ==
+read 1048576/1048576 bytes at offset 1048576
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+== write another pattern to the first clusters ==
+wrote 1048576/1048576 bytes at offset 0
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+== check the first cluster ==
+read 1048576/1048576 bytes at offset 0
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+== check the second cluster (deduplicated) ==
+read 1048576/1048576 bytes at offset 1048576
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=4194304
+== TEST DATA_OFF CHECK ==
+== write pattern to first cluster ==
+wrote 1048576/1048576 bytes at offset 0
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+== spoil data_off field ==
+== check first cluster ==
+Repairing data_off field has incorrect value
+read 1048576/1048576 bytes at offset 0
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=4194304
+== TEST DATA_OFF THROUGH REPAIR ==
+== write pattern to first cluster ==
+wrote 1048576/1048576 bytes at offset 0
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+== spoil data_off field ==
+== repair image ==
+Repairing data_off field has incorrect value
+The following inconsistencies were found and repaired:
+
+    0 leaked clusters
+    1 corruptions
+
+Double checking the fixed image now...
+No errors were found on the image.
+== check first cluster ==
+read 1048576/1048576 bytes at offset 0
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+*** done
diff --git a/tests/qemu-iotests/tests/qemu-img-bitmaps.out b/tests/qemu-iotests/tests/qemu-img-bitmaps.out
index e851f0320e..74b81f703b 100644
--- a/tests/qemu-iotests/tests/qemu-img-bitmaps.out
+++ b/tests/qemu-iotests/tests/qemu-img-bitmaps.out
@@ -103,18 +103,18 @@ Format specific information:
 
 === Check bitmap contents ===
 
-[{ "start": 0, "length": 3145728, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 3145728, "length": 1048576, "depth": 0, "present": false, "zero": false, "data": false},
-{ "start": 4194304, "length": 6291456, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET}]
-[{ "start": 0, "length": 1048576, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 1048576, "length": 1048576, "depth": 0, "present": false, "zero": false, "data": false},
-{ "start": 2097152, "length": 8388608, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET}]
-[{ "start": 0, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 2097152, "length": 1048576, "depth": 0, "present": false, "zero": false, "data": false},
-{ "start": 3145728, "length": 7340032, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET}]
-[{ "start": 0, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET},
-{ "start": 2097152, "length": 1048576, "depth": 0, "present": false, "zero": false, "data": false},
-{ "start": 3145728, "length": 7340032, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET}]
+[{ "start": 0, "length": 3145728, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 3145728, "length": 1048576, "depth": 0, "present": false, "zero": false, "data": false, "compressed": false},
+{ "start": 4194304, "length": 6291456, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET}]
+[{ "start": 0, "length": 1048576, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 1048576, "length": 1048576, "depth": 0, "present": false, "zero": false, "data": false, "compressed": false},
+{ "start": 2097152, "length": 8388608, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET}]
+[{ "start": 0, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 2097152, "length": 1048576, "depth": 0, "present": false, "zero": false, "data": false, "compressed": false},
+{ "start": 3145728, "length": 7340032, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET}]
+[{ "start": 0, "length": 2097152, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
+{ "start": 2097152, "length": 1048576, "depth": 0, "present": false, "zero": false, "data": false, "compressed": false},
+{ "start": 3145728, "length": 7340032, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET}]
 
 === Check handling of inconsistent bitmap ===
 
diff --git a/tests/qtest/ahci-test.c b/tests/qtest/ahci-test.c
index abab761c26..eea8b5f77b 100644
--- a/tests/qtest/ahci-test.c
+++ b/tests/qtest/ahci-test.c
@@ -330,7 +330,7 @@ static void ahci_test_pci_spec(AHCIQState *ahci)
     ASSERT_BIT_CLEAR(datal, ~0xFF);
     g_assert_cmphex(datal, !=, 0);
 
-    /* Check specification adherence for capability extenstions. */
+    /* Check specification adherence for capability extensions. */
     data = qpci_config_readw(ahci->dev, datal);
 
     switch (ahci->fingerprint) {
diff --git a/tests/qtest/arm-cpu-features.c b/tests/qtest/arm-cpu-features.c
index 3fc33fc24d..a8a4c668ad 100644
--- a/tests/qtest/arm-cpu-features.c
+++ b/tests/qtest/arm-cpu-features.c
@@ -417,12 +417,22 @@ static void pauth_tests_default(QTestState *qts, const char *cpu_type)
 {
     assert_has_feature_enabled(qts, cpu_type, "pauth");
     assert_has_feature_disabled(qts, cpu_type, "pauth-impdef");
+    assert_has_feature_disabled(qts, cpu_type, "pauth-qarma3");
     assert_set_feature(qts, cpu_type, "pauth", false);
     assert_set_feature(qts, cpu_type, "pauth", true);
     assert_set_feature(qts, cpu_type, "pauth-impdef", true);
     assert_set_feature(qts, cpu_type, "pauth-impdef", false);
-    assert_error(qts, cpu_type, "cannot enable pauth-impdef without pauth",
+    assert_set_feature(qts, cpu_type, "pauth-qarma3", true);
+    assert_set_feature(qts, cpu_type, "pauth-qarma3", false);
+    assert_error(qts, cpu_type,
+                 "cannot enable pauth-impdef or pauth-qarma3 without pauth",
                  "{ 'pauth': false, 'pauth-impdef': true }");
+    assert_error(qts, cpu_type,
+                 "cannot enable pauth-impdef or pauth-qarma3 without pauth",
+                 "{ 'pauth': false, 'pauth-qarma3': true }");
+    assert_error(qts, cpu_type,
+                 "cannot enable both pauth-impdef and pauth-qarma3",
+                 "{ 'pauth': true, 'pauth-impdef': true, 'pauth-qarma3': true }");
 }
 
 static void test_query_cpu_model_expansion(const void *data)
diff --git a/tests/qtest/bcm2835-dma-test.c b/tests/qtest/bcm2835-dma-test.c
index 8293d822b9..18901b76d2 100644
--- a/tests/qtest/bcm2835-dma-test.c
+++ b/tests/qtest/bcm2835-dma-test.c
@@ -25,7 +25,7 @@
 
 #define BCM2708_DMA_INT_STATUS 0xfe0
 
-/* DMA Trasfer Info fields: */
+/* DMA Transfer Info fields: */
 #define BCM2708_DMA_INT_EN     (1 << 0)
 #define BCM2708_DMA_D_INC      (1 << 4)
 #define BCM2708_DMA_S_INC      (1 << 8)
diff --git a/tests/qtest/bios-tables-test.c b/tests/qtest/bios-tables-test.c
index 47ba20b957..d1b80149f2 100644
--- a/tests/qtest/bios-tables-test.c
+++ b/tests/qtest/bios-tables-test.c
@@ -26,7 +26,7 @@
  * 4. Run
  *      make check V=2
  * this will produce a bunch of warnings about differences
- * beween actual and expected ACPI tables. If you have IASL installed,
+ * between actual and expected ACPI tables. If you have IASL installed,
  * they will also be disassembled so you can look at the disassembled
  * output. If not - disassemble them yourself in any way you like.
  * Look at the differences - make sure they make sense and match what the
@@ -2138,7 +2138,9 @@ int main(int argc, char *argv[])
                 qtest_add_func("acpi/q35/core-count2",
                                test_acpi_q35_tcg_core_count2);
             }
-            qtest_add_func("acpi/q35/viot", test_acpi_q35_viot);
+            if (qtest_has_device("virtio-iommu-pci")) {
+                qtest_add_func("acpi/q35/viot", test_acpi_q35_viot);
+            }
 #ifdef CONFIG_POSIX
             qtest_add_func("acpi/q35/cxl", test_acpi_q35_cxl);
 #endif
@@ -2173,7 +2175,9 @@ int main(int argc, char *argv[])
             qtest_add_func("acpi/virt/memhp", test_acpi_virt_tcg_memhp);
             qtest_add_func("acpi/virt/pxb", test_acpi_virt_tcg_pxb);
             qtest_add_func("acpi/virt/oem-fields", test_acpi_virt_oem_fields);
-            qtest_add_func("acpi/virt/viot", test_acpi_virt_viot);
+            if (qtest_has_device("virtio-iommu-pci")) {
+                qtest_add_func("acpi/virt/viot", test_acpi_virt_viot);
+            }
         }
     }
     ret = g_test_run();
diff --git a/tests/qtest/ds1338-test.c b/tests/qtest/ds1338-test.c
index f6ade9a050..d12424d27f 100644
--- a/tests/qtest/ds1338-test.c
+++ b/tests/qtest/ds1338-test.c
@@ -38,7 +38,7 @@ static void send_and_receive(void *obj, void *data, QGuestAllocator *alloc)
 
     i2c_read_block(i2cdev, 0, resp, sizeof(resp));
 
-    /* check retrieved time againt local time */
+    /* check retrieved time against local time */
     g_assert_cmpuint(bcd2bin(resp[4]), == , tm_ptr->tm_mday);
     g_assert_cmpuint(bcd2bin(resp[5]), == , 1 + tm_ptr->tm_mon);
     g_assert_cmpuint(2000 + bcd2bin(resp[6]), == , 1900 + tm_ptr->tm_year);
diff --git a/tests/qtest/fuzz/generic_fuzz.c b/tests/qtest/fuzz/generic_fuzz.c
index 11256abf6c..ec842e03c5 100644
--- a/tests/qtest/fuzz/generic_fuzz.c
+++ b/tests/qtest/fuzz/generic_fuzz.c
@@ -846,9 +846,9 @@ static void generic_pre_fuzz(QTestState *s)
  *          functionality B
  *
  * This function attempts to produce an input that:
- * Ouptut: maps a device's BARs, set up three DMA patterns, triggers
- *          functionality A device, replaces the DMA patterns with a single
- *          patten, and triggers device functionality B.
+ * Output: maps a device's BARs, set up three DMA patterns, triggers
+ *          device functionality A, replaces the DMA patterns with a single
+ *          pattern, and triggers device functionality B.
  */
 static size_t generic_fuzz_crossover(const uint8_t *data1, size_t size1, const
                                      uint8_t *data2, size_t size2, uint8_t *out,
diff --git a/tests/qtest/libqos/ahci.c b/tests/qtest/libqos/ahci.c
index f53f12aa99..a2c94c6e06 100644
--- a/tests/qtest/libqos/ahci.c
+++ b/tests/qtest/libqos/ahci.c
@@ -404,57 +404,110 @@ void ahci_port_clear(AHCIQState *ahci, uint8_t port)
 /**
  * Check a port for errors.
  */
-void ahci_port_check_error(AHCIQState *ahci, uint8_t port,
-                           uint32_t imask, uint8_t emask)
+void ahci_port_check_error(AHCIQState *ahci, AHCICommand *cmd)
 {
+    uint8_t port = cmd->port;
     uint32_t reg;
 
-    /* The upper 9 bits of the IS register all indicate errors. */
-    reg = ahci_px_rreg(ahci, port, AHCI_PX_IS);
-    reg &= ~imask;
-    reg >>= 23;
-    g_assert_cmphex(reg, ==, 0);
+    /* If expecting TF error, ensure that TFES is set. */
+    if (cmd->errors) {
+        reg = ahci_px_rreg(ahci, port, AHCI_PX_IS);
+        ASSERT_BIT_SET(reg, AHCI_PX_IS_TFES);
+    } else {
+        /* The upper 9 bits of the IS register all indicate errors. */
+        reg = ahci_px_rreg(ahci, port, AHCI_PX_IS);
+        reg &= ~cmd->interrupts;
+        reg >>= 23;
+        g_assert_cmphex(reg, ==, 0);
+    }
 
-    /* The Sata Error Register should be empty. */
+    /* The Sata Error Register should be empty, even when expecting TF error. */
     reg = ahci_px_rreg(ahci, port, AHCI_PX_SERR);
     g_assert_cmphex(reg, ==, 0);
 
+    /* If expecting TF error, and TFES was set, perform error recovery
+     * (see AHCI 1.3 section 6.2.2.1) such that we can send new commands. */
+    if (cmd->errors) {
+        /* This will clear PxCI. */
+        ahci_px_clr(ahci, port, AHCI_PX_CMD, AHCI_PX_CMD_ST);
+
+        /* The port has 500ms to disengage. */
+        usleep(500000);
+        reg = ahci_px_rreg(ahci, port, AHCI_PX_CMD);
+        ASSERT_BIT_CLEAR(reg, AHCI_PX_CMD_CR);
+
+        /* Clear PxIS. */
+        reg = ahci_px_rreg(ahci, port, AHCI_PX_IS);
+        ahci_px_wreg(ahci, port, AHCI_PX_IS, reg);
+
+        /* Check if we need to perform a COMRESET.
+         * Not implemented right now, as there is no reason why our QEMU model
+         * should need a COMRESET when expecting TF error. */
+        reg = ahci_px_rreg(ahci, port, AHCI_PX_TFD);
+        ASSERT_BIT_CLEAR(reg, AHCI_PX_TFD_STS_BSY | AHCI_PX_TFD_STS_DRQ);
+
+        /* Enable issuing new commands. */
+        ahci_px_set(ahci, port, AHCI_PX_CMD, AHCI_PX_CMD_ST);
+    }
+
     /* The TFD also has two error sections. */
     reg = ahci_px_rreg(ahci, port, AHCI_PX_TFD);
-    if (!emask) {
+    if (!cmd->errors) {
         ASSERT_BIT_CLEAR(reg, AHCI_PX_TFD_STS_ERR);
     } else {
         ASSERT_BIT_SET(reg, AHCI_PX_TFD_STS_ERR);
     }
-    ASSERT_BIT_CLEAR(reg, AHCI_PX_TFD_ERR & (~emask << 8));
-    ASSERT_BIT_SET(reg, AHCI_PX_TFD_ERR & (emask << 8));
+    ASSERT_BIT_CLEAR(reg, AHCI_PX_TFD_ERR & (~cmd->errors << 8));
+    ASSERT_BIT_SET(reg, AHCI_PX_TFD_ERR & (cmd->errors << 8));
 }
 
-void ahci_port_check_interrupts(AHCIQState *ahci, uint8_t port,
-                                uint32_t intr_mask)
+void ahci_port_check_interrupts(AHCIQState *ahci, AHCICommand *cmd)
 {
+    uint8_t port = cmd->port;
     uint32_t reg;
 
+    /* If we expect errors, error handling in ahci_port_check_error() will
+     * already have cleared PxIS, so in that case this function cannot verify
+     * and clear expected interrupts. */
+    if (cmd->errors) {
+        return;
+    }
+
     /* Check for expected interrupts */
     reg = ahci_px_rreg(ahci, port, AHCI_PX_IS);
-    ASSERT_BIT_SET(reg, intr_mask);
+    ASSERT_BIT_SET(reg, cmd->interrupts);
 
     /* Clear expected interrupts and assert all interrupts now cleared. */
-    ahci_px_wreg(ahci, port, AHCI_PX_IS, intr_mask);
+    ahci_px_wreg(ahci, port, AHCI_PX_IS, cmd->interrupts);
     g_assert_cmphex(ahci_px_rreg(ahci, port, AHCI_PX_IS), ==, 0);
 }
 
-void ahci_port_check_nonbusy(AHCIQState *ahci, uint8_t port, uint8_t slot)
+void ahci_port_check_nonbusy(AHCIQState *ahci, AHCICommand *cmd)
 {
+    uint8_t slot = cmd->slot;
+    uint8_t port = cmd->port;
     uint32_t reg;
 
-    /* Assert that the command slot is no longer busy (NCQ) */
+    /* For NCQ command with error PxSACT bit should still be set.
+     * For NCQ command without error, PxSACT bit should be cleared.
+     * For non-NCQ command, PxSACT bit should always be cleared. */
     reg = ahci_px_rreg(ahci, port, AHCI_PX_SACT);
-    ASSERT_BIT_CLEAR(reg, (1 << slot));
+    if (cmd->props->ncq && cmd->errors) {
+        ASSERT_BIT_SET(reg, (1 << slot));
+    } else {
+        ASSERT_BIT_CLEAR(reg, (1 << slot));
+    }
 
-    /* Non-NCQ */
+    /* For non-NCQ command with error, PxCI bit should still be set.
+     * For non-NCQ command without error, PxCI bit should be cleared.
+     * For NCQ command without error, PxCI bit should be cleared.
+     * For NCQ command with error, PxCI bit may or may not be cleared. */
     reg = ahci_px_rreg(ahci, port, AHCI_PX_CI);
-    ASSERT_BIT_CLEAR(reg, (1 << slot));
+    if (!cmd->props->ncq && cmd->errors) {
+        ASSERT_BIT_SET(reg, (1 << slot));
+    } else if (!cmd->errors) {
+        ASSERT_BIT_CLEAR(reg, (1 << slot));
+    }
 
     /* And assert that we are generally not busy. */
     reg = ahci_px_rreg(ahci, port, AHCI_PX_TFD);
@@ -1207,9 +1260,10 @@ void ahci_command_wait(AHCIQState *ahci, AHCICommand *cmd)
 
 #define RSET(REG, MASK) (BITSET(ahci_px_rreg(ahci, cmd->port, (REG)), (MASK)))
 
-    while (RSET(AHCI_PX_TFD, AHCI_PX_TFD_STS_BSY) ||
-           RSET(AHCI_PX_CI, 1 << cmd->slot) ||
-           (cmd->props->ncq && RSET(AHCI_PX_SACT, 1 << cmd->slot))) {
+    while (!RSET(AHCI_PX_TFD, AHCI_PX_TFD_STS_ERR) &&
+           (RSET(AHCI_PX_TFD, AHCI_PX_TFD_STS_BSY) ||
+            RSET(AHCI_PX_CI, 1 << cmd->slot) ||
+            (cmd->props->ncq && RSET(AHCI_PX_SACT, 1 << cmd->slot)))) {
         usleep(50);
     }
 
@@ -1226,9 +1280,9 @@ void ahci_command_verify(AHCIQState *ahci, AHCICommand *cmd)
     uint8_t slot = cmd->slot;
     uint8_t port = cmd->port;
 
-    ahci_port_check_error(ahci, port, cmd->interrupts, cmd->errors);
-    ahci_port_check_interrupts(ahci, port, cmd->interrupts);
-    ahci_port_check_nonbusy(ahci, port, slot);
+    ahci_port_check_nonbusy(ahci, cmd);
+    ahci_port_check_error(ahci, cmd);
+    ahci_port_check_interrupts(ahci, cmd);
     ahci_port_check_cmd_sanity(ahci, cmd);
     if (cmd->interrupts & AHCI_PX_IS_DHRS) {
         ahci_port_check_d2h_sanity(ahci, port, slot);
diff --git a/tests/qtest/libqos/ahci.h b/tests/qtest/libqos/ahci.h
index 88835b6228..48017864bf 100644
--- a/tests/qtest/libqos/ahci.h
+++ b/tests/qtest/libqos/ahci.h
@@ -590,11 +590,9 @@ void ahci_set_command_header(AHCIQState *ahci, uint8_t port,
 void ahci_destroy_command(AHCIQState *ahci, uint8_t port, uint8_t slot);
 
 /* AHCI sanity check routines */
-void ahci_port_check_error(AHCIQState *ahci, uint8_t port,
-                           uint32_t imask, uint8_t emask);
-void ahci_port_check_interrupts(AHCIQState *ahci, uint8_t port,
-                                uint32_t intr_mask);
-void ahci_port_check_nonbusy(AHCIQState *ahci, uint8_t port, uint8_t slot);
+void ahci_port_check_error(AHCIQState *ahci, AHCICommand *cmd);
+void ahci_port_check_interrupts(AHCIQState *ahci, AHCICommand *cmd);
+void ahci_port_check_nonbusy(AHCIQState *ahci, AHCICommand *cmd);
 void ahci_port_check_d2h_sanity(AHCIQState *ahci, uint8_t port, uint8_t slot);
 void ahci_port_check_pio_sanity(AHCIQState *ahci, AHCICommand *cmd);
 void ahci_port_check_cmd_sanity(AHCIQState *ahci, AHCICommand *cmd);
diff --git a/tests/qtest/libqos/igb.c b/tests/qtest/libqos/igb.c
index a603468beb..f40c4ec4cd 100644
--- a/tests/qtest/libqos/igb.c
+++ b/tests/qtest/libqos/igb.c
@@ -109,6 +109,11 @@ static void igb_pci_start_hw(QOSGraphObject *obj)
                         E1000_RAH_AV | E1000_RAH_POOL_1 |
                         le16_to_cpu(*(uint16_t *)(address + 4)));
 
+    /* Set supported receive descriptor mode */
+    e1000e_macreg_write(&d->e1000e,
+                        E1000_SRRCTL(0),
+                        E1000_SRRCTL_DESCTYPE_ADV_ONEBUF);
+
     /* Enable receive */
     e1000e_macreg_write(&d->e1000e, E1000_RFCTL, E1000_RFCTL_EXTEN);
     e1000e_macreg_write(&d->e1000e, E1000_RCTL, E1000_RCTL_EN);
diff --git a/tests/qtest/libqos/qgraph.c b/tests/qtest/libqos/qgraph.c
index 0a2dddfafa..2029bf9804 100644
--- a/tests/qtest/libqos/qgraph.c
+++ b/tests/qtest/libqos/qgraph.c
@@ -54,7 +54,7 @@ struct QOSStackElement {
     int length;
 };
 
-/* Each enty in these hash table will consist of <string, node/edge> pair. */
+/* Each entry in these hash table will consist of <string, node/edge> pair. */
 static GHashTable *edge_table;
 static GHashTable *node_table;
 
@@ -214,7 +214,7 @@ static QOSGraphEdge *search_list_edges(QOSGraphEdgeList *edgelist,
 /**
  * search_machine(): search for a machine @name in the node hash
  * table. A machine is the child of the root node.
- * This function forces the research in the childs of the root,
+ * This function forces the research in the children of the root,
  * to check the node is a proper machine
  *
  * Returns: on success: the %QOSGraphNode
diff --git a/tests/qtest/libqos/qgraph_internal.h b/tests/qtest/libqos/qgraph_internal.h
index 7d62fd17af..87fab1f9f0 100644
--- a/tests/qtest/libqos/qgraph_internal.h
+++ b/tests/qtest/libqos/qgraph_internal.h
@@ -197,7 +197,7 @@ char *qos_graph_edge_get_name(QOSGraphEdge *edge);
  * qos_graph_get_machine(): returns the machine assigned
  * to that @node name.
  *
- * It performs a search only trough the list of machines
+ * It performs a search only through the list of machines
  * (i.e. the QOS_ROOT child).
  *
  * Returns: on success: the %QOSGraphNode
diff --git a/tests/qtest/libqos/virtio-gpio.c b/tests/qtest/libqos/virtio-gpio.c
index f22d7b5eb5..9220d287fe 100644
--- a/tests/qtest/libqos/virtio-gpio.c
+++ b/tests/qtest/libqos/virtio-gpio.c
@@ -28,7 +28,7 @@ static void virtio_gpio_cleanup(QVhostUserGPIO *gpio)
 
 /*
  * This handles the VirtIO setup from the point of view of the driver
- * frontend and therefor doesn't present any vhost specific features
+ * frontend and therefore doesn't present any vhost specific features
  * and in fact masks of the re-used bit.
  */
 static void virtio_gpio_setup(QVhostUserGPIO *gpio)
diff --git a/tests/qtest/libqtest.c b/tests/qtest/libqtest.c
index 471529e6cc..b1eba71ffe 100644
--- a/tests/qtest/libqtest.c
+++ b/tests/qtest/libqtest.c
@@ -24,6 +24,9 @@
 #ifdef __linux__
 #include <sys/prctl.h>
 #endif /* __linux__ */
+#ifdef __FreeBSD__
+#include <sys/procctl.h>
+#endif /* __FreeBSD__ */
 
 #include "libqtest.h"
 #include "libqmp.h"
@@ -112,7 +115,7 @@ static int socket_accept(int sock)
     socklen_t addrlen;
     int ret;
     /*
-     * timeout unit of blocking receive calls is different among platfoms.
+     * timeout unit of blocking receive calls is different among platforms.
      * It's in seconds on non-Windows platforms but milliseconds on Windows.
      */
 #ifndef _WIN32
@@ -414,6 +417,10 @@ static QTestState *G_GNUC_PRINTF(1, 2) qtest_spawn_qemu(const char *fmt, ...)
          */
         prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
 #endif /* __linux__ */
+#ifdef __FreeBSD__
+        int sig = SIGKILL;
+        procctl(P_PID, getpid(), PROC_PDEATHSIG_CTL, &sig);
+#endif /* __FreeBSD__ */
         if (!g_setenv("QEMU_AUDIO_DRV", "none", true)) {
             exit(1);
         }
@@ -1697,7 +1704,7 @@ QTestState *qtest_inproc_init(QTestState **s, bool log, const char* arch,
 
     qtest_client_set_rx_handler(qts, qtest_client_inproc_recv_line);
 
-    /* send() may not have a matching protoype, so use a type-safe wrapper */
+    /* send() may not have a matching prototype, so use a type-safe wrapper */
     qts->ops.external_send = send;
     qtest_client_set_tx_handler(qts, send_wrapper);
 
diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build
index b071d400b3..1fba07f4ed 100644
--- a/tests/qtest/meson.build
+++ b/tests/qtest/meson.build
@@ -38,8 +38,8 @@ qtests_cxl = \
 #        for the availability of the default NICs in the tests
 qtests_filter = \
   (get_option('default_devices') and slirp.found() ? ['test-netfilter'] : []) + \
-  (get_option('default_devices') and config_host.has_key('CONFIG_POSIX') ? ['test-filter-mirror'] : []) + \
-  (get_option('default_devices') and config_host.has_key('CONFIG_POSIX') ? ['test-filter-redirector'] : [])
+  (get_option('default_devices') and targetos != 'windows' ? ['test-filter-mirror'] : []) + \
+  (get_option('default_devices') and targetos != 'windows' ? ['test-filter-redirector'] : [])
 
 qtests_i386 = \
   (slirp.found() ? ['pxe-test'] : []) + \
@@ -48,7 +48,7 @@ qtests_i386 = \
   (config_all_devices.has_key('CONFIG_ISA_TESTDEV') ? ['endianness-test'] : []) +           \
   (config_all_devices.has_key('CONFIG_SGA') ? ['boot-serial-test'] : []) +                  \
   (config_all_devices.has_key('CONFIG_ISA_IPMI_KCS') ? ['ipmi-kcs-test'] : []) +            \
-  (config_host.has_key('CONFIG_LINUX') and                                                  \
+  (targetos == 'linux' and                                                                  \
    config_all_devices.has_key('CONFIG_ISA_IPMI_BT') and
    config_all_devices.has_key('CONFIG_IPMI_EXTERN') ? ['ipmi-bt-test'] : []) +              \
   (config_all_devices.has_key('CONFIG_WDT_IB700') ? ['wdt_ib700-test'] : []) +              \
@@ -74,7 +74,7 @@ qtests_i386 = \
   (config_all_devices.has_key('CONFIG_SB16') ? ['fuzz-sb16-test'] : []) +                   \
   (config_all_devices.has_key('CONFIG_SDHCI_PCI') ? ['fuzz-sdcard-test'] : []) +            \
   (config_all_devices.has_key('CONFIG_ESP_PCI') ? ['am53c974-test'] : []) +                 \
-  (config_host.has_key('CONFIG_POSIX') and                                                  \
+  (targetos != 'windows' and                                                                \
    config_all_devices.has_key('CONFIG_ACPI_ERST') ? ['erst-test'] : []) +                   \
   (config_all_devices.has_key('CONFIG_PCIE_PORT') and                                       \
    config_all_devices.has_key('CONFIG_VIRTIO_NET') and                                      \
@@ -155,8 +155,8 @@ qtests_ppc = \
   qtests_filter + \
   (config_all_devices.has_key('CONFIG_ISA_TESTDEV') ? ['endianness-test'] : []) +            \
   (config_all_devices.has_key('CONFIG_M48T59') ? ['m48t59-test'] : []) +                     \
-  (config_all_devices.has_key('CONFIG_TCG') ? ['prom-env-test'] : []) +                      \
-  (config_all_devices.has_key('CONFIG_TCG') ? ['boot-serial-test'] : []) +                   \
+  (config_all.has_key('CONFIG_TCG') ? ['prom-env-test'] : []) +                              \
+  (config_all.has_key('CONFIG_TCG') ? ['boot-serial-test'] : []) +                           \
   ['boot-order-test']
 
 qtests_ppc64 = \
@@ -269,13 +269,14 @@ qos_test_ss.add(
   'virtio-iommu-test.c',
   'vmxnet3-test.c',
   'igb-test.c',
+  'ufs-test.c',
 )
 
 if config_all_devices.has_key('CONFIG_VIRTIO_SERIAL')
   qos_test_ss.add(files('virtio-serial-test.c'))
 endif
 
-if config_host.has_key('CONFIG_POSIX')
+if targetos != 'windows'
   qos_test_ss.add(files('e1000e-test.c'))
 endif
 if have_virtfs
@@ -308,7 +309,7 @@ qtests = {
   'ivshmem-test': [rt, '../../contrib/ivshmem-server/ivshmem-server.c'],
   'migration-test': migration_files,
   'pxe-test': files('boot-sector.c'),
-  'qos-test': [chardev, io, qos_test_ss.apply(config_host, strict: false).sources()],
+  'qos-test': [chardev, io, qos_test_ss.apply(config_targetos, strict: false).sources()],
   'tpm-crb-swtpm-test': [io, tpmemu_files],
   'tpm-crb-test': [io, tpmemu_files],
   'tpm-tis-swtpm-test': [io, tpmemu_files, 'tpm-tis-util.c'],
@@ -321,7 +322,7 @@ qtests = {
 }
 
 if vnc.found()
-  gvnc = dependency('gvnc-1.0', required: false)
+  gvnc = dependency('gvnc-1.0', method: 'pkg-config', required: false)
   if gvnc.found()
     qtests += {'vnc-display-test': [gvnc]}
     qtests_generic += [ 'vnc-display-test' ]
diff --git a/tests/qtest/microbit-test.c b/tests/qtest/microbit-test.c
index 2abcad8e31..72190d38f7 100644
--- a/tests/qtest/microbit-test.c
+++ b/tests/qtest/microbit-test.c
@@ -434,6 +434,8 @@ static void test_nrf51_gpio_detect(void)
     g_assert_true(qtest_get_irq(qts, 0));
     qtest_set_irq_in(qts, "/machine/nrf51", "unnamed-gpio-in", 3, 0);
     g_assert_true(qtest_get_irq(qts, 0));
+
+    qtest_quit(qts);
 }
 
 static void timer_task(QTestState *qts, hwaddr task)
diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c
index 62d3f37021..1b43df5ca7 100644
--- a/tests/qtest/migration-test.c
+++ b/tests/qtest/migration-test.c
@@ -486,7 +486,7 @@ static void migrate_ensure_converge(QTestState *who)
  * transferred.
  *
  * Finally we go back to the source and read a byte just
- * before the marker untill we see it flip in value. This
+ * before the marker until we see it flip in value. This
  * is proof that start_address -> MAGIC_OFFSET_BASE
  * is now dirty again.
  *
@@ -826,7 +826,7 @@ static int test_migrate_start(QTestState **from, QTestState **to,
 
     /*
      * Remove shmem file immediately to avoid memory leak in test failed case.
-     * It's valid becase QEMU has already opened this file
+     * It's valid because QEMU has already opened this file
      */
     if (args->use_shmem) {
         unlink(shmem_path);
@@ -2103,7 +2103,7 @@ static void test_migrate_auto_converge(void)
 
     /*
      * We want the test to be stable and as fast as possible.
-     * E.g., with 1Gb/s bandwith migration may pass without throttling,
+     * E.g., with 1Gb/s bandwidth migration may pass without throttling,
      * so we need to decrease a bandwidth.
      */
     const int64_t init_pct = 5, inc_pct = 25, max_pct = 95;
diff --git a/tests/qtest/netdev-socket.c b/tests/qtest/netdev-socket.c
index 097abc0230..8eed54801f 100644
--- a/tests/qtest/netdev-socket.c
+++ b/tests/qtest/netdev-socket.c
@@ -82,7 +82,7 @@ static int inet_get_free_port_socket_ipv6(int sock)
 
 static int inet_get_free_port_multiple(int nb, int *port, bool ipv6)
 {
-    int sock[nb];
+    g_autofree int *sock = g_new(int, nb);
     int i;
 
     for (i = 0; i < nb; i++) {
diff --git a/tests/qtest/npcm7xx_timer-test.c b/tests/qtest/npcm7xx_timer-test.c
index 83774a5b90..43711049ca 100644
--- a/tests/qtest/npcm7xx_timer-test.c
+++ b/tests/qtest/npcm7xx_timer-test.c
@@ -384,7 +384,7 @@ static void test_pause_resume(gconstpointer test_data)
     g_assert_true(qtest_get_irq(global_qtest, tim_timer_irq(td)));
 }
 
-/* Verifies that the prescaler can be changed while the timer is runnin. */
+/* Verifies that the prescaler can be changed while the timer is running. */
 static void test_prescaler_change(gconstpointer test_data)
 {
     const TestData *td = test_data;
diff --git a/tests/qtest/pflash-cfi02-test.c b/tests/qtest/pflash-cfi02-test.c
index 0b52c2ca5c..8c073efcb4 100644
--- a/tests/qtest/pflash-cfi02-test.c
+++ b/tests/qtest/pflash-cfi02-test.c
@@ -406,7 +406,7 @@ static void test_geometry(const void *opaque)
 
     for (int region = 0; region < nb_erase_regions; ++region) {
         for (uint32_t i = 0; i < c->nb_blocs[region]; ++i) {
-            uint64_t byte_addr = (uint64_t)i * c->sector_len[region];
+            byte_addr = (uint64_t)i * c->sector_len[region];
             g_assert_cmphex(flash_read(c, byte_addr), ==, bank_mask(c));
         }
     }
diff --git a/tests/qtest/test-hmp.c b/tests/qtest/test-hmp.c
index 6704be239b..fc9125f8bb 100644
--- a/tests/qtest/test-hmp.c
+++ b/tests/qtest/test-hmp.c
@@ -45,9 +45,9 @@ static const char *hmp_cmds[] = {
     "log all",
     "log none",
     "memsave 0 4096 \"/dev/null\"",
-    "migrate_set_parameter xbzrle_cache_size 1",
-    "migrate_set_parameter downtime_limit 1",
-    "migrate_set_parameter max_bandwidth 1",
+    "migrate_set_parameter xbzrle-cache-size 64k",
+    "migrate_set_parameter downtime-limit 1",
+    "migrate_set_parameter max-bandwidth 1",
     "netdev_add user,id=net1",
     "set_link net1 off",
     "set_link net1 on",
diff --git a/tests/qtest/tpm-emu.c b/tests/qtest/tpm-emu.c
index f05fe12f01..2bf8ff4c86 100644
--- a/tests/qtest/tpm-emu.c
+++ b/tests/qtest/tpm-emu.c
@@ -77,7 +77,7 @@ static void *tpm_emu_tpm_thread(void *data)
             s->tpm_msg->code = cpu_to_be32(TPM_FAIL);
             break;
         default:
-            g_debug("unsupport TPM version %u", s->tpm_version);
+            g_debug("unsupported TPM version %u", s->tpm_version);
             g_assert_not_reached();
         }
         qio_channel_write(ioc, (char *)s->tpm_msg, be32_to_cpu(s->tpm_msg->len),
diff --git a/tests/qtest/tpm-tests.c b/tests/qtest/tpm-tests.c
index 25073d1f9e..fb94496bbd 100644
--- a/tests/qtest/tpm-tests.c
+++ b/tests/qtest/tpm-tests.c
@@ -1,5 +1,5 @@
 /*
- * QTest TPM commont test code
+ * QTest TPM common test code
  *
  * Copyright (c) 2018 IBM Corporation
  * Copyright (c) 2018 Red Hat, Inc.
diff --git a/tests/qtest/tpm-tests.h b/tests/qtest/tpm-tests.h
index a5df35ab5b..07ba60d26e 100644
--- a/tests/qtest/tpm-tests.h
+++ b/tests/qtest/tpm-tests.h
@@ -1,5 +1,5 @@
 /*
- * QTest TPM commont test code
+ * QTest TPM common test code
  *
  * Copyright (c) 2018 IBM Corporation
  *
diff --git a/tests/qtest/tpm-tis-i2c-test.c b/tests/qtest/tpm-tis-i2c-test.c
index 7a590ac551..3a1af026f2 100644
--- a/tests/qtest/tpm-tis-i2c-test.c
+++ b/tests/qtest/tpm-tis-i2c-test.c
@@ -468,7 +468,7 @@ static void tpm_tis_i2c_test_check_access_reg_release(const void *data)
                            TPM_TIS_ACCESS_ACTIVE_LOCALITY);
         /*
          * highest locality should now be active; release it and make sure the
-         * next higest locality is active afterwards
+         * next highest locality is active afterwards
          */
         for (l = TPM_TIS_NUM_LOCALITIES - 2; l >= 0; l--) {
             if (l == locty) {
diff --git a/tests/qtest/tpm-tis-util.c b/tests/qtest/tpm-tis-util.c
index 728cd3e065..862bb53248 100644
--- a/tests/qtest/tpm-tis-util.c
+++ b/tests/qtest/tpm-tis-util.c
@@ -340,7 +340,7 @@ void tpm_tis_test_check_access_reg_release(const void *data)
                TPM_TIS_ACCESS_ACTIVE_LOCALITY);
         /*
          * highest locality should now be active; release it and make sure the
-         * next higest locality is active afterwards
+         * next highest locality is active afterwards
          */
         for (l = TPM_TIS_NUM_LOCALITIES - 2; l >= 0; l--) {
             if (l == locty) {
diff --git a/tests/qtest/ufs-test.c b/tests/qtest/ufs-test.c
new file mode 100644
index 0000000000..ed3dbca154
--- /dev/null
+++ b/tests/qtest/ufs-test.c
@@ -0,0 +1,587 @@
+/*
+ * QTest testcase for UFS
+ *
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All rights reserved.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/module.h"
+#include "qemu/units.h"
+#include "libqtest.h"
+#include "libqos/qgraph.h"
+#include "libqos/pci.h"
+#include "scsi/constants.h"
+#include "include/block/ufs.h"
+
+/* Test images sizes in Bytes */
+#define TEST_IMAGE_SIZE (64 * 1024 * 1024)
+/* Timeout for various operations, in seconds. */
+#define TIMEOUT_SECONDS 10
+/* Maximum PRD entry count */
+#define MAX_PRD_ENTRY_COUNT 10
+#define PRD_ENTRY_DATA_SIZE 4096
+/* Constants to build upiu */
+#define UTP_COMMAND_DESCRIPTOR_SIZE 4096
+#define UTP_RESPONSE_UPIU_OFFSET 1024
+#define UTP_PRDT_UPIU_OFFSET 2048
+
+typedef struct QUfs QUfs;
+
+struct QUfs {
+    QOSGraphObject obj;
+    QPCIDevice dev;
+    QPCIBar bar;
+
+    uint64_t utrlba;
+    uint64_t utmrlba;
+    uint64_t cmd_desc_addr;
+    uint64_t data_buffer_addr;
+
+    bool enabled;
+};
+
+static inline uint32_t ufs_rreg(QUfs *ufs, size_t offset)
+{
+    return qpci_io_readl(&ufs->dev, ufs->bar, offset);
+}
+
+static inline void ufs_wreg(QUfs *ufs, size_t offset, uint32_t value)
+{
+    qpci_io_writel(&ufs->dev, ufs->bar, offset, value);
+}
+
+static void ufs_wait_for_irq(QUfs *ufs)
+{
+    uint64_t end_time;
+    uint32_t is;
+    /* Wait for device to reset as the linux driver does. */
+    end_time = g_get_monotonic_time() + TIMEOUT_SECONDS * G_TIME_SPAN_SECOND;
+    do {
+        qtest_clock_step(ufs->dev.bus->qts, 100);
+        is = ufs_rreg(ufs, A_IS);
+    } while (is == 0 && g_get_monotonic_time() < end_time);
+}
+
+static UtpTransferReqDesc ufs_build_req_utrd(uint64_t cmd_desc_addr,
+                                             uint8_t slot,
+                                             uint32_t data_direction,
+                                             uint16_t prd_table_length)
+{
+    UtpTransferReqDesc req = { 0 };
+    uint64_t command_desc_base_addr =
+        cmd_desc_addr + slot * UTP_COMMAND_DESCRIPTOR_SIZE;
+
+    req.header.dword_0 =
+        cpu_to_le32(1 << 28 | data_direction | UFS_UTP_REQ_DESC_INT_CMD);
+    req.header.dword_2 = cpu_to_le32(UFS_OCS_INVALID_COMMAND_STATUS);
+
+    req.command_desc_base_addr_hi = cpu_to_le32(command_desc_base_addr >> 32);
+    req.command_desc_base_addr_lo =
+        cpu_to_le32(command_desc_base_addr & 0xffffffff);
+    req.response_upiu_offset =
+        cpu_to_le16(UTP_RESPONSE_UPIU_OFFSET / sizeof(uint32_t));
+    req.response_upiu_length = cpu_to_le16(sizeof(UtpUpiuRsp));
+    req.prd_table_offset = cpu_to_le16(UTP_PRDT_UPIU_OFFSET / sizeof(uint32_t));
+    req.prd_table_length = cpu_to_le16(prd_table_length);
+    return req;
+}
+
+static void ufs_send_nop_out(QUfs *ufs, uint8_t slot,
+                             UtpTransferReqDesc *utrd_out, UtpUpiuRsp *rsp_out)
+{
+    /* Build up utp transfer request descriptor */
+    UtpTransferReqDesc utrd = ufs_build_req_utrd(ufs->cmd_desc_addr, slot,
+                                                 UFS_UTP_NO_DATA_TRANSFER, 0);
+    uint64_t utrd_addr = ufs->utrlba + slot * sizeof(UtpTransferReqDesc);
+    uint64_t req_upiu_addr =
+        ufs->cmd_desc_addr + slot * UTP_COMMAND_DESCRIPTOR_SIZE;
+    uint64_t rsp_upiu_addr = req_upiu_addr + UTP_RESPONSE_UPIU_OFFSET;
+    qtest_memwrite(ufs->dev.bus->qts, utrd_addr, &utrd, sizeof(utrd));
+
+    /* Build up request upiu */
+    UtpUpiuReq req_upiu = { 0 };
+    req_upiu.header.trans_type = UFS_UPIU_TRANSACTION_NOP_OUT;
+    req_upiu.header.task_tag = slot;
+    qtest_memwrite(ufs->dev.bus->qts, req_upiu_addr, &req_upiu,
+                   sizeof(req_upiu));
+
+    /* Ring Doorbell */
+    ufs_wreg(ufs, A_UTRLDBR, 1);
+    ufs_wait_for_irq(ufs);
+    g_assert_true(FIELD_EX32(ufs_rreg(ufs, A_IS), IS, UTRCS));
+    ufs_wreg(ufs, A_IS, FIELD_DP32(0, IS, UTRCS, 1));
+
+    qtest_memread(ufs->dev.bus->qts, utrd_addr, utrd_out, sizeof(*utrd_out));
+    qtest_memread(ufs->dev.bus->qts, rsp_upiu_addr, rsp_out, sizeof(*rsp_out));
+}
+
+static void ufs_send_query(QUfs *ufs, uint8_t slot, uint8_t query_function,
+                           uint8_t query_opcode, uint8_t idn, uint8_t index,
+                           UtpTransferReqDesc *utrd_out, UtpUpiuRsp *rsp_out)
+{
+    /* Build up utp transfer request descriptor */
+    UtpTransferReqDesc utrd = ufs_build_req_utrd(ufs->cmd_desc_addr, slot,
+                                                 UFS_UTP_NO_DATA_TRANSFER, 0);
+    uint64_t utrd_addr = ufs->utrlba + slot * sizeof(UtpTransferReqDesc);
+    uint64_t req_upiu_addr =
+        ufs->cmd_desc_addr + slot * UTP_COMMAND_DESCRIPTOR_SIZE;
+    uint64_t rsp_upiu_addr = req_upiu_addr + UTP_RESPONSE_UPIU_OFFSET;
+    qtest_memwrite(ufs->dev.bus->qts, utrd_addr, &utrd, sizeof(utrd));
+
+    /* Build up request upiu */
+    UtpUpiuReq req_upiu = { 0 };
+    req_upiu.header.trans_type = UFS_UPIU_TRANSACTION_QUERY_REQ;
+    req_upiu.header.query_func = query_function;
+    req_upiu.header.task_tag = slot;
+    /*
+     * QEMU UFS does not currently support Write descriptor and Write attribute,
+     * so the value of data_segment_length is always 0.
+     */
+    req_upiu.header.data_segment_length = 0;
+    req_upiu.qr.opcode = query_opcode;
+    req_upiu.qr.idn = idn;
+    req_upiu.qr.index = index;
+    qtest_memwrite(ufs->dev.bus->qts, req_upiu_addr, &req_upiu,
+                   sizeof(req_upiu));
+
+    /* Ring Doorbell */
+    ufs_wreg(ufs, A_UTRLDBR, 1);
+    ufs_wait_for_irq(ufs);
+    g_assert_true(FIELD_EX32(ufs_rreg(ufs, A_IS), IS, UTRCS));
+    ufs_wreg(ufs, A_IS, FIELD_DP32(0, IS, UTRCS, 1));
+
+    qtest_memread(ufs->dev.bus->qts, utrd_addr, utrd_out, sizeof(*utrd_out));
+    qtest_memread(ufs->dev.bus->qts, rsp_upiu_addr, rsp_out, sizeof(*rsp_out));
+}
+
+static void ufs_send_scsi_command(QUfs *ufs, uint8_t slot, uint8_t lun,
+                                  const uint8_t *cdb, const uint8_t *data_in,
+                                  size_t data_in_len, uint8_t *data_out,
+                                  size_t data_out_len,
+                                  UtpTransferReqDesc *utrd_out,
+                                  UtpUpiuRsp *rsp_out)
+
+{
+    /* Build up PRDT */
+    UfshcdSgEntry entries[MAX_PRD_ENTRY_COUNT] = {
+        0,
+    };
+    uint8_t flags;
+    uint16_t prd_table_length, i;
+    uint32_t data_direction, data_len;
+    uint64_t req_upiu_addr =
+        ufs->cmd_desc_addr + slot * UTP_COMMAND_DESCRIPTOR_SIZE;
+    uint64_t prdt_addr = req_upiu_addr + UTP_PRDT_UPIU_OFFSET;
+
+    g_assert_true(data_in_len < MAX_PRD_ENTRY_COUNT * PRD_ENTRY_DATA_SIZE);
+    g_assert_true(data_out_len < MAX_PRD_ENTRY_COUNT * PRD_ENTRY_DATA_SIZE);
+    if (data_in_len > 0) {
+        g_assert_nonnull(data_in);
+        data_direction = UFS_UTP_HOST_TO_DEVICE;
+        data_len = data_in_len;
+        flags = UFS_UPIU_CMD_FLAGS_WRITE;
+    } else if (data_out_len > 0) {
+        g_assert_nonnull(data_out);
+        data_direction = UFS_UTP_DEVICE_TO_HOST;
+        data_len = data_out_len;
+        flags = UFS_UPIU_CMD_FLAGS_READ;
+    } else {
+        data_direction = UFS_UTP_NO_DATA_TRANSFER;
+        data_len = 0;
+        flags = UFS_UPIU_CMD_FLAGS_NONE;
+    }
+    prd_table_length = DIV_ROUND_UP(data_len, PRD_ENTRY_DATA_SIZE);
+
+    qtest_memset(ufs->dev.bus->qts, ufs->data_buffer_addr, 0,
+                 MAX_PRD_ENTRY_COUNT * PRD_ENTRY_DATA_SIZE);
+    if (data_in_len) {
+        qtest_memwrite(ufs->dev.bus->qts, ufs->data_buffer_addr, data_in,
+                       data_in_len);
+    }
+
+    for (i = 0; i < prd_table_length; i++) {
+        entries[i].addr =
+            cpu_to_le64(ufs->data_buffer_addr + i * sizeof(UfshcdSgEntry));
+        if (i + 1 != prd_table_length) {
+            entries[i].size = cpu_to_le32(PRD_ENTRY_DATA_SIZE - 1);
+        } else {
+            entries[i].size = cpu_to_le32(
+                data_len - (PRD_ENTRY_DATA_SIZE * (prd_table_length - 1)) - 1);
+        }
+    }
+    qtest_memwrite(ufs->dev.bus->qts, prdt_addr, entries,
+                   prd_table_length * sizeof(UfshcdSgEntry));
+
+    /* Build up utp transfer request descriptor */
+    UtpTransferReqDesc utrd = ufs_build_req_utrd(
+        ufs->cmd_desc_addr, slot, data_direction, prd_table_length);
+    uint64_t utrd_addr = ufs->utrlba + slot * sizeof(UtpTransferReqDesc);
+    uint64_t rsp_upiu_addr = req_upiu_addr + UTP_RESPONSE_UPIU_OFFSET;
+    qtest_memwrite(ufs->dev.bus->qts, utrd_addr, &utrd, sizeof(utrd));
+
+    /* Build up request upiu */
+    UtpUpiuReq req_upiu = { 0 };
+    req_upiu.header.trans_type = UFS_UPIU_TRANSACTION_COMMAND;
+    req_upiu.header.flags = flags;
+    req_upiu.header.lun = lun;
+    req_upiu.header.task_tag = slot;
+    req_upiu.sc.exp_data_transfer_len = cpu_to_be32(data_len);
+    memcpy(req_upiu.sc.cdb, cdb, UFS_CDB_SIZE);
+    qtest_memwrite(ufs->dev.bus->qts, req_upiu_addr, &req_upiu,
+                   sizeof(req_upiu));
+
+    /* Ring Doorbell */
+    ufs_wreg(ufs, A_UTRLDBR, 1);
+    ufs_wait_for_irq(ufs);
+    g_assert_true(FIELD_EX32(ufs_rreg(ufs, A_IS), IS, UTRCS));
+    ufs_wreg(ufs, A_IS, FIELD_DP32(0, IS, UTRCS, 1));
+
+    qtest_memread(ufs->dev.bus->qts, utrd_addr, utrd_out, sizeof(*utrd_out));
+    qtest_memread(ufs->dev.bus->qts, rsp_upiu_addr, rsp_out, sizeof(*rsp_out));
+    if (data_out_len) {
+        qtest_memread(ufs->dev.bus->qts, ufs->data_buffer_addr, data_out,
+                      data_out_len);
+    }
+}
+
+/**
+ * Initialize Ufs host controller and logical unit.
+ * After running this function, you can make a transfer request to the UFS.
+ */
+static void ufs_init(QUfs *ufs, QGuestAllocator *alloc)
+{
+    uint64_t end_time;
+    uint32_t nutrs, nutmrs;
+    uint32_t hcs, is, ucmdarg2, cap;
+    uint32_t hce = 0, ie = 0;
+    UtpTransferReqDesc utrd;
+    UtpUpiuRsp rsp_upiu;
+
+    ufs->bar = qpci_iomap(&ufs->dev, 0, NULL);
+    qpci_device_enable(&ufs->dev);
+
+    /* Start host controller initialization */
+    hce = FIELD_DP32(hce, HCE, HCE, 1);
+    ufs_wreg(ufs, A_HCE, hce);
+
+    /* Wait for device to reset */
+    end_time = g_get_monotonic_time() + TIMEOUT_SECONDS * G_TIME_SPAN_SECOND;
+    do {
+        qtest_clock_step(ufs->dev.bus->qts, 100);
+        hce = FIELD_EX32(ufs_rreg(ufs, A_HCE), HCE, HCE);
+    } while (hce == 0 && g_get_monotonic_time() < end_time);
+    g_assert_cmpuint(hce, ==, 1);
+
+    /* Enable interrupt */
+    ie = FIELD_DP32(ie, IE, UCCE, 1);
+    ie = FIELD_DP32(ie, IE, UHESE, 1);
+    ie = FIELD_DP32(ie, IE, UHXSE, 1);
+    ie = FIELD_DP32(ie, IE, UPMSE, 1);
+    ufs_wreg(ufs, A_IE, ie);
+
+    /* Send DME_LINK_STARTUP uic command */
+    hcs = ufs_rreg(ufs, A_HCS);
+    g_assert_true(FIELD_EX32(hcs, HCS, UCRDY));
+
+    ufs_wreg(ufs, A_UCMDARG1, 0);
+    ufs_wreg(ufs, A_UCMDARG2, 0);
+    ufs_wreg(ufs, A_UCMDARG3, 0);
+    ufs_wreg(ufs, A_UICCMD, UFS_UIC_CMD_DME_LINK_STARTUP);
+
+    is = ufs_rreg(ufs, A_IS);
+    g_assert_true(FIELD_EX32(is, IS, UCCS));
+    ufs_wreg(ufs, A_IS, FIELD_DP32(0, IS, UCCS, 1));
+
+    ucmdarg2 = ufs_rreg(ufs, A_UCMDARG2);
+    g_assert_cmpuint(ucmdarg2, ==, 0);
+    is = ufs_rreg(ufs, A_IS);
+    g_assert_cmpuint(is, ==, 0);
+    hcs = ufs_rreg(ufs, A_HCS);
+    g_assert_true(FIELD_EX32(hcs, HCS, DP));
+    g_assert_true(FIELD_EX32(hcs, HCS, UTRLRDY));
+    g_assert_true(FIELD_EX32(hcs, HCS, UTMRLRDY));
+    g_assert_true(FIELD_EX32(hcs, HCS, UCRDY));
+
+    /* Enable all interrupt functions */
+    ie = FIELD_DP32(ie, IE, UTRCE, 1);
+    ie = FIELD_DP32(ie, IE, UEE, 1);
+    ie = FIELD_DP32(ie, IE, UPMSE, 1);
+    ie = FIELD_DP32(ie, IE, UHXSE, 1);
+    ie = FIELD_DP32(ie, IE, UHESE, 1);
+    ie = FIELD_DP32(ie, IE, UTMRCE, 1);
+    ie = FIELD_DP32(ie, IE, UCCE, 1);
+    ie = FIELD_DP32(ie, IE, DFEE, 1);
+    ie = FIELD_DP32(ie, IE, HCFEE, 1);
+    ie = FIELD_DP32(ie, IE, SBFEE, 1);
+    ie = FIELD_DP32(ie, IE, CEFEE, 1);
+    ufs_wreg(ufs, A_IE, ie);
+    ufs_wreg(ufs, A_UTRIACR, 0);
+
+    /* Enable tranfer request and task management request */
+    cap = ufs_rreg(ufs, A_CAP);
+    nutrs = FIELD_EX32(cap, CAP, NUTRS) + 1;
+    nutmrs = FIELD_EX32(cap, CAP, NUTMRS) + 1;
+    ufs->cmd_desc_addr =
+        guest_alloc(alloc, nutrs * UTP_COMMAND_DESCRIPTOR_SIZE);
+    ufs->data_buffer_addr =
+        guest_alloc(alloc, MAX_PRD_ENTRY_COUNT * PRD_ENTRY_DATA_SIZE);
+    ufs->utrlba = guest_alloc(alloc, nutrs * sizeof(UtpTransferReqDesc));
+    ufs->utmrlba = guest_alloc(alloc, nutmrs * sizeof(UtpTaskReqDesc));
+
+    ufs_wreg(ufs, A_UTRLBA, ufs->utrlba & 0xffffffff);
+    ufs_wreg(ufs, A_UTRLBAU, ufs->utrlba >> 32);
+    ufs_wreg(ufs, A_UTMRLBA, ufs->utmrlba & 0xffffffff);
+    ufs_wreg(ufs, A_UTMRLBAU, ufs->utmrlba >> 32);
+    ufs_wreg(ufs, A_UTRLRSR, 1);
+    ufs_wreg(ufs, A_UTMRLRSR, 1);
+
+    /* Send nop out to test transfer request */
+    ufs_send_nop_out(ufs, 0, &utrd, &rsp_upiu);
+    g_assert_cmpuint(le32_to_cpu(utrd.header.dword_2), ==, UFS_OCS_SUCCESS);
+
+    /* Set fDeviceInit flag via query request */
+    ufs_send_query(ufs, 0, UFS_UPIU_QUERY_FUNC_STANDARD_WRITE_REQUEST,
+                   UFS_UPIU_QUERY_OPCODE_SET_FLAG,
+                   UFS_QUERY_FLAG_IDN_FDEVICEINIT, 0, &utrd, &rsp_upiu);
+    g_assert_cmpuint(le32_to_cpu(utrd.header.dword_2), ==, UFS_OCS_SUCCESS);
+
+    /* Wait for device to reset */
+    end_time = g_get_monotonic_time() + TIMEOUT_SECONDS * G_TIME_SPAN_SECOND;
+    do {
+        qtest_clock_step(ufs->dev.bus->qts, 100);
+        ufs_send_query(ufs, 0, UFS_UPIU_QUERY_FUNC_STANDARD_READ_REQUEST,
+                       UFS_UPIU_QUERY_OPCODE_READ_FLAG,
+                       UFS_QUERY_FLAG_IDN_FDEVICEINIT, 0, &utrd, &rsp_upiu);
+    } while (be32_to_cpu(rsp_upiu.qr.value) != 0 &&
+             g_get_monotonic_time() < end_time);
+    g_assert_cmpuint(be32_to_cpu(rsp_upiu.qr.value), ==, 0);
+
+    ufs->enabled = true;
+}
+
+static void ufs_exit(QUfs *ufs, QGuestAllocator *alloc)
+{
+    if (ufs->enabled) {
+        guest_free(alloc, ufs->utrlba);
+        guest_free(alloc, ufs->utmrlba);
+        guest_free(alloc, ufs->cmd_desc_addr);
+        guest_free(alloc, ufs->data_buffer_addr);
+    }
+
+    qpci_iounmap(&ufs->dev, ufs->bar);
+}
+
+static void *ufs_get_driver(void *obj, const char *interface)
+{
+    QUfs *ufs = obj;
+
+    if (!g_strcmp0(interface, "pci-device")) {
+        return &ufs->dev;
+    }
+
+    fprintf(stderr, "%s not present in ufs\n", interface);
+    g_assert_not_reached();
+}
+
+static void *ufs_create(void *pci_bus, QGuestAllocator *alloc, void *addr)
+{
+    QUfs *ufs = g_new0(QUfs, 1);
+    QPCIBus *bus = pci_bus;
+
+    qpci_device_init(&ufs->dev, bus, addr);
+    ufs->obj.get_driver = ufs_get_driver;
+
+    return &ufs->obj;
+}
+
+static void ufstest_reg_read(void *obj, void *data, QGuestAllocator *alloc)
+{
+    QUfs *ufs = obj;
+    uint32_t cap;
+
+    ufs->bar = qpci_iomap(&ufs->dev, 0, NULL);
+    qpci_device_enable(&ufs->dev);
+
+    cap = ufs_rreg(ufs, A_CAP);
+    g_assert_cmpuint(FIELD_EX32(cap, CAP, NUTRS), ==, 31);
+    g_assert_cmpuint(FIELD_EX32(cap, CAP, NUTMRS), ==, 7);
+    g_assert_cmpuint(FIELD_EX32(cap, CAP, 64AS), ==, 1);
+
+    qpci_iounmap(&ufs->dev, ufs->bar);
+}
+
+static void ufstest_init(void *obj, void *data, QGuestAllocator *alloc)
+{
+    QUfs *ufs = obj;
+
+    uint8_t buf[4096] = { 0 };
+    const uint8_t report_luns_cdb[UFS_CDB_SIZE] = {
+        /* allocation length 4096 */
+        REPORT_LUNS, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00,        0x00, 0x10, 0x00, 0x00, 0x00
+    };
+    const uint8_t test_unit_ready_cdb[UFS_CDB_SIZE] = {
+        TEST_UNIT_READY,
+    };
+    UtpTransferReqDesc utrd;
+    UtpUpiuRsp rsp_upiu;
+
+    ufs_init(ufs, alloc);
+
+    /* Check REPORT_LUNS */
+    ufs_send_scsi_command(ufs, 0, 0, report_luns_cdb, NULL, 0, buf, sizeof(buf),
+                          &utrd, &rsp_upiu);
+    g_assert_cmpuint(le32_to_cpu(utrd.header.dword_2), ==, UFS_OCS_SUCCESS);
+    g_assert_cmpuint(rsp_upiu.header.scsi_status, ==, GOOD);
+    /* LUN LIST LENGTH should be 8, in big endian */
+    g_assert_cmpuint(buf[3], ==, 8);
+    /* There is one logical unit whose lun is 0 */
+    g_assert_cmpuint(buf[9], ==, 0);
+
+    /* Check TEST_UNIT_READY */
+    ufs_send_scsi_command(ufs, 0, 0, test_unit_ready_cdb, NULL, 0, NULL, 0,
+                          &utrd, &rsp_upiu);
+    g_assert_cmpuint(le32_to_cpu(utrd.header.dword_2), ==, UFS_OCS_SUCCESS);
+    g_assert_cmpuint(rsp_upiu.header.scsi_status, ==, GOOD);
+
+    ufs_exit(ufs, alloc);
+}
+
+static void ufstest_read_write(void *obj, void *data, QGuestAllocator *alloc)
+{
+    QUfs *ufs = obj;
+    uint8_t read_buf[4096] = { 0 };
+    uint8_t write_buf[4096] = { 0 };
+    const uint8_t read_capacity_cdb[UFS_CDB_SIZE] = {
+        /* allocation length 4096 */
+        SERVICE_ACTION_IN_16,
+        SAI_READ_CAPACITY_16,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x10,
+        0x00,
+        0x00,
+        0x00
+    };
+    const uint8_t read_cdb[UFS_CDB_SIZE] = {
+        /* READ(10) to LBA 0, transfer length 1 */
+        READ_10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00
+    };
+    const uint8_t write_cdb[UFS_CDB_SIZE] = {
+        /* WRITE(10) to LBA 0, transfer length 1 */
+        WRITE_10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00
+    };
+    uint32_t block_size;
+    UtpTransferReqDesc utrd;
+    UtpUpiuRsp rsp_upiu;
+
+    ufs_init(ufs, alloc);
+
+    /* Read capacity */
+    ufs_send_scsi_command(ufs, 0, 1, read_capacity_cdb, NULL, 0, read_buf,
+                          sizeof(read_buf), &utrd, &rsp_upiu);
+    g_assert_cmpuint(le32_to_cpu(utrd.header.dword_2), ==, UFS_OCS_SUCCESS);
+    g_assert_cmpuint(rsp_upiu.header.scsi_status, ==,
+                     UFS_COMMAND_RESULT_SUCESS);
+    block_size = ldl_be_p(&read_buf[8]);
+    g_assert_cmpuint(block_size, ==, 4096);
+
+    /* Write data */
+    memset(write_buf, rand() % 255 + 1, block_size);
+    ufs_send_scsi_command(ufs, 0, 1, write_cdb, write_buf, block_size, NULL, 0,
+                          &utrd, &rsp_upiu);
+    g_assert_cmpuint(le32_to_cpu(utrd.header.dword_2), ==, UFS_OCS_SUCCESS);
+    g_assert_cmpuint(rsp_upiu.header.scsi_status, ==,
+                     UFS_COMMAND_RESULT_SUCESS);
+
+    /* Read data and verify */
+    ufs_send_scsi_command(ufs, 0, 1, read_cdb, NULL, 0, read_buf, block_size,
+                          &utrd, &rsp_upiu);
+    g_assert_cmpuint(le32_to_cpu(utrd.header.dword_2), ==, UFS_OCS_SUCCESS);
+    g_assert_cmpuint(rsp_upiu.header.scsi_status, ==,
+                     UFS_COMMAND_RESULT_SUCESS);
+    g_assert_cmpint(memcmp(read_buf, write_buf, block_size), ==, 0);
+
+    ufs_exit(ufs, alloc);
+}
+
+static void drive_destroy(void *path)
+{
+    unlink(path);
+    g_free(path);
+    qos_invalidate_command_line();
+}
+
+static char *drive_create(void)
+{
+    int fd, ret;
+    char *t_path;
+
+    /* Create a temporary raw image */
+    fd = g_file_open_tmp("qtest-ufs.XXXXXX", &t_path, NULL);
+    g_assert_cmpint(fd, >=, 0);
+    ret = ftruncate(fd, TEST_IMAGE_SIZE);
+    g_assert_cmpint(ret, ==, 0);
+    close(fd);
+
+    g_test_queue_destroy(drive_destroy, t_path);
+    return t_path;
+}
+
+static void *ufs_blk_test_setup(GString *cmd_line, void *arg)
+{
+    char *tmp_path = drive_create();
+
+    g_string_append_printf(cmd_line,
+                           " -blockdev file,filename=%s,node-name=drv1 "
+                           "-device ufs-lu,bus=ufs0,drive=drv1,lun=1 ",
+                           tmp_path);
+
+    return arg;
+}
+
+static void ufs_register_nodes(void)
+{
+    const char *arch;
+    QOSGraphEdgeOptions edge_opts = {
+        .before_cmd_line = "-blockdev null-co,node-name=drv0,read-zeroes=on",
+        .after_cmd_line = "-device ufs-lu,bus=ufs0,drive=drv0,lun=0",
+        .extra_device_opts = "addr=04.0,id=ufs0,nutrs=32,nutmrs=8"
+    };
+
+    QOSGraphTestOptions io_test_opts = {
+        .before = ufs_blk_test_setup,
+    };
+
+    add_qpci_address(&edge_opts, &(QPCIAddress){ .devfn = QPCI_DEVFN(4, 0) });
+
+    qos_node_create_driver("ufs", ufs_create);
+    qos_node_consumes("ufs", "pci-bus", &edge_opts);
+    qos_node_produces("ufs", "pci-device");
+
+    qos_add_test("reg-read", "ufs", ufstest_reg_read, NULL);
+
+    /*
+     * Check architecture
+     * TODO: Enable ufs io tests for ppc64
+     */
+    arch = qtest_get_arch();
+    if (!strcmp(arch, "ppc64")) {
+        g_test_message("Skipping ufs io tests for ppc64");
+        return;
+    }
+    qos_add_test("init", "ufs", ufstest_init, NULL);
+    qos_add_test("read-write", "ufs", ufstest_read_write, &io_test_opts);
+}
+
+libqos_init(ufs_register_nodes);
diff --git a/tests/qtest/usb-hcd-uhci-test.c b/tests/qtest/usb-hcd-uhci-test.c
index 28751f53da..4446555f08 100644
--- a/tests/qtest/usb-hcd-uhci-test.c
+++ b/tests/qtest/usb-hcd-uhci-test.c
@@ -17,10 +17,6 @@
 
 static QOSState *qs;
 
-static void test_uhci_init(void)
-{
-}
-
 static void test_port(int port)
 {
     struct qhc uhci;
@@ -71,7 +67,6 @@ int main(int argc, char **argv)
         return 0;
     }
 
-    qtest_add_func("/uhci/pci/init", test_uhci_init);
     qtest_add_func("/uhci/pci/port1", test_port_1);
     qtest_add_func("/uhci/pci/hotplug", test_uhci_hotplug);
     if (qtest_has_device("usb-storage")) {
diff --git a/tests/qtest/usb-hcd-xhci-test.c b/tests/qtest/usb-hcd-xhci-test.c
index 10ef9d2a91..0cccfd85a6 100644
--- a/tests/qtest/usb-hcd-xhci-test.c
+++ b/tests/qtest/usb-hcd-xhci-test.c
@@ -11,11 +11,6 @@
 #include "libqtest-single.h"
 #include "libqos/usb.h"
 
-
-static void test_xhci_init(void)
-{
-}
-
 static void test_xhci_hotplug(void)
 {
     usb_test_hotplug(global_qtest, "xhci", "1", NULL);
@@ -54,10 +49,13 @@ int main(int argc, char **argv)
 
     g_test_init(&argc, &argv, NULL);
 
-    qtest_add_func("/xhci/pci/init", test_xhci_init);
     qtest_add_func("/xhci/pci/hotplug", test_xhci_hotplug);
-    qtest_add_func("/xhci/pci/hotplug/usb-uas", test_usb_uas_hotplug);
-    qtest_add_func("/xhci/pci/hotplug/usb-ccid", test_usb_ccid_hotplug);
+    if (qtest_has_device("usb-uas")) {
+        qtest_add_func("/xhci/pci/hotplug/usb-uas", test_usb_uas_hotplug);
+    }
+    if (qtest_has_device("usb-ccid")) {
+        qtest_add_func("/xhci/pci/hotplug/usb-ccid", test_usb_ccid_hotplug);
+    }
 
     qtest_start("-device nec-usb-xhci,id=xhci"
                 " -drive id=drive0,if=none,file=null-co://,"
diff --git a/tests/qtest/vhost-user-blk-test.c b/tests/qtest/vhost-user-blk-test.c
index dc37f5af4d..117b9acd10 100644
--- a/tests/qtest/vhost-user-blk-test.c
+++ b/tests/qtest/vhost-user-blk-test.c
@@ -961,7 +961,7 @@ static void *vhost_user_blk_test_setup(GString *cmd_line, void *arg)
  * Setup for hotplug.
  *
  * Since vhost-user server only serves one vhost-user client one time,
- * another exprot
+ * another export
  *
  */
 static void *vhost_user_blk_hotplug_test_setup(GString *cmd_line, void *arg)
diff --git a/tests/qtest/virtio-net-test.c b/tests/qtest/virtio-net-test.c
index dff43f0f60..fab5dd8b05 100644
--- a/tests/qtest/virtio-net-test.c
+++ b/tests/qtest/virtio-net-test.c
@@ -212,7 +212,7 @@ static void announce_self(void *obj, void *data, QGuestAllocator *t_alloc)
     g_assert_cmpint(*proto, ==, htons(ETH_P_RARP));
 
     /*
-     * Stop the announcment by settings rounds to 0 on the
+     * Stop the announcement by settings rounds to 0 on the
      * existing timer.
      */
     rsp = qmp("{ 'execute' : 'announce-self', "
diff --git a/tests/qtest/vmgenid-test.c b/tests/qtest/vmgenid-test.c
index 324db08c7a..29fee9e7c0 100644
--- a/tests/qtest/vmgenid-test.c
+++ b/tests/qtest/vmgenid-test.c
@@ -19,7 +19,7 @@
 
 #define VGID_GUID "324e6eaf-d1d1-4bf6-bf41-b9bb6c91fb87"
 #define VMGENID_GUID_OFFSET 40   /* allow space for
-                                  * OVMF SDT Header Probe Supressor
+                                  * OVMF SDT Header Probe Suppressor
                                   */
 #define RSDP_ADDR_INVALID 0x100000 /* RSDP must be below this address */
 
diff --git a/tests/tcg/aarch64/Makefile.target b/tests/tcg/aarch64/Makefile.target
index b77bbd9b3c..2efacf9a5a 100644
--- a/tests/tcg/aarch64/Makefile.target
+++ b/tests/tcg/aarch64/Makefile.target
@@ -42,7 +42,11 @@ endif
 ifneq ($(CROSS_CC_HAS_ARMV8_3),)
 AARCH64_TESTS += pauth-1 pauth-2 pauth-4 pauth-5
 pauth-%: CFLAGS += -march=armv8.3-a
-run-pauth-%: QEMU_OPTS += -cpu max
+run-pauth-1: QEMU_OPTS += -cpu max
+run-pauth-2: QEMU_OPTS += -cpu max
+# Choose a cpu with FEAT_Pauth but without FEAT_FPAC for pauth-[45].
+run-pauth-4: QEMU_OPTS += -cpu neoverse-v1
+run-pauth-5: QEMU_OPTS += -cpu neoverse-v1
 endif
 
 # BTI Tests
diff --git a/tests/tcg/aarch64/bti-1.c b/tests/tcg/aarch64/bti-1.c
index 61924f0d7a..99a879af23 100644
--- a/tests/tcg/aarch64/bti-1.c
+++ b/tests/tcg/aarch64/bti-1.c
@@ -2,7 +2,7 @@
  * Branch target identification, basic notskip cases.
  */
 
-#include "bti-crt.inc.c"
+#include "bti-crt.c.inc"
 
 static void skip2_sigill(int sig, siginfo_t *info, ucontext_t *uc)
 {
diff --git a/tests/tcg/aarch64/bti-3.c b/tests/tcg/aarch64/bti-3.c
index a852856d9a..8c534c09d7 100644
--- a/tests/tcg/aarch64/bti-3.c
+++ b/tests/tcg/aarch64/bti-3.c
@@ -2,7 +2,7 @@
  * BTI vs PACIASP
  */
 
-#include "bti-crt.inc.c"
+#include "bti-crt.c.inc"
 
 static void skip2_sigill(int sig, siginfo_t *info, ucontext_t *uc)
 {
diff --git a/tests/tcg/aarch64/bti-crt.inc.c b/tests/tcg/aarch64/bti-crt.c.inc
similarity index 100%
rename from tests/tcg/aarch64/bti-crt.inc.c
rename to tests/tcg/aarch64/bti-crt.c.inc
diff --git a/tests/tcg/aarch64/pauth-2.c b/tests/tcg/aarch64/pauth-2.c
index 978652ede3..89ffdbf1df 100644
--- a/tests/tcg/aarch64/pauth-2.c
+++ b/tests/tcg/aarch64/pauth-2.c
@@ -1,5 +1,22 @@
 #include <stdint.h>
+#include <signal.h>
+#include <stdlib.h>
 #include <assert.h>
+#include "pauth.h"
+
+
+static void sigill(int sig, siginfo_t *info, void *vuc)
+{
+    ucontext_t *uc = vuc;
+    uint64_t test;
+
+    /* There is only one insn below that is allowed to fault. */
+    asm volatile("adr %0, auth2_insn" : "=r"(test));
+    assert(test == uc->uc_mcontext.pc);
+    exit(0);
+}
+
+static int pac_feature;
 
 void do_test(uint64_t value)
 {
@@ -27,31 +44,52 @@ void do_test(uint64_t value)
      * An invalid salt usually fails authorization, but again there
      * is a chance of choosing another salt that works.
      * Iterate until we find another salt which does fail.
+     *
+     * With FEAT_FPAC, this will SIGILL instead of producing a result.
      */
     for (salt2 = salt1 + 1; ; salt2++) {
-        asm volatile("autda %0, %2" : "=r"(decode) : "0"(encode), "r"(salt2));
+        asm volatile("auth2_insn: autda %0, %2"
+                     : "=r"(decode) : "0"(encode), "r"(salt2));
         if (decode != value) {
             break;
         }
     }
 
+    assert(pac_feature < 4);  /* No FEAT_FPAC */
+
     /* The VA bits, bit 55, and the TBI bits, should be unchanged.  */
     assert(((decode ^ value) & 0xff80ffffffffffffull) == 0);
 
     /*
-     * Bits [54:53] are an error indicator based on the key used;
-     * the DA key above is keynumber 0, so error == 0b01.  Otherwise
-     * bit 55 of the original is sign-extended into the rest of the auth.
+     * Without FEAT_Pauth2, bits [54:53] are an error indicator based on
+     * the key used; the DA key above is keynumber 0, so error == 0b01.
+     * Otherwise, bit 55 of the original is sign-extended into the rest
+     * of the auth.
      */
-    if ((value >> 55) & 1) {
-        assert(((decode >> 48) & 0xff) == 0b10111111);
-    } else {
-        assert(((decode >> 48) & 0xff) == 0b00100000);
+    if (pac_feature < 3) {
+        if ((value >> 55) & 1) {
+            assert(((decode >> 48) & 0xff) == 0b10111111);
+        } else {
+            assert(((decode >> 48) & 0xff) == 0b00100000);
+        }
     }
 }
 
 int main()
 {
+    static const struct sigaction sa = {
+        .sa_sigaction = sigill,
+        .sa_flags = SA_SIGINFO
+    };
+
+    pac_feature = get_pac_feature();
+    assert(pac_feature != 0);
+
+    if (pac_feature >= 4) {
+        /* FEAT_FPAC */
+        sigaction(SIGILL, &sa, NULL);
+    }
+
     do_test(0);
     do_test(0xda004acedeadbeefull);
     return 0;
diff --git a/tests/tcg/aarch64/pauth-4.c b/tests/tcg/aarch64/pauth-4.c
index 24a639e36c..b254f413af 100644
--- a/tests/tcg/aarch64/pauth-4.c
+++ b/tests/tcg/aarch64/pauth-4.c
@@ -2,14 +2,24 @@
 #include <assert.h>
 #include <stdio.h>
 #include <stdlib.h>
+#include "pauth.h"
 
 #define TESTS 1000
 
 int main()
 {
+    char base[TESTS];
     int i, count = 0;
     float perc;
-    void *base = malloc(TESTS);
+    int pac_feature = get_pac_feature();
+
+    /*
+     * Exit if no PAuth or FEAT_FPAC, which will SIGILL on AUTIA failure
+     * rather than return an error for us to check below.
+     */
+    if (pac_feature == 0 || pac_feature >= 4) {
+        return 0;
+    }
 
     for (i = 0; i < TESTS; i++) {
         uintptr_t in, x, y;
@@ -17,7 +27,7 @@ int main()
         in = i + (uintptr_t) base;
 
         asm("mov %0, %[in]\n\t"
-            "pacia %0, sp\n\t"        /* sigill if pauth not supported */
+            "pacia %0, sp\n\t"
             "eor %0, %0, #4\n\t"      /* corrupt single bit */
             "mov %1, %0\n\t"
             "autia %1, sp\n\t"        /* validate corrupted pointer */
@@ -36,10 +46,10 @@ int main()
         if (x != y) {
             count++;
         }
-
     }
+
     perc = (float) count / (float) TESTS;
-    printf("Checks Passed: %0.2f%%", perc * 100.0);
+    printf("Checks Passed: %0.2f%%\n", perc * 100.0);
     assert(perc > 0.95);
     return 0;
 }
diff --git a/tests/tcg/aarch64/pauth-5.c b/tests/tcg/aarch64/pauth-5.c
index 67c257918b..ed8d5a926b 100644
--- a/tests/tcg/aarch64/pauth-5.c
+++ b/tests/tcg/aarch64/pauth-5.c
@@ -1,4 +1,5 @@
 #include <assert.h>
+#include "pauth.h"
 
 static int x;
 
@@ -6,6 +7,15 @@ int main()
 {
     int *p0 = &x, *p1, *p2, *p3;
     unsigned long salt = 0;
+    int pac_feature = get_pac_feature();
+
+    /*
+     * Exit if no PAuth or FEAT_FPAC, which will SIGILL on AUTDA failure
+     * rather than return an error for us to check below.
+     */
+    if (pac_feature == 0 || pac_feature >= 4) {
+        return 0;
+    }
 
     /*
      * With TBI enabled and a 48-bit VA, there are 7 bits of auth, and so
diff --git a/tests/tcg/aarch64/pauth.h b/tests/tcg/aarch64/pauth.h
new file mode 100644
index 0000000000..543b234437
--- /dev/null
+++ b/tests/tcg/aarch64/pauth.h
@@ -0,0 +1,23 @@
+/*
+ * Helper for pauth test case
+ *
+ * Copyright (c) 2023 Linaro Ltd
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include <assert.h>
+#include <sys/auxv.h>
+
+static int get_pac_feature(void)
+{
+    unsigned long isar1, isar2;
+
+    assert(getauxval(AT_HWCAP) & HWCAP_CPUID);
+
+    asm("mrs %0, id_aa64isar1_el1" : "=r"(isar1));
+    asm("mrs %0, S3_0_C0_C6_2" : "=r"(isar2));     /* id_aa64isar2_el1 */
+
+    return ((isar1 >> 4) & 0xf)   /* APA */
+         | ((isar1 >> 8) & 0xf)   /* API */
+         | ((isar2 >> 12) & 0xf); /* APA3 */
+}
diff --git a/tests/tcg/hexagon/fpstuff.c b/tests/tcg/hexagon/fpstuff.c
index 344b9f7772..6aadaccabd 100644
--- a/tests/tcg/hexagon/fpstuff.c
+++ b/tests/tcg/hexagon/fpstuff.c
@@ -52,7 +52,7 @@ static void check_compare_exception(void)
     uint32_t cmp;
     uint32_t usr;
 
-    /* Check that FP compares are quiet (don't raise any execptions) */
+    /* Check that FP compares are quiet (don't raise any exceptions) */
     asm (CLEAR_FPSTATUS
          "p0 = sfcmp.eq(%2, %3)\n\t"
          "%0 = p0\n\t"
diff --git a/tests/tcg/hexagon/test_clobber.S b/tests/tcg/hexagon/test_clobber.S
index a7aeb2b60c..10046c30d2 100644
--- a/tests/tcg/hexagon/test_clobber.S
+++ b/tests/tcg/hexagon/test_clobber.S
@@ -1,5 +1,5 @@
 /*
- * Purpose: demonstrate the succesful operation of the register save mechanism,
+ * Purpose: demonstrate the successful operation of the register save mechanism,
  * in which the caller saves the registers that will be clobbered, and restores
  * them after the call.
  */
diff --git a/tests/tcg/i386/system/boot.S b/tests/tcg/i386/system/boot.S
index 794c2cb0ad..9e8920cbfe 100644
--- a/tests/tcg/i386/system/boot.S
+++ b/tests/tcg/i386/system/boot.S
@@ -71,7 +71,7 @@ _start:
         add $8,%esp
 
         /*
-         * Don't worry about stack frame, assume everthing
+         * Don't worry about stack frame, assume everything
          * is garbage when we return, we won't need it.
          */
         call main
diff --git a/tests/tcg/i386/x86.csv b/tests/tcg/i386/x86.csv
index c43bf42dd3..5c0f628e35 100644
--- a/tests/tcg/i386/x86.csv
+++ b/tests/tcg/i386/x86.csv
@@ -19,7 +19,7 @@
 #
 # 4. The instruction encoding. For example, "C1 /4 ib".
 #
-# 5. The validity of the instruction in 32-bit (aka compatiblity, legacy) mode.
+# 5. The validity of the instruction in 32-bit (aka compatibility, legacy) mode.
 #
 # 6. The validity of the instruction in 64-bit mode.
 #
diff --git a/tests/tcg/m68k/Makefile.target b/tests/tcg/m68k/Makefile.target
index 1163c7ef03..6ff214e60a 100644
--- a/tests/tcg/m68k/Makefile.target
+++ b/tests/tcg/m68k/Makefile.target
@@ -4,7 +4,7 @@
 #
 
 VPATH += $(SRC_PATH)/tests/tcg/m68k
-TESTS += trap
+TESTS += trap denormal
 
 # On m68k Linux supports 4k and 8k pages (but 8k is currently broken)
 EXTRA_RUNS+=run-test-mmap-4096 # run-test-mmap-8192
diff --git a/tests/tcg/m68k/denormal.c b/tests/tcg/m68k/denormal.c
new file mode 100644
index 0000000000..20bd8c7332
--- /dev/null
+++ b/tests/tcg/m68k/denormal.c
@@ -0,0 +1,53 @@
+/*
+ * Test m68k extended double denormals.
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+
+#define TEST(X, Y)  { X, Y, X * Y }
+
+static volatile long double test[][3] = {
+    TEST(0x1p+16383l, 0x1p-16446l),
+    TEST(0x1.1p-8223l, 0x1.1p-8224l),
+    TEST(1.0l, 0x1p-16383l),
+};
+
+#undef TEST
+
+static void dump_ld(const char *label, long double ld)
+{
+    union {
+        long double  d;
+        struct {
+            uint32_t exp:16;
+            uint32_t space:16;
+            uint32_t h;
+            uint32_t l;
+        };
+    } u;
+
+    u.d = ld;
+    printf("%12s: % -27La 0x%04x 0x%08x 0x%08x\n", label, u.d, u.exp, u.h, u.l);
+}
+
+int main(void)
+{
+    int i, n = sizeof(test) / sizeof(test[0]), err = 0;
+
+    for (i = 0; i < n; ++i) {
+        long double x = test[i][0];
+        long double y = test[i][1];
+        long double build_mul = test[i][2];
+        long double runtime_mul = x * y;
+
+        if (runtime_mul != build_mul) {
+            dump_ld("x", x);
+            dump_ld("y", y);
+            dump_ld("build_mul", build_mul);
+            dump_ld("runtime_mul", runtime_mul);
+            err = 1;
+        }
+    }
+    return err;
+}
diff --git a/tests/tcg/s390x/Makefile.softmmu-target b/tests/tcg/s390x/Makefile.softmmu-target
index 76345b6e64..1a1f088b28 100644
--- a/tests/tcg/s390x/Makefile.softmmu-target
+++ b/tests/tcg/s390x/Makefile.softmmu-target
@@ -25,6 +25,7 @@ ASM_TESTS =                                                                    \
     lpswe-early                                                                \
     lra                                                                        \
     mc                                                                         \
+    precise-smc-softmmu                                                        \
     ssm-early                                                                  \
     stosm-early                                                                \
     stpq                                                                       \
diff --git a/tests/tcg/s390x/Makefile.target b/tests/tcg/s390x/Makefile.target
index 9c0e70c6ca..c650aefe5c 100644
--- a/tests/tcg/s390x/Makefile.target
+++ b/tests/tcg/s390x/Makefile.target
@@ -61,6 +61,7 @@ Z13_TESTS+=locfhr
 Z13_TESTS+=vcksm
 Z13_TESTS+=vstl
 Z13_TESTS+=vrep
+Z13_TESTS+=precise-smc-user
 $(Z13_TESTS): CFLAGS+=-march=z13 -O2
 TESTS+=$(Z13_TESTS)
 
diff --git a/tests/tcg/s390x/precise-smc-softmmu.S b/tests/tcg/s390x/precise-smc-softmmu.S
new file mode 100644
index 0000000000..f7fa57d899
--- /dev/null
+++ b/tests/tcg/s390x/precise-smc-softmmu.S
@@ -0,0 +1,63 @@
+/*
+ * Test s390x-softmmu precise self-modifying code handling.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+    .org 0x8e
+program_interruption_code:
+    .org 0x150
+program_old_psw:
+    .org 0x1D0                         /* program new PSW */
+    .quad 0x180000000,pgm              /* 64-bit mode */
+    .org 0x200                         /* lowcore padding */
+    .globl _start
+_start:
+    lctlg %c0,%c0,c0
+    lghi %r0,15
+
+    /* Test 1: replace sgr with agr. */
+    lghi %r1,21
+    vl %v0,patch1
+    jg 1f                              /* start a new TB */
+0:
+    .org . + 6                         /* pad patched code to 16 bytes */
+1:
+    vstl %v0,%r0,0b                    /* start writing before TB */
+    sgr %r1,%r1                        /* this becomes `agr %r1,%r1` */
+    cgijne %r1,42,failure
+
+    /* Test 2: replace agr with division by zero. */
+    vl %v0,patch2
+    jg 1f                              /* start a new TB */
+0:
+    .org . + 6                         /* pad patched code to 16 bytes */
+1:
+    vstl %v0,%r0,0b                    /* start writing before TB */
+    sgr %r1,%r1                        /* this becomes `d %r0,zero` */
+failure:
+    lpswe failure_psw
+
+pgm:
+    chhsi program_interruption_code,0x9          /* divide exception? */
+    jne failure
+    clc program_old_psw(16),expected_old_psw2    /* correct old PSW? */
+    jne failure
+    lpswe success_psw
+
+patch1:
+    .fill 12                           /* replaces padding and stpq */
+    agr %r1,%r1                        /* replaces sgr */
+patch2:
+    .fill 12                           /* replaces padding and stpq */
+    d %r0,zero                         /* replaces sgr */
+zero:
+    .long 0
+expected_old_psw2:
+    .quad 0x200180000000,failure       /* cc is from addition */
+    .align 8
+c0:
+    .quad 0x60000                      /* AFP, VX */
+success_psw:
+    .quad 0x2000000000000,0xfff        /* see is_special_wait_psw() */
+failure_psw:
+    .quad 0x2000000000000,0            /* disabled wait */
diff --git a/tests/tcg/s390x/precise-smc-user.c b/tests/tcg/s390x/precise-smc-user.c
new file mode 100644
index 0000000000..33a5270865
--- /dev/null
+++ b/tests/tcg/s390x/precise-smc-user.c
@@ -0,0 +1,39 @@
+/*
+ * Test s390x-linux-user precise self-modifying code handling.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+#include <assert.h>
+#include <sys/mman.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+extern __uint128_t __attribute__((__aligned__(1))) smc;
+extern __uint128_t __attribute__((__aligned__(1))) patch;
+
+int main(void)
+{
+    char *aligned_smc = (char *)((uintptr_t)&smc & ~0xFFFULL);
+    char *smc_end = (char *)&smc + sizeof(smc);
+    uint64_t value = 21;
+    int err;
+
+    err = mprotect(aligned_smc, smc_end - aligned_smc,
+                   PROT_READ | PROT_WRITE | PROT_EXEC);
+    assert(err == 0);
+
+    asm("jg 0f\n"                           /* start a new TB */
+        "patch: .byte 0,0,0,0,0,0\n"        /* replaces padding */
+        ".byte 0,0,0,0,0,0\n"               /* replaces vstl */
+        "agr %[value],%[value]\n"           /* replaces sgr */
+        "smc: .org . + 6\n"                 /* pad patched code to 16 bytes */
+        "0: vstl %[patch],%[idx],%[smc]\n"  /* start writing before TB */
+        "sgr %[value],%[value]"             /* this becomes `agr %r0,%r0` */
+        : [smc] "=R" (smc)
+        , [value] "+r" (value)
+        : [patch] "v" (patch)
+        , [idx] "r" (sizeof(patch) - 1)
+        : "cc");
+
+    return value == 42 ? EXIT_SUCCESS : EXIT_FAILURE;
+}
diff --git a/tests/tcg/tricore/Makefile.softmmu-target b/tests/tcg/tricore/Makefile.softmmu-target
index aff7c1b580..2ec0bd3622 100644
--- a/tests/tcg/tricore/Makefile.softmmu-target
+++ b/tests/tcg/tricore/Makefile.softmmu-target
@@ -28,7 +28,7 @@ TESTS += test_context_save_areas.c.tst
 QEMU_OPTS += -M tricore_testboard -cpu tc27x -nographic -kernel
 
 %.pS: $(ASM_TESTS_PATH)/%.S
-	$(HOST_CC) -E -o $@ $<
+	$(CC) -E -o $@ $<
 
 %.o: %.pS
 	$(AS) $(ASFLAGS) -o $@ $<
diff --git a/tests/tsan/suppressions.tsan b/tests/tsan/suppressions.tsan
index 73414b9ebd..d9a002a2ef 100644
--- a/tests/tsan/suppressions.tsan
+++ b/tests/tsan/suppressions.tsan
@@ -7,7 +7,7 @@
 mutex:aio_context_acquire
 mutex:pthread_mutex_lock
 
-# TSan reports a race betwen pthread_mutex_init() and
+# TSan reports a race between pthread_mutex_init() and
 # pthread_mutex_lock().  Since this is outside of QEMU,
 # we choose to ignore it.
 race:pthread_mutex_init
diff --git a/tests/uefi-test-tools/Makefile b/tests/uefi-test-tools/Makefile
index 471f0de981..0c003f2877 100644
--- a/tests/uefi-test-tools/Makefile
+++ b/tests/uefi-test-tools/Makefile
@@ -87,7 +87,7 @@ Build/%.fat: Build/%.efi
 .NOTPARALLEL:
 
 # In turn, the "build" utility of edk2 BaseTools invokes another "make".
-# Although the outer "make" process advertizes its job server to all child
+# Although the outer "make" process advertises its job server to all child
 # processes via MAKEFLAGS in the environment, the outer "make" closes the job
 # server file descriptors (exposed in MAKEFLAGS) before executing a recipe --
 # unless the recipe is recognized as a recursive "make" recipe. Recipes that
diff --git a/tests/unit/check-qjson.c b/tests/unit/check-qjson.c
index c4e0f851bf..a89293ce51 100644
--- a/tests/unit/check-qjson.c
+++ b/tests/unit/check-qjson.c
@@ -1486,7 +1486,7 @@ int main(int argc, char **argv)
     g_test_add_func("/literals/keyword", keyword_literal);
 
     g_test_add_func("/literals/interpolation/valid", interpolation_valid);
-    g_test_add_func("/literals/interpolation/unkown", interpolation_unknown);
+    g_test_add_func("/literals/interpolation/unknown", interpolation_unknown);
     g_test_add_func("/literals/interpolation/string", interpolation_string);
 
     g_test_add_func("/dicts/simple_dict", simple_dict);
diff --git a/tests/unit/meson.build b/tests/unit/meson.build
index 93977cc32d..0299ef6906 100644
--- a/tests/unit/meson.build
+++ b/tests/unit/meson.build
@@ -98,7 +98,7 @@ if have_block
   }
   if gnutls.found() and \
      tasn1.found() and \
-     'CONFIG_POSIX' in config_host
+     targetos != 'windows'
     tests += {
       'test-crypto-tlscredsx509': ['crypto-tls-x509-helpers.c', 'pkix_asn1_tab.c',
                                    tasn1, crypto, gnutls],
@@ -113,7 +113,7 @@ if have_block
   if xts == 'private'
     tests += {'test-crypto-xts': [crypto, io]}
   endif
-  if 'CONFIG_POSIX' in config_host
+  if targetos != 'windows'
     tests += {
       'test-image-locking': [testblock],
       'test-nested-aio-poll': [testblock],
@@ -148,7 +148,7 @@ if have_system
   # are not runnable under TSan due to a known issue.
   # https://github.com/google/sanitizers/issues/1116
   if not get_option('tsan')
-    if 'CONFIG_POSIX' in config_host
+    if targetos != 'windows'
         tests += {
           'test-char': ['socket-helpers.c', qom, io, chardev]
         }
diff --git a/tests/unit/test-aio.c b/tests/unit/test-aio.c
index 519440eed3..71ed31a4db 100644
--- a/tests/unit/test-aio.c
+++ b/tests/unit/test-aio.c
@@ -454,7 +454,7 @@ static void test_timer_schedule(void)
 
     g_assert_cmpint(data.n, ==, 0);
 
-    /* timer_mod may well cause an event notifer to have gone off,
+    /* timer_mod may well cause an event notifier to have gone off,
      * so clear that
      */
     do {} while (aio_poll(ctx, false));
diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c
index ccc453c29e..0b603e7c57 100644
--- a/tests/unit/test-bdrv-drain.c
+++ b/tests/unit/test-bdrv-drain.c
@@ -512,6 +512,7 @@ static void test_iothread_main_thread_bh(void *opaque)
      * executed during drain, otherwise this would deadlock. */
     aio_context_acquire(bdrv_get_aio_context(data->bs));
     bdrv_flush(data->bs);
+    bdrv_dec_in_flight(data->bs); /* incremented by test_iothread_common() */
     aio_context_release(bdrv_get_aio_context(data->bs));
 }
 
@@ -583,6 +584,13 @@ static void test_iothread_common(enum drain_type drain_type, int drain_thread)
             aio_context_acquire(ctx_a);
         }
 
+        /*
+         * Increment in_flight so that do_drain_begin() waits for
+         * test_iothread_main_thread_bh(). This prevents the race between
+         * test_iothread_main_thread_bh() in IOThread a and do_drain_begin() in
+         * this thread. test_iothread_main_thread_bh() decrements in_flight.
+         */
+        bdrv_inc_in_flight(bs);
         aio_bh_schedule_oneshot(ctx_a, test_iothread_main_thread_bh, &data);
 
         /* The request is running on the IOThread a. Draining its block device
@@ -967,9 +975,12 @@ typedef struct BDRVTestTopState {
 static void bdrv_test_top_close(BlockDriverState *bs)
 {
     BdrvChild *c, *next_c;
+
+    bdrv_graph_wrlock(NULL);
     QLIST_FOREACH_SAFE(c, &bs->children, next, next_c) {
         bdrv_unref_child(bs, c);
     }
+    bdrv_graph_wrunlock();
 }
 
 static int coroutine_fn GRAPH_RDLOCK
@@ -1024,7 +1035,7 @@ static void coroutine_fn test_co_delete_by_drain(void *opaque)
     } else {
         BdrvChild *c, *next_c;
         QLIST_FOREACH_SAFE(c, &bs->children, next, next_c) {
-            bdrv_unref_child(bs, c);
+            bdrv_co_unref_child(bs, c);
         }
     }
 
@@ -1055,8 +1066,10 @@ static void do_test_delete_by_drain(bool detach_instead_of_delete,
 
     null_bs = bdrv_open("null-co://", NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
                         &error_abort);
+    bdrv_graph_wrlock(NULL);
     bdrv_attach_child(bs, null_bs, "null-child", &child_of_bds,
                       BDRV_CHILD_DATA, &error_abort);
+    bdrv_graph_wrunlock();
 
     /* This child will be the one to pass to requests through to, and
      * it will stall until a drain occurs */
@@ -1064,17 +1077,21 @@ static void do_test_delete_by_drain(bool detach_instead_of_delete,
                                     &error_abort);
     child_bs->total_sectors = 65536 >> BDRV_SECTOR_BITS;
     /* Takes our reference to child_bs */
+    bdrv_graph_wrlock(NULL);
     tts->wait_child = bdrv_attach_child(bs, child_bs, "wait-child",
                                         &child_of_bds,
                                         BDRV_CHILD_DATA | BDRV_CHILD_PRIMARY,
                                         &error_abort);
+    bdrv_graph_wrunlock();
 
     /* This child is just there to be deleted
      * (for detach_instead_of_delete == true) */
     null_bs = bdrv_open("null-co://", NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
                         &error_abort);
+    bdrv_graph_wrlock(NULL);
     bdrv_attach_child(bs, null_bs, "null-child", &child_of_bds, BDRV_CHILD_DATA,
                       &error_abort);
+    bdrv_graph_wrunlock();
 
     blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
     blk_insert_bs(blk, bs, &error_abort);
@@ -1156,12 +1173,15 @@ static void detach_indirect_bh(void *opaque)
     struct detach_by_parent_data *data = opaque;
 
     bdrv_dec_in_flight(data->child_b->bs);
+
+    bdrv_graph_wrlock(NULL);
     bdrv_unref_child(data->parent_b, data->child_b);
 
     bdrv_ref(data->c);
     data->child_c = bdrv_attach_child(data->parent_b, data->c, "PB-C",
                                       &child_of_bds, BDRV_CHILD_DATA,
                                       &error_abort);
+    bdrv_graph_wrunlock();
 }
 
 static void detach_by_parent_aio_cb(void *opaque, int ret)
@@ -1258,6 +1278,7 @@ static void test_detach_indirect(bool by_parent_cb)
     /* Set child relationships */
     bdrv_ref(b);
     bdrv_ref(a);
+    bdrv_graph_wrlock(NULL);
     child_b = bdrv_attach_child(parent_b, b, "PB-B", &child_of_bds,
                                 BDRV_CHILD_DATA, &error_abort);
     child_a = bdrv_attach_child(parent_b, a, "PB-A", &child_of_bds,
@@ -1267,6 +1288,7 @@ static void test_detach_indirect(bool by_parent_cb)
     bdrv_attach_child(parent_a, a, "PA-A",
                       by_parent_cb ? &child_of_bds : &detach_by_driver_cb_class,
                       BDRV_CHILD_DATA, &error_abort);
+    bdrv_graph_wrunlock();
 
     g_assert_cmpint(parent_a->refcnt, ==, 1);
     g_assert_cmpint(parent_b->refcnt, ==, 1);
@@ -1359,7 +1381,10 @@ static void test_append_to_drained(void)
     g_assert_cmpint(base_s->drain_count, ==, 1);
     g_assert_cmpint(base->in_flight, ==, 0);
 
+    aio_context_acquire(qemu_get_aio_context());
     bdrv_append(overlay, base, &error_abort);
+    aio_context_release(qemu_get_aio_context());
+
     g_assert_cmpint(base->in_flight, ==, 0);
     g_assert_cmpint(overlay->in_flight, ==, 0);
 
@@ -1682,6 +1707,7 @@ static void test_drop_intermediate_poll(void)
      * Establish the chain last, so the chain links are the first
      * elements in the BDS.parents lists
      */
+    bdrv_graph_wrlock(NULL);
     for (i = 0; i < 3; i++) {
         if (i) {
             /* Takes the reference to chain[i - 1] */
@@ -1689,6 +1715,7 @@ static void test_drop_intermediate_poll(void)
                               &chain_child_class, BDRV_CHILD_COW, &error_abort);
         }
     }
+    bdrv_graph_wrunlock();
 
     job = block_job_create("job", &test_simple_job_driver, NULL, job_node,
                            0, BLK_PERM_ALL, 0, 0, NULL, NULL, &error_abort);
@@ -1933,8 +1960,10 @@ static void do_test_replace_child_mid_drain(int old_drain_count,
     new_child_bs->total_sectors = 1;
 
     bdrv_ref(old_child_bs);
+    bdrv_graph_wrlock(NULL);
     bdrv_attach_child(parent_bs, old_child_bs, "child", &child_of_bds,
                       BDRV_CHILD_COW, &error_abort);
+    bdrv_graph_wrunlock();
     parent_s->setup_completed = true;
 
     for (i = 0; i < old_drain_count; i++) {
diff --git a/tests/unit/test-bdrv-graph-mod.c b/tests/unit/test-bdrv-graph-mod.c
index c522591531..8609f7f42b 100644
--- a/tests/unit/test-bdrv-graph-mod.c
+++ b/tests/unit/test-bdrv-graph-mod.c
@@ -98,9 +98,9 @@ static BlockDriverState *exclusive_writer_node(const char *name)
  *                                    | perm: write, read
  *                                    | shared: except write
  *                                    v
- *  +-------------------+           +----------------+
- *  | passtrough filter |---------->|  null-co node  |
- *  +-------------------+           +----------------+
+ *  +--------------------+          +----------------+
+ *  | passthrough filter |--------->|  null-co node  |
+ *  +--------------------+          +----------------+
  *
  *
  * and then, tries to append filter under node. Expected behavior: fail.
@@ -114,9 +114,9 @@ static BlockDriverState *exclusive_writer_node(const char *name)
  *                         | perm: write, read
  *                         | shared: except write
  *                         v
- *                +-------------------+
- *                | passtrough filter |
- *                +-------------------+
+ *                +--------------------+
+ *                | passthrough filter |
+ *                +--------------------+
  *                       |   |
  *     perm: write, read |   | perm: write, read
  *  shared: except write |   | shared: except write
@@ -137,11 +137,15 @@ static void test_update_perm_tree(void)
 
     blk_insert_bs(root, bs, &error_abort);
 
+    bdrv_graph_wrlock(NULL);
     bdrv_attach_child(filter, bs, "child", &child_of_bds,
                       BDRV_CHILD_DATA, &error_abort);
+    bdrv_graph_wrunlock();
 
+    aio_context_acquire(qemu_get_aio_context());
     ret = bdrv_append(filter, bs, NULL);
     g_assert_cmpint(ret, <, 0);
+    aio_context_release(qemu_get_aio_context());
 
     bdrv_unref(filter);
     blk_unref(root);
@@ -203,9 +207,13 @@ static void test_should_update_child(void)
     bdrv_set_backing_hd(target, bs, &error_abort);
 
     g_assert(target->backing->bs == bs);
+    bdrv_graph_wrlock(NULL);
     bdrv_attach_child(filter, target, "target", &child_of_bds,
                       BDRV_CHILD_DATA, &error_abort);
+    bdrv_graph_wrunlock();
+    aio_context_acquire(qemu_get_aio_context());
     bdrv_append(filter, bs, &error_abort);
+    aio_context_release(qemu_get_aio_context());
     g_assert(target->backing->bs == bs);
 
     bdrv_unref(filter);
@@ -232,6 +240,7 @@ static void test_parallel_exclusive_write(void)
      */
     bdrv_ref(base);
 
+    bdrv_graph_wrlock(NULL);
     bdrv_attach_child(top, fl1, "backing", &child_of_bds,
                       BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
                       &error_abort);
@@ -241,6 +250,7 @@ static void test_parallel_exclusive_write(void)
     bdrv_attach_child(fl2, base, "backing", &child_of_bds,
                       BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
                       &error_abort);
+    bdrv_graph_wrunlock();
 
     bdrv_replace_node(fl1, fl2, &error_abort);
 
@@ -345,6 +355,7 @@ static void test_parallel_perm_update(void)
      */
     bdrv_ref(base);
 
+    bdrv_graph_wrlock(NULL);
     bdrv_attach_child(top, ws, "file", &child_of_bds, BDRV_CHILD_DATA,
                       &error_abort);
     c_fl1 = bdrv_attach_child(ws, fl1, "first", &child_of_bds,
@@ -357,9 +368,13 @@ static void test_parallel_perm_update(void)
     bdrv_attach_child(fl2, base, "backing", &child_of_bds,
                       BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
                       &error_abort);
+    bdrv_graph_wrunlock();
 
     /* Select fl1 as first child to be active */
     s->selected = c_fl1;
+
+    bdrv_graph_rdlock_main_loop();
+
     bdrv_child_refresh_perms(top, top->children.lh_first, &error_abort);
 
     assert(c_fl1->perm & BLK_PERM_WRITE);
@@ -379,6 +394,7 @@ static void test_parallel_perm_update(void)
     assert(c_fl1->perm & BLK_PERM_WRITE);
     assert(!(c_fl2->perm & BLK_PERM_WRITE));
 
+    bdrv_graph_rdunlock_main_loop();
     bdrv_unref(top);
 }
 
@@ -406,11 +422,15 @@ static void test_append_greedy_filter(void)
     BlockDriverState *base = no_perm_node("base");
     BlockDriverState *fl = exclusive_writer_node("fl1");
 
+    bdrv_graph_wrlock(NULL);
     bdrv_attach_child(top, base, "backing", &child_of_bds,
                       BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
                       &error_abort);
+    bdrv_graph_wrunlock();
 
+    aio_context_acquire(qemu_get_aio_context());
     bdrv_append(fl, base, &error_abort);
+    aio_context_release(qemu_get_aio_context());
     bdrv_unref(fl);
     bdrv_unref(top);
 }
diff --git a/tests/unit/test-block-iothread.c b/tests/unit/test-block-iothread.c
index d727a5fee8..9155547313 100644
--- a/tests/unit/test-block-iothread.c
+++ b/tests/unit/test-block-iothread.c
@@ -756,11 +756,14 @@ static void test_propagate_mirror(void)
                                   &error_abort);
 
     /* Start a mirror job */
+    aio_context_acquire(main_ctx);
     mirror_start("job0", src, target, NULL, JOB_DEFAULT, 0, 0, 0,
                  MIRROR_SYNC_MODE_NONE, MIRROR_OPEN_BACKING_CHAIN, false,
                  BLOCKDEV_ON_ERROR_REPORT, BLOCKDEV_ON_ERROR_REPORT,
                  false, "filter_node", MIRROR_COPY_MODE_BACKGROUND,
                  &error_abort);
+    aio_context_release(main_ctx);
+
     WITH_JOB_LOCK_GUARD() {
         job = job_get_locked("job0");
     }
diff --git a/tests/unit/test-crypto-secret.c b/tests/unit/test-crypto-secret.c
index 147b4af828..ffd13ff70e 100644
--- a/tests/unit/test-crypto-secret.c
+++ b/tests/unit/test-crypto-secret.c
@@ -244,7 +244,7 @@ static void test_secret_keyring_bad_key_access_right(void)
     char key_str[16];
     Object *sec;
 
-    g_test_skip("TODO: Need responce from Linux kernel maintainers");
+    g_test_skip("TODO: Need response from Linux kernel maintainers");
     return;
 
     int32_t key = add_key("user", DESCRIPTION, PAYLOAD,
diff --git a/tests/unit/test-qobject-input-visitor.c b/tests/unit/test-qobject-input-visitor.c
index 9b3e2dbe14..024e26c49e 100644
--- a/tests/unit/test-qobject-input-visitor.c
+++ b/tests/unit/test-qobject-input-visitor.c
@@ -94,7 +94,7 @@ Visitor *visitor_input_test_init(TestInputVisitorData *data,
 
 /* similar to visitor_input_test_init(), but does not expect a string
  * literal/format json_string argument and so can be used for
- * programatically generated strings (and we can't pass in programatically
+ * programmatically generated strings (and we can't pass in programmatically
  * generated strings via %s format parameters since qobject_from_jsonv()
  * will wrap those in double-quotes and treat the entire object as a
  * string)
diff --git a/tests/unit/test-throttle.c b/tests/unit/test-throttle.c
index 7adb5e6652..cb587e33e7 100644
--- a/tests/unit/test-throttle.c
+++ b/tests/unit/test-throttle.c
@@ -135,7 +135,7 @@ static void test_compute_wait(void)
         g_assert(double_cmp(bkt.burst_level, 0));
         g_assert(double_cmp(bkt.level, (i + 1) * (bkt.max - bkt.avg) / 10));
         /* We can do bursts for the 2 seconds we have configured in
-         * burst_length. We have 100 extra miliseconds of burst
+         * burst_length. We have 100 extra milliseconds of burst
          * because bkt.level has been leaking during this time.
          * After that, we have to wait. */
         result = i < 21 ? 0 : 1.8 * NANOSECONDS_PER_SECOND;
@@ -169,8 +169,72 @@ static void test_init(void)
 
     /* check initialized fields */
     g_assert(tt->clock_type == QEMU_CLOCK_VIRTUAL);
-    g_assert(tt->timers[0]);
-    g_assert(tt->timers[1]);
+    g_assert(tt->timers[THROTTLE_READ]);
+    g_assert(tt->timers[THROTTLE_WRITE]);
+
+    /* check other fields where cleared */
+    g_assert(!ts.previous_leak);
+    g_assert(!ts.cfg.op_size);
+    for (i = 0; i < BUCKETS_COUNT; i++) {
+        g_assert(!ts.cfg.buckets[i].avg);
+        g_assert(!ts.cfg.buckets[i].max);
+        g_assert(!ts.cfg.buckets[i].level);
+    }
+
+    throttle_timers_destroy(tt);
+}
+
+static void test_init_readonly(void)
+{
+    int i;
+
+    tt = &tgm.throttle_timers;
+
+    /* fill the structures with crap */
+    memset(&ts, 1, sizeof(ts));
+    memset(tt, 1, sizeof(*tt));
+
+    /* init structures */
+    throttle_init(&ts);
+    throttle_timers_init(tt, ctx, QEMU_CLOCK_VIRTUAL,
+                         read_timer_cb, NULL, &ts);
+
+    /* check initialized fields */
+    g_assert(tt->clock_type == QEMU_CLOCK_VIRTUAL);
+    g_assert(tt->timers[THROTTLE_READ]);
+    g_assert(!tt->timers[THROTTLE_WRITE]);
+
+    /* check other fields where cleared */
+    g_assert(!ts.previous_leak);
+    g_assert(!ts.cfg.op_size);
+    for (i = 0; i < BUCKETS_COUNT; i++) {
+        g_assert(!ts.cfg.buckets[i].avg);
+        g_assert(!ts.cfg.buckets[i].max);
+        g_assert(!ts.cfg.buckets[i].level);
+    }
+
+    throttle_timers_destroy(tt);
+}
+
+static void test_init_writeonly(void)
+{
+    int i;
+
+    tt = &tgm.throttle_timers;
+
+    /* fill the structures with crap */
+    memset(&ts, 1, sizeof(ts));
+    memset(tt, 1, sizeof(*tt));
+
+    /* init structures */
+    throttle_init(&ts);
+    throttle_timers_init(tt, ctx, QEMU_CLOCK_VIRTUAL,
+                         NULL, write_timer_cb, &ts);
+
+    /* check initialized fields */
+    g_assert(tt->clock_type == QEMU_CLOCK_VIRTUAL);
+    g_assert(!tt->timers[THROTTLE_READ]);
+    g_assert(tt->timers[THROTTLE_WRITE]);
 
     /* check other fields where cleared */
     g_assert(!ts.previous_leak);
@@ -191,7 +255,7 @@ static void test_destroy(void)
     throttle_timers_init(tt, ctx, QEMU_CLOCK_VIRTUAL,
                          read_timer_cb, write_timer_cb, &ts);
     throttle_timers_destroy(tt);
-    for (i = 0; i < 2; i++) {
+    for (i = 0; i < THROTTLE_MAX; i++) {
         g_assert(!tt->timers[i]);
     }
 }
@@ -375,11 +439,11 @@ static void test_is_valid_for_value(int value, bool should_be_valid)
 
 static void test_is_valid(void)
 {
-    /* negative number are invalid */
+    /* negative numbesr are invalid */
     test_is_valid_for_value(-1, false);
-    /* zero are valids */
+    /* zero is valid */
     test_is_valid_for_value(0, true);
-    /* positives numers are valids */
+    /* positives numbers are valid */
     test_is_valid_for_value(1, true);
 }
 
@@ -573,9 +637,9 @@ static bool do_test_accounting(bool is_ops, /* are we testing bps or ops */
     throttle_config(&ts, QEMU_CLOCK_VIRTUAL, &cfg);
 
     /* account a read */
-    throttle_account(&ts, false, size);
+    throttle_account(&ts, THROTTLE_READ, size);
     /* account a write */
-    throttle_account(&ts, true, size);
+    throttle_account(&ts, THROTTLE_WRITE, size);
 
     /* check total result */
     index = to_test[is_ops][0];
@@ -752,6 +816,8 @@ int main(int argc, char **argv)
     g_test_add_func("/throttle/leak_bucket",        test_leak_bucket);
     g_test_add_func("/throttle/compute_wait",       test_compute_wait);
     g_test_add_func("/throttle/init",               test_init);
+    g_test_add_func("/throttle/init_readonly",      test_init_readonly);
+    g_test_add_func("/throttle/init_writeonly",     test_init_writeonly);
     g_test_add_func("/throttle/destroy",            test_destroy);
     g_test_add_func("/throttle/have_timer",         test_have_timer);
     g_test_add_func("/throttle/detach_attach",      test_detach_attach);
diff --git a/tests/unit/test-util-filemonitor.c b/tests/unit/test-util-filemonitor.c
index b629e10857..a22de27595 100644
--- a/tests/unit/test-util-filemonitor.c
+++ b/tests/unit/test-util-filemonitor.c
@@ -132,7 +132,7 @@ qemu_file_monitor_test_record_free(QFileMonitorTestRecord *rec)
  * the file monitor event handler. Since events are
  * emitted in the background thread running the event
  * loop, we can't assume there is a record available
- * immediately. Thus we will sleep for upto 5 seconds
+ * immediately. Thus we will sleep for up to 5 seconds
  * to wait for the event to be queued for us.
  */
 static QFileMonitorTestRecord *
diff --git a/tests/unit/test-xs-node.c b/tests/unit/test-xs-node.c
index b80d10ff98..ac94e7ed6c 100644
--- a/tests/unit/test-xs-node.c
+++ b/tests/unit/test-xs-node.c
@@ -362,7 +362,7 @@ static void test_xs_node_simple(void)
     g_assert(data->len == strlen("something"));
     g_assert(!memcmp(data->data, "something", data->len));
 
-    /* Even if we use an abolute path */
+    /* Even if we use an absolute path */
     g_byte_array_set_size(data, 0);
     err = xs_impl_read(s, DOMID_GUEST, XBT_NULL,
                        "/local/domain/1/some/relative/path", data);
diff --git a/tests/vm/Makefile.include b/tests/vm/Makefile.include
index f0f5d32fb0..bf12e0fa3c 100644
--- a/tests/vm/Makefile.include
+++ b/tests/vm/Makefile.include
@@ -81,7 +81,7 @@ endif
 	@echo "    QEMU_IMG=/path/to/qemu-img	 - Change path to qemu-img tool"
 	@echo "    QEMU_LOCAL=1                 - Use QEMU binary local to this build."
 	@echo "    TARGET_LIST=a,b,c    	 - Override target list in builds"
-	@echo "    V=1				 - Enable verbose ouput on host and guest commands"
+	@echo "    V=1				 - Enable verbose output on host and guest commands"
 
 vm-build-all: $(addprefix vm-build-, $(IMAGES))
 
diff --git a/tests/vm/ubuntuvm.py b/tests/vm/ubuntuvm.py
index 6689ad87aa..15c530c571 100644
--- a/tests/vm/ubuntuvm.py
+++ b/tests/vm/ubuntuvm.py
@@ -51,7 +51,7 @@ class UbuntuVM(basevm.BaseVM):
         # then we will jump right to the graceful shutdown
         if self._config['install_cmds'] != "":
             # Issue the install commands.
-            # This can be overriden by the user in the config .yml.
+            # This can be overridden by the user in the config .yml.
             install_cmds = self._config['install_cmds'].split(',')
             for cmd in install_cmds:
                 self.ssh_root(cmd)
diff --git a/ui/cocoa.m b/ui/cocoa.m
index 0c2153d17c..df6d13be38 100644
--- a/ui/cocoa.m
+++ b/ui/cocoa.m
@@ -784,7 +784,7 @@ static CGEventRef handleTapEvent(CGEventTapProxy proxy, CGEventType type, CGEven
     }
 
     if (keysym) {
-        kbd_put_keysym(keysym);
+        qemu_text_console_put_keysym(NULL, keysym);
     }
 }
 
@@ -2045,7 +2045,7 @@ static void cocoa_display_init(DisplayState *ds, DisplayOptions *opts)
 
     /*
      * Create the menu entries which depend on QEMU state (for consoles
-     * and removeable devices). These make calls back into QEMU functions,
+     * and removable devices). These make calls back into QEMU functions,
      * which is OK because at this point we know that the second thread
      * holds the iothread lock and is synchronously waiting for us to
      * finish.
diff --git a/ui/console-gl.c b/ui/console-gl.c
index 8e3c9a3c8c..103b954017 100644
--- a/ui/console-gl.c
+++ b/ui/console-gl.c
@@ -53,7 +53,7 @@ void surface_gl_create_texture(QemuGLShader *gls,
         return;
     }
 
-    switch (surface->format) {
+    switch (surface_format(surface)) {
     case PIXMAN_BE_b8g8r8x8:
     case PIXMAN_BE_b8g8r8a8:
         surface->glformat = GL_BGRA_EXT;
diff --git a/ui/console-priv.h b/ui/console-priv.h
new file mode 100644
index 0000000000..88569ed2cc
--- /dev/null
+++ b/ui/console-priv.h
@@ -0,0 +1,43 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ * QEMU UI Console
+ */
+#ifndef CONSOLE_PRIV_H
+#define CONSOLE_PRIV_H
+
+#include "ui/console.h"
+#include "qemu/coroutine.h"
+#include "qemu/timer.h"
+
+#include "vgafont.h"
+
+#define FONT_HEIGHT 16
+#define FONT_WIDTH 8
+
+struct QemuConsole {
+    Object parent;
+
+    int index;
+    DisplayState *ds;
+    DisplaySurface *surface;
+    DisplayScanout scanout;
+    int dcls;
+    DisplayGLCtx *gl;
+    int gl_block;
+    QEMUTimer *gl_unblock_timer;
+    int window_id;
+    QemuUIInfo ui_info;
+    QEMUTimer *ui_timer;
+    const GraphicHwOps *hw_ops;
+    void *hw;
+    CoQueue dump_queue;
+
+    QTAILQ_ENTRY(QemuConsole) next;
+};
+
+void qemu_text_console_select(QemuTextConsole *c);
+const char * qemu_text_console_get_label(QemuTextConsole *c);
+void qemu_text_console_update_cursor(void);
+void qemu_text_console_handle_keysym(QemuTextConsole *s, int keysym);
+
+#endif
diff --git a/ui/console-vc.c b/ui/console-vc.c
new file mode 100644
index 0000000000..9c13cc2981
--- /dev/null
+++ b/ui/console-vc.c
@@ -0,0 +1,1079 @@
+/*
+ * SPDX-License-Identifier: MIT
+ * QEMU VC
+ */
+#include "qemu/osdep.h"
+
+#include "chardev/char.h"
+#include "qapi/error.h"
+#include "qemu/fifo8.h"
+#include "qemu/option.h"
+#include "ui/console.h"
+
+#include "trace.h"
+#include "console-priv.h"
+
+#define DEFAULT_BACKSCROLL 512
+#define CONSOLE_CURSOR_PERIOD 500
+
+typedef struct TextAttributes {
+    uint8_t fgcol:4;
+    uint8_t bgcol:4;
+    uint8_t bold:1;
+    uint8_t uline:1;
+    uint8_t blink:1;
+    uint8_t invers:1;
+    uint8_t unvisible:1;
+} TextAttributes;
+
+#define TEXT_ATTRIBUTES_DEFAULT ((TextAttributes) { \
+    .fgcol = QEMU_COLOR_WHITE,                      \
+    .bgcol = QEMU_COLOR_BLACK                       \
+})
+
+typedef struct TextCell {
+    uint8_t ch;
+    TextAttributes t_attrib;
+} TextCell;
+
+#define MAX_ESC_PARAMS 3
+
+enum TTYState {
+    TTY_STATE_NORM,
+    TTY_STATE_ESC,
+    TTY_STATE_CSI,
+};
+
+typedef struct QemuTextConsole {
+    QemuConsole parent;
+
+    int width;
+    int height;
+    int total_height;
+    int backscroll_height;
+    int x, y;
+    int y_displayed;
+    int y_base;
+    TextCell *cells;
+    int text_x[2], text_y[2], cursor_invalidate;
+    int echo;
+
+    int update_x0;
+    int update_y0;
+    int update_x1;
+    int update_y1;
+
+    Chardev *chr;
+    /* fifo for key pressed */
+    Fifo8 out_fifo;
+} QemuTextConsole;
+
+typedef QemuConsoleClass QemuTextConsoleClass;
+
+OBJECT_DEFINE_TYPE(QemuTextConsole, qemu_text_console, QEMU_TEXT_CONSOLE, QEMU_CONSOLE)
+
+typedef struct QemuFixedTextConsole {
+    QemuTextConsole parent;
+} QemuFixedTextConsole;
+
+typedef QemuTextConsoleClass QemuFixedTextConsoleClass;
+
+OBJECT_DEFINE_TYPE(QemuFixedTextConsole, qemu_fixed_text_console, QEMU_FIXED_TEXT_CONSOLE, QEMU_TEXT_CONSOLE)
+
+struct VCChardev {
+    Chardev parent;
+    QemuTextConsole *console;
+
+    enum TTYState state;
+    int esc_params[MAX_ESC_PARAMS];
+    int nb_esc_params;
+    TextAttributes t_attrib; /* currently active text attributes */
+    int x_saved, y_saved;
+};
+typedef struct VCChardev VCChardev;
+
+static const pixman_color_t color_table_rgb[2][8] = {
+    {   /* dark */
+        [QEMU_COLOR_BLACK]   = QEMU_PIXMAN_COLOR_BLACK,
+        [QEMU_COLOR_BLUE]    = QEMU_PIXMAN_COLOR(0x00, 0x00, 0xaa),  /* blue */
+        [QEMU_COLOR_GREEN]   = QEMU_PIXMAN_COLOR(0x00, 0xaa, 0x00),  /* green */
+        [QEMU_COLOR_CYAN]    = QEMU_PIXMAN_COLOR(0x00, 0xaa, 0xaa),  /* cyan */
+        [QEMU_COLOR_RED]     = QEMU_PIXMAN_COLOR(0xaa, 0x00, 0x00),  /* red */
+        [QEMU_COLOR_MAGENTA] = QEMU_PIXMAN_COLOR(0xaa, 0x00, 0xaa),  /* magenta */
+        [QEMU_COLOR_YELLOW]  = QEMU_PIXMAN_COLOR(0xaa, 0xaa, 0x00),  /* yellow */
+        [QEMU_COLOR_WHITE]   = QEMU_PIXMAN_COLOR_GRAY,
+    },
+    {   /* bright */
+        [QEMU_COLOR_BLACK]   = QEMU_PIXMAN_COLOR_BLACK,
+        [QEMU_COLOR_BLUE]    = QEMU_PIXMAN_COLOR(0x00, 0x00, 0xff),  /* blue */
+        [QEMU_COLOR_GREEN]   = QEMU_PIXMAN_COLOR(0x00, 0xff, 0x00),  /* green */
+        [QEMU_COLOR_CYAN]    = QEMU_PIXMAN_COLOR(0x00, 0xff, 0xff),  /* cyan */
+        [QEMU_COLOR_RED]     = QEMU_PIXMAN_COLOR(0xff, 0x00, 0x00),  /* red */
+        [QEMU_COLOR_MAGENTA] = QEMU_PIXMAN_COLOR(0xff, 0x00, 0xff),  /* magenta */
+        [QEMU_COLOR_YELLOW]  = QEMU_PIXMAN_COLOR(0xff, 0xff, 0x00),  /* yellow */
+        [QEMU_COLOR_WHITE]   = QEMU_PIXMAN_COLOR(0xff, 0xff, 0xff),  /* white */
+    }
+};
+
+static bool cursor_visible_phase;
+static QEMUTimer *cursor_timer;
+
+const char *
+qemu_text_console_get_label(QemuTextConsole *c)
+{
+    return c->chr ? c->chr->label : NULL;
+}
+
+static void qemu_console_fill_rect(QemuConsole *con, int posx, int posy,
+                                   int width, int height, pixman_color_t color)
+{
+    DisplaySurface *surface = qemu_console_surface(con);
+    pixman_rectangle16_t rect = {
+        .x = posx, .y = posy, .width = width, .height = height
+    };
+
+    assert(surface);
+    pixman_image_fill_rectangles(PIXMAN_OP_SRC, surface->image,
+                                 &color, 1, &rect);
+}
+
+/* copy from (xs, ys) to (xd, yd) a rectangle of size (w, h) */
+static void qemu_console_bitblt(QemuConsole *con,
+                                int xs, int ys, int xd, int yd, int w, int h)
+{
+    DisplaySurface *surface = qemu_console_surface(con);
+
+    assert(surface);
+    pixman_image_composite(PIXMAN_OP_SRC,
+                           surface->image, NULL, surface->image,
+                           xs, ys, 0, 0, xd, yd, w, h);
+}
+
+static void vga_putcharxy(QemuConsole *s, int x, int y, int ch,
+                          TextAttributes *t_attrib)
+{
+    static pixman_image_t *glyphs[256];
+    DisplaySurface *surface = qemu_console_surface(s);
+    pixman_color_t fgcol, bgcol;
+
+    assert(surface);
+    if (t_attrib->invers) {
+        bgcol = color_table_rgb[t_attrib->bold][t_attrib->fgcol];
+        fgcol = color_table_rgb[t_attrib->bold][t_attrib->bgcol];
+    } else {
+        fgcol = color_table_rgb[t_attrib->bold][t_attrib->fgcol];
+        bgcol = color_table_rgb[t_attrib->bold][t_attrib->bgcol];
+    }
+
+    if (!glyphs[ch]) {
+        glyphs[ch] = qemu_pixman_glyph_from_vgafont(FONT_HEIGHT, vgafont16, ch);
+    }
+    qemu_pixman_glyph_render(glyphs[ch], surface->image,
+                             &fgcol, &bgcol, x, y, FONT_WIDTH, FONT_HEIGHT);
+}
+
+static void invalidate_xy(QemuTextConsole *s, int x, int y)
+{
+    if (!qemu_console_is_visible(QEMU_CONSOLE(s))) {
+        return;
+    }
+    if (s->update_x0 > x * FONT_WIDTH)
+        s->update_x0 = x * FONT_WIDTH;
+    if (s->update_y0 > y * FONT_HEIGHT)
+        s->update_y0 = y * FONT_HEIGHT;
+    if (s->update_x1 < (x + 1) * FONT_WIDTH)
+        s->update_x1 = (x + 1) * FONT_WIDTH;
+    if (s->update_y1 < (y + 1) * FONT_HEIGHT)
+        s->update_y1 = (y + 1) * FONT_HEIGHT;
+}
+
+static void console_show_cursor(QemuTextConsole *s, int show)
+{
+    TextCell *c;
+    int y, y1;
+    int x = s->x;
+
+    s->cursor_invalidate = 1;
+
+    if (x >= s->width) {
+        x = s->width - 1;
+    }
+    y1 = (s->y_base + s->y) % s->total_height;
+    y = y1 - s->y_displayed;
+    if (y < 0) {
+        y += s->total_height;
+    }
+    if (y < s->height) {
+        c = &s->cells[y1 * s->width + x];
+        if (show && cursor_visible_phase) {
+            TextAttributes t_attrib = TEXT_ATTRIBUTES_DEFAULT;
+            t_attrib.invers = !(t_attrib.invers); /* invert fg and bg */
+            vga_putcharxy(QEMU_CONSOLE(s), x, y, c->ch, &t_attrib);
+        } else {
+            vga_putcharxy(QEMU_CONSOLE(s), x, y, c->ch, &(c->t_attrib));
+        }
+        invalidate_xy(s, x, y);
+    }
+}
+
+static void console_refresh(QemuTextConsole *s)
+{
+    DisplaySurface *surface = qemu_console_surface(QEMU_CONSOLE(s));
+    TextCell *c;
+    int x, y, y1;
+
+    assert(surface);
+    s->text_x[0] = 0;
+    s->text_y[0] = 0;
+    s->text_x[1] = s->width - 1;
+    s->text_y[1] = s->height - 1;
+    s->cursor_invalidate = 1;
+
+    qemu_console_fill_rect(QEMU_CONSOLE(s), 0, 0, surface_width(surface), surface_height(surface),
+                           color_table_rgb[0][QEMU_COLOR_BLACK]);
+    y1 = s->y_displayed;
+    for (y = 0; y < s->height; y++) {
+        c = s->cells + y1 * s->width;
+        for (x = 0; x < s->width; x++) {
+            vga_putcharxy(QEMU_CONSOLE(s), x, y, c->ch,
+                          &(c->t_attrib));
+            c++;
+        }
+        if (++y1 == s->total_height) {
+            y1 = 0;
+        }
+    }
+    console_show_cursor(s, 1);
+    dpy_gfx_update(QEMU_CONSOLE(s), 0, 0,
+                   surface_width(surface), surface_height(surface));
+}
+
+static void console_scroll(QemuTextConsole *s, int ydelta)
+{
+    int i, y1;
+
+    if (ydelta > 0) {
+        for(i = 0; i < ydelta; i++) {
+            if (s->y_displayed == s->y_base)
+                break;
+            if (++s->y_displayed == s->total_height)
+                s->y_displayed = 0;
+        }
+    } else {
+        ydelta = -ydelta;
+        i = s->backscroll_height;
+        if (i > s->total_height - s->height)
+            i = s->total_height - s->height;
+        y1 = s->y_base - i;
+        if (y1 < 0)
+            y1 += s->total_height;
+        for(i = 0; i < ydelta; i++) {
+            if (s->y_displayed == y1)
+                break;
+            if (--s->y_displayed < 0)
+                s->y_displayed = s->total_height - 1;
+        }
+    }
+    console_refresh(s);
+}
+
+static void kbd_send_chars(QemuTextConsole *s)
+{
+    uint32_t len, avail;
+
+    len = qemu_chr_be_can_write(s->chr);
+    avail = fifo8_num_used(&s->out_fifo);
+    while (len > 0 && avail > 0) {
+        const uint8_t *buf;
+        uint32_t size;
+
+        buf = fifo8_pop_buf(&s->out_fifo, MIN(len, avail), &size);
+        qemu_chr_be_write(s->chr, buf, size);
+        len = qemu_chr_be_can_write(s->chr);
+        avail -= size;
+    }
+}
+
+/* called when an ascii key is pressed */
+void qemu_text_console_handle_keysym(QemuTextConsole *s, int keysym)
+{
+    uint8_t buf[16], *q;
+    int c;
+    uint32_t num_free;
+
+    switch(keysym) {
+    case QEMU_KEY_CTRL_UP:
+        console_scroll(s, -1);
+        break;
+    case QEMU_KEY_CTRL_DOWN:
+        console_scroll(s, 1);
+        break;
+    case QEMU_KEY_CTRL_PAGEUP:
+        console_scroll(s, -10);
+        break;
+    case QEMU_KEY_CTRL_PAGEDOWN:
+        console_scroll(s, 10);
+        break;
+    default:
+        /* convert the QEMU keysym to VT100 key string */
+        q = buf;
+        if (keysym >= 0xe100 && keysym <= 0xe11f) {
+            *q++ = '\033';
+            *q++ = '[';
+            c = keysym - 0xe100;
+            if (c >= 10)
+                *q++ = '0' + (c / 10);
+            *q++ = '0' + (c % 10);
+            *q++ = '~';
+        } else if (keysym >= 0xe120 && keysym <= 0xe17f) {
+            *q++ = '\033';
+            *q++ = '[';
+            *q++ = keysym & 0xff;
+        } else if (s->echo && (keysym == '\r' || keysym == '\n')) {
+            qemu_chr_write(s->chr, (uint8_t *)"\r", 1, true);
+            *q++ = '\n';
+        } else {
+            *q++ = keysym;
+        }
+        if (s->echo) {
+            qemu_chr_write(s->chr, buf, q - buf, true);
+        }
+        num_free = fifo8_num_free(&s->out_fifo);
+        fifo8_push_all(&s->out_fifo, buf, MIN(num_free, q - buf));
+        kbd_send_chars(s);
+        break;
+    }
+}
+
+static void text_console_update(void *opaque, console_ch_t *chardata)
+{
+    QemuTextConsole *s = QEMU_TEXT_CONSOLE(opaque);
+    int i, j, src;
+
+    if (s->text_x[0] <= s->text_x[1]) {
+        src = (s->y_base + s->text_y[0]) * s->width;
+        chardata += s->text_y[0] * s->width;
+        for (i = s->text_y[0]; i <= s->text_y[1]; i ++)
+            for (j = 0; j < s->width; j++, src++) {
+                console_write_ch(chardata ++,
+                                 ATTR2CHTYPE(s->cells[src].ch,
+                                             s->cells[src].t_attrib.fgcol,
+                                             s->cells[src].t_attrib.bgcol,
+                                             s->cells[src].t_attrib.bold));
+            }
+        dpy_text_update(QEMU_CONSOLE(s), s->text_x[0], s->text_y[0],
+                        s->text_x[1] - s->text_x[0], i - s->text_y[0]);
+        s->text_x[0] = s->width;
+        s->text_y[0] = s->height;
+        s->text_x[1] = 0;
+        s->text_y[1] = 0;
+    }
+    if (s->cursor_invalidate) {
+        dpy_text_cursor(QEMU_CONSOLE(s), s->x, s->y);
+        s->cursor_invalidate = 0;
+    }
+}
+
+static void text_console_resize(QemuTextConsole *t)
+{
+    QemuConsole *s = QEMU_CONSOLE(t);
+    TextCell *cells, *c, *c1;
+    int w1, x, y, last_width, w, h;
+
+    assert(s->scanout.kind == SCANOUT_SURFACE);
+
+    w = surface_width(s->surface) / FONT_WIDTH;
+    h = surface_height(s->surface) / FONT_HEIGHT;
+    if (w == t->width && h == t->height) {
+        return;
+    }
+
+    last_width = t->width;
+    t->width = w;
+    t->height = h;
+
+    w1 = MIN(t->width, last_width);
+
+    cells = g_new(TextCell, t->width * t->total_height + 1);
+    for (y = 0; y < t->total_height; y++) {
+        c = &cells[y * t->width];
+        if (w1 > 0) {
+            c1 = &t->cells[y * last_width];
+            for (x = 0; x < w1; x++) {
+                *c++ = *c1++;
+            }
+        }
+        for (x = w1; x < t->width; x++) {
+            c->ch = ' ';
+            c->t_attrib = TEXT_ATTRIBUTES_DEFAULT;
+            c++;
+        }
+    }
+    g_free(t->cells);
+    t->cells = cells;
+}
+
+static void vc_put_lf(VCChardev *vc)
+{
+    QemuTextConsole *s = vc->console;
+    TextCell *c;
+    int x, y1;
+
+    s->y++;
+    if (s->y >= s->height) {
+        s->y = s->height - 1;
+
+        if (s->y_displayed == s->y_base) {
+            if (++s->y_displayed == s->total_height)
+                s->y_displayed = 0;
+        }
+        if (++s->y_base == s->total_height)
+            s->y_base = 0;
+        if (s->backscroll_height < s->total_height)
+            s->backscroll_height++;
+        y1 = (s->y_base + s->height - 1) % s->total_height;
+        c = &s->cells[y1 * s->width];
+        for(x = 0; x < s->width; x++) {
+            c->ch = ' ';
+            c->t_attrib = TEXT_ATTRIBUTES_DEFAULT;
+            c++;
+        }
+        if (s->y_displayed == s->y_base) {
+            s->text_x[0] = 0;
+            s->text_y[0] = 0;
+            s->text_x[1] = s->width - 1;
+            s->text_y[1] = s->height - 1;
+
+            qemu_console_bitblt(QEMU_CONSOLE(s), 0, FONT_HEIGHT, 0, 0,
+                                s->width * FONT_WIDTH,
+                                (s->height - 1) * FONT_HEIGHT);
+            qemu_console_fill_rect(QEMU_CONSOLE(s), 0, (s->height - 1) * FONT_HEIGHT,
+                                   s->width * FONT_WIDTH, FONT_HEIGHT,
+                                   color_table_rgb[0][TEXT_ATTRIBUTES_DEFAULT.bgcol]);
+            s->update_x0 = 0;
+            s->update_y0 = 0;
+            s->update_x1 = s->width * FONT_WIDTH;
+            s->update_y1 = s->height * FONT_HEIGHT;
+        }
+    }
+}
+
+/* Set console attributes depending on the current escape codes.
+ * NOTE: I know this code is not very efficient (checking every color for it
+ * self) but it is more readable and better maintainable.
+ */
+static void vc_handle_escape(VCChardev *vc)
+{
+    int i;
+
+    for (i = 0; i < vc->nb_esc_params; i++) {
+        switch (vc->esc_params[i]) {
+            case 0: /* reset all console attributes to default */
+                vc->t_attrib = TEXT_ATTRIBUTES_DEFAULT;
+                break;
+            case 1:
+                vc->t_attrib.bold = 1;
+                break;
+            case 4:
+                vc->t_attrib.uline = 1;
+                break;
+            case 5:
+                vc->t_attrib.blink = 1;
+                break;
+            case 7:
+                vc->t_attrib.invers = 1;
+                break;
+            case 8:
+                vc->t_attrib.unvisible = 1;
+                break;
+            case 22:
+                vc->t_attrib.bold = 0;
+                break;
+            case 24:
+                vc->t_attrib.uline = 0;
+                break;
+            case 25:
+                vc->t_attrib.blink = 0;
+                break;
+            case 27:
+                vc->t_attrib.invers = 0;
+                break;
+            case 28:
+                vc->t_attrib.unvisible = 0;
+                break;
+            /* set foreground color */
+            case 30:
+                vc->t_attrib.fgcol = QEMU_COLOR_BLACK;
+                break;
+            case 31:
+                vc->t_attrib.fgcol = QEMU_COLOR_RED;
+                break;
+            case 32:
+                vc->t_attrib.fgcol = QEMU_COLOR_GREEN;
+                break;
+            case 33:
+                vc->t_attrib.fgcol = QEMU_COLOR_YELLOW;
+                break;
+            case 34:
+                vc->t_attrib.fgcol = QEMU_COLOR_BLUE;
+                break;
+            case 35:
+                vc->t_attrib.fgcol = QEMU_COLOR_MAGENTA;
+                break;
+            case 36:
+                vc->t_attrib.fgcol = QEMU_COLOR_CYAN;
+                break;
+            case 37:
+                vc->t_attrib.fgcol = QEMU_COLOR_WHITE;
+                break;
+            /* set background color */
+            case 40:
+                vc->t_attrib.bgcol = QEMU_COLOR_BLACK;
+                break;
+            case 41:
+                vc->t_attrib.bgcol = QEMU_COLOR_RED;
+                break;
+            case 42:
+                vc->t_attrib.bgcol = QEMU_COLOR_GREEN;
+                break;
+            case 43:
+                vc->t_attrib.bgcol = QEMU_COLOR_YELLOW;
+                break;
+            case 44:
+                vc->t_attrib.bgcol = QEMU_COLOR_BLUE;
+                break;
+            case 45:
+                vc->t_attrib.bgcol = QEMU_COLOR_MAGENTA;
+                break;
+            case 46:
+                vc->t_attrib.bgcol = QEMU_COLOR_CYAN;
+                break;
+            case 47:
+                vc->t_attrib.bgcol = QEMU_COLOR_WHITE;
+                break;
+        }
+    }
+}
+
+static void vc_update_xy(VCChardev *vc, int x, int y)
+{
+    QemuTextConsole *s = vc->console;
+    TextCell *c;
+    int y1, y2;
+
+    s->text_x[0] = MIN(s->text_x[0], x);
+    s->text_x[1] = MAX(s->text_x[1], x);
+    s->text_y[0] = MIN(s->text_y[0], y);
+    s->text_y[1] = MAX(s->text_y[1], y);
+
+    y1 = (s->y_base + y) % s->total_height;
+    y2 = y1 - s->y_displayed;
+    if (y2 < 0) {
+        y2 += s->total_height;
+    }
+    if (y2 < s->height) {
+        if (x >= s->width) {
+            x = s->width - 1;
+        }
+        c = &s->cells[y1 * s->width + x];
+        vga_putcharxy(QEMU_CONSOLE(s), x, y2, c->ch,
+                      &(c->t_attrib));
+        invalidate_xy(s, x, y2);
+    }
+}
+
+static void vc_clear_xy(VCChardev *vc, int x, int y)
+{
+    QemuTextConsole *s = vc->console;
+    int y1 = (s->y_base + y) % s->total_height;
+    if (x >= s->width) {
+        x = s->width - 1;
+    }
+    TextCell *c = &s->cells[y1 * s->width + x];
+    c->ch = ' ';
+    c->t_attrib = TEXT_ATTRIBUTES_DEFAULT;
+    vc_update_xy(vc, x, y);
+}
+
+static void vc_put_one(VCChardev *vc, int ch)
+{
+    QemuTextConsole *s = vc->console;
+    TextCell *c;
+    int y1;
+    if (s->x >= s->width) {
+        /* line wrap */
+        s->x = 0;
+        vc_put_lf(vc);
+    }
+    y1 = (s->y_base + s->y) % s->total_height;
+    c = &s->cells[y1 * s->width + s->x];
+    c->ch = ch;
+    c->t_attrib = vc->t_attrib;
+    vc_update_xy(vc, s->x, s->y);
+    s->x++;
+}
+
+static void vc_respond_str(VCChardev *vc, const char *buf)
+{
+    while (*buf) {
+        vc_put_one(vc, *buf);
+        buf++;
+    }
+}
+
+/* set cursor, checking bounds */
+static void vc_set_cursor(VCChardev *vc, int x, int y)
+{
+    QemuTextConsole *s = vc->console;
+
+    if (x < 0) {
+        x = 0;
+    }
+    if (y < 0) {
+        y = 0;
+    }
+    if (y >= s->height) {
+        y = s->height - 1;
+    }
+    if (x >= s->width) {
+        x = s->width - 1;
+    }
+
+    s->x = x;
+    s->y = y;
+}
+
+static void vc_putchar(VCChardev *vc, int ch)
+{
+    QemuTextConsole *s = vc->console;
+    int i;
+    int x, y;
+    char response[40];
+
+    switch(vc->state) {
+    case TTY_STATE_NORM:
+        switch(ch) {
+        case '\r':  /* carriage return */
+            s->x = 0;
+            break;
+        case '\n':  /* newline */
+            vc_put_lf(vc);
+            break;
+        case '\b':  /* backspace */
+            if (s->x > 0)
+                s->x--;
+            break;
+        case '\t':  /* tabspace */
+            if (s->x + (8 - (s->x % 8)) > s->width) {
+                s->x = 0;
+                vc_put_lf(vc);
+            } else {
+                s->x = s->x + (8 - (s->x % 8));
+            }
+            break;
+        case '\a':  /* alert aka. bell */
+            /* TODO: has to be implemented */
+            break;
+        case 14:
+            /* SI (shift in), character set 0 (ignored) */
+            break;
+        case 15:
+            /* SO (shift out), character set 1 (ignored) */
+            break;
+        case 27:    /* esc (introducing an escape sequence) */
+            vc->state = TTY_STATE_ESC;
+            break;
+        default:
+            vc_put_one(vc, ch);
+            break;
+        }
+        break;
+    case TTY_STATE_ESC: /* check if it is a terminal escape sequence */
+        if (ch == '[') {
+            for(i=0;i<MAX_ESC_PARAMS;i++)
+                vc->esc_params[i] = 0;
+            vc->nb_esc_params = 0;
+            vc->state = TTY_STATE_CSI;
+        } else {
+            vc->state = TTY_STATE_NORM;
+        }
+        break;
+    case TTY_STATE_CSI: /* handle escape sequence parameters */
+        if (ch >= '0' && ch <= '9') {
+            if (vc->nb_esc_params < MAX_ESC_PARAMS) {
+                int *param = &vc->esc_params[vc->nb_esc_params];
+                int digit = (ch - '0');
+
+                *param = (*param <= (INT_MAX - digit) / 10) ?
+                         *param * 10 + digit : INT_MAX;
+            }
+        } else {
+            if (vc->nb_esc_params < MAX_ESC_PARAMS)
+                vc->nb_esc_params++;
+            if (ch == ';' || ch == '?') {
+                break;
+            }
+            trace_console_putchar_csi(vc->esc_params[0], vc->esc_params[1],
+                                      ch, vc->nb_esc_params);
+            vc->state = TTY_STATE_NORM;
+            switch(ch) {
+            case 'A':
+                /* move cursor up */
+                if (vc->esc_params[0] == 0) {
+                    vc->esc_params[0] = 1;
+                }
+                vc_set_cursor(vc, s->x, s->y - vc->esc_params[0]);
+                break;
+            case 'B':
+                /* move cursor down */
+                if (vc->esc_params[0] == 0) {
+                    vc->esc_params[0] = 1;
+                }
+                vc_set_cursor(vc, s->x, s->y + vc->esc_params[0]);
+                break;
+            case 'C':
+                /* move cursor right */
+                if (vc->esc_params[0] == 0) {
+                    vc->esc_params[0] = 1;
+                }
+                vc_set_cursor(vc, s->x + vc->esc_params[0], s->y);
+                break;
+            case 'D':
+                /* move cursor left */
+                if (vc->esc_params[0] == 0) {
+                    vc->esc_params[0] = 1;
+                }
+                vc_set_cursor(vc, s->x - vc->esc_params[0], s->y);
+                break;
+            case 'G':
+                /* move cursor to column */
+                vc_set_cursor(vc, vc->esc_params[0] - 1, s->y);
+                break;
+            case 'f':
+            case 'H':
+                /* move cursor to row, column */
+                vc_set_cursor(vc, vc->esc_params[1] - 1, vc->esc_params[0] - 1);
+                break;
+            case 'J':
+                switch (vc->esc_params[0]) {
+                case 0:
+                    /* clear to end of screen */
+                    for (y = s->y; y < s->height; y++) {
+                        for (x = 0; x < s->width; x++) {
+                            if (y == s->y && x < s->x) {
+                                continue;
+                            }
+                            vc_clear_xy(vc, x, y);
+                        }
+                    }
+                    break;
+                case 1:
+                    /* clear from beginning of screen */
+                    for (y = 0; y <= s->y; y++) {
+                        for (x = 0; x < s->width; x++) {
+                            if (y == s->y && x > s->x) {
+                                break;
+                            }
+                            vc_clear_xy(vc, x, y);
+                        }
+                    }
+                    break;
+                case 2:
+                    /* clear entire screen */
+                    for (y = 0; y <= s->height; y++) {
+                        for (x = 0; x < s->width; x++) {
+                            vc_clear_xy(vc, x, y);
+                        }
+                    }
+                    break;
+                }
+                break;
+            case 'K':
+                switch (vc->esc_params[0]) {
+                case 0:
+                    /* clear to eol */
+                    for(x = s->x; x < s->width; x++) {
+                        vc_clear_xy(vc, x, s->y);
+                    }
+                    break;
+                case 1:
+                    /* clear from beginning of line */
+                    for (x = 0; x <= s->x && x < s->width; x++) {
+                        vc_clear_xy(vc, x, s->y);
+                    }
+                    break;
+                case 2:
+                    /* clear entire line */
+                    for(x = 0; x < s->width; x++) {
+                        vc_clear_xy(vc, x, s->y);
+                    }
+                    break;
+                }
+                break;
+            case 'm':
+                vc_handle_escape(vc);
+                break;
+            case 'n':
+                switch (vc->esc_params[0]) {
+                case 5:
+                    /* report console status (always succeed)*/
+                    vc_respond_str(vc, "\033[0n");
+                    break;
+                case 6:
+                    /* report cursor position */
+                    sprintf(response, "\033[%d;%dR",
+                           (s->y_base + s->y) % s->total_height + 1,
+                            s->x + 1);
+                    vc_respond_str(vc, response);
+                    break;
+                }
+                break;
+            case 's':
+                /* save cursor position */
+                vc->x_saved = s->x;
+                vc->y_saved = s->y;
+                break;
+            case 'u':
+                /* restore cursor position */
+                s->x = vc->x_saved;
+                s->y = vc->y_saved;
+                break;
+            default:
+                trace_console_putchar_unhandled(ch);
+                break;
+            }
+            break;
+        }
+    }
+}
+
+#define TYPE_CHARDEV_VC "chardev-vc"
+DECLARE_INSTANCE_CHECKER(VCChardev, VC_CHARDEV,
+                         TYPE_CHARDEV_VC)
+
+static int vc_chr_write(Chardev *chr, const uint8_t *buf, int len)
+{
+    VCChardev *drv = VC_CHARDEV(chr);
+    QemuTextConsole *s = drv->console;
+    int i;
+
+    s->update_x0 = s->width * FONT_WIDTH;
+    s->update_y0 = s->height * FONT_HEIGHT;
+    s->update_x1 = 0;
+    s->update_y1 = 0;
+    console_show_cursor(s, 0);
+    for(i = 0; i < len; i++) {
+        vc_putchar(drv, buf[i]);
+    }
+    console_show_cursor(s, 1);
+    if (s->update_x0 < s->update_x1) {
+        dpy_gfx_update(QEMU_CONSOLE(s), s->update_x0, s->update_y0,
+                       s->update_x1 - s->update_x0,
+                       s->update_y1 - s->update_y0);
+    }
+    return len;
+}
+
+void qemu_text_console_update_cursor(void)
+{
+    cursor_visible_phase = !cursor_visible_phase;
+
+    if (qemu_invalidate_text_consoles()) {
+        timer_mod(cursor_timer,
+                  qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + CONSOLE_CURSOR_PERIOD / 2);
+    }
+}
+
+static void
+cursor_timer_cb(void *opaque)
+{
+    qemu_text_console_update_cursor();
+}
+
+static void text_console_invalidate(void *opaque)
+{
+    QemuTextConsole *s = QEMU_TEXT_CONSOLE(opaque);
+
+    if (!QEMU_IS_FIXED_TEXT_CONSOLE(s)) {
+        text_console_resize(QEMU_TEXT_CONSOLE(s));
+    }
+    console_refresh(s);
+}
+
+static void
+qemu_text_console_finalize(Object *obj)
+{
+}
+
+static void
+qemu_text_console_class_init(ObjectClass *oc, void *data)
+{
+    if (!cursor_timer) {
+        cursor_timer = timer_new_ms(QEMU_CLOCK_REALTIME, cursor_timer_cb, NULL);
+    }
+}
+
+static const GraphicHwOps text_console_ops = {
+    .invalidate  = text_console_invalidate,
+    .text_update = text_console_update,
+};
+
+static void
+qemu_text_console_init(Object *obj)
+{
+    QemuTextConsole *c = QEMU_TEXT_CONSOLE(obj);
+
+    fifo8_create(&c->out_fifo, 16);
+    c->total_height = DEFAULT_BACKSCROLL;
+    QEMU_CONSOLE(c)->hw_ops = &text_console_ops;
+    QEMU_CONSOLE(c)->hw = c;
+}
+
+static void
+qemu_fixed_text_console_finalize(Object *obj)
+{
+}
+
+static void
+qemu_fixed_text_console_class_init(ObjectClass *oc, void *data)
+{
+}
+
+static void
+qemu_fixed_text_console_init(Object *obj)
+{
+}
+
+static void vc_chr_accept_input(Chardev *chr)
+{
+    VCChardev *drv = VC_CHARDEV(chr);
+
+    kbd_send_chars(drv->console);
+}
+
+static void vc_chr_set_echo(Chardev *chr, bool echo)
+{
+    VCChardev *drv = VC_CHARDEV(chr);
+
+    drv->console->echo = echo;
+}
+
+void qemu_text_console_select(QemuTextConsole *c)
+{
+    dpy_text_resize(QEMU_CONSOLE(c), c->width, c->height);
+    qemu_text_console_update_cursor();
+}
+
+static void vc_chr_open(Chardev *chr,
+                        ChardevBackend *backend,
+                        bool *be_opened,
+                        Error **errp)
+{
+    ChardevVC *vc = backend->u.vc.data;
+    VCChardev *drv = VC_CHARDEV(chr);
+    QemuTextConsole *s;
+    unsigned width = 0;
+    unsigned height = 0;
+
+    if (vc->has_width) {
+        width = vc->width;
+    } else if (vc->has_cols) {
+        width = vc->cols * FONT_WIDTH;
+    }
+
+    if (vc->has_height) {
+        height = vc->height;
+    } else if (vc->has_rows) {
+        height = vc->rows * FONT_HEIGHT;
+    }
+
+    trace_console_txt_new(width, height);
+    if (width == 0 || height == 0) {
+        s = QEMU_TEXT_CONSOLE(object_new(TYPE_QEMU_TEXT_CONSOLE));
+        width = qemu_console_get_width(NULL, 80 * FONT_WIDTH);
+        height = qemu_console_get_height(NULL, 24 * FONT_HEIGHT);
+    } else {
+        s = QEMU_TEXT_CONSOLE(object_new(TYPE_QEMU_FIXED_TEXT_CONSOLE));
+    }
+
+    dpy_gfx_replace_surface(QEMU_CONSOLE(s), qemu_create_displaysurface(width, height));
+
+    s->chr = chr;
+    drv->console = s;
+
+    /* set current text attributes to default */
+    drv->t_attrib = TEXT_ATTRIBUTES_DEFAULT;
+    text_console_resize(s);
+
+    if (chr->label) {
+        char *msg;
+
+        drv->t_attrib.bgcol = QEMU_COLOR_BLUE;
+        msg = g_strdup_printf("%s console\r\n", chr->label);
+        qemu_chr_write(chr, (uint8_t *)msg, strlen(msg), true);
+        g_free(msg);
+        drv->t_attrib = TEXT_ATTRIBUTES_DEFAULT;
+    }
+
+    *be_opened = true;
+}
+
+static void vc_chr_parse(QemuOpts *opts, ChardevBackend *backend, Error **errp)
+{
+    int val;
+    ChardevVC *vc;
+
+    backend->type = CHARDEV_BACKEND_KIND_VC;
+    vc = backend->u.vc.data = g_new0(ChardevVC, 1);
+    qemu_chr_parse_common(opts, qapi_ChardevVC_base(vc));
+
+    val = qemu_opt_get_number(opts, "width", 0);
+    if (val != 0) {
+        vc->has_width = true;
+        vc->width = val;
+    }
+
+    val = qemu_opt_get_number(opts, "height", 0);
+    if (val != 0) {
+        vc->has_height = true;
+        vc->height = val;
+    }
+
+    val = qemu_opt_get_number(opts, "cols", 0);
+    if (val != 0) {
+        vc->has_cols = true;
+        vc->cols = val;
+    }
+
+    val = qemu_opt_get_number(opts, "rows", 0);
+    if (val != 0) {
+        vc->has_rows = true;
+        vc->rows = val;
+    }
+}
+
+static void char_vc_class_init(ObjectClass *oc, void *data)
+{
+    ChardevClass *cc = CHARDEV_CLASS(oc);
+
+    cc->parse = vc_chr_parse;
+    cc->open = vc_chr_open;
+    cc->chr_write = vc_chr_write;
+    cc->chr_accept_input = vc_chr_accept_input;
+    cc->chr_set_echo = vc_chr_set_echo;
+}
+
+static const TypeInfo char_vc_type_info = {
+    .name = TYPE_CHARDEV_VC,
+    .parent = TYPE_CHARDEV,
+    .instance_size = sizeof(VCChardev),
+    .class_init = char_vc_class_init,
+};
+
+void qemu_console_early_init(void)
+{
+    /* set the default vc driver */
+    if (!object_class_by_name(TYPE_CHARDEV_VC)) {
+        type_register(&char_vc_type_info);
+    }
+}
diff --git a/ui/console.c b/ui/console.c
index 8da2170a7e..4a4f19ed33 100644
--- a/ui/console.c
+++ b/ui/console.c
@@ -27,117 +27,40 @@
 #include "hw/qdev-core.h"
 #include "qapi/error.h"
 #include "qapi/qapi-commands-ui.h"
+#include "qapi/visitor.h"
 #include "qemu/coroutine.h"
 #include "qemu/error-report.h"
-#include "qemu/fifo8.h"
 #include "qemu/main-loop.h"
 #include "qemu/module.h"
 #include "qemu/option.h"
-#include "qemu/timer.h"
 #include "chardev/char.h"
 #include "trace.h"
 #include "exec/memory.h"
-#include "io/channel-file.h"
 #include "qom/object.h"
-#ifdef CONFIG_PNG
-#include <png.h>
-#endif
 
-#define DEFAULT_BACKSCROLL 512
-#define CONSOLE_CURSOR_PERIOD 500
+#include "console-priv.h"
 
-typedef struct TextAttributes {
-    uint8_t fgcol:4;
-    uint8_t bgcol:4;
-    uint8_t bold:1;
-    uint8_t uline:1;
-    uint8_t blink:1;
-    uint8_t invers:1;
-    uint8_t unvisible:1;
-} TextAttributes;
+OBJECT_DEFINE_ABSTRACT_TYPE(QemuConsole, qemu_console, QEMU_CONSOLE, OBJECT)
 
-typedef struct TextCell {
-    uint8_t ch;
-    TextAttributes t_attrib;
-} TextCell;
+typedef struct QemuGraphicConsole {
+    QemuConsole parent;
 
-#define MAX_ESC_PARAMS 3
-
-enum TTYState {
-    TTY_STATE_NORM,
-    TTY_STATE_ESC,
-    TTY_STATE_CSI,
-};
-
-typedef enum {
-    GRAPHIC_CONSOLE,
-    TEXT_CONSOLE,
-    TEXT_CONSOLE_FIXED_SIZE
-} console_type_t;
-
-struct QemuConsole {
-    Object parent;
-
-    int index;
-    console_type_t console_type;
-    DisplayState *ds;
-    DisplaySurface *surface;
-    DisplayScanout scanout;
-    int dcls;
-    DisplayGLCtx *gl;
-    int gl_block;
-    QEMUTimer *gl_unblock_timer;
-    int window_id;
-
-    /* Graphic console state.  */
     Object *device;
     uint32_t head;
-    QemuUIInfo ui_info;
-    QEMUTimer *ui_timer;
+
     QEMUCursor *cursor;
     int cursor_x, cursor_y, cursor_on;
-    const GraphicHwOps *hw_ops;
-    void *hw;
+} QemuGraphicConsole;
 
-    /* Text console state */
-    int width;
-    int height;
-    int total_height;
-    int backscroll_height;
-    int x, y;
-    int x_saved, y_saved;
-    int y_displayed;
-    int y_base;
-    TextAttributes t_attrib_default; /* default text attributes */
-    TextAttributes t_attrib; /* currently active text attributes */
-    TextCell *cells;
-    int text_x[2], text_y[2], cursor_invalidate;
-    int echo;
+typedef QemuConsoleClass QemuGraphicConsoleClass;
 
-    int update_x0;
-    int update_y0;
-    int update_x1;
-    int update_y1;
-
-    enum TTYState state;
-    int esc_params[MAX_ESC_PARAMS];
-    int nb_esc_params;
-
-    Chardev *chr;
-    /* fifo for key pressed */
-    Fifo8 out_fifo;
-    CoQueue dump_queue;
-
-    QTAILQ_ENTRY(QemuConsole) next;
-};
+OBJECT_DEFINE_TYPE(QemuGraphicConsole, qemu_graphic_console, QEMU_GRAPHIC_CONSOLE, QEMU_CONSOLE)
 
 struct DisplayState {
     QEMUTimer *gui_timer;
     uint64_t last_update;
     uint64_t update_interval;
     bool refreshing;
-    bool have_gfx;
-    bool have_text;
 
     QLIST_HEAD(, DisplayChangeListener) listeners;
 };
@@ -146,17 +69,14 @@ static DisplayState *display_state;
 static QemuConsole *active_console;
 static QTAILQ_HEAD(, QemuConsole) consoles =
     QTAILQ_HEAD_INITIALIZER(consoles);
-static bool cursor_visible_phase;
-static QEMUTimer *cursor_timer;
 
-static void text_console_do_init(Chardev *chr, DisplayState *ds);
 static void dpy_refresh(DisplayState *s);
 static DisplayState *get_alloc_displaystate(void);
-static void text_console_update_cursor_timer(void);
-static void text_console_update_cursor(void *opaque);
 static bool displaychangelistener_has_dmabuf(DisplayChangeListener *dcl);
 static bool console_compatible_with(QemuConsole *con,
                                     DisplayChangeListener *dcl, Error **errp);
+static QemuConsole *qemu_graphic_console_lookup_unused(void);
+static void dpy_set_ui_info_timer(void *opaque);
 
 static void gui_update(void *opaque)
 {
@@ -188,19 +108,11 @@ static void gui_setup_refresh(DisplayState *ds)
 {
     DisplayChangeListener *dcl;
     bool need_timer = false;
-    bool have_gfx = false;
-    bool have_text = false;
 
     QLIST_FOREACH(dcl, &ds->listeners, next) {
         if (dcl->ops->dpy_refresh != NULL) {
             need_timer = true;
         }
-        if (dcl->ops->dpy_gfx_update != NULL) {
-            have_gfx = true;
-        }
-        if (dcl->ops->dpy_text_update != NULL) {
-            have_text = true;
-        }
     }
 
     if (need_timer && ds->gui_timer == NULL) {
@@ -211,9 +123,6 @@ static void gui_setup_refresh(DisplayState *ds)
         timer_free(ds->gui_timer);
         ds->gui_timer = NULL;
     }
-
-    ds->have_gfx = have_gfx;
-    ds->have_text = have_text;
 }
 
 void graphic_hw_update_done(QemuConsole *con)
@@ -239,6 +148,22 @@ void graphic_hw_update(QemuConsole *con)
     }
 }
 
+static void graphic_hw_update_bh(void *con)
+{
+    graphic_hw_update(con);
+}
+
+void qemu_console_co_wait_update(QemuConsole *con)
+{
+    if (qemu_co_queue_empty(&con->dump_queue)) {
+        /* Defer the update, it will restart the pending coroutines */
+        aio_bh_schedule_oneshot(qemu_get_aio_context(),
+                                graphic_hw_update_bh, con);
+    }
+    qemu_co_queue_wait(&con->dump_queue, NULL);
+
+}
+
 static void graphic_hw_gl_unblock_timer(void *opaque)
 {
     warn_report("console: no gl-unblock within one second");
@@ -292,196 +217,6 @@ void graphic_hw_invalidate(QemuConsole *con)
     }
 }
 
-#ifdef CONFIG_PNG
-/**
- * png_save: Take a screenshot as PNG
- *
- * Saves screendump as a PNG file
- *
- * Returns true for success or false for error.
- *
- * @fd: File descriptor for PNG file.
- * @image: Image data in pixman format.
- * @errp: Pointer to an error.
- */
-static bool png_save(int fd, pixman_image_t *image, Error **errp)
-{
-    int width = pixman_image_get_width(image);
-    int height = pixman_image_get_height(image);
-    png_struct *png_ptr;
-    png_info *info_ptr;
-    g_autoptr(pixman_image_t) linebuf =
-        qemu_pixman_linebuf_create(PIXMAN_BE_r8g8b8, width);
-    uint8_t *buf = (uint8_t *)pixman_image_get_data(linebuf);
-    FILE *f = fdopen(fd, "wb");
-    int y;
-    if (!f) {
-        error_setg_errno(errp, errno,
-                         "Failed to create file from file descriptor");
-        return false;
-    }
-
-    png_ptr = png_create_write_struct(PNG_LIBPNG_VER_STRING, NULL,
-                                      NULL, NULL);
-    if (!png_ptr) {
-        error_setg(errp, "PNG creation failed. Unable to write struct");
-        fclose(f);
-        return false;
-    }
-
-    info_ptr = png_create_info_struct(png_ptr);
-
-    if (!info_ptr) {
-        error_setg(errp, "PNG creation failed. Unable to write info");
-        fclose(f);
-        png_destroy_write_struct(&png_ptr, &info_ptr);
-        return false;
-    }
-
-    png_init_io(png_ptr, f);
-
-    png_set_IHDR(png_ptr, info_ptr, width, height, 8,
-                 PNG_COLOR_TYPE_RGB, PNG_INTERLACE_NONE,
-                 PNG_COMPRESSION_TYPE_BASE, PNG_FILTER_TYPE_BASE);
-
-    png_write_info(png_ptr, info_ptr);
-
-    for (y = 0; y < height; ++y) {
-        qemu_pixman_linebuf_fill(linebuf, image, width, 0, y);
-        png_write_row(png_ptr, buf);
-    }
-
-    png_write_end(png_ptr, NULL);
-
-    png_destroy_write_struct(&png_ptr, &info_ptr);
-
-    if (fclose(f) != 0) {
-        error_setg_errno(errp, errno,
-                         "PNG creation failed. Unable to close file");
-        return false;
-    }
-
-    return true;
-}
-
-#else /* no png support */
-
-static bool png_save(int fd, pixman_image_t *image, Error **errp)
-{
-    error_setg(errp, "Enable PNG support with libpng for screendump");
-    return false;
-}
-
-#endif /* CONFIG_PNG */
-
-static bool ppm_save(int fd, pixman_image_t *image, Error **errp)
-{
-    int width = pixman_image_get_width(image);
-    int height = pixman_image_get_height(image);
-    g_autoptr(Object) ioc = OBJECT(qio_channel_file_new_fd(fd));
-    g_autofree char *header = NULL;
-    g_autoptr(pixman_image_t) linebuf = NULL;
-    int y;
-
-    trace_ppm_save(fd, image);
-
-    header = g_strdup_printf("P6\n%d %d\n%d\n", width, height, 255);
-    if (qio_channel_write_all(QIO_CHANNEL(ioc),
-                              header, strlen(header), errp) < 0) {
-        return false;
-    }
-
-    linebuf = qemu_pixman_linebuf_create(PIXMAN_BE_r8g8b8, width);
-    for (y = 0; y < height; y++) {
-        qemu_pixman_linebuf_fill(linebuf, image, width, 0, y);
-        if (qio_channel_write_all(QIO_CHANNEL(ioc),
-                                  (char *)pixman_image_get_data(linebuf),
-                                  pixman_image_get_stride(linebuf), errp) < 0) {
-            return false;
-        }
-    }
-
-    return true;
-}
-
-static void graphic_hw_update_bh(void *con)
-{
-    graphic_hw_update(con);
-}
-
-/* Safety: coroutine-only, concurrent-coroutine safe, main thread only */
-void coroutine_fn
-qmp_screendump(const char *filename, const char *device,
-               bool has_head, int64_t head,
-               bool has_format, ImageFormat format, Error **errp)
-{
-    g_autoptr(pixman_image_t) image = NULL;
-    QemuConsole *con;
-    DisplaySurface *surface;
-    int fd;
-
-    if (device) {
-        con = qemu_console_lookup_by_device_name(device, has_head ? head : 0,
-                                                 errp);
-        if (!con) {
-            return;
-        }
-    } else {
-        if (has_head) {
-            error_setg(errp, "'head' must be specified together with 'device'");
-            return;
-        }
-        con = qemu_console_lookup_by_index(0);
-        if (!con) {
-            error_setg(errp, "There is no console to take a screendump from");
-            return;
-        }
-    }
-
-    if (qemu_co_queue_empty(&con->dump_queue)) {
-        /* Defer the update, it will restart the pending coroutines */
-        aio_bh_schedule_oneshot(qemu_get_aio_context(),
-                                graphic_hw_update_bh, con);
-    }
-    qemu_co_queue_wait(&con->dump_queue, NULL);
-
-    /*
-     * All pending coroutines are woken up, while the BQL is held.  No
-     * further graphic update are possible until it is released.  Take
-     * an image ref before that.
-     */
-    surface = qemu_console_surface(con);
-    if (!surface) {
-        error_setg(errp, "no surface");
-        return;
-    }
-    image = pixman_image_ref(surface->image);
-
-    fd = qemu_open_old(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0666);
-    if (fd == -1) {
-        error_setg(errp, "failed to open file '%s': %s", filename,
-                   strerror(errno));
-        return;
-    }
-
-    /*
-     * The image content could potentially be updated as the coroutine
-     * yields and releases the BQL. It could produce corrupted dump, but
-     * it should be otherwise safe.
-     */
-    if (has_format && format == IMAGE_FORMAT_PNG) {
-        /* PNG format specified for screendump */
-        if (!png_save(fd, image, errp)) {
-            qemu_unlink(filename);
-        }
-    } else {
-        /* PPM format specified/default for screendump */
-        if (!ppm_save(fd, image, errp)) {
-            qemu_unlink(filename);
-        }
-    }
-}
-
 void graphic_hw_text_update(QemuConsole *con, console_ch_t *chardata)
 {
     if (!con) {
@@ -492,664 +227,6 @@ void graphic_hw_text_update(QemuConsole *con, console_ch_t *chardata)
     }
 }
 
-static void vga_fill_rect(QemuConsole *con,
-                          int posx, int posy, int width, int height,
-                          pixman_color_t color)
-{
-    DisplaySurface *surface = qemu_console_surface(con);
-    pixman_rectangle16_t rect = {
-        .x = posx, .y = posy, .width = width, .height = height
-    };
-
-    pixman_image_fill_rectangles(PIXMAN_OP_SRC, surface->image,
-                                 &color, 1, &rect);
-}
-
-/* copy from (xs, ys) to (xd, yd) a rectangle of size (w, h) */
-static void vga_bitblt(QemuConsole *con,
-                       int xs, int ys, int xd, int yd, int w, int h)
-{
-    DisplaySurface *surface = qemu_console_surface(con);
-
-    pixman_image_composite(PIXMAN_OP_SRC,
-                           surface->image, NULL, surface->image,
-                           xs, ys, 0, 0, xd, yd, w, h);
-}
-
-/***********************************************************/
-/* basic char display */
-
-#define FONT_HEIGHT 16
-#define FONT_WIDTH 8
-
-#include "vgafont.h"
-
-#define QEMU_RGB(r, g, b)                                               \
-    { .red = r << 8, .green = g << 8, .blue = b << 8, .alpha = 0xffff }
-
-static const pixman_color_t color_table_rgb[2][8] = {
-    {   /* dark */
-        [QEMU_COLOR_BLACK]   = QEMU_RGB(0x00, 0x00, 0x00),  /* black */
-        [QEMU_COLOR_BLUE]    = QEMU_RGB(0x00, 0x00, 0xaa),  /* blue */
-        [QEMU_COLOR_GREEN]   = QEMU_RGB(0x00, 0xaa, 0x00),  /* green */
-        [QEMU_COLOR_CYAN]    = QEMU_RGB(0x00, 0xaa, 0xaa),  /* cyan */
-        [QEMU_COLOR_RED]     = QEMU_RGB(0xaa, 0x00, 0x00),  /* red */
-        [QEMU_COLOR_MAGENTA] = QEMU_RGB(0xaa, 0x00, 0xaa),  /* magenta */
-        [QEMU_COLOR_YELLOW]  = QEMU_RGB(0xaa, 0xaa, 0x00),  /* yellow */
-        [QEMU_COLOR_WHITE]   = QEMU_RGB(0xaa, 0xaa, 0xaa),  /* white */
-    },
-    {   /* bright */
-        [QEMU_COLOR_BLACK]   = QEMU_RGB(0x00, 0x00, 0x00),  /* black */
-        [QEMU_COLOR_BLUE]    = QEMU_RGB(0x00, 0x00, 0xff),  /* blue */
-        [QEMU_COLOR_GREEN]   = QEMU_RGB(0x00, 0xff, 0x00),  /* green */
-        [QEMU_COLOR_CYAN]    = QEMU_RGB(0x00, 0xff, 0xff),  /* cyan */
-        [QEMU_COLOR_RED]     = QEMU_RGB(0xff, 0x00, 0x00),  /* red */
-        [QEMU_COLOR_MAGENTA] = QEMU_RGB(0xff, 0x00, 0xff),  /* magenta */
-        [QEMU_COLOR_YELLOW]  = QEMU_RGB(0xff, 0xff, 0x00),  /* yellow */
-        [QEMU_COLOR_WHITE]   = QEMU_RGB(0xff, 0xff, 0xff),  /* white */
-    }
-};
-
-static void vga_putcharxy(QemuConsole *s, int x, int y, int ch,
-                          TextAttributes *t_attrib)
-{
-    static pixman_image_t *glyphs[256];
-    DisplaySurface *surface = qemu_console_surface(s);
-    pixman_color_t fgcol, bgcol;
-
-    if (t_attrib->invers) {
-        bgcol = color_table_rgb[t_attrib->bold][t_attrib->fgcol];
-        fgcol = color_table_rgb[t_attrib->bold][t_attrib->bgcol];
-    } else {
-        fgcol = color_table_rgb[t_attrib->bold][t_attrib->fgcol];
-        bgcol = color_table_rgb[t_attrib->bold][t_attrib->bgcol];
-    }
-
-    if (!glyphs[ch]) {
-        glyphs[ch] = qemu_pixman_glyph_from_vgafont(FONT_HEIGHT, vgafont16, ch);
-    }
-    qemu_pixman_glyph_render(glyphs[ch], surface->image,
-                             &fgcol, &bgcol, x, y, FONT_WIDTH, FONT_HEIGHT);
-}
-
-static void text_console_resize(QemuConsole *s)
-{
-    TextCell *cells, *c, *c1;
-    int w1, x, y, last_width;
-
-    assert(s->scanout.kind == SCANOUT_SURFACE);
-
-    last_width = s->width;
-    s->width = surface_width(s->surface) / FONT_WIDTH;
-    s->height = surface_height(s->surface) / FONT_HEIGHT;
-
-    w1 = last_width;
-    if (s->width < w1)
-        w1 = s->width;
-
-    cells = g_new(TextCell, s->width * s->total_height + 1);
-    for(y = 0; y < s->total_height; y++) {
-        c = &cells[y * s->width];
-        if (w1 > 0) {
-            c1 = &s->cells[y * last_width];
-            for(x = 0; x < w1; x++) {
-                *c++ = *c1++;
-            }
-        }
-        for(x = w1; x < s->width; x++) {
-            c->ch = ' ';
-            c->t_attrib = s->t_attrib_default;
-            c++;
-        }
-    }
-    g_free(s->cells);
-    s->cells = cells;
-}
-
-static inline void text_update_xy(QemuConsole *s, int x, int y)
-{
-    s->text_x[0] = MIN(s->text_x[0], x);
-    s->text_x[1] = MAX(s->text_x[1], x);
-    s->text_y[0] = MIN(s->text_y[0], y);
-    s->text_y[1] = MAX(s->text_y[1], y);
-}
-
-static void invalidate_xy(QemuConsole *s, int x, int y)
-{
-    if (!qemu_console_is_visible(s)) {
-        return;
-    }
-    if (s->update_x0 > x * FONT_WIDTH)
-        s->update_x0 = x * FONT_WIDTH;
-    if (s->update_y0 > y * FONT_HEIGHT)
-        s->update_y0 = y * FONT_HEIGHT;
-    if (s->update_x1 < (x + 1) * FONT_WIDTH)
-        s->update_x1 = (x + 1) * FONT_WIDTH;
-    if (s->update_y1 < (y + 1) * FONT_HEIGHT)
-        s->update_y1 = (y + 1) * FONT_HEIGHT;
-}
-
-static void update_xy(QemuConsole *s, int x, int y)
-{
-    TextCell *c;
-    int y1, y2;
-
-    if (s->ds->have_text) {
-        text_update_xy(s, x, y);
-    }
-
-    y1 = (s->y_base + y) % s->total_height;
-    y2 = y1 - s->y_displayed;
-    if (y2 < 0) {
-        y2 += s->total_height;
-    }
-    if (y2 < s->height) {
-        if (x >= s->width) {
-            x = s->width - 1;
-        }
-        c = &s->cells[y1 * s->width + x];
-        vga_putcharxy(s, x, y2, c->ch,
-                      &(c->t_attrib));
-        invalidate_xy(s, x, y2);
-    }
-}
-
-static void console_show_cursor(QemuConsole *s, int show)
-{
-    TextCell *c;
-    int y, y1;
-    int x = s->x;
-
-    if (s->ds->have_text) {
-        s->cursor_invalidate = 1;
-    }
-
-    if (x >= s->width) {
-        x = s->width - 1;
-    }
-    y1 = (s->y_base + s->y) % s->total_height;
-    y = y1 - s->y_displayed;
-    if (y < 0) {
-        y += s->total_height;
-    }
-    if (y < s->height) {
-        c = &s->cells[y1 * s->width + x];
-        if (show && cursor_visible_phase) {
-            TextAttributes t_attrib = s->t_attrib_default;
-            t_attrib.invers = !(t_attrib.invers); /* invert fg and bg */
-            vga_putcharxy(s, x, y, c->ch, &t_attrib);
-        } else {
-            vga_putcharxy(s, x, y, c->ch, &(c->t_attrib));
-        }
-        invalidate_xy(s, x, y);
-    }
-}
-
-static void console_refresh(QemuConsole *s)
-{
-    DisplaySurface *surface = qemu_console_surface(s);
-    TextCell *c;
-    int x, y, y1;
-
-    if (s->ds->have_text) {
-        s->text_x[0] = 0;
-        s->text_y[0] = 0;
-        s->text_x[1] = s->width - 1;
-        s->text_y[1] = s->height - 1;
-        s->cursor_invalidate = 1;
-    }
-
-    vga_fill_rect(s, 0, 0, surface_width(surface), surface_height(surface),
-                  color_table_rgb[0][QEMU_COLOR_BLACK]);
-    y1 = s->y_displayed;
-    for (y = 0; y < s->height; y++) {
-        c = s->cells + y1 * s->width;
-        for (x = 0; x < s->width; x++) {
-            vga_putcharxy(s, x, y, c->ch,
-                          &(c->t_attrib));
-            c++;
-        }
-        if (++y1 == s->total_height) {
-            y1 = 0;
-        }
-    }
-    console_show_cursor(s, 1);
-    dpy_gfx_update(s, 0, 0,
-                   surface_width(surface), surface_height(surface));
-}
-
-static void console_scroll(QemuConsole *s, int ydelta)
-{
-    int i, y1;
-
-    if (ydelta > 0) {
-        for(i = 0; i < ydelta; i++) {
-            if (s->y_displayed == s->y_base)
-                break;
-            if (++s->y_displayed == s->total_height)
-                s->y_displayed = 0;
-        }
-    } else {
-        ydelta = -ydelta;
-        i = s->backscroll_height;
-        if (i > s->total_height - s->height)
-            i = s->total_height - s->height;
-        y1 = s->y_base - i;
-        if (y1 < 0)
-            y1 += s->total_height;
-        for(i = 0; i < ydelta; i++) {
-            if (s->y_displayed == y1)
-                break;
-            if (--s->y_displayed < 0)
-                s->y_displayed = s->total_height - 1;
-        }
-    }
-    console_refresh(s);
-}
-
-static void console_put_lf(QemuConsole *s)
-{
-    TextCell *c;
-    int x, y1;
-
-    s->y++;
-    if (s->y >= s->height) {
-        s->y = s->height - 1;
-
-        if (s->y_displayed == s->y_base) {
-            if (++s->y_displayed == s->total_height)
-                s->y_displayed = 0;
-        }
-        if (++s->y_base == s->total_height)
-            s->y_base = 0;
-        if (s->backscroll_height < s->total_height)
-            s->backscroll_height++;
-        y1 = (s->y_base + s->height - 1) % s->total_height;
-        c = &s->cells[y1 * s->width];
-        for(x = 0; x < s->width; x++) {
-            c->ch = ' ';
-            c->t_attrib = s->t_attrib_default;
-            c++;
-        }
-        if (s->y_displayed == s->y_base) {
-            if (s->ds->have_text) {
-                s->text_x[0] = 0;
-                s->text_y[0] = 0;
-                s->text_x[1] = s->width - 1;
-                s->text_y[1] = s->height - 1;
-            }
-
-            vga_bitblt(s, 0, FONT_HEIGHT, 0, 0,
-                       s->width * FONT_WIDTH,
-                       (s->height - 1) * FONT_HEIGHT);
-            vga_fill_rect(s, 0, (s->height - 1) * FONT_HEIGHT,
-                          s->width * FONT_WIDTH, FONT_HEIGHT,
-                          color_table_rgb[0][s->t_attrib_default.bgcol]);
-            s->update_x0 = 0;
-            s->update_y0 = 0;
-            s->update_x1 = s->width * FONT_WIDTH;
-            s->update_y1 = s->height * FONT_HEIGHT;
-        }
-    }
-}
-
-/* Set console attributes depending on the current escape codes.
- * NOTE: I know this code is not very efficient (checking every color for it
- * self) but it is more readable and better maintainable.
- */
-static void console_handle_escape(QemuConsole *s)
-{
-    int i;
-
-    for (i=0; i<s->nb_esc_params; i++) {
-        switch (s->esc_params[i]) {
-            case 0: /* reset all console attributes to default */
-                s->t_attrib = s->t_attrib_default;
-                break;
-            case 1:
-                s->t_attrib.bold = 1;
-                break;
-            case 4:
-                s->t_attrib.uline = 1;
-                break;
-            case 5:
-                s->t_attrib.blink = 1;
-                break;
-            case 7:
-                s->t_attrib.invers = 1;
-                break;
-            case 8:
-                s->t_attrib.unvisible = 1;
-                break;
-            case 22:
-                s->t_attrib.bold = 0;
-                break;
-            case 24:
-                s->t_attrib.uline = 0;
-                break;
-            case 25:
-                s->t_attrib.blink = 0;
-                break;
-            case 27:
-                s->t_attrib.invers = 0;
-                break;
-            case 28:
-                s->t_attrib.unvisible = 0;
-                break;
-            /* set foreground color */
-            case 30:
-                s->t_attrib.fgcol = QEMU_COLOR_BLACK;
-                break;
-            case 31:
-                s->t_attrib.fgcol = QEMU_COLOR_RED;
-                break;
-            case 32:
-                s->t_attrib.fgcol = QEMU_COLOR_GREEN;
-                break;
-            case 33:
-                s->t_attrib.fgcol = QEMU_COLOR_YELLOW;
-                break;
-            case 34:
-                s->t_attrib.fgcol = QEMU_COLOR_BLUE;
-                break;
-            case 35:
-                s->t_attrib.fgcol = QEMU_COLOR_MAGENTA;
-                break;
-            case 36:
-                s->t_attrib.fgcol = QEMU_COLOR_CYAN;
-                break;
-            case 37:
-                s->t_attrib.fgcol = QEMU_COLOR_WHITE;
-                break;
-            /* set background color */
-            case 40:
-                s->t_attrib.bgcol = QEMU_COLOR_BLACK;
-                break;
-            case 41:
-                s->t_attrib.bgcol = QEMU_COLOR_RED;
-                break;
-            case 42:
-                s->t_attrib.bgcol = QEMU_COLOR_GREEN;
-                break;
-            case 43:
-                s->t_attrib.bgcol = QEMU_COLOR_YELLOW;
-                break;
-            case 44:
-                s->t_attrib.bgcol = QEMU_COLOR_BLUE;
-                break;
-            case 45:
-                s->t_attrib.bgcol = QEMU_COLOR_MAGENTA;
-                break;
-            case 46:
-                s->t_attrib.bgcol = QEMU_COLOR_CYAN;
-                break;
-            case 47:
-                s->t_attrib.bgcol = QEMU_COLOR_WHITE;
-                break;
-        }
-    }
-}
-
-static void console_clear_xy(QemuConsole *s, int x, int y)
-{
-    int y1 = (s->y_base + y) % s->total_height;
-    if (x >= s->width) {
-        x = s->width - 1;
-    }
-    TextCell *c = &s->cells[y1 * s->width + x];
-    c->ch = ' ';
-    c->t_attrib = s->t_attrib_default;
-    update_xy(s, x, y);
-}
-
-static void console_put_one(QemuConsole *s, int ch)
-{
-    TextCell *c;
-    int y1;
-    if (s->x >= s->width) {
-        /* line wrap */
-        s->x = 0;
-        console_put_lf(s);
-    }
-    y1 = (s->y_base + s->y) % s->total_height;
-    c = &s->cells[y1 * s->width + s->x];
-    c->ch = ch;
-    c->t_attrib = s->t_attrib;
-    update_xy(s, s->x, s->y);
-    s->x++;
-}
-
-static void console_respond_str(QemuConsole *s, const char *buf)
-{
-    while (*buf) {
-        console_put_one(s, *buf);
-        buf++;
-    }
-}
-
-/* set cursor, checking bounds */
-static void set_cursor(QemuConsole *s, int x, int y)
-{
-    if (x < 0) {
-        x = 0;
-    }
-    if (y < 0) {
-        y = 0;
-    }
-    if (y >= s->height) {
-        y = s->height - 1;
-    }
-    if (x >= s->width) {
-        x = s->width - 1;
-    }
-
-    s->x = x;
-    s->y = y;
-}
-
-static void console_putchar(QemuConsole *s, int ch)
-{
-    int i;
-    int x, y;
-    char response[40];
-
-    switch(s->state) {
-    case TTY_STATE_NORM:
-        switch(ch) {
-        case '\r':  /* carriage return */
-            s->x = 0;
-            break;
-        case '\n':  /* newline */
-            console_put_lf(s);
-            break;
-        case '\b':  /* backspace */
-            if (s->x > 0)
-                s->x--;
-            break;
-        case '\t':  /* tabspace */
-            if (s->x + (8 - (s->x % 8)) > s->width) {
-                s->x = 0;
-                console_put_lf(s);
-            } else {
-                s->x = s->x + (8 - (s->x % 8));
-            }
-            break;
-        case '\a':  /* alert aka. bell */
-            /* TODO: has to be implemented */
-            break;
-        case 14:
-            /* SI (shift in), character set 0 (ignored) */
-            break;
-        case 15:
-            /* SO (shift out), character set 1 (ignored) */
-            break;
-        case 27:    /* esc (introducing an escape sequence) */
-            s->state = TTY_STATE_ESC;
-            break;
-        default:
-            console_put_one(s, ch);
-            break;
-        }
-        break;
-    case TTY_STATE_ESC: /* check if it is a terminal escape sequence */
-        if (ch == '[') {
-            for(i=0;i<MAX_ESC_PARAMS;i++)
-                s->esc_params[i] = 0;
-            s->nb_esc_params = 0;
-            s->state = TTY_STATE_CSI;
-        } else {
-            s->state = TTY_STATE_NORM;
-        }
-        break;
-    case TTY_STATE_CSI: /* handle escape sequence parameters */
-        if (ch >= '0' && ch <= '9') {
-            if (s->nb_esc_params < MAX_ESC_PARAMS) {
-                int *param = &s->esc_params[s->nb_esc_params];
-                int digit = (ch - '0');
-
-                *param = (*param <= (INT_MAX - digit) / 10) ?
-                         *param * 10 + digit : INT_MAX;
-            }
-        } else {
-            if (s->nb_esc_params < MAX_ESC_PARAMS)
-                s->nb_esc_params++;
-            if (ch == ';' || ch == '?') {
-                break;
-            }
-            trace_console_putchar_csi(s->esc_params[0], s->esc_params[1],
-                                      ch, s->nb_esc_params);
-            s->state = TTY_STATE_NORM;
-            switch(ch) {
-            case 'A':
-                /* move cursor up */
-                if (s->esc_params[0] == 0) {
-                    s->esc_params[0] = 1;
-                }
-                set_cursor(s, s->x, s->y - s->esc_params[0]);
-                break;
-            case 'B':
-                /* move cursor down */
-                if (s->esc_params[0] == 0) {
-                    s->esc_params[0] = 1;
-                }
-                set_cursor(s, s->x, s->y + s->esc_params[0]);
-                break;
-            case 'C':
-                /* move cursor right */
-                if (s->esc_params[0] == 0) {
-                    s->esc_params[0] = 1;
-                }
-                set_cursor(s, s->x + s->esc_params[0], s->y);
-                break;
-            case 'D':
-                /* move cursor left */
-                if (s->esc_params[0] == 0) {
-                    s->esc_params[0] = 1;
-                }
-                set_cursor(s, s->x - s->esc_params[0], s->y);
-                break;
-            case 'G':
-                /* move cursor to column */
-                set_cursor(s, s->esc_params[0] - 1, s->y);
-                break;
-            case 'f':
-            case 'H':
-                /* move cursor to row, column */
-                set_cursor(s, s->esc_params[1] - 1, s->esc_params[0] - 1);
-                break;
-            case 'J':
-                switch (s->esc_params[0]) {
-                case 0:
-                    /* clear to end of screen */
-                    for (y = s->y; y < s->height; y++) {
-                        for (x = 0; x < s->width; x++) {
-                            if (y == s->y && x < s->x) {
-                                continue;
-                            }
-                            console_clear_xy(s, x, y);
-                        }
-                    }
-                    break;
-                case 1:
-                    /* clear from beginning of screen */
-                    for (y = 0; y <= s->y; y++) {
-                        for (x = 0; x < s->width; x++) {
-                            if (y == s->y && x > s->x) {
-                                break;
-                            }
-                            console_clear_xy(s, x, y);
-                        }
-                    }
-                    break;
-                case 2:
-                    /* clear entire screen */
-                    for (y = 0; y <= s->height; y++) {
-                        for (x = 0; x < s->width; x++) {
-                            console_clear_xy(s, x, y);
-                        }
-                    }
-                    break;
-                }
-                break;
-            case 'K':
-                switch (s->esc_params[0]) {
-                case 0:
-                    /* clear to eol */
-                    for(x = s->x; x < s->width; x++) {
-                        console_clear_xy(s, x, s->y);
-                    }
-                    break;
-                case 1:
-                    /* clear from beginning of line */
-                    for (x = 0; x <= s->x && x < s->width; x++) {
-                        console_clear_xy(s, x, s->y);
-                    }
-                    break;
-                case 2:
-                    /* clear entire line */
-                    for(x = 0; x < s->width; x++) {
-                        console_clear_xy(s, x, s->y);
-                    }
-                    break;
-                }
-                break;
-            case 'm':
-                console_handle_escape(s);
-                break;
-            case 'n':
-                switch (s->esc_params[0]) {
-                case 5:
-                    /* report console status (always succeed)*/
-                    console_respond_str(s, "\033[0n");
-                    break;
-                case 6:
-                    /* report cursor position */
-                    sprintf(response, "\033[%d;%dR",
-                           (s->y_base + s->y) % s->total_height + 1,
-                            s->x + 1);
-                    console_respond_str(s, response);
-                    break;
-                }
-                break;
-            case 's':
-                /* save cursor position */
-                s->x_saved = s->x;
-                s->y_saved = s->y;
-                break;
-            case 'u':
-                /* restore cursor position */
-                s->x = s->x_saved;
-                s->y = s->y_saved;
-                break;
-            default:
-                trace_console_putchar_unhandled(ch);
-                break;
-            }
-            break;
-        }
-    }
-}
-
 static void displaychangelistener_gfx_switch(DisplayChangeListener *dcl,
                                              struct DisplaySurface *new_surface,
                                              bool update)
@@ -1239,127 +316,29 @@ void console_select(unsigned int index)
         DisplayState *ds = s->ds;
 
         active_console = s;
-        if (ds->have_gfx) {
-            QLIST_FOREACH(dcl, &ds->listeners, next) {
-                if (dcl->con != NULL) {
-                    continue;
-                }
-                displaychangelistener_display_console(dcl, s, NULL);
+        QLIST_FOREACH (dcl, &ds->listeners, next) {
+            if (dcl->con != NULL) {
+                continue;
             }
+            displaychangelistener_display_console(dcl, s, NULL);
         }
-        if (ds->have_text) {
-            dpy_text_resize(s, s->width, s->height);
+
+        if (QEMU_IS_TEXT_CONSOLE(s)) {
+            qemu_text_console_select(QEMU_TEXT_CONSOLE(s));
         }
-        text_console_update_cursor(NULL);
     }
 }
 
-struct VCChardev {
-    Chardev parent;
-    QemuConsole *console;
-};
-typedef struct VCChardev VCChardev;
-
-#define TYPE_CHARDEV_VC "chardev-vc"
-DECLARE_INSTANCE_CHECKER(VCChardev, VC_CHARDEV,
-                         TYPE_CHARDEV_VC)
-
-static int vc_chr_write(Chardev *chr, const uint8_t *buf, int len)
+void qemu_text_console_put_keysym(QemuTextConsole *s, int keysym)
 {
-    VCChardev *drv = VC_CHARDEV(chr);
-    QemuConsole *s = drv->console;
-    int i;
-
-    if (!s->ds) {
-        return 0;
-    }
-
-    s->update_x0 = s->width * FONT_WIDTH;
-    s->update_y0 = s->height * FONT_HEIGHT;
-    s->update_x1 = 0;
-    s->update_y1 = 0;
-    console_show_cursor(s, 0);
-    for(i = 0; i < len; i++) {
-        console_putchar(s, buf[i]);
-    }
-    console_show_cursor(s, 1);
-    if (s->ds->have_gfx && s->update_x0 < s->update_x1) {
-        dpy_gfx_update(s, s->update_x0, s->update_y0,
-                       s->update_x1 - s->update_x0,
-                       s->update_y1 - s->update_y0);
-    }
-    return len;
-}
-
-static void kbd_send_chars(QemuConsole *s)
-{
-    uint32_t len, avail;
-
-    len = qemu_chr_be_can_write(s->chr);
-    avail = fifo8_num_used(&s->out_fifo);
-    while (len > 0 && avail > 0) {
-        const uint8_t *buf;
-        uint32_t size;
-
-        buf = fifo8_pop_buf(&s->out_fifo, MIN(len, avail), &size);
-        qemu_chr_be_write(s->chr, buf, size);
-        len = qemu_chr_be_can_write(s->chr);
-        avail -= size;
-    }
-}
-
-/* called when an ascii key is pressed */
-void kbd_put_keysym_console(QemuConsole *s, int keysym)
-{
-    uint8_t buf[16], *q;
-    int c;
-    uint32_t num_free;
-
-    if (!s || (s->console_type == GRAPHIC_CONSOLE))
-        return;
-
-    switch(keysym) {
-    case QEMU_KEY_CTRL_UP:
-        console_scroll(s, -1);
-        break;
-    case QEMU_KEY_CTRL_DOWN:
-        console_scroll(s, 1);
-        break;
-    case QEMU_KEY_CTRL_PAGEUP:
-        console_scroll(s, -10);
-        break;
-    case QEMU_KEY_CTRL_PAGEDOWN:
-        console_scroll(s, 10);
-        break;
-    default:
-        /* convert the QEMU keysym to VT100 key string */
-        q = buf;
-        if (keysym >= 0xe100 && keysym <= 0xe11f) {
-            *q++ = '\033';
-            *q++ = '[';
-            c = keysym - 0xe100;
-            if (c >= 10)
-                *q++ = '0' + (c / 10);
-            *q++ = '0' + (c % 10);
-            *q++ = '~';
-        } else if (keysym >= 0xe120 && keysym <= 0xe17f) {
-            *q++ = '\033';
-            *q++ = '[';
-            *q++ = keysym & 0xff;
-        } else if (s->echo && (keysym == '\r' || keysym == '\n')) {
-            vc_chr_write(s->chr, (const uint8_t *) "\r", 1);
-            *q++ = '\n';
-        } else {
-            *q++ = keysym;
+    if (!s) {
+        if (!QEMU_IS_TEXT_CONSOLE(active_console)) {
+            return;
         }
-        if (s->echo) {
-            vc_chr_write(s->chr, buf, q - buf);
-        }
-        num_free = fifo8_num_free(&s->out_fifo);
-        fifo8_push_all(&s->out_fifo, buf, MIN(num_free, q - buf));
-        kbd_send_chars(s);
-        break;
+        s = QEMU_TEXT_CONSOLE(active_console);
     }
+
+    qemu_text_console_handle_keysym(s, keysym);
 }
 
 static const int qcode_to_keysym[Q_KEY_CODE__MAX] = {
@@ -1387,7 +366,7 @@ static const int ctrl_qcode_to_keysym[Q_KEY_CODE__MAX] = {
     [Q_KEY_CODE_PGDN]   = QEMU_KEY_CTRL_PAGEDOWN,
 };
 
-bool kbd_put_qcode_console(QemuConsole *s, int qcode, bool ctrl)
+bool qemu_text_console_put_qcode(QemuTextConsole *s, int qcode, bool ctrl)
 {
     int keysym;
 
@@ -1395,96 +374,36 @@ bool kbd_put_qcode_console(QemuConsole *s, int qcode, bool ctrl)
     if (keysym == 0) {
         return false;
     }
-    kbd_put_keysym_console(s, keysym);
+    qemu_text_console_put_keysym(s, keysym);
     return true;
 }
 
-void kbd_put_string_console(QemuConsole *s, const char *str, int len)
+void qemu_text_console_put_string(QemuTextConsole *s, const char *str, int len)
 {
     int i;
 
     for (i = 0; i < len && str[i]; i++) {
-        kbd_put_keysym_console(s, str[i]);
+        qemu_text_console_put_keysym(s, str[i]);
     }
 }
 
-void kbd_put_keysym(int keysym)
+static void
+qemu_console_register(QemuConsole *c)
 {
-    kbd_put_keysym_console(active_console, keysym);
-}
-
-static void text_console_invalidate(void *opaque)
-{
-    QemuConsole *s = (QemuConsole *) opaque;
-
-    if (s->ds->have_text && s->console_type == TEXT_CONSOLE) {
-        text_console_resize(s);
-    }
-    console_refresh(s);
-}
-
-static void text_console_update(void *opaque, console_ch_t *chardata)
-{
-    QemuConsole *s = (QemuConsole *) opaque;
-    int i, j, src;
-
-    if (s->text_x[0] <= s->text_x[1]) {
-        src = (s->y_base + s->text_y[0]) * s->width;
-        chardata += s->text_y[0] * s->width;
-        for (i = s->text_y[0]; i <= s->text_y[1]; i ++)
-            for (j = 0; j < s->width; j++, src++) {
-                console_write_ch(chardata ++,
-                                 ATTR2CHTYPE(s->cells[src].ch,
-                                             s->cells[src].t_attrib.fgcol,
-                                             s->cells[src].t_attrib.bgcol,
-                                             s->cells[src].t_attrib.bold));
-            }
-        dpy_text_update(s, s->text_x[0], s->text_y[0],
-                        s->text_x[1] - s->text_x[0], i - s->text_y[0]);
-        s->text_x[0] = s->width;
-        s->text_y[0] = s->height;
-        s->text_x[1] = 0;
-        s->text_y[1] = 0;
-    }
-    if (s->cursor_invalidate) {
-        dpy_text_cursor(s, s->x, s->y);
-        s->cursor_invalidate = 0;
-    }
-}
-
-static QemuConsole *new_console(DisplayState *ds, console_type_t console_type,
-                                uint32_t head)
-{
-    Object *obj;
-    QemuConsole *s;
     int i;
 
-    obj = object_new(TYPE_QEMU_CONSOLE);
-    s = QEMU_CONSOLE(obj);
-    qemu_co_queue_init(&s->dump_queue);
-    s->head = head;
-    object_property_add_link(obj, "device", TYPE_DEVICE,
-                             (Object **)&s->device,
-                             object_property_allow_set_link,
-                             OBJ_PROP_LINK_STRONG);
-    object_property_add_uint32_ptr(obj, "head", &s->head,
-                                   OBJ_PROP_FLAG_READ);
-
-    if (!active_console || ((active_console->console_type != GRAPHIC_CONSOLE) &&
-        (console_type == GRAPHIC_CONSOLE))) {
-        active_console = s;
+    if (!active_console || (!QEMU_IS_GRAPHIC_CONSOLE(active_console) &&
+                            QEMU_IS_GRAPHIC_CONSOLE(c))) {
+        active_console = c;
     }
-    s->ds = ds;
-    s->console_type = console_type;
-    s->window_id = -1;
 
     if (QTAILQ_EMPTY(&consoles)) {
-        s->index = 0;
-        QTAILQ_INSERT_TAIL(&consoles, s, next);
-    } else if (console_type != GRAPHIC_CONSOLE || phase_check(PHASE_MACHINE_READY)) {
+        c->index = 0;
+        QTAILQ_INSERT_TAIL(&consoles, c, next);
+    } else if (!QEMU_IS_GRAPHIC_CONSOLE(c) || phase_check(PHASE_MACHINE_READY)) {
         QemuConsole *last = QTAILQ_LAST(&consoles);
-        s->index = last->index + 1;
-        QTAILQ_INSERT_TAIL(&consoles, s, next);
+        c->index = last->index + 1;
+        QTAILQ_INSERT_TAIL(&consoles, c, next);
     } else {
         /*
          * HACK: Put graphical consoles before text consoles.
@@ -1492,26 +411,88 @@ static QemuConsole *new_console(DisplayState *ds, console_type_t console_type,
          * Only do that for coldplugged devices.  After initial device
          * initialization we will not renumber the consoles any more.
          */
-        QemuConsole *c = QTAILQ_FIRST(&consoles);
+        QemuConsole *it = QTAILQ_FIRST(&consoles);
 
-        while (QTAILQ_NEXT(c, next) != NULL &&
-               c->console_type == GRAPHIC_CONSOLE) {
-            c = QTAILQ_NEXT(c, next);
+        while (QTAILQ_NEXT(it, next) != NULL && QEMU_IS_GRAPHIC_CONSOLE(it)) {
+            it = QTAILQ_NEXT(it, next);
         }
-        if (c->console_type == GRAPHIC_CONSOLE) {
+        if (QEMU_IS_GRAPHIC_CONSOLE(it)) {
             /* have no text consoles */
-            s->index = c->index + 1;
-            QTAILQ_INSERT_AFTER(&consoles, c, s, next);
+            c->index = it->index + 1;
+            QTAILQ_INSERT_AFTER(&consoles, it, c, next);
         } else {
-            s->index = c->index;
-            QTAILQ_INSERT_BEFORE(c, s, next);
+            c->index = it->index;
+            QTAILQ_INSERT_BEFORE(it, c, next);
             /* renumber text consoles */
-            for (i = s->index + 1; c != NULL; c = QTAILQ_NEXT(c, next), i++) {
-                c->index = i;
+            for (i = c->index + 1; it != NULL; it = QTAILQ_NEXT(it, next), i++) {
+                it->index = i;
             }
         }
     }
-    return s;
+}
+
+static void
+qemu_console_finalize(Object *obj)
+{
+    QemuConsole *c = QEMU_CONSOLE(obj);
+
+    /* TODO: check this code path, and unregister from consoles */
+    g_clear_pointer(&c->surface, qemu_free_displaysurface);
+    g_clear_pointer(&c->gl_unblock_timer, timer_free);
+    g_clear_pointer(&c->ui_timer, timer_free);
+}
+
+static void
+qemu_console_class_init(ObjectClass *oc, void *data)
+{
+}
+
+static void
+qemu_console_init(Object *obj)
+{
+    QemuConsole *c = QEMU_CONSOLE(obj);
+    DisplayState *ds = get_alloc_displaystate();
+
+    qemu_co_queue_init(&c->dump_queue);
+    c->ds = ds;
+    c->window_id = -1;
+    c->ui_timer = timer_new_ms(QEMU_CLOCK_REALTIME,
+                               dpy_set_ui_info_timer, c);
+    qemu_console_register(c);
+}
+
+static void
+qemu_graphic_console_finalize(Object *obj)
+{
+    QemuGraphicConsole *c = QEMU_GRAPHIC_CONSOLE(obj);
+
+    g_clear_pointer(&c->device, object_unref);
+}
+
+static void
+qemu_graphic_console_prop_get_head(Object *obj, Visitor *v, const char *name,
+                                   void *opaque, Error **errp)
+{
+    QemuGraphicConsole *c = QEMU_GRAPHIC_CONSOLE(obj);
+
+    visit_type_uint32(v, name, &c->head, errp);
+}
+
+static void
+qemu_graphic_console_class_init(ObjectClass *oc, void *data)
+{
+    object_class_property_add_link(oc, "device", TYPE_DEVICE,
+                                   offsetof(QemuGraphicConsole, device),
+                                   object_property_allow_set_link,
+                                   OBJ_PROP_LINK_STRONG);
+    object_class_property_add(oc, "head", "uint32",
+                              qemu_graphic_console_prop_get_head,
+                              NULL, NULL, NULL);
+}
+
+static void
+qemu_graphic_console_init(Object *obj)
+{
 }
 
 #ifdef WIN32
@@ -1577,8 +558,7 @@ DisplaySurface *qemu_create_displaysurface_from(int width, int height,
     DisplaySurface *surface = g_new0(DisplaySurface, 1);
 
     trace_displaysurface_create_from(surface, width, height, format);
-    surface->format = format;
-    surface->image = pixman_image_create_bits(surface->format,
+    surface->image = pixman_image_create_bits(format,
                                               width, height,
                                               (void *)data, linesize);
     assert(surface->image != NULL);
@@ -1595,7 +575,6 @@ DisplaySurface *qemu_create_displaysurface_pixman(pixman_image_t *image)
     DisplaySurface *surface = g_new0(DisplaySurface, 1);
 
     trace_displaysurface_create_pixman(surface);
-    surface->format = pixman_image_get_format(image);
     surface->image = pixman_image_ref(image);
 
     return surface;
@@ -1605,8 +584,8 @@ DisplaySurface *qemu_create_placeholder_surface(int w, int h,
                                                 const char *msg)
 {
     DisplaySurface *surface = qemu_create_displaysurface(w, h);
-    pixman_color_t bg = color_table_rgb[0][QEMU_COLOR_BLACK];
-    pixman_color_t fg = color_table_rgb[0][QEMU_COLOR_WHITE];
+    pixman_color_t bg = QEMU_PIXMAN_COLOR_BLACK;
+    pixman_color_t fg = QEMU_PIXMAN_COLOR_GRAY;
     pixman_image_t *glyph;
     int len, x, y, i;
 
@@ -1757,6 +736,17 @@ void qemu_console_set_display_gl_ctx(QemuConsole *con, DisplayGLCtx *gl)
     con->gl = gl;
 }
 
+static void
+dcl_set_graphic_cursor(DisplayChangeListener *dcl, QemuGraphicConsole *con)
+{
+    if (con && con->cursor && dcl->ops->dpy_cursor_define) {
+        dcl->ops->dpy_cursor_define(dcl, con->cursor);
+    }
+    if (con && dcl->ops->dpy_mouse_set) {
+        dcl->ops->dpy_mouse_set(dcl, con->cursor_x, con->cursor_y, con->cursor_on);
+    }
+}
+
 void register_displaychangelistener(DisplayChangeListener *dcl)
 {
     QemuConsole *con;
@@ -1774,13 +764,10 @@ void register_displaychangelistener(DisplayChangeListener *dcl)
         con = active_console;
     }
     displaychangelistener_display_console(dcl, con, dcl->con ? &error_fatal : NULL);
-    if (con && con->cursor && dcl->ops->dpy_cursor_define) {
-        dcl->ops->dpy_cursor_define(dcl, con->cursor);
+    if (QEMU_IS_GRAPHIC_CONSOLE(con)) {
+        dcl_set_graphic_cursor(dcl, QEMU_GRAPHIC_CONSOLE(con));
     }
-    if (con && dcl->ops->dpy_mouse_set) {
-        dcl->ops->dpy_mouse_set(dcl, con->cursor_x, con->cursor_y, con->cursor_on);
-    }
-    text_console_update_cursor(NULL);
+    qemu_text_console_update_cursor();
 }
 
 void update_displaychangelistener(DisplayChangeListener *dcl,
@@ -1809,21 +796,27 @@ void unregister_displaychangelistener(DisplayChangeListener *dcl)
 static void dpy_set_ui_info_timer(void *opaque)
 {
     QemuConsole *con = opaque;
+    uint32_t head = qemu_console_get_head(con);
 
-    con->hw_ops->ui_info(con->hw, con->head, &con->ui_info);
+    con->hw_ops->ui_info(con->hw, head, &con->ui_info);
 }
 
-bool dpy_ui_info_supported(QemuConsole *con)
+bool dpy_ui_info_supported(const QemuConsole *con)
 {
     if (con == NULL) {
         con = active_console;
     }
+    if (con == NULL) {
+        return false;
+    }
 
     return con->hw_ops->ui_info != NULL;
 }
 
 const QemuUIInfo *dpy_get_ui_info(const QemuConsole *con)
 {
+    assert(dpy_ui_info_supported(con));
+
     if (con == NULL) {
         con = active_console;
     }
@@ -2020,19 +1013,20 @@ void dpy_text_resize(QemuConsole *con, int w, int h)
     }
 }
 
-void dpy_mouse_set(QemuConsole *con, int x, int y, int on)
+void dpy_mouse_set(QemuConsole *c, int x, int y, int on)
 {
-    DisplayState *s = con->ds;
+    QemuGraphicConsole *con = QEMU_GRAPHIC_CONSOLE(c);
+    DisplayState *s = c->ds;
     DisplayChangeListener *dcl;
 
     con->cursor_x = x;
     con->cursor_y = y;
     con->cursor_on = on;
-    if (!qemu_console_is_visible(con)) {
+    if (!qemu_console_is_visible(c)) {
         return;
     }
     QLIST_FOREACH(dcl, &s->listeners, next) {
-        if (con != (dcl->con ? dcl->con : active_console)) {
+        if (c != (dcl->con ? dcl->con : active_console)) {
             continue;
         }
         if (dcl->ops->dpy_mouse_set) {
@@ -2041,18 +1035,19 @@ void dpy_mouse_set(QemuConsole *con, int x, int y, int on)
     }
 }
 
-void dpy_cursor_define(QemuConsole *con, QEMUCursor *cursor)
+void dpy_cursor_define(QemuConsole *c, QEMUCursor *cursor)
 {
-    DisplayState *s = con->ds;
+    QemuGraphicConsole *con = QEMU_GRAPHIC_CONSOLE(c);
+    DisplayState *s = c->ds;
     DisplayChangeListener *dcl;
 
     cursor_unref(con->cursor);
     con->cursor = cursor_ref(cursor);
-    if (!qemu_console_is_visible(con)) {
+    if (!qemu_console_is_visible(c)) {
         return;
     }
     QLIST_FOREACH(dcl, &s->listeners, next) {
-        if (con != (dcl->con ? dcl->con : active_console)) {
+        if (c != (dcl->con ? dcl->con : active_console)) {
             continue;
         }
         if (dcl->ops->dpy_cursor_define) {
@@ -2237,8 +1232,6 @@ static DisplayState *get_alloc_displaystate(void)
 {
     if (!display_state) {
         display_state = g_new0(DisplayState, 1);
-        cursor_timer = timer_new_ms(QEMU_CLOCK_REALTIME,
-                                    text_console_update_cursor, NULL);
     }
     return display_state;
 }
@@ -2252,14 +1245,8 @@ DisplayState *init_displaystate(void)
     gchar *name;
     QemuConsole *con;
 
-    get_alloc_displaystate();
     QTAILQ_FOREACH(con, &consoles, next) {
-        if (con->console_type != GRAPHIC_CONSOLE &&
-            con->ds == NULL) {
-            text_console_do_init(con->chr, display_state);
-        }
-
-        /* Hook up into the qom tree here (not in new_console()), once
+        /* Hook up into the qom tree here (not in object_new()), once
          * all QemuConsoles are created and the order / numbering
          * doesn't change any more */
         name = g_strdup_printf("console[%d]", con->index);
@@ -2288,21 +1275,18 @@ QemuConsole *graphic_console_init(DeviceState *dev, uint32_t head,
     int width = 640;
     int height = 480;
     QemuConsole *s;
-    DisplayState *ds;
     DisplaySurface *surface;
 
-    ds = get_alloc_displaystate();
-    s = qemu_console_lookup_unused();
+    s = qemu_graphic_console_lookup_unused();
     if (s) {
         trace_console_gfx_reuse(s->index);
         width = qemu_console_get_width(s, 0);
         height = qemu_console_get_height(s, 0);
     } else {
         trace_console_gfx_new();
-        s = new_console(ds, GRAPHIC_CONSOLE, head);
-        s->ui_timer = timer_new_ms(QEMU_CLOCK_REALTIME,
-                                   dpy_set_ui_info_timer, s);
+        s = (QemuConsole *)object_new(TYPE_QEMU_GRAPHIC_CONSOLE);
     }
+    QEMU_GRAPHIC_CONSOLE(s)->head = head;
     graphic_console_set_hwops(s, hw_ops, opaque);
     if (dev) {
         object_property_set_link(OBJECT(s), "device", OBJECT(dev),
@@ -2396,13 +1380,13 @@ QemuConsole *qemu_console_lookup_by_device_name(const char *device_id,
     return con;
 }
 
-QemuConsole *qemu_console_lookup_unused(void)
+static QemuConsole *qemu_graphic_console_lookup_unused(void)
 {
     QemuConsole *con;
     Object *obj;
 
     QTAILQ_FOREACH(con, &consoles, next) {
-        if (con->hw_ops != &unused_ops) {
+        if (!QEMU_IS_GRAPHIC_CONSOLE(con) || con->hw_ops != &unused_ops) {
             continue;
         }
         obj = object_property_get_link(OBJECT(con),
@@ -2420,7 +1404,7 @@ QEMUCursor *qemu_console_get_cursor(QemuConsole *con)
     if (con == NULL) {
         con = active_console;
     }
-    return con ? con->cursor : NULL;
+    return QEMU_IS_GRAPHIC_CONSOLE(con) ? QEMU_GRAPHIC_CONSOLE(con)->cursor : NULL;
 }
 
 bool qemu_console_is_visible(QemuConsole *con)
@@ -2433,7 +1417,7 @@ bool qemu_console_is_graphic(QemuConsole *con)
     if (con == NULL) {
         con = active_console;
     }
-    return con && (con->console_type == GRAPHIC_CONSOLE);
+    return con && QEMU_IS_GRAPHIC_CONSOLE(con);
 }
 
 bool qemu_console_is_fixedsize(QemuConsole *con)
@@ -2441,7 +1425,7 @@ bool qemu_console_is_fixedsize(QemuConsole *con)
     if (con == NULL) {
         con = active_console;
     }
-    return con && (con->console_type != TEXT_CONSOLE);
+    return con && (QEMU_IS_GRAPHIC_CONSOLE(con) || QEMU_IS_FIXED_TEXT_CONSOLE(con));
 }
 
 bool qemu_console_is_gl_blocked(QemuConsole *con)
@@ -2477,31 +1461,34 @@ bool qemu_console_is_multihead(DeviceState *dev)
 
 char *qemu_console_get_label(QemuConsole *con)
 {
-    if (con->console_type == GRAPHIC_CONSOLE) {
-        if (con->device) {
+    if (QEMU_IS_GRAPHIC_CONSOLE(con)) {
+        QemuGraphicConsole *c = QEMU_GRAPHIC_CONSOLE(con);
+        if (c->device) {
             DeviceState *dev;
             bool multihead;
 
-            dev = DEVICE(con->device);
+            dev = DEVICE(c->device);
             multihead = qemu_console_is_multihead(dev);
             if (multihead) {
                 return g_strdup_printf("%s.%d", dev->id ?
                                        dev->id :
-                                       object_get_typename(con->device),
-                                       con->head);
+                                       object_get_typename(c->device),
+                                       c->head);
             } else {
                 return g_strdup_printf("%s", dev->id ?
                                        dev->id :
-                                       object_get_typename(con->device));
+                                       object_get_typename(c->device));
             }
         }
         return g_strdup("VGA");
-    } else {
-        if (con->chr && con->chr->label) {
-            return g_strdup(con->chr->label);
+    } else if (QEMU_IS_TEXT_CONSOLE(con)) {
+        const char *label = qemu_text_console_get_label(QEMU_TEXT_CONSOLE(con));
+        if (label) {
+            return g_strdup(label);
         }
-        return g_strdup_printf("vc%d", con->index);
     }
+
+    return g_strdup_printf("vc%d", con->index);
 }
 
 int qemu_console_get_index(QemuConsole *con)
@@ -2517,7 +1504,13 @@ uint32_t qemu_console_get_head(QemuConsole *con)
     if (con == NULL) {
         con = active_console;
     }
-    return con ? con->head : -1;
+    if (con == NULL) {
+        return -1;
+    }
+    if (QEMU_IS_GRAPHIC_CONSOLE(con)) {
+        return QEMU_GRAPHIC_CONSOLE(con)->head;
+    }
+    return 0;
 }
 
 int qemu_console_get_width(QemuConsole *con, int fallback)
@@ -2560,35 +1553,11 @@ int qemu_console_get_height(QemuConsole *con, int fallback)
     }
 }
 
-static void vc_chr_accept_input(Chardev *chr)
-{
-    VCChardev *drv = VC_CHARDEV(chr);
-    QemuConsole *s = drv->console;
-
-    kbd_send_chars(s);
-}
-
-static void vc_chr_set_echo(Chardev *chr, bool echo)
-{
-    VCChardev *drv = VC_CHARDEV(chr);
-    QemuConsole *s = drv->console;
-
-    s->echo = echo;
-}
-
-static void text_console_update_cursor_timer(void)
-{
-    timer_mod(cursor_timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME)
-              + CONSOLE_CURSOR_PERIOD / 2);
-}
-
-static void text_console_update_cursor(void *opaque)
+int qemu_invalidate_text_consoles(void)
 {
     QemuConsole *s;
     int count = 0;
 
-    cursor_visible_phase = !cursor_visible_phase;
-
     QTAILQ_FOREACH(s, &consoles, next) {
         if (qemu_console_is_graphic(s) ||
             !qemu_console_is_visible(s)) {
@@ -2598,123 +1567,14 @@ static void text_console_update_cursor(void *opaque)
         graphic_hw_invalidate(s);
     }
 
-    if (count) {
-        text_console_update_cursor_timer();
-    }
-}
-
-static const GraphicHwOps text_console_ops = {
-    .invalidate  = text_console_invalidate,
-    .text_update = text_console_update,
-};
-
-static void text_console_do_init(Chardev *chr, DisplayState *ds)
-{
-    VCChardev *drv = VC_CHARDEV(chr);
-    QemuConsole *s = drv->console;
-    int g_width = 80 * FONT_WIDTH;
-    int g_height = 24 * FONT_HEIGHT;
-
-    fifo8_create(&s->out_fifo, 16);
-    s->ds = ds;
-
-    s->y_displayed = 0;
-    s->y_base = 0;
-    s->total_height = DEFAULT_BACKSCROLL;
-    s->x = 0;
-    s->y = 0;
-    if (s->scanout.kind != SCANOUT_SURFACE) {
-        if (active_console && active_console->scanout.kind == SCANOUT_SURFACE) {
-            g_width = qemu_console_get_width(active_console, g_width);
-            g_height = qemu_console_get_height(active_console, g_height);
-        }
-        s->surface = qemu_create_displaysurface(g_width, g_height);
-        s->scanout.kind = SCANOUT_SURFACE;
-    }
-
-    s->hw_ops = &text_console_ops;
-    s->hw = s;
-
-    /* Set text attribute defaults */
-    s->t_attrib_default.bold = 0;
-    s->t_attrib_default.uline = 0;
-    s->t_attrib_default.blink = 0;
-    s->t_attrib_default.invers = 0;
-    s->t_attrib_default.unvisible = 0;
-    s->t_attrib_default.fgcol = QEMU_COLOR_WHITE;
-    s->t_attrib_default.bgcol = QEMU_COLOR_BLACK;
-    /* set current text attributes to default */
-    s->t_attrib = s->t_attrib_default;
-    text_console_resize(s);
-
-    if (chr->label) {
-        char *msg;
-
-        s->t_attrib.bgcol = QEMU_COLOR_BLUE;
-        msg = g_strdup_printf("%s console\r\n", chr->label);
-        vc_chr_write(chr, (uint8_t *)msg, strlen(msg));
-        g_free(msg);
-        s->t_attrib = s->t_attrib_default;
-    }
-
-    qemu_chr_be_event(chr, CHR_EVENT_OPENED);
-}
-
-static void vc_chr_open(Chardev *chr,
-                        ChardevBackend *backend,
-                        bool *be_opened,
-                        Error **errp)
-{
-    ChardevVC *vc = backend->u.vc.data;
-    VCChardev *drv = VC_CHARDEV(chr);
-    QemuConsole *s;
-    unsigned width = 0;
-    unsigned height = 0;
-
-    if (vc->has_width) {
-        width = vc->width;
-    } else if (vc->has_cols) {
-        width = vc->cols * FONT_WIDTH;
-    }
-
-    if (vc->has_height) {
-        height = vc->height;
-    } else if (vc->has_rows) {
-        height = vc->rows * FONT_HEIGHT;
-    }
-
-    trace_console_txt_new(width, height);
-    if (width == 0 || height == 0) {
-        s = new_console(NULL, TEXT_CONSOLE, 0);
-    } else {
-        s = new_console(NULL, TEXT_CONSOLE_FIXED_SIZE, 0);
-        s->scanout.kind = SCANOUT_SURFACE;
-        s->surface = qemu_create_displaysurface(width, height);
-    }
-
-    if (!s) {
-        error_setg(errp, "cannot create text console");
-        return;
-    }
-
-    s->chr = chr;
-    drv->console = s;
-
-    if (display_state) {
-        text_console_do_init(chr, display_state);
-    }
-
-    /* console/chardev init sometimes completes elsewhere in a 2nd
-     * stage, so defer OPENED events until they are fully initialized
-     */
-    *be_opened = false;
+    return count;
 }
 
 void qemu_console_resize(QemuConsole *s, int width, int height)
 {
     DisplaySurface *surface = qemu_console_surface(s);
 
-    assert(s->console_type == GRAPHIC_CONSOLE);
+    assert(QEMU_IS_GRAPHIC_CONSOLE(s));
 
     if ((s->scanout.kind != SCANOUT_SURFACE ||
          (surface && surface->flags & QEMU_ALLOCATED_FLAG)) &&
@@ -2836,77 +1696,3 @@ void qemu_display_help(void)
         }
     }
 }
-
-void qemu_chr_parse_vc(QemuOpts *opts, ChardevBackend *backend, Error **errp)
-{
-    int val;
-    ChardevVC *vc;
-
-    backend->type = CHARDEV_BACKEND_KIND_VC;
-    vc = backend->u.vc.data = g_new0(ChardevVC, 1);
-    qemu_chr_parse_common(opts, qapi_ChardevVC_base(vc));
-
-    val = qemu_opt_get_number(opts, "width", 0);
-    if (val != 0) {
-        vc->has_width = true;
-        vc->width = val;
-    }
-
-    val = qemu_opt_get_number(opts, "height", 0);
-    if (val != 0) {
-        vc->has_height = true;
-        vc->height = val;
-    }
-
-    val = qemu_opt_get_number(opts, "cols", 0);
-    if (val != 0) {
-        vc->has_cols = true;
-        vc->cols = val;
-    }
-
-    val = qemu_opt_get_number(opts, "rows", 0);
-    if (val != 0) {
-        vc->has_rows = true;
-        vc->rows = val;
-    }
-}
-
-static const TypeInfo qemu_console_info = {
-    .name = TYPE_QEMU_CONSOLE,
-    .parent = TYPE_OBJECT,
-    .instance_size = sizeof(QemuConsole),
-    .class_size = sizeof(QemuConsoleClass),
-};
-
-static void char_vc_class_init(ObjectClass *oc, void *data)
-{
-    ChardevClass *cc = CHARDEV_CLASS(oc);
-
-    cc->parse = qemu_chr_parse_vc;
-    cc->open = vc_chr_open;
-    cc->chr_write = vc_chr_write;
-    cc->chr_accept_input = vc_chr_accept_input;
-    cc->chr_set_echo = vc_chr_set_echo;
-}
-
-static const TypeInfo char_vc_type_info = {
-    .name = TYPE_CHARDEV_VC,
-    .parent = TYPE_CHARDEV,
-    .instance_size = sizeof(VCChardev),
-    .class_init = char_vc_class_init,
-};
-
-void qemu_console_early_init(void)
-{
-    /* set the default vc driver */
-    if (!object_class_by_name(TYPE_CHARDEV_VC)) {
-        type_register(&char_vc_type_info);
-    }
-}
-
-static void register_types(void)
-{
-    type_register_static(&qemu_console_info);
-}
-
-type_init(register_types);
diff --git a/ui/curses.c b/ui/curses.c
index de962faa7c..8bde8c5cf7 100644
--- a/ui/curses.c
+++ b/ui/curses.c
@@ -400,7 +400,7 @@ static void curses_refresh(DisplayChangeListener *dcl)
             if (keysym == -1)
                 keysym = chr;
 
-            kbd_put_keysym(keysym);
+            qemu_text_console_put_keysym(NULL, keysym);
         }
     }
 }
diff --git a/ui/dbus-console.c b/ui/dbus-console.c
index e19774f985..36f7349585 100644
--- a/ui/dbus-console.c
+++ b/ui/dbus-console.c
@@ -150,6 +150,8 @@ dbus_display_console_dispose(GObject *object)
     DBusDisplayConsole *ddc = DBUS_DISPLAY_CONSOLE(object);
 
     unregister_displaychangelistener(&ddc->dcl);
+    g_clear_object(&ddc->iface_touch);
+    g_clear_object(&ddc->iface_mouse);
     g_clear_object(&ddc->iface_kbd);
     g_clear_object(&ddc->iface);
     g_clear_pointer(&ddc->listeners, g_hash_table_unref);
diff --git a/ui/dbus-listener.c b/ui/dbus-listener.c
index 30917271ab..36548a7f52 100644
--- a/ui/dbus-listener.c
+++ b/ui/dbus-listener.c
@@ -26,6 +26,9 @@
 #include "qapi/error.h"
 #include "sysemu/sysemu.h"
 #include "dbus.h"
+#ifdef CONFIG_OPENGL
+#include <pixman.h>
+#endif
 #ifdef G_OS_UNIX
 #include <gio/gunixfdlist.h>
 #endif
@@ -59,12 +62,15 @@ struct _DBusDisplayListener {
 
     QemuDBusDisplay1Listener *proxy;
 
+#ifdef CONFIG_OPENGL
+    /* Keep track of the damage region */
+    pixman_region32_t gl_damage;
+#endif
+
     DisplayChangeListener dcl;
     DisplaySurface *ds;
     enum share_kind ds_share;
 
-    int gl_updates;
-
     bool ds_mapped;
     bool can_share_map;
 
@@ -539,11 +545,16 @@ static void dbus_gl_refresh(DisplayChangeListener *dcl)
         return;
     }
 
-    if (ddl->gl_updates) {
-        dbus_call_update_gl(dcl, 0, 0,
-                            surface_width(ddl->ds), surface_height(ddl->ds));
-        ddl->gl_updates = 0;
+    int n_rects = pixman_region32_n_rects(&ddl->gl_damage);
+
+    for (int i = 0; i < n_rects; i++) {
+        pixman_box32_t *box;
+        box = pixman_region32_rectangles(&ddl->gl_damage, NULL) + i;
+        /* TODO: Add a UpdateList call to send multiple updates at once */
+        dbus_call_update_gl(dcl, box->x1, box->y1,
+                            box->x2 - box->x1, box->y2 - box->y1);
     }
+    pixman_region32_clear(&ddl->gl_damage);
 }
 #endif /* OPENGL */
 
@@ -558,7 +569,10 @@ static void dbus_gl_gfx_update(DisplayChangeListener *dcl,
 {
     DBusDisplayListener *ddl = container_of(dcl, DBusDisplayListener, dcl);
 
-    ddl->gl_updates++;
+    pixman_region32_t rect_region;
+    pixman_region32_init_rect(&rect_region, x, y, w, h);
+    pixman_region32_union(&ddl->gl_damage, &ddl->gl_damage, &rect_region);
+    pixman_region32_fini(&rect_region);
 }
 #endif
 
@@ -738,6 +752,7 @@ dbus_display_listener_dispose(GObject *object)
     g_clear_object(&ddl->d3d11_proxy);
     g_clear_pointer(&ddl->peer_process, CloseHandle);
 #ifdef CONFIG_OPENGL
+    pixman_region32_fini(&ddl->gl_damage);
     egl_fb_destroy(&ddl->fb);
 #endif
 #endif
@@ -772,6 +787,9 @@ dbus_display_listener_class_init(DBusDisplayListenerClass *klass)
 static void
 dbus_display_listener_init(DBusDisplayListener *ddl)
 {
+#ifdef CONFIG_OPENGL
+    pixman_region32_init(&ddl->gl_damage);
+#endif
 }
 
 const char *
diff --git a/ui/gtk.c b/ui/gtk.c
index 8ba41c8f13..e09f97a86b 100644
--- a/ui/gtk.c
+++ b/ui/gtk.c
@@ -514,7 +514,7 @@ static void gd_switch(DisplayChangeListener *dcl,
     }
     vc->gfx.ds = surface;
 
-    if (surface->format == PIXMAN_x8r8g8b8) {
+    if (surface_format(surface) == PIXMAN_x8r8g8b8) {
         /*
          * PIXMAN_x8r8g8b8 == CAIRO_FORMAT_RGB24
          *
@@ -1187,15 +1187,15 @@ static gboolean gd_text_key_down(GtkWidget *widget,
                                  GdkEventKey *key, void *opaque)
 {
     VirtualConsole *vc = opaque;
-    QemuConsole *con = vc->gfx.dcl.con;
+    QemuTextConsole *con = QEMU_TEXT_CONSOLE(vc->gfx.dcl.con);
 
     if (key->keyval == GDK_KEY_Delete) {
-        kbd_put_qcode_console(con, Q_KEY_CODE_DELETE, false);
+        qemu_text_console_put_qcode(con, Q_KEY_CODE_DELETE, false);
     } else if (key->length) {
-        kbd_put_string_console(con, key->string, key->length);
+        qemu_text_console_put_string(con, key->string, key->length);
     } else {
         int qcode = gd_map_keycode(gd_get_keycode(key));
-        kbd_put_qcode_console(con, qcode, false);
+        qemu_text_console_put_qcode(con, qcode, false);
     }
     return TRUE;
 }
@@ -1860,7 +1860,6 @@ static void char_gd_vc_class_init(ObjectClass *oc, void *data)
 {
     ChardevClass *cc = CHARDEV_CLASS(oc);
 
-    cc->parse = qemu_chr_parse_vc;
     cc->open = gd_vc_open;
     cc->chr_write = gd_vc_chr_write;
     cc->chr_accept_input = gd_vc_chr_accept_input;
@@ -2360,7 +2359,7 @@ static void gtk_display_init(DisplayState *ds, DisplayOptions *opts)
 {
     VirtualConsole *vc;
 
-    GtkDisplayState *s = g_malloc0(sizeof(*s));
+    GtkDisplayState *s;
     GdkDisplay *window_display;
     GtkIconTheme *theme;
     char *dir;
@@ -2370,6 +2369,7 @@ static void gtk_display_init(DisplayState *ds, DisplayOptions *opts)
         exit(1);
     }
     assert(opts->type == DISPLAY_TYPE_GTK);
+    s = g_malloc0(sizeof(*s));
     s->opts = opts;
 
     theme = gtk_icon_theme_get_default();
diff --git a/ui/keymaps.h b/ui/keymaps.h
index 6473405485..3d52c0882a 100644
--- a/ui/keymaps.h
+++ b/ui/keymaps.h
@@ -44,7 +44,7 @@ typedef struct {
 /* "up" flag */
 #define SCANCODE_UP     0x80
 
-/* Additional modifiers to use if not catched another way. */
+/* Additional modifiers to use if not caught another way. */
 #define SCANCODE_SHIFT  0x100
 #define SCANCODE_CTRL   0x200
 #define SCANCODE_ALT    0x400
diff --git a/ui/meson.build b/ui/meson.build
index d81609fb0e..0a1e8272a3 100644
--- a/ui/meson.build
+++ b/ui/meson.build
@@ -6,6 +6,7 @@ system_ss.add(png)
 system_ss.add(files(
   'clipboard.c',
   'console.c',
+  'console-vc.c',
   'cursor.c',
   'input-keymap.c',
   'input-legacy.c',
diff --git a/ui/qemu-pixman.c b/ui/qemu-pixman.c
index e4f024a85e..be00a96340 100644
--- a/ui/qemu-pixman.c
+++ b/ui/qemu-pixman.c
@@ -200,14 +200,6 @@ void qemu_pixman_linebuf_fill(pixman_image_t *linebuf, pixman_image_t *fb,
                            x, y, 0, 0, 0, 0, width, 1);
 }
 
-/* copy linebuf to framebuffer */
-void qemu_pixman_linebuf_copy(pixman_image_t *fb, int width, int x, int y,
-                              pixman_image_t *linebuf)
-{
-    pixman_image_composite(PIXMAN_OP_SRC, linebuf, NULL, fb,
-                           0, 0, 0, 0, x, y, width, 1);
-}
-
 pixman_image_t *qemu_pixman_mirror_create(pixman_format_code_t format,
                                           pixman_image_t *image)
 {
@@ -226,17 +218,6 @@ void qemu_pixman_image_unref(pixman_image_t *image)
     pixman_image_unref(image);
 }
 
-pixman_color_t qemu_pixman_color(PixelFormat *pf, uint32_t color)
-{
-    pixman_color_t c;
-
-    c.red   = ((color & pf->rmask) >> pf->rshift) << (16 - pf->rbits);
-    c.green = ((color & pf->gmask) >> pf->gshift) << (16 - pf->gbits);
-    c.blue  = ((color & pf->bmask) >> pf->bshift) << (16 - pf->bbits);
-    c.alpha = ((color & pf->amask) >> pf->ashift) << (16 - pf->abits);
-    return c;
-}
-
 pixman_image_t *qemu_pixman_glyph_from_vgafont(int height, const uint8_t *font,
                                                unsigned int ch)
 {
diff --git a/ui/sdl2-2d.c b/ui/sdl2-2d.c
index bfebbdeaea..06468cd493 100644
--- a/ui/sdl2-2d.c
+++ b/ui/sdl2-2d.c
@@ -150,7 +150,7 @@ bool sdl2_2d_check_format(DisplayChangeListener *dcl,
 {
     /*
      * We let SDL convert for us a few more formats than,
-     * the native ones. Thes are the ones I have tested.
+     * the native ones. These are the ones I have tested.
      */
     return (format == PIXMAN_x8r8g8b8 ||
             format == PIXMAN_a8r8g8b8 ||
diff --git a/ui/sdl2-input.c b/ui/sdl2-input.c
index f068382209..b02a89ee7c 100644
--- a/ui/sdl2-input.c
+++ b/ui/sdl2-input.c
@@ -43,15 +43,16 @@ void sdl2_process_key(struct sdl2_console *scon,
                            ev->type == SDL_KEYDOWN ? "down" : "up");
     qkbd_state_key_event(scon->kbd, qcode, ev->type == SDL_KEYDOWN);
 
-    if (!qemu_console_is_graphic(con)) {
+    if (QEMU_IS_TEXT_CONSOLE(con)) {
+        QemuTextConsole *s = QEMU_TEXT_CONSOLE(con);
         bool ctrl = qkbd_state_modifier_get(scon->kbd, QKBD_MOD_CTRL);
         if (ev->type == SDL_KEYDOWN) {
             switch (qcode) {
             case Q_KEY_CODE_RET:
-                kbd_put_keysym_console(con, '\n');
+                qemu_text_console_put_keysym(s, '\n');
                 break;
             default:
-                kbd_put_qcode_console(con, qcode, ctrl);
+                qemu_text_console_put_qcode(s, qcode, ctrl);
                 break;
             }
         }
diff --git a/ui/sdl2.c b/ui/sdl2.c
index 0d91b555e3..178cc054ab 100644
--- a/ui/sdl2.c
+++ b/ui/sdl2.c
@@ -483,10 +483,9 @@ static void handle_textinput(SDL_Event *ev)
         return;
     }
 
-    if (qemu_console_is_graphic(con)) {
-        return;
+    if (QEMU_IS_TEXT_CONSOLE(con)) {
+        qemu_text_console_put_string(QEMU_TEXT_CONSOLE(con), ev->text.text, strlen(ev->text.text));
     }
-    kbd_put_string_console(con, ev->text.text, strlen(ev->text.text));
 }
 
 static void handle_mousemotion(SDL_Event *ev)
@@ -860,7 +859,7 @@ static void sdl2_display_init(DisplayState *ds, DisplayOptions *o)
     SDL_SetHint(SDL_HINT_VIDEO_X11_NET_WM_BYPASS_COMPOSITOR, "0");
 #endif
 #ifndef CONFIG_WIN32
-    /* QEMU uses its own low level keyboard hook procecure on Windows */
+    /* QEMU uses its own low level keyboard hook procedure on Windows */
     SDL_SetHint(SDL_HINT_GRAB_KEYBOARD, "1");
 #endif
 #ifdef SDL_HINT_ALLOW_ALT_TAB_WHILE_GRABBED
diff --git a/ui/spice-app.c b/ui/spice-app.c
index ad7f0551ad..405fb7f9f5 100644
--- a/ui/spice-app.c
+++ b/ui/spice-app.c
@@ -96,6 +96,11 @@ static void vc_chr_set_echo(Chardev *chr, bool echo)
     /* TODO: set echo for frontends QMP and qtest */
 }
 
+static void vc_chr_parse(QemuOpts *opts, ChardevBackend *backend, Error **errp)
+{
+    /* fqdn is dealt with in vc_chr_open() */
+}
+
 static void char_vc_class_init(ObjectClass *oc, void *data)
 {
     VCChardevClass *vc = CHARDEV_VC_CLASS(oc);
@@ -103,7 +108,7 @@ static void char_vc_class_init(ObjectClass *oc, void *data)
 
     vc->parent_open = cc->open;
 
-    cc->parse = qemu_chr_parse_vc;
+    cc->parse = vc_chr_parse;
     cc->open = vc_chr_open;
     cc->chr_set_echo = vc_chr_set_echo;
 }
diff --git a/ui/spice-display.c b/ui/spice-display.c
index 3f3f8013d8..5cc47bd668 100644
--- a/ui/spice-display.c
+++ b/ui/spice-display.c
@@ -189,7 +189,7 @@ static void qemu_spice_create_update(SimpleSpiceDisplay *ssd)
 {
     static const int blksize = 32;
     int blocks = DIV_ROUND_UP(surface_width(ssd->ds), blksize);
-    int dirty_top[blocks];
+    g_autofree int *dirty_top = NULL;
     int y, yoff1, yoff2, x, xoff, blk, bw;
     int bpp = surface_bytes_per_pixel(ssd->ds);
     uint8_t *guest, *mirror;
@@ -198,6 +198,7 @@ static void qemu_spice_create_update(SimpleSpiceDisplay *ssd)
         return;
     };
 
+    dirty_top = g_new(int, blocks);
     for (blk = 0; blk < blocks; blk++) {
         dirty_top[blk] = -1;
     }
@@ -436,7 +437,7 @@ void qemu_spice_display_switch(SimpleSpiceDisplay *ssd,
     }
     if (ssd->ds) {
         ssd->surface = pixman_image_ref(ssd->ds->image);
-        ssd->mirror  = qemu_pixman_mirror_create(ssd->ds->format,
+        ssd->mirror  = qemu_pixman_mirror_create(surface_format(ssd->ds),
                                                  ssd->ds->image);
         qemu_spice_create_host_primary(ssd);
     }
diff --git a/ui/ui-qmp-cmds.c b/ui/ui-qmp-cmds.c
index a37a7024f3..debc07d678 100644
--- a/ui/ui-qmp-cmds.c
+++ b/ui/ui-qmp-cmds.c
@@ -14,13 +14,20 @@
  */
 
 #include "qemu/osdep.h"
+
+#include "io/channel-file.h"
 #include "monitor/qmp-helpers.h"
 #include "qapi/qapi-commands-ui.h"
 #include "qapi/qmp/qerror.h"
+#include "qemu/coroutine.h"
 #include "qemu/cutils.h"
+#include "trace.h"
 #include "ui/console.h"
 #include "ui/dbus-display.h"
 #include "ui/qemu-spice.h"
+#ifdef CONFIG_PNG
+#include <png.h>
+#endif
 
 void qmp_set_password(SetPasswordOptions *opts, Error **errp)
 {
@@ -204,3 +211,183 @@ void qmp_client_migrate_info(const char *protocol, const char *hostname,
 
     error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "protocol", "'spice'");
 }
+
+#ifdef CONFIG_PNG
+/**
+ * png_save: Take a screenshot as PNG
+ *
+ * Saves screendump as a PNG file
+ *
+ * Returns true for success or false for error.
+ *
+ * @fd: File descriptor for PNG file.
+ * @image: Image data in pixman format.
+ * @errp: Pointer to an error.
+ */
+static bool png_save(int fd, pixman_image_t *image, Error **errp)
+{
+    int width = pixman_image_get_width(image);
+    int height = pixman_image_get_height(image);
+    png_struct *png_ptr;
+    png_info *info_ptr;
+    g_autoptr(pixman_image_t) linebuf =
+        qemu_pixman_linebuf_create(PIXMAN_BE_r8g8b8, width);
+    uint8_t *buf = (uint8_t *)pixman_image_get_data(linebuf);
+    FILE *f = fdopen(fd, "wb");
+    int y;
+    if (!f) {
+        error_setg_errno(errp, errno,
+                         "Failed to create file from file descriptor");
+        return false;
+    }
+
+    png_ptr = png_create_write_struct(PNG_LIBPNG_VER_STRING, NULL,
+                                      NULL, NULL);
+    if (!png_ptr) {
+        error_setg(errp, "PNG creation failed. Unable to write struct");
+        fclose(f);
+        return false;
+    }
+
+    info_ptr = png_create_info_struct(png_ptr);
+
+    if (!info_ptr) {
+        error_setg(errp, "PNG creation failed. Unable to write info");
+        fclose(f);
+        png_destroy_write_struct(&png_ptr, &info_ptr);
+        return false;
+    }
+
+    png_init_io(png_ptr, f);
+
+    png_set_IHDR(png_ptr, info_ptr, width, height, 8,
+                 PNG_COLOR_TYPE_RGB, PNG_INTERLACE_NONE,
+                 PNG_COMPRESSION_TYPE_BASE, PNG_FILTER_TYPE_BASE);
+
+    png_write_info(png_ptr, info_ptr);
+
+    for (y = 0; y < height; ++y) {
+        qemu_pixman_linebuf_fill(linebuf, image, width, 0, y);
+        png_write_row(png_ptr, buf);
+    }
+
+    png_write_end(png_ptr, NULL);
+
+    png_destroy_write_struct(&png_ptr, &info_ptr);
+
+    if (fclose(f) != 0) {
+        error_setg_errno(errp, errno,
+                         "PNG creation failed. Unable to close file");
+        return false;
+    }
+
+    return true;
+}
+
+#else /* no png support */
+
+static bool png_save(int fd, pixman_image_t *image, Error **errp)
+{
+    error_setg(errp, "Enable PNG support with libpng for screendump");
+    return false;
+}
+
+#endif /* CONFIG_PNG */
+
+static bool ppm_save(int fd, pixman_image_t *image, Error **errp)
+{
+    int width = pixman_image_get_width(image);
+    int height = pixman_image_get_height(image);
+    g_autoptr(Object) ioc = OBJECT(qio_channel_file_new_fd(fd));
+    g_autofree char *header = NULL;
+    g_autoptr(pixman_image_t) linebuf = NULL;
+    int y;
+
+    trace_ppm_save(fd, image);
+
+    header = g_strdup_printf("P6\n%d %d\n%d\n", width, height, 255);
+    if (qio_channel_write_all(QIO_CHANNEL(ioc),
+                              header, strlen(header), errp) < 0) {
+        return false;
+    }
+
+    linebuf = qemu_pixman_linebuf_create(PIXMAN_BE_r8g8b8, width);
+    for (y = 0; y < height; y++) {
+        qemu_pixman_linebuf_fill(linebuf, image, width, 0, y);
+        if (qio_channel_write_all(QIO_CHANNEL(ioc),
+                                  (char *)pixman_image_get_data(linebuf),
+                                  pixman_image_get_stride(linebuf), errp) < 0) {
+            return false;
+        }
+    }
+
+    return true;
+}
+
+/* Safety: coroutine-only, concurrent-coroutine safe, main thread only */
+void coroutine_fn
+qmp_screendump(const char *filename, const char *device,
+               bool has_head, int64_t head,
+               bool has_format, ImageFormat format, Error **errp)
+{
+    g_autoptr(pixman_image_t) image = NULL;
+    QemuConsole *con;
+    DisplaySurface *surface;
+    int fd;
+
+    if (device) {
+        con = qemu_console_lookup_by_device_name(device, has_head ? head : 0,
+                                                 errp);
+        if (!con) {
+            return;
+        }
+    } else {
+        if (has_head) {
+            error_setg(errp, "'head' must be specified together with 'device'");
+            return;
+        }
+        con = qemu_console_lookup_by_index(0);
+        if (!con) {
+            error_setg(errp, "There is no console to take a screendump from");
+            return;
+        }
+    }
+
+    qemu_console_co_wait_update(con);
+
+    /*
+     * All pending coroutines are woken up, while the BQL is held.  No
+     * further graphic update are possible until it is released.  Take
+     * an image ref before that.
+     */
+    surface = qemu_console_surface(con);
+    if (!surface) {
+        error_setg(errp, "no surface");
+        return;
+    }
+    image = pixman_image_ref(surface->image);
+
+    fd = qemu_open_old(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0666);
+    if (fd == -1) {
+        error_setg(errp, "failed to open file '%s': %s", filename,
+                   strerror(errno));
+        return;
+    }
+
+    /*
+     * The image content could potentially be updated as the coroutine
+     * yields and releases the BQL. It could produce corrupted dump, but
+     * it should be otherwise safe.
+     */
+    if (has_format && format == IMAGE_FORMAT_PNG) {
+        /* PNG format specified for screendump */
+        if (!png_save(fd, image, errp)) {
+            qemu_unlink(filename);
+        }
+    } else {
+        /* PPM format specified/default for screendump */
+        if (!ppm_save(fd, image, errp)) {
+            qemu_unlink(filename);
+        }
+    }
+}
diff --git a/ui/vdagent.c b/ui/vdagent.c
index 8a651492f0..00d36a8677 100644
--- a/ui/vdagent.c
+++ b/ui/vdagent.c
@@ -870,8 +870,11 @@ static void vdagent_disconnect(VDAgentChardev *vd)
 
 static void vdagent_chr_set_fe_open(struct Chardev *chr, int fe_open)
 {
+    VDAgentChardev *vd = QEMU_VDAGENT_CHARDEV(chr);
+
     if (!fe_open) {
         trace_vdagent_close();
+        vdagent_disconnect(vd);
         /* To reset_serial, we CLOSED our side. Make sure the other end knows we
          * are ready again. */
         qemu_chr_be_event(chr, CHR_EVENT_OPENED);
@@ -923,6 +926,9 @@ static void vdagent_chr_fini(Object *obj)
 
     migrate_del_blocker(vd->migration_blocker);
     vdagent_disconnect(vd);
+    if (vd->mouse_hs) {
+        qemu_input_handler_unregister(vd->mouse_hs);
+    }
     buffer_free(&vd->outbuf);
     error_free(vd->migration_blocker);
 }
diff --git a/ui/vnc-enc-hextile-template.h b/ui/vnc-enc-hextile-template.h
index 0c56262aff..8ee92086ac 100644
--- a/ui/vnc-enc-hextile-template.h
+++ b/ui/vnc-enc-hextile-template.h
@@ -7,6 +7,8 @@
 #define NAME BPP
 #endif
 
+#define MAX_BYTES_PER_PIXEL 4
+
 static void CONCAT(send_hextile_tile_, NAME)(VncState *vs,
                                              int x, int y, int w, int h,
                                              void *last_bg_,
@@ -25,10 +27,13 @@ static void CONCAT(send_hextile_tile_, NAME)(VncState *vs,
     int bg_count = 0;
     int fg_count = 0;
     int flags = 0;
-    uint8_t data[(vs->client_pf.bytes_per_pixel + 2) * 16 * 16];
+    uint8_t data[(MAX_BYTES_PER_PIXEL + 2) * 16 * 16];
     int n_data = 0;
     int n_subtiles = 0;
 
+    /* Enforced by set_pixel_format() */
+    assert(vs->client_pf.bytes_per_pixel <= MAX_BYTES_PER_PIXEL);
+
     for (j = 0; j < h; j++) {
         for (i = 0; i < w; i++) {
             switch (n_colors) {
@@ -205,6 +210,7 @@ static void CONCAT(send_hextile_tile_, NAME)(VncState *vs,
     }
 }
 
+#undef MAX_BYTES_PER_PIXEL
 #undef NAME
 #undef pixel_t
 #undef CONCAT_I
diff --git a/ui/vnc-enc-tight.c b/ui/vnc-enc-tight.c
index ee853dcfcb..41f559eb83 100644
--- a/ui/vnc-enc-tight.c
+++ b/ui/vnc-enc-tight.c
@@ -1097,13 +1097,13 @@ static int send_palette_rect(VncState *vs, int x, int y,
     switch (vs->client_pf.bytes_per_pixel) {
     case 4:
     {
-        size_t old_offset, offset;
-        uint32_t header[palette_size(palette)];
+        size_t old_offset, offset, palette_sz = palette_size(palette);
+        g_autofree uint32_t *header = g_new(uint32_t, palette_sz);
         struct palette_cb_priv priv = { vs, (uint8_t *)header };
 
         old_offset = vs->output.offset;
         palette_iter(palette, write_palette, &priv);
-        vnc_write(vs, header, sizeof(header));
+        vnc_write(vs, header, palette_sz * sizeof(uint32_t));
 
         if (vs->tight->pixel24) {
             tight_pack24(vs, vs->output.buffer + old_offset, colors, &offset);
@@ -1115,11 +1115,12 @@ static int send_palette_rect(VncState *vs, int x, int y,
     }
     case 2:
     {
-        uint16_t header[palette_size(palette)];
+        size_t palette_sz = palette_size(palette);
+        g_autofree uint16_t *header = g_new(uint16_t, palette_sz);
         struct palette_cb_priv priv = { vs, (uint8_t *)header };
 
         palette_iter(palette, write_palette, &priv);
-        vnc_write(vs, header, sizeof(header));
+        vnc_write(vs, header, palette_sz * sizeof(uint16_t));
         tight_encode_indexed_rect16(vs->tight->tight.buffer, w * h, palette);
         break;
     }
diff --git a/ui/vnc-enc-zrle.c.inc b/ui/vnc-enc-zrle.c.inc
index c107d8affc..a8ca37d05e 100644
--- a/ui/vnc-enc-zrle.c.inc
+++ b/ui/vnc-enc-zrle.c.inc
@@ -110,7 +110,7 @@ static void ZRLE_ENCODE_TILE(VncState *vs, ZRLE_PIXEL *data, int w, int h,
     ZRLE_PIXEL *end = ptr + h * w;
     *end = ~*(end-1); /* one past the end is different so the while loop ends */
 
-    /* Real limit is 127 but we wan't a way to know if there is more than 127 */
+    /* Real limit is 127 but we want a way to know if there is more than 127 */
     palette_init(palette, 256, ZRLE_BPP);
 
     while (ptr < end) {
diff --git a/ui/vnc-enc-zywrle.h b/ui/vnc-enc-zywrle.h
index e661ec117d..64fbc90ee7 100644
--- a/ui/vnc-enc-zywrle.h
+++ b/ui/vnc-enc-zywrle.h
@@ -485,7 +485,7 @@ static inline void wavelet(int *buf, int width, int height, int level)
 
 /*
   RGB <=> YUV conversion stuffs.
-  YUV coversion is explained as following formula in strict meaning:
+  YUV conversion is explained as following formula in strict meaning:
   Y =  0.299R + 0.587G + 0.114B (   0<=Y<=255)
   U = -0.169R - 0.331G + 0.500B (-128<=U<=127)
   V =  0.500R - 0.419G - 0.081B (-128<=V<=127)
@@ -539,7 +539,7 @@ static inline void wavelet(int *buf, int width, int height, int level)
    +------+------+
 
  So, we must transfer each sub images individually in strict meaning.
- But at least ZRLE meaning, following one decompositon image is same as
+ But at least ZRLE meaning, following one decomposition image is same as
  avobe individual sub image. I use this format.
  (Strictly saying, transfer order is reverse(Hxy->Hy->Hx->L)
   for simplified procedure for any wavelet level.)
diff --git a/ui/vnc.c b/ui/vnc.c
index 92964dcc0c..6fd86996a5 100644
--- a/ui/vnc.c
+++ b/ui/vnc.c
@@ -833,7 +833,7 @@ static void vnc_dpy_switch(DisplayChangeListener *dcl,
     /* guest surface */
     qemu_pixman_image_unref(vd->guest.fb);
     vd->guest.fb = pixman_image_ref(surface->image);
-    vd->guest.format = surface->format;
+    vd->guest.format = surface_format(surface);
 
 
     if (pageflip) {
@@ -1945,88 +1945,88 @@ static void do_key_event(VncState *vs, int down, int keycode, int sym)
             case 0xb8:                          /* Right ALT */
                 break;
             case 0xc8:
-                kbd_put_keysym(QEMU_KEY_UP);
+                qemu_text_console_put_keysym(NULL, QEMU_KEY_UP);
                 break;
             case 0xd0:
-                kbd_put_keysym(QEMU_KEY_DOWN);
+                qemu_text_console_put_keysym(NULL, QEMU_KEY_DOWN);
                 break;
             case 0xcb:
-                kbd_put_keysym(QEMU_KEY_LEFT);
+                qemu_text_console_put_keysym(NULL, QEMU_KEY_LEFT);
                 break;
             case 0xcd:
-                kbd_put_keysym(QEMU_KEY_RIGHT);
+                qemu_text_console_put_keysym(NULL, QEMU_KEY_RIGHT);
                 break;
             case 0xd3:
-                kbd_put_keysym(QEMU_KEY_DELETE);
+                qemu_text_console_put_keysym(NULL, QEMU_KEY_DELETE);
                 break;
             case 0xc7:
-                kbd_put_keysym(QEMU_KEY_HOME);
+                qemu_text_console_put_keysym(NULL, QEMU_KEY_HOME);
                 break;
             case 0xcf:
-                kbd_put_keysym(QEMU_KEY_END);
+                qemu_text_console_put_keysym(NULL, QEMU_KEY_END);
                 break;
             case 0xc9:
-                kbd_put_keysym(QEMU_KEY_PAGEUP);
+                qemu_text_console_put_keysym(NULL, QEMU_KEY_PAGEUP);
                 break;
             case 0xd1:
-                kbd_put_keysym(QEMU_KEY_PAGEDOWN);
+                qemu_text_console_put_keysym(NULL, QEMU_KEY_PAGEDOWN);
                 break;
 
             case 0x47:
-                kbd_put_keysym(numlock ? '7' : QEMU_KEY_HOME);
+                qemu_text_console_put_keysym(NULL, numlock ? '7' : QEMU_KEY_HOME);
                 break;
             case 0x48:
-                kbd_put_keysym(numlock ? '8' : QEMU_KEY_UP);
+                qemu_text_console_put_keysym(NULL, numlock ? '8' : QEMU_KEY_UP);
                 break;
             case 0x49:
-                kbd_put_keysym(numlock ? '9' : QEMU_KEY_PAGEUP);
+                qemu_text_console_put_keysym(NULL, numlock ? '9' : QEMU_KEY_PAGEUP);
                 break;
             case 0x4b:
-                kbd_put_keysym(numlock ? '4' : QEMU_KEY_LEFT);
+                qemu_text_console_put_keysym(NULL, numlock ? '4' : QEMU_KEY_LEFT);
                 break;
             case 0x4c:
-                kbd_put_keysym('5');
+                qemu_text_console_put_keysym(NULL, '5');
                 break;
             case 0x4d:
-                kbd_put_keysym(numlock ? '6' : QEMU_KEY_RIGHT);
+                qemu_text_console_put_keysym(NULL, numlock ? '6' : QEMU_KEY_RIGHT);
                 break;
             case 0x4f:
-                kbd_put_keysym(numlock ? '1' : QEMU_KEY_END);
+                qemu_text_console_put_keysym(NULL, numlock ? '1' : QEMU_KEY_END);
                 break;
             case 0x50:
-                kbd_put_keysym(numlock ? '2' : QEMU_KEY_DOWN);
+                qemu_text_console_put_keysym(NULL, numlock ? '2' : QEMU_KEY_DOWN);
                 break;
             case 0x51:
-                kbd_put_keysym(numlock ? '3' : QEMU_KEY_PAGEDOWN);
+                qemu_text_console_put_keysym(NULL, numlock ? '3' : QEMU_KEY_PAGEDOWN);
                 break;
             case 0x52:
-                kbd_put_keysym('0');
+                qemu_text_console_put_keysym(NULL, '0');
                 break;
             case 0x53:
-                kbd_put_keysym(numlock ? '.' : QEMU_KEY_DELETE);
+                qemu_text_console_put_keysym(NULL, numlock ? '.' : QEMU_KEY_DELETE);
                 break;
 
             case 0xb5:
-                kbd_put_keysym('/');
+                qemu_text_console_put_keysym(NULL, '/');
                 break;
             case 0x37:
-                kbd_put_keysym('*');
+                qemu_text_console_put_keysym(NULL, '*');
                 break;
             case 0x4a:
-                kbd_put_keysym('-');
+                qemu_text_console_put_keysym(NULL, '-');
                 break;
             case 0x4e:
-                kbd_put_keysym('+');
+                qemu_text_console_put_keysym(NULL, '+');
                 break;
             case 0x9c:
-                kbd_put_keysym('\n');
+                qemu_text_console_put_keysym(NULL, '\n');
                 break;
 
             default:
                 if (control) {
-                    kbd_put_keysym(sym & 0x1f);
+                    qemu_text_console_put_keysym(NULL, sym & 0x1f);
                 } else {
-                    kbd_put_keysym(sym);
+                    qemu_text_console_put_keysym(NULL, sym);
                 }
                 break;
             }
diff --git a/util/cpuinfo-aarch64.c b/util/cpuinfo-aarch64.c
index ababc39550..4c8a005715 100644
--- a/util/cpuinfo-aarch64.c
+++ b/util/cpuinfo-aarch64.c
@@ -1,6 +1,6 @@
 /*
  * SPDX-License-Identifier: GPL-2.0-or-later
- * Host specific cpu indentification for AArch64.
+ * Host specific cpu identification for AArch64.
  */
 
 #include "qemu/osdep.h"
@@ -13,6 +13,9 @@
 #  include <asm/hwcap.h>
 #  include "elf.h"
 # endif
+# ifndef HWCAP2_BTI
+#  define HWCAP2_BTI 0  /* added in glibc 2.32 */
+# endif
 #endif
 #ifdef CONFIG_DARWIN
 # include <sys/sysctl.h>
@@ -33,7 +36,7 @@ static bool sysctl_for_bool(const char *name)
     /*
      * We might in the future ask for properties not present in older kernels,
      * but we're only asking about static properties, all of which should be
-     * 'int'.  So we shouln't see ENOMEM (val too small), or any of the other
+     * 'int'.  So we shouldn't see ENOMEM (val too small), or any of the other
      * more exotic errors.
      */
     assert(errno == ENOENT);
@@ -56,12 +59,18 @@ unsigned __attribute__((constructor)) cpuinfo_init(void)
     unsigned long hwcap = qemu_getauxval(AT_HWCAP);
     info |= (hwcap & HWCAP_ATOMICS ? CPUINFO_LSE : 0);
     info |= (hwcap & HWCAP_USCAT ? CPUINFO_LSE2 : 0);
-    info |= (hwcap & HWCAP_AES ? CPUINFO_AES: 0);
+    info |= (hwcap & HWCAP_AES ? CPUINFO_AES : 0);
+    info |= (hwcap & HWCAP_PMULL ? CPUINFO_PMULL : 0);
+
+    unsigned long hwcap2 = qemu_getauxval(AT_HWCAP2);
+    info |= (hwcap2 & HWCAP2_BTI ? CPUINFO_BTI : 0);
 #endif
 #ifdef CONFIG_DARWIN
     info |= sysctl_for_bool("hw.optional.arm.FEAT_LSE") * CPUINFO_LSE;
     info |= sysctl_for_bool("hw.optional.arm.FEAT_LSE2") * CPUINFO_LSE2;
     info |= sysctl_for_bool("hw.optional.arm.FEAT_AES") * CPUINFO_AES;
+    info |= sysctl_for_bool("hw.optional.arm.FEAT_PMULL") * CPUINFO_PMULL;
+    info |= sysctl_for_bool("hw.optional.arm.FEAT_BTI") * CPUINFO_BTI;
 #endif
 
     cpuinfo = info;
diff --git a/util/cpuinfo-i386.c b/util/cpuinfo-i386.c
index 3a7b7e0ad1..9fddb18303 100644
--- a/util/cpuinfo-i386.c
+++ b/util/cpuinfo-i386.c
@@ -1,6 +1,6 @@
 /*
  * SPDX-License-Identifier: GPL-2.0-or-later
- * Host specific cpu indentification for x86.
+ * Host specific cpu identification for x86.
  */
 
 #include "qemu/osdep.h"
@@ -39,6 +39,7 @@ unsigned __attribute__((constructor)) cpuinfo_init(void)
         info |= (c & bit_SSE4_1 ? CPUINFO_SSE4 : 0);
         info |= (c & bit_MOVBE ? CPUINFO_MOVBE : 0);
         info |= (c & bit_POPCNT ? CPUINFO_POPCNT : 0);
+        info |= (c & bit_PCLMUL ? CPUINFO_PCLMUL : 0);
 
         /* Our AES support requires PSHUFB as well. */
         info |= ((c & bit_AES) && (c & bit_SSSE3) ? CPUINFO_AES : 0);
@@ -74,7 +75,7 @@ unsigned __attribute__((constructor)) cpuinfo_init(void)
                  * of their memory operands to be 16-byte aligned.
                  *
                  * AMD has provided an even stronger guarantee that processors
-                 * with AVX provide 16-byte atomicity for all cachable,
+                 * with AVX provide 16-byte atomicity for all cacheable,
                  * naturally aligned single loads and stores, e.g. MOVDQU.
                  *
                  * See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104688
diff --git a/util/cpuinfo-ppc.c b/util/cpuinfo-ppc.c
index 7212afa45d..1ea3db0ac8 100644
--- a/util/cpuinfo-ppc.c
+++ b/util/cpuinfo-ppc.c
@@ -1,6 +1,6 @@
 /*
  * SPDX-License-Identifier: GPL-2.0-or-later
- * Host specific cpu indentification for ppc.
+ * Host specific cpu identification for ppc.
  */
 
 #include "qemu/osdep.h"
diff --git a/util/iov.c b/util/iov.c
index 866fb577f3..7e73948f5e 100644
--- a/util/iov.c
+++ b/util/iov.c
@@ -571,7 +571,7 @@ static int sortelem_cmp_src_index(const void *a, const void *b)
  */
 void qemu_iovec_clone(QEMUIOVector *dest, const QEMUIOVector *src, void *buf)
 {
-    IOVectorSortElem sortelems[src->niov];
+    g_autofree IOVectorSortElem *sortelems = g_new(IOVectorSortElem, src->niov);
     void *last_end;
     int i;
 
diff --git a/util/main-loop.c b/util/main-loop.c
index 014c795916..797b640c41 100644
--- a/util/main-loop.c
+++ b/util/main-loop.c
@@ -47,7 +47,7 @@
  */
 /*
  * Disable CFI checks.
- * We are going to call a signal hander directly. Such handler may or may not
+ * We are going to call a signal handler directly. Such handler may or may not
  * have been defined in our binary, so there's no guarantee that the pointer
  * used to set the handler is a cfi-valid pointer. Since the handlers are
  * stored in kernel memory, changing the handler to an attacker-defined
diff --git a/util/meson.build b/util/meson.build
index a375160286..c4827fd70a 100644
--- a/util/meson.build
+++ b/util/meson.build
@@ -3,7 +3,6 @@ util_ss.add(files('thread-context.c'), numa)
 if not config_host_data.get('CONFIG_ATOMIC64')
   util_ss.add(files('atomic64.c'))
 endif
-util_ss.add(when: 'CONFIG_LINUX', if_true: files('async-teardown.c'))
 util_ss.add(when: 'CONFIG_POSIX', if_true: files('aio-posix.c'))
 util_ss.add(when: 'CONFIG_POSIX', if_true: files('fdmon-poll.c'))
 if config_host_data.get('CONFIG_EPOLL_CREATE1')
diff --git a/util/oslib-posix.c b/util/oslib-posix.c
index 760390b31e..e86fd64e09 100644
--- a/util/oslib-posix.c
+++ b/util/oslib-posix.c
@@ -585,7 +585,7 @@ char *qemu_get_pid_name(pid_t pid)
 
 void *qemu_alloc_stack(size_t *sz)
 {
-    void *ptr, *guardpage;
+    void *ptr;
     int flags;
 #ifdef CONFIG_DEBUG_STACK_USAGE
     void *ptr2;
@@ -618,17 +618,8 @@ void *qemu_alloc_stack(size_t *sz)
         abort();
     }
 
-#if defined(HOST_IA64)
-    /* separate register stack */
-    guardpage = ptr + (((*sz - pagesz) / 2) & ~pagesz);
-#elif defined(HOST_HPPA)
-    /* stack grows up */
-    guardpage = ptr + *sz - pagesz;
-#else
-    /* stack grows down */
-    guardpage = ptr;
-#endif
-    if (mprotect(guardpage, pagesz, PROT_NONE) != 0) {
+    /* Stack grows down -- guard page at the bottom. */
+    if (mprotect(ptr, pagesz, PROT_NONE) != 0) {
         perror("failed to set up stack guard page");
         abort();
     }
@@ -671,7 +662,7 @@ void qemu_free_stack(void *stack, size_t sz)
 
 /*
  * Disable CFI checks.
- * We are going to call a signal hander directly. Such handler may or may not
+ * We are going to call a signal handler directly. Such handler may or may not
  * have been defined in our binary, so there's no guarantee that the pointer
  * used to set the handler is a cfi-valid pointer. Since the handlers are
  * stored in kernel memory, changing the handler to an attacker-defined
diff --git a/util/qdist.c b/util/qdist.c
index 5f75e24c29..ef3566b03a 100644
--- a/util/qdist.c
+++ b/util/qdist.c
@@ -210,7 +210,7 @@ void qdist_bin__internal(struct qdist *to, const struct qdist *from, size_t n)
 
         /*
          * To avoid double-counting we capture [left, right) ranges, except for
-         * the righmost bin, which captures a [left, right] range.
+         * the rightmost bin, which captures a [left, right] range.
          */
         while (j < from->n && (from->entries[j].x < right || i == n - 1)) {
             struct qdist_entry *o = &from->entries[j];
diff --git a/util/qemu-sockets.c b/util/qemu-sockets.c
index 892d33f5e6..83e84b1186 100644
--- a/util/qemu-sockets.c
+++ b/util/qemu-sockets.c
@@ -929,7 +929,7 @@ static int unix_listen_saddr(UnixSocketAddress *saddr,
 
     if (pathbuf != NULL) {
         /*
-         * This dummy fd usage silences the mktemp() unsecure warning.
+         * This dummy fd usage silences the mktemp() insecure warning.
          * Using mkstemp() doesn't make things more secure here
          * though.  bind() complains about existing files, so we have
          * to unlink first and thus re-open the race window.  The
diff --git a/util/rcu.c b/util/rcu.c
index 30a7e22026..e587bcc483 100644
--- a/util/rcu.c
+++ b/util/rcu.c
@@ -355,7 +355,7 @@ void drain_call_rcu(void)
      *
      * Note that since we have only one global queue of the RCU callbacks,
      * we also end up waiting for most of RCU callbacks that were registered
-     * on the other threads, but this is a side effect that shoudn't be
+     * on the other threads, but this is a side effect that shouldn't be
      * assumed.
      */
 
diff --git a/util/selfmap.c b/util/selfmap.c
index 4db5b42651..483cb617e2 100644
--- a/util/selfmap.c
+++ b/util/selfmap.c
@@ -30,19 +30,21 @@ IntervalTreeRoot *read_self_maps(void)
 
         if (nfields > 4) {
             uint64_t start, end, offset, inode;
+            unsigned dev_maj, dev_min;
             int errors = 0;
             const char *p;
 
             errors |= qemu_strtou64(fields[0], &p, 16, &start);
             errors |= qemu_strtou64(p + 1, NULL, 16, &end);
             errors |= qemu_strtou64(fields[2], NULL, 16, &offset);
+            errors |= qemu_strtoui(fields[3], &p, 16, &dev_maj);
+            errors |= qemu_strtoui(p + 1, NULL, 16, &dev_min);
             errors |= qemu_strtou64(fields[4], NULL, 10, &inode);
 
             if (!errors) {
-                size_t dev_len, path_len;
+                size_t path_len;
                 MapInfo *e;
 
-                dev_len = strlen(fields[3]) + 1;
                 if (nfields == 6) {
                     p = fields[5];
                     p += strspn(p, " ");
@@ -52,11 +54,12 @@ IntervalTreeRoot *read_self_maps(void)
                     path_len = 0;
                 }
 
-                e = g_malloc0(sizeof(*e) + dev_len + path_len);
+                e = g_malloc0(sizeof(*e) + path_len);
 
                 e->itree.start = start;
                 e->itree.last = end - 1;
                 e->offset = offset;
+                e->dev = makedev(dev_maj, dev_min);
                 e->inode = inode;
 
                 e->is_read  = fields[1][0] == 'r';
@@ -64,9 +67,8 @@ IntervalTreeRoot *read_self_maps(void)
                 e->is_exec  = fields[1][2] == 'x';
                 e->is_priv  = fields[1][3] == 'p';
 
-                memcpy(e->dev, fields[3], dev_len);
                 if (path_len) {
-                    e->path = memcpy(e->dev + dev_len, p, path_len);
+                    e->path = memcpy(e + 1, p, path_len);
                 }
 
                 interval_tree_insert(&e->itree, root);
diff --git a/util/thread-pool.c b/util/thread-pool.c
index e3d8292d14..22f9ba3286 100644
--- a/util/thread-pool.c
+++ b/util/thread-pool.c
@@ -228,17 +228,9 @@ static void thread_pool_cancel(BlockAIOCB *acb)
 
 }
 
-static AioContext *thread_pool_get_aio_context(BlockAIOCB *acb)
-{
-    ThreadPoolElement *elem = (ThreadPoolElement *)acb;
-    ThreadPool *pool = elem->pool;
-    return pool->ctx;
-}
-
 static const AIOCBInfo thread_pool_aiocb_info = {
     .aiocb_size         = sizeof(ThreadPoolElement),
     .cancel_async       = thread_pool_cancel,
-    .get_aio_context    = thread_pool_get_aio_context,
 };
 
 BlockAIOCB *thread_pool_submit_aio(ThreadPoolFunc *func, void *arg,
diff --git a/util/throttle.c b/util/throttle.c
index 81f247a8d1..9582899da3 100644
--- a/util/throttle.c
+++ b/util/throttle.c
@@ -136,13 +136,14 @@ int64_t throttle_compute_wait(LeakyBucket *bkt)
 
 /* This function compute the time that must be waited while this IO
  *
- * @is_write:   true if the current IO is a write, false if it's a read
+ * @direction:  throttle direction
  * @ret:        time to wait
  */
 static int64_t throttle_compute_wait_for(ThrottleState *ts,
-                                         bool is_write)
+                                         ThrottleDirection direction)
 {
-    BucketType to_check[2][4] = { {THROTTLE_BPS_TOTAL,
+    static const BucketType to_check[THROTTLE_MAX][4] = {
+                                  {THROTTLE_BPS_TOTAL,
                                    THROTTLE_OPS_TOTAL,
                                    THROTTLE_BPS_READ,
                                    THROTTLE_OPS_READ},
@@ -153,8 +154,8 @@ static int64_t throttle_compute_wait_for(ThrottleState *ts,
     int64_t wait, max_wait = 0;
     int i;
 
-    for (i = 0; i < 4; i++) {
-        BucketType index = to_check[is_write][i];
+    for (i = 0; i < ARRAY_SIZE(to_check[THROTTLE_READ]); i++) {
+        BucketType index = to_check[direction][i];
         wait = throttle_compute_wait(&ts->cfg.buckets[index]);
         if (wait > max_wait) {
             max_wait = wait;
@@ -166,13 +167,13 @@ static int64_t throttle_compute_wait_for(ThrottleState *ts,
 
 /* compute the timer for this type of operation
  *
- * @is_write:   the type of operation
+ * @direction:  throttle direction
  * @now:        the current clock timestamp
  * @next_timestamp: the resulting timer
  * @ret:        true if a timer must be set
  */
 static bool throttle_compute_timer(ThrottleState *ts,
-                                   bool is_write,
+                                   ThrottleDirection direction,
                                    int64_t now,
                                    int64_t *next_timestamp)
 {
@@ -182,7 +183,7 @@ static bool throttle_compute_timer(ThrottleState *ts,
     throttle_do_leak(ts, now);
 
     /* compute the wait time if any */
-    wait = throttle_compute_wait_for(ts, is_write);
+    wait = throttle_compute_wait_for(ts, direction);
 
     /* if the code must wait compute when the next timer should fire */
     if (wait) {
@@ -199,10 +200,15 @@ static bool throttle_compute_timer(ThrottleState *ts,
 void throttle_timers_attach_aio_context(ThrottleTimers *tt,
                                         AioContext *new_context)
 {
-    tt->timers[0] = aio_timer_new(new_context, tt->clock_type, SCALE_NS,
-                                  tt->read_timer_cb, tt->timer_opaque);
-    tt->timers[1] = aio_timer_new(new_context, tt->clock_type, SCALE_NS,
-                                  tt->write_timer_cb, tt->timer_opaque);
+    ThrottleDirection dir;
+
+    for (dir = THROTTLE_READ; dir < THROTTLE_MAX; dir++) {
+        if (tt->timer_cb[dir]) {
+            tt->timers[dir] =
+                aio_timer_new(new_context, tt->clock_type, SCALE_NS,
+                              tt->timer_cb[dir], tt->timer_opaque);
+        }
+    }
 }
 
 /*
@@ -233,11 +239,12 @@ void throttle_timers_init(ThrottleTimers *tt,
                           QEMUTimerCB *write_timer_cb,
                           void *timer_opaque)
 {
+    assert(read_timer_cb || write_timer_cb);
     memset(tt, 0, sizeof(ThrottleTimers));
 
     tt->clock_type = clock_type;
-    tt->read_timer_cb = read_timer_cb;
-    tt->write_timer_cb = write_timer_cb;
+    tt->timer_cb[THROTTLE_READ] = read_timer_cb;
+    tt->timer_cb[THROTTLE_WRITE] = write_timer_cb;
     tt->timer_opaque = timer_opaque;
     throttle_timers_attach_aio_context(tt, aio_context);
 }
@@ -245,7 +252,9 @@ void throttle_timers_init(ThrottleTimers *tt,
 /* destroy a timer */
 static void throttle_timer_destroy(QEMUTimer **timer)
 {
-    assert(*timer != NULL);
+    if (*timer == NULL) {
+        return;
+    }
 
     timer_free(*timer);
     *timer = NULL;
@@ -254,10 +263,10 @@ static void throttle_timer_destroy(QEMUTimer **timer)
 /* Remove timers from event loop */
 void throttle_timers_detach_aio_context(ThrottleTimers *tt)
 {
-    int i;
+    ThrottleDirection dir;
 
-    for (i = 0; i < 2; i++) {
-        throttle_timer_destroy(&tt->timers[i]);
+    for (dir = THROTTLE_READ; dir < THROTTLE_MAX; dir++) {
+        throttle_timer_destroy(&tt->timers[dir]);
     }
 }
 
@@ -270,8 +279,12 @@ void throttle_timers_destroy(ThrottleTimers *tt)
 /* is any throttling timer configured */
 bool throttle_timers_are_initialized(ThrottleTimers *tt)
 {
-    if (tt->timers[0]) {
-        return true;
+    ThrottleDirection dir;
+
+    for (dir = THROTTLE_READ; dir < THROTTLE_MAX; dir++) {
+        if (tt->timers[dir]) {
+            return true;
+        }
     }
 
     return false;
@@ -413,19 +426,24 @@ void throttle_get_config(ThrottleState *ts, ThrottleConfig *cfg)
  * NOTE: this function is not unit tested due to it's usage of timer_mod
  *
  * @tt:       the timers structure
- * @is_write: the type of operation (read/write)
+ * @direction: throttle direction
  * @ret:      true if the timer has been scheduled else false
  */
 bool throttle_schedule_timer(ThrottleState *ts,
                              ThrottleTimers *tt,
-                             bool is_write)
+                             ThrottleDirection direction)
 {
     int64_t now = qemu_clock_get_ns(tt->clock_type);
     int64_t next_timestamp;
+    QEMUTimer *timer;
     bool must_wait;
 
+    assert(direction < THROTTLE_MAX);
+    timer = tt->timers[direction];
+    assert(timer);
+
     must_wait = throttle_compute_timer(ts,
-                                       is_write,
+                                       direction,
                                        now,
                                        &next_timestamp);
 
@@ -435,48 +453,50 @@ bool throttle_schedule_timer(ThrottleState *ts,
     }
 
     /* request throttled and timer pending -> do nothing */
-    if (timer_pending(tt->timers[is_write])) {
+    if (timer_pending(timer)) {
         return true;
     }
 
     /* request throttled and timer not pending -> arm timer */
-    timer_mod(tt->timers[is_write], next_timestamp);
+    timer_mod(timer, next_timestamp);
     return true;
 }
 
 /* do the accounting for this operation
  *
- * @is_write: the type of operation (read/write)
+ * @direction: throttle direction
  * @size:     the size of the operation
  */
-void throttle_account(ThrottleState *ts, bool is_write, uint64_t size)
+void throttle_account(ThrottleState *ts, ThrottleDirection direction,
+                      uint64_t size)
 {
-    const BucketType bucket_types_size[2][2] = {
+    static const BucketType bucket_types_size[THROTTLE_MAX][2] = {
         { THROTTLE_BPS_TOTAL, THROTTLE_BPS_READ },
         { THROTTLE_BPS_TOTAL, THROTTLE_BPS_WRITE }
     };
-    const BucketType bucket_types_units[2][2] = {
+    static const BucketType bucket_types_units[THROTTLE_MAX][2] = {
         { THROTTLE_OPS_TOTAL, THROTTLE_OPS_READ },
         { THROTTLE_OPS_TOTAL, THROTTLE_OPS_WRITE }
     };
     double units = 1.0;
     unsigned i;
 
+    assert(direction < THROTTLE_MAX);
     /* if cfg.op_size is defined and smaller than size we compute unit count */
     if (ts->cfg.op_size && size > ts->cfg.op_size) {
         units = (double) size / ts->cfg.op_size;
     }
 
-    for (i = 0; i < 2; i++) {
+    for (i = 0; i < ARRAY_SIZE(bucket_types_size[THROTTLE_READ]); i++) {
         LeakyBucket *bkt;
 
-        bkt = &ts->cfg.buckets[bucket_types_size[is_write][i]];
+        bkt = &ts->cfg.buckets[bucket_types_size[direction][i]];
         bkt->level += size;
         if (bkt->burst_length > 1) {
             bkt->burst_level += size;
         }
 
-        bkt = &ts->cfg.buckets[bucket_types_units[is_write][i]];
+        bkt = &ts->cfg.buckets[bucket_types_units[direction][i]];
         bkt->level += units;
         if (bkt->burst_length > 1) {
             bkt->burst_level += units;
diff --git a/util/vhost-user-server.c b/util/vhost-user-server.c
index cd17fb5326..b4b6bf30a2 100644
--- a/util/vhost-user-server.c
+++ b/util/vhost-user-server.c
@@ -127,7 +127,14 @@ vu_message_read(VuDev *vu_dev, int conn_fd, VhostUserMsg *vmsg)
         if (rc < 0) {
             if (rc == QIO_CHANNEL_ERR_BLOCK) {
                 assert(local_err == NULL);
-                qio_channel_yield(ioc, G_IO_IN);
+                if (server->ctx) {
+                    server->in_qio_channel_yield = true;
+                    qio_channel_yield(ioc, G_IO_IN);
+                    server->in_qio_channel_yield = false;
+                } else {
+                    /* Wait until attached to an AioContext again */
+                    qemu_coroutine_yield();
+                }
                 continue;
             } else {
                 error_report_err(local_err);
@@ -278,7 +285,7 @@ set_watch(VuDev *vu_dev, int fd, int vu_evt,
         vu_fd_watch->fd = fd;
         vu_fd_watch->cb = cb;
         qemu_socket_set_nonblock(fd);
-        aio_set_fd_handler(server->ioc->ctx, fd, kick_handler,
+        aio_set_fd_handler(server->ctx, fd, kick_handler,
                            NULL, NULL, NULL, vu_fd_watch);
         vu_fd_watch->vu_dev = vu_dev;
         vu_fd_watch->pvt = pvt;
@@ -299,7 +306,7 @@ static void remove_watch(VuDev *vu_dev, int fd)
     if (!vu_fd_watch) {
         return;
     }
-    aio_set_fd_handler(server->ioc->ctx, fd, NULL, NULL, NULL, NULL, NULL);
+    aio_set_fd_handler(server->ctx, fd, NULL, NULL, NULL, NULL, NULL);
 
     QTAILQ_REMOVE(&server->vu_fd_watches, vu_fd_watch, next);
     g_free(vu_fd_watch);
@@ -344,6 +351,8 @@ static void vu_accept(QIONetListener *listener, QIOChannelSocket *sioc,
     /* TODO vu_message_write() spins if non-blocking! */
     qio_channel_set_blocking(server->ioc, false, NULL);
 
+    qio_channel_set_follow_coroutine_ctx(server->ioc, true);
+
     server->co_trip = qemu_coroutine_create(vu_client_trip, server);
 
     aio_context_acquire(server->ctx);
@@ -399,13 +408,12 @@ void vhost_user_server_attach_aio_context(VuServer *server, AioContext *ctx)
         return;
     }
 
-    qio_channel_attach_aio_context(server->ioc, ctx);
-
     QTAILQ_FOREACH(vu_fd_watch, &server->vu_fd_watches, next) {
         aio_set_fd_handler(ctx, vu_fd_watch->fd, kick_handler, NULL,
                            NULL, NULL, vu_fd_watch);
     }
 
+    assert(!server->in_qio_channel_yield);
     aio_co_schedule(ctx, server->co_trip);
 }
 
@@ -419,11 +427,16 @@ void vhost_user_server_detach_aio_context(VuServer *server)
             aio_set_fd_handler(server->ctx, vu_fd_watch->fd,
                                NULL, NULL, NULL, NULL, vu_fd_watch);
         }
-
-        qio_channel_detach_aio_context(server->ioc);
     }
 
     server->ctx = NULL;
+
+    if (server->ioc) {
+        if (server->in_qio_channel_yield) {
+            /* Stop receiving the next vhost-user message */
+            qio_channel_wake_read(server->ioc);
+        }
+    }
 }
 
 bool vhost_user_server_start(VuServer *server,