From f489960d36c49e91cf57c185b70cd028aa4976e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Tue, 23 Jun 2020 09:21:30 +0200 Subject: [PATCH 01/57] hw/arm/aspeed: Remove extraneous MemoryRegion object owner MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I'm confused by this code, 'bmc' is created as: bmc = g_new0(AspeedBoardState, 1); Then we use it as QOM owner for different MemoryRegion objects. But looking at memory_region_init_ram (similarly for ROM): void memory_region_init_ram(MemoryRegion *mr, struct Object *owner, const char *name, uint64_t size, Error **errp) { DeviceState *owner_dev; Error *err = NULL; memory_region_init_ram_nomigrate(mr, owner, name, size, &err); if (err) { error_propagate(errp, err); return; } /* This will assert if owner is neither NULL nor a DeviceState. * We only want the owner here for the purposes of defining a * unique name for migration. TODO: Ideally we should implement * a naming scheme for Objects which are not DeviceStates, in * which case we can relax this restriction. */ owner_dev = DEVICE(owner); vmstate_register_ram(mr, owner_dev); } The expected assertion is not triggered ('bmc' is not NULL neither a DeviceState). 'bmc' structure is defined as: struct AspeedBoardState { AspeedSoCState soc; MemoryRegion ram_container; MemoryRegion max_ram; }; What happens is when using 'OBJECT(bmc)', the QOM macros cast the memory pointed by bmc, which first member is 'soc', which is initialized ...: object_initialize_child(OBJECT(machine), "soc", &bmc->soc, amc->soc_name); The 'soc' object is indeed a DeviceState, so the assertion passes. Since this is fragile and only happens to work by luck, remove the dangerous OBJECT(bmc) owner argument. Note, this probably breaks migration for this machine. Reviewed-by: Cédric Le Goater Signed-off-by: Philippe Mathieu-Daudé Message-id: 20200623072132.2868-2-f4bug@amsat.org Signed-off-by: Peter Maydell --- hw/arm/aspeed.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hw/arm/aspeed.c b/hw/arm/aspeed.c index 863757e1f0..167fd5ed1c 100644 --- a/hw/arm/aspeed.c +++ b/hw/arm/aspeed.c @@ -329,12 +329,12 @@ static void aspeed_machine_init(MachineState *machine) * needed by the flash modules of the Aspeed machines. */ if (ASPEED_MACHINE(machine)->mmio_exec) { - memory_region_init_alias(boot_rom, OBJECT(bmc), "aspeed.boot_rom", + memory_region_init_alias(boot_rom, NULL, "aspeed.boot_rom", &fl->mmio, 0, fl->size); memory_region_add_subregion(get_system_memory(), FIRMWARE_ADDR, boot_rom); } else { - memory_region_init_rom(boot_rom, OBJECT(bmc), "aspeed.boot_rom", + memory_region_init_rom(boot_rom, NULL, "aspeed.boot_rom", fl->size, &error_abort); memory_region_add_subregion(get_system_memory(), FIRMWARE_ADDR, boot_rom); @@ -345,7 +345,7 @@ static void aspeed_machine_init(MachineState *machine) if (machine->kernel_filename && sc->num_cpus > 1) { /* With no u-boot we must set up a boot stub for the secondary CPU */ MemoryRegion *smpboot = g_new(MemoryRegion, 1); - memory_region_init_ram(smpboot, OBJECT(bmc), "aspeed.smpboot", + memory_region_init_ram(smpboot, NULL, "aspeed.smpboot", 0x80, &error_abort); memory_region_add_subregion(get_system_memory(), AST_SMP_MAILBOX_BASE, smpboot); From 612b219a2a6e3d8192ce1d2408780be655f60359 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Tue, 23 Jun 2020 09:21:31 +0200 Subject: [PATCH 02/57] hw/arm/aspeed: Rename AspeedBoardState as AspeedMachineState MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To have a more consistent naming, rename AspeedBoardState as AspeedMachineState. Suggested-by: Cédric Le Goater Signed-off-by: Philippe Mathieu-Daudé Reviewed-by: Cédric Le Goater Message-id: 20200623072132.2868-3-f4bug@amsat.org Signed-off-by: Peter Maydell --- hw/arm/aspeed.c | 20 ++++++++++---------- include/hw/arm/aspeed.h | 4 ++-- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/hw/arm/aspeed.c b/hw/arm/aspeed.c index 167fd5ed1c..a167b73697 100644 --- a/hw/arm/aspeed.c +++ b/hw/arm/aspeed.c @@ -32,7 +32,7 @@ static struct arm_boot_info aspeed_board_binfo = { .board_id = -1, /* device-tree-only board */ }; -struct AspeedBoardState { +struct AspeedMachineState { AspeedSoCState soc; MemoryRegion ram_container; MemoryRegion max_ram; @@ -253,7 +253,7 @@ static void sdhci_attach_drive(SDHCIState *sdhci, DriveInfo *dinfo) static void aspeed_machine_init(MachineState *machine) { - AspeedBoardState *bmc; + AspeedMachineState *bmc; AspeedMachineClass *amc = ASPEED_MACHINE_GET_CLASS(machine); AspeedSoCClass *sc; DriveInfo *drive0 = drive_get(IF_MTD, 0, 0); @@ -261,7 +261,7 @@ static void aspeed_machine_init(MachineState *machine) int i; NICInfo *nd = &nd_table[0]; - bmc = g_new0(AspeedBoardState, 1); + bmc = g_new0(AspeedMachineState, 1); memory_region_init(&bmc->ram_container, NULL, "aspeed-ram-container", 4 * GiB); @@ -374,7 +374,7 @@ static void aspeed_machine_init(MachineState *machine) arm_load_kernel(ARM_CPU(first_cpu), machine, &aspeed_board_binfo); } -static void palmetto_bmc_i2c_init(AspeedBoardState *bmc) +static void palmetto_bmc_i2c_init(AspeedMachineState *bmc) { AspeedSoCState *soc = &bmc->soc; DeviceState *dev; @@ -396,7 +396,7 @@ static void palmetto_bmc_i2c_init(AspeedBoardState *bmc) object_property_set_int(OBJECT(dev), 110000, "temperature3", &error_abort); } -static void ast2500_evb_i2c_init(AspeedBoardState *bmc) +static void ast2500_evb_i2c_init(AspeedMachineState *bmc) { AspeedSoCState *soc = &bmc->soc; uint8_t *eeprom_buf = g_malloc0(8 * 1024); @@ -413,13 +413,13 @@ static void ast2500_evb_i2c_init(AspeedBoardState *bmc) i2c_create_slave(aspeed_i2c_get_bus(DEVICE(&soc->i2c), 11), "ds1338", 0x32); } -static void ast2600_evb_i2c_init(AspeedBoardState *bmc) +static void ast2600_evb_i2c_init(AspeedMachineState *bmc) { /* Start with some devices on our I2C busses */ ast2500_evb_i2c_init(bmc); } -static void romulus_bmc_i2c_init(AspeedBoardState *bmc) +static void romulus_bmc_i2c_init(AspeedMachineState *bmc) { AspeedSoCState *soc = &bmc->soc; @@ -428,7 +428,7 @@ static void romulus_bmc_i2c_init(AspeedBoardState *bmc) i2c_create_slave(aspeed_i2c_get_bus(DEVICE(&soc->i2c), 11), "ds1338", 0x32); } -static void swift_bmc_i2c_init(AspeedBoardState *bmc) +static void swift_bmc_i2c_init(AspeedMachineState *bmc) { AspeedSoCState *soc = &bmc->soc; @@ -457,7 +457,7 @@ static void swift_bmc_i2c_init(AspeedBoardState *bmc) i2c_create_slave(aspeed_i2c_get_bus(DEVICE(&soc->i2c), 12), "tmp105", 0x4a); } -static void sonorapass_bmc_i2c_init(AspeedBoardState *bmc) +static void sonorapass_bmc_i2c_init(AspeedMachineState *bmc) { AspeedSoCState *soc = &bmc->soc; @@ -501,7 +501,7 @@ static void sonorapass_bmc_i2c_init(AspeedBoardState *bmc) } -static void witherspoon_bmc_i2c_init(AspeedBoardState *bmc) +static void witherspoon_bmc_i2c_init(AspeedMachineState *bmc) { AspeedSoCState *soc = &bmc->soc; uint8_t *eeprom_buf = g_malloc0(8 * 1024); diff --git a/include/hw/arm/aspeed.h b/include/hw/arm/aspeed.h index 95b4daece8..5114ba0bd4 100644 --- a/include/hw/arm/aspeed.h +++ b/include/hw/arm/aspeed.h @@ -11,7 +11,7 @@ #include "hw/boards.h" -typedef struct AspeedBoardState AspeedBoardState; +typedef struct AspeedMachineState AspeedMachineState; #define TYPE_ASPEED_MACHINE MACHINE_TYPE_NAME("aspeed") #define ASPEED_MACHINE(obj) \ @@ -45,7 +45,7 @@ typedef struct AspeedMachineClass { const char *spi_model; uint32_t num_cs; uint32_t macs_mask; - void (*i2c_init)(AspeedBoardState *bmc); + void (*i2c_init)(AspeedMachineState *bmc); } AspeedMachineClass; From 888b2b034a29fa8fe99417c9665e0d671a9f33fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Tue, 23 Jun 2020 09:21:32 +0200 Subject: [PATCH 03/57] hw/arm/aspeed: QOM'ify AspeedMachineState MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit AspeedMachineState seems crippled. We use incorrectly 2 different structures to do the same thing. Merge them altogether: - Move AspeedMachine fields to AspeedMachineState - AspeedMachineState is now QOM - Remove unused AspeedMachine structure Signed-off-by: Philippe Mathieu-Daudé Reviewed-by: Cédric Le Goater Message-id: 20200623072132.2868-4-f4bug@amsat.org Signed-off-by: Peter Maydell --- hw/arm/aspeed.c | 11 +++++++---- include/hw/arm/aspeed.h | 8 +------- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/hw/arm/aspeed.c b/hw/arm/aspeed.c index a167b73697..665d04fbf6 100644 --- a/hw/arm/aspeed.c +++ b/hw/arm/aspeed.c @@ -33,9 +33,14 @@ static struct arm_boot_info aspeed_board_binfo = { }; struct AspeedMachineState { + /* Private */ + MachineState parent_obj; + /* Public */ + AspeedSoCState soc; MemoryRegion ram_container; MemoryRegion max_ram; + bool mmio_exec; }; /* Palmetto hardware value: 0x120CE416 */ @@ -253,7 +258,7 @@ static void sdhci_attach_drive(SDHCIState *sdhci, DriveInfo *dinfo) static void aspeed_machine_init(MachineState *machine) { - AspeedMachineState *bmc; + AspeedMachineState *bmc = ASPEED_MACHINE(machine); AspeedMachineClass *amc = ASPEED_MACHINE_GET_CLASS(machine); AspeedSoCClass *sc; DriveInfo *drive0 = drive_get(IF_MTD, 0, 0); @@ -261,8 +266,6 @@ static void aspeed_machine_init(MachineState *machine) int i; NICInfo *nd = &nd_table[0]; - bmc = g_new0(AspeedMachineState, 1); - memory_region_init(&bmc->ram_container, NULL, "aspeed-ram-container", 4 * GiB); memory_region_add_subregion(&bmc->ram_container, 0, machine->ram); @@ -751,7 +754,7 @@ static const TypeInfo aspeed_machine_types[] = { }, { .name = TYPE_ASPEED_MACHINE, .parent = TYPE_MACHINE, - .instance_size = sizeof(AspeedMachine), + .instance_size = sizeof(AspeedMachineState), .instance_init = aspeed_machine_instance_init, .class_size = sizeof(AspeedMachineClass), .class_init = aspeed_machine_class_init, diff --git a/include/hw/arm/aspeed.h b/include/hw/arm/aspeed.h index 5114ba0bd4..09da9d9acc 100644 --- a/include/hw/arm/aspeed.h +++ b/include/hw/arm/aspeed.h @@ -15,13 +15,7 @@ typedef struct AspeedMachineState AspeedMachineState; #define TYPE_ASPEED_MACHINE MACHINE_TYPE_NAME("aspeed") #define ASPEED_MACHINE(obj) \ - OBJECT_CHECK(AspeedMachine, (obj), TYPE_ASPEED_MACHINE) - -typedef struct AspeedMachine { - MachineState parent_obj; - - bool mmio_exec; -} AspeedMachine; + OBJECT_CHECK(AspeedMachineState, (obj), TYPE_ASPEED_MACHINE) #define ASPEED_MAC0_ON (1 << 0) #define ASPEED_MAC1_ON (1 << 1) From d88c42ff2c5cdcaecd02e6b31ea4c134b02be084 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Tue, 23 Jun 2020 09:27:15 +0200 Subject: [PATCH 04/57] hw/i2c/core: Add i2c_try_create_slave() and i2c_realize_and_unref() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extract i2c_try_create_slave() and i2c_realize_and_unref() from i2c_create_slave(). We can now set properties on a I2CSlave before it is realized. This is in line with the recent qdev/QOM changes merged in commit 6675a653d2e. Reviewed-by: Corey Minyard Reviewed-by: Cédric Le Goater Signed-off-by: Philippe Mathieu-Daudé Reviewed-by: Markus Armbruster Tested-by: Cédric Le Goater Message-id: 20200623072723.6324-2-f4bug@amsat.org Signed-off-by: Peter Maydell --- hw/i2c/core.c | 18 ++++++++++++++++-- include/hw/i2c/i2c.h | 2 ++ 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/hw/i2c/core.c b/hw/i2c/core.c index 1aac457a2a..acf34a12d6 100644 --- a/hw/i2c/core.c +++ b/hw/i2c/core.c @@ -267,13 +267,27 @@ const VMStateDescription vmstate_i2c_slave = { } }; -DeviceState *i2c_create_slave(I2CBus *bus, const char *name, uint8_t addr) +DeviceState *i2c_try_create_slave(const char *name, uint8_t addr) { DeviceState *dev; dev = qdev_new(name); qdev_prop_set_uint8(dev, "address", addr); - qdev_realize_and_unref(dev, &bus->qbus, &error_fatal); + return dev; +} + +bool i2c_realize_and_unref(DeviceState *dev, I2CBus *bus, Error **errp) +{ + return qdev_realize_and_unref(dev, &bus->qbus, errp); +} + +DeviceState *i2c_create_slave(I2CBus *bus, const char *name, uint8_t addr) +{ + DeviceState *dev; + + dev = i2c_try_create_slave(name, addr); + i2c_realize_and_unref(dev, bus, &error_fatal); + return dev; } diff --git a/include/hw/i2c/i2c.h b/include/hw/i2c/i2c.h index 4117211565..d6e3d85faf 100644 --- a/include/hw/i2c/i2c.h +++ b/include/hw/i2c/i2c.h @@ -80,6 +80,8 @@ int i2c_send(I2CBus *bus, uint8_t data); uint8_t i2c_recv(I2CBus *bus); DeviceState *i2c_create_slave(I2CBus *bus, const char *name, uint8_t addr); +DeviceState *i2c_try_create_slave(const char *name, uint8_t addr); +bool i2c_realize_and_unref(DeviceState *dev, I2CBus *bus, Error **errp); /* lm832x.c */ void lm832x_key_event(DeviceState *dev, int key, int state); From 8208335b9539e7b5aa4702b36e2f9a8abd704079 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Tue, 23 Jun 2020 09:27:16 +0200 Subject: [PATCH 05/57] hw/misc/pca9552: Rename 'nr_leds' as 'pin_count' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The PCA9552 device does not expose LEDs, but simple pins to connnect LEDs to. To be clearer with the device model, rename 'nr_leds' as 'pin_count'. Reviewed-by: Cédric Le Goater Signed-off-by: Philippe Mathieu-Daudé Tested-by: Cédric Le Goater Message-id: 20200623072723.6324-3-f4bug@amsat.org Signed-off-by: Peter Maydell --- hw/misc/pca9552.c | 10 +++++----- include/hw/misc/pca9552.h | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/hw/misc/pca9552.c b/hw/misc/pca9552.c index cac729e35a..81da757a7e 100644 --- a/hw/misc/pca9552.c +++ b/hw/misc/pca9552.c @@ -37,7 +37,7 @@ static void pca9552_update_pin_input(PCA9552State *s) { int i; - for (i = 0; i < s->nr_leds; i++) { + for (i = 0; i < s->pin_count; i++) { uint8_t input_reg = PCA9552_INPUT0 + (i / 8); uint8_t input_shift = (i % 8); uint8_t config = pca9552_pin_get_config(s, i); @@ -185,7 +185,7 @@ static void pca9552_get_led(Object *obj, Visitor *v, const char *name, error_setg(errp, "%s: error reading %s", __func__, name); return; } - if (led < 0 || led > s->nr_leds) { + if (led < 0 || led > s->pin_count) { error_setg(errp, "%s invalid led %s", __func__, name); return; } @@ -228,7 +228,7 @@ static void pca9552_set_led(Object *obj, Visitor *v, const char *name, error_setg(errp, "%s: error reading %s", __func__, name); return; } - if (led < 0 || led > s->nr_leds) { + if (led < 0 || led > s->pin_count) { error_setg(errp, "%s invalid led %s", __func__, name); return; } @@ -291,9 +291,9 @@ static void pca9552_initfn(Object *obj) * PCA955X device */ s->max_reg = PCA9552_LS3; - s->nr_leds = 16; + s->pin_count = 16; - for (led = 0; led < s->nr_leds; led++) { + for (led = 0; led < s->pin_count; led++) { char *name; name = g_strdup_printf("led%d", led); diff --git a/include/hw/misc/pca9552.h b/include/hw/misc/pca9552.h index ebb43c63fe..bc5ed31087 100644 --- a/include/hw/misc/pca9552.h +++ b/include/hw/misc/pca9552.h @@ -26,7 +26,7 @@ typedef struct PCA9552State { uint8_t regs[PCA9552_NR_REGS]; uint8_t max_reg; - uint8_t nr_leds; + uint8_t pin_count; } PCA9552State; #endif From ec17228a25e7fd46287842322f8dc4923eccca92 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Tue, 23 Jun 2020 09:27:17 +0200 Subject: [PATCH 06/57] hw/misc/pca9552: Rename generic code as pca955x MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Various code from the PCA9552 device model is generic to the PCA955X family. We'll split the generic code in a base class in the next commit. To ease review, first do a dumb renaming. Reviewed-by: Cédric Le Goater Signed-off-by: Philippe Mathieu-Daudé Tested-by: Cédric Le Goater Message-id: 20200623072723.6324-4-f4bug@amsat.org Signed-off-by: Peter Maydell --- hw/misc/pca9552.c | 80 +++++++++++++++++++-------------------- include/hw/misc/pca9552.h | 10 ++--- 2 files changed, 45 insertions(+), 45 deletions(-) diff --git a/hw/misc/pca9552.c b/hw/misc/pca9552.c index 81da757a7e..5681ff3b22 100644 --- a/hw/misc/pca9552.c +++ b/hw/misc/pca9552.c @@ -25,7 +25,7 @@ static const char *led_state[] = {"on", "off", "pwm0", "pwm1"}; -static uint8_t pca9552_pin_get_config(PCA9552State *s, int pin) +static uint8_t pca955x_pin_get_config(PCA955xState *s, int pin) { uint8_t reg = PCA9552_LS0 + (pin / 4); uint8_t shift = (pin % 4) << 1; @@ -33,14 +33,14 @@ static uint8_t pca9552_pin_get_config(PCA9552State *s, int pin) return extract32(s->regs[reg], shift, 2); } -static void pca9552_update_pin_input(PCA9552State *s) +static void pca955x_update_pin_input(PCA955xState *s) { int i; for (i = 0; i < s->pin_count; i++) { uint8_t input_reg = PCA9552_INPUT0 + (i / 8); uint8_t input_shift = (i % 8); - uint8_t config = pca9552_pin_get_config(s, i); + uint8_t config = pca955x_pin_get_config(s, i); switch (config) { case PCA9552_LED_ON: @@ -58,7 +58,7 @@ static void pca9552_update_pin_input(PCA9552State *s) } } -static uint8_t pca9552_read(PCA9552State *s, uint8_t reg) +static uint8_t pca955x_read(PCA955xState *s, uint8_t reg) { switch (reg) { case PCA9552_INPUT0: @@ -79,7 +79,7 @@ static uint8_t pca9552_read(PCA9552State *s, uint8_t reg) } } -static void pca9552_write(PCA9552State *s, uint8_t reg, uint8_t data) +static void pca955x_write(PCA955xState *s, uint8_t reg, uint8_t data) { switch (reg) { case PCA9552_PSC0: @@ -94,7 +94,7 @@ static void pca9552_write(PCA9552State *s, uint8_t reg, uint8_t data) case PCA9552_LS2: case PCA9552_LS3: s->regs[reg] = data; - pca9552_update_pin_input(s); + pca955x_update_pin_input(s); break; case PCA9552_INPUT0: @@ -110,7 +110,7 @@ static void pca9552_write(PCA9552State *s, uint8_t reg, uint8_t data) * after each byte is sent to or received by the device. The index * rollovers to 0 when the maximum register address is reached. */ -static void pca9552_autoinc(PCA9552State *s) +static void pca955x_autoinc(PCA955xState *s) { if (s->pointer != 0xFF && s->pointer & PCA9552_AUTOINC) { uint8_t reg = s->pointer & 0xf; @@ -120,12 +120,12 @@ static void pca9552_autoinc(PCA9552State *s) } } -static uint8_t pca9552_recv(I2CSlave *i2c) +static uint8_t pca955x_recv(I2CSlave *i2c) { - PCA9552State *s = PCA9552(i2c); + PCA955xState *s = PCA955X(i2c); uint8_t ret; - ret = pca9552_read(s, s->pointer & 0xf); + ret = pca955x_read(s, s->pointer & 0xf); /* * From the Specs: @@ -143,40 +143,40 @@ static uint8_t pca9552_recv(I2CSlave *i2c) __func__); } - pca9552_autoinc(s); + pca955x_autoinc(s); return ret; } -static int pca9552_send(I2CSlave *i2c, uint8_t data) +static int pca955x_send(I2CSlave *i2c, uint8_t data) { - PCA9552State *s = PCA9552(i2c); + PCA955xState *s = PCA955X(i2c); /* First byte sent by is the register address */ if (s->len == 0) { s->pointer = data; s->len++; } else { - pca9552_write(s, s->pointer & 0xf, data); + pca955x_write(s, s->pointer & 0xf, data); - pca9552_autoinc(s); + pca955x_autoinc(s); } return 0; } -static int pca9552_event(I2CSlave *i2c, enum i2c_event event) +static int pca955x_event(I2CSlave *i2c, enum i2c_event event) { - PCA9552State *s = PCA9552(i2c); + PCA955xState *s = PCA955X(i2c); s->len = 0; return 0; } -static void pca9552_get_led(Object *obj, Visitor *v, const char *name, +static void pca955x_get_led(Object *obj, Visitor *v, const char *name, void *opaque, Error **errp) { - PCA9552State *s = PCA9552(obj); + PCA955xState *s = PCA955X(obj); int led, rc, reg; uint8_t state; @@ -195,7 +195,7 @@ static void pca9552_get_led(Object *obj, Visitor *v, const char *name, * reading the INPUTx reg */ reg = PCA9552_LS0 + led / 4; - state = (pca9552_read(s, reg) >> (led % 8)) & 0x3; + state = (pca955x_read(s, reg) >> (led % 8)) & 0x3; visit_type_str(v, name, (char **)&led_state[state], errp); } @@ -209,10 +209,10 @@ static inline uint8_t pca955x_ledsel(uint8_t oldval, int led_num, int state) ((state & 0x3) << (led_num << 1)); } -static void pca9552_set_led(Object *obj, Visitor *v, const char *name, +static void pca955x_set_led(Object *obj, Visitor *v, const char *name, void *opaque, Error **errp) { - PCA9552State *s = PCA9552(obj); + PCA955xState *s = PCA955X(obj); Error *local_err = NULL; int led, rc, reg, val; uint8_t state; @@ -244,9 +244,9 @@ static void pca9552_set_led(Object *obj, Visitor *v, const char *name, } reg = PCA9552_LS0 + led / 4; - val = pca9552_read(s, reg); + val = pca955x_read(s, reg); val = pca955x_ledsel(val, led % 4, state); - pca9552_write(s, reg, val); + pca955x_write(s, reg, val); } static const VMStateDescription pca9552_vmstate = { @@ -254,17 +254,17 @@ static const VMStateDescription pca9552_vmstate = { .version_id = 0, .minimum_version_id = 0, .fields = (VMStateField[]) { - VMSTATE_UINT8(len, PCA9552State), - VMSTATE_UINT8(pointer, PCA9552State), - VMSTATE_UINT8_ARRAY(regs, PCA9552State, PCA9552_NR_REGS), - VMSTATE_I2C_SLAVE(i2c, PCA9552State), + VMSTATE_UINT8(len, PCA955xState), + VMSTATE_UINT8(pointer, PCA955xState), + VMSTATE_UINT8_ARRAY(regs, PCA955xState, PCA955X_NR_REGS), + VMSTATE_I2C_SLAVE(i2c, PCA955xState), VMSTATE_END_OF_LIST() } }; static void pca9552_reset(DeviceState *dev) { - PCA9552State *s = PCA9552(dev); + PCA955xState *s = PCA955X(dev); s->regs[PCA9552_PSC0] = 0xFF; s->regs[PCA9552_PWM0] = 0x80; @@ -275,15 +275,15 @@ static void pca9552_reset(DeviceState *dev) s->regs[PCA9552_LS2] = 0x55; s->regs[PCA9552_LS3] = 0x55; - pca9552_update_pin_input(s); + pca955x_update_pin_input(s); s->pointer = 0xFF; s->len = 0; } -static void pca9552_initfn(Object *obj) +static void pca955x_initfn(Object *obj) { - PCA9552State *s = PCA9552(obj); + PCA955xState *s = PCA955X(obj); int led; /* If support for the other PCA955X devices are implemented, these @@ -297,7 +297,7 @@ static void pca9552_initfn(Object *obj) char *name; name = g_strdup_printf("led%d", led); - object_property_add(obj, name, "bool", pca9552_get_led, pca9552_set_led, + object_property_add(obj, name, "bool", pca955x_get_led, pca955x_set_led, NULL, NULL); g_free(name); } @@ -308,9 +308,9 @@ static void pca9552_class_init(ObjectClass *klass, void *data) DeviceClass *dc = DEVICE_CLASS(klass); I2CSlaveClass *k = I2C_SLAVE_CLASS(klass); - k->event = pca9552_event; - k->recv = pca9552_recv; - k->send = pca9552_send; + k->event = pca955x_event; + k->recv = pca955x_recv; + k->send = pca955x_send; dc->reset = pca9552_reset; dc->vmsd = &pca9552_vmstate; } @@ -318,14 +318,14 @@ static void pca9552_class_init(ObjectClass *klass, void *data) static const TypeInfo pca9552_info = { .name = TYPE_PCA9552, .parent = TYPE_I2C_SLAVE, - .instance_init = pca9552_initfn, - .instance_size = sizeof(PCA9552State), + .instance_init = pca955x_initfn, + .instance_size = sizeof(PCA955xState), .class_init = pca9552_class_init, }; -static void pca9552_register_types(void) +static void pca955x_register_types(void) { type_register_static(&pca9552_info); } -type_init(pca9552_register_types) +type_init(pca955x_register_types) diff --git a/include/hw/misc/pca9552.h b/include/hw/misc/pca9552.h index bc5ed31087..db527595a3 100644 --- a/include/hw/misc/pca9552.h +++ b/include/hw/misc/pca9552.h @@ -12,11 +12,11 @@ #include "hw/i2c/i2c.h" #define TYPE_PCA9552 "pca9552" -#define PCA9552(obj) OBJECT_CHECK(PCA9552State, (obj), TYPE_PCA9552) +#define PCA955X(obj) OBJECT_CHECK(PCA955xState, (obj), TYPE_PCA9552) -#define PCA9552_NR_REGS 10 +#define PCA955X_NR_REGS 10 -typedef struct PCA9552State { +typedef struct PCA955xState { /*< private >*/ I2CSlave i2c; /*< public >*/ @@ -24,9 +24,9 @@ typedef struct PCA9552State { uint8_t len; uint8_t pointer; - uint8_t regs[PCA9552_NR_REGS]; + uint8_t regs[PCA955X_NR_REGS]; uint8_t max_reg; uint8_t pin_count; -} PCA9552State; +} PCA955xState; #endif From 736132e455eea08e37fe21b4140b8088f2c0956b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Tue, 23 Jun 2020 09:27:18 +0200 Subject: [PATCH 07/57] hw/misc/pca9552: Add generic PCA955xClass, parent of TYPE_PCA9552 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extract the code common to the PCA955x family in PCA955xClass, keeping the PCA9552 specific parts into pca9552_class_init(). Remove the 'TODO' comment added in commit 5141d4158cf. Suggested-by: Cédric Le Goater Reviewed-by: Cédric Le Goater Signed-off-by: Philippe Mathieu-Daudé Tested-by: Cédric Le Goater Message-id: 20200623072723.6324-5-f4bug@amsat.org Signed-off-by: Peter Maydell --- hw/misc/pca9552.c | 66 ++++++++++++++++++++++++++++----------- include/hw/misc/pca9552.h | 6 ++-- 2 files changed, 51 insertions(+), 21 deletions(-) diff --git a/hw/misc/pca9552.c b/hw/misc/pca9552.c index 5681ff3b22..4de57dbe2e 100644 --- a/hw/misc/pca9552.c +++ b/hw/misc/pca9552.c @@ -4,6 +4,7 @@ * https://www.nxp.com/docs/en/application-note/AN264.pdf * * Copyright (c) 2017-2018, IBM Corporation. + * Copyright (c) 2020 Philippe Mathieu-Daudé * * This work is licensed under the terms of the GNU GPL, version 2 or * later. See the COPYING file in the top-level directory. @@ -18,6 +19,20 @@ #include "qapi/error.h" #include "qapi/visitor.h" +typedef struct PCA955xClass { + /*< private >*/ + I2CSlaveClass parent_class; + /*< public >*/ + + uint8_t pin_count; + uint8_t max_reg; +} PCA955xClass; + +#define PCA955X_CLASS(klass) \ + OBJECT_CLASS_CHECK(PCA955xClass, (klass), TYPE_PCA955X) +#define PCA955X_GET_CLASS(obj) \ + OBJECT_GET_CLASS(PCA955xClass, (obj), TYPE_PCA955X) + #define PCA9552_LED_ON 0x0 #define PCA9552_LED_OFF 0x1 #define PCA9552_LED_PWM0 0x2 @@ -35,9 +50,10 @@ static uint8_t pca955x_pin_get_config(PCA955xState *s, int pin) static void pca955x_update_pin_input(PCA955xState *s) { + PCA955xClass *k = PCA955X_GET_CLASS(s); int i; - for (i = 0; i < s->pin_count; i++) { + for (i = 0; i < k->pin_count; i++) { uint8_t input_reg = PCA9552_INPUT0 + (i / 8); uint8_t input_shift = (i % 8); uint8_t config = pca955x_pin_get_config(s, i); @@ -112,10 +128,12 @@ static void pca955x_write(PCA955xState *s, uint8_t reg, uint8_t data) */ static void pca955x_autoinc(PCA955xState *s) { + PCA955xClass *k = PCA955X_GET_CLASS(s); + if (s->pointer != 0xFF && s->pointer & PCA9552_AUTOINC) { uint8_t reg = s->pointer & 0xf; - reg = (reg + 1) % (s->max_reg + 1); + reg = (reg + 1) % (k->max_reg + 1); s->pointer = reg | PCA9552_AUTOINC; } } @@ -176,6 +194,7 @@ static int pca955x_event(I2CSlave *i2c, enum i2c_event event) static void pca955x_get_led(Object *obj, Visitor *v, const char *name, void *opaque, Error **errp) { + PCA955xClass *k = PCA955X_GET_CLASS(obj); PCA955xState *s = PCA955X(obj); int led, rc, reg; uint8_t state; @@ -185,7 +204,7 @@ static void pca955x_get_led(Object *obj, Visitor *v, const char *name, error_setg(errp, "%s: error reading %s", __func__, name); return; } - if (led < 0 || led > s->pin_count) { + if (led < 0 || led > k->pin_count) { error_setg(errp, "%s invalid led %s", __func__, name); return; } @@ -212,6 +231,7 @@ static inline uint8_t pca955x_ledsel(uint8_t oldval, int led_num, int state) static void pca955x_set_led(Object *obj, Visitor *v, const char *name, void *opaque, Error **errp) { + PCA955xClass *k = PCA955X_GET_CLASS(obj); PCA955xState *s = PCA955X(obj); Error *local_err = NULL; int led, rc, reg, val; @@ -228,7 +248,7 @@ static void pca955x_set_led(Object *obj, Visitor *v, const char *name, error_setg(errp, "%s: error reading %s", __func__, name); return; } - if (led < 0 || led > s->pin_count) { + if (led < 0 || led > k->pin_count) { error_setg(errp, "%s invalid led %s", __func__, name); return; } @@ -283,17 +303,11 @@ static void pca9552_reset(DeviceState *dev) static void pca955x_initfn(Object *obj) { - PCA955xState *s = PCA955X(obj); + PCA955xClass *k = PCA955X_GET_CLASS(obj); int led; - /* If support for the other PCA955X devices are implemented, these - * constant values might be part of class structure describing the - * PCA955X device - */ - s->max_reg = PCA9552_LS3; - s->pin_count = 16; - - for (led = 0; led < s->pin_count; led++) { + assert(k->pin_count <= PCA955X_PIN_COUNT_MAX); + for (led = 0; led < k->pin_count; led++) { char *name; name = g_strdup_printf("led%d", led); @@ -303,28 +317,44 @@ static void pca955x_initfn(Object *obj) } } -static void pca9552_class_init(ObjectClass *klass, void *data) +static void pca955x_class_init(ObjectClass *klass, void *data) { - DeviceClass *dc = DEVICE_CLASS(klass); I2CSlaveClass *k = I2C_SLAVE_CLASS(klass); k->event = pca955x_event; k->recv = pca955x_recv; k->send = pca955x_send; +} + +static const TypeInfo pca955x_info = { + .name = TYPE_PCA955X, + .parent = TYPE_I2C_SLAVE, + .instance_init = pca955x_initfn, + .instance_size = sizeof(PCA955xState), + .class_init = pca955x_class_init, + .abstract = true, +}; + +static void pca9552_class_init(ObjectClass *oc, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(oc); + PCA955xClass *pc = PCA955X_CLASS(oc); + dc->reset = pca9552_reset; dc->vmsd = &pca9552_vmstate; + pc->max_reg = PCA9552_LS3; + pc->pin_count = 16; } static const TypeInfo pca9552_info = { .name = TYPE_PCA9552, - .parent = TYPE_I2C_SLAVE, - .instance_init = pca955x_initfn, - .instance_size = sizeof(PCA955xState), + .parent = TYPE_PCA955X, .class_init = pca9552_class_init, }; static void pca955x_register_types(void) { + type_register_static(&pca955x_info); type_register_static(&pca9552_info); } diff --git a/include/hw/misc/pca9552.h b/include/hw/misc/pca9552.h index db527595a3..90843b03b8 100644 --- a/include/hw/misc/pca9552.h +++ b/include/hw/misc/pca9552.h @@ -12,9 +12,11 @@ #include "hw/i2c/i2c.h" #define TYPE_PCA9552 "pca9552" -#define PCA955X(obj) OBJECT_CHECK(PCA955xState, (obj), TYPE_PCA9552) +#define TYPE_PCA955X "pca955x" +#define PCA955X(obj) OBJECT_CHECK(PCA955xState, (obj), TYPE_PCA955X) #define PCA955X_NR_REGS 10 +#define PCA955X_PIN_COUNT_MAX 16 typedef struct PCA955xState { /*< private >*/ @@ -25,8 +27,6 @@ typedef struct PCA955xState { uint8_t pointer; uint8_t regs[PCA955X_NR_REGS]; - uint8_t max_reg; - uint8_t pin_count; } PCA955xState; #endif From 2df252d8793097386d4972640653da93f7857da1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Tue, 23 Jun 2020 09:27:19 +0200 Subject: [PATCH 08/57] hw/misc/pca9552: Add a 'description' property for debugging purpose MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a description field to distinguish between multiple devices. Reviewed-by: Cédric Le Goater Signed-off-by: Philippe Mathieu-Daudé Tested-by: Cédric Le Goater Message-id: 20200623072723.6324-6-f4bug@amsat.org Signed-off-by: Peter Maydell --- hw/misc/pca9552.c | 18 ++++++++++++++++++ include/hw/misc/pca9552.h | 1 + 2 files changed, 19 insertions(+) diff --git a/hw/misc/pca9552.c b/hw/misc/pca9552.c index 4de57dbe2e..2cc52b0205 100644 --- a/hw/misc/pca9552.c +++ b/hw/misc/pca9552.c @@ -13,6 +13,7 @@ #include "qemu/osdep.h" #include "qemu/log.h" #include "qemu/module.h" +#include "hw/qdev-properties.h" #include "hw/misc/pca9552.h" #include "hw/misc/pca9552_regs.h" #include "migration/vmstate.h" @@ -317,13 +318,30 @@ static void pca955x_initfn(Object *obj) } } +static void pca955x_realize(DeviceState *dev, Error **errp) +{ + PCA955xState *s = PCA955X(dev); + + if (!s->description) { + s->description = g_strdup("pca-unspecified"); + } +} + +static Property pca955x_properties[] = { + DEFINE_PROP_STRING("description", PCA955xState, description), + DEFINE_PROP_END_OF_LIST(), +}; + static void pca955x_class_init(ObjectClass *klass, void *data) { + DeviceClass *dc = DEVICE_CLASS(klass); I2CSlaveClass *k = I2C_SLAVE_CLASS(klass); k->event = pca955x_event; k->recv = pca955x_recv; k->send = pca955x_send; + dc->realize = pca955x_realize; + device_class_set_props(dc, pca955x_properties); } static const TypeInfo pca955x_info = { diff --git a/include/hw/misc/pca9552.h b/include/hw/misc/pca9552.h index 90843b03b8..bf1a589137 100644 --- a/include/hw/misc/pca9552.h +++ b/include/hw/misc/pca9552.h @@ -27,6 +27,7 @@ typedef struct PCA955xState { uint8_t pointer; uint8_t regs[PCA955X_NR_REGS]; + char *description; /* For debugging purpose only */ } PCA955xState; #endif From b989b89f672597cc0e780b5ce95e604d4f49247a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Tue, 23 Jun 2020 09:27:20 +0200 Subject: [PATCH 09/57] hw/misc/pca9552: Trace GPIO High/Low events MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a trivial representation of the PCA9552 GPIOs. Example booting obmc-phosphor-image: $ qemu-system-arm -M witherspoon-bmc -trace pca955x_gpio_status 1592689902.327837:pca955x_gpio_status pca-unspecified GPIOs 0-15 [*...............] 1592689902.329934:pca955x_gpio_status pca-unspecified GPIOs 0-15 [**..............] 1592689902.330717:pca955x_gpio_status pca-unspecified GPIOs 0-15 [***.............] 1592689902.331431:pca955x_gpio_status pca-unspecified GPIOs 0-15 [****............] 1592689902.332163:pca955x_gpio_status pca-unspecified GPIOs 0-15 [****.........*..] 1592689902.332888:pca955x_gpio_status pca-unspecified GPIOs 0-15 [****.........**.] 1592689902.333629:pca955x_gpio_status pca-unspecified GPIOs 0-15 [****.........***] 1592690032.793289:pca955x_gpio_status pca-unspecified GPIOs 0-15 [****.........*.*] 1592690033.303163:pca955x_gpio_status pca-unspecified GPIOs 0-15 [****.........***] 1592690033.812962:pca955x_gpio_status pca-unspecified GPIOs 0-15 [****.........*.*] 1592690034.323234:pca955x_gpio_status pca-unspecified GPIOs 0-15 [****.........***] 1592690034.832922:pca955x_gpio_status pca-unspecified GPIOs 0-15 [****.........*.*] We notice the GPIO #14 (front-power LED) starts to blink. This LED is described in the witherspoon device-tree [*]: front-power { retain-state-shutdown; default-state = "keep"; gpios = <&pca0 14 GPIO_ACTIVE_LOW>; }; [*] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/arch/arm/boot/dts/aspeed-bmc-opp-witherspoon.dts?id=b1f9be9392f0#n140 Suggested-by: Cédric Le Goater Reviewed-by: Cédric Le Goater Signed-off-by: Philippe Mathieu-Daudé Tested-by: Cédric Le Goater Message-id: 20200623072723.6324-7-f4bug@amsat.org Signed-off-by: Peter Maydell --- hw/misc/pca9552.c | 39 +++++++++++++++++++++++++++++++++++++++ hw/misc/trace-events | 3 +++ 2 files changed, 42 insertions(+) diff --git a/hw/misc/pca9552.c b/hw/misc/pca9552.c index 2cc52b0205..41f8ad213d 100644 --- a/hw/misc/pca9552.c +++ b/hw/misc/pca9552.c @@ -13,12 +13,14 @@ #include "qemu/osdep.h" #include "qemu/log.h" #include "qemu/module.h" +#include "qemu/bitops.h" #include "hw/qdev-properties.h" #include "hw/misc/pca9552.h" #include "hw/misc/pca9552_regs.h" #include "migration/vmstate.h" #include "qapi/error.h" #include "qapi/visitor.h" +#include "trace.h" typedef struct PCA955xClass { /*< private >*/ @@ -49,6 +51,39 @@ static uint8_t pca955x_pin_get_config(PCA955xState *s, int pin) return extract32(s->regs[reg], shift, 2); } +/* Return INPUT status (bit #N belongs to GPIO #N) */ +static uint16_t pca955x_pins_get_status(PCA955xState *s) +{ + return (s->regs[PCA9552_INPUT1] << 8) | s->regs[PCA9552_INPUT0]; +} + +static void pca955x_display_pins_status(PCA955xState *s, + uint16_t previous_pins_status) +{ + PCA955xClass *k = PCA955X_GET_CLASS(s); + uint16_t pins_status, pins_changed; + int i; + + pins_status = pca955x_pins_get_status(s); + pins_changed = previous_pins_status ^ pins_status; + if (!pins_changed) { + return; + } + if (trace_event_get_state_backends(TRACE_PCA955X_GPIO_STATUS)) { + char *buf = g_newa(char, k->pin_count + 1); + + for (i = 0; i < k->pin_count; i++) { + if (extract32(pins_status, i, 1)) { + buf[i] = '*'; + } else { + buf[i] = '.'; + } + } + buf[i] = '\0'; + trace_pca955x_gpio_status(s->description, buf); + } +} + static void pca955x_update_pin_input(PCA955xState *s) { PCA955xClass *k = PCA955X_GET_CLASS(s); @@ -98,6 +133,8 @@ static uint8_t pca955x_read(PCA955xState *s, uint8_t reg) static void pca955x_write(PCA955xState *s, uint8_t reg, uint8_t data) { + uint16_t pins_status; + switch (reg) { case PCA9552_PSC0: case PCA9552_PWM0: @@ -110,8 +147,10 @@ static void pca955x_write(PCA955xState *s, uint8_t reg, uint8_t data) case PCA9552_LS1: case PCA9552_LS2: case PCA9552_LS3: + pins_status = pca955x_pins_get_status(s); s->regs[reg] = data; pca955x_update_pin_input(s); + pca955x_display_pins_status(s, pins_status); break; case PCA9552_INPUT0: diff --git a/hw/misc/trace-events b/hw/misc/trace-events index 68a6d9f2ab..bd7bd37ea8 100644 --- a/hw/misc/trace-events +++ b/hw/misc/trace-events @@ -209,3 +209,6 @@ via1_adb_poll(uint8_t data, const char *vadbint, int status, int index, int size # grlib_ahb_apb_pnp.c grlib_ahb_pnp_read(uint64_t addr, uint32_t value) "AHB PnP read addr:0x%03"PRIx64" data:0x%08x" grlib_apb_pnp_read(uint64_t addr, uint32_t value) "APB PnP read addr:0x%03"PRIx64" data:0x%08x" + +# pca9552.c +pca955x_gpio_status(const char *description, const char *buf) "%s GPIOs 0-15 [%s]" From 15ce12cfdd3740d38a7ab19ad0f5c812ff649fed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Tue, 23 Jun 2020 09:27:21 +0200 Subject: [PATCH 10/57] hw/arm/aspeed: Describe each PCA9552 device MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We have 2 distinct PCA9552 devices. Set their description to distinguish them when looking at the trace events. Description name taken from: https://github.com/open-power/witherspoon-xml/blob/master/witherspoon.xml Reviewed-by: Cédric Le Goater Signed-off-by: Philippe Mathieu-Daudé Reviewed-by: Corey Minyard Reviewed-by: Markus Armbruster Tested-by: Cédric Le Goater Message-id: 20200623072723.6324-8-f4bug@amsat.org Signed-off-by: Peter Maydell --- hw/arm/aspeed.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/hw/arm/aspeed.c b/hw/arm/aspeed.c index 665d04fbf6..379f9672a5 100644 --- a/hw/arm/aspeed.c +++ b/hw/arm/aspeed.c @@ -508,12 +508,15 @@ static void witherspoon_bmc_i2c_init(AspeedMachineState *bmc) { AspeedSoCState *soc = &bmc->soc; uint8_t *eeprom_buf = g_malloc0(8 * 1024); + DeviceState *dev; /* Bus 3: TODO bmp280@77 */ /* Bus 3: TODO max31785@52 */ /* Bus 3: TODO dps310@76 */ - i2c_create_slave(aspeed_i2c_get_bus(DEVICE(&soc->i2c), 3), TYPE_PCA9552, - 0x60); + dev = i2c_try_create_slave(TYPE_PCA9552, 0x60); + qdev_prop_set_string(dev, "description", "pca1"); + i2c_realize_and_unref(dev, aspeed_i2c_get_bus(DEVICE(&soc->i2c), 3), + &error_fatal); i2c_create_slave(aspeed_i2c_get_bus(DEVICE(&soc->i2c), 4), "tmp423", 0x4c); i2c_create_slave(aspeed_i2c_get_bus(DEVICE(&soc->i2c), 5), "tmp423", 0x4c); @@ -528,8 +531,10 @@ static void witherspoon_bmc_i2c_init(AspeedMachineState *bmc) smbus_eeprom_init_one(aspeed_i2c_get_bus(DEVICE(&soc->i2c), 11), 0x51, eeprom_buf); - i2c_create_slave(aspeed_i2c_get_bus(DEVICE(&soc->i2c), 11), TYPE_PCA9552, - 0x60); + dev = i2c_try_create_slave(TYPE_PCA9552, 0x60); + qdev_prop_set_string(dev, "description", "pca0"); + i2c_realize_and_unref(dev, aspeed_i2c_get_bus(DEVICE(&soc->i2c), 11), + &error_fatal); /* Bus 11: TODO ucd90160@64 */ } From d82ab2931d14c1d8aa27a330b03fe5bf944130cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Tue, 23 Jun 2020 09:27:22 +0200 Subject: [PATCH 11/57] hw/misc/pca9552: Trace GPIO change events MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Emit a trace event when a GPIO change its state. Example booting obmc-phosphor-image: $ qemu-system-arm -M witherspoon-bmc -trace pca955x_gpio_change 1592690552.687372:pca955x_gpio_change pca1 GPIO id:0 status: 0 -> 1 1592690552.690169:pca955x_gpio_change pca1 GPIO id:1 status: 0 -> 1 1592690552.691673:pca955x_gpio_change pca1 GPIO id:2 status: 0 -> 1 1592690552.696886:pca955x_gpio_change pca1 GPIO id:3 status: 0 -> 1 1592690552.698614:pca955x_gpio_change pca1 GPIO id:13 status: 0 -> 1 1592690552.699833:pca955x_gpio_change pca1 GPIO id:14 status: 0 -> 1 1592690552.700842:pca955x_gpio_change pca1 GPIO id:15 status: 0 -> 1 1592690683.841921:pca955x_gpio_change pca1 GPIO id:14 status: 1 -> 0 1592690683.861660:pca955x_gpio_change pca1 GPIO id:14 status: 0 -> 1 1592690684.371460:pca955x_gpio_change pca1 GPIO id:14 status: 1 -> 0 1592690684.882115:pca955x_gpio_change pca1 GPIO id:14 status: 0 -> 1 1592690685.391411:pca955x_gpio_change pca1 GPIO id:14 status: 1 -> 0 1592690685.901391:pca955x_gpio_change pca1 GPIO id:14 status: 0 -> 1 1592690686.411678:pca955x_gpio_change pca1 GPIO id:14 status: 1 -> 0 1592690686.921279:pca955x_gpio_change pca1 GPIO id:14 status: 0 -> 1 We notice the GPIO #14 (front-power LED) starts to blink. This LED is described in the witherspoon device-tree [*]: front-power { retain-state-shutdown; default-state = "keep"; gpios = <&pca0 14 GPIO_ACTIVE_LOW>; }; [*] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/arch/arm/boot/dts/aspeed-bmc-opp-witherspoon.dts?id=b1f9be9392f0#n140 Reviewed-by: Cédric Le Goater Signed-off-by: Philippe Mathieu-Daudé Tested-by: Cédric Le Goater Message-id: 20200623072723.6324-9-f4bug@amsat.org Signed-off-by: Peter Maydell --- hw/misc/pca9552.c | 15 +++++++++++++++ hw/misc/trace-events | 1 + 2 files changed, 16 insertions(+) diff --git a/hw/misc/pca9552.c b/hw/misc/pca9552.c index 41f8ad213d..1c3ad57432 100644 --- a/hw/misc/pca9552.c +++ b/hw/misc/pca9552.c @@ -82,6 +82,21 @@ static void pca955x_display_pins_status(PCA955xState *s, buf[i] = '\0'; trace_pca955x_gpio_status(s->description, buf); } + if (trace_event_get_state_backends(TRACE_PCA955X_GPIO_CHANGE)) { + for (i = 0; i < k->pin_count; i++) { + if (extract32(pins_changed, i, 1)) { + unsigned new_state = extract32(pins_status, i, 1); + + /* + * We display the state using the PCA logic ("active-high"). + * This is not the state of the LED, which signal might be + * wired "active-low" on the board. + */ + trace_pca955x_gpio_change(s->description, i, + !new_state, new_state); + } + } + } } static void pca955x_update_pin_input(PCA955xState *s) diff --git a/hw/misc/trace-events b/hw/misc/trace-events index bd7bd37ea8..ebea53735c 100644 --- a/hw/misc/trace-events +++ b/hw/misc/trace-events @@ -212,3 +212,4 @@ grlib_apb_pnp_read(uint64_t addr, uint32_t value) "APB PnP read addr:0x%03"PRIx6 # pca9552.c pca955x_gpio_status(const char *description, const char *buf) "%s GPIOs 0-15 [%s]" +pca955x_gpio_change(const char *description, unsigned id, unsigned prev_state, unsigned current_state) "%s GPIO id:%u status: %u -> %u" From 586f495b1e78c27e141ff432dd971eb41866fb80 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Tue, 23 Jun 2020 09:27:23 +0200 Subject: [PATCH 12/57] hw/misc/pca9552: Model qdev output GPIOs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The PCA9552 has 16 GPIOs which can be used as input, output or PWM mode. QEMU models the output GPIO with the qemu_irq type. Let the device expose the 16 GPIOs to allow us to later connect LEDs to these outputs. Reviewed-by: Cédric Le Goater Signed-off-by: Philippe Mathieu-Daudé Tested-by: Cédric Le Goater Message-id: 20200623072723.6324-10-f4bug@amsat.org Signed-off-by: Peter Maydell --- hw/misc/pca9552.c | 6 ++++++ include/hw/misc/pca9552.h | 1 + 2 files changed, 7 insertions(+) diff --git a/hw/misc/pca9552.c b/hw/misc/pca9552.c index 1c3ad57432..80caa9ec8f 100644 --- a/hw/misc/pca9552.c +++ b/hw/misc/pca9552.c @@ -17,6 +17,7 @@ #include "hw/qdev-properties.h" #include "hw/misc/pca9552.h" #include "hw/misc/pca9552_regs.h" +#include "hw/irq.h" #include "migration/vmstate.h" #include "qapi/error.h" #include "qapi/visitor.h" @@ -111,9 +112,11 @@ static void pca955x_update_pin_input(PCA955xState *s) switch (config) { case PCA9552_LED_ON: + qemu_set_irq(s->gpio[i], 1); s->regs[input_reg] |= 1 << input_shift; break; case PCA9552_LED_OFF: + qemu_set_irq(s->gpio[i], 0); s->regs[input_reg] &= ~(1 << input_shift); break; case PCA9552_LED_PWM0: @@ -374,11 +377,14 @@ static void pca955x_initfn(Object *obj) static void pca955x_realize(DeviceState *dev, Error **errp) { + PCA955xClass *k = PCA955X_GET_CLASS(dev); PCA955xState *s = PCA955X(dev); if (!s->description) { s->description = g_strdup("pca-unspecified"); } + + qdev_init_gpio_out(dev, s->gpio, k->pin_count); } static Property pca955x_properties[] = { diff --git a/include/hw/misc/pca9552.h b/include/hw/misc/pca9552.h index bf1a589137..600356fbf9 100644 --- a/include/hw/misc/pca9552.h +++ b/include/hw/misc/pca9552.h @@ -27,6 +27,7 @@ typedef struct PCA955xState { uint8_t pointer; uint8_t regs[PCA955X_NR_REGS]; + qemu_irq gpio[PCA955X_PIN_COUNT_MAX]; char *description; /* For debugging purpose only */ } PCA955xState; From c7fd0baac0c24defec66263799faa8618327b352 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 25 Jun 2020 20:30:59 -0700 Subject: [PATCH 13/57] target/arm: Add isar tests for mte Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20200626033144.790098-2-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/cpu.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/target/arm/cpu.h b/target/arm/cpu.h index cf66b8c7fb..ff70115801 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h @@ -3814,6 +3814,16 @@ static inline bool isar_feature_aa64_bti(const ARMISARegisters *id) return FIELD_EX64(id->id_aa64pfr1, ID_AA64PFR1, BT) != 0; } +static inline bool isar_feature_aa64_mte_insn_reg(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64pfr1, ID_AA64PFR1, MTE) != 0; +} + +static inline bool isar_feature_aa64_mte(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64pfr1, ID_AA64PFR1, MTE) >= 2; +} + static inline bool isar_feature_aa64_pmu_8_1(const ARMISARegisters *id) { return FIELD_EX64(id->id_aa64dfr0, ID_AA64DFR0, PMUVER) >= 4 && From 252e8c69669599b4bcff802df300726300292f47 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 25 Jun 2020 20:31:00 -0700 Subject: [PATCH 14/57] target/arm: Improve masking of SCR RES0 bits Protect reads of aa64 id registers with ARM_CP_STATE_AA64. Use this as a simpler test than arm_el_is_aa64, since EL3 cannot change mode. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20200626033144.790098-3-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/helper.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/target/arm/helper.c b/target/arm/helper.c index 972a766730..a29f0a28d8 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -2011,9 +2011,16 @@ static void scr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) uint32_t valid_mask = 0x3fff; ARMCPU *cpu = env_archcpu(env); - if (arm_el_is_aa64(env, 3)) { + if (ri->state == ARM_CP_STATE_AA64) { value |= SCR_FW | SCR_AW; /* these two bits are RES1. */ valid_mask &= ~SCR_NET; + + if (cpu_isar_feature(aa64_lor, cpu)) { + valid_mask |= SCR_TLOR; + } + if (cpu_isar_feature(aa64_pauth, cpu)) { + valid_mask |= SCR_API | SCR_APK; + } } else { valid_mask &= ~(SCR_RW | SCR_ST); } @@ -2032,12 +2039,6 @@ static void scr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) valid_mask &= ~SCR_SMD; } } - if (cpu_isar_feature(aa64_lor, cpu)) { - valid_mask |= SCR_TLOR; - } - if (cpu_isar_feature(aa64_pauth, cpu)) { - valid_mask |= SCR_API | SCR_APK; - } /* Clear all-context RES0 bits. */ value &= valid_mask; From f00faf130d5dcf64b04f71a95f14745845ca1014 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 25 Jun 2020 20:31:01 -0700 Subject: [PATCH 15/57] target/arm: Add support for MTE to SCTLR_ELx This does not attempt to rectify all of the res0 bits, but does clear the mte bits when not enabled. Since there is no high-part mapping of SCTLR, aa32 mode cannot write to these bits. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20200626033144.790098-4-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/helper.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/target/arm/helper.c b/target/arm/helper.c index a29f0a28d8..8a0fb01581 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -4698,6 +4698,22 @@ static void sctlr_write(CPUARMState *env, const ARMCPRegInfo *ri, { ARMCPU *cpu = env_archcpu(env); + if (arm_feature(env, ARM_FEATURE_PMSA) && !cpu->has_mpu) { + /* M bit is RAZ/WI for PMSA with no MPU implemented */ + value &= ~SCTLR_M; + } + + /* ??? Lots of these bits are not implemented. */ + + if (ri->state == ARM_CP_STATE_AA64 && !cpu_isar_feature(aa64_mte, cpu)) { + if (ri->opc1 == 6) { /* SCTLR_EL3 */ + value &= ~(SCTLR_ITFSB | SCTLR_TCF | SCTLR_ATA); + } else { + value &= ~(SCTLR_ITFSB | SCTLR_TCF0 | SCTLR_TCF | + SCTLR_ATA0 | SCTLR_ATA); + } + } + if (raw_read(env, ri) == value) { /* Skip the TLB flush if nothing actually changed; Linux likes * to do a lot of pointless SCTLR writes. @@ -4705,13 +4721,8 @@ static void sctlr_write(CPUARMState *env, const ARMCPRegInfo *ri, return; } - if (arm_feature(env, ARM_FEATURE_PMSA) && !cpu->has_mpu) { - /* M bit is RAZ/WI for PMSA with no MPU implemented */ - value &= ~SCTLR_M; - } - raw_write(env, ri, value); - /* ??? Lots of these bits are not implemented. */ + /* This may enable/disable the MMU, so do a TLB flush. */ tlb_flush(CPU(cpu)); From 8ddb300bf60a5f3d358dd6fbf81174f6c03c1d9f Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 25 Jun 2020 20:31:02 -0700 Subject: [PATCH 16/57] target/arm: Add support for MTE to HCR_EL2 and SCR_EL3 Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20200626033144.790098-5-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/helper.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/target/arm/helper.c b/target/arm/helper.c index 8a0fb01581..d6c326b58e 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -2021,6 +2021,9 @@ static void scr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) if (cpu_isar_feature(aa64_pauth, cpu)) { valid_mask |= SCR_API | SCR_APK; } + if (cpu_isar_feature(aa64_mte, cpu)) { + valid_mask |= SCR_ATA; + } } else { valid_mask &= ~(SCR_RW | SCR_ST); } @@ -5248,17 +5251,22 @@ static void do_hcr_write(CPUARMState *env, uint64_t value, uint64_t valid_mask) if (cpu_isar_feature(aa64_pauth, cpu)) { valid_mask |= HCR_API | HCR_APK; } + if (cpu_isar_feature(aa64_mte, cpu)) { + valid_mask |= HCR_ATA | HCR_DCT | HCR_TID5; + } } /* Clear RES0 bits. */ value &= valid_mask; - /* These bits change the MMU setup: + /* + * These bits change the MMU setup: * HCR_VM enables stage 2 translation * HCR_PTW forbids certain page-table setups - * HCR_DC Disables stage1 and enables stage2 translation + * HCR_DC disables stage1 and enables stage2 translation + * HCR_DCT enables tagging on (disabled) stage1 translation */ - if ((env->cp15.hcr_el2 ^ value) & (HCR_VM | HCR_PTW | HCR_DC)) { + if ((env->cp15.hcr_el2 ^ value) & (HCR_VM | HCR_PTW | HCR_DC | HCR_DCT)) { tlb_flush(CPU(cpu)); } env->cp15.hcr_el2 = value; From 14407ec2007e18536ed34772eef46f6e0a0e3d0e Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 25 Jun 2020 20:31:03 -0700 Subject: [PATCH 17/57] target/arm: Rename DISAS_UPDATE to DISAS_UPDATE_EXIT Emphasize that the is_jmp option exits to the main loop. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20200626033144.790098-6-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/translate-a64.c | 8 ++++---- target/arm/translate-vfp.inc.c | 4 ++-- target/arm/translate.c | 12 ++++++------ target/arm/translate.h | 14 ++++++++------ 4 files changed, 20 insertions(+), 18 deletions(-) diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index 4cef862c41..e4795ae100 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -1616,7 +1616,7 @@ static void handle_msr_i(DisasContext *s, uint32_t insn, gen_helper_msr_i_daifclear(cpu_env, t1); tcg_temp_free_i32(t1); /* For DAIFClear, exit the cpu loop to re-evaluate pending IRQs. */ - s->base.is_jmp = DISAS_UPDATE; + s->base.is_jmp = DISAS_UPDATE_EXIT; break; default: @@ -1795,7 +1795,7 @@ static void handle_sys(DisasContext *s, uint32_t insn, bool isread, if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) { /* I/O operations must end the TB here (whether read or write) */ - s->base.is_jmp = DISAS_UPDATE; + s->base.is_jmp = DISAS_UPDATE_EXIT; } if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) { /* @@ -1810,7 +1810,7 @@ static void handle_sys(DisasContext *s, uint32_t insn, bool isread, * but allow this to be suppressed by the register definition * (usually only necessary to work around guest bugs). */ - s->base.is_jmp = DISAS_UPDATE; + s->base.is_jmp = DISAS_UPDATE_EXIT; } } @@ -14292,7 +14292,7 @@ static void aarch64_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu) gen_goto_tb(dc, 1, dc->base.pc_next); break; default: - case DISAS_UPDATE: + case DISAS_UPDATE_EXIT: gen_a64_set_pc_im(dc->base.pc_next); /* fall through */ case DISAS_EXIT: diff --git a/target/arm/translate-vfp.inc.c b/target/arm/translate-vfp.inc.c index bf31b18657..afa8a5f888 100644 --- a/target/arm/translate-vfp.inc.c +++ b/target/arm/translate-vfp.inc.c @@ -123,7 +123,7 @@ static bool full_vfp_access_check(DisasContext *s, bool ignore_vfp_enabled) * this to be the last insn in the TB). */ if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { - s->base.is_jmp = DISAS_UPDATE; + s->base.is_jmp = DISAS_UPDATE_EXIT; gen_io_start(); } gen_helper_v7m_preserve_fp_state(cpu_env); @@ -2860,6 +2860,6 @@ static bool trans_VLLDM_VLSTM(DisasContext *s, arg_VLLDM_VLSTM *a) tcg_temp_free_i32(fptr); /* End the TB, because we have updated FP control bits */ - s->base.is_jmp = DISAS_UPDATE; + s->base.is_jmp = DISAS_UPDATE_EXIT; return true; } diff --git a/target/arm/translate.c b/target/arm/translate.c index 795964da1f..146ff5ddc2 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -2775,7 +2775,7 @@ static void gen_msr_banked(DisasContext *s, int r, int sysm, int rn) tcg_temp_free_i32(tcg_tgtmode); tcg_temp_free_i32(tcg_regno); tcg_temp_free_i32(tcg_reg); - s->base.is_jmp = DISAS_UPDATE; + s->base.is_jmp = DISAS_UPDATE_EXIT; } static void gen_mrs_banked(DisasContext *s, int r, int sysm, int rn) @@ -2797,7 +2797,7 @@ static void gen_mrs_banked(DisasContext *s, int r, int sysm, int rn) tcg_temp_free_i32(tcg_tgtmode); tcg_temp_free_i32(tcg_regno); store_reg(s, rn, tcg_reg); - s->base.is_jmp = DISAS_UPDATE; + s->base.is_jmp = DISAS_UPDATE_EXIT; } /* Store value to PC as for an exception return (ie don't @@ -5114,7 +5114,7 @@ static void gen_srs(DisasContext *s, tcg_temp_free_i32(tmp); } tcg_temp_free_i32(addr); - s->base.is_jmp = DISAS_UPDATE; + s->base.is_jmp = DISAS_UPDATE_EXIT; } /* Generate a label used for skipping this instruction */ @@ -8160,7 +8160,7 @@ static bool trans_SETEND(DisasContext *s, arg_SETEND *a) } if (a->E != (s->be_data == MO_BE)) { gen_helper_setend(cpu_env); - s->base.is_jmp = DISAS_UPDATE; + s->base.is_jmp = DISAS_UPDATE_EXIT; } return true; } @@ -8873,7 +8873,7 @@ static void arm_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu) break; case DISAS_NEXT: case DISAS_TOO_MANY: - case DISAS_UPDATE: + case DISAS_UPDATE_EXIT: gen_set_pc_im(dc, dc->base.pc_next); /* fall through */ default: @@ -8900,7 +8900,7 @@ static void arm_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu) case DISAS_JUMP: gen_goto_ptr(); break; - case DISAS_UPDATE: + case DISAS_UPDATE_EXIT: gen_set_pc_im(dc, dc->base.pc_next); /* fall through */ default: diff --git a/target/arm/translate.h b/target/arm/translate.h index 19650a9e2d..d5edef2943 100644 --- a/target/arm/translate.h +++ b/target/arm/translate.h @@ -148,7 +148,8 @@ static inline void disas_set_insn_syndrome(DisasContext *s, uint32_t syn) /* is_jmp field values */ #define DISAS_JUMP DISAS_TARGET_0 /* only pc was modified dynamically */ -#define DISAS_UPDATE DISAS_TARGET_1 /* cpu state was modified dynamically */ +/* CPU state was modified dynamically; exit to main loop for interrupts. */ +#define DISAS_UPDATE_EXIT DISAS_TARGET_1 /* These instructions trap after executing, so the A32/T32 decoder must * defer them until after the conditional execution state has been updated. * WFI also needs special handling when single-stepping. @@ -164,11 +165,12 @@ static inline void disas_set_insn_syndrome(DisasContext *s, uint32_t syn) * custom end-of-TB code) */ #define DISAS_BX_EXCRET DISAS_TARGET_8 -/* For instructions which want an immediate exit to the main loop, - * as opposed to attempting to use lookup_and_goto_ptr. Unlike - * DISAS_UPDATE this doesn't write the PC on exiting the translation - * loop so you need to ensure something (gen_a64_set_pc_im or runtime - * helper) has done so before we reach return from cpu_tb_exec. +/* + * For instructions which want an immediate exit to the main loop, as opposed + * to attempting to use lookup_and_goto_ptr. Unlike DISAS_UPDATE_EXIT, this + * doesn't write the PC on exiting the translation loop so you need to ensure + * something (gen_a64_set_pc_im or runtime helper) has done so before we reach + * return from cpu_tb_exec. */ #define DISAS_EXIT DISAS_TARGET_9 From 329833286d7a1b0ef8c7daafe13c6ae32429694e Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 25 Jun 2020 20:31:04 -0700 Subject: [PATCH 18/57] target/arm: Add DISAS_UPDATE_NOCHAIN Add an option that writes back the PC, like DISAS_UPDATE_EXIT, but does not exit back to the main loop. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20200626033144.790098-7-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/translate-a64.c | 3 +++ target/arm/translate.c | 4 ++++ target/arm/translate.h | 2 ++ 3 files changed, 9 insertions(+) diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index e4795ae100..027be7d8c2 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -14298,6 +14298,9 @@ static void aarch64_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu) case DISAS_EXIT: tcg_gen_exit_tb(NULL, 0); break; + case DISAS_UPDATE_NOCHAIN: + gen_a64_set_pc_im(dc->base.pc_next); + /* fall through */ case DISAS_JUMP: tcg_gen_lookup_and_goto_ptr(); break; diff --git a/target/arm/translate.c b/target/arm/translate.c index 146ff5ddc2..c39a929b93 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -8874,6 +8874,7 @@ static void arm_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu) case DISAS_NEXT: case DISAS_TOO_MANY: case DISAS_UPDATE_EXIT: + case DISAS_UPDATE_NOCHAIN: gen_set_pc_im(dc, dc->base.pc_next); /* fall through */ default: @@ -8897,6 +8898,9 @@ static void arm_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu) case DISAS_TOO_MANY: gen_goto_tb(dc, 1, dc->base.pc_next); break; + case DISAS_UPDATE_NOCHAIN: + gen_set_pc_im(dc, dc->base.pc_next); + /* fall through */ case DISAS_JUMP: gen_goto_ptr(); break; diff --git a/target/arm/translate.h b/target/arm/translate.h index d5edef2943..6dfe24cedc 100644 --- a/target/arm/translate.h +++ b/target/arm/translate.h @@ -173,6 +173,8 @@ static inline void disas_set_insn_syndrome(DisasContext *s, uint32_t syn) * return from cpu_tb_exec. */ #define DISAS_EXIT DISAS_TARGET_9 +/* CPU state was modified dynamically; no need to exit, but do not chain. */ +#define DISAS_UPDATE_NOCHAIN DISAS_TARGET_10 #ifdef TARGET_AARCH64 void a64_translate_init(void); From 4b779cebb3e5ab30b945181f1ba3932f5f8a1cb5 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 25 Jun 2020 20:31:05 -0700 Subject: [PATCH 19/57] target/arm: Add MTE system registers This is TFSRE0_EL1, TFSR_EL1, TFSR_EL2, TFSR_EL3, RGSR_EL1, GCR_EL1, GMID_EL1, and PSTATE.TCO. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20200626033144.790098-8-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/cpu.h | 4 ++ target/arm/helper.c | 94 ++++++++++++++++++++++++++++++++++++++ target/arm/internals.h | 9 ++++ target/arm/translate-a64.c | 21 +++++++++ 4 files changed, 128 insertions(+) diff --git a/target/arm/cpu.h b/target/arm/cpu.h index ff70115801..0a98b6a06d 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h @@ -502,6 +502,9 @@ typedef struct CPUARMState { uint64_t pmccfiltr_el0; /* Performance Monitor Filter Register */ uint64_t vpidr_el2; /* Virtualization Processor ID Register */ uint64_t vmpidr_el2; /* Virtualization Multiprocessor ID Register */ + uint64_t tfsr_el[4]; /* tfsre0_el1 is index 0. */ + uint64_t gcr_el1; + uint64_t rgsr_el1; } cp15; struct { @@ -1282,6 +1285,7 @@ void pmu_init(ARMCPU *cpu); #define PSTATE_SS (1U << 21) #define PSTATE_PAN (1U << 22) #define PSTATE_UAO (1U << 23) +#define PSTATE_TCO (1U << 25) #define PSTATE_V (1U << 28) #define PSTATE_C (1U << 29) #define PSTATE_Z (1U << 30) diff --git a/target/arm/helper.c b/target/arm/helper.c index d6c326b58e..b4842ea23e 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -5881,6 +5881,9 @@ static void define_arm_vh_e2h_redirects_aliases(ARMCPU *cpu) { K(3, 0, 1, 2, 0), K(3, 4, 1, 2, 0), K(3, 5, 1, 2, 0), "ZCR_EL1", "ZCR_EL2", "ZCR_EL12", isar_feature_aa64_sve }, + { K(3, 0, 5, 6, 0), K(3, 4, 5, 6, 0), K(3, 5, 5, 6, 0), + "TFSR_EL1", "TFSR_EL2", "TFSR_EL12", isar_feature_aa64_mte }, + /* TODO: ARMv8.2-SPE -- PMSCR_EL2 */ /* TODO: ARMv8.4-Trace -- TRFCR_EL2 */ }; @@ -6855,6 +6858,86 @@ static const ARMCPRegInfo dcpodp_reg[] = { }; #endif /*CONFIG_USER_ONLY*/ +static CPAccessResult access_aa64_tid5(CPUARMState *env, const ARMCPRegInfo *ri, + bool isread) +{ + if ((arm_current_el(env) < 2) && (arm_hcr_el2_eff(env) & HCR_TID5)) { + return CP_ACCESS_TRAP_EL2; + } + + return CP_ACCESS_OK; +} + +static CPAccessResult access_mte(CPUARMState *env, const ARMCPRegInfo *ri, + bool isread) +{ + int el = arm_current_el(env); + + if (el < 2 && + arm_feature(env, ARM_FEATURE_EL2) && + !(arm_hcr_el2_eff(env) & HCR_ATA)) { + return CP_ACCESS_TRAP_EL2; + } + if (el < 3 && + arm_feature(env, ARM_FEATURE_EL3) && + !(env->cp15.scr_el3 & SCR_ATA)) { + return CP_ACCESS_TRAP_EL3; + } + return CP_ACCESS_OK; +} + +static uint64_t tco_read(CPUARMState *env, const ARMCPRegInfo *ri) +{ + return env->pstate & PSTATE_TCO; +} + +static void tco_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t val) +{ + env->pstate = (env->pstate & ~PSTATE_TCO) | (val & PSTATE_TCO); +} + +static const ARMCPRegInfo mte_reginfo[] = { + { .name = "TFSRE0_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 5, .crm = 6, .opc2 = 1, + .access = PL1_RW, .accessfn = access_mte, + .fieldoffset = offsetof(CPUARMState, cp15.tfsr_el[0]) }, + { .name = "TFSR_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 5, .crm = 6, .opc2 = 0, + .access = PL1_RW, .accessfn = access_mte, + .fieldoffset = offsetof(CPUARMState, cp15.tfsr_el[1]) }, + { .name = "TFSR_EL2", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 4, .crn = 5, .crm = 6, .opc2 = 0, + .access = PL2_RW, .accessfn = access_mte, + .fieldoffset = offsetof(CPUARMState, cp15.tfsr_el[2]) }, + { .name = "TFSR_EL3", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 6, .crn = 5, .crm = 6, .opc2 = 0, + .access = PL3_RW, + .fieldoffset = offsetof(CPUARMState, cp15.tfsr_el[3]) }, + { .name = "RGSR_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 1, .crm = 0, .opc2 = 5, + .access = PL1_RW, .accessfn = access_mte, + .fieldoffset = offsetof(CPUARMState, cp15.rgsr_el1) }, + { .name = "GCR_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 1, .crm = 0, .opc2 = 6, + .access = PL1_RW, .accessfn = access_mte, + .fieldoffset = offsetof(CPUARMState, cp15.gcr_el1) }, + { .name = "GMID_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 1, .crn = 0, .crm = 0, .opc2 = 4, + .access = PL1_R, .accessfn = access_aa64_tid5, + .type = ARM_CP_CONST, .resetvalue = GMID_EL1_BS }, + { .name = "TCO", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 3, .crn = 4, .crm = 2, .opc2 = 7, + .type = ARM_CP_NO_RAW, + .access = PL0_RW, .readfn = tco_read, .writefn = tco_write }, + REGINFO_SENTINEL +}; + +static const ARMCPRegInfo mte_tco_ro_reginfo[] = { + { .name = "TCO", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 3, .crn = 4, .crm = 2, .opc2 = 7, + .type = ARM_CP_CONST, .access = PL0_RW, }, + REGINFO_SENTINEL +}; #endif static CPAccessResult access_predinv(CPUARMState *env, const ARMCPRegInfo *ri, @@ -7980,6 +8063,17 @@ void register_cp_regs_for_features(ARMCPU *cpu) } } #endif /*CONFIG_USER_ONLY*/ + + /* + * If full MTE is enabled, add all of the system registers. + * If only "instructions available at EL0" are enabled, + * then define only a RAZ/WI version of PSTATE.TCO. + */ + if (cpu_isar_feature(aa64_mte, cpu)) { + define_arm_cp_regs(cpu, mte_reginfo); + } else if (cpu_isar_feature(aa64_mte_insn_reg, cpu)) { + define_arm_cp_regs(cpu, mte_tco_ro_reginfo); + } #endif if (cpu_isar_feature(any_predinv, cpu)) { diff --git a/target/arm/internals.h b/target/arm/internals.h index 4bdbc3a8ac..56b4672685 100644 --- a/target/arm/internals.h +++ b/target/arm/internals.h @@ -1159,6 +1159,9 @@ static inline uint32_t aarch64_pstate_valid_mask(const ARMISARegisters *id) if (isar_feature_aa64_uao(id)) { valid |= PSTATE_UAO; } + if (isar_feature_aa64_mte(id)) { + valid |= PSTATE_TCO; + } return valid; } @@ -1234,4 +1237,10 @@ void arm_log_exception(int idx); #endif /* !CONFIG_USER_ONLY */ +/* + * The log2 of the words in the tag block, for GMID_EL1.BS. + * The is the maximum, 256 bytes, which manipulates 64-bits of tags. + */ +#define GMID_EL1_BS 6 + #endif diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index 027be7d8c2..d4793dd8fe 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -1619,6 +1619,27 @@ static void handle_msr_i(DisasContext *s, uint32_t insn, s->base.is_jmp = DISAS_UPDATE_EXIT; break; + case 0x1c: /* TCO */ + if (dc_isar_feature(aa64_mte, s)) { + /* Full MTE is enabled -- set the TCO bit as directed. */ + if (crm & 1) { + set_pstate_bits(PSTATE_TCO); + } else { + clear_pstate_bits(PSTATE_TCO); + } + t1 = tcg_const_i32(s->current_el); + gen_helper_rebuild_hflags_a64(cpu_env, t1); + tcg_temp_free_i32(t1); + /* Many factors, including TCO, go into MTE_ACTIVE. */ + s->base.is_jmp = DISAS_UPDATE_NOCHAIN; + } else if (dc_isar_feature(aa64_mte_insn_reg, s)) { + /* Only "instructions accessible at EL0" -- PSTATE.TCO is WI. */ + s->base.is_jmp = DISAS_NEXT; + } else { + goto do_unallocated; + } + break; + default: do_unallocated: unallocated_encoding(s); From 81ae05fa2d21ac1a0054935b74342aa38a5ecef7 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 25 Jun 2020 20:31:06 -0700 Subject: [PATCH 20/57] target/arm: Add MTE bits to tb_flags Cache the composite ATA setting. Cache when MTE is fully enabled, i.e. access to tags are enabled and tag checks affect the PE. Do this for both the normal context and the UNPRIV context. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20200626033144.790098-9-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/cpu.h | 12 ++++++++---- target/arm/helper.c | 40 ++++++++++++++++++++++++++++++++++++++ target/arm/internals.h | 18 +++++++++++++++++ target/arm/translate-a64.c | 4 ++++ target/arm/translate.h | 5 +++++ 5 files changed, 75 insertions(+), 4 deletions(-) diff --git a/target/arm/cpu.h b/target/arm/cpu.h index 0a98b6a06d..cb4f6ba69f 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h @@ -3187,10 +3187,10 @@ typedef ARMCPU ArchCPU; * | | | TBFLAG_A32 | | * | | +-----+----------+ TBFLAG_AM32 | * | TBFLAG_ANY | |TBFLAG_M32| | - * | | +-+----------+--------------| - * | | | TBFLAG_A64 | - * +--------------+---------+---------------------------+ - * 31 20 15 0 + * | +-----------+----------+--------------| + * | | TBFLAG_A64 | + * +--------------+-------------------------------------+ + * 31 20 0 * * Unless otherwise noted, these bits are cached in env->hflags. */ @@ -3257,6 +3257,10 @@ FIELD(TBFLAG_A64, BT, 9, 1) FIELD(TBFLAG_A64, BTYPE, 10, 2) /* Not cached. */ FIELD(TBFLAG_A64, TBID, 12, 2) FIELD(TBFLAG_A64, UNPRIV, 14, 1) +FIELD(TBFLAG_A64, ATA, 15, 1) +FIELD(TBFLAG_A64, TCMA, 16, 2) +FIELD(TBFLAG_A64, MTE_ACTIVE, 18, 1) +FIELD(TBFLAG_A64, MTE0_ACTIVE, 19, 1) /** * cpu_mmu_index: diff --git a/target/arm/helper.c b/target/arm/helper.c index b4842ea23e..2c6ec244af 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -10655,6 +10655,16 @@ static int aa64_va_parameter_tbid(uint64_t tcr, ARMMMUIdx mmu_idx) } } +static int aa64_va_parameter_tcma(uint64_t tcr, ARMMMUIdx mmu_idx) +{ + if (regime_has_2_ranges(mmu_idx)) { + return extract64(tcr, 57, 2); + } else { + /* Replicate the single TCMA bit so we always have 2 bits. */ + return extract32(tcr, 30, 1) * 3; + } +} + ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va, ARMMMUIdx mmu_idx, bool data) { @@ -12679,6 +12689,36 @@ static uint32_t rebuild_hflags_a64(CPUARMState *env, int el, int fp_el, } } + if (cpu_isar_feature(aa64_mte, env_archcpu(env))) { + /* + * Set MTE_ACTIVE if any access may be Checked, and leave clear + * if all accesses must be Unchecked: + * 1) If no TBI, then there are no tags in the address to check, + * 2) If Tag Check Override, then all accesses are Unchecked, + * 3) If Tag Check Fail == 0, then Checked access have no effect, + * 4) If no Allocation Tag Access, then all accesses are Unchecked. + */ + if (allocation_tag_access_enabled(env, el, sctlr)) { + flags = FIELD_DP32(flags, TBFLAG_A64, ATA, 1); + if (tbid + && !(env->pstate & PSTATE_TCO) + && (sctlr & (el == 0 ? SCTLR_TCF0 : SCTLR_TCF))) { + flags = FIELD_DP32(flags, TBFLAG_A64, MTE_ACTIVE, 1); + } + } + /* And again for unprivileged accesses, if required. */ + if (FIELD_EX32(flags, TBFLAG_A64, UNPRIV) + && tbid + && !(env->pstate & PSTATE_TCO) + && (sctlr & SCTLR_TCF0) + && allocation_tag_access_enabled(env, 0, sctlr)) { + flags = FIELD_DP32(flags, TBFLAG_A64, MTE0_ACTIVE, 1); + } + /* Cache TCMA as well as TBI. */ + flags = FIELD_DP32(flags, TBFLAG_A64, TCMA, + aa64_va_parameter_tcma(tcr, mmu_idx)); + } + return rebuild_hflags_common(env, fp_el, mmu_idx, flags); } diff --git a/target/arm/internals.h b/target/arm/internals.h index 56b4672685..53e249687b 100644 --- a/target/arm/internals.h +++ b/target/arm/internals.h @@ -1198,6 +1198,24 @@ static inline int exception_target_el(CPUARMState *env) return target_el; } +/* Determine if allocation tags are available. */ +static inline bool allocation_tag_access_enabled(CPUARMState *env, int el, + uint64_t sctlr) +{ + if (el < 3 + && arm_feature(env, ARM_FEATURE_EL3) + && !(env->cp15.scr_el3 & SCR_ATA)) { + return false; + } + if (el < 2 + && arm_feature(env, ARM_FEATURE_EL2) + && !(arm_hcr_el2_eff(env) & HCR_ATA)) { + return false; + } + sctlr &= (el == 0 ? SCTLR_ATA0 : SCTLR_ATA); + return sctlr != 0; +} + #ifndef CONFIG_USER_ONLY /* Security attributes for an address, as returned by v8m_security_lookup. */ diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index d4793dd8fe..55f49585be 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -14171,6 +14171,7 @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase, dc->mmu_idx = core_to_aa64_mmu_idx(core_mmu_idx); dc->tbii = FIELD_EX32(tb_flags, TBFLAG_A64, TBII); dc->tbid = FIELD_EX32(tb_flags, TBFLAG_A64, TBID); + dc->tcma = FIELD_EX32(tb_flags, TBFLAG_A64, TCMA); dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx); #if !defined(CONFIG_USER_ONLY) dc->user = (dc->current_el == 0); @@ -14182,6 +14183,9 @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase, dc->bt = FIELD_EX32(tb_flags, TBFLAG_A64, BT); dc->btype = FIELD_EX32(tb_flags, TBFLAG_A64, BTYPE); dc->unpriv = FIELD_EX32(tb_flags, TBFLAG_A64, UNPRIV); + dc->ata = FIELD_EX32(tb_flags, TBFLAG_A64, ATA); + dc->mte_active[0] = FIELD_EX32(tb_flags, TBFLAG_A64, MTE_ACTIVE); + dc->mte_active[1] = FIELD_EX32(tb_flags, TBFLAG_A64, MTE0_ACTIVE); dc->vec_len = 0; dc->vec_stride = 0; dc->cp_regs = arm_cpu->cp_regs; diff --git a/target/arm/translate.h b/target/arm/translate.h index 6dfe24cedc..98bcc37c47 100644 --- a/target/arm/translate.h +++ b/target/arm/translate.h @@ -30,6 +30,7 @@ typedef struct DisasContext { ARMMMUIdx mmu_idx; /* MMU index to use for normal loads/stores */ uint8_t tbii; /* TBI1|TBI0 for insns */ uint8_t tbid; /* TBI1|TBI0 for data */ + uint8_t tcma; /* TCMA1|TCMA0 for MTE */ bool ns; /* Use non-secure CPREG bank on access */ int fp_excp_el; /* FP exception EL or 0 if enabled */ int sve_excp_el; /* SVE exception EL or 0 if enabled */ @@ -77,6 +78,10 @@ typedef struct DisasContext { bool unpriv; /* True if v8.3-PAuth is active. */ bool pauth_active; + /* True if v8.5-MTE access to tags is enabled. */ + bool ata; + /* True if v8.5-MTE tag checks affect the PE; index with is_unpriv. */ + bool mte_active[2]; /* True with v8.5-BTI and SCTLR_ELx.BT* set. */ bool bt; /* True if any CP15 access is trapped by HSTR_EL2 */ From da54941f45b820cbaca72aa6efd5669b3dc86e2f Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 25 Jun 2020 20:31:07 -0700 Subject: [PATCH 21/57] target/arm: Implement the IRG instruction Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20200626033144.790098-10-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/Makefile.objs | 1 + target/arm/helper-a64.h | 2 ++ target/arm/internals.h | 5 +++ target/arm/mte_helper.c | 72 ++++++++++++++++++++++++++++++++++++++ target/arm/translate-a64.c | 18 ++++++++++ 5 files changed, 98 insertions(+) create mode 100644 target/arm/mte_helper.c diff --git a/target/arm/Makefile.objs b/target/arm/Makefile.objs index 83febd232c..fa39fd7c83 100644 --- a/target/arm/Makefile.objs +++ b/target/arm/Makefile.objs @@ -86,3 +86,4 @@ obj-$(CONFIG_SOFTMMU) += psci.o obj-$(TARGET_AARCH64) += translate-a64.o helper-a64.o obj-$(TARGET_AARCH64) += translate-sve.o sve_helper.o obj-$(TARGET_AARCH64) += pauth_helper.o +obj-$(TARGET_AARCH64) += mte_helper.o diff --git a/target/arm/helper-a64.h b/target/arm/helper-a64.h index 3df7c185aa..587ccbe42f 100644 --- a/target/arm/helper-a64.h +++ b/target/arm/helper-a64.h @@ -103,3 +103,5 @@ DEF_HELPER_FLAGS_3(autda, TCG_CALL_NO_WG, i64, env, i64, i64) DEF_HELPER_FLAGS_3(autdb, TCG_CALL_NO_WG, i64, env, i64, i64) DEF_HELPER_FLAGS_2(xpaci, TCG_CALL_NO_RWG_SE, i64, env, i64) DEF_HELPER_FLAGS_2(xpacd, TCG_CALL_NO_RWG_SE, i64, env, i64) + +DEF_HELPER_FLAGS_3(irg, TCG_CALL_NO_RWG, i64, env, i64, i64) diff --git a/target/arm/internals.h b/target/arm/internals.h index 53e249687b..ae611a6ff5 100644 --- a/target/arm/internals.h +++ b/target/arm/internals.h @@ -1261,4 +1261,9 @@ void arm_log_exception(int idx); */ #define GMID_EL1_BS 6 +static inline uint64_t address_with_allocation_tag(uint64_t ptr, int rtag) +{ + return deposit64(ptr, 56, 4, rtag); +} + #endif diff --git a/target/arm/mte_helper.c b/target/arm/mte_helper.c new file mode 100644 index 0000000000..539a04de84 --- /dev/null +++ b/target/arm/mte_helper.c @@ -0,0 +1,72 @@ +/* + * ARM v8.5-MemTag Operations + * + * Copyright (c) 2020 Linaro, Ltd. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#include "qemu/osdep.h" +#include "cpu.h" +#include "internals.h" +#include "exec/exec-all.h" +#include "exec/cpu_ldst.h" +#include "exec/helper-proto.h" + + +static int choose_nonexcluded_tag(int tag, int offset, uint16_t exclude) +{ + if (exclude == 0xffff) { + return 0; + } + if (offset == 0) { + while (exclude & (1 << tag)) { + tag = (tag + 1) & 15; + } + } else { + do { + do { + tag = (tag + 1) & 15; + } while (exclude & (1 << tag)); + } while (--offset > 0); + } + return tag; +} + +uint64_t HELPER(irg)(CPUARMState *env, uint64_t rn, uint64_t rm) +{ + int rtag; + + /* + * Our IMPDEF choice for GCR_EL1.RRND==1 is to behave as if + * GCR_EL1.RRND==0, always producing deterministic results. + */ + uint16_t exclude = extract32(rm | env->cp15.gcr_el1, 0, 16); + int start = extract32(env->cp15.rgsr_el1, 0, 4); + int seed = extract32(env->cp15.rgsr_el1, 8, 16); + int offset, i; + + /* RandomTag */ + for (i = offset = 0; i < 4; ++i) { + /* NextRandomTagBit */ + int top = (extract32(seed, 5, 1) ^ extract32(seed, 3, 1) ^ + extract32(seed, 2, 1) ^ extract32(seed, 0, 1)); + seed = (top << 15) | (seed >> 1); + offset |= top << i; + } + rtag = choose_nonexcluded_tag(start, offset, exclude); + env->cp15.rgsr_el1 = rtag | (seed << 8); + + return address_with_allocation_tag(rn, rtag); +} diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index 55f49585be..30683061f9 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -226,6 +226,12 @@ static TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr) return clean; } +/* Insert a zero tag into src, with the result at dst. */ +static void gen_address_with_allocation_tag0(TCGv_i64 dst, TCGv_i64 src) +{ + tcg_gen_andi_i64(dst, src, ~MAKE_64BIT_MASK(56, 4)); +} + typedef struct DisasCompare64 { TCGCond cond; TCGv_i64 value; @@ -5284,6 +5290,18 @@ static void disas_data_proc_2src(DisasContext *s, uint32_t insn) case 3: /* SDIV */ handle_div(s, true, sf, rm, rn, rd); break; + case 4: /* IRG */ + if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) { + goto do_unallocated; + } + if (s->ata) { + gen_helper_irg(cpu_reg_sp(s, rd), cpu_env, + cpu_reg_sp(s, rn), cpu_reg(s, rm)); + } else { + gen_address_with_allocation_tag0(cpu_reg_sp(s, rd), + cpu_reg_sp(s, rn)); + } + break; case 8: /* LSLV */ handle_shift_reg(s, A64_SHIFT_TYPE_LSL, sf, rm, rn, rd); break; From 21a8b343eaae63f6984f9a200092b0ea167647f1 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 25 Jun 2020 20:31:08 -0700 Subject: [PATCH 22/57] target/arm: Revise decoding for disas_add_sub_imm The current Arm ARM has adjusted the official decode of "Add/subtract (immediate)" so that the shift field is only bit 22, and bit 23 is part of the op1 field of the parent category "Data processing - immediate". Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20200626033144.790098-11-richard.henderson@linaro.org Suggested-by: Peter Maydell Signed-off-by: Richard Henderson Signed-off-by: Peter Maydell --- target/arm/translate-a64.c | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index 30683061f9..03aa092598 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -3754,22 +3754,22 @@ static void disas_pc_rel_adr(DisasContext *s, uint32_t insn) /* * Add/subtract (immediate) * - * 31 30 29 28 24 23 22 21 10 9 5 4 0 - * +--+--+--+-----------+-----+-------------+-----+-----+ - * |sf|op| S| 1 0 0 0 1 |shift| imm12 | Rn | Rd | - * +--+--+--+-----------+-----+-------------+-----+-----+ + * 31 30 29 28 23 22 21 10 9 5 4 0 + * +--+--+--+-------------+--+-------------+-----+-----+ + * |sf|op| S| 1 0 0 0 1 0 |sh| imm12 | Rn | Rd | + * +--+--+--+-------------+--+-------------+-----+-----+ * * sf: 0 -> 32bit, 1 -> 64bit * op: 0 -> add , 1 -> sub * S: 1 -> set flags - * shift: 00 -> LSL imm by 0, 01 -> LSL imm by 12 + * sh: 1 -> LSL imm by 12 */ static void disas_add_sub_imm(DisasContext *s, uint32_t insn) { int rd = extract32(insn, 0, 5); int rn = extract32(insn, 5, 5); uint64_t imm = extract32(insn, 10, 12); - int shift = extract32(insn, 22, 2); + bool shift = extract32(insn, 22, 1); bool setflags = extract32(insn, 29, 1); bool sub_op = extract32(insn, 30, 1); bool is_64bit = extract32(insn, 31, 1); @@ -3778,15 +3778,8 @@ static void disas_add_sub_imm(DisasContext *s, uint32_t insn) TCGv_i64 tcg_rd = setflags ? cpu_reg(s, rd) : cpu_reg_sp(s, rd); TCGv_i64 tcg_result; - switch (shift) { - case 0x0: - break; - case 0x1: + if (shift) { imm <<= 12; - break; - default: - unallocated_encoding(s); - return; } tcg_result = tcg_temp_new_i64(); @@ -4174,7 +4167,7 @@ static void disas_data_proc_imm(DisasContext *s, uint32_t insn) case 0x20: case 0x21: /* PC-rel. addressing */ disas_pc_rel_adr(s, insn); break; - case 0x22: case 0x23: /* Add/subtract (immediate) */ + case 0x22: /* Add/subtract (immediate) */ disas_add_sub_imm(s, insn); break; case 0x24: /* Logical (immediate) */ From efbc78ad978763aedd11cb718eb1ff8db3fc9152 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 25 Jun 2020 20:31:09 -0700 Subject: [PATCH 23/57] target/arm: Implement the ADDG, SUBG instructions Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20200626033144.790098-12-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/helper-a64.h | 1 + target/arm/internals.h | 9 +++++++ target/arm/mte_helper.c | 10 ++++++++ target/arm/translate-a64.c | 51 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 71 insertions(+) diff --git a/target/arm/helper-a64.h b/target/arm/helper-a64.h index 587ccbe42f..6c116481e8 100644 --- a/target/arm/helper-a64.h +++ b/target/arm/helper-a64.h @@ -105,3 +105,4 @@ DEF_HELPER_FLAGS_2(xpaci, TCG_CALL_NO_RWG_SE, i64, env, i64) DEF_HELPER_FLAGS_2(xpacd, TCG_CALL_NO_RWG_SE, i64, env, i64) DEF_HELPER_FLAGS_3(irg, TCG_CALL_NO_RWG, i64, env, i64, i64) +DEF_HELPER_FLAGS_4(addsubg, TCG_CALL_NO_RWG_SE, i64, env, i64, s32, i32) diff --git a/target/arm/internals.h b/target/arm/internals.h index ae611a6ff5..5c69d4e5a5 100644 --- a/target/arm/internals.h +++ b/target/arm/internals.h @@ -1261,6 +1261,15 @@ void arm_log_exception(int idx); */ #define GMID_EL1_BS 6 +/* We associate one allocation tag per 16 bytes, the minimum. */ +#define LOG2_TAG_GRANULE 4 +#define TAG_GRANULE (1 << LOG2_TAG_GRANULE) + +static inline int allocation_tag_from_addr(uint64_t ptr) +{ + return extract64(ptr, 56, 4); +} + static inline uint64_t address_with_allocation_tag(uint64_t ptr, int rtag) { return deposit64(ptr, 56, 4, rtag); diff --git a/target/arm/mte_helper.c b/target/arm/mte_helper.c index 539a04de84..9ab9ed749d 100644 --- a/target/arm/mte_helper.c +++ b/target/arm/mte_helper.c @@ -70,3 +70,13 @@ uint64_t HELPER(irg)(CPUARMState *env, uint64_t rn, uint64_t rm) return address_with_allocation_tag(rn, rtag); } + +uint64_t HELPER(addsubg)(CPUARMState *env, uint64_t ptr, + int32_t offset, uint32_t tag_offset) +{ + int start_tag = allocation_tag_from_addr(ptr); + uint16_t exclude = extract32(env->cp15.gcr_el1, 0, 16); + int rtag = choose_nonexcluded_tag(start_tag, tag_offset, exclude); + + return address_with_allocation_tag(ptr + offset, rtag); +} diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index 03aa092598..2ec02c8a5f 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -3808,6 +3808,54 @@ static void disas_add_sub_imm(DisasContext *s, uint32_t insn) tcg_temp_free_i64(tcg_result); } +/* + * Add/subtract (immediate, with tags) + * + * 31 30 29 28 23 22 21 16 14 10 9 5 4 0 + * +--+--+--+-------------+--+---------+--+-------+-----+-----+ + * |sf|op| S| 1 0 0 0 1 1 |o2| uimm6 |o3| uimm4 | Rn | Rd | + * +--+--+--+-------------+--+---------+--+-------+-----+-----+ + * + * op: 0 -> add, 1 -> sub + */ +static void disas_add_sub_imm_with_tags(DisasContext *s, uint32_t insn) +{ + int rd = extract32(insn, 0, 5); + int rn = extract32(insn, 5, 5); + int uimm4 = extract32(insn, 10, 4); + int uimm6 = extract32(insn, 16, 6); + bool sub_op = extract32(insn, 30, 1); + TCGv_i64 tcg_rn, tcg_rd; + int imm; + + /* Test all of sf=1, S=0, o2=0, o3=0. */ + if ((insn & 0xa040c000u) != 0x80000000u || + !dc_isar_feature(aa64_mte_insn_reg, s)) { + unallocated_encoding(s); + return; + } + + imm = uimm6 << LOG2_TAG_GRANULE; + if (sub_op) { + imm = -imm; + } + + tcg_rn = cpu_reg_sp(s, rn); + tcg_rd = cpu_reg_sp(s, rd); + + if (s->ata) { + TCGv_i32 offset = tcg_const_i32(imm); + TCGv_i32 tag_offset = tcg_const_i32(uimm4); + + gen_helper_addsubg(tcg_rd, cpu_env, tcg_rn, offset, tag_offset); + tcg_temp_free_i32(tag_offset); + tcg_temp_free_i32(offset); + } else { + tcg_gen_addi_i64(tcg_rd, tcg_rn, imm); + gen_address_with_allocation_tag0(tcg_rd, tcg_rd); + } +} + /* The input should be a value in the bottom e bits (with higher * bits zero); returns that value replicated into every element * of size e in a 64 bit integer. @@ -4170,6 +4218,9 @@ static void disas_data_proc_imm(DisasContext *s, uint32_t insn) case 0x22: /* Add/subtract (immediate) */ disas_add_sub_imm(s, insn); break; + case 0x23: /* Add/subtract (immediate, with tags) */ + disas_add_sub_imm_with_tags(s, insn); + break; case 0x24: /* Logical (immediate) */ disas_logic_imm(s, insn); break; From 438efea0bb639c9c2dfb42c8d9459e21aa183c8a Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 25 Jun 2020 20:31:10 -0700 Subject: [PATCH 24/57] target/arm: Implement the GMI instruction Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20200626033144.790098-13-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/translate-a64.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index 2ec02c8a5f..ee9dfa8e43 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -5346,6 +5346,21 @@ static void disas_data_proc_2src(DisasContext *s, uint32_t insn) cpu_reg_sp(s, rn)); } break; + case 5: /* GMI */ + if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) { + goto do_unallocated; + } else { + TCGv_i64 t1 = tcg_const_i64(1); + TCGv_i64 t2 = tcg_temp_new_i64(); + + tcg_gen_extract_i64(t2, cpu_reg_sp(s, rn), 56, 4); + tcg_gen_shl_i64(t1, t1, t2); + tcg_gen_or_i64(cpu_reg(s, rd), cpu_reg(s, rm), t1); + + tcg_temp_free_i64(t1); + tcg_temp_free_i64(t2); + } + break; case 8: /* LSLV */ handle_shift_reg(s, A64_SHIFT_TYPE_LSL, sf, rm, rn, rd); break; From dad3015f55f8d48f84f0eae36021a9c6f9587e57 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 25 Jun 2020 20:31:11 -0700 Subject: [PATCH 25/57] target/arm: Implement the SUBP instruction Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20200626033144.790098-14-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/translate-a64.c | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index ee9dfa8e43..abbcdbb53a 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -5315,19 +5315,39 @@ static void handle_crc32(DisasContext *s, */ static void disas_data_proc_2src(DisasContext *s, uint32_t insn) { - unsigned int sf, rm, opcode, rn, rd; + unsigned int sf, rm, opcode, rn, rd, setflag; sf = extract32(insn, 31, 1); + setflag = extract32(insn, 29, 1); rm = extract32(insn, 16, 5); opcode = extract32(insn, 10, 6); rn = extract32(insn, 5, 5); rd = extract32(insn, 0, 5); - if (extract32(insn, 29, 1)) { + if (setflag && opcode != 0) { unallocated_encoding(s); return; } switch (opcode) { + case 0: /* SUBP(S) */ + if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) { + goto do_unallocated; + } else { + TCGv_i64 tcg_n, tcg_m, tcg_d; + + tcg_n = read_cpu_reg_sp(s, rn, true); + tcg_m = read_cpu_reg_sp(s, rm, true); + tcg_gen_sextract_i64(tcg_n, tcg_n, 0, 56); + tcg_gen_sextract_i64(tcg_m, tcg_m, 0, 56); + tcg_d = cpu_reg(s, rd); + + if (setflag) { + gen_sub_CC(true, tcg_d, tcg_n, tcg_m); + } else { + tcg_gen_sub_i64(tcg_d, tcg_n, tcg_m); + } + } + break; case 2: /* UDIV */ handle_div(s, false, sf, rm, rn, rd); break; From 0d1762e931f8a694f261c604daba605bcda70928 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 25 Jun 2020 20:31:12 -0700 Subject: [PATCH 26/57] target/arm: Define arm_cpu_do_unaligned_access for user-only Use the same code as system mode, so that we generate the same exception + syndrome for the unaligned access. For the moment, if MTE is enabled so that this path is reachable, this would generate a SIGSEGV in the user-only cpu_loop. Decoding the syndrome to produce the proper SIGBUS will be done later. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20200626033144.790098-15-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/cpu.c | 2 +- target/arm/tlb_helper.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/target/arm/cpu.c b/target/arm/cpu.c index e44e18062c..d9b8ec791e 100644 --- a/target/arm/cpu.c +++ b/target/arm/cpu.c @@ -2169,8 +2169,8 @@ static void arm_cpu_class_init(ObjectClass *oc, void *data) cc->tlb_fill = arm_cpu_tlb_fill; cc->debug_excp_handler = arm_debug_excp_handler; cc->debug_check_watchpoint = arm_debug_check_watchpoint; -#if !defined(CONFIG_USER_ONLY) cc->do_unaligned_access = arm_cpu_do_unaligned_access; +#if !defined(CONFIG_USER_ONLY) cc->do_transaction_failed = arm_cpu_do_transaction_failed; cc->adjust_watchpoint_address = arm_adjust_watchpoint_address; #endif /* CONFIG_TCG && !CONFIG_USER_ONLY */ diff --git a/target/arm/tlb_helper.c b/target/arm/tlb_helper.c index 7388494a55..522a6442a4 100644 --- a/target/arm/tlb_helper.c +++ b/target/arm/tlb_helper.c @@ -10,8 +10,6 @@ #include "internals.h" #include "exec/exec-all.h" -#if !defined(CONFIG_USER_ONLY) - static inline uint32_t merge_syn_data_abort(uint32_t template_syn, unsigned int target_el, bool same_el, bool ea, @@ -122,6 +120,8 @@ void arm_cpu_do_unaligned_access(CPUState *cs, vaddr vaddr, arm_deliver_fault(cpu, vaddr, access_type, mmu_idx, &fi); } +#if !defined(CONFIG_USER_ONLY) + /* * arm_cpu_do_transaction_failed: handle a memory system error response * (eg "no device/memory present at address") by raising an external abort From c15294c1e36a7dd9b25bd54d98178e80f4b64bc1 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 25 Jun 2020 20:31:13 -0700 Subject: [PATCH 27/57] target/arm: Implement LDG, STG, ST2G instructions Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20200626033144.790098-16-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/helper-a64.h | 7 ++ target/arm/helper.h | 2 + target/arm/mte_helper.c | 194 +++++++++++++++++++++++++++++++++++++ target/arm/op_helper.c | 16 +++ target/arm/translate-a64.c | 172 +++++++++++++++++++++++++++++++- 5 files changed, 386 insertions(+), 5 deletions(-) diff --git a/target/arm/helper-a64.h b/target/arm/helper-a64.h index 6c116481e8..2fa61b86fa 100644 --- a/target/arm/helper-a64.h +++ b/target/arm/helper-a64.h @@ -106,3 +106,10 @@ DEF_HELPER_FLAGS_2(xpacd, TCG_CALL_NO_RWG_SE, i64, env, i64) DEF_HELPER_FLAGS_3(irg, TCG_CALL_NO_RWG, i64, env, i64, i64) DEF_HELPER_FLAGS_4(addsubg, TCG_CALL_NO_RWG_SE, i64, env, i64, s32, i32) +DEF_HELPER_FLAGS_3(ldg, TCG_CALL_NO_WG, i64, env, i64, i64) +DEF_HELPER_FLAGS_3(stg, TCG_CALL_NO_WG, void, env, i64, i64) +DEF_HELPER_FLAGS_3(stg_parallel, TCG_CALL_NO_WG, void, env, i64, i64) +DEF_HELPER_FLAGS_2(stg_stub, TCG_CALL_NO_WG, void, env, i64) +DEF_HELPER_FLAGS_3(st2g, TCG_CALL_NO_WG, void, env, i64, i64) +DEF_HELPER_FLAGS_3(st2g_parallel, TCG_CALL_NO_WG, void, env, i64, i64) +DEF_HELPER_FLAGS_2(st2g_stub, TCG_CALL_NO_WG, void, env, i64) diff --git a/target/arm/helper.h b/target/arm/helper.h index 2a20c8174c..759639a63a 100644 --- a/target/arm/helper.h +++ b/target/arm/helper.h @@ -96,6 +96,8 @@ DEF_HELPER_FLAGS_1(rebuild_hflags_a32_newel, TCG_CALL_NO_RWG, void, env) DEF_HELPER_FLAGS_2(rebuild_hflags_a32, TCG_CALL_NO_RWG, void, env, int) DEF_HELPER_FLAGS_2(rebuild_hflags_a64, TCG_CALL_NO_RWG, void, env, int) +DEF_HELPER_FLAGS_5(probe_access, TCG_CALL_NO_WG, void, env, tl, i32, i32, i32) + DEF_HELPER_1(vfp_get_fpscr, i32, env) DEF_HELPER_2(vfp_set_fpscr, void, env, i32) diff --git a/target/arm/mte_helper.c b/target/arm/mte_helper.c index 9ab9ed749d..7ec7930dfc 100644 --- a/target/arm/mte_helper.c +++ b/target/arm/mte_helper.c @@ -44,6 +44,40 @@ static int choose_nonexcluded_tag(int tag, int offset, uint16_t exclude) return tag; } +/** + * allocation_tag_mem: + * @env: the cpu environment + * @ptr_mmu_idx: the addressing regime to use for the virtual address + * @ptr: the virtual address for which to look up tag memory + * @ptr_access: the access to use for the virtual address + * @ptr_size: the number of bytes in the normal memory access + * @tag_access: the access to use for the tag memory + * @tag_size: the number of bytes in the tag memory access + * @ra: the return address for exception handling + * + * Our tag memory is formatted as a sequence of little-endian nibbles. + * That is, the byte at (addr >> (LOG2_TAG_GRANULE + 1)) contains two + * tags, with the tag at [3:0] for the lower addr and the tag at [7:4] + * for the higher addr. + * + * Here, resolve the physical address from the virtual address, and return + * a pointer to the corresponding tag byte. Exit with exception if the + * virtual address is not accessible for @ptr_access. + * + * The @ptr_size and @tag_size values may not have an obvious relation + * due to the alignment of @ptr, and the number of tag checks required. + * + * If there is no tag storage corresponding to @ptr, return NULL. + */ +static uint8_t *allocation_tag_mem(CPUARMState *env, int ptr_mmu_idx, + uint64_t ptr, MMUAccessType ptr_access, + int ptr_size, MMUAccessType tag_access, + int tag_size, uintptr_t ra) +{ + /* Tag storage not implemented. */ + return NULL; +} + uint64_t HELPER(irg)(CPUARMState *env, uint64_t rn, uint64_t rm) { int rtag; @@ -80,3 +114,163 @@ uint64_t HELPER(addsubg)(CPUARMState *env, uint64_t ptr, return address_with_allocation_tag(ptr + offset, rtag); } + +static int load_tag1(uint64_t ptr, uint8_t *mem) +{ + int ofs = extract32(ptr, LOG2_TAG_GRANULE, 1) * 4; + return extract32(*mem, ofs, 4); +} + +uint64_t HELPER(ldg)(CPUARMState *env, uint64_t ptr, uint64_t xt) +{ + int mmu_idx = cpu_mmu_index(env, false); + uint8_t *mem; + int rtag = 0; + + /* Trap if accessing an invalid page. */ + mem = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_LOAD, 1, + MMU_DATA_LOAD, 1, GETPC()); + + /* Load if page supports tags. */ + if (mem) { + rtag = load_tag1(ptr, mem); + } + + return address_with_allocation_tag(xt, rtag); +} + +static void check_tag_aligned(CPUARMState *env, uint64_t ptr, uintptr_t ra) +{ + if (unlikely(!QEMU_IS_ALIGNED(ptr, TAG_GRANULE))) { + arm_cpu_do_unaligned_access(env_cpu(env), ptr, MMU_DATA_STORE, + cpu_mmu_index(env, false), ra); + g_assert_not_reached(); + } +} + +/* For use in a non-parallel context, store to the given nibble. */ +static void store_tag1(uint64_t ptr, uint8_t *mem, int tag) +{ + int ofs = extract32(ptr, LOG2_TAG_GRANULE, 1) * 4; + *mem = deposit32(*mem, ofs, 4, tag); +} + +/* For use in a parallel context, atomically store to the given nibble. */ +static void store_tag1_parallel(uint64_t ptr, uint8_t *mem, int tag) +{ + int ofs = extract32(ptr, LOG2_TAG_GRANULE, 1) * 4; + uint8_t old = atomic_read(mem); + + while (1) { + uint8_t new = deposit32(old, ofs, 4, tag); + uint8_t cmp = atomic_cmpxchg(mem, old, new); + if (likely(cmp == old)) { + return; + } + old = cmp; + } +} + +typedef void stg_store1(uint64_t, uint8_t *, int); + +static inline void do_stg(CPUARMState *env, uint64_t ptr, uint64_t xt, + uintptr_t ra, stg_store1 store1) +{ + int mmu_idx = cpu_mmu_index(env, false); + uint8_t *mem; + + check_tag_aligned(env, ptr, ra); + + /* Trap if accessing an invalid page. */ + mem = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_STORE, TAG_GRANULE, + MMU_DATA_STORE, 1, ra); + + /* Store if page supports tags. */ + if (mem) { + store1(ptr, mem, allocation_tag_from_addr(xt)); + } +} + +void HELPER(stg)(CPUARMState *env, uint64_t ptr, uint64_t xt) +{ + do_stg(env, ptr, xt, GETPC(), store_tag1); +} + +void HELPER(stg_parallel)(CPUARMState *env, uint64_t ptr, uint64_t xt) +{ + do_stg(env, ptr, xt, GETPC(), store_tag1_parallel); +} + +void HELPER(stg_stub)(CPUARMState *env, uint64_t ptr) +{ + int mmu_idx = cpu_mmu_index(env, false); + uintptr_t ra = GETPC(); + + check_tag_aligned(env, ptr, ra); + probe_write(env, ptr, TAG_GRANULE, mmu_idx, ra); +} + +static inline void do_st2g(CPUARMState *env, uint64_t ptr, uint64_t xt, + uintptr_t ra, stg_store1 store1) +{ + int mmu_idx = cpu_mmu_index(env, false); + int tag = allocation_tag_from_addr(xt); + uint8_t *mem1, *mem2; + + check_tag_aligned(env, ptr, ra); + + /* + * Trap if accessing an invalid page(s). + * This takes priority over !allocation_tag_access_enabled. + */ + if (ptr & TAG_GRANULE) { + /* Two stores unaligned mod TAG_GRANULE*2 -- modify two bytes. */ + mem1 = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_STORE, + TAG_GRANULE, MMU_DATA_STORE, 1, ra); + mem2 = allocation_tag_mem(env, mmu_idx, ptr + TAG_GRANULE, + MMU_DATA_STORE, TAG_GRANULE, + MMU_DATA_STORE, 1, ra); + + /* Store if page(s) support tags. */ + if (mem1) { + store1(TAG_GRANULE, mem1, tag); + } + if (mem2) { + store1(0, mem2, tag); + } + } else { + /* Two stores aligned mod TAG_GRANULE*2 -- modify one byte. */ + mem1 = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_STORE, + 2 * TAG_GRANULE, MMU_DATA_STORE, 1, ra); + if (mem1) { + tag |= tag << 4; + atomic_set(mem1, tag); + } + } +} + +void HELPER(st2g)(CPUARMState *env, uint64_t ptr, uint64_t xt) +{ + do_st2g(env, ptr, xt, GETPC(), store_tag1); +} + +void HELPER(st2g_parallel)(CPUARMState *env, uint64_t ptr, uint64_t xt) +{ + do_st2g(env, ptr, xt, GETPC(), store_tag1_parallel); +} + +void HELPER(st2g_stub)(CPUARMState *env, uint64_t ptr) +{ + int mmu_idx = cpu_mmu_index(env, false); + uintptr_t ra = GETPC(); + int in_page = -(ptr | TARGET_PAGE_MASK); + + check_tag_aligned(env, ptr, ra); + + if (likely(in_page >= 2 * TAG_GRANULE)) { + probe_write(env, ptr, 2 * TAG_GRANULE, mmu_idx, ra); + } else { + probe_write(env, ptr, TAG_GRANULE, mmu_idx, ra); + probe_write(env, ptr + TAG_GRANULE, TAG_GRANULE, mmu_idx, ra); + } +} diff --git a/target/arm/op_helper.c b/target/arm/op_helper.c index eb0de080f1..b1065216b2 100644 --- a/target/arm/op_helper.c +++ b/target/arm/op_helper.c @@ -935,3 +935,19 @@ uint32_t HELPER(ror_cc)(CPUARMState *env, uint32_t x, uint32_t i) return ((uint32_t)x >> shift) | (x << (32 - shift)); } } + +void HELPER(probe_access)(CPUARMState *env, target_ulong ptr, + uint32_t access_type, uint32_t mmu_idx, + uint32_t size) +{ + uint32_t in_page = -((uint32_t)ptr | TARGET_PAGE_SIZE); + uintptr_t ra = GETPC(); + + if (likely(size <= in_page)) { + probe_access(env, ptr, size, access_type, mmu_idx, ra); + } else { + probe_access(env, ptr, in_page, access_type, mmu_idx, ra); + probe_access(env, ptr + in_page, size - in_page, + access_type, mmu_idx, ra); + } +} diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index abbcdbb53a..436191c15c 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -232,6 +232,19 @@ static void gen_address_with_allocation_tag0(TCGv_i64 dst, TCGv_i64 src) tcg_gen_andi_i64(dst, src, ~MAKE_64BIT_MASK(56, 4)); } +static void gen_probe_access(DisasContext *s, TCGv_i64 ptr, + MMUAccessType acc, int log2_size) +{ + TCGv_i32 t_acc = tcg_const_i32(acc); + TCGv_i32 t_idx = tcg_const_i32(get_mem_index(s)); + TCGv_i32 t_size = tcg_const_i32(1 << log2_size); + + gen_helper_probe_access(cpu_env, ptr, t_acc, t_idx, t_size); + tcg_temp_free_i32(t_acc); + tcg_temp_free_i32(t_idx); + tcg_temp_free_i32(t_size); +} + typedef struct DisasCompare64 { TCGCond cond; TCGv_i64 value; @@ -3685,6 +3698,154 @@ static void disas_ldst_single_struct(DisasContext *s, uint32_t insn) } } +/* + * Load/Store memory tags + * + * 31 30 29 24 22 21 12 10 5 0 + * +-----+-------------+-----+---+------+-----+------+------+ + * | 1 1 | 0 1 1 0 0 1 | op1 | 1 | imm9 | op2 | Rn | Rt | + * +-----+-------------+-----+---+------+-----+------+------+ + */ +static void disas_ldst_tag(DisasContext *s, uint32_t insn) +{ + int rt = extract32(insn, 0, 5); + int rn = extract32(insn, 5, 5); + uint64_t offset = sextract64(insn, 12, 9) << LOG2_TAG_GRANULE; + int op2 = extract32(insn, 10, 2); + int op1 = extract32(insn, 22, 2); + bool is_load = false, is_pair = false, is_zero = false; + int index = 0; + TCGv_i64 addr, clean_addr, tcg_rt; + + /* We checked insn bits [29:24,21] in the caller. */ + if (extract32(insn, 30, 2) != 3) { + goto do_unallocated; + } + + /* + * @index is a tri-state variable which has 3 states: + * < 0 : post-index, writeback + * = 0 : signed offset + * > 0 : pre-index, writeback + */ + switch (op1) { + case 0: + if (op2 != 0) { + /* STG */ + index = op2 - 2; + break; + } + goto do_unallocated; + case 1: + if (op2 != 0) { + /* STZG */ + is_zero = true; + index = op2 - 2; + } else { + /* LDG */ + is_load = true; + } + break; + case 2: + if (op2 != 0) { + /* ST2G */ + is_pair = true; + index = op2 - 2; + break; + } + goto do_unallocated; + case 3: + if (op2 != 0) { + /* STZ2G */ + is_pair = is_zero = true; + index = op2 - 2; + break; + } + goto do_unallocated; + + default: + do_unallocated: + unallocated_encoding(s); + return; + } + + if (!dc_isar_feature(aa64_mte_insn_reg, s)) { + goto do_unallocated; + } + + if (rn == 31) { + gen_check_sp_alignment(s); + } + + addr = read_cpu_reg_sp(s, rn, true); + if (index >= 0) { + /* pre-index or signed offset */ + tcg_gen_addi_i64(addr, addr, offset); + } + + if (is_load) { + tcg_gen_andi_i64(addr, addr, -TAG_GRANULE); + tcg_rt = cpu_reg(s, rt); + if (s->ata) { + gen_helper_ldg(tcg_rt, cpu_env, addr, tcg_rt); + } else { + clean_addr = clean_data_tbi(s, addr); + gen_probe_access(s, clean_addr, MMU_DATA_LOAD, MO_8); + gen_address_with_allocation_tag0(tcg_rt, addr); + } + } else { + tcg_rt = cpu_reg_sp(s, rt); + if (!s->ata) { + /* + * For STG and ST2G, we need to check alignment and probe memory. + * TODO: For STZG and STZ2G, we could rely on the stores below, + * at least for system mode; user-only won't enforce alignment. + */ + if (is_pair) { + gen_helper_st2g_stub(cpu_env, addr); + } else { + gen_helper_stg_stub(cpu_env, addr); + } + } else if (tb_cflags(s->base.tb) & CF_PARALLEL) { + if (is_pair) { + gen_helper_st2g_parallel(cpu_env, addr, tcg_rt); + } else { + gen_helper_stg_parallel(cpu_env, addr, tcg_rt); + } + } else { + if (is_pair) { + gen_helper_st2g(cpu_env, addr, tcg_rt); + } else { + gen_helper_stg(cpu_env, addr, tcg_rt); + } + } + } + + if (is_zero) { + TCGv_i64 clean_addr = clean_data_tbi(s, addr); + TCGv_i64 tcg_zero = tcg_const_i64(0); + int mem_index = get_mem_index(s); + int i, n = (1 + is_pair) << LOG2_TAG_GRANULE; + + tcg_gen_qemu_st_i64(tcg_zero, clean_addr, mem_index, + MO_Q | MO_ALIGN_16); + for (i = 8; i < n; i += 8) { + tcg_gen_addi_i64(clean_addr, clean_addr, 8); + tcg_gen_qemu_st_i64(tcg_zero, clean_addr, mem_index, MO_Q); + } + tcg_temp_free_i64(tcg_zero); + } + + if (index != 0) { + /* pre-index or post-index */ + if (index < 0) { + /* post-index */ + tcg_gen_addi_i64(addr, addr, offset); + } + tcg_gen_mov_i64(cpu_reg_sp(s, rn), addr); + } +} + /* Loads and stores */ static void disas_ldst(DisasContext *s, uint32_t insn) { @@ -3709,13 +3870,14 @@ static void disas_ldst(DisasContext *s, uint32_t insn) case 0x0d: /* AdvSIMD load/store single structure */ disas_ldst_single_struct(s, insn); break; - case 0x19: /* LDAPR/STLR (unscaled immediate) */ - if (extract32(insn, 10, 2) != 0 || - extract32(insn, 21, 1) != 0) { + case 0x19: + if (extract32(insn, 21, 1) != 0) { + disas_ldst_tag(s, insn); + } else if (extract32(insn, 10, 2) == 0) { + disas_ldst_ldapr_stlr(s, insn); + } else { unallocated_encoding(s); - break; } - disas_ldst_ldapr_stlr(s, insn); break; default: unallocated_encoding(s); From 6439d67fc944cf29de94a160e9450a2063c7b515 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 25 Jun 2020 20:31:14 -0700 Subject: [PATCH 28/57] target/arm: Implement the STGP instruction Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20200626033144.790098-17-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/translate-a64.c | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index 436191c15c..e2295a371b 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -2690,7 +2690,7 @@ static void disas_ld_lit(DisasContext *s, uint32_t insn) * +-----+-------+---+---+-------+---+-------+-------+------+------+ * * opc: LDP/STP/LDNP/STNP 00 -> 32 bit, 10 -> 64 bit - * LDPSW 01 + * LDPSW/STGP 01 * LDP/STP/LDNP/STNP (SIMD) 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit * V: 0 -> GPR, 1 -> Vector * idx: 00 -> signed offset with non-temporal hint, 01 -> post-index, @@ -2715,6 +2715,7 @@ static void disas_ldst_pair(DisasContext *s, uint32_t insn) bool is_signed = false; bool postindex = false; bool wback = false; + bool set_tag = false; TCGv_i64 clean_addr, dirty_addr; @@ -2727,6 +2728,14 @@ static void disas_ldst_pair(DisasContext *s, uint32_t insn) if (is_vector) { size = 2 + opc; + } else if (opc == 1 && !is_load) { + /* STGP */ + if (!dc_isar_feature(aa64_mte_insn_reg, s) || index == 0) { + unallocated_encoding(s); + return; + } + size = 3; + set_tag = true; } else { size = 2 + extract32(opc, 1, 1); is_signed = extract32(opc, 0, 1); @@ -2767,7 +2776,7 @@ static void disas_ldst_pair(DisasContext *s, uint32_t insn) return; } - offset <<= size; + offset <<= (set_tag ? LOG2_TAG_GRANULE : size); if (rn == 31) { gen_check_sp_alignment(s); @@ -2777,8 +2786,22 @@ static void disas_ldst_pair(DisasContext *s, uint32_t insn) if (!postindex) { tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); } - clean_addr = clean_data_tbi(s, dirty_addr); + if (set_tag) { + if (!s->ata) { + /* + * TODO: We could rely on the stores below, at least for + * system mode, if we arrange to add MO_ALIGN_16. + */ + gen_helper_stg_stub(cpu_env, dirty_addr); + } else if (tb_cflags(s->base.tb) & CF_PARALLEL) { + gen_helper_stg_parallel(cpu_env, dirty_addr, dirty_addr); + } else { + gen_helper_stg(cpu_env, dirty_addr, dirty_addr); + } + } + + clean_addr = clean_data_tbi(s, dirty_addr); if (is_vector) { if (is_load) { do_fp_ld(s, rt, clean_addr, size); From a4157b80242bf1c8aa0ee77aae7458ba79012d5d Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 25 Jun 2020 20:31:15 -0700 Subject: [PATCH 29/57] target/arm: Restrict the values of DCZID.BS under TCG We can simplify our DC_ZVA if we recognize that the largest BS that we actually use in system mode is 64. Let us just assert that it fits within TARGET_PAGE_SIZE. For DC_GVA and STZGM, we want to be able to write whole bytes of tag memory, so assert that BS is >= 2 * TAG_GRANULE, or 32. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20200626033144.790098-18-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/cpu.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/target/arm/cpu.c b/target/arm/cpu.c index d9b8ec791e..d9876337c0 100644 --- a/target/arm/cpu.c +++ b/target/arm/cpu.c @@ -1758,6 +1758,30 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) } #endif + if (tcg_enabled()) { + int dcz_blocklen = 4 << cpu->dcz_blocksize; + + /* + * We only support DCZ blocklen that fits on one page. + * + * Architectually this is always true. However TARGET_PAGE_SIZE + * is variable and, for compatibility with -machine virt-2.7, + * is only 1KiB, as an artifact of legacy ARMv5 subpage support. + * But even then, while the largest architectural DCZ blocklen + * is 2KiB, no cpu actually uses such a large blocklen. + */ + assert(dcz_blocklen <= TARGET_PAGE_SIZE); + + /* + * We only support DCZ blocksize >= 2*TAG_GRANULE, which is to say + * both nibbles of each byte storing tag data may be written at once. + * Since TAG_GRANULE is 16, this means that blocklen must be >= 32. + */ + if (cpu_isar_feature(aa64_mte, cpu)) { + assert(dcz_blocklen >= 2 * TAG_GRANULE); + } + } + qemu_init_vcpu(cs); cpu_reset(cs); From e26d0d226892f67435cadcce86df0ddfb9943174 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 25 Jun 2020 20:31:16 -0700 Subject: [PATCH 30/57] target/arm: Simplify DC_ZVA Now that we know that the operation is on a single page, we need not loop over pages while probing. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20200626033144.790098-19-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/helper-a64.c | 94 +++++++++++------------------------------ 1 file changed, 25 insertions(+), 69 deletions(-) diff --git a/target/arm/helper-a64.c b/target/arm/helper-a64.c index bc0649a44a..8682630ff6 100644 --- a/target/arm/helper-a64.c +++ b/target/arm/helper-a64.c @@ -1119,85 +1119,41 @@ void HELPER(dc_zva)(CPUARMState *env, uint64_t vaddr_in) * (which matches the usual QEMU behaviour of not implementing either * alignment faults or any memory attribute handling). */ - - ARMCPU *cpu = env_archcpu(env); - uint64_t blocklen = 4 << cpu->dcz_blocksize; + int blocklen = 4 << env_archcpu(env)->dcz_blocksize; uint64_t vaddr = vaddr_in & ~(blocklen - 1); + int mmu_idx = cpu_mmu_index(env, false); + void *mem; + + /* + * Trapless lookup. In addition to actual invalid page, may + * return NULL for I/O, watchpoints, clean pages, etc. + */ + mem = tlb_vaddr_to_host(env, vaddr, MMU_DATA_STORE, mmu_idx); #ifndef CONFIG_USER_ONLY - { + if (unlikely(!mem)) { + uintptr_t ra = GETPC(); + /* - * Slightly awkwardly, QEMU's TARGET_PAGE_SIZE may be less than - * the block size so we might have to do more than one TLB lookup. - * We know that in fact for any v8 CPU the page size is at least 4K - * and the block size must be 2K or less, but TARGET_PAGE_SIZE is only - * 1K as an artefact of legacy v5 subpage support being present in the - * same QEMU executable. So in practice the hostaddr[] array has - * two entries, given the current setting of TARGET_PAGE_BITS_MIN. + * Trap if accessing an invalid page. DC_ZVA requires that we supply + * the original pointer for an invalid page. But watchpoints require + * that we probe the actual space. So do both. */ - int maxidx = DIV_ROUND_UP(blocklen, TARGET_PAGE_SIZE); - void *hostaddr[DIV_ROUND_UP(2 * KiB, 1 << TARGET_PAGE_BITS_MIN)]; - int try, i; - unsigned mmu_idx = cpu_mmu_index(env, false); - TCGMemOpIdx oi = make_memop_idx(MO_UB, mmu_idx); + (void) probe_write(env, vaddr_in, 1, mmu_idx, ra); + mem = probe_write(env, vaddr, blocklen, mmu_idx, ra); - assert(maxidx <= ARRAY_SIZE(hostaddr)); - - for (try = 0; try < 2; try++) { - - for (i = 0; i < maxidx; i++) { - hostaddr[i] = tlb_vaddr_to_host(env, - vaddr + TARGET_PAGE_SIZE * i, - 1, mmu_idx); - if (!hostaddr[i]) { - break; - } - } - if (i == maxidx) { - /* - * If it's all in the TLB it's fair game for just writing to; - * we know we don't need to update dirty status, etc. - */ - for (i = 0; i < maxidx - 1; i++) { - memset(hostaddr[i], 0, TARGET_PAGE_SIZE); - } - memset(hostaddr[i], 0, blocklen - (i * TARGET_PAGE_SIZE)); - return; - } + if (unlikely(!mem)) { /* - * OK, try a store and see if we can populate the tlb. This - * might cause an exception if the memory isn't writable, - * in which case we will longjmp out of here. We must for - * this purpose use the actual register value passed to us - * so that we get the fault address right. + * The only remaining reason for mem == NULL is I/O. + * Just do a series of byte writes as the architecture demands. */ - helper_ret_stb_mmu(env, vaddr_in, 0, oi, GETPC()); - /* Now we can populate the other TLB entries, if any */ - for (i = 0; i < maxidx; i++) { - uint64_t va = vaddr + TARGET_PAGE_SIZE * i; - if (va != (vaddr_in & TARGET_PAGE_MASK)) { - helper_ret_stb_mmu(env, va, 0, oi, GETPC()); - } + for (int i = 0; i < blocklen; i++) { + cpu_stb_mmuidx_ra(env, vaddr + i, 0, mmu_idx, ra); } - } - - /* - * Slow path (probably attempt to do this to an I/O device or - * similar, or clearing of a block of code we have translations - * cached for). Just do a series of byte writes as the architecture - * demands. It's not worth trying to use a cpu_physical_memory_map(), - * memset(), unmap() sequence here because: - * + we'd need to account for the blocksize being larger than a page - * + the direct-RAM access case is almost always going to be dealt - * with in the fastpath code above, so there's no speed benefit - * + we would have to deal with the map returning NULL because the - * bounce buffer was in use - */ - for (i = 0; i < blocklen; i++) { - helper_ret_stb_mmu(env, vaddr + i, 0, oi, GETPC()); + return; } } -#else - memset(g2h(vaddr), 0, blocklen); #endif + + memset(mem, 0, blocklen); } From 5f716a82388eb09754dd900e7dbb8ffa15897a28 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 25 Jun 2020 20:31:17 -0700 Subject: [PATCH 31/57] target/arm: Implement the LDGM, STGM, STZGM instructions Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20200626033144.790098-20-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/helper-a64.h | 3 ++ target/arm/mte_helper.c | 84 ++++++++++++++++++++++++++++++++++++++ target/arm/translate-a64.c | 72 ++++++++++++++++++++++++++++---- target/arm/translate.h | 2 + 4 files changed, 153 insertions(+), 8 deletions(-) diff --git a/target/arm/helper-a64.h b/target/arm/helper-a64.h index 2fa61b86fa..7b628d100e 100644 --- a/target/arm/helper-a64.h +++ b/target/arm/helper-a64.h @@ -113,3 +113,6 @@ DEF_HELPER_FLAGS_2(stg_stub, TCG_CALL_NO_WG, void, env, i64) DEF_HELPER_FLAGS_3(st2g, TCG_CALL_NO_WG, void, env, i64, i64) DEF_HELPER_FLAGS_3(st2g_parallel, TCG_CALL_NO_WG, void, env, i64, i64) DEF_HELPER_FLAGS_2(st2g_stub, TCG_CALL_NO_WG, void, env, i64) +DEF_HELPER_FLAGS_2(ldgm, TCG_CALL_NO_WG, i64, env, i64) +DEF_HELPER_FLAGS_3(stgm, TCG_CALL_NO_WG, void, env, i64, i64) +DEF_HELPER_FLAGS_3(stzgm_tags, TCG_CALL_NO_WG, void, env, i64, i64) diff --git a/target/arm/mte_helper.c b/target/arm/mte_helper.c index 7ec7930dfc..27d4b4536c 100644 --- a/target/arm/mte_helper.c +++ b/target/arm/mte_helper.c @@ -274,3 +274,87 @@ void HELPER(st2g_stub)(CPUARMState *env, uint64_t ptr) probe_write(env, ptr + TAG_GRANULE, TAG_GRANULE, mmu_idx, ra); } } + +#define LDGM_STGM_SIZE (4 << GMID_EL1_BS) + +uint64_t HELPER(ldgm)(CPUARMState *env, uint64_t ptr) +{ + int mmu_idx = cpu_mmu_index(env, false); + uintptr_t ra = GETPC(); + void *tag_mem; + + ptr = QEMU_ALIGN_DOWN(ptr, LDGM_STGM_SIZE); + + /* Trap if accessing an invalid page. */ + tag_mem = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_LOAD, + LDGM_STGM_SIZE, MMU_DATA_LOAD, + LDGM_STGM_SIZE / (2 * TAG_GRANULE), ra); + + /* The tag is squashed to zero if the page does not support tags. */ + if (!tag_mem) { + return 0; + } + + QEMU_BUILD_BUG_ON(GMID_EL1_BS != 6); + /* + * We are loading 64-bits worth of tags. The ordering of elements + * within the word corresponds to a 64-bit little-endian operation. + */ + return ldq_le_p(tag_mem); +} + +void HELPER(stgm)(CPUARMState *env, uint64_t ptr, uint64_t val) +{ + int mmu_idx = cpu_mmu_index(env, false); + uintptr_t ra = GETPC(); + void *tag_mem; + + ptr = QEMU_ALIGN_DOWN(ptr, LDGM_STGM_SIZE); + + /* Trap if accessing an invalid page. */ + tag_mem = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_STORE, + LDGM_STGM_SIZE, MMU_DATA_LOAD, + LDGM_STGM_SIZE / (2 * TAG_GRANULE), ra); + + /* + * Tag store only happens if the page support tags, + * and if the OS has enabled access to the tags. + */ + if (!tag_mem) { + return; + } + + QEMU_BUILD_BUG_ON(GMID_EL1_BS != 6); + /* + * We are storing 64-bits worth of tags. The ordering of elements + * within the word corresponds to a 64-bit little-endian operation. + */ + stq_le_p(tag_mem, val); +} + +void HELPER(stzgm_tags)(CPUARMState *env, uint64_t ptr, uint64_t val) +{ + uintptr_t ra = GETPC(); + int mmu_idx = cpu_mmu_index(env, false); + int log2_dcz_bytes, log2_tag_bytes; + intptr_t dcz_bytes, tag_bytes; + uint8_t *mem; + + /* + * In arm_cpu_realizefn, we assert that dcz > LOG2_TAG_GRANULE+1, + * i.e. 32 bytes, which is an unreasonably small dcz anyway, + * to make sure that we can access one complete tag byte here. + */ + log2_dcz_bytes = env_archcpu(env)->dcz_blocksize + 2; + log2_tag_bytes = log2_dcz_bytes - (LOG2_TAG_GRANULE + 1); + dcz_bytes = (intptr_t)1 << log2_dcz_bytes; + tag_bytes = (intptr_t)1 << log2_tag_bytes; + ptr &= -dcz_bytes; + + mem = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_STORE, dcz_bytes, + MMU_DATA_STORE, tag_bytes, ra); + if (mem) { + int tag_pair = (val & 0xf) * 0x11; + memset(mem, tag_pair, tag_bytes); + } +} diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index e2295a371b..7dc493774e 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -3736,7 +3736,7 @@ static void disas_ldst_tag(DisasContext *s, uint32_t insn) uint64_t offset = sextract64(insn, 12, 9) << LOG2_TAG_GRANULE; int op2 = extract32(insn, 10, 2); int op1 = extract32(insn, 22, 2); - bool is_load = false, is_pair = false, is_zero = false; + bool is_load = false, is_pair = false, is_zero = false, is_mult = false; int index = 0; TCGv_i64 addr, clean_addr, tcg_rt; @@ -3756,9 +3756,14 @@ static void disas_ldst_tag(DisasContext *s, uint32_t insn) if (op2 != 0) { /* STG */ index = op2 - 2; - break; + } else { + /* STZGM */ + if (s->current_el == 0 || offset != 0) { + goto do_unallocated; + } + is_mult = is_zero = true; } - goto do_unallocated; + break; case 1: if (op2 != 0) { /* STZG */ @@ -3774,17 +3779,27 @@ static void disas_ldst_tag(DisasContext *s, uint32_t insn) /* ST2G */ is_pair = true; index = op2 - 2; - break; + } else { + /* STGM */ + if (s->current_el == 0 || offset != 0) { + goto do_unallocated; + } + is_mult = true; } - goto do_unallocated; + break; case 3: if (op2 != 0) { /* STZ2G */ is_pair = is_zero = true; index = op2 - 2; - break; + } else { + /* LDGM */ + if (s->current_el == 0 || offset != 0) { + goto do_unallocated; + } + is_mult = is_load = true; } - goto do_unallocated; + break; default: do_unallocated: @@ -3792,7 +3807,9 @@ static void disas_ldst_tag(DisasContext *s, uint32_t insn) return; } - if (!dc_isar_feature(aa64_mte_insn_reg, s)) { + if (is_mult + ? !dc_isar_feature(aa64_mte, s) + : !dc_isar_feature(aa64_mte_insn_reg, s)) { goto do_unallocated; } @@ -3806,6 +3823,44 @@ static void disas_ldst_tag(DisasContext *s, uint32_t insn) tcg_gen_addi_i64(addr, addr, offset); } + if (is_mult) { + tcg_rt = cpu_reg(s, rt); + + if (is_zero) { + int size = 4 << s->dcz_blocksize; + + if (s->ata) { + gen_helper_stzgm_tags(cpu_env, addr, tcg_rt); + } + /* + * The non-tags portion of STZGM is mostly like DC_ZVA, + * except the alignment happens before the access. + */ + clean_addr = clean_data_tbi(s, addr); + tcg_gen_andi_i64(clean_addr, clean_addr, -size); + gen_helper_dc_zva(cpu_env, clean_addr); + } else if (s->ata) { + if (is_load) { + gen_helper_ldgm(tcg_rt, cpu_env, addr); + } else { + gen_helper_stgm(cpu_env, addr, tcg_rt); + } + } else { + MMUAccessType acc = is_load ? MMU_DATA_LOAD : MMU_DATA_STORE; + int size = 4 << GMID_EL1_BS; + + clean_addr = clean_data_tbi(s, addr); + tcg_gen_andi_i64(clean_addr, clean_addr, -size); + gen_probe_access(s, clean_addr, acc, size); + + if (is_load) { + /* The result tags are zeros. */ + tcg_gen_movi_i64(tcg_rt, 0); + } + } + return; + } + if (is_load) { tcg_gen_andi_i64(addr, addr, -TAG_GRANULE); tcg_rt = cpu_reg(s, rt); @@ -14472,6 +14527,7 @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase, dc->vec_stride = 0; dc->cp_regs = arm_cpu->cp_regs; dc->features = env->features; + dc->dcz_blocksize = arm_cpu->dcz_blocksize; /* Single step state. The code-generation logic here is: * SS_ACTIVE == 0: diff --git a/target/arm/translate.h b/target/arm/translate.h index 98bcc37c47..16f2699ad7 100644 --- a/target/arm/translate.h +++ b/target/arm/translate.h @@ -91,6 +91,8 @@ typedef struct DisasContext { * < 0, set by the current instruction. */ int8_t btype; + /* A copy of cpu->dcz_blocksize. */ + uint8_t dcz_blocksize; /* True if this page is guarded. */ bool guarded_page; /* Bottom two bits of XScale c15_cpar coprocessor access control reg */ From 5463df160ecee510e78493993eb1bd38b4838a10 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 25 Jun 2020 20:31:18 -0700 Subject: [PATCH 32/57] target/arm: Implement the access tag cache flushes Like the regular data cache flushes, these are nops within qemu. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20200626033144.790098-21-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/helper.c | 65 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) diff --git a/target/arm/helper.c b/target/arm/helper.c index 2c6ec244af..d8c31d03da 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -6929,6 +6929,32 @@ static const ARMCPRegInfo mte_reginfo[] = { .opc0 = 3, .opc1 = 3, .crn = 4, .crm = 2, .opc2 = 7, .type = ARM_CP_NO_RAW, .access = PL0_RW, .readfn = tco_read, .writefn = tco_write }, + { .name = "DC_IGVAC", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 6, .opc2 = 3, + .type = ARM_CP_NOP, .access = PL1_W, + .accessfn = aa64_cacheop_poc_access }, + { .name = "DC_IGSW", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 6, .opc2 = 4, + .type = ARM_CP_NOP, .access = PL1_W, .accessfn = access_tsw }, + { .name = "DC_IGDVAC", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 6, .opc2 = 5, + .type = ARM_CP_NOP, .access = PL1_W, + .accessfn = aa64_cacheop_poc_access }, + { .name = "DC_IGDSW", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 6, .opc2 = 6, + .type = ARM_CP_NOP, .access = PL1_W, .accessfn = access_tsw }, + { .name = "DC_CGSW", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 10, .opc2 = 4, + .type = ARM_CP_NOP, .access = PL1_W, .accessfn = access_tsw }, + { .name = "DC_CGDSW", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 10, .opc2 = 6, + .type = ARM_CP_NOP, .access = PL1_W, .accessfn = access_tsw }, + { .name = "DC_CIGSW", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 14, .opc2 = 4, + .type = ARM_CP_NOP, .access = PL1_W, .accessfn = access_tsw }, + { .name = "DC_CIGDSW", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 14, .opc2 = 6, + .type = ARM_CP_NOP, .access = PL1_W, .accessfn = access_tsw }, REGINFO_SENTINEL }; @@ -6938,6 +6964,43 @@ static const ARMCPRegInfo mte_tco_ro_reginfo[] = { .type = ARM_CP_CONST, .access = PL0_RW, }, REGINFO_SENTINEL }; + +static const ARMCPRegInfo mte_el0_cacheop_reginfo[] = { + { .name = "DC_CGVAC", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 10, .opc2 = 3, + .type = ARM_CP_NOP, .access = PL0_W, + .accessfn = aa64_cacheop_poc_access }, + { .name = "DC_CGDVAC", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 10, .opc2 = 5, + .type = ARM_CP_NOP, .access = PL0_W, + .accessfn = aa64_cacheop_poc_access }, + { .name = "DC_CGVAP", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 12, .opc2 = 3, + .type = ARM_CP_NOP, .access = PL0_W, + .accessfn = aa64_cacheop_poc_access }, + { .name = "DC_CGDVAP", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 12, .opc2 = 5, + .type = ARM_CP_NOP, .access = PL0_W, + .accessfn = aa64_cacheop_poc_access }, + { .name = "DC_CGVADP", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 13, .opc2 = 3, + .type = ARM_CP_NOP, .access = PL0_W, + .accessfn = aa64_cacheop_poc_access }, + { .name = "DC_CGDVADP", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 13, .opc2 = 5, + .type = ARM_CP_NOP, .access = PL0_W, + .accessfn = aa64_cacheop_poc_access }, + { .name = "DC_CIGVAC", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 14, .opc2 = 3, + .type = ARM_CP_NOP, .access = PL0_W, + .accessfn = aa64_cacheop_poc_access }, + { .name = "DC_CIGDVAC", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 14, .opc2 = 5, + .type = ARM_CP_NOP, .access = PL0_W, + .accessfn = aa64_cacheop_poc_access }, + REGINFO_SENTINEL +}; + #endif static CPAccessResult access_predinv(CPUARMState *env, const ARMCPRegInfo *ri, @@ -8071,8 +8134,10 @@ void register_cp_regs_for_features(ARMCPU *cpu) */ if (cpu_isar_feature(aa64_mte, cpu)) { define_arm_cp_regs(cpu, mte_reginfo); + define_arm_cp_regs(cpu, mte_el0_cacheop_reginfo); } else if (cpu_isar_feature(aa64_mte_insn_reg, cpu)) { define_arm_cp_regs(cpu, mte_tco_ro_reginfo); + define_arm_cp_regs(cpu, mte_el0_cacheop_reginfo); } #endif From 9c7ab8fc8cb6d6e2fb7a82c1088691c7c23fa1b9 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 25 Jun 2020 20:31:19 -0700 Subject: [PATCH 33/57] target/arm: Move regime_el to internals.h We will shortly need this in mte_helper.c as well. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20200626033144.790098-22-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/helper.c | 36 ------------------------------------ target/arm/internals.h | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 36 deletions(-) diff --git a/target/arm/helper.c b/target/arm/helper.c index d8c31d03da..d14313de66 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -9793,42 +9793,6 @@ void arm_cpu_do_interrupt(CPUState *cs) } #endif /* !CONFIG_USER_ONLY */ -/* Return the exception level which controls this address translation regime */ -static uint32_t regime_el(CPUARMState *env, ARMMMUIdx mmu_idx) -{ - switch (mmu_idx) { - case ARMMMUIdx_E20_0: - case ARMMMUIdx_E20_2: - case ARMMMUIdx_E20_2_PAN: - case ARMMMUIdx_Stage2: - case ARMMMUIdx_E2: - return 2; - case ARMMMUIdx_SE3: - return 3; - case ARMMMUIdx_SE10_0: - return arm_el_is_aa64(env, 3) ? 1 : 3; - case ARMMMUIdx_SE10_1: - case ARMMMUIdx_SE10_1_PAN: - case ARMMMUIdx_Stage1_E0: - case ARMMMUIdx_Stage1_E1: - case ARMMMUIdx_Stage1_E1_PAN: - case ARMMMUIdx_E10_0: - case ARMMMUIdx_E10_1: - case ARMMMUIdx_E10_1_PAN: - case ARMMMUIdx_MPrivNegPri: - case ARMMMUIdx_MUserNegPri: - case ARMMMUIdx_MPriv: - case ARMMMUIdx_MUser: - case ARMMMUIdx_MSPrivNegPri: - case ARMMMUIdx_MSUserNegPri: - case ARMMMUIdx_MSPriv: - case ARMMMUIdx_MSUser: - return 1; - default: - g_assert_not_reached(); - } -} - uint64_t arm_sctlr(CPUARMState *env, int el) { /* Only EL0 needs to be adjusted for EL1&0 or EL2&0. */ diff --git a/target/arm/internals.h b/target/arm/internals.h index 5c69d4e5a5..c36fcb151b 100644 --- a/target/arm/internals.h +++ b/target/arm/internals.h @@ -913,6 +913,42 @@ static inline bool regime_is_pan(CPUARMState *env, ARMMMUIdx mmu_idx) } } +/* Return the exception level which controls this address translation regime */ +static inline uint32_t regime_el(CPUARMState *env, ARMMMUIdx mmu_idx) +{ + switch (mmu_idx) { + case ARMMMUIdx_E20_0: + case ARMMMUIdx_E20_2: + case ARMMMUIdx_E20_2_PAN: + case ARMMMUIdx_Stage2: + case ARMMMUIdx_E2: + return 2; + case ARMMMUIdx_SE3: + return 3; + case ARMMMUIdx_SE10_0: + return arm_el_is_aa64(env, 3) ? 1 : 3; + case ARMMMUIdx_SE10_1: + case ARMMMUIdx_SE10_1_PAN: + case ARMMMUIdx_Stage1_E0: + case ARMMMUIdx_Stage1_E1: + case ARMMMUIdx_Stage1_E1_PAN: + case ARMMMUIdx_E10_0: + case ARMMMUIdx_E10_1: + case ARMMMUIdx_E10_1_PAN: + case ARMMMUIdx_MPrivNegPri: + case ARMMMUIdx_MUserNegPri: + case ARMMMUIdx_MPriv: + case ARMMMUIdx_MUser: + case ARMMMUIdx_MSPrivNegPri: + case ARMMMUIdx_MSUserNegPri: + case ARMMMUIdx_MSPriv: + case ARMMMUIdx_MSUser: + return 1; + default: + g_assert_not_reached(); + } +} + /* Return the FSR value for a debug exception (watchpoint, hardware * breakpoint or BKPT insn) targeting the specified exception level. */ From 38659d311d05e6c5feff6bddcc1c33b60d3b86a1 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 25 Jun 2020 20:31:20 -0700 Subject: [PATCH 34/57] target/arm: Move regime_tcr to internals.h We will shortly need this in mte_helper.c as well. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20200626033144.790098-23-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/helper.c | 9 --------- target/arm/internals.h | 9 +++++++++ 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/target/arm/helper.c b/target/arm/helper.c index d14313de66..33f902387b 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -9875,15 +9875,6 @@ static inline uint64_t regime_ttbr(CPUARMState *env, ARMMMUIdx mmu_idx, #endif /* !CONFIG_USER_ONLY */ -/* Return the TCR controlling this translation regime */ -static inline TCR *regime_tcr(CPUARMState *env, ARMMMUIdx mmu_idx) -{ - if (mmu_idx == ARMMMUIdx_Stage2) { - return &env->cp15.vtcr_el2; - } - return &env->cp15.tcr_el[regime_el(env, mmu_idx)]; -} - /* Convert a possible stage1+2 MMU index into the appropriate * stage 1 MMU index */ diff --git a/target/arm/internals.h b/target/arm/internals.h index c36fcb151b..7c9abbabc9 100644 --- a/target/arm/internals.h +++ b/target/arm/internals.h @@ -949,6 +949,15 @@ static inline uint32_t regime_el(CPUARMState *env, ARMMMUIdx mmu_idx) } } +/* Return the TCR controlling this translation regime */ +static inline TCR *regime_tcr(CPUARMState *env, ARMMMUIdx mmu_idx) +{ + if (mmu_idx == ARMMMUIdx_Stage2) { + return &env->cp15.vtcr_el2; + } + return &env->cp15.tcr_el[regime_el(env, mmu_idx)]; +} + /* Return the FSR value for a debug exception (watchpoint, hardware * breakpoint or BKPT insn) targeting the specified exception level. */ From 0a405be2b8fd9506a009b10d7d2d98c394b36db6 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 25 Jun 2020 20:31:21 -0700 Subject: [PATCH 35/57] target/arm: Add gen_mte_check1 Replace existing uses of check_data_tbi in translate-a64.c that perform a single logical memory access. Leave the helper blank for now to reduce the patch size. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20200626033144.790098-24-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/helper-a64.h | 1 + target/arm/internals.h | 8 +++ target/arm/mte_helper.c | 8 +++ target/arm/translate-a64.c | 100 ++++++++++++++++++++++++++++--------- target/arm/translate-a64.h | 2 + 5 files changed, 95 insertions(+), 24 deletions(-) diff --git a/target/arm/helper-a64.h b/target/arm/helper-a64.h index 7b628d100e..2faa49d0a3 100644 --- a/target/arm/helper-a64.h +++ b/target/arm/helper-a64.h @@ -104,6 +104,7 @@ DEF_HELPER_FLAGS_3(autdb, TCG_CALL_NO_WG, i64, env, i64, i64) DEF_HELPER_FLAGS_2(xpaci, TCG_CALL_NO_RWG_SE, i64, env, i64) DEF_HELPER_FLAGS_2(xpacd, TCG_CALL_NO_RWG_SE, i64, env, i64) +DEF_HELPER_FLAGS_3(mte_check1, TCG_CALL_NO_WG, i64, env, i32, i64) DEF_HELPER_FLAGS_3(irg, TCG_CALL_NO_RWG, i64, env, i64, i64) DEF_HELPER_FLAGS_4(addsubg, TCG_CALL_NO_RWG_SE, i64, env, i64, s32, i32) DEF_HELPER_FLAGS_3(ldg, TCG_CALL_NO_WG, i64, env, i64, i64) diff --git a/target/arm/internals.h b/target/arm/internals.h index 7c9abbabc9..fb92ef6b84 100644 --- a/target/arm/internals.h +++ b/target/arm/internals.h @@ -1310,6 +1310,14 @@ void arm_log_exception(int idx); #define LOG2_TAG_GRANULE 4 #define TAG_GRANULE (1 << LOG2_TAG_GRANULE) +/* Bits within a descriptor passed to the helper_mte_check* functions. */ +FIELD(MTEDESC, MIDX, 0, 4) +FIELD(MTEDESC, TBI, 4, 2) +FIELD(MTEDESC, TCMA, 6, 2) +FIELD(MTEDESC, WRITE, 8, 1) +FIELD(MTEDESC, ESIZE, 9, 5) +FIELD(MTEDESC, TSIZE, 14, 10) /* mte_checkN only */ + static inline int allocation_tag_from_addr(uint64_t ptr) { return extract64(ptr, 56, 4); diff --git a/target/arm/mte_helper.c b/target/arm/mte_helper.c index 27d4b4536c..ec12768dfc 100644 --- a/target/arm/mte_helper.c +++ b/target/arm/mte_helper.c @@ -358,3 +358,11 @@ void HELPER(stzgm_tags)(CPUARMState *env, uint64_t ptr, uint64_t val) memset(mem, tag_pair, tag_bytes); } } + +/* + * Perform an MTE checked access for a single logical or atomic access. + */ +uint64_t HELPER(mte_check1)(CPUARMState *env, uint32_t desc, uint64_t ptr) +{ + return ptr; +} diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index 7dc493774e..4d0453c895 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -204,20 +204,20 @@ static void gen_a64_set_pc(DisasContext *s, TCGv_i64 src) } /* - * Return a "clean" address for ADDR according to TBID. - * This is always a fresh temporary, as we need to be able to - * increment this independently of a dirty write-back address. + * Handle MTE and/or TBI. + * + * For TBI, ideally, we would do nothing. Proper behaviour on fault is + * for the tag to be present in the FAR_ELx register. But for user-only + * mode we do not have a TLB with which to implement this, so we must + * remove the top byte now. + * + * Always return a fresh temporary that we can increment independently + * of the write-back address. */ + static TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr) { TCGv_i64 clean = new_tmp_a64(s); - /* - * In order to get the correct value in the FAR_ELx register, - * we must present the memory subsystem with the "dirty" address - * including the TBI. In system mode we can make this work via - * the TLB, dropping the TBI during translation. But for user-only - * mode we don't have that option, and must remove the top byte now. - */ #ifdef CONFIG_USER_ONLY gen_top_byte_ignore(s, clean, addr, s->tbid); #else @@ -245,6 +245,45 @@ static void gen_probe_access(DisasContext *s, TCGv_i64 ptr, tcg_temp_free_i32(t_size); } +/* + * For MTE, check a single logical or atomic access. This probes a single + * address, the exact one specified. The size and alignment of the access + * is not relevant to MTE, per se, but watchpoints do require the size, + * and we want to recognize those before making any other changes to state. + */ +static TCGv_i64 gen_mte_check1_mmuidx(DisasContext *s, TCGv_i64 addr, + bool is_write, bool tag_checked, + int log2_size, bool is_unpriv, + int core_idx) +{ + if (tag_checked && s->mte_active[is_unpriv]) { + TCGv_i32 tcg_desc; + TCGv_i64 ret; + int desc = 0; + + desc = FIELD_DP32(desc, MTEDESC, MIDX, core_idx); + desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); + desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); + desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write); + desc = FIELD_DP32(desc, MTEDESC, ESIZE, 1 << log2_size); + tcg_desc = tcg_const_i32(desc); + + ret = new_tmp_a64(s); + gen_helper_mte_check1(ret, cpu_env, tcg_desc, addr); + tcg_temp_free_i32(tcg_desc); + + return ret; + } + return clean_data_tbi(s, addr); +} + +TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write, + bool tag_checked, int log2_size) +{ + return gen_mte_check1_mmuidx(s, addr, is_write, tag_checked, log2_size, + false, get_mem_index(s)); +} + typedef struct DisasCompare64 { TCGCond cond; TCGv_i64 value; @@ -2367,7 +2406,7 @@ static void gen_compare_and_swap(DisasContext *s, int rs, int rt, if (rn == 31) { gen_check_sp_alignment(s); } - clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn)); + clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, size); tcg_gen_atomic_cmpxchg_i64(tcg_rs, clean_addr, tcg_rs, tcg_rt, memidx, size | MO_ALIGN | s->be_data); } @@ -2385,7 +2424,9 @@ static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt, if (rn == 31) { gen_check_sp_alignment(s); } - clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn)); + + /* This is a single atomic access, despite the "pair". */ + clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, size + 1); if (size == 2) { TCGv_i64 cmp = tcg_temp_new_i64(); @@ -2510,7 +2551,8 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn) if (is_lasr) { tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); } - clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn)); + clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), + true, rn != 31, size); gen_store_exclusive(s, rs, rt, rt2, clean_addr, size, false); return; @@ -2519,7 +2561,8 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn) if (rn == 31) { gen_check_sp_alignment(s); } - clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn)); + clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), + false, rn != 31, size); s->is_ldex = true; gen_load_exclusive(s, rt, rt2, clean_addr, size, false); if (is_lasr) { @@ -2539,7 +2582,8 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn) gen_check_sp_alignment(s); } tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); - clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn)); + clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), + true, rn != 31, size); do_gpr_st(s, cpu_reg(s, rt), clean_addr, size, true, rt, disas_ldst_compute_iss_sf(size, false, 0), is_lasr); return; @@ -2555,7 +2599,8 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn) if (rn == 31) { gen_check_sp_alignment(s); } - clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn)); + clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), + false, rn != 31, size); do_gpr_ld(s, cpu_reg(s, rt), clean_addr, size, false, false, true, rt, disas_ldst_compute_iss_sf(size, false, 0), is_lasr); tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); @@ -2569,7 +2614,8 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn) if (is_lasr) { tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); } - clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn)); + clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), + true, rn != 31, size); gen_store_exclusive(s, rs, rt, rt2, clean_addr, size, true); return; } @@ -2587,7 +2633,8 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn) if (rn == 31) { gen_check_sp_alignment(s); } - clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn)); + clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), + false, rn != 31, size); s->is_ldex = true; gen_load_exclusive(s, rt, rt2, clean_addr, size, true); if (is_lasr) { @@ -2881,6 +2928,7 @@ static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn, bool iss_valid = !is_vector; bool post_index; bool writeback; + int memidx; TCGv_i64 clean_addr, dirty_addr; @@ -2938,7 +2986,11 @@ static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn, if (!post_index) { tcg_gen_addi_i64(dirty_addr, dirty_addr, imm9); } - clean_addr = clean_data_tbi(s, dirty_addr); + + memidx = is_unpriv ? get_a64_user_mem_index(s) : get_mem_index(s); + clean_addr = gen_mte_check1_mmuidx(s, dirty_addr, is_store, + writeback || rn != 31, + size, is_unpriv, memidx); if (is_vector) { if (is_store) { @@ -2948,7 +3000,6 @@ static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn, } } else { TCGv_i64 tcg_rt = cpu_reg(s, rt); - int memidx = is_unpriv ? get_a64_user_mem_index(s) : get_mem_index(s); bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc); if (is_store) { @@ -3045,7 +3096,7 @@ static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn, ext_and_shift_reg(tcg_rm, tcg_rm, opt, shift ? size : 0); tcg_gen_add_i64(dirty_addr, dirty_addr, tcg_rm); - clean_addr = clean_data_tbi(s, dirty_addr); + clean_addr = gen_mte_check1(s, dirty_addr, is_store, true, size); if (is_vector) { if (is_store) { @@ -3130,7 +3181,7 @@ static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn, dirty_addr = read_cpu_reg_sp(s, rn, 1); offset = imm12 << size; tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); - clean_addr = clean_data_tbi(s, dirty_addr); + clean_addr = gen_mte_check1(s, dirty_addr, is_store, rn != 31, size); if (is_vector) { if (is_store) { @@ -3223,7 +3274,7 @@ static void disas_ldst_atomic(DisasContext *s, uint32_t insn, if (rn == 31) { gen_check_sp_alignment(s); } - clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn)); + clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), false, rn != 31, size); if (o3_opc == 014) { /* @@ -3300,7 +3351,8 @@ static void disas_ldst_pac(DisasContext *s, uint32_t insn, tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); /* Note that "clean" and "dirty" here refer to TBI not PAC. */ - clean_addr = clean_data_tbi(s, dirty_addr); + clean_addr = gen_mte_check1(s, dirty_addr, false, + is_wback || rn != 31, size); tcg_rt = cpu_reg(s, rt); do_gpr_ld(s, tcg_rt, clean_addr, size, /* is_signed */ false, diff --git a/target/arm/translate-a64.h b/target/arm/translate-a64.h index da0f59a2ce..daab6a9666 100644 --- a/target/arm/translate-a64.h +++ b/target/arm/translate-a64.h @@ -40,6 +40,8 @@ TCGv_ptr get_fpstatus_ptr(bool); bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn, unsigned int imms, unsigned int immr); bool sve_access_check(DisasContext *s); +TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write, + bool tag_checked, int log2_size); /* We should have at some point before trying to access an FP register * done the necessary access check, so assert that From 73ceeb0011b23bac8bd2c09ebe3c18d034aa69ce Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 25 Jun 2020 20:31:22 -0700 Subject: [PATCH 36/57] target/arm: Add gen_mte_checkN Replace existing uses of check_data_tbi in translate-a64.c that perform multiple logical memory access. Leave the helper blank for now to reduce the patch size. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20200626033144.790098-25-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/helper-a64.h | 1 + target/arm/mte_helper.c | 8 +++++ target/arm/translate-a64.c | 71 +++++++++++++++++++++++++++++--------- target/arm/translate-a64.h | 2 ++ 4 files changed, 66 insertions(+), 16 deletions(-) diff --git a/target/arm/helper-a64.h b/target/arm/helper-a64.h index 2faa49d0a3..005af678c7 100644 --- a/target/arm/helper-a64.h +++ b/target/arm/helper-a64.h @@ -105,6 +105,7 @@ DEF_HELPER_FLAGS_2(xpaci, TCG_CALL_NO_RWG_SE, i64, env, i64) DEF_HELPER_FLAGS_2(xpacd, TCG_CALL_NO_RWG_SE, i64, env, i64) DEF_HELPER_FLAGS_3(mte_check1, TCG_CALL_NO_WG, i64, env, i32, i64) +DEF_HELPER_FLAGS_3(mte_checkN, TCG_CALL_NO_WG, i64, env, i32, i64) DEF_HELPER_FLAGS_3(irg, TCG_CALL_NO_RWG, i64, env, i64, i64) DEF_HELPER_FLAGS_4(addsubg, TCG_CALL_NO_RWG_SE, i64, env, i64, s32, i32) DEF_HELPER_FLAGS_3(ldg, TCG_CALL_NO_WG, i64, env, i64, i64) diff --git a/target/arm/mte_helper.c b/target/arm/mte_helper.c index ec12768dfc..907a12b366 100644 --- a/target/arm/mte_helper.c +++ b/target/arm/mte_helper.c @@ -366,3 +366,11 @@ uint64_t HELPER(mte_check1)(CPUARMState *env, uint32_t desc, uint64_t ptr) { return ptr; } + +/* + * Perform an MTE checked access for multiple logical accesses. + */ +uint64_t HELPER(mte_checkN)(CPUARMState *env, uint32_t desc, uint64_t ptr) +{ + return ptr; +} diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index 4d0453c895..52be0400d7 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -284,6 +284,34 @@ TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write, false, get_mem_index(s)); } +/* + * For MTE, check multiple logical sequential accesses. + */ +TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write, + bool tag_checked, int log2_esize, int total_size) +{ + if (tag_checked && s->mte_active[0] && total_size != (1 << log2_esize)) { + TCGv_i32 tcg_desc; + TCGv_i64 ret; + int desc = 0; + + desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s)); + desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); + desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); + desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write); + desc = FIELD_DP32(desc, MTEDESC, ESIZE, 1 << log2_esize); + desc = FIELD_DP32(desc, MTEDESC, TSIZE, total_size); + tcg_desc = tcg_const_i32(desc); + + ret = new_tmp_a64(s); + gen_helper_mte_checkN(ret, cpu_env, tcg_desc, addr); + tcg_temp_free_i32(tcg_desc); + + return ret; + } + return gen_mte_check1(s, addr, is_write, tag_checked, log2_esize); +} + typedef struct DisasCompare64 { TCGCond cond; TCGv_i64 value; @@ -2848,7 +2876,10 @@ static void disas_ldst_pair(DisasContext *s, uint32_t insn) } } - clean_addr = clean_data_tbi(s, dirty_addr); + clean_addr = gen_mte_checkN(s, dirty_addr, !is_load, + (wback || rn != 31) && !set_tag, + size, 2 << size); + if (is_vector) { if (is_load) { do_fp_ld(s, rt, clean_addr, size); @@ -3514,7 +3545,7 @@ static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn) TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; MemOp endian = s->be_data; - int ebytes; /* bytes per element */ + int total; /* total bytes */ int elements; /* elements per vector */ int rpt; /* num iterations */ int selem; /* structure elements */ @@ -3584,19 +3615,26 @@ static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn) endian = MO_LE; } - /* Consecutive little-endian elements from a single register + total = rpt * selem * (is_q ? 16 : 8); + tcg_rn = cpu_reg_sp(s, rn); + + /* + * Issue the MTE check vs the logical repeat count, before we + * promote consecutive little-endian elements below. + */ + clean_addr = gen_mte_checkN(s, tcg_rn, is_store, is_postidx || rn != 31, + size, total); + + /* + * Consecutive little-endian elements from a single register * can be promoted to a larger little-endian operation. */ if (selem == 1 && endian == MO_LE) { size = 3; } - ebytes = 1 << size; - elements = (is_q ? 16 : 8) / ebytes; - - tcg_rn = cpu_reg_sp(s, rn); - clean_addr = clean_data_tbi(s, tcg_rn); - tcg_ebytes = tcg_const_i64(ebytes); + elements = (is_q ? 16 : 8) >> size; + tcg_ebytes = tcg_const_i64(1 << size); for (r = 0; r < rpt; r++) { int e; for (e = 0; e < elements; e++) { @@ -3630,7 +3668,7 @@ static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn) if (is_postidx) { if (rm == 31) { - tcg_gen_addi_i64(tcg_rn, tcg_rn, rpt * elements * selem * ebytes); + tcg_gen_addi_i64(tcg_rn, tcg_rn, total); } else { tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm)); } @@ -3676,7 +3714,7 @@ static void disas_ldst_single_struct(DisasContext *s, uint32_t insn) int selem = (extract32(opc, 0, 1) << 1 | R) + 1; bool replicate = false; int index = is_q << 3 | S << 2 | size; - int ebytes, xs; + int xs, total; TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; if (extract32(insn, 31, 1)) { @@ -3730,16 +3768,17 @@ static void disas_ldst_single_struct(DisasContext *s, uint32_t insn) return; } - ebytes = 1 << scale; - if (rn == 31) { gen_check_sp_alignment(s); } + total = selem << scale; tcg_rn = cpu_reg_sp(s, rn); - clean_addr = clean_data_tbi(s, tcg_rn); - tcg_ebytes = tcg_const_i64(ebytes); + clean_addr = gen_mte_checkN(s, tcg_rn, !is_load, is_postidx || rn != 31, + scale, total); + + tcg_ebytes = tcg_const_i64(1 << scale); for (xs = 0; xs < selem; xs++) { if (replicate) { /* Load and replicate to all elements */ @@ -3766,7 +3805,7 @@ static void disas_ldst_single_struct(DisasContext *s, uint32_t insn) if (is_postidx) { if (rm == 31) { - tcg_gen_addi_i64(tcg_rn, tcg_rn, selem * ebytes); + tcg_gen_addi_i64(tcg_rn, tcg_rn, total); } else { tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm)); } diff --git a/target/arm/translate-a64.h b/target/arm/translate-a64.h index daab6a9666..781c441399 100644 --- a/target/arm/translate-a64.h +++ b/target/arm/translate-a64.h @@ -42,6 +42,8 @@ bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn, bool sve_access_check(DisasContext *s); TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write, bool tag_checked, int log2_size); +TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write, + bool tag_checked, int count, int log2_esize); /* We should have at some point before trying to access an FP register * done the necessary access check, so assert that From 2e34ff45f32cb032883616a1cc5ea8ac96f546d5 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 25 Jun 2020 20:31:23 -0700 Subject: [PATCH 37/57] target/arm: Implement helper_mte_check1 Fill out the stub that was added earlier. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20200626033144.790098-26-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/internals.h | 48 +++++++++++++++ target/arm/mte_helper.c | 132 +++++++++++++++++++++++++++++++++++++++- 2 files changed, 179 insertions(+), 1 deletion(-) diff --git a/target/arm/internals.h b/target/arm/internals.h index fb92ef6b84..807830cc40 100644 --- a/target/arm/internals.h +++ b/target/arm/internals.h @@ -1318,6 +1318,10 @@ FIELD(MTEDESC, WRITE, 8, 1) FIELD(MTEDESC, ESIZE, 9, 5) FIELD(MTEDESC, TSIZE, 14, 10) /* mte_checkN only */ +bool mte_probe1(CPUARMState *env, uint32_t desc, uint64_t ptr); +uint64_t mte_check1(CPUARMState *env, uint32_t desc, + uint64_t ptr, uintptr_t ra); + static inline int allocation_tag_from_addr(uint64_t ptr) { return extract64(ptr, 56, 4); @@ -1328,4 +1332,48 @@ static inline uint64_t address_with_allocation_tag(uint64_t ptr, int rtag) return deposit64(ptr, 56, 4, rtag); } +/* Return true if tbi bits mean that the access is checked. */ +static inline bool tbi_check(uint32_t desc, int bit55) +{ + return (desc >> (R_MTEDESC_TBI_SHIFT + bit55)) & 1; +} + +/* Return true if tcma bits mean that the access is unchecked. */ +static inline bool tcma_check(uint32_t desc, int bit55, int ptr_tag) +{ + /* + * We had extracted bit55 and ptr_tag for other reasons, so fold + * (ptr<59:55> == 00000 || ptr<59:55> == 11111) into a single test. + */ + bool match = ((ptr_tag + bit55) & 0xf) == 0; + bool tcma = (desc >> (R_MTEDESC_TCMA_SHIFT + bit55)) & 1; + return tcma && match; +} + +/* + * For TBI, ideally, we would do nothing. Proper behaviour on fault is + * for the tag to be present in the FAR_ELx register. But for user-only + * mode, we do not have a TLB with which to implement this, so we must + * remove the top byte. + */ +static inline uint64_t useronly_clean_ptr(uint64_t ptr) +{ + /* TBI is known to be enabled. */ +#ifdef CONFIG_USER_ONLY + ptr = sextract64(ptr, 0, 56); +#endif + return ptr; +} + +static inline uint64_t useronly_maybe_clean_ptr(uint32_t desc, uint64_t ptr) +{ +#ifdef CONFIG_USER_ONLY + int64_t clean_ptr = sextract64(ptr, 0, 56); + if (tbi_check(desc, clean_ptr < 0)) { + ptr = clean_ptr; + } +#endif + return ptr; +} + #endif diff --git a/target/arm/mte_helper.c b/target/arm/mte_helper.c index 907a12b366..c8a5e7c0ed 100644 --- a/target/arm/mte_helper.c +++ b/target/arm/mte_helper.c @@ -359,12 +359,142 @@ void HELPER(stzgm_tags)(CPUARMState *env, uint64_t ptr, uint64_t val) } } +/* Record a tag check failure. */ +static void mte_check_fail(CPUARMState *env, int mmu_idx, + uint64_t dirty_ptr, uintptr_t ra) +{ + ARMMMUIdx arm_mmu_idx = core_to_aa64_mmu_idx(mmu_idx); + int el, reg_el, tcf, select; + uint64_t sctlr; + + reg_el = regime_el(env, arm_mmu_idx); + sctlr = env->cp15.sctlr_el[reg_el]; + + switch (arm_mmu_idx) { + case ARMMMUIdx_E10_0: + case ARMMMUIdx_E20_0: + el = 0; + tcf = extract64(sctlr, 38, 2); + break; + default: + el = reg_el; + tcf = extract64(sctlr, 40, 2); + } + + switch (tcf) { + case 1: + /* + * Tag check fail causes a synchronous exception. + * + * In restore_state_to_opc, we set the exception syndrome + * for the load or store operation. Unwind first so we + * may overwrite that with the syndrome for the tag check. + */ + cpu_restore_state(env_cpu(env), ra, true); + env->exception.vaddress = dirty_ptr; + raise_exception(env, EXCP_DATA_ABORT, + syn_data_abort_no_iss(el != 0, 0, 0, 0, 0, 0, 0x11), + exception_target_el(env)); + /* noreturn, but fall through to the assert anyway */ + + case 0: + /* + * Tag check fail does not affect the PE. + * We eliminate this case by not setting MTE_ACTIVE + * in tb_flags, so that we never make this runtime call. + */ + g_assert_not_reached(); + + case 2: + /* Tag check fail causes asynchronous flag set. */ + mmu_idx = arm_mmu_idx_el(env, el); + if (regime_has_2_ranges(mmu_idx)) { + select = extract64(dirty_ptr, 55, 1); + } else { + select = 0; + } + env->cp15.tfsr_el[el] |= 1 << select; + break; + + default: + /* Case 3: Reserved. */ + qemu_log_mask(LOG_GUEST_ERROR, + "Tag check failure with SCTLR_EL%d.TCF%s " + "set to reserved value %d\n", + reg_el, el ? "" : "0", tcf); + break; + } +} + /* * Perform an MTE checked access for a single logical or atomic access. */ +static bool mte_probe1_int(CPUARMState *env, uint32_t desc, uint64_t ptr, + uintptr_t ra, int bit55) +{ + int mem_tag, mmu_idx, ptr_tag, size; + MMUAccessType type; + uint8_t *mem; + + ptr_tag = allocation_tag_from_addr(ptr); + + if (tcma_check(desc, bit55, ptr_tag)) { + return true; + } + + mmu_idx = FIELD_EX32(desc, MTEDESC, MIDX); + type = FIELD_EX32(desc, MTEDESC, WRITE) ? MMU_DATA_STORE : MMU_DATA_LOAD; + size = FIELD_EX32(desc, MTEDESC, ESIZE); + + mem = allocation_tag_mem(env, mmu_idx, ptr, type, size, + MMU_DATA_LOAD, 1, ra); + if (!mem) { + return true; + } + + mem_tag = load_tag1(ptr, mem); + return ptr_tag == mem_tag; +} + +/* + * No-fault version of mte_check1, to be used by SVE for MemSingleNF. + * Returns false if the access is Checked and the check failed. This + * is only intended to probe the tag -- the validity of the page must + * be checked beforehand. + */ +bool mte_probe1(CPUARMState *env, uint32_t desc, uint64_t ptr) +{ + int bit55 = extract64(ptr, 55, 1); + + /* If TBI is disabled, the access is unchecked. */ + if (unlikely(!tbi_check(desc, bit55))) { + return true; + } + + return mte_probe1_int(env, desc, ptr, 0, bit55); +} + +uint64_t mte_check1(CPUARMState *env, uint32_t desc, + uint64_t ptr, uintptr_t ra) +{ + int bit55 = extract64(ptr, 55, 1); + + /* If TBI is disabled, the access is unchecked, and ptr is not dirty. */ + if (unlikely(!tbi_check(desc, bit55))) { + return ptr; + } + + if (unlikely(!mte_probe1_int(env, desc, ptr, ra, bit55))) { + int mmu_idx = FIELD_EX32(desc, MTEDESC, MIDX); + mte_check_fail(env, mmu_idx, ptr, ra); + } + + return useronly_clean_ptr(ptr); +} + uint64_t HELPER(mte_check1)(CPUARMState *env, uint32_t desc, uint64_t ptr) { - return ptr; + return mte_check1(env, desc, ptr, GETPC()); } /* From 5add8248556a3c1006018d7d8e601c9572b280a9 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 25 Jun 2020 20:31:24 -0700 Subject: [PATCH 38/57] target/arm: Implement helper_mte_checkN Fill out the stub that was added earlier. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20200626033144.790098-27-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/internals.h | 2 + target/arm/mte_helper.c | 165 +++++++++++++++++++++++++++++++++++++++- 2 files changed, 166 insertions(+), 1 deletion(-) diff --git a/target/arm/internals.h b/target/arm/internals.h index 807830cc40..c763a23dfb 100644 --- a/target/arm/internals.h +++ b/target/arm/internals.h @@ -1321,6 +1321,8 @@ FIELD(MTEDESC, TSIZE, 14, 10) /* mte_checkN only */ bool mte_probe1(CPUARMState *env, uint32_t desc, uint64_t ptr); uint64_t mte_check1(CPUARMState *env, uint32_t desc, uint64_t ptr, uintptr_t ra); +uint64_t mte_checkN(CPUARMState *env, uint32_t desc, + uint64_t ptr, uintptr_t ra); static inline int allocation_tag_from_addr(uint64_t ptr) { diff --git a/target/arm/mte_helper.c b/target/arm/mte_helper.c index c8a5e7c0ed..abe6af6b79 100644 --- a/target/arm/mte_helper.c +++ b/target/arm/mte_helper.c @@ -500,7 +500,170 @@ uint64_t HELPER(mte_check1)(CPUARMState *env, uint32_t desc, uint64_t ptr) /* * Perform an MTE checked access for multiple logical accesses. */ + +/** + * checkN: + * @tag: tag memory to test + * @odd: true to begin testing at tags at odd nibble + * @cmp: the tag to compare against + * @count: number of tags to test + * + * Return the number of successful tests. + * Thus a return value < @count indicates a failure. + * + * A note about sizes: count is expected to be small. + * + * The most common use will be LDP/STP of two integer registers, + * which means 16 bytes of memory touching at most 2 tags, but + * often the access is aligned and thus just 1 tag. + * + * Using AdvSIMD LD/ST (multiple), one can access 64 bytes of memory, + * touching at most 5 tags. SVE LDR/STR (vector) with the default + * vector length is also 64 bytes; the maximum architectural length + * is 256 bytes touching at most 9 tags. + * + * The loop below uses 7 logical operations and 1 memory operation + * per tag pair. An implementation that loads an aligned word and + * uses masking to ignore adjacent tags requires 18 logical operations + * and thus does not begin to pay off until 6 tags. + * Which, according to the survey above, is unlikely to be common. + */ +static int checkN(uint8_t *mem, int odd, int cmp, int count) +{ + int n = 0, diff; + + /* Replicate the test tag and compare. */ + cmp *= 0x11; + diff = *mem++ ^ cmp; + + if (odd) { + goto start_odd; + } + + while (1) { + /* Test even tag. */ + if (unlikely((diff) & 0x0f)) { + break; + } + if (++n == count) { + break; + } + + start_odd: + /* Test odd tag. */ + if (unlikely((diff) & 0xf0)) { + break; + } + if (++n == count) { + break; + } + + diff = *mem++ ^ cmp; + } + return n; +} + +uint64_t mte_checkN(CPUARMState *env, uint32_t desc, + uint64_t ptr, uintptr_t ra) +{ + int mmu_idx, ptr_tag, bit55; + uint64_t ptr_last, ptr_end, prev_page, next_page; + uint64_t tag_first, tag_end; + uint64_t tag_byte_first, tag_byte_end; + uint32_t esize, total, tag_count, tag_size, n, c; + uint8_t *mem1, *mem2; + MMUAccessType type; + + bit55 = extract64(ptr, 55, 1); + + /* If TBI is disabled, the access is unchecked, and ptr is not dirty. */ + if (unlikely(!tbi_check(desc, bit55))) { + return ptr; + } + + ptr_tag = allocation_tag_from_addr(ptr); + + if (tcma_check(desc, bit55, ptr_tag)) { + goto done; + } + + mmu_idx = FIELD_EX32(desc, MTEDESC, MIDX); + type = FIELD_EX32(desc, MTEDESC, WRITE) ? MMU_DATA_STORE : MMU_DATA_LOAD; + esize = FIELD_EX32(desc, MTEDESC, ESIZE); + total = FIELD_EX32(desc, MTEDESC, TSIZE); + + /* Find the addr of the end of the access, and of the last element. */ + ptr_end = ptr + total; + ptr_last = ptr_end - esize; + + /* Round the bounds to the tag granule, and compute the number of tags. */ + tag_first = QEMU_ALIGN_DOWN(ptr, TAG_GRANULE); + tag_end = QEMU_ALIGN_UP(ptr_last, TAG_GRANULE); + tag_count = (tag_end - tag_first) / TAG_GRANULE; + + /* Round the bounds to twice the tag granule, and compute the bytes. */ + tag_byte_first = QEMU_ALIGN_DOWN(ptr, 2 * TAG_GRANULE); + tag_byte_end = QEMU_ALIGN_UP(ptr_last, 2 * TAG_GRANULE); + + /* Locate the page boundaries. */ + prev_page = ptr & TARGET_PAGE_MASK; + next_page = prev_page + TARGET_PAGE_SIZE; + + if (likely(tag_end - prev_page <= TARGET_PAGE_SIZE)) { + /* Memory access stays on one page. */ + tag_size = (tag_byte_end - tag_byte_first) / (2 * TAG_GRANULE); + mem1 = allocation_tag_mem(env, mmu_idx, ptr, type, total, + MMU_DATA_LOAD, tag_size, ra); + if (!mem1) { + goto done; + } + /* Perform all of the comparisons. */ + n = checkN(mem1, ptr & TAG_GRANULE, ptr_tag, tag_count); + } else { + /* Memory access crosses to next page. */ + tag_size = (next_page - tag_byte_first) / (2 * TAG_GRANULE); + mem1 = allocation_tag_mem(env, mmu_idx, ptr, type, next_page - ptr, + MMU_DATA_LOAD, tag_size, ra); + + tag_size = (tag_byte_end - next_page) / (2 * TAG_GRANULE); + mem2 = allocation_tag_mem(env, mmu_idx, next_page, type, + ptr_end - next_page, + MMU_DATA_LOAD, tag_size, ra); + + /* + * Perform all of the comparisons. + * Note the possible but unlikely case of the operation spanning + * two pages that do not both have tagging enabled. + */ + n = c = (next_page - tag_first) / TAG_GRANULE; + if (mem1) { + n = checkN(mem1, ptr & TAG_GRANULE, ptr_tag, c); + } + if (n == c) { + if (!mem2) { + goto done; + } + n += checkN(mem2, 0, ptr_tag, tag_count - c); + } + } + + /* + * If we failed, we know which granule. Compute the element that + * is first in that granule, and signal failure on that element. + */ + if (unlikely(n < tag_count)) { + uint64_t fail_ofs; + + fail_ofs = tag_first + n * TAG_GRANULE - ptr; + fail_ofs = ROUND_UP(fail_ofs, esize); + mte_check_fail(env, mmu_idx, ptr + fail_ofs, ra); + } + + done: + return useronly_clean_ptr(ptr); +} + uint64_t HELPER(mte_checkN)(CPUARMState *env, uint32_t desc, uint64_t ptr) { - return ptr; + return mte_checkN(env, desc, ptr, GETPC()); } From 46dc1bc0601554823a42ad27f236da2ad8f3bdc6 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 25 Jun 2020 20:31:25 -0700 Subject: [PATCH 39/57] target/arm: Add helper_mte_check_zva Use a special helper for DC_ZVA, rather than the more general mte_checkN. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20200626033144.790098-28-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/helper-a64.h | 1 + target/arm/mte_helper.c | 106 +++++++++++++++++++++++++++++++++++++ target/arm/translate-a64.c | 16 +++++- 3 files changed, 122 insertions(+), 1 deletion(-) diff --git a/target/arm/helper-a64.h b/target/arm/helper-a64.h index 005af678c7..5b0b699a50 100644 --- a/target/arm/helper-a64.h +++ b/target/arm/helper-a64.h @@ -106,6 +106,7 @@ DEF_HELPER_FLAGS_2(xpacd, TCG_CALL_NO_RWG_SE, i64, env, i64) DEF_HELPER_FLAGS_3(mte_check1, TCG_CALL_NO_WG, i64, env, i32, i64) DEF_HELPER_FLAGS_3(mte_checkN, TCG_CALL_NO_WG, i64, env, i32, i64) +DEF_HELPER_FLAGS_3(mte_check_zva, TCG_CALL_NO_WG, i64, env, i32, i64) DEF_HELPER_FLAGS_3(irg, TCG_CALL_NO_RWG, i64, env, i64, i64) DEF_HELPER_FLAGS_4(addsubg, TCG_CALL_NO_RWG_SE, i64, env, i64, s32, i32) DEF_HELPER_FLAGS_3(ldg, TCG_CALL_NO_WG, i64, env, i64, i64) diff --git a/target/arm/mte_helper.c b/target/arm/mte_helper.c index abe6af6b79..4f9bd3add3 100644 --- a/target/arm/mte_helper.c +++ b/target/arm/mte_helper.c @@ -667,3 +667,109 @@ uint64_t HELPER(mte_checkN)(CPUARMState *env, uint32_t desc, uint64_t ptr) { return mte_checkN(env, desc, ptr, GETPC()); } + +/* + * Perform an MTE checked access for DC_ZVA. + */ +uint64_t HELPER(mte_check_zva)(CPUARMState *env, uint32_t desc, uint64_t ptr) +{ + uintptr_t ra = GETPC(); + int log2_dcz_bytes, log2_tag_bytes; + int mmu_idx, bit55; + intptr_t dcz_bytes, tag_bytes, i; + void *mem; + uint64_t ptr_tag, mem_tag, align_ptr; + + bit55 = extract64(ptr, 55, 1); + + /* If TBI is disabled, the access is unchecked, and ptr is not dirty. */ + if (unlikely(!tbi_check(desc, bit55))) { + return ptr; + } + + ptr_tag = allocation_tag_from_addr(ptr); + + if (tcma_check(desc, bit55, ptr_tag)) { + goto done; + } + + /* + * In arm_cpu_realizefn, we asserted that dcz > LOG2_TAG_GRANULE+1, + * i.e. 32 bytes, which is an unreasonably small dcz anyway, to make + * sure that we can access one complete tag byte here. + */ + log2_dcz_bytes = env_archcpu(env)->dcz_blocksize + 2; + log2_tag_bytes = log2_dcz_bytes - (LOG2_TAG_GRANULE + 1); + dcz_bytes = (intptr_t)1 << log2_dcz_bytes; + tag_bytes = (intptr_t)1 << log2_tag_bytes; + align_ptr = ptr & -dcz_bytes; + + /* + * Trap if accessing an invalid page. DC_ZVA requires that we supply + * the original pointer for an invalid page. But watchpoints require + * that we probe the actual space. So do both. + */ + mmu_idx = FIELD_EX32(desc, MTEDESC, MIDX); + (void) probe_write(env, ptr, 1, mmu_idx, ra); + mem = allocation_tag_mem(env, mmu_idx, align_ptr, MMU_DATA_STORE, + dcz_bytes, MMU_DATA_LOAD, tag_bytes, ra); + if (!mem) { + goto done; + } + + /* + * Unlike the reasoning for checkN, DC_ZVA is always aligned, and thus + * it is quite easy to perform all of the comparisons at once without + * any extra masking. + * + * The most common zva block size is 64; some of the thunderx cpus use + * a block size of 128. For user-only, aarch64_max_initfn will set the + * block size to 512. Fill out the other cases for future-proofing. + * + * In order to be able to find the first miscompare later, we want the + * tag bytes to be in little-endian order. + */ + switch (log2_tag_bytes) { + case 0: /* zva_blocksize 32 */ + mem_tag = *(uint8_t *)mem; + ptr_tag *= 0x11u; + break; + case 1: /* zva_blocksize 64 */ + mem_tag = cpu_to_le16(*(uint16_t *)mem); + ptr_tag *= 0x1111u; + break; + case 2: /* zva_blocksize 128 */ + mem_tag = cpu_to_le32(*(uint32_t *)mem); + ptr_tag *= 0x11111111u; + break; + case 3: /* zva_blocksize 256 */ + mem_tag = cpu_to_le64(*(uint64_t *)mem); + ptr_tag *= 0x1111111111111111ull; + break; + + default: /* zva_blocksize 512, 1024, 2048 */ + ptr_tag *= 0x1111111111111111ull; + i = 0; + do { + mem_tag = cpu_to_le64(*(uint64_t *)(mem + i)); + if (unlikely(mem_tag != ptr_tag)) { + goto fail; + } + i += 8; + align_ptr += 16 * TAG_GRANULE; + } while (i < tag_bytes); + goto done; + } + + if (likely(mem_tag == ptr_tag)) { + goto done; + } + + fail: + /* Locate the first nibble that differs. */ + i = ctz64(mem_tag ^ ptr_tag) >> 4; + mte_check_fail(env, mmu_idx, align_ptr + i * TAG_GRANULE, ra); + + done: + return useronly_clean_ptr(ptr); +} diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index 52be0400d7..a2a8280010 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -1857,7 +1857,21 @@ static void handle_sys(DisasContext *s, uint32_t insn, bool isread, return; case ARM_CP_DC_ZVA: /* Writes clear the aligned block of memory which rt points into. */ - tcg_rt = clean_data_tbi(s, cpu_reg(s, rt)); + if (s->mte_active[0]) { + TCGv_i32 t_desc; + int desc = 0; + + desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s)); + desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); + desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); + t_desc = tcg_const_i32(desc); + + tcg_rt = new_tmp_a64(s); + gen_helper_mte_check_zva(tcg_rt, cpu_env, t_desc, cpu_reg(s, rt)); + tcg_temp_free_i32(t_desc); + } else { + tcg_rt = clean_data_tbi(s, cpu_reg(s, rt)); + } gen_helper_dc_zva(cpu_env, tcg_rt); return; default: From b2aa8879b884cd66acde4123899dd92a38fe6527 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 25 Jun 2020 20:31:26 -0700 Subject: [PATCH 40/57] target/arm: Use mte_checkN for sve unpredicated loads Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20200626033144.790098-29-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/translate-sve.c | 61 +++++++++++++++++++++----------------- 1 file changed, 33 insertions(+), 28 deletions(-) diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c index ac7b3119e5..11e0dfc210 100644 --- a/target/arm/translate-sve.c +++ b/target/arm/translate-sve.c @@ -4342,71 +4342,76 @@ static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm) int len_remain = len % 8; int nparts = len / 8 + ctpop8(len_remain); int midx = get_mem_index(s); - TCGv_i64 addr, t0, t1; + TCGv_i64 dirty_addr, clean_addr, t0, t1; - addr = tcg_temp_new_i64(); - t0 = tcg_temp_new_i64(); + dirty_addr = tcg_temp_new_i64(); + tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm); + clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len, MO_8); + tcg_temp_free_i64(dirty_addr); - /* Note that unpredicated load/store of vector/predicate registers + /* + * Note that unpredicated load/store of vector/predicate registers * are defined as a stream of bytes, which equates to little-endian - * operations on larger quantities. There is no nice way to force - * a little-endian load for aarch64_be-linux-user out of line. - * + * operations on larger quantities. * Attempt to keep code expansion to a minimum by limiting the * amount of unrolling done. */ if (nparts <= 4) { int i; + t0 = tcg_temp_new_i64(); for (i = 0; i < len_align; i += 8) { - tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i); - tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ); + tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEQ); tcg_gen_st_i64(t0, cpu_env, vofs + i); + tcg_gen_addi_i64(clean_addr, cpu_reg_sp(s, rn), 8); } + tcg_temp_free_i64(t0); } else { TCGLabel *loop = gen_new_label(); TCGv_ptr tp, i = tcg_const_local_ptr(0); + /* Copy the clean address into a local temp, live across the loop. */ + t0 = clean_addr; + clean_addr = tcg_temp_local_new_i64(); + tcg_gen_mov_i64(clean_addr, t0); + tcg_temp_free_i64(t0); + gen_set_label(loop); - /* Minimize the number of local temps that must be re-read from - * the stack each iteration. Instead, re-compute values other - * than the loop counter. - */ + t0 = tcg_temp_new_i64(); + tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEQ); + tcg_gen_addi_i64(clean_addr, clean_addr, 8); + tp = tcg_temp_new_ptr(); - tcg_gen_addi_ptr(tp, i, imm); - tcg_gen_extu_ptr_i64(addr, tp); - tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn)); - - tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ); - tcg_gen_add_ptr(tp, cpu_env, i); tcg_gen_addi_ptr(i, i, 8); tcg_gen_st_i64(t0, tp, vofs); tcg_temp_free_ptr(tp); + tcg_temp_free_i64(t0); tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop); tcg_temp_free_ptr(i); } - /* Predicate register loads can be any multiple of 2. + /* + * Predicate register loads can be any multiple of 2. * Note that we still store the entire 64-bit unit into cpu_env. */ if (len_remain) { - tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align); - + t0 = tcg_temp_new_i64(); switch (len_remain) { case 2: case 4: case 8: - tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LE | ctz32(len_remain)); + tcg_gen_qemu_ld_i64(t0, clean_addr, midx, + MO_LE | ctz32(len_remain)); break; case 6: t1 = tcg_temp_new_i64(); - tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEUL); - tcg_gen_addi_i64(addr, addr, 4); - tcg_gen_qemu_ld_i64(t1, addr, midx, MO_LEUW); + tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUL); + tcg_gen_addi_i64(clean_addr, clean_addr, 4); + tcg_gen_qemu_ld_i64(t1, clean_addr, midx, MO_LEUW); tcg_gen_deposit_i64(t0, t0, t1, 32, 32); tcg_temp_free_i64(t1); break; @@ -4415,9 +4420,9 @@ static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm) g_assert_not_reached(); } tcg_gen_st_i64(t0, cpu_env, vofs + len_align); + tcg_temp_free_i64(t0); } - tcg_temp_free_i64(addr); - tcg_temp_free_i64(t0); + tcg_temp_free_i64(clean_addr); } /* Similarly for stores. */ From bba87d0a0f480805223a6428a7942a51733c488a Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 25 Jun 2020 20:31:27 -0700 Subject: [PATCH 41/57] target/arm: Use mte_checkN for sve unpredicated stores Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20200626033144.790098-30-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/translate-sve.c | 61 +++++++++++++++++++++----------------- 1 file changed, 33 insertions(+), 28 deletions(-) diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c index 11e0dfc210..4a613ca689 100644 --- a/target/arm/translate-sve.c +++ b/target/arm/translate-sve.c @@ -4432,10 +4432,12 @@ static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm) int len_remain = len % 8; int nparts = len / 8 + ctpop8(len_remain); int midx = get_mem_index(s); - TCGv_i64 addr, t0; + TCGv_i64 dirty_addr, clean_addr, t0; - addr = tcg_temp_new_i64(); - t0 = tcg_temp_new_i64(); + dirty_addr = tcg_temp_new_i64(); + tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm); + clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len, MO_8); + tcg_temp_free_i64(dirty_addr); /* Note that unpredicated load/store of vector/predicate registers * are defined as a stream of bytes, which equates to little-endian @@ -4448,33 +4450,35 @@ static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm) if (nparts <= 4) { int i; + t0 = tcg_temp_new_i64(); for (i = 0; i < len_align; i += 8) { tcg_gen_ld_i64(t0, cpu_env, vofs + i); - tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i); - tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEQ); + tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEQ); + tcg_gen_addi_i64(clean_addr, cpu_reg_sp(s, rn), 8); } + tcg_temp_free_i64(t0); } else { TCGLabel *loop = gen_new_label(); - TCGv_ptr t2, i = tcg_const_local_ptr(0); + TCGv_ptr tp, i = tcg_const_local_ptr(0); + + /* Copy the clean address into a local temp, live across the loop. */ + t0 = clean_addr; + clean_addr = tcg_temp_local_new_i64(); + tcg_gen_mov_i64(clean_addr, t0); + tcg_temp_free_i64(t0); gen_set_label(loop); - t2 = tcg_temp_new_ptr(); - tcg_gen_add_ptr(t2, cpu_env, i); - tcg_gen_ld_i64(t0, t2, vofs); - - /* Minimize the number of local temps that must be re-read from - * the stack each iteration. Instead, re-compute values other - * than the loop counter. - */ - tcg_gen_addi_ptr(t2, i, imm); - tcg_gen_extu_ptr_i64(addr, t2); - tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn)); - tcg_temp_free_ptr(t2); - - tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEQ); - + t0 = tcg_temp_new_i64(); + tp = tcg_temp_new_ptr(); + tcg_gen_add_ptr(tp, cpu_env, i); + tcg_gen_ld_i64(t0, tp, vofs); tcg_gen_addi_ptr(i, i, 8); + tcg_temp_free_ptr(tp); + + tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEQ); + tcg_gen_addi_i64(clean_addr, clean_addr, 8); + tcg_temp_free_i64(t0); tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop); tcg_temp_free_ptr(i); @@ -4482,29 +4486,30 @@ static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm) /* Predicate register stores can be any multiple of 2. */ if (len_remain) { + t0 = tcg_temp_new_i64(); tcg_gen_ld_i64(t0, cpu_env, vofs + len_align); - tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align); switch (len_remain) { case 2: case 4: case 8: - tcg_gen_qemu_st_i64(t0, addr, midx, MO_LE | ctz32(len_remain)); + tcg_gen_qemu_st_i64(t0, clean_addr, midx, + MO_LE | ctz32(len_remain)); break; case 6: - tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEUL); - tcg_gen_addi_i64(addr, addr, 4); + tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUL); + tcg_gen_addi_i64(clean_addr, clean_addr, 4); tcg_gen_shri_i64(t0, t0, 32); - tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEUW); + tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUW); break; default: g_assert_not_reached(); } + tcg_temp_free_i64(t0); } - tcg_temp_free_i64(addr); - tcg_temp_free_i64(t0); + tcg_temp_free_i64(clean_addr); } static bool trans_LDR_zri(DisasContext *s, arg_rri *a) From 4ac430e1f1eb1d27913a9f800a5965b281ac1b76 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 25 Jun 2020 20:31:28 -0700 Subject: [PATCH 42/57] target/arm: Use mte_check1 for sve LD1R Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20200626033144.790098-31-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/translate-sve.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c index 4a613ca689..4fa521989d 100644 --- a/target/arm/translate-sve.c +++ b/target/arm/translate-sve.c @@ -4892,7 +4892,7 @@ static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a) unsigned esz = dtype_esz[a->dtype]; unsigned msz = dtype_msz(a->dtype); TCGLabel *over = gen_new_label(); - TCGv_i64 temp; + TCGv_i64 temp, clean_addr; /* If the guarding predicate has no bits set, no load occurs. */ if (psz <= 8) { @@ -4915,7 +4915,9 @@ static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a) /* Load the data. */ temp = tcg_temp_new_i64(); tcg_gen_addi_i64(temp, cpu_reg_sp(s, a->rn), a->imm << msz); - tcg_gen_qemu_ld_i64(temp, temp, get_mem_index(s), + clean_addr = gen_mte_check1(s, temp, false, true, msz); + + tcg_gen_qemu_ld_i64(temp, clean_addr, get_mem_index(s), s->be_data | dtype_mop[a->dtype]); /* Broadcast to *all* elements. */ From c0ed9166b1aea86a2fbaada1195aacd1049f9e85 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 25 Jun 2020 20:31:29 -0700 Subject: [PATCH 43/57] target/arm: Tidy trans_LD1R_zpri Move the variable declarations to the top of the function, but do not create a new label before sve_access_check. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20200626033144.790098-32-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/translate-sve.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c index 4fa521989d..a3a0b98fbc 100644 --- a/target/arm/translate-sve.c +++ b/target/arm/translate-sve.c @@ -4883,17 +4883,19 @@ static bool trans_LD1RQ_zpri(DisasContext *s, arg_rpri_load *a) /* Load and broadcast element. */ static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a) { - if (!sve_access_check(s)) { - return true; - } - unsigned vsz = vec_full_reg_size(s); unsigned psz = pred_full_reg_size(s); unsigned esz = dtype_esz[a->dtype]; unsigned msz = dtype_msz(a->dtype); - TCGLabel *over = gen_new_label(); + TCGLabel *over; TCGv_i64 temp, clean_addr; + if (!sve_access_check(s)) { + return true; + } + + over = gen_new_label(); + /* If the guarding predicate has no bits set, no load occurs. */ if (psz <= 8) { /* Reduce the pred_esz_masks value simply to reduce the From 149d3b31f3f0f7f9e1c3a77043450a95c7a7e93d Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 25 Jun 2020 20:31:30 -0700 Subject: [PATCH 44/57] target/arm: Add arm_tlb_bti_gp Introduce an lvalue macro to wrap target_tlb_bit0. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20200626033144.790098-33-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/cpu.h | 13 +++++++++++++ target/arm/helper.c | 2 +- target/arm/translate-a64.c | 2 +- 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/target/arm/cpu.h b/target/arm/cpu.h index cb4f6ba69f..c54f0ab18a 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h @@ -3393,6 +3393,19 @@ static inline uint64_t *aa64_vfp_qreg(CPUARMState *env, unsigned regno) /* Shared between translate-sve.c and sve_helper.c. */ extern const uint64_t pred_esz_masks[4]; +/* Helper for the macros below, validating the argument type. */ +static inline MemTxAttrs *typecheck_memtxattrs(MemTxAttrs *x) +{ + return x; +} + +/* + * Lvalue macros for ARM TLB bits that we must cache in the TCG TLB. + * Using these should be a bit more self-documenting than using the + * generic target bits directly. + */ +#define arm_tlb_bti_gp(x) (typecheck_memtxattrs(x)->target_tlb_bit0) + /* * Naming convention for isar_feature functions: * Functions which test 32-bit ID registers should have _aa32_ in diff --git a/target/arm/helper.c b/target/arm/helper.c index 33f902387b..44a3f9fb48 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -11079,7 +11079,7 @@ static bool get_phys_addr_lpae(CPUARMState *env, target_ulong address, } /* When in aarch64 mode, and BTI is enabled, remember GP in the IOTLB. */ if (aarch64 && guarded && cpu_isar_feature(aa64_bti, cpu)) { - txattrs->target_tlb_bit0 = true; + arm_tlb_bti_gp(txattrs) = true; } if (cacheattrs != NULL) { diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index a2a8280010..7a3774bfda 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -14434,7 +14434,7 @@ static bool is_guarded_page(CPUARMState *env, DisasContext *s) * table entry even for that case. */ return (tlb_hit(entry->addr_code, addr) && - env_tlb(env)->d[mmu_idx].iotlb[index].attrs.target_tlb_bit0); + arm_tlb_bti_gp(&env_tlb(env)->d[mmu_idx].iotlb[index].attrs)); #endif } From 206adacfb8d35e671e3619591608c475aa046b63 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 25 Jun 2020 20:31:31 -0700 Subject: [PATCH 45/57] target/arm: Add mte helpers for sve scalar + int loads Because the elements are sequential, we can eliminate many tests all at once when the tag hits TCMA, or if the page(s) are not Tagged. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20200626033144.790098-34-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/cpu.h | 1 + target/arm/helper-sve.h | 58 ++++++++++ target/arm/internals.h | 6 + target/arm/sve_helper.c | 218 ++++++++++++++++++++++++++++++------- target/arm/translate-sve.c | 186 ++++++++++++++++++++++--------- 5 files changed, 378 insertions(+), 91 deletions(-) diff --git a/target/arm/cpu.h b/target/arm/cpu.h index c54f0ab18a..3bf0518ca4 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h @@ -3405,6 +3405,7 @@ static inline MemTxAttrs *typecheck_memtxattrs(MemTxAttrs *x) * generic target bits directly. */ #define arm_tlb_bti_gp(x) (typecheck_memtxattrs(x)->target_tlb_bit0) +#define arm_tlb_mte_tagged(x) (typecheck_memtxattrs(x)->target_tlb_bit1) /* * Naming convention for isar_feature functions: diff --git a/target/arm/helper-sve.h b/target/arm/helper-sve.h index 7a200755ac..1bc1974fc2 100644 --- a/target/arm/helper-sve.h +++ b/target/arm/helper-sve.h @@ -1196,6 +1196,64 @@ DEF_HELPER_FLAGS_4(sve_ld1sds_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_ld1sdu_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_ld1sds_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld1bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld2bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld3bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld4bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ld1hh_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld2hh_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld3hh_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld4hh_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ld1hh_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld2hh_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld3hh_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld4hh_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ld1ss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld2ss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld3ss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld4ss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ld1ss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld2ss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld3ss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld4ss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ld1dd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld2dd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld3dd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld4dd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ld1dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld2dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld3dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld4dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ld1bhu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld1bsu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld1bdu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld1bhs_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld1bss_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld1bds_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ld1hsu_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld1hdu_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld1hss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld1hds_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ld1hsu_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld1hdu_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld1hss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld1hds_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ld1sdu_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld1sds_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ld1sdu_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld1sds_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + DEF_HELPER_FLAGS_4(sve_ldff1bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_ldff1bhu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_ldff1bsu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) diff --git a/target/arm/internals.h b/target/arm/internals.h index c763a23dfb..3306c4f829 100644 --- a/target/arm/internals.h +++ b/target/arm/internals.h @@ -1310,6 +1310,12 @@ void arm_log_exception(int idx); #define LOG2_TAG_GRANULE 4 #define TAG_GRANULE (1 << LOG2_TAG_GRANULE) +/* + * The SVE simd_data field, for memory ops, contains either + * rd (5 bits) or a shift count (2 bits). + */ +#define SVE_MTEDESC_SHIFT 5 + /* Bits within a descriptor passed to the helper_mte_check* functions. */ FIELD(MTEDESC, MIDX, 0, 4) FIELD(MTEDESC, TBI, 4, 2) diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c index e590db6637..767ecb399f 100644 --- a/target/arm/sve_helper.c +++ b/target/arm/sve_helper.c @@ -4393,15 +4393,89 @@ static void sve_cont_ldst_watchpoints(SVEContLdSt *info, CPUARMState *env, #endif } +typedef uint64_t mte_check_fn(CPUARMState *, uint32_t, uint64_t, uintptr_t); + +static inline QEMU_ALWAYS_INLINE +void sve_cont_ldst_mte_check_int(SVEContLdSt *info, CPUARMState *env, + uint64_t *vg, target_ulong addr, int esize, + int msize, uint32_t mtedesc, uintptr_t ra, + mte_check_fn *check) +{ + intptr_t mem_off, reg_off, reg_last; + + /* Process the page only if MemAttr == Tagged. */ + if (arm_tlb_mte_tagged(&info->page[0].attrs)) { + mem_off = info->mem_off_first[0]; + reg_off = info->reg_off_first[0]; + reg_last = info->reg_off_split; + if (reg_last < 0) { + reg_last = info->reg_off_last[0]; + } + + do { + uint64_t pg = vg[reg_off >> 6]; + do { + if ((pg >> (reg_off & 63)) & 1) { + check(env, mtedesc, addr, ra); + } + reg_off += esize; + mem_off += msize; + } while (reg_off <= reg_last && (reg_off & 63)); + } while (reg_off <= reg_last); + } + + mem_off = info->mem_off_first[1]; + if (mem_off >= 0 && arm_tlb_mte_tagged(&info->page[1].attrs)) { + reg_off = info->reg_off_first[1]; + reg_last = info->reg_off_last[1]; + + do { + uint64_t pg = vg[reg_off >> 6]; + do { + if ((pg >> (reg_off & 63)) & 1) { + check(env, mtedesc, addr, ra); + } + reg_off += esize; + mem_off += msize; + } while (reg_off & 63); + } while (reg_off <= reg_last); + } +} + +typedef void sve_cont_ldst_mte_check_fn(SVEContLdSt *info, CPUARMState *env, + uint64_t *vg, target_ulong addr, + int esize, int msize, uint32_t mtedesc, + uintptr_t ra); + +static void sve_cont_ldst_mte_check1(SVEContLdSt *info, CPUARMState *env, + uint64_t *vg, target_ulong addr, + int esize, int msize, uint32_t mtedesc, + uintptr_t ra) +{ + sve_cont_ldst_mte_check_int(info, env, vg, addr, esize, msize, + mtedesc, ra, mte_check1); +} + +static void sve_cont_ldst_mte_checkN(SVEContLdSt *info, CPUARMState *env, + uint64_t *vg, target_ulong addr, + int esize, int msize, uint32_t mtedesc, + uintptr_t ra) +{ + sve_cont_ldst_mte_check_int(info, env, vg, addr, esize, msize, + mtedesc, ra, mte_checkN); +} + + /* * Common helper for all contiguous 1,2,3,4-register predicated stores. */ static inline QEMU_ALWAYS_INLINE void sve_ldN_r(CPUARMState *env, uint64_t *vg, const target_ulong addr, uint32_t desc, const uintptr_t retaddr, - const int esz, const int msz, const int N, + const int esz, const int msz, const int N, uint32_t mtedesc, sve_ldst1_host_fn *host_fn, - sve_ldst1_tlb_fn *tlb_fn) + sve_ldst1_tlb_fn *tlb_fn, + sve_cont_ldst_mte_check_fn *mte_check_fn) { const unsigned rd = simd_data(desc); const intptr_t reg_max = simd_oprsz(desc); @@ -4426,7 +4500,14 @@ void sve_ldN_r(CPUARMState *env, uint64_t *vg, const target_ulong addr, sve_cont_ldst_watchpoints(&info, env, vg, addr, 1 << esz, N << msz, BP_MEM_READ, retaddr); - /* TODO: MTE check. */ + /* + * Handle mte checks for all active elements. + * Since TBI must be set for MTE, !mtedesc => !mte_active. + */ + if (mte_check_fn && mtedesc) { + mte_check_fn(&info, env, vg, addr, 1 << esz, N << msz, + mtedesc, retaddr); + } flags = info.page[0].flags | info.page[1].flags; if (unlikely(flags != 0)) { @@ -4532,26 +4613,67 @@ void sve_ldN_r(CPUARMState *env, uint64_t *vg, const target_ulong addr, } } -#define DO_LD1_1(NAME, ESZ) \ -void HELPER(sve_##NAME##_r)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ -{ \ - sve_ldN_r(env, vg, addr, desc, GETPC(), ESZ, MO_8, 1, \ - sve_##NAME##_host, sve_##NAME##_tlb); \ +static inline QEMU_ALWAYS_INLINE +void sve_ldN_r_mte(CPUARMState *env, uint64_t *vg, target_ulong addr, + uint32_t desc, const uintptr_t ra, + const int esz, const int msz, const int N, + sve_ldst1_host_fn *host_fn, + sve_ldst1_tlb_fn *tlb_fn) +{ + uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); + int bit55 = extract64(addr, 55, 1); + + /* Remove mtedesc from the normal sve descriptor. */ + desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); + + /* Perform gross MTE suppression early. */ + if (!tbi_check(desc, bit55) || + tcma_check(desc, bit55, allocation_tag_from_addr(addr))) { + mtedesc = 0; + } + + sve_ldN_r(env, vg, addr, desc, ra, esz, msz, N, mtedesc, host_fn, tlb_fn, + N == 1 ? sve_cont_ldst_mte_check1 : sve_cont_ldst_mte_checkN); } -#define DO_LD1_2(NAME, ESZ, MSZ) \ -void HELPER(sve_##NAME##_le_r)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ -{ \ - sve_ldN_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, 1, \ - sve_##NAME##_le_host, sve_##NAME##_le_tlb); \ -} \ -void HELPER(sve_##NAME##_be_r)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ -{ \ - sve_ldN_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, 1, \ - sve_##NAME##_be_host, sve_##NAME##_be_tlb); \ +#define DO_LD1_1(NAME, ESZ) \ +void HELPER(sve_##NAME##_r)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_ldN_r(env, vg, addr, desc, GETPC(), ESZ, MO_8, 1, 0, \ + sve_##NAME##_host, sve_##NAME##_tlb, NULL); \ +} \ +void HELPER(sve_##NAME##_r_mte)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_ldN_r_mte(env, vg, addr, desc, GETPC(), ESZ, MO_8, 1, \ + sve_##NAME##_host, sve_##NAME##_tlb); \ +} + +#define DO_LD1_2(NAME, ESZ, MSZ) \ +void HELPER(sve_##NAME##_le_r)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_ldN_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, 1, 0, \ + sve_##NAME##_le_host, sve_##NAME##_le_tlb, NULL); \ +} \ +void HELPER(sve_##NAME##_be_r)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_ldN_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, 1, 0, \ + sve_##NAME##_be_host, sve_##NAME##_be_tlb, NULL); \ +} \ +void HELPER(sve_##NAME##_le_r_mte)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_ldN_r_mte(env, vg, addr, desc, GETPC(), ESZ, MSZ, 1, \ + sve_##NAME##_le_host, sve_##NAME##_le_tlb); \ +} \ +void HELPER(sve_##NAME##_be_r_mte)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_ldN_r_mte(env, vg, addr, desc, GETPC(), ESZ, MSZ, 1, \ + sve_##NAME##_be_host, sve_##NAME##_be_tlb); \ } DO_LD1_1(ld1bb, MO_8) @@ -4577,26 +4699,44 @@ DO_LD1_2(ld1dd, MO_64, MO_64) #undef DO_LD1_1 #undef DO_LD1_2 -#define DO_LDN_1(N) \ -void HELPER(sve_ld##N##bb_r)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ -{ \ - sve_ldN_r(env, vg, addr, desc, GETPC(), MO_8, MO_8, N, \ - sve_ld1bb_host, sve_ld1bb_tlb); \ +#define DO_LDN_1(N) \ +void HELPER(sve_ld##N##bb_r)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_ldN_r(env, vg, addr, desc, GETPC(), MO_8, MO_8, N, 0, \ + sve_ld1bb_host, sve_ld1bb_tlb, NULL); \ +} \ +void HELPER(sve_ld##N##bb_r_mte)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_ldN_r_mte(env, vg, addr, desc, GETPC(), MO_8, MO_8, N, \ + sve_ld1bb_host, sve_ld1bb_tlb); \ } -#define DO_LDN_2(N, SUFF, ESZ) \ -void HELPER(sve_ld##N##SUFF##_le_r)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ -{ \ - sve_ldN_r(env, vg, addr, desc, GETPC(), ESZ, ESZ, N, \ - sve_ld1##SUFF##_le_host, sve_ld1##SUFF##_le_tlb); \ -} \ -void HELPER(sve_ld##N##SUFF##_be_r)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ -{ \ - sve_ldN_r(env, vg, addr, desc, GETPC(), ESZ, ESZ, N, \ - sve_ld1##SUFF##_be_host, sve_ld1##SUFF##_be_tlb); \ +#define DO_LDN_2(N, SUFF, ESZ) \ +void HELPER(sve_ld##N##SUFF##_le_r)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_ldN_r(env, vg, addr, desc, GETPC(), ESZ, ESZ, N, 0, \ + sve_ld1##SUFF##_le_host, sve_ld1##SUFF##_le_tlb, NULL); \ +} \ +void HELPER(sve_ld##N##SUFF##_be_r)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_ldN_r(env, vg, addr, desc, GETPC(), ESZ, ESZ, N, 0, \ + sve_ld1##SUFF##_be_host, sve_ld1##SUFF##_be_tlb, NULL); \ +} \ +void HELPER(sve_ld##N##SUFF##_le_r_mte)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_ldN_r_mte(env, vg, addr, desc, GETPC(), ESZ, ESZ, N, \ + sve_ld1##SUFF##_le_host, sve_ld1##SUFF##_le_tlb); \ +} \ +void HELPER(sve_ld##N##SUFF##_be_r_mte)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_ldN_r_mte(env, vg, addr, desc, GETPC(), ESZ, ESZ, N, \ + sve_ld1##SUFF##_be_host, sve_ld1##SUFF##_be_tlb); \ } DO_LDN_1(2) diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c index a3a0b98fbc..2620c965f0 100644 --- a/target/arm/translate-sve.c +++ b/target/arm/translate-sve.c @@ -4575,18 +4575,32 @@ static const uint8_t dtype_esz[16] = { }; static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr, - int dtype, gen_helper_gvec_mem *fn) + int dtype, uint32_t mte_n, bool is_write, + gen_helper_gvec_mem *fn) { unsigned vsz = vec_full_reg_size(s); TCGv_ptr t_pg; TCGv_i32 t_desc; - int desc; + int desc = 0; - /* For e.g. LD4, there are not enough arguments to pass all 4 + /* + * For e.g. LD4, there are not enough arguments to pass all 4 * registers as pointers, so encode the regno into the data field. * For consistency, do this even for LD1. + * TODO: mte_n check here while callers are updated. */ - desc = simd_desc(vsz, vsz, zt); + if (mte_n && s->mte_active[0]) { + int msz = dtype_msz(dtype); + + desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s)); + desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); + desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); + desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write); + desc = FIELD_DP32(desc, MTEDESC, ESIZE, 1 << msz); + desc = FIELD_DP32(desc, MTEDESC, TSIZE, mte_n << msz); + desc <<= SVE_MTEDESC_SHIFT; + } + desc = simd_desc(vsz, vsz, zt | desc); t_desc = tcg_const_i32(desc); t_pg = tcg_temp_new_ptr(); @@ -4600,64 +4614,132 @@ static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr, static void do_ld_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype, int nreg) { - static gen_helper_gvec_mem * const fns[2][16][4] = { - /* Little-endian */ - { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r, + static gen_helper_gvec_mem * const fns[2][2][16][4] = { + { /* mte inactive, little-endian */ + { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r, gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r }, - { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1sds_le_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1hh_le_r, gen_helper_sve_ld2hh_le_r, - gen_helper_sve_ld3hh_le_r, gen_helper_sve_ld4hh_le_r }, - { gen_helper_sve_ld1hsu_le_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1hdu_le_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1sds_le_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1hh_le_r, gen_helper_sve_ld2hh_le_r, + gen_helper_sve_ld3hh_le_r, gen_helper_sve_ld4hh_le_r }, + { gen_helper_sve_ld1hsu_le_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1hdu_le_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1hds_le_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1hss_le_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld2ss_le_r, - gen_helper_sve_ld3ss_le_r, gen_helper_sve_ld4ss_le_r }, - { gen_helper_sve_ld1sdu_le_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1hds_le_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1hss_le_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld2ss_le_r, + gen_helper_sve_ld3ss_le_r, gen_helper_sve_ld4ss_le_r }, + { gen_helper_sve_ld1sdu_le_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1bds_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1bss_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1dd_le_r, gen_helper_sve_ld2dd_le_r, - gen_helper_sve_ld3dd_le_r, gen_helper_sve_ld4dd_le_r } }, + { gen_helper_sve_ld1bds_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1bss_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1dd_le_r, gen_helper_sve_ld2dd_le_r, + gen_helper_sve_ld3dd_le_r, gen_helper_sve_ld4dd_le_r } }, - /* Big-endian */ - { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r, - gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r }, - { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL }, + /* mte inactive, big-endian */ + { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r, + gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r }, + { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1sds_be_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1hh_be_r, gen_helper_sve_ld2hh_be_r, - gen_helper_sve_ld3hh_be_r, gen_helper_sve_ld4hh_be_r }, - { gen_helper_sve_ld1hsu_be_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1hdu_be_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1sds_be_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1hh_be_r, gen_helper_sve_ld2hh_be_r, + gen_helper_sve_ld3hh_be_r, gen_helper_sve_ld4hh_be_r }, + { gen_helper_sve_ld1hsu_be_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1hdu_be_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1hds_be_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1hss_be_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld2ss_be_r, - gen_helper_sve_ld3ss_be_r, gen_helper_sve_ld4ss_be_r }, - { gen_helper_sve_ld1sdu_be_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1hds_be_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1hss_be_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld2ss_be_r, + gen_helper_sve_ld3ss_be_r, gen_helper_sve_ld4ss_be_r }, + { gen_helper_sve_ld1sdu_be_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1bds_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1bss_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1dd_be_r, gen_helper_sve_ld2dd_be_r, - gen_helper_sve_ld3dd_be_r, gen_helper_sve_ld4dd_be_r } } + { gen_helper_sve_ld1bds_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1bss_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1dd_be_r, gen_helper_sve_ld2dd_be_r, + gen_helper_sve_ld3dd_be_r, gen_helper_sve_ld4dd_be_r } } }, + + { /* mte active, little-endian */ + { { gen_helper_sve_ld1bb_r_mte, + gen_helper_sve_ld2bb_r_mte, + gen_helper_sve_ld3bb_r_mte, + gen_helper_sve_ld4bb_r_mte }, + { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL }, + + { gen_helper_sve_ld1sds_le_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1hh_le_r_mte, + gen_helper_sve_ld2hh_le_r_mte, + gen_helper_sve_ld3hh_le_r_mte, + gen_helper_sve_ld4hh_le_r_mte }, + { gen_helper_sve_ld1hsu_le_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1hdu_le_r_mte, NULL, NULL, NULL }, + + { gen_helper_sve_ld1hds_le_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1hss_le_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1ss_le_r_mte, + gen_helper_sve_ld2ss_le_r_mte, + gen_helper_sve_ld3ss_le_r_mte, + gen_helper_sve_ld4ss_le_r_mte }, + { gen_helper_sve_ld1sdu_le_r_mte, NULL, NULL, NULL }, + + { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1dd_le_r_mte, + gen_helper_sve_ld2dd_le_r_mte, + gen_helper_sve_ld3dd_le_r_mte, + gen_helper_sve_ld4dd_le_r_mte } }, + + /* mte active, big-endian */ + { { gen_helper_sve_ld1bb_r_mte, + gen_helper_sve_ld2bb_r_mte, + gen_helper_sve_ld3bb_r_mte, + gen_helper_sve_ld4bb_r_mte }, + { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL }, + + { gen_helper_sve_ld1sds_be_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1hh_be_r_mte, + gen_helper_sve_ld2hh_be_r_mte, + gen_helper_sve_ld3hh_be_r_mte, + gen_helper_sve_ld4hh_be_r_mte }, + { gen_helper_sve_ld1hsu_be_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1hdu_be_r_mte, NULL, NULL, NULL }, + + { gen_helper_sve_ld1hds_be_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1hss_be_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1ss_be_r_mte, + gen_helper_sve_ld2ss_be_r_mte, + gen_helper_sve_ld3ss_be_r_mte, + gen_helper_sve_ld4ss_be_r_mte }, + { gen_helper_sve_ld1sdu_be_r_mte, NULL, NULL, NULL }, + + { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1dd_be_r_mte, + gen_helper_sve_ld2dd_be_r_mte, + gen_helper_sve_ld3dd_be_r_mte, + gen_helper_sve_ld4dd_be_r_mte } } }, }; - gen_helper_gvec_mem *fn = fns[s->be_data == MO_BE][dtype][nreg]; + gen_helper_gvec_mem *fn + = fns[s->mte_active[0]][s->be_data == MO_BE][dtype][nreg]; - /* While there are holes in the table, they are not + /* + * While there are holes in the table, they are not * accessible via the instruction encoding. */ assert(fn != NULL); - do_mem_zpa(s, zt, pg, addr, dtype, fn); + do_mem_zpa(s, zt, pg, addr, dtype, nreg, false, fn); } static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a) @@ -4739,7 +4821,7 @@ static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a) TCGv_i64 addr = new_tmp_a64(s); tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype)); tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn)); - do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, + do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 0, false, fns[s->be_data == MO_BE][a->dtype]); } return true; @@ -4798,7 +4880,7 @@ static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a) TCGv_i64 addr = new_tmp_a64(s); tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off); - do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, + do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 0, false, fns[s->be_data == MO_BE][a->dtype]); } return true; @@ -5002,7 +5084,7 @@ static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr, fn = fn_multiple[be][nreg - 1][msz]; } assert(fn != NULL); - do_mem_zpa(s, zt, pg, addr, msz_dtype(s, msz), fn); + do_mem_zpa(s, zt, pg, addr, msz_dtype(s, msz), 0, true, fn); } static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a) From 71b9f3948c75bb97641a3c8c7de96d1cb47cdc07 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 25 Jun 2020 20:31:32 -0700 Subject: [PATCH 46/57] target/arm: Add mte helpers for sve scalar + int stores Because the elements are sequential, we can eliminate many tests all at once when the tag hits TCMA, or if the page(s) are not Tagged. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20200626033144.790098-35-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/helper-sve.h | 47 +++++++++++ target/arm/sve_helper.c | 95 ++++++++++++++++------ target/arm/translate-sve.c | 162 ++++++++++++++++++++++++------------- 3 files changed, 226 insertions(+), 78 deletions(-) diff --git a/target/arm/helper-sve.h b/target/arm/helper-sve.h index 1bc1974fc2..1425f33c92 100644 --- a/target/arm/helper-sve.h +++ b/target/arm/helper-sve.h @@ -1363,6 +1363,53 @@ DEF_HELPER_FLAGS_4(sve_st1hd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_st1sd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_st1sd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st1bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st2bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st3bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st4bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_st1hh_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st2hh_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st3hh_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st4hh_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_st1hh_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st2hh_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st3hh_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st4hh_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_st1ss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st2ss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st3ss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st4ss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_st1ss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st2ss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st3ss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st4ss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_st1dd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st2dd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st3dd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st4dd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_st1dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st2dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st3dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st4dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_st1bh_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st1bs_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st1bd_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_st1hs_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st1hd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st1hs_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st1hd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_st1sd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st1sd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + DEF_HELPER_FLAGS_6(sve_ldbsu_zsu, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) DEF_HELPER_FLAGS_6(sve_ldhsu_le_zsu, TCG_CALL_NO_WG, diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c index 767ecb399f..ded9cedd18 100644 --- a/target/arm/sve_helper.c +++ b/target/arm/sve_helper.c @@ -5022,11 +5022,12 @@ DO_LDFF1_LDNF1_2(dd, MO_64, MO_64) */ static inline QEMU_ALWAYS_INLINE -void sve_stN_r(CPUARMState *env, uint64_t *vg, target_ulong addr, uint32_t desc, - const uintptr_t retaddr, const int esz, - const int msz, const int N, +void sve_stN_r(CPUARMState *env, uint64_t *vg, target_ulong addr, + uint32_t desc, const uintptr_t retaddr, + const int esz, const int msz, const int N, uint32_t mtedesc, sve_ldst1_host_fn *host_fn, - sve_ldst1_tlb_fn *tlb_fn) + sve_ldst1_tlb_fn *tlb_fn, + sve_cont_ldst_mte_check_fn *mte_check_fn) { const unsigned rd = simd_data(desc); const intptr_t reg_max = simd_oprsz(desc); @@ -5048,7 +5049,14 @@ void sve_stN_r(CPUARMState *env, uint64_t *vg, target_ulong addr, uint32_t desc, sve_cont_ldst_watchpoints(&info, env, vg, addr, 1 << esz, N << msz, BP_MEM_WRITE, retaddr); - /* TODO: MTE check. */ + /* + * Handle mte checks for all active elements. + * Since TBI must be set for MTE, !mtedesc => !mte_active. + */ + if (mte_check_fn && mtedesc) { + mte_check_fn(&info, env, vg, addr, 1 << esz, N << msz, + mtedesc, retaddr); + } flags = info.page[0].flags | info.page[1].flags; if (unlikely(flags != 0)) { @@ -5142,26 +5150,67 @@ void sve_stN_r(CPUARMState *env, uint64_t *vg, target_ulong addr, uint32_t desc, } } -#define DO_STN_1(N, NAME, ESZ) \ -void HELPER(sve_st##N##NAME##_r)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ -{ \ - sve_stN_r(env, vg, addr, desc, GETPC(), ESZ, MO_8, N, \ - sve_st1##NAME##_host, sve_st1##NAME##_tlb); \ +static inline QEMU_ALWAYS_INLINE +void sve_stN_r_mte(CPUARMState *env, uint64_t *vg, target_ulong addr, + uint32_t desc, const uintptr_t ra, + const int esz, const int msz, const int N, + sve_ldst1_host_fn *host_fn, + sve_ldst1_tlb_fn *tlb_fn) +{ + uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); + int bit55 = extract64(addr, 55, 1); + + /* Remove mtedesc from the normal sve descriptor. */ + desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); + + /* Perform gross MTE suppression early. */ + if (!tbi_check(desc, bit55) || + tcma_check(desc, bit55, allocation_tag_from_addr(addr))) { + mtedesc = 0; + } + + sve_stN_r(env, vg, addr, desc, ra, esz, msz, N, mtedesc, host_fn, tlb_fn, + N == 1 ? sve_cont_ldst_mte_check1 : sve_cont_ldst_mte_checkN); } -#define DO_STN_2(N, NAME, ESZ, MSZ) \ -void HELPER(sve_st##N##NAME##_le_r)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ -{ \ - sve_stN_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, N, \ - sve_st1##NAME##_le_host, sve_st1##NAME##_le_tlb); \ -} \ -void HELPER(sve_st##N##NAME##_be_r)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ -{ \ - sve_stN_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, N, \ - sve_st1##NAME##_be_host, sve_st1##NAME##_be_tlb); \ +#define DO_STN_1(N, NAME, ESZ) \ +void HELPER(sve_st##N##NAME##_r)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_stN_r(env, vg, addr, desc, GETPC(), ESZ, MO_8, N, 0, \ + sve_st1##NAME##_host, sve_st1##NAME##_tlb, NULL); \ +} \ +void HELPER(sve_st##N##NAME##_r_mte)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_stN_r_mte(env, vg, addr, desc, GETPC(), ESZ, MO_8, N, \ + sve_st1##NAME##_host, sve_st1##NAME##_tlb); \ +} + +#define DO_STN_2(N, NAME, ESZ, MSZ) \ +void HELPER(sve_st##N##NAME##_le_r)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_stN_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, N, 0, \ + sve_st1##NAME##_le_host, sve_st1##NAME##_le_tlb, NULL); \ +} \ +void HELPER(sve_st##N##NAME##_be_r)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_stN_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, N, 0, \ + sve_st1##NAME##_be_host, sve_st1##NAME##_be_tlb, NULL); \ +} \ +void HELPER(sve_st##N##NAME##_le_r_mte)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_stN_r_mte(env, vg, addr, desc, GETPC(), ESZ, MSZ, N, \ + sve_st1##NAME##_le_host, sve_st1##NAME##_le_tlb); \ +} \ +void HELPER(sve_st##N##NAME##_be_r_mte)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_stN_r_mte(env, vg, addr, desc, GETPC(), ESZ, MSZ, N, \ + sve_st1##NAME##_be_host, sve_st1##NAME##_be_tlb); \ } DO_STN_1(1, bb, MO_8) diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c index 2620c965f0..daac8589f3 100644 --- a/target/arm/translate-sve.c +++ b/target/arm/translate-sve.c @@ -5018,73 +5018,125 @@ static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a) static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr, int msz, int esz, int nreg) { - static gen_helper_gvec_mem * const fn_single[2][4][4] = { - { { gen_helper_sve_st1bb_r, - gen_helper_sve_st1bh_r, - gen_helper_sve_st1bs_r, - gen_helper_sve_st1bd_r }, - { NULL, - gen_helper_sve_st1hh_le_r, - gen_helper_sve_st1hs_le_r, - gen_helper_sve_st1hd_le_r }, - { NULL, NULL, - gen_helper_sve_st1ss_le_r, - gen_helper_sve_st1sd_le_r }, - { NULL, NULL, NULL, - gen_helper_sve_st1dd_le_r } }, - { { gen_helper_sve_st1bb_r, - gen_helper_sve_st1bh_r, - gen_helper_sve_st1bs_r, - gen_helper_sve_st1bd_r }, - { NULL, - gen_helper_sve_st1hh_be_r, - gen_helper_sve_st1hs_be_r, - gen_helper_sve_st1hd_be_r }, - { NULL, NULL, - gen_helper_sve_st1ss_be_r, - gen_helper_sve_st1sd_be_r }, - { NULL, NULL, NULL, - gen_helper_sve_st1dd_be_r } }, + static gen_helper_gvec_mem * const fn_single[2][2][4][4] = { + { { { gen_helper_sve_st1bb_r, + gen_helper_sve_st1bh_r, + gen_helper_sve_st1bs_r, + gen_helper_sve_st1bd_r }, + { NULL, + gen_helper_sve_st1hh_le_r, + gen_helper_sve_st1hs_le_r, + gen_helper_sve_st1hd_le_r }, + { NULL, NULL, + gen_helper_sve_st1ss_le_r, + gen_helper_sve_st1sd_le_r }, + { NULL, NULL, NULL, + gen_helper_sve_st1dd_le_r } }, + { { gen_helper_sve_st1bb_r, + gen_helper_sve_st1bh_r, + gen_helper_sve_st1bs_r, + gen_helper_sve_st1bd_r }, + { NULL, + gen_helper_sve_st1hh_be_r, + gen_helper_sve_st1hs_be_r, + gen_helper_sve_st1hd_be_r }, + { NULL, NULL, + gen_helper_sve_st1ss_be_r, + gen_helper_sve_st1sd_be_r }, + { NULL, NULL, NULL, + gen_helper_sve_st1dd_be_r } } }, + + { { { gen_helper_sve_st1bb_r_mte, + gen_helper_sve_st1bh_r_mte, + gen_helper_sve_st1bs_r_mte, + gen_helper_sve_st1bd_r_mte }, + { NULL, + gen_helper_sve_st1hh_le_r_mte, + gen_helper_sve_st1hs_le_r_mte, + gen_helper_sve_st1hd_le_r_mte }, + { NULL, NULL, + gen_helper_sve_st1ss_le_r_mte, + gen_helper_sve_st1sd_le_r_mte }, + { NULL, NULL, NULL, + gen_helper_sve_st1dd_le_r_mte } }, + { { gen_helper_sve_st1bb_r_mte, + gen_helper_sve_st1bh_r_mte, + gen_helper_sve_st1bs_r_mte, + gen_helper_sve_st1bd_r_mte }, + { NULL, + gen_helper_sve_st1hh_be_r_mte, + gen_helper_sve_st1hs_be_r_mte, + gen_helper_sve_st1hd_be_r_mte }, + { NULL, NULL, + gen_helper_sve_st1ss_be_r_mte, + gen_helper_sve_st1sd_be_r_mte }, + { NULL, NULL, NULL, + gen_helper_sve_st1dd_be_r_mte } } }, }; - static gen_helper_gvec_mem * const fn_multiple[2][3][4] = { - { { gen_helper_sve_st2bb_r, - gen_helper_sve_st2hh_le_r, - gen_helper_sve_st2ss_le_r, - gen_helper_sve_st2dd_le_r }, - { gen_helper_sve_st3bb_r, - gen_helper_sve_st3hh_le_r, - gen_helper_sve_st3ss_le_r, - gen_helper_sve_st3dd_le_r }, - { gen_helper_sve_st4bb_r, - gen_helper_sve_st4hh_le_r, - gen_helper_sve_st4ss_le_r, - gen_helper_sve_st4dd_le_r } }, - { { gen_helper_sve_st2bb_r, - gen_helper_sve_st2hh_be_r, - gen_helper_sve_st2ss_be_r, - gen_helper_sve_st2dd_be_r }, - { gen_helper_sve_st3bb_r, - gen_helper_sve_st3hh_be_r, - gen_helper_sve_st3ss_be_r, - gen_helper_sve_st3dd_be_r }, - { gen_helper_sve_st4bb_r, - gen_helper_sve_st4hh_be_r, - gen_helper_sve_st4ss_be_r, - gen_helper_sve_st4dd_be_r } }, + static gen_helper_gvec_mem * const fn_multiple[2][2][3][4] = { + { { { gen_helper_sve_st2bb_r, + gen_helper_sve_st2hh_le_r, + gen_helper_sve_st2ss_le_r, + gen_helper_sve_st2dd_le_r }, + { gen_helper_sve_st3bb_r, + gen_helper_sve_st3hh_le_r, + gen_helper_sve_st3ss_le_r, + gen_helper_sve_st3dd_le_r }, + { gen_helper_sve_st4bb_r, + gen_helper_sve_st4hh_le_r, + gen_helper_sve_st4ss_le_r, + gen_helper_sve_st4dd_le_r } }, + { { gen_helper_sve_st2bb_r, + gen_helper_sve_st2hh_be_r, + gen_helper_sve_st2ss_be_r, + gen_helper_sve_st2dd_be_r }, + { gen_helper_sve_st3bb_r, + gen_helper_sve_st3hh_be_r, + gen_helper_sve_st3ss_be_r, + gen_helper_sve_st3dd_be_r }, + { gen_helper_sve_st4bb_r, + gen_helper_sve_st4hh_be_r, + gen_helper_sve_st4ss_be_r, + gen_helper_sve_st4dd_be_r } } }, + { { { gen_helper_sve_st2bb_r_mte, + gen_helper_sve_st2hh_le_r_mte, + gen_helper_sve_st2ss_le_r_mte, + gen_helper_sve_st2dd_le_r_mte }, + { gen_helper_sve_st3bb_r_mte, + gen_helper_sve_st3hh_le_r_mte, + gen_helper_sve_st3ss_le_r_mte, + gen_helper_sve_st3dd_le_r_mte }, + { gen_helper_sve_st4bb_r_mte, + gen_helper_sve_st4hh_le_r_mte, + gen_helper_sve_st4ss_le_r_mte, + gen_helper_sve_st4dd_le_r_mte } }, + { { gen_helper_sve_st2bb_r_mte, + gen_helper_sve_st2hh_be_r_mte, + gen_helper_sve_st2ss_be_r_mte, + gen_helper_sve_st2dd_be_r_mte }, + { gen_helper_sve_st3bb_r_mte, + gen_helper_sve_st3hh_be_r_mte, + gen_helper_sve_st3ss_be_r_mte, + gen_helper_sve_st3dd_be_r_mte }, + { gen_helper_sve_st4bb_r_mte, + gen_helper_sve_st4hh_be_r_mte, + gen_helper_sve_st4ss_be_r_mte, + gen_helper_sve_st4dd_be_r_mte } } }, }; gen_helper_gvec_mem *fn; int be = s->be_data == MO_BE; if (nreg == 0) { /* ST1 */ - fn = fn_single[be][msz][esz]; + fn = fn_single[s->mte_active[0]][be][msz][esz]; + nreg = 1; } else { /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */ assert(msz == esz); - fn = fn_multiple[be][nreg - 1][msz]; + fn = fn_multiple[s->mte_active[0]][be][nreg - 1][msz]; } assert(fn != NULL); - do_mem_zpa(s, zt, pg, addr, msz_dtype(s, msz), 0, true, fn); + do_mem_zpa(s, zt, pg, addr, msz_dtype(s, msz), nreg, true, fn); } static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a) From aa13f7c3c378fa41366b9fcd6c29af1c3d81126a Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 25 Jun 2020 20:31:33 -0700 Subject: [PATCH 47/57] target/arm: Add mte helpers for sve scalar + int ff/nf loads Because the elements are sequential, we can eliminate many tests all at once when the tag hits TCMA, or if the page(s) are not Tagged. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20200626033144.790098-36-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/helper-sve.h | 98 ++++++++++++++++ target/arm/sve_helper.c | 99 ++++++++++++++-- target/arm/translate-sve.c | 232 +++++++++++++++++++++++++------------ 3 files changed, 343 insertions(+), 86 deletions(-) diff --git a/target/arm/helper-sve.h b/target/arm/helper-sve.h index 1425f33c92..f48752eb42 100644 --- a/target/arm/helper-sve.h +++ b/target/arm/helper-sve.h @@ -1285,6 +1285,55 @@ DEF_HELPER_FLAGS_4(sve_ldff1sds_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_ldff1dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_ldff1dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1bhu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1bsu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1bdu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1bhs_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1bss_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1bds_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ldff1hh_le_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1hsu_le_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1hdu_le_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1hss_le_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1hds_le_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ldff1hh_be_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1hsu_be_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1hdu_be_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1hss_be_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1hds_be_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ldff1ss_le_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1sdu_le_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1sds_le_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ldff1ss_be_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1sdu_be_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1sds_be_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ldff1dd_le_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1dd_be_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) + DEF_HELPER_FLAGS_4(sve_ldnf1bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_ldnf1bhu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_ldnf1bsu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) @@ -1316,6 +1365,55 @@ DEF_HELPER_FLAGS_4(sve_ldnf1sds_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_ldnf1dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_ldnf1dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1bhu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1bsu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1bdu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1bhs_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1bss_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1bds_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ldnf1hh_le_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1hsu_le_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1hdu_le_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1hss_le_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1hds_le_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ldnf1hh_be_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1hsu_be_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1hdu_be_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1hss_be_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1hds_be_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ldnf1ss_le_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1sdu_le_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1sds_le_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ldnf1ss_be_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1sdu_be_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1sds_be_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ldnf1dd_le_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1dd_be_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) + DEF_HELPER_FLAGS_4(sve_st1bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_st2bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_st3bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c index ded9cedd18..7aca4ad384 100644 --- a/target/arm/sve_helper.c +++ b/target/arm/sve_helper.c @@ -4794,7 +4794,7 @@ static void record_fault(CPUARMState *env, uintptr_t i, uintptr_t oprsz) */ static inline QEMU_ALWAYS_INLINE void sve_ldnfff1_r(CPUARMState *env, void *vg, const target_ulong addr, - uint32_t desc, const uintptr_t retaddr, + uint32_t desc, const uintptr_t retaddr, uint32_t mtedesc, const int esz, const int msz, const SVEContFault fault, sve_ldst1_host_fn *host_fn, sve_ldst1_tlb_fn *tlb_fn) @@ -4826,13 +4826,25 @@ void sve_ldnfff1_r(CPUARMState *env, void *vg, const target_ulong addr, mem_off = info.mem_off_first[0]; flags = info.page[0].flags; + /* + * Disable MTE checking if the Tagged bit is not set. Since TBI must + * be set within MTEDESC for MTE, !mtedesc => !mte_active. + */ + if (arm_tlb_mte_tagged(&info.page[0].attrs)) { + mtedesc = 0; + } + if (fault == FAULT_FIRST) { + /* Trapping mte check for the first-fault element. */ + if (mtedesc) { + mte_check1(env, mtedesc, addr + mem_off, retaddr); + } + /* * Special handling of the first active element, * if it crosses a page boundary or is MMIO. */ bool is_split = mem_off == info.mem_off_split; - /* TODO: MTE check. */ if (unlikely(flags != 0) || unlikely(is_split)) { /* * Use the slow path for cross-page handling. @@ -4868,7 +4880,9 @@ void sve_ldnfff1_r(CPUARMState *env, void *vg, const target_ulong addr, /* Watchpoint hit, see below. */ goto do_fault; } - /* TODO: MTE check. */ + if (mtedesc && !mte_probe1(env, mtedesc, addr + mem_off)) { + goto do_fault; + } /* * Use the slow path for cross-page handling. * This is RAM, without a watchpoint, and will not trap. @@ -4916,7 +4930,9 @@ void sve_ldnfff1_r(CPUARMState *env, void *vg, const target_ulong addr, & BP_MEM_READ)) { goto do_fault; } - /* TODO: MTE check. */ + if (mtedesc && !mte_probe1(env, mtedesc, addr + mem_off)) { + goto do_fault; + } host_fn(vd, reg_off, host + mem_off); } reg_off += 1 << esz; @@ -4954,44 +4970,103 @@ void sve_ldnfff1_r(CPUARMState *env, void *vg, const target_ulong addr, record_fault(env, reg_off, reg_max); } -#define DO_LDFF1_LDNF1_1(PART, ESZ) \ +static inline QEMU_ALWAYS_INLINE +void sve_ldnfff1_r_mte(CPUARMState *env, void *vg, target_ulong addr, + uint32_t desc, const uintptr_t retaddr, + const int esz, const int msz, const SVEContFault fault, + sve_ldst1_host_fn *host_fn, + sve_ldst1_tlb_fn *tlb_fn) +{ + uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); + int bit55 = extract64(addr, 55, 1); + + /* Remove mtedesc from the normal sve descriptor. */ + desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); + + /* Perform gross MTE suppression early. */ + if (!tbi_check(desc, bit55) || + tcma_check(desc, bit55, allocation_tag_from_addr(addr))) { + mtedesc = 0; + } + + sve_ldnfff1_r(env, vg, addr, desc, retaddr, mtedesc, + esz, msz, fault, host_fn, tlb_fn); +} + +#define DO_LDFF1_LDNF1_1(PART, ESZ) \ void HELPER(sve_ldff1##PART##_r)(CPUARMState *env, void *vg, \ target_ulong addr, uint32_t desc) \ { \ - sve_ldnfff1_r(env, vg, addr, desc, GETPC(), ESZ, MO_8, FAULT_FIRST, \ + sve_ldnfff1_r(env, vg, addr, desc, GETPC(), 0, ESZ, MO_8, FAULT_FIRST, \ sve_ld1##PART##_host, sve_ld1##PART##_tlb); \ } \ void HELPER(sve_ldnf1##PART##_r)(CPUARMState *env, void *vg, \ target_ulong addr, uint32_t desc) \ { \ - sve_ldnfff1_r(env, vg, addr, desc, GETPC(), ESZ, MO_8, FAULT_NO, \ + sve_ldnfff1_r(env, vg, addr, desc, GETPC(), 0, ESZ, MO_8, FAULT_NO, \ + sve_ld1##PART##_host, sve_ld1##PART##_tlb); \ +} \ +void HELPER(sve_ldff1##PART##_r_mte)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_ldnfff1_r_mte(env, vg, addr, desc, GETPC(), ESZ, MO_8, FAULT_FIRST, \ + sve_ld1##PART##_host, sve_ld1##PART##_tlb); \ +} \ +void HELPER(sve_ldnf1##PART##_r_mte)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_ldnfff1_r_mte(env, vg, addr, desc, GETPC(), ESZ, MO_8, FAULT_NO, \ sve_ld1##PART##_host, sve_ld1##PART##_tlb); \ } -#define DO_LDFF1_LDNF1_2(PART, ESZ, MSZ) \ +#define DO_LDFF1_LDNF1_2(PART, ESZ, MSZ) \ void HELPER(sve_ldff1##PART##_le_r)(CPUARMState *env, void *vg, \ target_ulong addr, uint32_t desc) \ { \ - sve_ldnfff1_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, FAULT_FIRST, \ + sve_ldnfff1_r(env, vg, addr, desc, GETPC(), 0, ESZ, MSZ, FAULT_FIRST, \ sve_ld1##PART##_le_host, sve_ld1##PART##_le_tlb); \ } \ void HELPER(sve_ldnf1##PART##_le_r)(CPUARMState *env, void *vg, \ target_ulong addr, uint32_t desc) \ { \ - sve_ldnfff1_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, FAULT_NO, \ + sve_ldnfff1_r(env, vg, addr, desc, GETPC(), 0, ESZ, MSZ, FAULT_NO, \ sve_ld1##PART##_le_host, sve_ld1##PART##_le_tlb); \ } \ void HELPER(sve_ldff1##PART##_be_r)(CPUARMState *env, void *vg, \ target_ulong addr, uint32_t desc) \ { \ - sve_ldnfff1_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, FAULT_FIRST, \ + sve_ldnfff1_r(env, vg, addr, desc, GETPC(), 0, ESZ, MSZ, FAULT_FIRST, \ sve_ld1##PART##_be_host, sve_ld1##PART##_be_tlb); \ } \ void HELPER(sve_ldnf1##PART##_be_r)(CPUARMState *env, void *vg, \ target_ulong addr, uint32_t desc) \ { \ - sve_ldnfff1_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, FAULT_NO, \ + sve_ldnfff1_r(env, vg, addr, desc, GETPC(), 0, ESZ, MSZ, FAULT_NO, \ sve_ld1##PART##_be_host, sve_ld1##PART##_be_tlb); \ +} \ +void HELPER(sve_ldff1##PART##_le_r_mte)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_ldnfff1_r_mte(env, vg, addr, desc, GETPC(), ESZ, MSZ, FAULT_FIRST, \ + sve_ld1##PART##_le_host, sve_ld1##PART##_le_tlb); \ +} \ +void HELPER(sve_ldnf1##PART##_le_r_mte)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_ldnfff1_r_mte(env, vg, addr, desc, GETPC(), ESZ, MSZ, FAULT_NO, \ + sve_ld1##PART##_le_host, sve_ld1##PART##_le_tlb); \ +} \ +void HELPER(sve_ldff1##PART##_be_r_mte)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_ldnfff1_r_mte(env, vg, addr, desc, GETPC(), ESZ, MSZ, FAULT_FIRST, \ + sve_ld1##PART##_be_host, sve_ld1##PART##_be_tlb); \ +} \ +void HELPER(sve_ldnf1##PART##_be_r_mte)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_ldnfff1_r_mte(env, vg, addr, desc, GETPC(), ESZ, MSZ, FAULT_NO, \ + sve_ld1##PART##_be_host, sve_ld1##PART##_be_tlb); \ } DO_LDFF1_LDNF1_1(bb, MO_8) diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c index daac8589f3..e4fbe48493 100644 --- a/target/arm/translate-sve.c +++ b/target/arm/translate-sve.c @@ -4773,104 +4773,188 @@ static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a) static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a) { - static gen_helper_gvec_mem * const fns[2][16] = { - /* Little-endian */ - { gen_helper_sve_ldff1bb_r, - gen_helper_sve_ldff1bhu_r, - gen_helper_sve_ldff1bsu_r, - gen_helper_sve_ldff1bdu_r, + static gen_helper_gvec_mem * const fns[2][2][16] = { + { /* mte inactive, little-endian */ + { gen_helper_sve_ldff1bb_r, + gen_helper_sve_ldff1bhu_r, + gen_helper_sve_ldff1bsu_r, + gen_helper_sve_ldff1bdu_r, - gen_helper_sve_ldff1sds_le_r, - gen_helper_sve_ldff1hh_le_r, - gen_helper_sve_ldff1hsu_le_r, - gen_helper_sve_ldff1hdu_le_r, + gen_helper_sve_ldff1sds_le_r, + gen_helper_sve_ldff1hh_le_r, + gen_helper_sve_ldff1hsu_le_r, + gen_helper_sve_ldff1hdu_le_r, - gen_helper_sve_ldff1hds_le_r, - gen_helper_sve_ldff1hss_le_r, - gen_helper_sve_ldff1ss_le_r, - gen_helper_sve_ldff1sdu_le_r, + gen_helper_sve_ldff1hds_le_r, + gen_helper_sve_ldff1hss_le_r, + gen_helper_sve_ldff1ss_le_r, + gen_helper_sve_ldff1sdu_le_r, - gen_helper_sve_ldff1bds_r, - gen_helper_sve_ldff1bss_r, - gen_helper_sve_ldff1bhs_r, - gen_helper_sve_ldff1dd_le_r }, + gen_helper_sve_ldff1bds_r, + gen_helper_sve_ldff1bss_r, + gen_helper_sve_ldff1bhs_r, + gen_helper_sve_ldff1dd_le_r }, - /* Big-endian */ - { gen_helper_sve_ldff1bb_r, - gen_helper_sve_ldff1bhu_r, - gen_helper_sve_ldff1bsu_r, - gen_helper_sve_ldff1bdu_r, + /* mte inactive, big-endian */ + { gen_helper_sve_ldff1bb_r, + gen_helper_sve_ldff1bhu_r, + gen_helper_sve_ldff1bsu_r, + gen_helper_sve_ldff1bdu_r, - gen_helper_sve_ldff1sds_be_r, - gen_helper_sve_ldff1hh_be_r, - gen_helper_sve_ldff1hsu_be_r, - gen_helper_sve_ldff1hdu_be_r, + gen_helper_sve_ldff1sds_be_r, + gen_helper_sve_ldff1hh_be_r, + gen_helper_sve_ldff1hsu_be_r, + gen_helper_sve_ldff1hdu_be_r, - gen_helper_sve_ldff1hds_be_r, - gen_helper_sve_ldff1hss_be_r, - gen_helper_sve_ldff1ss_be_r, - gen_helper_sve_ldff1sdu_be_r, + gen_helper_sve_ldff1hds_be_r, + gen_helper_sve_ldff1hss_be_r, + gen_helper_sve_ldff1ss_be_r, + gen_helper_sve_ldff1sdu_be_r, - gen_helper_sve_ldff1bds_r, - gen_helper_sve_ldff1bss_r, - gen_helper_sve_ldff1bhs_r, - gen_helper_sve_ldff1dd_be_r }, + gen_helper_sve_ldff1bds_r, + gen_helper_sve_ldff1bss_r, + gen_helper_sve_ldff1bhs_r, + gen_helper_sve_ldff1dd_be_r } }, + + { /* mte active, little-endian */ + { gen_helper_sve_ldff1bb_r_mte, + gen_helper_sve_ldff1bhu_r_mte, + gen_helper_sve_ldff1bsu_r_mte, + gen_helper_sve_ldff1bdu_r_mte, + + gen_helper_sve_ldff1sds_le_r_mte, + gen_helper_sve_ldff1hh_le_r_mte, + gen_helper_sve_ldff1hsu_le_r_mte, + gen_helper_sve_ldff1hdu_le_r_mte, + + gen_helper_sve_ldff1hds_le_r_mte, + gen_helper_sve_ldff1hss_le_r_mte, + gen_helper_sve_ldff1ss_le_r_mte, + gen_helper_sve_ldff1sdu_le_r_mte, + + gen_helper_sve_ldff1bds_r_mte, + gen_helper_sve_ldff1bss_r_mte, + gen_helper_sve_ldff1bhs_r_mte, + gen_helper_sve_ldff1dd_le_r_mte }, + + /* mte active, big-endian */ + { gen_helper_sve_ldff1bb_r_mte, + gen_helper_sve_ldff1bhu_r_mte, + gen_helper_sve_ldff1bsu_r_mte, + gen_helper_sve_ldff1bdu_r_mte, + + gen_helper_sve_ldff1sds_be_r_mte, + gen_helper_sve_ldff1hh_be_r_mte, + gen_helper_sve_ldff1hsu_be_r_mte, + gen_helper_sve_ldff1hdu_be_r_mte, + + gen_helper_sve_ldff1hds_be_r_mte, + gen_helper_sve_ldff1hss_be_r_mte, + gen_helper_sve_ldff1ss_be_r_mte, + gen_helper_sve_ldff1sdu_be_r_mte, + + gen_helper_sve_ldff1bds_r_mte, + gen_helper_sve_ldff1bss_r_mte, + gen_helper_sve_ldff1bhs_r_mte, + gen_helper_sve_ldff1dd_be_r_mte } }, }; if (sve_access_check(s)) { TCGv_i64 addr = new_tmp_a64(s); tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype)); tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn)); - do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 0, false, - fns[s->be_data == MO_BE][a->dtype]); + do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false, + fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]); } return true; } static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a) { - static gen_helper_gvec_mem * const fns[2][16] = { - /* Little-endian */ - { gen_helper_sve_ldnf1bb_r, - gen_helper_sve_ldnf1bhu_r, - gen_helper_sve_ldnf1bsu_r, - gen_helper_sve_ldnf1bdu_r, + static gen_helper_gvec_mem * const fns[2][2][16] = { + { /* mte inactive, little-endian */ + { gen_helper_sve_ldnf1bb_r, + gen_helper_sve_ldnf1bhu_r, + gen_helper_sve_ldnf1bsu_r, + gen_helper_sve_ldnf1bdu_r, - gen_helper_sve_ldnf1sds_le_r, - gen_helper_sve_ldnf1hh_le_r, - gen_helper_sve_ldnf1hsu_le_r, - gen_helper_sve_ldnf1hdu_le_r, + gen_helper_sve_ldnf1sds_le_r, + gen_helper_sve_ldnf1hh_le_r, + gen_helper_sve_ldnf1hsu_le_r, + gen_helper_sve_ldnf1hdu_le_r, - gen_helper_sve_ldnf1hds_le_r, - gen_helper_sve_ldnf1hss_le_r, - gen_helper_sve_ldnf1ss_le_r, - gen_helper_sve_ldnf1sdu_le_r, + gen_helper_sve_ldnf1hds_le_r, + gen_helper_sve_ldnf1hss_le_r, + gen_helper_sve_ldnf1ss_le_r, + gen_helper_sve_ldnf1sdu_le_r, - gen_helper_sve_ldnf1bds_r, - gen_helper_sve_ldnf1bss_r, - gen_helper_sve_ldnf1bhs_r, - gen_helper_sve_ldnf1dd_le_r }, + gen_helper_sve_ldnf1bds_r, + gen_helper_sve_ldnf1bss_r, + gen_helper_sve_ldnf1bhs_r, + gen_helper_sve_ldnf1dd_le_r }, - /* Big-endian */ - { gen_helper_sve_ldnf1bb_r, - gen_helper_sve_ldnf1bhu_r, - gen_helper_sve_ldnf1bsu_r, - gen_helper_sve_ldnf1bdu_r, + /* mte inactive, big-endian */ + { gen_helper_sve_ldnf1bb_r, + gen_helper_sve_ldnf1bhu_r, + gen_helper_sve_ldnf1bsu_r, + gen_helper_sve_ldnf1bdu_r, - gen_helper_sve_ldnf1sds_be_r, - gen_helper_sve_ldnf1hh_be_r, - gen_helper_sve_ldnf1hsu_be_r, - gen_helper_sve_ldnf1hdu_be_r, + gen_helper_sve_ldnf1sds_be_r, + gen_helper_sve_ldnf1hh_be_r, + gen_helper_sve_ldnf1hsu_be_r, + gen_helper_sve_ldnf1hdu_be_r, - gen_helper_sve_ldnf1hds_be_r, - gen_helper_sve_ldnf1hss_be_r, - gen_helper_sve_ldnf1ss_be_r, - gen_helper_sve_ldnf1sdu_be_r, + gen_helper_sve_ldnf1hds_be_r, + gen_helper_sve_ldnf1hss_be_r, + gen_helper_sve_ldnf1ss_be_r, + gen_helper_sve_ldnf1sdu_be_r, - gen_helper_sve_ldnf1bds_r, - gen_helper_sve_ldnf1bss_r, - gen_helper_sve_ldnf1bhs_r, - gen_helper_sve_ldnf1dd_be_r }, + gen_helper_sve_ldnf1bds_r, + gen_helper_sve_ldnf1bss_r, + gen_helper_sve_ldnf1bhs_r, + gen_helper_sve_ldnf1dd_be_r } }, + + { /* mte inactive, little-endian */ + { gen_helper_sve_ldnf1bb_r_mte, + gen_helper_sve_ldnf1bhu_r_mte, + gen_helper_sve_ldnf1bsu_r_mte, + gen_helper_sve_ldnf1bdu_r_mte, + + gen_helper_sve_ldnf1sds_le_r_mte, + gen_helper_sve_ldnf1hh_le_r_mte, + gen_helper_sve_ldnf1hsu_le_r_mte, + gen_helper_sve_ldnf1hdu_le_r_mte, + + gen_helper_sve_ldnf1hds_le_r_mte, + gen_helper_sve_ldnf1hss_le_r_mte, + gen_helper_sve_ldnf1ss_le_r_mte, + gen_helper_sve_ldnf1sdu_le_r_mte, + + gen_helper_sve_ldnf1bds_r_mte, + gen_helper_sve_ldnf1bss_r_mte, + gen_helper_sve_ldnf1bhs_r_mte, + gen_helper_sve_ldnf1dd_le_r_mte }, + + /* mte inactive, big-endian */ + { gen_helper_sve_ldnf1bb_r_mte, + gen_helper_sve_ldnf1bhu_r_mte, + gen_helper_sve_ldnf1bsu_r_mte, + gen_helper_sve_ldnf1bdu_r_mte, + + gen_helper_sve_ldnf1sds_be_r_mte, + gen_helper_sve_ldnf1hh_be_r_mte, + gen_helper_sve_ldnf1hsu_be_r_mte, + gen_helper_sve_ldnf1hdu_be_r_mte, + + gen_helper_sve_ldnf1hds_be_r_mte, + gen_helper_sve_ldnf1hss_be_r_mte, + gen_helper_sve_ldnf1ss_be_r_mte, + gen_helper_sve_ldnf1sdu_be_r_mte, + + gen_helper_sve_ldnf1bds_r_mte, + gen_helper_sve_ldnf1bss_r_mte, + gen_helper_sve_ldnf1bhs_r_mte, + gen_helper_sve_ldnf1dd_be_r_mte } }, }; if (sve_access_check(s)) { @@ -4880,8 +4964,8 @@ static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a) TCGv_i64 addr = new_tmp_a64(s); tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off); - do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 0, false, - fns[s->be_data == MO_BE][a->dtype]); + do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false, + fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]); } return true; } From 9473d0ecafcffc8b258892b1f9f18e037bdba958 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 25 Jun 2020 20:31:34 -0700 Subject: [PATCH 48/57] target/arm: Handle TBI for sve scalar + int memory ops We still need to handle tbi for user-only when mte is inactive. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20200626033144.790098-37-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/translate-a64.c | 2 +- target/arm/translate-a64.h | 1 + target/arm/translate-sve.c | 6 ++++-- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index 7a3774bfda..e46c4a49e0 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -215,7 +215,7 @@ static void gen_a64_set_pc(DisasContext *s, TCGv_i64 src) * of the write-back address. */ -static TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr) +TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr) { TCGv_i64 clean = new_tmp_a64(s); #ifdef CONFIG_USER_ONLY diff --git a/target/arm/translate-a64.h b/target/arm/translate-a64.h index 781c441399..49e4865918 100644 --- a/target/arm/translate-a64.h +++ b/target/arm/translate-a64.h @@ -40,6 +40,7 @@ TCGv_ptr get_fpstatus_ptr(bool); bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn, unsigned int imms, unsigned int immr); bool sve_access_check(DisasContext *s); +TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr); TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write, bool tag_checked, int log2_size); TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write, diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c index e4fbe48493..04eda9a126 100644 --- a/target/arm/translate-sve.c +++ b/target/arm/translate-sve.c @@ -4587,9 +4587,8 @@ static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr, * For e.g. LD4, there are not enough arguments to pass all 4 * registers as pointers, so encode the regno into the data field. * For consistency, do this even for LD1. - * TODO: mte_n check here while callers are updated. */ - if (mte_n && s->mte_active[0]) { + if (s->mte_active[0]) { int msz = dtype_msz(dtype); desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s)); @@ -4599,7 +4598,10 @@ static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr, desc = FIELD_DP32(desc, MTEDESC, ESIZE, 1 << msz); desc = FIELD_DP32(desc, MTEDESC, TSIZE, mte_n << msz); desc <<= SVE_MTEDESC_SHIFT; + } else { + addr = clean_data_tbi(s, addr); } + desc = simd_desc(vsz, vsz, zt | desc); t_desc = tcg_const_i32(desc); t_pg = tcg_temp_new_ptr(); From d28d12f008ee44dc2cc2ee5d8f673be9febc951e Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 25 Jun 2020 20:31:35 -0700 Subject: [PATCH 49/57] target/arm: Add mte helpers for sve scatter/gather memory ops Because the elements are non-sequential, we cannot eliminate many tests straight away like we can for sequential operations. But we often have the PTE details handy, so we can test for Tagged. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20200626033144.790098-38-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/helper-sve.h | 285 ++++++++++++++++ target/arm/sve_helper.c | 185 +++++++++-- target/arm/translate-sve.c | 650 +++++++++++++++++++++++++------------ 3 files changed, 872 insertions(+), 248 deletions(-) diff --git a/target/arm/helper-sve.h b/target/arm/helper-sve.h index f48752eb42..63c4a087ca 100644 --- a/target/arm/helper-sve.h +++ b/target/arm/helper-sve.h @@ -1617,6 +1617,115 @@ DEF_HELPER_FLAGS_6(sve_ldsds_le_zd, TCG_CALL_NO_WG, DEF_HELPER_FLAGS_6(sve_ldsds_be_zd, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldbsu_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhsu_le_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhsu_be_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldss_le_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldss_be_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldbss_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhss_le_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhss_be_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) + +DEF_HELPER_FLAGS_6(sve_ldbsu_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhsu_le_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhsu_be_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldss_le_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldss_be_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldbss_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhss_le_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhss_be_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) + +DEF_HELPER_FLAGS_6(sve_ldbdu_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhdu_le_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhdu_be_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldsdu_le_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldsdu_be_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_lddd_le_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_lddd_be_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldbds_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhds_le_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhds_be_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldsds_le_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldsds_be_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) + +DEF_HELPER_FLAGS_6(sve_ldbdu_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhdu_le_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhdu_be_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldsdu_le_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldsdu_be_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_lddd_le_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_lddd_be_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldbds_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhds_le_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhds_be_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldsds_le_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldsds_be_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) + +DEF_HELPER_FLAGS_6(sve_ldbdu_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhdu_le_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhdu_be_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldsdu_le_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldsdu_be_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_lddd_le_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_lddd_be_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldbds_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhds_le_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhds_be_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldsds_le_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldsds_be_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) + DEF_HELPER_FLAGS_6(sve_ldffbsu_zsu, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) DEF_HELPER_FLAGS_6(sve_ldffhsu_le_zsu, TCG_CALL_NO_WG, @@ -1726,6 +1835,115 @@ DEF_HELPER_FLAGS_6(sve_ldffsds_le_zd, TCG_CALL_NO_WG, DEF_HELPER_FLAGS_6(sve_ldffsds_be_zd, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffbsu_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhsu_le_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhsu_be_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffss_le_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffss_be_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffbss_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhss_le_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhss_be_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) + +DEF_HELPER_FLAGS_6(sve_ldffbsu_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhsu_le_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhsu_be_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffss_le_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffss_be_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffbss_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhss_le_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhss_be_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) + +DEF_HELPER_FLAGS_6(sve_ldffbdu_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhdu_le_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhdu_be_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffsdu_le_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffsdu_be_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffdd_le_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffdd_be_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffbds_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhds_le_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhds_be_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffsds_le_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffsds_be_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) + +DEF_HELPER_FLAGS_6(sve_ldffbdu_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhdu_le_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhdu_be_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffsdu_le_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffsdu_be_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffdd_le_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffdd_be_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffbds_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhds_le_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhds_be_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffsds_le_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffsds_be_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) + +DEF_HELPER_FLAGS_6(sve_ldffbdu_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhdu_le_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhdu_be_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffsdu_le_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffsdu_be_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffdd_le_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffdd_be_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffbds_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhds_le_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhds_be_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffsds_le_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffsds_be_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) + DEF_HELPER_FLAGS_6(sve_stbs_zsu, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) DEF_HELPER_FLAGS_6(sve_sths_le_zsu, TCG_CALL_NO_WG, @@ -1793,4 +2011,71 @@ DEF_HELPER_FLAGS_6(sve_stdd_le_zd, TCG_CALL_NO_WG, DEF_HELPER_FLAGS_6(sve_stdd_be_zd, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stbs_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_sths_le_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_sths_be_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stss_le_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stss_be_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) + +DEF_HELPER_FLAGS_6(sve_stbs_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_sths_le_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_sths_be_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stss_le_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stss_be_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) + +DEF_HELPER_FLAGS_6(sve_stbd_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_sthd_le_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_sthd_be_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stsd_le_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stsd_be_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stdd_le_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stdd_be_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) + +DEF_HELPER_FLAGS_6(sve_stbd_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_sthd_le_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_sthd_be_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stsd_le_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stsd_be_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stdd_le_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stdd_be_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) + +DEF_HELPER_FLAGS_6(sve_stbd_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_sthd_le_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_sthd_be_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stsd_le_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stsd_be_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stdd_le_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stdd_be_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) + DEF_HELPER_FLAGS_4(sve2_pmull_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c index 7aca4ad384..ad974c2cc5 100644 --- a/target/arm/sve_helper.c +++ b/target/arm/sve_helper.c @@ -5354,7 +5354,8 @@ static target_ulong off_zd_d(void *reg, intptr_t reg_ofs) static inline QEMU_ALWAYS_INLINE void sve_ld1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm, target_ulong base, uint32_t desc, uintptr_t retaddr, - int esize, int msize, zreg_off_fn *off_fn, + uint32_t mtedesc, int esize, int msize, + zreg_off_fn *off_fn, sve_ldst1_host_fn *host_fn, sve_ldst1_tlb_fn *tlb_fn) { @@ -5382,7 +5383,9 @@ void sve_ld1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm, cpu_check_watchpoint(env_cpu(env), addr, msize, info.attrs, BP_MEM_READ, retaddr); } - /* TODO: MTE check */ + if (mtedesc && arm_tlb_mte_tagged(&info.attrs)) { + mte_check1(env, mtedesc, addr, retaddr); + } host_fn(&scratch, reg_off, info.host); } else { /* Element crosses the page boundary. */ @@ -5393,7 +5396,9 @@ void sve_ld1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm, msize, info.attrs, BP_MEM_READ, retaddr); } - /* TODO: MTE check */ + if (mtedesc && arm_tlb_mte_tagged(&info.attrs)) { + mte_check1(env, mtedesc, addr, retaddr); + } tlb_fn(env, &scratch, reg_off, addr, retaddr); } } @@ -5406,20 +5411,53 @@ void sve_ld1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm, memcpy(vd, &scratch, reg_max); } +static inline QEMU_ALWAYS_INLINE +void sve_ld1_z_mte(CPUARMState *env, void *vd, uint64_t *vg, void *vm, + target_ulong base, uint32_t desc, uintptr_t retaddr, + int esize, int msize, zreg_off_fn *off_fn, + sve_ldst1_host_fn *host_fn, + sve_ldst1_tlb_fn *tlb_fn) +{ + uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); + /* Remove mtedesc from the normal sve descriptor. */ + desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); + + /* + * ??? TODO: For the 32-bit offset extractions, base + ofs cannot + * offset base entirely over the address space hole to change the + * pointer tag, or change the bit55 selector. So we could here + * examine TBI + TCMA like we do for sve_ldN_r_mte(). + */ + sve_ld1_z(env, vd, vg, vm, base, desc, retaddr, mtedesc, + esize, msize, off_fn, host_fn, tlb_fn); +} + #define DO_LD1_ZPZ_S(MEM, OFS, MSZ) \ void HELPER(sve_ld##MEM##_##OFS)(CPUARMState *env, void *vd, void *vg, \ void *vm, target_ulong base, uint32_t desc) \ { \ - sve_ld1_z(env, vd, vg, vm, base, desc, GETPC(), 4, 1 << MSZ, \ + sve_ld1_z(env, vd, vg, vm, base, desc, GETPC(), 0, 4, 1 << MSZ, \ off_##OFS##_s, sve_ld1##MEM##_host, sve_ld1##MEM##_tlb); \ +} \ +void HELPER(sve_ld##MEM##_##OFS##_mte)(CPUARMState *env, void *vd, void *vg, \ + void *vm, target_ulong base, uint32_t desc) \ +{ \ + sve_ld1_z_mte(env, vd, vg, vm, base, desc, GETPC(), 4, 1 << MSZ, \ + off_##OFS##_s, sve_ld1##MEM##_host, sve_ld1##MEM##_tlb); \ } #define DO_LD1_ZPZ_D(MEM, OFS, MSZ) \ void HELPER(sve_ld##MEM##_##OFS)(CPUARMState *env, void *vd, void *vg, \ void *vm, target_ulong base, uint32_t desc) \ { \ - sve_ld1_z(env, vd, vg, vm, base, desc, GETPC(), 8, 1 << MSZ, \ + sve_ld1_z(env, vd, vg, vm, base, desc, GETPC(), 0, 8, 1 << MSZ, \ off_##OFS##_d, sve_ld1##MEM##_host, sve_ld1##MEM##_tlb); \ +} \ +void HELPER(sve_ld##MEM##_##OFS##_mte)(CPUARMState *env, void *vd, void *vg, \ + void *vm, target_ulong base, uint32_t desc) \ +{ \ + sve_ld1_z_mte(env, vd, vg, vm, base, desc, GETPC(), 8, 1 << MSZ, \ + off_##OFS##_d, sve_ld1##MEM##_host, sve_ld1##MEM##_tlb); \ } DO_LD1_ZPZ_S(bsu, zsu, MO_8) @@ -5498,7 +5536,8 @@ DO_LD1_ZPZ_D(dd_be, zd, MO_64) static inline QEMU_ALWAYS_INLINE void sve_ldff1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm, target_ulong base, uint32_t desc, uintptr_t retaddr, - const int esz, const int msz, zreg_off_fn *off_fn, + uint32_t mtedesc, const int esz, const int msz, + zreg_off_fn *off_fn, sve_ldst1_host_fn *host_fn, sve_ldst1_tlb_fn *tlb_fn) { @@ -5523,6 +5562,9 @@ void sve_ldff1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm, * Probe the first element, allowing faults. */ addr = base + (off_fn(vm, reg_off) << scale); + if (mtedesc) { + mte_check1(env, mtedesc, addr, retaddr); + } tlb_fn(env, vd, reg_off, addr, retaddr); /* After any fault, zero the other elements. */ @@ -5555,7 +5597,11 @@ void sve_ldff1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm, (env_cpu(env), addr, msize) & BP_MEM_READ)) { goto fault; } - /* TODO: MTE check. */ + if (mtedesc && + arm_tlb_mte_tagged(&info.attrs) && + !mte_probe1(env, mtedesc, addr)) { + goto fault; + } host_fn(vd, reg_off, info.host); } @@ -5568,20 +5614,58 @@ void sve_ldff1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm, record_fault(env, reg_off, reg_max); } -#define DO_LDFF1_ZPZ_S(MEM, OFS, MSZ) \ -void HELPER(sve_ldff##MEM##_##OFS)(CPUARMState *env, void *vd, void *vg, \ - void *vm, target_ulong base, uint32_t desc) \ -{ \ - sve_ldff1_z(env, vd, vg, vm, base, desc, GETPC(), MO_32, MSZ, \ - off_##OFS##_s, sve_ld1##MEM##_host, sve_ld1##MEM##_tlb); \ +static inline QEMU_ALWAYS_INLINE +void sve_ldff1_z_mte(CPUARMState *env, void *vd, uint64_t *vg, void *vm, + target_ulong base, uint32_t desc, uintptr_t retaddr, + const int esz, const int msz, + zreg_off_fn *off_fn, + sve_ldst1_host_fn *host_fn, + sve_ldst1_tlb_fn *tlb_fn) +{ + uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); + /* Remove mtedesc from the normal sve descriptor. */ + desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); + + /* + * ??? TODO: For the 32-bit offset extractions, base + ofs cannot + * offset base entirely over the address space hole to change the + * pointer tag, or change the bit55 selector. So we could here + * examine TBI + TCMA like we do for sve_ldN_r_mte(). + */ + sve_ldff1_z(env, vd, vg, vm, base, desc, retaddr, mtedesc, + esz, msz, off_fn, host_fn, tlb_fn); } -#define DO_LDFF1_ZPZ_D(MEM, OFS, MSZ) \ -void HELPER(sve_ldff##MEM##_##OFS)(CPUARMState *env, void *vd, void *vg, \ - void *vm, target_ulong base, uint32_t desc) \ -{ \ - sve_ldff1_z(env, vd, vg, vm, base, desc, GETPC(), MO_64, MSZ, \ - off_##OFS##_d, sve_ld1##MEM##_host, sve_ld1##MEM##_tlb); \ +#define DO_LDFF1_ZPZ_S(MEM, OFS, MSZ) \ +void HELPER(sve_ldff##MEM##_##OFS) \ + (CPUARMState *env, void *vd, void *vg, \ + void *vm, target_ulong base, uint32_t desc) \ +{ \ + sve_ldff1_z(env, vd, vg, vm, base, desc, GETPC(), 0, MO_32, MSZ, \ + off_##OFS##_s, sve_ld1##MEM##_host, sve_ld1##MEM##_tlb); \ +} \ +void HELPER(sve_ldff##MEM##_##OFS##_mte) \ + (CPUARMState *env, void *vd, void *vg, \ + void *vm, target_ulong base, uint32_t desc) \ +{ \ + sve_ldff1_z_mte(env, vd, vg, vm, base, desc, GETPC(), MO_32, MSZ, \ + off_##OFS##_s, sve_ld1##MEM##_host, sve_ld1##MEM##_tlb); \ +} + +#define DO_LDFF1_ZPZ_D(MEM, OFS, MSZ) \ +void HELPER(sve_ldff##MEM##_##OFS) \ + (CPUARMState *env, void *vd, void *vg, \ + void *vm, target_ulong base, uint32_t desc) \ +{ \ + sve_ldff1_z(env, vd, vg, vm, base, desc, GETPC(), 0, MO_64, MSZ, \ + off_##OFS##_d, sve_ld1##MEM##_host, sve_ld1##MEM##_tlb); \ +} \ +void HELPER(sve_ldff##MEM##_##OFS##_mte) \ + (CPUARMState *env, void *vd, void *vg, \ + void *vm, target_ulong base, uint32_t desc) \ +{ \ + sve_ldff1_z_mte(env, vd, vg, vm, base, desc, GETPC(), MO_64, MSZ, \ + off_##OFS##_d, sve_ld1##MEM##_host, sve_ld1##MEM##_tlb); \ } DO_LDFF1_ZPZ_S(bsu, zsu, MO_8) @@ -5653,7 +5737,8 @@ DO_LDFF1_ZPZ_D(dd_be, zd, MO_64) static inline QEMU_ALWAYS_INLINE void sve_st1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm, target_ulong base, uint32_t desc, uintptr_t retaddr, - int esize, int msize, zreg_off_fn *off_fn, + uint32_t mtedesc, int esize, int msize, + zreg_off_fn *off_fn, sve_ldst1_host_fn *host_fn, sve_ldst1_tlb_fn *tlb_fn) { @@ -5697,7 +5782,10 @@ void sve_st1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm, cpu_check_watchpoint(env_cpu(env), addr, msize, info.attrs, BP_MEM_WRITE, retaddr); } - /* TODO: MTE check. */ + + if (mtedesc && arm_tlb_mte_tagged(&info.attrs)) { + mte_check1(env, mtedesc, addr, retaddr); + } } i += 1; reg_off += esize; @@ -5727,20 +5815,53 @@ void sve_st1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm, } while (reg_off < reg_max); } -#define DO_ST1_ZPZ_S(MEM, OFS, MSZ) \ -void HELPER(sve_st##MEM##_##OFS)(CPUARMState *env, void *vd, void *vg, \ - void *vm, target_ulong base, uint32_t desc) \ -{ \ - sve_st1_z(env, vd, vg, vm, base, desc, GETPC(), 4, 1 << MSZ, \ - off_##OFS##_s, sve_st1##MEM##_host, sve_st1##MEM##_tlb); \ +static inline QEMU_ALWAYS_INLINE +void sve_st1_z_mte(CPUARMState *env, void *vd, uint64_t *vg, void *vm, + target_ulong base, uint32_t desc, uintptr_t retaddr, + int esize, int msize, zreg_off_fn *off_fn, + sve_ldst1_host_fn *host_fn, + sve_ldst1_tlb_fn *tlb_fn) +{ + uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); + /* Remove mtedesc from the normal sve descriptor. */ + desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); + + /* + * ??? TODO: For the 32-bit offset extractions, base + ofs cannot + * offset base entirely over the address space hole to change the + * pointer tag, or change the bit55 selector. So we could here + * examine TBI + TCMA like we do for sve_ldN_r_mte(). + */ + sve_st1_z(env, vd, vg, vm, base, desc, retaddr, mtedesc, + esize, msize, off_fn, host_fn, tlb_fn); } -#define DO_ST1_ZPZ_D(MEM, OFS, MSZ) \ -void HELPER(sve_st##MEM##_##OFS)(CPUARMState *env, void *vd, void *vg, \ +#define DO_ST1_ZPZ_S(MEM, OFS, MSZ) \ +void HELPER(sve_st##MEM##_##OFS)(CPUARMState *env, void *vd, void *vg, \ void *vm, target_ulong base, uint32_t desc) \ -{ \ - sve_st1_z(env, vd, vg, vm, base, desc, GETPC(), 8, 1 << MSZ, \ - off_##OFS##_d, sve_st1##MEM##_host, sve_st1##MEM##_tlb); \ +{ \ + sve_st1_z(env, vd, vg, vm, base, desc, GETPC(), 0, 4, 1 << MSZ, \ + off_##OFS##_s, sve_st1##MEM##_host, sve_st1##MEM##_tlb); \ +} \ +void HELPER(sve_st##MEM##_##OFS##_mte)(CPUARMState *env, void *vd, void *vg, \ + void *vm, target_ulong base, uint32_t desc) \ +{ \ + sve_st1_z_mte(env, vd, vg, vm, base, desc, GETPC(), 4, 1 << MSZ, \ + off_##OFS##_s, sve_st1##MEM##_host, sve_st1##MEM##_tlb); \ +} + +#define DO_ST1_ZPZ_D(MEM, OFS, MSZ) \ +void HELPER(sve_st##MEM##_##OFS)(CPUARMState *env, void *vd, void *vg, \ + void *vm, target_ulong base, uint32_t desc) \ +{ \ + sve_st1_z(env, vd, vg, vm, base, desc, GETPC(), 0, 8, 1 << MSZ, \ + off_##OFS##_d, sve_st1##MEM##_host, sve_st1##MEM##_tlb); \ +} \ +void HELPER(sve_st##MEM##_##OFS##_mte)(CPUARMState *env, void *vd, void *vg, \ + void *vm, target_ulong base, uint32_t desc) \ +{ \ + sve_st1_z_mte(env, vd, vg, vm, base, desc, GETPC(), 8, 1 << MSZ, \ + off_##OFS##_d, sve_st1##MEM##_host, sve_st1##MEM##_tlb); \ } DO_ST1_ZPZ_S(bs, zsu, MO_8) diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c index 04eda9a126..f318ca265f 100644 --- a/target/arm/translate-sve.c +++ b/target/arm/translate-sve.c @@ -5261,7 +5261,7 @@ static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a) */ static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm, - int scale, TCGv_i64 scalar, int msz, + int scale, TCGv_i64 scalar, int msz, bool is_write, gen_helper_gvec_mem_scatter *fn) { unsigned vsz = vec_full_reg_size(s); @@ -5269,8 +5269,16 @@ static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm, TCGv_ptr t_pg = tcg_temp_new_ptr(); TCGv_ptr t_zt = tcg_temp_new_ptr(); TCGv_i32 t_desc; - int desc; + int desc = 0; + if (s->mte_active[0]) { + desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s)); + desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); + desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); + desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write); + desc = FIELD_DP32(desc, MTEDESC, ESIZE, 1 << msz); + desc <<= SVE_MTEDESC_SHIFT; + } desc = simd_desc(vsz, vsz, scale); t_desc = tcg_const_i32(desc); @@ -5285,176 +5293,339 @@ static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm, tcg_temp_free_i32(t_desc); } -/* Indexed by [be][ff][xs][u][msz]. */ -static gen_helper_gvec_mem_scatter * const gather_load_fn32[2][2][2][2][3] = { - /* Little-endian */ - { { { { gen_helper_sve_ldbss_zsu, - gen_helper_sve_ldhss_le_zsu, - NULL, }, - { gen_helper_sve_ldbsu_zsu, - gen_helper_sve_ldhsu_le_zsu, - gen_helper_sve_ldss_le_zsu, } }, - { { gen_helper_sve_ldbss_zss, - gen_helper_sve_ldhss_le_zss, - NULL, }, - { gen_helper_sve_ldbsu_zss, - gen_helper_sve_ldhsu_le_zss, - gen_helper_sve_ldss_le_zss, } } }, +/* Indexed by [mte][be][ff][xs][u][msz]. */ +static gen_helper_gvec_mem_scatter * const +gather_load_fn32[2][2][2][2][2][3] = { + { /* MTE Inactive */ + { /* Little-endian */ + { { { gen_helper_sve_ldbss_zsu, + gen_helper_sve_ldhss_le_zsu, + NULL, }, + { gen_helper_sve_ldbsu_zsu, + gen_helper_sve_ldhsu_le_zsu, + gen_helper_sve_ldss_le_zsu, } }, + { { gen_helper_sve_ldbss_zss, + gen_helper_sve_ldhss_le_zss, + NULL, }, + { gen_helper_sve_ldbsu_zss, + gen_helper_sve_ldhsu_le_zss, + gen_helper_sve_ldss_le_zss, } } }, - /* First-fault */ - { { { gen_helper_sve_ldffbss_zsu, - gen_helper_sve_ldffhss_le_zsu, - NULL, }, - { gen_helper_sve_ldffbsu_zsu, - gen_helper_sve_ldffhsu_le_zsu, - gen_helper_sve_ldffss_le_zsu, } }, - { { gen_helper_sve_ldffbss_zss, - gen_helper_sve_ldffhss_le_zss, - NULL, }, - { gen_helper_sve_ldffbsu_zss, - gen_helper_sve_ldffhsu_le_zss, - gen_helper_sve_ldffss_le_zss, } } } }, + /* First-fault */ + { { { gen_helper_sve_ldffbss_zsu, + gen_helper_sve_ldffhss_le_zsu, + NULL, }, + { gen_helper_sve_ldffbsu_zsu, + gen_helper_sve_ldffhsu_le_zsu, + gen_helper_sve_ldffss_le_zsu, } }, + { { gen_helper_sve_ldffbss_zss, + gen_helper_sve_ldffhss_le_zss, + NULL, }, + { gen_helper_sve_ldffbsu_zss, + gen_helper_sve_ldffhsu_le_zss, + gen_helper_sve_ldffss_le_zss, } } } }, - /* Big-endian */ - { { { { gen_helper_sve_ldbss_zsu, - gen_helper_sve_ldhss_be_zsu, - NULL, }, - { gen_helper_sve_ldbsu_zsu, - gen_helper_sve_ldhsu_be_zsu, - gen_helper_sve_ldss_be_zsu, } }, - { { gen_helper_sve_ldbss_zss, - gen_helper_sve_ldhss_be_zss, - NULL, }, - { gen_helper_sve_ldbsu_zss, - gen_helper_sve_ldhsu_be_zss, - gen_helper_sve_ldss_be_zss, } } }, + { /* Big-endian */ + { { { gen_helper_sve_ldbss_zsu, + gen_helper_sve_ldhss_be_zsu, + NULL, }, + { gen_helper_sve_ldbsu_zsu, + gen_helper_sve_ldhsu_be_zsu, + gen_helper_sve_ldss_be_zsu, } }, + { { gen_helper_sve_ldbss_zss, + gen_helper_sve_ldhss_be_zss, + NULL, }, + { gen_helper_sve_ldbsu_zss, + gen_helper_sve_ldhsu_be_zss, + gen_helper_sve_ldss_be_zss, } } }, - /* First-fault */ - { { { gen_helper_sve_ldffbss_zsu, - gen_helper_sve_ldffhss_be_zsu, - NULL, }, - { gen_helper_sve_ldffbsu_zsu, - gen_helper_sve_ldffhsu_be_zsu, - gen_helper_sve_ldffss_be_zsu, } }, - { { gen_helper_sve_ldffbss_zss, - gen_helper_sve_ldffhss_be_zss, - NULL, }, - { gen_helper_sve_ldffbsu_zss, - gen_helper_sve_ldffhsu_be_zss, - gen_helper_sve_ldffss_be_zss, } } } }, + /* First-fault */ + { { { gen_helper_sve_ldffbss_zsu, + gen_helper_sve_ldffhss_be_zsu, + NULL, }, + { gen_helper_sve_ldffbsu_zsu, + gen_helper_sve_ldffhsu_be_zsu, + gen_helper_sve_ldffss_be_zsu, } }, + { { gen_helper_sve_ldffbss_zss, + gen_helper_sve_ldffhss_be_zss, + NULL, }, + { gen_helper_sve_ldffbsu_zss, + gen_helper_sve_ldffhsu_be_zss, + gen_helper_sve_ldffss_be_zss, } } } } }, + { /* MTE Active */ + { /* Little-endian */ + { { { gen_helper_sve_ldbss_zsu_mte, + gen_helper_sve_ldhss_le_zsu_mte, + NULL, }, + { gen_helper_sve_ldbsu_zsu_mte, + gen_helper_sve_ldhsu_le_zsu_mte, + gen_helper_sve_ldss_le_zsu_mte, } }, + { { gen_helper_sve_ldbss_zss_mte, + gen_helper_sve_ldhss_le_zss_mte, + NULL, }, + { gen_helper_sve_ldbsu_zss_mte, + gen_helper_sve_ldhsu_le_zss_mte, + gen_helper_sve_ldss_le_zss_mte, } } }, + + /* First-fault */ + { { { gen_helper_sve_ldffbss_zsu_mte, + gen_helper_sve_ldffhss_le_zsu_mte, + NULL, }, + { gen_helper_sve_ldffbsu_zsu_mte, + gen_helper_sve_ldffhsu_le_zsu_mte, + gen_helper_sve_ldffss_le_zsu_mte, } }, + { { gen_helper_sve_ldffbss_zss_mte, + gen_helper_sve_ldffhss_le_zss_mte, + NULL, }, + { gen_helper_sve_ldffbsu_zss_mte, + gen_helper_sve_ldffhsu_le_zss_mte, + gen_helper_sve_ldffss_le_zss_mte, } } } }, + + { /* Big-endian */ + { { { gen_helper_sve_ldbss_zsu_mte, + gen_helper_sve_ldhss_be_zsu_mte, + NULL, }, + { gen_helper_sve_ldbsu_zsu_mte, + gen_helper_sve_ldhsu_be_zsu_mte, + gen_helper_sve_ldss_be_zsu_mte, } }, + { { gen_helper_sve_ldbss_zss_mte, + gen_helper_sve_ldhss_be_zss_mte, + NULL, }, + { gen_helper_sve_ldbsu_zss_mte, + gen_helper_sve_ldhsu_be_zss_mte, + gen_helper_sve_ldss_be_zss_mte, } } }, + + /* First-fault */ + { { { gen_helper_sve_ldffbss_zsu_mte, + gen_helper_sve_ldffhss_be_zsu_mte, + NULL, }, + { gen_helper_sve_ldffbsu_zsu_mte, + gen_helper_sve_ldffhsu_be_zsu_mte, + gen_helper_sve_ldffss_be_zsu_mte, } }, + { { gen_helper_sve_ldffbss_zss_mte, + gen_helper_sve_ldffhss_be_zss_mte, + NULL, }, + { gen_helper_sve_ldffbsu_zss_mte, + gen_helper_sve_ldffhsu_be_zss_mte, + gen_helper_sve_ldffss_be_zss_mte, } } } } }, }; /* Note that we overload xs=2 to indicate 64-bit offset. */ -static gen_helper_gvec_mem_scatter * const gather_load_fn64[2][2][3][2][4] = { - /* Little-endian */ - { { { { gen_helper_sve_ldbds_zsu, - gen_helper_sve_ldhds_le_zsu, - gen_helper_sve_ldsds_le_zsu, - NULL, }, - { gen_helper_sve_ldbdu_zsu, - gen_helper_sve_ldhdu_le_zsu, - gen_helper_sve_ldsdu_le_zsu, - gen_helper_sve_lddd_le_zsu, } }, - { { gen_helper_sve_ldbds_zss, - gen_helper_sve_ldhds_le_zss, - gen_helper_sve_ldsds_le_zss, - NULL, }, - { gen_helper_sve_ldbdu_zss, - gen_helper_sve_ldhdu_le_zss, - gen_helper_sve_ldsdu_le_zss, - gen_helper_sve_lddd_le_zss, } }, - { { gen_helper_sve_ldbds_zd, - gen_helper_sve_ldhds_le_zd, - gen_helper_sve_ldsds_le_zd, - NULL, }, - { gen_helper_sve_ldbdu_zd, - gen_helper_sve_ldhdu_le_zd, - gen_helper_sve_ldsdu_le_zd, - gen_helper_sve_lddd_le_zd, } } }, +static gen_helper_gvec_mem_scatter * const +gather_load_fn64[2][2][2][3][2][4] = { + { /* MTE Inactive */ + { /* Little-endian */ + { { { gen_helper_sve_ldbds_zsu, + gen_helper_sve_ldhds_le_zsu, + gen_helper_sve_ldsds_le_zsu, + NULL, }, + { gen_helper_sve_ldbdu_zsu, + gen_helper_sve_ldhdu_le_zsu, + gen_helper_sve_ldsdu_le_zsu, + gen_helper_sve_lddd_le_zsu, } }, + { { gen_helper_sve_ldbds_zss, + gen_helper_sve_ldhds_le_zss, + gen_helper_sve_ldsds_le_zss, + NULL, }, + { gen_helper_sve_ldbdu_zss, + gen_helper_sve_ldhdu_le_zss, + gen_helper_sve_ldsdu_le_zss, + gen_helper_sve_lddd_le_zss, } }, + { { gen_helper_sve_ldbds_zd, + gen_helper_sve_ldhds_le_zd, + gen_helper_sve_ldsds_le_zd, + NULL, }, + { gen_helper_sve_ldbdu_zd, + gen_helper_sve_ldhdu_le_zd, + gen_helper_sve_ldsdu_le_zd, + gen_helper_sve_lddd_le_zd, } } }, - /* First-fault */ - { { { gen_helper_sve_ldffbds_zsu, - gen_helper_sve_ldffhds_le_zsu, - gen_helper_sve_ldffsds_le_zsu, - NULL, }, - { gen_helper_sve_ldffbdu_zsu, - gen_helper_sve_ldffhdu_le_zsu, - gen_helper_sve_ldffsdu_le_zsu, - gen_helper_sve_ldffdd_le_zsu, } }, - { { gen_helper_sve_ldffbds_zss, - gen_helper_sve_ldffhds_le_zss, - gen_helper_sve_ldffsds_le_zss, - NULL, }, - { gen_helper_sve_ldffbdu_zss, - gen_helper_sve_ldffhdu_le_zss, - gen_helper_sve_ldffsdu_le_zss, - gen_helper_sve_ldffdd_le_zss, } }, - { { gen_helper_sve_ldffbds_zd, - gen_helper_sve_ldffhds_le_zd, - gen_helper_sve_ldffsds_le_zd, - NULL, }, - { gen_helper_sve_ldffbdu_zd, - gen_helper_sve_ldffhdu_le_zd, - gen_helper_sve_ldffsdu_le_zd, - gen_helper_sve_ldffdd_le_zd, } } } }, + /* First-fault */ + { { { gen_helper_sve_ldffbds_zsu, + gen_helper_sve_ldffhds_le_zsu, + gen_helper_sve_ldffsds_le_zsu, + NULL, }, + { gen_helper_sve_ldffbdu_zsu, + gen_helper_sve_ldffhdu_le_zsu, + gen_helper_sve_ldffsdu_le_zsu, + gen_helper_sve_ldffdd_le_zsu, } }, + { { gen_helper_sve_ldffbds_zss, + gen_helper_sve_ldffhds_le_zss, + gen_helper_sve_ldffsds_le_zss, + NULL, }, + { gen_helper_sve_ldffbdu_zss, + gen_helper_sve_ldffhdu_le_zss, + gen_helper_sve_ldffsdu_le_zss, + gen_helper_sve_ldffdd_le_zss, } }, + { { gen_helper_sve_ldffbds_zd, + gen_helper_sve_ldffhds_le_zd, + gen_helper_sve_ldffsds_le_zd, + NULL, }, + { gen_helper_sve_ldffbdu_zd, + gen_helper_sve_ldffhdu_le_zd, + gen_helper_sve_ldffsdu_le_zd, + gen_helper_sve_ldffdd_le_zd, } } } }, + { /* Big-endian */ + { { { gen_helper_sve_ldbds_zsu, + gen_helper_sve_ldhds_be_zsu, + gen_helper_sve_ldsds_be_zsu, + NULL, }, + { gen_helper_sve_ldbdu_zsu, + gen_helper_sve_ldhdu_be_zsu, + gen_helper_sve_ldsdu_be_zsu, + gen_helper_sve_lddd_be_zsu, } }, + { { gen_helper_sve_ldbds_zss, + gen_helper_sve_ldhds_be_zss, + gen_helper_sve_ldsds_be_zss, + NULL, }, + { gen_helper_sve_ldbdu_zss, + gen_helper_sve_ldhdu_be_zss, + gen_helper_sve_ldsdu_be_zss, + gen_helper_sve_lddd_be_zss, } }, + { { gen_helper_sve_ldbds_zd, + gen_helper_sve_ldhds_be_zd, + gen_helper_sve_ldsds_be_zd, + NULL, }, + { gen_helper_sve_ldbdu_zd, + gen_helper_sve_ldhdu_be_zd, + gen_helper_sve_ldsdu_be_zd, + gen_helper_sve_lddd_be_zd, } } }, - /* Big-endian */ - { { { { gen_helper_sve_ldbds_zsu, - gen_helper_sve_ldhds_be_zsu, - gen_helper_sve_ldsds_be_zsu, - NULL, }, - { gen_helper_sve_ldbdu_zsu, - gen_helper_sve_ldhdu_be_zsu, - gen_helper_sve_ldsdu_be_zsu, - gen_helper_sve_lddd_be_zsu, } }, - { { gen_helper_sve_ldbds_zss, - gen_helper_sve_ldhds_be_zss, - gen_helper_sve_ldsds_be_zss, - NULL, }, - { gen_helper_sve_ldbdu_zss, - gen_helper_sve_ldhdu_be_zss, - gen_helper_sve_ldsdu_be_zss, - gen_helper_sve_lddd_be_zss, } }, - { { gen_helper_sve_ldbds_zd, - gen_helper_sve_ldhds_be_zd, - gen_helper_sve_ldsds_be_zd, - NULL, }, - { gen_helper_sve_ldbdu_zd, - gen_helper_sve_ldhdu_be_zd, - gen_helper_sve_ldsdu_be_zd, - gen_helper_sve_lddd_be_zd, } } }, + /* First-fault */ + { { { gen_helper_sve_ldffbds_zsu, + gen_helper_sve_ldffhds_be_zsu, + gen_helper_sve_ldffsds_be_zsu, + NULL, }, + { gen_helper_sve_ldffbdu_zsu, + gen_helper_sve_ldffhdu_be_zsu, + gen_helper_sve_ldffsdu_be_zsu, + gen_helper_sve_ldffdd_be_zsu, } }, + { { gen_helper_sve_ldffbds_zss, + gen_helper_sve_ldffhds_be_zss, + gen_helper_sve_ldffsds_be_zss, + NULL, }, + { gen_helper_sve_ldffbdu_zss, + gen_helper_sve_ldffhdu_be_zss, + gen_helper_sve_ldffsdu_be_zss, + gen_helper_sve_ldffdd_be_zss, } }, + { { gen_helper_sve_ldffbds_zd, + gen_helper_sve_ldffhds_be_zd, + gen_helper_sve_ldffsds_be_zd, + NULL, }, + { gen_helper_sve_ldffbdu_zd, + gen_helper_sve_ldffhdu_be_zd, + gen_helper_sve_ldffsdu_be_zd, + gen_helper_sve_ldffdd_be_zd, } } } } }, + { /* MTE Active */ + { /* Little-endian */ + { { { gen_helper_sve_ldbds_zsu_mte, + gen_helper_sve_ldhds_le_zsu_mte, + gen_helper_sve_ldsds_le_zsu_mte, + NULL, }, + { gen_helper_sve_ldbdu_zsu_mte, + gen_helper_sve_ldhdu_le_zsu_mte, + gen_helper_sve_ldsdu_le_zsu_mte, + gen_helper_sve_lddd_le_zsu_mte, } }, + { { gen_helper_sve_ldbds_zss_mte, + gen_helper_sve_ldhds_le_zss_mte, + gen_helper_sve_ldsds_le_zss_mte, + NULL, }, + { gen_helper_sve_ldbdu_zss_mte, + gen_helper_sve_ldhdu_le_zss_mte, + gen_helper_sve_ldsdu_le_zss_mte, + gen_helper_sve_lddd_le_zss_mte, } }, + { { gen_helper_sve_ldbds_zd_mte, + gen_helper_sve_ldhds_le_zd_mte, + gen_helper_sve_ldsds_le_zd_mte, + NULL, }, + { gen_helper_sve_ldbdu_zd_mte, + gen_helper_sve_ldhdu_le_zd_mte, + gen_helper_sve_ldsdu_le_zd_mte, + gen_helper_sve_lddd_le_zd_mte, } } }, - /* First-fault */ - { { { gen_helper_sve_ldffbds_zsu, - gen_helper_sve_ldffhds_be_zsu, - gen_helper_sve_ldffsds_be_zsu, - NULL, }, - { gen_helper_sve_ldffbdu_zsu, - gen_helper_sve_ldffhdu_be_zsu, - gen_helper_sve_ldffsdu_be_zsu, - gen_helper_sve_ldffdd_be_zsu, } }, - { { gen_helper_sve_ldffbds_zss, - gen_helper_sve_ldffhds_be_zss, - gen_helper_sve_ldffsds_be_zss, - NULL, }, - { gen_helper_sve_ldffbdu_zss, - gen_helper_sve_ldffhdu_be_zss, - gen_helper_sve_ldffsdu_be_zss, - gen_helper_sve_ldffdd_be_zss, } }, - { { gen_helper_sve_ldffbds_zd, - gen_helper_sve_ldffhds_be_zd, - gen_helper_sve_ldffsds_be_zd, - NULL, }, - { gen_helper_sve_ldffbdu_zd, - gen_helper_sve_ldffhdu_be_zd, - gen_helper_sve_ldffsdu_be_zd, - gen_helper_sve_ldffdd_be_zd, } } } }, + /* First-fault */ + { { { gen_helper_sve_ldffbds_zsu_mte, + gen_helper_sve_ldffhds_le_zsu_mte, + gen_helper_sve_ldffsds_le_zsu_mte, + NULL, }, + { gen_helper_sve_ldffbdu_zsu_mte, + gen_helper_sve_ldffhdu_le_zsu_mte, + gen_helper_sve_ldffsdu_le_zsu_mte, + gen_helper_sve_ldffdd_le_zsu_mte, } }, + { { gen_helper_sve_ldffbds_zss_mte, + gen_helper_sve_ldffhds_le_zss_mte, + gen_helper_sve_ldffsds_le_zss_mte, + NULL, }, + { gen_helper_sve_ldffbdu_zss_mte, + gen_helper_sve_ldffhdu_le_zss_mte, + gen_helper_sve_ldffsdu_le_zss_mte, + gen_helper_sve_ldffdd_le_zss_mte, } }, + { { gen_helper_sve_ldffbds_zd_mte, + gen_helper_sve_ldffhds_le_zd_mte, + gen_helper_sve_ldffsds_le_zd_mte, + NULL, }, + { gen_helper_sve_ldffbdu_zd_mte, + gen_helper_sve_ldffhdu_le_zd_mte, + gen_helper_sve_ldffsdu_le_zd_mte, + gen_helper_sve_ldffdd_le_zd_mte, } } } }, + { /* Big-endian */ + { { { gen_helper_sve_ldbds_zsu_mte, + gen_helper_sve_ldhds_be_zsu_mte, + gen_helper_sve_ldsds_be_zsu_mte, + NULL, }, + { gen_helper_sve_ldbdu_zsu_mte, + gen_helper_sve_ldhdu_be_zsu_mte, + gen_helper_sve_ldsdu_be_zsu_mte, + gen_helper_sve_lddd_be_zsu_mte, } }, + { { gen_helper_sve_ldbds_zss_mte, + gen_helper_sve_ldhds_be_zss_mte, + gen_helper_sve_ldsds_be_zss_mte, + NULL, }, + { gen_helper_sve_ldbdu_zss_mte, + gen_helper_sve_ldhdu_be_zss_mte, + gen_helper_sve_ldsdu_be_zss_mte, + gen_helper_sve_lddd_be_zss_mte, } }, + { { gen_helper_sve_ldbds_zd_mte, + gen_helper_sve_ldhds_be_zd_mte, + gen_helper_sve_ldsds_be_zd_mte, + NULL, }, + { gen_helper_sve_ldbdu_zd_mte, + gen_helper_sve_ldhdu_be_zd_mte, + gen_helper_sve_ldsdu_be_zd_mte, + gen_helper_sve_lddd_be_zd_mte, } } }, + + /* First-fault */ + { { { gen_helper_sve_ldffbds_zsu_mte, + gen_helper_sve_ldffhds_be_zsu_mte, + gen_helper_sve_ldffsds_be_zsu_mte, + NULL, }, + { gen_helper_sve_ldffbdu_zsu_mte, + gen_helper_sve_ldffhdu_be_zsu_mte, + gen_helper_sve_ldffsdu_be_zsu_mte, + gen_helper_sve_ldffdd_be_zsu_mte, } }, + { { gen_helper_sve_ldffbds_zss_mte, + gen_helper_sve_ldffhds_be_zss_mte, + gen_helper_sve_ldffsds_be_zss_mte, + NULL, }, + { gen_helper_sve_ldffbdu_zss_mte, + gen_helper_sve_ldffhdu_be_zss_mte, + gen_helper_sve_ldffsdu_be_zss_mte, + gen_helper_sve_ldffdd_be_zss_mte, } }, + { { gen_helper_sve_ldffbds_zd_mte, + gen_helper_sve_ldffhds_be_zd_mte, + gen_helper_sve_ldffsds_be_zd_mte, + NULL, }, + { gen_helper_sve_ldffbdu_zd_mte, + gen_helper_sve_ldffhdu_be_zd_mte, + gen_helper_sve_ldffsdu_be_zd_mte, + gen_helper_sve_ldffdd_be_zd_mte, } } } } }, }; static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a) { gen_helper_gvec_mem_scatter *fn = NULL; - int be = s->be_data == MO_BE; + bool be = s->be_data == MO_BE; + bool mte = s->mte_active[0]; if (!sve_access_check(s)) { return true; @@ -5462,23 +5633,24 @@ static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a) switch (a->esz) { case MO_32: - fn = gather_load_fn32[be][a->ff][a->xs][a->u][a->msz]; + fn = gather_load_fn32[mte][be][a->ff][a->xs][a->u][a->msz]; break; case MO_64: - fn = gather_load_fn64[be][a->ff][a->xs][a->u][a->msz]; + fn = gather_load_fn64[mte][be][a->ff][a->xs][a->u][a->msz]; break; } assert(fn != NULL); do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz, - cpu_reg_sp(s, a->rn), a->msz, fn); + cpu_reg_sp(s, a->rn), a->msz, false, fn); return true; } static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a) { gen_helper_gvec_mem_scatter *fn = NULL; - int be = s->be_data == MO_BE; + bool be = s->be_data == MO_BE; + bool mte = s->mte_active[0]; TCGv_i64 imm; if (a->esz < a->msz || (a->esz == a->msz && !a->u)) { @@ -5490,10 +5662,10 @@ static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a) switch (a->esz) { case MO_32: - fn = gather_load_fn32[be][a->ff][0][a->u][a->msz]; + fn = gather_load_fn32[mte][be][a->ff][0][a->u][a->msz]; break; case MO_64: - fn = gather_load_fn64[be][a->ff][2][a->u][a->msz]; + fn = gather_load_fn64[mte][be][a->ff][2][a->u][a->msz]; break; } assert(fn != NULL); @@ -5502,63 +5674,108 @@ static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a) * by loading the immediate into the scalar parameter. */ imm = tcg_const_i64(a->imm << a->msz); - do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, fn); + do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, false, fn); tcg_temp_free_i64(imm); return true; } -/* Indexed by [be][xs][msz]. */ -static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][2][3] = { - /* Little-endian */ - { { gen_helper_sve_stbs_zsu, - gen_helper_sve_sths_le_zsu, - gen_helper_sve_stss_le_zsu, }, - { gen_helper_sve_stbs_zss, - gen_helper_sve_sths_le_zss, - gen_helper_sve_stss_le_zss, } }, - /* Big-endian */ - { { gen_helper_sve_stbs_zsu, - gen_helper_sve_sths_be_zsu, - gen_helper_sve_stss_be_zsu, }, - { gen_helper_sve_stbs_zss, - gen_helper_sve_sths_be_zss, - gen_helper_sve_stss_be_zss, } }, +/* Indexed by [mte][be][xs][msz]. */ +static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][2][2][3] = { + { /* MTE Inactive */ + { /* Little-endian */ + { gen_helper_sve_stbs_zsu, + gen_helper_sve_sths_le_zsu, + gen_helper_sve_stss_le_zsu, }, + { gen_helper_sve_stbs_zss, + gen_helper_sve_sths_le_zss, + gen_helper_sve_stss_le_zss, } }, + { /* Big-endian */ + { gen_helper_sve_stbs_zsu, + gen_helper_sve_sths_be_zsu, + gen_helper_sve_stss_be_zsu, }, + { gen_helper_sve_stbs_zss, + gen_helper_sve_sths_be_zss, + gen_helper_sve_stss_be_zss, } } }, + { /* MTE Active */ + { /* Little-endian */ + { gen_helper_sve_stbs_zsu_mte, + gen_helper_sve_sths_le_zsu_mte, + gen_helper_sve_stss_le_zsu_mte, }, + { gen_helper_sve_stbs_zss_mte, + gen_helper_sve_sths_le_zss_mte, + gen_helper_sve_stss_le_zss_mte, } }, + { /* Big-endian */ + { gen_helper_sve_stbs_zsu_mte, + gen_helper_sve_sths_be_zsu_mte, + gen_helper_sve_stss_be_zsu_mte, }, + { gen_helper_sve_stbs_zss_mte, + gen_helper_sve_sths_be_zss_mte, + gen_helper_sve_stss_be_zss_mte, } } }, }; /* Note that we overload xs=2 to indicate 64-bit offset. */ -static gen_helper_gvec_mem_scatter * const scatter_store_fn64[2][3][4] = { - /* Little-endian */ - { { gen_helper_sve_stbd_zsu, - gen_helper_sve_sthd_le_zsu, - gen_helper_sve_stsd_le_zsu, - gen_helper_sve_stdd_le_zsu, }, - { gen_helper_sve_stbd_zss, - gen_helper_sve_sthd_le_zss, - gen_helper_sve_stsd_le_zss, - gen_helper_sve_stdd_le_zss, }, - { gen_helper_sve_stbd_zd, - gen_helper_sve_sthd_le_zd, - gen_helper_sve_stsd_le_zd, - gen_helper_sve_stdd_le_zd, } }, - /* Big-endian */ - { { gen_helper_sve_stbd_zsu, - gen_helper_sve_sthd_be_zsu, - gen_helper_sve_stsd_be_zsu, - gen_helper_sve_stdd_be_zsu, }, - { gen_helper_sve_stbd_zss, - gen_helper_sve_sthd_be_zss, - gen_helper_sve_stsd_be_zss, - gen_helper_sve_stdd_be_zss, }, - { gen_helper_sve_stbd_zd, - gen_helper_sve_sthd_be_zd, - gen_helper_sve_stsd_be_zd, - gen_helper_sve_stdd_be_zd, } }, +static gen_helper_gvec_mem_scatter * const scatter_store_fn64[2][2][3][4] = { + { /* MTE Inactive */ + { /* Little-endian */ + { gen_helper_sve_stbd_zsu, + gen_helper_sve_sthd_le_zsu, + gen_helper_sve_stsd_le_zsu, + gen_helper_sve_stdd_le_zsu, }, + { gen_helper_sve_stbd_zss, + gen_helper_sve_sthd_le_zss, + gen_helper_sve_stsd_le_zss, + gen_helper_sve_stdd_le_zss, }, + { gen_helper_sve_stbd_zd, + gen_helper_sve_sthd_le_zd, + gen_helper_sve_stsd_le_zd, + gen_helper_sve_stdd_le_zd, } }, + { /* Big-endian */ + { gen_helper_sve_stbd_zsu, + gen_helper_sve_sthd_be_zsu, + gen_helper_sve_stsd_be_zsu, + gen_helper_sve_stdd_be_zsu, }, + { gen_helper_sve_stbd_zss, + gen_helper_sve_sthd_be_zss, + gen_helper_sve_stsd_be_zss, + gen_helper_sve_stdd_be_zss, }, + { gen_helper_sve_stbd_zd, + gen_helper_sve_sthd_be_zd, + gen_helper_sve_stsd_be_zd, + gen_helper_sve_stdd_be_zd, } } }, + { /* MTE Inactive */ + { /* Little-endian */ + { gen_helper_sve_stbd_zsu_mte, + gen_helper_sve_sthd_le_zsu_mte, + gen_helper_sve_stsd_le_zsu_mte, + gen_helper_sve_stdd_le_zsu_mte, }, + { gen_helper_sve_stbd_zss_mte, + gen_helper_sve_sthd_le_zss_mte, + gen_helper_sve_stsd_le_zss_mte, + gen_helper_sve_stdd_le_zss_mte, }, + { gen_helper_sve_stbd_zd_mte, + gen_helper_sve_sthd_le_zd_mte, + gen_helper_sve_stsd_le_zd_mte, + gen_helper_sve_stdd_le_zd_mte, } }, + { /* Big-endian */ + { gen_helper_sve_stbd_zsu_mte, + gen_helper_sve_sthd_be_zsu_mte, + gen_helper_sve_stsd_be_zsu_mte, + gen_helper_sve_stdd_be_zsu_mte, }, + { gen_helper_sve_stbd_zss_mte, + gen_helper_sve_sthd_be_zss_mte, + gen_helper_sve_stsd_be_zss_mte, + gen_helper_sve_stdd_be_zss_mte, }, + { gen_helper_sve_stbd_zd_mte, + gen_helper_sve_sthd_be_zd_mte, + gen_helper_sve_stsd_be_zd_mte, + gen_helper_sve_stdd_be_zd_mte, } } }, }; static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a) { gen_helper_gvec_mem_scatter *fn; - int be = s->be_data == MO_BE; + bool be = s->be_data == MO_BE; + bool mte = s->mte_active[0]; if (a->esz < a->msz || (a->msz == 0 && a->scale)) { return false; @@ -5568,23 +5785,24 @@ static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a) } switch (a->esz) { case MO_32: - fn = scatter_store_fn32[be][a->xs][a->msz]; + fn = scatter_store_fn32[mte][be][a->xs][a->msz]; break; case MO_64: - fn = scatter_store_fn64[be][a->xs][a->msz]; + fn = scatter_store_fn64[mte][be][a->xs][a->msz]; break; default: g_assert_not_reached(); } do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz, - cpu_reg_sp(s, a->rn), a->msz, fn); + cpu_reg_sp(s, a->rn), a->msz, true, fn); return true; } static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a) { gen_helper_gvec_mem_scatter *fn = NULL; - int be = s->be_data == MO_BE; + bool be = s->be_data == MO_BE; + bool mte = s->mte_active[0]; TCGv_i64 imm; if (a->esz < a->msz) { @@ -5596,10 +5814,10 @@ static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a) switch (a->esz) { case MO_32: - fn = scatter_store_fn32[be][0][a->msz]; + fn = scatter_store_fn32[mte][be][0][a->msz]; break; case MO_64: - fn = scatter_store_fn64[be][2][a->msz]; + fn = scatter_store_fn64[mte][be][2][a->msz]; break; } assert(fn != NULL); @@ -5608,7 +5826,7 @@ static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a) * by loading the immediate into the scalar parameter. */ imm = tcg_const_i64(a->imm << a->msz); - do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, fn); + do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, true, fn); tcg_temp_free_i64(imm); return true; } From c4af8ba19b9d22aac79cab679a20b159af9d6809 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 25 Jun 2020 20:31:36 -0700 Subject: [PATCH 50/57] target/arm: Complete TBI clearing for user-only for SVE There are a number of paths by which the TBI is still intact for user-only in the SVE helpers. Because we currently always set TBI for user-only, we do not need to pass down the actual TBI setting from above, and we can remove the top byte in the inner-most primitives, so that none are forgotten. Moreover, this keeps the "dirty" pointer around at the higher levels, where we need it for any MTE checking. Since the normal case, especially for user-only, goes through RAM, this clearing merely adds two insns per page lookup, which will be completely in the noise. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20200626033144.790098-39-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/cpu.c | 3 +++ target/arm/sve_helper.c | 19 +++++++++++++++++-- target/arm/translate-a64.c | 5 +++++ 3 files changed, 25 insertions(+), 2 deletions(-) diff --git a/target/arm/cpu.c b/target/arm/cpu.c index d9876337c0..afe81e9b6c 100644 --- a/target/arm/cpu.c +++ b/target/arm/cpu.c @@ -203,6 +203,9 @@ static void arm_cpu_reset(DeviceState *dev) * Enable TBI0 and TBI1. While the real kernel only enables TBI0, * turning on both here will produce smaller code and otherwise * make no difference to the user-level emulation. + * + * In sve_probe_page, we assume that this is set. + * Do not modify this without other changes. */ env->cp15.tcr_el[1].raw_tcr = (3ULL << 37); #else diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c index ad974c2cc5..382fa82bc8 100644 --- a/target/arm/sve_helper.c +++ b/target/arm/sve_helper.c @@ -3966,14 +3966,16 @@ static void sve_##NAME##_host(void *vd, intptr_t reg_off, void *host) \ static void sve_##NAME##_tlb(CPUARMState *env, void *vd, intptr_t reg_off, \ target_ulong addr, uintptr_t ra) \ { \ - *(TYPEE *)(vd + H(reg_off)) = (TYPEM)TLB(env, addr, ra); \ + *(TYPEE *)(vd + H(reg_off)) = \ + (TYPEM)TLB(env, useronly_clean_ptr(addr), ra); \ } #define DO_ST_TLB(NAME, H, TYPEE, TYPEM, TLB) \ static void sve_##NAME##_tlb(CPUARMState *env, void *vd, intptr_t reg_off, \ target_ulong addr, uintptr_t ra) \ { \ - TLB(env, addr, (TYPEM)*(TYPEE *)(vd + H(reg_off)), ra); \ + TLB(env, useronly_clean_ptr(addr), \ + (TYPEM)*(TYPEE *)(vd + H(reg_off)), ra); \ } #define DO_LD_PRIM_1(NAME, H, TE, TM) \ @@ -4091,6 +4093,19 @@ static bool sve_probe_page(SVEHostPage *info, bool nofault, int flags; addr += mem_off; + + /* + * User-only currently always issues with TBI. See the comment + * above useronly_clean_ptr. Usually we clean this top byte away + * during translation, but we can't do that for e.g. vector + imm + * addressing modes. + * + * We currently always enable TBI for user-only, and do not provide + * a way to turn it off. So clean the pointer unconditionally here, + * rather than look it up here, or pass it down from above. + */ + addr = useronly_clean_ptr(addr); + flags = probe_access_flags(env, addr, access_type, mmu_idx, nofault, &info->host, retaddr); info->flags = flags; diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index e46c4a49e0..c20af6ee9d 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -14634,6 +14634,11 @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase, dc->features = env->features; dc->dcz_blocksize = arm_cpu->dcz_blocksize; +#ifdef CONFIG_USER_ONLY + /* In sve_probe_page, we assume TBI is enabled. */ + tcg_debug_assert(dc->tbid & 1); +#endif + /* Single step state. The code-generation logic here is: * SS_ACTIVE == 0: * generate code with no special handling for single-stepping (except From eb821168db798302bd124a3b000cebc23bd0a395 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 25 Jun 2020 20:31:37 -0700 Subject: [PATCH 51/57] target/arm: Implement data cache set allocation tags This is DC GVA and DC GZVA, and the tag check for DC ZVA. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20200626033144.790098-40-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/cpu.h | 4 +++- target/arm/helper.c | 16 ++++++++++++++++ target/arm/translate-a64.c | 39 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 58 insertions(+), 1 deletion(-) diff --git a/target/arm/cpu.h b/target/arm/cpu.h index 3bf0518ca4..513c38970c 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h @@ -2360,7 +2360,9 @@ static inline uint64_t cpreg_to_kvm_id(uint32_t cpregid) #define ARM_CP_NZCV (ARM_CP_SPECIAL | 0x0300) #define ARM_CP_CURRENTEL (ARM_CP_SPECIAL | 0x0400) #define ARM_CP_DC_ZVA (ARM_CP_SPECIAL | 0x0500) -#define ARM_LAST_SPECIAL ARM_CP_DC_ZVA +#define ARM_CP_DC_GVA (ARM_CP_SPECIAL | 0x0600) +#define ARM_CP_DC_GZVA (ARM_CP_SPECIAL | 0x0700) +#define ARM_LAST_SPECIAL ARM_CP_DC_GZVA #define ARM_CP_FPU 0x1000 #define ARM_CP_SVE 0x2000 #define ARM_CP_NO_GDB 0x4000 diff --git a/target/arm/helper.c b/target/arm/helper.c index 44a3f9fb48..23cf44fcf4 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -6998,6 +6998,22 @@ static const ARMCPRegInfo mte_el0_cacheop_reginfo[] = { .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 14, .opc2 = 5, .type = ARM_CP_NOP, .access = PL0_W, .accessfn = aa64_cacheop_poc_access }, + { .name = "DC_GVA", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 4, .opc2 = 3, + .access = PL0_W, .type = ARM_CP_DC_GVA, +#ifndef CONFIG_USER_ONLY + /* Avoid overhead of an access check that always passes in user-mode */ + .accessfn = aa64_zva_access, +#endif + }, + { .name = "DC_GZVA", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 4, .opc2 = 4, + .access = PL0_W, .type = ARM_CP_DC_GZVA, +#ifndef CONFIG_USER_ONLY + /* Avoid overhead of an access check that always passes in user-mode */ + .accessfn = aa64_zva_access, +#endif + }, REGINFO_SENTINEL }; diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index c20af6ee9d..73d753f11f 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -1874,6 +1874,45 @@ static void handle_sys(DisasContext *s, uint32_t insn, bool isread, } gen_helper_dc_zva(cpu_env, tcg_rt); return; + case ARM_CP_DC_GVA: + { + TCGv_i64 clean_addr, tag; + + /* + * DC_GVA, like DC_ZVA, requires that we supply the original + * pointer for an invalid page. Probe that address first. + */ + tcg_rt = cpu_reg(s, rt); + clean_addr = clean_data_tbi(s, tcg_rt); + gen_probe_access(s, clean_addr, MMU_DATA_STORE, MO_8); + + if (s->ata) { + /* Extract the tag from the register to match STZGM. */ + tag = tcg_temp_new_i64(); + tcg_gen_shri_i64(tag, tcg_rt, 56); + gen_helper_stzgm_tags(cpu_env, clean_addr, tag); + tcg_temp_free_i64(tag); + } + } + return; + case ARM_CP_DC_GZVA: + { + TCGv_i64 clean_addr, tag; + + /* For DC_GZVA, we can rely on DC_ZVA for the proper fault. */ + tcg_rt = cpu_reg(s, rt); + clean_addr = clean_data_tbi(s, tcg_rt); + gen_helper_dc_zva(cpu_env, clean_addr); + + if (s->ata) { + /* Extract the tag from the register to match STZGM. */ + tag = tcg_temp_new_i64(); + tcg_gen_shri_i64(tag, tcg_rt, 56); + gen_helper_stzgm_tags(cpu_env, clean_addr, tag); + tcg_temp_free_i64(tag); + } + } + return; default: break; } From 34669338bd9d66255fceaa84c314251ca49ca8d5 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 25 Jun 2020 20:31:38 -0700 Subject: [PATCH 52/57] target/arm: Set PSTATE.TCO on exception entry D1.10 specifies that exception handlers begin with tag checks overridden. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20200626033144.790098-41-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/helper.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/target/arm/helper.c b/target/arm/helper.c index 23cf44fcf4..d220612a20 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -9704,6 +9704,9 @@ static void arm_cpu_do_interrupt_aarch64(CPUState *cs) break; } } + if (cpu_isar_feature(aa64_mte, cpu)) { + new_mode |= PSTATE_TCO; + } pstate_write(env, PSTATE_DAIF | new_mode); env->aarch64 = 1; From 7e98e21c09871cddc20946c8f3f3595e93154ecb Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 25 Jun 2020 20:31:39 -0700 Subject: [PATCH 53/57] target/arm: Always pass cacheattr to get_phys_addr We need to check the memattr of a page in order to determine whether it is Tagged for MTE. Between Stage1 and Stage2, this becomes simpler if we always collect this data, instead of occasionally being presented with NULL. Use the nonnull attribute to allow the compiler to check that all pointer arguments are non-null. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20200626033144.790098-42-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/helper.c | 60 ++++++++++++++++++++--------------------- target/arm/internals.h | 3 ++- target/arm/m_helper.c | 11 +++++--- target/arm/tlb_helper.c | 4 ++- 4 files changed, 42 insertions(+), 36 deletions(-) diff --git a/target/arm/helper.c b/target/arm/helper.c index d220612a20..2072db2f92 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -44,7 +44,8 @@ static bool get_phys_addr_lpae(CPUARMState *env, target_ulong address, bool s1_is_el0, hwaddr *phys_ptr, MemTxAttrs *txattrs, int *prot, target_ulong *page_size_ptr, - ARMMMUFaultInfo *fi, ARMCacheAttrs *cacheattrs); + ARMMMUFaultInfo *fi, ARMCacheAttrs *cacheattrs) + __attribute__((nonnull)); #endif static void switch_mode(CPUARMState *env, int mode); @@ -11101,19 +11102,16 @@ static bool get_phys_addr_lpae(CPUARMState *env, target_ulong address, arm_tlb_bti_gp(txattrs) = true; } - if (cacheattrs != NULL) { - if (mmu_idx == ARMMMUIdx_Stage2) { - cacheattrs->attrs = convert_stage2_attrs(env, - extract32(attrs, 0, 4)); - } else { - /* Index into MAIR registers for cache attributes */ - uint8_t attrindx = extract32(attrs, 0, 3); - uint64_t mair = env->cp15.mair_el[regime_el(env, mmu_idx)]; - assert(attrindx <= 7); - cacheattrs->attrs = extract64(mair, attrindx * 8, 8); - } - cacheattrs->shareability = extract32(attrs, 6, 2); + if (mmu_idx == ARMMMUIdx_Stage2) { + cacheattrs->attrs = convert_stage2_attrs(env, extract32(attrs, 0, 4)); + } else { + /* Index into MAIR registers for cache attributes */ + uint8_t attrindx = extract32(attrs, 0, 3); + uint64_t mair = env->cp15.mair_el[regime_el(env, mmu_idx)]; + assert(attrindx <= 7); + cacheattrs->attrs = extract64(mair, attrindx * 8, 8); } + cacheattrs->shareability = extract32(attrs, 6, 2); *phys_ptr = descaddr; *page_size_ptr = page_size; @@ -11948,28 +11946,29 @@ bool get_phys_addr(CPUARMState *env, target_ulong address, ret = get_phys_addr_lpae(env, ipa, access_type, ARMMMUIdx_Stage2, mmu_idx == ARMMMUIdx_E10_0, phys_ptr, attrs, &s2_prot, - page_size, fi, - cacheattrs != NULL ? &cacheattrs2 : NULL); + page_size, fi, &cacheattrs2); fi->s2addr = ipa; /* Combine the S1 and S2 perms. */ *prot &= s2_prot; - /* Combine the S1 and S2 cache attributes, if needed */ - if (!ret && cacheattrs != NULL) { - if (env->cp15.hcr_el2 & HCR_DC) { - /* - * HCR.DC forces the first stage attributes to - * Normal Non-Shareable, - * Inner Write-Back Read-Allocate Write-Allocate, - * Outer Write-Back Read-Allocate Write-Allocate. - */ - cacheattrs->attrs = 0xff; - cacheattrs->shareability = 0; - } - *cacheattrs = combine_cacheattrs(*cacheattrs, cacheattrs2); + /* If S2 fails, return early. */ + if (ret) { + return ret; } - return ret; + /* Combine the S1 and S2 cache attributes. */ + if (env->cp15.hcr_el2 & HCR_DC) { + /* + * HCR.DC forces the first stage attributes to + * Normal Non-Shareable, + * Inner Write-Back Read-Allocate Write-Allocate, + * Outer Write-Back Read-Allocate Write-Allocate. + */ + cacheattrs->attrs = 0xff; + cacheattrs->shareability = 0; + } + *cacheattrs = combine_cacheattrs(*cacheattrs, cacheattrs2); + return 0; } else { /* * For non-EL2 CPUs a stage1+stage2 translation is just stage 1. @@ -12094,11 +12093,12 @@ hwaddr arm_cpu_get_phys_page_attrs_debug(CPUState *cs, vaddr addr, bool ret; ARMMMUFaultInfo fi = {}; ARMMMUIdx mmu_idx = arm_mmu_idx(env); + ARMCacheAttrs cacheattrs = {}; *attrs = (MemTxAttrs) {}; ret = get_phys_addr(env, addr, 0, mmu_idx, &phys_addr, - attrs, &prot, &page_size, &fi, NULL); + attrs, &prot, &page_size, &fi, &cacheattrs); if (ret) { return -1; diff --git a/target/arm/internals.h b/target/arm/internals.h index 3306c4f829..ae99725d2b 100644 --- a/target/arm/internals.h +++ b/target/arm/internals.h @@ -1294,7 +1294,8 @@ bool get_phys_addr(CPUARMState *env, target_ulong address, MMUAccessType access_type, ARMMMUIdx mmu_idx, hwaddr *phys_ptr, MemTxAttrs *attrs, int *prot, target_ulong *page_size, - ARMMMUFaultInfo *fi, ARMCacheAttrs *cacheattrs); + ARMMMUFaultInfo *fi, ARMCacheAttrs *cacheattrs) + __attribute__((nonnull)); void arm_log_exception(int idx); diff --git a/target/arm/m_helper.c b/target/arm/m_helper.c index 5e8a795d20..036454234c 100644 --- a/target/arm/m_helper.c +++ b/target/arm/m_helper.c @@ -187,12 +187,13 @@ static bool v7m_stack_write(ARMCPU *cpu, uint32_t addr, uint32_t value, hwaddr physaddr; int prot; ARMMMUFaultInfo fi = {}; + ARMCacheAttrs cacheattrs = {}; bool secure = mmu_idx & ARM_MMU_IDX_M_S; int exc; bool exc_secure; if (get_phys_addr(env, addr, MMU_DATA_STORE, mmu_idx, &physaddr, - &attrs, &prot, &page_size, &fi, NULL)) { + &attrs, &prot, &page_size, &fi, &cacheattrs)) { /* MPU/SAU lookup failed */ if (fi.type == ARMFault_QEMU_SFault) { if (mode == STACK_LAZYFP) { @@ -279,13 +280,14 @@ static bool v7m_stack_read(ARMCPU *cpu, uint32_t *dest, uint32_t addr, hwaddr physaddr; int prot; ARMMMUFaultInfo fi = {}; + ARMCacheAttrs cacheattrs = {}; bool secure = mmu_idx & ARM_MMU_IDX_M_S; int exc; bool exc_secure; uint32_t value; if (get_phys_addr(env, addr, MMU_DATA_LOAD, mmu_idx, &physaddr, - &attrs, &prot, &page_size, &fi, NULL)) { + &attrs, &prot, &page_size, &fi, &cacheattrs)) { /* MPU/SAU lookup failed */ if (fi.type == ARMFault_QEMU_SFault) { qemu_log_mask(CPU_LOG_INT, @@ -1928,6 +1930,7 @@ static bool v7m_read_half_insn(ARMCPU *cpu, ARMMMUIdx mmu_idx, V8M_SAttributes sattrs = {}; MemTxAttrs attrs = {}; ARMMMUFaultInfo fi = {}; + ARMCacheAttrs cacheattrs = {}; MemTxResult txres; target_ulong page_size; hwaddr physaddr; @@ -1945,8 +1948,8 @@ static bool v7m_read_half_insn(ARMCPU *cpu, ARMMMUIdx mmu_idx, "...really SecureFault with SFSR.INVEP\n"); return false; } - if (get_phys_addr(env, addr, MMU_INST_FETCH, mmu_idx, - &physaddr, &attrs, &prot, &page_size, &fi, NULL)) { + if (get_phys_addr(env, addr, MMU_INST_FETCH, mmu_idx, &physaddr, + &attrs, &prot, &page_size, &fi, &cacheattrs)) { /* the MPU lookup failed */ env->v7m.cfsr[env->v7m.secure] |= R_V7M_CFSR_IACCVIOL_MASK; armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_MEM, env->v7m.secure); diff --git a/target/arm/tlb_helper.c b/target/arm/tlb_helper.c index 522a6442a4..89d90465a3 100644 --- a/target/arm/tlb_helper.c +++ b/target/arm/tlb_helper.c @@ -166,6 +166,7 @@ bool arm_cpu_tlb_fill(CPUState *cs, vaddr address, int size, int prot, ret; MemTxAttrs attrs = {}; ARMMMUFaultInfo fi = {}; + ARMCacheAttrs cacheattrs = {}; /* * Walk the page table and (if the mapping exists) add the page @@ -175,7 +176,8 @@ bool arm_cpu_tlb_fill(CPUState *cs, vaddr address, int size, */ ret = get_phys_addr(&cpu->env, address, access_type, core_to_arm_mmu_idx(&cpu->env, mmu_idx), - &phys_addr, &attrs, &prot, &page_size, &fi, NULL); + &phys_addr, &attrs, &prot, &page_size, + &fi, &cacheattrs); if (likely(!ret)) { /* * Map a single [sub]page. Regions smaller than our declared From 337a03f07ff0f9e6295662f4094e03a045b60bdc Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 25 Jun 2020 20:31:40 -0700 Subject: [PATCH 54/57] target/arm: Cache the Tagged bit for a page in MemTxAttrs This "bit" is a particular value of the page's MemAttr. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20200626033144.790098-43-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/helper.c | 48 ++++++++++++++++++++++++++++++++++++++--- target/arm/tlb_helper.c | 5 +++++ 2 files changed, 50 insertions(+), 3 deletions(-) diff --git a/target/arm/helper.c b/target/arm/helper.c index 2072db2f92..dc9c29f998 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -11834,9 +11834,19 @@ static uint8_t combine_cacheattr_nibble(uint8_t s1, uint8_t s2) */ static ARMCacheAttrs combine_cacheattrs(ARMCacheAttrs s1, ARMCacheAttrs s2) { - uint8_t s1lo = extract32(s1.attrs, 0, 4), s2lo = extract32(s2.attrs, 0, 4); - uint8_t s1hi = extract32(s1.attrs, 4, 4), s2hi = extract32(s2.attrs, 4, 4); + uint8_t s1lo, s2lo, s1hi, s2hi; ARMCacheAttrs ret; + bool tagged = false; + + if (s1.attrs == 0xf0) { + tagged = true; + s1.attrs = 0xff; + } + + s1lo = extract32(s1.attrs, 0, 4); + s2lo = extract32(s2.attrs, 0, 4); + s1hi = extract32(s1.attrs, 4, 4); + s2hi = extract32(s2.attrs, 4, 4); /* Combine shareability attributes (table D4-43) */ if (s1.shareability == 2 || s2.shareability == 2) { @@ -11884,6 +11894,11 @@ static ARMCacheAttrs combine_cacheattrs(ARMCacheAttrs s1, ARMCacheAttrs s2) } } + /* TODO: CombineS1S2Desc does not consider transient, only WB, RWA. */ + if (tagged && ret.attrs == 0xff) { + ret.attrs = 0xf0; + } + return ret; } @@ -11963,8 +11978,11 @@ bool get_phys_addr(CPUARMState *env, target_ulong address, * Normal Non-Shareable, * Inner Write-Back Read-Allocate Write-Allocate, * Outer Write-Back Read-Allocate Write-Allocate. + * Do not overwrite Tagged within attrs. */ - cacheattrs->attrs = 0xff; + if (cacheattrs->attrs != 0xf0) { + cacheattrs->attrs = 0xff; + } cacheattrs->shareability = 0; } *cacheattrs = combine_cacheattrs(*cacheattrs, cacheattrs2); @@ -12029,6 +12047,9 @@ bool get_phys_addr(CPUARMState *env, target_ulong address, /* Definitely a real MMU, not an MPU */ if (regime_translation_disabled(env, mmu_idx)) { + uint64_t hcr; + uint8_t memattr; + /* * MMU disabled. S1 addresses within aa64 translation regimes are * still checked for bounds -- see AArch64.TranslateAddressS1Off. @@ -12066,6 +12087,27 @@ bool get_phys_addr(CPUARMState *env, target_ulong address, *phys_ptr = address; *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC; *page_size = TARGET_PAGE_SIZE; + + /* Fill in cacheattr a-la AArch64.TranslateAddressS1Off. */ + hcr = arm_hcr_el2_eff(env); + cacheattrs->shareability = 0; + if (hcr & HCR_DC) { + if (hcr & HCR_DCT) { + memattr = 0xf0; /* Tagged, Normal, WB, RWA */ + } else { + memattr = 0xff; /* Normal, WB, RWA */ + } + } else if (access_type == MMU_INST_FETCH) { + if (regime_sctlr(env, mmu_idx) & SCTLR_I) { + memattr = 0xee; /* Normal, WT, RA, NT */ + } else { + memattr = 0x44; /* Normal, NC, No */ + } + cacheattrs->shareability = 2; /* outer sharable */ + } else { + memattr = 0x00; /* Device, nGnRnE */ + } + cacheattrs->attrs = memattr; return 0; } diff --git a/target/arm/tlb_helper.c b/target/arm/tlb_helper.c index 89d90465a3..b35dc8a011 100644 --- a/target/arm/tlb_helper.c +++ b/target/arm/tlb_helper.c @@ -188,6 +188,11 @@ bool arm_cpu_tlb_fill(CPUState *cs, vaddr address, int size, phys_addr &= TARGET_PAGE_MASK; address &= TARGET_PAGE_MASK; } + /* Notice and record tagged memory. */ + if (cpu_isar_feature(aa64_mte, cpu) && cacheattrs.attrs == 0xf0) { + arm_tlb_mte_tagged(&attrs) = true; + } + tlb_set_page_with_attrs(cs, address, phys_addr, attrs, prot, mmu_idx, page_size); return true; From 8bce44a2f6beb388a3f157652b46e99929839a96 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 25 Jun 2020 20:31:41 -0700 Subject: [PATCH 55/57] target/arm: Create tagged ram when MTE is enabled Signed-off-by: Richard Henderson Reviewed-by: Peter Maydell Message-id: 20200626033144.790098-44-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- hw/arm/virt.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++-- target/arm/cpu.c | 52 +++++++++++++++++++++++++++++++++++++++++---- target/arm/cpu.h | 6 ++++++ 3 files changed, 107 insertions(+), 6 deletions(-) diff --git a/hw/arm/virt.c b/hw/arm/virt.c index 402c362c14..22ce6d6199 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -1390,8 +1390,19 @@ static void create_platform_bus(VirtMachineState *vms) sysbus_mmio_get_region(s, 0)); } +static void create_tag_ram(MemoryRegion *tag_sysmem, + hwaddr base, hwaddr size, + const char *name) +{ + MemoryRegion *tagram = g_new(MemoryRegion, 1); + + memory_region_init_ram(tagram, NULL, name, size / 32, &error_fatal); + memory_region_add_subregion(tag_sysmem, base / 32, tagram); +} + static void create_secure_ram(VirtMachineState *vms, - MemoryRegion *secure_sysmem) + MemoryRegion *secure_sysmem, + MemoryRegion *secure_tag_sysmem) { MemoryRegion *secram = g_new(MemoryRegion, 1); char *nodename; @@ -1409,6 +1420,10 @@ static void create_secure_ram(VirtMachineState *vms, qemu_fdt_setprop_string(vms->fdt, nodename, "status", "disabled"); qemu_fdt_setprop_string(vms->fdt, nodename, "secure-status", "okay"); + if (secure_tag_sysmem) { + create_tag_ram(secure_tag_sysmem, base, size, "mach-virt.secure-tag"); + } + g_free(nodename); } @@ -1665,6 +1680,8 @@ static void machvirt_init(MachineState *machine) const CPUArchIdList *possible_cpus; MemoryRegion *sysmem = get_system_memory(); MemoryRegion *secure_sysmem = NULL; + MemoryRegion *tag_sysmem = NULL; + MemoryRegion *secure_tag_sysmem = NULL; int n, virt_max_cpus; bool firmware_loaded; bool aarch64 = true; @@ -1819,6 +1836,35 @@ static void machvirt_init(MachineState *machine) "secure-memory", &error_abort); } + /* + * The cpu adds the property if and only if MemTag is supported. + * If it is, we must allocate the ram to back that up. + */ + if (object_property_find(cpuobj, "tag-memory", NULL)) { + if (!tag_sysmem) { + tag_sysmem = g_new(MemoryRegion, 1); + memory_region_init(tag_sysmem, OBJECT(machine), + "tag-memory", UINT64_MAX / 32); + + if (vms->secure) { + secure_tag_sysmem = g_new(MemoryRegion, 1); + memory_region_init(secure_tag_sysmem, OBJECT(machine), + "secure-tag-memory", UINT64_MAX / 32); + + /* As with ram, secure-tag takes precedence over tag. */ + memory_region_add_subregion_overlap(secure_tag_sysmem, 0, + tag_sysmem, -1); + } + } + + object_property_set_link(cpuobj, OBJECT(tag_sysmem), + "tag-memory", &error_abort); + if (vms->secure) { + object_property_set_link(cpuobj, OBJECT(secure_tag_sysmem), + "secure-tag-memory", &error_abort); + } + } + qdev_realize(DEVICE(cpuobj), NULL, &error_fatal); object_unref(cpuobj); } @@ -1857,10 +1903,15 @@ static void machvirt_init(MachineState *machine) create_uart(vms, VIRT_UART, sysmem, serial_hd(0)); if (vms->secure) { - create_secure_ram(vms, secure_sysmem); + create_secure_ram(vms, secure_sysmem, secure_tag_sysmem); create_uart(vms, VIRT_SECURE_UART, secure_sysmem, serial_hd(1)); } + if (tag_sysmem) { + create_tag_ram(tag_sysmem, vms->memmap[VIRT_MEM].base, + machine->ram_size, "mach-virt.tag"); + } + vms->highmem_ecam &= vms->highmem && (!firmware_loaded || aarch64); create_rtc(vms); diff --git a/target/arm/cpu.c b/target/arm/cpu.c index afe81e9b6c..5050e1843a 100644 --- a/target/arm/cpu.c +++ b/target/arm/cpu.c @@ -1252,6 +1252,25 @@ void arm_cpu_post_init(Object *obj) if (kvm_enabled()) { kvm_arm_add_vcpu_properties(obj); } + +#ifndef CONFIG_USER_ONLY + if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64) && + cpu_isar_feature(aa64_mte, cpu)) { + object_property_add_link(obj, "tag-memory", + TYPE_MEMORY_REGION, + (Object **)&cpu->tag_memory, + qdev_prop_allow_set_link_before_realize, + OBJ_PROP_LINK_STRONG); + + if (arm_feature(&cpu->env, ARM_FEATURE_EL3)) { + object_property_add_link(obj, "secure-tag-memory", + TYPE_MEMORY_REGION, + (Object **)&cpu->secure_tag_memory, + qdev_prop_allow_set_link_before_realize, + OBJ_PROP_LINK_STRONG); + } + } +#endif } static void arm_cpu_finalizefn(Object *obj) @@ -1741,18 +1760,43 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) #ifndef CONFIG_USER_ONLY MachineState *ms = MACHINE(qdev_get_machine()); unsigned int smp_cpus = ms->smp.cpus; + bool has_secure = cpu->has_el3 || arm_feature(env, ARM_FEATURE_M_SECURITY); - if (cpu->has_el3 || arm_feature(env, ARM_FEATURE_M_SECURITY)) { - cs->num_ases = 2; + /* + * We must set cs->num_ases to the final value before + * the first call to cpu_address_space_init. + */ + if (cpu->tag_memory != NULL) { + cs->num_ases = 3 + has_secure; + } else { + cs->num_ases = 1 + has_secure; + } + if (has_secure) { if (!cpu->secure_memory) { cpu->secure_memory = cs->memory; } cpu_address_space_init(cs, ARMASIdx_S, "cpu-secure-memory", cpu->secure_memory); - } else { - cs->num_ases = 1; } + + if (cpu->tag_memory != NULL) { + cpu_address_space_init(cs, ARMASIdx_TagNS, "cpu-tag-memory", + cpu->tag_memory); + if (has_secure) { + cpu_address_space_init(cs, ARMASIdx_TagS, "cpu-tag-memory", + cpu->secure_tag_memory); + } + } else if (cpu_isar_feature(aa64_mte, cpu)) { + /* + * Since there is no tag memory, we can't meaningfully support MTE + * to its fullest. To avoid problems later, when we would come to + * use the tag memory, downgrade support to insns only. + */ + cpu->isar.id_aa64pfr1 = + FIELD_DP64(cpu->isar.id_aa64pfr1, ID_AA64PFR1, MTE, 1); + } + cpu_address_space_init(cs, ARMASIdx_NS, "cpu-memory", cs->memory); /* No core_count specified, default to smp_cpus. */ diff --git a/target/arm/cpu.h b/target/arm/cpu.h index 513c38970c..cf99dcca9f 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h @@ -792,6 +792,10 @@ struct ARMCPU { /* MemoryRegion to use for secure physical accesses */ MemoryRegion *secure_memory; + /* MemoryRegion to use for allocation tag accesses */ + MemoryRegion *tag_memory; + MemoryRegion *secure_tag_memory; + /* For v8M, pointer to the IDAU interface provided by board/SoC */ Object *idau; @@ -2985,6 +2989,8 @@ typedef enum ARMMMUIdxBit { typedef enum ARMASIdx { ARMASIdx_NS = 0, ARMASIdx_S = 1, + ARMASIdx_TagNS = 2, + ARMASIdx_TagS = 3, } ARMASIdx; /* Return the Exception Level targeted by debug exceptions. */ From e4d5bf4fbd5abfc3727e711eda64a583cab4d637 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 25 Jun 2020 20:31:42 -0700 Subject: [PATCH 56/57] target/arm: Add allocation tag storage for system mode Look up the physical address for the given virtual address, convert that to a tag physical address, and finally return the host address that backs it. Signed-off-by: Richard Henderson Message-id: 20200626033144.790098-45-richard.henderson@linaro.org Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- target/arm/mte_helper.c | 131 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 131 insertions(+) diff --git a/target/arm/mte_helper.c b/target/arm/mte_helper.c index 4f9bd3add3..5ea57d487a 100644 --- a/target/arm/mte_helper.c +++ b/target/arm/mte_helper.c @@ -21,6 +21,7 @@ #include "cpu.h" #include "internals.h" #include "exec/exec-all.h" +#include "exec/ram_addr.h" #include "exec/cpu_ldst.h" #include "exec/helper-proto.h" @@ -74,8 +75,138 @@ static uint8_t *allocation_tag_mem(CPUARMState *env, int ptr_mmu_idx, int ptr_size, MMUAccessType tag_access, int tag_size, uintptr_t ra) { +#ifdef CONFIG_USER_ONLY /* Tag storage not implemented. */ return NULL; +#else + uintptr_t index; + CPUIOTLBEntry *iotlbentry; + int in_page, flags; + ram_addr_t ptr_ra; + hwaddr ptr_paddr, tag_paddr, xlat; + MemoryRegion *mr; + ARMASIdx tag_asi; + AddressSpace *tag_as; + void *host; + + /* + * Probe the first byte of the virtual address. This raises an + * exception for inaccessible pages, and resolves the virtual address + * into the softmmu tlb. + * + * When RA == 0, this is for mte_probe1. The page is expected to be + * valid. Indicate to probe_access_flags no-fault, then assert that + * we received a valid page. + */ + flags = probe_access_flags(env, ptr, ptr_access, ptr_mmu_idx, + ra == 0, &host, ra); + assert(!(flags & TLB_INVALID_MASK)); + + /* + * Find the iotlbentry for ptr. This *must* be present in the TLB + * because we just found the mapping. + * TODO: Perhaps there should be a cputlb helper that returns a + * matching tlb entry + iotlb entry. + */ + index = tlb_index(env, ptr_mmu_idx, ptr); +# ifdef CONFIG_DEBUG_TCG + { + CPUTLBEntry *entry = tlb_entry(env, ptr_mmu_idx, ptr); + target_ulong comparator = (ptr_access == MMU_DATA_LOAD + ? entry->addr_read + : tlb_addr_write(entry)); + g_assert(tlb_hit(comparator, ptr)); + } +# endif + iotlbentry = &env_tlb(env)->d[ptr_mmu_idx].iotlb[index]; + + /* If the virtual page MemAttr != Tagged, access unchecked. */ + if (!arm_tlb_mte_tagged(&iotlbentry->attrs)) { + return NULL; + } + + /* + * If not backed by host ram, there is no tag storage: access unchecked. + * This is probably a guest os bug though, so log it. + */ + if (unlikely(flags & TLB_MMIO)) { + qemu_log_mask(LOG_GUEST_ERROR, + "Page @ 0x%" PRIx64 " indicates Tagged Normal memory " + "but is not backed by host ram\n", ptr); + return NULL; + } + + /* + * The Normal memory access can extend to the next page. E.g. a single + * 8-byte access to the last byte of a page will check only the last + * tag on the first page. + * Any page access exception has priority over tag check exception. + */ + in_page = -(ptr | TARGET_PAGE_MASK); + if (unlikely(ptr_size > in_page)) { + void *ignore; + flags |= probe_access_flags(env, ptr + in_page, ptr_access, + ptr_mmu_idx, ra == 0, &ignore, ra); + assert(!(flags & TLB_INVALID_MASK)); + } + + /* Any debug exception has priority over a tag check exception. */ + if (unlikely(flags & TLB_WATCHPOINT)) { + int wp = ptr_access == MMU_DATA_LOAD ? BP_MEM_READ : BP_MEM_WRITE; + assert(ra != 0); + cpu_check_watchpoint(env_cpu(env), ptr, ptr_size, + iotlbentry->attrs, wp, ra); + } + + /* + * Find the physical address within the normal mem space. + * The memory region lookup must succeed because TLB_MMIO was + * not set in the cputlb lookup above. + */ + mr = memory_region_from_host(host, &ptr_ra); + tcg_debug_assert(mr != NULL); + tcg_debug_assert(memory_region_is_ram(mr)); + ptr_paddr = ptr_ra; + do { + ptr_paddr += mr->addr; + mr = mr->container; + } while (mr); + + /* Convert to the physical address in tag space. */ + tag_paddr = ptr_paddr >> (LOG2_TAG_GRANULE + 1); + + /* Look up the address in tag space. */ + tag_asi = iotlbentry->attrs.secure ? ARMASIdx_TagS : ARMASIdx_TagNS; + tag_as = cpu_get_address_space(env_cpu(env), tag_asi); + mr = address_space_translate(tag_as, tag_paddr, &xlat, NULL, + tag_access == MMU_DATA_STORE, + iotlbentry->attrs); + + /* + * Note that @mr will never be NULL. If there is nothing in the address + * space at @tag_paddr, the translation will return the unallocated memory + * region. For our purposes, the result must be ram. + */ + if (unlikely(!memory_region_is_ram(mr))) { + /* ??? Failure is a board configuration error. */ + qemu_log_mask(LOG_UNIMP, + "Tag Memory @ 0x%" HWADDR_PRIx " not found for " + "Normal Memory @ 0x%" HWADDR_PRIx "\n", + tag_paddr, ptr_paddr); + return NULL; + } + + /* + * Ensure the tag memory is dirty on write, for migration. + * Tag memory can never contain code or display memory (vga). + */ + if (tag_access == MMU_DATA_STORE) { + ram_addr_t tag_ra = memory_region_get_ram_addr(mr) + xlat; + cpu_physical_memory_set_dirty_flag(tag_ra, DIRTY_MEMORY_MIGRATION); + } + + return memory_region_get_ram_ptr(mr) + xlat; +#endif } uint64_t HELPER(irg)(CPUARMState *env, uint64_t rn, uint64_t rm) From c7459633baa71d1781fde4a245d6ec9ce2f008cf Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 25 Jun 2020 20:31:43 -0700 Subject: [PATCH 57/57] target/arm: Enable MTE We now implement all of the components of MTE, without actually supporting any tagged memory. All MTE instructions will work, trivially, so we can enable support. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20200626033144.790098-46-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/cpu64.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c index a0c1d8894b..a2f4733eed 100644 --- a/target/arm/cpu64.c +++ b/target/arm/cpu64.c @@ -654,6 +654,11 @@ static void aarch64_max_initfn(Object *obj) t = cpu->isar.id_aa64pfr1; t = FIELD_DP64(t, ID_AA64PFR1, BT, 1); + /* + * Begin with full support for MTE; will be downgraded to MTE=1 + * during realize if the board provides no tag memory. + */ + t = FIELD_DP64(t, ID_AA64PFR1, MTE, 2); cpu->isar.id_aa64pfr1 = t; t = cpu->isar.id_aa64mmfr1;