; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s define arm_aapcs_vfpcc <8 x i16> @test_vldrbq_gather_offset_s16(i8* %base, <8 x i16> %offset) { ; CHECK-LABEL: test_vldrbq_gather_offset_s16: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrb.s16 q1, [r0, q0] ; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: %0 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0i8.v8i16(i8* %base, <8 x i16> %offset, i32 8, i32 0, i32 0) ret <8 x i16> %0 } declare <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0i8.v8i16(i8*, <8 x i16>, i32, i32, i32) define arm_aapcs_vfpcc <4 x i32> @test_vldrbq_gather_offset_s32(i8* %base, <4 x i32> %offset) { ; CHECK-LABEL: test_vldrbq_gather_offset_s32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrb.s32 q1, [r0, q0] ; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: %0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i8.v4i32(i8* %base, <4 x i32> %offset, i32 8, i32 0, i32 0) ret <4 x i32> %0 } declare <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i8.v4i32(i8*, <4 x i32>, i32, i32, i32) define arm_aapcs_vfpcc <16 x i8> @test_vldrbq_gather_offset_s8(i8* %base, <16 x i8> %offset) { ; CHECK-LABEL: test_vldrbq_gather_offset_s8: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrb.u8 q1, [r0, q0] ; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: %0 = call <16 x i8> @llvm.arm.mve.vldr.gather.offset.v16i8.p0i8.v16i8(i8* %base, <16 x i8> %offset, i32 8, i32 0, i32 0) ret <16 x i8> %0 } declare <16 x i8> @llvm.arm.mve.vldr.gather.offset.v16i8.p0i8.v16i8(i8*, <16 x i8>, i32, i32, i32) define arm_aapcs_vfpcc <8 x i16> @test_vldrbq_gather_offset_u16(i8* %base, <8 x i16> %offset) { ; CHECK-LABEL: test_vldrbq_gather_offset_u16: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrb.u16 q1, [r0, q0] ; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: %0 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0i8.v8i16(i8* %base, <8 x i16> %offset, i32 8, i32 0, i32 1) ret <8 x i16> %0 } define arm_aapcs_vfpcc <4 x i32> @test_vldrbq_gather_offset_u32(i8* %base, <4 x i32> %offset) { ; CHECK-LABEL: test_vldrbq_gather_offset_u32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrb.u32 q1, [r0, q0] ; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: %0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i8.v4i32(i8* %base, <4 x i32> %offset, i32 8, i32 0, i32 1) ret <4 x i32> %0 } define arm_aapcs_vfpcc <16 x i8> @test_vldrbq_gather_offset_u8(i8* %base, <16 x i8> %offset) { ; CHECK-LABEL: test_vldrbq_gather_offset_u8: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrb.u8 q1, [r0, q0] ; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: %0 = call <16 x i8> @llvm.arm.mve.vldr.gather.offset.v16i8.p0i8.v16i8(i8* %base, <16 x i8> %offset, i32 8, i32 0, i32 1) ret <16 x i8> %0 } define arm_aapcs_vfpcc <8 x i16> @test_vldrbq_gather_offset_z_s16(i8* %base, <8 x i16> %offset, i16 zeroext %p) { ; CHECK-LABEL: test_vldrbq_gather_offset_z_s16: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpst ; CHECK-NEXT: vldrbt.s16 q1, [r0, q0] ; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: %0 = zext i16 %p to i32 %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) %2 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0i8.v8i16.v8i1(i8* %base, <8 x i16> %offset, i32 8, i32 0, i32 0, <8 x i1> %1) ret <8 x i16> %2 } declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) declare <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0i8.v8i16.v8i1(i8*, <8 x i16>, i32, i32, i32, <8 x i1>) define arm_aapcs_vfpcc <4 x i32> @test_vldrbq_gather_offset_z_s32(i8* %base, <4 x i32> %offset, i16 zeroext %p) { ; CHECK-LABEL: test_vldrbq_gather_offset_z_s32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpst ; CHECK-NEXT: vldrbt.s32 q1, [r0, q0] ; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: %0 = zext i16 %p to i32 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) %2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i8.v4i32.v4i1(i8* %base, <4 x i32> %offset, i32 8, i32 0, i32 0, <4 x i1> %1) ret <4 x i32> %2 } declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) declare <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i8.v4i32.v4i1(i8*, <4 x i32>, i32, i32, i32, <4 x i1>) define arm_aapcs_vfpcc <16 x i8> @test_vldrbq_gather_offset_z_s8(i8* %base, <16 x i8> %offset, i16 zeroext %p) { ; CHECK-LABEL: test_vldrbq_gather_offset_z_s8: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpst ; CHECK-NEXT: vldrbt.u8 q1, [r0, q0] ; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: %0 = zext i16 %p to i32 %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) %2 = call <16 x i8> @llvm.arm.mve.vldr.gather.offset.predicated.v16i8.p0i8.v16i8.v16i1(i8* %base, <16 x i8> %offset, i32 8, i32 0, i32 0, <16 x i1> %1) ret <16 x i8> %2 } declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32) declare <16 x i8> @llvm.arm.mve.vldr.gather.offset.predicated.v16i8.p0i8.v16i8.v16i1(i8*, <16 x i8>, i32, i32, i32, <16 x i1>) define arm_aapcs_vfpcc <8 x i16> @test_vldrbq_gather_offset_z_u16(i8* %base, <8 x i16> %offset, i16 zeroext %p) { ; CHECK-LABEL: test_vldrbq_gather_offset_z_u16: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpst ; CHECK-NEXT: vldrbt.u16 q1, [r0, q0] ; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: %0 = zext i16 %p to i32 %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) %2 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0i8.v8i16.v8i1(i8* %base, <8 x i16> %offset, i32 8, i32 0, i32 1, <8 x i1> %1) ret <8 x i16> %2 } define arm_aapcs_vfpcc <4 x i32> @test_vldrbq_gather_offset_z_u32(i8* %base, <4 x i32> %offset, i16 zeroext %p) { ; CHECK-LABEL: test_vldrbq_gather_offset_z_u32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpst ; CHECK-NEXT: vldrbt.u32 q1, [r0, q0] ; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: %0 = zext i16 %p to i32 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) %2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i8.v4i32.v4i1(i8* %base, <4 x i32> %offset, i32 8, i32 0, i32 1, <4 x i1> %1) ret <4 x i32> %2 } define arm_aapcs_vfpcc <16 x i8> @test_vldrbq_gather_offset_z_u8(i8* %base, <16 x i8> %offset, i16 zeroext %p) { ; CHECK-LABEL: test_vldrbq_gather_offset_z_u8: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpst ; CHECK-NEXT: vldrbt.u8 q1, [r0, q0] ; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: %0 = zext i16 %p to i32 %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) %2 = call <16 x i8> @llvm.arm.mve.vldr.gather.offset.predicated.v16i8.p0i8.v16i8.v16i1(i8* %base, <16 x i8> %offset, i32 8, i32 0, i32 1, <16 x i1> %1) ret <16 x i8> %2 } define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_s64(<2 x i64> %addr) { ; CHECK-LABEL: test_vldrdq_gather_base_s64: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrd.u64 q1, [q0, #616] ; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: %0 = call <2 x i64> @llvm.arm.mve.vldr.gather.base.v2i64.v2i64(<2 x i64> %addr, i32 616) ret <2 x i64> %0 } declare <2 x i64> @llvm.arm.mve.vldr.gather.base.v2i64.v2i64(<2 x i64>, i32) define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_u64(<2 x i64> %addr) { ; CHECK-LABEL: test_vldrdq_gather_base_u64: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrd.u64 q1, [q0, #-336] ; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: %0 = call <2 x i64> @llvm.arm.mve.vldr.gather.base.v2i64.v2i64(<2 x i64> %addr, i32 -336) ret <2 x i64> %0 } define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_wb_s64(<2 x i64>* %addr) { ; CHECK-LABEL: test_vldrdq_gather_base_wb_s64: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q1, [r0] ; CHECK-NEXT: vldrd.u64 q0, [q1, #576]! ; CHECK-NEXT: vstrw.32 q1, [r0] ; CHECK-NEXT: bx lr entry: %0 = load <2 x i64>, <2 x i64>* %addr, align 8 %1 = call { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.v2i64.v2i64(<2 x i64> %0, i32 576) %2 = extractvalue { <2 x i64>, <2 x i64> } %1, 1 store <2 x i64> %2, <2 x i64>* %addr, align 8 %3 = extractvalue { <2 x i64>, <2 x i64> } %1, 0 ret <2 x i64> %3 } declare { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.v2i64.v2i64(<2 x i64>, i32) define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_wb_u64(<2 x i64>* %addr) { ; CHECK-LABEL: test_vldrdq_gather_base_wb_u64: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q1, [r0] ; CHECK-NEXT: vldrd.u64 q0, [q1, #-328]! ; CHECK-NEXT: vstrw.32 q1, [r0] ; CHECK-NEXT: bx lr entry: %0 = load <2 x i64>, <2 x i64>* %addr, align 8 %1 = call { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.v2i64.v2i64(<2 x i64> %0, i32 -328) %2 = extractvalue { <2 x i64>, <2 x i64> } %1, 1 store <2 x i64> %2, <2 x i64>* %addr, align 8 %3 = extractvalue { <2 x i64>, <2 x i64> } %1, 0 ret <2 x i64> %3 } define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_wb_z_s64(<2 x i64>* %addr, i16 zeroext %p) { ; CHECK-LABEL: test_vldrdq_gather_base_wb_z_s64: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vldrw.u32 q1, [r0] ; CHECK-NEXT: vpst ; CHECK-NEXT: vldrdt.u64 q0, [q1, #664]! ; CHECK-NEXT: vstrw.32 q1, [r0] ; CHECK-NEXT: bx lr entry: %0 = load <2 x i64>, <2 x i64>* %addr, align 8 %1 = zext i16 %p to i32 %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1) %3 = call { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1(<2 x i64> %0, i32 664, <4 x i1> %2) %4 = extractvalue { <2 x i64>, <2 x i64> } %3, 1 store <2 x i64> %4, <2 x i64>* %addr, align 8 %5 = extractvalue { <2 x i64>, <2 x i64> } %3, 0 ret <2 x i64> %5 } declare { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1(<2 x i64>, i32, <4 x i1>) define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_wb_z_u64(<2 x i64>* %addr, i16 zeroext %p) { ; CHECK-LABEL: test_vldrdq_gather_base_wb_z_u64: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vldrw.u32 q1, [r0] ; CHECK-NEXT: vpst ; CHECK-NEXT: vldrdt.u64 q0, [q1, #656]! ; CHECK-NEXT: vstrw.32 q1, [r0] ; CHECK-NEXT: bx lr entry: %0 = load <2 x i64>, <2 x i64>* %addr, align 8 %1 = zext i16 %p to i32 %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1) %3 = call { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1(<2 x i64> %0, i32 656, <4 x i1> %2) %4 = extractvalue { <2 x i64>, <2 x i64> } %3, 1 store <2 x i64> %4, <2 x i64>* %addr, align 8 %5 = extractvalue { <2 x i64>, <2 x i64> } %3, 0 ret <2 x i64> %5 } define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_z_s64(<2 x i64> %addr, i16 zeroext %p) { ; CHECK-LABEL: test_vldrdq_gather_base_z_s64: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmsr p0, r0 ; CHECK-NEXT: vpst ; CHECK-NEXT: vldrdt.u64 q1, [q0, #888] ; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: %0 = zext i16 %p to i32 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) %2 = call <2 x i64> @llvm.arm.mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1(<2 x i64> %addr, i32 888, <4 x i1> %1) ret <2 x i64> %2 } declare <2 x i64> @llvm.arm.mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1(<2 x i64>, i32, <4 x i1>) define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_z_u64(<2 x i64> %addr, i16 zeroext %p) { ; CHECK-LABEL: test_vldrdq_gather_base_z_u64: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmsr p0, r0 ; CHECK-NEXT: vpst ; CHECK-NEXT: vldrdt.u64 q1, [q0, #-1000] ; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: %0 = zext i16 %p to i32 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) %2 = call <2 x i64> @llvm.arm.mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1(<2 x i64> %addr, i32 -1000, <4 x i1> %1) ret <2 x i64> %2 } define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_offset_s64(i64* %base, <2 x i64> %offset) { ; CHECK-LABEL: test_vldrdq_gather_offset_s64: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrd.u64 q1, [r0, q0] ; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: %0 = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.v2i64.p0i64.v2i64(i64* %base, <2 x i64> %offset, i32 64, i32 0, i32 0) ret <2 x i64> %0 } declare <2 x i64> @llvm.arm.mve.vldr.gather.offset.v2i64.p0i64.v2i64(i64*, <2 x i64>, i32, i32, i32) define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_offset_u64(i64* %base, <2 x i64> %offset) { ; CHECK-LABEL: test_vldrdq_gather_offset_u64: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrd.u64 q1, [r0, q0] ; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: %0 = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.v2i64.p0i64.v2i64(i64* %base, <2 x i64> %offset, i32 64, i32 0, i32 1) ret <2 x i64> %0 } define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_offset_z_s64(i64* %base, <2 x i64> %offset, i16 zeroext %p) { ; CHECK-LABEL: test_vldrdq_gather_offset_z_s64: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpst ; CHECK-NEXT: vldrdt.u64 q1, [r0, q0] ; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: %0 = zext i16 %p to i32 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) %2 = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1(i64* %base, <2 x i64> %offset, i32 64, i32 0, i32 0, <4 x i1> %1) ret <2 x i64> %2 } declare <2 x i64> @llvm.arm.mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1(i64*, <2 x i64>, i32, i32, i32, <4 x i1>) define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_offset_z_u64(i64* %base, <2 x i64> %offset, i16 zeroext %p) { ; CHECK-LABEL: test_vldrdq_gather_offset_z_u64: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpst ; CHECK-NEXT: vldrdt.u64 q1, [r0, q0] ; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: %0 = zext i16 %p to i32 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) %2 = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1(i64* %base, <2 x i64> %offset, i32 64, i32 0, i32 1, <4 x i1> %1) ret <2 x i64> %2 } define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_shifted_offset_s64(i64* %base, <2 x i64> %offset) { ; CHECK-LABEL: test_vldrdq_gather_shifted_offset_s64: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrd.u64 q1, [r0, q0, uxtw #3] ; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: %0 = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.v2i64.p0i64.v2i64(i64* %base, <2 x i64> %offset, i32 64, i32 3, i32 0) ret <2 x i64> %0 } define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_shifted_offset_u64(i64* %base, <2 x i64> %offset) { ; CHECK-LABEL: test_vldrdq_gather_shifted_offset_u64: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrd.u64 q1, [r0, q0, uxtw #3] ; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: %0 = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.v2i64.p0i64.v2i64(i64* %base, <2 x i64> %offset, i32 64, i32 3, i32 1) ret <2 x i64> %0 } define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_shifted_offset_z_s64(i64* %base, <2 x i64> %offset, i16 zeroext %p) { ; CHECK-LABEL: test_vldrdq_gather_shifted_offset_z_s64: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpst ; CHECK-NEXT: vldrdt.u64 q1, [r0, q0, uxtw #3] ; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: %0 = zext i16 %p to i32 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) %2 = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1(i64* %base, <2 x i64> %offset, i32 64, i32 3, i32 0, <4 x i1> %1) ret <2 x i64> %2 } define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_shifted_offset_z_u64(i64* %base, <2 x i64> %offset, i16 zeroext %p) { ; CHECK-LABEL: test_vldrdq_gather_shifted_offset_z_u64: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpst ; CHECK-NEXT: vldrdt.u64 q1, [r0, q0, uxtw #3] ; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: %0 = zext i16 %p to i32 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) %2 = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1(i64* %base, <2 x i64> %offset, i32 64, i32 3, i32 1, <4 x i1> %1) ret <2 x i64> %2 } define arm_aapcs_vfpcc <8 x half> @test_vldrhq_gather_offset_f16(half* %base, <8 x i16> %offset) { ; CHECK-LABEL: test_vldrhq_gather_offset_f16: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.u16 q1, [r0, q0] ; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: %0 = call <8 x half> @llvm.arm.mve.vldr.gather.offset.v8f16.p0f16.v8i16(half* %base, <8 x i16> %offset, i32 16, i32 0, i32 0) ret <8 x half> %0 } declare <8 x half> @llvm.arm.mve.vldr.gather.offset.v8f16.p0f16.v8i16(half*, <8 x i16>, i32, i32, i32) define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_gather_offset_s16(i16* %base, <8 x i16> %offset) { ; CHECK-LABEL: test_vldrhq_gather_offset_s16: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.u16 q1, [r0, q0] ; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: %0 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0i16.v8i16(i16* %base, <8 x i16> %offset, i32 16, i32 0, i32 0) ret <8 x i16> %0 } declare <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0i16.v8i16(i16*, <8 x i16>, i32, i32, i32) define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_gather_offset_s32(i16* %base, <4 x i32> %offset) { ; CHECK-LABEL: test_vldrhq_gather_offset_s32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.s32 q1, [r0, q0] ; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: %0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i16.v4i32(i16* %base, <4 x i32> %offset, i32 16, i32 0, i32 0) ret <4 x i32> %0 } declare <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i16.v4i32(i16*, <4 x i32>, i32, i32, i32) define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_gather_offset_u16(i16* %base, <8 x i16> %offset) { ; CHECK-LABEL: test_vldrhq_gather_offset_u16: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.u16 q1, [r0, q0] ; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: %0 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0i16.v8i16(i16* %base, <8 x i16> %offset, i32 16, i32 0, i32 1) ret <8 x i16> %0 } define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_gather_offset_u32(i16* %base, <4 x i32> %offset) { ; CHECK-LABEL: test_vldrhq_gather_offset_u32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.u32 q1, [r0, q0] ; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: %0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i16.v4i32(i16* %base, <4 x i32> %offset, i32 16, i32 0, i32 1) ret <4 x i32> %0 } define arm_aapcs_vfpcc <8 x half> @test_vldrhq_gather_offset_z_f16(half* %base, <8 x i16> %offset, i16 zeroext %p) { ; CHECK-LABEL: test_vldrhq_gather_offset_z_f16: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpst ; CHECK-NEXT: vldrht.u16 q1, [r0, q0] ; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: %0 = zext i16 %p to i32 %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) %2 = call <8 x half> @llvm.arm.mve.vldr.gather.offset.predicated.v8f16.p0f16.v8i16.v8i1(half* %base, <8 x i16> %offset, i32 16, i32 0, i32 0, <8 x i1> %1) ret <8 x half> %2 } declare <8 x half> @llvm.arm.mve.vldr.gather.offset.predicated.v8f16.p0f16.v8i16.v8i1(half*, <8 x i16>, i32, i32, i32, <8 x i1>) define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_gather_offset_z_s16(i16* %base, <8 x i16> %offset, i16 zeroext %p) { ; CHECK-LABEL: test_vldrhq_gather_offset_z_s16: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpst ; CHECK-NEXT: vldrht.u16 q1, [r0, q0] ; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: %0 = zext i16 %p to i32 %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) %2 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0i16.v8i16.v8i1(i16* %base, <8 x i16> %offset, i32 16, i32 0, i32 0, <8 x i1> %1) ret <8 x i16> %2 } declare <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0i16.v8i16.v8i1(i16*, <8 x i16>, i32, i32, i32, <8 x i1>) define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_gather_offset_z_s32(i16* %base, <4 x i32> %offset, i16 zeroext %p) { ; CHECK-LABEL: test_vldrhq_gather_offset_z_s32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpst ; CHECK-NEXT: vldrht.s32 q1, [r0, q0] ; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: %0 = zext i16 %p to i32 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) %2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i16.v4i32.v4i1(i16* %base, <4 x i32> %offset, i32 16, i32 0, i32 0, <4 x i1> %1) ret <4 x i32> %2 } declare <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i16.v4i32.v4i1(i16*, <4 x i32>, i32, i32, i32, <4 x i1>) define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_gather_offset_z_u16(i16* %base, <8 x i16> %offset, i16 zeroext %p) { ; CHECK-LABEL: test_vldrhq_gather_offset_z_u16: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpst ; CHECK-NEXT: vldrht.u16 q1, [r0, q0] ; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: %0 = zext i16 %p to i32 %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) %2 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0i16.v8i16.v8i1(i16* %base, <8 x i16> %offset, i32 16, i32 0, i32 1, <8 x i1> %1) ret <8 x i16> %2 } define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_gather_offset_z_u32(i16* %base, <4 x i32> %offset, i16 zeroext %p) { ; CHECK-LABEL: test_vldrhq_gather_offset_z_u32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpst ; CHECK-NEXT: vldrht.u32 q1, [r0, q0] ; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: %0 = zext i16 %p to i32 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) %2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i16.v4i32.v4i1(i16* %base, <4 x i32> %offset, i32 16, i32 0, i32 1, <4 x i1> %1) ret <4 x i32> %2 } define arm_aapcs_vfpcc <8 x half> @test_vldrhq_gather_shifted_offset_f16(half* %base, <8 x i16> %offset) { ; CHECK-LABEL: test_vldrhq_gather_shifted_offset_f16: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.u16 q1, [r0, q0, uxtw #1] ; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: %0 = call <8 x half> @llvm.arm.mve.vldr.gather.offset.v8f16.p0f16.v8i16(half* %base, <8 x i16> %offset, i32 16, i32 1, i32 0) ret <8 x half> %0 } define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_gather_shifted_offset_s16(i16* %base, <8 x i16> %offset) { ; CHECK-LABEL: test_vldrhq_gather_shifted_offset_s16: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.u16 q1, [r0, q0, uxtw #1] ; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: %0 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0i16.v8i16(i16* %base, <8 x i16> %offset, i32 16, i32 1, i32 0) ret <8 x i16> %0 } define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_gather_shifted_offset_s32(i16* %base, <4 x i32> %offset) { ; CHECK-LABEL: test_vldrhq_gather_shifted_offset_s32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.s32 q1, [r0, q0, uxtw #1] ; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: %0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i16.v4i32(i16* %base, <4 x i32> %offset, i32 16, i32 1, i32 0) ret <4 x i32> %0 } define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_gather_shifted_offset_u16(i16* %base, <8 x i16> %offset) { ; CHECK-LABEL: test_vldrhq_gather_shifted_offset_u16: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.u16 q1, [r0, q0, uxtw #1] ; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: %0 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0i16.v8i16(i16* %base, <8 x i16> %offset, i32 16, i32 1, i32 1) ret <8 x i16> %0 } define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_gather_shifted_offset_u32(i16* %base, <4 x i32> %offset) { ; CHECK-LABEL: test_vldrhq_gather_shifted_offset_u32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.u32 q1, [r0, q0, uxtw #1] ; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: %0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i16.v4i32(i16* %base, <4 x i32> %offset, i32 16, i32 1, i32 1) ret <4 x i32> %0 } define arm_aapcs_vfpcc <8 x half> @test_vldrhq_gather_shifted_offset_z_f16(half* %base, <8 x i16> %offset, i16 zeroext %p) { ; CHECK-LABEL: test_vldrhq_gather_shifted_offset_z_f16: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpst ; CHECK-NEXT: vldrht.u16 q1, [r0, q0, uxtw #1] ; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: %0 = zext i16 %p to i32 %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) %2 = call <8 x half> @llvm.arm.mve.vldr.gather.offset.predicated.v8f16.p0f16.v8i16.v8i1(half* %base, <8 x i16> %offset, i32 16, i32 1, i32 0, <8 x i1> %1) ret <8 x half> %2 } define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_gather_shifted_offset_z_s16(i16* %base, <8 x i16> %offset, i16 zeroext %p) { ; CHECK-LABEL: test_vldrhq_gather_shifted_offset_z_s16: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpst ; CHECK-NEXT: vldrht.u16 q1, [r0, q0, uxtw #1] ; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: %0 = zext i16 %p to i32 %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) %2 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0i16.v8i16.v8i1(i16* %base, <8 x i16> %offset, i32 16, i32 1, i32 0, <8 x i1> %1) ret <8 x i16> %2 } define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_gather_shifted_offset_z_s32(i16* %base, <4 x i32> %offset, i16 zeroext %p) { ; CHECK-LABEL: test_vldrhq_gather_shifted_offset_z_s32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpst ; CHECK-NEXT: vldrht.s32 q1, [r0, q0, uxtw #1] ; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: %0 = zext i16 %p to i32 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) %2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i16.v4i32.v4i1(i16* %base, <4 x i32> %offset, i32 16, i32 1, i32 0, <4 x i1> %1) ret <4 x i32> %2 } define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_gather_shifted_offset_z_u16(i16* %base, <8 x i16> %offset, i16 zeroext %p) { ; CHECK-LABEL: test_vldrhq_gather_shifted_offset_z_u16: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpst ; CHECK-NEXT: vldrht.u16 q1, [r0, q0, uxtw #1] ; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: %0 = zext i16 %p to i32 %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) %2 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0i16.v8i16.v8i1(i16* %base, <8 x i16> %offset, i32 16, i32 1, i32 1, <8 x i1> %1) ret <8 x i16> %2 } define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_gather_shifted_offset_z_u32(i16* %base, <4 x i32> %offset, i16 zeroext %p) { ; CHECK-LABEL: test_vldrhq_gather_shifted_offset_z_u32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpst ; CHECK-NEXT: vldrht.u32 q1, [r0, q0, uxtw #1] ; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: %0 = zext i16 %p to i32 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) %2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i16.v4i32.v4i1(i16* %base, <4 x i32> %offset, i32 16, i32 1, i32 1, <4 x i1> %1) ret <4 x i32> %2 } define arm_aapcs_vfpcc <4 x float> @test_vldrwq_gather_base_f32(<4 x i32> %addr) { ; CHECK-LABEL: test_vldrwq_gather_base_f32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q1, [q0, #12] ; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: %0 = call <4 x float> @llvm.arm.mve.vldr.gather.base.v4f32.v4i32(<4 x i32> %addr, i32 12) ret <4 x float> %0 } declare <4 x float> @llvm.arm.mve.vldr.gather.base.v4f32.v4i32(<4 x i32>, i32) define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_base_s32(<4 x i32> %addr) { ; CHECK-LABEL: test_vldrwq_gather_base_s32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q1, [q0, #400] ; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: %0 = call <4 x i32> @llvm.arm.mve.vldr.gather.base.v4i32.v4i32(<4 x i32> %addr, i32 400) ret <4 x i32> %0 } declare <4 x i32> @llvm.arm.mve.vldr.gather.base.v4i32.v4i32(<4 x i32>, i32) define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_base_u32(<4 x i32> %addr) { ; CHECK-LABEL: test_vldrwq_gather_base_u32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q1, [q0, #284] ; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: %0 = call <4 x i32> @llvm.arm.mve.vldr.gather.base.v4i32.v4i32(<4 x i32> %addr, i32 284) ret <4 x i32> %0 } define arm_aapcs_vfpcc <4 x float> @test_vldrwq_gather_base_wb_f32(<4 x i32>* %addr) { ; CHECK-LABEL: test_vldrwq_gather_base_wb_f32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q1, [r0] ; CHECK-NEXT: vldrw.u32 q0, [q1, #-64]! ; CHECK-NEXT: vstrw.32 q1, [r0] ; CHECK-NEXT: bx lr entry: %0 = load <4 x i32>, <4 x i32>* %addr, align 8 %1 = call { <4 x float>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.v4f32.v4i32(<4 x i32> %0, i32 -64) %2 = extractvalue { <4 x float>, <4 x i32> } %1, 1 store <4 x i32> %2, <4 x i32>* %addr, align 8 %3 = extractvalue { <4 x float>, <4 x i32> } %1, 0 ret <4 x float> %3 } declare { <4 x float>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.v4f32.v4i32(<4 x i32>, i32) define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_base_wb_s32(<4 x i32>* %addr) { ; CHECK-LABEL: test_vldrwq_gather_base_wb_s32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q1, [r0] ; CHECK-NEXT: vldrw.u32 q0, [q1, #80]! ; CHECK-NEXT: vstrw.32 q1, [r0] ; CHECK-NEXT: bx lr entry: %0 = load <4 x i32>, <4 x i32>* %addr, align 8 %1 = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.v4i32.v4i32(<4 x i32> %0, i32 80) %2 = extractvalue { <4 x i32>, <4 x i32> } %1, 1 store <4 x i32> %2, <4 x i32>* %addr, align 8 %3 = extractvalue { <4 x i32>, <4 x i32> } %1, 0 ret <4 x i32> %3 } declare { <4 x i32>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.v4i32.v4i32(<4 x i32>, i32) define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_base_wb_u32(<4 x i32>* %addr) { ; CHECK-LABEL: test_vldrwq_gather_base_wb_u32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q1, [r0] ; CHECK-NEXT: vldrw.u32 q0, [q1, #480]! ; CHECK-NEXT: vstrw.32 q1, [r0] ; CHECK-NEXT: bx lr entry: %0 = load <4 x i32>, <4 x i32>* %addr, align 8 %1 = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.v4i32.v4i32(<4 x i32> %0, i32 480) %2 = extractvalue { <4 x i32>, <4 x i32> } %1, 1 store <4 x i32> %2, <4 x i32>* %addr, align 8 %3 = extractvalue { <4 x i32>, <4 x i32> } %1, 0 ret <4 x i32> %3 } define arm_aapcs_vfpcc <4 x float> @test_vldrwq_gather_base_wb_z_f32(<4 x i32>* %addr, i16 zeroext %p) { ; CHECK-LABEL: test_vldrwq_gather_base_wb_z_f32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vldrw.u32 q1, [r0] ; CHECK-NEXT: vpst ; CHECK-NEXT: vldrwt.u32 q0, [q1, #-352]! ; CHECK-NEXT: vstrw.32 q1, [r0] ; CHECK-NEXT: bx lr entry: %0 = load <4 x i32>, <4 x i32>* %addr, align 8 %1 = zext i16 %p to i32 %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1) %3 = call { <4 x float>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v4f32.v4i32.v4i1(<4 x i32> %0, i32 -352, <4 x i1> %2) %4 = extractvalue { <4 x float>, <4 x i32> } %3, 1 store <4 x i32> %4, <4 x i32>* %addr, align 8 %5 = extractvalue { <4 x float>, <4 x i32> } %3, 0 ret <4 x float> %5 } declare { <4 x float>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v4f32.v4i32.v4i1(<4 x i32>, i32, <4 x i1>) define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_base_wb_z_s32(<4 x i32>* %addr, i16 zeroext %p) { ; CHECK-LABEL: test_vldrwq_gather_base_wb_z_s32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vldrw.u32 q1, [r0] ; CHECK-NEXT: vpst ; CHECK-NEXT: vldrwt.u32 q0, [q1, #276]! ; CHECK-NEXT: vstrw.32 q1, [r0] ; CHECK-NEXT: bx lr entry: %0 = load <4 x i32>, <4 x i32>* %addr, align 8 %1 = zext i16 %p to i32 %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1) %3 = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v4i32.v4i32.v4i1(<4 x i32> %0, i32 276, <4 x i1> %2) %4 = extractvalue { <4 x i32>, <4 x i32> } %3, 1 store <4 x i32> %4, <4 x i32>* %addr, align 8 %5 = extractvalue { <4 x i32>, <4 x i32> } %3, 0 ret <4 x i32> %5 } declare { <4 x i32>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v4i32.v4i32.v4i1(<4 x i32>, i32, <4 x i1>) define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_base_wb_z_u32(<4 x i32>* %addr, i16 zeroext %p) { ; CHECK-LABEL: test_vldrwq_gather_base_wb_z_u32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vldrw.u32 q1, [r0] ; CHECK-NEXT: vpst ; CHECK-NEXT: vldrwt.u32 q0, [q1, #88]! ; CHECK-NEXT: vstrw.32 q1, [r0] ; CHECK-NEXT: bx lr entry: %0 = load <4 x i32>, <4 x i32>* %addr, align 8 %1 = zext i16 %p to i32 %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1) %3 = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v4i32.v4i32.v4i1(<4 x i32> %0, i32 88, <4 x i1> %2) %4 = extractvalue { <4 x i32>, <4 x i32> } %3, 1 store <4 x i32> %4, <4 x i32>* %addr, align 8 %5 = extractvalue { <4 x i32>, <4 x i32> } %3, 0 ret <4 x i32> %5 } define arm_aapcs_vfpcc <4 x float> @test_vldrwq_gather_base_z_f32(<4 x i32> %addr, i16 zeroext %p) { ; CHECK-LABEL: test_vldrwq_gather_base_z_f32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmsr p0, r0 ; CHECK-NEXT: vpst ; CHECK-NEXT: vldrwt.u32 q1, [q0, #-300] ; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: %0 = zext i16 %p to i32 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) %2 = call <4 x float> @llvm.arm.mve.vldr.gather.base.predicated.v4f32.v4i32.v4i1(<4 x i32> %addr, i32 -300, <4 x i1> %1) ret <4 x float> %2 } declare <4 x float> @llvm.arm.mve.vldr.gather.base.predicated.v4f32.v4i32.v4i1(<4 x i32>, i32, <4 x i1>) define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_base_z_s32(<4 x i32> %addr, i16 zeroext %p) { ; CHECK-LABEL: test_vldrwq_gather_base_z_s32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmsr p0, r0 ; CHECK-NEXT: vpst ; CHECK-NEXT: vldrwt.u32 q1, [q0, #440] ; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: %0 = zext i16 %p to i32 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) %2 = call <4 x i32> @llvm.arm.mve.vldr.gather.base.predicated.v4i32.v4i32.v4i1(<4 x i32> %addr, i32 440, <4 x i1> %1) ret <4 x i32> %2 } declare <4 x i32> @llvm.arm.mve.vldr.gather.base.predicated.v4i32.v4i32.v4i1(<4 x i32>, i32, <4 x i1>) define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_base_z_u32(<4 x i32> %addr, i16 zeroext %p) { ; CHECK-LABEL: test_vldrwq_gather_base_z_u32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmsr p0, r0 ; CHECK-NEXT: vpst ; CHECK-NEXT: vldrwt.u32 q1, [q0, #300] ; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: %0 = zext i16 %p to i32 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) %2 = call <4 x i32> @llvm.arm.mve.vldr.gather.base.predicated.v4i32.v4i32.v4i1(<4 x i32> %addr, i32 300, <4 x i1> %1) ret <4 x i32> %2 } define arm_aapcs_vfpcc <4 x float> @test_vldrwq_gather_offset_f32(float* %base, <4 x i32> %offset) { ; CHECK-LABEL: test_vldrwq_gather_offset_f32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q1, [r0, q0] ; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: %0 = call <4 x float> @llvm.arm.mve.vldr.gather.offset.v4f32.p0f32.v4i32(float* %base, <4 x i32> %offset, i32 32, i32 0, i32 0) ret <4 x float> %0 } declare <4 x float> @llvm.arm.mve.vldr.gather.offset.v4f32.p0f32.v4i32(float*, <4 x i32>, i32, i32, i32) define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_offset_s32(i32* %base, <4 x i32> %offset) { ; CHECK-LABEL: test_vldrwq_gather_offset_s32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q1, [r0, q0] ; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: %0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i32.v4i32(i32* %base, <4 x i32> %offset, i32 32, i32 0, i32 0) ret <4 x i32> %0 } declare <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i32.v4i32(i32*, <4 x i32>, i32, i32, i32) define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_offset_u32(i32* %base, <4 x i32> %offset) { ; CHECK-LABEL: test_vldrwq_gather_offset_u32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q1, [r0, q0] ; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: %0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i32.v4i32(i32* %base, <4 x i32> %offset, i32 32, i32 0, i32 1) ret <4 x i32> %0 } define arm_aapcs_vfpcc <4 x float> @test_vldrwq_gather_offset_z_f32(float* %base, <4 x i32> %offset, i16 zeroext %p) { ; CHECK-LABEL: test_vldrwq_gather_offset_z_f32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpst ; CHECK-NEXT: vldrwt.u32 q1, [r0, q0] ; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: %0 = zext i16 %p to i32 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) %2 = call <4 x float> @llvm.arm.mve.vldr.gather.offset.predicated.v4f32.p0f32.v4i32.v4i1(float* %base, <4 x i32> %offset, i32 32, i32 0, i32 0, <4 x i1> %1) ret <4 x float> %2 } declare <4 x float> @llvm.arm.mve.vldr.gather.offset.predicated.v4f32.p0f32.v4i32.v4i1(float*, <4 x i32>, i32, i32, i32, <4 x i1>) define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_offset_z_s32(i32* %base, <4 x i32> %offset, i16 zeroext %p) { ; CHECK-LABEL: test_vldrwq_gather_offset_z_s32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpst ; CHECK-NEXT: vldrwt.u32 q1, [r0, q0] ; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: %0 = zext i16 %p to i32 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) %2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i32.v4i32.v4i1(i32* %base, <4 x i32> %offset, i32 32, i32 0, i32 0, <4 x i1> %1) ret <4 x i32> %2 } declare <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i32.v4i32.v4i1(i32*, <4 x i32>, i32, i32, i32, <4 x i1>) define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_offset_z_u32(i32* %base, <4 x i32> %offset, i16 zeroext %p) { ; CHECK-LABEL: test_vldrwq_gather_offset_z_u32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpst ; CHECK-NEXT: vldrwt.u32 q1, [r0, q0] ; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: %0 = zext i16 %p to i32 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) %2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i32.v4i32.v4i1(i32* %base, <4 x i32> %offset, i32 32, i32 0, i32 1, <4 x i1> %1) ret <4 x i32> %2 } define arm_aapcs_vfpcc <4 x float> @test_vldrwq_gather_shifted_offset_f32(float* %base, <4 x i32> %offset) { ; CHECK-LABEL: test_vldrwq_gather_shifted_offset_f32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q1, [r0, q0, uxtw #2] ; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: %0 = call <4 x float> @llvm.arm.mve.vldr.gather.offset.v4f32.p0f32.v4i32(float* %base, <4 x i32> %offset, i32 32, i32 2, i32 0) ret <4 x float> %0 } define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_shifted_offset_s32(i32* %base, <4 x i32> %offset) { ; CHECK-LABEL: test_vldrwq_gather_shifted_offset_s32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q1, [r0, q0, uxtw #2] ; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: %0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i32.v4i32(i32* %base, <4 x i32> %offset, i32 32, i32 2, i32 0) ret <4 x i32> %0 } define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_shifted_offset_u32(i32* %base, <4 x i32> %offset) { ; CHECK-LABEL: test_vldrwq_gather_shifted_offset_u32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q1, [r0, q0, uxtw #2] ; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: %0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i32.v4i32(i32* %base, <4 x i32> %offset, i32 32, i32 2, i32 1) ret <4 x i32> %0 } define arm_aapcs_vfpcc <4 x float> @test_vldrwq_gather_shifted_offset_z_f32(float* %base, <4 x i32> %offset, i16 zeroext %p) { ; CHECK-LABEL: test_vldrwq_gather_shifted_offset_z_f32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpst ; CHECK-NEXT: vldrwt.u32 q1, [r0, q0, uxtw #2] ; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: %0 = zext i16 %p to i32 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) %2 = call <4 x float> @llvm.arm.mve.vldr.gather.offset.predicated.v4f32.p0f32.v4i32.v4i1(float* %base, <4 x i32> %offset, i32 32, i32 2, i32 0, <4 x i1> %1) ret <4 x float> %2 } define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_shifted_offset_z_s32(i32* %base, <4 x i32> %offset, i16 zeroext %p) { ; CHECK-LABEL: test_vldrwq_gather_shifted_offset_z_s32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpst ; CHECK-NEXT: vldrwt.u32 q1, [r0, q0, uxtw #2] ; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: %0 = zext i16 %p to i32 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) %2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i32.v4i32.v4i1(i32* %base, <4 x i32> %offset, i32 32, i32 2, i32 0, <4 x i1> %1) ret <4 x i32> %2 } define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_shifted_offset_z_u32(i32* %base, <4 x i32> %offset, i16 zeroext %p) { ; CHECK-LABEL: test_vldrwq_gather_shifted_offset_z_u32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpst ; CHECK-NEXT: vldrwt.u32 q1, [r0, q0, uxtw #2] ; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: %0 = zext i16 %p to i32 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) %2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i32.v4i32.v4i1(i32* %base, <4 x i32> %offset, i32 32, i32 2, i32 1, <4 x i1> %1) ret <4 x i32> %2 } define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_p_s16(i8* %base, <8 x i16> %offset, <8 x i16> %value, i16 zeroext %p) { ; CHECK-LABEL: test_vstrbq_scatter_offset_p_s16: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpst ; CHECK-NEXT: vstrbt.16 q1, [r0, q0] ; CHECK-NEXT: bx lr entry: %0 = zext i16 %p to i32 %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i8.v8i16.v8i16.v8i1(i8* %base, <8 x i16> %offset, <8 x i16> %value, i32 8, i32 0, <8 x i1> %1) ret void } declare void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i8.v8i16.v8i16.v8i1(i8*, <8 x i16>, <8 x i16>, i32, i32, <8 x i1>) define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_p_s32(i8* %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) { ; CHECK-LABEL: test_vstrbq_scatter_offset_p_s32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpst ; CHECK-NEXT: vstrbt.32 q1, [r0, q0] ; CHECK-NEXT: bx lr entry: %0 = zext i16 %p to i32 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i8.v4i32.v4i32.v4i1(i8* %base, <4 x i32> %offset, <4 x i32> %value, i32 8, i32 0, <4 x i1> %1) ret void } declare void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i8.v4i32.v4i32.v4i1(i8*, <4 x i32>, <4 x i32>, i32, i32, <4 x i1>) define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_p_s8(i8* %base, <16 x i8> %offset, <16 x i8> %value, i16 zeroext %p) { ; CHECK-LABEL: test_vstrbq_scatter_offset_p_s8: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpst ; CHECK-NEXT: vstrbt.8 q1, [r0, q0] ; CHECK-NEXT: bx lr entry: %0 = zext i16 %p to i32 %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i8.v16i8.v16i8.v16i1(i8* %base, <16 x i8> %offset, <16 x i8> %value, i32 8, i32 0, <16 x i1> %1) ret void } declare void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i8.v16i8.v16i8.v16i1(i8*, <16 x i8>, <16 x i8>, i32, i32, <16 x i1>) define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_p_u16(i8* %base, <8 x i16> %offset, <8 x i16> %value, i16 zeroext %p) { ; CHECK-LABEL: test_vstrbq_scatter_offset_p_u16: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpst ; CHECK-NEXT: vstrbt.16 q1, [r0, q0] ; CHECK-NEXT: bx lr entry: %0 = zext i16 %p to i32 %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i8.v8i16.v8i16.v8i1(i8* %base, <8 x i16> %offset, <8 x i16> %value, i32 8, i32 0, <8 x i1> %1) ret void } define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_p_u32(i8* %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) { ; CHECK-LABEL: test_vstrbq_scatter_offset_p_u32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpst ; CHECK-NEXT: vstrbt.32 q1, [r0, q0] ; CHECK-NEXT: bx lr entry: %0 = zext i16 %p to i32 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i8.v4i32.v4i32.v4i1(i8* %base, <4 x i32> %offset, <4 x i32> %value, i32 8, i32 0, <4 x i1> %1) ret void } define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_p_u8(i8* %base, <16 x i8> %offset, <16 x i8> %value, i16 zeroext %p) { ; CHECK-LABEL: test_vstrbq_scatter_offset_p_u8: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpst ; CHECK-NEXT: vstrbt.8 q1, [r0, q0] ; CHECK-NEXT: bx lr entry: %0 = zext i16 %p to i32 %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i8.v16i8.v16i8.v16i1(i8* %base, <16 x i8> %offset, <16 x i8> %value, i32 8, i32 0, <16 x i1> %1) ret void } define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_s16(i8* %base, <8 x i16> %offset, <8 x i16> %value) { ; CHECK-LABEL: test_vstrbq_scatter_offset_s16: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vstrb.16 q1, [r0, q0] ; CHECK-NEXT: bx lr entry: call void @llvm.arm.mve.vstr.scatter.offset.p0i8.v8i16.v8i16(i8* %base, <8 x i16> %offset, <8 x i16> %value, i32 8, i32 0) ret void } declare void @llvm.arm.mve.vstr.scatter.offset.p0i8.v8i16.v8i16(i8*, <8 x i16>, <8 x i16>, i32, i32) define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_s32(i8* %base, <4 x i32> %offset, <4 x i32> %value) { ; CHECK-LABEL: test_vstrbq_scatter_offset_s32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vstrb.32 q1, [r0, q0] ; CHECK-NEXT: bx lr entry: call void @llvm.arm.mve.vstr.scatter.offset.p0i8.v4i32.v4i32(i8* %base, <4 x i32> %offset, <4 x i32> %value, i32 8, i32 0) ret void } declare void @llvm.arm.mve.vstr.scatter.offset.p0i8.v4i32.v4i32(i8*, <4 x i32>, <4 x i32>, i32, i32) define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_s8(i8* %base, <16 x i8> %offset, <16 x i8> %value) { ; CHECK-LABEL: test_vstrbq_scatter_offset_s8: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vstrb.8 q1, [r0, q0] ; CHECK-NEXT: bx lr entry: call void @llvm.arm.mve.vstr.scatter.offset.p0i8.v16i8.v16i8(i8* %base, <16 x i8> %offset, <16 x i8> %value, i32 8, i32 0) ret void } declare void @llvm.arm.mve.vstr.scatter.offset.p0i8.v16i8.v16i8(i8*, <16 x i8>, <16 x i8>, i32, i32) define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_u16(i8* %base, <8 x i16> %offset, <8 x i16> %value) { ; CHECK-LABEL: test_vstrbq_scatter_offset_u16: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vstrb.16 q1, [r0, q0] ; CHECK-NEXT: bx lr entry: call void @llvm.arm.mve.vstr.scatter.offset.p0i8.v8i16.v8i16(i8* %base, <8 x i16> %offset, <8 x i16> %value, i32 8, i32 0) ret void } define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_u32(i8* %base, <4 x i32> %offset, <4 x i32> %value) { ; CHECK-LABEL: test_vstrbq_scatter_offset_u32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vstrb.32 q1, [r0, q0] ; CHECK-NEXT: bx lr entry: call void @llvm.arm.mve.vstr.scatter.offset.p0i8.v4i32.v4i32(i8* %base, <4 x i32> %offset, <4 x i32> %value, i32 8, i32 0) ret void } define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_u8(i8* %base, <16 x i8> %offset, <16 x i8> %value) { ; CHECK-LABEL: test_vstrbq_scatter_offset_u8: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vstrb.8 q1, [r0, q0] ; CHECK-NEXT: bx lr entry: call void @llvm.arm.mve.vstr.scatter.offset.p0i8.v16i8.v16i8(i8* %base, <16 x i8> %offset, <16 x i8> %value, i32 8, i32 0) ret void } define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_p_s64(<2 x i64> %addr, <2 x i64> %value, i16 zeroext %p) { ; CHECK-LABEL: test_vstrdq_scatter_base_p_s64: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmsr p0, r0 ; CHECK-NEXT: vpst ; CHECK-NEXT: vstrdt.64 q1, [q0, #888] ; CHECK-NEXT: bx lr entry: %0 = zext i16 %p to i32 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) call void @llvm.arm.mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1(<2 x i64> %addr, i32 888, <2 x i64> %value, <4 x i1> %1) ret void } declare void @llvm.arm.mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1(<2 x i64>, i32, <2 x i64>, <4 x i1>) define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_p_u64(<2 x i64> %addr, <2 x i64> %value, i16 zeroext %p) { ; CHECK-LABEL: test_vstrdq_scatter_base_p_u64: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmsr p0, r0 ; CHECK-NEXT: vpst ; CHECK-NEXT: vstrdt.64 q1, [q0, #264] ; CHECK-NEXT: bx lr entry: %0 = zext i16 %p to i32 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) call void @llvm.arm.mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1(<2 x i64> %addr, i32 264, <2 x i64> %value, <4 x i1> %1) ret void } define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_s64(<2 x i64> %addr, <2 x i64> %value) { ; CHECK-LABEL: test_vstrdq_scatter_base_s64: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vstrd.64 q1, [q0, #408] ; CHECK-NEXT: bx lr entry: call void @llvm.arm.mve.vstr.scatter.base.v2i64.v2i64(<2 x i64> %addr, i32 408, <2 x i64> %value) ret void } declare void @llvm.arm.mve.vstr.scatter.base.v2i64.v2i64(<2 x i64>, i32, <2 x i64>) define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_u64(<2 x i64> %addr, <2 x i64> %value) { ; CHECK-LABEL: test_vstrdq_scatter_base_u64: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vstrd.64 q1, [q0, #-472] ; CHECK-NEXT: bx lr entry: call void @llvm.arm.mve.vstr.scatter.base.v2i64.v2i64(<2 x i64> %addr, i32 -472, <2 x i64> %value) ret void } define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_wb_p_s64(<2 x i64>* %addr, <2 x i64> %value, i16 zeroext %p) { ; CHECK-LABEL: test_vstrdq_scatter_base_wb_p_s64: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q1, [r0] ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpst ; CHECK-NEXT: vstrdt.64 q0, [q1, #248]! ; CHECK-NEXT: vstrw.32 q1, [r0] ; CHECK-NEXT: bx lr entry: %0 = load <2 x i64>, <2 x i64>* %addr, align 8 %1 = zext i16 %p to i32 %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1) %3 = call <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1(<2 x i64> %0, i32 248, <2 x i64> %value, <4 x i1> %2) store <2 x i64> %3, <2 x i64>* %addr, align 8 ret void } declare <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1(<2 x i64>, i32, <2 x i64>, <4 x i1>) define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_wb_p_u64(<2 x i64>* %addr, <2 x i64> %value, i16 zeroext %p) { ; CHECK-LABEL: test_vstrdq_scatter_base_wb_p_u64: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q1, [r0] ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpst ; CHECK-NEXT: vstrdt.64 q0, [q1, #136]! ; CHECK-NEXT: vstrw.32 q1, [r0] ; CHECK-NEXT: bx lr entry: %0 = load <2 x i64>, <2 x i64>* %addr, align 8 %1 = zext i16 %p to i32 %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1) %3 = call <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1(<2 x i64> %0, i32 136, <2 x i64> %value, <4 x i1> %2) store <2 x i64> %3, <2 x i64>* %addr, align 8 ret void } define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_wb_s64(<2 x i64>* %addr, <2 x i64> %value) { ; CHECK-LABEL: test_vstrdq_scatter_base_wb_s64: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q1, [r0] ; CHECK-NEXT: vstrd.64 q0, [q1, #208]! ; CHECK-NEXT: vstrw.32 q1, [r0] ; CHECK-NEXT: bx lr entry: %0 = load <2 x i64>, <2 x i64>* %addr, align 8 %1 = call <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.v2i64.v2i64(<2 x i64> %0, i32 208, <2 x i64> %value) store <2 x i64> %1, <2 x i64>* %addr, align 8 ret void } declare <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.v2i64.v2i64(<2 x i64>, i32, <2 x i64>) define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_wb_u64(<2 x i64>* %addr, <2 x i64> %value) { ; CHECK-LABEL: test_vstrdq_scatter_base_wb_u64: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q1, [r0] ; CHECK-NEXT: vstrd.64 q0, [q1, #-168]! ; CHECK-NEXT: vstrw.32 q1, [r0] ; CHECK-NEXT: bx lr entry: %0 = load <2 x i64>, <2 x i64>* %addr, align 8 %1 = call <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.v2i64.v2i64(<2 x i64> %0, i32 -168, <2 x i64> %value) store <2 x i64> %1, <2 x i64>* %addr, align 8 ret void } define arm_aapcs_vfpcc void @test_vstrdq_scatter_offset_p_s64(i64* %base, <2 x i64> %offset, <2 x i64> %value, i16 zeroext %p) { ; CHECK-LABEL: test_vstrdq_scatter_offset_p_s64: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpst ; CHECK-NEXT: vstrdt.64 q1, [r0, q0] ; CHECK-NEXT: bx lr entry: %0 = zext i16 %p to i32 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1(i64* %base, <2 x i64> %offset, <2 x i64> %value, i32 64, i32 0, <4 x i1> %1) ret void } declare void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1(i64*, <2 x i64>, <2 x i64>, i32, i32, <4 x i1>) define arm_aapcs_vfpcc void @test_vstrdq_scatter_offset_p_u64(i64* %base, <2 x i64> %offset, <2 x i64> %value, i16 zeroext %p) { ; CHECK-LABEL: test_vstrdq_scatter_offset_p_u64: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpst ; CHECK-NEXT: vstrdt.64 q1, [r0, q0] ; CHECK-NEXT: bx lr entry: %0 = zext i16 %p to i32 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1(i64* %base, <2 x i64> %offset, <2 x i64> %value, i32 64, i32 0, <4 x i1> %1) ret void } define arm_aapcs_vfpcc void @test_vstrdq_scatter_offset_s64(i64* %base, <2 x i64> %offset, <2 x i64> %value) { ; CHECK-LABEL: test_vstrdq_scatter_offset_s64: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vstrd.64 q1, [r0, q0] ; CHECK-NEXT: bx lr entry: call void @llvm.arm.mve.vstr.scatter.offset.p0i64.v2i64.v2i64(i64* %base, <2 x i64> %offset, <2 x i64> %value, i32 64, i32 0) ret void } declare void @llvm.arm.mve.vstr.scatter.offset.p0i64.v2i64.v2i64(i64*, <2 x i64>, <2 x i64>, i32, i32) define arm_aapcs_vfpcc void @test_vstrdq_scatter_offset_u64(i64* %base, <2 x i64> %offset, <2 x i64> %value) { ; CHECK-LABEL: test_vstrdq_scatter_offset_u64: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vstrd.64 q1, [r0, q0] ; CHECK-NEXT: bx lr entry: call void @llvm.arm.mve.vstr.scatter.offset.p0i64.v2i64.v2i64(i64* %base, <2 x i64> %offset, <2 x i64> %value, i32 64, i32 0) ret void } define arm_aapcs_vfpcc void @test_vstrdq_scatter_shifted_offset_p_s64(i64* %base, <2 x i64> %offset, <2 x i64> %value, i16 zeroext %p) { ; CHECK-LABEL: test_vstrdq_scatter_shifted_offset_p_s64: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpst ; CHECK-NEXT: vstrdt.64 q1, [r0, q0, uxtw #3] ; CHECK-NEXT: bx lr entry: %0 = zext i16 %p to i32 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1(i64* %base, <2 x i64> %offset, <2 x i64> %value, i32 64, i32 3, <4 x i1> %1) ret void } define arm_aapcs_vfpcc void @test_vstrdq_scatter_shifted_offset_p_u64(i64* %base, <2 x i64> %offset, <2 x i64> %value, i16 zeroext %p) { ; CHECK-LABEL: test_vstrdq_scatter_shifted_offset_p_u64: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpst ; CHECK-NEXT: vstrdt.64 q1, [r0, q0, uxtw #3] ; CHECK-NEXT: bx lr entry: %0 = zext i16 %p to i32 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1(i64* %base, <2 x i64> %offset, <2 x i64> %value, i32 64, i32 3, <4 x i1> %1) ret void } define arm_aapcs_vfpcc void @test_vstrdq_scatter_shifted_offset_s64(i64* %base, <2 x i64> %offset, <2 x i64> %value) { ; CHECK-LABEL: test_vstrdq_scatter_shifted_offset_s64: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vstrd.64 q1, [r0, q0, uxtw #3] ; CHECK-NEXT: bx lr entry: call void @llvm.arm.mve.vstr.scatter.offset.p0i64.v2i64.v2i64(i64* %base, <2 x i64> %offset, <2 x i64> %value, i32 64, i32 3) ret void } define arm_aapcs_vfpcc void @test_vstrdq_scatter_shifted_offset_u64(i64* %base, <2 x i64> %offset, <2 x i64> %value) { ; CHECK-LABEL: test_vstrdq_scatter_shifted_offset_u64: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vstrd.64 q1, [r0, q0, uxtw #3] ; CHECK-NEXT: bx lr entry: call void @llvm.arm.mve.vstr.scatter.offset.p0i64.v2i64.v2i64(i64* %base, <2 x i64> %offset, <2 x i64> %value, i32 64, i32 3) ret void } define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_f16(half* %base, <8 x i16> %offset, <8 x half> %value) { ; CHECK-LABEL: test_vstrhq_scatter_offset_f16: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vstrh.16 q1, [r0, q0] ; CHECK-NEXT: bx lr entry: call void @llvm.arm.mve.vstr.scatter.offset.p0f16.v8i16.v8f16(half* %base, <8 x i16> %offset, <8 x half> %value, i32 16, i32 0) ret void } declare void @llvm.arm.mve.vstr.scatter.offset.p0f16.v8i16.v8f16(half*, <8 x i16>, <8 x half>, i32, i32) define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_p_f16(half* %base, <8 x i16> %offset, <8 x half> %value, i16 zeroext %p) { ; CHECK-LABEL: test_vstrhq_scatter_offset_p_f16: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpst ; CHECK-NEXT: vstrht.16 q1, [r0, q0] ; CHECK-NEXT: bx lr entry: %0 = zext i16 %p to i32 %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0f16.v8i16.v8f16.v8i1(half* %base, <8 x i16> %offset, <8 x half> %value, i32 16, i32 0, <8 x i1> %1) ret void } declare void @llvm.arm.mve.vstr.scatter.offset.predicated.p0f16.v8i16.v8f16.v8i1(half*, <8 x i16>, <8 x half>, i32, i32, <8 x i1>) define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_p_s16(i16* %base, <8 x i16> %offset, <8 x i16> %value, i16 zeroext %p) { ; CHECK-LABEL: test_vstrhq_scatter_offset_p_s16: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpst ; CHECK-NEXT: vstrht.16 q1, [r0, q0] ; CHECK-NEXT: bx lr entry: %0 = zext i16 %p to i32 %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v8i16.v8i16.v8i1(i16* %base, <8 x i16> %offset, <8 x i16> %value, i32 16, i32 0, <8 x i1> %1) ret void } declare void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v8i16.v8i16.v8i1(i16*, <8 x i16>, <8 x i16>, i32, i32, <8 x i1>) define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_p_s32(i16* %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) { ; CHECK-LABEL: test_vstrhq_scatter_offset_p_s32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpst ; CHECK-NEXT: vstrht.32 q1, [r0, q0] ; CHECK-NEXT: bx lr entry: %0 = zext i16 %p to i32 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v4i32.v4i32.v4i1(i16* %base, <4 x i32> %offset, <4 x i32> %value, i32 16, i32 0, <4 x i1> %1) ret void } declare void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v4i32.v4i32.v4i1(i16*, <4 x i32>, <4 x i32>, i32, i32, <4 x i1>) define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_p_u16(i16* %base, <8 x i16> %offset, <8 x i16> %value, i16 zeroext %p) { ; CHECK-LABEL: test_vstrhq_scatter_offset_p_u16: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpst ; CHECK-NEXT: vstrht.16 q1, [r0, q0] ; CHECK-NEXT: bx lr entry: %0 = zext i16 %p to i32 %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v8i16.v8i16.v8i1(i16* %base, <8 x i16> %offset, <8 x i16> %value, i32 16, i32 0, <8 x i1> %1) ret void } define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_p_u32(i16* %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) { ; CHECK-LABEL: test_vstrhq_scatter_offset_p_u32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpst ; CHECK-NEXT: vstrht.32 q1, [r0, q0] ; CHECK-NEXT: bx lr entry: %0 = zext i16 %p to i32 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v4i32.v4i32.v4i1(i16* %base, <4 x i32> %offset, <4 x i32> %value, i32 16, i32 0, <4 x i1> %1) ret void } define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_s16(i16* %base, <8 x i16> %offset, <8 x i16> %value) { ; CHECK-LABEL: test_vstrhq_scatter_offset_s16: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vstrh.16 q1, [r0, q0] ; CHECK-NEXT: bx lr entry: call void @llvm.arm.mve.vstr.scatter.offset.p0i16.v8i16.v8i16(i16* %base, <8 x i16> %offset, <8 x i16> %value, i32 16, i32 0) ret void } declare void @llvm.arm.mve.vstr.scatter.offset.p0i16.v8i16.v8i16(i16*, <8 x i16>, <8 x i16>, i32, i32) define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_s32(i16* %base, <4 x i32> %offset, <4 x i32> %value) { ; CHECK-LABEL: test_vstrhq_scatter_offset_s32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vstrh.32 q1, [r0, q0] ; CHECK-NEXT: bx lr entry: call void @llvm.arm.mve.vstr.scatter.offset.p0i16.v4i32.v4i32(i16* %base, <4 x i32> %offset, <4 x i32> %value, i32 16, i32 0) ret void } declare void @llvm.arm.mve.vstr.scatter.offset.p0i16.v4i32.v4i32(i16*, <4 x i32>, <4 x i32>, i32, i32) define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_u16(i16* %base, <8 x i16> %offset, <8 x i16> %value) { ; CHECK-LABEL: test_vstrhq_scatter_offset_u16: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vstrh.16 q1, [r0, q0] ; CHECK-NEXT: bx lr entry: call void @llvm.arm.mve.vstr.scatter.offset.p0i16.v8i16.v8i16(i16* %base, <8 x i16> %offset, <8 x i16> %value, i32 16, i32 0) ret void } define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_u32(i16* %base, <4 x i32> %offset, <4 x i32> %value) { ; CHECK-LABEL: test_vstrhq_scatter_offset_u32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vstrh.32 q1, [r0, q0] ; CHECK-NEXT: bx lr entry: call void @llvm.arm.mve.vstr.scatter.offset.p0i16.v4i32.v4i32(i16* %base, <4 x i32> %offset, <4 x i32> %value, i32 16, i32 0) ret void } define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_f16(half* %base, <8 x i16> %offset, <8 x half> %value) { ; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_f16: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vstrh.16 q1, [r0, q0, uxtw #1] ; CHECK-NEXT: bx lr entry: call void @llvm.arm.mve.vstr.scatter.offset.p0f16.v8i16.v8f16(half* %base, <8 x i16> %offset, <8 x half> %value, i32 16, i32 1) ret void } define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_p_f16(half* %base, <8 x i16> %offset, <8 x half> %value, i16 zeroext %p) { ; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_p_f16: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpst ; CHECK-NEXT: vstrht.16 q1, [r0, q0, uxtw #1] ; CHECK-NEXT: bx lr entry: %0 = zext i16 %p to i32 %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0f16.v8i16.v8f16.v8i1(half* %base, <8 x i16> %offset, <8 x half> %value, i32 16, i32 1, <8 x i1> %1) ret void } define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_p_s16(i16* %base, <8 x i16> %offset, <8 x i16> %value, i16 zeroext %p) { ; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_p_s16: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpst ; CHECK-NEXT: vstrht.16 q1, [r0, q0, uxtw #1] ; CHECK-NEXT: bx lr entry: %0 = zext i16 %p to i32 %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v8i16.v8i16.v8i1(i16* %base, <8 x i16> %offset, <8 x i16> %value, i32 16, i32 1, <8 x i1> %1) ret void } define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_p_s32(i16* %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) { ; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_p_s32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpst ; CHECK-NEXT: vstrht.32 q1, [r0, q0, uxtw #1] ; CHECK-NEXT: bx lr entry: %0 = zext i16 %p to i32 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v4i32.v4i32.v4i1(i16* %base, <4 x i32> %offset, <4 x i32> %value, i32 16, i32 1, <4 x i1> %1) ret void } define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_p_u16(i16* %base, <8 x i16> %offset, <8 x i16> %value, i16 zeroext %p) { ; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_p_u16: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpst ; CHECK-NEXT: vstrht.16 q1, [r0, q0, uxtw #1] ; CHECK-NEXT: bx lr entry: %0 = zext i16 %p to i32 %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v8i16.v8i16.v8i1(i16* %base, <8 x i16> %offset, <8 x i16> %value, i32 16, i32 1, <8 x i1> %1) ret void } define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_p_u32(i16* %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) { ; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_p_u32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpst ; CHECK-NEXT: vstrht.32 q1, [r0, q0, uxtw #1] ; CHECK-NEXT: bx lr entry: %0 = zext i16 %p to i32 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v4i32.v4i32.v4i1(i16* %base, <4 x i32> %offset, <4 x i32> %value, i32 16, i32 1, <4 x i1> %1) ret void } define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_s16(i16* %base, <8 x i16> %offset, <8 x i16> %value) { ; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_s16: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vstrh.16 q1, [r0, q0, uxtw #1] ; CHECK-NEXT: bx lr entry: call void @llvm.arm.mve.vstr.scatter.offset.p0i16.v8i16.v8i16(i16* %base, <8 x i16> %offset, <8 x i16> %value, i32 16, i32 1) ret void } define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_s32(i16* %base, <4 x i32> %offset, <4 x i32> %value) { ; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_s32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vstrh.32 q1, [r0, q0, uxtw #1] ; CHECK-NEXT: bx lr entry: call void @llvm.arm.mve.vstr.scatter.offset.p0i16.v4i32.v4i32(i16* %base, <4 x i32> %offset, <4 x i32> %value, i32 16, i32 1) ret void } define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_u16(i16* %base, <8 x i16> %offset, <8 x i16> %value) { ; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_u16: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vstrh.16 q1, [r0, q0, uxtw #1] ; CHECK-NEXT: bx lr entry: call void @llvm.arm.mve.vstr.scatter.offset.p0i16.v8i16.v8i16(i16* %base, <8 x i16> %offset, <8 x i16> %value, i32 16, i32 1) ret void } define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_u32(i16* %base, <4 x i32> %offset, <4 x i32> %value) { ; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_u32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vstrh.32 q1, [r0, q0, uxtw #1] ; CHECK-NEXT: bx lr entry: call void @llvm.arm.mve.vstr.scatter.offset.p0i16.v4i32.v4i32(i16* %base, <4 x i32> %offset, <4 x i32> %value, i32 16, i32 1) ret void } define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_f32(<4 x i32> %addr, <4 x float> %value) { ; CHECK-LABEL: test_vstrwq_scatter_base_f32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vstrw.32 q1, [q0, #380] ; CHECK-NEXT: bx lr entry: call void @llvm.arm.mve.vstr.scatter.base.v4i32.v4f32(<4 x i32> %addr, i32 380, <4 x float> %value) ret void } declare void @llvm.arm.mve.vstr.scatter.base.v4i32.v4f32(<4 x i32>, i32, <4 x float>) define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_p_f32(<4 x i32> %addr, <4 x float> %value, i16 zeroext %p) { ; CHECK-LABEL: test_vstrwq_scatter_base_p_f32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmsr p0, r0 ; CHECK-NEXT: vpst ; CHECK-NEXT: vstrwt.32 q1, [q0, #-400] ; CHECK-NEXT: bx lr entry: %0 = zext i16 %p to i32 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) call void @llvm.arm.mve.vstr.scatter.base.predicated.v4i32.v4f32.v4i1(<4 x i32> %addr, i32 -400, <4 x float> %value, <4 x i1> %1) ret void } declare void @llvm.arm.mve.vstr.scatter.base.predicated.v4i32.v4f32.v4i1(<4 x i32>, i32, <4 x float>, <4 x i1>) define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_p_s32(<4 x i32> %addr, <4 x i32> %value, i16 zeroext %p) { ; CHECK-LABEL: test_vstrwq_scatter_base_p_s32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmsr p0, r0 ; CHECK-NEXT: vpst ; CHECK-NEXT: vstrwt.32 q1, [q0, #48] ; CHECK-NEXT: bx lr entry: %0 = zext i16 %p to i32 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) call void @llvm.arm.mve.vstr.scatter.base.predicated.v4i32.v4i32.v4i1(<4 x i32> %addr, i32 48, <4 x i32> %value, <4 x i1> %1) ret void } declare void @llvm.arm.mve.vstr.scatter.base.predicated.v4i32.v4i32.v4i1(<4 x i32>, i32, <4 x i32>, <4 x i1>) define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_p_u32(<4 x i32> %addr, <4 x i32> %value, i16 zeroext %p) { ; CHECK-LABEL: test_vstrwq_scatter_base_p_u32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmsr p0, r0 ; CHECK-NEXT: vpst ; CHECK-NEXT: vstrwt.32 q1, [q0, #-376] ; CHECK-NEXT: bx lr entry: %0 = zext i16 %p to i32 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) call void @llvm.arm.mve.vstr.scatter.base.predicated.v4i32.v4i32.v4i1(<4 x i32> %addr, i32 -376, <4 x i32> %value, <4 x i1> %1) ret void } define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_s32(<4 x i32> %addr, <4 x i32> %value) { ; CHECK-LABEL: test_vstrwq_scatter_base_s32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vstrw.32 q1, [q0, #156] ; CHECK-NEXT: bx lr entry: call void @llvm.arm.mve.vstr.scatter.base.v4i32.v4i32(<4 x i32> %addr, i32 156, <4 x i32> %value) ret void } declare void @llvm.arm.mve.vstr.scatter.base.v4i32.v4i32(<4 x i32>, i32, <4 x i32>) define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_u32(<4 x i32> %addr, <4 x i32> %value) { ; CHECK-LABEL: test_vstrwq_scatter_base_u32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vstrw.32 q1, [q0, #212] ; CHECK-NEXT: bx lr entry: call void @llvm.arm.mve.vstr.scatter.base.v4i32.v4i32(<4 x i32> %addr, i32 212, <4 x i32> %value) ret void } define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_wb_f32(<4 x i32>* %addr, <4 x float> %value) { ; CHECK-LABEL: test_vstrwq_scatter_base_wb_f32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q1, [r0] ; CHECK-NEXT: vstrw.32 q0, [q1, #-412]! ; CHECK-NEXT: vstrw.32 q1, [r0] ; CHECK-NEXT: bx lr entry: %0 = load <4 x i32>, <4 x i32>* %addr, align 8 %1 = call <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.v4i32.v4f32(<4 x i32> %0, i32 -412, <4 x float> %value) store <4 x i32> %1, <4 x i32>* %addr, align 8 ret void } declare <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.v4i32.v4f32(<4 x i32>, i32, <4 x float>) define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_wb_p_f32(<4 x i32>* %addr, <4 x float> %value, i16 zeroext %p) { ; CHECK-LABEL: test_vstrwq_scatter_base_wb_p_f32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q1, [r0] ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpst ; CHECK-NEXT: vstrwt.32 q0, [q1, #236]! ; CHECK-NEXT: vstrw.32 q1, [r0] ; CHECK-NEXT: bx lr entry: %0 = load <4 x i32>, <4 x i32>* %addr, align 8 %1 = zext i16 %p to i32 %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1) %3 = call <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v4i32.v4f32.v4i1(<4 x i32> %0, i32 236, <4 x float> %value, <4 x i1> %2) store <4 x i32> %3, <4 x i32>* %addr, align 8 ret void } declare <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v4i32.v4f32.v4i1(<4 x i32>, i32, <4 x float>, <4 x i1>) define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_wb_p_s32(<4 x i32>* %addr, <4 x i32> %value, i16 zeroext %p) { ; CHECK-LABEL: test_vstrwq_scatter_base_wb_p_s32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q1, [r0] ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpst ; CHECK-NEXT: vstrwt.32 q0, [q1, #328]! ; CHECK-NEXT: vstrw.32 q1, [r0] ; CHECK-NEXT: bx lr entry: %0 = load <4 x i32>, <4 x i32>* %addr, align 8 %1 = zext i16 %p to i32 %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1) %3 = call <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v4i32.v4i32.v4i1(<4 x i32> %0, i32 328, <4 x i32> %value, <4 x i1> %2) store <4 x i32> %3, <4 x i32>* %addr, align 8 ret void } declare <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v4i32.v4i32.v4i1(<4 x i32>, i32, <4 x i32>, <4 x i1>) define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_wb_p_u32(<4 x i32>* %addr, <4 x i32> %value, i16 zeroext %p) { ; CHECK-LABEL: test_vstrwq_scatter_base_wb_p_u32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q1, [r0] ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpst ; CHECK-NEXT: vstrwt.32 q0, [q1, #412]! ; CHECK-NEXT: vstrw.32 q1, [r0] ; CHECK-NEXT: bx lr entry: %0 = load <4 x i32>, <4 x i32>* %addr, align 8 %1 = zext i16 %p to i32 %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1) %3 = call <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v4i32.v4i32.v4i1(<4 x i32> %0, i32 412, <4 x i32> %value, <4 x i1> %2) store <4 x i32> %3, <4 x i32>* %addr, align 8 ret void } define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_wb_s32(<4 x i32>* %addr, <4 x i32> %value) { ; CHECK-LABEL: test_vstrwq_scatter_base_wb_s32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q1, [r0] ; CHECK-NEXT: vstrw.32 q0, [q1, #-152]! ; CHECK-NEXT: vstrw.32 q1, [r0] ; CHECK-NEXT: bx lr entry: %0 = load <4 x i32>, <4 x i32>* %addr, align 8 %1 = call <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.v4i32.v4i32(<4 x i32> %0, i32 -152, <4 x i32> %value) store <4 x i32> %1, <4 x i32>* %addr, align 8 ret void } declare <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.v4i32.v4i32(<4 x i32>, i32, <4 x i32>) define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_wb_u32(<4 x i32>* %addr, <4 x i32> %value) { ; CHECK-LABEL: test_vstrwq_scatter_base_wb_u32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q1, [r0] ; CHECK-NEXT: vstrw.32 q0, [q1, #64]! ; CHECK-NEXT: vstrw.32 q1, [r0] ; CHECK-NEXT: bx lr entry: %0 = load <4 x i32>, <4 x i32>* %addr, align 8 %1 = call <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.v4i32.v4i32(<4 x i32> %0, i32 64, <4 x i32> %value) store <4 x i32> %1, <4 x i32>* %addr, align 8 ret void } define arm_aapcs_vfpcc void @test_vstrwq_scatter_offset_f32(float* %base, <4 x i32> %offset, <4 x float> %value) { ; CHECK-LABEL: test_vstrwq_scatter_offset_f32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vstrw.32 q1, [r0, q0] ; CHECK-NEXT: bx lr entry: call void @llvm.arm.mve.vstr.scatter.offset.p0f32.v4i32.v4f32(float* %base, <4 x i32> %offset, <4 x float> %value, i32 32, i32 0) ret void } declare void @llvm.arm.mve.vstr.scatter.offset.p0f32.v4i32.v4f32(float*, <4 x i32>, <4 x float>, i32, i32) define arm_aapcs_vfpcc void @test_vstrwq_scatter_offset_p_f32(float* %base, <4 x i32> %offset, <4 x float> %value, i16 zeroext %p) { ; CHECK-LABEL: test_vstrwq_scatter_offset_p_f32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpst ; CHECK-NEXT: vstrwt.32 q1, [r0, q0] ; CHECK-NEXT: bx lr entry: %0 = zext i16 %p to i32 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0f32.v4i32.v4f32.v4i1(float* %base, <4 x i32> %offset, <4 x float> %value, i32 32, i32 0, <4 x i1> %1) ret void } declare void @llvm.arm.mve.vstr.scatter.offset.predicated.p0f32.v4i32.v4f32.v4i1(float*, <4 x i32>, <4 x float>, i32, i32, <4 x i1>) define arm_aapcs_vfpcc void @test_vstrwq_scatter_offset_p_s32(i32* %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) { ; CHECK-LABEL: test_vstrwq_scatter_offset_p_s32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpst ; CHECK-NEXT: vstrwt.32 q1, [r0, q0] ; CHECK-NEXT: bx lr entry: %0 = zext i16 %p to i32 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i32.v4i32.v4i32.v4i1(i32* %base, <4 x i32> %offset, <4 x i32> %value, i32 32, i32 0, <4 x i1> %1) ret void } declare void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i32.v4i32.v4i32.v4i1(i32*, <4 x i32>, <4 x i32>, i32, i32, <4 x i1>) define arm_aapcs_vfpcc void @test_vstrwq_scatter_offset_p_u32(i32* %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) { ; CHECK-LABEL: test_vstrwq_scatter_offset_p_u32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpst ; CHECK-NEXT: vstrwt.32 q1, [r0, q0] ; CHECK-NEXT: bx lr entry: %0 = zext i16 %p to i32 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i32.v4i32.v4i32.v4i1(i32* %base, <4 x i32> %offset, <4 x i32> %value, i32 32, i32 0, <4 x i1> %1) ret void } define arm_aapcs_vfpcc void @test_vstrwq_scatter_offset_s32(i32* %base, <4 x i32> %offset, <4 x i32> %value) { ; CHECK-LABEL: test_vstrwq_scatter_offset_s32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vstrw.32 q1, [r0, q0] ; CHECK-NEXT: bx lr entry: call void @llvm.arm.mve.vstr.scatter.offset.p0i32.v4i32.v4i32(i32* %base, <4 x i32> %offset, <4 x i32> %value, i32 32, i32 0) ret void } declare void @llvm.arm.mve.vstr.scatter.offset.p0i32.v4i32.v4i32(i32*, <4 x i32>, <4 x i32>, i32, i32) define arm_aapcs_vfpcc void @test_vstrwq_scatter_offset_u32(i32* %base, <4 x i32> %offset, <4 x i32> %value) { ; CHECK-LABEL: test_vstrwq_scatter_offset_u32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vstrw.32 q1, [r0, q0] ; CHECK-NEXT: bx lr entry: call void @llvm.arm.mve.vstr.scatter.offset.p0i32.v4i32.v4i32(i32* %base, <4 x i32> %offset, <4 x i32> %value, i32 32, i32 0) ret void } define arm_aapcs_vfpcc void @test_vstrwq_scatter_shifted_offset_f32(float* %base, <4 x i32> %offset, <4 x float> %value) { ; CHECK-LABEL: test_vstrwq_scatter_shifted_offset_f32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vstrw.32 q1, [r0, q0, uxtw #2] ; CHECK-NEXT: bx lr entry: call void @llvm.arm.mve.vstr.scatter.offset.p0f32.v4i32.v4f32(float* %base, <4 x i32> %offset, <4 x float> %value, i32 32, i32 2) ret void } define arm_aapcs_vfpcc void @test_vstrwq_scatter_shifted_offset_p_f32(float* %base, <4 x i32> %offset, <4 x float> %value, i16 zeroext %p) { ; CHECK-LABEL: test_vstrwq_scatter_shifted_offset_p_f32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpst ; CHECK-NEXT: vstrwt.32 q1, [r0, q0, uxtw #2] ; CHECK-NEXT: bx lr entry: %0 = zext i16 %p to i32 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0f32.v4i32.v4f32.v4i1(float* %base, <4 x i32> %offset, <4 x float> %value, i32 32, i32 2, <4 x i1> %1) ret void } define arm_aapcs_vfpcc void @test_vstrwq_scatter_shifted_offset_p_s32(i32* %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) { ; CHECK-LABEL: test_vstrwq_scatter_shifted_offset_p_s32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpst ; CHECK-NEXT: vstrwt.32 q1, [r0, q0, uxtw #2] ; CHECK-NEXT: bx lr entry: %0 = zext i16 %p to i32 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i32.v4i32.v4i32.v4i1(i32* %base, <4 x i32> %offset, <4 x i32> %value, i32 32, i32 2, <4 x i1> %1) ret void } define arm_aapcs_vfpcc void @test_vstrwq_scatter_shifted_offset_p_u32(i32* %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) { ; CHECK-LABEL: test_vstrwq_scatter_shifted_offset_p_u32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpst ; CHECK-NEXT: vstrwt.32 q1, [r0, q0, uxtw #2] ; CHECK-NEXT: bx lr entry: %0 = zext i16 %p to i32 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i32.v4i32.v4i32.v4i1(i32* %base, <4 x i32> %offset, <4 x i32> %value, i32 32, i32 2, <4 x i1> %1) ret void } define arm_aapcs_vfpcc void @test_vstrwq_scatter_shifted_offset_s32(i32* %base, <4 x i32> %offset, <4 x i32> %value) { ; CHECK-LABEL: test_vstrwq_scatter_shifted_offset_s32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vstrw.32 q1, [r0, q0, uxtw #2] ; CHECK-NEXT: bx lr entry: call void @llvm.arm.mve.vstr.scatter.offset.p0i32.v4i32.v4i32(i32* %base, <4 x i32> %offset, <4 x i32> %value, i32 32, i32 2) ret void } define arm_aapcs_vfpcc void @test_vstrwq_scatter_shifted_offset_u32(i32* %base, <4 x i32> %offset, <4 x i32> %value) { ; CHECK-LABEL: test_vstrwq_scatter_shifted_offset_u32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vstrw.32 q1, [r0, q0, uxtw #2] ; CHECK-NEXT: bx lr entry: call void @llvm.arm.mve.vstr.scatter.offset.p0i32.v4i32.v4i32(i32* %base, <4 x i32> %offset, <4 x i32> %value, i32 32, i32 2) ret void }