; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve -aarch64-enable-mgather-combine=0 < %s | FileCheck %s
; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve -aarch64-enable-mgather-combine=1 < %s | FileCheck %s

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; unscaled unpacked 32-bit offsets
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

define <vscale x 2 x i64> @masked_gather_nxv2i16(i16* %base, <vscale x 2 x i32> %offsets, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1h { z0.d }, p0/z, [x0, z0.d, uxtw #1]
; CHECK-NEXT:    ret
  %offsets.zext = zext <vscale x 2 x i32> %offsets to <vscale x 2 x i64>
  %ptrs = getelementptr i16, i16* %base, <vscale x 2 x i64> %offsets.zext
  %vals = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16(<vscale x 2 x i16*> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x i16> undef)
  %vals.zext = zext <vscale x 2 x i16> %vals to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %vals.zext
}

define <vscale x 2 x i64> @masked_gather_nxv2i32(i32* %base, <vscale x 2 x i32> %offsets, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1w { z0.d }, p0/z, [x0, z0.d, uxtw #2]
; CHECK-NEXT:    ret
  %offsets.zext = zext <vscale x 2 x i32> %offsets to <vscale x 2 x i64>
  %ptrs = getelementptr i32, i32* %base, <vscale x 2 x i64> %offsets.zext
  %vals = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32(<vscale x 2 x i32*> %ptrs, i32 4, <vscale x 2 x i1> %mask, <vscale x 2 x i32> undef)
  %vals.zext = zext <vscale x 2 x i32> %vals to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %vals.zext
}

define <vscale x 2 x i64> @masked_gather_nxv2i64(i64* %base, <vscale x 2 x i32> %offsets, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, z0.d, uxtw #3]
; CHECK-NEXT:    ret
  %offsets.zext = zext <vscale x 2 x i32> %offsets to <vscale x 2 x i64>
  %ptrs = getelementptr i64, i64* %base, <vscale x 2 x i64> %offsets.zext
  %vals = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64(<vscale x 2 x i64*> %ptrs, i32 8, <vscale x 2 x i1> %mask, <vscale x 2 x i64> undef)
  ret <vscale x 2 x i64> %vals
}

define <vscale x 2 x half> @masked_gather_nxv2f16(half* %base, <vscale x 2 x i32> %offsets, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1h { z0.d }, p0/z, [x0, z0.d, uxtw #1]
; CHECK-NEXT:    ret
  %offsets.zext = zext <vscale x 2 x i32> %offsets to <vscale x 2 x i64>
  %ptrs = getelementptr half, half* %base, <vscale x 2 x i64> %offsets.zext
  %vals = call <vscale x 2 x half> @llvm.masked.gather.nxv2f16(<vscale x 2 x half*> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x half> undef)
  ret <vscale x 2 x half> %vals
}

define <vscale x 2 x bfloat> @masked_gather_nxv2bf16(bfloat* %base, <vscale x 2 x i32> %offsets, <vscale x 2 x i1> %mask) #0 {
; CHECK-LABEL: masked_gather_nxv2bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1h { z0.d }, p0/z, [x0, z0.d, uxtw #1]
; CHECK-NEXT:    ret
  %offsets.zext = zext <vscale x 2 x i32> %offsets to <vscale x 2 x i64>
  %ptrs = getelementptr bfloat, bfloat* %base, <vscale x 2 x i64> %offsets.zext
  %vals = call <vscale x 2 x bfloat> @llvm.masked.gather.nxv2bf16(<vscale x 2 x bfloat*> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x bfloat> undef)
  ret <vscale x 2 x bfloat> %vals
}

define <vscale x 2 x float> @masked_gather_nxv2f32(float* %base, <vscale x 2 x i32> %offsets, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1w { z0.d }, p0/z, [x0, z0.d, uxtw #2]
; CHECK-NEXT:    ret
  %offsets.zext = zext <vscale x 2 x i32> %offsets to <vscale x 2 x i64>
  %ptrs = getelementptr float, float* %base, <vscale x 2 x i64> %offsets.zext
  %vals = call <vscale x 2 x float> @llvm.masked.gather.nxv2f32(<vscale x 2 x float*> %ptrs, i32 4, <vscale x 2 x i1> %mask, <vscale x 2 x float> undef)
  ret <vscale x 2 x float> %vals
}

define <vscale x 2 x double> @masked_gather_nxv2f64(double* %base, <vscale x 2 x i32> %offsets, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, z0.d, uxtw #3]
; CHECK-NEXT:    ret
  %offsets.zext = zext <vscale x 2 x i32> %offsets to <vscale x 2 x i64>
  %ptrs = getelementptr double, double* %base, <vscale x 2 x i64> %offsets.zext
  %vals = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64(<vscale x 2 x double*> %ptrs, i32 8, <vscale x 2 x i1> %mask, <vscale x 2 x double> undef)
  ret <vscale x 2 x double> %vals
}

define <vscale x 2 x i64> @masked_sgather_nxv2i16(i16* %base, <vscale x 2 x i32> %offsets, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_sgather_nxv2i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sh { z0.d }, p0/z, [x0, z0.d, uxtw #1]
; CHECK-NEXT:    ret
  %offsets.zext = zext <vscale x 2 x i32> %offsets to <vscale x 2 x i64>
  %ptrs = getelementptr i16, i16* %base, <vscale x 2 x i64> %offsets.zext
  %vals = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16(<vscale x 2 x i16*> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x i16> undef)
  %vals.sext = sext <vscale x 2 x i16> %vals to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %vals.sext
}

define <vscale x 2 x i64> @masked_sgather_nxv2i32(i32* %base, <vscale x 2 x i32> %offsets, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_sgather_nxv2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sw { z0.d }, p0/z, [x0, z0.d, uxtw #2]
; CHECK-NEXT:    ret
  %offsets.zext = zext <vscale x 2 x i32> %offsets to <vscale x 2 x i64>
  %ptrs = getelementptr i32, i32* %base, <vscale x 2 x i64> %offsets.zext
  %vals = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32(<vscale x 2 x i32*> %ptrs, i32 4, <vscale x 2 x i1> %mask, <vscale x 2 x i32> undef)
  %vals.sext = sext <vscale x 2 x i32> %vals to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %vals.sext
}

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; unscaled packed 32-bit offsets
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

define <vscale x 4 x i32> @masked_gather_nxv4i16(i16* %base, <vscale x 4 x i32> %offsets, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv4i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1h { z0.s }, p0/z, [x0, z0.s, uxtw #1]
; CHECK-NEXT:    ret
  %offsets.zext = zext <vscale x 4 x i32> %offsets to <vscale x 4 x i64>
  %ptrs = getelementptr i16, i16* %base, <vscale x 4 x i64> %offsets.zext
  %vals = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16(<vscale x 4 x i16*> %ptrs, i32 2, <vscale x 4 x i1> %mask, <vscale x 4 x i16> undef)
  %vals.zext = zext <vscale x 4 x i16> %vals to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %vals.zext
}

define <vscale x 4 x i32> @masked_gather_nxv4i32(i32* %base, <vscale x 4 x i32> %offsets, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0, z0.s, uxtw #2]
; CHECK-NEXT:    ret
  %offsets.zext = zext <vscale x 4 x i32> %offsets to <vscale x 4 x i64>
  %ptrs = getelementptr i32, i32* %base, <vscale x 4 x i64> %offsets.zext
  %vals = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32(<vscale x 4 x i32*> %ptrs, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x i32> undef)
  ret <vscale x 4 x i32> %vals
}

define <vscale x 4 x half> @masked_gather_nxv4f16(half* %base, <vscale x 4 x i32> %offsets, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1h { z0.s }, p0/z, [x0, z0.s, uxtw #1]
; CHECK-NEXT:    ret
  %offsets.zext = zext <vscale x 4 x i32> %offsets to <vscale x 4 x i64>
  %ptrs = getelementptr half, half* %base, <vscale x 4 x i64> %offsets.zext
  %vals = call <vscale x 4 x half> @llvm.masked.gather.nxv4f16(<vscale x 4 x half*> %ptrs, i32 2, <vscale x 4 x i1> %mask, <vscale x 4 x half> undef)
  ret <vscale x 4 x half> %vals
}

define <vscale x 4 x bfloat> @masked_gather_nxv4bf16(bfloat* %base, <vscale x 4 x i32> %offsets, <vscale x 4 x i1> %mask) #0 {
; CHECK-LABEL: masked_gather_nxv4bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1h { z0.s }, p0/z, [x0, z0.s, uxtw #1]
; CHECK-NEXT:    ret
  %offsets.zext = zext <vscale x 4 x i32> %offsets to <vscale x 4 x i64>
  %ptrs = getelementptr bfloat, bfloat* %base, <vscale x 4 x i64> %offsets.zext
  %vals = call <vscale x 4 x bfloat> @llvm.masked.gather.nxv4bf16(<vscale x 4 x bfloat*> %ptrs, i32 2, <vscale x 4 x i1> %mask, <vscale x 4 x bfloat> undef)
  ret <vscale x 4 x bfloat> %vals
}

define <vscale x 4 x float> @masked_gather_nxv4f32(float* %base, <vscale x 4 x i32> %offsets, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0, z0.s, uxtw #2]
; CHECK-NEXT:    ret
  %offsets.zext = zext <vscale x 4 x i32> %offsets to <vscale x 4 x i64>
  %ptrs = getelementptr float, float* %base, <vscale x 4 x i64> %offsets.zext
  %vals = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32(<vscale x 4 x float*> %ptrs, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x float> undef)
  ret <vscale x 4 x float> %vals
}

define <vscale x 4 x i32> @masked_sgather_nxv4i16(i16* %base, <vscale x 4 x i32> %offsets, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: masked_sgather_nxv4i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sh { z0.s }, p0/z, [x0, z0.s, uxtw #1]
; CHECK-NEXT:    ret
  %offsets.zext = zext <vscale x 4 x i32> %offsets to <vscale x 4 x i64>
  %ptrs = getelementptr i16, i16* %base, <vscale x 4 x i64> %offsets.zext
  %vals = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16(<vscale x 4 x i16*> %ptrs, i32 2, <vscale x 4 x i1> %mask, <vscale x 4 x i16> undef)
  %vals.sext = sext <vscale x 4 x i16> %vals to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %vals.sext
}

declare <vscale x 2 x i16> @llvm.masked.gather.nxv2i16(<vscale x 2 x i16*>, i32, <vscale x 2 x i1>, <vscale x 2 x i16>)
declare <vscale x 2 x i32> @llvm.masked.gather.nxv2i32(<vscale x 2 x i32*>, i32, <vscale x 2 x i1>, <vscale x 2 x i32>)
declare <vscale x 2 x i64> @llvm.masked.gather.nxv2i64(<vscale x 2 x i64*>, i32, <vscale x 2 x i1>, <vscale x 2 x i64>)
declare <vscale x 2 x half> @llvm.masked.gather.nxv2f16(<vscale x 2 x half*>, i32, <vscale x 2 x i1>, <vscale x 2 x half>)
declare <vscale x 2 x bfloat> @llvm.masked.gather.nxv2bf16(<vscale x 2 x bfloat*>, i32, <vscale x 2 x i1>, <vscale x 2 x bfloat>)
declare <vscale x 2 x float> @llvm.masked.gather.nxv2f32(<vscale x 2 x float*>, i32, <vscale x 2 x i1>, <vscale x 2 x float>)
declare <vscale x 2 x double> @llvm.masked.gather.nxv2f64(<vscale x 2 x double*>, i32, <vscale x 2 x i1>, <vscale x 2 x double>)

declare <vscale x 4 x i16> @llvm.masked.gather.nxv4i16(<vscale x 4 x i16*>, i32, <vscale x 4 x i1>, <vscale x 4 x i16>)
declare <vscale x 4 x i32> @llvm.masked.gather.nxv4i32(<vscale x 4 x i32*>, i32, <vscale x 4 x i1>, <vscale x 4 x i32>)
declare <vscale x 4 x half> @llvm.masked.gather.nxv4f16(<vscale x 4 x half*>, i32, <vscale x 4 x i1>, <vscale x 4 x half>)
declare <vscale x 4 x bfloat> @llvm.masked.gather.nxv4bf16(<vscale x 4 x bfloat*>, i32, <vscale x 4 x i1>, <vscale x 4 x bfloat>)
declare <vscale x 4 x float> @llvm.masked.gather.nxv4f32(<vscale x 4 x float*>, i32, <vscale x 4 x i1>, <vscale x 4 x float>)

attributes #0 = { "target-features"="+sve,+bf16" }