; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -mtriple=x86_64-pc-linux -mattr=+avx2 -interleaved-access -S | FileCheck %s ; RUN: opt < %s -mtriple=x86_64-pc-linux -mattr=+avx512f -mattr=+avx512bw -mattr=+avx512vl -interleaved-access -S | FileCheck %s define <32 x i8> @interleaved_load_vf32_i8_stride3(<96 x i8>* %ptr){ ; CHECK-LABEL: @interleaved_load_vf32_i8_stride3( ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <96 x i8>* [[PTR:%.*]] to <16 x i8>* ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr <16 x i8>, <16 x i8>* [[TMP1]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[TMP2]], align 128 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr <16 x i8>, <16 x i8>* [[TMP1]], i32 1 ; CHECK-NEXT: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[TMP4]], align 16 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr <16 x i8>, <16 x i8>* [[TMP1]], i32 2 ; CHECK-NEXT: [[TMP7:%.*]] = load <16 x i8>, <16 x i8>* [[TMP6]], align 16 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr <16 x i8>, <16 x i8>* [[TMP1]], i32 3 ; CHECK-NEXT: [[TMP9:%.*]] = load <16 x i8>, <16 x i8>* [[TMP8]], align 16 ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr <16 x i8>, <16 x i8>* [[TMP1]], i32 4 ; CHECK-NEXT: [[TMP11:%.*]] = load <16 x i8>, <16 x i8>* [[TMP10]], align 16 ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr <16 x i8>, <16 x i8>* [[TMP1]], i32 5 ; CHECK-NEXT: [[TMP13:%.*]] = load <16 x i8>, <16 x i8>* [[TMP12]], align 16 ; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <16 x i8> [[TMP3]], <16 x i8> [[TMP9]], <32 x i32> ; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <16 x i8> [[TMP5]], <16 x i8> [[TMP11]], <32 x i32> ; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <16 x i8> [[TMP7]], <16 x i8> [[TMP13]], <32 x i32> ; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <32 x i8> [[TMP14]], <32 x i8> poison, <32 x i32> ; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <32 x i8> [[TMP15]], <32 x i8> poison, <32 x i32> ; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <32 x i8> [[TMP16]], <32 x i8> poison, <32 x i32> ; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <32 x i8> [[TMP19]], <32 x i8> [[TMP17]], <32 x i32> ; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <32 x i8> [[TMP17]], <32 x i8> [[TMP18]], <32 x i32> ; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <32 x i8> [[TMP18]], <32 x i8> [[TMP19]], <32 x i32> ; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <32 x i8> [[TMP21]], <32 x i8> [[TMP20]], <32 x i32> ; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <32 x i8> [[TMP22]], <32 x i8> [[TMP21]], <32 x i32> ; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <32 x i8> [[TMP20]], <32 x i8> [[TMP22]], <32 x i32> ; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <32 x i8> [[TMP24]], <32 x i8> poison, <32 x i32> ; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <32 x i8> [[TMP23]], <32 x i8> poison, <32 x i32> ; CHECK-NEXT: [[ADD1:%.*]] = add <32 x i8> [[TMP27]], [[TMP26]] ; CHECK-NEXT: [[ADD2:%.*]] = add <32 x i8> [[TMP25]], [[ADD1]] ; CHECK-NEXT: ret <32 x i8> [[ADD2]] ; %wide.vec = load <96 x i8>, <96 x i8>* %ptr %v1 = shufflevector <96 x i8> %wide.vec, <96 x i8> undef,<32 x i32> %v2 = shufflevector <96 x i8> %wide.vec, <96 x i8> undef,<32 x i32> %v3 = shufflevector <96 x i8> %wide.vec, <96 x i8> undef,<32 x i32> %add1 = add <32 x i8> %v1, %v2 %add2 = add <32 x i8> %v3, %add1 ret <32 x i8> %add2 } define <16 x i8> @interleaved_load_vf16_i8_stride3(<48 x i8>* %ptr){ ; CHECK-LABEL: @interleaved_load_vf16_i8_stride3( ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <48 x i8>* [[PTR:%.*]] to <16 x i8>* ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr <16 x i8>, <16 x i8>* [[TMP1]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[TMP2]], align 64 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr <16 x i8>, <16 x i8>* [[TMP1]], i32 1 ; CHECK-NEXT: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[TMP4]], align 16 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr <16 x i8>, <16 x i8>* [[TMP1]], i32 2 ; CHECK-NEXT: [[TMP7:%.*]] = load <16 x i8>, <16 x i8>* [[TMP6]], align 16 ; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <16 x i8> [[TMP3]], <16 x i8> poison, <16 x i32> ; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <16 x i8> [[TMP5]], <16 x i8> poison, <16 x i32> ; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <16 x i8> [[TMP7]], <16 x i8> poison, <16 x i32> ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <16 x i8> [[TMP10]], <16 x i8> [[TMP8]], <16 x i32> ; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <16 x i8> [[TMP8]], <16 x i8> [[TMP9]], <16 x i32> ; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <16 x i8> [[TMP9]], <16 x i8> [[TMP10]], <16 x i32> ; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <16 x i8> [[TMP12]], <16 x i8> [[TMP11]], <16 x i32> ; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <16 x i8> [[TMP13]], <16 x i8> [[TMP12]], <16 x i32> ; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <16 x i8> [[TMP11]], <16 x i8> [[TMP13]], <16 x i32> ; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <16 x i8> [[TMP15]], <16 x i8> poison, <16 x i32> ; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <16 x i8> [[TMP14]], <16 x i8> poison, <16 x i32> ; CHECK-NEXT: [[ADD1:%.*]] = add <16 x i8> [[TMP18]], [[TMP17]] ; CHECK-NEXT: [[ADD2:%.*]] = add <16 x i8> [[TMP16]], [[ADD1]] ; CHECK-NEXT: ret <16 x i8> [[ADD2]] ; %wide.vec = load <48 x i8>, <48 x i8>* %ptr %v1 = shufflevector <48 x i8> %wide.vec, <48 x i8> undef,<16 x i32> %v2 = shufflevector <48 x i8> %wide.vec, <48 x i8> undef,<16 x i32> %v3 = shufflevector <48 x i8> %wide.vec, <48 x i8> undef,<16 x i32> %add1 = add <16 x i8> %v1, %v2 %add2 = add <16 x i8> %v3, %add1 ret <16 x i8> %add2 } define <8 x i8> @interleaved_load_vf8_i8_stride3(<24 x i8>* %ptr){ ; CHECK-LABEL: @interleaved_load_vf8_i8_stride3( ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <24 x i8>, <24 x i8>* [[PTR:%.*]], align 32 ; CHECK-NEXT: [[V1:%.*]] = shufflevector <24 x i8> [[WIDE_VEC]], <24 x i8> undef, <8 x i32> ; CHECK-NEXT: [[V2:%.*]] = shufflevector <24 x i8> [[WIDE_VEC]], <24 x i8> undef, <8 x i32> ; CHECK-NEXT: [[V3:%.*]] = shufflevector <24 x i8> [[WIDE_VEC]], <24 x i8> undef, <8 x i32> ; CHECK-NEXT: [[ADD1:%.*]] = add <8 x i8> [[V1]], [[V2]] ; CHECK-NEXT: [[ADD2:%.*]] = add <8 x i8> [[V3]], [[ADD1]] ; CHECK-NEXT: ret <8 x i8> [[ADD2]] ; %wide.vec = load <24 x i8>, <24 x i8>* %ptr %v1 = shufflevector <24 x i8> %wide.vec, <24 x i8> undef,<8 x i32> %v2 = shufflevector <24 x i8> %wide.vec, <24 x i8> undef,<8 x i32> %v3 = shufflevector <24 x i8> %wide.vec, <24 x i8> undef,<8 x i32> %add1 = add <8 x i8> %v1, %v2 %add2 = add <8 x i8> %v3, %add1 ret <8 x i8> %add2 } define <64 x i8> @interleaved_load_vf64_i8_stride3(<192 x i8>* %ptr){ ; CHECK-LABEL: @interleaved_load_vf64_i8_stride3( ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <192 x i8>* [[PTR:%.*]] to <16 x i8>* ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr <16 x i8>, <16 x i8>* [[TMP1]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[TMP2]], align 1 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr <16 x i8>, <16 x i8>* [[TMP1]], i32 1 ; CHECK-NEXT: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[TMP4]], align 1 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr <16 x i8>, <16 x i8>* [[TMP1]], i32 2 ; CHECK-NEXT: [[TMP7:%.*]] = load <16 x i8>, <16 x i8>* [[TMP6]], align 1 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr <16 x i8>, <16 x i8>* [[TMP1]], i32 3 ; CHECK-NEXT: [[TMP9:%.*]] = load <16 x i8>, <16 x i8>* [[TMP8]], align 1 ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr <16 x i8>, <16 x i8>* [[TMP1]], i32 4 ; CHECK-NEXT: [[TMP11:%.*]] = load <16 x i8>, <16 x i8>* [[TMP10]], align 1 ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr <16 x i8>, <16 x i8>* [[TMP1]], i32 5 ; CHECK-NEXT: [[TMP13:%.*]] = load <16 x i8>, <16 x i8>* [[TMP12]], align 1 ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr <16 x i8>, <16 x i8>* [[TMP1]], i32 6 ; CHECK-NEXT: [[TMP15:%.*]] = load <16 x i8>, <16 x i8>* [[TMP14]], align 1 ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr <16 x i8>, <16 x i8>* [[TMP1]], i32 7 ; CHECK-NEXT: [[TMP17:%.*]] = load <16 x i8>, <16 x i8>* [[TMP16]], align 1 ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr <16 x i8>, <16 x i8>* [[TMP1]], i32 8 ; CHECK-NEXT: [[TMP19:%.*]] = load <16 x i8>, <16 x i8>* [[TMP18]], align 1 ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr <16 x i8>, <16 x i8>* [[TMP1]], i32 9 ; CHECK-NEXT: [[TMP21:%.*]] = load <16 x i8>, <16 x i8>* [[TMP20]], align 1 ; CHECK-NEXT: [[TMP22:%.*]] = getelementptr <16 x i8>, <16 x i8>* [[TMP1]], i32 10 ; CHECK-NEXT: [[TMP23:%.*]] = load <16 x i8>, <16 x i8>* [[TMP22]], align 1 ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr <16 x i8>, <16 x i8>* [[TMP1]], i32 11 ; CHECK-NEXT: [[TMP25:%.*]] = load <16 x i8>, <16 x i8>* [[TMP24]], align 1 ; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <16 x i8> [[TMP3]], <16 x i8> [[TMP9]], <32 x i32> ; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <16 x i8> [[TMP5]], <16 x i8> [[TMP11]], <32 x i32> ; CHECK-NEXT: [[TMP28:%.*]] = shufflevector <16 x i8> [[TMP7]], <16 x i8> [[TMP13]], <32 x i32> ; CHECK-NEXT: [[TMP29:%.*]] = shufflevector <16 x i8> [[TMP15]], <16 x i8> [[TMP21]], <32 x i32> ; CHECK-NEXT: [[TMP30:%.*]] = shufflevector <16 x i8> [[TMP17]], <16 x i8> [[TMP23]], <32 x i32> ; CHECK-NEXT: [[TMP31:%.*]] = shufflevector <16 x i8> [[TMP19]], <16 x i8> [[TMP25]], <32 x i32> ; CHECK-NEXT: [[TMP32:%.*]] = shufflevector <32 x i8> [[TMP26]], <32 x i8> [[TMP29]], <64 x i32> ; CHECK-NEXT: [[TMP33:%.*]] = shufflevector <32 x i8> [[TMP27]], <32 x i8> [[TMP30]], <64 x i32> ; CHECK-NEXT: [[TMP34:%.*]] = shufflevector <32 x i8> [[TMP28]], <32 x i8> [[TMP31]], <64 x i32> ; CHECK-NEXT: [[TMP35:%.*]] = shufflevector <64 x i8> [[TMP32]], <64 x i8> poison, <64 x i32> ; CHECK-NEXT: [[TMP36:%.*]] = shufflevector <64 x i8> [[TMP33]], <64 x i8> poison, <64 x i32> ; CHECK-NEXT: [[TMP37:%.*]] = shufflevector <64 x i8> [[TMP34]], <64 x i8> poison, <64 x i32> ; CHECK-NEXT: [[TMP38:%.*]] = shufflevector <64 x i8> [[TMP37]], <64 x i8> [[TMP35]], <64 x i32> ; CHECK-NEXT: [[TMP39:%.*]] = shufflevector <64 x i8> [[TMP35]], <64 x i8> [[TMP36]], <64 x i32> ; CHECK-NEXT: [[TMP40:%.*]] = shufflevector <64 x i8> [[TMP36]], <64 x i8> [[TMP37]], <64 x i32> ; CHECK-NEXT: [[TMP41:%.*]] = shufflevector <64 x i8> [[TMP39]], <64 x i8> [[TMP38]], <64 x i32> ; CHECK-NEXT: [[TMP42:%.*]] = shufflevector <64 x i8> [[TMP40]], <64 x i8> [[TMP39]], <64 x i32> ; CHECK-NEXT: [[TMP43:%.*]] = shufflevector <64 x i8> [[TMP38]], <64 x i8> [[TMP40]], <64 x i32> ; CHECK-NEXT: [[TMP44:%.*]] = shufflevector <64 x i8> [[TMP42]], <64 x i8> poison, <64 x i32> ; CHECK-NEXT: [[TMP45:%.*]] = shufflevector <64 x i8> [[TMP41]], <64 x i8> poison, <64 x i32> ; CHECK-NEXT: [[ADD1:%.*]] = add <64 x i8> [[TMP45]], [[TMP44]] ; CHECK-NEXT: [[ADD2:%.*]] = add <64 x i8> [[TMP43]], [[ADD1]] ; CHECK-NEXT: ret <64 x i8> [[ADD2]] ; %wide.vec = load <192 x i8>, <192 x i8>* %ptr, align 1 %v1 = shufflevector <192 x i8> %wide.vec, <192 x i8> undef, <64 x i32> %v2 = shufflevector <192 x i8> %wide.vec, <192 x i8> undef, <64 x i32> %v3 = shufflevector <192 x i8> %wide.vec, <192 x i8> undef, <64 x i32> %add1 = add <64 x i8> %v1, %v2 %add2 = add <64 x i8> %v3, %add1 ret <64 x i8> %add2 }