; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -mtriple=x86_64-unknown-unknown -S | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

;
; ASHR - Immediate
;

define <8 x i16> @sse2_psrai_w_0(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psrai_w_0(
; CHECK-NEXT:    ret <8 x i16> [[V:%.*]]
;
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %v, i32 0)
  ret <8 x i16> %1
}

define <8 x i16> @sse2_psrai_w_15(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psrai_w_15(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
;
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %v, i32 15)
  ret <8 x i16> %1
}

; Out-of-range arithmetic shift clamps to bitwidth-1 (sign splat).
define <8 x i16> @sse2_psrai_w_64(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psrai_w_64(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
;
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %v, i32 64)
  ret <8 x i16> %1
}

define <4 x i32> @sse2_psrai_d_0(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psrai_d_0(
; CHECK-NEXT:    ret <4 x i32> [[V:%.*]]
;
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %v, i32 0)
  ret <4 x i32> %1
}

define <4 x i32> @sse2_psrai_d_15(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psrai_d_15(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], <i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
;
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %v, i32 15)
  ret <4 x i32> %1
}

define <4 x i32> @sse2_psrai_d_64(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psrai_d_64(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], <i32 31, i32 31, i32 31, i32 31>
; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
;
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %v, i32 64)
  ret <4 x i32> %1
}

define <16 x i16> @avx2_psrai_w_0(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psrai_w_0(
; CHECK-NEXT:    ret <16 x i16> [[V:%.*]]
;
  %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %v, i32 0)
  ret <16 x i16> %1
}

define <16 x i16> @avx2_psrai_w_15(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psrai_w_15(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
;
  %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %v, i32 15)
  ret <16 x i16> %1
}

define <16 x i16> @avx2_psrai_w_64(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psrai_w_64(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
;
  %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %v, i32 64)
  ret <16 x i16> %1
}

define <8 x i32> @avx2_psrai_d_0(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psrai_d_0(
; CHECK-NEXT:    ret <8 x i32> [[V:%.*]]
;
  %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %v, i32 0)
  ret <8 x i32> %1
}

define <8 x i32> @avx2_psrai_d_15(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psrai_d_15(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
;
  %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %v, i32 15)
  ret <8 x i32> %1
}

define <8 x i32> @avx2_psrai_d_64(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psrai_d_64(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
;
  %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %v, i32 64)
  ret <8 x i32> %1
}

define <2 x i64> @avx512_psrai_q_128_0(<2 x i64> %v) {
; CHECK-LABEL: @avx512_psrai_q_128_0(
; CHECK-NEXT:    ret <2 x i64> [[V:%.*]]
;
  %1 = tail call <2 x i64> @llvm.x86.avx512.psrai.q.128(<2 x i64> %v, i32 0)
  ret <2 x i64> %1
}

define <2 x i64> @avx512_psrai_q_128_15(<2 x i64> %v) {
; CHECK-LABEL: @avx512_psrai_q_128_15(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i64> [[V:%.*]], <i64 15, i64 15>
; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
;
  %1 = tail call <2 x i64> @llvm.x86.avx512.psrai.q.128(<2 x i64> %v, i32 15)
  ret <2 x i64> %1
}

define <2 x i64> @avx512_psrai_q_128_64(<2 x i64> %v) {
; CHECK-LABEL: @avx512_psrai_q_128_64(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i64> [[V:%.*]], <i64 63, i64 63>
; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
;
  %1 = tail call <2 x i64> @llvm.x86.avx512.psrai.q.128(<2 x i64> %v, i32 64)
  ret <2 x i64> %1
}

define <4 x i64> @avx512_psrai_q_256_0(<4 x i64> %v) {
; CHECK-LABEL: @avx512_psrai_q_256_0(
; CHECK-NEXT:    ret <4 x i64> [[V:%.*]]
;
  %1 = tail call <4 x i64> @llvm.x86.avx512.psrai.q.256(<4 x i64> %v, i32 0)
  ret <4 x i64> %1
}

define <4 x i64> @avx512_psrai_q_256_15(<4 x i64> %v) {
; CHECK-LABEL: @avx512_psrai_q_256_15(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i64> [[V:%.*]], <i64 15, i64 15, i64 15, i64 15>
; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
;
  %1 = tail call <4 x i64> @llvm.x86.avx512.psrai.q.256(<4 x i64> %v, i32 15)
  ret <4 x i64> %1
}

define <4 x i64> @avx512_psrai_q_256_64(<4 x i64> %v) {
; CHECK-LABEL: @avx512_psrai_q_256_64(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i64> [[V:%.*]], <i64 63, i64 63, i64 63, i64 63>
; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
;
  %1 = tail call <4 x i64> @llvm.x86.avx512.psrai.q.256(<4 x i64> %v, i32 64)
  ret <4 x i64> %1
}

define <32 x i16> @avx512_psrai_w_512_0(<32 x i16> %v) {
; CHECK-LABEL: @avx512_psrai_w_512_0(
; CHECK-NEXT:    ret <32 x i16> [[V:%.*]]
;
  %1 = tail call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %v, i32 0)
  ret <32 x i16> %1
}

define <32 x i16> @avx512_psrai_w_512_15(<32 x i16> %v) {
; CHECK-LABEL: @avx512_psrai_w_512_15(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
;
  %1 = tail call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %v, i32 15)
  ret <32 x i16> %1
}

define <32 x i16> @avx512_psrai_w_512_64(<32 x i16> %v) {
; CHECK-LABEL: @avx512_psrai_w_512_64(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
;
  %1 = tail call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %v, i32 64)
  ret <32 x i16> %1
}

define <16 x i32> @avx512_psrai_d_512_0(<16 x i32> %v) {
; CHECK-LABEL: @avx512_psrai_d_512_0(
; CHECK-NEXT:    ret <16 x i32> [[V:%.*]]
;
  %1 = tail call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> %v, i32 0)
  ret <16 x i32> %1
}

define <16 x i32> @avx512_psrai_d_512_15(<16 x i32> %v) {
; CHECK-LABEL: @avx512_psrai_d_512_15(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
;
  %1 = tail call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> %v, i32 15)
  ret <16 x i32> %1
}

define <16 x i32> @avx512_psrai_d_512_64(<16 x i32> %v) {
; CHECK-LABEL: @avx512_psrai_d_512_64(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
;
  %1 = tail call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> %v, i32 64)
  ret <16 x i32> %1
}

define <8 x i64> @avx512_psrai_q_512_0(<8 x i64> %v) {
; CHECK-LABEL: @avx512_psrai_q_512_0(
; CHECK-NEXT:    ret <8 x i64> [[V:%.*]]
;
  %1 = tail call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> %v, i32 0)
  ret <8 x i64> %1
}

define <8 x i64> @avx512_psrai_q_512_15(<8 x i64> %v) {
; CHECK-LABEL: @avx512_psrai_q_512_15(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i64> [[V:%.*]], <i64 15, i64 15, i64 15, i64 15, i64 15, i64 15, i64 15, i64 15>
; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
;
  %1 = tail call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> %v, i32 15)
  ret <8 x i64> %1
}

define <8 x i64> @avx512_psrai_q_512_64(<8 x i64> %v) {
; CHECK-LABEL: @avx512_psrai_q_512_64(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i64> [[V:%.*]], <i64 63, i64 63, i64 63, i64 63, i64 63, i64 63, i64 63, i64 63>
; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
;
  %1 = tail call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> %v, i32 64)
  ret <8 x i64> %1
}

;
; LSHR - Immediate
;

define <8 x i16> @sse2_psrli_w_0(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psrli_w_0(
; CHECK-NEXT:    ret <8 x i16> [[V:%.*]]
;
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %v, i32 0)
  ret <8 x i16> %1
}

define <8 x i16> @sse2_psrli_w_15(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psrli_w_15(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
;
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %v, i32 15)
  ret <8 x i16> %1
}

; Out-of-range logical shift folds to zero.
define <8 x i16> @sse2_psrli_w_64(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psrli_w_64(
; CHECK-NEXT:    ret <8 x i16> zeroinitializer
;
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %v, i32 64)
  ret <8 x i16> %1
}

define <4 x i32> @sse2_psrli_d_0(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psrli_d_0(
; CHECK-NEXT:    ret <4 x i32> [[V:%.*]]
;
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 0)
  ret <4 x i32> %1
}

define <4 x i32> @sse2_psrli_d_15(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psrli_d_15(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i32> [[V:%.*]], <i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
;
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 15)
  ret <4 x i32> %1
}

define <4 x i32> @sse2_psrli_d_64(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psrli_d_64(
; CHECK-NEXT:    ret <4 x i32> zeroinitializer
;
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 64)
  ret <4 x i32> %1
}

define <2 x i64> @sse2_psrli_q_0(<2 x i64> %v) {
; CHECK-LABEL: @sse2_psrli_q_0(
; CHECK-NEXT:    ret <2 x i64> [[V:%.*]]
;
  %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %v, i32 0)
  ret <2 x i64> %1
}

define <2 x i64> @sse2_psrli_q_15(<2 x i64> %v) {
; CHECK-LABEL: @sse2_psrli_q_15(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i64> [[V:%.*]], <i64 15, i64 15>
; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
;
  %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %v, i32 15)
  ret <2 x i64> %1
}

define <2 x i64> @sse2_psrli_q_64(<2 x i64> %v) {
; CHECK-LABEL: @sse2_psrli_q_64(
; CHECK-NEXT:    ret <2 x i64> zeroinitializer
;
  %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %v, i32 64)
  ret <2 x i64> %1
}

define <16 x i16> @avx2_psrli_w_0(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psrli_w_0(
; CHECK-NEXT:    ret <16 x i16> [[V:%.*]]
;
  %1 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %v, i32 0)
  ret <16 x i16> %1
}

define <16 x i16> @avx2_psrli_w_15(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psrli_w_15(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <16 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
;
  %1 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %v, i32 15)
  ret <16 x i16> %1
}

define <16 x i16> @avx2_psrli_w_64(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psrli_w_64(
; CHECK-NEXT:    ret <16 x i16> zeroinitializer
;
  %1 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %v, i32 64)
  ret <16 x i16> %1
}

define <8 x i32> @avx2_psrli_d_0(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psrli_d_0(
; CHECK-NEXT:    ret <8 x i32> [[V:%.*]]
;
  %1 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %v, i32 0)
  ret <8 x i32> %1
}

define <8 x i32> @avx2_psrli_d_15(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psrli_d_15(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i32> [[V:%.*]], <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
;
  %1 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %v, i32 15)
  ret <8 x i32> %1
}

define <8 x i32> @avx2_psrli_d_64(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psrli_d_64(
; CHECK-NEXT:    ret <8 x i32> zeroinitializer
;
  %1 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %v, i32 64)
  ret <8 x i32> %1
}

define <4 x i64> @avx2_psrli_q_0(<4 x i64> %v) {
; CHECK-LABEL: @avx2_psrli_q_0(
; CHECK-NEXT:    ret <4 x i64> [[V:%.*]]
;
  %1 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %v, i32 0)
  ret <4 x i64> %1
}

define <4 x i64> @avx2_psrli_q_15(<4 x i64> %v) {
; CHECK-LABEL: @avx2_psrli_q_15(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i64> [[V:%.*]], <i64 15, i64 15, i64 15, i64 15>
; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
;
  %1 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %v, i32 15)
  ret <4 x i64> %1
}

define <4 x i64> @avx2_psrli_q_64(<4 x i64> %v) {
; CHECK-LABEL: @avx2_psrli_q_64(
; CHECK-NEXT:    ret <4 x i64> zeroinitializer
;
  %1 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %v, i32 64)
  ret <4 x i64> %1
}

define <32 x i16> @avx512_psrli_w_512_0(<32 x i16> %v) {
; CHECK-LABEL: @avx512_psrli_w_512_0(
; CHECK-NEXT:    ret <32 x i16> [[V:%.*]]
;
  %1 = tail call <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16> %v, i32 0)
  ret <32 x i16> %1
}

define <32 x i16> @avx512_psrli_w_512_15(<32 x i16> %v) {
; CHECK-LABEL: @avx512_psrli_w_512_15(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <32 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
;
  %1 = tail call <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16> %v, i32 15)
  ret <32 x i16> %1
}

define <32 x i16> @avx512_psrli_w_512_64(<32 x i16> %v) {
; CHECK-LABEL: @avx512_psrli_w_512_64(
; CHECK-NEXT:    ret <32 x i16> zeroinitializer
;
  %1 = tail call <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16> %v, i32 64)
  ret <32 x i16> %1
}

define <16 x i32> @avx512_psrli_d_512_0(<16 x i32> %v) {
; CHECK-LABEL: @avx512_psrli_d_512_0(
; CHECK-NEXT:    ret <16 x i32> [[V:%.*]]
;
  %1 = tail call <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32> %v, i32 0)
  ret <16 x i32> %1
}

define <16 x i32> @avx512_psrli_d_512_15(<16 x i32> %v) {
; CHECK-LABEL: @avx512_psrli_d_512_15(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <16 x i32> [[V:%.*]], <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
;
  %1 = tail call <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32> %v, i32 15)
  ret <16 x i32> %1
}

define <16 x i32> @avx512_psrli_d_512_64(<16 x i32> %v) {
; CHECK-LABEL: @avx512_psrli_d_512_64(
; CHECK-NEXT:    ret <16 x i32> zeroinitializer
;
  %1 = tail call <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32> %v, i32 64)
  ret <16 x i32> %1
}

define <8 x i64> @avx512_psrli_q_512_0(<8 x i64> %v) {
; CHECK-LABEL: @avx512_psrli_q_512_0(
; CHECK-NEXT:    ret <8 x i64> [[V:%.*]]
;
  %1 = tail call <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64> %v, i32 0)
  ret <8 x i64> %1
}

define <8 x i64> @avx512_psrli_q_512_15(<8 x i64> %v) {
; CHECK-LABEL: @avx512_psrli_q_512_15(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i64> [[V:%.*]], <i64 15, i64 15, i64 15, i64 15, i64 15, i64 15, i64 15, i64 15>
; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
;
  %1 = tail call <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64> %v, i32 15)
  ret <8 x i64> %1
}

define <8 x i64> @avx512_psrli_q_512_64(<8 x i64> %v) {
; CHECK-LABEL: @avx512_psrli_q_512_64(
; CHECK-NEXT:    ret <8 x i64> zeroinitializer
;
  %1 = tail call <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64> %v, i32 64)
  ret <8 x i64> %1
}

;
; SHL - Immediate
;

define <8 x i16> @sse2_pslli_w_0(<8 x i16> %v) {
; CHECK-LABEL: @sse2_pslli_w_0(
; CHECK-NEXT:    ret <8 x i16> [[V:%.*]]
;
  %1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %v, i32 0)
  ret <8 x i16> %1
}

define <8 x i16> @sse2_pslli_w_15(<8 x i16> %v) {
; CHECK-LABEL: @sse2_pslli_w_15(
; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
;
  %1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %v, i32 15)
  ret <8 x i16> %1
}

define <8 x i16> @sse2_pslli_w_64(<8 x i16> %v) {
; CHECK-LABEL: @sse2_pslli_w_64(
; CHECK-NEXT:    ret <8 x i16> zeroinitializer
;
  %1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %v, i32 64)
  ret <8 x i16> %1
}

define <4 x i32> @sse2_pslli_d_0(<4 x i32> %v) {
; CHECK-LABEL: @sse2_pslli_d_0(
; CHECK-NEXT:    ret <4 x i32> [[V:%.*]]
;
  %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %v, i32 0)
  ret <4 x i32> %1
}

define <4 x i32> @sse2_pslli_d_15(<4 x i32> %v) {
; CHECK-LABEL: @sse2_pslli_d_15(
; CHECK-NEXT:    [[TMP1:%.*]] = shl <4 x i32> [[V:%.*]], <i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
;
  %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %v, i32 15)
  ret <4 x i32> %1
}

define <4 x i32> @sse2_pslli_d_64(<4 x i32> %v) {
; CHECK-LABEL: @sse2_pslli_d_64(
; CHECK-NEXT:    ret <4 x i32> zeroinitializer
;
  %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %v, i32 64)
  ret <4 x i32> %1
}

define <2 x i64> @sse2_pslli_q_0(<2 x i64> %v) {
; CHECK-LABEL: @sse2_pslli_q_0(
; CHECK-NEXT:    ret <2 x i64> [[V:%.*]]
;
  %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %v, i32 0)
  ret <2 x i64> %1
}

define <2 x i64> @sse2_pslli_q_15(<2 x i64> %v) {
; CHECK-LABEL: @sse2_pslli_q_15(
; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i64> [[V:%.*]], <i64 15, i64 15>
; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
;
  %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %v, i32 15)
  ret <2 x i64> %1
}

define <2 x i64> @sse2_pslli_q_64(<2 x i64> %v) {
; CHECK-LABEL: @sse2_pslli_q_64(
; CHECK-NEXT:    ret <2 x i64> zeroinitializer
;
  %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %v, i32 64)
  ret <2 x i64> %1
}

define <16 x i16> @avx2_pslli_w_0(<16 x i16> %v) {
; CHECK-LABEL: @avx2_pslli_w_0(
; CHECK-NEXT:    ret <16 x i16> [[V:%.*]]
;
  %1 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %v, i32 0)
  ret <16 x i16> %1
}

define <16 x i16> @avx2_pslli_w_15(<16 x i16> %v) {
; CHECK-LABEL: @avx2_pslli_w_15(
; CHECK-NEXT:    [[TMP1:%.*]] = shl <16 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
;
  %1 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %v, i32 15)
  ret <16 x i16> %1
}

define <16 x i16> @avx2_pslli_w_64(<16 x i16> %v) {
; CHECK-LABEL: @avx2_pslli_w_64(
; CHECK-NEXT:    ret <16 x i16> zeroinitializer
;
  %1 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %v, i32 64)
  ret <16 x i16> %1
}

define <8 x i32> @avx2_pslli_d_0(<8 x i32> %v) {
; CHECK-LABEL: @avx2_pslli_d_0(
; CHECK-NEXT:    ret <8 x i32> [[V:%.*]]
;
  %1 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %v, i32 0)
  ret <8 x i32> %1
}

define <8 x i32> @avx2_pslli_d_15(<8 x i32> %v) {
; CHECK-LABEL: @avx2_pslli_d_15(
; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i32> [[V:%.*]], <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
;
  %1 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %v, i32 15)
  ret <8 x i32> %1
}

define <8 x i32> @avx2_pslli_d_64(<8 x i32> %v) {
; CHECK-LABEL: @avx2_pslli_d_64(
; CHECK-NEXT:    ret <8 x i32> zeroinitializer
;
  %1 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %v, i32 64)
  ret <8 x i32> %1
}

define <4 x i64> @avx2_pslli_q_0(<4 x i64> %v) {
; CHECK-LABEL: @avx2_pslli_q_0(
; CHECK-NEXT:    ret <4 x i64> [[V:%.*]]
;
  %1 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %v, i32 0)
  ret <4 x i64> %1
}

define <4 x i64> @avx2_pslli_q_15(<4 x i64> %v) {
; CHECK-LABEL: @avx2_pslli_q_15(
; CHECK-NEXT:    [[TMP1:%.*]] = shl <4 x i64> [[V:%.*]], <i64 15, i64 15, i64 15, i64 15>
; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
;
  %1 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %v, i32 15)
  ret <4 x i64> %1
}

define <4 x i64> @avx2_pslli_q_64(<4 x i64> %v) {
; CHECK-LABEL: @avx2_pslli_q_64(
; CHECK-NEXT:    ret <4 x i64> zeroinitializer
;
  %1 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %v, i32 64)
  ret <4 x i64> %1
}

define <32 x i16> @avx512_pslli_w_512_0(<32 x i16> %v) {
; CHECK-LABEL: @avx512_pslli_w_512_0(
; CHECK-NEXT:    ret <32 x i16> [[V:%.*]]
;
  %1 = tail call <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16> %v, i32 0)
  ret <32 x i16> %1
}

define <32 x i16> @avx512_pslli_w_512_15(<32 x i16> %v) {
; CHECK-LABEL: @avx512_pslli_w_512_15(
; CHECK-NEXT:    [[TMP1:%.*]] = shl <32 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
;
  %1 = tail call <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16> %v, i32 15)
  ret <32 x i16> %1
}

define <32 x i16> @avx512_pslli_w_512_64(<32 x i16> %v) {
; CHECK-LABEL: @avx512_pslli_w_512_64(
; CHECK-NEXT:    ret <32 x i16> zeroinitializer
;
  %1 = tail call <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16> %v, i32 64)
  ret <32 x i16> %1
}

define <16 x i32> @avx512_pslli_d_512_0(<16 x i32> %v) {
; CHECK-LABEL: @avx512_pslli_d_512_0(
; CHECK-NEXT:    ret <16 x i32> [[V:%.*]]
;
  %1 = tail call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> %v, i32 0)
  ret <16 x i32> %1
}

define <16 x i32> @avx512_pslli_d_512_15(<16 x i32> %v) {
; CHECK-LABEL: @avx512_pslli_d_512_15(
; CHECK-NEXT:    [[TMP1:%.*]] = shl <16 x i32> [[V:%.*]], <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
;
  %1 = tail call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> %v, i32 15)
  ret <16 x i32> %1
}

define <16 x i32> @avx512_pslli_d_512_64(<16 x i32> %v) {
; CHECK-LABEL: @avx512_pslli_d_512_64(
; CHECK-NEXT:    ret <16 x i32> zeroinitializer
;
  %1 = tail call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> %v, i32 64)
  ret <16 x i32> %1
}

define <8 x i64> @avx512_pslli_q_512_0(<8 x i64> %v) {
; CHECK-LABEL: @avx512_pslli_q_512_0(
; CHECK-NEXT:    ret <8 x i64> [[V:%.*]]
;
  %1 = tail call <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64> %v, i32 0)
  ret <8 x i64> %1
}

define <8 x i64> @avx512_pslli_q_512_15(<8 x i64> %v) {
; CHECK-LABEL: @avx512_pslli_q_512_15(
; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i64> [[V:%.*]], <i64 15, i64 15, i64 15, i64 15, i64 15, i64 15, i64 15, i64 15>
; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
;
  %1 = tail call <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64> %v, i32 15)
  ret <8 x i64> %1
}

define <8 x i64> @avx512_pslli_q_512_64(<8 x i64> %v) {
; CHECK-LABEL: @avx512_pslli_q_512_64(
; CHECK-NEXT:    ret <8 x i64> zeroinitializer
;
  %1 = tail call <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64> %v, i32 64)
  ret <8 x i64> %1
}

;
; ASHR - Constant Vector
;
; The shift count is taken from the low 64 bits of the second operand; the
; upper elements (9999 below) are ignored.

define <8 x i16> @sse2_psra_w_0(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psra_w_0(
; CHECK-NEXT:    ret <8 x i16> [[V:%.*]]
;
  %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> zeroinitializer)
  ret <8 x i16> %1
}

define <8 x i16> @sse2_psra_w_15(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psra_w_15(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
;
  %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <8 x i16> %1
}

define <8 x i16> @sse2_psra_w_15_splat(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psra_w_15_splat(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
;
  %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
  ret <8 x i16> %1
}

define <8 x i16> @sse2_psra_w_64(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psra_w_64(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
;
  %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <8 x i16> %1
}

define <4 x i32> @sse2_psra_d_0(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psra_d_0(
; CHECK-NEXT:    ret <4 x i32> [[V:%.*]]
;
  %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> zeroinitializer)
  ret <4 x i32> %1
}

define <4 x i32> @sse2_psra_d_15(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psra_d_15(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], <i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
;
  %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
  ret <4 x i32> %1
}

define <4 x i32> @sse2_psra_d_15_splat(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psra_d_15_splat(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], <i32 31, i32 31, i32 31, i32 31>
; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
;
  %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
  ret <4 x i32> %1
}

define <4 x i32> @sse2_psra_d_64(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psra_d_64(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], <i32 31, i32 31, i32 31, i32 31>
; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
;
  %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
  ret <4 x i32> %1
}

define <16 x i16> @avx2_psra_w_0(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psra_w_0(
; CHECK-NEXT:    ret <16 x i16> [[V:%.*]]
;
  %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> zeroinitializer)
  ret <16 x i16> %1
}

define <16 x i16> @avx2_psra_w_15(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psra_w_15(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
;
  %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <16 x i16> %1
}

define <16 x i16> @avx2_psra_w_15_splat(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psra_w_15_splat(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
;
  %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
  ret <16 x i16> %1
}

define <16 x i16> @avx2_psra_w_64(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psra_w_64(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
;
  %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <16 x i16> %1
}

define <8 x i32> @avx2_psra_d_0(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psra_d_0(
; CHECK-NEXT:    ret <8 x i32> [[V:%.*]]
;
  %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> zeroinitializer)
  ret <8 x i32> %1
}

define <8 x i32> @avx2_psra_d_15(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psra_d_15(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
;
  %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
  ret <8 x i32> %1
}

define <8 x i32> @avx2_psra_d_15_splat(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psra_d_15_splat(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
;
  %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
  ret <8 x i32> %1
}

define <8 x i32> @avx2_psra_d_64(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psra_d_64(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
;
  %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
  ret <8 x i32> %1
}

define <2 x i64> @avx512_psra_q_128_0(<2 x i64> %v) {
; CHECK-LABEL: @avx512_psra_q_128_0(
; CHECK-NEXT:    ret <2 x i64> [[V:%.*]]
;
  %1 = tail call <2 x i64> @llvm.x86.avx512.psra.q.128(<2 x i64> %v, <2 x i64> zeroinitializer)
  ret <2 x i64> %1
}

define <2 x i64> @avx512_psra_q_128_15(<2 x i64> %v) {
; CHECK-LABEL: @avx512_psra_q_128_15(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i64> [[V:%.*]], <i64 15, i64 15>
; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
;
  %1 = tail call <2 x i64> @llvm.x86.avx512.psra.q.128(<2 x i64> %v, <2 x i64> <i64 15, i64 9999>)
  ret <2 x i64> %1
}

define <2 x i64> @avx512_psra_q_128_64(<2 x i64> %v) {
; CHECK-LABEL: @avx512_psra_q_128_64(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i64> [[V:%.*]], <i64 63, i64 63>
; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
;
  %1 = tail call <2 x i64> @llvm.x86.avx512.psra.q.128(<2 x i64> %v, <2 x i64> <i64 64, i64 9999>)
  ret <2 x i64> %1
}

define <4 x i64> @avx512_psra_q_256_0(<4 x i64> %v) {
; CHECK-LABEL: @avx512_psra_q_256_0(
; CHECK-NEXT:    ret <4 x i64> [[V:%.*]]
;
  %1 = tail call <4 x i64> @llvm.x86.avx512.psra.q.256(<4 x i64> %v, <2 x i64> zeroinitializer)
  ret <4 x i64> %1
}

define <4 x i64> @avx512_psra_q_256_15(<4 x i64> %v) {
; CHECK-LABEL: @avx512_psra_q_256_15(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i64> [[V:%.*]], <i64 15, i64 15, i64 15, i64 15>
; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
;
  %1 = tail call <4 x i64> @llvm.x86.avx512.psra.q.256(<4 x i64> %v, <2 x i64> <i64 15, i64 9999>)
  ret <4 x i64> %1
}

define <4 x i64> @avx512_psra_q_256_64(<4 x i64> %v) {
; CHECK-LABEL: @avx512_psra_q_256_64(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i64> [[V:%.*]], <i64 63, i64 63, i64 63, i64 63>
; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
;
  %1 = tail call <4 x i64> @llvm.x86.avx512.psra.q.256(<4 x i64> %v, <2 x i64> <i64 64, i64 9999>)
  ret <4 x i64> %1
}

define <32 x i16> @avx512_psra_w_512_0(<32 x i16> %v) {
; CHECK-LABEL: @avx512_psra_w_512_0(
; CHECK-NEXT:    ret <32 x i16> [[V:%.*]]
;
  %1 = tail call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %v, <8 x i16> zeroinitializer)
  ret <32 x i16> %1
}

define <32 x i16> @avx512_psra_w_512_15(<32 x i16> %v) {
; CHECK-LABEL: @avx512_psra_w_512_15(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
;
  %1 = tail call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <32 x i16> %1
}

define <32 x i16> @avx512_psra_w_512_15_splat(<32 x i16> %v) {
; CHECK-LABEL: @avx512_psra_w_512_15_splat(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
;
  %1 = tail call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
  ret <32 x i16> %1
}

define <32 x i16> @avx512_psra_w_512_64(<32 x i16> %v) {
; CHECK-LABEL: @avx512_psra_w_512_64(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
;
  %1 = tail call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <32 x i16> %1
}

define <16 x i32> @avx512_psra_d_512_0(<16 x i32> %v) {
; CHECK-LABEL: @avx512_psra_d_512_0(
; CHECK-NEXT:    ret <16 x i32> [[V:%.*]]
;
  %1 = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %v, <4 x i32> zeroinitializer)
  ret <16 x i32> %1
}

define <16 x i32> @avx512_psra_d_512_15(<16 x i32> %v) {
; CHECK-LABEL: @avx512_psra_d_512_15(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
;
  %1 = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
  ret <16 x i32> %1
}

define <16 x i32> @avx512_psra_d_512_15_splat(<16 x i32> %v) {
; CHECK-LABEL: @avx512_psra_d_512_15_splat(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
;
  %1 = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
  ret <16 x i32> %1
}

define <16 x i32> @avx512_psra_d_512_64(<16 x i32> %v) {
; CHECK-LABEL: @avx512_psra_d_512_64(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
;
  %1 = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
  ret <16 x i32> %1
}

define <8 x i64> @avx512_psra_q_512_0(<8 x i64> %v) {
; CHECK-LABEL: @avx512_psra_q_512_0(
; CHECK-NEXT:    ret <8 x i64> [[V:%.*]]
;
  %1 = tail call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> %v, <2 x i64> zeroinitializer)
  ret <8 x i64> %1
}

define <8 x i64> @avx512_psra_q_512_15(<8 x i64> %v) {
; CHECK-LABEL: @avx512_psra_q_512_15(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i64> [[V:%.*]], <i64 15, i64 15, i64 15, i64 15, i64 15, i64 15, i64 15, i64 15>
; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
;
  %1 = tail call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> %v, <2 x i64> <i64 15, i64 9999>)
  ret <8 x i64> %1
}

define <8 x i64> @avx512_psra_q_512_64(<8 x i64> %v) {
; CHECK-LABEL: @avx512_psra_q_512_64(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i64> [[V:%.*]], <i64 63, i64 63, i64 63, i64 63, i64 63, i64 63, i64 63, i64 63>
; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
;
  %1 = tail call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> %v, <2 x i64> <i64 64, i64 9999>)
  ret <8 x i64> %1
}

;
; LSHR - Constant Vector
;

define <8 x i16> @sse2_psrl_w_0(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psrl_w_0(
; CHECK-NEXT:    ret <8 x i16> [[V:%.*]]
;
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> zeroinitializer)
  ret <8 x i16> %1
}

define <8 x i16> @sse2_psrl_w_15(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psrl_w_15(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
;
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <8 x i16> %1
}

; Splatted count makes the low 64 bits an out-of-range shift amount, so the
; logical shift folds to zero.
define <8 x i16> @sse2_psrl_w_15_splat(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psrl_w_15_splat(
; CHECK-NEXT:    ret <8 x i16> zeroinitializer
;
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
  ret <8 x i16> %1
}

define <8 x i16> @sse2_psrl_w_64(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psrl_w_64(
; CHECK-NEXT:    ret <8 x i16> zeroinitializer
;
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <8 x i16> %1
}

define <4 x i32> @sse2_psrl_d_0(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psrl_d_0(
; CHECK-NEXT:    ret <4 x i32> [[V:%.*]]
;
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> zeroinitializer)
  ret <4 x i32> %1
}

define <4 x i32> @sse2_psrl_d_15(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psrl_d_15(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i32> [[V:%.*]], <i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
;
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
  ret <4 x i32> %1
}

define <4 x i32> @sse2_psrl_d_15_splat(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psrl_d_15_splat(
; CHECK-NEXT:    ret <4 x i32> zeroinitializer
;
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
  ret <4 x i32> %1
}

define <4 x i32> @sse2_psrl_d_64(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psrl_d_64(
; CHECK-NEXT:    ret <4 x i32> zeroinitializer
;
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
  ret <4 x i32> %1
}

define <2 x i64> @sse2_psrl_q_0(<2 x i64> %v) {
; CHECK-LABEL: @sse2_psrl_q_0(
; CHECK-NEXT:    ret <2 x i64> [[V:%.*]]
;
  %1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> zeroinitializer)
  ret <2 x i64> %1
}

define <2 x i64> @sse2_psrl_q_15(<2 x i64> %v) {
; CHECK-LABEL: @sse2_psrl_q_15(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i64> [[V:%.*]], <i64 15, i64 15>
; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
;
  %1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> <i64 15, i64 9999>)
  ret <2 x i64> %1
}

define <2 x i64> @sse2_psrl_q_64(<2 x i64> %v) {
; CHECK-LABEL: @sse2_psrl_q_64(
; CHECK-NEXT:    ret <2 x i64> zeroinitializer
;
  %1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> <i64 64, i64 9999>)
  ret <2 x i64> %1
}

define <16 x i16> @avx2_psrl_w_0(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psrl_w_0(
; CHECK-NEXT:    ret <16 x i16> [[V:%.*]]
;
  %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x
i16> zeroinitializer) ret <16 x i16> %1 } define <16 x i16> @avx2_psrl_w_15(<16 x i16> %v) { ; CHECK-LABEL: @avx2_psrl_w_15( ; CHECK-NEXT: [[TMP1:%.*]] = lshr <16 x i16> [[V:%.*]], ; CHECK-NEXT: ret <16 x i16> [[TMP1]] ; %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> ) ret <16 x i16> %1 } define <16 x i16> @avx2_psrl_w_15_splat(<16 x i16> %v) { ; CHECK-LABEL: @avx2_psrl_w_15_splat( ; CHECK-NEXT: ret <16 x i16> zeroinitializer ; %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> ) ret <16 x i16> %1 } define <16 x i16> @avx2_psrl_w_64(<16 x i16> %v) { ; CHECK-LABEL: @avx2_psrl_w_64( ; CHECK-NEXT: ret <16 x i16> zeroinitializer ; %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> ) ret <16 x i16> %1 } define <8 x i32> @avx2_psrl_d_0(<8 x i32> %v) { ; CHECK-LABEL: @avx2_psrl_d_0( ; CHECK-NEXT: ret <8 x i32> [[V:%.*]] ; %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> zeroinitializer) ret <8 x i32> %1 } define <8 x i32> @avx2_psrl_d_15(<8 x i32> %v) { ; CHECK-LABEL: @avx2_psrl_d_15( ; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i32> [[V:%.*]], ; CHECK-NEXT: ret <8 x i32> [[TMP1]] ; %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> ) ret <8 x i32> %1 } define <8 x i32> @avx2_psrl_d_15_splat(<8 x i32> %v) { ; CHECK-LABEL: @avx2_psrl_d_15_splat( ; CHECK-NEXT: ret <8 x i32> zeroinitializer ; %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> ) ret <8 x i32> %1 } define <8 x i32> @avx2_psrl_d_64(<8 x i32> %v) { ; CHECK-LABEL: @avx2_psrl_d_64( ; CHECK-NEXT: ret <8 x i32> zeroinitializer ; %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> ) ret <8 x i32> %1 } define <4 x i64> @avx2_psrl_q_0(<4 x i64> %v) { ; CHECK-LABEL: @avx2_psrl_q_0( ; CHECK-NEXT: ret <4 x i64> [[V:%.*]] ; %1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> zeroinitializer) ret <4 x i64> %1 } define <4 x i64> @avx2_psrl_q_15(<4 x i64> %v) { ; 
CHECK-LABEL: @avx2_psrl_q_15( ; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i64> [[V:%.*]], ; CHECK-NEXT: ret <4 x i64> [[TMP1]] ; %1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> ) ret <4 x i64> %1 } define <4 x i64> @avx2_psrl_q_64(<4 x i64> %v) { ; CHECK-LABEL: @avx2_psrl_q_64( ; CHECK-NEXT: ret <4 x i64> zeroinitializer ; %1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> ) ret <4 x i64> %1 } define <32 x i16> @avx512_psrl_w_512_0(<32 x i16> %v) { ; CHECK-LABEL: @avx512_psrl_w_512_0( ; CHECK-NEXT: ret <32 x i16> [[V:%.*]] ; %1 = tail call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %v, <8 x i16> zeroinitializer) ret <32 x i16> %1 } define <32 x i16> @avx512_psrl_w_512_15(<32 x i16> %v) { ; CHECK-LABEL: @avx512_psrl_w_512_15( ; CHECK-NEXT: [[TMP1:%.*]] = lshr <32 x i16> [[V:%.*]], ; CHECK-NEXT: ret <32 x i16> [[TMP1]] ; %1 = tail call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %v, <8 x i16> ) ret <32 x i16> %1 } define <32 x i16> @avx512_psrl_w_512_15_splat(<32 x i16> %v) { ; CHECK-LABEL: @avx512_psrl_w_512_15_splat( ; CHECK-NEXT: ret <32 x i16> zeroinitializer ; %1 = tail call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %v, <8 x i16> ) ret <32 x i16> %1 } define <32 x i16> @avx512_psrl_w_512_64(<32 x i16> %v) { ; CHECK-LABEL: @avx512_psrl_w_512_64( ; CHECK-NEXT: ret <32 x i16> zeroinitializer ; %1 = tail call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %v, <8 x i16> ) ret <32 x i16> %1 } define <16 x i32> @avx512_psrl_d_512_0(<16 x i32> %v) { ; CHECK-LABEL: @avx512_psrl_d_512_0( ; CHECK-NEXT: ret <16 x i32> [[V:%.*]] ; %1 = tail call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> %v, <4 x i32> zeroinitializer) ret <16 x i32> %1 } define <16 x i32> @avx512_psrl_d_512_15(<16 x i32> %v) { ; CHECK-LABEL: @avx512_psrl_d_512_15( ; CHECK-NEXT: [[TMP1:%.*]] = lshr <16 x i32> [[V:%.*]], ; CHECK-NEXT: ret <16 x i32> [[TMP1]] ; %1 = tail call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> %v, <4 x i32> ) 
ret <16 x i32> %1 } define <16 x i32> @avx512_psrl_d_512_15_splat(<16 x i32> %v) { ; CHECK-LABEL: @avx512_psrl_d_512_15_splat( ; CHECK-NEXT: ret <16 x i32> zeroinitializer ; %1 = tail call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> %v, <4 x i32> ) ret <16 x i32> %1 } define <16 x i32> @avx512_psrl_d_512_64(<16 x i32> %v) { ; CHECK-LABEL: @avx512_psrl_d_512_64( ; CHECK-NEXT: ret <16 x i32> zeroinitializer ; %1 = tail call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> %v, <4 x i32> ) ret <16 x i32> %1 } define <8 x i64> @avx512_psrl_q_512_0(<8 x i64> %v) { ; CHECK-LABEL: @avx512_psrl_q_512_0( ; CHECK-NEXT: ret <8 x i64> [[V:%.*]] ; %1 = tail call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> %v, <2 x i64> zeroinitializer) ret <8 x i64> %1 } define <8 x i64> @avx512_psrl_q_512_15(<8 x i64> %v) { ; CHECK-LABEL: @avx512_psrl_q_512_15( ; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i64> [[V:%.*]], ; CHECK-NEXT: ret <8 x i64> [[TMP1]] ; %1 = tail call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> %v, <2 x i64> ) ret <8 x i64> %1 } define <8 x i64> @avx512_psrl_q_512_64(<8 x i64> %v) { ; CHECK-LABEL: @avx512_psrl_q_512_64( ; CHECK-NEXT: ret <8 x i64> zeroinitializer ; %1 = tail call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> %v, <2 x i64> ) ret <8 x i64> %1 } ; ; SHL - Constant Vector ; define <8 x i16> @sse2_psll_w_0(<8 x i16> %v) { ; CHECK-LABEL: @sse2_psll_w_0( ; CHECK-NEXT: ret <8 x i16> [[V:%.*]] ; %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> zeroinitializer) ret <8 x i16> %1 } define <8 x i16> @sse2_psll_w_15(<8 x i16> %v) { ; CHECK-LABEL: @sse2_psll_w_15( ; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i16> [[V:%.*]], ; CHECK-NEXT: ret <8 x i16> [[TMP1]] ; %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> ) ret <8 x i16> %1 } define <8 x i16> @sse2_psll_w_15_splat(<8 x i16> %v) { ; CHECK-LABEL: @sse2_psll_w_15_splat( ; CHECK-NEXT: ret <8 x i16> zeroinitializer ; %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> 
%v, <8 x i16> ) ret <8 x i16> %1 } define <8 x i16> @sse2_psll_w_64(<8 x i16> %v) { ; CHECK-LABEL: @sse2_psll_w_64( ; CHECK-NEXT: ret <8 x i16> zeroinitializer ; %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> ) ret <8 x i16> %1 } define <4 x i32> @sse2_psll_d_0(<4 x i32> %v) { ; CHECK-LABEL: @sse2_psll_d_0( ; CHECK-NEXT: ret <4 x i32> [[V:%.*]] ; %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> zeroinitializer) ret <4 x i32> %1 } define <4 x i32> @sse2_psll_d_15(<4 x i32> %v) { ; CHECK-LABEL: @sse2_psll_d_15( ; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i32> [[V:%.*]], ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> ) ret <4 x i32> %1 } define <4 x i32> @sse2_psll_d_15_splat(<4 x i32> %v) { ; CHECK-LABEL: @sse2_psll_d_15_splat( ; CHECK-NEXT: ret <4 x i32> zeroinitializer ; %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> ) ret <4 x i32> %1 } define <4 x i32> @sse2_psll_d_64(<4 x i32> %v) { ; CHECK-LABEL: @sse2_psll_d_64( ; CHECK-NEXT: ret <4 x i32> zeroinitializer ; %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> ) ret <4 x i32> %1 } define <2 x i64> @sse2_psll_q_0(<2 x i64> %v) { ; CHECK-LABEL: @sse2_psll_q_0( ; CHECK-NEXT: ret <2 x i64> [[V:%.*]] ; %1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> zeroinitializer) ret <2 x i64> %1 } define <2 x i64> @sse2_psll_q_15(<2 x i64> %v) { ; CHECK-LABEL: @sse2_psll_q_15( ; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i64> [[V:%.*]], ; CHECK-NEXT: ret <2 x i64> [[TMP1]] ; %1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> ) ret <2 x i64> %1 } define <2 x i64> @sse2_psll_q_64(<2 x i64> %v) { ; CHECK-LABEL: @sse2_psll_q_64( ; CHECK-NEXT: ret <2 x i64> zeroinitializer ; %1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> ) ret <2 x i64> %1 } define <16 x i16> @avx2_psll_w_0(<16 x i16> %v) { ; CHECK-LABEL: @avx2_psll_w_0( ; 
CHECK-NEXT: ret <16 x i16> [[V:%.*]] ; %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> zeroinitializer) ret <16 x i16> %1 } define <16 x i16> @avx2_psll_w_15(<16 x i16> %v) { ; CHECK-LABEL: @avx2_psll_w_15( ; CHECK-NEXT: [[TMP1:%.*]] = shl <16 x i16> [[V:%.*]], ; CHECK-NEXT: ret <16 x i16> [[TMP1]] ; %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> ) ret <16 x i16> %1 } define <16 x i16> @avx2_psll_w_15_splat(<16 x i16> %v) { ; CHECK-LABEL: @avx2_psll_w_15_splat( ; CHECK-NEXT: ret <16 x i16> zeroinitializer ; %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> ) ret <16 x i16> %1 } define <16 x i16> @avx2_psll_w_64(<16 x i16> %v) { ; CHECK-LABEL: @avx2_psll_w_64( ; CHECK-NEXT: ret <16 x i16> zeroinitializer ; %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> ) ret <16 x i16> %1 } define <8 x i32> @avx2_psll_d_0(<8 x i32> %v) { ; CHECK-LABEL: @avx2_psll_d_0( ; CHECK-NEXT: ret <8 x i32> [[V:%.*]] ; %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> zeroinitializer) ret <8 x i32> %1 } define <8 x i32> @avx2_psll_d_15(<8 x i32> %v) { ; CHECK-LABEL: @avx2_psll_d_15( ; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i32> [[V:%.*]], ; CHECK-NEXT: ret <8 x i32> [[TMP1]] ; %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> ) ret <8 x i32> %1 } define <8 x i32> @avx2_psll_d_15_splat(<8 x i32> %v) { ; CHECK-LABEL: @avx2_psll_d_15_splat( ; CHECK-NEXT: ret <8 x i32> zeroinitializer ; %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> ) ret <8 x i32> %1 } define <8 x i32> @avx2_psll_d_64(<8 x i32> %v) { ; CHECK-LABEL: @avx2_psll_d_64( ; CHECK-NEXT: ret <8 x i32> zeroinitializer ; %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> ) ret <8 x i32> %1 } define <4 x i64> @avx2_psll_q_0(<4 x i64> %v) { ; CHECK-LABEL: @avx2_psll_q_0( ; CHECK-NEXT: ret <4 x i64> [[V:%.*]] ; %1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x 
i64> %v, <2 x i64> zeroinitializer) ret <4 x i64> %1 } define <4 x i64> @avx2_psll_q_15(<4 x i64> %v) { ; CHECK-LABEL: @avx2_psll_q_15( ; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i64> [[V:%.*]], ; CHECK-NEXT: ret <4 x i64> [[TMP1]] ; %1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> ) ret <4 x i64> %1 } define <4 x i64> @avx2_psll_q_64(<4 x i64> %v) { ; CHECK-LABEL: @avx2_psll_q_64( ; CHECK-NEXT: ret <4 x i64> zeroinitializer ; %1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> ) ret <4 x i64> %1 } define <32 x i16> @avx512_psll_w_512_0(<32 x i16> %v) { ; CHECK-LABEL: @avx512_psll_w_512_0( ; CHECK-NEXT: ret <32 x i16> [[V:%.*]] ; %1 = tail call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> %v, <8 x i16> zeroinitializer) ret <32 x i16> %1 } define <32 x i16> @avx512_psll_w_512_15(<32 x i16> %v) { ; CHECK-LABEL: @avx512_psll_w_512_15( ; CHECK-NEXT: [[TMP1:%.*]] = shl <32 x i16> [[V:%.*]], ; CHECK-NEXT: ret <32 x i16> [[TMP1]] ; %1 = tail call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> %v, <8 x i16> ) ret <32 x i16> %1 } define <32 x i16> @avx512_psll_w_15_512_splat(<32 x i16> %v) { ; CHECK-LABEL: @avx512_psll_w_15_512_splat( ; CHECK-NEXT: ret <32 x i16> zeroinitializer ; %1 = tail call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> %v, <8 x i16> ) ret <32 x i16> %1 } define <32 x i16> @avx512_psll_w_512_64(<32 x i16> %v) { ; CHECK-LABEL: @avx512_psll_w_512_64( ; CHECK-NEXT: ret <32 x i16> zeroinitializer ; %1 = tail call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> %v, <8 x i16> ) ret <32 x i16> %1 } define <16 x i32> @avx512_psll_d_512_0(<16 x i32> %v) { ; CHECK-LABEL: @avx512_psll_d_512_0( ; CHECK-NEXT: ret <16 x i32> [[V:%.*]] ; %1 = tail call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> %v, <4 x i32> zeroinitializer) ret <16 x i32> %1 } define <16 x i32> @avx512_psll_d_512_15(<16 x i32> %v) { ; CHECK-LABEL: @avx512_psll_d_512_15( ; CHECK-NEXT: [[TMP1:%.*]] = shl <16 x i32> [[V:%.*]], ; CHECK-NEXT: ret 
<16 x i32> [[TMP1]] ; %1 = tail call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> %v, <4 x i32> ) ret <16 x i32> %1 } define <16 x i32> @avx512_psll_d_512_15_splat(<16 x i32> %v) { ; CHECK-LABEL: @avx512_psll_d_512_15_splat( ; CHECK-NEXT: ret <16 x i32> zeroinitializer ; %1 = tail call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> %v, <4 x i32> ) ret <16 x i32> %1 } define <16 x i32> @avx512_psll_d_512_64(<16 x i32> %v) { ; CHECK-LABEL: @avx512_psll_d_512_64( ; CHECK-NEXT: ret <16 x i32> zeroinitializer ; %1 = tail call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> %v, <4 x i32> ) ret <16 x i32> %1 } define <8 x i64> @avx512_psll_q_512_0(<8 x i64> %v) { ; CHECK-LABEL: @avx512_psll_q_512_0( ; CHECK-NEXT: ret <8 x i64> [[V:%.*]] ; %1 = tail call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> %v, <2 x i64> zeroinitializer) ret <8 x i64> %1 } define <8 x i64> @avx512_psll_q_512_15(<8 x i64> %v) { ; CHECK-LABEL: @avx512_psll_q_512_15( ; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i64> [[V:%.*]], ; CHECK-NEXT: ret <8 x i64> [[TMP1]] ; %1 = tail call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> %v, <2 x i64> ) ret <8 x i64> %1 } define <8 x i64> @avx512_psll_q_512_64(<8 x i64> %v) { ; CHECK-LABEL: @avx512_psll_q_512_64( ; CHECK-NEXT: ret <8 x i64> zeroinitializer ; %1 = tail call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> %v, <2 x i64> ) ret <8 x i64> %1 } ; ; ASHR - Constant Per-Element Vector ; define <4 x i32> @avx2_psrav_d_128_0(<4 x i32> %v) { ; CHECK-LABEL: @avx2_psrav_d_128_0( ; CHECK-NEXT: ret <4 x i32> [[V:%.*]] ; %1 = tail call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %v, <4 x i32> zeroinitializer) ret <4 x i32> %1 } define <8 x i32> @avx2_psrav_d_256_0(<8 x i32> %v) { ; CHECK-LABEL: @avx2_psrav_d_256_0( ; CHECK-NEXT: ret <8 x i32> [[V:%.*]] ; %1 = tail call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %v, <8 x i32> zeroinitializer) ret <8 x i32> %1 } define <16 x i32> @avx512_psrav_d_512_0(<16 x i32> %v) { ; CHECK-LABEL: @avx512_psrav_d_512_0( 
; CHECK-NEXT: ret <16 x i32> [[V:%.*]] ; %1 = tail call <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32> %v, <16 x i32> zeroinitializer) ret <16 x i32> %1 } define <4 x i32> @avx2_psrav_d_128_var(<4 x i32> %v) { ; CHECK-LABEL: @avx2_psrav_d_128_var( ; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %1 = tail call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %v, <4 x i32> ) ret <4 x i32> %1 } define <8 x i32> @avx2_psrav_d_256_var(<8 x i32> %v) { ; CHECK-LABEL: @avx2_psrav_d_256_var( ; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], ; CHECK-NEXT: ret <8 x i32> [[TMP1]] ; %1 = tail call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %v, <8 x i32> ) ret <8 x i32> %1 } define <16 x i32> @avx512_psrav_d_512_var(<16 x i32> %v) { ; CHECK-LABEL: @avx512_psrav_d_512_var( ; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], ; CHECK-NEXT: ret <16 x i32> [[TMP1]] ; %1 = tail call <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32> %v, <16 x i32> ) ret <16 x i32> %1 } define <4 x i32> @avx2_psrav_d_128_allbig(<4 x i32> %v) { ; CHECK-LABEL: @avx2_psrav_d_128_allbig( ; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %1 = tail call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %v, <4 x i32> ) ret <4 x i32> %1 } define <8 x i32> @avx2_psrav_d_256_allbig(<8 x i32> %v) { ; CHECK-LABEL: @avx2_psrav_d_256_allbig( ; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], ; CHECK-NEXT: ret <8 x i32> [[TMP1]] ; %1 = tail call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %v, <8 x i32> ) ret <8 x i32> %1 } define <16 x i32> @avx512_psrav_d_512_allbig(<16 x i32> %v) { ; CHECK-LABEL: @avx512_psrav_d_512_allbig( ; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], ; CHECK-NEXT: ret <16 x i32> [[TMP1]] ; %1 = tail call <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32> %v, <16 x i32> ) ret <16 x i32> %1 } define <4 x i32> @avx2_psrav_d_128_undef(<4 x i32> %v) { ; CHECK-LABEL: @avx2_psrav_d_128_undef( ; 
CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %1 = insertelement <4 x i32> , i32 undef, i32 0 %2 = tail call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %v, <4 x i32> %1) ret <4 x i32> %2 } define <8 x i32> @avx2_psrav_d_256_undef(<8 x i32> %v) { ; CHECK-LABEL: @avx2_psrav_d_256_undef( ; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], ; CHECK-NEXT: ret <8 x i32> [[TMP1]] ; %1 = insertelement <8 x i32> , i32 undef, i32 1 %2 = tail call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %v, <8 x i32> %1) ret <8 x i32> %2 } define <16 x i32> @avx512_psrav_d_512_undef(<16 x i32> %v) { ; CHECK-LABEL: @avx512_psrav_d_512_undef( ; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], ; CHECK-NEXT: ret <16 x i32> [[TMP1]] ; %1 = insertelement <16 x i32> , i32 undef, i32 1 %2 = tail call <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32> %v, <16 x i32> %1) ret <16 x i32> %2 } define <2 x i64> @avx512_psrav_q_128_0(<2 x i64> %v) { ; CHECK-LABEL: @avx512_psrav_q_128_0( ; CHECK-NEXT: ret <2 x i64> [[V:%.*]] ; %1 = tail call <2 x i64> @llvm.x86.avx512.psrav.q.128(<2 x i64> %v, <2 x i64> zeroinitializer) ret <2 x i64> %1 } define <4 x i64> @avx512_psrav_q_256_0(<4 x i64> %v) { ; CHECK-LABEL: @avx512_psrav_q_256_0( ; CHECK-NEXT: ret <4 x i64> [[V:%.*]] ; %1 = tail call <4 x i64> @llvm.x86.avx512.psrav.q.256(<4 x i64> %v, <4 x i64> zeroinitializer) ret <4 x i64> %1 } define <2 x i64> @avx512_psrav_q_128_var(<2 x i64> %v) { ; CHECK-LABEL: @avx512_psrav_q_128_var( ; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i64> [[V:%.*]], ; CHECK-NEXT: ret <2 x i64> [[TMP1]] ; %1 = tail call <2 x i64> @llvm.x86.avx512.psrav.q.128(<2 x i64> %v, <2 x i64> ) ret <2 x i64> %1 } define <4 x i64> @avx512_psrav_q_256_var(<4 x i64> %v) { ; CHECK-LABEL: @avx512_psrav_q_256_var( ; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i64> [[V:%.*]], ; CHECK-NEXT: ret <4 x i64> [[TMP1]] ; %1 = tail call <4 x i64> @llvm.x86.avx512.psrav.q.256(<4 x i64> %v, <4 x i64> ) ret <4 x i64> %1 } 
define <2 x i64> @avx512_psrav_q_128_allbig(<2 x i64> %v) { ; CHECK-LABEL: @avx512_psrav_q_128_allbig( ; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i64> [[V:%.*]], ; CHECK-NEXT: ret <2 x i64> [[TMP1]] ; %1 = tail call <2 x i64> @llvm.x86.avx512.psrav.q.128(<2 x i64> %v, <2 x i64> ) ret <2 x i64> %1 } define <4 x i64> @avx512_psrav_q_256_allbig(<4 x i64> %v) { ; CHECK-LABEL: @avx512_psrav_q_256_allbig( ; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i64> [[V:%.*]], ; CHECK-NEXT: ret <4 x i64> [[TMP1]] ; %1 = tail call <4 x i64> @llvm.x86.avx512.psrav.q.256(<4 x i64> %v, <4 x i64> ) ret <4 x i64> %1 } define <2 x i64> @avx512_psrav_q_128_undef(<2 x i64> %v) { ; CHECK-LABEL: @avx512_psrav_q_128_undef( ; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i64> [[V:%.*]], ; CHECK-NEXT: ret <2 x i64> [[TMP1]] ; %1 = insertelement <2 x i64> , i64 undef, i64 0 %2 = tail call <2 x i64> @llvm.x86.avx512.psrav.q.128(<2 x i64> %v, <2 x i64> %1) ret <2 x i64> %2 } define <4 x i64> @avx512_psrav_q_256_undef(<4 x i64> %v) { ; CHECK-LABEL: @avx512_psrav_q_256_undef( ; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i64> [[V:%.*]], ; CHECK-NEXT: ret <4 x i64> [[TMP1]] ; %1 = insertelement <4 x i64> , i64 undef, i64 0 %2 = tail call <4 x i64> @llvm.x86.avx512.psrav.q.256(<4 x i64> %v, <4 x i64> %1) ret <4 x i64> %2 } define <8 x i64> @avx512_psrav_q_512_0(<8 x i64> %v) { ; CHECK-LABEL: @avx512_psrav_q_512_0( ; CHECK-NEXT: ret <8 x i64> [[V:%.*]] ; %1 = tail call <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64> %v, <8 x i64> zeroinitializer) ret <8 x i64> %1 } define <8 x i64> @avx512_psrav_q_512_var(<8 x i64> %v) { ; CHECK-LABEL: @avx512_psrav_q_512_var( ; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i64> [[V:%.*]], ; CHECK-NEXT: ret <8 x i64> [[TMP1]] ; %1 = tail call <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64> %v, <8 x i64> ) ret <8 x i64> %1 } define <8 x i64> @avx512_psrav_q_512_allbig(<8 x i64> %v) { ; CHECK-LABEL: @avx512_psrav_q_512_allbig( ; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i64> [[V:%.*]], ; CHECK-NEXT: ret <8 x 
i64> [[TMP1]] ; %1 = tail call <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64> %v, <8 x i64> ) ret <8 x i64> %1 } define <8 x i64> @avx512_psrav_q_512_undef(<8 x i64> %v) { ; CHECK-LABEL: @avx512_psrav_q_512_undef( ; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i64> [[V:%.*]], ; CHECK-NEXT: ret <8 x i64> [[TMP1]] ; %1 = insertelement <8 x i64> , i64 undef, i64 0 %2 = tail call <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64> %v, <8 x i64> %1) ret <8 x i64> %2 } define <8 x i16> @avx512_psrav_w_128_0(<8 x i16> %v) { ; CHECK-LABEL: @avx512_psrav_w_128_0( ; CHECK-NEXT: ret <8 x i16> [[V:%.*]] ; %1 = tail call <8 x i16> @llvm.x86.avx512.psrav.w.128(<8 x i16> %v, <8 x i16> zeroinitializer) ret <8 x i16> %1 } define <8 x i16> @avx512_psrav_w_128_var(<8 x i16> %v) { ; CHECK-LABEL: @avx512_psrav_w_128_var( ; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], ; CHECK-NEXT: ret <8 x i16> [[TMP1]] ; %1 = tail call <8 x i16> @llvm.x86.avx512.psrav.w.128(<8 x i16> %v, <8 x i16> ) ret <8 x i16> %1 } define <8 x i16> @avx512_psrav_w_128_allbig(<8 x i16> %v) { ; CHECK-LABEL: @avx512_psrav_w_128_allbig( ; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], ; CHECK-NEXT: ret <8 x i16> [[TMP1]] ; %1 = tail call <8 x i16> @llvm.x86.avx512.psrav.w.128(<8 x i16> %v, <8 x i16> ) ret <8 x i16> %1 } define <8 x i16> @avx512_psrav_w_128_undef(<8 x i16> %v) { ; CHECK-LABEL: @avx512_psrav_w_128_undef( ; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], ; CHECK-NEXT: ret <8 x i16> [[TMP1]] ; %1 = insertelement <8 x i16> , i16 undef, i64 0 %2 = tail call <8 x i16> @llvm.x86.avx512.psrav.w.128(<8 x i16> %v, <8 x i16> %1) ret <8 x i16> %2 } define <16 x i16> @avx512_psrav_w_256_0(<16 x i16> %v) { ; CHECK-LABEL: @avx512_psrav_w_256_0( ; CHECK-NEXT: ret <16 x i16> [[V:%.*]] ; %1 = tail call <16 x i16> @llvm.x86.avx512.psrav.w.256(<16 x i16> %v, <16 x i16> zeroinitializer) ret <16 x i16> %1 } define <16 x i16> @avx512_psrav_w_256_var(<16 x i16> %v) { ; CHECK-LABEL: @avx512_psrav_w_256_var( ; 
CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], ; CHECK-NEXT: ret <16 x i16> [[TMP1]] ; %1 = tail call <16 x i16> @llvm.x86.avx512.psrav.w.256(<16 x i16> %v, <16 x i16> ) ret <16 x i16> %1 } define <16 x i16> @avx512_psrav_w_256_allbig(<16 x i16> %v) { ; CHECK-LABEL: @avx512_psrav_w_256_allbig( ; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], ; CHECK-NEXT: ret <16 x i16> [[TMP1]] ; %1 = tail call <16 x i16> @llvm.x86.avx512.psrav.w.256(<16 x i16> %v, <16 x i16> ) ret <16 x i16> %1 } define <16 x i16> @avx512_psrav_w_256_undef(<16 x i16> %v) { ; CHECK-LABEL: @avx512_psrav_w_256_undef( ; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], ; CHECK-NEXT: ret <16 x i16> [[TMP1]] ; %1 = insertelement <16 x i16> , i16 undef, i64 0 %2 = tail call <16 x i16> @llvm.x86.avx512.psrav.w.256(<16 x i16> %v, <16 x i16> %1) ret <16 x i16> %2 } define <32 x i16> @avx512_psrav_w_512_0(<32 x i16> %v) { ; CHECK-LABEL: @avx512_psrav_w_512_0( ; CHECK-NEXT: ret <32 x i16> [[V:%.*]] ; %1 = tail call <32 x i16> @llvm.x86.avx512.psrav.w.512(<32 x i16> %v, <32 x i16> zeroinitializer) ret <32 x i16> %1 } define <32 x i16> @avx512_psrav_w_512_var(<32 x i16> %v) { ; CHECK-LABEL: @avx512_psrav_w_512_var( ; CHECK-NEXT: [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], ; CHECK-NEXT: ret <32 x i16> [[TMP1]] ; %1 = tail call <32 x i16> @llvm.x86.avx512.psrav.w.512(<32 x i16> %v, <32 x i16> ) ret <32 x i16> %1 } define <32 x i16> @avx512_psrav_w_512_allbig(<32 x i16> %v) { ; CHECK-LABEL: @avx512_psrav_w_512_allbig( ; CHECK-NEXT: [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], ; CHECK-NEXT: ret <32 x i16> [[TMP1]] ; %1 = tail call <32 x i16> @llvm.x86.avx512.psrav.w.512(<32 x i16> %v, <32 x i16> ) ret <32 x i16> %1 } define <32 x i16> @avx512_psrav_w_512_undef(<32 x i16> %v) { ; CHECK-LABEL: @avx512_psrav_w_512_undef( ; CHECK-NEXT: [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], ; CHECK-NEXT: ret <32 x i16> [[TMP1]] ; %1 = insertelement <32 x i16> , i16 undef, i64 0 %2 = tail call <32 x i16> 
@llvm.x86.avx512.psrav.w.512(<32 x i16> %v, <32 x i16> %1) ret <32 x i16> %2 } ; ; LSHR - Constant Per-Element Vector ; define <4 x i32> @avx2_psrlv_d_128_0(<4 x i32> %v) { ; CHECK-LABEL: @avx2_psrlv_d_128_0( ; CHECK-NEXT: ret <4 x i32> [[V:%.*]] ; %1 = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %v, <4 x i32> zeroinitializer) ret <4 x i32> %1 } define <8 x i32> @avx2_psrlv_d_256_0(<8 x i32> %v) { ; CHECK-LABEL: @avx2_psrlv_d_256_0( ; CHECK-NEXT: ret <8 x i32> [[V:%.*]] ; %1 = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> zeroinitializer) ret <8 x i32> %1 } define <4 x i32> @avx2_psrlv_d_128_var(<4 x i32> %v) { ; CHECK-LABEL: @avx2_psrlv_d_128_var( ; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> [[V:%.*]], ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %1 = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %v, <4 x i32> ) ret <4 x i32> %1 } define <8 x i32> @avx2_psrlv_d_256_var(<8 x i32> %v) { ; CHECK-LABEL: @avx2_psrlv_d_256_var( ; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i32> [[V:%.*]], ; CHECK-NEXT: ret <8 x i32> [[TMP1]] ; %1 = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> ) ret <8 x i32> %1 } define <4 x i32> @avx2_psrlv_d_128_big(<4 x i32> %v) { ; CHECK-LABEL: @avx2_psrlv_d_128_big( ; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> [[V:%.*]], <4 x i32> ) ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %1 = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %v, <4 x i32> ) ret <4 x i32> %1 } define <8 x i32> @avx2_psrlv_d_256_big(<8 x i32> %v) { ; CHECK-LABEL: @avx2_psrlv_d_256_big( ; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> [[V:%.*]], <8 x i32> ) ; CHECK-NEXT: ret <8 x i32> [[TMP1]] ; %1 = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> ) ret <8 x i32> %1 } define <4 x i32> @avx2_psrlv_d_128_allbig(<4 x i32> %v) { ; CHECK-LABEL: @avx2_psrlv_d_128_allbig( ; CHECK-NEXT: ret <4 x i32> ; %1 = tail call <4 x i32> 
@llvm.x86.avx2.psrlv.d(<4 x i32> %v, <4 x i32> ) ret <4 x i32> %1 } define <8 x i32> @avx2_psrlv_d_256_allbig(<8 x i32> %v) { ; CHECK-LABEL: @avx2_psrlv_d_256_allbig( ; CHECK-NEXT: ret <8 x i32> ; %1 = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> ) ret <8 x i32> %1 } define <4 x i32> @avx2_psrlv_d_128_undef(<4 x i32> %v) { ; CHECK-LABEL: @avx2_psrlv_d_128_undef( ; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> [[V:%.*]], ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %1 = insertelement <4 x i32> , i32 undef, i32 0 %2 = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %v, <4 x i32> %1) ret <4 x i32> %2 } define <8 x i32> @avx2_psrlv_d_256_undef(<8 x i32> %v) { ; CHECK-LABEL: @avx2_psrlv_d_256_undef( ; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i32> [[V:%.*]], ; CHECK-NEXT: ret <8 x i32> [[TMP1]] ; %1 = insertelement <8 x i32> , i32 undef, i32 1 %2 = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> %1) ret <8 x i32> %2 } define <2 x i64> @avx2_psrlv_q_128_0(<2 x i64> %v) { ; CHECK-LABEL: @avx2_psrlv_q_128_0( ; CHECK-NEXT: ret <2 x i64> [[V:%.*]] ; %1 = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> zeroinitializer) ret <2 x i64> %1 } define <4 x i64> @avx2_psrlv_q_256_0(<4 x i64> %v) { ; CHECK-LABEL: @avx2_psrlv_q_256_0( ; CHECK-NEXT: ret <4 x i64> [[V:%.*]] ; %1 = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %v, <4 x i64> zeroinitializer) ret <4 x i64> %1 } define <2 x i64> @avx2_psrlv_q_128_var(<2 x i64> %v) { ; CHECK-LABEL: @avx2_psrlv_q_128_var( ; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i64> [[V:%.*]], ; CHECK-NEXT: ret <2 x i64> [[TMP1]] ; %1 = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> ) ret <2 x i64> %1 } define <4 x i64> @avx2_psrlv_q_256_var(<4 x i64> %v) { ; CHECK-LABEL: @avx2_psrlv_q_256_var( ; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i64> [[V:%.*]], ; CHECK-NEXT: ret <4 x i64> [[TMP1]] ; %1 = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %v, <4 x i64> ) ret <4 x 
i64> %1 } define <2 x i64> @avx2_psrlv_q_128_big(<2 x i64> %v) { ; CHECK-LABEL: @avx2_psrlv_q_128_big( ; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> [[V:%.*]], <2 x i64> ) ; CHECK-NEXT: ret <2 x i64> [[TMP1]] ; %1 = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> ) ret <2 x i64> %1 } define <4 x i64> @avx2_psrlv_q_256_big(<4 x i64> %v) { ; CHECK-LABEL: @avx2_psrlv_q_256_big( ; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> [[V:%.*]], <4 x i64> ) ; CHECK-NEXT: ret <4 x i64> [[TMP1]] ; %1 = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %v, <4 x i64> ) ret <4 x i64> %1 } define <2 x i64> @avx2_psrlv_q_128_allbig(<2 x i64> %v) { ; CHECK-LABEL: @avx2_psrlv_q_128_allbig( ; CHECK-NEXT: ret <2 x i64> zeroinitializer ; %1 = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> ) ret <2 x i64> %1 } define <4 x i64> @avx2_psrlv_q_256_allbig(<4 x i64> %v) { ; CHECK-LABEL: @avx2_psrlv_q_256_allbig( ; CHECK-NEXT: ret <4 x i64> ; %1 = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %v, <4 x i64> ) ret <4 x i64> %1 } ; The shift amount is 0 (the undef lane could be 0), so we return the unshifted input. 
define <2 x i64> @avx2_psrlv_q_128_undef(<2 x i64> %v) { ; CHECK-LABEL: @avx2_psrlv_q_128_undef( ; CHECK-NEXT: ret <2 x i64> [[V:%.*]] ; %1 = insertelement <2 x i64> , i64 undef, i64 1 %2 = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> %1) ret <2 x i64> %2 } define <4 x i64> @avx2_psrlv_q_256_undef(<4 x i64> %v) { ; CHECK-LABEL: @avx2_psrlv_q_256_undef( ; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i64> [[V:%.*]], ; CHECK-NEXT: ret <4 x i64> [[TMP1]] ; %1 = insertelement <4 x i64> , i64 undef, i64 0 %2 = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %v, <4 x i64> %1) ret <4 x i64> %2 } define <16 x i32> @avx2_psrlv_d_512_0(<16 x i32> %v) { ; CHECK-LABEL: @avx2_psrlv_d_512_0( ; CHECK-NEXT: ret <16 x i32> [[V:%.*]] ; %1 = tail call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> %v, <16 x i32> zeroinitializer) ret <16 x i32> %1 } define <16 x i32> @avx512_psrlv_d_512_var(<16 x i32> %v) { ; CHECK-LABEL: @avx512_psrlv_d_512_var( ; CHECK-NEXT: [[TMP1:%.*]] = lshr <16 x i32> [[V:%.*]], ; CHECK-NEXT: ret <16 x i32> [[TMP1]] ; %1 = tail call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> %v, <16 x i32> ) ret <16 x i32> %1 } define <16 x i32> @avx512_psrlv_d_512_big(<16 x i32> %v) { ; CHECK-LABEL: @avx512_psrlv_d_512_big( ; CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> [[V:%.*]], <16 x i32> ) ; CHECK-NEXT: ret <16 x i32> [[TMP1]] ; %1 = tail call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> %v, <16 x i32> ) ret <16 x i32> %1 } define <16 x i32> @avx512_psrlv_d_512_allbig(<16 x i32> %v) { ; CHECK-LABEL: @avx512_psrlv_d_512_allbig( ; CHECK-NEXT: ret <16 x i32> ; %1 = tail call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> %v, <16 x i32> ) ret <16 x i32> %1 } define <16 x i32> @avx512_psrlv_d_512_undef(<16 x i32> %v) { ; CHECK-LABEL: @avx512_psrlv_d_512_undef( ; CHECK-NEXT: [[TMP1:%.*]] = lshr <16 x i32> [[V:%.*]], ; CHECK-NEXT: ret <16 x i32> [[TMP1]] ; %1 = insertelement <16 x i32> , i32 
undef, i32 1 %2 = tail call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> %v, <16 x i32> %1) ret <16 x i32> %2 } define <8 x i64> @avx512_psrlv_q_512_0(<8 x i64> %v) { ; CHECK-LABEL: @avx512_psrlv_q_512_0( ; CHECK-NEXT: ret <8 x i64> [[V:%.*]] ; %1 = tail call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> %v, <8 x i64> zeroinitializer) ret <8 x i64> %1 } define <8 x i64> @avx512_psrlv_q_512_var(<8 x i64> %v) { ; CHECK-LABEL: @avx512_psrlv_q_512_var( ; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i64> [[V:%.*]], ; CHECK-NEXT: ret <8 x i64> [[TMP1]] ; %1 = tail call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> %v, <8 x i64> ) ret <8 x i64> %1 } define <8 x i64> @avx512_psrlv_q_512_big(<8 x i64> %v) { ; CHECK-LABEL: @avx512_psrlv_q_512_big( ; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> [[V:%.*]], <8 x i64> ) ; CHECK-NEXT: ret <8 x i64> [[TMP1]] ; %1 = tail call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> %v, <8 x i64> ) ret <8 x i64> %1 } define <8 x i64> @avx512_psrlv_q_512_allbig(<8 x i64> %v) { ; CHECK-LABEL: @avx512_psrlv_q_512_allbig( ; CHECK-NEXT: ret <8 x i64> ; %1 = tail call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> %v, <8 x i64> ) ret <8 x i64> %1 } define <8 x i64> @avx512_psrlv_q_512_undef(<8 x i64> %v) { ; CHECK-LABEL: @avx512_psrlv_q_512_undef( ; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i64> [[V:%.*]], ; CHECK-NEXT: ret <8 x i64> [[TMP1]] ; %1 = insertelement <8 x i64> , i64 undef, i64 0 %2 = tail call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> %v, <8 x i64> %1) ret <8 x i64> %2 } define <8 x i16> @avx512_psrlv_w_128_0(<8 x i16> %v) { ; CHECK-LABEL: @avx512_psrlv_w_128_0( ; CHECK-NEXT: ret <8 x i16> [[V:%.*]] ; %1 = tail call <8 x i16> @llvm.x86.avx512.psrlv.w.128(<8 x i16> %v, <8 x i16> zeroinitializer) ret <8 x i16> %1 } define <8 x i16> @avx512_psrlv_w_128_var(<8 x i16> %v) { ; CHECK-LABEL: @avx512_psrlv_w_128_var( ; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i16> [[V:%.*]], ; CHECK-NEXT: ret <8 x 
i16> [[TMP1]] ; %1 = tail call <8 x i16> @llvm.x86.avx512.psrlv.w.128(<8 x i16> %v, <8 x i16> ) ret <8 x i16> %1 } define <8 x i16> @avx512_psrlv_w_128_big(<8 x i16> %v) { ; CHECK-LABEL: @avx512_psrlv_w_128_big( ; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.x86.avx512.psrlv.w.128(<8 x i16> [[V:%.*]], <8 x i16> ) ; CHECK-NEXT: ret <8 x i16> [[TMP1]] ; %1 = tail call <8 x i16> @llvm.x86.avx512.psrlv.w.128(<8 x i16> %v, <8 x i16> ) ret <8 x i16> %1 } define <8 x i16> @avx512_psrlv_w_128_allbig(<8 x i16> %v) { ; CHECK-LABEL: @avx512_psrlv_w_128_allbig( ; CHECK-NEXT: ret <8 x i16> ; %1 = tail call <8 x i16> @llvm.x86.avx512.psrlv.w.128(<8 x i16> %v, <8 x i16> ) ret <8 x i16> %1 } define <8 x i16> @avx512_psrlv_w_128_undef(<8 x i16> %v) { ; CHECK-LABEL: @avx512_psrlv_w_128_undef( ; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i16> [[V:%.*]], ; CHECK-NEXT: ret <8 x i16> [[TMP1]] ; %1 = insertelement <8 x i16> , i16 undef, i64 0 %2 = tail call <8 x i16> @llvm.x86.avx512.psrlv.w.128(<8 x i16> %v, <8 x i16> %1) ret <8 x i16> %2 } define <16 x i16> @avx512_psrlv_w_256_0(<16 x i16> %v) { ; CHECK-LABEL: @avx512_psrlv_w_256_0( ; CHECK-NEXT: ret <16 x i16> [[V:%.*]] ; %1 = tail call <16 x i16> @llvm.x86.avx512.psrlv.w.256(<16 x i16> %v, <16 x i16> zeroinitializer) ret <16 x i16> %1 } define <16 x i16> @avx512_psrlv_w_256_var(<16 x i16> %v) { ; CHECK-LABEL: @avx512_psrlv_w_256_var( ; CHECK-NEXT: [[TMP1:%.*]] = lshr <16 x i16> [[V:%.*]], ; CHECK-NEXT: ret <16 x i16> [[TMP1]] ; %1 = tail call <16 x i16> @llvm.x86.avx512.psrlv.w.256(<16 x i16> %v, <16 x i16> ) ret <16 x i16> %1 } define <16 x i16> @avx512_psrlv_w_256_big(<16 x i16> %v) { ; CHECK-LABEL: @avx512_psrlv_w_256_big( ; CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.x86.avx512.psrlv.w.256(<16 x i16> [[V:%.*]], <16 x i16> ) ; CHECK-NEXT: ret <16 x i16> [[TMP1]] ; %1 = tail call <16 x i16> @llvm.x86.avx512.psrlv.w.256(<16 x i16> %v, <16 x i16> ) ret <16 x i16> %1 } define <16 x i16> @avx512_psrlv_w_256_allbig(<16 x 
i16> %v) { ; CHECK-LABEL: @avx512_psrlv_w_256_allbig( ; CHECK-NEXT: ret <16 x i16> ; %1 = tail call <16 x i16> @llvm.x86.avx512.psrlv.w.256(<16 x i16> %v, <16 x i16> ) ret <16 x i16> %1 } define <16 x i16> @avx512_psrlv_w_256_undef(<16 x i16> %v) { ; CHECK-LABEL: @avx512_psrlv_w_256_undef( ; CHECK-NEXT: [[TMP1:%.*]] = lshr <16 x i16> [[V:%.*]], ; CHECK-NEXT: ret <16 x i16> [[TMP1]] ; %1 = insertelement <16 x i16> , i16 undef, i64 0 %2 = tail call <16 x i16> @llvm.x86.avx512.psrlv.w.256(<16 x i16> %v, <16 x i16> %1) ret <16 x i16> %2 } define <32 x i16> @avx512_psrlv_w_512_0(<32 x i16> %v) { ; CHECK-LABEL: @avx512_psrlv_w_512_0( ; CHECK-NEXT: ret <32 x i16> [[V:%.*]] ; %1 = tail call <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16> %v, <32 x i16> zeroinitializer) ret <32 x i16> %1 } define <32 x i16> @avx512_psrlv_w_512_var(<32 x i16> %v) { ; CHECK-LABEL: @avx512_psrlv_w_512_var( ; CHECK-NEXT: [[TMP1:%.*]] = lshr <32 x i16> [[V:%.*]], ; CHECK-NEXT: ret <32 x i16> [[TMP1]] ; %1 = tail call <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16> %v, <32 x i16> ) ret <32 x i16> %1 } define <32 x i16> @avx512_psrlv_w_512_big(<32 x i16> %v) { ; CHECK-LABEL: @avx512_psrlv_w_512_big( ; CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16> [[V:%.*]], <32 x i16> ) ; CHECK-NEXT: ret <32 x i16> [[TMP1]] ; %1 = tail call <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16> %v, <32 x i16> ) ret <32 x i16> %1 } define <32 x i16> @avx512_psrlv_w_512_allbig(<32 x i16> %v) { ; CHECK-LABEL: @avx512_psrlv_w_512_allbig( ; CHECK-NEXT: ret <32 x i16> ; %1 = tail call <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16> %v, <32 x i16> ) ret <32 x i16> %1 } define <32 x i16> @avx512_psrlv_w_512_undef(<32 x i16> %v) { ; CHECK-LABEL: @avx512_psrlv_w_512_undef( ; CHECK-NEXT: [[TMP1:%.*]] = lshr <32 x i16> [[V:%.*]], ; CHECK-NEXT: ret <32 x i16> [[TMP1]] ; %1 = insertelement <32 x i16> , i16 undef, i64 0 %2 = tail call <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x 
i16> %v, <32 x i16> %1) ret <32 x i16> %2 } ; ; SHL - Constant Per-Element Vector ; define <4 x i32> @avx2_psllv_d_128_0(<4 x i32> %v) { ; CHECK-LABEL: @avx2_psllv_d_128_0( ; CHECK-NEXT: ret <4 x i32> [[V:%.*]] ; %1 = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> zeroinitializer) ret <4 x i32> %1 } define <8 x i32> @avx2_psllv_d_256_0(<8 x i32> %v) { ; CHECK-LABEL: @avx2_psllv_d_256_0( ; CHECK-NEXT: ret <8 x i32> [[V:%.*]] ; %1 = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %v, <8 x i32> zeroinitializer) ret <8 x i32> %1 } define <4 x i32> @avx2_psllv_d_128_var(<4 x i32> %v) { ; CHECK-LABEL: @avx2_psllv_d_128_var( ; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i32> [[V:%.*]], ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %1 = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> ) ret <4 x i32> %1 } define <8 x i32> @avx2_psllv_d_256_var(<8 x i32> %v) { ; CHECK-LABEL: @avx2_psllv_d_256_var( ; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i32> [[V:%.*]], ; CHECK-NEXT: ret <8 x i32> [[TMP1]] ; %1 = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %v, <8 x i32> ) ret <8 x i32> %1 } define <4 x i32> @avx2_psllv_d_128_big(<4 x i32> %v) { ; CHECK-LABEL: @avx2_psllv_d_128_big( ; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> [[V:%.*]], <4 x i32> ) ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %1 = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> ) ret <4 x i32> %1 } define <8 x i32> @avx2_psllv_d_256_big(<8 x i32> %v) { ; CHECK-LABEL: @avx2_psllv_d_256_big( ; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> [[V:%.*]], <8 x i32> ) ; CHECK-NEXT: ret <8 x i32> [[TMP1]] ; %1 = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %v, <8 x i32> ) ret <8 x i32> %1 } define <4 x i32> @avx2_psllv_d_128_allbig(<4 x i32> %v) { ; CHECK-LABEL: @avx2_psllv_d_128_allbig( ; CHECK-NEXT: ret <4 x i32> ; %1 = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> ) ret 
<4 x i32> %1 } define <8 x i32> @avx2_psllv_d_256_allbig(<8 x i32> %v) { ; CHECK-LABEL: @avx2_psllv_d_256_allbig( ; CHECK-NEXT: ret <8 x i32> ; %1 = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %v, <8 x i32> ) ret <8 x i32> %1 } define <4 x i32> @avx2_psllv_d_128_undef(<4 x i32> %v) { ; CHECK-LABEL: @avx2_psllv_d_128_undef( ; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i32> [[V:%.*]], ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %1 = insertelement <4 x i32> , i32 undef, i32 0 %2 = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> %1) ret <4 x i32> %2 } define <8 x i32> @avx2_psllv_d_256_undef(<8 x i32> %v) { ; CHECK-LABEL: @avx2_psllv_d_256_undef( ; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i32> [[V:%.*]], ; CHECK-NEXT: ret <8 x i32> [[TMP1]] ; %1 = insertelement <8 x i32> , i32 undef, i32 1 %2 = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %v, <8 x i32> %1) ret <8 x i32> %2 } define <2 x i64> @avx2_psllv_q_128_0(<2 x i64> %v) { ; CHECK-LABEL: @avx2_psllv_q_128_0( ; CHECK-NEXT: ret <2 x i64> [[V:%.*]] ; %1 = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %v, <2 x i64> zeroinitializer) ret <2 x i64> %1 } define <4 x i64> @avx2_psllv_q_256_0(<4 x i64> %v) { ; CHECK-LABEL: @avx2_psllv_q_256_0( ; CHECK-NEXT: ret <4 x i64> [[V:%.*]] ; %1 = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> zeroinitializer) ret <4 x i64> %1 } define <2 x i64> @avx2_psllv_q_128_var(<2 x i64> %v) { ; CHECK-LABEL: @avx2_psllv_q_128_var( ; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i64> [[V:%.*]], ; CHECK-NEXT: ret <2 x i64> [[TMP1]] ; %1 = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %v, <2 x i64> ) ret <2 x i64> %1 } define <4 x i64> @avx2_psllv_q_256_var(<4 x i64> %v) { ; CHECK-LABEL: @avx2_psllv_q_256_var( ; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i64> [[V:%.*]], ; CHECK-NEXT: ret <4 x i64> [[TMP1]] ; %1 = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> ) ret <4 x i64> %1 } define <2 x i64> @avx2_psllv_q_128_big(<2 x i64> 
%v) { ; CHECK-LABEL: @avx2_psllv_q_128_big( ; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> [[V:%.*]], <2 x i64> ) ; CHECK-NEXT: ret <2 x i64> [[TMP1]] ; %1 = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %v, <2 x i64> ) ret <2 x i64> %1 } define <4 x i64> @avx2_psllv_q_256_big(<4 x i64> %v) { ; CHECK-LABEL: @avx2_psllv_q_256_big( ; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> [[V:%.*]], <4 x i64> ) ; CHECK-NEXT: ret <4 x i64> [[TMP1]] ; %1 = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> ) ret <4 x i64> %1 } define <2 x i64> @avx2_psllv_q_128_allbig(<2 x i64> %v) { ; CHECK-LABEL: @avx2_psllv_q_128_allbig( ; CHECK-NEXT: ret <2 x i64> zeroinitializer ; %1 = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %v, <2 x i64> ) ret <2 x i64> %1 } define <4 x i64> @avx2_psllv_q_256_allbig(<4 x i64> %v) { ; CHECK-LABEL: @avx2_psllv_q_256_allbig( ; CHECK-NEXT: ret <4 x i64> ; %1 = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> ) ret <4 x i64> %1 } ; The shift amount is 0 (the undef lane could be 0), so we return the unshifted input. 
define <2 x i64> @avx2_psllv_q_128_undef(<2 x i64> %v) { ; CHECK-LABEL: @avx2_psllv_q_128_undef( ; CHECK-NEXT: ret <2 x i64> [[V:%.*]] ; %1 = insertelement <2 x i64> , i64 undef, i64 1 %2 = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %v, <2 x i64> %1) ret <2 x i64> %2 } define <4 x i64> @avx2_psllv_q_256_undef(<4 x i64> %v) { ; CHECK-LABEL: @avx2_psllv_q_256_undef( ; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i64> [[V:%.*]], ; CHECK-NEXT: ret <4 x i64> [[TMP1]] ; %1 = insertelement <4 x i64> , i64 undef, i64 0 %2 = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> %1) ret <4 x i64> %2 } define <16 x i32> @avx512_psllv_d_512_0(<16 x i32> %v) { ; CHECK-LABEL: @avx512_psllv_d_512_0( ; CHECK-NEXT: ret <16 x i32> [[V:%.*]] ; %1 = tail call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> %v, <16 x i32> zeroinitializer) ret <16 x i32> %1 } define <16 x i32> @avx512_psllv_d_512_var(<16 x i32> %v) { ; CHECK-LABEL: @avx512_psllv_d_512_var( ; CHECK-NEXT: [[TMP1:%.*]] = shl <16 x i32> [[V:%.*]], ; CHECK-NEXT: ret <16 x i32> [[TMP1]] ; %1 = tail call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> %v, <16 x i32> ) ret <16 x i32> %1 } define <16 x i32> @avx512_psllv_d_512_big(<16 x i32> %v) { ; CHECK-LABEL: @avx512_psllv_d_512_big( ; CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> [[V:%.*]], <16 x i32> ) ; CHECK-NEXT: ret <16 x i32> [[TMP1]] ; %1 = tail call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> %v, <16 x i32> ) ret <16 x i32> %1 } define <16 x i32> @avx512_psllv_d_512_allbig(<16 x i32> %v) { ; CHECK-LABEL: @avx512_psllv_d_512_allbig( ; CHECK-NEXT: ret <16 x i32> ; %1 = tail call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> %v, <16 x i32> ) ret <16 x i32> %1 } define <16 x i32> @avx512_psllv_d_512_undef(<16 x i32> %v) { ; CHECK-LABEL: @avx512_psllv_d_512_undef( ; CHECK-NEXT: [[TMP1:%.*]] = shl <16 x i32> [[V:%.*]], ; CHECK-NEXT: ret <16 x i32> [[TMP1]] ; %1 = insertelement <16 x i32> , i32 
undef, i32 1 %2 = tail call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> %v, <16 x i32> %1) ret <16 x i32> %2 } define <8 x i64> @avx512_psllv_q_512_0(<8 x i64> %v) { ; CHECK-LABEL: @avx512_psllv_q_512_0( ; CHECK-NEXT: ret <8 x i64> [[V:%.*]] ; %1 = tail call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> %v, <8 x i64> zeroinitializer) ret <8 x i64> %1 } define <8 x i64> @avx512_psllv_q_512_var(<8 x i64> %v) { ; CHECK-LABEL: @avx512_psllv_q_512_var( ; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i64> [[V:%.*]], ; CHECK-NEXT: ret <8 x i64> [[TMP1]] ; %1 = tail call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> %v, <8 x i64> ) ret <8 x i64> %1 } define <8 x i64> @avx512_psllv_q_512_big(<8 x i64> %v) { ; CHECK-LABEL: @avx512_psllv_q_512_big( ; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> [[V:%.*]], <8 x i64> ) ; CHECK-NEXT: ret <8 x i64> [[TMP1]] ; %1 = tail call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> %v, <8 x i64> ) ret <8 x i64> %1 } define <8 x i64> @avx512_psllv_q_512_allbig(<8 x i64> %v) { ; CHECK-LABEL: @avx512_psllv_q_512_allbig( ; CHECK-NEXT: ret <8 x i64> ; %1 = tail call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> %v, <8 x i64> ) ret <8 x i64> %1 } define <8 x i64> @avx512_psllv_q_512_undef(<8 x i64> %v) { ; CHECK-LABEL: @avx512_psllv_q_512_undef( ; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i64> [[V:%.*]], ; CHECK-NEXT: ret <8 x i64> [[TMP1]] ; %1 = insertelement <8 x i64> , i64 undef, i64 0 %2 = tail call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> %v, <8 x i64> %1) ret <8 x i64> %2 } define <8 x i16> @avx512_psllv_w_128_0(<8 x i16> %v) { ; CHECK-LABEL: @avx512_psllv_w_128_0( ; CHECK-NEXT: ret <8 x i16> [[V:%.*]] ; %1 = tail call <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16> %v, <8 x i16> zeroinitializer) ret <8 x i16> %1 } define <8 x i16> @avx512_psllv_w_128_var(<8 x i16> %v) { ; CHECK-LABEL: @avx512_psllv_w_128_var( ; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i16> [[V:%.*]], ; CHECK-NEXT: ret <8 x 
i16> [[TMP1]] ; %1 = tail call <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16> %v, <8 x i16> ) ret <8 x i16> %1 } define <8 x i16> @avx512_psllv_w_128_big(<8 x i16> %v) { ; CHECK-LABEL: @avx512_psllv_w_128_big( ; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16> [[V:%.*]], <8 x i16> ) ; CHECK-NEXT: ret <8 x i16> [[TMP1]] ; %1 = tail call <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16> %v, <8 x i16> ) ret <8 x i16> %1 } define <8 x i16> @avx512_psllv_w_128_allbig(<8 x i16> %v) { ; CHECK-LABEL: @avx512_psllv_w_128_allbig( ; CHECK-NEXT: ret <8 x i16> ; %1 = tail call <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16> %v, <8 x i16> ) ret <8 x i16> %1 } define <8 x i16> @avx512_psllv_w_128_undef(<8 x i16> %v) { ; CHECK-LABEL: @avx512_psllv_w_128_undef( ; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i16> [[V:%.*]], ; CHECK-NEXT: ret <8 x i16> [[TMP1]] ; %1 = insertelement <8 x i16> , i16 undef, i64 0 %2 = tail call <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16> %v, <8 x i16> %1) ret <8 x i16> %2 } define <16 x i16> @avx512_psllv_w_256_0(<16 x i16> %v) { ; CHECK-LABEL: @avx512_psllv_w_256_0( ; CHECK-NEXT: ret <16 x i16> [[V:%.*]] ; %1 = tail call <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16> %v, <16 x i16> zeroinitializer) ret <16 x i16> %1 } define <16 x i16> @avx512_psllv_w_256_var(<16 x i16> %v) { ; CHECK-LABEL: @avx512_psllv_w_256_var( ; CHECK-NEXT: [[TMP1:%.*]] = shl <16 x i16> [[V:%.*]], ; CHECK-NEXT: ret <16 x i16> [[TMP1]] ; %1 = tail call <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16> %v, <16 x i16> ) ret <16 x i16> %1 } define <16 x i16> @avx512_psllv_w_256_big(<16 x i16> %v) { ; CHECK-LABEL: @avx512_psllv_w_256_big( ; CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16> [[V:%.*]], <16 x i16> ) ; CHECK-NEXT: ret <16 x i16> [[TMP1]] ; %1 = tail call <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16> %v, <16 x i16> ) ret <16 x i16> %1 } define <16 x i16> @avx512_psllv_w_256_allbig(<16 x i16> 
%v) { ; CHECK-LABEL: @avx512_psllv_w_256_allbig( ; CHECK-NEXT: ret <16 x i16> ; %1 = tail call <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16> %v, <16 x i16> ) ret <16 x i16> %1 } define <16 x i16> @avx512_psllv_w_256_undef(<16 x i16> %v) { ; CHECK-LABEL: @avx512_psllv_w_256_undef( ; CHECK-NEXT: [[TMP1:%.*]] = shl <16 x i16> [[V:%.*]], ; CHECK-NEXT: ret <16 x i16> [[TMP1]] ; %1 = insertelement <16 x i16> , i16 undef, i64 0 %2 = tail call <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16> %v, <16 x i16> %1) ret <16 x i16> %2 } define <32 x i16> @avx512_psllv_w_512_0(<32 x i16> %v) { ; CHECK-LABEL: @avx512_psllv_w_512_0( ; CHECK-NEXT: ret <32 x i16> [[V:%.*]] ; %1 = tail call <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16> %v, <32 x i16> zeroinitializer) ret <32 x i16> %1 } define <32 x i16> @avx512_psllv_w_512_var(<32 x i16> %v) { ; CHECK-LABEL: @avx512_psllv_w_512_var( ; CHECK-NEXT: [[TMP1:%.*]] = shl <32 x i16> [[V:%.*]], ; CHECK-NEXT: ret <32 x i16> [[TMP1]] ; %1 = tail call <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16> %v, <32 x i16> ) ret <32 x i16> %1 } define <32 x i16> @avx512_psllv_w_512_big(<32 x i16> %v) { ; CHECK-LABEL: @avx512_psllv_w_512_big( ; CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16> [[V:%.*]], <32 x i16> ) ; CHECK-NEXT: ret <32 x i16> [[TMP1]] ; %1 = tail call <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16> %v, <32 x i16> ) ret <32 x i16> %1 } define <32 x i16> @avx512_psllv_w_512_allbig(<32 x i16> %v) { ; CHECK-LABEL: @avx512_psllv_w_512_allbig( ; CHECK-NEXT: ret <32 x i16> ; %1 = tail call <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16> %v, <32 x i16> ) ret <32 x i16> %1 } define <32 x i16> @avx512_psllv_w_512_undef(<32 x i16> %v) { ; CHECK-LABEL: @avx512_psllv_w_512_undef( ; CHECK-NEXT: [[TMP1:%.*]] = shl <32 x i16> [[V:%.*]], ; CHECK-NEXT: ret <32 x i16> [[TMP1]] ; %1 = insertelement <32 x i16> , i16 undef, i64 0 %2 = tail call <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16> %v, 
<32 x i16> %1) ret <32 x i16> %2 } ; ; Vector Masked Shift Amounts ; define <8 x i16> @sse2_psra_w_128_masked(<8 x i16> %v, <8 x i16> %a) { ; CHECK-LABEL: @sse2_psra_w_128_masked( ; CHECK-NEXT: [[TMP1:%.*]] = and <8 x i16> [[A:%.*]], ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = ashr <8 x i16> [[V:%.*]], [[TMP2]] ; CHECK-NEXT: ret <8 x i16> [[TMP3]] ; %1 = and <8 x i16> %a, %2 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> %1) ret <8 x i16> %2 } define <8 x i32> @avx2_psra_d_256_masked(<8 x i32> %v, <4 x i32> %a) { ; CHECK-LABEL: @avx2_psra_d_256_masked( ; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[A:%.*]], ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = ashr <8 x i32> [[V:%.*]], [[TMP2]] ; CHECK-NEXT: ret <8 x i32> [[TMP3]] ; %1 = and <4 x i32> %a, %2 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> %1) ret <8 x i32> %2 } define <8 x i64> @avx512_psra_q_512_masked(<8 x i64> %v, <2 x i64> %a) { ; CHECK-LABEL: @avx512_psra_q_512_masked( ; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i64> [[A:%.*]], ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = ashr <8 x i64> [[V:%.*]], [[TMP2]] ; CHECK-NEXT: ret <8 x i64> [[TMP3]] ; %1 = and <2 x i64> %a, %2 = tail call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> %v, <2 x i64> %1) ret <8 x i64> %2 } define <4 x i32> @sse2_psrl_d_128_masked(<4 x i32> %v, <4 x i32> %a) { ; CHECK-LABEL: @sse2_psrl_d_128_masked( ; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[A:%.*]], ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = lshr <4 x i32> [[V:%.*]], [[TMP2]] ; CHECK-NEXT: ret <4 x i32> [[TMP3]] ; %1 = and <4 x i32> %a, %2 = tail call <4 x i32> 
@llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> %1) ret <4 x i32> %2 } define <4 x i64> @avx2_psrl_q_256_masked(<4 x i64> %v, <2 x i64> %a) { ; CHECK-LABEL: @avx2_psrl_q_256_masked( ; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i64> [[A:%.*]], ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = lshr <4 x i64> [[V:%.*]], [[TMP2]] ; CHECK-NEXT: ret <4 x i64> [[TMP3]] ; %1 = and <2 x i64> %a, %2 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> %1) ret <4 x i64> %2 } define <32 x i16> @avx512_psrl_w_512_masked(<32 x i16> %v, <8 x i16> %a) { ; CHECK-LABEL: @avx512_psrl_w_512_masked( ; CHECK-NEXT: [[TMP1:%.*]] = and <8 x i16> [[A:%.*]], ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <32 x i32> zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = lshr <32 x i16> [[V:%.*]], [[TMP2]] ; CHECK-NEXT: ret <32 x i16> [[TMP3]] ; %1 = and <8 x i16> %a, %2 = tail call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %v, <8 x i16> %1) ret <32 x i16> %2 } define <2 x i64> @sse2_psll_q_128_masked(<2 x i64> %v, <2 x i64> %a) { ; CHECK-LABEL: @sse2_psll_q_128_masked( ; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i64> [[A:%.*]], ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = shl <2 x i64> [[V:%.*]], [[TMP2]] ; CHECK-NEXT: ret <2 x i64> [[TMP3]] ; %1 = and <2 x i64> %a, %2 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> %1) ret <2 x i64> %2 } define <16 x i16> @avx2_psll_w_256_masked(<16 x i16> %v, <8 x i16> %a) { ; CHECK-LABEL: @avx2_psll_w_256_masked( ; CHECK-NEXT: [[TMP1:%.*]] = and <8 x i16> [[A:%.*]], ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <16 x i32> zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = shl <16 x i16> [[V:%.*]], [[TMP2]] ; CHECK-NEXT: ret <16 x i16> [[TMP3]] ; %1 = and <8 x i16> %a, %2 = tail call <16 x i16> 
@llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> %1) ret <16 x i16> %2 } define <16 x i32> @avx512_psll_d_512_masked(<16 x i32> %v, <4 x i32> %a) { ; CHECK-LABEL: @avx512_psll_d_512_masked( ; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[A:%.*]], ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <16 x i32> zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = shl <16 x i32> [[V:%.*]], [[TMP2]] ; CHECK-NEXT: ret <16 x i32> [[TMP3]] ; %1 = and <4 x i32> %a, %2 = tail call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> %v, <4 x i32> %1) ret <16 x i32> %2 } define <8 x i16> @sse2_psrai_w_128_masked(<8 x i16> %v, i32 %a) { ; CHECK-LABEL: @sse2_psrai_w_128_masked( ; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[A:%.*]] to i16 ; CHECK-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[TMP2]], i32 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = ashr <8 x i16> [[V:%.*]], [[DOTSPLAT]] ; CHECK-NEXT: ret <8 x i16> [[TMP3]] ; %1 = and i32 %a, 15 %2 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %v, i32 %1) ret <8 x i16> %2 } define <8 x i32> @avx2_psrai_d_256_masked(<8 x i32> %v, i32 %a) { ; CHECK-LABEL: @avx2_psrai_d_256_masked( ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A:%.*]], 31 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[TMP1]], i32 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[TMP2:%.*]] = ashr <8 x i32> [[V:%.*]], [[DOTSPLAT]] ; CHECK-NEXT: ret <8 x i32> [[TMP2]] ; %1 = and i32 %a, 31 %2 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %v, i32 %1) ret <8 x i32> %2 } define <8 x i64> @avx512_psrai_q_512_masked(<8 x i64> %v, i32 %a) { ; CHECK-LABEL: @avx512_psrai_q_512_masked( ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A:%.*]], 63 ; CHECK-NEXT: [[TMP2:%.*]] = 
zext i32 [[TMP1]] to i64 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[TMP2]], i32 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i64> [[DOTSPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = ashr <8 x i64> [[V:%.*]], [[DOTSPLAT]] ; CHECK-NEXT: ret <8 x i64> [[TMP3]] ; %1 = and i32 %a, 63 %2 = tail call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> %v, i32 %1) ret <8 x i64> %2 } define <4 x i32> @sse2_psrli_d_128_masked(<4 x i32> %v, i32 %a) { ; CHECK-LABEL: @sse2_psrli_d_128_masked( ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A:%.*]], 31 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1]], i32 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP2:%.*]] = lshr <4 x i32> [[V:%.*]], [[DOTSPLAT]] ; CHECK-NEXT: ret <4 x i32> [[TMP2]] ; %1 = and i32 %a, 31 %2 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 %1) ret <4 x i32> %2 } define <4 x i64> @avx2_psrli_q_256_masked(<4 x i64> %v, i32 %a) { ; CHECK-LABEL: @avx2_psrli_q_256_masked( ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A:%.*]], 63 ; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP2]], i32 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i64> [[DOTSPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = lshr <4 x i64> [[V:%.*]], [[DOTSPLAT]] ; CHECK-NEXT: ret <4 x i64> [[TMP3]] ; %1 = and i32 %a, 63 %2 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %v, i32 %1) ret <4 x i64> %2 } define <32 x i16> @avx512_psrli_w_512_masked(<32 x i16> %v, i32 %a) { ; CHECK-LABEL: @avx512_psrli_w_512_masked( ; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[A:%.*]] to i16 ; CHECK-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <32 x i16> poison, i16 [[TMP2]], i32 0 ; 
CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <32 x i16> [[DOTSPLATINSERT]], <32 x i16> poison, <32 x i32> zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = lshr <32 x i16> [[V:%.*]], [[DOTSPLAT]] ; CHECK-NEXT: ret <32 x i16> [[TMP3]] ; %1 = and i32 %a, 15 %2 = tail call <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16> %v, i32 %1) ret <32 x i16> %2 } define <2 x i64> @sse2_pslli_q_128_masked(<2 x i64> %v, i32 %a) { ; CHECK-LABEL: @sse2_pslli_q_128_masked( ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A:%.*]], 63 ; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP2]], i32 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i64> [[DOTSPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = shl <2 x i64> [[V:%.*]], [[DOTSPLAT]] ; CHECK-NEXT: ret <2 x i64> [[TMP3]] ; %1 = and i32 %a, 63 %2 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %v, i32 %1) ret <2 x i64> %2 } define <16 x i16> @avx2_pslli_w_256_masked(<16 x i16> %v, i32 %a) { ; CHECK-LABEL: @avx2_pslli_w_256_masked( ; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[A:%.*]] to i16 ; CHECK-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[TMP2]], i32 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i16> [[DOTSPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = shl <16 x i16> [[V:%.*]], [[DOTSPLAT]] ; CHECK-NEXT: ret <16 x i16> [[TMP3]] ; %1 = and i32 %a, 15 %2 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %v, i32 %1) ret <16 x i16> %2 } define <16 x i32> @avx512_pslli_d_512_masked(<16 x i32> %v, i32 %a) { ; CHECK-LABEL: @avx512_pslli_d_512_masked( ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A:%.*]], 31 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[TMP1]], i32 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i32> [[DOTSPLATINSERT]], <16 x i32> poison, <16 x 
i32> zeroinitializer ; CHECK-NEXT: [[TMP2:%.*]] = shl <16 x i32> [[V:%.*]], [[DOTSPLAT]] ; CHECK-NEXT: ret <16 x i32> [[TMP2]] ; %1 = and i32 %a, 31 %2 = tail call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> %v, i32 %1) ret <16 x i32> %2 } define <4 x i32> @avx2_psrav_d_128_masked(<4 x i32> %v, <4 x i32> %a) { ; CHECK-LABEL: @avx2_psrav_d_128_masked( ; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[A:%.*]], ; CHECK-NEXT: [[TMP2:%.*]] = ashr <4 x i32> [[V:%.*]], [[TMP1]] ; CHECK-NEXT: ret <4 x i32> [[TMP2]] ; %1 = and <4 x i32> %a, %2 = tail call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %v, <4 x i32> %1) ret <4 x i32> %2 } define <4 x i32> @avx2_psrav_d_128_masked_shuffle(<4 x i32> %v, <4 x i32> %a) { ; CHECK-LABEL: @avx2_psrav_d_128_masked_shuffle( ; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[A:%.*]], ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = ashr <4 x i32> [[V:%.*]], [[TMP2]] ; CHECK-NEXT: ret <4 x i32> [[TMP3]] ; %1 = and <4 x i32> %a, %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> %3 = tail call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %v, <4 x i32> %2) ret <4 x i32> %3 } define <8 x i32> @avx2_psrav_d_256_masked(<8 x i32> %v, <8 x i32> %a) { ; CHECK-LABEL: @avx2_psrav_d_256_masked( ; CHECK-NEXT: [[TMP1:%.*]] = and <8 x i32> [[A:%.*]], ; CHECK-NEXT: [[TMP2:%.*]] = ashr <8 x i32> [[V:%.*]], [[TMP1]] ; CHECK-NEXT: ret <8 x i32> [[TMP2]] ; %1 = and <8 x i32> %a, %2 = tail call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %v, <8 x i32> %1) ret <8 x i32> %2 } define <32 x i16> @avx512_psrav_w_512_masked(<32 x i16> %v, <32 x i16> %a) { ; CHECK-LABEL: @avx512_psrav_w_512_masked( ; CHECK-NEXT: [[TMP1:%.*]] = and <32 x i16> [[A:%.*]], ; CHECK-NEXT: [[TMP2:%.*]] = ashr <32 x i16> [[V:%.*]], [[TMP1]] ; CHECK-NEXT: ret <32 x i16> [[TMP2]] ; %1 = and <32 x i16> %a, %2 = tail call <32 x i16> @llvm.x86.avx512.psrav.w.512(<32 x i16> %v, <32 x i16> %1) ret <32 x i16> %2 } define <2 
x i64> @avx2_psrlv_q_128_masked(<2 x i64> %v, <2 x i64> %a) { ; CHECK-LABEL: @avx2_psrlv_q_128_masked( ; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i64> [[A:%.*]], ; CHECK-NEXT: [[TMP2:%.*]] = lshr <2 x i64> [[V:%.*]], [[TMP1]] ; CHECK-NEXT: ret <2 x i64> [[TMP2]] ; %1 = and <2 x i64> %a, %2 = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> %1) ret <2 x i64> %2 } define <8 x i32> @avx2_psrlv_d_256_masked(<8 x i32> %v, <8 x i32> %a) { ; CHECK-LABEL: @avx2_psrlv_d_256_masked( ; CHECK-NEXT: [[TMP1:%.*]] = and <8 x i32> [[A:%.*]], ; CHECK-NEXT: [[TMP2:%.*]] = lshr <8 x i32> [[V:%.*]], [[TMP1]] ; CHECK-NEXT: ret <8 x i32> [[TMP2]] ; %1 = and <8 x i32> %a, %2 = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> %1) ret <8 x i32> %2 } define <8 x i64> @avx512_psrlv_q_512_masked(<8 x i64> %v, <8 x i64> %a) { ; CHECK-LABEL: @avx512_psrlv_q_512_masked( ; CHECK-NEXT: [[TMP1:%.*]] = and <8 x i64> [[A:%.*]], ; CHECK-NEXT: [[TMP2:%.*]] = lshr <8 x i64> [[V:%.*]], [[TMP1]] ; CHECK-NEXT: ret <8 x i64> [[TMP2]] ; %1 = and <8 x i64> %a, %2 = tail call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> %v, <8 x i64> %1) ret <8 x i64> %2 } define <4 x i32> @avx2_psllv_d_128_masked(<4 x i32> %v, <4 x i32> %a) { ; CHECK-LABEL: @avx2_psllv_d_128_masked( ; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[A:%.*]], ; CHECK-NEXT: [[TMP2:%.*]] = shl <4 x i32> [[V:%.*]], [[TMP1]] ; CHECK-NEXT: ret <4 x i32> [[TMP2]] ; %1 = and <4 x i32> %a, %2 = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> %1) ret <4 x i32> %2 } define <4 x i64> @avx2_psllv_q_256_masked(<4 x i64> %v, <4 x i64> %a) { ; CHECK-LABEL: @avx2_psllv_q_256_masked( ; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i64> [[A:%.*]], ; CHECK-NEXT: [[TMP2:%.*]] = shl <4 x i64> [[V:%.*]], [[TMP1]] ; CHECK-NEXT: ret <4 x i64> [[TMP2]] ; %1 = and <4 x i64> %a, %2 = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> %1) ret <4 x i64> %2 } define <32 x i16> @avx512_psllv_w_512_masked(<32 x 
i16> %v, <32 x i16> %a) { ; CHECK-LABEL: @avx512_psllv_w_512_masked( ; CHECK-NEXT: [[TMP1:%.*]] = and <32 x i16> [[A:%.*]], ; CHECK-NEXT: [[TMP2:%.*]] = shl <32 x i16> [[V:%.*]], [[TMP1]] ; CHECK-NEXT: ret <32 x i16> [[TMP2]] ; %1 = and <32 x i16> %a, %2 = tail call <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16> %v, <32 x i16> %1) ret <32 x i16> %2 } ; ; Vector Demanded Bits ; define <8 x i16> @sse2_psra_w_var(<8 x i16> %v, <8 x i16> %a) { ; CHECK-LABEL: @sse2_psra_w_var( ; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> [[V:%.*]], <8 x i16> [[A:%.*]]) ; CHECK-NEXT: ret <8 x i16> [[TMP1]] ; %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> %2 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> %1) ret <8 x i16> %2 } define <8 x i16> @sse2_psra_w_var_bc(<8 x i16> %v, <2 x i64> %a) { ; CHECK-LABEL: @sse2_psra_w_var_bc( ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[A:%.*]] to <8 x i16> ; CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> [[V:%.*]], <8 x i16> [[TMP1]]) ; CHECK-NEXT: ret <8 x i16> [[TMP2]] ; %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> %2 = bitcast <2 x i64> %1 to <8 x i16> %3 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> %2) ret <8 x i16> %3 } define <4 x i32> @sse2_psra_d_var(<4 x i32> %v, <4 x i32> %a) { ; CHECK-LABEL: @sse2_psra_d_var( ; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> [[V:%.*]], <4 x i32> [[A:%.*]]) ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> %2 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> %1) ret <4 x i32> %2 } define <4 x i32> @sse2_psra_d_var_bc(<4 x i32> %v, <8 x i16> %a) { ; CHECK-LABEL: @sse2_psra_d_var_bc( ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[A:%.*]] to <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> [[V:%.*]], <4 x i32> [[TMP1]]) ; 
CHECK-NEXT: ret <4 x i32> [[TMP2]] ; %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> %2 = bitcast <8 x i16> %1 to <4 x i32> %3 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> %2) ret <4 x i32> %3 } define <16 x i16> @avx2_psra_w_var(<16 x i16> %v, <8 x i16> %a) { ; CHECK-LABEL: @avx2_psra_w_var( ; CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> [[V:%.*]], <8 x i16> [[A:%.*]]) ; CHECK-NEXT: ret <16 x i16> [[TMP1]] ; %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> %2 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> %1) ret <16 x i16> %2 } define <8 x i32> @avx2_psra_d_var(<8 x i32> %v, <4 x i32> %a) { ; CHECK-LABEL: @avx2_psra_d_var( ; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> [[V:%.*]], <4 x i32> [[A:%.*]]) ; CHECK-NEXT: ret <8 x i32> [[TMP1]] ; %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> %2 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> %1) ret <8 x i32> %2 } define <2 x i64> @avx512_psra_q_128_var(<2 x i64> %v, <2 x i64> %a) { ; CHECK-LABEL: @avx512_psra_q_128_var( ; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.avx512.psra.q.128(<2 x i64> [[V:%.*]], <2 x i64> [[A:%.*]]) ; CHECK-NEXT: ret <2 x i64> [[TMP1]] ; %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> %2 = tail call <2 x i64> @llvm.x86.avx512.psra.q.128(<2 x i64> %v, <2 x i64> %1) ret <2 x i64> %2 } define <4 x i64> @avx512_psra_q_256_var(<4 x i64> %v, <2 x i64> %a) { ; CHECK-LABEL: @avx512_psra_q_256_var( ; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.x86.avx512.psra.q.256(<4 x i64> [[V:%.*]], <2 x i64> [[A:%.*]]) ; CHECK-NEXT: ret <4 x i64> [[TMP1]] ; %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> %2 = tail call <4 x i64> @llvm.x86.avx512.psra.q.256(<4 x i64> %v, <2 x i64> %1) ret <4 x i64> %2 } define <32 x i16> @avx512_psra_w_512_var(<32 x i16> %v, <8 x i16> %a) { ; CHECK-LABEL: 
@avx512_psra_w_512_var( ; CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> [[V:%.*]], <8 x i16> [[A:%.*]]) ; CHECK-NEXT: ret <32 x i16> [[TMP1]] ; %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> %2 = tail call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %v, <8 x i16> %1) ret <32 x i16> %2 } define <16 x i32> @avx512_psra_d_512_var(<16 x i32> %v, <4 x i32> %a) { ; CHECK-LABEL: @avx512_psra_d_512_var( ; CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> [[V:%.*]], <4 x i32> [[A:%.*]]) ; CHECK-NEXT: ret <16 x i32> [[TMP1]] ; %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> %2 = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %v, <4 x i32> %1) ret <16 x i32> %2 } define <8 x i64> @avx512_psra_q_512_var(<8 x i64> %v, <2 x i64> %a) { ; CHECK-LABEL: @avx512_psra_q_512_var( ; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> [[V:%.*]], <2 x i64> [[A:%.*]]) ; CHECK-NEXT: ret <8 x i64> [[TMP1]] ; %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> %2 = tail call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> %v, <2 x i64> %1) ret <8 x i64> %2 } define <8 x i16> @sse2_psrl_w_var(<8 x i16> %v, <8 x i16> %a) { ; CHECK-LABEL: @sse2_psrl_w_var( ; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> [[V:%.*]], <8 x i16> [[A:%.*]]) ; CHECK-NEXT: ret <8 x i16> [[TMP1]] ; %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> %2 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> %1) ret <8 x i16> %2 } define <4 x i32> @sse2_psrl_d_var(<4 x i32> %v, <4 x i32> %a) { ; CHECK-LABEL: @sse2_psrl_d_var( ; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> [[V:%.*]], <4 x i32> [[A:%.*]]) ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> %2 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> %1) 
ret <4 x i32> %2 } define <2 x i64> @sse2_psrl_q_var(<2 x i64> %v, <2 x i64> %a) { ; CHECK-LABEL: @sse2_psrl_q_var( ; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> [[V:%.*]], <2 x i64> [[A:%.*]]) ; CHECK-NEXT: ret <2 x i64> [[TMP1]] ; %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> %2 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> %1) ret <2 x i64> %2 } define <16 x i16> @avx2_psrl_w_var(<16 x i16> %v, <8 x i16> %a) { ; CHECK-LABEL: @avx2_psrl_w_var( ; CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> [[V:%.*]], <8 x i16> [[A:%.*]]) ; CHECK-NEXT: ret <16 x i16> [[TMP1]] ; %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> %2 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> %1) ret <16 x i16> %2 } define <16 x i16> @avx2_psrl_w_var_bc(<16 x i16> %v, <16 x i8> %a) { ; CHECK-LABEL: @avx2_psrl_w_var_bc( ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[A:%.*]] to <8 x i16> ; CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> [[V:%.*]], <8 x i16> [[TMP1]]) ; CHECK-NEXT: ret <16 x i16> [[TMP2]] ; %1 = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> %2 = bitcast <16 x i8> %1 to <8 x i16> %3 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> %2) ret <16 x i16> %3 } define <8 x i32> @avx2_psrl_d_var(<8 x i32> %v, <4 x i32> %a) { ; CHECK-LABEL: @avx2_psrl_d_var( ; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> [[V:%.*]], <4 x i32> [[A:%.*]]) ; CHECK-NEXT: ret <8 x i32> [[TMP1]] ; %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> %2 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> %1) ret <8 x i32> %2 } define <8 x i32> @avx2_psrl_d_var_bc(<8 x i32> %v, <2 x i64> %a) { ; CHECK-LABEL: @avx2_psrl_d_var_bc( ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[A:%.*]] to <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> 
@llvm.x86.avx2.psrl.d(<8 x i32> [[V:%.*]], <4 x i32> [[TMP1]]) ; CHECK-NEXT: ret <8 x i32> [[TMP2]] ; %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> %2 = bitcast <2 x i64> %1 to <4 x i32> %3 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> %2) ret <8 x i32> %3 } define <4 x i64> @avx2_psrl_q_var(<4 x i64> %v, <2 x i64> %a) { ; CHECK-LABEL: @avx2_psrl_q_var( ; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> [[V:%.*]], <2 x i64> [[A:%.*]]) ; CHECK-NEXT: ret <4 x i64> [[TMP1]] ; %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> %2 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> %1) ret <4 x i64> %2 } define <32 x i16> @avx512_psrl_w_512_var(<32 x i16> %v, <8 x i16> %a) { ; CHECK-LABEL: @avx512_psrl_w_512_var( ; CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> [[V:%.*]], <8 x i16> [[A:%.*]]) ; CHECK-NEXT: ret <32 x i16> [[TMP1]] ; %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> %2 = tail call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %v, <8 x i16> %1) ret <32 x i16> %2 } define <32 x i16> @avx512_psrl_w_512_var_bc(<32 x i16> %v, <16 x i8> %a) { ; CHECK-LABEL: @avx512_psrl_w_512_var_bc( ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[A:%.*]] to <8 x i16> ; CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> [[V:%.*]], <8 x i16> [[TMP1]]) ; CHECK-NEXT: ret <32 x i16> [[TMP2]] ; %1 = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> %2 = bitcast <16 x i8> %1 to <8 x i16> %3 = tail call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %v, <8 x i16> %2) ret <32 x i16> %3 } define <16 x i32> @avx512_psrl_d_512_var(<16 x i32> %v, <4 x i32> %a) { ; CHECK-LABEL: @avx512_psrl_d_512_var( ; CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> [[V:%.*]], <4 x i32> [[A:%.*]]) ; CHECK-NEXT: ret <16 x i32> [[TMP1]] ; %1 = shufflevector <4 x i32> %a, <4 x i32> 
undef, <4 x i32> %2 = tail call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> %v, <4 x i32> %1) ret <16 x i32> %2 } define <16 x i32> @avx512_psrl_d_512_var_bc(<16 x i32> %v, <2 x i64> %a) { ; CHECK-LABEL: @avx512_psrl_d_512_var_bc( ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[A:%.*]] to <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> [[V:%.*]], <4 x i32> [[TMP1]]) ; CHECK-NEXT: ret <16 x i32> [[TMP2]] ; %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> %2 = bitcast <2 x i64> %1 to <4 x i32> %3 = tail call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> %v, <4 x i32> %2) ret <16 x i32> %3 } define <8 x i64> @avx512_psrl_q_512_var(<8 x i64> %v, <2 x i64> %a) { ; CHECK-LABEL: @avx512_psrl_q_512_var( ; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> [[V:%.*]], <2 x i64> [[A:%.*]]) ; CHECK-NEXT: ret <8 x i64> [[TMP1]] ; %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> %2 = tail call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> %v, <2 x i64> %1) ret <8 x i64> %2 } define <8 x i16> @sse2_psll_w_var(<8 x i16> %v, <8 x i16> %a) { ; CHECK-LABEL: @sse2_psll_w_var( ; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> [[V:%.*]], <8 x i16> [[A:%.*]]) ; CHECK-NEXT: ret <8 x i16> [[TMP1]] ; %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> %2 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> %1) ret <8 x i16> %2 } define <4 x i32> @sse2_psll_d_var(<4 x i32> %v, <4 x i32> %a) { ; CHECK-LABEL: @sse2_psll_d_var( ; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> [[V:%.*]], <4 x i32> [[A:%.*]]) ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> %2 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> %1) ret <4 x i32> %2 } define <2 x i64> @sse2_psll_q_var(<2 x i64> %v, <2 x i64> %a) { ; CHECK-LABEL: @sse2_psll_q_var( 
; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> [[V:%.*]], <2 x i64> [[A:%.*]]) ; CHECK-NEXT: ret <2 x i64> [[TMP1]] ; %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> %2 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> %1) ret <2 x i64> %2 } define <16 x i16> @avx2_psll_w_var(<16 x i16> %v, <8 x i16> %a) { ; CHECK-LABEL: @avx2_psll_w_var( ; CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> [[V:%.*]], <8 x i16> [[A:%.*]]) ; CHECK-NEXT: ret <16 x i16> [[TMP1]] ; %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> %2 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> %1) ret <16 x i16> %2 } define <8 x i32> @avx2_psll_d_var(<8 x i32> %v, <4 x i32> %a) { ; CHECK-LABEL: @avx2_psll_d_var( ; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> [[V:%.*]], <4 x i32> [[A:%.*]]) ; CHECK-NEXT: ret <8 x i32> [[TMP1]] ; %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> %2 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> %1) ret <8 x i32> %2 } define <4 x i64> @avx2_psll_q_var(<4 x i64> %v, <2 x i64> %a) { ; CHECK-LABEL: @avx2_psll_q_var( ; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> [[V:%.*]], <2 x i64> [[A:%.*]]) ; CHECK-NEXT: ret <4 x i64> [[TMP1]] ; %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> %2 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> %1) ret <4 x i64> %2 } define <32 x i16> @avx512_psll_w_512_var(<32 x i16> %v, <8 x i16> %a) { ; CHECK-LABEL: @avx512_psll_w_512_var( ; CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> [[V:%.*]], <8 x i16> [[A:%.*]]) ; CHECK-NEXT: ret <32 x i16> [[TMP1]] ; %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> %2 = tail call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> %v, <8 x i16> %1) ret <32 x i16> %2 } define <16 x i32> 
; NOTE(review): This chunk of the InstCombine x86 vector-shift test has been
; damaged in transit: every vector constant literal has been stripped, leaving
; empty operands such as "<8 x i16> )", "bitcast <2 x i64> to <8 x i16>",
; shufflevector calls with no mask operand, and CHECK lines like
; "ret <8 x i16> " with no value. As written this is not valid LLVM IR and the
; FileCheck assertions cannot match. TODO: restore the constants from the
; upstream revision of this test rather than guessing, then regenerate the
; CHECK lines with utils/update_test_checks.py (they are autogenerated and
; must track the restored IR exactly).
; NOTE(review): in @test_sse2_psra_d_0 below, the %3 instruction shifts %1
; (not %2), leaving %2 dead — possibly a transcription artifact; confirm
; against the upstream test before "fixing".
; Contents of this chunk: splat-shift-amount tests for avx512 psll.d/psll.q,
; constant-folding tests for sse2/avx2/avx512 psra.w/psra.d chains, the
; "Old Tests" exhaustive psll/pslli and psrl/psrli chains for sse2/avx2, and
; the trailing intrinsic declarations plus the #1 attribute group.
@avx512_psll_d_512_var(<16 x i32> %v, <4 x i32> %a) { ; CHECK-LABEL: @avx512_psll_d_512_var( ; CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> [[V:%.*]], <4 x i32> [[A:%.*]]) ; CHECK-NEXT: ret <16 x i32> [[TMP1]] ; %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> %2 = tail call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> %v, <4 x i32> %1) ret <16 x i32> %2 } define <8 x i64> @avx512_psll_q_512_var(<8 x i64> %v, <2 x i64> %a) { ; CHECK-LABEL: @avx512_psll_q_512_var( ; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> [[V:%.*]], <2 x i64> [[A:%.*]]) ; CHECK-NEXT: ret <8 x i64> [[TMP1]] ; %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> %2 = tail call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> %v, <2 x i64> %1) ret <8 x i64> %2 } ; ; Constant Folding ; define <8 x i16> @test_sse2_psra_w_0(<8 x i16> %A) { ; CHECK-LABEL: @test_sse2_psra_w_0( ; CHECK-NEXT: ret <8 x i16> [[A:%.*]] ; %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %A, i32 0) %2 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %1, <8 x i16> ) %3 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %2, i32 0) ret <8 x i16> %3 } define <8 x i16> @test_sse2_psra_w_8() { ; CHECK-LABEL: @test_sse2_psra_w_8( ; CHECK-NEXT: ret <8 x i16> ; %1 = bitcast <2 x i64> to <8 x i16> %2 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %1, i32 3) %3 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %2, <8 x i16> ) %4 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %3, i32 2) ret <8 x i16> %4 } define <4 x i32> @test_sse2_psra_d_0(<4 x i32> %A) { ; CHECK-LABEL: @test_sse2_psra_d_0( ; CHECK-NEXT: ret <4 x i32> [[A:%.*]] ; %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %A, i32 0) %2 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %1, <4 x i32> ) %3 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %1, i32 0) ret <4 x i32> %3 } define <4 x i32> @sse2_psra_d_8() { ; 
CHECK-LABEL: @sse2_psra_d_8( ; CHECK-NEXT: ret <4 x i32> ; %1 = bitcast <2 x i64> to <4 x i32> %2 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %1, i32 3) %3 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %2, <4 x i32> ) %4 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %3, i32 2) ret <4 x i32> %4 } define <16 x i16> @test_avx2_psra_w_0(<16 x i16> %A) { ; CHECK-LABEL: @test_avx2_psra_w_0( ; CHECK-NEXT: ret <16 x i16> [[A:%.*]] ; %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %A, i32 0) %2 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %1, <8 x i16> ) %3 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %2, i32 0) ret <16 x i16> %3 } define <16 x i16> @test_avx2_psra_w_8(<16 x i16> %A) { ; CHECK-LABEL: @test_avx2_psra_w_8( ; CHECK-NEXT: ret <16 x i16> ; %1 = bitcast <4 x i64> to <16 x i16> %2 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %1, i32 3) %3 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %2, <8 x i16> ) %4 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %3, i32 2) ret <16 x i16> %4 } define <8 x i32> @test_avx2_psra_d_0(<8 x i32> %A) { ; CHECK-LABEL: @test_avx2_psra_d_0( ; CHECK-NEXT: ret <8 x i32> [[A:%.*]] ; %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %A, i32 0) %2 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %1, <4 x i32> ) %3 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %2, i32 0) ret <8 x i32> %3 } define <8 x i32> @test_avx2_psra_d_8() { ; CHECK-LABEL: @test_avx2_psra_d_8( ; CHECK-NEXT: ret <8 x i32> ; %1 = bitcast <4 x i64> to <8 x i32> %2 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %1, i32 3) %3 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %2, <4 x i32> ) %4 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %3, i32 2) ret <8 x i32> %4 } define <32 x i16> @test_avx512_psra_w_512_0(<32 x i16> %A) { ; CHECK-LABEL: @test_avx512_psra_w_512_0( ; CHECK-NEXT: ret <32 x i16> [[A:%.*]] ; %1 = tail call <32 x i16> 
@llvm.x86.avx512.psrai.w.512(<32 x i16> %A, i32 0) %2 = tail call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %1, <8 x i16> ) %3 = tail call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %2, i32 0) ret <32 x i16> %3 } define <32 x i16> @test_avx512_psra_w_512_8(<32 x i16> %A) { ; CHECK-LABEL: @test_avx512_psra_w_512_8( ; CHECK-NEXT: ret <32 x i16> ; %1 = bitcast <8 x i64> to <32 x i16> %2 = tail call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %1, i32 3) %3 = tail call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %2, <8 x i16> ) %4 = tail call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %3, i32 2) ret <32 x i16> %4 } define <16 x i32> @test_avx512_psra_d_512_0(<16 x i32> %A) { ; CHECK-LABEL: @test_avx512_psra_d_512_0( ; CHECK-NEXT: ret <16 x i32> [[A:%.*]] ; %1 = tail call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> %A, i32 0) %2 = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %1, <4 x i32> ) %3 = tail call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> %2, i32 0) ret <16 x i32> %3 } define <16 x i32> @test_avx512_psra_d_512_8() { ; CHECK-LABEL: @test_avx512_psra_d_512_8( ; CHECK-NEXT: ret <16 x i32> ; %1 = bitcast <8 x i64> to <16 x i32> %2 = tail call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> %1, i32 3) %3 = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %2, <4 x i32> ) %4 = tail call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> %3, i32 2) ret <16 x i32> %4 } ; ; Old Tests ; define <2 x i64> @test_sse2_1() { ; CHECK-LABEL: @test_sse2_1( ; CHECK-NEXT: ret <2 x i64> ; %S = bitcast i32 1 to i32 %1 = zext i32 %S to i64 %2 = insertelement <2 x i64> undef, i64 %1, i32 0 %3 = insertelement <2 x i64> %2, i64 0, i32 1 %4 = bitcast <2 x i64> %3 to <8 x i16> %5 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> , <8 x i16> %4) %6 = bitcast <8 x i16> %5 to <4 x i32> %7 = bitcast <2 x i64> %3 to <4 x i32> %8 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %6, <4 x i32> %7) %9 = bitcast 
<4 x i32> %8 to <2 x i64> %10 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %9, <2 x i64> %3) %11 = bitcast <2 x i64> %10 to <8 x i16> %12 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %11, i32 %S) %13 = bitcast <8 x i16> %12 to <4 x i32> %14 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %13, i32 %S) %15 = bitcast <4 x i32> %14 to <2 x i64> %16 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %15, i32 %S) ret <2 x i64> %16 } define <4 x i64> @test_avx2_1() { ; CHECK-LABEL: @test_avx2_1( ; CHECK-NEXT: ret <4 x i64> ; %S = bitcast i32 1 to i32 %1 = zext i32 %S to i64 %2 = insertelement <2 x i64> undef, i64 %1, i32 0 %3 = insertelement <2 x i64> %2, i64 0, i32 1 %4 = bitcast <2 x i64> %3 to <8 x i16> %5 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> , <8 x i16> %4) %6 = bitcast <16 x i16> %5 to <8 x i32> %7 = bitcast <2 x i64> %3 to <4 x i32> %8 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %6, <4 x i32> %7) %9 = bitcast <8 x i32> %8 to <4 x i64> %10 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %9, <2 x i64> %3) %11 = bitcast <4 x i64> %10 to <16 x i16> %12 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %11, i32 %S) %13 = bitcast <16 x i16> %12 to <8 x i32> %14 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %13, i32 %S) %15 = bitcast <8 x i32> %14 to <4 x i64> %16 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %15, i32 %S) ret <4 x i64> %16 } define <2 x i64> @test_sse2_0() { ; CHECK-LABEL: @test_sse2_0( ; CHECK-NEXT: ret <2 x i64> zeroinitializer ; %S = bitcast i32 128 to i32 %1 = zext i32 %S to i64 %2 = insertelement <2 x i64> undef, i64 %1, i32 0 %3 = insertelement <2 x i64> %2, i64 0, i32 1 %4 = bitcast <2 x i64> %3 to <8 x i16> %5 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> , <8 x i16> %4) %6 = bitcast <8 x i16> %5 to <4 x i32> %7 = bitcast <2 x i64> %3 to <4 x i32> %8 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %6, <4 x i32> %7) %9 = bitcast <4 x i32> %8 to <2 
x i64> %10 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %9, <2 x i64> %3) %11 = bitcast <2 x i64> %10 to <8 x i16> %12 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %11, i32 %S) %13 = bitcast <8 x i16> %12 to <4 x i32> %14 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %13, i32 %S) %15 = bitcast <4 x i32> %14 to <2 x i64> %16 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %15, i32 %S) ret <2 x i64> %16 } define <4 x i64> @test_avx2_0() { ; CHECK-LABEL: @test_avx2_0( ; CHECK-NEXT: ret <4 x i64> zeroinitializer ; %S = bitcast i32 128 to i32 %1 = zext i32 %S to i64 %2 = insertelement <2 x i64> undef, i64 %1, i32 0 %3 = insertelement <2 x i64> %2, i64 0, i32 1 %4 = bitcast <2 x i64> %3 to <8 x i16> %5 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> , <8 x i16> %4) %6 = bitcast <16 x i16> %5 to <8 x i32> %7 = bitcast <2 x i64> %3 to <4 x i32> %8 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %6, <4 x i32> %7) %9 = bitcast <8 x i32> %8 to <4 x i64> %10 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %9, <2 x i64> %3) %11 = bitcast <4 x i64> %10 to <16 x i16> %12 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %11, i32 %S) %13 = bitcast <16 x i16> %12 to <8 x i32> %14 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %13, i32 %S) %15 = bitcast <8 x i32> %14 to <4 x i64> %16 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %15, i32 %S) ret <4 x i64> %16 } define <2 x i64> @test_sse2_psrl_1() { ; CHECK-LABEL: @test_sse2_psrl_1( ; CHECK-NEXT: ret <2 x i64> ; %S = bitcast i32 1 to i32 %1 = zext i32 %S to i64 %2 = insertelement <2 x i64> undef, i64 %1, i32 0 %3 = insertelement <2 x i64> %2, i64 0, i32 1 %4 = bitcast <2 x i64> %3 to <8 x i16> %5 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> , <8 x i16> %4) %6 = bitcast <8 x i16> %5 to <4 x i32> %7 = bitcast <2 x i64> %3 to <4 x i32> %8 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %6, <4 x i32> %7) %9 = bitcast <4 x i32> %8 to <2 x i64> 
%10 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %9, <2 x i64> %3) %11 = bitcast <2 x i64> %10 to <8 x i16> %12 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %11, i32 %S) %13 = bitcast <8 x i16> %12 to <4 x i32> %14 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %13, i32 %S) %15 = bitcast <4 x i32> %14 to <2 x i64> %16 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %15, i32 %S) ret <2 x i64> %16 } define <4 x i64> @test_avx2_psrl_1() { ; CHECK-LABEL: @test_avx2_psrl_1( ; CHECK-NEXT: ret <4 x i64> ; %S = bitcast i32 1 to i32 %1 = zext i32 %S to i64 %2 = insertelement <2 x i64> undef, i64 %1, i32 0 %3 = insertelement <2 x i64> %2, i64 0, i32 1 %4 = bitcast <2 x i64> %3 to <8 x i16> %5 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> , <8 x i16> %4) %6 = bitcast <16 x i16> %5 to <8 x i32> %7 = bitcast <2 x i64> %3 to <4 x i32> %8 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %6, <4 x i32> %7) %9 = bitcast <8 x i32> %8 to <4 x i64> %10 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %9, <2 x i64> %3) %11 = bitcast <4 x i64> %10 to <16 x i16> %12 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %11, i32 %S) %13 = bitcast <16 x i16> %12 to <8 x i32> %14 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %13, i32 %S) %15 = bitcast <8 x i32> %14 to <4 x i64> %16 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %15, i32 %S) ret <4 x i64> %16 } define <2 x i64> @test_sse2_psrl_0() { ; CHECK-LABEL: @test_sse2_psrl_0( ; CHECK-NEXT: ret <2 x i64> zeroinitializer ; %S = bitcast i32 128 to i32 %1 = zext i32 %S to i64 %2 = insertelement <2 x i64> undef, i64 %1, i32 0 %3 = insertelement <2 x i64> %2, i64 0, i32 1 %4 = bitcast <2 x i64> %3 to <8 x i16> %5 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> , <8 x i16> %4) %6 = bitcast <8 x i16> %5 to <4 x i32> %7 = bitcast <2 x i64> %3 to <4 x i32> %8 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %6, <4 x i32> %7) %9 = bitcast <4 x i32> %8 to <2 x 
i64> %10 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %9, <2 x i64> %3) %11 = bitcast <2 x i64> %10 to <8 x i16> %12 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %11, i32 %S) %13 = bitcast <8 x i16> %12 to <4 x i32> %14 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %13, i32 %S) %15 = bitcast <4 x i32> %14 to <2 x i64> %16 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %15, i32 %S) ret <2 x i64> %16 } define <4 x i64> @test_avx2_psrl_0() { ; CHECK-LABEL: @test_avx2_psrl_0( ; CHECK-NEXT: ret <4 x i64> zeroinitializer ; %S = bitcast i32 128 to i32 %1 = zext i32 %S to i64 %2 = insertelement <2 x i64> undef, i64 %1, i32 0 %3 = insertelement <2 x i64> %2, i64 0, i32 1 %4 = bitcast <2 x i64> %3 to <8 x i16> %5 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> , <8 x i16> %4) %6 = bitcast <16 x i16> %5 to <8 x i32> %7 = bitcast <2 x i64> %3 to <4 x i32> %8 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %6, <4 x i32> %7) %9 = bitcast <8 x i32> %8 to <4 x i64> %10 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %9, <2 x i64> %3) %11 = bitcast <4 x i64> %10 to <16 x i16> %12 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %11, i32 %S) %13 = bitcast <16 x i16> %12 to <8 x i32> %14 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %13, i32 %S) %15 = bitcast <8 x i32> %14 to <4 x i64> %16 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %15, i32 %S) ret <4 x i64> %16 } declare <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64>, i32) #1 declare <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32>, i32) #1 declare <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16>, i32) #1 declare <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64>, <2 x i64>) #1 declare <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32>, <4 x i32>) #1 declare <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16>, <8 x i16>) #1 declare <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64>, i32) #1 declare <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32>, i32) #1 declare 
<16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16>, i32) #1 declare <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64>, <2 x i64>) #1 declare <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32>, <4 x i32>) #1 declare <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16>, <8 x i16>) #1 declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) #1 declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) #1 declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) #1 declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) #1 declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) #1 declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) #1 declare <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64>, i32) #1 declare <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32>, i32) #1 declare <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16>, i32) #1 declare <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64>, <2 x i64>) #1 declare <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32>, <4 x i32>) #1 declare <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16>, <8 x i16>) #1 declare <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64>, i32) #1 declare <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32>, i32) #1 declare <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16>, i32) #1 declare <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64>, <2 x i64>) #1 declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) #1 declare <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16>, <8 x i16>) #1 declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) #1 declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) #1 declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) #1 declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) #1 declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) #1 declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) #1 declare <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64>, i32) #1 declare <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32>, i32) #1 declare <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16>, i32) 
#1 declare <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64>, <2 x i64>) #1 declare <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32>, <4 x i32>) #1 declare <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16>, <8 x i16>) #1 declare <4 x i64> @llvm.x86.avx512.psrai.q.256(<4 x i64>, i32) #1 declare <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32>, i32) #1 declare <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16>, i32) #1 declare <4 x i64> @llvm.x86.avx512.psra.q.256(<4 x i64>, <2 x i64>) #1 declare <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32>, <4 x i32>) #1 declare <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16>, <8 x i16>) #1 declare <2 x i64> @llvm.x86.avx512.psrai.q.128(<2 x i64>, i32) #1 declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) #1 declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) #1 declare <2 x i64> @llvm.x86.avx512.psra.q.128(<2 x i64>, <2 x i64>) #1 declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) #1 declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) #1 declare <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32>, <4 x i32>) #1 declare <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32>, <8 x i32>) #1 declare <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32>, <16 x i32>) #1 declare <2 x i64> @llvm.x86.avx512.psrav.q.128(<2 x i64>, <2 x i64>) #1 declare <4 x i64> @llvm.x86.avx512.psrav.q.256(<4 x i64>, <4 x i64>) #1 declare <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64>, <8 x i64>) #1 declare <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32>, <4 x i32>) #1 declare <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32>, <8 x i32>) #1 declare <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64>, <2 x i64>) #1 declare <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64>, <4 x i64>) #1 declare <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32>, <16 x i32>) #1 declare <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64>, <8 x i64>) #1 declare <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32>, <4 x i32>) #1 declare <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32>, <8 x i32>) #1 declare <2 x i64> 
@llvm.x86.avx2.psllv.q(<2 x i64>, <2 x i64>) #1 declare <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64>, <4 x i64>) #1 declare <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32>, <16 x i32>) #1 declare <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64>, <8 x i64>) #1 declare <8 x i16> @llvm.x86.avx512.psrav.w.128(<8 x i16>, <8 x i16>) #1 declare <16 x i16> @llvm.x86.avx512.psrav.w.256(<16 x i16>, <16 x i16>) #1 declare <32 x i16> @llvm.x86.avx512.psrav.w.512(<32 x i16>, <32 x i16>) #1 declare <8 x i16> @llvm.x86.avx512.psrlv.w.128(<8 x i16>, <8 x i16>) #1 declare <16 x i16> @llvm.x86.avx512.psrlv.w.256(<16 x i16>, <16 x i16>) #1 declare <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16>, <32 x i16>) #1 declare <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16>, <8 x i16>) #1 declare <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16>, <16 x i16>) #1 declare <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16>, <32 x i16>) #1 attributes #1 = { nounwind readnone }