; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -mtriple=x86_64-unknown-unknown -S | FileCheck %s

; This test runs InstCombine (see the RUN line above) over calls to the x86
; pack intrinsics packssdw / packusdw / packsswb / packuswb at 128-, 256- and
; 512-bit vector widths. The comment block at the top of each function holds
; the autogenerated expected output; regenerate it with the script named in the
; NOTE line instead of editing it by hand.
;
; NOTE(review): in this copy of the file several constant vector operands are
; absent: a vector type is followed directly by ',' or ')' with no value, and
; some expected 'ret' lines carry a type but no constant. They look like they
; were lost during extraction. Restore them from the original test before
; running, because the IR below does not parse as written.

;
; UNDEF Elts
;
; Every function in this section calls one pack intrinsic with both operands
; undef; the expected output is a bare 'ret' of an undef vector of the result
; type, i.e. the intrinsic call is gone.

define <8 x i16> @undef_packssdw_128() {
; CHECK-LABEL: @undef_packssdw_128(
; CHECK-NEXT: ret <8 x i16> undef
;
  %1 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> undef, <4 x i32> undef)
  ret <8 x i16> %1
}

define <8 x i16> @undef_packusdw_128() {
; CHECK-LABEL: @undef_packusdw_128(
; CHECK-NEXT: ret <8 x i16> undef
;
  %1 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> undef, <4 x i32> undef)
  ret <8 x i16> %1
}

define <16 x i8> @undef_packsswb_128() {
; CHECK-LABEL: @undef_packsswb_128(
; CHECK-NEXT: ret <16 x i8> undef
;
  %1 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> undef, <8 x i16> undef)
  ret <16 x i8> %1
}

define <16 x i8> @undef_packuswb_128() {
; CHECK-LABEL: @undef_packuswb_128(
; CHECK-NEXT: ret <16 x i8> undef
;
  %1 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> undef, <8 x i16> undef)
  ret <16 x i8> %1
}

define <16 x i16> @undef_packssdw_256() {
; CHECK-LABEL: @undef_packssdw_256(
; CHECK-NEXT: ret <16 x i16> undef
;
  %1 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> undef, <8 x i32> undef)
  ret <16 x i16> %1
}

define <16 x i16> @undef_packusdw_256() {
; CHECK-LABEL: @undef_packusdw_256(
; CHECK-NEXT: ret <16 x i16> undef
;
  %1 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> undef, <8 x i32> undef)
  ret <16 x i16> %1
}

define <32 x i8> @undef_packsswb_256() {
; CHECK-LABEL: @undef_packsswb_256(
; CHECK-NEXT: ret <32 x i8> undef
;
  %1 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> undef, <16 x i16> undef)
  ret <32 x i8> %1
}

define <32 x i8> @undef_packuswb_256() {
; CHECK-LABEL: @undef_packuswb_256(
; CHECK-NEXT: ret <32 x i8> undef
;
  %1 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> undef, <16 x i16> undef)
  ret <32 x i8> %1
}

define <32 x i16> @undef_packssdw_512() {
; CHECK-LABEL: @undef_packssdw_512(
; CHECK-NEXT: ret <32 x i16> undef
;
  %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> undef, <16 x i32> undef)
  ret <32 x i16> %1
}

define <32 x i16> @undef_packusdw_512() {
; CHECK-LABEL: @undef_packusdw_512(
; CHECK-NEXT: ret <32 x i16> undef
;
  %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> undef, <16 x i32> undef)
  ret <32 x i16> %1
}

define <64 x i8> @undef_packsswb_512() {
; CHECK-LABEL: @undef_packsswb_512(
; CHECK-NEXT: ret <64 x i8> undef
;
  %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> undef, <32 x i16> undef)
  ret <64 x i8> %1
}

define <64 x i8> @undef_packuswb_512() {
; CHECK-LABEL: @undef_packuswb_512(
; CHECK-NEXT: ret <64 x i8> undef
;
  %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> undef, <32 x i16> undef)
  ret <64 x i8> %1
}

;
; Constant Folding
;
; Each function calls a pack intrinsic whose operands are constant vectors,
; undef or zeroinitializer. The expected output is a single 'ret' of a vector
; constant with no remaining call, i.e. the call is folded at compile time.
; NOTE(review): the literal constant operands and the expected folded constants
; are not present in this copy (see the note at the top of the file), so the
; exact input/output values cannot be read off here.

define <8 x i16> @fold_packssdw_128() {
; CHECK-LABEL: @fold_packssdw_128(
; CHECK-NEXT: ret <8 x i16>
;
  %1 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> , <4 x i32> zeroinitializer)
  ret <8 x i16> %1
}

define <8 x i16> @fold_packusdw_128() {
; CHECK-LABEL: @fold_packusdw_128(
; CHECK-NEXT: ret <8 x i16>
;
  %1 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> undef, <4 x i32> )
  ret <8 x i16> %1
}

define <16 x i8> @fold_packsswb_128() {
; CHECK-LABEL: @fold_packsswb_128(
; CHECK-NEXT: ret <16 x i8>
;
  %1 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> zeroinitializer, <8 x i16> undef)
  ret <16 x i8> %1
}

define <16 x i8> @fold_packuswb_128() {
; CHECK-LABEL: @fold_packuswb_128(
; CHECK-NEXT: ret <16 x i8>
;
  %1 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> , <8 x i16> )
  ret <16 x i8> %1
}

define <16 x i16> @fold_packssdw_256() {
; CHECK-LABEL: @fold_packssdw_256(
; CHECK-NEXT: ret <16 x i16>
;
  %1 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> , <8 x i32> undef)
  ret <16 x i16> %1
}

define <16 x i16> @fold_packusdw_256() {
; CHECK-LABEL: @fold_packusdw_256(
; CHECK-NEXT: ret <16 x i16>
;
  %1 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> , <8 x i32> )
  ret <16 x i16> %1
}
; Constant-folding cases for the 256- and 512-bit byte/word packs: each call
; takes only constant, undef or zeroinitializer operands and the expected
; output is a single 'ret' of a vector constant with no remaining call.
; NOTE(review): in this copy of the file the literal constant vectors (call
; operands, expected 'ret' values, shuffle masks and shift/mask amounts further
; down) are absent: a type is followed directly by ',' / ')' or by the next
; instruction. They look like they were lost during extraction; restore them
; from the original test before running, because the IR does not parse as
; written.

define <32 x i8> @fold_packsswb_256() {
; CHECK-LABEL: @fold_packsswb_256(
; CHECK-NEXT: ret <32 x i8>
;
  %1 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> undef, <16 x i16> zeroinitializer)
  ret <32 x i8> %1
}

define <32 x i8> @fold_packuswb_256() {
; CHECK-LABEL: @fold_packuswb_256(
; CHECK-NEXT: ret <32 x i8>
;
  %1 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> zeroinitializer, <16 x i16> )
  ret <32 x i8> %1
}

define <32 x i16> @fold_packssdw_512() {
; CHECK-LABEL: @fold_packssdw_512(
; CHECK-NEXT: ret <32 x i16>
;
  %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> , <16 x i32> undef)
  ret <32 x i16> %1
}

define <32 x i16> @fold_packusdw_512() {
; CHECK-LABEL: @fold_packusdw_512(
; CHECK-NEXT: ret <32 x i16>
;
  %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> , <16 x i32> )
  ret <32 x i16> %1
}

define <64 x i8> @fold_packsswb_512() {
; CHECK-LABEL: @fold_packsswb_512(
; CHECK-NEXT: ret <64 x i8>
;
  %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> undef, <32 x i16> zeroinitializer)
  ret <64 x i8> %1
}

define <64 x i8> @fold_packuswb_512() {
; CHECK-LABEL: @fold_packuswb_512(
; CHECK-NEXT: ret <64 x i8>
;
  %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> zeroinitializer, <32 x i16> )
  ret <64 x i8> %1
}

;
; Demanded Elts
;
; In this section the pack operands are first modified (shufflevector or
; insertelement), the pack is called, and the result is passed through a final
; shufflevector. The expected output shows what remains after InstCombine:
;  - packssdw_*: the shuffles on the inputs disappear, %a0 is passed directly
;    and the second operand becomes undef.
;  - packusdw_128: the insertelements disappear and %a0/%a1 are passed directly.
;  - packusdw_256/512: the first operand becomes poison and only the shuffle of
;    %a1 is kept.
;  - packsswb_*: the whole function folds to a zeroinitializer return.
;  - packuswb_*: the whole function folds to an undef return.
; NOTE(review): the shuffle masks are not present in this copy, so which result
; lanes each test actually demands cannot be confirmed from here.

define <8 x i16> @elts_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: @elts_packssdw_128(
; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> [[A0:%.*]], <4 x i32> undef)
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> undef, <8 x i32>
; CHECK-NEXT: ret <8 x i16> [[TMP2]]
;
  %1 = shufflevector <4 x i32> %a0, <4 x i32> undef, <4 x i32>
  %2 = shufflevector <4 x i32> %a1, <4 x i32> undef, <4 x i32>
  %3 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %1, <4 x i32> %2)
  %4 = shufflevector <8 x i16> %3, <8 x i16> undef, <8 x i32>
  ret <8 x i16> %4
}

define <8 x i16> @elts_packusdw_128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: @elts_packusdw_128(
; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> [[A0:%.*]], <4 x i32> [[A1:%.*]])
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> undef, <8 x i32>
; CHECK-NEXT: ret <8 x i16> [[TMP2]]
;
  ; Zero lane 0 of %a0 and lane 3 of %a1 before packing.
  %1 = insertelement <4 x i32> %a0, i32 0, i32 0
  %2 = insertelement <4 x i32> %a1, i32 0, i32 3
  %3 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %1, <4 x i32> %2)
  %4 = shufflevector <8 x i16> %3, <8 x i16> undef, <8 x i32>
  ret <8 x i16> %4
}

define <16 x i8> @elts_packsswb_128(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: @elts_packsswb_128(
; CHECK-NEXT: ret <16 x i8> zeroinitializer
;
  ; Lane 0 of each operand is forced to zero before packing.
  %1 = insertelement <8 x i16> %a0, i16 0, i32 0
  %2 = insertelement <8 x i16> %a1, i16 0, i32 0
  %3 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %1, <8 x i16> %2)
  %4 = shufflevector <16 x i8> %3, <16 x i8> undef, <16 x i32>
  ret <16 x i8> %4
}

; NOTE(review): %a0 and %a1 are declared but never used here; both pack
; operands are built from undef vectors.
define <16 x i8> @elts_packuswb_128(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: @elts_packuswb_128(
; CHECK-NEXT: ret <16 x i8> undef
;
  %1 = insertelement <8 x i16> undef, i16 0, i32 0
  %2 = insertelement <8 x i16> undef, i16 0, i32 0
  %3 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %1, <8 x i16> %2)
  %4 = shufflevector <16 x i8> %3, <16 x i8> undef, <16 x i32>
  ret <16 x i8> %4
}

define <16 x i16> @elts_packssdw_256(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: @elts_packssdw_256(
; CHECK-NEXT: [[TMP1:%.*]] = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> [[A0:%.*]], <8 x i32> undef)
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i16> [[TMP1]], <16 x i16> undef, <16 x i32>
; CHECK-NEXT: ret <16 x i16> [[TMP2]]
;
  %1 = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32>
  %2 = shufflevector <8 x i32> %a1, <8 x i32> undef, <8 x i32>
  %3 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %1, <8 x i32> %2)
  %4 = shufflevector <16 x i16> %3, <16 x i16> undef, <16 x i32>
  ret <16 x i16> %4
}

define <16 x i16> @elts_packusdw_256(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: @elts_packusdw_256(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A1:%.*]], <8 x i32> undef, <8 x i32>
; CHECK-NEXT: [[TMP2:%.*]] = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> poison, <8 x i32> [[TMP1]])
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x i16> [[TMP2]], <16 x i16> undef, <16 x i32>
; CHECK-NEXT: ret <16 x i16> [[TMP3]]
;
  %1 = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32>
  %2 = shufflevector <8 x i32> %a1, <8 x i32> undef, <8 x i32>
  %3 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %1, <8 x i32> %2)
  %4 = shufflevector <16 x i16> %3, <16 x i16> undef, <16 x i32>
  ret <16 x i16> %4
}

define <32 x i8> @elts_packsswb_256(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK-LABEL: @elts_packsswb_256(
; CHECK-NEXT: ret <32 x i8> zeroinitializer
;
  ; Zero lane 0 of %a0 and lane 8 of %a1 before packing.
  %1 = insertelement <16 x i16> %a0, i16 0, i32 0
  %2 = insertelement <16 x i16> %a1, i16 0, i32 8
  %3 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %1, <16 x i16> %2)
  %4 = shufflevector <32 x i8> %3, <32 x i8> undef, <32 x i32>
  ret <32 x i8> %4
}

; NOTE(review): %a0 and %a1 are declared but never used here; both pack
; operands are built from undef vectors.
define <32 x i8> @elts_packuswb_256(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK-LABEL: @elts_packuswb_256(
; CHECK-NEXT: ret <32 x i8> undef
;
  %1 = insertelement <16 x i16> undef, i16 0, i32 1
  %2 = insertelement <16 x i16> undef, i16 0, i32 0
  %3 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %1, <16 x i16> %2)
  ; A zeroinitializer mask broadcasts result lane 0 to every output lane.
  %4 = shufflevector <32 x i8> %3, <32 x i8> undef, <32 x i32> zeroinitializer
  ret <32 x i8> %4
}

define <32 x i16> @elts_packssdw_512(<16 x i32> %a0, <16 x i32> %a1) {
; CHECK-LABEL: @elts_packssdw_512(
; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A0:%.*]], <16 x i32> undef)
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <32 x i16> [[TMP1]], <32 x i16> undef, <32 x i32>
; CHECK-NEXT: ret <32 x i16> [[TMP2]]
;
  %1 = shufflevector <16 x i32> %a0, <16 x i32> undef, <16 x i32>
  %2 = shufflevector <16 x i32> %a1, <16 x i32> undef, <16 x i32>
  %3 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %1, <16 x i32> %2)
  %4 = shufflevector <32 x i16> %3, <32 x i16> undef, <32 x i32>
  ret <32 x i16> %4
}

define <32 x i16> @elts_packusdw_512(<16 x i32> %a0, <16 x i32> %a1) {
; CHECK-LABEL: @elts_packusdw_512(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> [[A1:%.*]], <16 x i32> undef, <16 x i32>
; CHECK-NEXT: [[TMP2:%.*]] = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> poison, <16 x i32> [[TMP1]])
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <32 x i16> [[TMP2]], <32 x i16> undef, <32 x i32>
; CHECK-NEXT: ret <32 x i16> [[TMP3]]
;
  %1 = shufflevector <16 x i32> %a0, <16 x i32> undef, <16 x i32>
  %2 = shufflevector <16 x i32> %a1, <16 x i32> undef, <16 x i32>
  %3 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %1, <16 x i32> %2)
  %4 = shufflevector <32 x i16> %3, <32 x i16> undef, <32 x i32>
  ret <32 x i16> %4
}

define <64 x i8> @elts_packsswb_512(<32 x i16> %a0, <32 x i16> %a1) {
; CHECK-LABEL: @elts_packsswb_512(
; CHECK-NEXT: ret <64 x i8> zeroinitializer
;
  ; Zero lanes 0 and 16 of %a0 and lanes 8 and 24 of %a1 before packing.
  %1 = insertelement <32 x i16> %a0, i16 0, i32 0
  %2 = insertelement <32 x i16> %a1, i16 0, i32 8
  %3 = insertelement <32 x i16> %1, i16 0, i32 16
  %4 = insertelement <32 x i16> %2, i16 0, i32 24
  %5 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %3, <32 x i16> %4)
  %6 = shufflevector <64 x i8> %5, <64 x i8> undef, <64 x i32>
  ret <64 x i8> %6
}

; NOTE(review): %a0 and %a1 are declared but never used here; both pack
; operands are built from undef vectors.
define <64 x i8> @elts_packuswb_512(<32 x i16> %a0, <32 x i16> %a1) {
; CHECK-LABEL: @elts_packuswb_512(
; CHECK-NEXT: ret <64 x i8> undef
;
  %1 = insertelement <32 x i16> undef, i16 0, i32 1
  %2 = insertelement <32 x i16> undef, i16 0, i32 0
  %3 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %1, <32 x i16> %2)
  ; A zeroinitializer mask broadcasts result lane 0 to every output lane.
  %4 = shufflevector <64 x i8> %3, <64 x i8> undef, <64 x i32> zeroinitializer
  ret <64 x i8> %4
}

;
; Truncation (without Saturation)
;
; Each function shifts or masks both inputs (ashr / lshr / and with a constant)
; and then packs them. In every case the expected output is the same
; shift/mask + pack sequence as the input, i.e. the pack call is still present
; after InstCombine.
; NOTE(review): the shift amounts and mask constants are not present in this
; copy. Presumably they bound the inputs so the pack cannot saturate (per the
; section title); confirm against the original constants.
; NOTE(review): trunc_packusdw_128, trunc_packusdw_256 and trunc_packusdw_512
; call the signed packssdw intrinsics even though their names say packusdw.
; Confirm whether that is intentional before changing it, since the expected
; output lines match the current calls.

define <8 x i16> @trunc_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: @trunc_packssdw_128(
; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> [[A0:%.*]],
; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i32> [[A1:%.*]],
; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
; CHECK-NEXT: ret <8 x i16> [[TMP3]]
;
  %1 = ashr <4 x i32> %a0,
  %2 = and <4 x i32> %a1,
  %3 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %1, <4 x i32> %2)
  ret <8 x i16> %3
}

define <8 x i16> @trunc_packusdw_128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: @trunc_packusdw_128(
; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> [[A0:%.*]],
; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i32> [[A1:%.*]],
; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
; CHECK-NEXT: ret <8 x i16> [[TMP3]]
;
  %1 = lshr <4 x i32> %a0,
  %2 = and <4 x i32> %a1,
  %3 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %1, <4 x i32> %2)
  ret <8 x i16> %3
}

define <16 x i8> @trunc_packsswb_128(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: @trunc_packsswb_128(
; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> [[A0:%.*]],
; CHECK-NEXT: [[TMP2:%.*]] = and <8 x i16> [[A1:%.*]],
; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
; CHECK-NEXT: ret <16 x i8> [[TMP3]]
;
  %1 = ashr <8 x i16> %a0,
  %2 = and <8 x i16> %a1,
  %3 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %1, <8 x i16> %2)
  ret <16 x i8> %3
}

define <16 x i8> @trunc_packuswb_128(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: @trunc_packuswb_128(
; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i16> [[A0:%.*]],
; CHECK-NEXT: [[TMP2:%.*]] = and <8 x i16> [[A1:%.*]],
; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
; CHECK-NEXT: ret <16 x i8> [[TMP3]]
;
  %1 = lshr <8 x i16> %a0,
  %2 = and <8 x i16> %a1,
  %3 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %1, <8 x i16> %2)
  ret <16 x i8> %3
}

define <16 x i16> @trunc_packssdw_256(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: @trunc_packssdw_256(
; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[A0:%.*]],
; CHECK-NEXT: [[TMP2:%.*]] = ashr <8 x i32> [[A1:%.*]],
; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> [[TMP1]], <8 x i32> [[TMP2]])
; CHECK-NEXT: ret <16 x i16> [[TMP3]]
;
  %1 = ashr <8 x i32> %a0,
  %2 = ashr <8 x i32> %a1,
  %3 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %1, <8 x i32> %2)
  ret <16 x i16> %3
}

define <16 x i16> @trunc_packusdw_256(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: @trunc_packusdw_256(
; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i32> [[A0:%.*]],
; CHECK-NEXT: [[TMP2:%.*]] = and <8 x i32> [[A1:%.*]],
; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> [[TMP1]], <8 x i32> [[TMP2]])
; CHECK-NEXT: ret <16 x i16> [[TMP3]]
;
  %1 = lshr <8 x i32> %a0,
  %2 = and <8 x i32> %a1,
  %3 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %1, <8 x i32> %2)
  ret <16 x i16> %3
}

define <32 x i8> @trunc_packsswb_256(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK-LABEL: @trunc_packsswb_256(
; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> [[A0:%.*]],
; CHECK-NEXT: [[TMP2:%.*]] = and <16 x i16> [[A1:%.*]],
; CHECK-NEXT: [[TMP3:%.*]] = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> [[TMP1]], <16 x i16> [[TMP2]])
; CHECK-NEXT: ret <32 x i8> [[TMP3]]
;
  %1 = ashr <16 x i16> %a0,
  %2 = and <16 x i16> %a1,
  %3 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %1, <16 x i16> %2)
  ret <32 x i8> %3
}

define <32 x i8> @trunc_packuswb_256(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK-LABEL: @trunc_packuswb_256(
; CHECK-NEXT: [[TMP1:%.*]] = lshr <16 x i16> [[A0:%.*]],
; CHECK-NEXT: [[TMP2:%.*]] = and <16 x i16> [[A1:%.*]],
; CHECK-NEXT: [[TMP3:%.*]] = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> [[TMP1]], <16 x i16> [[TMP2]])
; CHECK-NEXT: ret <32 x i8> [[TMP3]]
;
  %1 = lshr <16 x i16> %a0,
  %2 = and <16 x i16> %a1,
  %3 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %1, <16 x i16> %2)
  ret <32 x i8> %3
}

define <32 x i16> @trunc_packssdw_512(<16 x i32> %a0, <16 x i32> %a1) {
; CHECK-LABEL: @trunc_packssdw_512(
; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i32> [[A0:%.*]],
; CHECK-NEXT: [[TMP2:%.*]] = ashr <16 x i32> [[A1:%.*]],
; CHECK-NEXT: [[TMP3:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP1]], <16 x i32> [[TMP2]])
; CHECK-NEXT: ret <32 x i16> [[TMP3]]
;
  %1 = ashr <16 x i32> %a0,
  %2 = ashr <16 x i32> %a1,
  %3 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %1, <16 x i32> %2)
  ret <32 x i16> %3
}

define <32 x i16> @trunc_packusdw_512(<16 x i32> %a0, <16 x i32> %a1) {
; CHECK-LABEL: @trunc_packusdw_512(
; CHECK-NEXT: [[TMP1:%.*]] = lshr <16 x i32> [[A0:%.*]],
; CHECK-NEXT: [[TMP2:%.*]] = and <16 x i32> [[A1:%.*]],
; CHECK-NEXT: [[TMP3:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP1]], <16 x i32> [[TMP2]])
; CHECK-NEXT: ret <32 x i16> [[TMP3]]
;
  %1 = lshr <16 x i32> %a0,
  %2 = and <16 x i32> %a1,
  %3 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %1, <16 x i32> %2)
  ret <32 x i16> %3
}

define <64 x i8> @trunc_packsswb_512(<32 x i16> %a0, <32 x i16> %a1) {
; CHECK-LABEL: @trunc_packsswb_512(
; CHECK-NEXT: [[TMP1:%.*]] = ashr <32 x i16> [[A0:%.*]],
; CHECK-NEXT: [[TMP2:%.*]] = and <32 x i16> [[A1:%.*]],
; CHECK-NEXT: [[TMP3:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[TMP1]], <32 x i16> [[TMP2]])
; CHECK-NEXT: ret <64 x i8> [[TMP3]]
;
  %1 = ashr <32 x i16> %a0,
  %2 = and <32 x i16> %a1,
  %3 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %1, <32 x i16> %2)
  ret <64 x i8> %3
}

define <64 x i8> @trunc_packuswb_512(<32 x i16> %a0, <32 x i16> %a1) {
; CHECK-LABEL: @trunc_packuswb_512(
; CHECK-NEXT: [[TMP1:%.*]] = lshr <32 x i16> [[A0:%.*]],
; CHECK-NEXT: [[TMP2:%.*]] = and <32 x i16> [[A1:%.*]],
; CHECK-NEXT: [[TMP3:%.*]] = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> [[TMP1]], <32 x i16> [[TMP2]])
; CHECK-NEXT: ret <64 x i8> [[TMP3]]
;
  %1 = lshr <32 x i16> %a0,
  %2 = and <32 x i16> %a1,
  %3 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %1, <32 x i16> %2)
  ret <64 x i8> %3
}

;
; Signed Pack Comparison Results
;
; Each function compares two pairs of vectors with 'icmp eq', sign-extends the
; i1 results back to the source element width (so every lane is 0 or -1), and
; feeds both into a signed pack. The expected output is the same
; icmp / sext / pack sequence, i.e. the pack call is still present after
; InstCombine.

define <8 x i16> @cmp_packssdw_128(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2, <4 x i32> %a3) {
; CHECK-LABEL: @cmp_packssdw_128(
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <4 x i32> [[A0:%.*]], [[A1:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <4 x i32> [[A2:%.*]], [[A3:%.*]]
; CHECK-NEXT: [[TMP3:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
; CHECK-NEXT: [[TMP4:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i32>
; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> [[TMP3]], <4 x i32> [[TMP4]])
; CHECK-NEXT: ret <8 x i16> [[TMP5]]
;
  %1 = icmp eq <4 x i32> %a0, %a1
  %2 = icmp eq <4 x i32> %a2, %a3
  %3 = sext <4 x i1> %1 to <4 x i32>
  %4 = sext <4 x i1> %2 to <4 x i32>
  %5 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %3, <4 x i32> %4)
  ret <8 x i16> %5
}

define <16 x i8> @cmp_packsswb_128(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2, <8 x i16> %a3) {
; CHECK-LABEL: @cmp_packsswb_128(
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <8 x i16> [[A0:%.*]], [[A1:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <8 x i16> [[A2:%.*]], [[A3:%.*]]
; CHECK-NEXT: [[TMP3:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
; CHECK-NEXT: [[TMP4:%.*]] = sext <8 x i1> [[TMP2]] to <8 x i16>
; CHECK-NEXT: [[TMP5:%.*]] = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> [[TMP3]], <8 x i16> [[TMP4]])
; CHECK-NEXT: ret <16 x i8> [[TMP5]]
;
  %1 = icmp eq <8 x i16> %a0, %a1
  %2 = icmp eq <8 x i16> %a2, %a3
  %3 = sext <8 x i1> %1 to <8 x i16>
  %4 = sext <8 x i1> %2 to <8 x i16>
  %5 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %3, <8 x i16> %4)
  ret <16 x i8> %5
}

define <16 x i16> @cmp_packssdw_256(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> %a2, <8 x i32> %a3) {
; CHECK-LABEL: @cmp_packssdw_256(
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <8 x i32> [[A0:%.*]], [[A1:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <8 x i32> [[A2:%.*]], [[A3:%.*]]
; CHECK-NEXT: [[TMP3:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i32>
; CHECK-NEXT: [[TMP4:%.*]] = sext <8 x i1> [[TMP2]] to <8 x i32>
; CHECK-NEXT: [[TMP5:%.*]] = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> [[TMP3]], <8 x i32> [[TMP4]])
; CHECK-NEXT: ret <16 x i16> [[TMP5]]
;
  %1 = icmp eq <8 x i32> %a0, %a1
  %2 = icmp eq <8 x i32> %a2, %a3
  %3 = sext <8 x i1> %1 to <8 x i32>
  %4 = sext <8 x i1> %2 to <8 x i32>
  %5 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %3, <8 x i32> %4)
  ret <16 x i16> %5
}

define <32 x i8> @cmp_packsswb_256(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> %a2, <16 x i16> %a3) {
; CHECK-LABEL: @cmp_packsswb_256(
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <16 x i16> [[A0:%.*]], [[A1:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <16 x i16> [[A2:%.*]], [[A3:%.*]]
; CHECK-NEXT: [[TMP3:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i16>
; CHECK-NEXT: [[TMP4:%.*]] = sext <16 x i1> [[TMP2]] to <16 x i16>
; CHECK-NEXT: [[TMP5:%.*]] = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> [[TMP3]], <16 x i16> [[TMP4]])
; CHECK-NEXT: ret <32 x i8> [[TMP5]]
;
  %1 = icmp eq <16 x i16> %a0, %a1
  %2 = icmp eq <16 x i16> %a2, %a3
  %3 = sext <16 x i1> %1 to <16 x i16>
  %4 = sext <16 x i1> %2 to <16 x i16>
  %5 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %3, <16 x i16> %4)
  ret <32 x i8> %5
}

define <32 x i16> @cmp_packssdw_512(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, <16 x i32> %a3) {
; CHECK-LABEL: @cmp_packssdw_512(
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <16 x i32> [[A0:%.*]], [[A1:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <16 x i32> [[A2:%.*]], [[A3:%.*]]
; CHECK-NEXT: [[TMP3:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i32>
; CHECK-NEXT: [[TMP4:%.*]] = sext <16 x i1> [[TMP2]] to <16 x i32>
; CHECK-NEXT: [[TMP5:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP3]], <16 x i32> [[TMP4]])
; CHECK-NEXT: ret <32 x i16> [[TMP5]]
;
  %1 = icmp eq <16 x i32> %a0, %a1
  %2 = icmp eq <16 x i32> %a2, %a3
  %3 = sext <16 x i1> %1 to <16 x i32>
  %4 = sext <16 x i1> %2 to <16 x i32>
  %5 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %3, <16 x i32> %4)
  ret <32 x i16> %5
}

define <64 x i8> @cmp_packsswb_512(<32 x i16> %a0, <32 x i16> %a1, <32 x i16> %a2, <32 x i16> %a3) {
; CHECK-LABEL: @cmp_packsswb_512(
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <32 x i16> [[A0:%.*]], [[A1:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <32 x i16> [[A2:%.*]], [[A3:%.*]]
; CHECK-NEXT: [[TMP3:%.*]] = sext <32 x i1> [[TMP1]] to <32 x i16>
; CHECK-NEXT: [[TMP4:%.*]] = sext <32 x i1> [[TMP2]] to <32 x i16>
; CHECK-NEXT: [[TMP5:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[TMP3]], <32 x i16> [[TMP4]])
; CHECK-NEXT: ret <64 x i8> [[TMP5]]
;
  %1 = icmp eq <32 x i16> %a0, %a1
  %2 = icmp eq <32 x i16> %a2, %a3
  %3 = sext <32 x i1> %1 to <32 x i16>
  %4 = sext <32 x i1> %2 to <32 x i16>
  %5 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %3, <32 x i16> %4)
  ret <64 x i8> %5
}

; Declarations of the pack intrinsics used above, grouped by vector width:
; 128-bit (SSE2 / SSE4.1), 256-bit (AVX2) and 512-bit (AVX-512). Each takes two
; vectors of the wider element type and returns one vector with twice as many
; elements of half the width.
declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone
declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone
declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone
declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x i32>) nounwind readnone

declare <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32>, <8 x i32>) nounwind readnone
declare <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32>, <8 x i32>) nounwind readnone
declare <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16>, <16 x i16>) nounwind readnone
declare <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16>, <16 x i16>) nounwind readnone

declare <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32>, <16 x i32>) nounwind readnone
declare <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32>, <16 x i32>) nounwind readnone
declare <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16>, <32 x i16>) nounwind readnone
declare <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16>, <32 x i16>) nounwind readnone