; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=KNL ; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s --check-prefix=CHECK --check-prefix=SKX ; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mattr=+avx512bw | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512BW ; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mattr=+avx512dq | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512DQ ; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=i686-apple-darwin -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s --check-prefix=X86 define i16 @mask16(i16 %x) { ; CHECK-LABEL: mask16: ; CHECK: ## %bb.0: ; CHECK-NEXT: movl %edi, %eax ; CHECK-NEXT: notl %eax ; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq ; ; X86-LABEL: mask16: ; X86: ## %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: notl %eax ; X86-NEXT: ## kill: def $ax killed $ax killed $eax ; X86-NEXT: retl %m0 = bitcast i16 %x to <16 x i1> %m1 = xor <16 x i1> %m0, %ret = bitcast <16 x i1> %m1 to i16 ret i16 %ret } define i32 @mask16_zext(i16 %x) { ; CHECK-LABEL: mask16_zext: ; CHECK: ## %bb.0: ; CHECK-NEXT: notl %edi ; CHECK-NEXT: movzwl %di, %eax ; CHECK-NEXT: retq ; ; X86-LABEL: mask16_zext: ; X86: ## %bb.0: ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; X86-NEXT: xorl $65535, %eax ## imm = 0xFFFF ; X86-NEXT: retl %m0 = bitcast i16 %x to <16 x i1> %m1 = xor <16 x i1> %m0, %m2 = bitcast <16 x i1> %m1 to i16 %ret = zext i16 %m2 to i32 ret i32 %ret } define i8 @mask8(i8 %x) { ; CHECK-LABEL: mask8: ; CHECK: ## %bb.0: ; CHECK-NEXT: movl %edi, %eax ; CHECK-NEXT: notb %al ; CHECK-NEXT: ## kill: def $al killed $al killed $eax ; CHECK-NEXT: retq ; ; X86-LABEL: mask8: ; X86: ## %bb.0: ; X86-NEXT: 
movb {{[0-9]+}}(%esp), %al ; X86-NEXT: notb %al ; X86-NEXT: retl %m0 = bitcast i8 %x to <8 x i1> %m1 = xor <8 x i1> %m0, %ret = bitcast <8 x i1> %m1 to i8 ret i8 %ret } define i32 @mask8_zext(i8 %x) { ; CHECK-LABEL: mask8_zext: ; CHECK: ## %bb.0: ; CHECK-NEXT: notb %dil ; CHECK-NEXT: movzbl %dil, %eax ; CHECK-NEXT: retq ; ; X86-LABEL: mask8_zext: ; X86: ## %bb.0: ; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: notb %al ; X86-NEXT: movzbl %al, %eax ; X86-NEXT: retl %m0 = bitcast i8 %x to <8 x i1> %m1 = xor <8 x i1> %m0, %m2 = bitcast <8 x i1> %m1 to i8 %ret = zext i8 %m2 to i32 ret i32 %ret } define void @mask16_mem(i16* %ptr) { ; CHECK-LABEL: mask16_mem: ; CHECK: ## %bb.0: ; CHECK-NEXT: kmovw (%rdi), %k0 ; CHECK-NEXT: knotw %k0, %k0 ; CHECK-NEXT: kmovw %k0, (%rdi) ; CHECK-NEXT: retq ; ; X86-LABEL: mask16_mem: ; X86: ## %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: kmovw (%eax), %k0 ; X86-NEXT: knotw %k0, %k0 ; X86-NEXT: kmovw %k0, (%eax) ; X86-NEXT: retl %x = load i16, i16* %ptr, align 4 %m0 = bitcast i16 %x to <16 x i1> %m1 = xor <16 x i1> %m0, %ret = bitcast <16 x i1> %m1 to i16 store i16 %ret, i16* %ptr, align 4 ret void } define void @mask8_mem(i8* %ptr) { ; KNL-LABEL: mask8_mem: ; KNL: ## %bb.0: ; KNL-NEXT: notb (%rdi) ; KNL-NEXT: retq ; ; SKX-LABEL: mask8_mem: ; SKX: ## %bb.0: ; SKX-NEXT: kmovb (%rdi), %k0 ; SKX-NEXT: knotb %k0, %k0 ; SKX-NEXT: kmovb %k0, (%rdi) ; SKX-NEXT: retq ; ; AVX512BW-LABEL: mask8_mem: ; AVX512BW: ## %bb.0: ; AVX512BW-NEXT: notb (%rdi) ; AVX512BW-NEXT: retq ; ; AVX512DQ-LABEL: mask8_mem: ; AVX512DQ: ## %bb.0: ; AVX512DQ-NEXT: kmovb (%rdi), %k0 ; AVX512DQ-NEXT: knotb %k0, %k0 ; AVX512DQ-NEXT: kmovb %k0, (%rdi) ; AVX512DQ-NEXT: retq ; ; X86-LABEL: mask8_mem: ; X86: ## %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: kmovb (%eax), %k0 ; X86-NEXT: knotb %k0, %k0 ; X86-NEXT: kmovb %k0, (%eax) ; X86-NEXT: retl %x = load i8, i8* %ptr, align 4 %m0 = bitcast i8 %x to <8 x i1> %m1 = xor <8 x i1> %m0, %ret = bitcast <8 x 
i1> %m1 to i8 store i8 %ret, i8* %ptr, align 4 ret void } define i16 @mand16(i16 %x, i16 %y) { ; CHECK-LABEL: mand16: ; CHECK: ## %bb.0: ; CHECK-NEXT: movl %edi, %eax ; CHECK-NEXT: movl %edi, %ecx ; CHECK-NEXT: andl %esi, %ecx ; CHECK-NEXT: xorl %esi, %eax ; CHECK-NEXT: orl %ecx, %eax ; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq ; ; X86-LABEL: mand16: ; X86: ## %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl %eax, %edx ; X86-NEXT: andl %ecx, %edx ; X86-NEXT: xorl %ecx, %eax ; X86-NEXT: orl %edx, %eax ; X86-NEXT: ## kill: def $ax killed $ax killed $eax ; X86-NEXT: retl %ma = bitcast i16 %x to <16 x i1> %mb = bitcast i16 %y to <16 x i1> %mc = and <16 x i1> %ma, %mb %md = xor <16 x i1> %ma, %mb %me = or <16 x i1> %mc, %md %ret = bitcast <16 x i1> %me to i16 ret i16 %ret } define i16 @mand16_mem(<16 x i1>* %x, <16 x i1>* %y) { ; KNL-LABEL: mand16_mem: ; KNL: ## %bb.0: ; KNL-NEXT: kmovw (%rdi), %k0 ; KNL-NEXT: kmovw (%rsi), %k1 ; KNL-NEXT: kandw %k1, %k0, %k2 ; KNL-NEXT: kxorw %k1, %k0, %k0 ; KNL-NEXT: korw %k0, %k2, %k0 ; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: ## kill: def $ax killed $ax killed $eax ; KNL-NEXT: retq ; ; SKX-LABEL: mand16_mem: ; SKX: ## %bb.0: ; SKX-NEXT: kmovw (%rdi), %k0 ; SKX-NEXT: kmovw (%rsi), %k1 ; SKX-NEXT: kandw %k1, %k0, %k2 ; SKX-NEXT: kxorw %k1, %k0, %k0 ; SKX-NEXT: korw %k0, %k2, %k0 ; SKX-NEXT: kmovd %k0, %eax ; SKX-NEXT: ## kill: def $ax killed $ax killed $eax ; SKX-NEXT: retq ; ; AVX512BW-LABEL: mand16_mem: ; AVX512BW: ## %bb.0: ; AVX512BW-NEXT: kmovw (%rdi), %k0 ; AVX512BW-NEXT: kmovw (%rsi), %k1 ; AVX512BW-NEXT: kandw %k1, %k0, %k2 ; AVX512BW-NEXT: kxorw %k1, %k0, %k0 ; AVX512BW-NEXT: korw %k0, %k2, %k0 ; AVX512BW-NEXT: kmovd %k0, %eax ; AVX512BW-NEXT: ## kill: def $ax killed $ax killed $eax ; AVX512BW-NEXT: retq ; ; AVX512DQ-LABEL: mand16_mem: ; AVX512DQ: ## %bb.0: ; AVX512DQ-NEXT: kmovw (%rdi), %k0 ; AVX512DQ-NEXT: kmovw (%rsi), %k1 ; AVX512DQ-NEXT: 
kandw %k1, %k0, %k2 ; AVX512DQ-NEXT: kxorw %k1, %k0, %k0 ; AVX512DQ-NEXT: korw %k0, %k2, %k0 ; AVX512DQ-NEXT: kmovw %k0, %eax ; AVX512DQ-NEXT: ## kill: def $ax killed $ax killed $eax ; AVX512DQ-NEXT: retq ; ; X86-LABEL: mand16_mem: ; X86: ## %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: kmovw (%ecx), %k0 ; X86-NEXT: kmovw (%eax), %k1 ; X86-NEXT: kandw %k1, %k0, %k2 ; X86-NEXT: kxorw %k1, %k0, %k0 ; X86-NEXT: korw %k0, %k2, %k0 ; X86-NEXT: kmovd %k0, %eax ; X86-NEXT: ## kill: def $ax killed $ax killed $eax ; X86-NEXT: retl %ma = load <16 x i1>, <16 x i1>* %x %mb = load <16 x i1>, <16 x i1>* %y %mc = and <16 x i1> %ma, %mb %md = xor <16 x i1> %ma, %mb %me = or <16 x i1> %mc, %md %ret = bitcast <16 x i1> %me to i16 ret i16 %ret } define i8 @shuf_test1(i16 %v) nounwind { ; KNL-LABEL: shuf_test1: ; KNL: ## %bb.0: ; KNL-NEXT: kmovw %edi, %k0 ; KNL-NEXT: kshiftrw $8, %k0, %k0 ; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: ## kill: def $al killed $al killed $eax ; KNL-NEXT: retq ; ; SKX-LABEL: shuf_test1: ; SKX: ## %bb.0: ; SKX-NEXT: kmovd %edi, %k0 ; SKX-NEXT: kshiftrw $8, %k0, %k0 ; SKX-NEXT: kmovd %k0, %eax ; SKX-NEXT: ## kill: def $al killed $al killed $eax ; SKX-NEXT: retq ; ; AVX512BW-LABEL: shuf_test1: ; AVX512BW: ## %bb.0: ; AVX512BW-NEXT: kmovd %edi, %k0 ; AVX512BW-NEXT: kshiftrw $8, %k0, %k0 ; AVX512BW-NEXT: kmovd %k0, %eax ; AVX512BW-NEXT: ## kill: def $al killed $al killed $eax ; AVX512BW-NEXT: retq ; ; AVX512DQ-LABEL: shuf_test1: ; AVX512DQ: ## %bb.0: ; AVX512DQ-NEXT: kmovw %edi, %k0 ; AVX512DQ-NEXT: kshiftrw $8, %k0, %k0 ; AVX512DQ-NEXT: kmovw %k0, %eax ; AVX512DQ-NEXT: ## kill: def $al killed $al killed $eax ; AVX512DQ-NEXT: retq ; ; X86-LABEL: shuf_test1: ; X86: ## %bb.0: ; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: retl %v1 = bitcast i16 %v to <16 x i1> %mask = shufflevector <16 x i1> %v1, <16 x i1> undef, <8 x i32> %mask1 = bitcast <8 x i1> %mask to i8 ret i8 %mask1 } define i32 @zext_test1(<16 x i32> 
%a, <16 x i32> %b) { ; KNL-LABEL: zext_test1: ; KNL: ## %bb.0: ; KNL-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 ; KNL-NEXT: kshiftrw $5, %k0, %k0 ; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: andl $1, %eax ; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; ; SKX-LABEL: zext_test1: ; SKX: ## %bb.0: ; SKX-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 ; SKX-NEXT: kshiftrw $5, %k0, %k0 ; SKX-NEXT: kmovd %k0, %eax ; SKX-NEXT: andl $1, %eax ; SKX-NEXT: vzeroupper ; SKX-NEXT: retq ; ; AVX512BW-LABEL: zext_test1: ; AVX512BW: ## %bb.0: ; AVX512BW-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 ; AVX512BW-NEXT: kshiftrw $5, %k0, %k0 ; AVX512BW-NEXT: kmovd %k0, %eax ; AVX512BW-NEXT: andl $1, %eax ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; ; AVX512DQ-LABEL: zext_test1: ; AVX512DQ: ## %bb.0: ; AVX512DQ-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 ; AVX512DQ-NEXT: kshiftrw $5, %k0, %k0 ; AVX512DQ-NEXT: kmovw %k0, %eax ; AVX512DQ-NEXT: andl $1, %eax ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq ; ; X86-LABEL: zext_test1: ; X86: ## %bb.0: ; X86-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 ; X86-NEXT: kshiftrw $5, %k0, %k0 ; X86-NEXT: kmovd %k0, %eax ; X86-NEXT: andl $1, %eax ; X86-NEXT: vzeroupper ; X86-NEXT: retl %cmp_res = icmp ugt <16 x i32> %a, %b %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5 %res = zext i1 %cmp_res.i1 to i32 ret i32 %res } define i16 @zext_test2(<16 x i32> %a, <16 x i32> %b) { ; KNL-LABEL: zext_test2: ; KNL: ## %bb.0: ; KNL-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 ; KNL-NEXT: kshiftrw $5, %k0, %k0 ; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: andl $1, %eax ; KNL-NEXT: ## kill: def $ax killed $ax killed $eax ; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; ; SKX-LABEL: zext_test2: ; SKX: ## %bb.0: ; SKX-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 ; SKX-NEXT: kshiftrw $5, %k0, %k0 ; SKX-NEXT: kmovd %k0, %eax ; SKX-NEXT: andl $1, %eax ; SKX-NEXT: ## kill: def $ax killed $ax killed $eax ; SKX-NEXT: vzeroupper ; SKX-NEXT: retq ; ; AVX512BW-LABEL: zext_test2: ; AVX512BW: ## %bb.0: ; AVX512BW-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 ; 
AVX512BW-NEXT: kshiftrw $5, %k0, %k0 ; AVX512BW-NEXT: kmovd %k0, %eax ; AVX512BW-NEXT: andl $1, %eax ; AVX512BW-NEXT: ## kill: def $ax killed $ax killed $eax ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; ; AVX512DQ-LABEL: zext_test2: ; AVX512DQ: ## %bb.0: ; AVX512DQ-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 ; AVX512DQ-NEXT: kshiftrw $5, %k0, %k0 ; AVX512DQ-NEXT: kmovw %k0, %eax ; AVX512DQ-NEXT: andl $1, %eax ; AVX512DQ-NEXT: ## kill: def $ax killed $ax killed $eax ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq ; ; X86-LABEL: zext_test2: ; X86: ## %bb.0: ; X86-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 ; X86-NEXT: kshiftrw $5, %k0, %k0 ; X86-NEXT: kmovd %k0, %eax ; X86-NEXT: andl $1, %eax ; X86-NEXT: ## kill: def $ax killed $ax killed $eax ; X86-NEXT: vzeroupper ; X86-NEXT: retl %cmp_res = icmp ugt <16 x i32> %a, %b %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5 %res = zext i1 %cmp_res.i1 to i16 ret i16 %res } define i8 @zext_test3(<16 x i32> %a, <16 x i32> %b) { ; KNL-LABEL: zext_test3: ; KNL: ## %bb.0: ; KNL-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 ; KNL-NEXT: kshiftrw $5, %k0, %k0 ; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: andb $1, %al ; KNL-NEXT: ## kill: def $al killed $al killed $eax ; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; ; SKX-LABEL: zext_test3: ; SKX: ## %bb.0: ; SKX-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 ; SKX-NEXT: kshiftrw $5, %k0, %k0 ; SKX-NEXT: kmovd %k0, %eax ; SKX-NEXT: andb $1, %al ; SKX-NEXT: ## kill: def $al killed $al killed $eax ; SKX-NEXT: vzeroupper ; SKX-NEXT: retq ; ; AVX512BW-LABEL: zext_test3: ; AVX512BW: ## %bb.0: ; AVX512BW-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 ; AVX512BW-NEXT: kshiftrw $5, %k0, %k0 ; AVX512BW-NEXT: kmovd %k0, %eax ; AVX512BW-NEXT: andb $1, %al ; AVX512BW-NEXT: ## kill: def $al killed $al killed $eax ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; ; AVX512DQ-LABEL: zext_test3: ; AVX512DQ: ## %bb.0: ; AVX512DQ-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 ; AVX512DQ-NEXT: kshiftrw $5, %k0, %k0 ; AVX512DQ-NEXT: kmovw %k0, %eax ; 
AVX512DQ-NEXT: andb $1, %al ; AVX512DQ-NEXT: ## kill: def $al killed $al killed $eax ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq ; ; X86-LABEL: zext_test3: ; X86: ## %bb.0: ; X86-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 ; X86-NEXT: kshiftrw $5, %k0, %k0 ; X86-NEXT: kmovd %k0, %eax ; X86-NEXT: andb $1, %al ; X86-NEXT: ## kill: def $al killed $al killed $eax ; X86-NEXT: vzeroupper ; X86-NEXT: retl %cmp_res = icmp ugt <16 x i32> %a, %b %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5 %res = zext i1 %cmp_res.i1 to i8 ret i8 %res } define i8 @conv1(<8 x i1>* %R) { ; CHECK-LABEL: conv1: ; CHECK: ## %bb.0: ## %entry ; CHECK-NEXT: movb $-1, (%rdi) ; CHECK-NEXT: movb $-2, -{{[0-9]+}}(%rsp) ; CHECK-NEXT: movb $-2, %al ; CHECK-NEXT: retq ; ; X86-LABEL: conv1: ; X86: ## %bb.0: ## %entry ; X86-NEXT: subl $12, %esp ; X86-NEXT: .cfi_def_cfa_offset 16 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movb $-1, (%eax) ; X86-NEXT: movb $-2, (%esp) ; X86-NEXT: movb $-2, %al ; X86-NEXT: addl $12, %esp ; X86-NEXT: retl entry: store <8 x i1> , <8 x i1>* %R %maskPtr = alloca <8 x i1> store <8 x i1> , <8 x i1>* %maskPtr %mask = load <8 x i1>, <8 x i1>* %maskPtr %mask_convert = bitcast <8 x i1> %mask to i8 ret i8 %mask_convert } define <4 x i32> @test4(<4 x i64> %x, <4 x i64> %y, <4 x i64> %x1, <4 x i64> %y1) { ; KNL-LABEL: test4: ; KNL: ## %bb.0: ; KNL-NEXT: ## kill: def $ymm3 killed $ymm3 def $zmm3 ; KNL-NEXT: ## kill: def $ymm2 killed $ymm2 def $zmm2 ; KNL-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1 ; KNL-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0 ; KNL-NEXT: vpcmpgtq %zmm3, %zmm2, %k1 ; KNL-NEXT: vpcmpleq %zmm1, %zmm0, %k1 {%k1} ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0 ; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; ; SKX-LABEL: test4: ; SKX: ## %bb.0: ; SKX-NEXT: vpcmpgtq %ymm3, %ymm2, %k1 ; SKX-NEXT: vpcmpleq %ymm1, %ymm0, %k0 {%k1} ; SKX-NEXT: vpmovm2d %k0, %xmm0 ; SKX-NEXT: vzeroupper ; SKX-NEXT: 
retq ; ; AVX512BW-LABEL: test4: ; AVX512BW: ## %bb.0: ; AVX512BW-NEXT: ## kill: def $ymm3 killed $ymm3 def $zmm3 ; AVX512BW-NEXT: ## kill: def $ymm2 killed $ymm2 def $zmm2 ; AVX512BW-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1 ; AVX512BW-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512BW-NEXT: vpcmpgtq %zmm3, %zmm2, %k1 ; AVX512BW-NEXT: vpcmpleq %zmm1, %zmm0, %k1 {%k1} ; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0 ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; ; AVX512DQ-LABEL: test4: ; AVX512DQ: ## %bb.0: ; AVX512DQ-NEXT: ## kill: def $ymm3 killed $ymm3 def $zmm3 ; AVX512DQ-NEXT: ## kill: def $ymm2 killed $ymm2 def $zmm2 ; AVX512DQ-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1 ; AVX512DQ-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512DQ-NEXT: vpcmpgtq %zmm3, %zmm2, %k1 ; AVX512DQ-NEXT: vpcmpleq %zmm1, %zmm0, %k0 {%k1} ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0 ; AVX512DQ-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0 ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq ; ; X86-LABEL: test4: ; X86: ## %bb.0: ; X86-NEXT: vpcmpgtq %ymm3, %ymm2, %k1 ; X86-NEXT: vpcmpleq %ymm1, %ymm0, %k0 {%k1} ; X86-NEXT: vpmovm2d %k0, %xmm0 ; X86-NEXT: vzeroupper ; X86-NEXT: retl %x_gt_y = icmp sgt <4 x i64> %x, %y %x1_gt_y1 = icmp sgt <4 x i64> %x1, %y1 %res = icmp sgt <4 x i1>%x_gt_y, %x1_gt_y1 %resse = sext <4 x i1>%res to <4 x i32> ret <4 x i32> %resse } define <2 x i64> @test5(<2 x i64> %x, <2 x i64> %y, <2 x i64> %x1, <2 x i64> %y1) { ; KNL-LABEL: test5: ; KNL: ## %bb.0: ; KNL-NEXT: ## kill: def $xmm3 killed $xmm3 def $zmm3 ; KNL-NEXT: ## kill: def $xmm2 killed $xmm2 def $zmm2 ; KNL-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1 ; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0 ; KNL-NEXT: vpcmpgtq %zmm0, %zmm1, %k1 ; KNL-NEXT: vpcmpleq %zmm3, %zmm2, %k1 {%k1} ; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 killed 
$zmm0 ; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; ; SKX-LABEL: test5: ; SKX: ## %bb.0: ; SKX-NEXT: vpcmpgtq %xmm0, %xmm1, %k1 ; SKX-NEXT: vpcmpleq %xmm3, %xmm2, %k0 {%k1} ; SKX-NEXT: vpmovm2q %k0, %xmm0 ; SKX-NEXT: retq ; ; AVX512BW-LABEL: test5: ; AVX512BW: ## %bb.0: ; AVX512BW-NEXT: ## kill: def $xmm3 killed $xmm3 def $zmm3 ; AVX512BW-NEXT: ## kill: def $xmm2 killed $xmm2 def $zmm2 ; AVX512BW-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1 ; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512BW-NEXT: vpcmpgtq %zmm0, %zmm1, %k1 ; AVX512BW-NEXT: vpcmpleq %zmm3, %zmm2, %k1 {%k1} ; AVX512BW-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0 ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; ; AVX512DQ-LABEL: test5: ; AVX512DQ: ## %bb.0: ; AVX512DQ-NEXT: ## kill: def $xmm3 killed $xmm3 def $zmm3 ; AVX512DQ-NEXT: ## kill: def $xmm2 killed $xmm2 def $zmm2 ; AVX512DQ-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1 ; AVX512DQ-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512DQ-NEXT: vpcmpgtq %zmm0, %zmm1, %k1 ; AVX512DQ-NEXT: vpcmpleq %zmm3, %zmm2, %k0 {%k1} ; AVX512DQ-NEXT: vpmovm2q %k0, %zmm0 ; AVX512DQ-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0 ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq ; ; X86-LABEL: test5: ; X86: ## %bb.0: ; X86-NEXT: vpcmpgtq %xmm0, %xmm1, %k1 ; X86-NEXT: vpcmpleq %xmm3, %xmm2, %k0 {%k1} ; X86-NEXT: vpmovm2q %k0, %xmm0 ; X86-NEXT: retl %x_gt_y = icmp slt <2 x i64> %x, %y %x1_gt_y1 = icmp sgt <2 x i64> %x1, %y1 %res = icmp slt <2 x i1>%x_gt_y, %x1_gt_y1 %resse = sext <2 x i1>%res to <2 x i64> ret <2 x i64> %resse }define void @test6(<16 x i1> %mask) { allocas: %a= and <16 x i1> %mask, %b = bitcast <16 x i1> %a to i16 %c = icmp eq i16 %b, 0 br i1 %c, label %true, label %false true: ret void false: ret void } define void @test7(<8 x i1> %mask) { ; KNL-LABEL: test7: ; KNL: ## %bb.0: ## %allocas ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0 ; KNL-NEXT: vpsllq $63, 
%zmm0, %zmm0 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0 ; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: orb $85, %al ; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; ; SKX-LABEL: test7: ; SKX: ## %bb.0: ## %allocas ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 ; SKX-NEXT: vpmovw2m %xmm0, %k0 ; SKX-NEXT: kmovd %k0, %eax ; SKX-NEXT: orb $85, %al ; SKX-NEXT: retq ; ; AVX512BW-LABEL: test7: ; AVX512BW: ## %bb.0: ## %allocas ; AVX512BW-NEXT: vpsllw $15, %xmm0, %xmm0 ; AVX512BW-NEXT: vpmovw2m %zmm0, %k0 ; AVX512BW-NEXT: kmovd %k0, %eax ; AVX512BW-NEXT: orb $85, %al ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; ; AVX512DQ-LABEL: test7: ; AVX512DQ: ## %bb.0: ## %allocas ; AVX512DQ-NEXT: vpmovsxwq %xmm0, %zmm0 ; AVX512DQ-NEXT: vpsllq $63, %zmm0, %zmm0 ; AVX512DQ-NEXT: vpmovq2m %zmm0, %k0 ; AVX512DQ-NEXT: kmovw %k0, %eax ; AVX512DQ-NEXT: orb $85, %al ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq ; ; X86-LABEL: test7: ; X86: ## %bb.0: ## %allocas ; X86-NEXT: vpsllw $15, %xmm0, %xmm0 ; X86-NEXT: vpmovw2m %xmm0, %k0 ; X86-NEXT: kmovd %k0, %eax ; X86-NEXT: orb $85, %al ; X86-NEXT: retl allocas: %a= or <8 x i1> %mask, %b = bitcast <8 x i1> %a to i8 %c = icmp eq i8 %b, 0 br i1 %c, label %true, label %false true: ret void false: ret void } define <16 x i8> @test8(<16 x i32>%a, <16 x i32>%b, i32 %a1, i32 %b1) { ; KNL-LABEL: test8: ; KNL: ## %bb.0: ; KNL-NEXT: cmpl %esi, %edi ; KNL-NEXT: jg LBB17_1 ; KNL-NEXT: ## %bb.2: ; KNL-NEXT: kxorw %k0, %k0, %k1 ; KNL-NEXT: jmp LBB17_3 ; KNL-NEXT: LBB17_1: ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; KNL-NEXT: vpcmpgtd %zmm1, %zmm0, %k1 ; KNL-NEXT: LBB17_3: ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; KNL-NEXT: vpmovdb %zmm0, %xmm0 ; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; ; SKX-LABEL: test8: ; SKX: ## %bb.0: ; SKX-NEXT: cmpl %esi, %edi ; SKX-NEXT: jg LBB17_1 ; SKX-NEXT: ## %bb.2: ; SKX-NEXT: kxorw %k0, %k0, %k0 ; SKX-NEXT: vpmovm2b %k0, %xmm0 ; SKX-NEXT: vzeroupper ; SKX-NEXT: retq ; SKX-NEXT: LBB17_1: ; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; 
SKX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 ; SKX-NEXT: vpmovm2b %k0, %xmm0 ; SKX-NEXT: vzeroupper ; SKX-NEXT: retq ; ; AVX512BW-LABEL: test8: ; AVX512BW: ## %bb.0: ; AVX512BW-NEXT: cmpl %esi, %edi ; AVX512BW-NEXT: jg LBB17_1 ; AVX512BW-NEXT: ## %bb.2: ; AVX512BW-NEXT: kxorw %k0, %k0, %k0 ; AVX512BW-NEXT: vpmovm2b %k0, %zmm0 ; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0 ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; AVX512BW-NEXT: LBB17_1: ; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512BW-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 ; AVX512BW-NEXT: vpmovm2b %k0, %zmm0 ; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0 ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; ; AVX512DQ-LABEL: test8: ; AVX512DQ: ## %bb.0: ; AVX512DQ-NEXT: cmpl %esi, %edi ; AVX512DQ-NEXT: jg LBB17_1 ; AVX512DQ-NEXT: ## %bb.2: ; AVX512DQ-NEXT: kxorw %k0, %k0, %k0 ; AVX512DQ-NEXT: jmp LBB17_3 ; AVX512DQ-NEXT: LBB17_1: ; AVX512DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512DQ-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 ; AVX512DQ-NEXT: LBB17_3: ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0 ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq ; ; X86-LABEL: test8: ; X86: ## %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax ; X86-NEXT: jg LBB17_1 ; X86-NEXT: ## %bb.2: ; X86-NEXT: kxorw %k0, %k0, %k0 ; X86-NEXT: vpmovm2b %k0, %xmm0 ; X86-NEXT: vzeroupper ; X86-NEXT: retl ; X86-NEXT: LBB17_1: ; X86-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; X86-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 ; X86-NEXT: vpmovm2b %k0, %xmm0 ; X86-NEXT: vzeroupper ; X86-NEXT: retl %cond = icmp sgt i32 %a1, %b1 %cmp1 = icmp sgt <16 x i32> %a, zeroinitializer %cmp2 = icmp ult <16 x i32> %b, zeroinitializer %mix = select i1 %cond, <16 x i1> %cmp1, <16 x i1> %cmp2 %res = sext <16 x i1> %mix to <16 x i8> ret <16 x i8> %res } define <16 x i1> @test9(<16 x i1>%a, <16 x i1>%b, i32 %a1, i32 %b1) { ; KNL-LABEL: test9: ; KNL: ## %bb.0: ; KNL-NEXT: cmpl %esi, %edi ; KNL-NEXT: jg 
LBB18_1 ; KNL-NEXT: ## %bb.2: ; KNL-NEXT: vpmovsxbd %xmm1, %zmm0 ; KNL-NEXT: jmp LBB18_3 ; KNL-NEXT: LBB18_1: ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 ; KNL-NEXT: LBB18_3: ; KNL-NEXT: vpslld $31, %zmm0, %zmm0 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; KNL-NEXT: vpmovdb %zmm0, %xmm0 ; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; ; SKX-LABEL: test9: ; SKX: ## %bb.0: ; SKX-NEXT: cmpl %esi, %edi ; SKX-NEXT: jg LBB18_1 ; SKX-NEXT: ## %bb.2: ; SKX-NEXT: vpsllw $7, %xmm1, %xmm0 ; SKX-NEXT: jmp LBB18_3 ; SKX-NEXT: LBB18_1: ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 ; SKX-NEXT: LBB18_3: ; SKX-NEXT: vpmovb2m %xmm0, %k0 ; SKX-NEXT: vpmovm2b %k0, %xmm0 ; SKX-NEXT: retq ; ; AVX512BW-LABEL: test9: ; AVX512BW: ## %bb.0: ; AVX512BW-NEXT: cmpl %esi, %edi ; AVX512BW-NEXT: jg LBB18_1 ; AVX512BW-NEXT: ## %bb.2: ; AVX512BW-NEXT: vpsllw $7, %xmm1, %xmm0 ; AVX512BW-NEXT: jmp LBB18_3 ; AVX512BW-NEXT: LBB18_1: ; AVX512BW-NEXT: vpsllw $7, %xmm0, %xmm0 ; AVX512BW-NEXT: LBB18_3: ; AVX512BW-NEXT: vpmovb2m %zmm0, %k0 ; AVX512BW-NEXT: vpmovm2b %k0, %zmm0 ; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0 ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; ; AVX512DQ-LABEL: test9: ; AVX512DQ: ## %bb.0: ; AVX512DQ-NEXT: cmpl %esi, %edi ; AVX512DQ-NEXT: jg LBB18_1 ; AVX512DQ-NEXT: ## %bb.2: ; AVX512DQ-NEXT: vpmovsxbd %xmm1, %zmm0 ; AVX512DQ-NEXT: jmp LBB18_3 ; AVX512DQ-NEXT: LBB18_1: ; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0 ; AVX512DQ-NEXT: LBB18_3: ; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0 ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq ; ; X86-LABEL: test9: ; X86: ## %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax ; X86-NEXT: jg LBB18_1 ; X86-NEXT: ## %bb.2: ; X86-NEXT: vpsllw $7, %xmm1, %xmm0 ; X86-NEXT: jmp LBB18_3 ; X86-NEXT: LBB18_1: ; X86-NEXT: vpsllw $7, %xmm0, %xmm0 ; X86-NEXT: LBB18_3: ; 
X86-NEXT: vpmovb2m %xmm0, %k0 ; X86-NEXT: vpmovm2b %k0, %xmm0 ; X86-NEXT: retl %mask = icmp sgt i32 %a1, %b1 %c = select i1 %mask, <16 x i1>%a, <16 x i1>%b ret <16 x i1>%c }define <8 x i1> @test10(<8 x i1>%a, <8 x i1>%b, i32 %a1, i32 %b1) { %mask = icmp sgt i32 %a1, %b1 %c = select i1 %mask, <8 x i1>%a, <8 x i1>%b ret <8 x i1>%c } define <4 x i1> @test11(<4 x i1>%a, <4 x i1>%b, i32 %a1, i32 %b1) { ; KNL-LABEL: test11: ; KNL: ## %bb.0: ; KNL-NEXT: cmpl %esi, %edi ; KNL-NEXT: jg LBB20_1 ; KNL-NEXT: ## %bb.2: ; KNL-NEXT: vpslld $31, %xmm1, %xmm0 ; KNL-NEXT: jmp LBB20_3 ; KNL-NEXT: LBB20_1: ; KNL-NEXT: vpslld $31, %xmm0, %xmm0 ; KNL-NEXT: LBB20_3: ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0 ; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; ; SKX-LABEL: test11: ; SKX: ## %bb.0: ; SKX-NEXT: cmpl %esi, %edi ; SKX-NEXT: jg LBB20_1 ; SKX-NEXT: ## %bb.2: ; SKX-NEXT: vpslld $31, %xmm1, %xmm0 ; SKX-NEXT: jmp LBB20_3 ; SKX-NEXT: LBB20_1: ; SKX-NEXT: vpslld $31, %xmm0, %xmm0 ; SKX-NEXT: LBB20_3: ; SKX-NEXT: vpmovd2m %xmm0, %k0 ; SKX-NEXT: vpmovm2d %k0, %xmm0 ; SKX-NEXT: retq ; ; AVX512BW-LABEL: test11: ; AVX512BW: ## %bb.0: ; AVX512BW-NEXT: cmpl %esi, %edi ; AVX512BW-NEXT: jg LBB20_1 ; AVX512BW-NEXT: ## %bb.2: ; AVX512BW-NEXT: vpslld $31, %xmm1, %xmm0 ; AVX512BW-NEXT: jmp LBB20_3 ; AVX512BW-NEXT: LBB20_1: ; AVX512BW-NEXT: vpslld $31, %xmm0, %xmm0 ; AVX512BW-NEXT: LBB20_3: ; AVX512BW-NEXT: vptestmd %zmm0, %zmm0, %k1 ; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0 ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; ; AVX512DQ-LABEL: test11: ; AVX512DQ: ## %bb.0: ; AVX512DQ-NEXT: cmpl %esi, %edi ; AVX512DQ-NEXT: jg LBB20_1 ; AVX512DQ-NEXT: ## %bb.2: ; AVX512DQ-NEXT: vpslld $31, %xmm1, %xmm0 ; AVX512DQ-NEXT: jmp LBB20_3 ; AVX512DQ-NEXT: LBB20_1: ; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0 ; AVX512DQ-NEXT: 
LBB20_3: ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0 ; AVX512DQ-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0 ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq ; ; X86-LABEL: test11: ; X86: ## %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax ; X86-NEXT: jg LBB20_1 ; X86-NEXT: ## %bb.2: ; X86-NEXT: vpslld $31, %xmm1, %xmm0 ; X86-NEXT: jmp LBB20_3 ; X86-NEXT: LBB20_1: ; X86-NEXT: vpslld $31, %xmm0, %xmm0 ; X86-NEXT: LBB20_3: ; X86-NEXT: vpmovd2m %xmm0, %k0 ; X86-NEXT: vpmovm2d %k0, %xmm0 ; X86-NEXT: retl %mask = icmp sgt i32 %a1, %b1 %c = select i1 %mask, <4 x i1>%a, <4 x i1>%b ret <4 x i1>%c } define i32 @test12(i32 %x, i32 %y) { ; CHECK-LABEL: test12: ; CHECK: ## %bb.0: ; CHECK-NEXT: movl %edi, %eax ; CHECK-NEXT: retq ; ; X86-LABEL: test12: ; X86: ## %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: retl %a = bitcast i16 21845 to <16 x i1> %b = extractelement <16 x i1> %a, i32 0 %c = select i1 %b, i32 %x, i32 %y ret i32 %c } define i32 @test13(i32 %x, i32 %y) { ; CHECK-LABEL: test13: ; CHECK: ## %bb.0: ; CHECK-NEXT: movl %esi, %eax ; CHECK-NEXT: retq ; ; X86-LABEL: test13: ; X86: ## %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: retl %a = bitcast i16 21845 to <16 x i1> %b = extractelement <16 x i1> %a, i32 3 %c = select i1 %b, i32 %x, i32 %y ret i32 %c } ; Make sure we don't crash on a large vector. 
define i32 @test13_crash(i32 %x, i32 %y) { ; CHECK-LABEL: test13_crash: ; CHECK: ## %bb.0: ; CHECK-NEXT: movl %edi, %eax ; CHECK-NEXT: retq ; ; X86-LABEL: test13_crash: ; X86: ## %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: retl %a = bitcast i128 2184568686868686868686868686 to <128 x i1> %b = extractelement <128 x i1> %a, i32 3 %c = select i1 %b, i32 %x, i32 %y ret i32 %c } define <4 x i1> @test14() { ; CHECK-LABEL: test14: ; CHECK: ## %bb.0: ; CHECK-NEXT: vmovaps {{.*#+}} xmm0 = [1,1,0,1] ; CHECK-NEXT: retq ; ; X86-LABEL: test14: ; X86: ## %bb.0: ; X86-NEXT: vmovaps {{.*#+}} xmm0 = [1,1,0,1] ; X86-NEXT: retl %a = bitcast i16 21845 to <16 x i1> %b = extractelement <16 x i1> %a, i32 2 %c = insertelement <4 x i1> , i1 %b, i32 1 ret <4 x i1> %c } define <16 x i1> @test15(i32 %x, i32 %y) { ; KNL-LABEL: test15: ; KNL: ## %bb.0: ; KNL-NEXT: cmpl %esi, %edi ; KNL-NEXT: movl $21845, %eax ## imm = 0x5555 ; KNL-NEXT: movl $1, %ecx ; KNL-NEXT: cmovgl %eax, %ecx ; KNL-NEXT: kmovw %ecx, %k1 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; KNL-NEXT: vpmovdb %zmm0, %xmm0 ; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; ; SKX-LABEL: test15: ; SKX: ## %bb.0: ; SKX-NEXT: cmpl %esi, %edi ; SKX-NEXT: movl $21845, %eax ## imm = 0x5555 ; SKX-NEXT: movl $1, %ecx ; SKX-NEXT: cmovgl %eax, %ecx ; SKX-NEXT: kmovd %ecx, %k0 ; SKX-NEXT: vpmovm2b %k0, %xmm0 ; SKX-NEXT: retq ; ; AVX512BW-LABEL: test15: ; AVX512BW: ## %bb.0: ; AVX512BW-NEXT: cmpl %esi, %edi ; AVX512BW-NEXT: movl $21845, %eax ## imm = 0x5555 ; AVX512BW-NEXT: movl $1, %ecx ; AVX512BW-NEXT: cmovgl %eax, %ecx ; AVX512BW-NEXT: kmovd %ecx, %k0 ; AVX512BW-NEXT: vpmovm2b %k0, %zmm0 ; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0 ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; ; AVX512DQ-LABEL: test15: ; AVX512DQ: ## %bb.0: ; AVX512DQ-NEXT: cmpl %esi, %edi ; AVX512DQ-NEXT: movl $21845, %eax ## imm = 0x5555 ; AVX512DQ-NEXT: movl $1, %ecx ; AVX512DQ-NEXT: cmovgl %eax, %ecx ; AVX512DQ-NEXT: kmovw %ecx, 
%k0 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0 ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq ; ; X86-LABEL: test15: ; X86: ## %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl $21845, %eax ## imm = 0x5555 ; X86-NEXT: movl $1, %ecx ; X86-NEXT: cmovgl %eax, %ecx ; X86-NEXT: kmovd %ecx, %k0 ; X86-NEXT: vpmovm2b %k0, %xmm0 ; X86-NEXT: retl %a = bitcast i16 21845 to <16 x i1> %b = bitcast i16 1 to <16 x i1> %mask = icmp sgt i32 %x, %y %c = select i1 %mask, <16 x i1> %a, <16 x i1> %b ret <16 x i1> %c } define <64 x i8> @test16(i64 %x) { ; ; KNL-LABEL: test16: ; KNL: ## %bb.0: ; KNL-NEXT: movq %rdi, %rax ; KNL-NEXT: movl %edi, %ecx ; KNL-NEXT: kmovw %edi, %k0 ; KNL-NEXT: shrq $32, %rdi ; KNL-NEXT: shrq $48, %rax ; KNL-NEXT: shrl $16, %ecx ; KNL-NEXT: kmovw %ecx, %k1 ; KNL-NEXT: kmovw %eax, %k2 ; KNL-NEXT: kmovw %edi, %k3 ; KNL-NEXT: movw $-33, %ax ; KNL-NEXT: kmovw %eax, %k4 ; KNL-NEXT: kandw %k4, %k0, %k0 ; KNL-NEXT: movb $1, %al ; KNL-NEXT: kmovw %eax, %k4 ; KNL-NEXT: kshiftlw $15, %k4, %k4 ; KNL-NEXT: kshiftrw $10, %k4, %k4 ; KNL-NEXT: korw %k4, %k0, %k4 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k3} {z} ; KNL-NEXT: vpmovdb %zmm0, %xmm0 ; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z} ; KNL-NEXT: vpmovdb %zmm1, %xmm1 ; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 ; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k4} {z} ; KNL-NEXT: vpmovdb %zmm1, %xmm1 ; KNL-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; KNL-NEXT: vpmovdb %zmm2, %xmm2 ; KNL-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 ; KNL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 ; KNL-NEXT: retq ; ; SKX-LABEL: test16: ; SKX: ## %bb.0: ; SKX-NEXT: kmovq %rdi, %k0 ; SKX-NEXT: movq $-33, %rax ; SKX-NEXT: kmovq %rax, %k1 ; SKX-NEXT: kandq %k1, %k0, %k0 ; SKX-NEXT: movb $1, %al ; SKX-NEXT: kmovd %eax, %k1 ; SKX-NEXT: kshiftlq $63, %k1, %k1 ; SKX-NEXT: kshiftrq $58, %k1, %k1 ; SKX-NEXT: korq %k1, %k0, %k0 ; 
SKX-NEXT: vpmovm2b %k0, %zmm0 ; SKX-NEXT: retq ; ; AVX512BW-LABEL: test16: ; AVX512BW: ## %bb.0: ; AVX512BW-NEXT: kmovq %rdi, %k0 ; AVX512BW-NEXT: movq $-33, %rax ; AVX512BW-NEXT: kmovq %rax, %k1 ; AVX512BW-NEXT: kandq %k1, %k0, %k0 ; AVX512BW-NEXT: movb $1, %al ; AVX512BW-NEXT: kmovd %eax, %k1 ; AVX512BW-NEXT: kshiftlq $63, %k1, %k1 ; AVX512BW-NEXT: kshiftrq $58, %k1, %k1 ; AVX512BW-NEXT: korq %k1, %k0, %k0 ; AVX512BW-NEXT: vpmovm2b %k0, %zmm0 ; AVX512BW-NEXT: retq ; ; AVX512DQ-LABEL: test16: ; AVX512DQ: ## %bb.0: ; AVX512DQ-NEXT: movq %rdi, %rax ; AVX512DQ-NEXT: movl %edi, %ecx ; AVX512DQ-NEXT: kmovw %edi, %k1 ; AVX512DQ-NEXT: shrq $32, %rdi ; AVX512DQ-NEXT: shrq $48, %rax ; AVX512DQ-NEXT: shrl $16, %ecx ; AVX512DQ-NEXT: kmovw %ecx, %k0 ; AVX512DQ-NEXT: kmovw %eax, %k2 ; AVX512DQ-NEXT: kmovw %edi, %k3 ; AVX512DQ-NEXT: movw $-33, %ax ; AVX512DQ-NEXT: kmovw %eax, %k4 ; AVX512DQ-NEXT: kandw %k4, %k1, %k1 ; AVX512DQ-NEXT: movb $1, %al ; AVX512DQ-NEXT: kmovw %eax, %k4 ; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4 ; AVX512DQ-NEXT: kshiftrw $10, %k4, %k4 ; AVX512DQ-NEXT: korw %k4, %k1, %k1 ; AVX512DQ-NEXT: vpmovm2d %k3, %zmm0 ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512DQ-NEXT: vpmovm2d %k2, %zmm1 ; AVX512DQ-NEXT: vpmovdb %zmm1, %xmm1 ; AVX512DQ-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 ; AVX512DQ-NEXT: vpmovm2d %k1, %zmm1 ; AVX512DQ-NEXT: vpmovdb %zmm1, %xmm1 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm2 ; AVX512DQ-NEXT: vpmovdb %zmm2, %xmm2 ; AVX512DQ-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 ; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 ; AVX512DQ-NEXT: retq ; ; X86-LABEL: test16: ; X86: ## %bb.0: ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k0 ; X86-NEXT: kshiftrq $6, %k0, %k1 ; X86-NEXT: kshiftlq $6, %k1, %k1 ; X86-NEXT: kshiftlq $59, %k0, %k0 ; X86-NEXT: kshiftrq $59, %k0, %k0 ; X86-NEXT: movb $1, %al ; X86-NEXT: kmovd %eax, %k2 ; X86-NEXT: kshiftlq $63, %k2, %k2 ; X86-NEXT: kshiftrq $58, %k2, %k2 ; X86-NEXT: korq %k2, %k1, %k1 ; X86-NEXT: korq %k1, %k0, %k0 ; X86-NEXT: 
vpmovm2b %k0, %zmm0 ; X86-NEXT: retl %a = bitcast i64 %x to <64 x i1> %b = insertelement <64 x i1>%a, i1 true, i32 5 %c = sext <64 x i1>%b to <64 x i8> ret <64 x i8>%c } define <64 x i8> @test17(i64 %x, i32 %y, i32 %z) { ; ; KNL-LABEL: test17: ; KNL: ## %bb.0: ; KNL-NEXT: movq %rdi, %rax ; KNL-NEXT: movl %edi, %ecx ; KNL-NEXT: kmovw %edi, %k0 ; KNL-NEXT: shrq $32, %rdi ; KNL-NEXT: shrq $48, %rax ; KNL-NEXT: shrl $16, %ecx ; KNL-NEXT: kmovw %ecx, %k1 ; KNL-NEXT: kmovw %eax, %k2 ; KNL-NEXT: kmovw %edi, %k3 ; KNL-NEXT: cmpl %edx, %esi ; KNL-NEXT: setg %al ; KNL-NEXT: movw $-33, %cx ; KNL-NEXT: kmovw %ecx, %k4 ; KNL-NEXT: kandw %k4, %k0, %k0 ; KNL-NEXT: kmovw %eax, %k4 ; KNL-NEXT: kshiftlw $15, %k4, %k4 ; KNL-NEXT: kshiftrw $10, %k4, %k4 ; KNL-NEXT: korw %k4, %k0, %k4 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k3} {z} ; KNL-NEXT: vpmovdb %zmm0, %xmm0 ; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z} ; KNL-NEXT: vpmovdb %zmm1, %xmm1 ; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 ; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k4} {z} ; KNL-NEXT: vpmovdb %zmm1, %xmm1 ; KNL-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; KNL-NEXT: vpmovdb %zmm2, %xmm2 ; KNL-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 ; KNL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 ; KNL-NEXT: retq ; ; SKX-LABEL: test17: ; SKX: ## %bb.0: ; SKX-NEXT: kmovq %rdi, %k0 ; SKX-NEXT: cmpl %edx, %esi ; SKX-NEXT: setg %al ; SKX-NEXT: movq $-33, %rcx ; SKX-NEXT: kmovq %rcx, %k1 ; SKX-NEXT: kandq %k1, %k0, %k0 ; SKX-NEXT: kmovd %eax, %k1 ; SKX-NEXT: kshiftlq $63, %k1, %k1 ; SKX-NEXT: kshiftrq $58, %k1, %k1 ; SKX-NEXT: korq %k1, %k0, %k0 ; SKX-NEXT: vpmovm2b %k0, %zmm0 ; SKX-NEXT: retq ; ; AVX512BW-LABEL: test17: ; AVX512BW: ## %bb.0: ; AVX512BW-NEXT: kmovq %rdi, %k0 ; AVX512BW-NEXT: cmpl %edx, %esi ; AVX512BW-NEXT: setg %al ; AVX512BW-NEXT: movq $-33, %rcx ; AVX512BW-NEXT: kmovq %rcx, %k1 ; AVX512BW-NEXT: kandq %k1, %k0, %k0 ; AVX512BW-NEXT: kmovd %eax, %k1 ; AVX512BW-NEXT: kshiftlq $63, 
%k1, %k1 ; AVX512BW-NEXT: kshiftrq $58, %k1, %k1 ; AVX512BW-NEXT: korq %k1, %k0, %k0 ; AVX512BW-NEXT: vpmovm2b %k0, %zmm0 ; AVX512BW-NEXT: retq ; ; AVX512DQ-LABEL: test17: ; AVX512DQ: ## %bb.0: ; AVX512DQ-NEXT: movq %rdi, %rax ; AVX512DQ-NEXT: movl %edi, %ecx ; AVX512DQ-NEXT: kmovw %edi, %k1 ; AVX512DQ-NEXT: shrq $32, %rdi ; AVX512DQ-NEXT: shrq $48, %rax ; AVX512DQ-NEXT: shrl $16, %ecx ; AVX512DQ-NEXT: kmovw %ecx, %k0 ; AVX512DQ-NEXT: kmovw %eax, %k2 ; AVX512DQ-NEXT: kmovw %edi, %k3 ; AVX512DQ-NEXT: cmpl %edx, %esi ; AVX512DQ-NEXT: setg %al ; AVX512DQ-NEXT: movw $-33, %cx ; AVX512DQ-NEXT: kmovw %ecx, %k4 ; AVX512DQ-NEXT: kandw %k4, %k1, %k1 ; AVX512DQ-NEXT: kmovw %eax, %k4 ; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4 ; AVX512DQ-NEXT: kshiftrw $10, %k4, %k4 ; AVX512DQ-NEXT: korw %k4, %k1, %k1 ; AVX512DQ-NEXT: vpmovm2d %k3, %zmm0 ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512DQ-NEXT: vpmovm2d %k2, %zmm1 ; AVX512DQ-NEXT: vpmovdb %zmm1, %xmm1 ; AVX512DQ-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 ; AVX512DQ-NEXT: vpmovm2d %k1, %zmm1 ; AVX512DQ-NEXT: vpmovdb %zmm1, %xmm1 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm2 ; AVX512DQ-NEXT: vpmovdb %zmm2, %xmm2 ; AVX512DQ-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 ; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 ; AVX512DQ-NEXT: retq ; ; X86-LABEL: test17: ; X86: ## %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k0 ; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax ; X86-NEXT: setg %al ; X86-NEXT: kshiftrq $6, %k0, %k1 ; X86-NEXT: kshiftlq $6, %k1, %k1 ; X86-NEXT: kshiftlq $59, %k0, %k0 ; X86-NEXT: kshiftrq $59, %k0, %k0 ; X86-NEXT: kmovd %eax, %k2 ; X86-NEXT: kshiftlq $63, %k2, %k2 ; X86-NEXT: kshiftrq $58, %k2, %k2 ; X86-NEXT: korq %k2, %k1, %k1 ; X86-NEXT: korq %k1, %k0, %k0 ; X86-NEXT: vpmovm2b %k0, %zmm0 ; X86-NEXT: retl %a = bitcast i64 %x to <64 x i1> %b = icmp sgt i32 %y, %z %c = insertelement <64 x i1>%a, i1 %b, i32 5 %d = sext <64 x i1>%c to <64 x i8> ret <64 x i8>%d } define <8 x i1> @test18(i8 %a, i16 
%y) { ; KNL-LABEL: test18: ; KNL: ## %bb.0: ; KNL-NEXT: kmovw %edi, %k0 ; KNL-NEXT: kmovw %esi, %k1 ; KNL-NEXT: kshiftrw $8, %k1, %k2 ; KNL-NEXT: kshiftrw $9, %k1, %k1 ; KNL-NEXT: movw $-65, %ax ; KNL-NEXT: kmovw %eax, %k3 ; KNL-NEXT: kandw %k3, %k0, %k0 ; KNL-NEXT: kshiftlw $6, %k1, %k1 ; KNL-NEXT: korw %k1, %k0, %k0 ; KNL-NEXT: kshiftlw $9, %k0, %k0 ; KNL-NEXT: kshiftrw $9, %k0, %k0 ; KNL-NEXT: kshiftlw $7, %k2, %k1 ; KNL-NEXT: korw %k1, %k0, %k1 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; KNL-NEXT: vpmovdw %zmm0, %ymm0 ; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 killed $ymm0 ; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; ; SKX-LABEL: test18: ; SKX: ## %bb.0: ; SKX-NEXT: kmovd %edi, %k0 ; SKX-NEXT: kmovd %esi, %k1 ; SKX-NEXT: kshiftrw $8, %k1, %k2 ; SKX-NEXT: kshiftrw $9, %k1, %k1 ; SKX-NEXT: movb $-65, %al ; SKX-NEXT: kmovd %eax, %k3 ; SKX-NEXT: kandb %k3, %k0, %k0 ; SKX-NEXT: kshiftlb $6, %k1, %k1 ; SKX-NEXT: korb %k1, %k0, %k0 ; SKX-NEXT: kshiftlb $1, %k0, %k0 ; SKX-NEXT: kshiftrb $1, %k0, %k0 ; SKX-NEXT: kshiftlb $7, %k2, %k1 ; SKX-NEXT: korb %k1, %k0, %k0 ; SKX-NEXT: vpmovm2w %k0, %xmm0 ; SKX-NEXT: retq ; ; AVX512BW-LABEL: test18: ; AVX512BW: ## %bb.0: ; AVX512BW-NEXT: kmovd %edi, %k0 ; AVX512BW-NEXT: kmovd %esi, %k1 ; AVX512BW-NEXT: kshiftrw $8, %k1, %k2 ; AVX512BW-NEXT: kshiftrw $9, %k1, %k1 ; AVX512BW-NEXT: movw $-65, %ax ; AVX512BW-NEXT: kmovd %eax, %k3 ; AVX512BW-NEXT: kandw %k3, %k0, %k0 ; AVX512BW-NEXT: kshiftlw $6, %k1, %k1 ; AVX512BW-NEXT: korw %k1, %k0, %k0 ; AVX512BW-NEXT: kshiftlw $9, %k0, %k0 ; AVX512BW-NEXT: kshiftrw $9, %k0, %k0 ; AVX512BW-NEXT: kshiftlw $7, %k2, %k1 ; AVX512BW-NEXT: korw %k1, %k0, %k0 ; AVX512BW-NEXT: vpmovm2w %k0, %zmm0 ; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0 ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; ; AVX512DQ-LABEL: test18: ; AVX512DQ: ## %bb.0: ; AVX512DQ-NEXT: kmovw %edi, %k0 ; AVX512DQ-NEXT: kmovw %esi, %k1 ; AVX512DQ-NEXT: kshiftrw $8, %k1, %k2 ; AVX512DQ-NEXT: kshiftrw $9, 
%k1, %k1 ; AVX512DQ-NEXT: movb $-65, %al ; AVX512DQ-NEXT: kmovw %eax, %k3 ; AVX512DQ-NEXT: kandb %k3, %k0, %k0 ; AVX512DQ-NEXT: kshiftlb $6, %k1, %k1 ; AVX512DQ-NEXT: korb %k1, %k0, %k0 ; AVX512DQ-NEXT: kshiftlb $1, %k0, %k0 ; AVX512DQ-NEXT: kshiftrb $1, %k0, %k0 ; AVX512DQ-NEXT: kshiftlb $7, %k2, %k1 ; AVX512DQ-NEXT: korb %k1, %k0, %k0 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0 ; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0 ; AVX512DQ-NEXT: ## kill: def $xmm0 killed $xmm0 killed $ymm0 ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq ; ; X86-LABEL: test18: ; X86: ## %bb.0: ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k0 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ; X86-NEXT: kshiftrw $8, %k1, %k2 ; X86-NEXT: kshiftrw $9, %k1, %k1 ; X86-NEXT: movb $-65, %al ; X86-NEXT: kmovd %eax, %k3 ; X86-NEXT: kandb %k3, %k0, %k0 ; X86-NEXT: kshiftlb $6, %k1, %k1 ; X86-NEXT: korb %k1, %k0, %k0 ; X86-NEXT: kshiftlb $1, %k0, %k0 ; X86-NEXT: kshiftrb $1, %k0, %k0 ; X86-NEXT: kshiftlb $7, %k2, %k1 ; X86-NEXT: korb %k1, %k0, %k0 ; X86-NEXT: vpmovm2w %k0, %xmm0 ; X86-NEXT: retl %b = bitcast i8 %a to <8 x i1> %b1 = bitcast i16 %y to <16 x i1> %el1 = extractelement <16 x i1>%b1, i32 8 %el2 = extractelement <16 x i1>%b1, i32 9 %c = insertelement <8 x i1>%b, i1 %el1, i32 7 %d = insertelement <8 x i1>%c, i1 %el2, i32 6 ret <8 x i1>%d } define <32 x i16> @test21(<32 x i16> %x , <32 x i1> %mask) nounwind readnone { ; KNL-LABEL: test21: ; KNL: ## %bb.0: ; KNL-NEXT: vextracti128 $1, %ymm1, %xmm2 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero ; KNL-NEXT: vpsllw $15, %ymm1, 
%ymm1 ; KNL-NEXT: vpsraw $15, %ymm1, %ymm1 ; KNL-NEXT: vpsllw $15, %ymm2, %ymm2 ; KNL-NEXT: vpsraw $15, %ymm2, %ymm2 ; KNL-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1 ; KNL-NEXT: vpandq %zmm0, %zmm1, %zmm0 ; KNL-NEXT: retq ; ; SKX-LABEL: test21: ; SKX: ## %bb.0: ; SKX-NEXT: vpsllw $7, %ymm1, %ymm1 ; SKX-NEXT: vpmovb2m %ymm1, %k1 ; SKX-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z} ; SKX-NEXT: retq ; ; AVX512BW-LABEL: test21: ; AVX512BW: ## %bb.0: ; AVX512BW-NEXT: vpsllw $7, %ymm1, %ymm1 ; AVX512BW-NEXT: vpmovb2m %zmm1, %k1 ; AVX512BW-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq ; ; AVX512DQ-LABEL: test21: ; AVX512DQ: ## %bb.0: ; AVX512DQ-NEXT: vextracti128 $1, %ymm1, %xmm2 ; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero ; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero ; AVX512DQ-NEXT: vpsllw $15, %ymm1, %ymm1 ; AVX512DQ-NEXT: vpsraw $15, %ymm1, %ymm1 ; AVX512DQ-NEXT: vpsllw $15, %ymm2, %ymm2 ; AVX512DQ-NEXT: vpsraw $15, %ymm2, %ymm2 ; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1 ; AVX512DQ-NEXT: vpandq %zmm0, %zmm1, %zmm0 ; AVX512DQ-NEXT: retq ; ; X86-LABEL: test21: ; X86: ## %bb.0: ; X86-NEXT: vpsllw $7, %ymm1, %ymm1 ; X86-NEXT: vpmovb2m %ymm1, %k1 ; X86-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z} ; X86-NEXT: retl %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer ret <32 x i16> %ret } define void @test22(<4 x i1> %a, <4 x i1>* %addr) { ; KNL-LABEL: test22: ; KNL: ## %bb.0: ; KNL-NEXT: vpslld $31, %xmm0, %xmm0 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 ; KNL-NEXT: kshiftlw $12, %k0, %k0 ; KNL-NEXT: kshiftrw $12, %k0, %k0 ; 
KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: movb %al, (%rdi) ; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; ; SKX-LABEL: test22: ; SKX: ## %bb.0: ; SKX-NEXT: vpslld $31, %xmm0, %xmm0 ; SKX-NEXT: vpmovd2m %xmm0, %k0 ; SKX-NEXT: kmovb %k0, (%rdi) ; SKX-NEXT: retq ; ; AVX512BW-LABEL: test22: ; AVX512BW: ## %bb.0: ; AVX512BW-NEXT: vpslld $31, %xmm0, %xmm0 ; AVX512BW-NEXT: vptestmd %zmm0, %zmm0, %k0 ; AVX512BW-NEXT: kshiftlw $12, %k0, %k0 ; AVX512BW-NEXT: kshiftrw $12, %k0, %k0 ; AVX512BW-NEXT: kmovd %k0, %eax ; AVX512BW-NEXT: movb %al, (%rdi) ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; ; AVX512DQ-LABEL: test22: ; AVX512DQ: ## %bb.0: ; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0 ; AVX512DQ-NEXT: kshiftlb $4, %k0, %k0 ; AVX512DQ-NEXT: kshiftrb $4, %k0, %k0 ; AVX512DQ-NEXT: kmovb %k0, (%rdi) ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq ; ; X86-LABEL: test22: ; X86: ## %bb.0: ; X86-NEXT: vpslld $31, %xmm0, %xmm0 ; X86-NEXT: vpmovd2m %xmm0, %k0 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: kmovb %k0, (%eax) ; X86-NEXT: retl store <4 x i1> %a, <4 x i1>* %addr ret void } define void @test23(<2 x i1> %a, <2 x i1>* %addr) { ; KNL-LABEL: test23: ; KNL: ## %bb.0: ; KNL-NEXT: vpsllq $63, %xmm0, %xmm0 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0 ; KNL-NEXT: kshiftlw $14, %k0, %k0 ; KNL-NEXT: kshiftrw $14, %k0, %k0 ; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: movb %al, (%rdi) ; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; ; SKX-LABEL: test23: ; SKX: ## %bb.0: ; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 ; SKX-NEXT: vpmovq2m %xmm0, %k0 ; SKX-NEXT: kmovb %k0, (%rdi) ; SKX-NEXT: retq ; ; AVX512BW-LABEL: test23: ; AVX512BW: ## %bb.0: ; AVX512BW-NEXT: vpsllq $63, %xmm0, %xmm0 ; AVX512BW-NEXT: vptestmq %zmm0, %zmm0, %k0 ; AVX512BW-NEXT: kshiftlw $14, %k0, %k0 ; AVX512BW-NEXT: kshiftrw $14, %k0, %k0 ; AVX512BW-NEXT: kmovd %k0, %eax ; AVX512BW-NEXT: movb %al, (%rdi) ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; ; AVX512DQ-LABEL: test23: ; AVX512DQ: ## %bb.0: ; 
AVX512DQ-NEXT: vpsllq $63, %xmm0, %xmm0 ; AVX512DQ-NEXT: vpmovq2m %zmm0, %k0 ; AVX512DQ-NEXT: kshiftlb $6, %k0, %k0 ; AVX512DQ-NEXT: kshiftrb $6, %k0, %k0 ; AVX512DQ-NEXT: kmovb %k0, (%rdi) ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq ; ; X86-LABEL: test23: ; X86: ## %bb.0: ; X86-NEXT: vpsllq $63, %xmm0, %xmm0 ; X86-NEXT: vpmovq2m %xmm0, %k0 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: kmovb %k0, (%eax) ; X86-NEXT: retl store <2 x i1> %a, <2 x i1>* %addr ret void } define void @store_v1i1(<1 x i1> %c , <1 x i1>* %ptr) { ; KNL-LABEL: store_v1i1: ; KNL: ## %bb.0: ; KNL-NEXT: kmovw %edi, %k0 ; KNL-NEXT: knotw %k0, %k0 ; KNL-NEXT: kshiftlw $15, %k0, %k0 ; KNL-NEXT: kshiftrw $15, %k0, %k0 ; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: movb %al, (%rsi) ; KNL-NEXT: retq ; ; SKX-LABEL: store_v1i1: ; SKX: ## %bb.0: ; SKX-NEXT: kmovd %edi, %k0 ; SKX-NEXT: knotw %k0, %k0 ; SKX-NEXT: kshiftlb $7, %k0, %k0 ; SKX-NEXT: kshiftrb $7, %k0, %k0 ; SKX-NEXT: kmovb %k0, (%rsi) ; SKX-NEXT: retq ; ; AVX512BW-LABEL: store_v1i1: ; AVX512BW: ## %bb.0: ; AVX512BW-NEXT: kmovd %edi, %k0 ; AVX512BW-NEXT: knotw %k0, %k0 ; AVX512BW-NEXT: kshiftlw $15, %k0, %k0 ; AVX512BW-NEXT: kshiftrw $15, %k0, %k0 ; AVX512BW-NEXT: kmovd %k0, %eax ; AVX512BW-NEXT: movb %al, (%rsi) ; AVX512BW-NEXT: retq ; ; AVX512DQ-LABEL: store_v1i1: ; AVX512DQ: ## %bb.0: ; AVX512DQ-NEXT: kmovw %edi, %k0 ; AVX512DQ-NEXT: knotw %k0, %k0 ; AVX512DQ-NEXT: kshiftlb $7, %k0, %k0 ; AVX512DQ-NEXT: kshiftrb $7, %k0, %k0 ; AVX512DQ-NEXT: kmovb %k0, (%rsi) ; AVX512DQ-NEXT: retq ; ; X86-LABEL: store_v1i1: ; X86: ## %bb.0: ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k0 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: knotw %k0, %k0 ; X86-NEXT: kshiftlb $7, %k0, %k0 ; X86-NEXT: kshiftrb $7, %k0, %k0 ; X86-NEXT: kmovb %k0, (%eax) ; X86-NEXT: retl %x = xor <1 x i1> %c, store <1 x i1> %x, <1 x i1>* %ptr, align 4 ret void } define void @store_v2i1(<2 x i1> %c , <2 x i1>* %ptr) { ; KNL-LABEL: store_v2i1: ; KNL: ## %bb.0: ; KNL-NEXT: vpsllq $63, 
%xmm0, %xmm0 ; KNL-NEXT: vptestnmq %zmm0, %zmm0, %k0 ; KNL-NEXT: kshiftlw $14, %k0, %k0 ; KNL-NEXT: kshiftrw $14, %k0, %k0 ; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: movb %al, (%rdi) ; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; ; SKX-LABEL: store_v2i1: ; SKX: ## %bb.0: ; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 ; SKX-NEXT: vpmovq2m %xmm0, %k0 ; SKX-NEXT: knotw %k0, %k0 ; SKX-NEXT: kshiftlb $6, %k0, %k0 ; SKX-NEXT: kshiftrb $6, %k0, %k0 ; SKX-NEXT: kmovb %k0, (%rdi) ; SKX-NEXT: retq ; ; AVX512BW-LABEL: store_v2i1: ; AVX512BW: ## %bb.0: ; AVX512BW-NEXT: vpsllq $63, %xmm0, %xmm0 ; AVX512BW-NEXT: vptestnmq %zmm0, %zmm0, %k0 ; AVX512BW-NEXT: kshiftlw $14, %k0, %k0 ; AVX512BW-NEXT: kshiftrw $14, %k0, %k0 ; AVX512BW-NEXT: kmovd %k0, %eax ; AVX512BW-NEXT: movb %al, (%rdi) ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; ; AVX512DQ-LABEL: store_v2i1: ; AVX512DQ: ## %bb.0: ; AVX512DQ-NEXT: vpsllq $63, %xmm0, %xmm0 ; AVX512DQ-NEXT: vpmovq2m %zmm0, %k0 ; AVX512DQ-NEXT: knotw %k0, %k0 ; AVX512DQ-NEXT: kshiftlb $6, %k0, %k0 ; AVX512DQ-NEXT: kshiftrb $6, %k0, %k0 ; AVX512DQ-NEXT: kmovb %k0, (%rdi) ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq ; ; X86-LABEL: store_v2i1: ; X86: ## %bb.0: ; X86-NEXT: vpsllq $63, %xmm0, %xmm0 ; X86-NEXT: vpmovq2m %xmm0, %k0 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: knotw %k0, %k0 ; X86-NEXT: kshiftlb $6, %k0, %k0 ; X86-NEXT: kshiftrb $6, %k0, %k0 ; X86-NEXT: kmovb %k0, (%eax) ; X86-NEXT: retl %x = xor <2 x i1> %c, store <2 x i1> %x, <2 x i1>* %ptr, align 4 ret void } define void @store_v4i1(<4 x i1> %c , <4 x i1>* %ptr) { ; KNL-LABEL: store_v4i1: ; KNL: ## %bb.0: ; KNL-NEXT: vpslld $31, %xmm0, %xmm0 ; KNL-NEXT: vptestnmd %zmm0, %zmm0, %k0 ; KNL-NEXT: kshiftlw $12, %k0, %k0 ; KNL-NEXT: kshiftrw $12, %k0, %k0 ; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: movb %al, (%rdi) ; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; ; SKX-LABEL: store_v4i1: ; SKX: ## %bb.0: ; SKX-NEXT: vpslld $31, %xmm0, %xmm0 ; SKX-NEXT: vpmovd2m %xmm0, %k0 ; SKX-NEXT: knotw %k0, %k0 ; 
SKX-NEXT: kshiftlb $4, %k0, %k0 ; SKX-NEXT: kshiftrb $4, %k0, %k0 ; SKX-NEXT: kmovb %k0, (%rdi) ; SKX-NEXT: retq ; ; AVX512BW-LABEL: store_v4i1: ; AVX512BW: ## %bb.0: ; AVX512BW-NEXT: vpslld $31, %xmm0, %xmm0 ; AVX512BW-NEXT: vptestnmd %zmm0, %zmm0, %k0 ; AVX512BW-NEXT: kshiftlw $12, %k0, %k0 ; AVX512BW-NEXT: kshiftrw $12, %k0, %k0 ; AVX512BW-NEXT: kmovd %k0, %eax ; AVX512BW-NEXT: movb %al, (%rdi) ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; ; AVX512DQ-LABEL: store_v4i1: ; AVX512DQ: ## %bb.0: ; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0 ; AVX512DQ-NEXT: knotw %k0, %k0 ; AVX512DQ-NEXT: kshiftlb $4, %k0, %k0 ; AVX512DQ-NEXT: kshiftrb $4, %k0, %k0 ; AVX512DQ-NEXT: kmovb %k0, (%rdi) ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq ; ; X86-LABEL: store_v4i1: ; X86: ## %bb.0: ; X86-NEXT: vpslld $31, %xmm0, %xmm0 ; X86-NEXT: vpmovd2m %xmm0, %k0 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: knotw %k0, %k0 ; X86-NEXT: kshiftlb $4, %k0, %k0 ; X86-NEXT: kshiftrb $4, %k0, %k0 ; X86-NEXT: kmovb %k0, (%eax) ; X86-NEXT: retl %x = xor <4 x i1> %c, store <4 x i1> %x, <4 x i1>* %ptr, align 4 ret void } define void @store_v8i1(<8 x i1> %c , <8 x i1>* %ptr) { ; KNL-LABEL: store_v8i1: ; KNL: ## %bb.0: ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0 ; KNL-NEXT: vptestnmq %zmm0, %zmm0, %k0 ; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: movb %al, (%rdi) ; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; ; SKX-LABEL: store_v8i1: ; SKX: ## %bb.0: ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 ; SKX-NEXT: vpmovw2m %xmm0, %k0 ; SKX-NEXT: knotb %k0, %k0 ; SKX-NEXT: kmovb %k0, (%rdi) ; SKX-NEXT: retq ; ; AVX512BW-LABEL: store_v8i1: ; AVX512BW: ## %bb.0: ; AVX512BW-NEXT: vpsllw $15, %xmm0, %xmm0 ; AVX512BW-NEXT: vpmovw2m %zmm0, %k0 ; AVX512BW-NEXT: knotw %k0, %k0 ; AVX512BW-NEXT: kmovd %k0, %eax ; AVX512BW-NEXT: movb %al, (%rdi) ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; ; AVX512DQ-LABEL: store_v8i1: ; AVX512DQ: ## %bb.0: ; AVX512DQ-NEXT: 
vpmovsxwq %xmm0, %zmm0 ; AVX512DQ-NEXT: vpsllq $63, %zmm0, %zmm0 ; AVX512DQ-NEXT: vpmovq2m %zmm0, %k0 ; AVX512DQ-NEXT: knotb %k0, %k0 ; AVX512DQ-NEXT: kmovb %k0, (%rdi) ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq ; ; X86-LABEL: store_v8i1: ; X86: ## %bb.0: ; X86-NEXT: vpsllw $15, %xmm0, %xmm0 ; X86-NEXT: vpmovw2m %xmm0, %k0 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: knotb %k0, %k0 ; X86-NEXT: kmovb %k0, (%eax) ; X86-NEXT: retl %x = xor <8 x i1> %c, store <8 x i1> %x, <8 x i1>* %ptr, align 4 ret void } define void @store_v16i1(<16 x i1> %c , <16 x i1>* %ptr) { ; KNL-LABEL: store_v16i1: ; KNL: ## %bb.0: ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0 ; KNL-NEXT: vptestnmd %zmm0, %zmm0, %k0 ; KNL-NEXT: kmovw %k0, (%rdi) ; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; ; SKX-LABEL: store_v16i1: ; SKX: ## %bb.0: ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 ; SKX-NEXT: vpmovb2m %xmm0, %k0 ; SKX-NEXT: knotw %k0, %k0 ; SKX-NEXT: kmovw %k0, (%rdi) ; SKX-NEXT: retq ; ; AVX512BW-LABEL: store_v16i1: ; AVX512BW: ## %bb.0: ; AVX512BW-NEXT: vpsllw $7, %xmm0, %xmm0 ; AVX512BW-NEXT: vpmovb2m %zmm0, %k0 ; AVX512BW-NEXT: knotw %k0, %k0 ; AVX512BW-NEXT: kmovw %k0, (%rdi) ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; ; AVX512DQ-LABEL: store_v16i1: ; AVX512DQ: ## %bb.0: ; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0 ; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0 ; AVX512DQ-NEXT: knotw %k0, %k0 ; AVX512DQ-NEXT: kmovw %k0, (%rdi) ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq ; ; X86-LABEL: store_v16i1: ; X86: ## %bb.0: ; X86-NEXT: vpsllw $7, %xmm0, %xmm0 ; X86-NEXT: vpmovb2m %xmm0, %k0 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: knotw %k0, %k0 ; X86-NEXT: kmovw %k0, (%eax) ; X86-NEXT: retl %x = xor <16 x i1> %c, store <16 x i1> %x, <16 x i1>* %ptr, align 4 ret void } ;void f2(int); ;void f1(int c) ;{ ; static int v = 0; ; if (v == 0) ; v = 1; ; else ; v = 0; ; f2(v); ;} @f1.v = internal unnamed_addr global i1 false, align 4 
define void @f1(i32 %c) { ; CHECK-LABEL: f1: ; CHECK: ## %bb.0: ## %entry ; CHECK-NEXT: movzbl {{.*}}(%rip), %edi ; CHECK-NEXT: xorl $1, %edi ; CHECK-NEXT: movb %dil, {{.*}}(%rip) ; CHECK-NEXT: jmp _f2 ## TAILCALL ; ; X86-LABEL: f1: ; X86: ## %bb.0: ## %entry ; X86-NEXT: subl $12, %esp ; X86-NEXT: .cfi_def_cfa_offset 16 ; X86-NEXT: movzbl _f1.v, %eax ; X86-NEXT: xorl $1, %eax ; X86-NEXT: movb %al, _f1.v ; X86-NEXT: movl %eax, (%esp) ; X86-NEXT: calll _f2 ; X86-NEXT: addl $12, %esp ; X86-NEXT: retl entry: %.b1 = load i1, i1* @f1.v, align 4 %not..b1 = xor i1 %.b1, true store i1 %not..b1, i1* @f1.v, align 4 %0 = zext i1 %not..b1 to i32 tail call void @f2(i32 %0) #2 ret void } declare void @f2(i32) #1 define void @store_i16_i1(i16 %x, i1 *%y) { ; CHECK-LABEL: store_i16_i1: ; CHECK: ## %bb.0: ; CHECK-NEXT: andl $1, %edi ; CHECK-NEXT: movb %dil, (%rsi) ; CHECK-NEXT: retq ; ; X86-LABEL: store_i16_i1: ; X86: ## %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: andl $1, %ecx ; X86-NEXT: movb %cl, (%eax) ; X86-NEXT: retl %c = trunc i16 %x to i1 store i1 %c, i1* %y ret void } define void @store_i8_i1(i8 %x, i1 *%y) { ; CHECK-LABEL: store_i8_i1: ; CHECK: ## %bb.0: ; CHECK-NEXT: andl $1, %edi ; CHECK-NEXT: movb %dil, (%rsi) ; CHECK-NEXT: retq ; ; X86-LABEL: store_i8_i1: ; X86: ## %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: andb $1, %cl ; X86-NEXT: movb %cl, (%eax) ; X86-NEXT: retl %c = trunc i8 %x to i1 store i1 %c, i1* %y ret void } define <32 x i16> @test_build_vec_v32i1(<32 x i16> %x) { ; KNL-LABEL: test_build_vec_v32i1: ; KNL: ## %bb.0: ; KNL-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0 ; KNL-NEXT: retq ; ; SKX-LABEL: test_build_vec_v32i1: ; SKX: ## %bb.0: ; SKX-NEXT: vandps {{.*}}(%rip), %zmm0, %zmm0 ; SKX-NEXT: retq ; ; AVX512BW-LABEL: test_build_vec_v32i1: ; AVX512BW: ## %bb.0: ; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0 ; AVX512BW-NEXT: retq ; ; AVX512DQ-LABEL: 
test_build_vec_v32i1: ; AVX512DQ: ## %bb.0: ; AVX512DQ-NEXT: vandps {{.*}}(%rip), %zmm0, %zmm0 ; AVX512DQ-NEXT: retq ; ; X86-LABEL: test_build_vec_v32i1: ; X86: ## %bb.0: ; X86-NEXT: vandps LCPI40_0, %zmm0, %zmm0 ; X86-NEXT: retl %ret = select <32 x i1> , <32 x i16> %x, <32 x i16> zeroinitializer ret <32 x i16> %ret } define <32 x i16> @test_build_vec_v32i1_optsize(<32 x i16> %x) optsize { ; KNL-LABEL: test_build_vec_v32i1_optsize: ; KNL: ## %bb.0: ; KNL-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0 ; KNL-NEXT: retq ; ; SKX-LABEL: test_build_vec_v32i1_optsize: ; SKX: ## %bb.0: ; SKX-NEXT: movl $1497715861, %eax ## imm = 0x59455495 ; SKX-NEXT: kmovd %eax, %k1 ; SKX-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z} ; SKX-NEXT: retq ; ; AVX512BW-LABEL: test_build_vec_v32i1_optsize: ; AVX512BW: ## %bb.0: ; AVX512BW-NEXT: movl $1497715861, %eax ## imm = 0x59455495 ; AVX512BW-NEXT: kmovd %eax, %k1 ; AVX512BW-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq ; ; AVX512DQ-LABEL: test_build_vec_v32i1_optsize: ; AVX512DQ: ## %bb.0: ; AVX512DQ-NEXT: vandps {{.*}}(%rip), %zmm0, %zmm0 ; AVX512DQ-NEXT: retq ; ; X86-LABEL: test_build_vec_v32i1_optsize: ; X86: ## %bb.0: ; X86-NEXT: movl $1497715861, %eax ## imm = 0x59455495 ; X86-NEXT: kmovd %eax, %k1 ; X86-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z} ; X86-NEXT: retl %ret = select <32 x i1> , <32 x i16> %x, <32 x i16> zeroinitializer ret <32 x i16> %ret } define <32 x i16> @test_build_vec_v32i1_pgso(<32 x i16> %x) !prof !14 { ; KNL-LABEL: test_build_vec_v32i1_pgso: ; KNL: ## %bb.0: ; KNL-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0 ; KNL-NEXT: retq ; ; SKX-LABEL: test_build_vec_v32i1_pgso: ; SKX: ## %bb.0: ; SKX-NEXT: movl $1497715861, %eax ## imm = 0x59455495 ; SKX-NEXT: kmovd %eax, %k1 ; SKX-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z} ; SKX-NEXT: retq ; ; AVX512BW-LABEL: test_build_vec_v32i1_pgso: ; AVX512BW: ## %bb.0: ; AVX512BW-NEXT: movl $1497715861, %eax ## imm = 0x59455495 ; AVX512BW-NEXT: kmovd %eax, %k1 ; AVX512BW-NEXT: vmovdqu16 %zmm0, %zmm0 
{%k1} {z} ; AVX512BW-NEXT: retq ; ; AVX512DQ-LABEL: test_build_vec_v32i1_pgso: ; AVX512DQ: ## %bb.0: ; AVX512DQ-NEXT: vandps {{.*}}(%rip), %zmm0, %zmm0 ; AVX512DQ-NEXT: retq ; ; X86-LABEL: test_build_vec_v32i1_pgso: ; X86: ## %bb.0: ; X86-NEXT: movl $1497715861, %eax ## imm = 0x59455495 ; X86-NEXT: kmovd %eax, %k1 ; X86-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z} ; X86-NEXT: retl %ret = select <32 x i1> , <32 x i16> %x, <32 x i16> zeroinitializer ret <32 x i16> %ret } define <64 x i8> @test_build_vec_v64i1(<64 x i8> %x) { ; KNL-LABEL: test_build_vec_v64i1: ; KNL: ## %bb.0: ; KNL-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0 ; KNL-NEXT: retq ; ; SKX-LABEL: test_build_vec_v64i1: ; SKX: ## %bb.0: ; SKX-NEXT: vandps {{.*}}(%rip), %zmm0, %zmm0 ; SKX-NEXT: retq ; ; AVX512BW-LABEL: test_build_vec_v64i1: ; AVX512BW: ## %bb.0: ; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0 ; AVX512BW-NEXT: retq ; ; AVX512DQ-LABEL: test_build_vec_v64i1: ; AVX512DQ: ## %bb.0: ; AVX512DQ-NEXT: vandps {{.*}}(%rip), %zmm0, %zmm0 ; AVX512DQ-NEXT: retq ; ; X86-LABEL: test_build_vec_v64i1: ; X86: ## %bb.0: ; X86-NEXT: vandps LCPI43_0, %zmm0, %zmm0 ; X86-NEXT: retl %ret = select <64 x i1> , <64 x i8> %x, <64 x i8> zeroinitializer ret <64 x i8> %ret } define void @ktest_1(<8 x double> %in, double * %base) { ; KNL-LABEL: ktest_1: ; KNL: ## %bb.0: ; KNL-NEXT: vcmpgtpd (%rdi), %zmm0, %k1 ; KNL-NEXT: vmovupd 8(%rdi), %zmm1 {%k1} {z} ; KNL-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1} ; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: testb %al, %al ; KNL-NEXT: je LBB44_2 ; KNL-NEXT: ## %bb.1: ## %L1 ; KNL-NEXT: vmovapd %zmm0, (%rdi) ; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; KNL-NEXT: LBB44_2: ## %L2 ; KNL-NEXT: vmovapd %zmm0, 8(%rdi) ; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; ; SKX-LABEL: ktest_1: ; SKX: ## %bb.0: ; SKX-NEXT: vcmpgtpd (%rdi), %zmm0, %k1 ; SKX-NEXT: vmovupd 8(%rdi), %zmm1 {%k1} {z} ; SKX-NEXT: vcmpltpd %zmm1, %zmm0, %k0 ; SKX-NEXT: ktestb %k0, %k1 ; SKX-NEXT: je LBB44_2 ; SKX-NEXT: ## %bb.1: ## %L1 ; SKX-NEXT: 
vmovapd %zmm0, (%rdi) ; SKX-NEXT: vzeroupper ; SKX-NEXT: retq ; SKX-NEXT: LBB44_2: ## %L2 ; SKX-NEXT: vmovapd %zmm0, 8(%rdi) ; SKX-NEXT: vzeroupper ; SKX-NEXT: retq ; ; AVX512BW-LABEL: ktest_1: ; AVX512BW: ## %bb.0: ; AVX512BW-NEXT: vcmpgtpd (%rdi), %zmm0, %k1 ; AVX512BW-NEXT: vmovupd 8(%rdi), %zmm1 {%k1} {z} ; AVX512BW-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1} ; AVX512BW-NEXT: kmovd %k0, %eax ; AVX512BW-NEXT: testb %al, %al ; AVX512BW-NEXT: je LBB44_2 ; AVX512BW-NEXT: ## %bb.1: ## %L1 ; AVX512BW-NEXT: vmovapd %zmm0, (%rdi) ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; AVX512BW-NEXT: LBB44_2: ## %L2 ; AVX512BW-NEXT: vmovapd %zmm0, 8(%rdi) ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; ; AVX512DQ-LABEL: ktest_1: ; AVX512DQ: ## %bb.0: ; AVX512DQ-NEXT: vcmpgtpd (%rdi), %zmm0, %k1 ; AVX512DQ-NEXT: vmovupd 8(%rdi), %zmm1 {%k1} {z} ; AVX512DQ-NEXT: vcmpltpd %zmm1, %zmm0, %k0 ; AVX512DQ-NEXT: ktestb %k0, %k1 ; AVX512DQ-NEXT: je LBB44_2 ; AVX512DQ-NEXT: ## %bb.1: ## %L1 ; AVX512DQ-NEXT: vmovapd %zmm0, (%rdi) ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq ; AVX512DQ-NEXT: LBB44_2: ## %L2 ; AVX512DQ-NEXT: vmovapd %zmm0, 8(%rdi) ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq ; ; X86-LABEL: ktest_1: ; X86: ## %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: vcmpgtpd (%eax), %zmm0, %k1 ; X86-NEXT: vmovupd 8(%eax), %zmm1 {%k1} {z} ; X86-NEXT: vcmpltpd %zmm1, %zmm0, %k0 ; X86-NEXT: ktestb %k0, %k1 ; X86-NEXT: je LBB44_2 ; X86-NEXT: ## %bb.1: ## %L1 ; X86-NEXT: vmovapd %zmm0, (%eax) ; X86-NEXT: vzeroupper ; X86-NEXT: retl ; X86-NEXT: LBB44_2: ## %L2 ; X86-NEXT: vmovapd %zmm0, 8(%eax) ; X86-NEXT: vzeroupper ; X86-NEXT: retl %addr1 = getelementptr double, double * %base, i64 0 %addr2 = getelementptr double, double * %base, i64 1 %vaddr1 = bitcast double* %addr1 to <8 x double>* %vaddr2 = bitcast double* %addr2 to <8 x double>* %val1 = load <8 x double>, <8 x double> *%vaddr1, align 1 %val2 = load <8 x double>, <8 x double> *%vaddr2, align 1 %sel1 = fcmp ogt 
<8 x double>%in, %val1 %val3 = select <8 x i1> %sel1, <8 x double> %val2, <8 x double> zeroinitializer %sel2 = fcmp olt <8 x double> %in, %val3 %sel3 = and <8 x i1> %sel1, %sel2 %int_sel3 = bitcast <8 x i1> %sel3 to i8 %res = icmp eq i8 %int_sel3, zeroinitializer br i1 %res, label %L2, label %L1 L1: store <8 x double> %in, <8 x double>* %vaddr1 br label %End L2: store <8 x double> %in, <8 x double>* %vaddr2 br label %End End: ret void } define void @ktest_2(<32 x float> %in, float * %base) { ; ; KNL-LABEL: ktest_2: ; KNL: ## %bb.0: ; KNL-NEXT: vcmpgtps (%rdi), %zmm0, %k1 ; KNL-NEXT: vcmpgtps 64(%rdi), %zmm1, %k2 ; KNL-NEXT: vmovups 68(%rdi), %zmm2 {%k2} {z} ; KNL-NEXT: vmovups 4(%rdi), %zmm3 {%k1} {z} ; KNL-NEXT: vcmpltps %zmm3, %zmm0, %k0 ; KNL-NEXT: vcmpltps %zmm2, %zmm1, %k3 ; KNL-NEXT: korw %k3, %k2, %k2 ; KNL-NEXT: korw %k0, %k1, %k0 ; KNL-NEXT: kortestw %k2, %k0 ; KNL-NEXT: je LBB45_2 ; KNL-NEXT: ## %bb.1: ## %L1 ; KNL-NEXT: vmovaps %zmm0, (%rdi) ; KNL-NEXT: vmovaps %zmm1, 64(%rdi) ; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; KNL-NEXT: LBB45_2: ## %L2 ; KNL-NEXT: vmovaps %zmm0, 4(%rdi) ; KNL-NEXT: vmovaps %zmm1, 68(%rdi) ; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; ; SKX-LABEL: ktest_2: ; SKX: ## %bb.0: ; SKX-NEXT: vcmpgtps (%rdi), %zmm0, %k1 ; SKX-NEXT: vcmpgtps 64(%rdi), %zmm1, %k2 ; SKX-NEXT: kunpckwd %k1, %k2, %k0 ; SKX-NEXT: vmovups 68(%rdi), %zmm2 {%k2} {z} ; SKX-NEXT: vmovups 4(%rdi), %zmm3 {%k1} {z} ; SKX-NEXT: vcmpltps %zmm3, %zmm0, %k1 ; SKX-NEXT: vcmpltps %zmm2, %zmm1, %k2 ; SKX-NEXT: kunpckwd %k1, %k2, %k1 ; SKX-NEXT: kortestd %k1, %k0 ; SKX-NEXT: je LBB45_2 ; SKX-NEXT: ## %bb.1: ## %L1 ; SKX-NEXT: vmovaps %zmm0, (%rdi) ; SKX-NEXT: vmovaps %zmm1, 64(%rdi) ; SKX-NEXT: vzeroupper ; SKX-NEXT: retq ; SKX-NEXT: LBB45_2: ## %L2 ; SKX-NEXT: vmovaps %zmm0, 4(%rdi) ; SKX-NEXT: vmovaps %zmm1, 68(%rdi) ; SKX-NEXT: vzeroupper ; SKX-NEXT: retq ; ; AVX512BW-LABEL: ktest_2: ; AVX512BW: ## %bb.0: ; AVX512BW-NEXT: vcmpgtps (%rdi), %zmm0, %k1 ; AVX512BW-NEXT: vcmpgtps 
64(%rdi), %zmm1, %k2 ; AVX512BW-NEXT: kunpckwd %k1, %k2, %k0 ; AVX512BW-NEXT: vmovups 68(%rdi), %zmm2 {%k2} {z} ; AVX512BW-NEXT: vmovups 4(%rdi), %zmm3 {%k1} {z} ; AVX512BW-NEXT: vcmpltps %zmm3, %zmm0, %k1 ; AVX512BW-NEXT: vcmpltps %zmm2, %zmm1, %k2 ; AVX512BW-NEXT: kunpckwd %k1, %k2, %k1 ; AVX512BW-NEXT: kortestd %k1, %k0 ; AVX512BW-NEXT: je LBB45_2 ; AVX512BW-NEXT: ## %bb.1: ## %L1 ; AVX512BW-NEXT: vmovaps %zmm0, (%rdi) ; AVX512BW-NEXT: vmovaps %zmm1, 64(%rdi) ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; AVX512BW-NEXT: LBB45_2: ## %L2 ; AVX512BW-NEXT: vmovaps %zmm0, 4(%rdi) ; AVX512BW-NEXT: vmovaps %zmm1, 68(%rdi) ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; ; AVX512DQ-LABEL: ktest_2: ; AVX512DQ: ## %bb.0: ; AVX512DQ-NEXT: vcmpgtps (%rdi), %zmm0, %k1 ; AVX512DQ-NEXT: vcmpgtps 64(%rdi), %zmm1, %k2 ; AVX512DQ-NEXT: vmovups 68(%rdi), %zmm2 {%k2} {z} ; AVX512DQ-NEXT: vmovups 4(%rdi), %zmm3 {%k1} {z} ; AVX512DQ-NEXT: vcmpltps %zmm3, %zmm0, %k0 ; AVX512DQ-NEXT: vcmpltps %zmm2, %zmm1, %k3 ; AVX512DQ-NEXT: korw %k3, %k2, %k2 ; AVX512DQ-NEXT: korw %k0, %k1, %k0 ; AVX512DQ-NEXT: kortestw %k2, %k0 ; AVX512DQ-NEXT: je LBB45_2 ; AVX512DQ-NEXT: ## %bb.1: ## %L1 ; AVX512DQ-NEXT: vmovaps %zmm0, (%rdi) ; AVX512DQ-NEXT: vmovaps %zmm1, 64(%rdi) ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq ; AVX512DQ-NEXT: LBB45_2: ## %L2 ; AVX512DQ-NEXT: vmovaps %zmm0, 4(%rdi) ; AVX512DQ-NEXT: vmovaps %zmm1, 68(%rdi) ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq ; ; X86-LABEL: ktest_2: ; X86: ## %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: vcmpgtps (%eax), %zmm0, %k1 ; X86-NEXT: vcmpgtps 64(%eax), %zmm1, %k2 ; X86-NEXT: kunpckwd %k1, %k2, %k0 ; X86-NEXT: vmovups 68(%eax), %zmm2 {%k2} {z} ; X86-NEXT: vmovups 4(%eax), %zmm3 {%k1} {z} ; X86-NEXT: vcmpltps %zmm3, %zmm0, %k1 ; X86-NEXT: vcmpltps %zmm2, %zmm1, %k2 ; X86-NEXT: kunpckwd %k1, %k2, %k1 ; X86-NEXT: kortestd %k1, %k0 ; X86-NEXT: je LBB45_2 ; X86-NEXT: ## %bb.1: ## %L1 ; X86-NEXT: vmovaps %zmm0, (%eax) ; 
X86-NEXT: vmovaps %zmm1, 64(%eax) ; X86-NEXT: vzeroupper ; X86-NEXT: retl ; X86-NEXT: LBB45_2: ## %L2 ; X86-NEXT: vmovaps %zmm0, 4(%eax) ; X86-NEXT: vmovaps %zmm1, 68(%eax) ; X86-NEXT: vzeroupper ; X86-NEXT: retl %addr1 = getelementptr float, float * %base, i64 0 %addr2 = getelementptr float, float * %base, i64 1 %vaddr1 = bitcast float* %addr1 to <32 x float>* %vaddr2 = bitcast float* %addr2 to <32 x float>* %val1 = load <32 x float>, <32 x float> *%vaddr1, align 1 %val2 = load <32 x float>, <32 x float> *%vaddr2, align 1 %sel1 = fcmp ogt <32 x float>%in, %val1 %val3 = select <32 x i1> %sel1, <32 x float> %val2, <32 x float> zeroinitializer %sel2 = fcmp olt <32 x float> %in, %val3 %sel3 = or <32 x i1> %sel1, %sel2 %int_sel3 = bitcast <32 x i1> %sel3 to i32 %res = icmp eq i32 %int_sel3, zeroinitializer br i1 %res, label %L2, label %L1 L1: store <32 x float> %in, <32 x float>* %vaddr1 br label %End L2: store <32 x float> %in, <32 x float>* %vaddr2 br label %End End: ret void } define <8 x i64> @load_8i1(<8 x i1>* %a) { ; KNL-LABEL: load_8i1: ; KNL: ## %bb.0: ; KNL-NEXT: kmovw (%rdi), %k1 ; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; KNL-NEXT: retq ; ; SKX-LABEL: load_8i1: ; SKX: ## %bb.0: ; SKX-NEXT: kmovb (%rdi), %k0 ; SKX-NEXT: vpmovm2q %k0, %zmm0 ; SKX-NEXT: retq ; ; AVX512BW-LABEL: load_8i1: ; AVX512BW: ## %bb.0: ; AVX512BW-NEXT: kmovw (%rdi), %k1 ; AVX512BW-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq ; ; AVX512DQ-LABEL: load_8i1: ; AVX512DQ: ## %bb.0: ; AVX512DQ-NEXT: kmovb (%rdi), %k0 ; AVX512DQ-NEXT: vpmovm2q %k0, %zmm0 ; AVX512DQ-NEXT: retq ; ; X86-LABEL: load_8i1: ; X86: ## %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: kmovb (%eax), %k0 ; X86-NEXT: vpmovm2q %k0, %zmm0 ; X86-NEXT: retl %b = load <8 x i1>, <8 x i1>* %a %c = sext <8 x i1> %b to <8 x i64> ret <8 x i64> %c } define <16 x i32> @load_16i1(<16 x i1>* %a) { ; KNL-LABEL: load_16i1: ; KNL: ## %bb.0: ; KNL-NEXT: kmovw (%rdi), %k1 ; KNL-NEXT: 
vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; KNL-NEXT: retq ; ; SKX-LABEL: load_16i1: ; SKX: ## %bb.0: ; SKX-NEXT: kmovw (%rdi), %k0 ; SKX-NEXT: vpmovm2d %k0, %zmm0 ; SKX-NEXT: retq ; ; AVX512BW-LABEL: load_16i1: ; AVX512BW: ## %bb.0: ; AVX512BW-NEXT: kmovw (%rdi), %k1 ; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq ; ; AVX512DQ-LABEL: load_16i1: ; AVX512DQ: ## %bb.0: ; AVX512DQ-NEXT: kmovw (%rdi), %k0 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0 ; AVX512DQ-NEXT: retq ; ; X86-LABEL: load_16i1: ; X86: ## %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: kmovw (%eax), %k0 ; X86-NEXT: vpmovm2d %k0, %zmm0 ; X86-NEXT: retl %b = load <16 x i1>, <16 x i1>* %a %c = sext <16 x i1> %b to <16 x i32> ret <16 x i32> %c } define <2 x i16> @load_2i1(<2 x i1>* %a) { ; KNL-LABEL: load_2i1: ; KNL: ## %bb.0: ; KNL-NEXT: kmovw (%rdi), %k1 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; KNL-NEXT: vpmovdw %zmm0, %ymm0 ; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 killed $ymm0 ; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; ; SKX-LABEL: load_2i1: ; SKX: ## %bb.0: ; SKX-NEXT: kmovb (%rdi), %k0 ; SKX-NEXT: vpmovm2w %k0, %xmm0 ; SKX-NEXT: retq ; ; AVX512BW-LABEL: load_2i1: ; AVX512BW: ## %bb.0: ; AVX512BW-NEXT: kmovw (%rdi), %k0 ; AVX512BW-NEXT: vpmovm2w %k0, %zmm0 ; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0 ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; ; AVX512DQ-LABEL: load_2i1: ; AVX512DQ: ## %bb.0: ; AVX512DQ-NEXT: kmovb (%rdi), %k0 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0 ; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0 ; AVX512DQ-NEXT: ## kill: def $xmm0 killed $xmm0 killed $ymm0 ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq ; ; X86-LABEL: load_2i1: ; X86: ## %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: kmovb (%eax), %k0 ; X86-NEXT: vpmovm2w %k0, %xmm0 ; X86-NEXT: retl %b = load <2 x i1>, <2 x i1>* %a %c = sext <2 x i1> %b to <2 x i16> ret <2 x i16> %c } define <4 x i16> @load_4i1(<4 x i1>* %a) { ; KNL-LABEL: 
load_4i1: ; KNL: ## %bb.0: ; KNL-NEXT: kmovw (%rdi), %k1 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; KNL-NEXT: vpmovdw %zmm0, %ymm0 ; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 killed $ymm0 ; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; ; SKX-LABEL: load_4i1: ; SKX: ## %bb.0: ; SKX-NEXT: kmovb (%rdi), %k0 ; SKX-NEXT: vpmovm2w %k0, %xmm0 ; SKX-NEXT: retq ; ; AVX512BW-LABEL: load_4i1: ; AVX512BW: ## %bb.0: ; AVX512BW-NEXT: kmovw (%rdi), %k0 ; AVX512BW-NEXT: vpmovm2w %k0, %zmm0 ; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0 ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; ; AVX512DQ-LABEL: load_4i1: ; AVX512DQ: ## %bb.0: ; AVX512DQ-NEXT: kmovb (%rdi), %k0 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0 ; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0 ; AVX512DQ-NEXT: ## kill: def $xmm0 killed $xmm0 killed $ymm0 ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq ; ; X86-LABEL: load_4i1: ; X86: ## %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: kmovb (%eax), %k0 ; X86-NEXT: vpmovm2w %k0, %xmm0 ; X86-NEXT: retl %b = load <4 x i1>, <4 x i1>* %a %c = sext <4 x i1> %b to <4 x i16> ret <4 x i16> %c } define <32 x i16> @load_32i1(<32 x i1>* %a) { ; KNL-LABEL: load_32i1: ; KNL: ## %bb.0: ; KNL-NEXT: kmovw (%rdi), %k1 ; KNL-NEXT: kmovw 2(%rdi), %k2 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; KNL-NEXT: vpmovdw %zmm0, %ymm0 ; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z} ; KNL-NEXT: vpmovdw %zmm1, %ymm1 ; KNL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 ; KNL-NEXT: retq ; ; SKX-LABEL: load_32i1: ; SKX: ## %bb.0: ; SKX-NEXT: kmovd (%rdi), %k0 ; SKX-NEXT: vpmovm2w %k0, %zmm0 ; SKX-NEXT: retq ; ; AVX512BW-LABEL: load_32i1: ; AVX512BW: ## %bb.0: ; AVX512BW-NEXT: kmovd (%rdi), %k0 ; AVX512BW-NEXT: vpmovm2w %k0, %zmm0 ; AVX512BW-NEXT: retq ; ; AVX512DQ-LABEL: load_32i1: ; AVX512DQ: ## %bb.0: ; AVX512DQ-NEXT: kmovw (%rdi), %k0 ; AVX512DQ-NEXT: kmovw 2(%rdi), %k1 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0 ; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0 ; 
AVX512DQ-NEXT: vpmovm2d %k1, %zmm1 ; AVX512DQ-NEXT: vpmovdw %zmm1, %ymm1 ; AVX512DQ-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 ; AVX512DQ-NEXT: retq ; ; X86-LABEL: load_32i1: ; X86: ## %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: kmovd (%eax), %k0 ; X86-NEXT: vpmovm2w %k0, %zmm0 ; X86-NEXT: retl %b = load <32 x i1>, <32 x i1>* %a %c = sext <32 x i1> %b to <32 x i16> ret <32 x i16> %c } define <64 x i8> @load_64i1(<64 x i1>* %a) { ; KNL-LABEL: load_64i1: ; KNL: ## %bb.0: ; KNL-NEXT: kmovw (%rdi), %k1 ; KNL-NEXT: kmovw 2(%rdi), %k2 ; KNL-NEXT: kmovw 4(%rdi), %k3 ; KNL-NEXT: kmovw 6(%rdi), %k4 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k3} {z} ; KNL-NEXT: vpmovdb %zmm0, %xmm0 ; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k4} {z} ; KNL-NEXT: vpmovdb %zmm1, %xmm1 ; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 ; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; KNL-NEXT: vpmovdb %zmm1, %xmm1 ; KNL-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z} ; KNL-NEXT: vpmovdb %zmm2, %xmm2 ; KNL-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 ; KNL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 ; KNL-NEXT: retq ; ; SKX-LABEL: load_64i1: ; SKX: ## %bb.0: ; SKX-NEXT: kmovq (%rdi), %k0 ; SKX-NEXT: vpmovm2b %k0, %zmm0 ; SKX-NEXT: retq ; ; AVX512BW-LABEL: load_64i1: ; AVX512BW: ## %bb.0: ; AVX512BW-NEXT: kmovq (%rdi), %k0 ; AVX512BW-NEXT: vpmovm2b %k0, %zmm0 ; AVX512BW-NEXT: retq ; ; AVX512DQ-LABEL: load_64i1: ; AVX512DQ: ## %bb.0: ; AVX512DQ-NEXT: kmovw (%rdi), %k0 ; AVX512DQ-NEXT: kmovw 2(%rdi), %k1 ; AVX512DQ-NEXT: kmovw 4(%rdi), %k2 ; AVX512DQ-NEXT: kmovw 6(%rdi), %k3 ; AVX512DQ-NEXT: vpmovm2d %k2, %zmm0 ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512DQ-NEXT: vpmovm2d %k3, %zmm1 ; AVX512DQ-NEXT: vpmovdb %zmm1, %xmm1 ; AVX512DQ-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm1 ; AVX512DQ-NEXT: vpmovdb %zmm1, %xmm1 ; AVX512DQ-NEXT: vpmovm2d %k1, %zmm2 ; AVX512DQ-NEXT: vpmovdb %zmm2, %xmm2 ; AVX512DQ-NEXT: vinserti128 $1, %xmm2, 
%ymm1, %ymm1 ; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 ; AVX512DQ-NEXT: retq ; ; X86-LABEL: load_64i1: ; X86: ## %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: kmovq (%eax), %k0 ; X86-NEXT: vpmovm2b %k0, %zmm0 ; X86-NEXT: retl %b = load <64 x i1>, <64 x i1>* %a %c = sext <64 x i1> %b to <64 x i8> ret <64 x i8> %c } define void @store_8i1(<8 x i1>* %a, <8 x i1> %v) { ; KNL-LABEL: store_8i1: ; KNL: ## %bb.0: ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0 ; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: movb %al, (%rdi) ; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; ; SKX-LABEL: store_8i1: ; SKX: ## %bb.0: ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 ; SKX-NEXT: vpmovw2m %xmm0, %k0 ; SKX-NEXT: kmovb %k0, (%rdi) ; SKX-NEXT: retq ; ; AVX512BW-LABEL: store_8i1: ; AVX512BW: ## %bb.0: ; AVX512BW-NEXT: vpsllw $15, %xmm0, %xmm0 ; AVX512BW-NEXT: vpmovw2m %zmm0, %k0 ; AVX512BW-NEXT: kmovd %k0, %eax ; AVX512BW-NEXT: movb %al, (%rdi) ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; ; AVX512DQ-LABEL: store_8i1: ; AVX512DQ: ## %bb.0: ; AVX512DQ-NEXT: vpmovsxwq %xmm0, %zmm0 ; AVX512DQ-NEXT: vpsllq $63, %zmm0, %zmm0 ; AVX512DQ-NEXT: vpmovq2m %zmm0, %k0 ; AVX512DQ-NEXT: kmovb %k0, (%rdi) ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq ; ; X86-LABEL: store_8i1: ; X86: ## %bb.0: ; X86-NEXT: vpsllw $15, %xmm0, %xmm0 ; X86-NEXT: vpmovw2m %xmm0, %k0 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: kmovb %k0, (%eax) ; X86-NEXT: retl store <8 x i1> %v, <8 x i1>* %a ret void } define void @store_8i1_1(<8 x i1>* %a, <8 x i16> %v) { ; KNL-LABEL: store_8i1_1: ; KNL: ## %bb.0: ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0 ; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: movb %al, (%rdi) ; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; ; SKX-LABEL: store_8i1_1: ; SKX: ## %bb.0: ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 ; SKX-NEXT: vpmovw2m %xmm0, %k0 ; SKX-NEXT: kmovb %k0, (%rdi) 
; SKX-NEXT: retq ; ; AVX512BW-LABEL: store_8i1_1: ; AVX512BW: ## %bb.0: ; AVX512BW-NEXT: vpsllw $15, %xmm0, %xmm0 ; AVX512BW-NEXT: vpmovw2m %zmm0, %k0 ; AVX512BW-NEXT: kmovd %k0, %eax ; AVX512BW-NEXT: movb %al, (%rdi) ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; ; AVX512DQ-LABEL: store_8i1_1: ; AVX512DQ: ## %bb.0: ; AVX512DQ-NEXT: vpmovsxwq %xmm0, %zmm0 ; AVX512DQ-NEXT: vpsllq $63, %zmm0, %zmm0 ; AVX512DQ-NEXT: vpmovq2m %zmm0, %k0 ; AVX512DQ-NEXT: kmovb %k0, (%rdi) ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq ; ; X86-LABEL: store_8i1_1: ; X86: ## %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: vpsllw $15, %xmm0, %xmm0 ; X86-NEXT: vpmovw2m %xmm0, %k0 ; X86-NEXT: kmovb %k0, (%eax) ; X86-NEXT: retl %v1 = trunc <8 x i16> %v to <8 x i1> store <8 x i1> %v1, <8 x i1>* %a ret void } define void @store_16i1(<16 x i1>* %a, <16 x i1> %v) { ; KNL-LABEL: store_16i1: ; KNL: ## %bb.0: ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 ; KNL-NEXT: kmovw %k0, (%rdi) ; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; ; SKX-LABEL: store_16i1: ; SKX: ## %bb.0: ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 ; SKX-NEXT: vpmovb2m %xmm0, %k0 ; SKX-NEXT: kmovw %k0, (%rdi) ; SKX-NEXT: retq ; ; AVX512BW-LABEL: store_16i1: ; AVX512BW: ## %bb.0: ; AVX512BW-NEXT: vpsllw $7, %xmm0, %xmm0 ; AVX512BW-NEXT: vpmovb2m %zmm0, %k0 ; AVX512BW-NEXT: kmovw %k0, (%rdi) ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; ; AVX512DQ-LABEL: store_16i1: ; AVX512DQ: ## %bb.0: ; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0 ; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0 ; AVX512DQ-NEXT: kmovw %k0, (%rdi) ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq ; ; X86-LABEL: store_16i1: ; X86: ## %bb.0: ; X86-NEXT: vpsllw $7, %xmm0, %xmm0 ; X86-NEXT: vpmovb2m %xmm0, %k0 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: kmovw %k0, (%eax) ; X86-NEXT: retl store <16 x i1> %v, <16 x i1>* %a ret void } define void @store_32i1(<32 x i1>* 
%a, <32 x i1> %v) { ; KNL-LABEL: store_32i1: ; KNL: ## %bb.0: ; KNL-NEXT: vpmovsxbd %xmm0, %zmm1 ; KNL-NEXT: vpslld $31, %zmm1, %zmm1 ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0 ; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1 ; KNL-NEXT: kmovw %k1, 2(%rdi) ; KNL-NEXT: kmovw %k0, (%rdi) ; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; ; SKX-LABEL: store_32i1: ; SKX: ## %bb.0: ; SKX-NEXT: vpsllw $7, %ymm0, %ymm0 ; SKX-NEXT: vpmovb2m %ymm0, %k0 ; SKX-NEXT: kmovd %k0, (%rdi) ; SKX-NEXT: vzeroupper ; SKX-NEXT: retq ; ; AVX512BW-LABEL: store_32i1: ; AVX512BW: ## %bb.0: ; AVX512BW-NEXT: vpsllw $7, %ymm0, %ymm0 ; AVX512BW-NEXT: vpmovb2m %zmm0, %k0 ; AVX512BW-NEXT: kmovd %k0, (%rdi) ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; ; AVX512DQ-LABEL: store_32i1: ; AVX512DQ: ## %bb.0: ; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm1 ; AVX512DQ-NEXT: vpslld $31, %zmm1, %zmm1 ; AVX512DQ-NEXT: vpmovd2m %zmm1, %k0 ; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm0 ; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0 ; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k1 ; AVX512DQ-NEXT: kmovw %k1, 2(%rdi) ; AVX512DQ-NEXT: kmovw %k0, (%rdi) ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq ; ; X86-LABEL: store_32i1: ; X86: ## %bb.0: ; X86-NEXT: vpsllw $7, %ymm0, %ymm0 ; X86-NEXT: vpmovb2m %ymm0, %k0 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: kmovd %k0, (%eax) ; X86-NEXT: vzeroupper ; X86-NEXT: retl store <32 x i1> %v, <32 x i1>* %a ret void } define void @store_32i1_1(<32 x i1>* %a, <32 x i16> %v) { ; KNL-LABEL: store_32i1_1: ; KNL: ## %bb.0: ; KNL-NEXT: vpmovsxwd %ymm0, %zmm1 ; KNL-NEXT: vpslld $31, %zmm1, %zmm1 ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0 ; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm0 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1 ; KNL-NEXT: kmovw %k1, 2(%rdi) ; KNL-NEXT: kmovw %k0, (%rdi) ; KNL-NEXT: 
vzeroupper ; KNL-NEXT: retq ; ; SKX-LABEL: store_32i1_1: ; SKX: ## %bb.0: ; SKX-NEXT: vpsllw $15, %zmm0, %zmm0 ; SKX-NEXT: vpmovw2m %zmm0, %k0 ; SKX-NEXT: kmovd %k0, (%rdi) ; SKX-NEXT: vzeroupper ; SKX-NEXT: retq ; ; AVX512BW-LABEL: store_32i1_1: ; AVX512BW: ## %bb.0: ; AVX512BW-NEXT: vpsllw $15, %zmm0, %zmm0 ; AVX512BW-NEXT: vpmovw2m %zmm0, %k0 ; AVX512BW-NEXT: kmovd %k0, (%rdi) ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; ; AVX512DQ-LABEL: store_32i1_1: ; AVX512DQ: ## %bb.0: ; AVX512DQ-NEXT: vpmovsxwd %ymm0, %zmm1 ; AVX512DQ-NEXT: vpslld $31, %zmm1, %zmm1 ; AVX512DQ-NEXT: vpmovd2m %zmm1, %k0 ; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm0 ; AVX512DQ-NEXT: vpmovsxwd %ymm0, %zmm0 ; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k1 ; AVX512DQ-NEXT: kmovw %k1, 2(%rdi) ; AVX512DQ-NEXT: kmovw %k0, (%rdi) ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq ; ; X86-LABEL: store_32i1_1: ; X86: ## %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: vpsllw $15, %zmm0, %zmm0 ; X86-NEXT: vpmovw2m %zmm0, %k0 ; X86-NEXT: kmovd %k0, (%eax) ; X86-NEXT: vzeroupper ; X86-NEXT: retl %v1 = trunc <32 x i16> %v to <32 x i1> store <32 x i1> %v1, <32 x i1>* %a ret void } define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) { ; ; KNL-LABEL: store_64i1: ; KNL: ## %bb.0: ; KNL-NEXT: movw $-3, %ax ; KNL-NEXT: kmovw %eax, %k1 ; KNL-NEXT: kmovw %esi, %k0 ; KNL-NEXT: kandw %k1, %k0, %k0 ; KNL-NEXT: kmovw %k1, %k2 ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; KNL-NEXT: kmovw %edx, %k1 ; KNL-NEXT: kshiftlw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $14, %k1, %k1 ; KNL-NEXT: korw %k1, %k0, %k0 ; KNL-NEXT: movw $-5, %ax ; KNL-NEXT: kmovw %eax, %k1 ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; KNL-NEXT: kandw %k1, %k0, %k0 ; KNL-NEXT: kmovw %ecx, %k1 ; KNL-NEXT: kshiftlw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $13, %k1, %k1 ; KNL-NEXT: korw %k1, %k0, %k0 ; KNL-NEXT: movw $-9, %ax ; KNL-NEXT: kmovw %eax, %k1 ; KNL-NEXT: kmovw %k1, 
{{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; KNL-NEXT: kandw %k1, %k0, %k0 ; KNL-NEXT: kmovw %r8d, %k1 ; KNL-NEXT: kshiftlw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $12, %k1, %k1 ; KNL-NEXT: korw %k1, %k0, %k0 ; KNL-NEXT: movw $-17, %ax ; KNL-NEXT: kmovw %eax, %k6 ; KNL-NEXT: kandw %k6, %k0, %k0 ; KNL-NEXT: kmovw %k6, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; KNL-NEXT: kmovw %r9d, %k1 ; KNL-NEXT: kshiftlw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $11, %k1, %k1 ; KNL-NEXT: korw %k1, %k0, %k0 ; KNL-NEXT: movw $-33, %ax ; KNL-NEXT: kmovw %eax, %k1 ; KNL-NEXT: kandw %k1, %k0, %k0 ; KNL-NEXT: kmovw %k1, %k3 ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k1 ; KNL-NEXT: kshiftlw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $10, %k1, %k1 ; KNL-NEXT: korw %k1, %k0, %k0 ; KNL-NEXT: movw $-65, %ax ; KNL-NEXT: kmovw %eax, %k1 ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; KNL-NEXT: kandw %k1, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k1 ; KNL-NEXT: kshiftlw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $9, %k1, %k1 ; KNL-NEXT: korw %k1, %k0, %k0 ; KNL-NEXT: movw $-129, %ax ; KNL-NEXT: kmovw %eax, %k1 ; KNL-NEXT: kandw %k1, %k0, %k0 ; KNL-NEXT: kmovw %k1, %k4 ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k1 ; KNL-NEXT: kshiftlw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $8, %k1, %k1 ; KNL-NEXT: korw %k1, %k0, %k0 ; KNL-NEXT: movw $-257, %ax ## imm = 0xFEFF ; KNL-NEXT: kmovw %eax, %k1 ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; KNL-NEXT: kandw %k1, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k1 ; KNL-NEXT: kshiftlw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $7, %k1, %k1 ; KNL-NEXT: korw %k1, %k0, %k0 ; KNL-NEXT: movw $-513, %ax ## imm = 0xFDFF ; KNL-NEXT: kmovw %eax, %k1 ; KNL-NEXT: kandw %k1, %k0, %k0 ; KNL-NEXT: kmovw %k1, %k5 ; KNL-NEXT: kmovw %k1, 
{{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k1 ; KNL-NEXT: kshiftlw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $6, %k1, %k1 ; KNL-NEXT: korw %k1, %k0, %k0 ; KNL-NEXT: movw $-1025, %ax ## imm = 0xFBFF ; KNL-NEXT: kmovw %eax, %k1 ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; KNL-NEXT: kandw %k1, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k1 ; KNL-NEXT: kshiftlw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $5, %k1, %k1 ; KNL-NEXT: korw %k1, %k0, %k0 ; KNL-NEXT: movw $-2049, %ax ## imm = 0xF7FF ; KNL-NEXT: kmovw %eax, %k1 ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; KNL-NEXT: kandw %k1, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k1 ; KNL-NEXT: kshiftlw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $4, %k1, %k1 ; KNL-NEXT: korw %k1, %k0, %k0 ; KNL-NEXT: movw $-4097, %ax ## imm = 0xEFFF ; KNL-NEXT: kmovw %eax, %k1 ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; KNL-NEXT: kandw %k1, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k1 ; KNL-NEXT: kshiftlw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $3, %k1, %k1 ; KNL-NEXT: korw %k1, %k0, %k0 ; KNL-NEXT: movw $-8193, %ax ## imm = 0xDFFF ; KNL-NEXT: kmovw %eax, %k1 ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; KNL-NEXT: kandw %k1, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k1 ; KNL-NEXT: kshiftlw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $2, %k1, %k1 ; KNL-NEXT: korw %k1, %k0, %k0 ; KNL-NEXT: movw $-16385, %ax ## imm = 0xBFFF ; KNL-NEXT: kmovw %eax, %k1 ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; KNL-NEXT: kandw %k1, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k7 ; KNL-NEXT: kshiftlw $14, %k7, %k7 ; KNL-NEXT: korw %k7, %k0, %k0 ; KNL-NEXT: kshiftlw $1, %k0, %k0 ; KNL-NEXT: kshiftrw $1, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k7 ; 
KNL-NEXT: kshiftlw $15, %k7, %k7 ; KNL-NEXT: korw %k7, %k0, %k0 ; KNL-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k0 ; KNL-NEXT: kandw %k2, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k7 ; KNL-NEXT: kshiftlw $15, %k7, %k7 ; KNL-NEXT: kshiftrw $14, %k7, %k7 ; KNL-NEXT: korw %k7, %k0, %k0 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload ; KNL-NEXT: kandw %k1, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k7 ; KNL-NEXT: kshiftlw $15, %k7, %k7 ; KNL-NEXT: kshiftrw $13, %k7, %k7 ; KNL-NEXT: korw %k7, %k0, %k0 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload ; KNL-NEXT: kandw %k2, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k7 ; KNL-NEXT: kshiftlw $15, %k7, %k7 ; KNL-NEXT: kshiftrw $12, %k7, %k7 ; KNL-NEXT: korw %k7, %k0, %k0 ; KNL-NEXT: kandw %k6, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k7 ; KNL-NEXT: kshiftlw $15, %k7, %k7 ; KNL-NEXT: kshiftrw $11, %k7, %k7 ; KNL-NEXT: korw %k7, %k0, %k0 ; KNL-NEXT: kandw %k3, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k7 ; KNL-NEXT: kshiftlw $15, %k7, %k7 ; KNL-NEXT: kshiftrw $10, %k7, %k7 ; KNL-NEXT: korw %k7, %k0, %k0 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload ; KNL-NEXT: kandw %k6, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k7 ; KNL-NEXT: kshiftlw $15, %k7, %k7 ; KNL-NEXT: kshiftrw $9, %k7, %k7 ; KNL-NEXT: korw %k7, %k0, %k0 ; KNL-NEXT: kandw %k4, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k7 ; KNL-NEXT: kshiftlw $15, %k7, %k7 ; KNL-NEXT: kshiftrw $8, %k7, %k7 ; KNL-NEXT: korw %k7, %k0, %k0 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 ## 2-byte Reload ; KNL-NEXT: kandw %k3, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k7 ; KNL-NEXT: kshiftlw $15, %k7, %k7 ; KNL-NEXT: 
kshiftrw $7, %k7, %k7 ; KNL-NEXT: korw %k7, %k0, %k0 ; KNL-NEXT: kandw %k5, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k7 ; KNL-NEXT: kshiftlw $15, %k7, %k7 ; KNL-NEXT: kshiftrw $6, %k7, %k7 ; KNL-NEXT: korw %k7, %k0, %k0 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k4 ## 2-byte Reload ; KNL-NEXT: kandw %k4, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k7 ; KNL-NEXT: kshiftlw $15, %k7, %k7 ; KNL-NEXT: kshiftrw $5, %k7, %k7 ; KNL-NEXT: korw %k7, %k0, %k0 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload ; KNL-NEXT: kandw %k5, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k7 ; KNL-NEXT: kshiftlw $15, %k7, %k7 ; KNL-NEXT: kshiftrw $4, %k7, %k7 ; KNL-NEXT: korw %k7, %k0, %k0 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload ; KNL-NEXT: kandw %k7, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k7 ; KNL-NEXT: kshiftlw $15, %k7, %k7 ; KNL-NEXT: kshiftrw $3, %k7, %k7 ; KNL-NEXT: korw %k7, %k0, %k0 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload ; KNL-NEXT: kandw %k7, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k7 ; KNL-NEXT: kshiftlw $15, %k7, %k7 ; KNL-NEXT: kshiftrw $2, %k7, %k7 ; KNL-NEXT: korw %k7, %k0, %k0 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload ; KNL-NEXT: kandw %k7, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k7 ; KNL-NEXT: kshiftlw $14, %k7, %k7 ; KNL-NEXT: korw %k7, %k0, %k0 ; KNL-NEXT: kshiftlw $1, %k0, %k0 ; KNL-NEXT: kshiftrw $1, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k7 ; KNL-NEXT: kshiftlw $15, %k7, %k7 ; KNL-NEXT: korw %k7, %k0, %k0 ; KNL-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k0 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload ; KNL-NEXT: kandw %k7, %k0, %k0 ; KNL-NEXT: movb 
{{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k7 ; KNL-NEXT: kshiftlw $15, %k7, %k7 ; KNL-NEXT: kshiftrw $14, %k7, %k7 ; KNL-NEXT: korw %k7, %k0, %k0 ; KNL-NEXT: kandw %k1, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k7 ; KNL-NEXT: kshiftlw $15, %k7, %k7 ; KNL-NEXT: kshiftrw $13, %k7, %k7 ; KNL-NEXT: korw %k7, %k0, %k0 ; KNL-NEXT: kandw %k2, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k7 ; KNL-NEXT: kshiftlw $15, %k7, %k7 ; KNL-NEXT: kshiftrw $12, %k7, %k7 ; KNL-NEXT: korw %k7, %k0, %k0 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload ; KNL-NEXT: kandw %k1, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k7 ; KNL-NEXT: kshiftlw $15, %k7, %k7 ; KNL-NEXT: kshiftrw $11, %k7, %k7 ; KNL-NEXT: korw %k7, %k0, %k0 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload ; KNL-NEXT: kandw %k2, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k7 ; KNL-NEXT: kshiftlw $15, %k7, %k7 ; KNL-NEXT: kshiftrw $10, %k7, %k7 ; KNL-NEXT: korw %k7, %k0, %k0 ; KNL-NEXT: kandw %k6, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k7 ; KNL-NEXT: kshiftlw $15, %k7, %k7 ; KNL-NEXT: kshiftrw $9, %k7, %k7 ; KNL-NEXT: korw %k7, %k0, %k0 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload ; KNL-NEXT: kandw %k2, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k7 ; KNL-NEXT: kshiftlw $15, %k7, %k7 ; KNL-NEXT: kshiftrw $8, %k7, %k7 ; KNL-NEXT: korw %k7, %k0, %k0 ; KNL-NEXT: kandw %k3, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k7 ; KNL-NEXT: kshiftlw $15, %k7, %k7 ; KNL-NEXT: kshiftrw $7, %k7, %k7 ; KNL-NEXT: korw %k7, %k0, %k0 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 ## 2-byte Reload ; KNL-NEXT: kandw %k3, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k7 ; KNL-NEXT: kshiftlw $15, %k7, %k7 ; KNL-NEXT: kshiftrw $6, %k7, %k7 ; KNL-NEXT: korw %k7, 
%k0, %k0 ; KNL-NEXT: kandw %k4, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k7 ; KNL-NEXT: kshiftlw $15, %k7, %k7 ; KNL-NEXT: kshiftrw $5, %k7, %k7 ; KNL-NEXT: korw %k7, %k0, %k0 ; KNL-NEXT: kandw %k5, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k7 ; KNL-NEXT: kshiftlw $15, %k7, %k7 ; KNL-NEXT: kshiftrw $4, %k7, %k7 ; KNL-NEXT: korw %k7, %k0, %k0 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload ; KNL-NEXT: kandw %k2, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k7 ; KNL-NEXT: kshiftlw $15, %k7, %k7 ; KNL-NEXT: kshiftrw $3, %k7, %k7 ; KNL-NEXT: korw %k7, %k0, %k0 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload ; KNL-NEXT: kandw %k5, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k7 ; KNL-NEXT: kshiftlw $15, %k7, %k7 ; KNL-NEXT: kshiftrw $2, %k7, %k7 ; KNL-NEXT: korw %k7, %k0, %k0 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload ; KNL-NEXT: kandw %k5, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k7 ; KNL-NEXT: kshiftlw $14, %k7, %k7 ; KNL-NEXT: korw %k7, %k0, %k0 ; KNL-NEXT: kshiftlw $1, %k0, %k0 ; KNL-NEXT: kshiftrw $1, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k7 ; KNL-NEXT: kshiftlw $15, %k7, %k7 ; KNL-NEXT: korw %k7, %k0, %k0 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k7 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload ; KNL-NEXT: kandw %k5, %k7, %k7 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: kshiftrw $14, %k6, %k6 ; KNL-NEXT: korw %k6, %k7, %k6 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload ; KNL-NEXT: kandw %k5, %k6, %k6 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k7 ; KNL-NEXT: kshiftlw $15, %k7, %k7 ; KNL-NEXT: kshiftrw $13, %k7, %k7 ; KNL-NEXT: korw %k7, %k6, %k6 ; KNL-NEXT: kmovw 
{{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload ; KNL-NEXT: kandw %k5, %k6, %k6 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k7 ; KNL-NEXT: kshiftlw $15, %k7, %k7 ; KNL-NEXT: kshiftrw $12, %k7, %k7 ; KNL-NEXT: korw %k7, %k6, %k6 ; KNL-NEXT: kandw %k1, %k6, %k6 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k7 ; KNL-NEXT: kshiftlw $15, %k7, %k7 ; KNL-NEXT: kshiftrw $11, %k7, %k7 ; KNL-NEXT: korw %k7, %k6, %k6 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload ; KNL-NEXT: kandw %k1, %k6, %k6 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k7 ; KNL-NEXT: kshiftlw $15, %k7, %k7 ; KNL-NEXT: kshiftrw $10, %k7, %k7 ; KNL-NEXT: korw %k7, %k6, %k6 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload ; KNL-NEXT: kandw %k1, %k6, %k6 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k7 ; KNL-NEXT: kshiftlw $15, %k7, %k7 ; KNL-NEXT: kshiftrw $9, %k7, %k7 ; KNL-NEXT: korw %k7, %k6, %k6 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload ; KNL-NEXT: kandw %k1, %k6, %k6 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k7 ; KNL-NEXT: kshiftlw $15, %k7, %k7 ; KNL-NEXT: kshiftrw $8, %k7, %k7 ; KNL-NEXT: korw %k7, %k6, %k6 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload ; KNL-NEXT: kandw %k1, %k6, %k6 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k7 ; KNL-NEXT: kshiftlw $15, %k7, %k7 ; KNL-NEXT: kshiftrw $7, %k7, %k7 ; KNL-NEXT: korw %k7, %k6, %k6 ; KNL-NEXT: kandw %k3, %k6, %k6 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k7 ; KNL-NEXT: kshiftlw $15, %k7, %k7 ; KNL-NEXT: kshiftrw $6, %k7, %k7 ; KNL-NEXT: korw %k7, %k6, %k6 ; KNL-NEXT: kandw %k4, %k6, %k5 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: kshiftrw $5, %k6, %k6 ; KNL-NEXT: korw %k6, %k5, %k5 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload ; KNL-NEXT: kandw %k1, %k5, %k4 
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k5 ; KNL-NEXT: kshiftlw $15, %k5, %k5 ; KNL-NEXT: kshiftrw $4, %k5, %k5 ; KNL-NEXT: korw %k5, %k4, %k4 ; KNL-NEXT: kandw %k2, %k4, %k3 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k4 ; KNL-NEXT: kshiftlw $15, %k4, %k4 ; KNL-NEXT: kshiftrw $3, %k4, %k4 ; KNL-NEXT: korw %k4, %k3, %k3 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload ; KNL-NEXT: kandw %k1, %k3, %k2 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k3 ; KNL-NEXT: kshiftlw $15, %k3, %k3 ; KNL-NEXT: kshiftrw $2, %k3, %k3 ; KNL-NEXT: korw %k3, %k2, %k2 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload ; KNL-NEXT: kandw %k1, %k2, %k1 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k2 ; KNL-NEXT: kshiftlw $14, %k2, %k2 ; KNL-NEXT: korw %k2, %k1, %k1 ; KNL-NEXT: kshiftlw $1, %k1, %k1 ; KNL-NEXT: kshiftrw $1, %k1, %k1 ; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k2 ; KNL-NEXT: kshiftlw $15, %k2, %k2 ; KNL-NEXT: korw %k2, %k1, %k1 ; KNL-NEXT: kmovw %k1, 6(%rdi) ; KNL-NEXT: kmovw %k0, 4(%rdi) ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 2-byte Reload ; KNL-NEXT: kmovw %k0, 2(%rdi) ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 2-byte Reload ; KNL-NEXT: kmovw %k0, (%rdi) ; KNL-NEXT: retq ; ; SKX-LABEL: store_64i1: ; SKX: ## %bb.0: ; SKX-NEXT: vpsllw $7, %zmm0, %zmm0 ; SKX-NEXT: vpmovb2m %zmm0, %k0 ; SKX-NEXT: kmovq %k0, (%rdi) ; SKX-NEXT: vzeroupper ; SKX-NEXT: retq ; ; AVX512BW-LABEL: store_64i1: ; AVX512BW: ## %bb.0: ; AVX512BW-NEXT: vpsllw $7, %zmm0, %zmm0 ; AVX512BW-NEXT: vpmovb2m %zmm0, %k0 ; AVX512BW-NEXT: kmovq %k0, (%rdi) ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; ; AVX512DQ-LABEL: store_64i1: ; AVX512DQ: ## %bb.0: ; AVX512DQ-NEXT: movw $-3, %ax ; AVX512DQ-NEXT: kmovw %eax, %k1 ; AVX512DQ-NEXT: kmovw %esi, %k0 ; AVX512DQ-NEXT: kandw %k1, %k0, %k0 ; AVX512DQ-NEXT: kmovw %k1, %k2 ; AVX512DQ-NEXT: kmovw %k1, 
{{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; AVX512DQ-NEXT: kmovw %edx, %k1 ; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1 ; AVX512DQ-NEXT: kshiftrw $14, %k1, %k1 ; AVX512DQ-NEXT: korw %k1, %k0, %k0 ; AVX512DQ-NEXT: movw $-5, %ax ; AVX512DQ-NEXT: kmovw %eax, %k1 ; AVX512DQ-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; AVX512DQ-NEXT: kandw %k1, %k0, %k0 ; AVX512DQ-NEXT: kmovw %ecx, %k1 ; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1 ; AVX512DQ-NEXT: kshiftrw $13, %k1, %k1 ; AVX512DQ-NEXT: korw %k1, %k0, %k0 ; AVX512DQ-NEXT: movw $-9, %ax ; AVX512DQ-NEXT: kmovw %eax, %k1 ; AVX512DQ-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; AVX512DQ-NEXT: kandw %k1, %k0, %k0 ; AVX512DQ-NEXT: kmovw %r8d, %k1 ; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1 ; AVX512DQ-NEXT: kshiftrw $12, %k1, %k1 ; AVX512DQ-NEXT: korw %k1, %k0, %k0 ; AVX512DQ-NEXT: movw $-17, %ax ; AVX512DQ-NEXT: kmovw %eax, %k6 ; AVX512DQ-NEXT: kandw %k6, %k0, %k0 ; AVX512DQ-NEXT: kmovw %k6, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; AVX512DQ-NEXT: kmovw %r9d, %k1 ; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1 ; AVX512DQ-NEXT: kshiftrw $11, %k1, %k1 ; AVX512DQ-NEXT: korw %k1, %k0, %k0 ; AVX512DQ-NEXT: movw $-33, %ax ; AVX512DQ-NEXT: kmovw %eax, %k1 ; AVX512DQ-NEXT: kandw %k1, %k0, %k0 ; AVX512DQ-NEXT: kmovw %k1, %k3 ; AVX512DQ-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k1 ; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1 ; AVX512DQ-NEXT: kshiftrw $10, %k1, %k1 ; AVX512DQ-NEXT: korw %k1, %k0, %k0 ; AVX512DQ-NEXT: movw $-65, %ax ; AVX512DQ-NEXT: kmovw %eax, %k1 ; AVX512DQ-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; AVX512DQ-NEXT: kandw %k1, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k1 ; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1 ; AVX512DQ-NEXT: kshiftrw $9, %k1, %k1 ; AVX512DQ-NEXT: korw %k1, %k0, %k0 ; AVX512DQ-NEXT: movw $-129, %ax ; AVX512DQ-NEXT: kmovw %eax, %k1 ; AVX512DQ-NEXT: kandw 
%k1, %k0, %k0 ; AVX512DQ-NEXT: kmovw %k1, %k4 ; AVX512DQ-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k1 ; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1 ; AVX512DQ-NEXT: kshiftrw $8, %k1, %k1 ; AVX512DQ-NEXT: korw %k1, %k0, %k0 ; AVX512DQ-NEXT: movw $-257, %ax ## imm = 0xFEFF ; AVX512DQ-NEXT: kmovw %eax, %k1 ; AVX512DQ-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; AVX512DQ-NEXT: kandw %k1, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k1 ; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1 ; AVX512DQ-NEXT: kshiftrw $7, %k1, %k1 ; AVX512DQ-NEXT: korw %k1, %k0, %k0 ; AVX512DQ-NEXT: movw $-513, %ax ## imm = 0xFDFF ; AVX512DQ-NEXT: kmovw %eax, %k1 ; AVX512DQ-NEXT: kandw %k1, %k0, %k0 ; AVX512DQ-NEXT: kmovw %k1, %k5 ; AVX512DQ-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k1 ; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1 ; AVX512DQ-NEXT: kshiftrw $6, %k1, %k1 ; AVX512DQ-NEXT: korw %k1, %k0, %k0 ; AVX512DQ-NEXT: movw $-1025, %ax ## imm = 0xFBFF ; AVX512DQ-NEXT: kmovw %eax, %k1 ; AVX512DQ-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; AVX512DQ-NEXT: kandw %k1, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k1 ; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1 ; AVX512DQ-NEXT: kshiftrw $5, %k1, %k1 ; AVX512DQ-NEXT: korw %k1, %k0, %k0 ; AVX512DQ-NEXT: movw $-2049, %ax ## imm = 0xF7FF ; AVX512DQ-NEXT: kmovw %eax, %k1 ; AVX512DQ-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; AVX512DQ-NEXT: kandw %k1, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k1 ; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1 ; AVX512DQ-NEXT: kshiftrw $4, %k1, %k1 ; AVX512DQ-NEXT: korw %k1, %k0, %k0 ; AVX512DQ-NEXT: movw $-4097, %ax ## imm = 0xEFFF ; AVX512DQ-NEXT: kmovw %eax, %k1 ; AVX512DQ-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 
2-byte Spill ; AVX512DQ-NEXT: kandw %k1, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k1 ; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1 ; AVX512DQ-NEXT: kshiftrw $3, %k1, %k1 ; AVX512DQ-NEXT: korw %k1, %k0, %k0 ; AVX512DQ-NEXT: movw $-8193, %ax ## imm = 0xDFFF ; AVX512DQ-NEXT: kmovw %eax, %k1 ; AVX512DQ-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; AVX512DQ-NEXT: kandw %k1, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k1 ; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1 ; AVX512DQ-NEXT: kshiftrw $2, %k1, %k1 ; AVX512DQ-NEXT: korw %k1, %k0, %k0 ; AVX512DQ-NEXT: movw $-16385, %ax ## imm = 0xBFFF ; AVX512DQ-NEXT: kmovw %eax, %k1 ; AVX512DQ-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; AVX512DQ-NEXT: kandw %k1, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k7 ; AVX512DQ-NEXT: kshiftlw $14, %k7, %k7 ; AVX512DQ-NEXT: korw %k7, %k0, %k0 ; AVX512DQ-NEXT: kshiftlw $1, %k0, %k0 ; AVX512DQ-NEXT: kshiftrw $1, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k7 ; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 ; AVX512DQ-NEXT: korw %k7, %k0, %k0 ; AVX512DQ-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k0 ; AVX512DQ-NEXT: kandw %k2, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k7 ; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 ; AVX512DQ-NEXT: kshiftrw $14, %k7, %k7 ; AVX512DQ-NEXT: korw %k7, %k0, %k0 ; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload ; AVX512DQ-NEXT: kandw %k1, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k7 ; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 ; AVX512DQ-NEXT: kshiftrw $13, %k7, %k7 ; AVX512DQ-NEXT: korw %k7, %k0, %k0 ; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload ; AVX512DQ-NEXT: kandw %k2, %k0, %k0 ; AVX512DQ-NEXT: 
movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k7 ; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 ; AVX512DQ-NEXT: kshiftrw $12, %k7, %k7 ; AVX512DQ-NEXT: korw %k7, %k0, %k0 ; AVX512DQ-NEXT: kandw %k6, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k7 ; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 ; AVX512DQ-NEXT: kshiftrw $11, %k7, %k7 ; AVX512DQ-NEXT: korw %k7, %k0, %k0 ; AVX512DQ-NEXT: kandw %k3, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k7 ; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 ; AVX512DQ-NEXT: kshiftrw $10, %k7, %k7 ; AVX512DQ-NEXT: korw %k7, %k0, %k0 ; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload ; AVX512DQ-NEXT: kandw %k6, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k7 ; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 ; AVX512DQ-NEXT: kshiftrw $9, %k7, %k7 ; AVX512DQ-NEXT: korw %k7, %k0, %k0 ; AVX512DQ-NEXT: kandw %k4, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k7 ; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 ; AVX512DQ-NEXT: kshiftrw $8, %k7, %k7 ; AVX512DQ-NEXT: korw %k7, %k0, %k0 ; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 ## 2-byte Reload ; AVX512DQ-NEXT: kandw %k3, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k7 ; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 ; AVX512DQ-NEXT: kshiftrw $7, %k7, %k7 ; AVX512DQ-NEXT: korw %k7, %k0, %k0 ; AVX512DQ-NEXT: kandw %k5, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k7 ; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 ; AVX512DQ-NEXT: kshiftrw $6, %k7, %k7 ; AVX512DQ-NEXT: korw %k7, %k0, %k0 ; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k4 ## 2-byte Reload ; AVX512DQ-NEXT: kandw %k4, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k7 ; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 ; AVX512DQ-NEXT: kshiftrw $5, %k7, %k7 ; AVX512DQ-NEXT: korw %k7, %k0, %k0 ; 
AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload ; AVX512DQ-NEXT: kandw %k5, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k7 ; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 ; AVX512DQ-NEXT: kshiftrw $4, %k7, %k7 ; AVX512DQ-NEXT: korw %k7, %k0, %k0 ; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload ; AVX512DQ-NEXT: kandw %k7, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k7 ; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 ; AVX512DQ-NEXT: kshiftrw $3, %k7, %k7 ; AVX512DQ-NEXT: korw %k7, %k0, %k0 ; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload ; AVX512DQ-NEXT: kandw %k7, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k7 ; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 ; AVX512DQ-NEXT: kshiftrw $2, %k7, %k7 ; AVX512DQ-NEXT: korw %k7, %k0, %k0 ; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload ; AVX512DQ-NEXT: kandw %k7, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k7 ; AVX512DQ-NEXT: kshiftlw $14, %k7, %k7 ; AVX512DQ-NEXT: korw %k7, %k0, %k0 ; AVX512DQ-NEXT: kshiftlw $1, %k0, %k0 ; AVX512DQ-NEXT: kshiftrw $1, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k7 ; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 ; AVX512DQ-NEXT: korw %k7, %k0, %k0 ; AVX512DQ-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k0 ; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload ; AVX512DQ-NEXT: kandw %k7, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k7 ; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 ; AVX512DQ-NEXT: kshiftrw $14, %k7, %k7 ; AVX512DQ-NEXT: korw %k7, %k0, %k0 ; AVX512DQ-NEXT: kandw %k1, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k7 ; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 ; 
AVX512DQ-NEXT: kshiftrw $13, %k7, %k7 ; AVX512DQ-NEXT: korw %k7, %k0, %k0 ; AVX512DQ-NEXT: kandw %k2, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k7 ; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 ; AVX512DQ-NEXT: kshiftrw $12, %k7, %k7 ; AVX512DQ-NEXT: korw %k7, %k0, %k0 ; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload ; AVX512DQ-NEXT: kandw %k1, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k7 ; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 ; AVX512DQ-NEXT: kshiftrw $11, %k7, %k7 ; AVX512DQ-NEXT: korw %k7, %k0, %k0 ; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload ; AVX512DQ-NEXT: kandw %k2, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k7 ; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 ; AVX512DQ-NEXT: kshiftrw $10, %k7, %k7 ; AVX512DQ-NEXT: korw %k7, %k0, %k0 ; AVX512DQ-NEXT: kandw %k6, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k7 ; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 ; AVX512DQ-NEXT: kshiftrw $9, %k7, %k7 ; AVX512DQ-NEXT: korw %k7, %k0, %k0 ; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload ; AVX512DQ-NEXT: kandw %k2, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k7 ; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 ; AVX512DQ-NEXT: kshiftrw $8, %k7, %k7 ; AVX512DQ-NEXT: korw %k7, %k0, %k0 ; AVX512DQ-NEXT: kandw %k3, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k7 ; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 ; AVX512DQ-NEXT: kshiftrw $7, %k7, %k7 ; AVX512DQ-NEXT: korw %k7, %k0, %k0 ; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 ## 2-byte Reload ; AVX512DQ-NEXT: kandw %k3, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k7 ; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 ; AVX512DQ-NEXT: kshiftrw $6, %k7, %k7 ; AVX512DQ-NEXT: korw %k7, %k0, %k0 ; AVX512DQ-NEXT: kandw %k4, %k0, %k0 ; 
AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k7 ; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 ; AVX512DQ-NEXT: kshiftrw $5, %k7, %k7 ; AVX512DQ-NEXT: korw %k7, %k0, %k0 ; AVX512DQ-NEXT: kandw %k5, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k7 ; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 ; AVX512DQ-NEXT: kshiftrw $4, %k7, %k7 ; AVX512DQ-NEXT: korw %k7, %k0, %k0 ; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload ; AVX512DQ-NEXT: kandw %k2, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k7 ; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 ; AVX512DQ-NEXT: kshiftrw $3, %k7, %k7 ; AVX512DQ-NEXT: korw %k7, %k0, %k0 ; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload ; AVX512DQ-NEXT: kandw %k5, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k7 ; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 ; AVX512DQ-NEXT: kshiftrw $2, %k7, %k7 ; AVX512DQ-NEXT: korw %k7, %k0, %k0 ; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload ; AVX512DQ-NEXT: kandw %k5, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k7 ; AVX512DQ-NEXT: kshiftlw $14, %k7, %k7 ; AVX512DQ-NEXT: korw %k7, %k0, %k0 ; AVX512DQ-NEXT: kshiftlw $1, %k0, %k0 ; AVX512DQ-NEXT: kshiftrw $1, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k7 ; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 ; AVX512DQ-NEXT: korw %k7, %k0, %k0 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k7 ; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload ; AVX512DQ-NEXT: kandw %k5, %k7, %k7 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k6 ; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6 ; AVX512DQ-NEXT: kshiftrw $14, %k6, %k6 ; AVX512DQ-NEXT: korw %k6, %k7, %k6 ; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload ; AVX512DQ-NEXT: kandw %k5, %k6, %k6 ; 
AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k7 ; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 ; AVX512DQ-NEXT: kshiftrw $13, %k7, %k7 ; AVX512DQ-NEXT: korw %k7, %k6, %k6 ; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload ; AVX512DQ-NEXT: kandw %k5, %k6, %k6 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k7 ; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 ; AVX512DQ-NEXT: kshiftrw $12, %k7, %k7 ; AVX512DQ-NEXT: korw %k7, %k6, %k6 ; AVX512DQ-NEXT: kandw %k1, %k6, %k6 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k7 ; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 ; AVX512DQ-NEXT: kshiftrw $11, %k7, %k7 ; AVX512DQ-NEXT: korw %k7, %k6, %k6 ; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload ; AVX512DQ-NEXT: kandw %k1, %k6, %k6 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k7 ; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 ; AVX512DQ-NEXT: kshiftrw $10, %k7, %k7 ; AVX512DQ-NEXT: korw %k7, %k6, %k6 ; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload ; AVX512DQ-NEXT: kandw %k1, %k6, %k6 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k7 ; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 ; AVX512DQ-NEXT: kshiftrw $9, %k7, %k7 ; AVX512DQ-NEXT: korw %k7, %k6, %k6 ; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload ; AVX512DQ-NEXT: kandw %k1, %k6, %k6 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k7 ; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 ; AVX512DQ-NEXT: kshiftrw $8, %k7, %k7 ; AVX512DQ-NEXT: korw %k7, %k6, %k6 ; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload ; AVX512DQ-NEXT: kandw %k1, %k6, %k6 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k7 ; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 ; AVX512DQ-NEXT: kshiftrw $7, %k7, %k7 ; AVX512DQ-NEXT: korw %k7, %k6, %k6 ; AVX512DQ-NEXT: kandw %k3, %k6, %k6 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al 
; AVX512DQ-NEXT: kmovw %eax, %k7 ; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 ; AVX512DQ-NEXT: kshiftrw $6, %k7, %k7 ; AVX512DQ-NEXT: korw %k7, %k6, %k6 ; AVX512DQ-NEXT: kandw %k4, %k6, %k5 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k6 ; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6 ; AVX512DQ-NEXT: kshiftrw $5, %k6, %k6 ; AVX512DQ-NEXT: korw %k6, %k5, %k5 ; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload ; AVX512DQ-NEXT: kandw %k1, %k5, %k4 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k5 ; AVX512DQ-NEXT: kshiftlw $15, %k5, %k5 ; AVX512DQ-NEXT: kshiftrw $4, %k5, %k5 ; AVX512DQ-NEXT: korw %k5, %k4, %k4 ; AVX512DQ-NEXT: kandw %k2, %k4, %k3 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k4 ; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4 ; AVX512DQ-NEXT: kshiftrw $3, %k4, %k4 ; AVX512DQ-NEXT: korw %k4, %k3, %k3 ; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload ; AVX512DQ-NEXT: kandw %k1, %k3, %k2 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k3 ; AVX512DQ-NEXT: kshiftlw $15, %k3, %k3 ; AVX512DQ-NEXT: kshiftrw $2, %k3, %k3 ; AVX512DQ-NEXT: korw %k3, %k2, %k2 ; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload ; AVX512DQ-NEXT: kandw %k1, %k2, %k1 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k2 ; AVX512DQ-NEXT: kshiftlw $14, %k2, %k2 ; AVX512DQ-NEXT: korw %k2, %k1, %k1 ; AVX512DQ-NEXT: kshiftlw $1, %k1, %k1 ; AVX512DQ-NEXT: kshiftrw $1, %k1, %k1 ; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k2 ; AVX512DQ-NEXT: kshiftlw $15, %k2, %k2 ; AVX512DQ-NEXT: korw %k2, %k1, %k1 ; AVX512DQ-NEXT: kmovw %k1, 6(%rdi) ; AVX512DQ-NEXT: kmovw %k0, 4(%rdi) ; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 2-byte Reload ; AVX512DQ-NEXT: kmovw %k0, 2(%rdi) ; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 2-byte Reload ; AVX512DQ-NEXT: kmovw %k0, (%rdi) ; AVX512DQ-NEXT: retq 
; ; X86-LABEL: store_64i1: ; X86: ## %bb.0: ; X86-NEXT: vpsllw $7, %zmm0, %zmm0 ; X86-NEXT: vpmovb2m %zmm0, %k0 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: kmovq %k0, (%eax) ; X86-NEXT: vzeroupper ; X86-NEXT: retl store <64 x i1> %v, <64 x i1>* %a ret void } define i32 @test_bitcast_v8i1_zext(<16 x i32> %a) { ; KNL-LABEL: test_bitcast_v8i1_zext: ; KNL: ## %bb.0: ; KNL-NEXT: vptestnmd %zmm0, %zmm0, %k0 ; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: movzbl %al, %eax ; KNL-NEXT: addl %eax, %eax ; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; ; SKX-LABEL: test_bitcast_v8i1_zext: ; SKX: ## %bb.0: ; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k0 ; SKX-NEXT: kmovb %k0, %eax ; SKX-NEXT: addl %eax, %eax ; SKX-NEXT: vzeroupper ; SKX-NEXT: retq ; ; AVX512BW-LABEL: test_bitcast_v8i1_zext: ; AVX512BW: ## %bb.0: ; AVX512BW-NEXT: vptestnmd %zmm0, %zmm0, %k0 ; AVX512BW-NEXT: kmovd %k0, %eax ; AVX512BW-NEXT: movzbl %al, %eax ; AVX512BW-NEXT: addl %eax, %eax ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; ; AVX512DQ-LABEL: test_bitcast_v8i1_zext: ; AVX512DQ: ## %bb.0: ; AVX512DQ-NEXT: vptestnmd %zmm0, %zmm0, %k0 ; AVX512DQ-NEXT: kmovb %k0, %eax ; AVX512DQ-NEXT: addl %eax, %eax ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq ; ; X86-LABEL: test_bitcast_v8i1_zext: ; X86: ## %bb.0: ; X86-NEXT: vptestnmd %zmm0, %zmm0, %k0 ; X86-NEXT: kmovb %k0, %eax ; X86-NEXT: addl %eax, %eax ; X86-NEXT: vzeroupper ; X86-NEXT: retl %v1 = icmp eq <16 x i32> %a, zeroinitializer %mask = shufflevector <16 x i1> %v1, <16 x i1> undef, <8 x i32> %mask1 = bitcast <8 x i1> %mask to i8 %val = zext i8 %mask1 to i32 %val1 = add i32 %val, %val ret i32 %val1 } define i32 @test_bitcast_v16i1_zext(<16 x i32> %a) { ; CHECK-LABEL: test_bitcast_v16i1_zext: ; CHECK: ## %bb.0: ; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k0 ; CHECK-NEXT: kmovw %k0, %eax ; CHECK-NEXT: addl %eax, %eax ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq ; ; X86-LABEL: test_bitcast_v16i1_zext: ; X86: ## %bb.0: ; X86-NEXT: vptestnmd %zmm0, %zmm0, %k0 ; X86-NEXT: 
kmovw %k0, %eax ; X86-NEXT: addl %eax, %eax ; X86-NEXT: vzeroupper ; X86-NEXT: retl %v1 = icmp eq <16 x i32> %a, zeroinitializer %mask1 = bitcast <16 x i1> %v1 to i16 %val = zext i16 %mask1 to i32 %val1 = add i32 %val, %val ret i32 %val1 } define i16 @test_v16i1_add(i16 %x, i16 %y) { ; KNL-LABEL: test_v16i1_add: ; KNL: ## %bb.0: ; KNL-NEXT: kmovw %edi, %k0 ; KNL-NEXT: kmovw %esi, %k1 ; KNL-NEXT: kxorw %k1, %k0, %k0 ; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: ## kill: def $ax killed $ax killed $eax ; KNL-NEXT: retq ; ; SKX-LABEL: test_v16i1_add: ; SKX: ## %bb.0: ; SKX-NEXT: kmovd %edi, %k0 ; SKX-NEXT: kmovd %esi, %k1 ; SKX-NEXT: kxorw %k1, %k0, %k0 ; SKX-NEXT: kmovd %k0, %eax ; SKX-NEXT: ## kill: def $ax killed $ax killed $eax ; SKX-NEXT: retq ; ; AVX512BW-LABEL: test_v16i1_add: ; AVX512BW: ## %bb.0: ; AVX512BW-NEXT: kmovd %edi, %k0 ; AVX512BW-NEXT: kmovd %esi, %k1 ; AVX512BW-NEXT: kxorw %k1, %k0, %k0 ; AVX512BW-NEXT: kmovd %k0, %eax ; AVX512BW-NEXT: ## kill: def $ax killed $ax killed $eax ; AVX512BW-NEXT: retq ; ; AVX512DQ-LABEL: test_v16i1_add: ; AVX512DQ: ## %bb.0: ; AVX512DQ-NEXT: kmovw %edi, %k0 ; AVX512DQ-NEXT: kmovw %esi, %k1 ; AVX512DQ-NEXT: kxorw %k1, %k0, %k0 ; AVX512DQ-NEXT: kmovw %k0, %eax ; AVX512DQ-NEXT: ## kill: def $ax killed $ax killed $eax ; AVX512DQ-NEXT: retq ; ; X86-LABEL: test_v16i1_add: ; X86: ## %bb.0: ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k0 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ; X86-NEXT: kxorw %k1, %k0, %k0 ; X86-NEXT: kmovd %k0, %eax ; X86-NEXT: ## kill: def $ax killed $ax killed $eax ; X86-NEXT: retl %m0 = bitcast i16 %x to <16 x i1> %m1 = bitcast i16 %y to <16 x i1> %m2 = add <16 x i1> %m0, %m1 %ret = bitcast <16 x i1> %m2 to i16 ret i16 %ret } define i16 @test_v16i1_sub(i16 %x, i16 %y) { ; KNL-LABEL: test_v16i1_sub: ; KNL: ## %bb.0: ; KNL-NEXT: kmovw %edi, %k0 ; KNL-NEXT: kmovw %esi, %k1 ; KNL-NEXT: kxorw %k1, %k0, %k0 ; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: ## kill: def $ax killed $ax killed $eax ; KNL-NEXT: retq ; ; SKX-LABEL: 
test_v16i1_sub: ; SKX: ## %bb.0: ; SKX-NEXT: kmovd %edi, %k0 ; SKX-NEXT: kmovd %esi, %k1 ; SKX-NEXT: kxorw %k1, %k0, %k0 ; SKX-NEXT: kmovd %k0, %eax ; SKX-NEXT: ## kill: def $ax killed $ax killed $eax ; SKX-NEXT: retq ; ; AVX512BW-LABEL: test_v16i1_sub: ; AVX512BW: ## %bb.0: ; AVX512BW-NEXT: kmovd %edi, %k0 ; AVX512BW-NEXT: kmovd %esi, %k1 ; AVX512BW-NEXT: kxorw %k1, %k0, %k0 ; AVX512BW-NEXT: kmovd %k0, %eax ; AVX512BW-NEXT: ## kill: def $ax killed $ax killed $eax ; AVX512BW-NEXT: retq ; ; AVX512DQ-LABEL: test_v16i1_sub: ; AVX512DQ: ## %bb.0: ; AVX512DQ-NEXT: kmovw %edi, %k0 ; AVX512DQ-NEXT: kmovw %esi, %k1 ; AVX512DQ-NEXT: kxorw %k1, %k0, %k0 ; AVX512DQ-NEXT: kmovw %k0, %eax ; AVX512DQ-NEXT: ## kill: def $ax killed $ax killed $eax ; AVX512DQ-NEXT: retq ; ; X86-LABEL: test_v16i1_sub: ; X86: ## %bb.0: ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k0 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ; X86-NEXT: kxorw %k1, %k0, %k0 ; X86-NEXT: kmovd %k0, %eax ; X86-NEXT: ## kill: def $ax killed $ax killed $eax ; X86-NEXT: retl %m0 = bitcast i16 %x to <16 x i1> %m1 = bitcast i16 %y to <16 x i1> %m2 = sub <16 x i1> %m0, %m1 %ret = bitcast <16 x i1> %m2 to i16 ret i16 %ret } define i16 @test_v16i1_mul(i16 %x, i16 %y) { ; KNL-LABEL: test_v16i1_mul: ; KNL: ## %bb.0: ; KNL-NEXT: kmovw %edi, %k0 ; KNL-NEXT: kmovw %esi, %k1 ; KNL-NEXT: kandw %k1, %k0, %k0 ; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: ## kill: def $ax killed $ax killed $eax ; KNL-NEXT: retq ; ; SKX-LABEL: test_v16i1_mul: ; SKX: ## %bb.0: ; SKX-NEXT: kmovd %edi, %k0 ; SKX-NEXT: kmovd %esi, %k1 ; SKX-NEXT: kandw %k1, %k0, %k0 ; SKX-NEXT: kmovd %k0, %eax ; SKX-NEXT: ## kill: def $ax killed $ax killed $eax ; SKX-NEXT: retq ; ; AVX512BW-LABEL: test_v16i1_mul: ; AVX512BW: ## %bb.0: ; AVX512BW-NEXT: kmovd %edi, %k0 ; AVX512BW-NEXT: kmovd %esi, %k1 ; AVX512BW-NEXT: kandw %k1, %k0, %k0 ; AVX512BW-NEXT: kmovd %k0, %eax ; AVX512BW-NEXT: ## kill: def $ax killed $ax killed $eax ; AVX512BW-NEXT: retq ; ; AVX512DQ-LABEL: test_v16i1_mul: ; AVX512DQ: 
## %bb.0: ; AVX512DQ-NEXT: kmovw %edi, %k0 ; AVX512DQ-NEXT: kmovw %esi, %k1 ; AVX512DQ-NEXT: kandw %k1, %k0, %k0 ; AVX512DQ-NEXT: kmovw %k0, %eax ; AVX512DQ-NEXT: ## kill: def $ax killed $ax killed $eax ; AVX512DQ-NEXT: retq ; ; X86-LABEL: test_v16i1_mul: ; X86: ## %bb.0: ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k0 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ; X86-NEXT: kandw %k1, %k0, %k0 ; X86-NEXT: kmovd %k0, %eax ; X86-NEXT: ## kill: def $ax killed $ax killed $eax ; X86-NEXT: retl %m0 = bitcast i16 %x to <16 x i1> %m1 = bitcast i16 %y to <16 x i1> %m2 = mul <16 x i1> %m0, %m1 %ret = bitcast <16 x i1> %m2 to i16 ret i16 %ret } define i8 @test_v8i1_add(i8 %x, i8 %y) { ; KNL-LABEL: test_v8i1_add: ; KNL: ## %bb.0: ; KNL-NEXT: kmovw %edi, %k0 ; KNL-NEXT: kmovw %esi, %k1 ; KNL-NEXT: kxorw %k1, %k0, %k0 ; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: ## kill: def $al killed $al killed $eax ; KNL-NEXT: retq ; ; SKX-LABEL: test_v8i1_add: ; SKX: ## %bb.0: ; SKX-NEXT: kmovd %edi, %k0 ; SKX-NEXT: kmovd %esi, %k1 ; SKX-NEXT: kxorb %k1, %k0, %k0 ; SKX-NEXT: kmovd %k0, %eax ; SKX-NEXT: ## kill: def $al killed $al killed $eax ; SKX-NEXT: retq ; ; AVX512BW-LABEL: test_v8i1_add: ; AVX512BW: ## %bb.0: ; AVX512BW-NEXT: kmovd %edi, %k0 ; AVX512BW-NEXT: kmovd %esi, %k1 ; AVX512BW-NEXT: kxorw %k1, %k0, %k0 ; AVX512BW-NEXT: kmovd %k0, %eax ; AVX512BW-NEXT: ## kill: def $al killed $al killed $eax ; AVX512BW-NEXT: retq ; ; AVX512DQ-LABEL: test_v8i1_add: ; AVX512DQ: ## %bb.0: ; AVX512DQ-NEXT: kmovw %edi, %k0 ; AVX512DQ-NEXT: kmovw %esi, %k1 ; AVX512DQ-NEXT: kxorb %k1, %k0, %k0 ; AVX512DQ-NEXT: kmovw %k0, %eax ; AVX512DQ-NEXT: ## kill: def $al killed $al killed $eax ; AVX512DQ-NEXT: retq ; ; X86-LABEL: test_v8i1_add: ; X86: ## %bb.0: ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k0 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 ; X86-NEXT: kxorb %k1, %k0, %k0 ; X86-NEXT: kmovd %k0, %eax ; X86-NEXT: ## kill: def $al killed $al killed $eax ; X86-NEXT: retl %m0 = bitcast i8 %x to <8 x i1> %m1 = bitcast i8 %y to <8 x i1> 
%m2 = add <8 x i1> %m0, %m1 %ret = bitcast <8 x i1> %m2 to i8 ret i8 %ret } define i8 @test_v8i1_sub(i8 %x, i8 %y) { ; KNL-LABEL: test_v8i1_sub: ; KNL: ## %bb.0: ; KNL-NEXT: kmovw %edi, %k0 ; KNL-NEXT: kmovw %esi, %k1 ; KNL-NEXT: kxorw %k1, %k0, %k0 ; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: ## kill: def $al killed $al killed $eax ; KNL-NEXT: retq ; ; SKX-LABEL: test_v8i1_sub: ; SKX: ## %bb.0: ; SKX-NEXT: kmovd %edi, %k0 ; SKX-NEXT: kmovd %esi, %k1 ; SKX-NEXT: kxorb %k1, %k0, %k0 ; SKX-NEXT: kmovd %k0, %eax ; SKX-NEXT: ## kill: def $al killed $al killed $eax ; SKX-NEXT: retq ; ; AVX512BW-LABEL: test_v8i1_sub: ; AVX512BW: ## %bb.0: ; AVX512BW-NEXT: kmovd %edi, %k0 ; AVX512BW-NEXT: kmovd %esi, %k1 ; AVX512BW-NEXT: kxorw %k1, %k0, %k0 ; AVX512BW-NEXT: kmovd %k0, %eax ; AVX512BW-NEXT: ## kill: def $al killed $al killed $eax ; AVX512BW-NEXT: retq ; ; AVX512DQ-LABEL: test_v8i1_sub: ; AVX512DQ: ## %bb.0: ; AVX512DQ-NEXT: kmovw %edi, %k0 ; AVX512DQ-NEXT: kmovw %esi, %k1 ; AVX512DQ-NEXT: kxorb %k1, %k0, %k0 ; AVX512DQ-NEXT: kmovw %k0, %eax ; AVX512DQ-NEXT: ## kill: def $al killed $al killed $eax ; AVX512DQ-NEXT: retq ; ; X86-LABEL: test_v8i1_sub: ; X86: ## %bb.0: ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k0 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 ; X86-NEXT: kxorb %k1, %k0, %k0 ; X86-NEXT: kmovd %k0, %eax ; X86-NEXT: ## kill: def $al killed $al killed $eax ; X86-NEXT: retl %m0 = bitcast i8 %x to <8 x i1> %m1 = bitcast i8 %y to <8 x i1> %m2 = sub <8 x i1> %m0, %m1 %ret = bitcast <8 x i1> %m2 to i8 ret i8 %ret } define i8 @test_v8i1_mul(i8 %x, i8 %y) { ; KNL-LABEL: test_v8i1_mul: ; KNL: ## %bb.0: ; KNL-NEXT: kmovw %edi, %k0 ; KNL-NEXT: kmovw %esi, %k1 ; KNL-NEXT: kandw %k1, %k0, %k0 ; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: ## kill: def $al killed $al killed $eax ; KNL-NEXT: retq ; ; SKX-LABEL: test_v8i1_mul: ; SKX: ## %bb.0: ; SKX-NEXT: kmovd %edi, %k0 ; SKX-NEXT: kmovd %esi, %k1 ; SKX-NEXT: kandb %k1, %k0, %k0 ; SKX-NEXT: kmovd %k0, %eax ; SKX-NEXT: ## kill: def $al killed $al killed 
$eax ; SKX-NEXT: retq ; ; AVX512BW-LABEL: test_v8i1_mul: ; AVX512BW: ## %bb.0: ; AVX512BW-NEXT: kmovd %edi, %k0 ; AVX512BW-NEXT: kmovd %esi, %k1 ; AVX512BW-NEXT: kandw %k1, %k0, %k0 ; AVX512BW-NEXT: kmovd %k0, %eax ; AVX512BW-NEXT: ## kill: def $al killed $al killed $eax ; AVX512BW-NEXT: retq ; ; AVX512DQ-LABEL: test_v8i1_mul: ; AVX512DQ: ## %bb.0: ; AVX512DQ-NEXT: kmovw %edi, %k0 ; AVX512DQ-NEXT: kmovw %esi, %k1 ; AVX512DQ-NEXT: kandb %k1, %k0, %k0 ; AVX512DQ-NEXT: kmovw %k0, %eax ; AVX512DQ-NEXT: ## kill: def $al killed $al killed $eax ; AVX512DQ-NEXT: retq ; ; X86-LABEL: test_v8i1_mul: ; X86: ## %bb.0: ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k0 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 ; X86-NEXT: kandb %k1, %k0, %k0 ; X86-NEXT: kmovd %k0, %eax ; X86-NEXT: ## kill: def $al killed $al killed $eax ; X86-NEXT: retl %m0 = bitcast i8 %x to <8 x i1> %m1 = bitcast i8 %y to <8 x i1> %m2 = mul <8 x i1> %m0, %m1 %ret = bitcast <8 x i1> %m2 to i8 ret i8 %ret } ; Make sure we don't emit a ktest for signed comparisons. 
define void @ktest_signed(<16 x i32> %x, <16 x i32> %y) { ; KNL-LABEL: ktest_signed: ; KNL: ## %bb.0: ; KNL-NEXT: vpord %zmm1, %zmm0, %zmm0 ; KNL-NEXT: vptestnmd %zmm0, %zmm0, %k0 ; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: testw %ax, %ax ; KNL-NEXT: jle LBB66_1 ; KNL-NEXT: ## %bb.2: ## %bb.2 ; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; KNL-NEXT: LBB66_1: ## %bb.1 ; KNL-NEXT: pushq %rax ; KNL-NEXT: .cfi_def_cfa_offset 16 ; KNL-NEXT: vzeroupper ; KNL-NEXT: callq _foo ; KNL-NEXT: addq $8, %rsp ; KNL-NEXT: retq ; ; SKX-LABEL: ktest_signed: ; SKX: ## %bb.0: ; SKX-NEXT: vpord %zmm1, %zmm0, %zmm0 ; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k0 ; SKX-NEXT: kmovd %k0, %eax ; SKX-NEXT: testw %ax, %ax ; SKX-NEXT: jle LBB66_1 ; SKX-NEXT: ## %bb.2: ## %bb.2 ; SKX-NEXT: vzeroupper ; SKX-NEXT: retq ; SKX-NEXT: LBB66_1: ## %bb.1 ; SKX-NEXT: pushq %rax ; SKX-NEXT: .cfi_def_cfa_offset 16 ; SKX-NEXT: vzeroupper ; SKX-NEXT: callq _foo ; SKX-NEXT: addq $8, %rsp ; SKX-NEXT: retq ; ; AVX512BW-LABEL: ktest_signed: ; AVX512BW: ## %bb.0: ; AVX512BW-NEXT: vpord %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: vptestnmd %zmm0, %zmm0, %k0 ; AVX512BW-NEXT: kmovd %k0, %eax ; AVX512BW-NEXT: testw %ax, %ax ; AVX512BW-NEXT: jle LBB66_1 ; AVX512BW-NEXT: ## %bb.2: ## %bb.2 ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; AVX512BW-NEXT: LBB66_1: ## %bb.1 ; AVX512BW-NEXT: pushq %rax ; AVX512BW-NEXT: .cfi_def_cfa_offset 16 ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: callq _foo ; AVX512BW-NEXT: addq $8, %rsp ; AVX512BW-NEXT: retq ; ; AVX512DQ-LABEL: ktest_signed: ; AVX512DQ: ## %bb.0: ; AVX512DQ-NEXT: vpord %zmm1, %zmm0, %zmm0 ; AVX512DQ-NEXT: vptestnmd %zmm0, %zmm0, %k0 ; AVX512DQ-NEXT: kmovw %k0, %eax ; AVX512DQ-NEXT: testw %ax, %ax ; AVX512DQ-NEXT: jle LBB66_1 ; AVX512DQ-NEXT: ## %bb.2: ## %bb.2 ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq ; AVX512DQ-NEXT: LBB66_1: ## %bb.1 ; AVX512DQ-NEXT: pushq %rax ; AVX512DQ-NEXT: .cfi_def_cfa_offset 16 ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: callq _foo ; AVX512DQ-NEXT: addq 
$8, %rsp ; AVX512DQ-NEXT: retq ; ; X86-LABEL: ktest_signed: ; X86: ## %bb.0: ; X86-NEXT: vpord %zmm1, %zmm0, %zmm0 ; X86-NEXT: vptestnmd %zmm0, %zmm0, %k0 ; X86-NEXT: kmovd %k0, %eax ; X86-NEXT: testw %ax, %ax ; X86-NEXT: jle LBB66_1 ; X86-NEXT: ## %bb.2: ## %bb.2 ; X86-NEXT: vzeroupper ; X86-NEXT: retl ; X86-NEXT: LBB66_1: ## %bb.1 ; X86-NEXT: subl $12, %esp ; X86-NEXT: .cfi_def_cfa_offset 16 ; X86-NEXT: vzeroupper ; X86-NEXT: calll _foo ; X86-NEXT: addl $12, %esp ; X86-NEXT: retl %a = icmp eq <16 x i32> %x, zeroinitializer %b = icmp eq <16 x i32> %y, zeroinitializer %c = and <16 x i1> %a, %b %d = bitcast <16 x i1> %c to i16 %e = icmp sgt i16 %d, 0 br i1 %e, label %bb.2, label %bb.1 bb.1: call void @foo() br label %bb.2 bb.2: ret void } declare void @foo() ; Make sure we can use the C flag from kortest to check for all ones. define void @ktest_allones(<16 x i32> %x, <16 x i32> %y) { ; CHECK-LABEL: ktest_allones: ; CHECK: ## %bb.0: ; CHECK-NEXT: vpord %zmm1, %zmm0, %zmm0 ; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k0 ; CHECK-NEXT: kortestw %k0, %k0 ; CHECK-NEXT: jb LBB67_2 ; CHECK-NEXT: ## %bb.1: ## %bb.1 ; CHECK-NEXT: pushq %rax ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: callq _foo ; CHECK-NEXT: addq $8, %rsp ; CHECK-NEXT: LBB67_2: ## %bb.2 ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq ; ; X86-LABEL: ktest_allones: ; X86: ## %bb.0: ; X86-NEXT: vpord %zmm1, %zmm0, %zmm0 ; X86-NEXT: vptestnmd %zmm0, %zmm0, %k0 ; X86-NEXT: kortestw %k0, %k0 ; X86-NEXT: jb LBB67_2 ; X86-NEXT: ## %bb.1: ## %bb.1 ; X86-NEXT: subl $12, %esp ; X86-NEXT: .cfi_def_cfa_offset 16 ; X86-NEXT: vzeroupper ; X86-NEXT: calll _foo ; X86-NEXT: addl $12, %esp ; X86-NEXT: LBB67_2: ## %bb.2 ; X86-NEXT: vzeroupper ; X86-NEXT: retl %a = icmp eq <16 x i32> %x, zeroinitializer %b = icmp eq <16 x i32> %y, zeroinitializer %c = and <16 x i1> %a, %b %d = bitcast <16 x i1> %c to i16 %e = icmp eq i16 %d, -1 br i1 %e, label %bb.2, label %bb.1 bb.1: call void @foo() br label %bb.2 bb.2: 
ret void } ; This is derived from an intrinsic test where v4i1 mask was created by _mm_cmp_epi32_mask, then it was passed to _mm512_mask_blend_epi32 which uses a v16i1 mask. ; The widening happens in the scalar domain between the intrinsics. The middle end optmized it to this. define <8 x i64> @mask_widening(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c, <2 x i64> %d, <8 x i64> %e, <8 x i64> %f) { ; KNL-LABEL: mask_widening: ; KNL: ## %bb.0: ## %entry ; KNL-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1 ; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0 ; KNL-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 ; KNL-NEXT: kshiftlw $12, %k0, %k0 ; KNL-NEXT: kshiftrw $12, %k0, %k1 ; KNL-NEXT: vpblendmd %zmm5, %zmm4, %zmm0 {%k1} ; KNL-NEXT: retq ; ; SKX-LABEL: mask_widening: ; SKX: ## %bb.0: ## %entry ; SKX-NEXT: vpcmpeqd %xmm1, %xmm0, %k1 ; SKX-NEXT: vpblendmd %zmm5, %zmm4, %zmm0 {%k1} ; SKX-NEXT: retq ; ; AVX512BW-LABEL: mask_widening: ; AVX512BW: ## %bb.0: ## %entry ; AVX512BW-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1 ; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512BW-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 ; AVX512BW-NEXT: kshiftlw $12, %k0, %k0 ; AVX512BW-NEXT: kshiftrw $12, %k0, %k1 ; AVX512BW-NEXT: vpblendmd %zmm5, %zmm4, %zmm0 {%k1} ; AVX512BW-NEXT: retq ; ; AVX512DQ-LABEL: mask_widening: ; AVX512DQ: ## %bb.0: ## %entry ; AVX512DQ-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1 ; AVX512DQ-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512DQ-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 ; AVX512DQ-NEXT: kshiftlw $12, %k0, %k0 ; AVX512DQ-NEXT: kshiftrw $12, %k0, %k1 ; AVX512DQ-NEXT: vpblendmd %zmm5, %zmm4, %zmm0 {%k1} ; AVX512DQ-NEXT: retq ; ; X86-LABEL: mask_widening: ; X86: ## %bb.0: ## %entry ; X86-NEXT: pushl %ebp ; X86-NEXT: .cfi_def_cfa_offset 8 ; X86-NEXT: .cfi_offset %ebp, -8 ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: .cfi_def_cfa_register %ebp ; X86-NEXT: andl $-64, %esp ; X86-NEXT: subl $64, %esp ; X86-NEXT: vpcmpeqd %xmm1, %xmm0, %k1 ; X86-NEXT: vmovdqa64 
8(%ebp), %zmm0 ; X86-NEXT: vmovdqa32 72(%ebp), %zmm0 {%k1} ; X86-NEXT: movl %ebp, %esp ; X86-NEXT: popl %ebp ; X86-NEXT: retl entry: %0 = bitcast <2 x i64> %a to <4 x i32> %1 = bitcast <2 x i64> %b to <4 x i32> %2 = icmp eq <4 x i32> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> %4 = bitcast <8 x i64> %f to <16 x i32> %5 = bitcast <8 x i64> %e to <16 x i32> %6 = shufflevector <8 x i1> %3, <8 x i1> , <16 x i32> %7 = select <16 x i1> %6, <16 x i32> %4, <16 x i32> %5 %8 = bitcast <16 x i32> %7 to <8 x i64> ret <8 x i64> %8 } define void @store_v128i1_constant(<128 x i1>* %R) { ; CHECK-LABEL: store_v128i1_constant: ; CHECK: ## %bb.0: ## %entry ; CHECK-NEXT: movabsq $-4611686310485172227, %rax ## imm = 0xBFFFFFBBFFFFDFFD ; CHECK-NEXT: movq %rax, 8(%rdi) ; CHECK-NEXT: movabsq $-2305843576149381123, %rax ## imm = 0xDFFFFF7BFFFFEFFD ; CHECK-NEXT: movq %rax, (%rdi) ; CHECK-NEXT: retq ; ; X86-LABEL: store_v128i1_constant: ; X86: ## %bb.0: ## %entry ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: vmovaps {{.*#+}} xmm0 = [4294963197,3758096251,4294959101,3221225403] ; X86-NEXT: vmovaps %xmm0, (%eax) ; X86-NEXT: retl entry: store <128 x i1> , <128 x i1>* %R ret void } define void @store_v64i1_constant(<64 x i1>* %R) { ; CHECK-LABEL: store_v64i1_constant: ; CHECK: ## %bb.0: ## %entry ; CHECK-NEXT: movabsq $-2305843576149381123, %rax ## imm = 0xDFFFFF7BFFFFEFFD ; CHECK-NEXT: movq %rax, (%rdi) ; CHECK-NEXT: retq ; ; X86-LABEL: store_v64i1_constant: ; X86: ## %bb.0: ## %entry ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl $-536871045, 4(%eax) ## imm = 0xDFFFFF7B ; X86-NEXT: movl $-4099, (%eax) ## imm = 0xEFFD ; X86-NEXT: retl entry: store <64 x i1> , <64 x i1>* %R ret void } define void @store_v2i1_constant(<2 x i1>* %R) { ; CHECK-LABEL: store_v2i1_constant: ; CHECK: ## %bb.0: ## %entry ; CHECK-NEXT: movb $1, (%rdi) ; CHECK-NEXT: retq ; ; X86-LABEL: store_v2i1_constant: ; X86: ## %bb.0: ## %entry ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; 
X86-NEXT: movb $1, (%eax) ; X86-NEXT: retl entry: store <2 x i1> , <2 x i1>* %R ret void } define void @store_v4i1_constant(<4 x i1>* %R) { ; CHECK-LABEL: store_v4i1_constant: ; CHECK: ## %bb.0: ## %entry ; CHECK-NEXT: movb $5, (%rdi) ; CHECK-NEXT: retq ; ; X86-LABEL: store_v4i1_constant: ; X86: ## %bb.0: ## %entry ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movb $5, (%eax) ; X86-NEXT: retl entry: store <4 x i1> , <4 x i1>* %R ret void } ; Make sure we bring the -1 constant into the mask domain. define void @mask_not_cast(i8*, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>) { ; CHECK-LABEL: mask_not_cast: ; CHECK: ## %bb.0: ; CHECK-NEXT: vpcmpnleud %zmm3, %zmm2, %k1 ; CHECK-NEXT: vptestmd %zmm0, %zmm1, %k1 {%k1} ; CHECK-NEXT: vmovdqu32 %zmm0, (%rdi) {%k1} ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq ; ; X86-LABEL: mask_not_cast: ; X86: ## %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: vpcmpnleud %zmm3, %zmm2, %k1 ; X86-NEXT: vptestmd %zmm0, %zmm1, %k1 {%k1} ; X86-NEXT: vmovdqu32 %zmm0, (%eax) {%k1} ; X86-NEXT: vzeroupper ; X86-NEXT: retl %6 = and <8 x i64> %2, %1 %7 = bitcast <8 x i64> %6 to <16 x i32> %8 = icmp ne <16 x i32> %7, zeroinitializer %9 = bitcast <16 x i1> %8 to i16 %10 = bitcast <8 x i64> %3 to <16 x i32> %11 = bitcast <8 x i64> %4 to <16 x i32> %12 = icmp ule <16 x i32> %10, %11 %13 = bitcast <16 x i1> %12 to i16 %14 = xor i16 %13, -1 %15 = and i16 %14, %9 %16 = bitcast <8 x i64> %1 to <16 x i32> %17 = bitcast i8* %0 to <16 x i32>* %18 = bitcast i16 %15 to <16 x i1> tail call void @llvm.masked.store.v16i32.p0v16i32(<16 x i32> %16, <16 x i32>* %17, i32 1, <16 x i1> %18) #2 ret void } declare void @llvm.masked.store.v16i32.p0v16i32(<16 x i32>, <16 x i32>*, i32, <16 x i1>) define void @ktest_3(<8 x i32> %w, <8 x i32> %x, <8 x i32> %y, <8 x i32> %z) { ; KNL-LABEL: ktest_3: ; KNL: ## %bb.0: ; KNL-NEXT: ## kill: def $ymm3 killed $ymm3 def $zmm3 ; KNL-NEXT: ## kill: def $ymm2 killed $ymm2 def $zmm2 ; KNL-NEXT: ## kill: def $ymm1 killed $ymm1 
def $zmm1 ; KNL-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0 ; KNL-NEXT: vptestnmd %zmm0, %zmm0, %k0 ; KNL-NEXT: vptestnmd %zmm1, %zmm1, %k1 ; KNL-NEXT: vptestnmd %zmm2, %zmm2, %k2 ; KNL-NEXT: vptestnmd %zmm3, %zmm3, %k3 ; KNL-NEXT: korw %k1, %k0, %k0 ; KNL-NEXT: korw %k3, %k2, %k1 ; KNL-NEXT: kandw %k1, %k0, %k0 ; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: testb %al, %al ; KNL-NEXT: je LBB74_1 ; KNL-NEXT: ## %bb.2: ## %exit ; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; KNL-NEXT: LBB74_1: ## %bar ; KNL-NEXT: pushq %rax ; KNL-NEXT: .cfi_def_cfa_offset 16 ; KNL-NEXT: vzeroupper ; KNL-NEXT: callq _foo ; KNL-NEXT: addq $8, %rsp ; KNL-NEXT: retq ; ; SKX-LABEL: ktest_3: ; SKX: ## %bb.0: ; SKX-NEXT: vptestnmd %ymm0, %ymm0, %k0 ; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 ; SKX-NEXT: korb %k1, %k0, %k0 ; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 ; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k2 ; SKX-NEXT: korb %k2, %k1, %k1 ; SKX-NEXT: ktestb %k1, %k0 ; SKX-NEXT: je LBB74_1 ; SKX-NEXT: ## %bb.2: ## %exit ; SKX-NEXT: vzeroupper ; SKX-NEXT: retq ; SKX-NEXT: LBB74_1: ## %bar ; SKX-NEXT: pushq %rax ; SKX-NEXT: .cfi_def_cfa_offset 16 ; SKX-NEXT: vzeroupper ; SKX-NEXT: callq _foo ; SKX-NEXT: addq $8, %rsp ; SKX-NEXT: retq ; ; AVX512BW-LABEL: ktest_3: ; AVX512BW: ## %bb.0: ; AVX512BW-NEXT: ## kill: def $ymm3 killed $ymm3 def $zmm3 ; AVX512BW-NEXT: ## kill: def $ymm2 killed $ymm2 def $zmm2 ; AVX512BW-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1 ; AVX512BW-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512BW-NEXT: vptestnmd %zmm0, %zmm0, %k0 ; AVX512BW-NEXT: vptestnmd %zmm1, %zmm1, %k1 ; AVX512BW-NEXT: vptestnmd %zmm2, %zmm2, %k2 ; AVX512BW-NEXT: vptestnmd %zmm3, %zmm3, %k3 ; AVX512BW-NEXT: korw %k1, %k0, %k0 ; AVX512BW-NEXT: korw %k3, %k2, %k1 ; AVX512BW-NEXT: kandw %k1, %k0, %k0 ; AVX512BW-NEXT: kmovd %k0, %eax ; AVX512BW-NEXT: testb %al, %al ; AVX512BW-NEXT: je LBB74_1 ; AVX512BW-NEXT: ## %bb.2: ## %exit ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; AVX512BW-NEXT: LBB74_1: ## %bar ; 
AVX512BW-NEXT: pushq %rax ; AVX512BW-NEXT: .cfi_def_cfa_offset 16 ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: callq _foo ; AVX512BW-NEXT: addq $8, %rsp ; AVX512BW-NEXT: retq ; ; AVX512DQ-LABEL: ktest_3: ; AVX512DQ: ## %bb.0: ; AVX512DQ-NEXT: ## kill: def $ymm3 killed $ymm3 def $zmm3 ; AVX512DQ-NEXT: ## kill: def $ymm2 killed $ymm2 def $zmm2 ; AVX512DQ-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1 ; AVX512DQ-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512DQ-NEXT: vptestnmd %zmm0, %zmm0, %k0 ; AVX512DQ-NEXT: vptestnmd %zmm1, %zmm1, %k1 ; AVX512DQ-NEXT: vptestnmd %zmm2, %zmm2, %k2 ; AVX512DQ-NEXT: vptestnmd %zmm3, %zmm3, %k3 ; AVX512DQ-NEXT: korb %k1, %k0, %k0 ; AVX512DQ-NEXT: korb %k3, %k2, %k1 ; AVX512DQ-NEXT: ktestb %k1, %k0 ; AVX512DQ-NEXT: je LBB74_1 ; AVX512DQ-NEXT: ## %bb.2: ## %exit ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq ; AVX512DQ-NEXT: LBB74_1: ## %bar ; AVX512DQ-NEXT: pushq %rax ; AVX512DQ-NEXT: .cfi_def_cfa_offset 16 ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: callq _foo ; AVX512DQ-NEXT: addq $8, %rsp ; AVX512DQ-NEXT: retq ; ; X86-LABEL: ktest_3: ; X86: ## %bb.0: ; X86-NEXT: vptestnmd %ymm0, %ymm0, %k0 ; X86-NEXT: vptestnmd %ymm1, %ymm1, %k1 ; X86-NEXT: korb %k1, %k0, %k0 ; X86-NEXT: vptestnmd %ymm2, %ymm2, %k1 ; X86-NEXT: vptestnmd %ymm3, %ymm3, %k2 ; X86-NEXT: korb %k2, %k1, %k1 ; X86-NEXT: ktestb %k1, %k0 ; X86-NEXT: je LBB74_1 ; X86-NEXT: ## %bb.2: ## %exit ; X86-NEXT: vzeroupper ; X86-NEXT: retl ; X86-NEXT: LBB74_1: ## %bar ; X86-NEXT: subl $12, %esp ; X86-NEXT: .cfi_def_cfa_offset 16 ; X86-NEXT: vzeroupper ; X86-NEXT: calll _foo ; X86-NEXT: addl $12, %esp ; X86-NEXT: retl %a = icmp eq <8 x i32> %w, zeroinitializer %b = icmp eq <8 x i32> %x, zeroinitializer %c = icmp eq <8 x i32> %y, zeroinitializer %d = icmp eq <8 x i32> %z, zeroinitializer %e = or <8 x i1> %a, %b %f = or <8 x i1> %c, %d %g = and <8 x i1> %e, %f %h = bitcast <8 x i1> %g to i8 %i = icmp eq i8 %h, 0 br i1 %i, label %bar, label %exit bar: call void @foo() br 
label %exit exit: ret void } define void @ktest_4(<8 x i64> %w, <8 x i64> %x, <8 x i64> %y, <8 x i64> %z) { ; KNL-LABEL: ktest_4: ; KNL: ## %bb.0: ; KNL-NEXT: vptestnmq %zmm0, %zmm0, %k0 ; KNL-NEXT: vptestnmq %zmm1, %zmm1, %k1 ; KNL-NEXT: vptestnmq %zmm2, %zmm2, %k2 ; KNL-NEXT: vptestnmq %zmm3, %zmm3, %k3 ; KNL-NEXT: korw %k1, %k0, %k0 ; KNL-NEXT: korw %k3, %k2, %k1 ; KNL-NEXT: kandw %k1, %k0, %k0 ; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: testb %al, %al ; KNL-NEXT: je LBB75_1 ; KNL-NEXT: ## %bb.2: ## %exit ; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; KNL-NEXT: LBB75_1: ## %bar ; KNL-NEXT: pushq %rax ; KNL-NEXT: .cfi_def_cfa_offset 16 ; KNL-NEXT: vzeroupper ; KNL-NEXT: callq _foo ; KNL-NEXT: addq $8, %rsp ; KNL-NEXT: retq ; ; SKX-LABEL: ktest_4: ; SKX: ## %bb.0: ; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k0 ; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 ; SKX-NEXT: korb %k1, %k0, %k0 ; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 ; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k2 ; SKX-NEXT: korb %k2, %k1, %k1 ; SKX-NEXT: ktestb %k1, %k0 ; SKX-NEXT: je LBB75_1 ; SKX-NEXT: ## %bb.2: ## %exit ; SKX-NEXT: vzeroupper ; SKX-NEXT: retq ; SKX-NEXT: LBB75_1: ## %bar ; SKX-NEXT: pushq %rax ; SKX-NEXT: .cfi_def_cfa_offset 16 ; SKX-NEXT: vzeroupper ; SKX-NEXT: callq _foo ; SKX-NEXT: addq $8, %rsp ; SKX-NEXT: retq ; ; AVX512BW-LABEL: ktest_4: ; AVX512BW: ## %bb.0: ; AVX512BW-NEXT: vptestnmq %zmm0, %zmm0, %k0 ; AVX512BW-NEXT: vptestnmq %zmm1, %zmm1, %k1 ; AVX512BW-NEXT: vptestnmq %zmm2, %zmm2, %k2 ; AVX512BW-NEXT: vptestnmq %zmm3, %zmm3, %k3 ; AVX512BW-NEXT: korw %k1, %k0, %k0 ; AVX512BW-NEXT: korw %k3, %k2, %k1 ; AVX512BW-NEXT: kandw %k1, %k0, %k0 ; AVX512BW-NEXT: kmovd %k0, %eax ; AVX512BW-NEXT: testb %al, %al ; AVX512BW-NEXT: je LBB75_1 ; AVX512BW-NEXT: ## %bb.2: ## %exit ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; AVX512BW-NEXT: LBB75_1: ## %bar ; AVX512BW-NEXT: pushq %rax ; AVX512BW-NEXT: .cfi_def_cfa_offset 16 ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: callq _foo ; AVX512BW-NEXT: addq $8, %rsp ; 
AVX512BW-NEXT: retq ; ; AVX512DQ-LABEL: ktest_4: ; AVX512DQ: ## %bb.0: ; AVX512DQ-NEXT: vptestnmq %zmm0, %zmm0, %k0 ; AVX512DQ-NEXT: vptestnmq %zmm1, %zmm1, %k1 ; AVX512DQ-NEXT: korb %k1, %k0, %k0 ; AVX512DQ-NEXT: vptestnmq %zmm2, %zmm2, %k1 ; AVX512DQ-NEXT: vptestnmq %zmm3, %zmm3, %k2 ; AVX512DQ-NEXT: korb %k2, %k1, %k1 ; AVX512DQ-NEXT: ktestb %k1, %k0 ; AVX512DQ-NEXT: je LBB75_1 ; AVX512DQ-NEXT: ## %bb.2: ## %exit ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq ; AVX512DQ-NEXT: LBB75_1: ## %bar ; AVX512DQ-NEXT: pushq %rax ; AVX512DQ-NEXT: .cfi_def_cfa_offset 16 ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: callq _foo ; AVX512DQ-NEXT: addq $8, %rsp ; AVX512DQ-NEXT: retq ; ; X86-LABEL: ktest_4: ; X86: ## %bb.0: ; X86-NEXT: vptestnmq %zmm0, %zmm0, %k0 ; X86-NEXT: vptestnmq %zmm1, %zmm1, %k1 ; X86-NEXT: korb %k1, %k0, %k0 ; X86-NEXT: vptestnmq %zmm2, %zmm2, %k1 ; X86-NEXT: vptestnmq %zmm3, %zmm3, %k2 ; X86-NEXT: korb %k2, %k1, %k1 ; X86-NEXT: ktestb %k1, %k0 ; X86-NEXT: je LBB75_1 ; X86-NEXT: ## %bb.2: ## %exit ; X86-NEXT: vzeroupper ; X86-NEXT: retl ; X86-NEXT: LBB75_1: ## %bar ; X86-NEXT: subl $12, %esp ; X86-NEXT: .cfi_def_cfa_offset 16 ; X86-NEXT: vzeroupper ; X86-NEXT: calll _foo ; X86-NEXT: addl $12, %esp ; X86-NEXT: retl %a = icmp eq <8 x i64> %w, zeroinitializer %b = icmp eq <8 x i64> %x, zeroinitializer %c = icmp eq <8 x i64> %y, zeroinitializer %d = icmp eq <8 x i64> %z, zeroinitializer %e = or <8 x i1> %a, %b %f = or <8 x i1> %c, %d %g = and <8 x i1> %e, %f %h = bitcast <8 x i1> %g to i8 %i = icmp eq i8 %h, 0 br i1 %i, label %bar, label %exit bar: call void @foo() br label %exit exit: ret void } define void @ktest_5(<16 x i32> %w, <16 x i32> %x, <16 x i32> %y, <16 x i32> %z) { ; KNL-LABEL: ktest_5: ; KNL: ## %bb.0: ; KNL-NEXT: vptestnmd %zmm0, %zmm0, %k0 ; KNL-NEXT: vptestnmd %zmm1, %zmm1, %k1 ; KNL-NEXT: korw %k1, %k0, %k0 ; KNL-NEXT: vptestnmd %zmm2, %zmm2, %k1 ; KNL-NEXT: vptestnmd %zmm3, %zmm3, %k2 ; KNL-NEXT: korw %k2, %k1, %k1 ; KNL-NEXT: kandw %k1, 
%k0, %k0 ; KNL-NEXT: kortestw %k0, %k0 ; KNL-NEXT: je LBB76_1 ; KNL-NEXT: ## %bb.2: ## %exit ; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; KNL-NEXT: LBB76_1: ## %bar ; KNL-NEXT: pushq %rax ; KNL-NEXT: .cfi_def_cfa_offset 16 ; KNL-NEXT: vzeroupper ; KNL-NEXT: callq _foo ; KNL-NEXT: addq $8, %rsp ; KNL-NEXT: retq ; ; SKX-LABEL: ktest_5: ; SKX: ## %bb.0: ; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k0 ; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 ; SKX-NEXT: korw %k1, %k0, %k0 ; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 ; SKX-NEXT: vptestnmd %zmm3, %zmm3, %k2 ; SKX-NEXT: korw %k2, %k1, %k1 ; SKX-NEXT: ktestw %k1, %k0 ; SKX-NEXT: je LBB76_1 ; SKX-NEXT: ## %bb.2: ## %exit ; SKX-NEXT: vzeroupper ; SKX-NEXT: retq ; SKX-NEXT: LBB76_1: ## %bar ; SKX-NEXT: pushq %rax ; SKX-NEXT: .cfi_def_cfa_offset 16 ; SKX-NEXT: vzeroupper ; SKX-NEXT: callq _foo ; SKX-NEXT: addq $8, %rsp ; SKX-NEXT: retq ; ; AVX512BW-LABEL: ktest_5: ; AVX512BW: ## %bb.0: ; AVX512BW-NEXT: vptestnmd %zmm0, %zmm0, %k0 ; AVX512BW-NEXT: vptestnmd %zmm1, %zmm1, %k1 ; AVX512BW-NEXT: korw %k1, %k0, %k0 ; AVX512BW-NEXT: vptestnmd %zmm2, %zmm2, %k1 ; AVX512BW-NEXT: vptestnmd %zmm3, %zmm3, %k2 ; AVX512BW-NEXT: korw %k2, %k1, %k1 ; AVX512BW-NEXT: kandw %k1, %k0, %k0 ; AVX512BW-NEXT: kortestw %k0, %k0 ; AVX512BW-NEXT: je LBB76_1 ; AVX512BW-NEXT: ## %bb.2: ## %exit ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; AVX512BW-NEXT: LBB76_1: ## %bar ; AVX512BW-NEXT: pushq %rax ; AVX512BW-NEXT: .cfi_def_cfa_offset 16 ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: callq _foo ; AVX512BW-NEXT: addq $8, %rsp ; AVX512BW-NEXT: retq ; ; AVX512DQ-LABEL: ktest_5: ; AVX512DQ: ## %bb.0: ; AVX512DQ-NEXT: vptestnmd %zmm0, %zmm0, %k0 ; AVX512DQ-NEXT: vptestnmd %zmm1, %zmm1, %k1 ; AVX512DQ-NEXT: korw %k1, %k0, %k0 ; AVX512DQ-NEXT: vptestnmd %zmm2, %zmm2, %k1 ; AVX512DQ-NEXT: vptestnmd %zmm3, %zmm3, %k2 ; AVX512DQ-NEXT: korw %k2, %k1, %k1 ; AVX512DQ-NEXT: ktestw %k1, %k0 ; AVX512DQ-NEXT: je LBB76_1 ; AVX512DQ-NEXT: ## %bb.2: ## %exit ; AVX512DQ-NEXT: vzeroupper ; 
AVX512DQ-NEXT: retq ; AVX512DQ-NEXT: LBB76_1: ## %bar ; AVX512DQ-NEXT: pushq %rax ; AVX512DQ-NEXT: .cfi_def_cfa_offset 16 ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: callq _foo ; AVX512DQ-NEXT: addq $8, %rsp ; AVX512DQ-NEXT: retq ; ; X86-LABEL: ktest_5: ; X86: ## %bb.0: ; X86-NEXT: vptestnmd %zmm0, %zmm0, %k0 ; X86-NEXT: vptestnmd %zmm1, %zmm1, %k1 ; X86-NEXT: korw %k1, %k0, %k0 ; X86-NEXT: vptestnmd %zmm2, %zmm2, %k1 ; X86-NEXT: vptestnmd %zmm3, %zmm3, %k2 ; X86-NEXT: korw %k2, %k1, %k1 ; X86-NEXT: ktestw %k1, %k0 ; X86-NEXT: je LBB76_1 ; X86-NEXT: ## %bb.2: ## %exit ; X86-NEXT: vzeroupper ; X86-NEXT: retl ; X86-NEXT: LBB76_1: ## %bar ; X86-NEXT: subl $12, %esp ; X86-NEXT: .cfi_def_cfa_offset 16 ; X86-NEXT: vzeroupper ; X86-NEXT: calll _foo ; X86-NEXT: addl $12, %esp ; X86-NEXT: retl %a = icmp eq <16 x i32> %w, zeroinitializer %b = icmp eq <16 x i32> %x, zeroinitializer %c = icmp eq <16 x i32> %y, zeroinitializer %d = icmp eq <16 x i32> %z, zeroinitializer %e = or <16 x i1> %a, %b %f = or <16 x i1> %c, %d %g = and <16 x i1> %e, %f %h = bitcast <16 x i1> %g to i16 %i = icmp eq i16 %h, 0 br i1 %i, label %bar, label %exit bar: call void @foo() br label %exit exit: ret void } define void @ktest_6(<32 x i16> %w, <32 x i16> %x, <32 x i16> %y, <32 x i16> %z) { ; KNL-LABEL: ktest_6: ; KNL: ## %bb.0: ; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm4 ; KNL-NEXT: vpxor %xmm5, %xmm5, %xmm5 ; KNL-NEXT: vpcmpeqw %ymm5, %ymm4, %ymm4 ; KNL-NEXT: vpcmpeqw %ymm5, %ymm0, %ymm0 ; KNL-NEXT: vinserti64x4 $1, %ymm4, %zmm0, %zmm0 ; KNL-NEXT: vextracti64x4 $1, %zmm1, %ymm4 ; KNL-NEXT: vpcmpeqw %ymm5, %ymm4, %ymm4 ; KNL-NEXT: vpcmpeqw %ymm5, %ymm1, %ymm1 ; KNL-NEXT: vinserti64x4 $1, %ymm4, %zmm1, %zmm1 ; KNL-NEXT: vporq %zmm1, %zmm0, %zmm0 ; KNL-NEXT: vextracti64x4 $1, %zmm2, %ymm1 ; KNL-NEXT: vpcmpeqw %ymm5, %ymm1, %ymm1 ; KNL-NEXT: vpcmpeqw %ymm5, %ymm2, %ymm2 ; KNL-NEXT: vinserti64x4 $1, %ymm1, %zmm2, %zmm1 ; KNL-NEXT: vextracti64x4 $1, %zmm3, %ymm2 ; KNL-NEXT: vpcmpeqw %ymm5, %ymm2, %ymm2 ; 
KNL-NEXT: vpcmpeqw %ymm5, %ymm3, %ymm3 ; KNL-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 ; KNL-NEXT: vpternlogq $200, %zmm1, %zmm0, %zmm2 ; KNL-NEXT: vextracti64x4 $1, %zmm2, %ymm0 ; KNL-NEXT: vpor %ymm0, %ymm2, %ymm0 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 ; KNL-NEXT: kortestw %k0, %k0 ; KNL-NEXT: je LBB77_1 ; KNL-NEXT: ## %bb.2: ## %exit ; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; KNL-NEXT: LBB77_1: ## %bar ; KNL-NEXT: pushq %rax ; KNL-NEXT: .cfi_def_cfa_offset 16 ; KNL-NEXT: vzeroupper ; KNL-NEXT: callq _foo ; KNL-NEXT: addq $8, %rsp ; KNL-NEXT: retq ; ; SKX-LABEL: ktest_6: ; SKX: ## %bb.0: ; SKX-NEXT: vptestnmw %zmm0, %zmm0, %k0 ; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 ; SKX-NEXT: kord %k1, %k0, %k0 ; SKX-NEXT: vptestnmw %zmm2, %zmm2, %k1 ; SKX-NEXT: vptestnmw %zmm3, %zmm3, %k2 ; SKX-NEXT: kord %k2, %k1, %k1 ; SKX-NEXT: ktestd %k1, %k0 ; SKX-NEXT: je LBB77_1 ; SKX-NEXT: ## %bb.2: ## %exit ; SKX-NEXT: vzeroupper ; SKX-NEXT: retq ; SKX-NEXT: LBB77_1: ## %bar ; SKX-NEXT: pushq %rax ; SKX-NEXT: .cfi_def_cfa_offset 16 ; SKX-NEXT: vzeroupper ; SKX-NEXT: callq _foo ; SKX-NEXT: addq $8, %rsp ; SKX-NEXT: retq ; ; AVX512BW-LABEL: ktest_6: ; AVX512BW: ## %bb.0: ; AVX512BW-NEXT: vptestnmw %zmm0, %zmm0, %k0 ; AVX512BW-NEXT: vptestnmw %zmm1, %zmm1, %k1 ; AVX512BW-NEXT: kord %k1, %k0, %k0 ; AVX512BW-NEXT: vptestnmw %zmm2, %zmm2, %k1 ; AVX512BW-NEXT: vptestnmw %zmm3, %zmm3, %k2 ; AVX512BW-NEXT: kord %k2, %k1, %k1 ; AVX512BW-NEXT: ktestd %k1, %k0 ; AVX512BW-NEXT: je LBB77_1 ; AVX512BW-NEXT: ## %bb.2: ## %exit ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; AVX512BW-NEXT: LBB77_1: ## %bar ; AVX512BW-NEXT: pushq %rax ; AVX512BW-NEXT: .cfi_def_cfa_offset 16 ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: callq _foo ; AVX512BW-NEXT: addq $8, %rsp ; AVX512BW-NEXT: retq ; ; AVX512DQ-LABEL: ktest_6: ; AVX512DQ: ## %bb.0: ; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm4 ; AVX512DQ-NEXT: vpxor %xmm5, %xmm5, %xmm5 ; 
AVX512DQ-NEXT: vpcmpeqw %ymm5, %ymm4, %ymm4 ; AVX512DQ-NEXT: vpcmpeqw %ymm5, %ymm0, %ymm0 ; AVX512DQ-NEXT: vinserti64x4 $1, %ymm4, %zmm0, %zmm0 ; AVX512DQ-NEXT: vextracti64x4 $1, %zmm1, %ymm4 ; AVX512DQ-NEXT: vpcmpeqw %ymm5, %ymm4, %ymm4 ; AVX512DQ-NEXT: vpcmpeqw %ymm5, %ymm1, %ymm1 ; AVX512DQ-NEXT: vinserti64x4 $1, %ymm4, %zmm1, %zmm1 ; AVX512DQ-NEXT: vporq %zmm1, %zmm0, %zmm0 ; AVX512DQ-NEXT: vextracti64x4 $1, %zmm2, %ymm1 ; AVX512DQ-NEXT: vpcmpeqw %ymm5, %ymm1, %ymm1 ; AVX512DQ-NEXT: vpcmpeqw %ymm5, %ymm2, %ymm2 ; AVX512DQ-NEXT: vinserti64x4 $1, %ymm1, %zmm2, %zmm1 ; AVX512DQ-NEXT: vextracti64x4 $1, %zmm3, %ymm2 ; AVX512DQ-NEXT: vpcmpeqw %ymm5, %ymm2, %ymm2 ; AVX512DQ-NEXT: vpcmpeqw %ymm5, %ymm3, %ymm3 ; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 ; AVX512DQ-NEXT: vpternlogq $200, %zmm1, %zmm0, %zmm2 ; AVX512DQ-NEXT: vextracti64x4 $1, %zmm2, %ymm0 ; AVX512DQ-NEXT: vpor %ymm0, %ymm2, %ymm0 ; AVX512DQ-NEXT: vpmovsxwd %ymm0, %zmm0 ; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0 ; AVX512DQ-NEXT: kortestw %k0, %k0 ; AVX512DQ-NEXT: je LBB77_1 ; AVX512DQ-NEXT: ## %bb.2: ## %exit ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq ; AVX512DQ-NEXT: LBB77_1: ## %bar ; AVX512DQ-NEXT: pushq %rax ; AVX512DQ-NEXT: .cfi_def_cfa_offset 16 ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: callq _foo ; AVX512DQ-NEXT: addq $8, %rsp ; AVX512DQ-NEXT: retq ; ; X86-LABEL: ktest_6: ; X86: ## %bb.0: ; X86-NEXT: vptestnmw %zmm0, %zmm0, %k0 ; X86-NEXT: vptestnmw %zmm1, %zmm1, %k1 ; X86-NEXT: kord %k1, %k0, %k0 ; X86-NEXT: vptestnmw %zmm2, %zmm2, %k1 ; X86-NEXT: vptestnmw %zmm3, %zmm3, %k2 ; X86-NEXT: kord %k2, %k1, %k1 ; X86-NEXT: ktestd %k1, %k0 ; X86-NEXT: je LBB77_1 ; X86-NEXT: ## %bb.2: ## %exit ; X86-NEXT: vzeroupper ; X86-NEXT: retl ; X86-NEXT: LBB77_1: ## %bar ; X86-NEXT: subl $12, %esp ; X86-NEXT: .cfi_def_cfa_offset 16 ; X86-NEXT: vzeroupper ; X86-NEXT: calll _foo ; X86-NEXT: addl $12, %esp ; X86-NEXT: retl %a = icmp eq <32 x i16> %w, 
zeroinitializer %b = icmp eq <32 x i16> %x, zeroinitializer %c = icmp eq <32 x i16> %y, zeroinitializer %d = icmp eq <32 x i16> %z, zeroinitializer %e = or <32 x i1> %a, %b %f = or <32 x i1> %c, %d %g = and <32 x i1> %e, %f %h = bitcast <32 x i1> %g to i32 %i = icmp eq i32 %h, 0 br i1 %i, label %bar, label %exit bar: call void @foo() br label %exit exit: ret void } define void @ktest_7(<64 x i8> %w, <64 x i8> %x, <64 x i8> %y, <64 x i8> %z) { ; KNL-LABEL: ktest_7: ; KNL: ## %bb.0: ; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm4 ; KNL-NEXT: vpxor %xmm5, %xmm5, %xmm5 ; KNL-NEXT: vpcmpeqb %ymm5, %ymm4, %ymm4 ; KNL-NEXT: vpcmpeqb %ymm5, %ymm0, %ymm0 ; KNL-NEXT: vinserti64x4 $1, %ymm4, %zmm0, %zmm0 ; KNL-NEXT: vextracti64x4 $1, %zmm1, %ymm4 ; KNL-NEXT: vpcmpeqb %ymm5, %ymm4, %ymm4 ; KNL-NEXT: vpcmpeqb %ymm5, %ymm1, %ymm1 ; KNL-NEXT: vinserti64x4 $1, %ymm4, %zmm1, %zmm1 ; KNL-NEXT: vporq %zmm1, %zmm0, %zmm0 ; KNL-NEXT: vextracti64x4 $1, %zmm2, %ymm1 ; KNL-NEXT: vpcmpeqb %ymm5, %ymm1, %ymm1 ; KNL-NEXT: vpcmpeqb %ymm5, %ymm2, %ymm2 ; KNL-NEXT: vinserti64x4 $1, %ymm1, %zmm2, %zmm1 ; KNL-NEXT: vextracti64x4 $1, %zmm3, %ymm2 ; KNL-NEXT: vpcmpeqb %ymm5, %ymm2, %ymm2 ; KNL-NEXT: vpcmpeqb %ymm5, %ymm3, %ymm3 ; KNL-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 ; KNL-NEXT: vpternlogq $200, %zmm1, %zmm0, %zmm2 ; KNL-NEXT: vextracti64x4 $1, %zmm2, %ymm0 ; KNL-NEXT: vpor %ymm0, %ymm2, %ymm0 ; KNL-NEXT: vpmovmskb %ymm0, %eax ; KNL-NEXT: testl %eax, %eax ; KNL-NEXT: je LBB78_1 ; KNL-NEXT: ## %bb.2: ## %exit ; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; KNL-NEXT: LBB78_1: ## %bar ; KNL-NEXT: pushq %rax ; KNL-NEXT: .cfi_def_cfa_offset 16 ; KNL-NEXT: vzeroupper ; KNL-NEXT: callq _foo ; KNL-NEXT: addq $8, %rsp ; KNL-NEXT: retq ; ; SKX-LABEL: ktest_7: ; SKX: ## %bb.0: ; SKX-NEXT: vptestnmb %zmm0, %zmm0, %k0 ; SKX-NEXT: vptestnmb %zmm1, %zmm1, %k1 ; SKX-NEXT: korq %k1, %k0, %k0 ; SKX-NEXT: vptestnmb %zmm2, %zmm2, %k1 ; SKX-NEXT: vptestnmb %zmm3, %zmm3, %k2 ; SKX-NEXT: korq %k2, %k1, %k1 ; SKX-NEXT: 
ktestq %k1, %k0 ; SKX-NEXT: je LBB78_1 ; SKX-NEXT: ## %bb.2: ## %exit ; SKX-NEXT: vzeroupper ; SKX-NEXT: retq ; SKX-NEXT: LBB78_1: ## %bar ; SKX-NEXT: pushq %rax ; SKX-NEXT: .cfi_def_cfa_offset 16 ; SKX-NEXT: vzeroupper ; SKX-NEXT: callq _foo ; SKX-NEXT: addq $8, %rsp ; SKX-NEXT: retq ; ; AVX512BW-LABEL: ktest_7: ; AVX512BW: ## %bb.0: ; AVX512BW-NEXT: vptestnmb %zmm0, %zmm0, %k0 ; AVX512BW-NEXT: vptestnmb %zmm1, %zmm1, %k1 ; AVX512BW-NEXT: korq %k1, %k0, %k0 ; AVX512BW-NEXT: vptestnmb %zmm2, %zmm2, %k1 ; AVX512BW-NEXT: vptestnmb %zmm3, %zmm3, %k2 ; AVX512BW-NEXT: korq %k2, %k1, %k1 ; AVX512BW-NEXT: ktestq %k1, %k0 ; AVX512BW-NEXT: je LBB78_1 ; AVX512BW-NEXT: ## %bb.2: ## %exit ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; AVX512BW-NEXT: LBB78_1: ## %bar ; AVX512BW-NEXT: pushq %rax ; AVX512BW-NEXT: .cfi_def_cfa_offset 16 ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: callq _foo ; AVX512BW-NEXT: addq $8, %rsp ; AVX512BW-NEXT: retq ; ; AVX512DQ-LABEL: ktest_7: ; AVX512DQ: ## %bb.0: ; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm4 ; AVX512DQ-NEXT: vpxor %xmm5, %xmm5, %xmm5 ; AVX512DQ-NEXT: vpcmpeqb %ymm5, %ymm4, %ymm4 ; AVX512DQ-NEXT: vpcmpeqb %ymm5, %ymm0, %ymm0 ; AVX512DQ-NEXT: vinserti64x4 $1, %ymm4, %zmm0, %zmm0 ; AVX512DQ-NEXT: vextracti64x4 $1, %zmm1, %ymm4 ; AVX512DQ-NEXT: vpcmpeqb %ymm5, %ymm4, %ymm4 ; AVX512DQ-NEXT: vpcmpeqb %ymm5, %ymm1, %ymm1 ; AVX512DQ-NEXT: vinserti64x4 $1, %ymm4, %zmm1, %zmm1 ; AVX512DQ-NEXT: vporq %zmm1, %zmm0, %zmm0 ; AVX512DQ-NEXT: vextracti64x4 $1, %zmm2, %ymm1 ; AVX512DQ-NEXT: vpcmpeqb %ymm5, %ymm1, %ymm1 ; AVX512DQ-NEXT: vpcmpeqb %ymm5, %ymm2, %ymm2 ; AVX512DQ-NEXT: vinserti64x4 $1, %ymm1, %zmm2, %zmm1 ; AVX512DQ-NEXT: vextracti64x4 $1, %zmm3, %ymm2 ; AVX512DQ-NEXT: vpcmpeqb %ymm5, %ymm2, %ymm2 ; AVX512DQ-NEXT: vpcmpeqb %ymm5, %ymm3, %ymm3 ; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 ; AVX512DQ-NEXT: vpternlogq $200, %zmm1, %zmm0, %zmm2 ; AVX512DQ-NEXT: vextracti64x4 $1, %zmm2, %ymm0 ; AVX512DQ-NEXT: vpor %ymm0, %ymm2, 
%ymm0 ; AVX512DQ-NEXT: vpmovmskb %ymm0, %eax ; AVX512DQ-NEXT: testl %eax, %eax ; AVX512DQ-NEXT: je LBB78_1 ; AVX512DQ-NEXT: ## %bb.2: ## %exit ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq ; AVX512DQ-NEXT: LBB78_1: ## %bar ; AVX512DQ-NEXT: pushq %rax ; AVX512DQ-NEXT: .cfi_def_cfa_offset 16 ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: callq _foo ; AVX512DQ-NEXT: addq $8, %rsp ; AVX512DQ-NEXT: retq ; ; X86-LABEL: ktest_7: ; X86: ## %bb.0: ; X86-NEXT: vptestnmb %zmm0, %zmm0, %k0 ; X86-NEXT: vptestnmb %zmm1, %zmm1, %k1 ; X86-NEXT: korq %k1, %k0, %k0 ; X86-NEXT: vptestnmb %zmm2, %zmm2, %k1 ; X86-NEXT: vptestnmb %zmm3, %zmm3, %k2 ; X86-NEXT: korq %k2, %k1, %k1 ; X86-NEXT: kandq %k1, %k0, %k0 ; X86-NEXT: kshiftrq $32, %k0, %k1 ; X86-NEXT: kortestd %k1, %k0 ; X86-NEXT: je LBB78_1 ; X86-NEXT: ## %bb.2: ## %exit ; X86-NEXT: vzeroupper ; X86-NEXT: retl ; X86-NEXT: LBB78_1: ## %bar ; X86-NEXT: subl $12, %esp ; X86-NEXT: .cfi_def_cfa_offset 16 ; X86-NEXT: vzeroupper ; X86-NEXT: calll _foo ; X86-NEXT: addl $12, %esp ; X86-NEXT: retl %a = icmp eq <64 x i8> %w, zeroinitializer %b = icmp eq <64 x i8> %x, zeroinitializer %c = icmp eq <64 x i8> %y, zeroinitializer %d = icmp eq <64 x i8> %z, zeroinitializer %e = or <64 x i1> %a, %b %f = or <64 x i1> %c, %d %g = and <64 x i1> %e, %f %h = bitcast <64 x i1> %g to i64 %i = icmp eq i64 %h, 0 br i1 %i, label %bar, label %exit bar: call void @foo() br label %exit exit: ret void } define <64 x i1> @mask64_insert(i32 %a) { ; KNL-LABEL: mask64_insert: ; KNL: ## %bb.0: ; KNL-NEXT: movq %rdi, %rax ; KNL-NEXT: andl $1, %esi ; KNL-NEXT: kmovw %esi, %k0 ; KNL-NEXT: movw $-4, %cx ; KNL-NEXT: kmovw %ecx, %k1 ; KNL-NEXT: kshiftrw $1, %k1, %k1 ; KNL-NEXT: kshiftlw $1, %k1, %k1 ; KNL-NEXT: korw %k0, %k1, %k0 ; KNL-NEXT: kmovw %k0, (%rdi) ; KNL-NEXT: movw $-3, 6(%rdi) ; KNL-NEXT: movl $-131075, 2(%rdi) ## imm = 0xFFFDFFFD ; KNL-NEXT: retq ; ; SKX-LABEL: mask64_insert: ; SKX: ## %bb.0: ; SKX-NEXT: kmovd %edi, %k0 ; SKX-NEXT: kshiftlq $63, %k0, %k0 ; 
SKX-NEXT: kshiftrq $63, %k0, %k0 ; SKX-NEXT: movabsq $-562958543486980, %rax ## imm = 0xFFFDFFFDFFFDFFFC ; SKX-NEXT: kmovq %rax, %k1 ; SKX-NEXT: kshiftrq $1, %k1, %k1 ; SKX-NEXT: kshiftlq $1, %k1, %k1 ; SKX-NEXT: korq %k0, %k1, %k0 ; SKX-NEXT: vpmovm2b %k0, %zmm0 ; SKX-NEXT: retq ; ; AVX512BW-LABEL: mask64_insert: ; AVX512BW: ## %bb.0: ; AVX512BW-NEXT: kmovd %edi, %k0 ; AVX512BW-NEXT: kshiftlq $63, %k0, %k0 ; AVX512BW-NEXT: kshiftrq $63, %k0, %k0 ; AVX512BW-NEXT: movabsq $-562958543486980, %rax ## imm = 0xFFFDFFFDFFFDFFFC ; AVX512BW-NEXT: kmovq %rax, %k1 ; AVX512BW-NEXT: kshiftrq $1, %k1, %k1 ; AVX512BW-NEXT: kshiftlq $1, %k1, %k1 ; AVX512BW-NEXT: korq %k0, %k1, %k0 ; AVX512BW-NEXT: vpmovm2b %k0, %zmm0 ; AVX512BW-NEXT: retq ; ; AVX512DQ-LABEL: mask64_insert: ; AVX512DQ: ## %bb.0: ; AVX512DQ-NEXT: movq %rdi, %rax ; AVX512DQ-NEXT: andl $1, %esi ; AVX512DQ-NEXT: kmovw %esi, %k0 ; AVX512DQ-NEXT: movw $-4, %cx ; AVX512DQ-NEXT: kmovw %ecx, %k1 ; AVX512DQ-NEXT: kshiftrw $1, %k1, %k1 ; AVX512DQ-NEXT: kshiftlw $1, %k1, %k1 ; AVX512DQ-NEXT: korw %k0, %k1, %k0 ; AVX512DQ-NEXT: kmovw %k0, (%rdi) ; AVX512DQ-NEXT: movw $-3, 6(%rdi) ; AVX512DQ-NEXT: movl $-131075, 2(%rdi) ## imm = 0xFFFDFFFD ; AVX512DQ-NEXT: retq ; ; X86-LABEL: mask64_insert: ; X86: ## %bb.0: ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k0 ; X86-NEXT: movl $-131076, %eax ## imm = 0xFFFDFFFC ; X86-NEXT: kmovd %eax, %k1 ; X86-NEXT: movl $-131075, %eax ## imm = 0xFFFDFFFD ; X86-NEXT: kmovd %eax, %k2 ; X86-NEXT: kunpckdq %k1, %k2, %k1 ; X86-NEXT: kshiftrq $1, %k1, %k1 ; X86-NEXT: kshiftlq $1, %k1, %k1 ; X86-NEXT: kshiftlq $63, %k0, %k0 ; X86-NEXT: kshiftrq $63, %k0, %k0 ; X86-NEXT: korq %k0, %k1, %k0 ; X86-NEXT: vpmovm2b %k0, %zmm0 ; X86-NEXT: retl %a_i = trunc i32 %a to i1 %maskv = insertelement <64 x i1> , i1 %a_i, i32 0 ret <64 x i1> %maskv } define i1 @test_v1i1_add(i1 %x, i1 %y) { ; KNL-LABEL: test_v1i1_add: ; KNL: ## %bb.0: ; KNL-NEXT: kmovw %edi, %k0 ; KNL-NEXT: kmovw %esi, %k1 ; KNL-NEXT: kxorw %k1, %k0, %k0 ; 
KNL-NEXT: kshiftlw $15, %k0, %k0 ; KNL-NEXT: kshiftrw $15, %k0, %k0 ; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: movb %al, -{{[0-9]+}}(%rsp) ; KNL-NEXT: movb -{{[0-9]+}}(%rsp), %al ; KNL-NEXT: retq ; ; SKX-LABEL: test_v1i1_add: ; SKX: ## %bb.0: ; SKX-NEXT: andl $1, %edi ; SKX-NEXT: movb %dil, -{{[0-9]+}}(%rsp) ; SKX-NEXT: andl $1, %esi ; SKX-NEXT: movb %sil, -{{[0-9]+}}(%rsp) ; SKX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k0 ; SKX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1 ; SKX-NEXT: kxorw %k1, %k0, %k0 ; SKX-NEXT: kshiftlb $7, %k0, %k0 ; SKX-NEXT: kshiftrb $7, %k0, %k0 ; SKX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp) ; SKX-NEXT: movb -{{[0-9]+}}(%rsp), %al ; SKX-NEXT: retq ; ; AVX512BW-LABEL: test_v1i1_add: ; AVX512BW: ## %bb.0: ; AVX512BW-NEXT: kmovd %edi, %k0 ; AVX512BW-NEXT: kmovd %esi, %k1 ; AVX512BW-NEXT: kxorw %k1, %k0, %k0 ; AVX512BW-NEXT: kshiftlw $15, %k0, %k0 ; AVX512BW-NEXT: kshiftrw $15, %k0, %k0 ; AVX512BW-NEXT: kmovd %k0, %eax ; AVX512BW-NEXT: movb %al, -{{[0-9]+}}(%rsp) ; AVX512BW-NEXT: movb -{{[0-9]+}}(%rsp), %al ; AVX512BW-NEXT: retq ; ; AVX512DQ-LABEL: test_v1i1_add: ; AVX512DQ: ## %bb.0: ; AVX512DQ-NEXT: andl $1, %edi ; AVX512DQ-NEXT: movb %dil, -{{[0-9]+}}(%rsp) ; AVX512DQ-NEXT: andl $1, %esi ; AVX512DQ-NEXT: movb %sil, -{{[0-9]+}}(%rsp) ; AVX512DQ-NEXT: kmovb -{{[0-9]+}}(%rsp), %k0 ; AVX512DQ-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1 ; AVX512DQ-NEXT: kxorw %k1, %k0, %k0 ; AVX512DQ-NEXT: kshiftlb $7, %k0, %k0 ; AVX512DQ-NEXT: kshiftrb $7, %k0, %k0 ; AVX512DQ-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp) ; AVX512DQ-NEXT: movb -{{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: retq ; ; X86-LABEL: test_v1i1_add: ; X86: ## %bb.0: ; X86-NEXT: pushl %eax ; X86-NEXT: .cfi_def_cfa_offset 8 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: andb $1, %al ; X86-NEXT: movb %al, {{[0-9]+}}(%esp) ; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: andb $1, %al ; X86-NEXT: movb %al, {{[0-9]+}}(%esp) ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k0 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 ; X86-NEXT: kxorw %k1, %k0, %k0 ; 
X86-NEXT: kshiftlb $7, %k0, %k0 ; X86-NEXT: kshiftrb $7, %k0, %k0 ; X86-NEXT: kmovb %k0, {{[0-9]+}}(%esp) ; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: popl %ecx ; X86-NEXT: retl %m0 = bitcast i1 %x to <1 x i1> %m1 = bitcast i1 %y to <1 x i1> %m2 = add <1 x i1> %m0, %m1 %ret = bitcast <1 x i1> %m2 to i1 ret i1 %ret } define i1 @test_v1i1_sub(i1 %x, i1 %y) { ; KNL-LABEL: test_v1i1_sub: ; KNL: ## %bb.0: ; KNL-NEXT: kmovw %edi, %k0 ; KNL-NEXT: kmovw %esi, %k1 ; KNL-NEXT: kxorw %k1, %k0, %k0 ; KNL-NEXT: kshiftlw $15, %k0, %k0 ; KNL-NEXT: kshiftrw $15, %k0, %k0 ; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: movb %al, -{{[0-9]+}}(%rsp) ; KNL-NEXT: movb -{{[0-9]+}}(%rsp), %al ; KNL-NEXT: retq ; ; SKX-LABEL: test_v1i1_sub: ; SKX: ## %bb.0: ; SKX-NEXT: andl $1, %edi ; SKX-NEXT: movb %dil, -{{[0-9]+}}(%rsp) ; SKX-NEXT: andl $1, %esi ; SKX-NEXT: movb %sil, -{{[0-9]+}}(%rsp) ; SKX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k0 ; SKX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1 ; SKX-NEXT: kxorw %k1, %k0, %k0 ; SKX-NEXT: kshiftlb $7, %k0, %k0 ; SKX-NEXT: kshiftrb $7, %k0, %k0 ; SKX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp) ; SKX-NEXT: movb -{{[0-9]+}}(%rsp), %al ; SKX-NEXT: retq ; ; AVX512BW-LABEL: test_v1i1_sub: ; AVX512BW: ## %bb.0: ; AVX512BW-NEXT: kmovd %edi, %k0 ; AVX512BW-NEXT: kmovd %esi, %k1 ; AVX512BW-NEXT: kxorw %k1, %k0, %k0 ; AVX512BW-NEXT: kshiftlw $15, %k0, %k0 ; AVX512BW-NEXT: kshiftrw $15, %k0, %k0 ; AVX512BW-NEXT: kmovd %k0, %eax ; AVX512BW-NEXT: movb %al, -{{[0-9]+}}(%rsp) ; AVX512BW-NEXT: movb -{{[0-9]+}}(%rsp), %al ; AVX512BW-NEXT: retq ; ; AVX512DQ-LABEL: test_v1i1_sub: ; AVX512DQ: ## %bb.0: ; AVX512DQ-NEXT: andl $1, %edi ; AVX512DQ-NEXT: movb %dil, -{{[0-9]+}}(%rsp) ; AVX512DQ-NEXT: andl $1, %esi ; AVX512DQ-NEXT: movb %sil, -{{[0-9]+}}(%rsp) ; AVX512DQ-NEXT: kmovb -{{[0-9]+}}(%rsp), %k0 ; AVX512DQ-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1 ; AVX512DQ-NEXT: kxorw %k1, %k0, %k0 ; AVX512DQ-NEXT: kshiftlb $7, %k0, %k0 ; AVX512DQ-NEXT: kshiftrb $7, %k0, %k0 ; AVX512DQ-NEXT: kmovb %k0, 
-{{[0-9]+}}(%rsp) ; AVX512DQ-NEXT: movb -{{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: retq ; ; X86-LABEL: test_v1i1_sub: ; X86: ## %bb.0: ; X86-NEXT: pushl %eax ; X86-NEXT: .cfi_def_cfa_offset 8 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: andb $1, %al ; X86-NEXT: movb %al, {{[0-9]+}}(%esp) ; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: andb $1, %al ; X86-NEXT: movb %al, {{[0-9]+}}(%esp) ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k0 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 ; X86-NEXT: kxorw %k1, %k0, %k0 ; X86-NEXT: kshiftlb $7, %k0, %k0 ; X86-NEXT: kshiftrb $7, %k0, %k0 ; X86-NEXT: kmovb %k0, {{[0-9]+}}(%esp) ; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: popl %ecx ; X86-NEXT: retl %m0 = bitcast i1 %x to <1 x i1> %m1 = bitcast i1 %y to <1 x i1> %m2 = sub <1 x i1> %m0, %m1 %ret = bitcast <1 x i1> %m2 to i1 ret i1 %ret } define i1 @test_v1i1_mul(i1 %x, i1 %y) { ; KNL-LABEL: test_v1i1_mul: ; KNL: ## %bb.0: ; KNL-NEXT: kmovw %edi, %k0 ; KNL-NEXT: kmovw %esi, %k1 ; KNL-NEXT: kandw %k1, %k0, %k0 ; KNL-NEXT: kshiftlw $15, %k0, %k0 ; KNL-NEXT: kshiftrw $15, %k0, %k0 ; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: movb %al, -{{[0-9]+}}(%rsp) ; KNL-NEXT: movb -{{[0-9]+}}(%rsp), %al ; KNL-NEXT: retq ; ; SKX-LABEL: test_v1i1_mul: ; SKX: ## %bb.0: ; SKX-NEXT: andl $1, %edi ; SKX-NEXT: movb %dil, -{{[0-9]+}}(%rsp) ; SKX-NEXT: andl $1, %esi ; SKX-NEXT: movb %sil, -{{[0-9]+}}(%rsp) ; SKX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k0 ; SKX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1 ; SKX-NEXT: kandw %k1, %k0, %k0 ; SKX-NEXT: kshiftlb $7, %k0, %k0 ; SKX-NEXT: kshiftrb $7, %k0, %k0 ; SKX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp) ; SKX-NEXT: movb -{{[0-9]+}}(%rsp), %al ; SKX-NEXT: retq ; ; AVX512BW-LABEL: test_v1i1_mul: ; AVX512BW: ## %bb.0: ; AVX512BW-NEXT: kmovd %edi, %k0 ; AVX512BW-NEXT: kmovd %esi, %k1 ; AVX512BW-NEXT: kandw %k1, %k0, %k0 ; AVX512BW-NEXT: kshiftlw $15, %k0, %k0 ; AVX512BW-NEXT: kshiftrw $15, %k0, %k0 ; AVX512BW-NEXT: kmovd %k0, %eax ; AVX512BW-NEXT: movb %al, -{{[0-9]+}}(%rsp) ; AVX512BW-NEXT: 
; AVX512BW-NEXT:    movb -{{[0-9]+}}(%rsp), %al
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: test_v1i1_mul:
; AVX512DQ:       ## %bb.0:
; AVX512DQ-NEXT:    andl $1, %edi
; AVX512DQ-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
; AVX512DQ-NEXT:    andl $1, %esi
; AVX512DQ-NEXT:    movb %sil, -{{[0-9]+}}(%rsp)
; AVX512DQ-NEXT:    kmovb -{{[0-9]+}}(%rsp), %k0
; AVX512DQ-NEXT:    kmovb -{{[0-9]+}}(%rsp), %k1
; AVX512DQ-NEXT:    kandw %k1, %k0, %k0
; AVX512DQ-NEXT:    kshiftlb $7, %k0, %k0
; AVX512DQ-NEXT:    kshiftrb $7, %k0, %k0
; AVX512DQ-NEXT:    kmovb %k0, -{{[0-9]+}}(%rsp)
; AVX512DQ-NEXT:    movb -{{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT:    retq
;
; X86-LABEL: test_v1i1_mul:
; X86:       ## %bb.0:
; X86-NEXT:    pushl %eax
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    andb $1, %al
; X86-NEXT:    movb %al, {{[0-9]+}}(%esp)
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    andb $1, %al
; X86-NEXT:    movb %al, {{[0-9]+}}(%esp)
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k0
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1
; X86-NEXT:    kandw %k1, %k0, %k0
; X86-NEXT:    kshiftlb $7, %k0, %k0
; X86-NEXT:    kshiftrb $7, %k0, %k0
; X86-NEXT:    kmovb %k0, {{[0-9]+}}(%esp)
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    popl %ecx
; X86-NEXT:    retl
  %m0 = bitcast i1 %x to <1 x i1>
  %m1 = bitcast i1 %y to <1 x i1>
  %m2 = mul <1 x i1> %m0, %m1
  %ret = bitcast <1 x i1> %m2 to i1
  ret i1 %ret
}

; uadd.sat on <1 x i1> saturates to OR; checks below show it lowering to korw.
define <1 x i1> @uadd_sat_v1i1(<1 x i1> %x, <1 x i1> %y) nounwind {
; KNL-LABEL: uadd_sat_v1i1:
; KNL:       ## %bb.0:
; KNL-NEXT:    kmovw %edi, %k0
; KNL-NEXT:    kmovw %esi, %k1
; KNL-NEXT:    korw %k1, %k0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    ## kill: def $al killed $al killed $eax
; KNL-NEXT:    retq
;
; SKX-LABEL: uadd_sat_v1i1:
; SKX:       ## %bb.0:
; SKX-NEXT:    kmovd %edi, %k0
; SKX-NEXT:    kmovd %esi, %k1
; SKX-NEXT:    korw %k1, %k0, %k0
; SKX-NEXT:    kmovd %k0, %eax
; SKX-NEXT:    ## kill: def $al killed $al killed $eax
; SKX-NEXT:    retq
;
; AVX512BW-LABEL: uadd_sat_v1i1:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    kmovd %edi, %k0
; AVX512BW-NEXT:    kmovd %esi, %k1
; AVX512BW-NEXT:    korw %k1, %k0, %k0
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    ## kill: def $al killed $al killed $eax
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: uadd_sat_v1i1:
; AVX512DQ:       ## %bb.0:
; AVX512DQ-NEXT:    kmovw %edi, %k0
; AVX512DQ-NEXT:    kmovw %esi, %k1
; AVX512DQ-NEXT:    korw %k1, %k0, %k0
; AVX512DQ-NEXT:    kmovw %k0, %eax
; AVX512DQ-NEXT:    ## kill: def $al killed $al killed $eax
; AVX512DQ-NEXT:    retq
;
; X86-LABEL: uadd_sat_v1i1:
; X86:       ## %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    korw %k1, %k0, %k0
; X86-NEXT:    kmovd %k0, %eax
; X86-NEXT:    ## kill: def $al killed $al killed $eax
; X86-NEXT:    retl
  %z = call <1 x i1> @llvm.uadd.sat.v1i1(<1 x i1> %x, <1 x i1> %y)
  ret <1 x i1> %z
}
declare <1 x i1> @llvm.uadd.sat.v1i1(<1 x i1> %x, <1 x i1> %y)

; usub.sat on <1 x i1> is x & ~y; checks below show it lowering to kandnw.
define <1 x i1> @usub_sat_v1i1(<1 x i1> %x, <1 x i1> %y) nounwind {
; KNL-LABEL: usub_sat_v1i1:
; KNL:       ## %bb.0:
; KNL-NEXT:    kmovw %esi, %k0
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    kandnw %k1, %k0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    ## kill: def $al killed $al killed $eax
; KNL-NEXT:    retq
;
; SKX-LABEL: usub_sat_v1i1:
; SKX:       ## %bb.0:
; SKX-NEXT:    kmovd %esi, %k0
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    kandnw %k1, %k0, %k0
; SKX-NEXT:    kmovd %k0, %eax
; SKX-NEXT:    ## kill: def $al killed $al killed $eax
; SKX-NEXT:    retq
;
; AVX512BW-LABEL: usub_sat_v1i1:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    kmovd %esi, %k0
; AVX512BW-NEXT:    kmovd %edi, %k1
; AVX512BW-NEXT:    kandnw %k1, %k0, %k0
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    ## kill: def $al killed $al killed $eax
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: usub_sat_v1i1:
; AVX512DQ:       ## %bb.0:
; AVX512DQ-NEXT:    kmovw %esi, %k0
; AVX512DQ-NEXT:    kmovw %edi, %k1
; AVX512DQ-NEXT:    kandnw %k1, %k0, %k0
; AVX512DQ-NEXT:    kmovw %k0, %eax
; AVX512DQ-NEXT:    ## kill: def $al killed $al killed $eax
; AVX512DQ-NEXT:    retq
;
; X86-LABEL: usub_sat_v1i1:
; X86:       ## %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    kandnw %k1, %k0, %k0
; X86-NEXT:    kmovd %k0, %eax
; X86-NEXT:    ## kill: def $al killed $al killed $eax
; X86-NEXT:    retl
  %z = call <1 x i1> @llvm.usub.sat.v1i1(<1 x i1> %x, <1 x i1> %y)
  ret <1 x i1> %z
}
declare <1 x i1> @llvm.usub.sat.v1i1(<1 x i1> %x, <1 x i1> %y)

; sadd.sat on <1 x i1> lowers identically to the unsigned form (korw).
define <1 x i1> @sadd_sat_v1i1(<1 x i1> %x, <1 x i1> %y) nounwind {
; KNL-LABEL: sadd_sat_v1i1:
; KNL:       ## %bb.0:
; KNL-NEXT:    kmovw %edi, %k0
; KNL-NEXT:    kmovw %esi, %k1
; KNL-NEXT:    korw %k1, %k0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    ## kill: def $al killed $al killed $eax
; KNL-NEXT:    retq
;
; SKX-LABEL: sadd_sat_v1i1:
; SKX:       ## %bb.0:
; SKX-NEXT:    kmovd %edi, %k0
; SKX-NEXT:    kmovd %esi, %k1
; SKX-NEXT:    korw %k1, %k0, %k0
; SKX-NEXT:    kmovd %k0, %eax
; SKX-NEXT:    ## kill: def $al killed $al killed $eax
; SKX-NEXT:    retq
;
; AVX512BW-LABEL: sadd_sat_v1i1:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    kmovd %edi, %k0
; AVX512BW-NEXT:    kmovd %esi, %k1
; AVX512BW-NEXT:    korw %k1, %k0, %k0
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    ## kill: def $al killed $al killed $eax
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: sadd_sat_v1i1:
; AVX512DQ:       ## %bb.0:
; AVX512DQ-NEXT:    kmovw %edi, %k0
; AVX512DQ-NEXT:    kmovw %esi, %k1
; AVX512DQ-NEXT:    korw %k1, %k0, %k0
; AVX512DQ-NEXT:    kmovw %k0, %eax
; AVX512DQ-NEXT:    ## kill: def $al killed $al killed $eax
; AVX512DQ-NEXT:    retq
;
; X86-LABEL: sadd_sat_v1i1:
; X86:       ## %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    korw %k1, %k0, %k0
; X86-NEXT:    kmovd %k0, %eax
; X86-NEXT:    ## kill: def $al killed $al killed $eax
; X86-NEXT:    retl
  %z = call <1 x i1> @llvm.sadd.sat.v1i1(<1 x i1> %x, <1 x i1> %y)
  ret <1 x i1> %z
}
declare <1 x i1> @llvm.sadd.sat.v1i1(<1 x i1> %x, <1 x i1> %y)

; ssub.sat on <1 x i1> lowers identically to the unsigned form (kandnw).
define <1 x i1> @ssub_sat_v1i1(<1 x i1> %x, <1 x i1> %y) nounwind {
; KNL-LABEL: ssub_sat_v1i1:
; KNL:       ## %bb.0:
; KNL-NEXT:    kmovw %esi, %k0
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    kandnw %k1, %k0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    ## kill: def $al killed $al killed $eax
; KNL-NEXT:    retq
;
; SKX-LABEL: ssub_sat_v1i1:
; SKX:       ## %bb.0:
; SKX-NEXT:    kmovd %esi, %k0
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    kandnw %k1, %k0, %k0
; SKX-NEXT:    kmovd %k0, %eax
; SKX-NEXT:    ## kill: def $al killed $al killed $eax
; SKX-NEXT:    retq
;
; AVX512BW-LABEL: ssub_sat_v1i1:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    kmovd %esi, %k0
; AVX512BW-NEXT:    kmovd %edi, %k1
; AVX512BW-NEXT:    kandnw %k1, %k0, %k0
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    ## kill: def $al killed $al killed $eax
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: ssub_sat_v1i1:
; AVX512DQ:       ## %bb.0:
; AVX512DQ-NEXT:    kmovw %esi, %k0
; AVX512DQ-NEXT:    kmovw %edi, %k1
; AVX512DQ-NEXT:    kandnw %k1, %k0, %k0
; AVX512DQ-NEXT:    kmovw %k0, %eax
; AVX512DQ-NEXT:    ## kill: def $al killed $al killed $eax
; AVX512DQ-NEXT:    retq
;
; X86-LABEL: ssub_sat_v1i1:
; X86:       ## %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    kandnw %k1, %k0, %k0
; X86-NEXT:    kmovd %k0, %eax
; X86-NEXT:    ## kill: def $al killed $al killed $eax
; X86-NEXT:    retl
  %z = call <1 x i1> @llvm.ssub.sat.v1i1(<1 x i1> %x, <1 x i1> %y)
  ret <1 x i1> %z
}
declare <1 x i1> @llvm.ssub.sat.v1i1(<1 x i1> %x, <1 x i1> %y)

; Module-level ProfileSummary metadata (InstrProf), one node per line.
!llvm.module.flags = !{!0}

!0 = !{i32 1, !"ProfileSummary", !1}
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
!2 = !{!"ProfileFormat", !"InstrProf"}
!3 = !{!"TotalCount", i64 10000}
!4 = !{!"MaxCount", i64 10}
!5 = !{!"MaxInternalCount", i64 1}
!6 = !{!"MaxFunctionCount", i64 1000}
!7 = !{!"NumCounts", i64 3}
!8 = !{!"NumFunctions", i64 3}
!9 = !{!"DetailedSummary", !10}
!10 = !{!11, !12, !13}
!11 = !{i32 10000, i64 100, i32 1}
!12 = !{i32 999000, i64 100, i32 1}
!13 = !{i32 999999, i64 1, i32 2}
!14 = !{!"function_entry_count", i64 0}