521 lines
24 KiB
LLVM
521 lines
24 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc < %s -mtriple=i686-apple-darwin -mcpu=knl | FileCheck %s -check-prefix=X32
|
|
; RUN: llc < %s -mtriple=i686-apple-darwin -mcpu=skx | FileCheck %s -check-prefix=X32
|
|
; RUN: llc < %s -mtriple=i386-pc-win32 -mcpu=knl | FileCheck %s -check-prefix=WIN32
|
|
; RUN: llc < %s -mtriple=i386-pc-win32 -mcpu=skx | FileCheck %s -check-prefix=WIN32
|
|
; RUN: llc < %s -mtriple=x86_64-win32 -mcpu=knl | FileCheck %s -check-prefixes=WIN64,WIN64-KNL
|
|
; RUN: llc < %s -mtriple=x86_64-win32 -mcpu=skx | FileCheck %s -check-prefixes=WIN64,WIN64-SKX
|
|
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s -check-prefixes=X64,X64-KNL
|
|
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s -check-prefixes=X64,X64-SKX
|
|
|
|
declare <16 x float> @func_float16_ptr(<16 x float>, <16 x float> *)
|
|
declare <16 x float> @func_float16(<16 x float>, <16 x float>)
|
|
declare i32 @func_int(i32, i32)
|
|
|
|
; Test calling conventions - input parameters.
;
; testf16_inp adds its two <16 x float> arguments and passes the sum, together
; with a pointer to a 64-byte-aligned stack slot (%y), to @func_float16_ptr
; using the intel_ocl_bicc calling convention. The value loaded back from %y is
; then added to the call result and returned.
;
; In the expected assembly the i686 and x86_64-darwin targets add %zmm0 and
; %zmm1 directly, while the x86_64-win32 target loads both operands through
; %rcx and %rdx.
|
|
define <16 x float> @testf16_inp(<16 x float> %a, <16 x float> %b) nounwind {
|
|
; X32-LABEL: testf16_inp:
|
|
; X32: ## %bb.0:
|
|
; X32-NEXT: pushl %ebp
|
|
; X32-NEXT: movl %esp, %ebp
|
|
; X32-NEXT: andl $-64, %esp
|
|
; X32-NEXT: subl $192, %esp
|
|
; X32-NEXT: vaddps %zmm1, %zmm0, %zmm0
|
|
; X32-NEXT: leal {{[0-9]+}}(%esp), %eax
|
|
; X32-NEXT: movl %eax, (%esp)
|
|
; X32-NEXT: calll _func_float16_ptr
|
|
; X32-NEXT: vaddps {{[0-9]+}}(%esp), %zmm0, %zmm0
|
|
; X32-NEXT: movl %ebp, %esp
|
|
; X32-NEXT: popl %ebp
|
|
; X32-NEXT: retl
|
|
;
|
|
; WIN32-LABEL: testf16_inp:
|
|
; WIN32: # %bb.0:
|
|
; WIN32-NEXT: pushl %ebp
|
|
; WIN32-NEXT: movl %esp, %ebp
|
|
; WIN32-NEXT: andl $-64, %esp
|
|
; WIN32-NEXT: subl $128, %esp
|
|
; WIN32-NEXT: vaddps %zmm1, %zmm0, %zmm0
|
|
; WIN32-NEXT: movl %esp, %eax
|
|
; WIN32-NEXT: pushl %eax
|
|
; WIN32-NEXT: calll _func_float16_ptr
|
|
; WIN32-NEXT: addl $4, %esp
|
|
; WIN32-NEXT: vaddps (%esp), %zmm0, %zmm0
|
|
; WIN32-NEXT: movl %ebp, %esp
|
|
; WIN32-NEXT: popl %ebp
|
|
; WIN32-NEXT: retl
|
|
;
|
|
; WIN64-LABEL: testf16_inp:
|
|
; WIN64: # %bb.0:
|
|
; WIN64-NEXT: pushq %rbp
|
|
; WIN64-NEXT: subq $176, %rsp
|
|
; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rbp
|
|
; WIN64-NEXT: andq $-64, %rsp
|
|
; WIN64-NEXT: vmovaps (%rcx), %zmm0
|
|
; WIN64-NEXT: vaddps (%rdx), %zmm0, %zmm0
|
|
; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
|
|
; WIN64-NEXT: callq func_float16_ptr
|
|
; WIN64-NEXT: vaddps {{[0-9]+}}(%rsp), %zmm0, %zmm0
|
|
; WIN64-NEXT: leaq 48(%rbp), %rsp
|
|
; WIN64-NEXT: popq %rbp
|
|
; WIN64-NEXT: retq
|
|
;
|
|
; X64-LABEL: testf16_inp:
|
|
; X64: ## %bb.0:
|
|
; X64-NEXT: pushq %rbp
|
|
; X64-NEXT: movq %rsp, %rbp
|
|
; X64-NEXT: pushq %r13
|
|
; X64-NEXT: pushq %r12
|
|
; X64-NEXT: andq $-64, %rsp
|
|
; X64-NEXT: subq $128, %rsp
|
|
; X64-NEXT: vaddps %zmm1, %zmm0, %zmm0
|
|
; X64-NEXT: movq %rsp, %rdi
|
|
; X64-NEXT: callq _func_float16_ptr
|
|
; X64-NEXT: vaddps (%rsp), %zmm0, %zmm0
|
|
; X64-NEXT: leaq -16(%rbp), %rsp
|
|
; X64-NEXT: popq %r12
|
|
; X64-NEXT: popq %r13
|
|
; X64-NEXT: popq %rbp
|
|
; X64-NEXT: retq
|
|
%y = alloca <16 x float>, align 64
|
|
%x = fadd <16 x float> %a, %b
|
|
%1 = call intel_ocl_bicc <16 x float> @func_float16_ptr(<16 x float> %x, <16 x float>* %y)
|
|
%2 = load <16 x float>, <16 x float>* %y, align 16
|
|
%3 = fadd <16 x float> %2, %1
|
|
ret <16 x float> %3
|
|
}
|
|
|
|
; Test calling conventions - preserved registers.
;
; testf16_regs computes %a + %b, passes the sum and a pointer to a
; 64-byte-aligned stack slot (%y) to @func_float16_ptr via an intel_ocl_bicc
; call, and then uses %b again after the call, so %b must survive the call.
;
; In the expected assembly the 32-bit targets spill %zmm1 to the stack before
; the call and fold the reload into vaddps, whereas both 64-bit targets hold
; the value in %zmm16 across the call.
|
|
|
|
define <16 x float> @testf16_regs(<16 x float> %a, <16 x float> %b) nounwind {
|
|
; X32-LABEL: testf16_regs:
|
|
; X32: ## %bb.0:
|
|
; X32-NEXT: pushl %ebp
|
|
; X32-NEXT: movl %esp, %ebp
|
|
; X32-NEXT: andl $-64, %esp
|
|
; X32-NEXT: subl $256, %esp ## imm = 0x100
|
|
; X32-NEXT: vmovaps %zmm1, {{[-0-9]+}}(%e{{[sb]}}p) ## 64-byte Spill
|
|
; X32-NEXT: vaddps %zmm1, %zmm0, %zmm0
|
|
; X32-NEXT: leal {{[0-9]+}}(%esp), %eax
|
|
; X32-NEXT: movl %eax, (%esp)
|
|
; X32-NEXT: calll _func_float16_ptr
|
|
; X32-NEXT: vaddps {{[-0-9]+}}(%e{{[sb]}}p), %zmm0, %zmm0 ## 64-byte Folded Reload
|
|
; X32-NEXT: vaddps {{[0-9]+}}(%esp), %zmm0, %zmm0
|
|
; X32-NEXT: movl %ebp, %esp
|
|
; X32-NEXT: popl %ebp
|
|
; X32-NEXT: retl
|
|
;
|
|
; WIN32-LABEL: testf16_regs:
|
|
; WIN32: # %bb.0:
|
|
; WIN32-NEXT: pushl %ebp
|
|
; WIN32-NEXT: movl %esp, %ebp
|
|
; WIN32-NEXT: andl $-64, %esp
|
|
; WIN32-NEXT: subl $192, %esp
|
|
; WIN32-NEXT: vmovaps %zmm1, (%esp) # 64-byte Spill
|
|
; WIN32-NEXT: vaddps %zmm1, %zmm0, %zmm0
|
|
; WIN32-NEXT: leal {{[0-9]+}}(%esp), %eax
|
|
; WIN32-NEXT: pushl %eax
|
|
; WIN32-NEXT: calll _func_float16_ptr
|
|
; WIN32-NEXT: addl $4, %esp
|
|
; WIN32-NEXT: vaddps (%esp), %zmm0, %zmm0 # 64-byte Folded Reload
|
|
; WIN32-NEXT: vaddps {{[0-9]+}}(%esp), %zmm0, %zmm0
|
|
; WIN32-NEXT: movl %ebp, %esp
|
|
; WIN32-NEXT: popl %ebp
|
|
; WIN32-NEXT: retl
|
|
;
|
|
; WIN64-LABEL: testf16_regs:
|
|
; WIN64: # %bb.0:
|
|
; WIN64-NEXT: pushq %rbp
|
|
; WIN64-NEXT: subq $176, %rsp
|
|
; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rbp
|
|
; WIN64-NEXT: andq $-64, %rsp
|
|
; WIN64-NEXT: vmovaps (%rdx), %zmm16
|
|
; WIN64-NEXT: vaddps (%rcx), %zmm16, %zmm0
|
|
; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
|
|
; WIN64-NEXT: callq func_float16_ptr
|
|
; WIN64-NEXT: vaddps %zmm16, %zmm0, %zmm0
|
|
; WIN64-NEXT: vaddps {{[0-9]+}}(%rsp), %zmm0, %zmm0
|
|
; WIN64-NEXT: leaq 48(%rbp), %rsp
|
|
; WIN64-NEXT: popq %rbp
|
|
; WIN64-NEXT: retq
|
|
;
|
|
; X64-LABEL: testf16_regs:
|
|
; X64: ## %bb.0:
|
|
; X64-NEXT: pushq %rbp
|
|
; X64-NEXT: movq %rsp, %rbp
|
|
; X64-NEXT: pushq %r13
|
|
; X64-NEXT: pushq %r12
|
|
; X64-NEXT: andq $-64, %rsp
|
|
; X64-NEXT: subq $128, %rsp
|
|
; X64-NEXT: vmovaps %zmm1, %zmm16
|
|
; X64-NEXT: vaddps %zmm1, %zmm0, %zmm0
|
|
; X64-NEXT: movq %rsp, %rdi
|
|
; X64-NEXT: callq _func_float16_ptr
|
|
; X64-NEXT: vaddps %zmm16, %zmm0, %zmm0
|
|
; X64-NEXT: vaddps (%rsp), %zmm0, %zmm0
|
|
; X64-NEXT: leaq -16(%rbp), %rsp
|
|
; X64-NEXT: popq %r12
|
|
; X64-NEXT: popq %r13
|
|
; X64-NEXT: popq %rbp
|
|
; X64-NEXT: retq
|
|
%y = alloca <16 x float>, align 64
|
|
%x = fadd <16 x float> %a, %b
|
|
%1 = call intel_ocl_bicc <16 x float> @func_float16_ptr(<16 x float> %x, <16 x float>* %y)
|
|
%2 = load <16 x float>, <16 x float>* %y, align 16
|
|
%3 = fadd <16 x float> %1, %b
|
|
%4 = fadd <16 x float> %2, %3
|
|
ret <16 x float> %4
|
|
}
|
|
|
|
; Test calling conventions - prolog and epilog.
;
; test_prolog_epilog is itself declared intel_ocl_bicc and forwards its two
; arguments to @func_float16 through a call that carries no calling-convention
; keyword. The checks pin what the prologue and epilogue save and restore
; around that call:
;   - i686 darwin and win32: no vector or mask register saves are expected.
;   - x86_64-win32: %k4-%k7 and %zmm6-%zmm21 are saved; %zmm1 and %zmm0 are
;     stored to the stack and two stack addresses are materialized in
;     %rcx and %rdx before the call.
;   - x86_64-darwin: %k4-%k7 and %zmm16-%zmm31 are saved.
; The KNL and SKX expectations differ only in the mask save/restore
; instructions: kmovw (2-byte slots) on KNL versus kmovq (8-byte slots) on SKX.
|
|
define intel_ocl_bicc <16 x float> @test_prolog_epilog(<16 x float> %a, <16 x float> %b) nounwind {
|
|
; X32-LABEL: test_prolog_epilog:
|
|
; X32: ## %bb.0:
|
|
; X32-NEXT: subl $12, %esp
|
|
; X32-NEXT: calll _func_float16
|
|
; X32-NEXT: addl $12, %esp
|
|
; X32-NEXT: retl
|
|
;
|
|
; WIN32-LABEL: test_prolog_epilog:
|
|
; WIN32: # %bb.0:
|
|
; WIN32-NEXT: calll _func_float16
|
|
; WIN32-NEXT: retl
|
|
;
|
|
; WIN64-KNL-LABEL: test_prolog_epilog:
|
|
; WIN64-KNL: # %bb.0:
|
|
; WIN64-KNL-NEXT: pushq %rbp
|
|
; WIN64-KNL-NEXT: subq $1264, %rsp # imm = 0x4F0
|
|
; WIN64-KNL-NEXT: leaq {{[0-9]+}}(%rsp), %rbp
|
|
; WIN64-KNL-NEXT: kmovw %k7, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
|
|
; WIN64-KNL-NEXT: kmovw %k6, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
|
|
; WIN64-KNL-NEXT: kmovw %k5, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
|
|
; WIN64-KNL-NEXT: kmovw %k4, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
|
|
; WIN64-KNL-NEXT: vmovaps %zmm21, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
|
|
; WIN64-KNL-NEXT: vmovaps %zmm20, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
|
|
; WIN64-KNL-NEXT: vmovaps %zmm19, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
|
|
; WIN64-KNL-NEXT: vmovaps %zmm18, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
|
|
; WIN64-KNL-NEXT: vmovaps %zmm17, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
|
|
; WIN64-KNL-NEXT: vmovaps %zmm16, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
|
|
; WIN64-KNL-NEXT: vmovaps %zmm15, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
|
|
; WIN64-KNL-NEXT: vmovaps %zmm14, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
|
|
; WIN64-KNL-NEXT: vmovaps %zmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
|
|
; WIN64-KNL-NEXT: vmovaps %zmm12, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
|
|
; WIN64-KNL-NEXT: vmovaps %zmm11, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
|
|
; WIN64-KNL-NEXT: vmovaps %zmm10, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
|
|
; WIN64-KNL-NEXT: vmovaps %zmm9, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
|
|
; WIN64-KNL-NEXT: vmovaps %zmm8, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
|
|
; WIN64-KNL-NEXT: vmovaps %zmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
|
|
; WIN64-KNL-NEXT: vmovaps %zmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
|
|
; WIN64-KNL-NEXT: andq $-64, %rsp
|
|
; WIN64-KNL-NEXT: vmovaps %zmm1, {{[0-9]+}}(%rsp)
|
|
; WIN64-KNL-NEXT: vmovaps %zmm0, {{[0-9]+}}(%rsp)
|
|
; WIN64-KNL-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
|
|
; WIN64-KNL-NEXT: leaq {{[0-9]+}}(%rsp), %rdx
|
|
; WIN64-KNL-NEXT: callq func_float16
|
|
; WIN64-KNL-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm6 # 64-byte Reload
|
|
; WIN64-KNL-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm7 # 64-byte Reload
|
|
; WIN64-KNL-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm8 # 64-byte Reload
|
|
; WIN64-KNL-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm9 # 64-byte Reload
|
|
; WIN64-KNL-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm10 # 64-byte Reload
|
|
; WIN64-KNL-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm11 # 64-byte Reload
|
|
; WIN64-KNL-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm12 # 64-byte Reload
|
|
; WIN64-KNL-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm13 # 64-byte Reload
|
|
; WIN64-KNL-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm14 # 64-byte Reload
|
|
; WIN64-KNL-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm15 # 64-byte Reload
|
|
; WIN64-KNL-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm16 # 64-byte Reload
|
|
; WIN64-KNL-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm17 # 64-byte Reload
|
|
; WIN64-KNL-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm18 # 64-byte Reload
|
|
; WIN64-KNL-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm19 # 64-byte Reload
|
|
; WIN64-KNL-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm20 # 64-byte Reload
|
|
; WIN64-KNL-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm21 # 64-byte Reload
|
|
; WIN64-KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k4 # 2-byte Reload
|
|
; WIN64-KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
|
|
; WIN64-KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload
|
|
; WIN64-KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload
|
|
; WIN64-KNL-NEXT: leaq 1136(%rbp), %rsp
|
|
; WIN64-KNL-NEXT: popq %rbp
|
|
; WIN64-KNL-NEXT: retq
|
|
;
|
|
; WIN64-SKX-LABEL: test_prolog_epilog:
|
|
; WIN64-SKX: # %bb.0:
|
|
; WIN64-SKX-NEXT: pushq %rbp
|
|
; WIN64-SKX-NEXT: subq $1264, %rsp # imm = 0x4F0
|
|
; WIN64-SKX-NEXT: leaq {{[0-9]+}}(%rsp), %rbp
|
|
; WIN64-SKX-NEXT: kmovq %k7, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
|
; WIN64-SKX-NEXT: kmovq %k6, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
|
; WIN64-SKX-NEXT: kmovq %k5, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
|
; WIN64-SKX-NEXT: kmovq %k4, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
|
; WIN64-SKX-NEXT: vmovaps %zmm21, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
|
|
; WIN64-SKX-NEXT: vmovaps %zmm20, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
|
|
; WIN64-SKX-NEXT: vmovaps %zmm19, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
|
|
; WIN64-SKX-NEXT: vmovaps %zmm18, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
|
|
; WIN64-SKX-NEXT: vmovaps %zmm17, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
|
|
; WIN64-SKX-NEXT: vmovaps %zmm16, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
|
|
; WIN64-SKX-NEXT: vmovaps %zmm15, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
|
|
; WIN64-SKX-NEXT: vmovaps %zmm14, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
|
|
; WIN64-SKX-NEXT: vmovaps %zmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
|
|
; WIN64-SKX-NEXT: vmovaps %zmm12, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
|
|
; WIN64-SKX-NEXT: vmovaps %zmm11, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
|
|
; WIN64-SKX-NEXT: vmovaps %zmm10, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
|
|
; WIN64-SKX-NEXT: vmovaps %zmm9, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
|
|
; WIN64-SKX-NEXT: vmovaps %zmm8, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
|
|
; WIN64-SKX-NEXT: vmovaps %zmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
|
|
; WIN64-SKX-NEXT: vmovaps %zmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
|
|
; WIN64-SKX-NEXT: andq $-64, %rsp
|
|
; WIN64-SKX-NEXT: vmovaps %zmm1, {{[0-9]+}}(%rsp)
|
|
; WIN64-SKX-NEXT: vmovaps %zmm0, {{[0-9]+}}(%rsp)
|
|
; WIN64-SKX-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
|
|
; WIN64-SKX-NEXT: leaq {{[0-9]+}}(%rsp), %rdx
|
|
; WIN64-SKX-NEXT: callq func_float16
|
|
; WIN64-SKX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm6 # 64-byte Reload
|
|
; WIN64-SKX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm7 # 64-byte Reload
|
|
; WIN64-SKX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm8 # 64-byte Reload
|
|
; WIN64-SKX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm9 # 64-byte Reload
|
|
; WIN64-SKX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm10 # 64-byte Reload
|
|
; WIN64-SKX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm11 # 64-byte Reload
|
|
; WIN64-SKX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm12 # 64-byte Reload
|
|
; WIN64-SKX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm13 # 64-byte Reload
|
|
; WIN64-SKX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm14 # 64-byte Reload
|
|
; WIN64-SKX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm15 # 64-byte Reload
|
|
; WIN64-SKX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm16 # 64-byte Reload
|
|
; WIN64-SKX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm17 # 64-byte Reload
|
|
; WIN64-SKX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm18 # 64-byte Reload
|
|
; WIN64-SKX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm19 # 64-byte Reload
|
|
; WIN64-SKX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm20 # 64-byte Reload
|
|
; WIN64-SKX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm21 # 64-byte Reload
|
|
; WIN64-SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k4 # 8-byte Reload
|
|
; WIN64-SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 8-byte Reload
|
|
; WIN64-SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 8-byte Reload
|
|
; WIN64-SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 8-byte Reload
|
|
; WIN64-SKX-NEXT: leaq 1136(%rbp), %rsp
|
|
; WIN64-SKX-NEXT: popq %rbp
|
|
; WIN64-SKX-NEXT: retq
|
|
;
|
|
; X64-KNL-LABEL: test_prolog_epilog:
|
|
; X64-KNL: ## %bb.0:
|
|
; X64-KNL-NEXT: pushq %rsi
|
|
; X64-KNL-NEXT: subq $1072, %rsp ## imm = 0x430
|
|
; X64-KNL-NEXT: kmovw %k7, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
|
|
; X64-KNL-NEXT: kmovw %k6, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
|
|
; X64-KNL-NEXT: kmovw %k5, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
|
|
; X64-KNL-NEXT: kmovw %k4, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
|
|
; X64-KNL-NEXT: vmovups %zmm31, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
|
|
; X64-KNL-NEXT: vmovups %zmm30, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
|
|
; X64-KNL-NEXT: vmovups %zmm29, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
|
|
; X64-KNL-NEXT: vmovups %zmm28, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
|
|
; X64-KNL-NEXT: vmovups %zmm27, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
|
|
; X64-KNL-NEXT: vmovups %zmm26, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
|
|
; X64-KNL-NEXT: vmovups %zmm25, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
|
|
; X64-KNL-NEXT: vmovups %zmm24, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
|
|
; X64-KNL-NEXT: vmovups %zmm23, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
|
|
; X64-KNL-NEXT: vmovups %zmm22, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
|
|
; X64-KNL-NEXT: vmovups %zmm21, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
|
|
; X64-KNL-NEXT: vmovups %zmm20, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
|
|
; X64-KNL-NEXT: vmovups %zmm19, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
|
|
; X64-KNL-NEXT: vmovups %zmm18, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
|
|
; X64-KNL-NEXT: vmovups %zmm17, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
|
|
; X64-KNL-NEXT: vmovups %zmm16, (%rsp) ## 64-byte Spill
|
|
; X64-KNL-NEXT: callq _func_float16
|
|
; X64-KNL-NEXT: vmovups (%rsp), %zmm16 ## 64-byte Reload
|
|
; X64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm17 ## 64-byte Reload
|
|
; X64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm18 ## 64-byte Reload
|
|
; X64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm19 ## 64-byte Reload
|
|
; X64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm20 ## 64-byte Reload
|
|
; X64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm21 ## 64-byte Reload
|
|
; X64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm22 ## 64-byte Reload
|
|
; X64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm23 ## 64-byte Reload
|
|
; X64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm24 ## 64-byte Reload
|
|
; X64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm25 ## 64-byte Reload
|
|
; X64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm26 ## 64-byte Reload
|
|
; X64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm27 ## 64-byte Reload
|
|
; X64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm28 ## 64-byte Reload
|
|
; X64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm29 ## 64-byte Reload
|
|
; X64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm30 ## 64-byte Reload
|
|
; X64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm31 ## 64-byte Reload
|
|
; X64-KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k4 ## 2-byte Reload
|
|
; X64-KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload
|
|
; X64-KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload
|
|
; X64-KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload
|
|
; X64-KNL-NEXT: addq $1072, %rsp ## imm = 0x430
|
|
; X64-KNL-NEXT: popq %rsi
|
|
; X64-KNL-NEXT: retq
|
|
;
|
|
; X64-SKX-LABEL: test_prolog_epilog:
|
|
; X64-SKX: ## %bb.0:
|
|
; X64-SKX-NEXT: pushq %rsi
|
|
; X64-SKX-NEXT: subq $1072, %rsp ## imm = 0x430
|
|
; X64-SKX-NEXT: kmovq %k7, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
|
|
; X64-SKX-NEXT: kmovq %k6, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
|
|
; X64-SKX-NEXT: kmovq %k5, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
|
|
; X64-SKX-NEXT: kmovq %k4, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
|
|
; X64-SKX-NEXT: vmovups %zmm31, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
|
|
; X64-SKX-NEXT: vmovups %zmm30, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
|
|
; X64-SKX-NEXT: vmovups %zmm29, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
|
|
; X64-SKX-NEXT: vmovups %zmm28, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
|
|
; X64-SKX-NEXT: vmovups %zmm27, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
|
|
; X64-SKX-NEXT: vmovups %zmm26, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
|
|
; X64-SKX-NEXT: vmovups %zmm25, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
|
|
; X64-SKX-NEXT: vmovups %zmm24, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
|
|
; X64-SKX-NEXT: vmovups %zmm23, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
|
|
; X64-SKX-NEXT: vmovups %zmm22, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
|
|
; X64-SKX-NEXT: vmovups %zmm21, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
|
|
; X64-SKX-NEXT: vmovups %zmm20, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
|
|
; X64-SKX-NEXT: vmovups %zmm19, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
|
|
; X64-SKX-NEXT: vmovups %zmm18, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
|
|
; X64-SKX-NEXT: vmovups %zmm17, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
|
|
; X64-SKX-NEXT: vmovups %zmm16, (%rsp) ## 64-byte Spill
|
|
; X64-SKX-NEXT: callq _func_float16
|
|
; X64-SKX-NEXT: vmovups (%rsp), %zmm16 ## 64-byte Reload
|
|
; X64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm17 ## 64-byte Reload
|
|
; X64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm18 ## 64-byte Reload
|
|
; X64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm19 ## 64-byte Reload
|
|
; X64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm20 ## 64-byte Reload
|
|
; X64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm21 ## 64-byte Reload
|
|
; X64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm22 ## 64-byte Reload
|
|
; X64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm23 ## 64-byte Reload
|
|
; X64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm24 ## 64-byte Reload
|
|
; X64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm25 ## 64-byte Reload
|
|
; X64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm26 ## 64-byte Reload
|
|
; X64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm27 ## 64-byte Reload
|
|
; X64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm28 ## 64-byte Reload
|
|
; X64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm29 ## 64-byte Reload
|
|
; X64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm30 ## 64-byte Reload
|
|
; X64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm31 ## 64-byte Reload
|
|
; X64-SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k4 ## 8-byte Reload
|
|
; X64-SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 8-byte Reload
|
|
; X64-SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 8-byte Reload
|
|
; X64-SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 8-byte Reload
|
|
; X64-SKX-NEXT: addq $1072, %rsp ## imm = 0x430
|
|
; X64-SKX-NEXT: popq %rsi
|
|
; X64-SKX-NEXT: retq
|
|
%c = call <16 x float> @func_float16(<16 x float> %a, <16 x float> %b)
|
|
ret <16 x float> %c
|
|
}
|
|
|
|
|
|
declare <16 x float> @func_float16_mask(<16 x float>, <16 x i1>)
|
|
|
|
; Test calling conventions - mask argument.
;
; testf16_inp_mask bitcasts the i16 %mask to <16 x i1> and passes it, together
; with %a, to @func_float16_mask using the intel_ocl_bicc calling convention.
; In every expected sequence the mask is moved into %k1 before the call:
; kmovw from a stack slot on the 32-bit targets, and from %edx (x86_64-win32)
; or %edi (x86_64-darwin) on the 64-bit targets, using kmovw on KNL and kmovd
; on SKX.
;
; This function is not marked nounwind; the expected output includes .cfi_*
; directives on the darwin targets and .seh_* directives on x86_64-win32.
define <16 x float> @testf16_inp_mask(<16 x float> %a, i16 %mask) {
|
|
; X32-LABEL: testf16_inp_mask:
|
|
; X32: ## %bb.0:
|
|
; X32-NEXT: subl $12, %esp
|
|
; X32-NEXT: .cfi_def_cfa_offset 16
|
|
; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
|
|
; X32-NEXT: calll _func_float16_mask
|
|
; X32-NEXT: addl $12, %esp
|
|
; X32-NEXT: retl
|
|
;
|
|
; WIN32-LABEL: testf16_inp_mask:
|
|
; WIN32: # %bb.0:
|
|
; WIN32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
|
|
; WIN32-NEXT: calll _func_float16_mask
|
|
; WIN32-NEXT: retl
|
|
;
|
|
; WIN64-KNL-LABEL: testf16_inp_mask:
|
|
; WIN64-KNL: # %bb.0:
|
|
; WIN64-KNL-NEXT: subq $40, %rsp
|
|
; WIN64-KNL-NEXT: .seh_stackalloc 40
|
|
; WIN64-KNL-NEXT: .seh_endprologue
|
|
; WIN64-KNL-NEXT: # kill: def $dx killed $dx def $edx
|
|
; WIN64-KNL-NEXT: vmovaps (%rcx), %zmm0
|
|
; WIN64-KNL-NEXT: kmovw %edx, %k1
|
|
; WIN64-KNL-NEXT: callq func_float16_mask
|
|
; WIN64-KNL-NEXT: nop
|
|
; WIN64-KNL-NEXT: addq $40, %rsp
|
|
; WIN64-KNL-NEXT: retq
|
|
; WIN64-KNL-NEXT: .seh_endproc
|
|
;
|
|
; WIN64-SKX-LABEL: testf16_inp_mask:
|
|
; WIN64-SKX: # %bb.0:
|
|
; WIN64-SKX-NEXT: subq $40, %rsp
|
|
; WIN64-SKX-NEXT: .seh_stackalloc 40
|
|
; WIN64-SKX-NEXT: .seh_endprologue
|
|
; WIN64-SKX-NEXT: # kill: def $dx killed $dx def $edx
|
|
; WIN64-SKX-NEXT: vmovaps (%rcx), %zmm0
|
|
; WIN64-SKX-NEXT: kmovd %edx, %k1
|
|
; WIN64-SKX-NEXT: callq func_float16_mask
|
|
; WIN64-SKX-NEXT: nop
|
|
; WIN64-SKX-NEXT: addq $40, %rsp
|
|
; WIN64-SKX-NEXT: retq
|
|
; WIN64-SKX-NEXT: .seh_endproc
|
|
;
|
|
; X64-KNL-LABEL: testf16_inp_mask:
|
|
; X64-KNL: ## %bb.0:
|
|
; X64-KNL-NEXT: pushq %rbp
|
|
; X64-KNL-NEXT: .cfi_def_cfa_offset 16
|
|
; X64-KNL-NEXT: pushq %r13
|
|
; X64-KNL-NEXT: .cfi_def_cfa_offset 24
|
|
; X64-KNL-NEXT: pushq %r12
|
|
; X64-KNL-NEXT: .cfi_def_cfa_offset 32
|
|
; X64-KNL-NEXT: .cfi_offset %r12, -32
|
|
; X64-KNL-NEXT: .cfi_offset %r13, -24
|
|
; X64-KNL-NEXT: .cfi_offset %rbp, -16
|
|
; X64-KNL-NEXT: kmovw %edi, %k1
|
|
; X64-KNL-NEXT: callq _func_float16_mask
|
|
; X64-KNL-NEXT: popq %r12
|
|
; X64-KNL-NEXT: popq %r13
|
|
; X64-KNL-NEXT: popq %rbp
|
|
; X64-KNL-NEXT: retq
|
|
;
|
|
; X64-SKX-LABEL: testf16_inp_mask:
|
|
; X64-SKX: ## %bb.0:
|
|
; X64-SKX-NEXT: pushq %rbp
|
|
; X64-SKX-NEXT: .cfi_def_cfa_offset 16
|
|
; X64-SKX-NEXT: pushq %r13
|
|
; X64-SKX-NEXT: .cfi_def_cfa_offset 24
|
|
; X64-SKX-NEXT: pushq %r12
|
|
; X64-SKX-NEXT: .cfi_def_cfa_offset 32
|
|
; X64-SKX-NEXT: .cfi_offset %r12, -32
|
|
; X64-SKX-NEXT: .cfi_offset %r13, -24
|
|
; X64-SKX-NEXT: .cfi_offset %rbp, -16
|
|
; X64-SKX-NEXT: kmovd %edi, %k1
|
|
; X64-SKX-NEXT: callq _func_float16_mask
|
|
; X64-SKX-NEXT: popq %r12
|
|
; X64-SKX-NEXT: popq %r13
|
|
; X64-SKX-NEXT: popq %rbp
|
|
; X64-SKX-NEXT: retq
|
|
%imask = bitcast i16 %mask to <16 x i1>
|
|
%1 = call intel_ocl_bicc <16 x float> @func_float16_mask(<16 x float> %a, <16 x i1> %imask)
|
|
ret <16 x float> %1
|
|
}
|
|
|
|
; Test prolog and epilog when a mask is passed in and out.
;
; test_prolog_epilog_with_mask is declared intel_ocl_bicc and takes a
; <16 x i1> %mask. It compares %x1 and %x2 for equality, xors the comparison
; result with %mask, and passes the new mask with %a to @func_float16_mask via
; an intel_ocl_bicc call.
;
; All expected sequences compute the compare into %k0 (vpcmpeqd) and xor it
; with %k1 into %k1 (kxorw) right before the call. None of them contains a
; vector or mask register spill or reload.
define intel_ocl_bicc <16 x float> @test_prolog_epilog_with_mask(<16 x float> %a, <16 x i32> %x1, <16 x i32>%x2, <16 x i1> %mask) nounwind {
|
|
; X32-LABEL: test_prolog_epilog_with_mask:
|
|
; X32: ## %bb.0:
|
|
; X32-NEXT: subl $12, %esp
|
|
; X32-NEXT: vpcmpeqd %zmm2, %zmm1, %k0
|
|
; X32-NEXT: kxorw %k1, %k0, %k1
|
|
; X32-NEXT: calll _func_float16_mask
|
|
; X32-NEXT: addl $12, %esp
|
|
; X32-NEXT: retl
|
|
;
|
|
; WIN32-LABEL: test_prolog_epilog_with_mask:
|
|
; WIN32: # %bb.0:
|
|
; WIN32-NEXT: vpcmpeqd %zmm2, %zmm1, %k0
|
|
; WIN32-NEXT: kxorw %k1, %k0, %k1
|
|
; WIN32-NEXT: calll _func_float16_mask
|
|
; WIN32-NEXT: retl
|
|
;
|
|
; WIN64-LABEL: test_prolog_epilog_with_mask:
|
|
; WIN64: # %bb.0:
|
|
; WIN64-NEXT: subq $40, %rsp
|
|
; WIN64-NEXT: vpcmpeqd %zmm2, %zmm1, %k0
|
|
; WIN64-NEXT: kxorw %k1, %k0, %k1
|
|
; WIN64-NEXT: callq func_float16_mask
|
|
; WIN64-NEXT: addq $40, %rsp
|
|
; WIN64-NEXT: retq
|
|
;
|
|
; X64-LABEL: test_prolog_epilog_with_mask:
|
|
; X64: ## %bb.0:
|
|
; X64-NEXT: pushq %rax
|
|
; X64-NEXT: vpcmpeqd %zmm2, %zmm1, %k0
|
|
; X64-NEXT: kxorw %k1, %k0, %k1
|
|
; X64-NEXT: callq _func_float16_mask
|
|
; X64-NEXT: popq %rax
|
|
; X64-NEXT: retq
|
|
%cmp_res = icmp eq <16 x i32>%x1, %x2
|
|
%mask1 = xor <16 x i1> %cmp_res, %mask
|
|
%c = call intel_ocl_bicc <16 x float> @func_float16_mask(<16 x float> %a, <16 x i1>%mask1)
|
|
ret <16 x float> %c
|
|
}
|