; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i386-pc-win32 -mattr=+avx512f -mattr=+avx512vl -mattr=+avx512bw -mattr=+avx512dq -verify-machineinstrs | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-win32 -mattr=+avx512f -mattr=+avx512vl -mattr=+avx512bw -mattr=+avx512dq -verify-machineinstrs | FileCheck %s --check-prefix=WIN64
; RUN: llc < %s -mtriple=x86_64-linux-gnu -mattr=+avx512f -mattr=+avx512vl -mattr=+avx512bw -mattr=+avx512dq -verify-machineinstrs | FileCheck %s --check-prefix=LINUXOSX64

; Test regcall when receiving/returning i1
; Callee side: adds 1 to the i1 argument and returns the sum. The checks for
; every target expect a single incb on %al, so the value arrives and leaves
; in %al.
define dso_local x86_regcallcc i1 @test_argReti1(i1 %a) {
; X32-LABEL: test_argReti1:
; X32: # %bb.0:
; X32-NEXT: incb %al
; X32-NEXT: # kill: def $al killed $al killed $eax
; X32-NEXT: retl
;
; WIN64-LABEL: test_argReti1:
; WIN64: # %bb.0:
; WIN64-NEXT: incb %al
; WIN64-NEXT: # kill: def $al killed $al killed $eax
; WIN64-NEXT: retq
;
; LINUXOSX64-LABEL: test_argReti1:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: incb %al
; LINUXOSX64-NEXT: # kill: def $al killed $al killed $eax
; LINUXOSX64-NEXT: retq
  %add = add i1 %a, 1
  ret i1 %add
}

; Test regcall when passing/retrieving i1
; Caller side: increments %a, passes it to @test_argReti1 under regcall, then
; increments the returned value. The checks expect %al to be zero-extended
; into %eax (movzbl) before the call.
define dso_local x86_regcallcc i1 @test_CallargReti1(i1 %a) {
; X32-LABEL: test_CallargReti1:
; X32: # %bb.0:
; X32-NEXT: incb %al
; X32-NEXT: movzbl %al, %eax
; X32-NEXT: calll _test_argReti1
; X32-NEXT: incb %al
; X32-NEXT: retl
;
; WIN64-LABEL: test_CallargReti1:
; WIN64: # %bb.0:
; WIN64-NEXT: pushq %rax
; WIN64-NEXT: .seh_stackalloc 8
; WIN64-NEXT: .seh_endprologue
; WIN64-NEXT: incb %al
; WIN64-NEXT: movzbl %al, %eax
; WIN64-NEXT: callq test_argReti1
; WIN64-NEXT: incb %al
; WIN64-NEXT: popq %rcx
; WIN64-NEXT: retq
; WIN64-NEXT: .seh_endproc
;
; LINUXOSX64-LABEL: test_CallargReti1:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: pushq %rax
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: incb %al
; LINUXOSX64-NEXT: movzbl %al, %eax
; LINUXOSX64-NEXT: callq test_argReti1
; LINUXOSX64-NEXT: incb %al
; LINUXOSX64-NEXT: popq %rcx
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT: retq
  %b = add i1 %a, 1
  %c = call x86_regcallcc i1 @test_argReti1(i1 %b)
  %d = add i1 %c, 1
  ret i1 %d
}

; Test regcall when receiving/returning i8
; Callee side: adds 1 to the i8 argument and returns the sum. The checks for
; every target expect a single incb on %al.
define dso_local x86_regcallcc i8 @test_argReti8(i8 %a) {
; X32-LABEL: test_argReti8:
; X32: # %bb.0:
; X32-NEXT: incb %al
; X32-NEXT: # kill: def $al killed $al killed $eax
; X32-NEXT: retl
;
; WIN64-LABEL: test_argReti8:
; WIN64: # %bb.0:
; WIN64-NEXT: incb %al
; WIN64-NEXT: # kill: def $al killed $al killed $eax
; WIN64-NEXT: retq
;
; LINUXOSX64-LABEL: test_argReti8:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: incb %al
; LINUXOSX64-NEXT: # kill: def $al killed $al killed $eax
; LINUXOSX64-NEXT: retq
  %add = add i8 %a, 1
  ret i8 %add
}

; Test regcall when passing/retrieving i8
; Caller side: increments %a, passes it to @test_argReti8 under regcall, then
; increments the returned value. The checks expect %al to be zero-extended
; into %eax (movzbl) before the call.
define dso_local x86_regcallcc i8 @test_CallargReti8(i8 %a) {
; X32-LABEL: test_CallargReti8:
; X32: # %bb.0:
; X32-NEXT: incb %al
; X32-NEXT: movzbl %al, %eax
; X32-NEXT: calll _test_argReti8
; X32-NEXT: incb %al
; X32-NEXT: retl
;
; WIN64-LABEL: test_CallargReti8:
; WIN64: # %bb.0:
; WIN64-NEXT: pushq %rax
; WIN64-NEXT: .seh_stackalloc 8
; WIN64-NEXT: .seh_endprologue
; WIN64-NEXT: incb %al
; WIN64-NEXT: movzbl %al, %eax
; WIN64-NEXT: callq test_argReti8
; WIN64-NEXT: incb %al
; WIN64-NEXT: popq %rcx
; WIN64-NEXT: retq
; WIN64-NEXT: .seh_endproc
;
; LINUXOSX64-LABEL: test_CallargReti8:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: pushq %rax
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: incb %al
; LINUXOSX64-NEXT: movzbl %al, %eax
; LINUXOSX64-NEXT: callq test_argReti8
; LINUXOSX64-NEXT: incb %al
; LINUXOSX64-NEXT: popq %rcx
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT: retq
  %b = add i8 %a, 1
  %c = call x86_regcallcc i8 @test_argReti8(i8 %b)
  %d = add i8 %c, 1
  ret i8 %d
}

; Test regcall when receiving/returning i16
; Callee side: adds 1 to the i16 argument and returns the sum. The checks
; expect the increment to be done on the full %eax (incl), with only %ax
; treated as the live result.
define dso_local x86_regcallcc i16 @test_argReti16(i16 %a) {
; X32-LABEL: test_argReti16:
; X32: # %bb.0:
; X32-NEXT: incl %eax
; X32-NEXT: # kill: def $ax killed $ax killed $eax
; X32-NEXT: retl
;
; WIN64-LABEL: test_argReti16:
; WIN64: # %bb.0:
; WIN64-NEXT: incl %eax
; WIN64-NEXT: # kill: def $ax killed $ax killed $eax
; WIN64-NEXT: retq
;
; LINUXOSX64-LABEL: test_argReti16:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: incl %eax
; LINUXOSX64-NEXT: # kill: def $ax killed $ax killed $eax
; LINUXOSX64-NEXT: retq
  %add = add i16 %a, 1
  ret i16 %add
}

; Test regcall when passing/retrieving i16
; Caller side: increments %a, passes it to @test_argReti16 under regcall, then
; increments the returned value. The checks expect both increments to be
; 32-bit (incl %eax) with no explicit extension around the call.
define dso_local x86_regcallcc i16 @test_CallargReti16(i16 %a) {
; X32-LABEL: test_CallargReti16:
; X32: # %bb.0:
; X32-NEXT: incl %eax
; X32-NEXT: calll _test_argReti16
; X32-NEXT: # kill: def $ax killed $ax def $eax
; X32-NEXT: incl %eax
; X32-NEXT: # kill: def $ax killed $ax killed $eax
; X32-NEXT: retl
;
; WIN64-LABEL: test_CallargReti16:
; WIN64: # %bb.0:
; WIN64-NEXT: pushq %rax
; WIN64-NEXT: .seh_stackalloc 8
; WIN64-NEXT: .seh_endprologue
; WIN64-NEXT: incl %eax
; WIN64-NEXT: callq test_argReti16
; WIN64-NEXT: # kill: def $ax killed $ax def $eax
; WIN64-NEXT: incl %eax
; WIN64-NEXT: # kill: def $ax killed $ax killed $eax
; WIN64-NEXT: popq %rcx
; WIN64-NEXT: retq
; WIN64-NEXT: .seh_endproc
;
; LINUXOSX64-LABEL: test_CallargReti16:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: pushq %rax
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: incl %eax
; LINUXOSX64-NEXT: callq test_argReti16
; LINUXOSX64-NEXT: # kill: def $ax killed $ax def $eax
; LINUXOSX64-NEXT: incl %eax
; LINUXOSX64-NEXT: # kill: def $ax killed $ax killed $eax
; LINUXOSX64-NEXT: popq %rcx
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT: retq
  %b = add i16 %a, 1
  %c = call x86_regcallcc i16 @test_argReti16(i16 %b)
  %d = add i16 %c, 1
  ret i16 %d
}

; Test regcall when receiving/returning i32
; Callee side: adds 1 to the i32 argument and returns the sum. The checks for
; every target expect a single incl on %eax.
define dso_local x86_regcallcc i32 @test_argReti32(i32 %a) {
; X32-LABEL: test_argReti32:
; X32: # %bb.0:
; X32-NEXT: incl %eax
; X32-NEXT: retl
;
; WIN64-LABEL: test_argReti32:
; WIN64: # %bb.0:
; WIN64-NEXT: incl %eax
; WIN64-NEXT: retq
;
; LINUXOSX64-LABEL: test_argReti32:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: incl %eax
; LINUXOSX64-NEXT: retq
  %add = add i32 %a, 1
  ret i32 %add
}

; Test regcall when passing/retrieving i32
; Caller side: increments %a, passes it to @test_argReti32 under regcall, then
; increments the returned value. The checks expect the value to stay in %eax
; across the call on every target.
define dso_local x86_regcallcc i32 @test_CallargReti32(i32 %a) {
; X32-LABEL: test_CallargReti32:
; X32: # %bb.0:
; X32-NEXT: incl %eax
; X32-NEXT: calll _test_argReti32
; X32-NEXT: incl %eax
; X32-NEXT: retl
;
; WIN64-LABEL: test_CallargReti32:
; WIN64: # %bb.0:
; WIN64-NEXT: pushq %rax
; WIN64-NEXT: .seh_stackalloc 8
; WIN64-NEXT: .seh_endprologue
; WIN64-NEXT: incl %eax
; WIN64-NEXT: callq test_argReti32
; WIN64-NEXT: incl %eax
; WIN64-NEXT: popq %rcx
; WIN64-NEXT: retq
; WIN64-NEXT: .seh_endproc
;
; LINUXOSX64-LABEL: test_CallargReti32:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: pushq %rax
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: incl %eax
; LINUXOSX64-NEXT: callq test_argReti32
; LINUXOSX64-NEXT: incl %eax
; LINUXOSX64-NEXT: popq %rcx
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT: retq
  %b = add i32 %a, 1
  %c = call x86_regcallcc i32 @test_argReti32(i32 %b)
  %d = add i32 %c, 1
  ret i32 %d
}

; Test regcall when receiving/returning i64
; Callee side: adds 4294967299 (0x100000003) to the i64 argument. The constant
; has bits set in both 32-bit halves, so the 32-bit target's checks expect an
; addl/adcl pair on %eax/%ecx; the 64-bit targets' checks expect a movabsq
; into %rcx followed by addq into %rax.
define dso_local x86_regcallcc i64 @test_argReti64(i64 %a) {
; X32-LABEL: test_argReti64:
; X32: # %bb.0:
; X32-NEXT: addl $3, %eax
; X32-NEXT: adcl $1, %ecx
; X32-NEXT: retl
;
; WIN64-LABEL: test_argReti64:
; WIN64: # %bb.0:
; WIN64-NEXT: movabsq $4294967299, %rcx # imm = 0x100000003
; WIN64-NEXT: addq %rcx, %rax
; WIN64-NEXT: retq
;
; LINUXOSX64-LABEL: test_argReti64:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: movabsq $4294967299, %rcx # imm = 0x100000003
; LINUXOSX64-NEXT: addq %rcx, %rax
; LINUXOSX64-NEXT: retq
  %add = add i64 %a, 4294967299
  ret i64 %add
}

; Test regcall when passing/retrieving i64
; Caller side: increments %a, passes it to @test_argReti64 under regcall, then
; increments the returned value. The 32-bit target's checks expect the i64 to
; be carried in the %eax/%ecx pair (addl/adcl); the 64-bit targets' checks
; expect it in %rax (incq).
define dso_local x86_regcallcc i64 @test_CallargReti64(i64 %a) {
; X32-LABEL: test_CallargReti64:
; X32: # %bb.0:
; X32-NEXT: addl $1, %eax
; X32-NEXT: adcl $0, %ecx
; X32-NEXT: calll _test_argReti64
; X32-NEXT: addl $1, %eax
; X32-NEXT: adcl $0, %ecx
; X32-NEXT: retl
;
; WIN64-LABEL: test_CallargReti64:
; WIN64: # %bb.0:
; WIN64-NEXT: pushq %rax
; WIN64-NEXT: .seh_stackalloc 8
; WIN64-NEXT: .seh_endprologue
; WIN64-NEXT: incq %rax
; WIN64-NEXT: callq test_argReti64
; WIN64-NEXT: incq %rax
; WIN64-NEXT: popq %rcx
; WIN64-NEXT: retq
; WIN64-NEXT: .seh_endproc
;
; LINUXOSX64-LABEL: test_CallargReti64:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: pushq %rax
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: incq %rax
; LINUXOSX64-NEXT: callq test_argReti64
; LINUXOSX64-NEXT: incq %rax
; LINUXOSX64-NEXT: popq %rcx
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT: retq
  %b = add i64 %a, 1
  %c = call x86_regcallcc i64 @test_argReti64(i64 %b)
  %d = add i64 %c, 1
  ret i64 %d
}

; Test regcall when receiving/returning float
; Callee side: adds 1.0 to the float argument and returns the sum. The checks
; expect a single vaddss of a memory constant into %xmm0 on every target.
define dso_local x86_regcallcc float @test_argRetFloat(float %a) {
; X32-LABEL: test_argRetFloat:
; X32: # %bb.0:
; X32-NEXT: vaddss __real@3f800000, %xmm0, %xmm0
; X32-NEXT: retl
;
; WIN64-LABEL: test_argRetFloat:
; WIN64: # %bb.0:
; WIN64-NEXT: vaddss __real@{{.*}}(%rip), %xmm0, %xmm0
; WIN64-NEXT: retq
;
; LINUXOSX64-LABEL: test_argRetFloat:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: vaddss {{.*}}(%rip), %xmm0, %xmm0
; LINUXOSX64-NEXT: retq
  %add = fadd float 1.0, %a
  ret float %add
}

; Test regcall when passing/retrieving float
; Caller side: adds 1.0 to %a, passes it to @test_argRetFloat under regcall,
; then adds 1.0 to the returned value. The checks expect the 1.0 constant to
; be held across the call in %xmm4 (32-bit target) or %xmm8 (64-bit targets),
; with that register spilled in the prologue and reloaded before returning.
define dso_local x86_regcallcc float @test_CallargRetFloat(float %a) {
; X32-LABEL: test_CallargRetFloat:
; X32: # %bb.0:
; X32-NEXT: subl $28, %esp
; X32-NEXT: vmovups %xmm4, (%esp) # 16-byte Spill
; X32-NEXT: vmovss {{.*#+}} xmm4 = mem[0],zero,zero,zero
; X32-NEXT: vaddss %xmm4, %xmm0, %xmm0
; X32-NEXT: calll _test_argRetFloat
; X32-NEXT: vaddss %xmm4, %xmm0, %xmm0
; X32-NEXT: vmovups (%esp), %xmm4 # 16-byte Reload
; X32-NEXT: addl $28, %esp
; X32-NEXT: retl
;
; WIN64-LABEL: test_CallargRetFloat:
; WIN64: # %bb.0:
; WIN64-NEXT: subq $24, %rsp
; WIN64-NEXT: .seh_stackalloc 24
; WIN64-NEXT: vmovaps %xmm8, (%rsp) # 16-byte Spill
; WIN64-NEXT: .seh_savexmm %xmm8, 0
; WIN64-NEXT: .seh_endprologue
; WIN64-NEXT: vmovss {{.*#+}} xmm8 = mem[0],zero,zero,zero
; WIN64-NEXT: vaddss %xmm0, %xmm8, %xmm0
; WIN64-NEXT: callq test_argRetFloat
; WIN64-NEXT: vaddss %xmm0, %xmm8, %xmm0
; WIN64-NEXT: vmovaps (%rsp), %xmm8 # 16-byte Reload
; WIN64-NEXT: addq $24, %rsp
; WIN64-NEXT: retq
; WIN64-NEXT: .seh_endproc
;
; LINUXOSX64-LABEL: test_CallargRetFloat:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: subq $24, %rsp
; LINUXOSX64-NEXT: vmovaps %xmm8, (%rsp) # 16-byte Spill
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 32
; LINUXOSX64-NEXT: .cfi_offset %xmm8, -32
; LINUXOSX64-NEXT: vmovss {{.*#+}} xmm8 = mem[0],zero,zero,zero
; LINUXOSX64-NEXT: vaddss %xmm0, %xmm8, %xmm0
; LINUXOSX64-NEXT: callq test_argRetFloat
; LINUXOSX64-NEXT: vaddss %xmm0, %xmm8, %xmm0
; LINUXOSX64-NEXT: vmovaps (%rsp), %xmm8 # 16-byte Reload
; LINUXOSX64-NEXT: addq $24, %rsp
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT: retq
  %b = fadd float 1.0, %a
  %c = call x86_regcallcc float @test_argRetFloat(float %b)
  %d = fadd float 1.0, %c
  ret float %d
}

; Test regcall when receiving/returning double
; Callee side: adds 1.0 to the double argument and returns the sum. The checks
; expect a single vaddsd of a memory constant into %xmm0 on every target.
define dso_local x86_regcallcc double @test_argRetDouble(double %a) {
; X32-LABEL: test_argRetDouble:
; X32: # %bb.0:
; X32-NEXT: vaddsd __real@3ff0000000000000, %xmm0, %xmm0
; X32-NEXT: retl
;
; WIN64-LABEL: test_argRetDouble:
; WIN64: # %bb.0:
; WIN64-NEXT: vaddsd __real@{{.*}}(%rip), %xmm0, %xmm0
; WIN64-NEXT: retq
;
; LINUXOSX64-LABEL: test_argRetDouble:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: vaddsd {{.*}}(%rip), %xmm0, %xmm0
; LINUXOSX64-NEXT: retq
  %add = fadd double %a, 1.0
  ret double %add
}

; Test regcall when passing/retrieving double
; Caller side: adds 1.0 to %a, passes it to @test_argRetDouble under regcall,
; then adds 1.0 to the returned value. The checks expect the 1.0 constant to
; be held across the call in %xmm4 (32-bit target) or %xmm8 (64-bit targets),
; with that register spilled in the prologue and reloaded before returning.
define dso_local x86_regcallcc double @test_CallargRetDouble(double %a) {
; X32-LABEL: test_CallargRetDouble:
; X32: # %bb.0:
; X32-NEXT: subl $28, %esp
; X32-NEXT: vmovups %xmm4, (%esp) # 16-byte Spill
; X32-NEXT: vmovsd {{.*#+}} xmm4 = mem[0],zero
; X32-NEXT: vaddsd %xmm4, %xmm0, %xmm0
; X32-NEXT: calll _test_argRetDouble
; X32-NEXT: vaddsd %xmm4, %xmm0, %xmm0
; X32-NEXT: vmovups (%esp), %xmm4 # 16-byte Reload
; X32-NEXT: addl $28, %esp
; X32-NEXT: retl
;
; WIN64-LABEL: test_CallargRetDouble:
; WIN64: # %bb.0:
; WIN64-NEXT: subq $24, %rsp
; WIN64-NEXT: .seh_stackalloc 24
; WIN64-NEXT: vmovaps %xmm8, (%rsp) # 16-byte Spill
; WIN64-NEXT: .seh_savexmm %xmm8, 0
; WIN64-NEXT: .seh_endprologue
; WIN64-NEXT: vmovsd {{.*#+}} xmm8 = mem[0],zero
; WIN64-NEXT: vaddsd %xmm0, %xmm8, %xmm0
; WIN64-NEXT: callq test_argRetDouble
; WIN64-NEXT: vaddsd %xmm0, %xmm8, %xmm0
; WIN64-NEXT: vmovaps (%rsp), %xmm8 # 16-byte Reload
; WIN64-NEXT: addq $24, %rsp
; WIN64-NEXT: retq
; WIN64-NEXT: .seh_endproc
;
; LINUXOSX64-LABEL: test_CallargRetDouble:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: subq $24, %rsp
; LINUXOSX64-NEXT: vmovaps %xmm8, (%rsp) # 16-byte Spill
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 32
; LINUXOSX64-NEXT: .cfi_offset %xmm8, -32
; LINUXOSX64-NEXT: vmovsd {{.*#+}} xmm8 = mem[0],zero
; LINUXOSX64-NEXT: vaddsd %xmm0, %xmm8, %xmm0
; LINUXOSX64-NEXT: callq test_argRetDouble
; LINUXOSX64-NEXT: vaddsd %xmm0, %xmm8, %xmm0
; LINUXOSX64-NEXT: vmovaps (%rsp), %xmm8 # 16-byte Reload
; LINUXOSX64-NEXT: addq $24, %rsp
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT: retq
  %b = fadd double 1.0, %a
  %c = call x86_regcallcc double @test_argRetDouble(double %b)
  %d = fadd double 1.0, %c
  ret double %d
}

; Test regcall when receiving/returning long double
; Callee side: returns %a0 + %a0 as x86_fp80. The checks expect the whole body
; to be one x87 instruction, fadd %st, %st(0), on every target.
define dso_local x86_regcallcc x86_fp80 @test_argRetf80(x86_fp80 %a0) nounwind {
; X32-LABEL: test_argRetf80:
; X32: # %bb.0:
; X32-NEXT: fadd %st, %st(0)
; X32-NEXT: retl
;
; WIN64-LABEL: test_argRetf80:
; WIN64: # %bb.0:
; WIN64-NEXT: fadd %st, %st(0)
; WIN64-NEXT: retq
;
; LINUXOSX64-LABEL: test_argRetf80:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: fadd %st, %st(0)
; LINUXOSX64-NEXT: retq
  %r0 = fadd x86_fp80 %a0, %a0
  ret x86_fp80 %r0
}

; Test regcall when receiving long double and returning double
; Callee side: truncates the x86_fp80 argument to double. The checks expect
; the value to be stored from the x87 stack to memory (fstpl) and reloaded
; into %xmm0 (vmovsd) on every target.
define dso_local x86_regcallcc double @test_argParamf80(x86_fp80 %a0) nounwind {
; X32-LABEL: test_argParamf80:
; X32: # %bb.0:
; X32-NEXT: pushl %ebp
; X32-NEXT: movl %esp, %ebp
; X32-NEXT: andl $-8, %esp
; X32-NEXT: subl $8, %esp
; X32-NEXT: fstpl (%esp)
; X32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X32-NEXT: movl %ebp, %esp
; X32-NEXT: popl %ebp
; X32-NEXT: retl
;
; WIN64-LABEL: test_argParamf80:
; WIN64: # %bb.0:
; WIN64-NEXT: pushq %rax
; WIN64-NEXT: fstpl (%rsp)
; WIN64-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; WIN64-NEXT: popq %rax
; WIN64-NEXT: retq
;
; LINUXOSX64-LABEL: test_argParamf80:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: fstpl -{{[0-9]+}}(%rsp)
; LINUXOSX64-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; LINUXOSX64-NEXT: retq
  %r0 = fptrunc x86_fp80 %a0 to double
  ret double %r0
}

; Test regcall when passing/retrieving long double
; Caller side: doubles %a, passes it to @test_argRetf80 under regcall, then
; doubles the returned value. The checks expect an fadd %st, %st(0) on each
; side of the call and no other data movement.
define x86_regcallcc x86_fp80 @test_CallargRetf80(x86_fp80 %a) {
; X32-LABEL: test_CallargRetf80:
; X32: # %bb.0:
; X32-NEXT: fadd %st, %st(0)
; X32-NEXT: calll _test_argRetf80
; X32-NEXT: fadd %st, %st(0)
; X32-NEXT: retl
;
; WIN64-LABEL: test_CallargRetf80:
; WIN64: # %bb.0:
; WIN64-NEXT: pushq %rax
; WIN64-NEXT: .seh_stackalloc 8
; WIN64-NEXT: .seh_endprologue
; WIN64-NEXT: fadd %st, %st(0)
; WIN64-NEXT: callq test_argRetf80
; WIN64-NEXT: fadd %st, %st(0)
; WIN64-NEXT: popq %rax
; WIN64-NEXT: retq
; WIN64-NEXT: .seh_endproc
;
; LINUXOSX64-LABEL: test_CallargRetf80:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: pushq %rax
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: fadd %st, %st(0)
; LINUXOSX64-NEXT: callq test_argRetf80
; LINUXOSX64-NEXT: fadd %st, %st(0)
; LINUXOSX64-NEXT: popq %rax
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT: retq
  %b = fadd x86_fp80 %a, %a
  %c = call x86_regcallcc x86_fp80 @test_argRetf80(x86_fp80 %b)
  %d = fadd x86_fp80 %c, %c
  ret x86_fp80 %d
}

; Test regcall when passing long double and retrieving double
; Caller side: doubles %a as x86_fp80, passes it to @test_argParamf80 under
; regcall, then doubles the returned double. The checks expect an x87 fadd
; before the call and a vaddsd on %xmm0 after it.
define dso_local x86_regcallcc double @test_CallargParamf80(x86_fp80 %a) {
; X32-LABEL: test_CallargParamf80:
; X32: # %bb.0:
; X32-NEXT: fadd %st, %st(0)
; X32-NEXT: calll _test_argParamf80
; X32-NEXT: vaddsd %xmm0, %xmm0, %xmm0
; X32-NEXT: retl
;
; WIN64-LABEL: test_CallargParamf80:
; WIN64: # %bb.0:
; WIN64-NEXT: pushq %rax
; WIN64-NEXT: .seh_stackalloc 8
; WIN64-NEXT: .seh_endprologue
; WIN64-NEXT: fadd %st, %st(0)
; WIN64-NEXT: callq test_argParamf80
; WIN64-NEXT: vaddsd %xmm0, %xmm0, %xmm0
; WIN64-NEXT: popq %rax
; WIN64-NEXT: retq
; WIN64-NEXT: .seh_endproc
;
; LINUXOSX64-LABEL: test_CallargParamf80:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: pushq %rax
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: fadd %st, %st(0)
; LINUXOSX64-NEXT: callq test_argParamf80
; LINUXOSX64-NEXT: vaddsd %xmm0, %xmm0, %xmm0
; LINUXOSX64-NEXT: popq %rax
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT: retq
  %b = fadd x86_fp80 %a, %a
  %c = call x86_regcallcc double @test_argParamf80(x86_fp80 %b)
  %d = fadd double %c, %c
  ret double %d
}

; Test regcall when receiving/returning pointer
; Callee side: converts the pointer to i32, adds 1, and converts back. The
; round-trip goes through i32 on the 64-bit targets too, so the checks expect
; a 32-bit incl %eax on every target.
define dso_local x86_regcallcc [4 x i32]* @test_argRetPointer([4 x i32]* %a) {
; X32-LABEL: test_argRetPointer:
; X32: # %bb.0:
; X32-NEXT: incl %eax
; X32-NEXT: retl
;
; WIN64-LABEL: test_argRetPointer:
; WIN64: # %bb.0:
; WIN64-NEXT: incl %eax
; WIN64-NEXT: retq
;
; LINUXOSX64-LABEL: test_argRetPointer:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: incl %eax
; LINUXOSX64-NEXT: retq
  %b = ptrtoint [4 x i32]* %a to i32
  %c = add i32 %b, 1
  %d = inttoptr i32 %c to [4 x i32]*
  ret [4 x i32]* %d
}

; Test regcall when passing/retrieving pointer
; Caller side: bumps the pointer by 1 (via an i32 round-trip), passes it to
; @test_argRetPointer under regcall, then bumps the returned pointer the same
; way. The checks expect incl %eax on each side of the call.
define dso_local x86_regcallcc [4 x i32]* @test_CallargRetPointer([4 x i32]* %a) {
; X32-LABEL: test_CallargRetPointer:
; X32: # %bb.0:
; X32-NEXT: incl %eax
; X32-NEXT: calll _test_argRetPointer
; X32-NEXT: incl %eax
; X32-NEXT: retl
;
; WIN64-LABEL: test_CallargRetPointer:
; WIN64: # %bb.0:
; WIN64-NEXT: pushq %rax
; WIN64-NEXT: .seh_stackalloc 8
; WIN64-NEXT: .seh_endprologue
; WIN64-NEXT: incl %eax
; WIN64-NEXT: callq test_argRetPointer
; WIN64-NEXT: incl %eax
; WIN64-NEXT: popq %rcx
; WIN64-NEXT: retq
; WIN64-NEXT: .seh_endproc
;
; LINUXOSX64-LABEL: test_CallargRetPointer:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: pushq %rax
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: incl %eax
; LINUXOSX64-NEXT: callq test_argRetPointer
; LINUXOSX64-NEXT: incl %eax
; LINUXOSX64-NEXT: popq %rcx
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT: retq
  %b = ptrtoint [4 x i32]* %a to i32
  %c = add i32 %b, 1
  %d = inttoptr i32 %c to [4 x i32]*
  %e = call x86_regcallcc [4 x i32]* @test_argRetPointer([4 x i32]* %d)
  %f = ptrtoint [4 x i32]* %e to i32
  %g = add i32 %f, 1
  %h = inttoptr i32 %g to [4 x i32]*
  ret [4 x i32]* %h
}

; Test regcall when receiving/returning 128 bit vector
; Callee side: per-lane select between %a and %b under the <4 x i1> mask %x.
; The checks expect the mask to arrive in %xmm0 and be converted to %k1
; (vpslld + vpmovd2m), with the two data vectors in %xmm1 and %xmm2.
define dso_local x86_regcallcc <4 x i32> @test_argRet128Vector(<4 x i1> %x, <4 x i32> %a, <4 x i32> %b) {
; X32-LABEL: test_argRet128Vector:
; X32: # %bb.0:
; X32-NEXT: vpslld $31, %xmm0, %xmm0
; X32-NEXT: vpmovd2m %xmm0, %k1
; X32-NEXT: vpblendmd %xmm1, %xmm2, %xmm0 {%k1}
; X32-NEXT: retl
;
; WIN64-LABEL: test_argRet128Vector:
; WIN64: # %bb.0:
; WIN64-NEXT: vpslld $31, %xmm0, %xmm0
; WIN64-NEXT: vpmovd2m %xmm0, %k1
; WIN64-NEXT: vpblendmd %xmm1, %xmm2, %xmm0 {%k1}
; WIN64-NEXT: retq
;
; LINUXOSX64-LABEL: test_argRet128Vector:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: vpslld $31, %xmm0, %xmm0
; LINUXOSX64-NEXT: vpmovd2m %xmm0, %k1
; LINUXOSX64-NEXT: vpblendmd %xmm1, %xmm2, %xmm0 {%k1}
; LINUXOSX64-NEXT: retq
  %d = select <4 x i1> %x, <4 x i32> %a, <4 x i32> %b
  ret <4 x i32> %d
}

; Test regcall when passing/retrieving 128 bit vector
; Caller side: calls @test_argRet128Vector with %a as both data operands, then
; selects between %a and the call result under the same mask %x. The checks
; expect %a to be kept across the call in %xmm4 (32-bit target) or %xmm8
; (64-bit targets) and the mask register %k1 to be spilled and reloaded
; around the call.
define dso_local x86_regcallcc <4 x i32> @test_CallargRet128Vector(<4 x i1> %x, <4 x i32> %a) {
; X32-LABEL: test_CallargRet128Vector:
; X32: # %bb.0:
; X32-NEXT: subl $44, %esp
; X32-NEXT: vmovups %xmm4, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; X32-NEXT: vmovdqa %xmm1, %xmm4
; X32-NEXT: vpslld $31, %xmm0, %xmm1
; X32-NEXT: vpmovd2m %xmm1, %k1
; X32-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill
; X32-NEXT: vmovdqa %xmm4, %xmm1
; X32-NEXT: vmovdqa %xmm4, %xmm2
; X32-NEXT: calll _test_argRet128Vector
; X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k1 # 2-byte Reload
; X32-NEXT: vmovdqa32 %xmm4, %xmm0 {%k1}
; X32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm4 # 16-byte Reload
; X32-NEXT: addl $44, %esp
; X32-NEXT: retl
;
; WIN64-LABEL: test_CallargRet128Vector:
; WIN64: # %bb.0:
; WIN64-NEXT: subq $40, %rsp
; WIN64-NEXT: .seh_stackalloc 40
; WIN64-NEXT: vmovaps %xmm8, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; WIN64-NEXT: .seh_savexmm %xmm8, 16
; WIN64-NEXT: .seh_endprologue
; WIN64-NEXT: vmovdqa %xmm1, %xmm8
; WIN64-NEXT: vpslld $31, %xmm0, %xmm1
; WIN64-NEXT: vpmovd2m %xmm1, %k1
; WIN64-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
; WIN64-NEXT: vmovdqa %xmm8, %xmm1
; WIN64-NEXT: vmovdqa %xmm8, %xmm2
; WIN64-NEXT: callq test_argRet128Vector
; WIN64-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
; WIN64-NEXT: vmovdqa32 %xmm8, %xmm0 {%k1}
; WIN64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm8 # 16-byte Reload
; WIN64-NEXT: addq $40, %rsp
; WIN64-NEXT: retq
; WIN64-NEXT: .seh_endproc
;
; LINUXOSX64-LABEL: test_CallargRet128Vector:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: subq $40, %rsp
; LINUXOSX64-NEXT: vmovaps %xmm8, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 48
; LINUXOSX64-NEXT: .cfi_offset %xmm8, -32
; LINUXOSX64-NEXT: vmovdqa %xmm1, %xmm8
; LINUXOSX64-NEXT: vpslld $31, %xmm0, %xmm1
; LINUXOSX64-NEXT: vpmovd2m %xmm1, %k1
; LINUXOSX64-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
; LINUXOSX64-NEXT: vmovdqa %xmm8, %xmm1
; LINUXOSX64-NEXT: vmovdqa %xmm8, %xmm2
; LINUXOSX64-NEXT: callq test_argRet128Vector
; LINUXOSX64-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
; LINUXOSX64-NEXT: vmovdqa32 %xmm8, %xmm0 {%k1}
; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm8 # 16-byte Reload
; LINUXOSX64-NEXT: addq $40, %rsp
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT: retq
  %b = call x86_regcallcc <4 x i32> @test_argRet128Vector(<4 x i1> %x, <4 x i32> %a, <4 x i32> %a)
  %c = select <4 x i1> %x, <4 x i32> %a, <4 x i32> %b
  ret <4 x i32> %c
}

; Test regcall when receiving/returning 256 bit vector
; Callee side: per-lane select between %a and %b under the <8 x i1> mask %x.
; The checks expect the mask to arrive in %eax (moved to %k1 with kmovd) and
; the two data vectors in %ymm0 and %ymm1.
define dso_local x86_regcallcc <8 x i32> @test_argRet256Vector(<8 x i1> %x, <8 x i32> %a, <8 x i32> %b) {
; X32-LABEL: test_argRet256Vector:
; X32: # %bb.0:
; X32-NEXT: kmovd %eax, %k1
; X32-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
; X32-NEXT: retl
;
; WIN64-LABEL: test_argRet256Vector:
; WIN64: # %bb.0:
; WIN64-NEXT: kmovd %eax, %k1
; WIN64-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
; WIN64-NEXT: retq
;
; LINUXOSX64-LABEL: test_argRet256Vector:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: kmovd %eax, %k1
; LINUXOSX64-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
; LINUXOSX64-NEXT: retq
  %d = select <8 x i1> %x, <8 x i32> %a, <8 x i32> %b
  ret <8 x i32> %d
}

; Test regcall when passing/retrieving 256 bit vector
; Caller side: calls @test_argRet256Vector with %a as both data operands, then
; selects between %a and the call result under the same mask %x. The checks
; expect both %ymm0 (32 bytes) and the mask register %k1 to be spilled to the
; stack before the call and reloaded after it.
define dso_local x86_regcallcc <8 x i32> @test_CallargRet256Vector(<8 x i1> %x, <8 x i32> %a) {
; X32-LABEL: test_CallargRet256Vector:
; X32: # %bb.0:
; X32-NEXT: subl $92, %esp
; X32-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%e{{[sb]}}p) # 32-byte Spill
; X32-NEXT: kmovd %eax, %k1
; X32-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill
; X32-NEXT: vmovdqa %ymm0, %ymm1
; X32-NEXT: calll _test_argRet256Vector
; X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k1 # 2-byte Reload
; X32-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %ymm1 # 32-byte Reload
; X32-NEXT: vmovdqa32 %ymm1, %ymm0 {%k1}
; X32-NEXT: addl $92, %esp
; X32-NEXT: retl
;
; WIN64-LABEL: test_CallargRet256Vector:
; WIN64: # %bb.0:
; WIN64-NEXT: subq $88, %rsp
; WIN64-NEXT: .seh_stackalloc 88
; WIN64-NEXT: .seh_endprologue
; WIN64-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; WIN64-NEXT: kmovd %eax, %k1
; WIN64-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
; WIN64-NEXT: vmovdqa %ymm0, %ymm1
; WIN64-NEXT: callq test_argRet256Vector
; WIN64-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
; WIN64-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload
; WIN64-NEXT: vmovdqa32 %ymm1, %ymm0 {%k1}
; WIN64-NEXT: addq $88, %rsp
; WIN64-NEXT: retq
; WIN64-NEXT: .seh_endproc
;
; LINUXOSX64-LABEL: test_CallargRet256Vector:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: subq $88, %rsp
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 96
; LINUXOSX64-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; LINUXOSX64-NEXT: kmovd %eax, %k1
; LINUXOSX64-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
; LINUXOSX64-NEXT: vmovdqa %ymm0, %ymm1
; LINUXOSX64-NEXT: callq test_argRet256Vector
; LINUXOSX64-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
; LINUXOSX64-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload
; LINUXOSX64-NEXT: vmovdqa32 %ymm1, %ymm0 {%k1}
; LINUXOSX64-NEXT: addq $88, %rsp
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT: retq
  %b = call x86_regcallcc <8 x i32> @test_argRet256Vector(<8 x i1> %x, <8 x i32> %a, <8 x i32> %a)
  %c = select <8 x i1> %x, <8 x i32> %a, <8 x i32> %b
  ret <8 x i32> %c
}

; Test regcall when receiving/returning 512 bit vector
; Callee side: per-lane select between %a and %b under the <16 x i1> mask %x.
; The checks expect the mask to arrive in %eax (moved to %k1 with kmovd) and
; the two data vectors in %zmm0 and %zmm1.
define dso_local x86_regcallcc <16 x i32> @test_argRet512Vector(<16 x i1> %x, <16 x i32> %a, <16 x i32> %b) {
; X32-LABEL: test_argRet512Vector:
; X32: # %bb.0:
; X32-NEXT: kmovd %eax, %k1
; X32-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; X32-NEXT: retl
;
; WIN64-LABEL: test_argRet512Vector:
; WIN64: # %bb.0:
; WIN64-NEXT: kmovd %eax, %k1
; WIN64-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; WIN64-NEXT: retq
;
; LINUXOSX64-LABEL: test_argRet512Vector:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: kmovd %eax, %k1
; LINUXOSX64-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; LINUXOSX64-NEXT: retq
  %d = select <16 x i1> %x, <16 x i32> %a, <16 x i32> %b
  ret <16 x i32> %d
}

; Test regcall when passing/retrieving 512 bit vector
; Caller side: calls @test_argRet512Vector with %a as both data operands, then
; selects between %a and the call result under the same mask %x. The checks
; expect both %zmm0 (64 bytes) and the mask register %k1 to be spilled to the
; stack before the call and reloaded after it.
define dso_local x86_regcallcc <16 x i32> @test_CallargRet512Vector(<16 x i1> %x, <16 x i32> %a) {
; X32-LABEL: test_CallargRet512Vector:
; X32: # %bb.0:
; X32-NEXT: subl $188, %esp
; X32-NEXT: vmovdqu64 %zmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 64-byte Spill
; X32-NEXT: kmovd %eax, %k1
; X32-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill
; X32-NEXT: vmovdqa64 %zmm0, %zmm1
; X32-NEXT: calll _test_argRet512Vector
; X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k1 # 2-byte Reload
; X32-NEXT: vmovdqu64 {{[-0-9]+}}(%e{{[sb]}}p), %zmm1 # 64-byte Reload
; X32-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1}
; X32-NEXT: addl $188, %esp
; X32-NEXT: retl
;
; WIN64-LABEL: test_CallargRet512Vector:
; WIN64: # %bb.0:
; WIN64-NEXT: subq $184, %rsp
; WIN64-NEXT: .seh_stackalloc 184
; WIN64-NEXT: .seh_endprologue
; WIN64-NEXT: vmovdqu64 %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; WIN64-NEXT: kmovd %eax, %k1
; WIN64-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
; WIN64-NEXT: vmovdqa64 %zmm0, %zmm1
; WIN64-NEXT: callq test_argRet512Vector
; WIN64-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
; WIN64-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 # 64-byte Reload
; WIN64-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1}
; WIN64-NEXT: addq $184, %rsp
; WIN64-NEXT: retq
; WIN64-NEXT: .seh_endproc
;
; LINUXOSX64-LABEL: test_CallargRet512Vector:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: subq $184, %rsp
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 192
; LINUXOSX64-NEXT: vmovdqu64 %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; LINUXOSX64-NEXT: kmovd %eax, %k1
; LINUXOSX64-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
; LINUXOSX64-NEXT: vmovdqa64 %zmm0, %zmm1
; LINUXOSX64-NEXT: callq test_argRet512Vector
; LINUXOSX64-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
; LINUXOSX64-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 # 64-byte Reload
; LINUXOSX64-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1}
; LINUXOSX64-NEXT: addq $184, %rsp
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT: retq
  %b = call x86_regcallcc <16 x i32> @test_argRet512Vector(<16 x i1> %x, <16 x i32> %a, <16 x i32> %a)
  %c = select <16 x i1> %x, <16 x i32> %a, <16 x i32> %b
  ret <16 x i32> %c
}

; Test regcall when running multiple input parameters - callee saved xmms
; Computes ((%a + %b) - (%a * %b)) + %c on <32 x float>. The checks expect
; each operand to occupy two zmm registers (%zmm0-%zmm5) with %zmm6/%zmm7 as
; temporaries. Only the 32-bit target's checks expect %xmm6 and %xmm7 to be
; spilled and reloaded around the body; the 64-bit targets' checks show no
; spills.
define dso_local x86_regcallcc <32 x float> @testf32_inp(<32 x float> %a, <32 x float> %b, <32 x float> %c) nounwind {
; X32-LABEL: testf32_inp:
; X32: # %bb.0:
; X32-NEXT: subl $44, %esp
; X32-NEXT: vmovups %xmm7, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; X32-NEXT: vmovups %xmm6, (%esp) # 16-byte Spill
; X32-NEXT: vaddps %zmm2, %zmm0, %zmm6
; X32-NEXT: vaddps %zmm3, %zmm1, %zmm7
; X32-NEXT: vmulps %zmm2, %zmm0, %zmm0
; X32-NEXT: vsubps %zmm0, %zmm6, %zmm0
; X32-NEXT: vmulps %zmm3, %zmm1, %zmm1
; X32-NEXT: vsubps %zmm1, %zmm7, %zmm1
; X32-NEXT: vaddps %zmm4, %zmm0, %zmm0
; X32-NEXT: vaddps %zmm5, %zmm1, %zmm1
; X32-NEXT: vmovups (%esp), %xmm6 # 16-byte Reload
; X32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm7 # 16-byte Reload
; X32-NEXT: addl $44, %esp
; X32-NEXT: retl
;
; WIN64-LABEL: testf32_inp:
; WIN64: # %bb.0:
; WIN64-NEXT: vaddps %zmm2, %zmm0, %zmm6
; WIN64-NEXT: vaddps %zmm3, %zmm1, %zmm7
; WIN64-NEXT: vmulps %zmm2, %zmm0, %zmm0
; WIN64-NEXT: vsubps %zmm0, %zmm6, %zmm0
; WIN64-NEXT: vmulps %zmm3, %zmm1, %zmm1
; WIN64-NEXT: vsubps %zmm1, %zmm7, %zmm1
; WIN64-NEXT: vaddps %zmm4, %zmm0, %zmm0
; WIN64-NEXT: vaddps %zmm5, %zmm1, %zmm1
; WIN64-NEXT: retq
;
; LINUXOSX64-LABEL: testf32_inp:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: vaddps %zmm2, %zmm0, %zmm6
; LINUXOSX64-NEXT: vaddps %zmm3, %zmm1, %zmm7
; LINUXOSX64-NEXT: vmulps %zmm2, %zmm0, %zmm0
; LINUXOSX64-NEXT: vsubps %zmm0, %zmm6, %zmm0
; LINUXOSX64-NEXT: vmulps %zmm3, %zmm1, %zmm1
; LINUXOSX64-NEXT: vsubps %zmm1, %zmm7, %zmm1
; LINUXOSX64-NEXT: vaddps %zmm4, %zmm0, %zmm0
; LINUXOSX64-NEXT: vaddps %zmm5, %zmm1, %zmm1
; LINUXOSX64-NEXT: retq
  %x1 = fadd <32 x float> %a, %b
  %x2 = fmul <32 x float> %a, %b
  %x3 = fsub <32 x float> %x1, %x2
  %x4 = fadd <32 x float> %x3, %c
  ret <32 x float> %x4
}

; Test regcall when running multiple input parameters - callee saved GPRs
;
; Takes twelve i32 arguments and returns
;   (a1-a2)*(b1-b2) + (a3-a4)*(b3-b4) + (a5-a6)*(b5-b6)
; + (a1+a2)*(b1+b2) + (a3+a4)*(b3+b4) + (a5+a6)*(b5+b6).
; The expected assembly saves and restores the registers it uses as scratch:
; ebp/ebx on the i386 run (which also reads several arguments from the stack),
; r13/rbp/rbx on the Win64 run (no stack-argument loads), and rbp/rbx on the
; Linux run (one argument is loaded from the stack).
define dso_local x86_regcallcc i32 @testi32_inp(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %b1, i32 %b2, i32 %b3, i32 %b4, i32 %b5, i32 %b6) nounwind {
; X32-LABEL: testi32_inp:
; X32: # %bb.0:
; X32-NEXT: pushl %ebp
; X32-NEXT: pushl %ebx
; X32-NEXT: subl $20, %esp
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, (%esp) # 4-byte Spill
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, %ebx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: subl %ecx, %ebx
; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
; X32-NEXT: movl %esi, %ebp
; X32-NEXT: subl {{[0-9]+}}(%esp), %ebp
; X32-NEXT: imull %ebp, %ebx
; X32-NEXT: movl %edx, %ebp
; X32-NEXT: subl %edi, %ebp
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: subl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: imull %ebp, %ecx
; X32-NEXT: addl %ecx, %ebx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: movl %edi, %ebp
; X32-NEXT: subl {{[0-9]+}}(%esp), %ebp
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl %ecx, %eax
; X32-NEXT: subl {{[0-9]+}}(%esp), %eax
; X32-NEXT: imull %ebp, %eax
; X32-NEXT: addl %eax, %ebx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl (%esp), %ebp # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: addl {{[0-9]+}}(%esp), %edi
; X32-NEXT: addl {{[0-9]+}}(%esp), %esi
; X32-NEXT: imull %eax, %esi
; X32-NEXT: addl {{[0-9]+}}(%esp), %edx
; X32-NEXT: imull %ebp, %edx
; X32-NEXT: addl %esi, %edx
; X32-NEXT: addl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: imull %edi, %ecx
; X32-NEXT: addl %edx, %ecx
; X32-NEXT: addl %ecx, %ebx
; X32-NEXT: movl %ebx, %eax
; X32-NEXT: addl $20, %esp
; X32-NEXT: popl %ebx
; X32-NEXT: popl %ebp
; X32-NEXT: retl
;
; WIN64-LABEL: testi32_inp:
; WIN64: # %bb.0:
; WIN64-NEXT: pushq %r13
; WIN64-NEXT: pushq %rbp
; WIN64-NEXT: pushq %rbx
; WIN64-NEXT: movl %eax, %r13d
; WIN64-NEXT: subl %ecx, %eax
; WIN64-NEXT: movl %edx, %ebp
; WIN64-NEXT: subl %edi, %ebp
; WIN64-NEXT: movl %r9d, %ebx
; WIN64-NEXT: subl %r10d, %ebx
; WIN64-NEXT: imull %ebx, %eax
; WIN64-NEXT: movl %r11d, %ebx
; WIN64-NEXT: subl %r12d, %ebx
; WIN64-NEXT: imull %ebp, %ebx
; WIN64-NEXT: movl %esi, %ebp
; WIN64-NEXT: subl %r8d, %ebp
; WIN64-NEXT: addl %ebx, %eax
; WIN64-NEXT: movl %r14d, %ebx
; WIN64-NEXT: subl %r15d, %ebx
; WIN64-NEXT: imull %ebp, %ebx
; WIN64-NEXT: addl %ebx, %eax
; WIN64-NEXT: addl %ecx, %r13d
; WIN64-NEXT: addl %edi, %edx
; WIN64-NEXT: addl %r8d, %esi
; WIN64-NEXT: addl %r10d, %r9d
; WIN64-NEXT: imull %r13d, %r9d
; WIN64-NEXT: addl %r12d, %r11d
; WIN64-NEXT: imull %edx, %r11d
; WIN64-NEXT: addl %r9d, %r11d
; WIN64-NEXT: addl %r15d, %r14d
; WIN64-NEXT: imull %esi, %r14d
; WIN64-NEXT: addl %r11d, %r14d
; WIN64-NEXT: addl %r14d, %eax
; WIN64-NEXT: popq %rbx
; WIN64-NEXT: popq %rbp
; WIN64-NEXT: popq %r13
; WIN64-NEXT: retq
;
; LINUXOSX64-LABEL: testi32_inp:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: pushq %rbp
; LINUXOSX64-NEXT: pushq %rbx
; LINUXOSX64-NEXT: movl %eax, %r10d
; LINUXOSX64-NEXT: movl {{[0-9]+}}(%rsp), %r11d
; LINUXOSX64-NEXT: subl %ecx, %eax
; LINUXOSX64-NEXT: movl %edx, %ebx
; LINUXOSX64-NEXT: subl %edi, %ebx
; LINUXOSX64-NEXT: movl %r9d, %ebp
; LINUXOSX64-NEXT: subl %r12d, %ebp
; LINUXOSX64-NEXT: imull %ebp, %eax
; LINUXOSX64-NEXT: movl %r13d, %ebp
; LINUXOSX64-NEXT: subl %r14d, %ebp
; LINUXOSX64-NEXT: imull %ebx, %ebp
; LINUXOSX64-NEXT: movl %esi, %ebx
; LINUXOSX64-NEXT: subl %r8d, %ebx
; LINUXOSX64-NEXT: addl %ebp, %eax
; LINUXOSX64-NEXT: movl %r15d, %ebp
; LINUXOSX64-NEXT: subl %r11d, %ebp
; LINUXOSX64-NEXT: imull %ebx, %ebp
; LINUXOSX64-NEXT: addl %ebp, %eax
; LINUXOSX64-NEXT: addl %ecx, %r10d
; LINUXOSX64-NEXT: addl %edi, %edx
; LINUXOSX64-NEXT: addl %r8d, %esi
; LINUXOSX64-NEXT: addl %r12d, %r9d
; LINUXOSX64-NEXT: imull %r10d, %r9d
; LINUXOSX64-NEXT: addl %r14d, %r13d
; LINUXOSX64-NEXT: imull %edx, %r13d
; LINUXOSX64-NEXT: addl %r9d, %r13d
; LINUXOSX64-NEXT: addl %r11d, %r15d
; LINUXOSX64-NEXT: imull %esi, %r15d
; LINUXOSX64-NEXT: addl %r13d, %r15d
; LINUXOSX64-NEXT: addl %r15d, %eax
; LINUXOSX64-NEXT: popq %rbx
; LINUXOSX64-NEXT: popq %rbp
; LINUXOSX64-NEXT: retq
  ; Pairwise differences of the a* and b* arguments.
  %x1 = sub i32 %a1, %a2
  %x2 = sub i32 %a3, %a4
  %x3 = sub i32 %a5, %a6
  %y1 = sub i32 %b1, %b2
  %y2 = sub i32 %b3, %b4
  %y3 = sub i32 %b5, %b6
  ; Pairwise sums of the same argument pairs.
  %v1 = add i32 %a1, %a2
  %v2 = add i32 %a3, %a4
  %v3 = add i32 %a5, %a6
  %w1 = add i32 %b1, %b2
  %w2 = add i32 %b3, %b4
  %w3 = add i32 %b5, %b6
  ; Products of matching differences and of matching sums.
  %s1 = mul i32 %x1, %y1
  %s2 = mul i32 %x2, %y2
  %s3 = mul i32 %x3, %y3
  %t1 = mul i32 %v1, %w1
  %t2 = mul i32 %v2, %w2
  %t3 = mul i32 %v3, %w3
  ; Accumulate all six products into the return value.
  %m1 = add i32 %s1, %s2
  %m2 = add i32 %m1, %s3
  %n1 = add i32 %t1, %t2
  %n2 = add i32 %n1, %t3
  %r1 = add i32 %m2, %n2
  ret i32 %r1
}

; Test that parameters, overflowing register capacity, are passed through the stack
;
; Sums nine <32 x float> arguments. Each argument occupies two 512-bit values,
; so the call carries eighteen of them in total. In the expected assembly the
; i386 run adds zmm0-zmm7 register to register and reads the remaining ten
; values from memory at 8(%ebp) through 584(%ebp); both 64-bit runs add
; zmm0-zmm15 register to register and read the last two values from 16(%rbp)
; and 80(%rbp). The result is left in zmm0/zmm1.
define dso_local x86_regcallcc <32 x float> @testf32_stack(<32 x float> %a0, <32 x float> %b0, <32 x float> %c0, <32 x float> %a1, <32 x float> %b1, <32 x float> %c1, <32 x float> %a2, <32 x float> %b2, <32 x float> %c2) nounwind {
; X32-LABEL: testf32_stack:
; X32: # %bb.0:
; X32-NEXT: pushl %ebp
; X32-NEXT: movl %esp, %ebp
; X32-NEXT: andl $-64, %esp
; X32-NEXT: subl $64, %esp
; X32-NEXT: vaddps %zmm3, %zmm1, %zmm1
; X32-NEXT: vaddps %zmm2, %zmm0, %zmm0
; X32-NEXT: vaddps %zmm0, %zmm4, %zmm0
; X32-NEXT: vaddps %zmm1, %zmm5, %zmm1
; X32-NEXT: vaddps %zmm1, %zmm7, %zmm1
; X32-NEXT: vaddps %zmm0, %zmm6, %zmm0
; X32-NEXT: vaddps 8(%ebp), %zmm0, %zmm0
; X32-NEXT: vaddps 72(%ebp), %zmm1, %zmm1
; X32-NEXT: vaddps 200(%ebp), %zmm1, %zmm1
; X32-NEXT: vaddps 136(%ebp), %zmm0, %zmm0
; X32-NEXT: vaddps 264(%ebp), %zmm0, %zmm0
; X32-NEXT: vaddps 328(%ebp), %zmm1, %zmm1
; X32-NEXT: vaddps 456(%ebp), %zmm1, %zmm1
; X32-NEXT: vaddps 392(%ebp), %zmm0, %zmm0
; X32-NEXT: vaddps 520(%ebp), %zmm0, %zmm0
; X32-NEXT: vaddps 584(%ebp), %zmm1, %zmm1
; X32-NEXT: movl %ebp, %esp
; X32-NEXT: popl %ebp
; X32-NEXT: retl
;
; WIN64-LABEL: testf32_stack:
; WIN64: # %bb.0:
; WIN64-NEXT: pushq %rbp
; WIN64-NEXT: subq $48, %rsp
; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rbp
; WIN64-NEXT: andq $-64, %rsp
; WIN64-NEXT: vaddps %zmm3, %zmm1, %zmm1
; WIN64-NEXT: vaddps %zmm2, %zmm0, %zmm0
; WIN64-NEXT: vaddps %zmm0, %zmm4, %zmm0
; WIN64-NEXT: vaddps %zmm1, %zmm5, %zmm1
; WIN64-NEXT: vaddps %zmm1, %zmm7, %zmm1
; WIN64-NEXT: vaddps %zmm0, %zmm6, %zmm0
; WIN64-NEXT: vaddps %zmm0, %zmm8, %zmm0
; WIN64-NEXT: vaddps %zmm1, %zmm9, %zmm1
; WIN64-NEXT: vaddps %zmm1, %zmm11, %zmm1
; WIN64-NEXT: vaddps %zmm0, %zmm10, %zmm0
; WIN64-NEXT: vaddps %zmm0, %zmm12, %zmm0
; WIN64-NEXT: vaddps %zmm1, %zmm13, %zmm1
; WIN64-NEXT: vaddps %zmm1, %zmm15, %zmm1
; WIN64-NEXT: vaddps %zmm0, %zmm14, %zmm0
; WIN64-NEXT: vaddps 16(%rbp), %zmm0, %zmm0
; WIN64-NEXT: vaddps 80(%rbp), %zmm1, %zmm1
; WIN64-NEXT: movq %rbp, %rsp
; WIN64-NEXT: popq %rbp
; WIN64-NEXT: retq
;
; LINUXOSX64-LABEL: testf32_stack:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: pushq %rbp
; LINUXOSX64-NEXT: movq %rsp, %rbp
; LINUXOSX64-NEXT: andq $-64, %rsp
; LINUXOSX64-NEXT: subq $64, %rsp
; LINUXOSX64-NEXT: vaddps %zmm3, %zmm1, %zmm1
; LINUXOSX64-NEXT: vaddps %zmm2, %zmm0, %zmm0
; LINUXOSX64-NEXT: vaddps %zmm0, %zmm4, %zmm0
; LINUXOSX64-NEXT: vaddps %zmm1, %zmm5, %zmm1
; LINUXOSX64-NEXT: vaddps %zmm1, %zmm7, %zmm1
; LINUXOSX64-NEXT: vaddps %zmm0, %zmm6, %zmm0
; LINUXOSX64-NEXT: vaddps %zmm0, %zmm8, %zmm0
; LINUXOSX64-NEXT: vaddps %zmm1, %zmm9, %zmm1
; LINUXOSX64-NEXT: vaddps %zmm1, %zmm11, %zmm1
; LINUXOSX64-NEXT: vaddps %zmm0, %zmm10, %zmm0
; LINUXOSX64-NEXT: vaddps %zmm0, %zmm12, %zmm0
; LINUXOSX64-NEXT: vaddps %zmm1, %zmm13, %zmm1
; LINUXOSX64-NEXT: vaddps %zmm1, %zmm15, %zmm1
; LINUXOSX64-NEXT: vaddps %zmm0, %zmm14, %zmm0
; LINUXOSX64-NEXT: vaddps 16(%rbp), %zmm0, %zmm0
; LINUXOSX64-NEXT: vaddps 80(%rbp), %zmm1, %zmm1
; LINUXOSX64-NEXT: movq %rbp, %rsp
; LINUXOSX64-NEXT: popq %rbp
; LINUXOSX64-NEXT: retq
  ; Running sum over all nine arguments, in declaration order.
  %x1 = fadd <32 x float> %a0, %b0
  %x2 = fadd <32 x float> %c0, %x1
  %x3 = fadd <32 x float> %a1, %x2
  %x4 = fadd <32 x float> %b1, %x3
  %x5 = fadd <32 x float> %c1, %x4
  %x6 = fadd <32 x float> %a2, %x5
  %x7 = fadd <32 x float> %b2, %x6
  %x8 = fadd <32 x float> %c2, %x7
  ret <32 x float> %x8
}

; Test regcall when passing/retrieving mixed types
;
; Arguments, in order: double, float, i8 signext, i32, i64, i16 signext, i32*.
; Every argument (and the i32 loaded through the pointer) is converted to
; double, the values are summed, and the sum is converted back to i32.
; Register assignment visible in the expected assembly: double in xmm0, float
; in xmm1, i8 in eax, i32 in ecx. On the 64-bit runs the i64 is in rdx, the i16
; in edi and the pointer in rsi. On the i386 run the i64 is assembled from edx
; and edi, the i16 is in esi, and the pointer is loaded from the stack into ebx.
; Attribute group #0 is defined outside this function.
define dso_local x86_regcallcc i32 @test_argRetMixTypes(double, float, i8 signext, i32, i64, i16 signext, i32*) #0 {
; X32-LABEL: test_argRetMixTypes:
; X32: # %bb.0:
; X32-NEXT: pushl %ebx
; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X32-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
; X32-NEXT: vaddsd %xmm0, %xmm1, %xmm0
; X32-NEXT: vcvtsi2sd %eax, %xmm2, %xmm1
; X32-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; X32-NEXT: vcvtsi2sd %ecx, %xmm2, %xmm1
; X32-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; X32-NEXT: vmovd %edx, %xmm1
; X32-NEXT: vpinsrd $1, %edi, %xmm1, %xmm1
; X32-NEXT: vcvtqq2pd %ymm1, %ymm1
; X32-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; X32-NEXT: vcvtsi2sd %esi, %xmm2, %xmm1
; X32-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; X32-NEXT: vcvtsi2sdl (%ebx), %xmm2, %xmm1
; X32-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; X32-NEXT: vcvttsd2si %xmm0, %eax
; X32-NEXT: popl %ebx
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; WIN64-LABEL: test_argRetMixTypes:
; WIN64: # %bb.0:
; WIN64-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
; WIN64-NEXT: vaddsd %xmm0, %xmm1, %xmm0
; WIN64-NEXT: vcvtsi2sd %eax, %xmm2, %xmm1
; WIN64-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; WIN64-NEXT: vcvtsi2sd %ecx, %xmm2, %xmm1
; WIN64-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; WIN64-NEXT: vcvtsi2sd %rdx, %xmm2, %xmm1
; WIN64-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; WIN64-NEXT: vcvtsi2sd %edi, %xmm2, %xmm1
; WIN64-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; WIN64-NEXT: vcvtsi2sdl (%rsi), %xmm2, %xmm1
; WIN64-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; WIN64-NEXT: vcvttsd2si %xmm0, %eax
; WIN64-NEXT: retq
;
; LINUXOSX64-LABEL: test_argRetMixTypes:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
; LINUXOSX64-NEXT: vaddsd %xmm0, %xmm1, %xmm0
; LINUXOSX64-NEXT: vcvtsi2sd %eax, %xmm2, %xmm1
; LINUXOSX64-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; LINUXOSX64-NEXT: vcvtsi2sd %ecx, %xmm2, %xmm1
; LINUXOSX64-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; LINUXOSX64-NEXT: vcvtsi2sd %rdx, %xmm2, %xmm1
; LINUXOSX64-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; LINUXOSX64-NEXT: vcvtsi2sd %edi, %xmm2, %xmm1
; LINUXOSX64-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; LINUXOSX64-NEXT: vcvtsi2sdl (%rsi), %xmm2, %xmm1
; LINUXOSX64-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; LINUXOSX64-NEXT: vcvttsd2si %xmm0, %eax
; LINUXOSX64-NEXT: retq
  ; float + double
  %8 = fpext float %1 to double
  %9 = fadd double %8, %0
  ; + i8
  %10 = sitofp i8 %2 to double
  %11 = fadd double %9, %10
  ; + i32
  %12 = sitofp i32 %3 to double
  %13 = fadd double %11, %12
  ; + i64
  %14 = sitofp i64 %4 to double
  %15 = fadd double %13, %14
  ; + i16
  %16 = sitofp i16 %5 to double
  %17 = fadd double %15, %16
  ; + the i32 the pointer argument refers to
  %18 = load i32, i32* %6, align 4
  %19 = sitofp i32 %18 to double
  %20 = fadd double %17, %19
  %21 = fptosi double %20 to i32
  ret i32 %21
}

; Mixed-type aggregate (float, double, i32, i8, i64) used as the return type of
; test_argMultiRet.
%struct.complex = type { float, double, i32, i8, i64}

; Test regcall when returning a struct of mixed types
;
; Returns a %struct.complex built from: the float argument unchanged, the
; double argument plus 5.0, and the constants i32 4, i8 7 and i64 999. The i32,
; i8 and i64 arguments are unused. In the expected assembly the double is
; updated in xmm1, and 4, 7 and 999 are materialized in eax, cl and edx (the
; i386 run additionally zeroes edi). No instruction touches xmm0, so the float
; is presumably returned in the register it arrived in.
; Attribute group #0 is defined outside this function.
define x86_regcallcc %struct.complex @test_argMultiRet(float, double, i32, i8, i64) local_unnamed_addr #0 {
; X32-LABEL: test_argMultiRet:
; X32: # %bb.0:
; X32-NEXT: vaddsd __real@4014000000000000, %xmm1, %xmm1
; X32-NEXT: movl $4, %eax
; X32-NEXT: movb $7, %cl
; X32-NEXT: movl $999, %edx # imm = 0x3E7
; X32-NEXT: xorl %edi, %edi
; X32-NEXT: retl
;
; WIN64-LABEL: test_argMultiRet:
; WIN64: # %bb.0:
; WIN64-NEXT: vaddsd __real@{{.*}}(%rip), %xmm1, %xmm1
; WIN64-NEXT: movl $999, %edx # imm = 0x3E7
; WIN64-NEXT: movl $4, %eax
; WIN64-NEXT: movb $7, %cl
; WIN64-NEXT: retq
;
; LINUXOSX64-LABEL: test_argMultiRet:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: vaddsd {{.*}}(%rip), %xmm1, %xmm1
; LINUXOSX64-NEXT: movl $999, %edx # imm = 0x3E7
; LINUXOSX64-NEXT: movl $4, %eax
; LINUXOSX64-NEXT: movb $7, %cl
; LINUXOSX64-NEXT: retq
  %6 = fadd double %1, 5.000000e+00
  ; Populate the aggregate field by field, in struct order.
  %7 = insertvalue %struct.complex undef, float %0, 0
  %8 = insertvalue %struct.complex %7, double %6, 1
  %9 = insertvalue %struct.complex %8, i32 4, 2
  %10 = insertvalue %struct.complex %9, i8 7, 3
  %11 = insertvalue %struct.complex %10, i64 999, 4
  ret %struct.complex %11
}
|