; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 -O3 | FileCheck %s --check-prefixes=SSE-X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 -O3 | FileCheck %s --check-prefixes=SSE-X64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+fma -O3 | FileCheck %s --check-prefixes=AVX-X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+fma -O3 | FileCheck %s --check-prefixes=AVX-X64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=AVX-X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=AVX-X64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=-sse -O3 | FileCheck %s --check-prefixes=X87

; Scalar strict floating-point lowering tests for x86.
;
; Each function below calls exactly one llvm.experimental.constrained.*
; intrinsic on f32 or f64, and the expected assembly is checked for every llc
; invocation listed at the top of the file: i686 and x86_64 with +sse2, with
; +fma, and with +avx512f plus +avx512vl, and i686 with SSE disabled.
; The +fma and the AVX-512 invocations share the same pair of check prefixes,
; so their output has to be identical for these functions.

; Constrained intrinsics under test. The trailing metadata operands receive the
; rounding mode and the exception behavior at the call sites below; fpext is
; declared with a single metadata operand and is only passed the exception
; behavior.
declare double @llvm.experimental.constrained.fadd.f64(double, double, metadata, metadata)
declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata)
declare double @llvm.experimental.constrained.fsub.f64(double, double, metadata, metadata)
declare float @llvm.experimental.constrained.fsub.f32(float, float, metadata, metadata)
declare double @llvm.experimental.constrained.fmul.f64(double, double, metadata, metadata)
declare float @llvm.experimental.constrained.fmul.f32(float, float, metadata, metadata)
declare double @llvm.experimental.constrained.fdiv.f64(double, double, metadata, metadata)
declare float @llvm.experimental.constrained.fdiv.f32(float, float, metadata, metadata)
declare double @llvm.experimental.constrained.fpext.f64.f32(float, metadata)
declare float @llvm.experimental.constrained.fptrunc.f32.f64(double, metadata, metadata)
declare float @llvm.experimental.constrained.sqrt.f32(float, metadata, metadata)
declare double @llvm.experimental.constrained.sqrt.f64(double, metadata, metadata)
declare float @llvm.experimental.constrained.fma.f32(float, float, float, metadata, metadata)
declare double @llvm.experimental.constrained.fma.f64(double, double, double, metadata, metadata)

; Strict f64 addition (round.dynamic, fpexcept.strict).
; In the i686 SSE and AVX expectations the sum is computed in xmm0, stored to
; an 8-byte-aligned stack slot and reloaded with fldl before returning
; (presumably because the 32-bit calling convention returns FP values on the
; x87 stack). A `wait` follows the x87 instruction in every sequence that uses
; the x87 unit. On x86_64 a single (v)addsd on the argument registers is
; expected; with SSE disabled the expectation is fldl + faddl.
define double @fadd_f64(double %a, double %b) nounwind strictfp {
; SSE-X86-LABEL: fadd_f64:
; SSE-X86: # %bb.0:
; SSE-X86-NEXT: pushl %ebp
; SSE-X86-NEXT: movl %esp, %ebp
; SSE-X86-NEXT: andl $-8, %esp
; SSE-X86-NEXT: subl $8, %esp
; SSE-X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-X86-NEXT: addsd 16(%ebp), %xmm0
; SSE-X86-NEXT: movsd %xmm0, (%esp)
; SSE-X86-NEXT: fldl (%esp)
; SSE-X86-NEXT: wait
; SSE-X86-NEXT: movl %ebp, %esp
; SSE-X86-NEXT: popl %ebp
; SSE-X86-NEXT: retl
;
; SSE-X64-LABEL: fadd_f64:
; SSE-X64: # %bb.0:
; SSE-X64-NEXT: addsd %xmm1, %xmm0
; SSE-X64-NEXT: retq
;
; AVX-X86-LABEL: fadd_f64:
; AVX-X86: # %bb.0:
; AVX-X86-NEXT: pushl %ebp
; AVX-X86-NEXT: movl %esp, %ebp
; AVX-X86-NEXT: andl $-8, %esp
; AVX-X86-NEXT: subl $8, %esp
; AVX-X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-X86-NEXT: vaddsd 16(%ebp), %xmm0, %xmm0
; AVX-X86-NEXT: vmovsd %xmm0, (%esp)
; AVX-X86-NEXT: fldl (%esp)
; AVX-X86-NEXT: wait
; AVX-X86-NEXT: movl %ebp, %esp
; AVX-X86-NEXT: popl %ebp
; AVX-X86-NEXT: retl
;
; AVX-X64-LABEL: fadd_f64:
; AVX-X64: # %bb.0:
; AVX-X64-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; AVX-X64-NEXT: retq
;
; X87-LABEL: fadd_f64:
; X87: # %bb.0:
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: faddl {{[0-9]+}}(%esp)
; X87-NEXT: wait
; X87-NEXT: retl
  %ret = call double @llvm.experimental.constrained.fadd.f64(double %a, double %b, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
  ret double %ret
}

; Strict f32 addition. Same shape as the f64 case, except that the i686 SSE
; and AVX expectations obtain the 4-byte spill slot with pushl/popl %eax
; instead of setting up an aligned frame.
define float @fadd_f32(float %a, float %b) nounwind strictfp {
; SSE-X86-LABEL: fadd_f32:
; SSE-X86: # %bb.0:
; SSE-X86-NEXT: pushl %eax
; SSE-X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-X86-NEXT: addss {{[0-9]+}}(%esp), %xmm0
; SSE-X86-NEXT: movss %xmm0, (%esp)
; SSE-X86-NEXT: flds (%esp)
; SSE-X86-NEXT: wait
; SSE-X86-NEXT: popl %eax
; SSE-X86-NEXT: retl
;
; SSE-X64-LABEL: fadd_f32:
; SSE-X64: # %bb.0:
; SSE-X64-NEXT: addss %xmm1, %xmm0
; SSE-X64-NEXT: retq
;
; AVX-X86-LABEL: fadd_f32:
; AVX-X86: # %bb.0:
; AVX-X86-NEXT: pushl %eax
; AVX-X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-X86-NEXT: vaddss {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-X86-NEXT: vmovss %xmm0, (%esp)
; AVX-X86-NEXT: flds (%esp)
; AVX-X86-NEXT: wait
; AVX-X86-NEXT: popl %eax
; AVX-X86-NEXT: retl
;
; AVX-X64-LABEL: fadd_f32:
; AVX-X64: # %bb.0:
; AVX-X64-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-X64-NEXT: retq
;
; X87-LABEL: fadd_f32:
; X87: # %bb.0:
; X87-NEXT: flds {{[0-9]+}}(%esp)
; X87-NEXT: fadds {{[0-9]+}}(%esp)
; X87-NEXT: wait
; X87-NEXT: retl
  %ret = call float @llvm.experimental.constrained.fadd.f32(float %a, float %b, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
  ret float %ret
}

; Strict f64 subtraction (%a - %b); expectations mirror fadd_f64 with
; (v)subsd / fsubl.
define double @fsub_f64(double %a, double %b) nounwind strictfp {
; SSE-X86-LABEL: fsub_f64:
; SSE-X86: # %bb.0:
; SSE-X86-NEXT: pushl %ebp
; SSE-X86-NEXT: movl %esp, %ebp
; SSE-X86-NEXT: andl $-8, %esp
; SSE-X86-NEXT: subl $8, %esp
; SSE-X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-X86-NEXT: subsd 16(%ebp), %xmm0
; SSE-X86-NEXT: movsd %xmm0, (%esp)
; SSE-X86-NEXT: fldl (%esp)
; SSE-X86-NEXT: wait
; SSE-X86-NEXT: movl %ebp, %esp
; SSE-X86-NEXT: popl %ebp
; SSE-X86-NEXT: retl
;
; SSE-X64-LABEL: fsub_f64:
; SSE-X64: # %bb.0:
; SSE-X64-NEXT: subsd %xmm1, %xmm0
; SSE-X64-NEXT: retq
;
; AVX-X86-LABEL: fsub_f64:
; AVX-X86: # %bb.0:
; AVX-X86-NEXT: pushl %ebp
; AVX-X86-NEXT: movl %esp, %ebp
; AVX-X86-NEXT: andl $-8, %esp
; AVX-X86-NEXT: subl $8, %esp
; AVX-X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-X86-NEXT: vsubsd 16(%ebp), %xmm0, %xmm0
; AVX-X86-NEXT: vmovsd %xmm0, (%esp)
; AVX-X86-NEXT: fldl (%esp)
; AVX-X86-NEXT: wait
; AVX-X86-NEXT: movl %ebp, %esp
; AVX-X86-NEXT: popl %ebp
; AVX-X86-NEXT: retl
;
; AVX-X64-LABEL: fsub_f64:
; AVX-X64: # %bb.0:
; AVX-X64-NEXT: vsubsd %xmm1, %xmm0, %xmm0
; AVX-X64-NEXT: retq
;
; X87-LABEL: fsub_f64:
; X87: # %bb.0:
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: fsubl {{[0-9]+}}(%esp)
; X87-NEXT: wait
; X87-NEXT: retl
  %ret = call double @llvm.experimental.constrained.fsub.f64(double %a, double %b, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
  ret double %ret
}

; Strict f32 subtraction (%a - %b); expectations mirror fadd_f32 with
; (v)subss / fsubs.
define float @fsub_f32(float %a, float %b) nounwind strictfp {
; SSE-X86-LABEL: fsub_f32:
; SSE-X86: # %bb.0:
; SSE-X86-NEXT: pushl %eax
; SSE-X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-X86-NEXT: subss {{[0-9]+}}(%esp), %xmm0
; SSE-X86-NEXT: movss %xmm0, (%esp)
; SSE-X86-NEXT: flds (%esp)
; SSE-X86-NEXT: wait
; SSE-X86-NEXT: popl %eax
; SSE-X86-NEXT: retl
;
; SSE-X64-LABEL: fsub_f32:
; SSE-X64: # %bb.0:
; SSE-X64-NEXT: subss %xmm1, %xmm0
; SSE-X64-NEXT: retq
;
; AVX-X86-LABEL: fsub_f32:
; AVX-X86: # %bb.0:
; AVX-X86-NEXT: pushl %eax
; AVX-X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-X86-NEXT: vsubss {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-X86-NEXT: vmovss %xmm0, (%esp)
; AVX-X86-NEXT: flds (%esp)
; AVX-X86-NEXT: wait
; AVX-X86-NEXT: popl %eax
; AVX-X86-NEXT: retl
;
; AVX-X64-LABEL: fsub_f32:
; AVX-X64: # %bb.0:
; AVX-X64-NEXT: vsubss %xmm1, %xmm0, %xmm0
; AVX-X64-NEXT: retq
;
; X87-LABEL: fsub_f32:
; X87: # %bb.0:
; X87-NEXT: flds {{[0-9]+}}(%esp)
; X87-NEXT: fsubs {{[0-9]+}}(%esp)
; X87-NEXT: wait
; X87-NEXT: retl
  %ret = call float @llvm.experimental.constrained.fsub.f32(float %a, float %b, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
  ret float %ret
}

; Strict f64 multiplication; expectations mirror fadd_f64 with
; (v)mulsd / fmull.
define double @fmul_f64(double %a, double %b) nounwind strictfp {
; SSE-X86-LABEL: fmul_f64:
; SSE-X86: # %bb.0:
; SSE-X86-NEXT: pushl %ebp
; SSE-X86-NEXT: movl %esp, %ebp
; SSE-X86-NEXT: andl $-8, %esp
; SSE-X86-NEXT: subl $8, %esp
; SSE-X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-X86-NEXT: mulsd 16(%ebp), %xmm0
; SSE-X86-NEXT: movsd %xmm0, (%esp)
; SSE-X86-NEXT: fldl (%esp)
; SSE-X86-NEXT: wait
; SSE-X86-NEXT: movl %ebp, %esp
; SSE-X86-NEXT: popl %ebp
; SSE-X86-NEXT: retl
;
; SSE-X64-LABEL: fmul_f64:
; SSE-X64: # %bb.0:
; SSE-X64-NEXT: mulsd %xmm1, %xmm0
; SSE-X64-NEXT: retq
;
; AVX-X86-LABEL: fmul_f64:
; AVX-X86: # %bb.0:
; AVX-X86-NEXT: pushl %ebp
; AVX-X86-NEXT: movl %esp, %ebp
; AVX-X86-NEXT: andl $-8, %esp
; AVX-X86-NEXT: subl $8, %esp
; AVX-X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-X86-NEXT: vmulsd 16(%ebp), %xmm0, %xmm0
; AVX-X86-NEXT: vmovsd %xmm0, (%esp)
; AVX-X86-NEXT: fldl (%esp)
; AVX-X86-NEXT: wait
; AVX-X86-NEXT: movl %ebp, %esp
; AVX-X86-NEXT: popl %ebp
; AVX-X86-NEXT: retl
;
; AVX-X64-LABEL: fmul_f64:
; AVX-X64: # %bb.0:
; AVX-X64-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX-X64-NEXT: retq
;
; X87-LABEL: fmul_f64:
; X87: # %bb.0:
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: fmull {{[0-9]+}}(%esp)
; X87-NEXT: wait
; X87-NEXT: retl
  %ret = call double @llvm.experimental.constrained.fmul.f64(double %a, double %b, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
  ret double %ret
}

; Strict f32 multiplication; expectations mirror fadd_f32 with
; (v)mulss / fmuls.
define float @fmul_f32(float %a, float %b) nounwind strictfp {
; SSE-X86-LABEL: fmul_f32:
; SSE-X86: # %bb.0:
; SSE-X86-NEXT: pushl %eax
; SSE-X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-X86-NEXT: mulss {{[0-9]+}}(%esp), %xmm0
; SSE-X86-NEXT: movss %xmm0, (%esp)
; SSE-X86-NEXT: flds (%esp)
; SSE-X86-NEXT: wait
; SSE-X86-NEXT: popl %eax
; SSE-X86-NEXT: retl
;
; SSE-X64-LABEL: fmul_f32:
; SSE-X64: # %bb.0:
; SSE-X64-NEXT: mulss %xmm1, %xmm0
; SSE-X64-NEXT: retq
;
; AVX-X86-LABEL: fmul_f32:
; AVX-X86: # %bb.0:
; AVX-X86-NEXT: pushl %eax
; AVX-X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-X86-NEXT: vmulss {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-X86-NEXT: vmovss %xmm0, (%esp)
; AVX-X86-NEXT: flds (%esp)
; AVX-X86-NEXT: wait
; AVX-X86-NEXT: popl %eax
; AVX-X86-NEXT: retl
;
; AVX-X64-LABEL: fmul_f32:
; AVX-X64: # %bb.0:
; AVX-X64-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX-X64-NEXT: retq
;
; X87-LABEL: fmul_f32:
; X87: # %bb.0:
; X87-NEXT: flds {{[0-9]+}}(%esp)
; X87-NEXT: fmuls {{[0-9]+}}(%esp)
; X87-NEXT: wait
; X87-NEXT: retl
  %ret = call float @llvm.experimental.constrained.fmul.f32(float %a, float %b, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
  ret float %ret
}

; Strict f64 division (%a / %b); expectations mirror fadd_f64 with
; (v)divsd / fdivl.
define double @fdiv_f64(double %a, double %b) nounwind strictfp {
; SSE-X86-LABEL: fdiv_f64:
; SSE-X86: # %bb.0:
; SSE-X86-NEXT: pushl %ebp
; SSE-X86-NEXT: movl %esp, %ebp
; SSE-X86-NEXT: andl $-8, %esp
; SSE-X86-NEXT: subl $8, %esp
; SSE-X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-X86-NEXT: divsd 16(%ebp), %xmm0
; SSE-X86-NEXT: movsd %xmm0, (%esp)
; SSE-X86-NEXT: fldl (%esp)
; SSE-X86-NEXT: wait
; SSE-X86-NEXT: movl %ebp, %esp
; SSE-X86-NEXT: popl %ebp
; SSE-X86-NEXT: retl
;
; SSE-X64-LABEL: fdiv_f64:
; SSE-X64: # %bb.0:
; SSE-X64-NEXT: divsd %xmm1, %xmm0
; SSE-X64-NEXT: retq
;
; AVX-X86-LABEL: fdiv_f64:
; AVX-X86: # %bb.0:
; AVX-X86-NEXT: pushl %ebp
; AVX-X86-NEXT: movl %esp, %ebp
; AVX-X86-NEXT: andl $-8, %esp
; AVX-X86-NEXT: subl $8, %esp
; AVX-X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-X86-NEXT: vdivsd 16(%ebp), %xmm0, %xmm0
; AVX-X86-NEXT: vmovsd %xmm0, (%esp)
; AVX-X86-NEXT: fldl (%esp)
; AVX-X86-NEXT: wait
; AVX-X86-NEXT: movl %ebp, %esp
; AVX-X86-NEXT: popl %ebp
; AVX-X86-NEXT: retl
;
; AVX-X64-LABEL: fdiv_f64:
; AVX-X64: # %bb.0:
; AVX-X64-NEXT: vdivsd %xmm1, %xmm0, %xmm0
; AVX-X64-NEXT: retq
;
; X87-LABEL: fdiv_f64:
; X87: # %bb.0:
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: fdivl {{[0-9]+}}(%esp)
; X87-NEXT: wait
; X87-NEXT: retl
  %ret = call double @llvm.experimental.constrained.fdiv.f64(double %a, double %b, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
  ret double %ret
}

; Strict f32 division (%a / %b); expectations mirror fadd_f32 with
; (v)divss / fdivs.
define float @fdiv_f32(float %a, float %b) nounwind strictfp {
; SSE-X86-LABEL: fdiv_f32:
; SSE-X86: # %bb.0:
; SSE-X86-NEXT: pushl %eax
; SSE-X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-X86-NEXT: divss {{[0-9]+}}(%esp), %xmm0
; SSE-X86-NEXT: movss %xmm0, (%esp)
; SSE-X86-NEXT: flds (%esp)
; SSE-X86-NEXT: wait
; SSE-X86-NEXT: popl %eax
; SSE-X86-NEXT: retl
;
; SSE-X64-LABEL: fdiv_f32:
; SSE-X64: # %bb.0:
; SSE-X64-NEXT: divss %xmm1, %xmm0
; SSE-X64-NEXT: retq
;
; AVX-X86-LABEL: fdiv_f32:
; AVX-X86: # %bb.0:
; AVX-X86-NEXT: pushl %eax
; AVX-X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-X86-NEXT: vdivss {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-X86-NEXT: vmovss %xmm0, (%esp)
; AVX-X86-NEXT: flds (%esp)
; AVX-X86-NEXT: wait
; AVX-X86-NEXT: popl %eax
; AVX-X86-NEXT: retl
;
; AVX-X64-LABEL: fdiv_f32:
; AVX-X64: # %bb.0:
; AVX-X64-NEXT: vdivss %xmm1, %xmm0, %xmm0
; AVX-X64-NEXT: retq
;
; X87-LABEL: fdiv_f32:
; X87: # %bb.0:
; X87-NEXT: flds {{[0-9]+}}(%esp)
; X87-NEXT: fdivs {{[0-9]+}}(%esp)
; X87-NEXT: wait
; X87-NEXT: retl
  %ret = call float @llvm.experimental.constrained.fdiv.f32(float %a, float %b, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
  ret float %ret
}

; Strict f32 -> f64 extension through memory: loads a float from %val, extends
; it and stores the double to %ret. Only the exception-behavior metadata is
; passed. With SSE/AVX a (v)cvtss2sd is expected; with SSE disabled the value
; is loaded with flds and stored back with fstpl.
define void @fpext_f32_to_f64(float* %val, double* %ret) nounwind strictfp {
; SSE-X86-LABEL: fpext_f32_to_f64:
; SSE-X86: # %bb.0:
; SSE-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; SSE-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; SSE-X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-X86-NEXT: cvtss2sd %xmm0, %xmm0
; SSE-X86-NEXT: movsd %xmm0, (%eax)
; SSE-X86-NEXT: retl
;
; SSE-X64-LABEL: fpext_f32_to_f64:
; SSE-X64: # %bb.0:
; SSE-X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-X64-NEXT: cvtss2sd %xmm0, %xmm0
; SSE-X64-NEXT: movsd %xmm0, (%rsi)
; SSE-X64-NEXT: retq
;
; AVX-X86-LABEL: fpext_f32_to_f64:
; AVX-X86: # %bb.0:
; AVX-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; AVX-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; AVX-X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-X86-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
; AVX-X86-NEXT: vmovsd %xmm0, (%eax)
; AVX-X86-NEXT: retl
;
; AVX-X64-LABEL: fpext_f32_to_f64:
; AVX-X64: # %bb.0:
; AVX-X64-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-X64-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
; AVX-X64-NEXT: vmovsd %xmm0, (%rsi)
; AVX-X64-NEXT: retq
;
; X87-LABEL: fpext_f32_to_f64:
; X87: # %bb.0:
; X87-NEXT: movl {{[0-9]+}}(%esp), %eax
; X87-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X87-NEXT: flds (%ecx)
; X87-NEXT: fstpl (%eax)
; X87-NEXT: wait
; X87-NEXT: retl
  %1 = load float, float* %val, align 4
  %res = call double @llvm.experimental.constrained.fpext.f64.f32(float %1, metadata !"fpexcept.strict") #0
  store double %res, double* %ret, align 8
  ret void
}

; Strict f64 -> f32 truncation through memory: loads a double from %val,
; truncates it and stores the float to %ret. With SSE/AVX a (v)cvtsd2ss is
; expected. With SSE disabled the expected sequence first stores the value as
; a float to a stack temporary (fstps), reloads it (flds) and only then stores
; it to the destination.
define void @fptrunc_double_to_f32(double* %val, float *%ret) nounwind strictfp {
; SSE-X86-LABEL: fptrunc_double_to_f32:
; SSE-X86: # %bb.0:
; SSE-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; SSE-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; SSE-X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-X86-NEXT: cvtsd2ss %xmm0, %xmm0
; SSE-X86-NEXT: movss %xmm0, (%eax)
; SSE-X86-NEXT: retl
;
; SSE-X64-LABEL: fptrunc_double_to_f32:
; SSE-X64: # %bb.0:
; SSE-X64-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-X64-NEXT: cvtsd2ss %xmm0, %xmm0
; SSE-X64-NEXT: movss %xmm0, (%rsi)
; SSE-X64-NEXT: retq
;
; AVX-X86-LABEL: fptrunc_double_to_f32:
; AVX-X86: # %bb.0:
; AVX-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; AVX-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; AVX-X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-X86-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0
; AVX-X86-NEXT: vmovss %xmm0, (%eax)
; AVX-X86-NEXT: retl
;
; AVX-X64-LABEL: fptrunc_double_to_f32:
; AVX-X64: # %bb.0:
; AVX-X64-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-X64-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0
; AVX-X64-NEXT: vmovss %xmm0, (%rsi)
; AVX-X64-NEXT: retq
;
; X87-LABEL: fptrunc_double_to_f32:
; X87: # %bb.0:
; X87-NEXT: pushl %eax
; X87-NEXT: movl {{[0-9]+}}(%esp), %eax
; X87-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X87-NEXT: fldl (%ecx)
; X87-NEXT: fstps (%esp)
; X87-NEXT: flds (%esp)
; X87-NEXT: fstps (%eax)
; X87-NEXT: wait
; X87-NEXT: popl %eax
; X87-NEXT: retl
  %1 = load double, double* %val, align 8
  %res = call float @llvm.experimental.constrained.fptrunc.f32.f64(double %1, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
  store float %res, float* %ret, align 4
  ret void
}

; Strict f64 square root performed in place on the double that %a points to:
; the value is loaded, passed to the constrained sqrt intrinsic and stored
; back to the same address. Expected instructions are (v)sqrtsd, or fsqrt with
; SSE disabled.
define void @fsqrt_f64(double* %a) nounwind strictfp {
; SSE-X86-LABEL: fsqrt_f64:
; SSE-X86: # %bb.0:
; SSE-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; SSE-X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-X86-NEXT: sqrtsd %xmm0, %xmm0
; SSE-X86-NEXT: movsd %xmm0, (%eax)
; SSE-X86-NEXT: retl
;
; SSE-X64-LABEL: fsqrt_f64:
; SSE-X64: # %bb.0:
; SSE-X64-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-X64-NEXT: sqrtsd %xmm0, %xmm0
; SSE-X64-NEXT: movsd %xmm0, (%rdi)
; SSE-X64-NEXT: retq
;
; AVX-X86-LABEL: fsqrt_f64:
; AVX-X86: # %bb.0:
; AVX-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; AVX-X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-X86-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0
; AVX-X86-NEXT: vmovsd %xmm0, (%eax)
; AVX-X86-NEXT: retl
;
; AVX-X64-LABEL: fsqrt_f64:
; AVX-X64: # %bb.0:
; AVX-X64-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-X64-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0
; AVX-X64-NEXT: vmovsd %xmm0, (%rdi)
; AVX-X64-NEXT: retq
;
; X87-LABEL: fsqrt_f64:
; X87: # %bb.0:
; X87-NEXT: movl {{[0-9]+}}(%esp), %eax
; X87-NEXT: fldl (%eax)
; X87-NEXT: fsqrt
; X87-NEXT: fstpl (%eax)
; X87-NEXT: wait
; X87-NEXT: retl
  %1 = load double, double* %a, align 8
  %res = call double @llvm.experimental.constrained.sqrt.f64(double %1, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
  store double %res, double* %a, align 8
  ret void
}

; Strict f32 square root performed in place on the float that %a points to;
; expectations mirror fsqrt_f64 with (v)sqrtss and flds/fstps.
define void @fsqrt_f32(float* %a) nounwind strictfp {
; SSE-X86-LABEL: fsqrt_f32:
; SSE-X86: # %bb.0:
; SSE-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; SSE-X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-X86-NEXT: sqrtss %xmm0, %xmm0
; SSE-X86-NEXT: movss %xmm0, (%eax)
; SSE-X86-NEXT: retl
;
; SSE-X64-LABEL: fsqrt_f32:
; SSE-X64: # %bb.0:
; SSE-X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-X64-NEXT: sqrtss %xmm0, %xmm0
; SSE-X64-NEXT: movss %xmm0, (%rdi)
; SSE-X64-NEXT: retq
;
; AVX-X86-LABEL: fsqrt_f32:
; AVX-X86: # %bb.0:
; AVX-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; AVX-X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-X86-NEXT: vsqrtss %xmm0, %xmm0, %xmm0
; AVX-X86-NEXT: vmovss %xmm0, (%eax)
; AVX-X86-NEXT: retl
;
; AVX-X64-LABEL: fsqrt_f32:
; AVX-X64: # %bb.0:
; AVX-X64-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-X64-NEXT: vsqrtss %xmm0, %xmm0, %xmm0
; AVX-X64-NEXT: vmovss %xmm0, (%rdi)
; AVX-X64-NEXT: retq
;
; X87-LABEL: fsqrt_f32:
; X87: # %bb.0:
; X87-NEXT: movl {{[0-9]+}}(%esp), %eax
; X87-NEXT: flds (%eax)
; X87-NEXT: fsqrt
; X87-NEXT: fstps (%eax)
; X87-NEXT: wait
; X87-NEXT: retl
  %1 = load float, float* %a, align 4
  %res = call float @llvm.experimental.constrained.sqrt.f32(float %1, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
  store float %res, float* %a, align 4
  ret void
}

; Strict f64 fused multiply-add of %a, %b and %c.
; For the +sse2 invocations and the SSE-disabled invocation the expected code
; is a call to `fma` (arguments are copied to the outgoing stack area on
; i686). For the +fma and AVX-512 invocations a single vfmadd213sd is
; expected, followed on i686 by the same store/fldl/wait return sequence as
; the arithmetic tests above.
define double @fma_f64(double %a, double %b, double %c) nounwind strictfp {
; SSE-X86-LABEL: fma_f64:
; SSE-X86: # %bb.0:
; SSE-X86-NEXT: subl $24, %esp
; SSE-X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-X86-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; SSE-X86-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
; SSE-X86-NEXT: movsd %xmm2, {{[0-9]+}}(%esp)
; SSE-X86-NEXT: movsd %xmm1, {{[0-9]+}}(%esp)
; SSE-X86-NEXT: movsd %xmm0, (%esp)
; SSE-X86-NEXT: calll fma
; SSE-X86-NEXT: addl $24, %esp
; SSE-X86-NEXT: retl
;
; SSE-X64-LABEL: fma_f64:
; SSE-X64: # %bb.0:
; SSE-X64-NEXT: pushq %rax
; SSE-X64-NEXT: callq fma
; SSE-X64-NEXT: popq %rax
; SSE-X64-NEXT: retq
;
; AVX-X86-LABEL: fma_f64:
; AVX-X86: # %bb.0:
; AVX-X86-NEXT: pushl %ebp
; AVX-X86-NEXT: movl %esp, %ebp
; AVX-X86-NEXT: andl $-8, %esp
; AVX-X86-NEXT: subl $8, %esp
; AVX-X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-X86-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-X86-NEXT: vfmadd213sd {{.*#+}} xmm1 = (xmm0 * xmm1) + mem
; AVX-X86-NEXT: vmovsd %xmm1, (%esp)
; AVX-X86-NEXT: fldl (%esp)
; AVX-X86-NEXT: wait
; AVX-X86-NEXT: movl %ebp, %esp
; AVX-X86-NEXT: popl %ebp
; AVX-X86-NEXT: retl
;
; AVX-X64-LABEL: fma_f64:
; AVX-X64: # %bb.0:
; AVX-X64-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
; AVX-X64-NEXT: retq
;
; X87-LABEL: fma_f64:
; X87: # %bb.0:
; X87-NEXT: subl $24, %esp
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: fstpl {{[0-9]+}}(%esp)
; X87-NEXT: fstpl {{[0-9]+}}(%esp)
; X87-NEXT: fstpl (%esp)
; X87-NEXT: wait
; X87-NEXT: calll fma
; X87-NEXT: addl $24, %esp
; X87-NEXT: retl
  %res = call double @llvm.experimental.constrained.fma.f64(double %a, double %b, double %c, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
  ret double %res
}

; Strict f32 fused multiply-add of %a, %b and %c; expectations mirror fma_f64
; with a call to `fmaf` where no FMA instruction is available and
; vfmadd213ss otherwise.
define float @fma_f32(float %a, float %b, float %c) nounwind strictfp {
; SSE-X86-LABEL: fma_f32:
; SSE-X86: # %bb.0:
; SSE-X86-NEXT: subl $12, %esp
; SSE-X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-X86-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE-X86-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE-X86-NEXT: movss %xmm2, {{[0-9]+}}(%esp)
; SSE-X86-NEXT: movss %xmm1, {{[0-9]+}}(%esp)
; SSE-X86-NEXT: movss %xmm0, (%esp)
; SSE-X86-NEXT: calll fmaf
; SSE-X86-NEXT: addl $12, %esp
; SSE-X86-NEXT: retl
;
; SSE-X64-LABEL: fma_f32:
; SSE-X64: # %bb.0:
; SSE-X64-NEXT: pushq %rax
; SSE-X64-NEXT: callq fmaf
; SSE-X64-NEXT: popq %rax
; SSE-X64-NEXT: retq
;
; AVX-X86-LABEL: fma_f32:
; AVX-X86: # %bb.0:
; AVX-X86-NEXT: pushl %eax
; AVX-X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-X86-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-X86-NEXT: vfmadd213ss {{.*#+}} xmm1 = (xmm0 * xmm1) + mem
; AVX-X86-NEXT: vmovss %xmm1, (%esp)
; AVX-X86-NEXT: flds (%esp)
; AVX-X86-NEXT: wait
; AVX-X86-NEXT: popl %eax
; AVX-X86-NEXT: retl
;
; AVX-X64-LABEL: fma_f32:
; AVX-X64: # %bb.0:
; AVX-X64-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
; AVX-X64-NEXT: retq
;
; X87-LABEL: fma_f32:
; X87: # %bb.0:
; X87-NEXT: subl $12, %esp
; X87-NEXT: flds {{[0-9]+}}(%esp)
; X87-NEXT: flds {{[0-9]+}}(%esp)
; X87-NEXT: flds {{[0-9]+}}(%esp)
; X87-NEXT: fstps {{[0-9]+}}(%esp)
; X87-NEXT: fstps {{[0-9]+}}(%esp)
; X87-NEXT: fstps (%esp)
; X87-NEXT: wait
; X87-NEXT: calll fmaf
; X87-NEXT: addl $12, %esp
; X87-NEXT: retl
  %res = call float @llvm.experimental.constrained.fma.f32(float %a, float %b, float %c, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
  ret float %res
}

; Attribute group attached to every constrained intrinsic call site in this
; file.
attributes #0 = { strictfp }