171 lines
5.9 KiB
LLVM
171 lines
5.9 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc -global-isel -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
|
|
; RUN: llc -global-isel -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI %s
|
|
|
|
; Baseline f32 case: calls llvm.amdgcn.rsq.clamp.f32 directly on the float
; argument and returns the result.
; Per the autogenerated checks below, the tahiti run expects a single
; v_rsq_clamp_f32_e32, while the tonga run expects v_rsq_f32_e32 followed by
; v_min_f32_e32 with 0x7f7fffff and v_max_f32_e32 with 0xff7fffff.
define float @v_rsq_clamp_f32(float %src) #0 {
|
|
; SI-LABEL: v_rsq_clamp_f32:
|
|
; SI: ; %bb.0:
|
|
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; SI-NEXT: v_rsq_clamp_f32_e32 v0, v0
|
|
; SI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; VI-LABEL: v_rsq_clamp_f32:
|
|
; VI: ; %bb.0:
|
|
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; VI-NEXT: v_rsq_f32_e32 v0, v0
|
|
; VI-NEXT: v_min_f32_e32 v0, 0x7f7fffff, v0
|
|
; VI-NEXT: v_max_f32_e32 v0, 0xff7fffff, v0
|
|
; VI-NEXT: s_setpc_b64 s[30:31]
|
|
%rsq_clamp = call float @llvm.amdgcn.rsq.clamp.f32(float %src)
|
|
ret float %rsq_clamp
|
|
}
|
|
|
|
; f32 case with an fabs source: the argument goes through llvm.fabs.f32 before
; llvm.amdgcn.rsq.clamp.f32.
; Per the checks below, both runs expect the fabs to appear as a |v0| source
; modifier on the e64 encoding of the rsq instruction rather than as a
; separate instruction; the tonga run still expects the v_min/v_max pair.
define float @v_rsq_clamp_fabs_f32(float %src) #0 {
|
|
; SI-LABEL: v_rsq_clamp_fabs_f32:
|
|
; SI: ; %bb.0:
|
|
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; SI-NEXT: v_rsq_clamp_f32_e64 v0, |v0|
|
|
; SI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; VI-LABEL: v_rsq_clamp_fabs_f32:
|
|
; VI: ; %bb.0:
|
|
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; VI-NEXT: v_rsq_f32_e64 v0, |v0|
|
|
; VI-NEXT: v_min_f32_e32 v0, 0x7f7fffff, v0
|
|
; VI-NEXT: v_max_f32_e32 v0, 0xff7fffff, v0
|
|
; VI-NEXT: s_setpc_b64 s[30:31]
|
|
%fabs.src = call float @llvm.fabs.f32(float %src)
|
|
%rsq_clamp = call float @llvm.amdgcn.rsq.clamp.f32(float %fabs.src)
|
|
ret float %rsq_clamp
|
|
}
|
|
|
|
; Baseline f64 case: calls llvm.amdgcn.rsq.clamp.f64 directly on the double
; argument and returns the result.
; Per the checks below, the tahiti run expects a single v_rsq_clamp_f64_e32.
; The tonga run expects v_rsq_f64_e32, then a 64-bit bound built in s[4:5]
; (s4 = -1, s5 = 0x7fefffff) for v_min_f64, then s5 rewritten to 0xffefffff
; (s4 reused) for v_max_f64.
define double @v_rsq_clamp_f64(double %src) #0 {
|
|
; SI-LABEL: v_rsq_clamp_f64:
|
|
; SI: ; %bb.0:
|
|
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; SI-NEXT: v_rsq_clamp_f64_e32 v[0:1], v[0:1]
|
|
; SI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; VI-LABEL: v_rsq_clamp_f64:
|
|
; VI: ; %bb.0:
|
|
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; VI-NEXT: v_rsq_f64_e32 v[0:1], v[0:1]
|
|
; VI-NEXT: s_mov_b32 s4, -1
|
|
; VI-NEXT: s_mov_b32 s5, 0x7fefffff
|
|
; VI-NEXT: v_min_f64 v[0:1], v[0:1], s[4:5]
|
|
; VI-NEXT: s_mov_b32 s5, 0xffefffff
|
|
; VI-NEXT: v_max_f64 v[0:1], v[0:1], s[4:5]
|
|
; VI-NEXT: s_setpc_b64 s[30:31]
|
|
%rsq_clamp = call double @llvm.amdgcn.rsq.clamp.f64(double %src)
|
|
ret double %rsq_clamp
|
|
}
|
|
|
|
; f64 case with an fabs source: the argument goes through llvm.fabs.f64 before
; llvm.amdgcn.rsq.clamp.f64.
; Per the checks below, both runs expect the fabs to appear as a |v[0:1]|
; source modifier on the e64 encoding of the rsq instruction; the tonga run
; additionally expects the same s[4:5]-based v_min_f64 / v_max_f64 sequence
; as the plain f64 test.
define double @v_rsq_clamp_fabs_f64(double %src) #0 {
|
|
; SI-LABEL: v_rsq_clamp_fabs_f64:
|
|
; SI: ; %bb.0:
|
|
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; SI-NEXT: v_rsq_clamp_f64_e64 v[0:1], |v[0:1]|
|
|
; SI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; VI-LABEL: v_rsq_clamp_fabs_f64:
|
|
; VI: ; %bb.0:
|
|
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; VI-NEXT: v_rsq_f64_e64 v[0:1], |v[0:1]|
|
|
; VI-NEXT: s_mov_b32 s4, -1
|
|
; VI-NEXT: s_mov_b32 s5, 0x7fefffff
|
|
; VI-NEXT: v_min_f64 v[0:1], v[0:1], s[4:5]
|
|
; VI-NEXT: s_mov_b32 s5, 0xffefffff
|
|
; VI-NEXT: v_max_f64 v[0:1], v[0:1], s[4:5]
|
|
; VI-NEXT: s_setpc_b64 s[30:31]
|
|
%fabs.src = call double @llvm.fabs.f64(double %src)
|
|
%rsq_clamp = call double @llvm.amdgcn.rsq.clamp.f64(double %fabs.src)
|
|
ret double %rsq_clamp
|
|
}
|
|
|
|
; f32 case with an undef operand: the function takes no arguments and passes
; `float undef` to llvm.amdgcn.rsq.clamp.f32.
; Per the checks below, the rsq instruction is still emitted on both runs,
; reading s4 as its source, and the tonga run keeps the v_min/v_max pair
; (the call is not folded away).
define float @v_rsq_clamp_undef_f32() #0 {
|
|
; SI-LABEL: v_rsq_clamp_undef_f32:
|
|
; SI: ; %bb.0:
|
|
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; SI-NEXT: v_rsq_clamp_f32_e32 v0, s4
|
|
; SI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; VI-LABEL: v_rsq_clamp_undef_f32:
|
|
; VI: ; %bb.0:
|
|
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; VI-NEXT: v_rsq_f32_e32 v0, s4
|
|
; VI-NEXT: v_min_f32_e32 v0, 0x7f7fffff, v0
|
|
; VI-NEXT: v_max_f32_e32 v0, 0xff7fffff, v0
|
|
; VI-NEXT: s_setpc_b64 s[30:31]
|
|
%rsq_clamp = call float @llvm.amdgcn.rsq.clamp.f32(float undef)
|
|
ret float %rsq_clamp
|
|
}
|
|
|
|
; f64 case with an undef operand: the function takes no arguments and passes
; `double undef` to llvm.amdgcn.rsq.clamp.f64.
; Per the checks below, the rsq instruction is still emitted on both runs,
; reading s[4:5] as its source. On the tonga run the same s[4:5] pair is then
; overwritten with the bound constants used by v_min_f64 / v_max_f64.
define double @v_rsq_clamp_undef_f64() #0 {
|
|
; SI-LABEL: v_rsq_clamp_undef_f64:
|
|
; SI: ; %bb.0:
|
|
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; SI-NEXT: v_rsq_clamp_f64_e32 v[0:1], s[4:5]
|
|
; SI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; VI-LABEL: v_rsq_clamp_undef_f64:
|
|
; VI: ; %bb.0:
|
|
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; VI-NEXT: v_rsq_f64_e32 v[0:1], s[4:5]
|
|
; VI-NEXT: s_mov_b32 s4, -1
|
|
; VI-NEXT: s_mov_b32 s5, 0x7fefffff
|
|
; VI-NEXT: v_min_f64 v[0:1], v[0:1], s[4:5]
|
|
; VI-NEXT: s_mov_b32 s5, 0xffefffff
|
|
; VI-NEXT: v_max_f64 v[0:1], v[0:1], s[4:5]
|
|
; VI-NEXT: s_setpc_b64 s[30:31]
|
|
%rsq_clamp = call double @llvm.amdgcn.rsq.clamp.f64(double undef)
|
|
ret double %rsq_clamp
|
|
}
|
|
|
|
; Same IR body as v_rsq_clamp_f32, but the function uses attribute group #2
; instead of #0 (the file defines #2 with "amdgpu-ieee"="false").
; Per the checks below, the expected instruction sequence on both runs is
; identical to the one recorded for v_rsq_clamp_f32.
define float @v_rsq_clamp_f32_non_ieee(float %src) #2 {
|
|
; SI-LABEL: v_rsq_clamp_f32_non_ieee:
|
|
; SI: ; %bb.0:
|
|
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; SI-NEXT: v_rsq_clamp_f32_e32 v0, v0
|
|
; SI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; VI-LABEL: v_rsq_clamp_f32_non_ieee:
|
|
; VI: ; %bb.0:
|
|
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; VI-NEXT: v_rsq_f32_e32 v0, v0
|
|
; VI-NEXT: v_min_f32_e32 v0, 0x7f7fffff, v0
|
|
; VI-NEXT: v_max_f32_e32 v0, 0xff7fffff, v0
|
|
; VI-NEXT: s_setpc_b64 s[30:31]
|
|
%rsq_clamp = call float @llvm.amdgcn.rsq.clamp.f32(float %src)
|
|
ret float %rsq_clamp
|
|
}
|
|
|
|
; Same IR body as v_rsq_clamp_f64, but the function uses attribute group #2
; instead of #0 (the file defines #2 with "amdgpu-ieee"="false").
; Per the checks below, the expected instruction sequence on both runs is
; identical to the one recorded for v_rsq_clamp_f64.
define double @v_rsq_clamp_f64_non_ieee(double %src) #2 {
|
|
; SI-LABEL: v_rsq_clamp_f64_non_ieee:
|
|
; SI: ; %bb.0:
|
|
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; SI-NEXT: v_rsq_clamp_f64_e32 v[0:1], v[0:1]
|
|
; SI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; VI-LABEL: v_rsq_clamp_f64_non_ieee:
|
|
; VI: ; %bb.0:
|
|
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; VI-NEXT: v_rsq_f64_e32 v[0:1], v[0:1]
|
|
; VI-NEXT: s_mov_b32 s4, -1
|
|
; VI-NEXT: s_mov_b32 s5, 0x7fefffff
|
|
; VI-NEXT: v_min_f64 v[0:1], v[0:1], s[4:5]
|
|
; VI-NEXT: s_mov_b32 s5, 0xffefffff
|
|
; VI-NEXT: v_max_f64 v[0:1], v[0:1], s[4:5]
|
|
; VI-NEXT: s_setpc_b64 s[30:31]
|
|
%rsq_clamp = call double @llvm.amdgcn.rsq.clamp.f64(double %src)
|
|
ret double %rsq_clamp
|
|
}
|
|
|
|
; Intrinsics called by the test functions in this file: fabs and
; amdgcn.rsq.clamp, each in an f32 and an f64 flavour. All four declarations
; share attribute group #1.
declare float @llvm.fabs.f32(float) #1
|
|
declare float @llvm.amdgcn.rsq.clamp.f32(float) #1
|
|
declare double @llvm.fabs.f64(double) #1
|
|
declare double @llvm.amdgcn.rsq.clamp.f64(double) #1
|
|
|
|
; Attribute groups:
;   #0 - used by the default test functions.
;   #1 - used by the intrinsic declarations above.
;   #2 - used by the *_non_ieee test functions; differs from #0 only by the
;        added "amdgpu-ieee"="false" string attribute.
attributes #0 = { nounwind }
|
|
attributes #1 = { nounwind readnone }
|
|
attributes #2 = { nounwind "amdgpu-ieee"="false" }
|