; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI %s

; GlobalISel selection test for the llvm.amdgcn.rsq.clamp intrinsic (f32, f64).
;
; The full triple is given with -mtriple so that the result does not depend on
; the default triple of the machine running the test.
;
; Expected shape of the output, as recorded by the assertions below:
;   * tahiti: a single v_rsq_clamp_* instruction.
;   * tonga:  v_rsq_* followed by a min and then a max against constants whose
;             bit patterns are the largest positive / negative finite values
;             of the type (0x7f7fffff / 0xff7fffff for f32, and
;             0x7fefffff_ffffffff / 0xffefffff_ffffffff for f64).
;
; Regenerate the assertions with utils/update_llc_test_checks.py after any
; change to the IR or to the llc command lines; do not edit them by hand.

; f32, plain VGPR source operand.
define float @v_rsq_clamp_f32(float %src) #0 {
; SI-LABEL: v_rsq_clamp_f32:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_rsq_clamp_f32_e32 v0, v0
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: v_rsq_clamp_f32:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_rsq_f32_e32 v0, v0
; VI-NEXT:    v_min_f32_e32 v0, 0x7f7fffff, v0
; VI-NEXT:    v_max_f32_e32 v0, 0xff7fffff, v0
; VI-NEXT:    s_setpc_b64 s[30:31]
  %rsq_clamp = call float @llvm.amdgcn.rsq.clamp.f32(float %src)
  ret float %rsq_clamp
}

; f32 with llvm.fabs on the input: the assertions expect the fabs to appear as
; the |v0| source modifier on the rsq instruction (e64 encoding) rather than as
; a separate instruction.
define float @v_rsq_clamp_fabs_f32(float %src) #0 {
; SI-LABEL: v_rsq_clamp_fabs_f32:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_rsq_clamp_f32_e64 v0, |v0|
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: v_rsq_clamp_fabs_f32:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_rsq_f32_e64 v0, |v0|
; VI-NEXT:    v_min_f32_e32 v0, 0x7f7fffff, v0
; VI-NEXT:    v_max_f32_e32 v0, 0xff7fffff, v0
; VI-NEXT:    s_setpc_b64 s[30:31]
  %fabs.src = call float @llvm.fabs.f32(float %src)
  %rsq_clamp = call float @llvm.amdgcn.rsq.clamp.f32(float %fabs.src)
  ret float %rsq_clamp
}

; f64, plain VGPR-pair source operand. On tonga the 64-bit clamp bounds are
; built in s[4:5]; the low half (-1) is written once and only the high half is
; rewritten between the min and the max.
define double @v_rsq_clamp_f64(double %src) #0 {
; SI-LABEL: v_rsq_clamp_f64:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_rsq_clamp_f64_e32 v[0:1], v[0:1]
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: v_rsq_clamp_f64:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_rsq_f64_e32 v[0:1], v[0:1]
; VI-NEXT:    s_mov_b32 s4, -1
; VI-NEXT:    s_mov_b32 s5, 0x7fefffff
; VI-NEXT:    v_min_f64 v[0:1], v[0:1], s[4:5]
; VI-NEXT:    s_mov_b32 s5, 0xffefffff
; VI-NEXT:    v_max_f64 v[0:1], v[0:1], s[4:5]
; VI-NEXT:    s_setpc_b64 s[30:31]
  %rsq_clamp = call double @llvm.amdgcn.rsq.clamp.f64(double %src)
  ret double %rsq_clamp
}

; f64 with llvm.fabs on the input: expected to appear as the |v[0:1]| source
; modifier on the rsq instruction.
define double @v_rsq_clamp_fabs_f64(double %src) #0 {
; SI-LABEL: v_rsq_clamp_fabs_f64:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_rsq_clamp_f64_e64 v[0:1], |v[0:1]|
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: v_rsq_clamp_fabs_f64:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_rsq_f64_e64 v[0:1], |v[0:1]|
; VI-NEXT:    s_mov_b32 s4, -1
; VI-NEXT:    s_mov_b32 s5, 0x7fefffff
; VI-NEXT:    v_min_f64 v[0:1], v[0:1], s[4:5]
; VI-NEXT:    s_mov_b32 s5, 0xffefffff
; VI-NEXT:    v_max_f64 v[0:1], v[0:1], s[4:5]
; VI-NEXT:    s_setpc_b64 s[30:31]
  %fabs.src = call double @llvm.fabs.f64(double %src)
  %rsq_clamp = call double @llvm.amdgcn.rsq.clamp.f64(double %fabs.src)
  ret double %rsq_clamp
}

; f32 with an undef operand: the intrinsic is still selected; the recorded
; output reads the source from s4, which this function never writes.
define float @v_rsq_clamp_undef_f32() #0 {
; SI-LABEL: v_rsq_clamp_undef_f32:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_rsq_clamp_f32_e32 v0, s4
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: v_rsq_clamp_undef_f32:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_rsq_f32_e32 v0, s4
; VI-NEXT:    v_min_f32_e32 v0, 0x7f7fffff, v0
; VI-NEXT:    v_max_f32_e32 v0, 0xff7fffff, v0
; VI-NEXT:    s_setpc_b64 s[30:31]
  %rsq_clamp = call float @llvm.amdgcn.rsq.clamp.f32(float undef)
  ret float %rsq_clamp
}

; f64 with an undef operand: the recorded output reads the source from s[4:5]
; before those registers are (on tonga) reused for the clamp bounds.
define double @v_rsq_clamp_undef_f64() #0 {
; SI-LABEL: v_rsq_clamp_undef_f64:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_rsq_clamp_f64_e32 v[0:1], s[4:5]
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: v_rsq_clamp_undef_f64:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_rsq_f64_e32 v[0:1], s[4:5]
; VI-NEXT:    s_mov_b32 s4, -1
; VI-NEXT:    s_mov_b32 s5, 0x7fefffff
; VI-NEXT:    v_min_f64 v[0:1], v[0:1], s[4:5]
; VI-NEXT:    s_mov_b32 s5, 0xffefffff
; VI-NEXT:    v_max_f64 v[0:1], v[0:1], s[4:5]
; VI-NEXT:    s_setpc_b64 s[30:31]
  %rsq_clamp = call double @llvm.amdgcn.rsq.clamp.f64(double undef)
  ret double %rsq_clamp
}

; Same as v_rsq_clamp_f32 but compiled with attribute group #2
; ("amdgpu-ieee"="false"). The recorded output is identical to the #0 variant.
define float @v_rsq_clamp_f32_non_ieee(float %src) #2 {
; SI-LABEL: v_rsq_clamp_f32_non_ieee:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_rsq_clamp_f32_e32 v0, v0
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: v_rsq_clamp_f32_non_ieee:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_rsq_f32_e32 v0, v0
; VI-NEXT:    v_min_f32_e32 v0, 0x7f7fffff, v0
; VI-NEXT:    v_max_f32_e32 v0, 0xff7fffff, v0
; VI-NEXT:    s_setpc_b64 s[30:31]
  %rsq_clamp = call float @llvm.amdgcn.rsq.clamp.f32(float %src)
  ret float %rsq_clamp
}

; Same as v_rsq_clamp_f64 but compiled with attribute group #2
; ("amdgpu-ieee"="false"). The recorded output is identical to the #0 variant.
define double @v_rsq_clamp_f64_non_ieee(double %src) #2 {
; SI-LABEL: v_rsq_clamp_f64_non_ieee:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_rsq_clamp_f64_e32 v[0:1], v[0:1]
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: v_rsq_clamp_f64_non_ieee:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_rsq_f64_e32 v[0:1], v[0:1]
; VI-NEXT:    s_mov_b32 s4, -1
; VI-NEXT:    s_mov_b32 s5, 0x7fefffff
; VI-NEXT:    v_min_f64 v[0:1], v[0:1], s[4:5]
; VI-NEXT:    s_mov_b32 s5, 0xffefffff
; VI-NEXT:    v_max_f64 v[0:1], v[0:1], s[4:5]
; VI-NEXT:    s_setpc_b64 s[30:31]
  %rsq_clamp = call double @llvm.amdgcn.rsq.clamp.f64(double %src)
  ret double %rsq_clamp
}

; Intrinsics exercised by this test.
declare float @llvm.fabs.f32(float) #1
declare float @llvm.amdgcn.rsq.clamp.f32(float) #1
declare double @llvm.fabs.f64(double) #1
declare double @llvm.amdgcn.rsq.clamp.f64(double) #1

; #0: ordinary test functions.
; #1: the intrinsic declarations.
; #2: the *_non_ieee functions, which set "amdgpu-ieee"="false".
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
attributes #2 = { nounwind "amdgpu-ieee"="false" }