; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GPRIDX %s ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,MOVREL %s define float @dyn_extract_v8f32_const_s_v(i32 %sel) { ; GCN-LABEL: dyn_extract_v8f32_const_s_v: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 ; GCN-NEXT: v_cndmask_b32_e64 v6, 1.0, 2.0, vcc ; GCN-NEXT: v_mov_b32_e32 v1, 0x40400000 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0 ; GCN-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0 ; GCN-NEXT: v_cndmask_b32_e64 v1, v1, 4.0, vcc ; GCN-NEXT: v_mov_b32_e32 v2, 0x40a00000 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc ; GCN-NEXT: v_mov_b32_e32 v3, 0x40c00000 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; GCN-NEXT: v_mov_b32_e32 v4, 0x40e00000 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v0 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc ; GCN-NEXT: v_mov_b32_e32 v5, 0x41000000 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v0 ; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v5, vcc ; GCN-NEXT: s_setpc_b64 s[30:31] entry: %ext = extractelement <8 x float> , i32 %sel ret float %ext } define amdgpu_ps float @dyn_extract_v8f32_const_s_s(i32 inreg %sel) { ; GCN-LABEL: dyn_extract_v8f32_const_s_s: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_cmp_eq_u32 s2, 1 ; GCN-NEXT: s_cselect_b32 s0, 2.0, 1.0 ; GCN-NEXT: s_cmp_eq_u32 s2, 2 ; GCN-NEXT: s_cselect_b32 s0, 0x40400000, s0 ; GCN-NEXT: s_cmp_eq_u32 s2, 3 ; GCN-NEXT: s_cselect_b32 s0, 4.0, s0 ; GCN-NEXT: s_cmp_eq_u32 s2, 4 ; GCN-NEXT: s_cselect_b32 s0, 0x40a00000, s0 ; GCN-NEXT: s_cmp_eq_u32 s2, 5 ; GCN-NEXT: s_cselect_b32 s0, 0x40c00000, s0 ; GCN-NEXT: s_cmp_eq_u32 s2, 6 ; GCN-NEXT: s_cselect_b32 s0, 
0x40e00000, s0 ; GCN-NEXT: s_cmp_eq_u32 s2, 7 ; GCN-NEXT: s_cselect_b32 s0, 0x41000000, s0 ; GCN-NEXT: v_mov_b32_e32 v0, s0 ; GCN-NEXT: ; return to shader part epilog entry: %ext = extractelement <8 x float> , i32 %sel ret float %ext } define amdgpu_ps float @dyn_extract_v8f32_s_v(<8 x float> inreg %vec, i32 %sel) { ; GCN-LABEL: dyn_extract_v8f32_s_v: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_mov_b32 s0, s2 ; GCN-NEXT: s_mov_b32 s1, s3 ; GCN-NEXT: s_mov_b32 s2, s4 ; GCN-NEXT: v_mov_b32_e32 v1, s0 ; GCN-NEXT: v_mov_b32_e32 v2, s1 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 ; GCN-NEXT: s_mov_b32 s3, s5 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc ; GCN-NEXT: v_mov_b32_e32 v3, s2 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; GCN-NEXT: v_mov_b32_e32 v4, s3 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc ; GCN-NEXT: v_mov_b32_e32 v5, s6 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc ; GCN-NEXT: v_mov_b32_e32 v6, s7 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc ; GCN-NEXT: v_mov_b32_e32 v7, s8 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v0 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc ; GCN-NEXT: v_mov_b32_e32 v8, s9 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v0 ; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v8, vcc ; GCN-NEXT: ; return to shader part epilog entry: %ext = extractelement <8 x float> %vec, i32 %sel ret float %ext } define float @dyn_extract_v8f32_v_v(<8 x float> %vec, i32 %sel) { ; GCN-LABEL: dyn_extract_v8f32_v_v: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v8 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v8 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v8 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v8 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v8 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v8 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v8 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc ; GCN-NEXT: s_setpc_b64 s[30:31] entry: %ext = extractelement <8 x float> %vec, i32 %sel ret float %ext } define amdgpu_ps float @dyn_extract_v8f32_v_s(<8 x float> %vec, i32 inreg %sel) { ; GCN-LABEL: dyn_extract_v8f32_v_s: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 1 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 2 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 3 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc ; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 4 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc ; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 5 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc ; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 6 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc ; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 7 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc ; GCN-NEXT: ; return to shader part epilog entry: %ext = extractelement <8 x float> %vec, i32 %sel ret float %ext } define amdgpu_ps float @dyn_extract_v8f32_s_s(<8 x float> inreg %vec, i32 inreg %sel) { ; GCN-LABEL: dyn_extract_v8f32_s_s: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_cmp_eq_u32 s10, 1 ; GCN-NEXT: s_cselect_b32 s0, s3, s2 ; GCN-NEXT: s_cmp_eq_u32 s10, 2 ; GCN-NEXT: s_cselect_b32 s0, s4, s0 ; GCN-NEXT: s_cmp_eq_u32 s10, 3 ; GCN-NEXT: s_cselect_b32 s0, s5, s0 ; GCN-NEXT: s_cmp_eq_u32 s10, 4 ; GCN-NEXT: s_cselect_b32 s0, s6, s0 ; GCN-NEXT: s_cmp_eq_u32 s10, 5 ; GCN-NEXT: s_cselect_b32 s0, s7, s0 ; GCN-NEXT: s_cmp_eq_u32 s10, 6 ; GCN-NEXT: s_cselect_b32 s0, s8, s0 ; GCN-NEXT: s_cmp_eq_u32 s10, 7 ; GCN-NEXT: s_cselect_b32 s0, s9, s0 ; GCN-NEXT: v_mov_b32_e32 v0, s0 ; GCN-NEXT: ; return to shader part epilog entry: %ext = extractelement <8 x float> %vec, i32 %sel ret float 
%ext } define i64 @dyn_extract_v8i64_const_s_v(i32 %sel) { ; GCN-LABEL: dyn_extract_v8i64_const_s_v: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: s_mov_b64 s[4:5], 1 ; GCN-NEXT: s_mov_b64 s[6:7], 2 ; GCN-NEXT: s_mov_b64 s[8:9], 3 ; GCN-NEXT: v_mov_b32_e32 v1, s4 ; GCN-NEXT: v_mov_b32_e32 v2, s5 ; GCN-NEXT: v_mov_b32_e32 v3, s6 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 ; GCN-NEXT: v_mov_b32_e32 v4, s7 ; GCN-NEXT: s_mov_b64 s[10:11], 4 ; GCN-NEXT: v_mov_b32_e32 v5, s8 ; GCN-NEXT: v_mov_b32_e32 v6, s9 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0 ; GCN-NEXT: s_mov_b64 s[12:13], 5 ; GCN-NEXT: v_mov_b32_e32 v7, s10 ; GCN-NEXT: v_mov_b32_e32 v8, s11 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0 ; GCN-NEXT: s_mov_b64 s[14:15], 6 ; GCN-NEXT: v_mov_b32_e32 v9, s12 ; GCN-NEXT: v_mov_b32_e32 v10, s13 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0 ; GCN-NEXT: s_mov_b64 s[16:17], 7 ; GCN-NEXT: v_mov_b32_e32 v11, s14 ; GCN-NEXT: v_mov_b32_e32 v12, s15 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0 ; GCN-NEXT: s_mov_b64 s[18:19], 8 ; GCN-NEXT: v_mov_b32_e32 v13, s16 ; GCN-NEXT: v_mov_b32_e32 v14, s17 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v12, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v0 ; GCN-NEXT: v_mov_b32_e32 v15, s18 ; GCN-NEXT: v_mov_b32_e32 v16, s19 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v14, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v0 ; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v15, vcc ; GCN-NEXT: v_cndmask_b32_e32 v1, v2, v16, vcc ; GCN-NEXT: s_setpc_b64 s[30:31] entry: %ext = extractelement <8 x i64> , i32 
%sel ret i64 %ext } define amdgpu_ps void @dyn_extract_v8i64_const_s_s(i32 inreg %sel) { ; GPRIDX-LABEL: dyn_extract_v8i64_const_s_s: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_mov_b64 s[4:5], 1 ; GPRIDX-NEXT: s_mov_b32 m0, s2 ; GPRIDX-NEXT: s_mov_b64 s[18:19], 8 ; GPRIDX-NEXT: s_mov_b64 s[16:17], 7 ; GPRIDX-NEXT: s_mov_b64 s[14:15], 6 ; GPRIDX-NEXT: s_mov_b64 s[12:13], 5 ; GPRIDX-NEXT: s_mov_b64 s[10:11], 4 ; GPRIDX-NEXT: s_mov_b64 s[8:9], 3 ; GPRIDX-NEXT: s_mov_b64 s[6:7], 2 ; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[4:5] ; GPRIDX-NEXT: v_mov_b32_e32 v0, s0 ; GPRIDX-NEXT: v_mov_b32_e32 v1, s1 ; GPRIDX-NEXT: global_store_dwordx2 v[0:1], v[0:1], off ; GPRIDX-NEXT: s_endpgm ; ; MOVREL-LABEL: dyn_extract_v8i64_const_s_s: ; MOVREL: ; %bb.0: ; %entry ; MOVREL-NEXT: s_mov_b64 s[4:5], 1 ; MOVREL-NEXT: s_mov_b32 m0, s2 ; MOVREL-NEXT: s_mov_b64 s[18:19], 8 ; MOVREL-NEXT: s_mov_b64 s[16:17], 7 ; MOVREL-NEXT: s_mov_b64 s[14:15], 6 ; MOVREL-NEXT: s_mov_b64 s[12:13], 5 ; MOVREL-NEXT: s_mov_b64 s[10:11], 4 ; MOVREL-NEXT: s_mov_b64 s[8:9], 3 ; MOVREL-NEXT: s_mov_b64 s[6:7], 2 ; MOVREL-NEXT: s_movrels_b64 s[0:1], s[4:5] ; MOVREL-NEXT: v_mov_b32_e32 v0, s0 ; MOVREL-NEXT: v_mov_b32_e32 v1, s1 ; MOVREL-NEXT: flat_store_dwordx2 v[0:1], v[0:1] ; MOVREL-NEXT: s_endpgm entry: %ext = extractelement <8 x i64> , i32 %sel store i64 %ext, i64 addrspace(1)* undef ret void } define amdgpu_ps void @dyn_extract_v8i64_s_v(<8 x i64> inreg %vec, i32 %sel) { ; GPRIDX-LABEL: dyn_extract_v8i64_s_v: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_mov_b32 s0, s2 ; GPRIDX-NEXT: s_mov_b32 s1, s3 ; GPRIDX-NEXT: s_mov_b32 s2, s4 ; GPRIDX-NEXT: s_mov_b32 s3, s5 ; GPRIDX-NEXT: s_mov_b32 s4, s6 ; GPRIDX-NEXT: s_mov_b32 s5, s7 ; GPRIDX-NEXT: v_mov_b32_e32 v1, s0 ; GPRIDX-NEXT: v_mov_b32_e32 v2, s1 ; GPRIDX-NEXT: v_mov_b32_e32 v3, s2 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 ; GPRIDX-NEXT: v_mov_b32_e32 v4, s3 ; GPRIDX-NEXT: s_mov_b32 s6, s8 ; GPRIDX-NEXT: s_mov_b32 s7, s9 ; GPRIDX-NEXT: v_mov_b32_e32 v5, s4 ; 
GPRIDX-NEXT: v_mov_b32_e32 v6, s5 ; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0 ; GPRIDX-NEXT: s_mov_b32 s8, s10 ; GPRIDX-NEXT: s_mov_b32 s9, s11 ; GPRIDX-NEXT: v_mov_b32_e32 v7, s6 ; GPRIDX-NEXT: v_mov_b32_e32 v8, s7 ; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc ; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0 ; GPRIDX-NEXT: s_mov_b32 s10, s12 ; GPRIDX-NEXT: s_mov_b32 s11, s13 ; GPRIDX-NEXT: v_mov_b32_e32 v9, s8 ; GPRIDX-NEXT: v_mov_b32_e32 v10, s9 ; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc ; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0 ; GPRIDX-NEXT: v_mov_b32_e32 v11, s10 ; GPRIDX-NEXT: v_mov_b32_e32 v12, s11 ; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc ; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0 ; GPRIDX-NEXT: v_mov_b32_e32 v13, s14 ; GPRIDX-NEXT: v_mov_b32_e32 v14, s15 ; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc ; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v12, vcc ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v0 ; GPRIDX-NEXT: v_mov_b32_e32 v15, s16 ; GPRIDX-NEXT: v_mov_b32_e32 v16, s17 ; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc ; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v14, vcc ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 7, v0 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v1, v15, vcc ; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v2, v16, vcc ; GPRIDX-NEXT: global_store_dwordx2 v[0:1], v[0:1], off ; GPRIDX-NEXT: s_endpgm ; ; MOVREL-LABEL: dyn_extract_v8i64_s_v: ; MOVREL: ; %bb.0: ; %entry ; MOVREL-NEXT: s_mov_b32 s0, s2 ; MOVREL-NEXT: s_mov_b32 s1, s3 ; MOVREL-NEXT: s_mov_b32 s2, s4 ; MOVREL-NEXT: s_mov_b32 s3, s5 ; MOVREL-NEXT: s_mov_b32 s4, s6 ; MOVREL-NEXT: s_mov_b32 s5, s7 ; MOVREL-NEXT: v_mov_b32_e32 v1, s0 ; MOVREL-NEXT: v_mov_b32_e32 v2, s1 ; MOVREL-NEXT: v_mov_b32_e32 v3, s2 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 ; MOVREL-NEXT: 
v_mov_b32_e32 v4, s3 ; MOVREL-NEXT: s_mov_b32 s6, s8 ; MOVREL-NEXT: s_mov_b32 s7, s9 ; MOVREL-NEXT: v_mov_b32_e32 v5, s4 ; MOVREL-NEXT: v_mov_b32_e32 v6, s5 ; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; MOVREL-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0 ; MOVREL-NEXT: s_mov_b32 s8, s10 ; MOVREL-NEXT: s_mov_b32 s9, s11 ; MOVREL-NEXT: v_mov_b32_e32 v7, s6 ; MOVREL-NEXT: v_mov_b32_e32 v8, s7 ; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc ; MOVREL-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0 ; MOVREL-NEXT: s_mov_b32 s10, s12 ; MOVREL-NEXT: s_mov_b32 s11, s13 ; MOVREL-NEXT: v_mov_b32_e32 v9, s8 ; MOVREL-NEXT: v_mov_b32_e32 v10, s9 ; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc ; MOVREL-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0 ; MOVREL-NEXT: v_mov_b32_e32 v11, s10 ; MOVREL-NEXT: v_mov_b32_e32 v12, s11 ; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc ; MOVREL-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0 ; MOVREL-NEXT: v_mov_b32_e32 v13, s14 ; MOVREL-NEXT: v_mov_b32_e32 v14, s15 ; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc ; MOVREL-NEXT: v_cndmask_b32_e32 v2, v2, v12, vcc ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 6, v0 ; MOVREL-NEXT: v_mov_b32_e32 v15, s16 ; MOVREL-NEXT: v_mov_b32_e32 v16, s17 ; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc ; MOVREL-NEXT: v_cndmask_b32_e32 v2, v2, v14, vcc ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 7, v0 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v1, v15, vcc ; MOVREL-NEXT: v_cndmask_b32_e32 v1, v2, v16, vcc ; MOVREL-NEXT: flat_store_dwordx2 v[0:1], v[0:1] ; MOVREL-NEXT: s_endpgm entry: %ext = extractelement <8 x i64> %vec, i32 %sel store i64 %ext, i64 addrspace(1)* undef ret void } define i64 @dyn_extract_v8i64_v_v(<8 x i64> %vec, i32 %sel) { ; GCN-LABEL: dyn_extract_v8i64_v_v: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: v_cmp_eq_u32_e32 
vcc, 1, v16 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v16 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v16 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v16 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v16 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v16 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v16 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v15, vcc ; GCN-NEXT: s_setpc_b64 s[30:31] entry: %ext = extractelement <8 x i64> %vec, i32 %sel ret i64 %ext } define amdgpu_ps void @dyn_extract_v8i64_v_s(<8 x i64> %vec, i32 inreg %sel) { ; GPRIDX-LABEL: dyn_extract_v8i64_v_s: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_lshl_b32 s0, s2, 1 ; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(SRC0) ; GPRIDX-NEXT: v_mov_b32_e32 v16, v0 ; GPRIDX-NEXT: v_mov_b32_e32 v17, v1 ; GPRIDX-NEXT: s_set_gpr_idx_off ; GPRIDX-NEXT: global_store_dwordx2 v[0:1], v[16:17], off ; GPRIDX-NEXT: s_endpgm ; ; MOVREL-LABEL: dyn_extract_v8i64_v_s: ; MOVREL: ; %bb.0: ; %entry ; MOVREL-NEXT: s_lshl_b32 m0, s2, 1 ; MOVREL-NEXT: v_movrels_b32_e32 v16, v0 ; MOVREL-NEXT: v_movrels_b32_e32 v17, v1 ; MOVREL-NEXT: flat_store_dwordx2 v[0:1], v[16:17] ; MOVREL-NEXT: s_endpgm entry: %ext = extractelement <8 x i64> %vec, i32 %sel store i64 %ext, i64 addrspace(1)* undef ret void } define amdgpu_ps void @dyn_extract_v8i64_s_s(<8 x i64> inreg %vec, i32 inreg %sel) { ; GPRIDX-LABEL: dyn_extract_v8i64_s_s: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_mov_b32 s0, s2 ; GPRIDX-NEXT: 
s_mov_b32 s1, s3 ; GPRIDX-NEXT: s_mov_b32 m0, s18 ; GPRIDX-NEXT: s_mov_b32 s2, s4 ; GPRIDX-NEXT: s_mov_b32 s3, s5 ; GPRIDX-NEXT: s_mov_b32 s4, s6 ; GPRIDX-NEXT: s_mov_b32 s5, s7 ; GPRIDX-NEXT: s_mov_b32 s6, s8 ; GPRIDX-NEXT: s_mov_b32 s7, s9 ; GPRIDX-NEXT: s_mov_b32 s8, s10 ; GPRIDX-NEXT: s_mov_b32 s9, s11 ; GPRIDX-NEXT: s_mov_b32 s10, s12 ; GPRIDX-NEXT: s_mov_b32 s11, s13 ; GPRIDX-NEXT: s_mov_b32 s12, s14 ; GPRIDX-NEXT: s_mov_b32 s13, s15 ; GPRIDX-NEXT: s_mov_b32 s14, s16 ; GPRIDX-NEXT: s_mov_b32 s15, s17 ; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[0:1] ; GPRIDX-NEXT: v_mov_b32_e32 v0, s0 ; GPRIDX-NEXT: v_mov_b32_e32 v1, s1 ; GPRIDX-NEXT: global_store_dwordx2 v[0:1], v[0:1], off ; GPRIDX-NEXT: s_endpgm ; ; MOVREL-LABEL: dyn_extract_v8i64_s_s: ; MOVREL: ; %bb.0: ; %entry ; MOVREL-NEXT: s_mov_b32 s0, s2 ; MOVREL-NEXT: s_mov_b32 s1, s3 ; MOVREL-NEXT: s_mov_b32 m0, s18 ; MOVREL-NEXT: s_mov_b32 s2, s4 ; MOVREL-NEXT: s_mov_b32 s3, s5 ; MOVREL-NEXT: s_mov_b32 s4, s6 ; MOVREL-NEXT: s_mov_b32 s5, s7 ; MOVREL-NEXT: s_mov_b32 s6, s8 ; MOVREL-NEXT: s_mov_b32 s7, s9 ; MOVREL-NEXT: s_mov_b32 s8, s10 ; MOVREL-NEXT: s_mov_b32 s9, s11 ; MOVREL-NEXT: s_mov_b32 s10, s12 ; MOVREL-NEXT: s_mov_b32 s11, s13 ; MOVREL-NEXT: s_mov_b32 s12, s14 ; MOVREL-NEXT: s_mov_b32 s13, s15 ; MOVREL-NEXT: s_mov_b32 s14, s16 ; MOVREL-NEXT: s_mov_b32 s15, s17 ; MOVREL-NEXT: s_movrels_b64 s[0:1], s[0:1] ; MOVREL-NEXT: v_mov_b32_e32 v0, s0 ; MOVREL-NEXT: v_mov_b32_e32 v1, s1 ; MOVREL-NEXT: flat_store_dwordx2 v[0:1], v[0:1] ; MOVREL-NEXT: s_endpgm entry: %ext = extractelement <8 x i64> %vec, i32 %sel store i64 %ext, i64 addrspace(1)* undef ret void } define amdgpu_ps float @dyn_extract_v8f32_s_s_offset3(<8 x float> inreg %vec, i32 inreg %sel) { ; GCN-LABEL: dyn_extract_v8f32_s_s_offset3: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_add_i32 s10, s10, 3 ; GCN-NEXT: s_cmp_eq_u32 s10, 1 ; GCN-NEXT: s_cselect_b32 s0, s3, s2 ; GCN-NEXT: s_cmp_eq_u32 s10, 2 ; GCN-NEXT: s_cselect_b32 s0, s4, s0 ; GCN-NEXT: s_cmp_eq_u32 s10, 3 
; GCN-NEXT: s_cselect_b32 s0, s5, s0 ; GCN-NEXT: s_cmp_eq_u32 s10, 4 ; GCN-NEXT: s_cselect_b32 s0, s6, s0 ; GCN-NEXT: s_cmp_eq_u32 s10, 5 ; GCN-NEXT: s_cselect_b32 s0, s7, s0 ; GCN-NEXT: s_cmp_eq_u32 s10, 6 ; GCN-NEXT: s_cselect_b32 s0, s8, s0 ; GCN-NEXT: s_cmp_eq_u32 s10, 7 ; GCN-NEXT: s_cselect_b32 s0, s9, s0 ; GCN-NEXT: v_mov_b32_e32 v0, s0 ; GCN-NEXT: ; return to shader part epilog entry: %add = add i32 %sel, 3 %ext = extractelement <8 x float> %vec, i32 %add ret float %ext } define float @dyn_extract_v8f32_v_v_offset3(<8 x float> %vec, i32 %sel) { ; GPRIDX-LABEL: dyn_extract_v8f32_v_v_offset3: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GPRIDX-NEXT: v_add_u32_e32 v8, 3, v8 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v8 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v8 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v8 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v8 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v8 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v8 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 7, v8 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc ; GPRIDX-NEXT: s_setpc_b64 s[30:31] ; ; MOVREL-LABEL: dyn_extract_v8f32_v_v_offset3: ; MOVREL: ; %bb.0: ; %entry ; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; MOVREL-NEXT: v_add_u32_e32 v8, vcc, 3, v8 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 1, v8 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 2, v8 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 3, v8 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 4, v8 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 5, v8 ; 
MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 6, v8 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 7, v8 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc ; MOVREL-NEXT: s_setpc_b64 s[30:31] entry: %add = add i32 %sel, 3 %ext = extractelement <8 x float> %vec, i32 %add ret float %ext } define amdgpu_ps double @dyn_extract_v8f64_s_s_offset1(<8 x double> inreg %vec, i32 inreg %sel) { ; GCN-LABEL: dyn_extract_v8f64_s_s_offset1: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_mov_b32 s0, s2 ; GCN-NEXT: s_mov_b32 s1, s3 ; GCN-NEXT: s_mov_b32 s2, s4 ; GCN-NEXT: s_mov_b32 s3, s5 ; GCN-NEXT: s_mov_b32 m0, s18 ; GCN-NEXT: s_mov_b32 s4, s6 ; GCN-NEXT: s_mov_b32 s5, s7 ; GCN-NEXT: s_mov_b32 s6, s8 ; GCN-NEXT: s_mov_b32 s7, s9 ; GCN-NEXT: s_mov_b32 s8, s10 ; GCN-NEXT: s_mov_b32 s9, s11 ; GCN-NEXT: s_mov_b32 s10, s12 ; GCN-NEXT: s_mov_b32 s11, s13 ; GCN-NEXT: s_mov_b32 s12, s14 ; GCN-NEXT: s_mov_b32 s13, s15 ; GCN-NEXT: s_mov_b32 s14, s16 ; GCN-NEXT: s_mov_b32 s15, s17 ; GCN-NEXT: s_movrels_b64 s[0:1], s[2:3] ; GCN-NEXT: ; return to shader part epilog entry: %add = add i32 %sel, 1 %ext = extractelement <8 x double> %vec, i32 %add ret double %ext } define amdgpu_ps double @dyn_extract_v8f64_s_s_offset2(<8 x double> inreg %vec, i32 inreg %sel) { ; GCN-LABEL: dyn_extract_v8f64_s_s_offset2: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_mov_b32 s0, s2 ; GCN-NEXT: s_mov_b32 s1, s3 ; GCN-NEXT: s_mov_b32 s2, s4 ; GCN-NEXT: s_mov_b32 s3, s5 ; GCN-NEXT: s_mov_b32 s4, s6 ; GCN-NEXT: s_mov_b32 s5, s7 ; GCN-NEXT: s_mov_b32 m0, s18 ; GCN-NEXT: s_mov_b32 s6, s8 ; GCN-NEXT: s_mov_b32 s7, s9 ; GCN-NEXT: s_mov_b32 s8, s10 ; GCN-NEXT: s_mov_b32 s9, s11 ; GCN-NEXT: s_mov_b32 s10, s12 ; GCN-NEXT: s_mov_b32 s11, s13 ; GCN-NEXT: s_mov_b32 s12, s14 ; GCN-NEXT: s_mov_b32 s13, s15 ; GCN-NEXT: s_mov_b32 s14, s16 ; GCN-NEXT: s_mov_b32 s15, s17 ; GCN-NEXT: s_movrels_b64 s[0:1], s[4:5] ; GCN-NEXT: ; return to shader part epilog entry: %add = add 
i32 %sel, 2 %ext = extractelement <8 x double> %vec, i32 %add ret double %ext } define amdgpu_ps double @dyn_extract_v8f64_s_s_offset3(<8 x double> inreg %vec, i32 inreg %sel) { ; GCN-LABEL: dyn_extract_v8f64_s_s_offset3: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_mov_b32 s0, s2 ; GCN-NEXT: s_mov_b32 s1, s3 ; GCN-NEXT: s_mov_b32 s2, s4 ; GCN-NEXT: s_mov_b32 s3, s5 ; GCN-NEXT: s_mov_b32 s4, s6 ; GCN-NEXT: s_mov_b32 s5, s7 ; GCN-NEXT: s_mov_b32 s6, s8 ; GCN-NEXT: s_mov_b32 s7, s9 ; GCN-NEXT: s_mov_b32 m0, s18 ; GCN-NEXT: s_mov_b32 s8, s10 ; GCN-NEXT: s_mov_b32 s9, s11 ; GCN-NEXT: s_mov_b32 s10, s12 ; GCN-NEXT: s_mov_b32 s11, s13 ; GCN-NEXT: s_mov_b32 s12, s14 ; GCN-NEXT: s_mov_b32 s13, s15 ; GCN-NEXT: s_mov_b32 s14, s16 ; GCN-NEXT: s_mov_b32 s15, s17 ; GCN-NEXT: s_movrels_b64 s[0:1], s[6:7] ; GCN-NEXT: ; return to shader part epilog entry: %add = add i32 %sel, 3 %ext = extractelement <8 x double> %vec, i32 %add ret double %ext } define amdgpu_ps double @dyn_extract_v8f64_s_s_offset4(<8 x double> inreg %vec, i32 inreg %sel) { ; GCN-LABEL: dyn_extract_v8f64_s_s_offset4: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_mov_b32 s0, s2 ; GCN-NEXT: s_mov_b32 s1, s3 ; GCN-NEXT: s_mov_b32 s2, s4 ; GCN-NEXT: s_mov_b32 s3, s5 ; GCN-NEXT: s_mov_b32 s4, s6 ; GCN-NEXT: s_mov_b32 s5, s7 ; GCN-NEXT: s_mov_b32 s6, s8 ; GCN-NEXT: s_mov_b32 s7, s9 ; GCN-NEXT: s_mov_b32 s8, s10 ; GCN-NEXT: s_mov_b32 s9, s11 ; GCN-NEXT: s_mov_b32 m0, s18 ; GCN-NEXT: s_mov_b32 s10, s12 ; GCN-NEXT: s_mov_b32 s11, s13 ; GCN-NEXT: s_mov_b32 s12, s14 ; GCN-NEXT: s_mov_b32 s13, s15 ; GCN-NEXT: s_mov_b32 s14, s16 ; GCN-NEXT: s_mov_b32 s15, s17 ; GCN-NEXT: s_movrels_b64 s[0:1], s[8:9] ; GCN-NEXT: ; return to shader part epilog entry: %add = add i32 %sel, 4 %ext = extractelement <8 x double> %vec, i32 %add ret double %ext } define amdgpu_ps double @dyn_extract_v8f64_s_s_offset5(<8 x double> inreg %vec, i32 inreg %sel) { ; GCN-LABEL: dyn_extract_v8f64_s_s_offset5: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_mov_b32 s0, s2 ; 
GCN-NEXT: s_mov_b32 s1, s3 ; GCN-NEXT: s_mov_b32 s2, s4 ; GCN-NEXT: s_mov_b32 s3, s5 ; GCN-NEXT: s_mov_b32 s4, s6 ; GCN-NEXT: s_mov_b32 s5, s7 ; GCN-NEXT: s_mov_b32 s6, s8 ; GCN-NEXT: s_mov_b32 s7, s9 ; GCN-NEXT: s_mov_b32 s8, s10 ; GCN-NEXT: s_mov_b32 s9, s11 ; GCN-NEXT: s_mov_b32 s10, s12 ; GCN-NEXT: s_mov_b32 s11, s13 ; GCN-NEXT: s_mov_b32 m0, s18 ; GCN-NEXT: s_mov_b32 s12, s14 ; GCN-NEXT: s_mov_b32 s13, s15 ; GCN-NEXT: s_mov_b32 s14, s16 ; GCN-NEXT: s_mov_b32 s15, s17 ; GCN-NEXT: s_movrels_b64 s[0:1], s[10:11] ; GCN-NEXT: ; return to shader part epilog entry: %add = add i32 %sel, 5 %ext = extractelement <8 x double> %vec, i32 %add ret double %ext } define amdgpu_ps double @dyn_extract_v8f64_s_s_offset6(<8 x double> inreg %vec, i32 inreg %sel) { ; GCN-LABEL: dyn_extract_v8f64_s_s_offset6: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_mov_b32 s0, s2 ; GCN-NEXT: s_mov_b32 s1, s3 ; GCN-NEXT: s_mov_b32 s2, s4 ; GCN-NEXT: s_mov_b32 s3, s5 ; GCN-NEXT: s_mov_b32 s4, s6 ; GCN-NEXT: s_mov_b32 s5, s7 ; GCN-NEXT: s_mov_b32 s6, s8 ; GCN-NEXT: s_mov_b32 s7, s9 ; GCN-NEXT: s_mov_b32 s8, s10 ; GCN-NEXT: s_mov_b32 s9, s11 ; GCN-NEXT: s_mov_b32 s10, s12 ; GCN-NEXT: s_mov_b32 s11, s13 ; GCN-NEXT: s_mov_b32 s12, s14 ; GCN-NEXT: s_mov_b32 s13, s15 ; GCN-NEXT: s_mov_b32 m0, s18 ; GCN-NEXT: s_mov_b32 s14, s16 ; GCN-NEXT: s_mov_b32 s15, s17 ; GCN-NEXT: s_movrels_b64 s[0:1], s[12:13] ; GCN-NEXT: ; return to shader part epilog entry: %add = add i32 %sel, 6 %ext = extractelement <8 x double> %vec, i32 %add ret double %ext } define amdgpu_ps double @dyn_extract_v8f64_s_s_offset7(<8 x double> inreg %vec, i32 inreg %sel) { ; GPRIDX-LABEL: dyn_extract_v8f64_s_s_offset7: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_mov_b32 s0, s2 ; GPRIDX-NEXT: s_mov_b32 s1, s3 ; GPRIDX-NEXT: s_mov_b32 s2, s4 ; GPRIDX-NEXT: s_mov_b32 s3, s5 ; GPRIDX-NEXT: s_mov_b32 s4, s6 ; GPRIDX-NEXT: s_mov_b32 s5, s7 ; GPRIDX-NEXT: s_mov_b32 s6, s8 ; GPRIDX-NEXT: s_mov_b32 s7, s9 ; GPRIDX-NEXT: s_mov_b32 s8, s10 ; GPRIDX-NEXT: 
s_mov_b32 s9, s11 ; GPRIDX-NEXT: s_mov_b32 s10, s12 ; GPRIDX-NEXT: s_mov_b32 s11, s13 ; GPRIDX-NEXT: s_mov_b32 s12, s14 ; GPRIDX-NEXT: s_mov_b32 s13, s15 ; GPRIDX-NEXT: s_mov_b32 s14, s16 ; GPRIDX-NEXT: s_mov_b32 s15, s17 ; GPRIDX-NEXT: s_mov_b32 m0, s18 ; GPRIDX-NEXT: s_nop 0 ; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[14:15] ; GPRIDX-NEXT: ; return to shader part epilog ; ; MOVREL-LABEL: dyn_extract_v8f64_s_s_offset7: ; MOVREL: ; %bb.0: ; %entry ; MOVREL-NEXT: s_mov_b32 s0, s2 ; MOVREL-NEXT: s_mov_b32 s1, s3 ; MOVREL-NEXT: s_mov_b32 s2, s4 ; MOVREL-NEXT: s_mov_b32 s3, s5 ; MOVREL-NEXT: s_mov_b32 s4, s6 ; MOVREL-NEXT: s_mov_b32 s5, s7 ; MOVREL-NEXT: s_mov_b32 s6, s8 ; MOVREL-NEXT: s_mov_b32 s7, s9 ; MOVREL-NEXT: s_mov_b32 s8, s10 ; MOVREL-NEXT: s_mov_b32 s9, s11 ; MOVREL-NEXT: s_mov_b32 s10, s12 ; MOVREL-NEXT: s_mov_b32 s11, s13 ; MOVREL-NEXT: s_mov_b32 s12, s14 ; MOVREL-NEXT: s_mov_b32 s13, s15 ; MOVREL-NEXT: s_mov_b32 s14, s16 ; MOVREL-NEXT: s_mov_b32 s15, s17 ; MOVREL-NEXT: s_mov_b32 m0, s18 ; MOVREL-NEXT: s_movrels_b64 s[0:1], s[14:15] ; MOVREL-NEXT: ; return to shader part epilog entry: %add = add i32 %sel, 7 %ext = extractelement <8 x double> %vec, i32 %add ret double %ext } define amdgpu_ps double @dyn_extract_v8f64_s_s_offsetm1(<8 x double> inreg %vec, i32 inreg %sel) { ; GCN-LABEL: dyn_extract_v8f64_s_s_offsetm1: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_mov_b32 s0, s2 ; GCN-NEXT: s_mov_b32 s1, s3 ; GCN-NEXT: s_add_i32 m0, s18, -1 ; GCN-NEXT: s_mov_b32 s2, s4 ; GCN-NEXT: s_mov_b32 s3, s5 ; GCN-NEXT: s_mov_b32 s4, s6 ; GCN-NEXT: s_mov_b32 s5, s7 ; GCN-NEXT: s_mov_b32 s6, s8 ; GCN-NEXT: s_mov_b32 s7, s9 ; GCN-NEXT: s_mov_b32 s8, s10 ; GCN-NEXT: s_mov_b32 s9, s11 ; GCN-NEXT: s_mov_b32 s10, s12 ; GCN-NEXT: s_mov_b32 s11, s13 ; GCN-NEXT: s_mov_b32 s12, s14 ; GCN-NEXT: s_mov_b32 s13, s15 ; GCN-NEXT: s_mov_b32 s14, s16 ; GCN-NEXT: s_mov_b32 s15, s17 ; GCN-NEXT: s_movrels_b64 s[0:1], s[0:1] ; GCN-NEXT: ; return to shader part epilog entry: %add = add i32 %sel, -1 %ext = 
extractelement <8 x double> %vec, i32 %add ret double %ext } define double @dyn_extract_v8f64_v_v_offset3(<8 x double> %vec, i32 %sel) { ; GPRIDX-LABEL: dyn_extract_v8f64_v_v_offset3: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GPRIDX-NEXT: v_add_u32_e32 v16, 3, v16 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v16 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v16 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc ; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v16 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc ; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v16 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc ; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v16 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc ; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v16 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc ; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 7, v16 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc ; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v15, vcc ; GPRIDX-NEXT: s_setpc_b64 s[30:31] ; ; MOVREL-LABEL: dyn_extract_v8f64_v_v_offset3: ; MOVREL: ; %bb.0: ; %entry ; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; MOVREL-NEXT: v_add_u32_e32 v16, vcc, 3, v16 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 1, v16 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 2, v16 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc ; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 3, v16 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc ; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 4, v16 ; 
MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc ; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 5, v16 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc ; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 6, v16 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc ; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 7, v16 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc ; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v15, vcc ; MOVREL-NEXT: s_setpc_b64 s[30:31] entry: %add = add i32 %sel, 3 %ext = extractelement <8 x double> %vec, i32 %add ret double %ext } define i8 addrspace(3)* @dyn_extract_v8p3_v_v(<8 x i8 addrspace(3)*> %vec, i32 %idx) { ; GCN-LABEL: dyn_extract_v8p3_v_v: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v8 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v8 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v8 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v8 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v8 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v8 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v8 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc ; GCN-NEXT: s_setpc_b64 s[30:31] entry: %ext = extractelement <8 x i8 addrspace(3)*> %vec, i32 %idx ret i8 addrspace(3)* %ext } define amdgpu_ps void @dyn_extract_v8p3_s_s(<8 x i8 addrspace(3)*> inreg %vec, i32 inreg %idx) { ; GPRIDX-LABEL: dyn_extract_v8p3_s_s: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_cmp_eq_u32 s10, 1 ; GPRIDX-NEXT: s_cselect_b32 s0, s3, s2 ; GPRIDX-NEXT: s_cmp_eq_u32 s10, 2 ; GPRIDX-NEXT: s_cselect_b32 s0, s4, s0 ; GPRIDX-NEXT: s_cmp_eq_u32 s10, 3 ; GPRIDX-NEXT: s_cselect_b32 s0, s5, s0 ; GPRIDX-NEXT: 
s_cmp_eq_u32 s10, 4 ; GPRIDX-NEXT: s_cselect_b32 s0, s6, s0 ; GPRIDX-NEXT: s_cmp_eq_u32 s10, 5 ; GPRIDX-NEXT: s_cselect_b32 s0, s7, s0 ; GPRIDX-NEXT: s_cmp_eq_u32 s10, 6 ; GPRIDX-NEXT: s_cselect_b32 s0, s8, s0 ; GPRIDX-NEXT: s_cmp_eq_u32 s10, 7 ; GPRIDX-NEXT: s_cselect_b32 s0, s9, s0 ; GPRIDX-NEXT: v_mov_b32_e32 v0, s0 ; GPRIDX-NEXT: ds_write_b32 v0, v0 ; GPRIDX-NEXT: s_endpgm ; ; MOVREL-LABEL: dyn_extract_v8p3_s_s: ; MOVREL: ; %bb.0: ; %entry ; MOVREL-NEXT: s_cmp_eq_u32 s10, 1 ; MOVREL-NEXT: s_cselect_b32 s0, s3, s2 ; MOVREL-NEXT: s_cmp_eq_u32 s10, 2 ; MOVREL-NEXT: s_cselect_b32 s0, s4, s0 ; MOVREL-NEXT: s_cmp_eq_u32 s10, 3 ; MOVREL-NEXT: s_cselect_b32 s0, s5, s0 ; MOVREL-NEXT: s_cmp_eq_u32 s10, 4 ; MOVREL-NEXT: s_cselect_b32 s0, s6, s0 ; MOVREL-NEXT: s_cmp_eq_u32 s10, 5 ; MOVREL-NEXT: s_cselect_b32 s0, s7, s0 ; MOVREL-NEXT: s_cmp_eq_u32 s10, 6 ; MOVREL-NEXT: s_cselect_b32 s0, s8, s0 ; MOVREL-NEXT: s_cmp_eq_u32 s10, 7 ; MOVREL-NEXT: s_cselect_b32 s0, s9, s0 ; MOVREL-NEXT: v_mov_b32_e32 v0, s0 ; MOVREL-NEXT: s_mov_b32 m0, -1 ; MOVREL-NEXT: ds_write_b32 v0, v0 ; MOVREL-NEXT: s_endpgm entry: %ext = extractelement <8 x i8 addrspace(3)*> %vec, i32 %idx store i8 addrspace(3)* %ext, i8 addrspace(3)* addrspace(3)* undef ret void } define i8 addrspace(1)* @dyn_extract_v8p1_v_v(<8 x i8 addrspace(1)*> %vec, i32 %idx) { ; GCN-LABEL: dyn_extract_v8p1_v_v: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v16 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v16 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v16 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v16 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc ; GCN-NEXT: 
; NOTE(review): autogenerated CHECK stream — keep byte-identical; regenerate with
; update_llc_test_checks.py rather than editing by hand.
; This span covers: the end of dyn_extract_v8p1_v_v (64-bit pointer elements selected
; via paired v_cndmask_b32), dyn_extract_v8p1_s_s (uniform index: the inreg vector is
; repacked into an aligned SGPR run, m0 = s18, then s_movrels_b64 does the indexed read;
; gfx900 stores with global_store_dwordx2, fiji with flat_store_dwordx2), and the
; v16f32/v32f32 _v_s cases (gfx900 uses s_set_gpr_idx_on/off, fiji uses m0 +
; v_movrels_b32 — the two indexed-addressing mechanisms this test contrasts).
v_cmp_eq_u32_e32 vcc, 5, v16 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v16 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v16 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v15, vcc ; GCN-NEXT: s_setpc_b64 s[30:31] entry: %ext = extractelement <8 x i8 addrspace(1)*> %vec, i32 %idx ret i8 addrspace(1)* %ext } define amdgpu_ps void @dyn_extract_v8p1_s_s(<8 x i8 addrspace(1)*> inreg %vec, i32 inreg %idx) { ; GPRIDX-LABEL: dyn_extract_v8p1_s_s: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_mov_b32 s0, s2 ; GPRIDX-NEXT: s_mov_b32 s1, s3 ; GPRIDX-NEXT: s_mov_b32 m0, s18 ; GPRIDX-NEXT: s_mov_b32 s2, s4 ; GPRIDX-NEXT: s_mov_b32 s3, s5 ; GPRIDX-NEXT: s_mov_b32 s4, s6 ; GPRIDX-NEXT: s_mov_b32 s5, s7 ; GPRIDX-NEXT: s_mov_b32 s6, s8 ; GPRIDX-NEXT: s_mov_b32 s7, s9 ; GPRIDX-NEXT: s_mov_b32 s8, s10 ; GPRIDX-NEXT: s_mov_b32 s9, s11 ; GPRIDX-NEXT: s_mov_b32 s10, s12 ; GPRIDX-NEXT: s_mov_b32 s11, s13 ; GPRIDX-NEXT: s_mov_b32 s12, s14 ; GPRIDX-NEXT: s_mov_b32 s13, s15 ; GPRIDX-NEXT: s_mov_b32 s14, s16 ; GPRIDX-NEXT: s_mov_b32 s15, s17 ; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[0:1] ; GPRIDX-NEXT: v_mov_b32_e32 v0, s0 ; GPRIDX-NEXT: v_mov_b32_e32 v1, s1 ; GPRIDX-NEXT: global_store_dwordx2 v[0:1], v[0:1], off ; GPRIDX-NEXT: s_endpgm ; ; MOVREL-LABEL: dyn_extract_v8p1_s_s: ; MOVREL: ; %bb.0: ; %entry ; MOVREL-NEXT: s_mov_b32 s0, s2 ; MOVREL-NEXT: s_mov_b32 s1, s3 ; MOVREL-NEXT: s_mov_b32 m0, s18 ; MOVREL-NEXT: s_mov_b32 s2, s4 ; MOVREL-NEXT: s_mov_b32 s3, s5 ; MOVREL-NEXT: s_mov_b32 s4, s6 ; MOVREL-NEXT: s_mov_b32 s5, s7 ; MOVREL-NEXT: s_mov_b32 s6, s8 ; MOVREL-NEXT: s_mov_b32 s7, s9 ; MOVREL-NEXT: s_mov_b32 s8, s10 ; MOVREL-NEXT: s_mov_b32 s9, s11 ; MOVREL-NEXT: s_mov_b32 s10, s12 ; MOVREL-NEXT: s_mov_b32 s11, s13 ; MOVREL-NEXT: s_mov_b32 s12, s14 ; MOVREL-NEXT: s_mov_b32 s13, s15 ; 
MOVREL-NEXT: s_mov_b32 s14, s16 ; MOVREL-NEXT: s_mov_b32 s15, s17 ; MOVREL-NEXT: s_movrels_b64 s[0:1], s[0:1] ; MOVREL-NEXT: v_mov_b32_e32 v0, s0 ; MOVREL-NEXT: v_mov_b32_e32 v1, s1 ; MOVREL-NEXT: flat_store_dwordx2 v[0:1], v[0:1] ; MOVREL-NEXT: s_endpgm entry: %ext = extractelement <8 x i8 addrspace(1)*> %vec, i32 %idx store i8 addrspace(1)* %ext, i8 addrspace(1)* addrspace(1)* undef ret void } define amdgpu_ps float @dyn_extract_v16f32_v_s(<16 x float> %vec, i32 inreg %sel) { ; GPRIDX-LABEL: dyn_extract_v16f32_v_s: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_set_gpr_idx_on s2, gpr_idx(SRC0) ; GPRIDX-NEXT: v_mov_b32_e32 v0, v0 ; GPRIDX-NEXT: s_set_gpr_idx_off ; GPRIDX-NEXT: ; return to shader part epilog ; ; MOVREL-LABEL: dyn_extract_v16f32_v_s: ; MOVREL: ; %bb.0: ; %entry ; MOVREL-NEXT: s_mov_b32 m0, s2 ; MOVREL-NEXT: v_movrels_b32_e32 v0, v0 ; MOVREL-NEXT: ; return to shader part epilog entry: %ext = extractelement <16 x float> %vec, i32 %sel ret float %ext } define amdgpu_ps float @dyn_extract_v32f32_v_s(<32 x float> %vec, i32 inreg %sel) { ; GPRIDX-LABEL: dyn_extract_v32f32_v_s: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_set_gpr_idx_on s2, gpr_idx(SRC0) ; GPRIDX-NEXT: v_mov_b32_e32 v0, v0 ; GPRIDX-NEXT: s_set_gpr_idx_off ; GPRIDX-NEXT: ; return to shader part epilog ; ; MOVREL-LABEL: dyn_extract_v32f32_v_s: ; MOVREL: ; %bb.0: ; %entry ; MOVREL-NEXT: s_mov_b32 m0, s2 ; MOVREL-NEXT: v_movrels_b32_e32 v0, v0 ; MOVREL-NEXT: ; return to shader part epilog entry: %ext = extractelement <32 x float> %vec, i32 %sel ret float %ext } define amdgpu_ps double @dyn_extract_v16f64_v_s(<16 x double> %vec, i32 inreg %sel) { ; GPRIDX-LABEL: dyn_extract_v16f64_v_s: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_lshl_b32 s0, s2, 1 ; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(SRC0) ; GPRIDX-NEXT: v_mov_b32_e32 v32, v0 ; GPRIDX-NEXT: v_mov_b32_e32 v0, v1 ; GPRIDX-NEXT: s_set_gpr_idx_off ; GPRIDX-NEXT: v_readfirstlane_b32 s0, v32 ; GPRIDX-NEXT: v_readfirstlane_b32 s1, v0 ; 
; NOTE(review): autogenerated CHECK stream — keep byte-identical; regenerate via the
; update script rather than hand-editing.
; This span covers: the MOVREL half of dyn_extract_v16f64_v_s (m0 = sel*2, then two
; v_movrels_b32 reads fetch the 64-bit element halves), dyn_extract_v16f32_s_s and
; v32f32_s_s (the constant vector is materialized into a contiguous SGPR run, m0 set,
; then a single s_movrels_b32 indexed read), dyn_extract_v16f64_s_s (s_movrels_b64;
; note s66 holds 0 and is copied into every even register that is a zero low half),
; and the v6f32 s_v / v_v cases (odd/non-power-of-2 width: lowered to a
; v_cmp_eq/v_cndmask compare-select chain instead of movrel).
; NOTE(review): the IR `extractelement <16 x float> , i32 %sel` etc. below are missing
; their constant literals — extraction damage; confirm against the upstream test.
GPRIDX-NEXT: ; return to shader part epilog ; ; MOVREL-LABEL: dyn_extract_v16f64_v_s: ; MOVREL: ; %bb.0: ; %entry ; MOVREL-NEXT: s_lshl_b32 m0, s2, 1 ; MOVREL-NEXT: v_movrels_b32_e32 v32, v0 ; MOVREL-NEXT: v_movrels_b32_e32 v0, v1 ; MOVREL-NEXT: v_readfirstlane_b32 s0, v32 ; MOVREL-NEXT: v_readfirstlane_b32 s1, v0 ; MOVREL-NEXT: ; return to shader part epilog entry: %ext = extractelement <16 x double> %vec, i32 %sel ret double %ext } define amdgpu_ps float @dyn_extract_v16f32_s_s(i32 inreg %sel) { ; GCN-LABEL: dyn_extract_v16f32_s_s: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_mov_b32 s4, 1.0 ; GCN-NEXT: s_mov_b32 m0, s2 ; GCN-NEXT: s_mov_b32 s19, 0x41800000 ; GCN-NEXT: s_mov_b32 s18, 0x41700000 ; GCN-NEXT: s_mov_b32 s17, 0x41600000 ; GCN-NEXT: s_mov_b32 s16, 0x41500000 ; GCN-NEXT: s_mov_b32 s15, 0x41400000 ; GCN-NEXT: s_mov_b32 s14, 0x41300000 ; GCN-NEXT: s_mov_b32 s13, 0x41200000 ; GCN-NEXT: s_mov_b32 s12, 0x41100000 ; GCN-NEXT: s_mov_b32 s11, 0x41000000 ; GCN-NEXT: s_mov_b32 s10, 0x40e00000 ; GCN-NEXT: s_mov_b32 s9, 0x40c00000 ; GCN-NEXT: s_mov_b32 s8, 0x40a00000 ; GCN-NEXT: s_mov_b32 s7, 4.0 ; GCN-NEXT: s_mov_b32 s6, 0x40400000 ; GCN-NEXT: s_mov_b32 s5, 2.0 ; GCN-NEXT: s_movrels_b32 s0, s4 ; GCN-NEXT: v_mov_b32_e32 v0, s0 ; GCN-NEXT: ; return to shader part epilog entry: %ext = extractelement <16 x float> , i32 %sel ret float %ext } define amdgpu_ps float @dyn_extract_v32f32_s_s(i32 inreg %sel) { ; GCN-LABEL: dyn_extract_v32f32_s_s: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_mov_b32 s36, 1.0 ; GCN-NEXT: s_mov_b32 m0, s2 ; GCN-NEXT: s_mov_b32 s67, 0x42000000 ; GCN-NEXT: s_mov_b32 s66, 0x41f80000 ; GCN-NEXT: s_mov_b32 s65, 0x41f00000 ; GCN-NEXT: s_mov_b32 s64, 0x41e80000 ; GCN-NEXT: s_mov_b32 s63, 0x41e00000 ; GCN-NEXT: s_mov_b32 s62, 0x41d80000 ; GCN-NEXT: s_mov_b32 s61, 0x41d00000 ; GCN-NEXT: s_mov_b32 s60, 0x41c80000 ; GCN-NEXT: s_mov_b32 s59, 0x41c00000 ; GCN-NEXT: s_mov_b32 s58, 0x41b80000 ; GCN-NEXT: s_mov_b32 s57, 0x41b00000 ; GCN-NEXT: s_mov_b32 s56, 0x41a80000 ; 
GCN-NEXT: s_mov_b32 s55, 0x41a00000 ; GCN-NEXT: s_mov_b32 s54, 0x41980000 ; GCN-NEXT: s_mov_b32 s53, 0x41900000 ; GCN-NEXT: s_mov_b32 s52, 0x41880000 ; GCN-NEXT: s_mov_b32 s51, 0x41800000 ; GCN-NEXT: s_mov_b32 s50, 0x41700000 ; GCN-NEXT: s_mov_b32 s49, 0x41600000 ; GCN-NEXT: s_mov_b32 s48, 0x41500000 ; GCN-NEXT: s_mov_b32 s47, 0x41400000 ; GCN-NEXT: s_mov_b32 s46, 0x41300000 ; GCN-NEXT: s_mov_b32 s45, 0x41200000 ; GCN-NEXT: s_mov_b32 s44, 0x41100000 ; GCN-NEXT: s_mov_b32 s43, 0x41000000 ; GCN-NEXT: s_mov_b32 s42, 0x40e00000 ; GCN-NEXT: s_mov_b32 s41, 0x40c00000 ; GCN-NEXT: s_mov_b32 s40, 0x40a00000 ; GCN-NEXT: s_mov_b32 s39, 4.0 ; GCN-NEXT: s_mov_b32 s38, 0x40400000 ; GCN-NEXT: s_mov_b32 s37, 2.0 ; GCN-NEXT: s_movrels_b32 s0, s36 ; GCN-NEXT: v_mov_b32_e32 v0, s0 ; GCN-NEXT: ; return to shader part epilog entry: %ext = extractelement <32 x float> , i32 %sel ret float %ext } define amdgpu_ps double @dyn_extract_v16f64_s_s(i32 inreg %sel) { ; GCN-LABEL: dyn_extract_v16f64_s_s: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_mov_b32 s66, 0 ; GCN-NEXT: s_mov_b64 s[36:37], 1.0 ; GCN-NEXT: s_mov_b32 m0, s2 ; GCN-NEXT: s_mov_b32 s67, 0x40300000 ; GCN-NEXT: s_mov_b32 s65, 0x402e0000 ; GCN-NEXT: s_mov_b32 s64, s66 ; GCN-NEXT: s_mov_b32 s63, 0x402c0000 ; GCN-NEXT: s_mov_b32 s62, s66 ; GCN-NEXT: s_mov_b32 s61, 0x402a0000 ; GCN-NEXT: s_mov_b32 s60, s66 ; GCN-NEXT: s_mov_b32 s59, 0x40280000 ; GCN-NEXT: s_mov_b32 s58, s66 ; GCN-NEXT: s_mov_b32 s57, 0x40260000 ; GCN-NEXT: s_mov_b32 s56, s66 ; GCN-NEXT: s_mov_b32 s55, 0x40240000 ; GCN-NEXT: s_mov_b32 s54, s66 ; GCN-NEXT: s_mov_b32 s53, 0x40220000 ; GCN-NEXT: s_mov_b32 s52, s66 ; GCN-NEXT: s_mov_b32 s51, 0x40200000 ; GCN-NEXT: s_mov_b32 s50, s66 ; GCN-NEXT: s_mov_b32 s49, 0x401c0000 ; GCN-NEXT: s_mov_b32 s48, s66 ; GCN-NEXT: s_mov_b32 s47, 0x40180000 ; GCN-NEXT: s_mov_b32 s46, s66 ; GCN-NEXT: s_mov_b32 s45, 0x40140000 ; GCN-NEXT: s_mov_b32 s44, s66 ; GCN-NEXT: s_mov_b64 s[42:43], 4.0 ; GCN-NEXT: s_mov_b32 s41, 0x40080000 ; GCN-NEXT: 
s_mov_b32 s40, s66 ; GCN-NEXT: s_mov_b64 s[38:39], 2.0 ; GCN-NEXT: s_movrels_b64 s[0:1], s[36:37] ; GCN-NEXT: ; return to shader part epilog entry: %ext = extractelement <16 x double> , i32 %sel ret double %ext } define amdgpu_ps float @dyn_extract_v6f32_s_v(<6 x float> inreg %vec, i32 %sel) { ; GCN-LABEL: dyn_extract_v6f32_s_v: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_mov_b32 s0, s2 ; GCN-NEXT: s_mov_b32 s1, s3 ; GCN-NEXT: v_mov_b32_e32 v1, s0 ; GCN-NEXT: v_mov_b32_e32 v2, s1 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc ; GCN-NEXT: v_mov_b32_e32 v3, s4 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; GCN-NEXT: v_mov_b32_e32 v4, s5 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc ; GCN-NEXT: v_mov_b32_e32 v5, s6 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc ; GCN-NEXT: v_mov_b32_e32 v6, s7 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0 ; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v6, vcc ; GCN-NEXT: ; return to shader part epilog entry: %ext = extractelement <6 x float> %vec, i32 %sel ret float %ext } define float @dyn_extract_v6f32_v_v(<6 x float> %vec, i32 %sel) { ; GCN-LABEL: dyn_extract_v6f32_v_v: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v6 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v6 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v6 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v6 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v6 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc ; GCN-NEXT: s_setpc_b64 s[30:31] entry: %ext = extractelement <6 x float> %vec, i32 %sel ret float %ext } define amdgpu_ps float @dyn_extract_v6f32_v_s(<6 x float> %vec, i32 inreg %sel) { ; GCN-LABEL: dyn_extract_v6f32_v_s: ; GCN: ; 
; NOTE(review): autogenerated CHECK stream — keep byte-identical; regenerate, don't
; hand-edit.
; This span covers the non-power-of-2 width cases v6f32/v7f32/v6f64 in all four
; uniformity combinations. Pattern summary grounded in the checks below:
;  - _v_s (uniform index, VGPR vector): v_cmp_eq_u32_e64 vcc, s2, N then v_cndmask chain
;    (movrel/gpr_idx is not used for these widths).
;  - _s_s (all uniform): pure scalar s_cmp_eq_u32 / s_cselect_b32 chain.
;  - _s_v (SGPR vector, divergent index): SGPRs copied into VGPRs, then the same
;    compare/select waterfall; f64 variants select both halves per step and finish with
;    v_readfirstlane_b32 to return in SGPRs.
%bb.0: ; %entry ; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 1 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 2 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 3 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc ; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 4 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc ; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 5 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc ; GCN-NEXT: ; return to shader part epilog entry: %ext = extractelement <6 x float> %vec, i32 %sel ret float %ext } define amdgpu_ps float @dyn_extract_v6f32_s_s(<6 x float> inreg %vec, i32 inreg %sel) { ; GCN-LABEL: dyn_extract_v6f32_s_s: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_cmp_eq_u32 s8, 1 ; GCN-NEXT: s_cselect_b32 s0, s3, s2 ; GCN-NEXT: s_cmp_eq_u32 s8, 2 ; GCN-NEXT: s_cselect_b32 s0, s4, s0 ; GCN-NEXT: s_cmp_eq_u32 s8, 3 ; GCN-NEXT: s_cselect_b32 s0, s5, s0 ; GCN-NEXT: s_cmp_eq_u32 s8, 4 ; GCN-NEXT: s_cselect_b32 s0, s6, s0 ; GCN-NEXT: s_cmp_eq_u32 s8, 5 ; GCN-NEXT: s_cselect_b32 s0, s7, s0 ; GCN-NEXT: v_mov_b32_e32 v0, s0 ; GCN-NEXT: ; return to shader part epilog entry: %ext = extractelement <6 x float> %vec, i32 %sel ret float %ext } define amdgpu_ps float @dyn_extract_v7f32_s_v(<7 x float> inreg %vec, i32 %sel) { ; GCN-LABEL: dyn_extract_v7f32_s_v: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_mov_b32 s0, s2 ; GCN-NEXT: s_mov_b32 s1, s3 ; GCN-NEXT: s_mov_b32 s2, s4 ; GCN-NEXT: v_mov_b32_e32 v1, s0 ; GCN-NEXT: v_mov_b32_e32 v2, s1 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc ; GCN-NEXT: v_mov_b32_e32 v3, s2 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; GCN-NEXT: v_mov_b32_e32 v4, s5 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc ; GCN-NEXT: v_mov_b32_e32 v5, s6 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc ; GCN-NEXT: v_mov_b32_e32 v6, s7 ; GCN-NEXT: 
v_cmp_eq_u32_e32 vcc, 5, v0 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc ; GCN-NEXT: v_mov_b32_e32 v7, s8 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v0 ; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v7, vcc ; GCN-NEXT: ; return to shader part epilog entry: %ext = extractelement <7 x float> %vec, i32 %sel ret float %ext } define float @dyn_extract_v7f32_v_v(<7 x float> %vec, i32 %sel) { ; GCN-LABEL: dyn_extract_v7f32_v_v: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v7 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v7 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v7 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v7 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v7 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v7 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc ; GCN-NEXT: s_setpc_b64 s[30:31] entry: %ext = extractelement <7 x float> %vec, i32 %sel ret float %ext } define amdgpu_ps float @dyn_extract_v7f32_v_s(<7 x float> %vec, i32 inreg %sel) { ; GCN-LABEL: dyn_extract_v7f32_v_s: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 1 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 2 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 3 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc ; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 4 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc ; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 5 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc ; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 6 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc ; GCN-NEXT: ; return to shader part epilog entry: %ext = extractelement <7 x float> %vec, i32 %sel ret float %ext } define amdgpu_ps float @dyn_extract_v7f32_s_s(<7 x float> inreg %vec, i32 inreg %sel) { ; GCN-LABEL: 
dyn_extract_v7f32_s_s: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_cmp_eq_u32 s9, 1 ; GCN-NEXT: s_cselect_b32 s0, s3, s2 ; GCN-NEXT: s_cmp_eq_u32 s9, 2 ; GCN-NEXT: s_cselect_b32 s0, s4, s0 ; GCN-NEXT: s_cmp_eq_u32 s9, 3 ; GCN-NEXT: s_cselect_b32 s0, s5, s0 ; GCN-NEXT: s_cmp_eq_u32 s9, 4 ; GCN-NEXT: s_cselect_b32 s0, s6, s0 ; GCN-NEXT: s_cmp_eq_u32 s9, 5 ; GCN-NEXT: s_cselect_b32 s0, s7, s0 ; GCN-NEXT: s_cmp_eq_u32 s9, 6 ; GCN-NEXT: s_cselect_b32 s0, s8, s0 ; GCN-NEXT: v_mov_b32_e32 v0, s0 ; GCN-NEXT: ; return to shader part epilog entry: %ext = extractelement <7 x float> %vec, i32 %sel ret float %ext } define amdgpu_ps double @dyn_extract_v6f64_s_v(<6 x double> inreg %vec, i32 %sel) { ; GCN-LABEL: dyn_extract_v6f64_s_v: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_mov_b32 s0, s2 ; GCN-NEXT: s_mov_b32 s1, s3 ; GCN-NEXT: s_mov_b32 s2, s4 ; GCN-NEXT: s_mov_b32 s3, s5 ; GCN-NEXT: s_mov_b32 s4, s6 ; GCN-NEXT: s_mov_b32 s5, s7 ; GCN-NEXT: v_mov_b32_e32 v1, s0 ; GCN-NEXT: v_mov_b32_e32 v2, s1 ; GCN-NEXT: v_mov_b32_e32 v3, s2 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 ; GCN-NEXT: v_mov_b32_e32 v4, s3 ; GCN-NEXT: s_mov_b32 s6, s8 ; GCN-NEXT: s_mov_b32 s7, s9 ; GCN-NEXT: v_mov_b32_e32 v5, s4 ; GCN-NEXT: v_mov_b32_e32 v6, s5 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0 ; GCN-NEXT: v_mov_b32_e32 v7, s6 ; GCN-NEXT: v_mov_b32_e32 v8, s7 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0 ; GCN-NEXT: v_mov_b32_e32 v9, s10 ; GCN-NEXT: v_mov_b32_e32 v10, s11 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0 ; GCN-NEXT: v_mov_b32_e32 v11, s12 ; GCN-NEXT: v_mov_b32_e32 v12, s13 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0 ; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v11, 
vcc ; GCN-NEXT: v_cndmask_b32_e32 v1, v2, v12, vcc ; GCN-NEXT: v_readfirstlane_b32 s0, v0 ; GCN-NEXT: v_readfirstlane_b32 s1, v1 ; GCN-NEXT: ; return to shader part epilog entry: %ext = extractelement <6 x double> %vec, i32 %sel ret double %ext } define double @dyn_extract_v6f64_v_v(<6 x double> %vec, i32 %sel) { ; GCN-LABEL: dyn_extract_v6f64_v_v: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v12 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v12 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v12 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v12 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v12 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc ; GCN-NEXT: s_setpc_b64 s[30:31] entry: %ext = extractelement <6 x double> %vec, i32 %sel ret double %ext } define amdgpu_ps double @dyn_extract_v6f64_v_s(<6 x double> %vec, i32 inreg %sel) { ; GPRIDX-LABEL: dyn_extract_v6f64_v_s: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_lshl_b32 s0, s2, 1 ; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(SRC0) ; GPRIDX-NEXT: v_mov_b32_e32 v12, v0 ; GPRIDX-NEXT: v_mov_b32_e32 v0, v1 ; GPRIDX-NEXT: s_set_gpr_idx_off ; GPRIDX-NEXT: v_readfirstlane_b32 s0, v12 ; GPRIDX-NEXT: v_readfirstlane_b32 s1, v0 ; GPRIDX-NEXT: ; return to shader part epilog ; ; MOVREL-LABEL: dyn_extract_v6f64_v_s: ; MOVREL: ; %bb.0: ; %entry ; MOVREL-NEXT: s_lshl_b32 m0, s2, 1 ; MOVREL-NEXT: v_movrels_b32_e32 v12, v0 ; MOVREL-NEXT: v_movrels_b32_e32 v0, v1 ; MOVREL-NEXT: v_readfirstlane_b32 s0, v12 ; MOVREL-NEXT: v_readfirstlane_b32 s1, v0 ; MOVREL-NEXT: ; return to shader part epilog 
; NOTE(review): autogenerated CHECK stream — keep byte-identical; regenerate, don't
; hand-edit.
; This span covers: dyn_extract_v6f64_s_s and dyn_extract_v7f64_s_s (uniform f64
; extract: inreg halves repacked into an aligned SGPR pair run, m0 loaded with the
; index, then a single s_movrels_b64), dyn_extract_v7f64_s_v and _v_v (divergent
; index: paired v_cndmask waterfall over both 32-bit halves), and dyn_extract_v7f64_v_s
; (uniform index into VGPRs: gfx900 s_set_gpr_idx_on with index*2 vs fiji
; m0 = sel*2 + two v_movrels_b32; result returned via v_readfirstlane_b32).
entry: %ext = extractelement <6 x double> %vec, i32 %sel ret double %ext } define amdgpu_ps double @dyn_extract_v6f64_s_s(<6 x double> inreg %vec, i32 inreg %sel) { ; GCN-LABEL: dyn_extract_v6f64_s_s: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_mov_b32 s0, s2 ; GCN-NEXT: s_mov_b32 s1, s3 ; GCN-NEXT: s_mov_b32 m0, s14 ; GCN-NEXT: s_mov_b32 s2, s4 ; GCN-NEXT: s_mov_b32 s3, s5 ; GCN-NEXT: s_mov_b32 s4, s6 ; GCN-NEXT: s_mov_b32 s5, s7 ; GCN-NEXT: s_mov_b32 s6, s8 ; GCN-NEXT: s_mov_b32 s7, s9 ; GCN-NEXT: s_mov_b32 s8, s10 ; GCN-NEXT: s_mov_b32 s9, s11 ; GCN-NEXT: s_mov_b32 s10, s12 ; GCN-NEXT: s_mov_b32 s11, s13 ; GCN-NEXT: s_movrels_b64 s[0:1], s[0:1] ; GCN-NEXT: ; return to shader part epilog entry: %ext = extractelement <6 x double> %vec, i32 %sel ret double %ext } define amdgpu_ps double @dyn_extract_v7f64_s_v(<7 x double> inreg %vec, i32 %sel) { ; GCN-LABEL: dyn_extract_v7f64_s_v: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_mov_b32 s0, s2 ; GCN-NEXT: s_mov_b32 s1, s3 ; GCN-NEXT: s_mov_b32 s2, s4 ; GCN-NEXT: s_mov_b32 s3, s5 ; GCN-NEXT: s_mov_b32 s4, s6 ; GCN-NEXT: s_mov_b32 s5, s7 ; GCN-NEXT: v_mov_b32_e32 v1, s0 ; GCN-NEXT: v_mov_b32_e32 v2, s1 ; GCN-NEXT: v_mov_b32_e32 v3, s2 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 ; GCN-NEXT: v_mov_b32_e32 v4, s3 ; GCN-NEXT: s_mov_b32 s6, s8 ; GCN-NEXT: s_mov_b32 s7, s9 ; GCN-NEXT: v_mov_b32_e32 v5, s4 ; GCN-NEXT: v_mov_b32_e32 v6, s5 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0 ; GCN-NEXT: s_mov_b32 s8, s10 ; GCN-NEXT: s_mov_b32 s9, s11 ; GCN-NEXT: v_mov_b32_e32 v7, s6 ; GCN-NEXT: v_mov_b32_e32 v8, s7 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0 ; GCN-NEXT: v_mov_b32_e32 v9, s8 ; GCN-NEXT: v_mov_b32_e32 v10, s9 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0 ; GCN-NEXT: v_mov_b32_e32 
v11, s12 ; GCN-NEXT: v_mov_b32_e32 v12, s13 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0 ; GCN-NEXT: v_mov_b32_e32 v13, s14 ; GCN-NEXT: v_mov_b32_e32 v14, s15 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v12, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v0 ; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v13, vcc ; GCN-NEXT: v_cndmask_b32_e32 v1, v2, v14, vcc ; GCN-NEXT: v_readfirstlane_b32 s0, v0 ; GCN-NEXT: v_readfirstlane_b32 s1, v1 ; GCN-NEXT: ; return to shader part epilog entry: %ext = extractelement <7 x double> %vec, i32 %sel ret double %ext } define double @dyn_extract_v7f64_v_v(<7 x double> %vec, i32 %sel) { ; GCN-LABEL: dyn_extract_v7f64_v_v: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v14 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v14 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v14 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v14 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v14 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v14 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc ; GCN-NEXT: s_setpc_b64 s[30:31] entry: %ext = extractelement <7 x double> %vec, i32 %sel ret double %ext } define amdgpu_ps double @dyn_extract_v7f64_v_s(<7 x double> %vec, i32 inreg %sel) { ; GPRIDX-LABEL: dyn_extract_v7f64_v_s: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_lshl_b32 s0, s2, 1 ; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(SRC0) 
; GPRIDX-NEXT: v_mov_b32_e32 v14, v0 ; GPRIDX-NEXT: v_mov_b32_e32 v0, v1 ; GPRIDX-NEXT: s_set_gpr_idx_off ; GPRIDX-NEXT: v_readfirstlane_b32 s0, v14 ; GPRIDX-NEXT: v_readfirstlane_b32 s1, v0 ; GPRIDX-NEXT: ; return to shader part epilog ; ; MOVREL-LABEL: dyn_extract_v7f64_v_s: ; MOVREL: ; %bb.0: ; %entry ; MOVREL-NEXT: s_lshl_b32 m0, s2, 1 ; MOVREL-NEXT: v_movrels_b32_e32 v14, v0 ; MOVREL-NEXT: v_movrels_b32_e32 v0, v1 ; MOVREL-NEXT: v_readfirstlane_b32 s0, v14 ; MOVREL-NEXT: v_readfirstlane_b32 s1, v0 ; MOVREL-NEXT: ; return to shader part epilog entry: %ext = extractelement <7 x double> %vec, i32 %sel ret double %ext } define amdgpu_ps double @dyn_extract_v7f64_s_s(<7 x double> inreg %vec, i32 inreg %sel) { ; GCN-LABEL: dyn_extract_v7f64_s_s: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_mov_b32 s0, s2 ; GCN-NEXT: s_mov_b32 s1, s3 ; GCN-NEXT: s_mov_b32 m0, s16 ; GCN-NEXT: s_mov_b32 s2, s4 ; GCN-NEXT: s_mov_b32 s3, s5 ; GCN-NEXT: s_mov_b32 s4, s6 ; GCN-NEXT: s_mov_b32 s5, s7 ; GCN-NEXT: s_mov_b32 s6, s8 ; GCN-NEXT: s_mov_b32 s7, s9 ; GCN-NEXT: s_mov_b32 s8, s10 ; GCN-NEXT: s_mov_b32 s9, s11 ; GCN-NEXT: s_mov_b32 s10, s12 ; GCN-NEXT: s_mov_b32 s11, s13 ; GCN-NEXT: s_mov_b32 s12, s14 ; GCN-NEXT: s_mov_b32 s13, s15 ; GCN-NEXT: s_movrels_b64 s[0:1], s[0:1] ; GCN-NEXT: ; return to shader part epilog entry: %ext = extractelement <7 x double> %vec, i32 %sel ret double %ext } define amdgpu_kernel void @dyn_extract_v5f64_s_s(double addrspace(1)* %out, i32 %sel) { ; GPRIDX-LABEL: dyn_extract_v5f64_s_s: ; GPRIDX: .amd_kernel_code_t ; GPRIDX-NEXT: amd_code_version_major = 1 ; GPRIDX-NEXT: amd_code_version_minor = 2 ; GPRIDX-NEXT: amd_machine_kind = 1 ; GPRIDX-NEXT: amd_machine_version_major = 9 ; GPRIDX-NEXT: amd_machine_version_minor = 0 ; GPRIDX-NEXT: amd_machine_version_stepping = 0 ; GPRIDX-NEXT: kernel_code_entry_byte_offset = 256 ; GPRIDX-NEXT: kernel_code_prefetch_byte_size = 0 ; GPRIDX-NEXT: granulated_workitem_vgpr_count = 0 ; GPRIDX-NEXT: granulated_wavefront_sgpr_count = 
; NOTE(review): autogenerated CHECK stream — keep byte-identical; regenerate, don't
; hand-edit.
; This span is dominated by the two .amd_kernel_code_t metadata dumps for
; dyn_extract_v5f64_s_s (gfx900 vs fiji: they differ in machine_version fields,
; is_xnack_enabled, and workitem_vgpr_count — presumably why the test checks the full
; blob; confirm intent before trimming on regeneration). The kernel body extracts from
; a constant <5 x double> via an s_cmp_eq_u32 / s_cselect_b64 chain and stores with
; global_store_dwordx2 (gfx900) vs flat_store_dwordx2 (fiji). The tail of this span
; begins dyn_extract_v15f32_const_s_v, which continues in the next lines.
1 ; GPRIDX-NEXT: priority = 0 ; GPRIDX-NEXT: float_mode = 240 ; GPRIDX-NEXT: priv = 0 ; GPRIDX-NEXT: enable_dx10_clamp = 1 ; GPRIDX-NEXT: debug_mode = 0 ; GPRIDX-NEXT: enable_ieee_mode = 1 ; GPRIDX-NEXT: enable_wgp_mode = 0 ; GPRIDX-NEXT: enable_mem_ordered = 0 ; GPRIDX-NEXT: enable_fwd_progress = 0 ; GPRIDX-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 ; GPRIDX-NEXT: user_sgpr_count = 6 ; GPRIDX-NEXT: enable_trap_handler = 0 ; GPRIDX-NEXT: enable_sgpr_workgroup_id_x = 1 ; GPRIDX-NEXT: enable_sgpr_workgroup_id_y = 0 ; GPRIDX-NEXT: enable_sgpr_workgroup_id_z = 0 ; GPRIDX-NEXT: enable_sgpr_workgroup_info = 0 ; GPRIDX-NEXT: enable_vgpr_workitem_id = 0 ; GPRIDX-NEXT: enable_exception_msb = 0 ; GPRIDX-NEXT: granulated_lds_size = 0 ; GPRIDX-NEXT: enable_exception = 0 ; GPRIDX-NEXT: enable_sgpr_private_segment_buffer = 1 ; GPRIDX-NEXT: enable_sgpr_dispatch_ptr = 0 ; GPRIDX-NEXT: enable_sgpr_queue_ptr = 0 ; GPRIDX-NEXT: enable_sgpr_kernarg_segment_ptr = 1 ; GPRIDX-NEXT: enable_sgpr_dispatch_id = 0 ; GPRIDX-NEXT: enable_sgpr_flat_scratch_init = 0 ; GPRIDX-NEXT: enable_sgpr_private_segment_size = 0 ; GPRIDX-NEXT: enable_sgpr_grid_workgroup_count_x = 0 ; GPRIDX-NEXT: enable_sgpr_grid_workgroup_count_y = 0 ; GPRIDX-NEXT: enable_sgpr_grid_workgroup_count_z = 0 ; GPRIDX-NEXT: enable_wavefront_size32 = 0 ; GPRIDX-NEXT: enable_ordered_append_gds = 0 ; GPRIDX-NEXT: private_element_size = 1 ; GPRIDX-NEXT: is_ptr64 = 1 ; GPRIDX-NEXT: is_dynamic_callstack = 0 ; GPRIDX-NEXT: is_debug_enabled = 0 ; GPRIDX-NEXT: is_xnack_enabled = 1 ; GPRIDX-NEXT: workitem_private_segment_byte_size = 0 ; GPRIDX-NEXT: workgroup_group_segment_byte_size = 0 ; GPRIDX-NEXT: gds_segment_byte_size = 0 ; GPRIDX-NEXT: kernarg_segment_byte_size = 28 ; GPRIDX-NEXT: workgroup_fbarrier_count = 0 ; GPRIDX-NEXT: wavefront_sgpr_count = 9 ; GPRIDX-NEXT: workitem_vgpr_count = 3 ; GPRIDX-NEXT: reserved_vgpr_first = 0 ; GPRIDX-NEXT: reserved_vgpr_count = 0 ; GPRIDX-NEXT: reserved_sgpr_first = 0 ; GPRIDX-NEXT: 
reserved_sgpr_count = 0 ; GPRIDX-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 ; GPRIDX-NEXT: debug_private_segment_buffer_sgpr = 0 ; GPRIDX-NEXT: kernarg_segment_alignment = 4 ; GPRIDX-NEXT: group_segment_alignment = 4 ; GPRIDX-NEXT: private_segment_alignment = 4 ; GPRIDX-NEXT: wavefront_size = 6 ; GPRIDX-NEXT: call_convention = -1 ; GPRIDX-NEXT: runtime_loader_kernel_symbol = 0 ; GPRIDX-NEXT: .end_amd_kernel_code_t ; GPRIDX-NEXT: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x0 ; GPRIDX-NEXT: s_load_dword s8, s[4:5], 0x8 ; GPRIDX-NEXT: s_mov_b32 s0, 0 ; GPRIDX-NEXT: s_mov_b32 s3, 0x40080000 ; GPRIDX-NEXT: s_mov_b32 s2, s0 ; GPRIDX-NEXT: s_mov_b32 s1, 0x40140000 ; GPRIDX-NEXT: s_waitcnt lgkmcnt(0) ; GPRIDX-NEXT: s_cmp_eq_u32 s8, 1 ; GPRIDX-NEXT: s_cselect_b64 s[4:5], 2.0, 1.0 ; GPRIDX-NEXT: s_cmp_eq_u32 s8, 2 ; GPRIDX-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5] ; GPRIDX-NEXT: s_cmp_eq_u32 s8, 3 ; GPRIDX-NEXT: s_cselect_b64 s[2:3], 4.0, s[2:3] ; GPRIDX-NEXT: s_cmp_eq_u32 s8, 4 ; GPRIDX-NEXT: s_cselect_b64 s[0:1], s[0:1], s[2:3] ; GPRIDX-NEXT: v_mov_b32_e32 v0, s0 ; GPRIDX-NEXT: v_mov_b32_e32 v1, s1 ; GPRIDX-NEXT: v_mov_b32_e32 v2, 0 ; GPRIDX-NEXT: global_store_dwordx2 v2, v[0:1], s[6:7] ; GPRIDX-NEXT: s_endpgm ; ; MOVREL-LABEL: dyn_extract_v5f64_s_s: ; MOVREL: .amd_kernel_code_t ; MOVREL-NEXT: amd_code_version_major = 1 ; MOVREL-NEXT: amd_code_version_minor = 2 ; MOVREL-NEXT: amd_machine_kind = 1 ; MOVREL-NEXT: amd_machine_version_major = 8 ; MOVREL-NEXT: amd_machine_version_minor = 0 ; MOVREL-NEXT: amd_machine_version_stepping = 3 ; MOVREL-NEXT: kernel_code_entry_byte_offset = 256 ; MOVREL-NEXT: kernel_code_prefetch_byte_size = 0 ; MOVREL-NEXT: granulated_workitem_vgpr_count = 0 ; MOVREL-NEXT: granulated_wavefront_sgpr_count = 1 ; MOVREL-NEXT: priority = 0 ; MOVREL-NEXT: float_mode = 240 ; MOVREL-NEXT: priv = 0 ; MOVREL-NEXT: enable_dx10_clamp = 1 ; MOVREL-NEXT: debug_mode = 0 ; MOVREL-NEXT: enable_ieee_mode = 1 ; MOVREL-NEXT: 
enable_wgp_mode = 0 ; MOVREL-NEXT: enable_mem_ordered = 0 ; MOVREL-NEXT: enable_fwd_progress = 0 ; MOVREL-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 ; MOVREL-NEXT: user_sgpr_count = 6 ; MOVREL-NEXT: enable_trap_handler = 0 ; MOVREL-NEXT: enable_sgpr_workgroup_id_x = 1 ; MOVREL-NEXT: enable_sgpr_workgroup_id_y = 0 ; MOVREL-NEXT: enable_sgpr_workgroup_id_z = 0 ; MOVREL-NEXT: enable_sgpr_workgroup_info = 0 ; MOVREL-NEXT: enable_vgpr_workitem_id = 0 ; MOVREL-NEXT: enable_exception_msb = 0 ; MOVREL-NEXT: granulated_lds_size = 0 ; MOVREL-NEXT: enable_exception = 0 ; MOVREL-NEXT: enable_sgpr_private_segment_buffer = 1 ; MOVREL-NEXT: enable_sgpr_dispatch_ptr = 0 ; MOVREL-NEXT: enable_sgpr_queue_ptr = 0 ; MOVREL-NEXT: enable_sgpr_kernarg_segment_ptr = 1 ; MOVREL-NEXT: enable_sgpr_dispatch_id = 0 ; MOVREL-NEXT: enable_sgpr_flat_scratch_init = 0 ; MOVREL-NEXT: enable_sgpr_private_segment_size = 0 ; MOVREL-NEXT: enable_sgpr_grid_workgroup_count_x = 0 ; MOVREL-NEXT: enable_sgpr_grid_workgroup_count_y = 0 ; MOVREL-NEXT: enable_sgpr_grid_workgroup_count_z = 0 ; MOVREL-NEXT: enable_wavefront_size32 = 0 ; MOVREL-NEXT: enable_ordered_append_gds = 0 ; MOVREL-NEXT: private_element_size = 1 ; MOVREL-NEXT: is_ptr64 = 1 ; MOVREL-NEXT: is_dynamic_callstack = 0 ; MOVREL-NEXT: is_debug_enabled = 0 ; MOVREL-NEXT: is_xnack_enabled = 0 ; MOVREL-NEXT: workitem_private_segment_byte_size = 0 ; MOVREL-NEXT: workgroup_group_segment_byte_size = 0 ; MOVREL-NEXT: gds_segment_byte_size = 0 ; MOVREL-NEXT: kernarg_segment_byte_size = 28 ; MOVREL-NEXT: workgroup_fbarrier_count = 0 ; MOVREL-NEXT: wavefront_sgpr_count = 9 ; MOVREL-NEXT: workitem_vgpr_count = 4 ; MOVREL-NEXT: reserved_vgpr_first = 0 ; MOVREL-NEXT: reserved_vgpr_count = 0 ; MOVREL-NEXT: reserved_sgpr_first = 0 ; MOVREL-NEXT: reserved_sgpr_count = 0 ; MOVREL-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 ; MOVREL-NEXT: debug_private_segment_buffer_sgpr = 0 ; MOVREL-NEXT: kernarg_segment_alignment = 4 ; MOVREL-NEXT: 
group_segment_alignment = 4 ; MOVREL-NEXT: private_segment_alignment = 4 ; MOVREL-NEXT: wavefront_size = 6 ; MOVREL-NEXT: call_convention = -1 ; MOVREL-NEXT: runtime_loader_kernel_symbol = 0 ; MOVREL-NEXT: .end_amd_kernel_code_t ; MOVREL-NEXT: ; %bb.0: ; %entry ; MOVREL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x0 ; MOVREL-NEXT: s_load_dword s8, s[4:5], 0x8 ; MOVREL-NEXT: s_mov_b32 s0, 0 ; MOVREL-NEXT: s_mov_b32 s3, 0x40080000 ; MOVREL-NEXT: s_mov_b32 s2, s0 ; MOVREL-NEXT: s_mov_b32 s1, 0x40140000 ; MOVREL-NEXT: s_waitcnt lgkmcnt(0) ; MOVREL-NEXT: s_cmp_eq_u32 s8, 1 ; MOVREL-NEXT: s_cselect_b64 s[4:5], 2.0, 1.0 ; MOVREL-NEXT: s_cmp_eq_u32 s8, 2 ; MOVREL-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5] ; MOVREL-NEXT: s_cmp_eq_u32 s8, 3 ; MOVREL-NEXT: s_cselect_b64 s[2:3], 4.0, s[2:3] ; MOVREL-NEXT: s_cmp_eq_u32 s8, 4 ; MOVREL-NEXT: s_cselect_b64 s[0:1], s[0:1], s[2:3] ; MOVREL-NEXT: v_mov_b32_e32 v0, s0 ; MOVREL-NEXT: v_mov_b32_e32 v2, s6 ; MOVREL-NEXT: v_mov_b32_e32 v1, s1 ; MOVREL-NEXT: v_mov_b32_e32 v3, s7 ; MOVREL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; MOVREL-NEXT: s_endpgm entry: %ext = extractelement <5 x double> , i32 %sel store double %ext, double addrspace(1)* %out ret void } define float @dyn_extract_v15f32_const_s_v(i32 %sel) { ; GCN-LABEL: dyn_extract_v15f32_const_s_v: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 ; GCN-NEXT: v_cndmask_b32_e64 v13, 1.0, 2.0, vcc ; GCN-NEXT: v_mov_b32_e32 v1, 0x40400000 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0 ; GCN-NEXT: v_cndmask_b32_e32 v1, v13, v1, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0 ; GCN-NEXT: v_cndmask_b32_e64 v1, v1, 4.0, vcc ; GCN-NEXT: v_mov_b32_e32 v2, 0x40a00000 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc ; GCN-NEXT: v_mov_b32_e32 v3, 0x40c00000 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; GCN-NEXT: v_mov_b32_e32 v4, 0x40e00000 ; GCN-NEXT: v_cmp_eq_u32_e32 
; NOTE(review): the literal constant-vector operands of the extractelement
; instructions in this span were dropped during extraction (the IR read
; "extractelement <15 x float> , i32 %sel", which is invalid — the aggregate
; operand is missing). Reconstructed as <float 1.0 .. float 15.0> from the
; autogenerated check values (2.0, 0x40400000 = 3.0f, 4.0, 0x40a00000 = 5.0f,
; ... 0x41700000 = 15.0f). All check lines are byte-identical to the original;
; regenerate with utils/update_llc_test_checks.py rather than hand-editing.
vcc, 6, v0 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc ; GCN-NEXT: v_mov_b32_e32 v5, 0x41000000 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v0 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc ; GCN-NEXT: v_mov_b32_e32 v6, 0x41100000 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 8, v0 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc ; GCN-NEXT: v_mov_b32_e32 v7, 0x41200000 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 9, v0 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc ; GCN-NEXT: v_mov_b32_e32 v8, 0x41300000 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 10, v0 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v8, vcc ; GCN-NEXT: v_mov_b32_e32 v9, 0x41400000 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 11, v0 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc ; GCN-NEXT: v_mov_b32_e32 v10, 0x41500000 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 12, v0 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v10, vcc ; GCN-NEXT: v_mov_b32_e32 v11, 0x41600000 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 13, v0 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc ; GCN-NEXT: v_mov_b32_e32 v12, 0x41700000 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 14, v0 ; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v12, vcc ; GCN-NEXT: s_setpc_b64 s[30:31] entry: %ext = extractelement <15 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0>, i32 %sel ret float %ext } define amdgpu_ps float @dyn_extract_v15f32_const_s_s(i32 inreg %sel) { ; GCN-LABEL: dyn_extract_v15f32_const_s_s: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_mov_b32 s4, 1.0 ; GCN-NEXT: s_mov_b32 m0, s2 ; GCN-NEXT: s_mov_b32 s18, 0x41700000 ; GCN-NEXT: s_mov_b32 s17, 0x41600000 ; GCN-NEXT: s_mov_b32 s16, 0x41500000 ; GCN-NEXT: s_mov_b32 s15, 0x41400000 ; GCN-NEXT: s_mov_b32 s14, 0x41300000 ; GCN-NEXT: s_mov_b32 s13, 0x41200000 ; GCN-NEXT: s_mov_b32 s12, 0x41100000 ; GCN-NEXT: s_mov_b32 s11, 0x41000000 ; GCN-NEXT: s_mov_b32 s10, 0x40e00000 ; GCN-NEXT: s_mov_b32 s9, 0x40c00000 ; GCN-NEXT: s_mov_b32 s8, 0x40a00000 ; GCN-NEXT: s_mov_b32 s7, 4.0 ; GCN-NEXT: s_mov_b32 s6, 0x40400000 ; GCN-NEXT: s_mov_b32 s5, 2.0 ; GCN-NEXT: s_movrels_b32 s0, s4 ; GCN-NEXT: v_mov_b32_e32 v0, s0 ; GCN-NEXT: ; return to shader part 
epilog entry: %ext = extractelement <15 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0>, i32 %sel ret float %ext } define amdgpu_ps float @dyn_extract_v15f32_s_v(<15 x float> inreg %vec, i32 %sel) { ; GCN-LABEL: dyn_extract_v15f32_s_v: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_mov_b32 s0, s2 ; GCN-NEXT: s_mov_b32 s1, s3 ; GCN-NEXT: s_mov_b32 s2, s4 ; GCN-NEXT: v_mov_b32_e32 v1, s0 ; GCN-NEXT: v_mov_b32_e32 v2, s1 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 ; GCN-NEXT: s_mov_b32 s3, s5 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc ; GCN-NEXT: v_mov_b32_e32 v3, s2 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0 ; GCN-NEXT: s_mov_b32 s4, s6 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; GCN-NEXT: v_mov_b32_e32 v4, s3 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0 ; GCN-NEXT: s_mov_b32 s5, s7 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc ; GCN-NEXT: v_mov_b32_e32 v5, s4 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0 ; GCN-NEXT: s_mov_b32 s6, s8 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc ; GCN-NEXT: v_mov_b32_e32 v6, s5 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0 ; GCN-NEXT: s_mov_b32 s7, s9 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc ; GCN-NEXT: v_mov_b32_e32 v7, s6 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v0 ; GCN-NEXT: s_mov_b32 s8, s10 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc ; GCN-NEXT: v_mov_b32_e32 v8, s7 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v0 ; GCN-NEXT: s_mov_b32 s9, s11 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v8, vcc ; GCN-NEXT: v_mov_b32_e32 v9, s8 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 8, v0 ; GCN-NEXT: s_mov_b32 s10, s12 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc ; GCN-NEXT: v_mov_b32_e32 v10, s9 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 9, v0 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v10, vcc ; GCN-NEXT: v_mov_b32_e32 v11, s10 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 10, v0 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc ; GCN-NEXT: v_mov_b32_e32 v12, s13 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 11, v0 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v12, vcc ; GCN-NEXT: v_mov_b32_e32 v13, s14 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 12, v0 ; GCN-NEXT: 
; NOTE(review): autogenerated FileCheck assertions (update_llc_test_checks.py).
; This span covers the tail of dyn_extract_v15f32_s_v, all of
; dyn_extract_v15f32_v_v (a 14-step v_cmp/v_cndmask select chain over v0..v14
; keyed on the index in v15), and the opening of dyn_extract_v15f32_v_s.
; Code and check lines are intentionally left byte-identical — regenerate with
; the script instead of hand-editing.
v_cndmask_b32_e32 v1, v1, v13, vcc ; GCN-NEXT: v_mov_b32_e32 v14, s15 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 13, v0 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v14, vcc ; GCN-NEXT: v_mov_b32_e32 v15, s16 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 14, v0 ; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v15, vcc ; GCN-NEXT: ; return to shader part epilog entry: %ext = extractelement <15 x float> %vec, i32 %sel ret float %ext } define float @dyn_extract_v15f32_v_v(<15 x float> %vec, i32 %sel) { ; GCN-LABEL: dyn_extract_v15f32_v_v: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v15 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v15 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v15 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v15 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v15 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v15 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v15 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 8, v15 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 9, v15 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 10, v15 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 11, v15 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v11, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 12, v15 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 13, v15 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v13, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 14, v15 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc ; GCN-NEXT: s_setpc_b64 s[30:31] entry: %ext = extractelement <15 x float> %vec, i32 %sel ret float %ext } define amdgpu_ps float @dyn_extract_v15f32_v_s(<15 x float> %vec, i32 inreg %sel) { 
; NOTE(review): autogenerated FileCheck assertions. This span covers the
; checks for dyn_extract_v15f32_v_s (indirect VGPR read: gfx900 uses
; s_set_gpr_idx_on, fiji uses m0 + v_movrels), all of dyn_extract_v15f32_s_s
; (m0 + s_movrels after compacting the SGPR vector to s0..s14), and the
; opening of dyn_extract_v15f32_s_s_offset3. Byte-identical to the original;
; regenerate with update_llc_test_checks.py rather than hand-editing.
; GPRIDX-LABEL: dyn_extract_v15f32_v_s: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_set_gpr_idx_on s2, gpr_idx(SRC0) ; GPRIDX-NEXT: v_mov_b32_e32 v0, v0 ; GPRIDX-NEXT: s_set_gpr_idx_off ; GPRIDX-NEXT: ; return to shader part epilog ; ; MOVREL-LABEL: dyn_extract_v15f32_v_s: ; MOVREL: ; %bb.0: ; %entry ; MOVREL-NEXT: s_mov_b32 m0, s2 ; MOVREL-NEXT: v_movrels_b32_e32 v0, v0 ; MOVREL-NEXT: ; return to shader part epilog entry: %ext = extractelement <15 x float> %vec, i32 %sel ret float %ext } define amdgpu_ps float @dyn_extract_v15f32_s_s(<15 x float> inreg %vec, i32 inreg %sel) { ; GCN-LABEL: dyn_extract_v15f32_s_s: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_mov_b32 s0, s2 ; GCN-NEXT: s_mov_b32 m0, s17 ; GCN-NEXT: s_mov_b32 s1, s3 ; GCN-NEXT: s_mov_b32 s2, s4 ; GCN-NEXT: s_mov_b32 s3, s5 ; GCN-NEXT: s_mov_b32 s4, s6 ; GCN-NEXT: s_mov_b32 s5, s7 ; GCN-NEXT: s_mov_b32 s6, s8 ; GCN-NEXT: s_mov_b32 s7, s9 ; GCN-NEXT: s_mov_b32 s8, s10 ; GCN-NEXT: s_mov_b32 s9, s11 ; GCN-NEXT: s_mov_b32 s10, s12 ; GCN-NEXT: s_mov_b32 s11, s13 ; GCN-NEXT: s_mov_b32 s12, s14 ; GCN-NEXT: s_mov_b32 s13, s15 ; GCN-NEXT: s_mov_b32 s14, s16 ; GCN-NEXT: s_movrels_b32 s0, s0 ; GCN-NEXT: v_mov_b32_e32 v0, s0 ; GCN-NEXT: ; return to shader part epilog entry: %ext = extractelement <15 x float> %vec, i32 %sel ret float %ext } define amdgpu_ps float @dyn_extract_v15f32_s_s_offset3(<15 x float> inreg %vec, i32 inreg %sel) { ; GCN-LABEL: dyn_extract_v15f32_s_s_offset3: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_mov_b32 s0, s2 ; GCN-NEXT: s_mov_b32 s1, s3 ; GCN-NEXT: s_mov_b32 s3, s5 ; GCN-NEXT: s_mov_b32 m0, s17 ; GCN-NEXT: s_mov_b32 s2, s4 ; GCN-NEXT: s_mov_b32 s4, s6 ; GCN-NEXT: s_mov_b32 s5, s7 ; GCN-NEXT: s_mov_b32 s6, s8 ; GCN-NEXT: s_mov_b32 s7, s9 ; GCN-NEXT: s_mov_b32 s8, s10 ; GCN-NEXT: s_mov_b32 s9, s11 ; GCN-NEXT: s_mov_b32 s10, s12 ; GCN-NEXT: s_mov_b32 s11, s13 ; GCN-NEXT: s_mov_b32 s12, s14 ; GCN-NEXT: s_mov_b32 s13, s15 ; GCN-NEXT: s_mov_b32 s14, s16 ; GCN-NEXT: s_movrels_b32 s0, s3 ; GCN-NEXT: 
; NOTE(review): autogenerated FileCheck assertions. This span finishes
; dyn_extract_v15f32_s_s_offset3 (the constant add-3 is folded into reading
; s3 via s_movrels), covers all of dyn_extract_v15f32_v_v_offset3 (the add is
; applied to the index register v15 before the compare chain; note the gfx900
; v_add_u32 vs fiji v_add_u32 ... vcc carry form), and opens the
; dyn_extract_v4f32_s_s_s kernel with the start of its GPRIDX
; .amd_kernel_code_t descriptor checks. Byte-identical; regenerate with
; update_llc_test_checks.py rather than hand-editing.
v_mov_b32_e32 v0, s0 ; GCN-NEXT: ; return to shader part epilog entry: %add = add i32 %sel, 3 %ext = extractelement <15 x float> %vec, i32 %add ret float %ext } define float @dyn_extract_v15f32_v_v_offset3(<15 x float> %vec, i32 %sel) { ; GPRIDX-LABEL: dyn_extract_v15f32_v_v_offset3: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GPRIDX-NEXT: v_add_u32_e32 v15, 3, v15 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v15 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v15 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v15 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v15 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v15 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v15 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 7, v15 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 8, v15 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 9, v15 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 10, v15 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 11, v15 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v11, vcc ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 12, v15 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 13, v15 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v13, vcc ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 14, v15 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc ; GPRIDX-NEXT: s_setpc_b64 s[30:31] ; ; MOVREL-LABEL: dyn_extract_v15f32_v_v_offset3: ; MOVREL: ; %bb.0: ; %entry ; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; MOVREL-NEXT: v_add_u32_e32 v15, vcc, 3, v15 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 1, v15 ; MOVREL-NEXT: 
v_cndmask_b32_e32 v0, v0, v1, vcc ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 2, v15 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 3, v15 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 4, v15 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 5, v15 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 6, v15 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 7, v15 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 8, v15 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 9, v15 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 10, v15 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 11, v15 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v11, vcc ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 12, v15 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 13, v15 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v13, vcc ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 14, v15 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc ; MOVREL-NEXT: s_setpc_b64 s[30:31] entry: %add = add i32 %sel, 3 %ext = extractelement <15 x float> %vec, i32 %add ret float %ext } define amdgpu_kernel void @dyn_extract_v4f32_s_s_s(float addrspace(1)* %out, i32 %sel) { ; GPRIDX-LABEL: dyn_extract_v4f32_s_s_s: ; GPRIDX: .amd_kernel_code_t ; GPRIDX-NEXT: amd_code_version_major = 1 ; GPRIDX-NEXT: amd_code_version_minor = 2 ; GPRIDX-NEXT: amd_machine_kind = 1 ; GPRIDX-NEXT: amd_machine_version_major = 9 ; GPRIDX-NEXT: amd_machine_version_minor = 0 ; GPRIDX-NEXT: amd_machine_version_stepping = 0 ; GPRIDX-NEXT: kernel_code_entry_byte_offset = 256 ; GPRIDX-NEXT: kernel_code_prefetch_byte_size = 0 ; GPRIDX-NEXT: granulated_workitem_vgpr_count = 0 ; GPRIDX-NEXT: 
; NOTE(review): autogenerated .amd_kernel_code_t descriptor checks for
; dyn_extract_v4f32_s_s_s, for both run lines (GPRIDX = gfx900 — note
; amd_machine_version_major = 9, is_xnack_enabled = 1; MOVREL = fiji — major
; 8, stepping 3, xnack off). These metadata assertions are entirely produced
; by update_llc_test_checks.py and are kept byte-identical; any change here
; should come from re-running the script, not manual edits.
granulated_wavefront_sgpr_count = 0 ; GPRIDX-NEXT: priority = 0 ; GPRIDX-NEXT: float_mode = 240 ; GPRIDX-NEXT: priv = 0 ; GPRIDX-NEXT: enable_dx10_clamp = 1 ; GPRIDX-NEXT: debug_mode = 0 ; GPRIDX-NEXT: enable_ieee_mode = 1 ; GPRIDX-NEXT: enable_wgp_mode = 0 ; GPRIDX-NEXT: enable_mem_ordered = 0 ; GPRIDX-NEXT: enable_fwd_progress = 0 ; GPRIDX-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 ; GPRIDX-NEXT: user_sgpr_count = 6 ; GPRIDX-NEXT: enable_trap_handler = 0 ; GPRIDX-NEXT: enable_sgpr_workgroup_id_x = 1 ; GPRIDX-NEXT: enable_sgpr_workgroup_id_y = 0 ; GPRIDX-NEXT: enable_sgpr_workgroup_id_z = 0 ; GPRIDX-NEXT: enable_sgpr_workgroup_info = 0 ; GPRIDX-NEXT: enable_vgpr_workitem_id = 0 ; GPRIDX-NEXT: enable_exception_msb = 0 ; GPRIDX-NEXT: granulated_lds_size = 0 ; GPRIDX-NEXT: enable_exception = 0 ; GPRIDX-NEXT: enable_sgpr_private_segment_buffer = 1 ; GPRIDX-NEXT: enable_sgpr_dispatch_ptr = 0 ; GPRIDX-NEXT: enable_sgpr_queue_ptr = 0 ; GPRIDX-NEXT: enable_sgpr_kernarg_segment_ptr = 1 ; GPRIDX-NEXT: enable_sgpr_dispatch_id = 0 ; GPRIDX-NEXT: enable_sgpr_flat_scratch_init = 0 ; GPRIDX-NEXT: enable_sgpr_private_segment_size = 0 ; GPRIDX-NEXT: enable_sgpr_grid_workgroup_count_x = 0 ; GPRIDX-NEXT: enable_sgpr_grid_workgroup_count_y = 0 ; GPRIDX-NEXT: enable_sgpr_grid_workgroup_count_z = 0 ; GPRIDX-NEXT: enable_wavefront_size32 = 0 ; GPRIDX-NEXT: enable_ordered_append_gds = 0 ; GPRIDX-NEXT: private_element_size = 1 ; GPRIDX-NEXT: is_ptr64 = 1 ; GPRIDX-NEXT: is_dynamic_callstack = 0 ; GPRIDX-NEXT: is_debug_enabled = 0 ; GPRIDX-NEXT: is_xnack_enabled = 1 ; GPRIDX-NEXT: workitem_private_segment_byte_size = 0 ; GPRIDX-NEXT: workgroup_group_segment_byte_size = 0 ; GPRIDX-NEXT: gds_segment_byte_size = 0 ; GPRIDX-NEXT: kernarg_segment_byte_size = 28 ; GPRIDX-NEXT: workgroup_fbarrier_count = 0 ; GPRIDX-NEXT: wavefront_sgpr_count = 6 ; GPRIDX-NEXT: workitem_vgpr_count = 2 ; GPRIDX-NEXT: reserved_vgpr_first = 0 ; GPRIDX-NEXT: reserved_vgpr_count = 0 ; GPRIDX-NEXT: 
reserved_sgpr_first = 0 ; GPRIDX-NEXT: reserved_sgpr_count = 0 ; GPRIDX-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 ; GPRIDX-NEXT: debug_private_segment_buffer_sgpr = 0 ; GPRIDX-NEXT: kernarg_segment_alignment = 4 ; GPRIDX-NEXT: group_segment_alignment = 4 ; GPRIDX-NEXT: private_segment_alignment = 4 ; GPRIDX-NEXT: wavefront_size = 6 ; GPRIDX-NEXT: call_convention = -1 ; GPRIDX-NEXT: runtime_loader_kernel_symbol = 0 ; GPRIDX-NEXT: .end_amd_kernel_code_t ; GPRIDX-NEXT: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GPRIDX-NEXT: s_load_dword s2, s[4:5], 0x8 ; GPRIDX-NEXT: v_mov_b32_e32 v1, 0 ; GPRIDX-NEXT: s_waitcnt lgkmcnt(0) ; GPRIDX-NEXT: s_cmp_eq_u32 s2, 1 ; GPRIDX-NEXT: s_cselect_b32 s3, 2.0, 1.0 ; GPRIDX-NEXT: s_cmp_eq_u32 s2, 2 ; GPRIDX-NEXT: s_cselect_b32 s3, 0x40400000, s3 ; GPRIDX-NEXT: s_cmp_eq_u32 s2, 3 ; GPRIDX-NEXT: s_cselect_b32 s2, 4.0, s3 ; GPRIDX-NEXT: v_mov_b32_e32 v0, s2 ; GPRIDX-NEXT: global_store_dword v1, v0, s[0:1] ; GPRIDX-NEXT: s_endpgm ; ; MOVREL-LABEL: dyn_extract_v4f32_s_s_s: ; MOVREL: .amd_kernel_code_t ; MOVREL-NEXT: amd_code_version_major = 1 ; MOVREL-NEXT: amd_code_version_minor = 2 ; MOVREL-NEXT: amd_machine_kind = 1 ; MOVREL-NEXT: amd_machine_version_major = 8 ; MOVREL-NEXT: amd_machine_version_minor = 0 ; MOVREL-NEXT: amd_machine_version_stepping = 3 ; MOVREL-NEXT: kernel_code_entry_byte_offset = 256 ; MOVREL-NEXT: kernel_code_prefetch_byte_size = 0 ; MOVREL-NEXT: granulated_workitem_vgpr_count = 0 ; MOVREL-NEXT: granulated_wavefront_sgpr_count = 0 ; MOVREL-NEXT: priority = 0 ; MOVREL-NEXT: float_mode = 240 ; MOVREL-NEXT: priv = 0 ; MOVREL-NEXT: enable_dx10_clamp = 1 ; MOVREL-NEXT: debug_mode = 0 ; MOVREL-NEXT: enable_ieee_mode = 1 ; MOVREL-NEXT: enable_wgp_mode = 0 ; MOVREL-NEXT: enable_mem_ordered = 0 ; MOVREL-NEXT: enable_fwd_progress = 0 ; MOVREL-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 ; MOVREL-NEXT: user_sgpr_count = 6 ; MOVREL-NEXT: enable_trap_handler = 0 ; MOVREL-NEXT: 
enable_sgpr_workgroup_id_x = 1 ; MOVREL-NEXT: enable_sgpr_workgroup_id_y = 0 ; MOVREL-NEXT: enable_sgpr_workgroup_id_z = 0 ; MOVREL-NEXT: enable_sgpr_workgroup_info = 0 ; MOVREL-NEXT: enable_vgpr_workitem_id = 0 ; MOVREL-NEXT: enable_exception_msb = 0 ; MOVREL-NEXT: granulated_lds_size = 0 ; MOVREL-NEXT: enable_exception = 0 ; MOVREL-NEXT: enable_sgpr_private_segment_buffer = 1 ; MOVREL-NEXT: enable_sgpr_dispatch_ptr = 0 ; MOVREL-NEXT: enable_sgpr_queue_ptr = 0 ; MOVREL-NEXT: enable_sgpr_kernarg_segment_ptr = 1 ; MOVREL-NEXT: enable_sgpr_dispatch_id = 0 ; MOVREL-NEXT: enable_sgpr_flat_scratch_init = 0 ; MOVREL-NEXT: enable_sgpr_private_segment_size = 0 ; MOVREL-NEXT: enable_sgpr_grid_workgroup_count_x = 0 ; MOVREL-NEXT: enable_sgpr_grid_workgroup_count_y = 0 ; MOVREL-NEXT: enable_sgpr_grid_workgroup_count_z = 0 ; MOVREL-NEXT: enable_wavefront_size32 = 0 ; MOVREL-NEXT: enable_ordered_append_gds = 0 ; MOVREL-NEXT: private_element_size = 1 ; MOVREL-NEXT: is_ptr64 = 1 ; MOVREL-NEXT: is_dynamic_callstack = 0 ; MOVREL-NEXT: is_debug_enabled = 0 ; MOVREL-NEXT: is_xnack_enabled = 0 ; MOVREL-NEXT: workitem_private_segment_byte_size = 0 ; MOVREL-NEXT: workgroup_group_segment_byte_size = 0 ; MOVREL-NEXT: gds_segment_byte_size = 0 ; MOVREL-NEXT: kernarg_segment_byte_size = 28 ; MOVREL-NEXT: workgroup_fbarrier_count = 0 ; MOVREL-NEXT: wavefront_sgpr_count = 6 ; MOVREL-NEXT: workitem_vgpr_count = 3 ; MOVREL-NEXT: reserved_vgpr_first = 0 ; MOVREL-NEXT: reserved_vgpr_count = 0 ; MOVREL-NEXT: reserved_sgpr_first = 0 ; MOVREL-NEXT: reserved_sgpr_count = 0 ; MOVREL-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 ; MOVREL-NEXT: debug_private_segment_buffer_sgpr = 0 ; MOVREL-NEXT: kernarg_segment_alignment = 4 ; MOVREL-NEXT: group_segment_alignment = 4 ; MOVREL-NEXT: private_segment_alignment = 4 ; MOVREL-NEXT: wavefront_size = 6 ; MOVREL-NEXT: call_convention = -1 ; MOVREL-NEXT: runtime_loader_kernel_symbol = 0 ; MOVREL-NEXT: .end_amd_kernel_code_t ; MOVREL-NEXT: ; %bb.0: ; 
; NOTE(review): the constant-vector operand of the extractelement below was
; lost in transit ("extractelement <4 x float> , i32 %sel" — invalid IR, no
; aggregate operand). Reconstructed as <1.0, 2.0, 3.0, 4.0> from the checks
; (s_cselect chain over 1.0 / 2.0 / 0x40400000 = 3.0f / 4.0). Check lines
; are byte-identical; regenerate with update_llc_test_checks.py if needed.
%entry ; MOVREL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; MOVREL-NEXT: s_load_dword s2, s[4:5], 0x8 ; MOVREL-NEXT: s_waitcnt lgkmcnt(0) ; MOVREL-NEXT: v_mov_b32_e32 v0, s0 ; MOVREL-NEXT: s_cmp_eq_u32 s2, 1 ; MOVREL-NEXT: s_cselect_b32 s3, 2.0, 1.0 ; MOVREL-NEXT: s_cmp_eq_u32 s2, 2 ; MOVREL-NEXT: s_cselect_b32 s3, 0x40400000, s3 ; MOVREL-NEXT: s_cmp_eq_u32 s2, 3 ; MOVREL-NEXT: s_cselect_b32 s2, 4.0, s3 ; MOVREL-NEXT: v_mov_b32_e32 v2, s2 ; MOVREL-NEXT: v_mov_b32_e32 v1, s1 ; MOVREL-NEXT: flat_store_dword v[0:1], v2 ; MOVREL-NEXT: s_endpgm entry: %ext = extractelement <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, i32 %sel store float %ext, float addrspace(1)* %out ret void } define amdgpu_kernel void @dyn_extract_v4f64_s_s_s(double addrspace(1)* %out, i32 %sel) { ; GPRIDX-LABEL: dyn_extract_v4f64_s_s_s: ; GPRIDX: .amd_kernel_code_t ; GPRIDX-NEXT: amd_code_version_major = 1 ; GPRIDX-NEXT: amd_code_version_minor = 2 ; GPRIDX-NEXT: amd_machine_kind = 1 ; GPRIDX-NEXT: amd_machine_version_major = 9 ; GPRIDX-NEXT: amd_machine_version_minor = 0 ; GPRIDX-NEXT: amd_machine_version_stepping = 0 ; GPRIDX-NEXT: kernel_code_entry_byte_offset = 256 ; GPRIDX-NEXT: kernel_code_prefetch_byte_size = 0 ; GPRIDX-NEXT: granulated_workitem_vgpr_count = 0 ; GPRIDX-NEXT: granulated_wavefront_sgpr_count = 0 ; GPRIDX-NEXT: priority = 0 ; GPRIDX-NEXT: float_mode = 240 ; GPRIDX-NEXT: priv = 0 ; GPRIDX-NEXT: enable_dx10_clamp = 1 ; GPRIDX-NEXT: debug_mode = 0 ; GPRIDX-NEXT: enable_ieee_mode = 1 ; GPRIDX-NEXT: enable_wgp_mode = 0 ; GPRIDX-NEXT: enable_mem_ordered = 0 ; GPRIDX-NEXT: enable_fwd_progress = 0 ; GPRIDX-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 ; GPRIDX-NEXT: user_sgpr_count = 6 ; GPRIDX-NEXT: enable_trap_handler = 0 ; GPRIDX-NEXT: enable_sgpr_workgroup_id_x = 1 ; GPRIDX-NEXT: enable_sgpr_workgroup_id_y = 0 ; GPRIDX-NEXT: enable_sgpr_workgroup_id_z = 0 ; GPRIDX-NEXT: enable_sgpr_workgroup_info = 0 ; GPRIDX-NEXT: enable_vgpr_workitem_id = 0 ; GPRIDX-NEXT: enable_exception_msb = 0 ; GPRIDX-NEXT: 
; NOTE(review): autogenerated .amd_kernel_code_t descriptor checks and code
; checks for dyn_extract_v4f64_s_s_s (GPRIDX = gfx900, MOVREL = fiji). The
; element select uses 64-bit s_cselect_b64 pairs; 0x40080000 is the high
; dword of double 3.0. Kept byte-identical — regenerate with
; update_llc_test_checks.py rather than hand-editing.
granulated_lds_size = 0 ; GPRIDX-NEXT: enable_exception = 0 ; GPRIDX-NEXT: enable_sgpr_private_segment_buffer = 1 ; GPRIDX-NEXT: enable_sgpr_dispatch_ptr = 0 ; GPRIDX-NEXT: enable_sgpr_queue_ptr = 0 ; GPRIDX-NEXT: enable_sgpr_kernarg_segment_ptr = 1 ; GPRIDX-NEXT: enable_sgpr_dispatch_id = 0 ; GPRIDX-NEXT: enable_sgpr_flat_scratch_init = 0 ; GPRIDX-NEXT: enable_sgpr_private_segment_size = 0 ; GPRIDX-NEXT: enable_sgpr_grid_workgroup_count_x = 0 ; GPRIDX-NEXT: enable_sgpr_grid_workgroup_count_y = 0 ; GPRIDX-NEXT: enable_sgpr_grid_workgroup_count_z = 0 ; GPRIDX-NEXT: enable_wavefront_size32 = 0 ; GPRIDX-NEXT: enable_ordered_append_gds = 0 ; GPRIDX-NEXT: private_element_size = 1 ; GPRIDX-NEXT: is_ptr64 = 1 ; GPRIDX-NEXT: is_dynamic_callstack = 0 ; GPRIDX-NEXT: is_debug_enabled = 0 ; GPRIDX-NEXT: is_xnack_enabled = 1 ; GPRIDX-NEXT: workitem_private_segment_byte_size = 0 ; GPRIDX-NEXT: workgroup_group_segment_byte_size = 0 ; GPRIDX-NEXT: gds_segment_byte_size = 0 ; GPRIDX-NEXT: kernarg_segment_byte_size = 28 ; GPRIDX-NEXT: workgroup_fbarrier_count = 0 ; GPRIDX-NEXT: wavefront_sgpr_count = 7 ; GPRIDX-NEXT: workitem_vgpr_count = 3 ; GPRIDX-NEXT: reserved_vgpr_first = 0 ; GPRIDX-NEXT: reserved_vgpr_count = 0 ; GPRIDX-NEXT: reserved_sgpr_first = 0 ; GPRIDX-NEXT: reserved_sgpr_count = 0 ; GPRIDX-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 ; GPRIDX-NEXT: debug_private_segment_buffer_sgpr = 0 ; GPRIDX-NEXT: kernarg_segment_alignment = 4 ; GPRIDX-NEXT: group_segment_alignment = 4 ; GPRIDX-NEXT: private_segment_alignment = 4 ; GPRIDX-NEXT: wavefront_size = 6 ; GPRIDX-NEXT: call_convention = -1 ; GPRIDX-NEXT: runtime_loader_kernel_symbol = 0 ; GPRIDX-NEXT: .end_amd_kernel_code_t ; GPRIDX-NEXT: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 ; GPRIDX-NEXT: s_load_dword s6, s[4:5], 0x8 ; GPRIDX-NEXT: s_mov_b32 s0, 0 ; GPRIDX-NEXT: s_mov_b32 s1, 0x40080000 ; GPRIDX-NEXT: v_mov_b32_e32 v2, 0 ; GPRIDX-NEXT: s_waitcnt lgkmcnt(0) ; GPRIDX-NEXT: s_cmp_eq_u32 
s6, 1 ; GPRIDX-NEXT: s_cselect_b64 s[4:5], 2.0, 1.0 ; GPRIDX-NEXT: s_cmp_eq_u32 s6, 2 ; GPRIDX-NEXT: s_cselect_b64 s[0:1], s[0:1], s[4:5] ; GPRIDX-NEXT: s_cmp_eq_u32 s6, 3 ; GPRIDX-NEXT: s_cselect_b64 s[0:1], 4.0, s[0:1] ; GPRIDX-NEXT: v_mov_b32_e32 v0, s0 ; GPRIDX-NEXT: v_mov_b32_e32 v1, s1 ; GPRIDX-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] ; GPRIDX-NEXT: s_endpgm ; ; MOVREL-LABEL: dyn_extract_v4f64_s_s_s: ; MOVREL: .amd_kernel_code_t ; MOVREL-NEXT: amd_code_version_major = 1 ; MOVREL-NEXT: amd_code_version_minor = 2 ; MOVREL-NEXT: amd_machine_kind = 1 ; MOVREL-NEXT: amd_machine_version_major = 8 ; MOVREL-NEXT: amd_machine_version_minor = 0 ; MOVREL-NEXT: amd_machine_version_stepping = 3 ; MOVREL-NEXT: kernel_code_entry_byte_offset = 256 ; MOVREL-NEXT: kernel_code_prefetch_byte_size = 0 ; MOVREL-NEXT: granulated_workitem_vgpr_count = 0 ; MOVREL-NEXT: granulated_wavefront_sgpr_count = 0 ; MOVREL-NEXT: priority = 0 ; MOVREL-NEXT: float_mode = 240 ; MOVREL-NEXT: priv = 0 ; MOVREL-NEXT: enable_dx10_clamp = 1 ; MOVREL-NEXT: debug_mode = 0 ; MOVREL-NEXT: enable_ieee_mode = 1 ; MOVREL-NEXT: enable_wgp_mode = 0 ; MOVREL-NEXT: enable_mem_ordered = 0 ; MOVREL-NEXT: enable_fwd_progress = 0 ; MOVREL-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 ; MOVREL-NEXT: user_sgpr_count = 6 ; MOVREL-NEXT: enable_trap_handler = 0 ; MOVREL-NEXT: enable_sgpr_workgroup_id_x = 1 ; MOVREL-NEXT: enable_sgpr_workgroup_id_y = 0 ; MOVREL-NEXT: enable_sgpr_workgroup_id_z = 0 ; MOVREL-NEXT: enable_sgpr_workgroup_info = 0 ; MOVREL-NEXT: enable_vgpr_workitem_id = 0 ; MOVREL-NEXT: enable_exception_msb = 0 ; MOVREL-NEXT: granulated_lds_size = 0 ; MOVREL-NEXT: enable_exception = 0 ; MOVREL-NEXT: enable_sgpr_private_segment_buffer = 1 ; MOVREL-NEXT: enable_sgpr_dispatch_ptr = 0 ; MOVREL-NEXT: enable_sgpr_queue_ptr = 0 ; MOVREL-NEXT: enable_sgpr_kernarg_segment_ptr = 1 ; MOVREL-NEXT: enable_sgpr_dispatch_id = 0 ; MOVREL-NEXT: enable_sgpr_flat_scratch_init = 0 ; MOVREL-NEXT: 
enable_sgpr_private_segment_size = 0 ; MOVREL-NEXT: enable_sgpr_grid_workgroup_count_x = 0 ; MOVREL-NEXT: enable_sgpr_grid_workgroup_count_y = 0 ; MOVREL-NEXT: enable_sgpr_grid_workgroup_count_z = 0 ; MOVREL-NEXT: enable_wavefront_size32 = 0 ; MOVREL-NEXT: enable_ordered_append_gds = 0 ; MOVREL-NEXT: private_element_size = 1 ; MOVREL-NEXT: is_ptr64 = 1 ; MOVREL-NEXT: is_dynamic_callstack = 0 ; MOVREL-NEXT: is_debug_enabled = 0 ; MOVREL-NEXT: is_xnack_enabled = 0 ; MOVREL-NEXT: workitem_private_segment_byte_size = 0 ; MOVREL-NEXT: workgroup_group_segment_byte_size = 0 ; MOVREL-NEXT: gds_segment_byte_size = 0 ; MOVREL-NEXT: kernarg_segment_byte_size = 28 ; MOVREL-NEXT: workgroup_fbarrier_count = 0 ; MOVREL-NEXT: wavefront_sgpr_count = 7 ; MOVREL-NEXT: workitem_vgpr_count = 4 ; MOVREL-NEXT: reserved_vgpr_first = 0 ; MOVREL-NEXT: reserved_vgpr_count = 0 ; MOVREL-NEXT: reserved_sgpr_first = 0 ; MOVREL-NEXT: reserved_sgpr_count = 0 ; MOVREL-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 ; MOVREL-NEXT: debug_private_segment_buffer_sgpr = 0 ; MOVREL-NEXT: kernarg_segment_alignment = 4 ; MOVREL-NEXT: group_segment_alignment = 4 ; MOVREL-NEXT: private_segment_alignment = 4 ; MOVREL-NEXT: wavefront_size = 6 ; MOVREL-NEXT: call_convention = -1 ; MOVREL-NEXT: runtime_loader_kernel_symbol = 0 ; MOVREL-NEXT: .end_amd_kernel_code_t ; MOVREL-NEXT: ; %bb.0: ; %entry ; MOVREL-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 ; MOVREL-NEXT: s_load_dword s6, s[4:5], 0x8 ; MOVREL-NEXT: s_mov_b32 s0, 0 ; MOVREL-NEXT: s_mov_b32 s1, 0x40080000 ; MOVREL-NEXT: s_waitcnt lgkmcnt(0) ; MOVREL-NEXT: v_mov_b32_e32 v2, s2 ; MOVREL-NEXT: s_cmp_eq_u32 s6, 1 ; MOVREL-NEXT: s_cselect_b64 s[4:5], 2.0, 1.0 ; MOVREL-NEXT: s_cmp_eq_u32 s6, 2 ; MOVREL-NEXT: s_cselect_b64 s[0:1], s[0:1], s[4:5] ; MOVREL-NEXT: s_cmp_eq_u32 s6, 3 ; MOVREL-NEXT: s_cselect_b64 s[0:1], 4.0, s[0:1] ; MOVREL-NEXT: v_mov_b32_e32 v0, s0 ; MOVREL-NEXT: v_mov_b32_e32 v1, s1 ; MOVREL-NEXT: v_mov_b32_e32 v3, s3 ; MOVREL-NEXT: 
; NOTE(review): the constant-vector operand of the extractelement below was
; lost in transit ("extractelement <4 x double> , i32 %sel" — invalid IR).
; Reconstructed as <1.0, 2.0, 3.0, 4.0> from the checks: s_cselect_b64 over
; 2.0/1.0, then s[0:1] = {0, 0x40080000} (double 3.0), then 4.0. The
; constant-index v_extract_v64i32_7 / _32 functions on this line are intact
; and unchanged. Check lines are byte-identical; regenerate with
; update_llc_test_checks.py if needed.
flat_store_dwordx2 v[2:3], v[0:1] ; MOVREL-NEXT: s_endpgm entry: %ext = extractelement <4 x double> <double 1.0, double 2.0, double 3.0, double 4.0>, i32 %sel store double %ext, double addrspace(1)* %out ret void } define i32 @v_extract_v64i32_7(<64 x i32> addrspace(1)* %ptr) { ; GPRIDX-LABEL: v_extract_v64i32_7: ; GPRIDX: ; %bb.0: ; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GPRIDX-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:16 ; GPRIDX-NEXT: s_waitcnt vmcnt(0) ; GPRIDX-NEXT: v_mov_b32_e32 v0, v7 ; GPRIDX-NEXT: s_setpc_b64 s[30:31] ; ; MOVREL-LABEL: v_extract_v64i32_7: ; MOVREL: ; %bb.0: ; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; MOVREL-NEXT: v_add_u32_e32 v0, vcc, 16, v0 ; MOVREL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; MOVREL-NEXT: flat_load_dwordx4 v[4:7], v[0:1] ; MOVREL-NEXT: s_waitcnt vmcnt(0) ; MOVREL-NEXT: v_mov_b32_e32 v0, v7 ; MOVREL-NEXT: s_setpc_b64 s[30:31] %vec = load <64 x i32>, <64 x i32> addrspace(1)* %ptr %elt = extractelement <64 x i32> %vec, i32 7 ret i32 %elt } define i32 @v_extract_v64i32_32(<64 x i32> addrspace(1)* %ptr) { ; GPRIDX-LABEL: v_extract_v64i32_32: ; GPRIDX: ; %bb.0: ; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GPRIDX-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 ; GPRIDX-NEXT: s_waitcnt vmcnt(0) ; GPRIDX-NEXT: s_setpc_b64 s[30:31] ; ; MOVREL-LABEL: v_extract_v64i32_32: ; MOVREL: ; %bb.0: ; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; MOVREL-NEXT: s_movk_i32 s4, 0x80 ; MOVREL-NEXT: s_mov_b32 s5, 0 ; MOVREL-NEXT: v_mov_b32_e32 v2, s4 ; MOVREL-NEXT: v_mov_b32_e32 v3, s5 ; MOVREL-NEXT: v_add_u32_e32 v0, vcc, v0, v2 ; MOVREL-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc ; MOVREL-NEXT: flat_load_dwordx4 v[0:3], v[0:1] ; MOVREL-NEXT: s_waitcnt vmcnt(0) ; MOVREL-NEXT: s_setpc_b64 s[30:31] %vec = load <64 x i32>, <64 x i32> addrspace(1)* %ptr %elt = extractelement <64 x i32> %vec, i32 32 ret i32 %elt } define i32 @v_extract_v64i32_33(<64 x i32> addrspace(1)* %ptr) { ; GPRIDX-LABEL: v_extract_v64i32_33: ; GPRIDX: ; %bb.0: 
; NOTE(review): autogenerated FileCheck assertions for the tail of
; v_extract_v64i32_33 and all of v_extract_v64i32_37. Constant indices 33 and
; 37 are lowered to a dwordx4 load of the containing 16-byte chunk plus a
; register copy of the selected lane; gfx900 folds the 128-byte offset into
; global_load (for _33) while fiji materializes it with a 64-bit add before a
; flat_load. Byte-identical to the original; regenerate with
; update_llc_test_checks.py rather than hand-editing.
; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GPRIDX-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 ; GPRIDX-NEXT: s_waitcnt vmcnt(0) ; GPRIDX-NEXT: v_mov_b32_e32 v0, v1 ; GPRIDX-NEXT: s_setpc_b64 s[30:31] ; ; MOVREL-LABEL: v_extract_v64i32_33: ; MOVREL: ; %bb.0: ; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; MOVREL-NEXT: s_movk_i32 s4, 0x80 ; MOVREL-NEXT: s_mov_b32 s5, 0 ; MOVREL-NEXT: v_mov_b32_e32 v2, s4 ; MOVREL-NEXT: v_mov_b32_e32 v3, s5 ; MOVREL-NEXT: v_add_u32_e32 v0, vcc, v0, v2 ; MOVREL-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc ; MOVREL-NEXT: flat_load_dwordx4 v[0:3], v[0:1] ; MOVREL-NEXT: s_waitcnt vmcnt(0) ; MOVREL-NEXT: v_mov_b32_e32 v0, v1 ; MOVREL-NEXT: s_setpc_b64 s[30:31] %vec = load <64 x i32>, <64 x i32> addrspace(1)* %ptr %elt = extractelement <64 x i32> %vec, i32 33 ret i32 %elt } define i32 @v_extract_v64i32_37(<64 x i32> addrspace(1)* %ptr) { ; GPRIDX-LABEL: v_extract_v64i32_37: ; GPRIDX: ; %bb.0: ; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GPRIDX-NEXT: s_movk_i32 s4, 0x80 ; GPRIDX-NEXT: s_mov_b32 s5, 0 ; GPRIDX-NEXT: v_mov_b32_e32 v2, s4 ; GPRIDX-NEXT: v_mov_b32_e32 v3, s5 ; GPRIDX-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 ; GPRIDX-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc ; GPRIDX-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:16 ; GPRIDX-NEXT: s_waitcnt vmcnt(0) ; GPRIDX-NEXT: v_mov_b32_e32 v0, v5 ; GPRIDX-NEXT: s_setpc_b64 s[30:31] ; ; MOVREL-LABEL: v_extract_v64i32_37: ; MOVREL: ; %bb.0: ; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; MOVREL-NEXT: s_movk_i32 s4, 0x80 ; MOVREL-NEXT: s_mov_b32 s5, 0 ; MOVREL-NEXT: v_mov_b32_e32 v2, s4 ; MOVREL-NEXT: v_mov_b32_e32 v3, s5 ; MOVREL-NEXT: v_add_u32_e32 v0, vcc, v0, v2 ; MOVREL-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc ; MOVREL-NEXT: v_add_u32_e32 v0, vcc, 16, v0 ; MOVREL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; MOVREL-NEXT: flat_load_dwordx4 v[4:7], v[0:1] ; MOVREL-NEXT: s_waitcnt vmcnt(0) ; MOVREL-NEXT: v_mov_b32_e32 v0, v5 ; 
MOVREL-NEXT: s_setpc_b64 s[30:31] %vec = load <64 x i32>, <64 x i32> addrspace(1)* %ptr %elt = extractelement <64 x i32> %vec, i32 37 ret i32 %elt }