llvm-for-llvmta/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics-gfx8.ll

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -instcombine -S < %s | FileCheck %s

; --------------------------------------------------------------------
; llvm.amdgcn.image.sample a16 is disabled on pre-gfx9
; --------------------------------------------------------------------

; Declarations of the f32-coordinate image-sample intrinsics called by the
; tests in this file. Every variant returns <4 x float> and takes, in order:
; a leading i32 (the tests pass 15), the float coordinates, an <8 x i32>
; operand (named %rsrc in the tests), a <4 x i32> operand (named %samp in the
; tests), and a trailing i1, i32, i32 (the tests pass 0 for all three).
; Float coordinate count per variant: 1d = 1; 2d and 1darray = 2;
; 3d, cube and 2darray = 3.
; NOTE(review): attribute group #1 is referenced below, but no
; "attributes #1 = { ... }" definition is visible in this file -- confirm
; whether that is intentional.
declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; Negative test, 1d variant: the single half coordinate %s is widened to float
; with fpext and passed to the f32 image.sample.1d intrinsic. The assertions
; below expect the fpext and the .f32 call to survive instcombine, i.e. no a16
; form is produced for the target chosen by the opt command at the top of the
; file (-mcpu=fiji). The only differences between input and expected output
; are the printed form of the i1 operand (0 -> false) and the explicit
; "align 16" on the store.
define amdgpu_kernel void @image_sample_a16_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) {
; CHECK-LABEL: @image_sample_a16_1d(
; CHECK-NEXT:    [[S32:%.*]] = fpext half [[S:%.*]] to float
; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float [[S32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
; CHECK-NEXT:    ret void
;
  ; Widen the half coordinate; this extension must still be present in the output.
  %s32 = fpext half %s to float
  %res = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ; Store the sampled value to %out so the call has a visible use.
  store <4 x float> %res, <4 x float> addrspace(1)* %out
  ret void
}

; Negative test, 2d variant: both half coordinates (%s, %t) are widened to
; float with fpext and passed to the f32 image.sample.2d intrinsic. The
; assertions below expect both fpext instructions and the .f32 call to survive
; instcombine unchanged (no a16 form for the -mcpu=fiji target used by this
; file); only the i1 operand spelling and the store alignment differ.
define amdgpu_kernel void @image_sample_a16_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
; CHECK-LABEL: @image_sample_a16_2d(
; CHECK-NEXT:    [[S32:%.*]] = fpext half [[S:%.*]] to float
; CHECK-NEXT:    [[T32:%.*]] = fpext half [[T:%.*]] to float
; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float [[S32]], float [[T32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
; CHECK-NEXT:    ret void
;
  ; Widen each half coordinate; both extensions must still be present in the output.
  %s32 = fpext half %s to float
  %t32 = fpext half %t to float
  %res = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ; Store the sampled value to %out so the call has a visible use.
  store <4 x float> %res, <4 x float> addrspace(1)* %out
  ret void
}

; Negative test, 3d variant: the three half coordinates (%s, %t, %r) are
; widened to float with fpext and passed to the f32 image.sample.3d intrinsic.
; The assertions below expect all three fpext instructions and the .f32 call
; to survive instcombine unchanged (no a16 form for the -mcpu=fiji target used
; by this file); only the i1 operand spelling and the store alignment differ.
define amdgpu_kernel void @image_sample_a16_3d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %r) {
; CHECK-LABEL: @image_sample_a16_3d(
; CHECK-NEXT:    [[S32:%.*]] = fpext half [[S:%.*]] to float
; CHECK-NEXT:    [[T32:%.*]] = fpext half [[T:%.*]] to float
; CHECK-NEXT:    [[R32:%.*]] = fpext half [[R:%.*]] to float
; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32 15, float [[S32]], float [[T32]], float [[R32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
; CHECK-NEXT:    ret void
;
  ; Widen each half coordinate; all three extensions must still be present in the output.
  %s32 = fpext half %s to float
  %t32 = fpext half %t to float
  %r32 = fpext half %r to float
  %res = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32 15, float %s32, float %t32, float %r32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ; Store the sampled value to %out so the call has a visible use.
  store <4 x float> %res, <4 x float> addrspace(1)* %out
  ret void
}

; Negative test, cube variant: the three half operands (%s, %t, %face) are
; widened to float with fpext and passed to the f32 image.sample.cube
; intrinsic. The assertions below expect all three fpext instructions and the
; .f32 call to survive instcombine unchanged (no a16 form for the -mcpu=fiji
; target used by this file); only the i1 operand spelling and the store
; alignment differ.
define amdgpu_kernel void @image_sample_a16_cube(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %face) {
;
; CHECK-LABEL: @image_sample_a16_cube(
; CHECK-NEXT:    [[S32:%.*]] = fpext half [[S:%.*]] to float
; CHECK-NEXT:    [[T32:%.*]] = fpext half [[T:%.*]] to float
; CHECK-NEXT:    [[FACE32:%.*]] = fpext half [[FACE:%.*]] to float
; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32 15, float [[S32]], float [[T32]], float [[FACE32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
; CHECK-NEXT:    ret void
;
  ; Widen each half operand; all three extensions must still be present in the output.
  %s32 = fpext half %s to float
  %t32 = fpext half %t to float
  %face32 = fpext half %face to float
  %res = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32 15, float %s32, float %t32, float %face32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ; Store the sampled value to %out so the call has a visible use.
  store <4 x float> %res, <4 x float> addrspace(1)* %out
  ret void
}

; Negative test, 1darray variant: the two half operands (%s, %slice) are
; widened to float with fpext and passed to the f32 image.sample.1darray
; intrinsic. The assertions below expect both fpext instructions and the .f32
; call to survive instcombine unchanged (no a16 form for the -mcpu=fiji target
; used by this file); only the i1 operand spelling and the store alignment
; differ.
define amdgpu_kernel void @image_sample_a16_1darray(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %slice) {
; CHECK-LABEL: @image_sample_a16_1darray(
; CHECK-NEXT:    [[S32:%.*]] = fpext half [[S:%.*]] to float
; CHECK-NEXT:    [[SLICE32:%.*]] = fpext half [[SLICE:%.*]] to float
; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32 15, float [[S32]], float [[SLICE32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
; CHECK-NEXT:    ret void
;
  ; Widen each half operand; both extensions must still be present in the output.
  %s32 = fpext half %s to float
  %slice32 = fpext half %slice to float
  %res = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32 15, float %s32, float %slice32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ; Store the sampled value to %out so the call has a visible use.
  store <4 x float> %res, <4 x float> addrspace(1)* %out
  ret void
}

; Negative test, 2darray variant: the three half operands (%s, %t, %slice) are
; widened to float with fpext and passed to the f32 image.sample.2darray
; intrinsic. The assertions below expect all three fpext instructions and the
; .f32 call to survive instcombine unchanged (no a16 form for the -mcpu=fiji
; target used by this file); only the i1 operand spelling and the store
; alignment differ.
define amdgpu_kernel void @image_sample_a16_2darray(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %slice) {
; CHECK-LABEL: @image_sample_a16_2darray(
; CHECK-NEXT:    [[S32:%.*]] = fpext half [[S:%.*]] to float
; CHECK-NEXT:    [[T32:%.*]] = fpext half [[T:%.*]] to float
; CHECK-NEXT:    [[SLICE32:%.*]] = fpext half [[SLICE:%.*]] to float
; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32(i32 15, float [[S32]], float [[T32]], float [[SLICE32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
; CHECK-NEXT:    ret void
;
  ; Widen each half operand; all three extensions must still be present in the output.
  %s32 = fpext half %s to float
  %t32 = fpext half %t to float
  %slice32 = fpext half %slice to float
  %res = call <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32(i32 15, float %s32, float %t32, float %slice32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ; Store the sampled value to %out so the call has a visible use.
  store <4 x float> %res, <4 x float> addrspace(1)* %out
  ret void
}
first commit 2022-04-25 10:02:23 +02:00			`; NOTE: Assertions have been autogenerated by utils/update_test_checks.py`
			`; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -instcombine -S < %s | FileCheck %s`

			`; --------------------------------------------------------------------`
			`; llvm.amdgcn.image.sample a16 is disabled on pre-gfx9`
			`; --------------------------------------------------------------------`

			`declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1`
			`declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1`
			`declare <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1`
			`declare <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1`
			`declare <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1`
			`declare <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1`

			`define amdgpu_kernel void @image_sample_a16_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) {`
			`; CHECK-LABEL: @image_sample_a16_1d(`
			`; CHECK-NEXT: [[S32:%.*]] = fpext half [[S:%.*]] to float`
			`; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float [[S32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)`
			`; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16`
			`; CHECK-NEXT: ret void`
			`;`
			`%s32 = fpext half %s to float`
			`%res = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)`
			`store <4 x float> %res, <4 x float> addrspace(1)* %out`
			`ret void`
			`}`

			`define amdgpu_kernel void @image_sample_a16_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {`
			`; CHECK-LABEL: @image_sample_a16_2d(`
			`; CHECK-NEXT: [[S32:%.*]] = fpext half [[S:%.*]] to float`
			`; CHECK-NEXT: [[T32:%.*]] = fpext half [[T:%.*]] to float`
			`; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float [[S32]], float [[T32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)`
			`; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16`
			`; CHECK-NEXT: ret void`
			`;`
			`%s32 = fpext half %s to float`
			`%t32 = fpext half %t to float`
			`%res = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)`
			`store <4 x float> %res, <4 x float> addrspace(1)* %out`
			`ret void`
			`}`

			`define amdgpu_kernel void @image_sample_a16_3d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %r) {`
			`; CHECK-LABEL: @image_sample_a16_3d(`
			`; CHECK-NEXT: [[S32:%.*]] = fpext half [[S:%.*]] to float`
			`; CHECK-NEXT: [[T32:%.*]] = fpext half [[T:%.*]] to float`
			`; CHECK-NEXT: [[R32:%.*]] = fpext half [[R:%.*]] to float`
			`; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32 15, float [[S32]], float [[T32]], float [[R32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)`
			`; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16`
			`; CHECK-NEXT: ret void`
			`;`
			`%s32 = fpext half %s to float`
			`%t32 = fpext half %t to float`
			`%r32 = fpext half %r to float`
			`%res = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32 15, float %s32, float %t32, float %r32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)`
			`store <4 x float> %res, <4 x float> addrspace(1)* %out`
			`ret void`
			`}`

			`define amdgpu_kernel void @image_sample_a16_cube(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %face) {`
			`;`
			`; CHECK-LABEL: @image_sample_a16_cube(`
			`; CHECK-NEXT: [[S32:%.*]] = fpext half [[S:%.*]] to float`
			`; CHECK-NEXT: [[T32:%.*]] = fpext half [[T:%.*]] to float`
			`; CHECK-NEXT: [[FACE32:%.*]] = fpext half [[FACE:%.*]] to float`
			`; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32 15, float [[S32]], float [[T32]], float [[FACE32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)`
			`; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16`
			`; CHECK-NEXT: ret void`
			`;`
			`%s32 = fpext half %s to float`
			`%t32 = fpext half %t to float`
			`%face32 = fpext half %face to float`
			`%res = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32 15, float %s32, float %t32, float %face32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)`
			`store <4 x float> %res, <4 x float> addrspace(1)* %out`
			`ret void`
			`}`

			`define amdgpu_kernel void @image_sample_a16_1darray(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %slice) {`
			`; CHECK-LABEL: @image_sample_a16_1darray(`
			`; CHECK-NEXT: [[S32:%.*]] = fpext half [[S:%.*]] to float`
			`; CHECK-NEXT: [[SLICE32:%.*]] = fpext half [[SLICE:%.*]] to float`
			`; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32 15, float [[S32]], float [[SLICE32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)`
			`; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16`
			`; CHECK-NEXT: ret void`
			`;`
			`%s32 = fpext half %s to float`
			`%slice32 = fpext half %slice to float`
			`%res = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32 15, float %s32, float %slice32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)`
			`store <4 x float> %res, <4 x float> addrspace(1)* %out`
			`ret void`
			`}`

			`define amdgpu_kernel void @image_sample_a16_2darray(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %slice) {`
			`; CHECK-LABEL: @image_sample_a16_2darray(`
			`; CHECK-NEXT: [[S32:%.*]] = fpext half [[S:%.*]] to float`
			`; CHECK-NEXT: [[T32:%.*]] = fpext half [[T:%.*]] to float`
			`; CHECK-NEXT: [[SLICE32:%.*]] = fpext half [[SLICE:%.*]] to float`
			`; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32(i32 15, float [[S32]], float [[T32]], float [[SLICE32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)`
			`; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16`
			`; CHECK-NEXT: ret void`
			`;`
			`%s32 = fpext half %s to float`
			`%t32 = fpext half %t to float`
			`%slice32 = fpext half %slice to float`
			`%res = call <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32(i32 15, float %s32, float %t32, float %slice32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)`
			`store <4 x float> %res, <4 x float> addrspace(1)* %out`
			`ret void`
			`}`