[llvm] [AMDGPU] Add no return image_sample intrinsics and instructions (PR #97542)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 3 06:51:05 PDT 2024
================
@@ -0,0 +1,479 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -verify-machineinstrs -global-isel=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10PLUS-SDAG,GFX10,GFX10-SDAG %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -verify-machineinstrs -global-isel=1 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10PLUS-GISEL,GFX10,GFX10-GISEL %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs -amdgpu-enable-delay-alu=0 -global-isel=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10PLUS-SDAG,GFX11,GFX11-SDAG %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs -amdgpu-enable-delay-alu=0 -global-isel=1 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10PLUS-GISEL,GFX11,GFX11-GISEL %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -amdgpu-enable-delay-alu=0 -global-isel=0 < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -amdgpu-enable-delay-alu=0 -global-isel=1 < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL %s
+
+; No-return 1D sample. The intrinsic call below has a void result, and both the
+; GFX10PLUS and GFX12 check blocks expect a single image_sample whose first
+; operand is `off`, followed by v0, with dim:SQ_RSRC_IMG_1D.
+; The call passes i32 15 as its first operand; the expected instruction carries
+; dmask:0xf.
+define amdgpu_ps void @sample_1d_noret(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
+; GFX10PLUS-LABEL: sample_1d_noret:
+; GFX10PLUS: ; %bb.0: ; %main_body
+; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
+; GFX10PLUS-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX10PLUS-NEXT: s_endpgm
+;
+; GFX12-LABEL: sample_1d_noret:
+; GFX12: ; %bb.0: ; %main_body
+; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
+; GFX12-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX12-NEXT: s_endpgm
+main_body:
+ call void @llvm.amdgcn.image.sample.noret.1d.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret void
+}
+
+; No-return 2D sample with two float operands (%s, %t). The call has a void
+; result and the checks expect image_sample with `off` as the first operand and
+; dim:SQ_RSRC_IMG_2D.
+; The two check blocks differ only in how the second operand is printed:
+; GFX10PLUS expects the register range v[0:1], GFX12 expects the list [v0, v1].
+define amdgpu_ps void @sample_2d_noret(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
+; GFX10PLUS-LABEL: sample_2d_noret:
+; GFX10PLUS: ; %bb.0: ; %main_body
+; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
+; GFX10PLUS-NEXT: image_sample off, v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
+; GFX10PLUS-NEXT: s_endpgm
+;
+; GFX12-LABEL: sample_2d_noret:
+; GFX12: ; %bb.0: ; %main_body
+; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
+; GFX12-NEXT: image_sample off, [v0, v1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
+; GFX12-NEXT: s_endpgm
+main_body:
+ call void @llvm.amdgcn.image.sample.noret.2d.f32(i32 15, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret void
+}
+
+; No-return 3D sample with three float operands (%s, %t, %r). The call has a
+; void result and the checks expect image_sample with `off` as the first operand
+; and dim:SQ_RSRC_IMG_3D.
+; GFX10PLUS expects the register range v[0:2]; GFX12 expects the list
+; [v0, v1, v2].
+define amdgpu_ps void @sample_3d_noret(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %r) {
+; GFX10PLUS-LABEL: sample_3d_noret:
+; GFX10PLUS: ; %bb.0: ; %main_body
+; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
+; GFX10PLUS-NEXT: image_sample off, v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
+; GFX10PLUS-NEXT: s_endpgm
+;
+; GFX12-LABEL: sample_3d_noret:
+; GFX12: ; %bb.0: ; %main_body
+; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
+; GFX12-NEXT: image_sample off, [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
+; GFX12-NEXT: s_endpgm
+main_body:
+ call void @llvm.amdgcn.image.sample.noret.3d.f32(i32 15, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret void
+}
+
+; No-return cube sample with three float operands (%s, %t, %face). The call has
+; a void result and the checks expect image_sample with `off` as the first
+; operand and dim:SQ_RSRC_IMG_CUBE.
+; GFX10PLUS expects the register range v[0:2]; GFX12 expects the list
+; [v0, v1, v2].
+define amdgpu_ps void @sample_cube_noret(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %face) {
+; GFX10PLUS-LABEL: sample_cube_noret:
+; GFX10PLUS: ; %bb.0: ; %main_body
+; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
+; GFX10PLUS-NEXT: image_sample off, v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_CUBE
+; GFX10PLUS-NEXT: s_endpgm
+;
+; GFX12-LABEL: sample_cube_noret:
+; GFX12: ; %bb.0: ; %main_body
+; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
+; GFX12-NEXT: image_sample off, [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_CUBE
+; GFX12-NEXT: s_endpgm
+main_body:
+ call void @llvm.amdgcn.image.sample.noret.cube.f32(i32 15, float %s, float %t, float %face, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret void
+}
+
+; No-return 1D-array sample with two float operands (%s, %slice). The call has
+; a void result and the checks expect image_sample with `off` as the first
+; operand and dim:SQ_RSRC_IMG_1D_ARRAY.
+; GFX10PLUS expects the register range v[0:1]; GFX12 expects the list [v0, v1].
+define amdgpu_ps void @sample_1darray_noret(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %slice) {
+; GFX10PLUS-LABEL: sample_1darray_noret:
+; GFX10PLUS: ; %bb.0: ; %main_body
+; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
+; GFX10PLUS-NEXT: image_sample off, v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY
+; GFX10PLUS-NEXT: s_endpgm
+;
+; GFX12-LABEL: sample_1darray_noret:
+; GFX12: ; %bb.0: ; %main_body
+; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
+; GFX12-NEXT: image_sample off, [v0, v1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY
+; GFX12-NEXT: s_endpgm
+main_body:
+ call void @llvm.amdgcn.image.sample.noret.1darray.f32(i32 15, float %s, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret void
+}
+
+; No-return 2D-array sample with three float operands (%s, %t, %slice). The
+; call has a void result and the checks expect image_sample with `off` as the
+; first operand and dim:SQ_RSRC_IMG_2D_ARRAY.
+; GFX10PLUS expects the register range v[0:2]; GFX12 expects the list
+; [v0, v1, v2].
+define amdgpu_ps void @sample_2darray_noret(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %slice) {
+; GFX10PLUS-LABEL: sample_2darray_noret:
+; GFX10PLUS: ; %bb.0: ; %main_body
+; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
+; GFX10PLUS-NEXT: image_sample off, v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY
+; GFX10PLUS-NEXT: s_endpgm
+;
+; GFX12-LABEL: sample_2darray_noret:
+; GFX12: ; %bb.0: ; %main_body
+; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
+; GFX12-NEXT: image_sample off, [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY
+; GFX12-NEXT: s_endpgm
+main_body:
+ call void @llvm.amdgcn.image.sample.noret.2darray.f32(i32 15, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret void
+}
+
+define amdgpu_ps void @sample_b_1d_noret(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s) {
+; GFX10PLUS-LABEL: sample_b_1d_noret:
+; GFX10PLUS: ; %bb.0: ; %main_body
+; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
+; GFX10PLUS-NEXT: image_sample_b off, v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX10PLUS-NEXT: s_endpgm
+;
+; GFX12-LABEL: sample_b_1d_noret:
+; GFX12: ; %bb.0: ; %main_body
+; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
+; GFX12-NEXT: image_sample_b off, [v0, v1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX12-NEXT: s_endpgm
+main_body:
+ call void @llvm.amdgcn.image.sample.noret.b.1d.f32(i32 15, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
----------------
arsenm wrote:
I mean intrinsic names should match the underlying instruction name as closely as possible. Here, you've split the opcode name with the compiler-invented infix `noret`. It would be better if this were llvm.amdgcn.image.sample.b.1d.nortn.f32. That way it always sorts with llvm.amdgcn.image.sample.b.1d.f32.
https://github.com/llvm/llvm-project/pull/97542
More information about the llvm-commits
mailing list