[llvm] [AMDGPU][AMDGPURegBankInfo] Map S_BUFFER_LOAD_XXX to its corresponding BUFFER_LOAD_XXX (PR #117574)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 25 08:38:53 PST 2024
llvmbot wrote:
@llvm/pr-subscribers-backend-amdgpu
Author: Juan Manuel Martinez Caamaño (jmmartinez)
In one test, code generation diverged between GlobalISel and SelectionDAG: when the offset is divergent, the register bank mapping lowered every G_AMDGPU_S_BUFFER_LOAD_* variant to a plain G_AMDGPU_BUFFER_LOAD, losing the sub-word load.
For example, this intrinsic call
> %ld = call i8 @llvm.amdgcn.s.buffer.load.u8(<4 x i32> %src, i32 %offset, i32 0)
was lowered into two different instructions:
* `buffer_load_u8 v2, v2, s[0:3], null offen` (SelectionDAG)
* `buffer_load_b32 v2, v2, s[0:3], null offen` (GlobalISel)
This patch maps each G_AMDGPU_S_BUFFER_LOAD_* opcode to its corresponding G_AMDGPU_BUFFER_LOAD_* opcode in `applyMappingSBufferLoad`, so both selectors emit the sub-word load.
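
For illustration, a minimal standalone reproducer in the spirit of the affected test might look like the sketch below. Only the intrinsic call is taken from the existing test; the RUN lines, CPU, and function name are assumptions and are not copied from `gfx12_scalar_subword_loads.ll`.

```llvm
; Assumed RUN lines (not from the test file): check that both selectors now
; emit the sub-word VMEM load when the buffer offset is divergent.
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -global-isel < %s | FileCheck %s
; CHECK: buffer_load_u8

define amdgpu_ps void @s_buffer_load_ubyte_divergent(<4 x i32> inreg %src, ptr addrspace(1) %out, i32 %offset) {
main_body:
  ; %offset arrives in a VGPR, so the scalar buffer load must be rewritten to
  ; a VMEM buffer load during register bank mapping.
  %ld = call i8 @llvm.amdgcn.s.buffer.load.u8(<4 x i32> %src, i32 %offset, i32 0)
  %zext = zext i8 %ld to i32
  store i32 %zext, ptr addrspace(1) %out
  ret void
}

declare i8 @llvm.amdgcn.s.buffer.load.u8(<4 x i32>, i32, i32)
```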
---
Full diff: https://github.com/llvm/llvm-project/pull/117574.diff
2 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp (+31-10)
- (modified) llvm/test/CodeGen/AMDGPU/gfx12_scalar_subword_loads.ll (+24-54)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 8c050348f753bb..d3d4062c34f88c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -1406,16 +1406,37 @@ bool AMDGPURegisterBankInfo::applyMappingSBufferLoad(
if (i != 0)
BaseMMO = MF.getMachineMemOperand(BaseMMO, MMOOffset + 16 * i, MemSize);
- B.buildInstr(AMDGPU::G_AMDGPU_BUFFER_LOAD)
- .addDef(LoadParts[i]) // vdata
- .addUse(RSrc) // rsrc
- .addUse(VIndex) // vindex
- .addUse(VOffset) // voffset
- .addUse(SOffset) // soffset
- .addImm(ImmOffset + 16 * i) // offset(imm)
- .addImm(0) // cachepolicy, swizzled buffer(imm)
- .addImm(0) // idxen(imm)
- .addMemOperand(MMO);
+ unsigned Opc;
+ switch (MI.getOpcode()) {
+ case AMDGPU::G_AMDGPU_S_BUFFER_LOAD:
+ Opc = AMDGPU::G_AMDGPU_BUFFER_LOAD;
+ break;
+ case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_UBYTE:
+ Opc = AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE;
+ break;
+ case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_SBYTE:
+ Opc = AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE;
+ break;
+ case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_USHORT:
+ Opc = AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT;
+ break;
+ case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_SSHORT:
+ Opc = AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT;
+ break;
+ default:
+ llvm_unreachable("Unexpected opcode");
+ }
+
+ B.buildInstr(Opc)
+ .addDef(LoadParts[i]) // vdata
+ .addUse(RSrc) // rsrc
+ .addUse(VIndex) // vindex
+ .addUse(VOffset) // voffset
+ .addUse(SOffset) // soffset
+ .addImm(ImmOffset + 16 * i) // offset(imm)
+ .addImm(0) // cachepolicy, swizzled buffer(imm)
+ .addImm(0) // idxen(imm)
+ .addMemOperand(MMO);
}
// TODO: If only the resource is a VGPR, it may be better to execute the
diff --git a/llvm/test/CodeGen/AMDGPU/gfx12_scalar_subword_loads.ll b/llvm/test/CodeGen/AMDGPU/gfx12_scalar_subword_loads.ll
index 020c9dc130bb2a..61ae9639c52d00 100644
--- a/llvm/test/CodeGen/AMDGPU/gfx12_scalar_subword_loads.ll
+++ b/llvm/test/CodeGen/AMDGPU/gfx12_scalar_subword_loads.ll
@@ -465,19 +465,12 @@ main_body:
}
define amdgpu_ps void @s_buffer_load_byte_sgpr_or_imm_offset_divergent(<4 x i32> inreg %src, ptr addrspace(1) nocapture %out, i32 %offset) {
-; DAG-LABEL: s_buffer_load_byte_sgpr_or_imm_offset_divergent:
-; DAG: ; %bb.0: ; %main_body
-; DAG-NEXT: buffer_load_i8 v2, v2, s[0:3], null offen
-; DAG-NEXT: s_wait_loadcnt 0x0
-; DAG-NEXT: global_store_b32 v[0:1], v2, off
-; DAG-NEXT: s_endpgm
-;
-; GISEL-LABEL: s_buffer_load_byte_sgpr_or_imm_offset_divergent:
-; GISEL: ; %bb.0: ; %main_body
-; GISEL-NEXT: buffer_load_b32 v2, v2, s[0:3], null offen
-; GISEL-NEXT: s_wait_loadcnt 0x0
-; GISEL-NEXT: global_store_b32 v[0:1], v2, off
-; GISEL-NEXT: s_endpgm
+; GCN-LABEL: s_buffer_load_byte_sgpr_or_imm_offset_divergent:
+; GCN: ; %bb.0: ; %main_body
+; GCN-NEXT: buffer_load_i8 v2, v2, s[0:3], null offen
+; GCN-NEXT: s_wait_loadcnt 0x0
+; GCN-NEXT: global_store_b32 v[0:1], v2, off
+; GCN-NEXT: s_endpgm
main_body:
%ld = call i8 @llvm.amdgcn.s.buffer.load.i8(<4 x i32> %src, i32 %offset, i32 0)
%sext = sext i8 %ld to i32
@@ -538,20 +531,12 @@ main_body:
}
define amdgpu_ps void @s_buffer_load_ubyte_sgpr_or_imm_offset_divergent(<4 x i32> inreg %src, ptr addrspace(1) nocapture %out, i32 %offset) {
-; DAG-LABEL: s_buffer_load_ubyte_sgpr_or_imm_offset_divergent:
-; DAG: ; %bb.0: ; %main_body
-; DAG-NEXT: buffer_load_u8 v2, v2, s[0:3], null offen
-; DAG-NEXT: s_wait_loadcnt 0x0
-; DAG-NEXT: global_store_b32 v[0:1], v2, off
-; DAG-NEXT: s_endpgm
-;
-; GISEL-LABEL: s_buffer_load_ubyte_sgpr_or_imm_offset_divergent:
-; GISEL: ; %bb.0: ; %main_body
-; GISEL-NEXT: buffer_load_b32 v2, v2, s[0:3], null offen
-; GISEL-NEXT: s_wait_loadcnt 0x0
-; GISEL-NEXT: v_and_b32_e32 v2, 0xff, v2
-; GISEL-NEXT: global_store_b32 v[0:1], v2, off
-; GISEL-NEXT: s_endpgm
+; GCN-LABEL: s_buffer_load_ubyte_sgpr_or_imm_offset_divergent:
+; GCN: ; %bb.0: ; %main_body
+; GCN-NEXT: buffer_load_u8 v2, v2, s[0:3], null offen
+; GCN-NEXT: s_wait_loadcnt 0x0
+; GCN-NEXT: global_store_b32 v[0:1], v2, off
+; GCN-NEXT: s_endpgm
main_body:
%ld = call i8 @llvm.amdgcn.s.buffer.load.u8(<4 x i32> %src, i32 %offset, i32 0)
%zext = zext i8 %ld to i32
@@ -606,19 +591,12 @@ main_body:
}
define amdgpu_ps void @s_buffer_load_short_sgpr_or_imm_offset_divergent(<4 x i32> inreg %src, ptr addrspace(1) nocapture %out, i32 %offset) {
-; DAG-LABEL: s_buffer_load_short_sgpr_or_imm_offset_divergent:
-; DAG: ; %bb.0: ; %main_body
-; DAG-NEXT: buffer_load_i16 v2, v2, s[0:3], null offen
-; DAG-NEXT: s_wait_loadcnt 0x0
-; DAG-NEXT: global_store_b32 v[0:1], v2, off
-; DAG-NEXT: s_endpgm
-;
-; GISEL-LABEL: s_buffer_load_short_sgpr_or_imm_offset_divergent:
-; GISEL: ; %bb.0: ; %main_body
-; GISEL-NEXT: buffer_load_b32 v2, v2, s[0:3], null offen
-; GISEL-NEXT: s_wait_loadcnt 0x0
-; GISEL-NEXT: global_store_b32 v[0:1], v2, off
-; GISEL-NEXT: s_endpgm
+; GCN-LABEL: s_buffer_load_short_sgpr_or_imm_offset_divergent:
+; GCN: ; %bb.0: ; %main_body
+; GCN-NEXT: buffer_load_i16 v2, v2, s[0:3], null offen
+; GCN-NEXT: s_wait_loadcnt 0x0
+; GCN-NEXT: global_store_b32 v[0:1], v2, off
+; GCN-NEXT: s_endpgm
main_body:
%ld = call i16 @llvm.amdgcn.s.buffer.load.i16(<4 x i32> %src, i32 %offset, i32 0)
%sext = sext i16 %ld to i32
@@ -679,20 +657,12 @@ main_body:
}
define amdgpu_ps void @s_buffer_load_ushort_sgpr_or_imm_offset_divergent(<4 x i32> inreg %src, ptr addrspace(1) nocapture %out, i32 %offset) {
-; DAG-LABEL: s_buffer_load_ushort_sgpr_or_imm_offset_divergent:
-; DAG: ; %bb.0: ; %main_body
-; DAG-NEXT: buffer_load_u16 v2, v2, s[0:3], null offen
-; DAG-NEXT: s_wait_loadcnt 0x0
-; DAG-NEXT: global_store_b32 v[0:1], v2, off
-; DAG-NEXT: s_endpgm
-;
-; GISEL-LABEL: s_buffer_load_ushort_sgpr_or_imm_offset_divergent:
-; GISEL: ; %bb.0: ; %main_body
-; GISEL-NEXT: buffer_load_b32 v2, v2, s[0:3], null offen
-; GISEL-NEXT: s_wait_loadcnt 0x0
-; GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
-; GISEL-NEXT: global_store_b32 v[0:1], v2, off
-; GISEL-NEXT: s_endpgm
+; GCN-LABEL: s_buffer_load_ushort_sgpr_or_imm_offset_divergent:
+; GCN: ; %bb.0: ; %main_body
+; GCN-NEXT: buffer_load_u16 v2, v2, s[0:3], null offen
+; GCN-NEXT: s_wait_loadcnt 0x0
+; GCN-NEXT: global_store_b32 v[0:1], v2, off
+; GCN-NEXT: s_endpgm
main_body:
%ld = call i16 @llvm.amdgcn.s.buffer.load.u16(<4 x i32> %src, i32 %offset, i32 0)
%zext = zext i16 %ld to i32
``````````
https://github.com/llvm/llvm-project/pull/117574