[llvm] ace87ec - [AMDGPU][AMDGPURegBankInfo] Map S_BUFFER_LOAD_XXX to its corresponding BUFFER_LOAD_XXX (#117574)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 16 01:24:37 PST 2024
Author: Juan Manuel Martinez Caamaño
Date: 2024-12-16T10:24:33+01:00
New Revision: ace87ec04cd588e5fbe393c3b642bd759a7abadb
URL: https://github.com/llvm/llvm-project/commit/ace87ec04cd588e5fbe393c3b642bd759a7abadb
DIFF: https://github.com/llvm/llvm-project/commit/ace87ec04cd588e5fbe393c3b642bd759a7abadb.diff
LOG: [AMDGPU][AMDGPURegBankInfo] Map S_BUFFER_LOAD_XXX to its corresponding BUFFER_LOAD_XXX (#117574)
In one test code generation diverged between GISEL and DAG
For example, this intrinsic
> %ld = call i8 @llvm.amdgcn.s.buffer.load.u8(<4 x i32> %src, i32
%offset, i32 0)
would be lowered into these two cases:
* `buffer_load_u8 v2, v2, s[0:3], null offen`
* `buffer_load_b32 v2, v2, s[0:3], null offen`
This patch fixes this issue.
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
llvm/test/CodeGen/AMDGPU/gfx12_scalar_subword_loads.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 6e1f188bb3d3de..c05f079516ba68 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -1330,6 +1330,24 @@ unsigned AMDGPURegisterBankInfo::setBufferOffsets(
return 0;
}
+static unsigned getSBufferLoadCorrespondingBufferLoadOpcode(unsigned Opc) {
+ switch (Opc) {
+ case AMDGPU::G_AMDGPU_S_BUFFER_LOAD:
+ return AMDGPU::G_AMDGPU_BUFFER_LOAD;
+ case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_UBYTE:
+ return AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE;
+ case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_SBYTE:
+ return AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE;
+ case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_USHORT:
+ return AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT;
+ case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_SSHORT:
+ return AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT;
+ default:
+ break;
+ }
+ llvm_unreachable("Unexpected s_buffer_load opcode");
+}
+
bool AMDGPURegisterBankInfo::applyMappingSBufferLoad(
MachineIRBuilder &B, const OperandsMapper &OpdMapper) const {
MachineInstr &MI = OpdMapper.getMI();
@@ -1406,16 +1424,16 @@ bool AMDGPURegisterBankInfo::applyMappingSBufferLoad(
if (i != 0)
BaseMMO = MF.getMachineMemOperand(BaseMMO, MMOOffset + 16 * i, MemSize);
- B.buildInstr(AMDGPU::G_AMDGPU_BUFFER_LOAD)
- .addDef(LoadParts[i]) // vdata
- .addUse(RSrc) // rsrc
- .addUse(VIndex) // vindex
- .addUse(VOffset) // voffset
- .addUse(SOffset) // soffset
- .addImm(ImmOffset + 16 * i) // offset(imm)
- .addImm(0) // cachepolicy, swizzled buffer(imm)
- .addImm(0) // idxen(imm)
- .addMemOperand(MMO);
+ B.buildInstr(getSBufferLoadCorrespondingBufferLoadOpcode(MI.getOpcode()))
+ .addDef(LoadParts[i]) // vdata
+ .addUse(RSrc) // rsrc
+ .addUse(VIndex) // vindex
+ .addUse(VOffset) // voffset
+ .addUse(SOffset) // soffset
+ .addImm(ImmOffset + 16 * i) // offset(imm)
+ .addImm(0) // cachepolicy, swizzled buffer(imm)
+ .addImm(0) // idxen(imm)
+ .addMemOperand(MMO);
}
// TODO: If only the resource is a VGPR, it may be better to execute the
diff --git a/llvm/test/CodeGen/AMDGPU/gfx12_scalar_subword_loads.ll b/llvm/test/CodeGen/AMDGPU/gfx12_scalar_subword_loads.ll
index 020c9dc130bb2a..61ae9639c52d00 100644
--- a/llvm/test/CodeGen/AMDGPU/gfx12_scalar_subword_loads.ll
+++ b/llvm/test/CodeGen/AMDGPU/gfx12_scalar_subword_loads.ll
@@ -465,19 +465,12 @@ main_body:
}
define amdgpu_ps void @s_buffer_load_byte_sgpr_or_imm_offset_divergent(<4 x i32> inreg %src, ptr addrspace(1) nocapture %out, i32 %offset) {
-; DAG-LABEL: s_buffer_load_byte_sgpr_or_imm_offset_divergent:
-; DAG: ; %bb.0: ; %main_body
-; DAG-NEXT: buffer_load_i8 v2, v2, s[0:3], null offen
-; DAG-NEXT: s_wait_loadcnt 0x0
-; DAG-NEXT: global_store_b32 v[0:1], v2, off
-; DAG-NEXT: s_endpgm
-;
-; GISEL-LABEL: s_buffer_load_byte_sgpr_or_imm_offset_divergent:
-; GISEL: ; %bb.0: ; %main_body
-; GISEL-NEXT: buffer_load_b32 v2, v2, s[0:3], null offen
-; GISEL-NEXT: s_wait_loadcnt 0x0
-; GISEL-NEXT: global_store_b32 v[0:1], v2, off
-; GISEL-NEXT: s_endpgm
+; GCN-LABEL: s_buffer_load_byte_sgpr_or_imm_offset_divergent:
+; GCN: ; %bb.0: ; %main_body
+; GCN-NEXT: buffer_load_i8 v2, v2, s[0:3], null offen
+; GCN-NEXT: s_wait_loadcnt 0x0
+; GCN-NEXT: global_store_b32 v[0:1], v2, off
+; GCN-NEXT: s_endpgm
main_body:
%ld = call i8 @llvm.amdgcn.s.buffer.load.i8(<4 x i32> %src, i32 %offset, i32 0)
%sext = sext i8 %ld to i32
@@ -538,20 +531,12 @@ main_body:
}
define amdgpu_ps void @s_buffer_load_ubyte_sgpr_or_imm_offset_divergent(<4 x i32> inreg %src, ptr addrspace(1) nocapture %out, i32 %offset) {
-; DAG-LABEL: s_buffer_load_ubyte_sgpr_or_imm_offset_divergent:
-; DAG: ; %bb.0: ; %main_body
-; DAG-NEXT: buffer_load_u8 v2, v2, s[0:3], null offen
-; DAG-NEXT: s_wait_loadcnt 0x0
-; DAG-NEXT: global_store_b32 v[0:1], v2, off
-; DAG-NEXT: s_endpgm
-;
-; GISEL-LABEL: s_buffer_load_ubyte_sgpr_or_imm_offset_divergent:
-; GISEL: ; %bb.0: ; %main_body
-; GISEL-NEXT: buffer_load_b32 v2, v2, s[0:3], null offen
-; GISEL-NEXT: s_wait_loadcnt 0x0
-; GISEL-NEXT: v_and_b32_e32 v2, 0xff, v2
-; GISEL-NEXT: global_store_b32 v[0:1], v2, off
-; GISEL-NEXT: s_endpgm
+; GCN-LABEL: s_buffer_load_ubyte_sgpr_or_imm_offset_divergent:
+; GCN: ; %bb.0: ; %main_body
+; GCN-NEXT: buffer_load_u8 v2, v2, s[0:3], null offen
+; GCN-NEXT: s_wait_loadcnt 0x0
+; GCN-NEXT: global_store_b32 v[0:1], v2, off
+; GCN-NEXT: s_endpgm
main_body:
%ld = call i8 @llvm.amdgcn.s.buffer.load.u8(<4 x i32> %src, i32 %offset, i32 0)
%zext = zext i8 %ld to i32
@@ -606,19 +591,12 @@ main_body:
}
define amdgpu_ps void @s_buffer_load_short_sgpr_or_imm_offset_divergent(<4 x i32> inreg %src, ptr addrspace(1) nocapture %out, i32 %offset) {
-; DAG-LABEL: s_buffer_load_short_sgpr_or_imm_offset_divergent:
-; DAG: ; %bb.0: ; %main_body
-; DAG-NEXT: buffer_load_i16 v2, v2, s[0:3], null offen
-; DAG-NEXT: s_wait_loadcnt 0x0
-; DAG-NEXT: global_store_b32 v[0:1], v2, off
-; DAG-NEXT: s_endpgm
-;
-; GISEL-LABEL: s_buffer_load_short_sgpr_or_imm_offset_divergent:
-; GISEL: ; %bb.0: ; %main_body
-; GISEL-NEXT: buffer_load_b32 v2, v2, s[0:3], null offen
-; GISEL-NEXT: s_wait_loadcnt 0x0
-; GISEL-NEXT: global_store_b32 v[0:1], v2, off
-; GISEL-NEXT: s_endpgm
+; GCN-LABEL: s_buffer_load_short_sgpr_or_imm_offset_divergent:
+; GCN: ; %bb.0: ; %main_body
+; GCN-NEXT: buffer_load_i16 v2, v2, s[0:3], null offen
+; GCN-NEXT: s_wait_loadcnt 0x0
+; GCN-NEXT: global_store_b32 v[0:1], v2, off
+; GCN-NEXT: s_endpgm
main_body:
%ld = call i16 @llvm.amdgcn.s.buffer.load.i16(<4 x i32> %src, i32 %offset, i32 0)
%sext = sext i16 %ld to i32
@@ -679,20 +657,12 @@ main_body:
}
define amdgpu_ps void @s_buffer_load_ushort_sgpr_or_imm_offset_divergent(<4 x i32> inreg %src, ptr addrspace(1) nocapture %out, i32 %offset) {
-; DAG-LABEL: s_buffer_load_ushort_sgpr_or_imm_offset_divergent:
-; DAG: ; %bb.0: ; %main_body
-; DAG-NEXT: buffer_load_u16 v2, v2, s[0:3], null offen
-; DAG-NEXT: s_wait_loadcnt 0x0
-; DAG-NEXT: global_store_b32 v[0:1], v2, off
-; DAG-NEXT: s_endpgm
-;
-; GISEL-LABEL: s_buffer_load_ushort_sgpr_or_imm_offset_divergent:
-; GISEL: ; %bb.0: ; %main_body
-; GISEL-NEXT: buffer_load_b32 v2, v2, s[0:3], null offen
-; GISEL-NEXT: s_wait_loadcnt 0x0
-; GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
-; GISEL-NEXT: global_store_b32 v[0:1], v2, off
-; GISEL-NEXT: s_endpgm
+; GCN-LABEL: s_buffer_load_ushort_sgpr_or_imm_offset_divergent:
+; GCN: ; %bb.0: ; %main_body
+; GCN-NEXT: buffer_load_u16 v2, v2, s[0:3], null offen
+; GCN-NEXT: s_wait_loadcnt 0x0
+; GCN-NEXT: global_store_b32 v[0:1], v2, off
+; GCN-NEXT: s_endpgm
main_body:
%ld = call i16 @llvm.amdgcn.s.buffer.load.u16(<4 x i32> %src, i32 %offset, i32 0)
%zext = zext i16 %ld to i32
More information about the llvm-commits
mailing list