[llvm] [AMDGPU] More support for SILoadStoreOptimizer of buffer instructions (PR #86285)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 22 06:45:07 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: David Stuttard (dstutt)
<details>
<summary>Changes</summary>
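Added more buffer instruction merging support to SILoadStoreOptimizer: the BOTHEN and IDXEN addressing variants of BUFFER_LOAD/STORE_DWORD, and the BUFFER_LOAD/STORE_FORMAT_X family (including the VBUFFER forms).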
---
Patch is 319.58 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/86285.diff
5 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/BUFInstructions.td (+4-1)
- (modified) llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp (+48)
- (added) llvm/test/CodeGen/AMDGPU/merge-buffer-gfx12.mir (+2288)
- (added) llvm/test/CodeGen/AMDGPU/merge-buffer.mir (+2290)
- (modified) llvm/test/CodeGen/AMDGPU/merge-tbuffer.mir (+28)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td
index 4ae514ffcf7850..c3ad1fd9a6c806 100644
--- a/llvm/lib/Target/AMDGPU/BUFInstructions.td
+++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td
@@ -313,7 +313,10 @@ multiclass MTBUF_Pseudo_Stores<string opName, RegisterClass vdataClass,
class MUBUFGetBaseOpcode<string Op> {
string ret = !subst("DWORDX2", "DWORD",
!subst("DWORDX3", "DWORD",
- !subst("DWORDX4", "DWORD", Op)));
+ !subst("DWORDX4", "DWORD",
+ !subst("FORMAT_XY", "FORMAT_X",
+ !subst("FORMAT_XYZ", "FORMAT_X",
+ !subst("FORMAT_XYZW", "FORMAT_X", Op))))));
}
class MUBUF_Pseudo <string opName, dag outs, dag ins,
diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
index 4ddee2f6d5befa..5b89e64940161e 100644
--- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -399,23 +399,71 @@ static InstClassEnum getInstClass(unsigned Opc, const SIInstrInfo &TII) {
switch (AMDGPU::getMUBUFBaseOpcode(Opc)) {
default:
return UNKNOWN;
+ case AMDGPU::BUFFER_LOAD_DWORD_BOTHEN:
+ case AMDGPU::BUFFER_LOAD_DWORD_BOTHEN_exact:
+ case AMDGPU::BUFFER_LOAD_DWORD_IDXEN:
+ case AMDGPU::BUFFER_LOAD_DWORD_IDXEN_exact:
case AMDGPU::BUFFER_LOAD_DWORD_OFFEN:
case AMDGPU::BUFFER_LOAD_DWORD_OFFEN_exact:
case AMDGPU::BUFFER_LOAD_DWORD_OFFSET:
case AMDGPU::BUFFER_LOAD_DWORD_OFFSET_exact:
+ case AMDGPU::BUFFER_LOAD_DWORD_VBUFFER_BOTHEN:
+ case AMDGPU::BUFFER_LOAD_DWORD_VBUFFER_BOTHEN_exact:
+ case AMDGPU::BUFFER_LOAD_DWORD_VBUFFER_IDXEN:
+ case AMDGPU::BUFFER_LOAD_DWORD_VBUFFER_IDXEN_exact:
case AMDGPU::BUFFER_LOAD_DWORD_VBUFFER_OFFEN:
case AMDGPU::BUFFER_LOAD_DWORD_VBUFFER_OFFEN_exact:
case AMDGPU::BUFFER_LOAD_DWORD_VBUFFER_OFFSET:
case AMDGPU::BUFFER_LOAD_DWORD_VBUFFER_OFFSET_exact:
+ case AMDGPU::BUFFER_LOAD_FORMAT_X_BOTHEN:
+ case AMDGPU::BUFFER_LOAD_FORMAT_X_BOTHEN_exact:
+ case AMDGPU::BUFFER_LOAD_FORMAT_X_IDXEN:
+ case AMDGPU::BUFFER_LOAD_FORMAT_X_IDXEN_exact:
+ case AMDGPU::BUFFER_LOAD_FORMAT_X_OFFEN:
+ case AMDGPU::BUFFER_LOAD_FORMAT_X_OFFEN_exact:
+ case AMDGPU::BUFFER_LOAD_FORMAT_X_OFFSET:
+ case AMDGPU::BUFFER_LOAD_FORMAT_X_OFFSET_exact:
+ case AMDGPU::BUFFER_LOAD_FORMAT_X_VBUFFER_BOTHEN:
+ case AMDGPU::BUFFER_LOAD_FORMAT_X_VBUFFER_BOTHEN_exact:
+ case AMDGPU::BUFFER_LOAD_FORMAT_X_VBUFFER_OFFEN:
+ case AMDGPU::BUFFER_LOAD_FORMAT_X_VBUFFER_IDXEN_exact:
+ case AMDGPU::BUFFER_LOAD_FORMAT_X_VBUFFER_IDXEN:
+ case AMDGPU::BUFFER_LOAD_FORMAT_X_VBUFFER_OFFEN_exact:
+ case AMDGPU::BUFFER_LOAD_FORMAT_X_VBUFFER_OFFSET:
+ case AMDGPU::BUFFER_LOAD_FORMAT_X_VBUFFER_OFFSET_exact:
return BUFFER_LOAD;
+ case AMDGPU::BUFFER_STORE_DWORD_BOTHEN:
+ case AMDGPU::BUFFER_STORE_DWORD_BOTHEN_exact:
+ case AMDGPU::BUFFER_STORE_DWORD_IDXEN:
+ case AMDGPU::BUFFER_STORE_DWORD_IDXEN_exact:
case AMDGPU::BUFFER_STORE_DWORD_OFFEN:
case AMDGPU::BUFFER_STORE_DWORD_OFFEN_exact:
case AMDGPU::BUFFER_STORE_DWORD_OFFSET:
case AMDGPU::BUFFER_STORE_DWORD_OFFSET_exact:
+ case AMDGPU::BUFFER_STORE_DWORD_VBUFFER_BOTHEN:
+ case AMDGPU::BUFFER_STORE_DWORD_VBUFFER_BOTHEN_exact:
+ case AMDGPU::BUFFER_STORE_DWORD_VBUFFER_IDXEN:
+ case AMDGPU::BUFFER_STORE_DWORD_VBUFFER_IDXEN_exact:
case AMDGPU::BUFFER_STORE_DWORD_VBUFFER_OFFEN:
case AMDGPU::BUFFER_STORE_DWORD_VBUFFER_OFFEN_exact:
case AMDGPU::BUFFER_STORE_DWORD_VBUFFER_OFFSET:
case AMDGPU::BUFFER_STORE_DWORD_VBUFFER_OFFSET_exact:
+ case AMDGPU::BUFFER_STORE_FORMAT_X_BOTHEN:
+ case AMDGPU::BUFFER_STORE_FORMAT_X_BOTHEN_exact:
+ case AMDGPU::BUFFER_STORE_FORMAT_X_IDXEN:
+ case AMDGPU::BUFFER_STORE_FORMAT_X_IDXEN_exact:
+ case AMDGPU::BUFFER_STORE_FORMAT_X_OFFEN:
+ case AMDGPU::BUFFER_STORE_FORMAT_X_OFFEN_exact:
+ case AMDGPU::BUFFER_STORE_FORMAT_X_OFFSET:
+ case AMDGPU::BUFFER_STORE_FORMAT_X_OFFSET_exact:
+ case AMDGPU::BUFFER_STORE_FORMAT_X_VBUFFER_BOTHEN:
+ case AMDGPU::BUFFER_STORE_FORMAT_X_VBUFFER_BOTHEN_exact:
+ case AMDGPU::BUFFER_STORE_FORMAT_X_VBUFFER_OFFEN:
+ case AMDGPU::BUFFER_STORE_FORMAT_X_VBUFFER_IDXEN_exact:
+ case AMDGPU::BUFFER_STORE_FORMAT_X_VBUFFER_IDXEN:
+ case AMDGPU::BUFFER_STORE_FORMAT_X_VBUFFER_OFFEN_exact:
+ case AMDGPU::BUFFER_STORE_FORMAT_X_VBUFFER_OFFSET:
+ case AMDGPU::BUFFER_STORE_FORMAT_X_VBUFFER_OFFSET_exact:
return BUFFER_STORE;
}
}
diff --git a/llvm/test/CodeGen/AMDGPU/merge-buffer-gfx12.mir b/llvm/test/CodeGen/AMDGPU/merge-buffer-gfx12.mir
new file mode 100644
index 00000000000000..5ad17ab35c83d3
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/merge-buffer-gfx12.mir
@@ -0,0 +1,2288 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -run-pass si-load-store-opt -o - %s | FileCheck -check-prefixes=GFX12 %s
+
+#
+# buffer_load_format
+#
+
+---
+name: buffer_load_x_xyz
+body: |
+ bb.0.entry:
+ ; GFX12-LABEL: name: buffer_load_x_xyz
+ ; GFX12: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX12-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_VBUFFER_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_VBUFFER_OFFSET [[REG_SEQUENCE]], $sgpr_null, 4, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4)
+ ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_VBUFFER_OFFSET]].sub0
+ ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vreg_96 = COPY killed [[BUFFER_LOAD_FORMAT_XYZW_VBUFFER_OFFSET]].sub1_sub2_sub3
+ %0:sgpr_32 = COPY $sgpr0
+ %1:sgpr_32 = COPY $sgpr1
+ %2:sgpr_32 = COPY $sgpr2
+ %3:sgpr_32 = COPY $sgpr3
+ %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
+ %7:vgpr_32 = BUFFER_LOAD_FORMAT_X_VBUFFER_OFFSET %5:sgpr_128, $sgpr_null, 4, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4)
+ %8:vreg_96 = BUFFER_LOAD_FORMAT_XYZ_VBUFFER_OFFSET %5:sgpr_128, $sgpr_null, 8, 0, 0, implicit $exec :: (dereferenceable load (s96), align 1, addrspace 4)
+...
+---
+
+name: buffer_load_xyz_x
+body: |
+ bb.0.entry:
+ ; GFX12-LABEL: name: buffer_load_xyz_x
+ ; GFX12: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX12-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_VBUFFER_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_VBUFFER_OFFSET [[REG_SEQUENCE]], $sgpr_null, 4, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4)
+ ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vreg_96 = COPY [[BUFFER_LOAD_FORMAT_XYZW_VBUFFER_OFFSET]].sub0_sub1_sub2
+ ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY killed [[BUFFER_LOAD_FORMAT_XYZW_VBUFFER_OFFSET]].sub3
+ %0:sgpr_32 = COPY $sgpr0
+ %1:sgpr_32 = COPY $sgpr1
+ %2:sgpr_32 = COPY $sgpr2
+ %3:sgpr_32 = COPY $sgpr3
+ %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
+ %7:vreg_96 = BUFFER_LOAD_FORMAT_XYZ_VBUFFER_OFFSET %5:sgpr_128, $sgpr_null, 4, 0, 0, implicit $exec :: (dereferenceable load (s96), align 1, addrspace 4)
+ %8:vgpr_32 = BUFFER_LOAD_FORMAT_X_VBUFFER_OFFSET %5:sgpr_128, $sgpr_null, 16, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4)
+...
+---
+
+name: buffer_load_xy_xy
+body: |
+ bb.0.entry:
+ ; GFX12-LABEL: name: buffer_load_xy_xy
+ ; GFX12: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX12-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_VBUFFER_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_VBUFFER_OFFSET [[REG_SEQUENCE]], $sgpr_null, 4, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4)
+ ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[BUFFER_LOAD_FORMAT_XYZW_VBUFFER_OFFSET]].sub0_sub1
+ ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY killed [[BUFFER_LOAD_FORMAT_XYZW_VBUFFER_OFFSET]].sub2_sub3
+ %0:sgpr_32 = COPY $sgpr0
+ %1:sgpr_32 = COPY $sgpr1
+ %2:sgpr_32 = COPY $sgpr2
+ %3:sgpr_32 = COPY $sgpr3
+ %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
+ %7:vreg_64 = BUFFER_LOAD_FORMAT_XY_VBUFFER_OFFSET %5:sgpr_128, $sgpr_null, 4, 0, 0, implicit $exec :: (dereferenceable load (s64), align 1, addrspace 4)
+ %8:vreg_64 = BUFFER_LOAD_FORMAT_XY_VBUFFER_OFFSET %5:sgpr_128, $sgpr_null, 12, 0, 0, implicit $exec :: (dereferenceable load (s64), align 1, addrspace 4)
+...
+---
+
+name: buffer_load_x_xy
+body: |
+ bb.0.entry:
+ ; GFX12-LABEL: name: buffer_load_x_xy
+ ; GFX12: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX12-NEXT: [[BUFFER_LOAD_FORMAT_XYZ_VBUFFER_OFFSET:%[0-9]+]]:vreg_96 = BUFFER_LOAD_FORMAT_XYZ_VBUFFER_OFFSET [[REG_SEQUENCE]], $sgpr_null, 4, 0, 0, implicit $exec :: (dereferenceable load (s96), align 1, addrspace 4)
+ ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZ_VBUFFER_OFFSET]].sub0
+ ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY killed [[BUFFER_LOAD_FORMAT_XYZ_VBUFFER_OFFSET]].sub1_sub2
+ %0:sgpr_32 = COPY $sgpr0
+ %1:sgpr_32 = COPY $sgpr1
+ %2:sgpr_32 = COPY $sgpr2
+ %3:sgpr_32 = COPY $sgpr3
+ %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
+ %7:vgpr_32 = BUFFER_LOAD_FORMAT_X_VBUFFER_OFFSET %5:sgpr_128, $sgpr_null, 4, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4)
+ %8:vreg_64 = BUFFER_LOAD_FORMAT_XY_VBUFFER_OFFSET %5:sgpr_128, $sgpr_null, 8, 0, 0, implicit $exec :: (dereferenceable load (s64), align 1, addrspace 4)
+...
+---
+
+name: buffer_load_xy_x
+body: |
+ bb.0.entry:
+ ; GFX12-LABEL: name: buffer_load_xy_x
+ ; GFX12: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX12-NEXT: [[BUFFER_LOAD_FORMAT_XYZ_VBUFFER_OFFSET:%[0-9]+]]:vreg_96 = BUFFER_LOAD_FORMAT_XYZ_VBUFFER_OFFSET [[REG_SEQUENCE]], $sgpr_null, 4, 0, 0, implicit $exec :: (dereferenceable load (s96), align 1, addrspace 4)
+ ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[BUFFER_LOAD_FORMAT_XYZ_VBUFFER_OFFSET]].sub0_sub1
+ ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY killed [[BUFFER_LOAD_FORMAT_XYZ_VBUFFER_OFFSET]].sub2
+ %0:sgpr_32 = COPY $sgpr0
+ %1:sgpr_32 = COPY $sgpr1
+ %2:sgpr_32 = COPY $sgpr2
+ %3:sgpr_32 = COPY $sgpr3
+ %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
+ %7:vreg_64 = BUFFER_LOAD_FORMAT_XY_VBUFFER_OFFSET %5:sgpr_128, $sgpr_null, 4, 0, 0, implicit $exec :: (dereferenceable load (s64), align 1, addrspace 4)
+ %8:vgpr_32 = BUFFER_LOAD_FORMAT_X_VBUFFER_OFFSET %5:sgpr_128, $sgpr_null, 12, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4)
+...
+---
+
+
+name: buffer_load_x_x
+body: |
+ bb.0.entry:
+ ; GFX12-LABEL: name: buffer_load_x_x
+ ; GFX12: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX12-NEXT: [[BUFFER_LOAD_FORMAT_XY_VBUFFER_OFFSET:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_XY_VBUFFER_OFFSET [[REG_SEQUENCE]], $sgpr_null, 4, 0, 0, implicit $exec :: (dereferenceable load (s64), align 1, addrspace 4)
+ ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XY_VBUFFER_OFFSET]].sub0
+ ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY killed [[BUFFER_LOAD_FORMAT_XY_VBUFFER_OFFSET]].sub1
+ %0:sgpr_32 = COPY $sgpr0
+ %1:sgpr_32 = COPY $sgpr1
+ %2:sgpr_32 = COPY $sgpr2
+ %3:sgpr_32 = COPY $sgpr3
+ %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
+ %7:vgpr_32 = BUFFER_LOAD_FORMAT_X_VBUFFER_OFFSET %5:sgpr_128, $sgpr_null, 4, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4)
+ %8:vgpr_32 = BUFFER_LOAD_FORMAT_X_VBUFFER_OFFSET %5:sgpr_128, $sgpr_null, 8, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4)
+...
+---
+
+name: buffer_load_32
+body: |
+ bb.0.entry:
+ ; GFX12-LABEL: name: buffer_load_32
+ ; GFX12: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX12-NEXT: [[BUFFER_LOAD_FORMAT_XY_VBUFFER_OFFSET:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_XY_VBUFFER_OFFSET [[REG_SEQUENCE]], $sgpr_null, 4, 0, 0, implicit $exec :: (dereferenceable load (s64), align 1, addrspace 4)
+ ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XY_VBUFFER_OFFSET]].sub0
+ ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY killed [[BUFFER_LOAD_FORMAT_XY_VBUFFER_OFFSET]].sub1
+ ; GFX12-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_VBUFFER_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_VBUFFER_OFFSET [[REG_SEQUENCE]], $sgpr_null, 16, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4)
+ ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vreg_96 = COPY [[BUFFER_LOAD_FORMAT_XYZW_VBUFFER_OFFSET]].sub0_sub1_sub2
+ ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY killed [[BUFFER_LOAD_FORMAT_XYZW_VBUFFER_OFFSET]].sub3
+ ; GFX12-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[COPY6]].sub0_sub1
+ ; GFX12-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY killed [[COPY6]].sub2
+ ; GFX12-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY8]].sub0
+ ; GFX12-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY killed [[COPY8]].sub1
+ ; GFX12-NEXT: [[BUFFER_LOAD_FORMAT_XYZ_VBUFFER_OFFSET:%[0-9]+]]:vreg_96 = BUFFER_LOAD_FORMAT_XYZ_VBUFFER_OFFSET [[REG_SEQUENCE]], $sgpr_null, 36, 0, 0, implicit $exec :: (dereferenceable load (s96), align 1, addrspace 4)
+ ; GFX12-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[BUFFER_LOAD_FORMAT_XYZ_VBUFFER_OFFSET]].sub0_sub1
+ ; GFX12-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY killed [[BUFFER_LOAD_FORMAT_XYZ_VBUFFER_OFFSET]].sub2
+ ; GFX12-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY12]].sub0
+ ; GFX12-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY killed [[COPY12]].sub1
+ %0:sgpr_32 = COPY $sgpr0
+ %1:sgpr_32 = COPY $sgpr1
+ %2:sgpr_32 = COPY $sgpr2
+ %3:sgpr_32 = COPY $sgpr3
+ %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
+ %7:vgpr_32 = BUFFER_LOAD_FORMAT_X_VBUFFER_OFFSET %5:sgpr_128, $sgpr_null, 4, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4)
+ %8:vgpr_32 = BUFFER_LOAD_FORMAT_X_VBUFFER_OFFSET %5:sgpr_128, $sgpr_null, 8, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4)
+ %9:vgpr_32 = BUFFER_LOAD_FORMAT_X_VBUFFER_OFFSET %5:sgpr_128, $sgpr_null, 16, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4)
+ %10:vgpr_32 = BUFFER_LOAD_FORMAT_X_VBUFFER_OFFSET %5:sgpr_128, $sgpr_null, 20, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4)
+ %11:vgpr_32 = BUFFER_LOAD_FORMAT_X_VBUFFER_OFFSET %5:sgpr_128, $sgpr_null, 24, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4)
+ %12:vgpr_32 = BUFFER_LOAD_FORMAT_X_VBUFFER_OFFSET %5:sgpr_128, $sgpr_null, 28, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4)
+ %13:vgpr_32 = BUFFER_LOAD_FORMAT_X_VBUFFER_OFFSET %5:sgpr_128, $sgpr_null, 36, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4)
+ %14:vgpr_32 = BUFFER_LOAD_FORMAT_X_VBUFFER_OFFSET %5:sgpr_128, $sgpr_null, 40, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4)
+ %15:vgpr_32 = BUFFER_LOAD_FORMAT_X_VBUFFER_OFFSET %5:sgpr_128, $sgpr_null, 44, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4)
+...
+---
+
+#
+# buffer_store_format
+#
+
+name: buffer_store_x_xyz
+body: |
+ bb.0.entry:
+ liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX12-LABEL: name: buffer_store_x_xyz
+ ; GFX12: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX12-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX12-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX12-NEXT: [[COPY7:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1, [[COPY5]], %subreg.sub2, [[COPY4]], %subreg.sub3
+ ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY1]], %subreg.sub2
+ ; GFX12-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[REG_SEQUENCE1]], %subreg.sub1_sub2_sub3
+ ; GFX12-NEXT: BUFFER_STORE_FORMAT_XYZW_VBUFFER_OFFSET_exact killed [[REG_SEQUENCE2]], [[REG_SEQUENCE]], $sgpr_null, 4, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4)
+ %7:vgpr_32 = COPY $vgpr3
+ %6:vgpr_32 = COPY $vgpr2
+ %5:vgpr_32 = COPY $vgpr1
+ %4:vgpr_32 = COPY $vgpr0
+ %3:sgpr_32 = COPY $sgpr3
+ %2:sgpr_32 = COPY $sgpr2
+ %1:sgpr_32 = COPY $sgpr1
+ %0:sgpr_32 = COPY $sgpr0
+ %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
+ %14:vreg_96 = REG_SEQUENCE %4:vgpr_32, %subreg.sub0, %5:vgpr_32, %subreg.sub1, %6:vgpr_32, %subreg.sub2
+ BUFFER_STORE_FORMAT_X_VBUFFER_OFFSET_exact %7:vgpr_32, %13:sgpr_128, $sgpr_null, 4, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4)
+ BUFFER_STORE_FORMAT_XYZ_VBUFFER_OFFSET_exact %14:vreg_96, %13:sgpr_128, ...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/86285
More information about the llvm-commits mailing list