[llvm] 185cb8e - [AMDGPU] SILoadStoreOptimizer: Allow merging across a swizzled access
Jay Foad via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 27 06:43:59 PST 2022
Author: Jay Foad
Date: 2022-01-27T14:40:58Z
New Revision: 185cb8e82c98096fdd73beb646017d86ca8692b3
URL: https://github.com/llvm/llvm-project/commit/185cb8e82c98096fdd73beb646017d86ca8692b3
DIFF: https://github.com/llvm/llvm-project/commit/185cb8e82c98096fdd73beb646017d86ca8692b3.diff
LOG: [AMDGPU] SILoadStoreOptimizer: Allow merging across a swizzled access
Swizzled accesses are themselves never merged, but that is no reason to
refuse to merge two other instructions just because one of the
instructions between them happens to be a swizzled access.
This moves the check for swizzled accesses out of checkAndPrepareMerge
into collectMergeableInsts where I think it makes more sense.
Differential Revision: https://reviews.llvm.org/D118267
Added:
Modified:
llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
llvm/test/CodeGen/AMDGPU/merge-tbuffer.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
index d1133c12bc18d..0ed8287de8ac9 100644
--- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -910,12 +910,6 @@ bool SILoadStoreOptimizer::checkAndPrepareMerge(
}
const unsigned InstSubclass = getInstSubclass(Opc, *TII);
- // Do not merge VMEM buffer instructions with "swizzled" bit set.
- int Swizzled =
- AMDGPU::getNamedOperandIdx(CI.I->getOpcode(), AMDGPU::OpName::swz);
- if (Swizzled != -1 && CI.I->getOperand(Swizzled).getImm())
- return false;
-
DenseSet<Register> RegDefsToMove;
DenseSet<Register> PhysRegUsesToMove;
addDefsUsesToList(*CI.I, RegDefsToMove, PhysRegUsesToMove);
@@ -971,11 +965,6 @@ bool SILoadStoreOptimizer::checkAndPrepareMerge(
continue;
}
- int Swizzled =
- AMDGPU::getNamedOperandIdx(MBBI->getOpcode(), AMDGPU::OpName::swz);
- if (Swizzled != -1 && MBBI->getOperand(Swizzled).getImm())
- return false;
-
// Handle a case like
// DS_WRITE_B32 addr, v, idx0
// w = DS_READ_B32 addr, idx0
@@ -2014,6 +2003,12 @@ SILoadStoreOptimizer::collectMergeableInsts(
if (InstClass == UNKNOWN)
continue;
+ // Do not merge VMEM buffer instructions with "swizzled" bit set.
+ int Swizzled =
+ AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::swz);
+ if (Swizzled != -1 && MI.getOperand(Swizzled).getImm())
+ continue;
+
CombineInfo CI;
CI.setMI(MI, *TII, *STM);
CI.Order = Order++;
diff --git a/llvm/test/CodeGen/AMDGPU/merge-tbuffer.mir b/llvm/test/CodeGen/AMDGPU/merge-tbuffer.mir
index f0296d8544ed5..961e8213a85bf 100644
--- a/llvm/test/CodeGen/AMDGPU/merge-tbuffer.mir
+++ b/llvm/test/CodeGen/AMDGPU/merge-tbuffer.mir
@@ -780,9 +780,8 @@ body: |
# GFX9-LABEL: name: gfx9_tbuffer_load_merge_across_swizzle
-# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 4, 116, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4)
# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 12, 116, 0, 0, 1, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4)
-# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 8, 116, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 123, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), align 1, addrspace 4)
name: gfx9_tbuffer_load_merge_across_swizzle
body: |
bb.0.entry:
@@ -1598,9 +1597,8 @@ body: |
# GFX10-LABEL: name: gfx10_tbuffer_load_merge_across_swizzle
-# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4)
# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 12, 22, 0, 0, 1, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4)
-# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 8, 22, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 64, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), align 1, addrspace 4)
name: gfx10_tbuffer_load_merge_across_swizzle
body: |
bb.0.entry:
More information about the llvm-commits mailing list