[llvm] fc21387 - [AMDGPU] Enable constant offset promotion to immediate FLAT (#93884)
via llvm-commits
llvm-commits at lists.llvm.org
Fri May 31 12:23:31 PDT 2024
Author: Stanislav Mekhanoshin
Date: 2024-05-31T12:23:27-07:00
New Revision: fc21387b6510ee44520f2f17fb671f1265a9055f
URL: https://github.com/llvm/llvm-project/commit/fc21387b6510ee44520f2f17fb671f1265a9055f
DIFF: https://github.com/llvm/llvm-project/commit/fc21387b6510ee44520f2f17fb671f1265a9055f.diff
LOG: [AMDGPU] Enable constant offset promotion to immediate FLAT (#93884)
Previously this was only supported for FLAT Global.
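For context, a rough before/after sketch of what the pass now does for plain FLAT accesses on targets with flat instruction offsets (illustrative MIR modeled on the new test below, not taken verbatim from the patch):

    ; Before: each load materializes base + constant with its own
    ; V_ADD_CO_U32/V_ADDC_U32 pair feeding a 64-bit address.
    %a:vreg_64 = REG_SEQUENCE ...            ; base + 4000
    %x:vgpr_32 = FLAT_LOAD_DWORD %a, 0, 0, implicit $exec, implicit $flat_scr
    %b:vreg_64 = REG_SEQUENCE ...            ; base + 3000
    %y:vgpr_32 = FLAT_LOAD_DWORD %b, 0, 0, implicit $exec, implicit $flat_scr

    ; After (GFX9+): one anchor address is kept and the 1000-byte delta is
    ; promoted into the instruction's immediate offset field.
    %anchor:vreg_64 = REG_SEQUENCE ...       ; base + 3000
    %x:vgpr_32 = FLAT_LOAD_DWORD %anchor, 1000, 0, implicit $exec, implicit $flat_scr
    %y:vgpr_32 = FLAT_LOAD_DWORD %anchor, 0, 0, implicit $exec, implicit $flat_scr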
Added:
Modified:
llvm/lib/Target/AMDGPU/SIISelLowering.h
llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index c5f3a98e69061..4c02bb1b306e5 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -223,7 +223,6 @@ class SITargetLowering final : public AMDGPUTargetLowering {
SDValue performClampCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performRcpCombine(SDNode *N, DAGCombinerInfo &DCI) const;
- bool isLegalFlatAddressingMode(const AddrMode &AM, unsigned AddrSpace) const;
bool isLegalMUBUFAddressingMode(const AddrMode &AM) const;
unsigned isCFIntrinsic(const SDNode *Intr) const;
@@ -315,6 +314,7 @@ class SITargetLowering final : public AMDGPUTargetLowering {
SmallVectorImpl<Value*> &/*Ops*/,
Type *&/*AccessTy*/) const override;
+ bool isLegalFlatAddressingMode(const AddrMode &AM, unsigned AddrSpace) const;
bool isLegalGlobalAddressingMode(const AddrMode &AM) const;
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
unsigned AS,
diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
index 774920aac2f08..c47eea20563df 100644
--- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -2055,10 +2055,16 @@ bool SILoadStoreOptimizer::promoteConstantOffsetToImm(
if (!(MI.mayLoad() ^ MI.mayStore()))
return false;
- // TODO: Support flat and scratch.
- if (AMDGPU::getGlobalSaddrOp(MI.getOpcode()) < 0)
+ if (!STM->hasFlatInstOffsets() || !SIInstrInfo::isFLAT(MI))
return false;
+ // TODO: Support FLAT_SCRATCH. Currently code expects 64-bit pointers.
+ if (SIInstrInfo::isFLATScratch(MI))
+ return false;
+
+ unsigned AS = SIInstrInfo::isFLATGlobal(MI) ? AMDGPUAS::GLOBAL_ADDRESS
+ : AMDGPUAS::FLAT_ADDRESS;
+
if (MI.mayLoad() &&
TII->getNamedOperand(MI, AMDGPU::OpName::vdata) != nullptr)
return false;
@@ -2157,7 +2163,7 @@ bool SILoadStoreOptimizer::promoteConstantOffsetToImm(
TargetLoweringBase::AddrMode AM;
AM.HasBaseReg = true;
AM.BaseOffs = Dist;
- if (TLI->isLegalGlobalAddressingMode(AM) &&
+ if (TLI->isLegalFlatAddressingMode(AM, AS) &&
(uint32_t)std::abs(Dist) > MaxDist) {
MaxDist = std::abs(Dist);
@@ -2183,7 +2189,7 @@ bool SILoadStoreOptimizer::promoteConstantOffsetToImm(
AM.HasBaseReg = true;
AM.BaseOffs = OtherOffset - AnchorAddr.Offset;
- if (TLI->isLegalGlobalAddressingMode(AM)) {
+ if (TLI->isLegalFlatAddressingMode(AM, AS)) {
LLVM_DEBUG(dbgs() << " Promote Offset(" << OtherOffset; dbgs() << ")";
OtherMI->dump());
updateBaseAndOffset(*OtherMI, Base, OtherOffset - AnchorAddr.Offset);
diff --git a/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.mir b/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.mir
index 1a751839e2947..4ecce2842455b 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.mir
+++ b/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.mir
@@ -1,9 +1,13 @@
-# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass si-load-store-opt -o - %s | FileCheck -check-prefix=GFX9 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass si-load-store-opt -o - %s | FileCheck -check-prefixes=GCN,GFX9 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx803 -verify-machineinstrs -run-pass si-load-store-opt -o - %s | FileCheck -check-prefixes=GCN,GFX8 %s
-# GFX9-LABEL: name: diffoporder_add
+# GCN-LABEL: name: diffoporder_add
# GFX9: %{{[0-9]+}}:vreg_64 = GLOBAL_LOAD_DWORDX2 %{{[0-9]+}}, -2048, 0
# GFX9: %{{[0-9]+}}:vreg_64 = GLOBAL_LOAD_DWORDX2 %{{[0-9]+}}, 0, 0
+# GFX8: %{{[0-9]+}}:vreg_64 = GLOBAL_LOAD_DWORDX2 %{{[0-9]+}}, 0, 0
+# GFX8: %{{[0-9]+}}:vreg_64 = GLOBAL_LOAD_DWORDX2 %{{[0-9]+}}, 0, 0
+
name: diffoporder_add
body: |
bb.0.entry:
@@ -43,7 +47,7 @@ body: |
...
---
-# GFX9-LABEL: name: LowestInMiddle
+# GCN-LABEL: name: LowestInMiddle
# GFX9: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 11200
# GFX9: [[BASE_LO:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_5:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 %{{[0-9]+}}, [[S_MOV_B32_1]]
# GFX9: [[BASE_HI:%[0-9]+]]:vgpr_32, dead %{{[0-9]+}}:sreg_64_xexec = V_ADDC_U32_e64 %{{[0-9]+}}, 0, killed [[V_ADD_CO_U32_e64_5]]
@@ -57,6 +61,11 @@ body: |
# GFX9: [[GLOBAL_LOAD_DWORDX2_1:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE3]], 0, 0,
# GFX9: [[GLOBAL_LOAD_DWORDX2_2:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], 0, 0,
+# GFX8: %{{[0-9]+}}:vreg_64 = GLOBAL_LOAD_DWORDX2 %{{[0-9]+}}, 0, 0
+# GFX8: %{{[0-9]+}}:vreg_64 = GLOBAL_LOAD_DWORDX2 %{{[0-9]+}}, 0, 0
+# GFX8: %{{[0-9]+}}:vreg_64 = GLOBAL_LOAD_DWORDX2 %{{[0-9]+}}, 0, 0
+
+
name: LowestInMiddle
body: |
bb.0.entry:
@@ -101,7 +110,7 @@ body: |
...
---
-# GFX9-LABEL: name: NegativeDistance
+# GCN-LABEL: name: NegativeDistance
# GFX9: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 10240
# GFX9: [[V_ADD_CO_U32_e64_4:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_5:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 %{{[0-9]+}}, [[S_MOV_B32_1]]
# GFX9: [[BASE_HI:%[0-9]+]]:vgpr_32, dead %{{[0-9]+}}:sreg_64_xexec = V_ADDC_U32_e64 %{{[0-9]+}}, 0, killed [[V_ADD_CO_U32_e64_5]]
@@ -110,6 +119,10 @@ body: |
# GFX9: [[GLOBAL_LOAD_DWORDX2_1:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], -2048, 0
# GFX9: [[GLOBAL_LOAD_DWORDX2_2:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], 0, 0
+# GFX8: %{{[0-9]+}}:vreg_64 = GLOBAL_LOAD_DWORDX2 %{{[0-9]+}}, 0, 0
+# GFX8: %{{[0-9]+}}:vreg_64 = GLOBAL_LOAD_DWORDX2 %{{[0-9]+}}, 0, 0
+# GFX8: %{{[0-9]+}}:vreg_64 = GLOBAL_LOAD_DWORDX2 %{{[0-9]+}}, 0, 0
+
name: NegativeDistance
body: |
bb.0.entry:
@@ -190,10 +203,13 @@ body: |
...
---
-# GFX9-LABEL: name: diffoporder_add_store
+# GCN-LABEL: name: diffoporder_add_store
# GFX9: GLOBAL_STORE_DWORD %{{[0-9]+}}, %0.sub0, 1000, 0,
# GFX9: GLOBAL_STORE_DWORD %{{[0-9]+}}, %0.sub1, 0, 0,
+# GFX8: GLOBAL_STORE_DWORD %{{[0-9]+}}, %0.sub0, 0, 0
+# GFX8: GLOBAL_STORE_DWORD %{{[0-9]+}}, %0.sub1, 0, 0
+
name: diffoporder_add_store
body: |
bb.0.entry:
@@ -212,3 +228,57 @@ body: |
%13:vreg_64 = REG_SEQUENCE %9, %subreg.sub0, %11, %subreg.sub1
GLOBAL_STORE_DWORD %13, %0.sub1, 0, 0, implicit $exec
...
+---
+
+# GCN-LABEL: name: diffoporder_add_flat_load
+# GFX9: FLAT_LOAD_DWORD %{{[0-9]+}}, 1000, 0,
+# GFX9: FLAT_LOAD_DWORD %{{[0-9]+}}, 0, 0,
+
+# GFX8: FLAT_LOAD_DWORD %{{[0-9]+}}, 0, 0,
+# GFX8: FLAT_LOAD_DWORD %{{[0-9]+}}, 0, 0,
+
+name: diffoporder_add_flat_load
+body: |
+ bb.0.entry:
+
+ %0:vreg_64 = COPY $vgpr0_vgpr1
+
+ %1:sgpr_32 = S_MOV_B32 4000
+ %2:vgpr_32, %3:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %1, 0, implicit $exec
+ %4:vgpr_32, dead %5:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %3, 0, implicit $exec
+ %6:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %4, %subreg.sub1
+ %14:vgpr_32 = FLAT_LOAD_DWORD %6, 0, 0, implicit $exec, implicit $flat_scr
+
+ %8:sgpr_32 = S_MOV_B32 3000
+ %9:vgpr_32, %10:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %8, 0, implicit $exec
+ %11:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %10, 0, implicit $exec
+ %13:vreg_64 = REG_SEQUENCE %9, %subreg.sub0, %11, %subreg.sub1
+ %15:vgpr_32 = FLAT_LOAD_DWORD %13, 0, 0, implicit $exec, implicit $flat_scr
+...
+---
+
+# GCN-LABEL: name: diffoporder_add_flat_store
+# GFX9: FLAT_STORE_DWORD %{{[0-9]+}}, %0.sub0, 1000, 0,
+# GFX9: FLAT_STORE_DWORD %{{[0-9]+}}, %0.sub1, 0, 0,
+
+# GFX8: FLAT_STORE_DWORD %{{[0-9]+}}, %0.sub0, 0, 0,
+# GFX8: FLAT_STORE_DWORD %{{[0-9]+}}, %0.sub1, 0, 0,
+
+name: diffoporder_add_flat_store
+body: |
+ bb.0.entry:
+
+ %0:vreg_64 = COPY $vgpr0_vgpr1
+
+ %1:sgpr_32 = S_MOV_B32 4000
+ %2:vgpr_32, %3:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %1, 0, implicit $exec
+ %4:vgpr_32, dead %5:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %3, 0, implicit $exec
+ %6:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %4, %subreg.sub1
+ FLAT_STORE_DWORD %6, %0.sub0, 0, 0, implicit $exec, implicit $flat_scr
+
+ %8:sgpr_32 = S_MOV_B32 3000
+ %9:vgpr_32, %10:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %8, 0, implicit $exec
+ %11:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %10, 0, implicit $exec
+ %13:vreg_64 = REG_SEQUENCE %9, %subreg.sub0, %11, %subreg.sub1
+ FLAT_STORE_DWORD %13, %0.sub1, 0, 0, implicit $exec, implicit $flat_scr
+...