[llvm] Add a pass "SinkGEPConstOffset" (PR #140657)
via llvm-commits
llvm-commits at lists.llvm.org
Mon May 19 19:04:46 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: None (StevenYangCC)
<details>
<summary>Changes</summary>
[SinkGEPConstOffset] FEAT: Sink constant offsets down a GEP chain to tail for reduction of register usage.
Summary:
Sink constant offsets down the GEP chain to the tail helps reduce
register usage. For example:
%gep0 = getelementptr half, ptr addrspace(3) %ptr, i32 512
%gep1 = getelementptr half, ptr addrspace(3) %gep0, i32 %ofst0
%gep2 = getelementptr half, ptr addrspace(3) %gep1, i32 %ofst1
%data = load half, ptr addrspace(3) %gep2, align 2
==>
%gep0 = getelementptr half, ptr addrspace(3) %ptr, i32 %ofst0
%gep1 = getelementptr half, ptr addrspace(3) %gep0, i32 %ofst1
%gep2 = getelementptr half, ptr addrspace(3) %gep1, i32 512
%data = load half, ptr addrspace(3) %gep2, align 2
---
Patch is 97.45 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/140657.diff
18 Files Affected:
- (modified) llvm/include/llvm/InitializePasses.h (+1)
- (modified) llvm/include/llvm/LinkAllPasses.h (+1)
- (modified) llvm/include/llvm/Transforms/Scalar.h (+7)
- (added) llvm/include/llvm/Transforms/Scalar/SinkGEPConstOffset.h (+27)
- (modified) llvm/lib/Passes/PassBuilder.cpp (+1)
- (modified) llvm/lib/Passes/PassRegistry.def (+2)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (+4)
- (modified) llvm/lib/Transforms/Scalar/CMakeLists.txt (+1)
- (modified) llvm/lib/Transforms/Scalar/Scalar.cpp (+1)
- (added) llvm/lib/Transforms/Scalar/SinkGEPConstOffset.cpp (+260)
- (modified) llvm/test/CodeGen/AMDGPU/llc-pipeline.ll (+3)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.ll (+18-18)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sched.group.barrier.iterative.ll (+54-54)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sched.group.barrier.ll (+426-424)
- (modified) llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll (+5-5)
- (modified) llvm/test/CodeGen/AMDGPU/schedule-amdgpu-trackers.ll (+2-2)
- (added) llvm/test/Transforms/SinkGEPConstOffset/AMDGPU/sink-gep-const-offset.ll (+106)
- (modified) llvm/utils/gn/secondary/llvm/lib/Transforms/Scalar/BUILD.gn (+1)
``````````diff
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
index 42610d505c2bd..07656c0155d87 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -286,6 +286,7 @@ void initializeScalarizerLegacyPassPass(PassRegistry &);
void initializeScavengerTestPass(PassRegistry &);
void initializeScopedNoAliasAAWrapperPassPass(PassRegistry &);
void initializeSeparateConstOffsetFromGEPLegacyPassPass(PassRegistry &);
+void initializeSinkGEPConstOffsetLegacyPassPass(PassRegistry &);
void initializeShadowStackGCLoweringPass(PassRegistry &);
void initializeShrinkWrapLegacyPass(PassRegistry &);
void initializeSingleLoopExtractorPass(PassRegistry &);
diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h
index 5965be676ea69..8c12aef44f1b2 100644
--- a/llvm/include/llvm/LinkAllPasses.h
+++ b/llvm/include/llvm/LinkAllPasses.h
@@ -134,6 +134,7 @@ struct ForcePassLinking {
(void)llvm::createPartiallyInlineLibCallsPass();
(void)llvm::createScalarizerPass();
(void)llvm::createSeparateConstOffsetFromGEPPass();
+ (void)llvm::createSinkGEPConstOffsetPass();
(void)llvm::createSpeculativeExecutionPass();
(void)llvm::createSpeculativeExecutionIfHasBranchDivergencePass();
(void)llvm::createStraightLineStrengthReducePass();
diff --git a/llvm/include/llvm/Transforms/Scalar.h b/llvm/include/llvm/Transforms/Scalar.h
index fc772a7639c47..389324c25cdaf 100644
--- a/llvm/include/llvm/Transforms/Scalar.h
+++ b/llvm/include/llvm/Transforms/Scalar.h
@@ -164,6 +164,13 @@ FunctionPass *createPartiallyInlineLibCallsPass();
//
FunctionPass *createSeparateConstOffsetFromGEPPass(bool LowerGEP = false);
+//===----------------------------------------------------------------------===//
+//
+// SinkGEPConstOffset - Sink constant offsets down the GEP chain to the tail for
+// reduction of register usage.
+//
+FunctionPass *createSinkGEPConstOffsetPass();
+
//===----------------------------------------------------------------------===//
//
// SpeculativeExecution - Aggressively hoist instructions to enable
diff --git a/llvm/include/llvm/Transforms/Scalar/SinkGEPConstOffset.h b/llvm/include/llvm/Transforms/Scalar/SinkGEPConstOffset.h
new file mode 100644
index 0000000000000..43f64d818dc22
--- /dev/null
+++ b/llvm/include/llvm/Transforms/Scalar/SinkGEPConstOffset.h
@@ -0,0 +1,27 @@
+//===- SinkGEPConstOffset.h -----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_SCALAR_SINKGEPCONSTOFFSET_H
+#define LLVM_TRANSFORMS_SCALAR_SINKGEPCONSTOFFSET_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+class SinkGEPConstOffsetPass
+ : public PassInfoMixin<SinkGEPConstOffsetPass> {
+public:
+ SinkGEPConstOffsetPass() {}
+ void printPipeline(raw_ostream &OS,
+ function_ref<StringRef(StringRef)> MapClassName2PassName);
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &);
+};
+
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_SCALAR_SINKGEPCONSTOFFSET_H
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 56e91703cb019..08faa7f0cb14c 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -329,6 +329,7 @@
#include "llvm/Transforms/Scalar/ScalarizeMaskedMemIntrin.h"
#include "llvm/Transforms/Scalar/Scalarizer.h"
#include "llvm/Transforms/Scalar/SeparateConstOffsetFromGEP.h"
+#include "llvm/Transforms/Scalar/SinkGEPConstOffset.h"
#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
#include "llvm/Transforms/Scalar/SimplifyCFG.h"
#include "llvm/Transforms/Scalar/Sink.h"
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index 94dabe290213d..5cfde2380705b 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -474,6 +474,8 @@ FUNCTION_PASS("sccp", SCCPPass())
FUNCTION_PASS("select-optimize", SelectOptimizePass(TM))
FUNCTION_PASS("separate-const-offset-from-gep",
SeparateConstOffsetFromGEPPass())
+FUNCTION_PASS("sink-gep-const-offset",
+ SinkGEPConstOffsetPass())
FUNCTION_PASS("sink", SinkingPass())
FUNCTION_PASS("sjlj-eh-prepare", SjLjEHPreparePass(TM))
FUNCTION_PASS("slp-vectorizer", SLPVectorizerPass())
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index e24d8481408ad..bad17d95e4a7d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -104,6 +104,7 @@
#include "llvm/Transforms/Scalar/LoopDataPrefetch.h"
#include "llvm/Transforms/Scalar/NaryReassociate.h"
#include "llvm/Transforms/Scalar/SeparateConstOffsetFromGEP.h"
+#include "llvm/Transforms/Scalar/SinkGEPConstOffset.h"
#include "llvm/Transforms/Scalar/Sink.h"
#include "llvm/Transforms/Scalar/StraightLineStrengthReduce.h"
#include "llvm/Transforms/Scalar/StructurizeCFG.h"
@@ -1209,6 +1210,7 @@ void AMDGPUPassConfig::addStraightLineScalarOptimizationPasses() {
if (isPassEnabled(EnableLoopPrefetch, CodeGenOptLevel::Aggressive))
addPass(createLoopDataPrefetchPass());
addPass(createSeparateConstOffsetFromGEPPass());
+ addPass(createSinkGEPConstOffsetPass());
// ReassociateGEPs exposes more opportunities for SLSR. See
// the example in reassociate-geps-and-slsr.ll.
addPass(createStraightLineStrengthReducePass());
@@ -2287,6 +2289,8 @@ void AMDGPUCodeGenPassBuilder::addStraightLineScalarOptimizationPasses(
addPass(SeparateConstOffsetFromGEPPass());
+ addPass(SinkGEPConstOffsetPass());
+
// ReassociateGEPs exposes more opportunities for SLSR. See
// the example in reassociate-geps-and-slsr.ll.
addPass(StraightLineStrengthReducePass());
diff --git a/llvm/lib/Transforms/Scalar/CMakeLists.txt b/llvm/lib/Transforms/Scalar/CMakeLists.txt
index 84a5b02043d01..5431e91eacea8 100644
--- a/llvm/lib/Transforms/Scalar/CMakeLists.txt
+++ b/llvm/lib/Transforms/Scalar/CMakeLists.txt
@@ -71,6 +71,7 @@ add_llvm_component_library(LLVMScalarOpts
Scalarizer.cpp
ScalarizeMaskedMemIntrin.cpp
SeparateConstOffsetFromGEP.cpp
+ SinkGEPConstOffset.cpp
SimpleLoopUnswitch.cpp
SimplifyCFGPass.cpp
Sink.cpp
diff --git a/llvm/lib/Transforms/Scalar/Scalar.cpp b/llvm/lib/Transforms/Scalar/Scalar.cpp
index c7e4a3e824700..5e2d1132097ba 100644
--- a/llvm/lib/Transforms/Scalar/Scalar.cpp
+++ b/llvm/lib/Transforms/Scalar/Scalar.cpp
@@ -45,6 +45,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeSinkingLegacyPassPass(Registry);
initializeTailCallElimPass(Registry);
initializeSeparateConstOffsetFromGEPLegacyPassPass(Registry);
+ initializeSinkGEPConstOffsetLegacyPassPass(Registry);
initializeSpeculativeExecutionLegacyPassPass(Registry);
initializeStraightLineStrengthReduceLegacyPassPass(Registry);
initializePlaceBackedgeSafepointsLegacyPassPass(Registry);
diff --git a/llvm/lib/Transforms/Scalar/SinkGEPConstOffset.cpp b/llvm/lib/Transforms/Scalar/SinkGEPConstOffset.cpp
new file mode 100644
index 0000000000000..2790e2f56445f
--- /dev/null
+++ b/llvm/lib/Transforms/Scalar/SinkGEPConstOffset.cpp
@@ -0,0 +1,260 @@
+//===- SinkGEPConstOffset.cpp -------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Scalar/SinkGEPConstOffset.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include <cassert>
+#include <cstdint>
+#include <string>
+
+using namespace llvm;
+using namespace llvm::PatternMatch;
+
+static cl::opt<bool> DisableSinkGEPConstOffset(
+ "disable-sink-gep-const-offset", cl::init(false),
+ cl::desc("Do not sink the constant offset from a GEP instruction"),
+ cl::Hidden);
+
+namespace {
+
+/// A pass that tries to sink const offset in GEP chain to tail.
+/// It is a FunctionPass because searching for the constant offset may inspect
+/// other basic blocks.
+class SinkGEPConstOffsetLegacyPass : public FunctionPass {
+public:
+ static char ID;
+
+ SinkGEPConstOffsetLegacyPass() : FunctionPass(ID) {
+ initializeSinkGEPConstOffsetLegacyPassPass(
+ *PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ }
+
+ bool runOnFunction(Function &F) override;
+};
+
+/// A pass that tries to sink const offset in GEP chain to tail.
+/// It is a FunctionPass because searching for the constant offset may inspect
+/// other basic blocks.
+class SinkGEPConstOffset {
+public:
+ SinkGEPConstOffset() {}
+
+ bool run(Function &F);
+
+private:
+ /// Sink constant offset in a GEP chain to tail. For example,
+ /// %gep0 = getelementptr half, ptr addrspace(3) %ptr, i32 512
+ /// %gep1 = getelementptr half, ptr addrspace(3) %gep0, i32 %ofst0
+ /// %gep2 = getelementptr half, ptr addrspace(3) %gep1, i32 %ofst1
+ /// %data = load half, ptr addrspace(3) %gep2, align 2
+ /// ==>
+ /// %gep0 = getelementptr half, ptr addrspace(3) %ptr, i32 %ofst0
+ /// %gep1 = getelementptr half, ptr addrspace(3) %gep0, i32 %ofst1
+ /// %gep2 = getelementptr half, ptr addrspace(3) %gep1, i32 512
+ /// %data = load half, ptr addrspace(3) %gep2, align 2
+ ///
+ /// Return true if Ptr is a candidate for upper GEP in recursive calling.
+ bool sinkGEPConstantOffset(Value *Ptr, bool &Changed);
+
+ const DataLayout *DL = nullptr;
+};
+
+} // end anonymous namespace
+
+char SinkGEPConstOffsetLegacyPass::ID = 0;
+
+INITIALIZE_PASS_BEGIN(
+ SinkGEPConstOffsetLegacyPass, "sink-gep-const-offset",
+ "Sink const offsets down the GEP chain to the tail for reduction of "
+ "register usage", false, false)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
+INITIALIZE_PASS_END(
+ SinkGEPConstOffsetLegacyPass, "sink-gep-const-offset",
+ "Sink const offsets down the GEP chain to the tail for reduction of "
+ "register usage", false, false)
+
+FunctionPass *llvm::createSinkGEPConstOffsetPass() {
+ return new SinkGEPConstOffsetLegacyPass();
+}
+
+bool SinkGEPConstOffsetLegacyPass::runOnFunction(Function &F) {
+ if (skipFunction(F))
+ return false;
+
+ SinkGEPConstOffset Impl;
+ return Impl.run(F);
+}
+
+bool SinkGEPConstOffset::run(Function &F) {
+ if (DisableSinkGEPConstOffset)
+ return false;
+
+ DL = &F.getDataLayout();
+
+ bool Changed = false;
+ for (BasicBlock &B : F)
+ for (Instruction &I : llvm::make_early_inc_range(B))
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I))
+ sinkGEPConstantOffset(GEP, Changed);
+
+ return Changed;
+}
+
+bool SinkGEPConstOffset::sinkGEPConstantOffset(Value *Ptr, bool &Changed) {
+ // The purpose of this function is to sink the constant offsets in the GEP
+ // chain to the tail of the chain.
+ // This algorithm is implemented recursively, the algorithm starts from the
+ // tail of the chain through the DFS method and shifts the constant offset
+ // of the GEP step by step upwards by bottom-up DFS method, i.e. step by step
+ // down to the tail.
+ // A simple example is given:
+ /// %gep0 = getelementptr half, ptr addrspace(3) %ptr, i32 512
+ /// %gep1 = getelementptr half, ptr addrspace(3) %gep0, i32 %ofst0
+ /// %gep2 = getelementptr half, ptr addrspace(3) %gep1, i32 %ofst1
+ /// %data = load half, ptr addrspace(3) %gep2, align 2
+ /// ==>
+ /// %gep0 = getelementptr half, ptr addrspace(3) %ptr, i32 %ofst0
+ /// %gep1 = getelementptr half, ptr addrspace(3) %gep0, i32 %ofst1
+ /// %gep2 = getelementptr half, ptr addrspace(3) %gep1, i32 512
+ /// %data = load half, ptr addrspace(3) %gep2, align 2
+ GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
+ if (!GEP)
+ return false;
+
+ if (!GEP->getParent())
+ return false;
+
+ bool BaseResult = sinkGEPConstantOffset(GEP->getPointerOperand(), Changed);
+
+ if (GEP->getNumIndices() != 1)
+ return false;
+
+ ConstantInt *C = nullptr;
+ Value *Idx = GEP->getOperand(1);
+ bool MatchConstant = match(Idx, m_ConstantInt(C));
+
+ if (!BaseResult)
+ return MatchConstant;
+
+ Type *ResTy = GEP->getResultElementType();
+ GetElementPtrInst *BaseGEP =
+ cast<GetElementPtrInst>(GEP->getPointerOperand());
+ Value *BaseIdx = BaseGEP->getOperand(1);
+ Type *BaseResTy = BaseGEP->getResultElementType();
+
+ if (MatchConstant) {
+ // %gep0 = getelementptr half, ptr addrspace(3) %ptr, i32 8
+ // %gep1 = getelementptr half, ptr addrspace(3) %gep0, i32 4
+ // as:
+ // %gep1 = getelementptr half, ptr addrspace(3) %ptr, i32 12
+ Type *NewResTy = nullptr;
+ int64_t NewIdxValue = 0;
+ if (ResTy == BaseResTy) {
+ NewResTy = ResTy;
+ NewIdxValue = cast<ConstantInt>(BaseIdx)->getSExtValue() +
+ cast<ConstantInt>(Idx)->getSExtValue();
+ } else {
+ NewResTy = Type::getInt8Ty(GEP->getContext());
+ NewIdxValue = (cast<ConstantInt>(BaseIdx)->getSExtValue() *
+ DL->getTypeAllocSize(BaseResTy)) +
+ (cast<ConstantInt>(Idx)->getSExtValue() *
+ DL->getTypeAllocSize(ResTy));
+ }
+ assert(NewResTy);
+ Type *NewIdxType = (Idx->getType()->getPrimitiveSizeInBits() >
+ BaseIdx->getType()->getPrimitiveSizeInBits())
+ ? Idx->getType() : BaseIdx->getType();
+ Constant *NewIdx = ConstantInt::get(NewIdxType, NewIdxValue);
+ auto *NewGEP = GetElementPtrInst::Create(
+ NewResTy, BaseGEP->getPointerOperand(), NewIdx);
+ NewGEP->setIsInBounds(GEP->isInBounds());
+ NewGEP->insertBefore(GEP->getIterator());
+ NewGEP->takeName(GEP);
+
+ GEP->replaceAllUsesWith(NewGEP);
+ RecursivelyDeleteTriviallyDeadInstructions(GEP);
+
+ Changed = true;
+ return true;
+ }
+
+ // %gep0 = getelementptr half, ptr addrspace(3) %ptr, i32 8
+ // %gep1 = getelementptr half, ptr addrspace(3) %gep0, i32 %idx
+ // as:
+ // %gepx0 = getelementptr half, ptr addrspace(3) %ptr, i32 %idx
+ // %gepx1 = getelementptr half, ptr addrspace(3) %gepx0, i32 8
+ auto *GEPX0 =
+ GetElementPtrInst::Create(ResTy, BaseGEP->getPointerOperand(), Idx);
+ GEPX0->setIsInBounds(BaseGEP->isInBounds());
+ GEPX0->insertBefore(GEP->getIterator());
+ auto *GEPX1 = GetElementPtrInst::Create(BaseResTy, GEPX0, BaseIdx);
+ GEPX1->setIsInBounds(GEP->isInBounds());
+ GEPX1->insertBefore(GEP->getIterator());
+ GEPX1->takeName(GEP);
+
+ GEP->replaceAllUsesWith(GEPX1);
+ RecursivelyDeleteTriviallyDeadInstructions(GEP);
+
+ Changed = true;
+ return true;
+}
+
+void SinkGEPConstOffsetPass::printPipeline(
+ raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
+ static_cast<PassInfoMixin<SinkGEPConstOffsetPass> *>(this)
+ ->printPipeline(OS, MapClassName2PassName);
+}
+
+PreservedAnalyses
+SinkGEPConstOffsetPass::run(Function &F, FunctionAnalysisManager &AM) {
+ SinkGEPConstOffset Impl;
+ if (!Impl.run(F))
+ return PreservedAnalyses::all();
+
+ PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
+}
diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
index 29736b62f2c00..9c18b76f7d972 100644
--- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
+++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
@@ -487,6 +487,7 @@
; GCN-O1-OPTS-NEXT: Scalar Evolution Analysis
; GCN-O1-OPTS-NEXT: Loop Data Prefetch
; GCN-O1-OPTS-NEXT: Split GEPs to a variadic base and a constant offset for better CSE
+; GCN-O1-OPTS-NEXT: Sink const offsets down the GEP chain to the tail for reduction of register usage
; GCN-O1-OPTS-NEXT: Scalar Evolution Analysis
; GCN-O1-OPTS-NEXT: Straight line strength reduction
; GCN-O1-OPTS-NEXT: Early CSE
@@ -794,6 +795,7 @@
; GCN-O2-NEXT: Natural Loop Information
; GCN-O2-NEXT: AMDGPU Promote Alloca
; GCN-O2-NEXT: Split GEPs to a variadic base and a constant offset for better CSE
+; GCN-O2-NEXT: Sink const offsets down the GEP chain to the tail for reduction of register usage
; GCN-O2-NEXT: Scalar Evolution Analysis
; GCN-O2-NEXT: Straight line strength reduction
; GCN-O2-NEXT: Early CSE
@@ -1111,6 +1113,7 @@
; GCN-O3-NEXT: Natural Loop Information
; GCN-O3-NEXT: AMDGPU Promote Alloca
; GCN-O3-NEXT: Split GEPs to a variadic base and a constant offset for better CSE
+; GCN-O3-NEXT: Sink const offsets down the GEP chain to the tail for reduction of register usage
; GCN-O3-NEXT: Scalar Evolution Analysis
; GCN-O3-NEXT: Straight line strength reduction
; GCN-O3-NEXT: Basic Alias Analysis (stateless AA impl)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.ll
index 565ad295ebbb3..4f5d93d767a7a 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.ll
@@ -21,15 +21,15 @@ define amdgpu_kernel void @test_iglp_opt_mfma_gemm(ptr addrspace(3) noalias %in,
; GCN-NEXT: ; iglp_opt mask(0x00000000)
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_add_u32_e32 v1, s0, v0
-; GCN-NEXT: v_add_u32_e32 v2, 0x6000, v1
-; GCN-NEXT: ds_read_b128 a[28:31], v2 offset:57456
-; GCN-NEXT: ds_read_b128 a[24:27], v2 offset:57440
-; GCN-NEXT: ds_read_b128 a[20:23], v2 offset:57424
-; GCN-NEXT: ds_read_b128 a[16:19], v2 offset:57408
-; GCN-NEXT: ds_read_b128 a[0:3], v2 offset:57344
-; GCN-NEXT: ds_read_b128 a[4:7], v2 offset:57360
-; GCN-NEXT: ds_read_b128 a[8:11], v2 offset:57376
-; GCN-NEXT: ds_read_b128 a[12:15], v2 offset:57392
+; GCN-NEXT: v_add_u32_e32 v2, 0x14000, v1
+; GCN-NEXT: ds_read_b128 a[28:31], v2 offset:112
+; GCN-NEXT: ds_read_b128 a[24:27], v2 offset:96
+; GCN-NEXT: ds_read_b128 a[20:23], v2 offset:80
+; GCN-NEXT: ds_read_b128 a[16:19], v2 offset:64
+; GCN-NEXT: ds_read_b128 a[0:3], v2
+; GCN-NEXT: ds_read_b128 a[4:7], v2 offset:16
+; GCN-NEXT: ds_read_b128 a[8:11], v2 offset:32
+; GCN-NEXT: ds_read_b128 a[12:15], v2 offset:48
; GCN-NEXT: v_mov_b32_e32 v2, 1.0
; GCN-NEXT: ds_read_b128 a[60:63], v1 offset:49264
; GCN-NEXT: ds_read_b128 a[56:59], v1 offset:49248
@@ -199,17 +199,17 @@ define amdgpu_kernel void @test_iglp_opt_rev_mfma_gemm(ptr addrspace(3) noalias
; GCN-NEXT: ds_read_b128 a[72:75], v1 offset:49184
; GCN-NEXT: ds_read_b128 a[68:71], v1 offset:49168
; GCN-NEXT: ds_read_b128 a[64:67], v1 offset:49152
-; GCN-NEXT: v_add_u32_e32 v1, 0x6000, v1
+; GCN-NEXT: v_add_u32_e32 v1, 0x14000, v1
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mfma_f32_32x32x1f32 a[64:95], v2, v3, a[64:95]
-; GCN-NEXT: ds_read_b128 a[60:63], v1 offset:57456
-; GCN-NEXT: ds_read_b128 a[56:59], v1 offset:57440
-; GCN-NEXT: ds_read_b128 a...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/140657
More information about the llvm-commits
mailing list