[llvm] Add a pass "SinkGEPConstOffset" (PR #140657)

Mon May 19 19:04:46 PDT 2025

llvmbot wrote:




@llvm/pr-subscribers-llvm-transforms

Author: None (StevenYangCC)

<details>
<summary>Changes</summary>

[SinkGEPConstOffset] FEAT: Sink constant offsets down a GEP chain to the tail to reduce register usage.

Summary:

Sinking constant offsets down the GEP chain to the tail helps reduce
register usage. For example:
%gep0 = getelementptr half, ptr addrspace(3) %ptr, i32 512
%gep1 = getelementptr half, ptr addrspace(3) %gep0, i32 %ofst0
%gep2 = getelementptr half, ptr addrspace(3) %gep1, i32 %ofst1
%data = load half, ptr addrspace(3) %gep2, align 2

==>

%gep0 = getelementptr half, ptr addrspace(3) %ptr, i32 %ofst0
%gep1 = getelementptr half, ptr addrspace(3) %gep0, i32 %ofst1
%gep2 = getelementptr half, ptr addrspace(3) %gep1, i32 512
%data = load half, ptr addrspace(3) %gep2, align 2


---

Patch is 97.45 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/140657.diff


18 Files Affected:

- (modified) llvm/include/llvm/InitializePasses.h (+1) 
- (modified) llvm/include/llvm/LinkAllPasses.h (+1) 
- (modified) llvm/include/llvm/Transforms/Scalar.h (+7) 
- (added) llvm/include/llvm/Transforms/Scalar/SinkGEPConstOffset.h (+27) 
- (modified) llvm/lib/Passes/PassBuilder.cpp (+1) 
- (modified) llvm/lib/Passes/PassRegistry.def (+2) 
- (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (+4) 
- (modified) llvm/lib/Transforms/Scalar/CMakeLists.txt (+1) 
- (modified) llvm/lib/Transforms/Scalar/Scalar.cpp (+1) 
- (added) llvm/lib/Transforms/Scalar/SinkGEPConstOffset.cpp (+260) 
- (modified) llvm/test/CodeGen/AMDGPU/llc-pipeline.ll (+3) 
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.ll (+18-18) 
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sched.group.barrier.iterative.ll (+54-54) 
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sched.group.barrier.ll (+426-424) 
- (modified) llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll (+5-5) 
- (modified) llvm/test/CodeGen/AMDGPU/schedule-amdgpu-trackers.ll (+2-2) 
- (added) llvm/test/Transforms/SinkGEPConstOffset/AMDGPU/sink-gep-const-offset.ll (+106) 
- (modified) llvm/utils/gn/secondary/llvm/lib/Transforms/Scalar/BUILD.gn (+1) 


``````````diff

diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
index 42610d505c2bd..07656c0155d87 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -286,6 +286,7 @@ void initializeScalarizerLegacyPassPass(PassRegistry &);
 void initializeScavengerTestPass(PassRegistry &);
 void initializeScopedNoAliasAAWrapperPassPass(PassRegistry &);
 void initializeSeparateConstOffsetFromGEPLegacyPassPass(PassRegistry &);
+void initializeSinkGEPConstOffsetLegacyPassPass(PassRegistry &);
 void initializeShadowStackGCLoweringPass(PassRegistry &);
 void initializeShrinkWrapLegacyPass(PassRegistry &);
 void initializeSingleLoopExtractorPass(PassRegistry &);
diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h
index 5965be676ea69..8c12aef44f1b2 100644
--- a/llvm/include/llvm/LinkAllPasses.h
+++ b/llvm/include/llvm/LinkAllPasses.h
@@ -134,6 +134,7 @@ struct ForcePassLinking {
     (void)llvm::createPartiallyInlineLibCallsPass();
     (void)llvm::createScalarizerPass();
     (void)llvm::createSeparateConstOffsetFromGEPPass();
+    (void)llvm::createSinkGEPConstOffsetPass();
     (void)llvm::createSpeculativeExecutionPass();
     (void)llvm::createSpeculativeExecutionIfHasBranchDivergencePass();
     (void)llvm::createStraightLineStrengthReducePass();
diff --git a/llvm/include/llvm/Transforms/Scalar.h b/llvm/include/llvm/Transforms/Scalar.h
index fc772a7639c47..389324c25cdaf 100644
--- a/llvm/include/llvm/Transforms/Scalar.h
+++ b/llvm/include/llvm/Transforms/Scalar.h
@@ -164,6 +164,13 @@ FunctionPass *createPartiallyInlineLibCallsPass();
 //
 FunctionPass *createSeparateConstOffsetFromGEPPass(bool LowerGEP = false);
 
+//===----------------------------------------------------------------------===//
+//
+// SinkGEPConstOffset - Sink constant offsets down the GEP chain to the tail for
+// reduction of register usage.
+//
+FunctionPass *createSinkGEPConstOffsetPass();
+
 //===----------------------------------------------------------------------===//
 //
 // SpeculativeExecution - Aggressively hoist instructions to enable
diff --git a/llvm/include/llvm/Transforms/Scalar/SinkGEPConstOffset.h b/llvm/include/llvm/Transforms/Scalar/SinkGEPConstOffset.h
new file mode 100644
index 0000000000000..43f64d818dc22
--- /dev/null
+++ b/llvm/include/llvm/Transforms/Scalar/SinkGEPConstOffset.h
@@ -0,0 +1,27 @@
+//===- SinkGEPConstOffset.h -----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_SCALAR_SINKGEPCONSTOFFSET_H
+#define LLVM_TRANSFORMS_SCALAR_SINKGEPCONSTOFFSET_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+class SinkGEPConstOffsetPass
+    : public PassInfoMixin<SinkGEPConstOffsetPass> {
+public:
+  SinkGEPConstOffsetPass() {}
+  void printPipeline(raw_ostream &OS,
+                     function_ref<StringRef(StringRef)> MapClassName2PassName);
+  PreservedAnalyses run(Function &F, FunctionAnalysisManager &);
+};
+
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_SCALAR_SINKGEPCONSTOFFSET_H
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 56e91703cb019..08faa7f0cb14c 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -329,6 +329,7 @@
 #include "llvm/Transforms/Scalar/ScalarizeMaskedMemIntrin.h"
 #include "llvm/Transforms/Scalar/Scalarizer.h"
 #include "llvm/Transforms/Scalar/SeparateConstOffsetFromGEP.h"
+#include "llvm/Transforms/Scalar/SinkGEPConstOffset.h"
 #include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
 #include "llvm/Transforms/Scalar/SimplifyCFG.h"
 #include "llvm/Transforms/Scalar/Sink.h"
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index 94dabe290213d..5cfde2380705b 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -474,6 +474,8 @@ FUNCTION_PASS("sccp", SCCPPass())
 FUNCTION_PASS("select-optimize", SelectOptimizePass(TM))
 FUNCTION_PASS("separate-const-offset-from-gep",
               SeparateConstOffsetFromGEPPass())
+FUNCTION_PASS("sink-gep-const-offset",
+              SinkGEPConstOffsetPass())
 FUNCTION_PASS("sink", SinkingPass())
 FUNCTION_PASS("sjlj-eh-prepare", SjLjEHPreparePass(TM))
 FUNCTION_PASS("slp-vectorizer", SLPVectorizerPass())
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index e24d8481408ad..bad17d95e4a7d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -104,6 +104,7 @@
 #include "llvm/Transforms/Scalar/LoopDataPrefetch.h"
 #include "llvm/Transforms/Scalar/NaryReassociate.h"
 #include "llvm/Transforms/Scalar/SeparateConstOffsetFromGEP.h"
+#include "llvm/Transforms/Scalar/SinkGEPConstOffset.h"
 #include "llvm/Transforms/Scalar/Sink.h"
 #include "llvm/Transforms/Scalar/StraightLineStrengthReduce.h"
 #include "llvm/Transforms/Scalar/StructurizeCFG.h"
@@ -1209,6 +1210,7 @@ void AMDGPUPassConfig::addStraightLineScalarOptimizationPasses() {
   if (isPassEnabled(EnableLoopPrefetch, CodeGenOptLevel::Aggressive))
     addPass(createLoopDataPrefetchPass());
   addPass(createSeparateConstOffsetFromGEPPass());
+  addPass(createSinkGEPConstOffsetPass());
   // ReassociateGEPs exposes more opportunities for SLSR. See
   // the example in reassociate-geps-and-slsr.ll.
   addPass(createStraightLineStrengthReducePass());
@@ -2287,6 +2289,8 @@ void AMDGPUCodeGenPassBuilder::addStraightLineScalarOptimizationPasses(
 
   addPass(SeparateConstOffsetFromGEPPass());
 
+  addPass(SinkGEPConstOffsetPass());
+
   // ReassociateGEPs exposes more opportunities for SLSR. See
   // the example in reassociate-geps-and-slsr.ll.
   addPass(StraightLineStrengthReducePass());
diff --git a/llvm/lib/Transforms/Scalar/CMakeLists.txt b/llvm/lib/Transforms/Scalar/CMakeLists.txt
index 84a5b02043d01..5431e91eacea8 100644
--- a/llvm/lib/Transforms/Scalar/CMakeLists.txt
+++ b/llvm/lib/Transforms/Scalar/CMakeLists.txt
@@ -71,6 +71,7 @@ add_llvm_component_library(LLVMScalarOpts
   Scalarizer.cpp
   ScalarizeMaskedMemIntrin.cpp
   SeparateConstOffsetFromGEP.cpp
+  SinkGEPConstOffset.cpp
   SimpleLoopUnswitch.cpp
   SimplifyCFGPass.cpp
   Sink.cpp
diff --git a/llvm/lib/Transforms/Scalar/Scalar.cpp b/llvm/lib/Transforms/Scalar/Scalar.cpp
index c7e4a3e824700..5e2d1132097ba 100644
--- a/llvm/lib/Transforms/Scalar/Scalar.cpp
+++ b/llvm/lib/Transforms/Scalar/Scalar.cpp
@@ -45,6 +45,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
   initializeSinkingLegacyPassPass(Registry);
   initializeTailCallElimPass(Registry);
   initializeSeparateConstOffsetFromGEPLegacyPassPass(Registry);
+  initializeSinkGEPConstOffsetLegacyPassPass(Registry);
   initializeSpeculativeExecutionLegacyPassPass(Registry);
   initializeStraightLineStrengthReduceLegacyPassPass(Registry);
   initializePlaceBackedgeSafepointsLegacyPassPass(Registry);
diff --git a/llvm/lib/Transforms/Scalar/SinkGEPConstOffset.cpp b/llvm/lib/Transforms/Scalar/SinkGEPConstOffset.cpp
new file mode 100644
index 0000000000000..2790e2f56445f
--- /dev/null
+++ b/llvm/lib/Transforms/Scalar/SinkGEPConstOffset.cpp
@@ -0,0 +1,260 @@
+//===- SinkGEPConstOffset.cpp -------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Scalar/SinkGEPConstOffset.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include <cassert>
+#include <cstdint>
+#include <string>
+
+using namespace llvm;
+using namespace llvm::PatternMatch;
+
+static cl::opt<bool> DisableSinkGEPConstOffset(
+    "disable-sink-gep-const-offset", cl::init(false),
+    cl::desc("Do not sink the constant offset from a GEP instruction"),
+    cl::Hidden);
+
+namespace {
+
+/// A pass that tries to sink const offset in GEP chain to tail.
+/// It is a FunctionPass because searching for the constant offset may inspect
+/// other basic blocks.
+class SinkGEPConstOffsetLegacyPass : public FunctionPass {
+public:
+  static char ID;
+
+  SinkGEPConstOffsetLegacyPass() : FunctionPass(ID) {
+    initializeSinkGEPConstOffsetLegacyPassPass(
+        *PassRegistry::getPassRegistry());
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+  }
+
+  bool runOnFunction(Function &F) override;
+};
+
+/// A pass that tries to sink const offset in GEP chain to tail.
+/// It is a FunctionPass because searching for the constant offset may inspect
+/// other basic blocks.
+class SinkGEPConstOffset {
+public:
+  SinkGEPConstOffset() {}
+
+  bool run(Function &F);
+
+private:
+  /// Sink constant offset in a GEP chain to tail. For example,
+  /// %gep0 = getelementptr half, ptr addrspace(3) %ptr, i32 512
+  /// %gep1 = getelementptr half, ptr addrspace(3) %gep0, i32 %ofst0
+  /// %gep2 = getelementptr half, ptr addrspace(3) %gep1, i32 %ofst1
+  /// %data = load half, ptr addrspace(3) %gep2, align 2
+  /// ==>
+  /// %gep0 = getelementptr half, ptr addrspace(3) %ptr, i32 %ofst0
+  /// %gep1 = getelementptr half, ptr addrspace(3) %gep0, i32 %ofst1
+  /// %gep2 = getelementptr half, ptr addrspace(3) %gep1, i32 512
+  /// %data = load half, ptr addrspace(3) %gep2, align 2
+  ///
+  /// Return true if Ptr is a candidate for upper GEP in recursive calling.
+  bool sinkGEPConstantOffset(Value *Ptr, bool &Changed);
+
+  const DataLayout *DL = nullptr;
+};
+
+} // end anonymous namespace
+
+char SinkGEPConstOffsetLegacyPass::ID = 0;
+
+INITIALIZE_PASS_BEGIN(
+    SinkGEPConstOffsetLegacyPass, "sink-gep-const-offset",
+    "Sink const offsets down the GEP chain to the tail for reduction of "
+    "register usage", false, false)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
+INITIALIZE_PASS_END(
+    SinkGEPConstOffsetLegacyPass, "sink-gep-const-offset",
+    "Sink const offsets down the GEP chain to the tail for reduction of "
+    "register usage", false, false)
+
+FunctionPass *llvm::createSinkGEPConstOffsetPass() {
+  return new SinkGEPConstOffsetLegacyPass();
+}
+
+bool SinkGEPConstOffsetLegacyPass::runOnFunction(Function &F) {
+  if (skipFunction(F))
+    return false;
+
+  SinkGEPConstOffset Impl;
+  return Impl.run(F);
+}
+
+bool SinkGEPConstOffset::run(Function &F) {
+  if (DisableSinkGEPConstOffset)
+    return false;
+
+  DL = &F.getDataLayout();
+
+  bool Changed = false;
+  for (BasicBlock &B : F)
+    for (Instruction &I : llvm::make_early_inc_range(B))
+      if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I))
+        sinkGEPConstantOffset(GEP, Changed);
+
+  return Changed;
+}
+
+bool SinkGEPConstOffset::sinkGEPConstantOffset(Value *Ptr, bool &Changed) {
+  // The purpose of this function is to sink the constant offsets in the GEP
+  // chain to the tail of the chain.
+  // This algorithm is implemented recursively, the algorithm starts from the
+  // tail of the chain through the DFS method and shifts the constant offset
+  // of the GEP step by step upwards by bottom-up DFS method, i.e. step by step
+  // down to the tail.
+  // A simple example is given:
+  /// %gep0 = getelementptr half, ptr addrspace(3) %ptr, i32 512
+  /// %gep1 = getelementptr half, ptr addrspace(3) %gep0, i32 %ofst0
+  /// %gep2 = getelementptr half, ptr addrspace(3) %gep1, i32 %ofst1
+  /// %data = load half, ptr addrspace(3) %gep2, align 2
+  /// ==>
+  /// %gep0 = getelementptr half, ptr addrspace(3) %ptr, i32 %ofst0
+  /// %gep1 = getelementptr half, ptr addrspace(3) %gep0, i32 %ofst1
+  /// %gep2 = getelementptr half, ptr addrspace(3) %gep1, i32 512
+  /// %data = load half, ptr addrspace(3) %gep2, align 2
+  GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
+  if (!GEP)
+    return false;
+
+  if (!GEP->getParent())
+    return false;
+
+  bool BaseResult = sinkGEPConstantOffset(GEP->getPointerOperand(), Changed);
+
+  if (GEP->getNumIndices() != 1)
+    return false;
+
+  ConstantInt *C = nullptr;
+  Value *Idx = GEP->getOperand(1);
+  bool MatchConstant = match(Idx, m_ConstantInt(C));
+
+  if (!BaseResult)
+    return MatchConstant;
+
+  Type *ResTy = GEP->getResultElementType();
+  GetElementPtrInst *BaseGEP =
+      cast<GetElementPtrInst>(GEP->getPointerOperand());
+  Value *BaseIdx = BaseGEP->getOperand(1);
+  Type *BaseResTy = BaseGEP->getResultElementType();
+
+  if (MatchConstant) {
+    // %gep0 = getelementptr half, ptr addrspace(3) %ptr, i32 8
+    // %gep1 = getelementptr half, ptr addrspace(3) %gep0, i32 4
+    // as:
+    // %gep1 = getelementptr half, ptr addrspace(3) %ptr, i32 12
+    Type *NewResTy = nullptr;
+    int64_t NewIdxValue = 0;
+    if (ResTy == BaseResTy) {
+      NewResTy = ResTy;
+      NewIdxValue = cast<ConstantInt>(BaseIdx)->getSExtValue() +
+                    cast<ConstantInt>(Idx)->getSExtValue();
+    } else {
+      NewResTy = Type::getInt8Ty(GEP->getContext());
+      NewIdxValue = (cast<ConstantInt>(BaseIdx)->getSExtValue() *
+                     DL->getTypeAllocSize(BaseResTy)) +
+                    (cast<ConstantInt>(Idx)->getSExtValue() *
+                     DL->getTypeAllocSize(ResTy));
+    }
+    assert(NewResTy);
+    Type *NewIdxType = (Idx->getType()->getPrimitiveSizeInBits() >
+                      BaseIdx->getType()->getPrimitiveSizeInBits())
+                         ? Idx->getType() : BaseIdx->getType();
+    Constant *NewIdx = ConstantInt::get(NewIdxType, NewIdxValue);
+    auto *NewGEP = GetElementPtrInst::Create(
+        NewResTy, BaseGEP->getPointerOperand(), NewIdx);
+    NewGEP->setIsInBounds(GEP->isInBounds());
+    NewGEP->insertBefore(GEP->getIterator());
+    NewGEP->takeName(GEP);
+
+    GEP->replaceAllUsesWith(NewGEP);
+    RecursivelyDeleteTriviallyDeadInstructions(GEP);
+
+    Changed = true;
+    return true;
+  }
+
+  // %gep0 = getelementptr half, ptr addrspace(3) %ptr, i32 8
+  // %gep1 = getelementptr half, ptr addrspace(3) %gep0, i32 %idx
+  // as:
+  // %gepx0 = getelementptr half, ptr addrspace(3) %ptr, i32 %idx
+  // %gepx1 = getelementptr half, ptr addrspace(3) %gepx0, i32 8
+  auto *GEPX0 =
+      GetElementPtrInst::Create(ResTy, BaseGEP->getPointerOperand(), Idx);
+  GEPX0->setIsInBounds(BaseGEP->isInBounds());
+  GEPX0->insertBefore(GEP->getIterator());
+  auto *GEPX1 = GetElementPtrInst::Create(BaseResTy, GEPX0, BaseIdx);
+  GEPX1->setIsInBounds(GEP->isInBounds());
+  GEPX1->insertBefore(GEP->getIterator());
+  GEPX1->takeName(GEP);
+
+  GEP->replaceAllUsesWith(GEPX1);
+  RecursivelyDeleteTriviallyDeadInstructions(GEP);
+
+  Changed = true;
+  return true;
+}
+
+void SinkGEPConstOffsetPass::printPipeline(
+    raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
+  static_cast<PassInfoMixin<SinkGEPConstOffsetPass> *>(this)
+      ->printPipeline(OS, MapClassName2PassName);
+}
+
+PreservedAnalyses
+SinkGEPConstOffsetPass::run(Function &F, FunctionAnalysisManager &AM) {
+  SinkGEPConstOffset Impl;
+  if (!Impl.run(F))
+    return PreservedAnalyses::all();
+
+  PreservedAnalyses PA;
+  PA.preserveSet<CFGAnalyses>();
+  return PA;
+}
diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
index 29736b62f2c00..9c18b76f7d972 100644
--- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
+++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
@@ -487,6 +487,7 @@
 ; GCN-O1-OPTS-NEXT:      Scalar Evolution Analysis
 ; GCN-O1-OPTS-NEXT:      Loop Data Prefetch
 ; GCN-O1-OPTS-NEXT:      Split GEPs to a variadic base and a constant offset for better CSE
+; GCN-O1-OPTS-NEXT:      Sink const offsets down the GEP chain to the tail for reduction of register usage
 ; GCN-O1-OPTS-NEXT:      Scalar Evolution Analysis
 ; GCN-O1-OPTS-NEXT:      Straight line strength reduction
 ; GCN-O1-OPTS-NEXT:      Early CSE
@@ -794,6 +795,7 @@
 ; GCN-O2-NEXT:      Natural Loop Information
 ; GCN-O2-NEXT:      AMDGPU Promote Alloca
 ; GCN-O2-NEXT:      Split GEPs to a variadic base and a constant offset for better CSE
+; GCN-O2-NEXT:      Sink const offsets down the GEP chain to the tail for reduction of register usage
 ; GCN-O2-NEXT:      Scalar Evolution Analysis
 ; GCN-O2-NEXT:      Straight line strength reduction
 ; GCN-O2-NEXT:      Early CSE
@@ -1111,6 +1113,7 @@
 ; GCN-O3-NEXT:      Natural Loop Information
 ; GCN-O3-NEXT:      AMDGPU Promote Alloca
 ; GCN-O3-NEXT:      Split GEPs to a variadic base and a constant offset for better CSE
+; GCN-O3-NEXT:      Sink const offsets down the GEP chain to the tail for reduction of register usage
 ; GCN-O3-NEXT:      Scalar Evolution Analysis
 ; GCN-O3-NEXT:      Straight line strength reduction
 ; GCN-O3-NEXT:      Basic Alias Analysis (stateless AA impl)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.ll
index 565ad295ebbb3..4f5d93d767a7a 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.ll
@@ -21,15 +21,15 @@ define amdgpu_kernel void @test_iglp_opt_mfma_gemm(ptr addrspace(3) noalias %in,
 ; GCN-NEXT:    ; iglp_opt mask(0x00000000)
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
 ; GCN-NEXT:    v_add_u32_e32 v1, s0, v0
-; GCN-NEXT:    v_add_u32_e32 v2, 0x6000, v1
-; GCN-NEXT:    ds_read_b128 a[28:31], v2 offset:57456
-; GCN-NEXT:    ds_read_b128 a[24:27], v2 offset:57440
-; GCN-NEXT:    ds_read_b128 a[20:23], v2 offset:57424
-; GCN-NEXT:    ds_read_b128 a[16:19], v2 offset:57408
-; GCN-NEXT:    ds_read_b128 a[0:3], v2 offset:57344
-; GCN-NEXT:    ds_read_b128 a[4:7], v2 offset:57360
-; GCN-NEXT:    ds_read_b128 a[8:11], v2 offset:57376
-; GCN-NEXT:    ds_read_b128 a[12:15], v2 offset:57392
+; GCN-NEXT:    v_add_u32_e32 v2, 0x14000, v1
+; GCN-NEXT:    ds_read_b128 a[28:31], v2 offset:112
+; GCN-NEXT:    ds_read_b128 a[24:27], v2 offset:96
+; GCN-NEXT:    ds_read_b128 a[20:23], v2 offset:80
+; GCN-NEXT:    ds_read_b128 a[16:19], v2 offset:64
+; GCN-NEXT:    ds_read_b128 a[0:3], v2
+; GCN-NEXT:    ds_read_b128 a[4:7], v2 offset:16
+; GCN-NEXT:    ds_read_b128 a[8:11], v2 offset:32
+; GCN-NEXT:    ds_read_b128 a[12:15], v2 offset:48
 ; GCN-NEXT:    v_mov_b32_e32 v2, 1.0
 ; GCN-NEXT:    ds_read_b128 a[60:63], v1 offset:49264
 ; GCN-NEXT:    ds_read_b128 a[56:59], v1 offset:49248
@@ -199,17 +199,17 @@ define amdgpu_kernel void @test_iglp_opt_rev_mfma_gemm(ptr addrspace(3) noalias
 ; GCN-NEXT:    ds_read_b128 a[72:75], v1 offset:49184
 ; GCN-NEXT:    ds_read_b128 a[68:71], v1 offset:49168
 ; GCN-NEXT:    ds_read_b128 a[64:67], v1 offset:49152
-; GCN-NEXT:    v_add_u32_e32 v1, 0x6000, v1
+; GCN-NEXT:    v_add_u32_e32 v1, 0x14000, v1
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
 ; GCN-NEXT:    v_mfma_f32_32x32x1f32 a[64:95], v2, v3, a[64:95]
-; GCN-NEXT:    ds_read_b128 a[60:63], v1 offset:57456
-; GCN-NEXT:    ds_read_b128 a[56:59], v1 offset:57440
-; GCN-NEXT:    ds_read_b128 a...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/140657