[llvm] [InferAddressSpaces] Infer pointer stored and then loaded from global variable (PR #159755)

Wenju He via llvm-commits llvm-commits at lists.llvm.org
Thu Dec 25 19:01:21 PST 2025


https://github.com/wenju-he updated https://github.com/llvm/llvm-project/pull/159755

>From 349dbc623cf7c490e969289b716fb94b60f71e79 Mon Sep 17 00:00:00 2001
From: Wenju He <wenju.he at intel.com>
Date: Fri, 19 Sep 2025 12:42:34 +0200
Subject: [PATCH 01/12] [InferAddressSpaces] Infer pointer stored and then
 loaded from global variable

A load from a global variable that produces a (generic) pointer is treated as
an address expression if every user of the global is a load or store in the
same function, with stores writing to the global itself (i.e. the global is
the pointer operand).

The address space of the loaded pointer is then inferred from the pointer
values stored to the global.

The test is reduced from `SYCL-CTS/test_hierarchical hierarchical_implicit_barriers`.
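
As a minimal sketch of the intended rewrite (the global and value names below
are illustrative, not taken from the test in this patch):

@gv = internal addrspace(3) global ptr poison, align 8

define void @sketch(ptr addrspace(5) %priv) {
  %flat = addrspacecast ptr addrspace(5) %priv to ptr
  store ptr %flat, ptr addrspace(3) @gv
  %p = load ptr, ptr addrspace(3) @gv
  %v = load i64, ptr %p, align 8
  ret void
}

Here %p can be inferred to point to addrspace(5): the flat load is kept, an
addrspacecast of its result to ptr addrspace(5) is inserted, and the dependent
load of %v is rewritten to go through ptr addrspace(5), as the CHECK lines in
the test below show.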
---
 .../Transforms/Scalar/InferAddressSpaces.cpp  | 52 +++++++++++++++++--
 .../AMDGPU/gv-store-load.ll                   | 51 ++++++++++++++++++
 2 files changed, 100 insertions(+), 3 deletions(-)
 create mode 100644 llvm/test/Transforms/InferAddressSpaces/AMDGPU/gv-store-load.ll

diff --git a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
index 3ad87545953ff..179820bf3eb3e 100644
--- a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
+++ b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
@@ -304,10 +304,24 @@ static bool isNoopPtrIntCastPair(const Operator *I2P, const DataLayout &DL,
          (P2IOp0AS == I2PAS || TTI->isNoopAddrSpaceCast(P2IOp0AS, I2PAS));
 }
 
+// Returns true if every user of a given GV is a "simple" load or store in
+// the same function, with stores actually writing to GV.
+static bool isLocallyAccessedBySimpleLoadsStores(const GlobalVariable *GV,
+                                                 const Function *F) {
+  return all_of(GV->users(), [=](const User *U) {
+    if (const auto *SI = dyn_cast<StoreInst>(U))
+      return SI->getPointerOperand() == GV && SI->getFunction() == F;
+    if (const auto *LI = dyn_cast<LoadInst>(U))
+      return LI->getFunction() == F;
+    return false;
+  });
+}
+
 // Returns true if V is an address expression.
 // TODO: Currently, we only consider:
 //   - arguments
 //   - phi, bitcast, addrspacecast, and getelementptr operators
+//   - load
 static bool isAddressExpression(const Value &V, const DataLayout &DL,
                                 const TargetTransformInfo *TTI) {
 
@@ -335,6 +349,16 @@ static bool isAddressExpression(const Value &V, const DataLayout &DL,
   }
   case Instruction::IntToPtr:
     return isNoopPtrIntCastPair(Op, DL, TTI);
+  case Instruction::Load: {
+    const auto *LI = cast<LoadInst>(Op);
+    if (LI->getType()->isPtrOrPtrVectorTy()) {
+      // Heuristic: treat load-of-GV as an address expression only if the GV is
+      // locally accessed by load and store.
+      if (const auto *GV = dyn_cast<GlobalVariable>(LI->getPointerOperand()))
+        return isLocallyAccessedBySimpleLoadsStores(GV, LI->getFunction());
+    }
+    return TTI->getAssumedAddrSpace(&V) != UninitializedAddressSpace;
+  }
   default:
     // That value is an address expression if it has an assumed address space.
     return TTI->getAssumedAddrSpace(&V) != UninitializedAddressSpace;
@@ -342,6 +366,8 @@ static bool isAddressExpression(const Value &V, const DataLayout &DL,
 }
 
 // Returns the pointer operands of V.
+// If V is a load from a global variable G, also collect the pointer values
+// stored into G.
 //
 // Precondition: V is an address expression.
 static SmallVector<Value *, 2>
@@ -373,6 +399,20 @@ getPointerOperands(const Value &V, const DataLayout &DL,
     auto *P2I = cast<Operator>(Op.getOperand(0));
     return {P2I->getOperand(0)};
   }
+  case Instruction::Load: {
+    assert(V.getType()->isPtrOrPtrVectorTy());
+    if (const auto *GV = cast<GlobalVariable>(Op.getOperand(0))) {
+      assert(isLocallyAccessedBySimpleLoadsStores(
+          GV, cast<LoadInst>(&V)->getFunction()));
+      SmallVector<Value *, 2> PtrOps;
+      for (const auto *U : GV->users())
+        if (const auto *SI = dyn_cast<StoreInst>(U);
+            SI && SI->getPointerOperand() == GV)
+          PtrOps.push_back(cast<Operator>(U)->getOperand(0));
+      return PtrOps;
+    }
+    return {};
+  }
   default:
     llvm_unreachable("Unexpected instruction type.");
   }
@@ -561,9 +601,11 @@ InferAddressSpacesImpl::collectFlatAddressExpressions(Function &F) const {
       PushPtrOperand(GEP->getPointerOperand());
     } else if (auto *LI = dyn_cast<LoadInst>(&I))
       PushPtrOperand(LI->getPointerOperand());
-    else if (auto *SI = dyn_cast<StoreInst>(&I))
+    else if (auto *SI = dyn_cast<StoreInst>(&I)) {
+      if (SI->getValueOperand()->getType()->isPtrOrPtrVectorTy())
+        PushPtrOperand(SI->getValueOperand());
       PushPtrOperand(SI->getPointerOperand());
-    else if (auto *RMW = dyn_cast<AtomicRMWInst>(&I))
+    } else if (auto *RMW = dyn_cast<AtomicRMWInst>(&I))
       PushPtrOperand(RMW->getPointerOperand());
     else if (auto *CmpX = dyn_cast<AtomicCmpXchgInst>(&I))
       PushPtrOperand(CmpX->getPointerOperand());
@@ -755,6 +797,10 @@ Value *InferAddressSpacesImpl::cloneInstructionWithNewAddressSpace(
     // back.
     return new AddrSpaceCastInst(Src, NewPtrType);
   }
+  case Instruction::Load:
+    if (I->getType()->isPtrOrPtrVectorTy())
+      return new AddrSpaceCastInst(I, NewPtrType);
+    return nullptr;
   default:
     llvm_unreachable("Unexpected opcode");
   }
@@ -866,7 +912,7 @@ Value *InferAddressSpacesImpl::cloneValueWithNewAddressSpace(
         I, NewAddrSpace, ValueWithNewAddrSpace, PredicatedAS, PoisonUsesToFix);
     if (Instruction *NewI = dyn_cast_or_null<Instruction>(NewV)) {
       if (NewI->getParent() == nullptr) {
-        NewI->insertBefore(I->getIterator());
+        NewI->insertAfter(I->getIterator());
         NewI->takeName(I);
         NewI->setDebugLoc(I->getDebugLoc());
       }
diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/gv-store-load.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/gv-store-load.ll
new file mode 100644
index 0000000000000..8c3274758b929
--- /dev/null
+++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/gv-store-load.ll
@@ -0,0 +1,51 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=infer-address-spaces %s | FileCheck %s
+
+ at WGCopy = internal unnamed_addr addrspace(3) global ptr poison, align 16
+
+; Function Attrs: nounwind
+define void @gv_store_load() {
+; CHECK-LABEL: define void @gv_store_load() {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    [[AGG_TMP1617:%.*]] = alloca i64, align 8, addrspace(5)
+; CHECK-NEXT:    [[IS:%.*]] = call i1 @is_leader()
+; CHECK-NEXT:    br i1 [[IS]], label %[[LEADER:.*]], label %[[MERGE:.*]]
+; CHECK:       [[LEADER]]:
+; CHECK-NEXT:    br label %[[MERGE]]
+; CHECK:       [[MERGE]]:
+; CHECK-NEXT:    [[AGG_TMP_I_SROA_0_0:%.*]] = phi ptr addrspace(5) [ [[AGG_TMP1617]], %[[LEADER]] ], [ undef, %[[ENTRY]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[AGG_TMP_I_SROA_0_0]] to ptr
+; CHECK-NEXT:    br i1 [[IS]], label %[[LEADER_I:.*]], label %[[EXIT:.*]]
+; CHECK:       [[LEADER_I]]:
+; CHECK-NEXT:    store ptr [[TMP0]], ptr addrspace(3) @WGCopy, align 16
+; CHECK-NEXT:    br label %[[EXIT]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr addrspace(3) @WGCopy, align 16
+; CHECK-NEXT:    [[AGG_TMP_I_SROA_0_0_COPYLOAD:%.*]] = addrspacecast ptr [[TMP1]] to ptr addrspace(5)
+; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr addrspace(5) [[AGG_TMP_I_SROA_0_0_COPYLOAD]], align 8
+; CHECK-NEXT:    ret void
+;
+entry:
+  %agg.tmp1617 = alloca i64, align 8, addrspace(5)
+  %is = call i1 @is_leader()
+  br i1 %is, label %leader, label %merge
+
+leader:                                      ; preds = %entry
+  %group.ascast.i = addrspacecast ptr addrspace(5) %agg.tmp1617 to ptr
+  br label %merge
+
+merge:                                          ; preds = %leader, %entry
+  %agg.tmp.i.sroa.0.0 = phi ptr [ %group.ascast.i, %leader ], [ undef, %entry ]
+  br i1 %is, label %leader.i, label %exit
+
+leader.i:                                        ; preds = %merge
+  store ptr %agg.tmp.i.sroa.0.0, ptr addrspace(3) @WGCopy, align 16
+  br label %exit
+
+exit: ; preds = %leader.i, %merge
+  %agg.tmp.i.sroa.0.0.copyload = load ptr, ptr addrspace(3) @WGCopy, align 16
+  %15 = load i64, ptr %agg.tmp.i.sroa.0.0.copyload, align 8
+  ret void
+}
+
+declare i1 @is_leader()

>From 4d2ad490a531b5a5714ad3d1d6fa24e579ed2310 Mon Sep 17 00:00:00 2001
From: Wenju He <wenju.he at intel.com>
Date: Fri, 19 Sep 2025 12:56:30 +0200
Subject: [PATCH 02/12] update per copilot review

---
 llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
index 179820bf3eb3e..f2d63fa9ffa82 100644
--- a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
+++ b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
@@ -406,8 +406,7 @@ getPointerOperands(const Value &V, const DataLayout &DL,
           GV, cast<LoadInst>(&V)->getFunction()));
       SmallVector<Value *, 2> PtrOps;
       for (const auto *U : GV->users())
-        if (const auto *SI = dyn_cast<StoreInst>(U);
-            SI && SI->getPointerOperand() == GV)
+        if (isa<StoreInst>(U))
           PtrOps.push_back(cast<Operator>(U)->getOperand(0));
       return PtrOps;
     }

>From f689f415a1c451604a2594003cb0b0f46b222f94 Mon Sep 17 00:00:00 2001
From: Wenju He <wenju.he at intel.com>
Date: Mon, 22 Sep 2025 10:43:22 +0200
Subject: [PATCH 03/12] add vector test and negative tests, update
 PtrOps.push_back

---
 .../Transforms/Scalar/InferAddressSpaces.cpp  |   4 +-
 .../AMDGPU/gv-store-load.ll                   | 168 ++++++++++++++++--
 2 files changed, 151 insertions(+), 21 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
index f2d63fa9ffa82..25b31684ceaa2 100644
--- a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
+++ b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
@@ -406,8 +406,8 @@ getPointerOperands(const Value &V, const DataLayout &DL,
           GV, cast<LoadInst>(&V)->getFunction()));
       SmallVector<Value *, 2> PtrOps;
       for (const auto *U : GV->users())
-        if (isa<StoreInst>(U))
-          PtrOps.push_back(cast<Operator>(U)->getOperand(0));
+        if (auto *SI = dyn_cast<StoreInst>(U))
+          PtrOps.push_back(const_cast<Value *>(SI->getValueOperand()));
       return PtrOps;
     }
     return {};
diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/gv-store-load.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/gv-store-load.ll
index 8c3274758b929..a3bcc2d7c63fb 100644
--- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/gv-store-load.ll
+++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/gv-store-load.ll
@@ -1,51 +1,181 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=infer-address-spaces %s | FileCheck %s
 
- at WGCopy = internal unnamed_addr addrspace(3) global ptr poison, align 16
+ at WGCopy = internal addrspace(3) global ptr poison, align 8
+ at WGCopy1 = internal addrspace(3) global ptr poison, align 8
+ at WGCopy2 = internal addrspace(3) global ptr poison, align 8
+ at WGCopy3 = internal addrspace(3) global ptr poison, align 8
+ at WGCopy4 = internal addrspace(3) global ptr poison, align 8
+ at WGCopyVec = internal addrspace(3) global <2 x ptr> poison, align 16
 
-; Function Attrs: nounwind
+; load ptr AS is inferred.
 define void @gv_store_load() {
 ; CHECK-LABEL: define void @gv_store_load() {
 ; CHECK-NEXT:  [[ENTRY:.*]]:
-; CHECK-NEXT:    [[AGG_TMP1617:%.*]] = alloca i64, align 8, addrspace(5)
+; CHECK-NEXT:    [[AGG:%.*]] = alloca i64, align 8, addrspace(5)
 ; CHECK-NEXT:    [[IS:%.*]] = call i1 @is_leader()
 ; CHECK-NEXT:    br i1 [[IS]], label %[[LEADER:.*]], label %[[MERGE:.*]]
 ; CHECK:       [[LEADER]]:
 ; CHECK-NEXT:    br label %[[MERGE]]
 ; CHECK:       [[MERGE]]:
-; CHECK-NEXT:    [[AGG_TMP_I_SROA_0_0:%.*]] = phi ptr addrspace(5) [ [[AGG_TMP1617]], %[[LEADER]] ], [ undef, %[[ENTRY]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[AGG_TMP_I_SROA_0_0]] to ptr
+; CHECK-NEXT:    [[AGG_SROA:%.*]] = phi ptr addrspace(5) [ [[AGG]], %[[LEADER]] ], [ poison, %[[ENTRY]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[AGG_SROA]] to ptr
 ; CHECK-NEXT:    br i1 [[IS]], label %[[LEADER_I:.*]], label %[[EXIT:.*]]
 ; CHECK:       [[LEADER_I]]:
-; CHECK-NEXT:    store ptr [[TMP0]], ptr addrspace(3) @WGCopy, align 16
+; CHECK-NEXT:    store ptr [[TMP0]], ptr addrspace(3) @WGCopy, align 8
 ; CHECK-NEXT:    br label %[[EXIT]]
 ; CHECK:       [[EXIT]]:
-; CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr addrspace(3) @WGCopy, align 16
-; CHECK-NEXT:    [[AGG_TMP_I_SROA_0_0_COPYLOAD:%.*]] = addrspacecast ptr [[TMP1]] to ptr addrspace(5)
-; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr addrspace(5) [[AGG_TMP_I_SROA_0_0_COPYLOAD]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr addrspace(3) @WGCopy, align 8
+; CHECK-NEXT:    [[AGG_SROA_COPYLOAD:%.*]] = addrspacecast ptr [[TMP1]] to ptr addrspace(5)
+; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr addrspace(5) [[AGG_SROA_COPYLOAD]], align 8
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %agg.tmp1617 = alloca i64, align 8, addrspace(5)
+  %agg = alloca i64, align 8, addrspace(5)
   %is = call i1 @is_leader()
   br i1 %is, label %leader, label %merge
 
-leader:                                      ; preds = %entry
-  %group.ascast.i = addrspacecast ptr addrspace(5) %agg.tmp1617 to ptr
+leader:                                           ; preds = %entry
+  %group.ascast = addrspacecast ptr addrspace(5) %agg to ptr
   br label %merge
 
-merge:                                          ; preds = %leader, %entry
-  %agg.tmp.i.sroa.0.0 = phi ptr [ %group.ascast.i, %leader ], [ undef, %entry ]
+merge:                                            ; preds = %leader, %entry
+  %agg.sroa = phi ptr [ %group.ascast, %leader ], [ poison, %entry ]
   br i1 %is, label %leader.i, label %exit
 
-leader.i:                                        ; preds = %merge
-  store ptr %agg.tmp.i.sroa.0.0, ptr addrspace(3) @WGCopy, align 16
+leader.i:                                         ; preds = %merge
+  store ptr %agg.sroa, ptr addrspace(3) @WGCopy, align 8
   br label %exit
 
-exit: ; preds = %leader.i, %merge
-  %agg.tmp.i.sroa.0.0.copyload = load ptr, ptr addrspace(3) @WGCopy, align 16
-  %15 = load i64, ptr %agg.tmp.i.sroa.0.0.copyload, align 8
+exit:                                             ; preds = %leader.i, %merge
+  %agg.sroa.copyload = load ptr, ptr addrspace(3) @WGCopy, align 8
+  %val = load i64, ptr %agg.sroa.copyload, align 8
   ret void
 }
 
 declare i1 @is_leader()
+
+; vector load ptr AS is inferred.
+define void @gv_store_load_vec(<2 x ptr addrspace(5)> %private.ptr) {
+; CHECK-LABEL: define void @gv_store_load_vec(
+; CHECK-SAME: <2 x ptr addrspace(5)> [[PRIVATE_PTR:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[GROUP_ASCAST:%.*]] = addrspacecast <2 x ptr addrspace(5)> [[PRIVATE_PTR]] to <2 x ptr>
+; CHECK-NEXT:    store <2 x ptr> [[GROUP_ASCAST]], ptr addrspace(3) @WGCopyVec, align 16
+; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x ptr>, ptr addrspace(3) @WGCopyVec, align 16
+; CHECK-NEXT:    [[AGG_SROA_COPYLOAD:%.*]] = addrspacecast <2 x ptr> [[TMP0]] to <2 x ptr addrspace(5)>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <2 x ptr addrspace(5)> [[AGG_SROA_COPYLOAD]], <ptr addrspace(5) addrspacecast (ptr null to ptr addrspace(5)), ptr addrspace(5) addrspacecast (ptr null to ptr addrspace(5))>
+; CHECK-NEXT:    ret void
+;
+entry:
+  %group.ascast = addrspacecast <2 x ptr addrspace(5)> %private.ptr to <2 x ptr>
+  store <2 x ptr> %group.ascast, ptr addrspace(3) @WGCopyVec, align 16
+  %agg.sroa.copyload = load <2 x ptr>, ptr addrspace(3) @WGCopyVec, align 16
+  %cmp = icmp eq <2 x ptr> %agg.sroa.copyload, zeroinitializer
+  ret void
+}
+
+; load ptr AS is not inferred since GV has a user that is not a load or store.
+define void @negative_test_gv_user_not_load_store(ptr addrspace(5) %agg) {
+; CHECK-LABEL: define void @negative_test_gv_user_not_load_store(
+; CHECK-SAME: ptr addrspace(5) [[AGG:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[GROUP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AGG]] to ptr
+; CHECK-NEXT:    store ptr [[GROUP_ASCAST]], ptr addrspace(3) @WGCopy1, align 8
+; CHECK-NEXT:    [[AGG_COPYLOAD:%.*]] = load ptr, ptr addrspace(3) @WGCopy1, align 8
+; CHECK-NEXT:    [[VAL:%.*]] = load i64, ptr [[AGG_COPYLOAD]], align 8
+; CHECK-NEXT:    [[GV_ASC:%.*]] = addrspacecast ptr addrspace(3) @WGCopy1 to ptr
+; CHECK-NEXT:    ret void
+;
+entry:
+  %group.ascast = addrspacecast ptr addrspace(5) %agg to ptr
+  store ptr %group.ascast, ptr addrspace(3) @WGCopy1, align 8
+  %agg.copyload = load ptr, ptr addrspace(3) @WGCopy1, align 8
+  %val = load i64, ptr %agg.copyload, align 8
+  %gv.asc = addrspacecast ptr addrspace(3) @WGCopy1 to ptr
+  ret void
+}
+
+; load ptr AS is not inferred since GV is not used as the pointer operand of
+; the store inst.
+define void @negative_test_gv_store_not_pointer_operand(ptr addrspace(5) %agg) {
+; CHECK-LABEL: define void @negative_test_gv_store_not_pointer_operand(
+; CHECK-SAME: ptr addrspace(5) [[AGG:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[B:%.*]] = alloca ptr addrspace(3), align 8, addrspace(5)
+; CHECK-NEXT:    [[GROUP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AGG]] to ptr
+; CHECK-NEXT:    store ptr [[GROUP_ASCAST]], ptr addrspace(3) @WGCopy2, align 8
+; CHECK-NEXT:    store ptr addrspace(3) @WGCopy2, ptr addrspace(5) [[B]], align 8
+; CHECK-NEXT:    [[AGG_COPYLOAD:%.*]] = load ptr, ptr addrspace(3) @WGCopy2, align 8
+; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr [[AGG_COPYLOAD]], align 8
+; CHECK-NEXT:    ret void
+;
+entry:
+  %b = alloca ptr addrspace(3), align 8, addrspace(5)
+  %group.ascast = addrspacecast ptr addrspace(5) %agg to ptr
+  store ptr %group.ascast, ptr addrspace(3) @WGCopy2, align 8
+  store ptr addrspace(3) @WGCopy2, ptr addrspace(5) %b, align 8
+  %agg.copyload = load ptr, ptr addrspace(3) @WGCopy2, align 8
+  %val = load i64, ptr %agg.copyload, align 8
+  ret void
+}
+
+; load ptr AS is not inferred since there are multiple stores to GV and stored
+; pointers have different AS.
+define void @negative_test_gv_multi_store_different_addrspace(ptr addrspace(1) %arg) {
+; CHECK-LABEL: define void @negative_test_gv_multi_store_different_addrspace(
+; CHECK-SAME: ptr addrspace(1) [[ARG:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[AGG:%.*]] = alloca i64, align 8, addrspace(5)
+; CHECK-NEXT:    [[GROUP_ASCAST:%.*]] = addrspacecast ptr addrspace(1) [[ARG]] to ptr
+; CHECK-NEXT:    store ptr [[GROUP_ASCAST]], ptr addrspace(3) @WGCopy3, align 8
+; CHECK-NEXT:    [[GROUP_ASCAST1:%.*]] = addrspacecast ptr addrspace(5) [[AGG]] to ptr
+; CHECK-NEXT:    store ptr [[GROUP_ASCAST1]], ptr addrspace(3) @WGCopy3, align 8
+; CHECK-NEXT:    [[AGG_COPYLOAD:%.*]] = load ptr, ptr addrspace(3) @WGCopy3, align 8
+; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr [[AGG_COPYLOAD]], align 8
+; CHECK-NEXT:    ret void
+;
+entry:
+  %agg = alloca i64, align 8, addrspace(5)
+  %group.ascast = addrspacecast ptr addrspace(1) %arg to ptr
+  store ptr %group.ascast, ptr addrspace(3) @WGCopy3, align 8
+  %group.ascast1 = addrspacecast ptr addrspace(5) %agg to ptr
+  store ptr %group.ascast1, ptr addrspace(3) @WGCopy3, align 8
+  %agg.copyload = load ptr, ptr addrspace(3) @WGCopy3, align 8
+  %val = load i64, ptr %agg.copyload, align 8
+  ret void
+}
+
+; load ptr AS is not inferred since GV is used in two functions
+; negative_test_gv_used_in_two_funcs_foo and negative_test_gv_used_in_two_funcs_bar.
+define void @negative_test_gv_used_in_two_funcs_foo(ptr addrspace(1) %agg) {
+; CHECK-LABEL: define void @negative_test_gv_used_in_two_funcs_foo(
+; CHECK-SAME: ptr addrspace(1) [[AGG:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[GROUP_ASCAST:%.*]] = addrspacecast ptr addrspace(1) [[AGG]] to ptr
+; CHECK-NEXT:    store ptr [[GROUP_ASCAST]], ptr addrspace(3) @WGCopy4, align 8
+; CHECK-NEXT:    [[AGG_SROA_COPYLOAD:%.*]] = load ptr, ptr addrspace(3) @WGCopy4, align 8
+; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr [[AGG_SROA_COPYLOAD]], align 8
+; CHECK-NEXT:    ret void
+;
+entry:
+  %group.ascast = addrspacecast ptr addrspace(1) %agg to ptr
+  store ptr %group.ascast, ptr addrspace(3) @WGCopy4, align 8
+  %agg.sroa.copyload = load ptr, ptr addrspace(3) @WGCopy4, align 8
+  %val = load i64, ptr %agg.sroa.copyload, align 8
+  ret void
+}
+
+define void @negative_test_gv_used_in_two_funcs_bar(ptr addrspace(1) %agg) {
+; CHECK-LABEL: define void @negative_test_gv_used_in_two_funcs_bar(
+; CHECK-SAME: ptr addrspace(1) [[AGG:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[GROUP_ASCAST:%.*]] = addrspacecast ptr addrspace(1) [[AGG]] to ptr
+; CHECK-NEXT:    store ptr [[GROUP_ASCAST]], ptr addrspace(3) @WGCopy4, align 8
+; CHECK-NEXT:    ret void
+;
+entry:
+  %group.ascast = addrspacecast ptr addrspace(1) %agg to ptr
+  store ptr %group.ascast, ptr addrspace(3) @WGCopy4, align 8
+  ret void
+}

>From f3bc5852667311b2f72297c680c2a25dbcb0a617 Mon Sep 17 00:00:00 2001
From: Wenju He <wenju.he at intel.com>
Date: Mon, 22 Sep 2025 16:47:34 +0800
Subject: [PATCH 04/12] Update
 llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Juan Manuel Martinez Caamaño <jmartinezcaamao at gmail.com>
---
 llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
index 25b31684ceaa2..771f636407837 100644
--- a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
+++ b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
@@ -601,8 +601,9 @@ InferAddressSpacesImpl::collectFlatAddressExpressions(Function &F) const {
     } else if (auto *LI = dyn_cast<LoadInst>(&I))
       PushPtrOperand(LI->getPointerOperand());
     else if (auto *SI = dyn_cast<StoreInst>(&I)) {
-      if (SI->getValueOperand()->getType()->isPtrOrPtrVectorTy())
-        PushPtrOperand(SI->getValueOperand());
+      Value* V = SI->getValueOperand();
+      if (V->getType()->isPtrOrPtrVectorTy())
+        PushPtrOperand(V);
       PushPtrOperand(SI->getPointerOperand());
     } else if (auto *RMW = dyn_cast<AtomicRMWInst>(&I))
       PushPtrOperand(RMW->getPointerOperand());

>From 4528b594a7e275be1d3bb8a344ef16159c197301 Mon Sep 17 00:00:00 2001
From: Wenju He <wenju.he at intel.com>
Date: Mon, 22 Sep 2025 10:51:40 +0200
Subject: [PATCH 05/12] clang-format

---
 llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
index 771f636407837..4187114b8f502 100644
--- a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
+++ b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
@@ -601,7 +601,7 @@ InferAddressSpacesImpl::collectFlatAddressExpressions(Function &F) const {
     } else if (auto *LI = dyn_cast<LoadInst>(&I))
       PushPtrOperand(LI->getPointerOperand());
     else if (auto *SI = dyn_cast<StoreInst>(&I)) {
-      Value* V = SI->getValueOperand();
+      Value *V = SI->getValueOperand();
       if (V->getType()->isPtrOrPtrVectorTy())
         PushPtrOperand(V);
       PushPtrOperand(SI->getPointerOperand());

>From 09d7353ee80687a5196e4998aea564d2dda44010 Mon Sep 17 00:00:00 2001
From: Wenju He <wenju.he at intel.com>
Date: Mon, 22 Sep 2025 12:14:10 +0200
Subject: [PATCH 06/12] update lower-byval-args.ll

---
 llvm/test/CodeGen/NVPTX/lower-byval-args.ll | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/llvm/test/CodeGen/NVPTX/lower-byval-args.ll b/llvm/test/CodeGen/NVPTX/lower-byval-args.ll
index 21257e21bea9f..3b2e0ebef12f0 100644
--- a/llvm/test/CodeGen/NVPTX/lower-byval-args.ll
+++ b/llvm/test/CodeGen/NVPTX/lower-byval-args.ll
@@ -262,16 +262,15 @@ define dso_local ptx_kernel void @escape_ptr_gep_store(ptr nocapture noundef wri
 ; PTX-EMPTY:
 ; PTX-NEXT:  // %bb.0: // %entry
 ; PTX-NEXT:    mov.b64 %SPL, __local_depot5;
-; PTX-NEXT:    cvta.local.u64 %SP, %SPL;
 ; PTX-NEXT:    ld.param.b64 %rd1, [escape_ptr_gep_store_param_0];
 ; PTX-NEXT:    cvta.to.global.u64 %rd2, %rd1;
-; PTX-NEXT:    add.u64 %rd3, %SP, 0;
-; PTX-NEXT:    add.u64 %rd4, %SPL, 0;
+; PTX-NEXT:    add.u64 %rd3, %SPL, 0;
 ; PTX-NEXT:    ld.param.b32 %r1, [escape_ptr_gep_store_param_1+4];
-; PTX-NEXT:    st.local.b32 [%rd4+4], %r1;
+; PTX-NEXT:    st.local.b32 [%rd3+4], %r1;
 ; PTX-NEXT:    ld.param.b32 %r2, [escape_ptr_gep_store_param_1];
-; PTX-NEXT:    st.local.b32 [%rd4], %r2;
-; PTX-NEXT:    add.s64 %rd5, %rd3, 4;
+; PTX-NEXT:    st.local.b32 [%rd3], %r2;
+; PTX-NEXT:    add.s64 %rd4, %rd3, 4;
+; PTX-NEXT:    cvta.local.u64 %rd5, %rd4;
 ; PTX-NEXT:    st.global.b64 [%rd2], %rd5;
 ; PTX-NEXT:    ret;
 entry:

>From 1b3fba50d125d7ec870a8f4ec20c0ee3697a6716 Mon Sep 17 00:00:00 2001
From: Wenju He <wenju.he at intel.com>
Date: Wed, 24 Dec 2025 12:36:48 +0100
Subject: [PATCH 07/12] collect GV load/store into a map, do not iterate over
 GV users, simplify isAddressExpression

---
 .../Transforms/Scalar/InferAddressSpaces.cpp  | 97 +++++++++----------
 1 file changed, 45 insertions(+), 52 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
index 4187114b8f502..44c7e2592c496 100644
--- a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
+++ b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
@@ -156,6 +156,8 @@ using ValueToAddrSpaceMapTy = DenseMap<const Value *, unsigned>;
 using PredicatedAddrSpaceMapTy =
     DenseMap<std::pair<const Value *, const Value *>, unsigned>;
 using PostorderStackTy = llvm::SmallVector<PointerIntPair<Value *, 1, bool>, 4>;
+using GVToLoadStoreMapTy =
+    DenseMap<GlobalVariable *, SmallVector<Instruction *, 4>>;
 
 class InferAddressSpaces : public FunctionPass {
   unsigned FlatAddrSpace = 0;
@@ -196,13 +198,15 @@ class InferAddressSpacesImpl {
   // false otherwise.
   bool updateAddressSpace(const Value &V,
                           ValueToAddrSpaceMapTy &InferredAddrSpace,
-                          PredicatedAddrSpaceMapTy &PredicatedAS) const;
+                          PredicatedAddrSpaceMapTy &PredicatedAS,
+                          const GVToLoadStoreMapTy &GVToLdSt) const;
 
   // Tries to infer the specific address space of each address expression in
   // Postorder.
   void inferAddressSpaces(ArrayRef<WeakTrackingVH> Postorder,
                           ValueToAddrSpaceMapTy &InferredAddrSpace,
-                          PredicatedAddrSpaceMapTy &PredicatedAS) const;
+                          PredicatedAddrSpaceMapTy &PredicatedAS,
+                          const GVToLoadStoreMapTy &GVToLdSt) const;
 
   bool isSafeToCastConstAddrSpace(Constant *C, unsigned NewAS) const;
 
@@ -234,7 +238,8 @@ class InferAddressSpacesImpl {
                                           PostorderStackTy &PostorderStack,
                                           DenseSet<Value *> &Visited) const;
 
-  std::vector<WeakTrackingVH> collectFlatAddressExpressions(Function &F) const;
+  std::pair<std::vector<WeakTrackingVH>, GVToLoadStoreMapTy>
+  collectFlatAddressExpressions(Function &F) const;
 
   Value *cloneValueWithNewAddressSpace(
       Value *V, unsigned NewAddrSpace,
@@ -304,19 +309,6 @@ static bool isNoopPtrIntCastPair(const Operator *I2P, const DataLayout &DL,
          (P2IOp0AS == I2PAS || TTI->isNoopAddrSpaceCast(P2IOp0AS, I2PAS));
 }
 
-// Returns true if every user of a given GV is a "simple" load or store in
-// the same function, with stores actually writing to GV.
-static bool isLocallyAccessedBySimpleLoadsStores(const GlobalVariable *GV,
-                                                 const Function *F) {
-  return all_of(GV->users(), [=](const User *U) {
-    if (const auto *SI = dyn_cast<StoreInst>(U))
-      return SI->getPointerOperand() == GV && SI->getFunction() == F;
-    if (const auto *LI = dyn_cast<LoadInst>(U))
-      return LI->getFunction() == F;
-    return false;
-  });
-}
-
 // Returns true if V is an address expression.
 // TODO: Currently, we only consider:
 //   - arguments
@@ -342,6 +334,7 @@ static bool isAddressExpression(const Value &V, const DataLayout &DL,
   case Instruction::GetElementPtr:
     return true;
   case Instruction::Select:
+  case Instruction::Load:
     return Op->getType()->isPtrOrPtrVectorTy();
   case Instruction::Call: {
     const IntrinsicInst *II = dyn_cast<IntrinsicInst>(&V);
@@ -349,16 +342,6 @@ static bool isAddressExpression(const Value &V, const DataLayout &DL,
   }
   case Instruction::IntToPtr:
     return isNoopPtrIntCastPair(Op, DL, TTI);
-  case Instruction::Load: {
-    const auto *LI = cast<LoadInst>(Op);
-    if (LI->getType()->isPtrOrPtrVectorTy()) {
-      // Heuristic: treat load-of-GV as an address expression only if the GV is
-      // locally accessed by load and store.
-      if (const auto *GV = dyn_cast<GlobalVariable>(LI->getPointerOperand()))
-        return isLocallyAccessedBySimpleLoadsStores(GV, LI->getFunction());
-    }
-    return TTI->getAssumedAddrSpace(&V) != UninitializedAddressSpace;
-  }
   default:
     // That value is an address expression if it has an assumed address space.
     return TTI->getAssumedAddrSpace(&V) != UninitializedAddressSpace;
@@ -370,9 +353,9 @@ static bool isAddressExpression(const Value &V, const DataLayout &DL,
 // stored into G.
 //
 // Precondition: V is an address expression.
-static SmallVector<Value *, 2>
-getPointerOperands(const Value &V, const DataLayout &DL,
-                   const TargetTransformInfo *TTI) {
+static SmallVector<Value *, 2> getPointerOperands(
+    const Value &V, const DataLayout &DL, const TargetTransformInfo *TTI,
+    const DenseMap<GlobalVariable *, SmallVector<Instruction *, 4>> &GVToLdSt) {
   if (isa<Argument>(&V))
     return {};
 
@@ -400,14 +383,16 @@ getPointerOperands(const Value &V, const DataLayout &DL,
     return {P2I->getOperand(0)};
   }
   case Instruction::Load: {
-    assert(V.getType()->isPtrOrPtrVectorTy());
-    if (const auto *GV = cast<GlobalVariable>(Op.getOperand(0))) {
-      assert(isLocallyAccessedBySimpleLoadsStores(
-          GV, cast<LoadInst>(&V)->getFunction()));
+    if (auto *GV = dyn_cast<GlobalVariable>(Op.getOperand(0))) {
       SmallVector<Value *, 2> PtrOps;
-      for (const auto *U : GV->users())
-        if (auto *SI = dyn_cast<StoreInst>(U))
-          PtrOps.push_back(const_cast<Value *>(SI->getValueOperand()));
+      // Only consider GV that is exclusively used within current function.
+      auto It = GVToLdSt.find(GV);
+      assert(It != GVToLdSt.end() && "Expected GV to be in the map");
+      if (GV->getNumUses() == It->second.size()) {
+        for (auto *I : It->second)
+          if (auto *SI = dyn_cast<StoreInst>(I))
+            PtrOps.push_back(SI->getValueOperand());
+      }
       return PtrOps;
     }
     return {};
@@ -580,13 +565,14 @@ void InferAddressSpacesImpl::appendsFlatAddressExpressionToPostorderStack(
 
 // Returns all flat address expressions in function F. The elements are ordered
 // in postorder.
-std::vector<WeakTrackingVH>
+std::pair<std::vector<WeakTrackingVH>, GVToLoadStoreMapTy>
 InferAddressSpacesImpl::collectFlatAddressExpressions(Function &F) const {
   // This function implements a non-recursive postorder traversal of a partial
   // use-def graph of function F.
   PostorderStackTy PostorderStack;
   // The set of visited expressions.
   DenseSet<Value *> Visited;
+  GVToLoadStoreMapTy GVToLdSt;
 
   auto PushPtrOperand = [&](Value *Ptr) {
     appendsFlatAddressExpressionToPostorderStack(Ptr, PostorderStack, Visited);
@@ -598,13 +584,19 @@ InferAddressSpacesImpl::collectFlatAddressExpressions(Function &F) const {
   for (Instruction &I : instructions(F)) {
     if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
       PushPtrOperand(GEP->getPointerOperand());
-    } else if (auto *LI = dyn_cast<LoadInst>(&I))
-      PushPtrOperand(LI->getPointerOperand());
-    else if (auto *SI = dyn_cast<StoreInst>(&I)) {
+    } else if (auto *LI = dyn_cast<LoadInst>(&I)) {
+      Value *PtrOp = LI->getPointerOperand();
+      PushPtrOperand(PtrOp);
+      if (auto *GV = dyn_cast<GlobalVariable>(PtrOp))
+        GVToLdSt[GV].push_back(LI);
+    } else if (auto *SI = dyn_cast<StoreInst>(&I)) {
       Value *V = SI->getValueOperand();
       if (V->getType()->isPtrOrPtrVectorTy())
         PushPtrOperand(V);
-      PushPtrOperand(SI->getPointerOperand());
+      Value *PtrOp = SI->getPointerOperand();
+      PushPtrOperand(PtrOp);
+      if (auto *GV = dyn_cast<GlobalVariable>(PtrOp))
+        GVToLdSt[GV].push_back(SI);
     } else if (auto *RMW = dyn_cast<AtomicRMWInst>(&I))
       PushPtrOperand(RMW->getPointerOperand());
     else if (auto *CmpX = dyn_cast<AtomicCmpXchgInst>(&I))
@@ -650,13 +642,14 @@ InferAddressSpacesImpl::collectFlatAddressExpressions(Function &F) const {
     PostorderStack.back().setInt(true);
     // Skip values with an assumed address space.
     if (TTI->getAssumedAddrSpace(TopVal) == UninitializedAddressSpace) {
-      for (Value *PtrOperand : getPointerOperands(*TopVal, *DL, TTI)) {
+      for (Value *PtrOperand :
+           getPointerOperands(*TopVal, *DL, TTI, GVToLdSt)) {
         appendsFlatAddressExpressionToPostorderStack(PtrOperand, PostorderStack,
                                                      Visited);
       }
     }
   }
-  return Postorder;
+  return std::make_pair(Postorder, GVToLdSt);
 }
 
 // A helper function for cloneInstructionWithNewAddressSpace. Returns the clone
@@ -798,9 +791,7 @@ Value *InferAddressSpacesImpl::cloneInstructionWithNewAddressSpace(
     return new AddrSpaceCastInst(Src, NewPtrType);
   }
   case Instruction::Load:
-    if (I->getType()->isPtrOrPtrVectorTy())
-      return new AddrSpaceCastInst(I, NewPtrType);
-    return nullptr;
+    return new AddrSpaceCastInst(I, NewPtrType);
   default:
     llvm_unreachable("Unexpected opcode");
   }
@@ -954,13 +945,13 @@ bool InferAddressSpacesImpl::run(Function &CurFn) {
   }
 
   // Collects all flat address expressions in postorder.
-  std::vector<WeakTrackingVH> Postorder = collectFlatAddressExpressions(*F);
+  auto [Postorder, GVToLdSt] = collectFlatAddressExpressions(*F);
 
   // Runs a data-flow analysis to refine the address spaces of every expression
   // in Postorder.
   ValueToAddrSpaceMapTy InferredAddrSpace;
   PredicatedAddrSpaceMapTy PredicatedAS;
-  inferAddressSpaces(Postorder, InferredAddrSpace, PredicatedAS);
+  inferAddressSpaces(Postorder, InferredAddrSpace, PredicatedAS, GVToLdSt);
 
   // Changes the address spaces of the flat address expressions who are inferred
   // to point to a specific address space.
@@ -973,7 +964,8 @@ bool InferAddressSpacesImpl::run(Function &CurFn) {
 void InferAddressSpacesImpl::inferAddressSpaces(
     ArrayRef<WeakTrackingVH> Postorder,
     ValueToAddrSpaceMapTy &InferredAddrSpace,
-    PredicatedAddrSpaceMapTy &PredicatedAS) const {
+    PredicatedAddrSpaceMapTy &PredicatedAS,
+    const GVToLoadStoreMapTy &GVToLdSt) const {
   SetVector<Value *> Worklist(llvm::from_range, Postorder);
   // Initially, all expressions are in the uninitialized address space.
   for (Value *V : Postorder)
@@ -984,7 +976,7 @@ void InferAddressSpacesImpl::inferAddressSpaces(
 
     // Try to update the address space of the stack top according to the
     // address spaces of its operands.
-    if (!updateAddressSpace(*V, InferredAddrSpace, PredicatedAS))
+    if (!updateAddressSpace(*V, InferredAddrSpace, PredicatedAS, GVToLdSt))
       continue;
 
     for (Value *User : V->users()) {
@@ -1036,7 +1028,8 @@ InferAddressSpacesImpl::getPredicatedAddrSpace(const Value &Ptr,
 
 bool InferAddressSpacesImpl::updateAddressSpace(
     const Value &V, ValueToAddrSpaceMapTy &InferredAddrSpace,
-    PredicatedAddrSpaceMapTy &PredicatedAS) const {
+    PredicatedAddrSpaceMapTy &PredicatedAS,
+    const GVToLoadStoreMapTy &GVToLdSt) const {
   assert(InferredAddrSpace.count(&V));
 
   LLVM_DEBUG(dbgs() << "Updating the address space of\n  " << V << '\n');
@@ -1055,7 +1048,7 @@ bool InferAddressSpacesImpl::updateAddressSpace(
   } else {
     // Otherwise, infer the address space from its pointer operands.
     SmallVector<Constant *, 2> ConstantPtrOps;
-    for (Value *PtrOperand : getPointerOperands(V, *DL, TTI)) {
+    for (Value *PtrOperand : getPointerOperands(V, *DL, TTI, GVToLdSt)) {
       auto I = InferredAddrSpace.find(PtrOperand);
       unsigned OperandAS;
       if (I == InferredAddrSpace.end()) {

>From 41a07fc4d940581510ba75c5f62f9ad6f67abe13 Mon Sep 17 00:00:00 2001
From: Wenju He <wenju.he at intel.com>
Date: Wed, 24 Dec 2025 13:13:31 +0100
Subject: [PATCH 08/12] refactor stored pointer retrieval into new function
 getStoredPointerOperands

---
 .../Transforms/Scalar/InferAddressSpaces.cpp  | 54 ++++++++++++-------
 1 file changed, 34 insertions(+), 20 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
index 44c7e2592c496..c578d8684ac09 100644
--- a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
+++ b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
@@ -349,13 +349,11 @@ static bool isAddressExpression(const Value &V, const DataLayout &DL,
 }
 
 // Returns the pointer operands of V.
-// If V is a load from a global variable G, also collect the pointer values
-// stored into G.
 //
 // Precondition: V is an address expression.
-static SmallVector<Value *, 2> getPointerOperands(
-    const Value &V, const DataLayout &DL, const TargetTransformInfo *TTI,
-    const DenseMap<GlobalVariable *, SmallVector<Instruction *, 4>> &GVToLdSt) {
+static SmallVector<Value *, 2>
+getPointerOperands(const Value &V, const DataLayout &DL,
+                   const TargetTransformInfo *TTI) {
   if (isa<Argument>(&V))
     return {};
 
@@ -383,18 +381,7 @@ static SmallVector<Value *, 2> getPointerOperands(
     return {P2I->getOperand(0)};
   }
   case Instruction::Load: {
-    if (auto *GV = dyn_cast<GlobalVariable>(Op.getOperand(0))) {
-      SmallVector<Value *, 2> PtrOps;
-      // Only consider GV that is exclusively used within current function.
-      auto It = GVToLdSt.find(GV);
-      assert(It != GVToLdSt.end() && "Expected GV to be in the map");
-      if (GV->getNumUses() == It->second.size()) {
-        for (auto *I : It->second)
-          if (auto *SI = dyn_cast<StoreInst>(I))
-            PtrOps.push_back(SI->getValueOperand());
-      }
-      return PtrOps;
-    }
+    assert(Op.getType()->isPtrOrPtrVectorTy());
     return {};
   }
   default:
@@ -402,6 +389,30 @@ static SmallVector<Value *, 2> getPointerOperands(
   }
 }
 
+// Given a load from a global variable G, collect the pointer values
+// stored into G.
+static SmallVector<Value *, 2> getStoredPointerOperands(
+    const Value &V,
+    const DenseMap<GlobalVariable *, SmallVector<Instruction *, 4>> &GVToLdSt) {
+  const Operator &Op = cast<Operator>(V);
+  if (Op.getOpcode() != Instruction::Load)
+    return {};
+  auto *GV = dyn_cast<GlobalVariable>(Op.getOperand(0));
+  if (!GV)
+    return {};
+
+  SmallVector<Value *, 2> PtrOps;
+  // Only consider GV that is exclusively used within current function.
+  auto It = GVToLdSt.find(GV);
+  assert(It != GVToLdSt.end() && "Expected GV to be in the map");
+  if (GV->getNumUses() == It->second.size()) {
+    for (auto *I : It->second)
+      if (auto *SI = dyn_cast<StoreInst>(I))
+        PtrOps.push_back(SI->getValueOperand());
+  }
+  return PtrOps;
+}
+
 bool InferAddressSpacesImpl::rewriteIntrinsicOperands(IntrinsicInst *II,
                                                       Value *OldV,
                                                       Value *NewV) const {
@@ -642,8 +653,7 @@ InferAddressSpacesImpl::collectFlatAddressExpressions(Function &F) const {
     PostorderStack.back().setInt(true);
     // Skip values with an assumed address space.
     if (TTI->getAssumedAddrSpace(TopVal) == UninitializedAddressSpace) {
-      for (Value *PtrOperand :
-           getPointerOperands(*TopVal, *DL, TTI, GVToLdSt)) {
+      for (Value *PtrOperand : getPointerOperands(*TopVal, *DL, TTI)) {
         appendsFlatAddressExpressionToPostorderStack(PtrOperand, PostorderStack,
                                                      Visited);
       }
@@ -1048,7 +1058,11 @@ bool InferAddressSpacesImpl::updateAddressSpace(
   } else {
     // Otherwise, infer the address space from its pointer operands.
     SmallVector<Constant *, 2> ConstantPtrOps;
-    for (Value *PtrOperand : getPointerOperands(V, *DL, TTI, GVToLdSt)) {
+    SmallVector<Value *, 2> PtrOperands = getPointerOperands(V, *DL, TTI);
+    SmallVector<Value *, 2> StoredPtrOperands =
+        getStoredPointerOperands(V, GVToLdSt);
+    PtrOperands.append(StoredPtrOperands.begin(), StoredPtrOperands.end());
+    for (Value *PtrOperand : PtrOperands) {
       auto I = InferredAddrSpace.find(PtrOperand);
       unsigned OperandAS;
       if (I == InferredAddrSpace.end()) {

>From 1a45f54e38a4f29c6262ed3eaa0c637b4dc947c2 Mon Sep 17 00:00:00 2001
From: Wenju He <wenju.he at intel.com>
Date: Fri, 26 Dec 2025 03:09:44 +0100
Subject: [PATCH 09/12] isAddressExpression: check store precedes load

---
 .../Transforms/Scalar/InferAddressSpaces.cpp  | 80 ++++++++++---------
 .../AMDGPU/builtin-assumed-addrspace.ll       |  2 +-
 .../AMDGPU/phinode-address-infer.ll           | 53 ++++++++++++
 3 files changed, 98 insertions(+), 37 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
index f17dce6516dbe..4cedb0f819217 100644
--- a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
+++ b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
@@ -189,6 +189,7 @@ class InferAddressSpacesImpl {
   const DominatorTree *DT = nullptr;
   const TargetTransformInfo *TTI = nullptr;
   const DataLayout *DL = nullptr;
+  GVToLoadStoreMapTy GVToLdSt;
 
   /// Target specific address space which uses of should be replaced if
   /// possible.
@@ -198,15 +199,13 @@ class InferAddressSpacesImpl {
   // false otherwise.
   bool updateAddressSpace(const Value &V,
                           ValueToAddrSpaceMapTy &InferredAddrSpace,
-                          PredicatedAddrSpaceMapTy &PredicatedAS,
-                          const GVToLoadStoreMapTy &GVToLdSt) const;
+                          PredicatedAddrSpaceMapTy &PredicatedAS) const;
 
   // Tries to infer the specific address space of each address expression in
   // Postorder.
   void inferAddressSpaces(ArrayRef<WeakTrackingVH> Postorder,
                           ValueToAddrSpaceMapTy &InferredAddrSpace,
-                          PredicatedAddrSpaceMapTy &PredicatedAS,
-                          const GVToLoadStoreMapTy &GVToLdSt) const;
+                          PredicatedAddrSpaceMapTy &PredicatedAS) const;
 
   bool isSafeToCastConstAddrSpace(Constant *C, unsigned NewAS) const;
 
@@ -238,8 +237,7 @@ class InferAddressSpacesImpl {
                                           PostorderStackTy &PostorderStack,
                                           DenseSet<Value *> &Visited) const;
 
-  std::pair<std::vector<WeakTrackingVH>, GVToLoadStoreMapTy>
-  collectFlatAddressExpressions(Function &F) const;
+  std::vector<WeakTrackingVH> collectFlatAddressExpressions(Function &F);
 
   Value *cloneValueWithNewAddressSpace(
       Value *V, unsigned NewAddrSpace,
@@ -314,8 +312,9 @@ static bool isNoopPtrIntCastPair(const Operator *I2P, const DataLayout &DL,
 //   - arguments
 //   - phi, bitcast, addrspacecast, and getelementptr operators
 //   - load
-static bool isAddressExpression(const Value &V, const DataLayout &DL,
-                                const TargetTransformInfo *TTI) {
+static bool isAddressExpression(
+    const Value &V, const DataLayout &DL, const TargetTransformInfo *TTI,
+    const DenseMap<GlobalVariable *, SmallVector<Instruction *, 4>> &GVToLdSt) {
 
   if (const Argument *Arg = dyn_cast<Argument>(&V))
     return Arg->getType()->isPointerTy() &&
@@ -334,7 +333,6 @@ static bool isAddressExpression(const Value &V, const DataLayout &DL,
   case Instruction::GetElementPtr:
     return true;
   case Instruction::Select:
-  case Instruction::Load:
     return Op->getType()->isPtrOrPtrVectorTy();
   case Instruction::Call: {
     const IntrinsicInst *II = dyn_cast<IntrinsicInst>(&V);
@@ -342,6 +340,23 @@ static bool isAddressExpression(const Value &V, const DataLayout &DL,
   }
   case Instruction::IntToPtr:
     return isNoopPtrIntCastPair(Op, DL, TTI);
+  case Instruction::Load: {
+    if (TTI->getAssumedAddrSpace(&V) != UninitializedAddressSpace)
+      return true;
+    if (auto *GV = dyn_cast<GlobalVariable>(Op->getOperand(0))) {
+      const SmallVectorImpl<Instruction *> &LdSts = GVToLdSt.at(GV);
+      if (GV->getNumUses() != LdSts.size())
+        return false;
+      bool StorePrecedesFirstLoad = false;
+      for (Instruction *I : LdSts) {
+        if (isa<StoreInst>(I))
+          StorePrecedesFirstLoad = true;
+        else
+          return StorePrecedesFirstLoad;
+      }
+    }
+    return false;
+  }
   default:
     // That value is an address expression if it has an assumed address space.
     return TTI->getAssumedAddrSpace(&V) != UninitializedAddressSpace;
@@ -381,7 +396,7 @@ getPointerOperands(const Value &V, const DataLayout &DL,
     return {P2I->getOperand(0)};
   }
   case Instruction::Load: {
-    assert(Op.getType()->isPtrOrPtrVectorTy());
+    // Pointer operands are collected in function getStoredPointerOperands.
     return {};
   }
   default:
@@ -397,20 +412,14 @@ static SmallVector<Value *, 2> getStoredPointerOperands(
   const Operator &Op = cast<Operator>(V);
   if (Op.getOpcode() != Instruction::Load)
     return {};
-  auto *GV = dyn_cast<GlobalVariable>(Op.getOperand(0));
-  if (!GV)
-    return {};
-
-  SmallVector<Value *, 2> PtrOps;
-  // Only consider GV that is exclusively used within current function.
-  auto It = GVToLdSt.find(GV);
-  assert(It != GVToLdSt.end() && "Expected GV to be in the map");
-  if (GV->getNumUses() == It->second.size()) {
-    for (auto *I : It->second)
+  if (auto *GV = dyn_cast<GlobalVariable>(Op.getOperand(0))) {
+    SmallVector<Value *, 2> PtrOps;
+    for (Instruction *I : GVToLdSt.at(GV))
       if (auto *SI = dyn_cast<StoreInst>(I))
         PtrOps.push_back(SI->getValueOperand());
+    return PtrOps;
   }
-  return PtrOps;
+  return {};
 }
 
 bool InferAddressSpacesImpl::rewriteIntrinsicOperands(IntrinsicInst *II,
@@ -554,21 +563,23 @@ void InferAddressSpacesImpl::appendsFlatAddressExpressionToPostorderStack(
   // expressions.
   if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
     // TODO: Look in non-address parts, like icmp operands.
-    if (isAddressExpression(*CE, *DL, TTI) && Visited.insert(CE).second)
+    if (isAddressExpression(*CE, *DL, TTI, GVToLdSt) &&
+        Visited.insert(CE).second)
       PostorderStack.emplace_back(CE, false);
 
     return;
   }
 
   if (V->getType()->getPointerAddressSpace() == FlatAddrSpace &&
-      isAddressExpression(*V, *DL, TTI)) {
+      isAddressExpression(*V, *DL, TTI, GVToLdSt)) {
     if (Visited.insert(V).second) {
       PostorderStack.emplace_back(V, false);
 
       if (auto *Op = dyn_cast<Operator>(V))
         for (auto &O : Op->operands())
           if (ConstantExpr *CE = dyn_cast<ConstantExpr>(O))
-            if (isAddressExpression(*CE, *DL, TTI) && Visited.insert(CE).second)
+            if (isAddressExpression(*CE, *DL, TTI, GVToLdSt) &&
+                Visited.insert(CE).second)
               PostorderStack.emplace_back(CE, false);
     }
   }
@@ -576,14 +587,13 @@ void InferAddressSpacesImpl::appendsFlatAddressExpressionToPostorderStack(
 
 // Returns all flat address expressions in function F. The elements are ordered
 // in postorder.
-std::pair<std::vector<WeakTrackingVH>, GVToLoadStoreMapTy>
-InferAddressSpacesImpl::collectFlatAddressExpressions(Function &F) const {
+std::vector<WeakTrackingVH>
+InferAddressSpacesImpl::collectFlatAddressExpressions(Function &F) {
   // This function implements a non-recursive postorder traversal of a partial
   // use-def graph of function F.
   PostorderStackTy PostorderStack;
   // The set of visited expressions.
   DenseSet<Value *> Visited;
-  GVToLoadStoreMapTy GVToLdSt;
 
   auto PushPtrOperand = [&](Value *Ptr) {
     appendsFlatAddressExpressionToPostorderStack(Ptr, PostorderStack, Visited);
@@ -659,7 +669,7 @@ InferAddressSpacesImpl::collectFlatAddressExpressions(Function &F) const {
       }
     }
   }
-  return std::make_pair(Postorder, GVToLdSt);
+  return Postorder;
 }
 
 // Inserts an addrspacecast for a phi node operand, handling the proper
@@ -933,7 +943,7 @@ Value *InferAddressSpacesImpl::cloneValueWithNewAddressSpace(
     SmallVectorImpl<const Use *> *PoisonUsesToFix) const {
   // All values in Postorder are flat address expressions.
   assert(V->getType()->getPointerAddressSpace() == FlatAddrSpace &&
-         isAddressExpression(*V, *DL, TTI));
+         isAddressExpression(*V, *DL, TTI, GVToLdSt));
 
   if (auto *Arg = dyn_cast<Argument>(V)) {
     // Arguments are address space casted in the function body, as we do not
@@ -994,13 +1004,13 @@ bool InferAddressSpacesImpl::run(Function &CurFn) {
   }
 
   // Collects all flat address expressions in postorder.
-  auto [Postorder, GVToLdSt] = collectFlatAddressExpressions(*F);
+  std::vector<WeakTrackingVH> Postorder = collectFlatAddressExpressions(*F);
 
   // Runs a data-flow analysis to refine the address spaces of every expression
   // in Postorder.
   ValueToAddrSpaceMapTy InferredAddrSpace;
   PredicatedAddrSpaceMapTy PredicatedAS;
-  inferAddressSpaces(Postorder, InferredAddrSpace, PredicatedAS, GVToLdSt);
+  inferAddressSpaces(Postorder, InferredAddrSpace, PredicatedAS);
 
   // Changes the address spaces of the flat address expressions who are inferred
   // to point to a specific address space.
@@ -1013,8 +1023,7 @@ bool InferAddressSpacesImpl::run(Function &CurFn) {
 void InferAddressSpacesImpl::inferAddressSpaces(
     ArrayRef<WeakTrackingVH> Postorder,
     ValueToAddrSpaceMapTy &InferredAddrSpace,
-    PredicatedAddrSpaceMapTy &PredicatedAS,
-    const GVToLoadStoreMapTy &GVToLdSt) const {
+    PredicatedAddrSpaceMapTy &PredicatedAS) const {
   SetVector<Value *> Worklist(llvm::from_range, Postorder);
   // Initially, all expressions are in the uninitialized address space.
   for (Value *V : Postorder)
@@ -1025,7 +1034,7 @@ void InferAddressSpacesImpl::inferAddressSpaces(
 
     // Try to update the address space of the stack top according to the
     // address spaces of its operands.
-    if (!updateAddressSpace(*V, InferredAddrSpace, PredicatedAS, GVToLdSt))
+    if (!updateAddressSpace(*V, InferredAddrSpace, PredicatedAS))
       continue;
 
     for (Value *User : V->users()) {
@@ -1077,8 +1086,7 @@ InferAddressSpacesImpl::getPredicatedAddrSpace(const Value &Ptr,
 
 bool InferAddressSpacesImpl::updateAddressSpace(
     const Value &V, ValueToAddrSpaceMapTy &InferredAddrSpace,
-    PredicatedAddrSpaceMapTy &PredicatedAS,
-    const GVToLoadStoreMapTy &GVToLdSt) const {
+    PredicatedAddrSpaceMapTy &PredicatedAS) const {
   assert(InferredAddrSpace.count(&V));
 
   LLVM_DEBUG(dbgs() << "Updating the address space of\n  " << V << '\n');
diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/builtin-assumed-addrspace.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/builtin-assumed-addrspace.ll
index 32dca860a7ded..5e327a65c2b2a 100644
--- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/builtin-assumed-addrspace.ll
+++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/builtin-assumed-addrspace.ll
@@ -276,8 +276,8 @@ define float @contradictory_assume_after_gep_same_block(ptr %p) {
 ; CHECK-NEXT:    [[WORKITEM_ID_X:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x()
 ; CHECK-NEXT:    [[IDXPROM:%.*]] = zext i32 [[WORKITEM_ID_X]] to i64
 ; CHECK-NEXT:    [[TMP1:%.*]] = addrspacecast ptr [[P]] to ptr addrspace(3)
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds float, ptr addrspace(3) [[TMP1]], i64 [[IDXPROM]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[P]], i64 [[IDXPROM]]
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds float, ptr addrspace(3) [[TMP1]], i64 [[IDXPROM]]
 ; CHECK-NEXT:    [[IS_PRIVATE:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[TMP2]])
 ; CHECK-NEXT:    tail call void @llvm.assume(i1 false)
 ; CHECK-NEXT:    [[LOAD:%.*]] = load float, ptr addrspace(3) [[GEP]], align 4
diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/phinode-address-infer.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/phinode-address-infer.ll
index 319c26a24b271..0d5dd419d2a91 100644
--- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/phinode-address-infer.ll
+++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/phinode-address-infer.ll
@@ -1,6 +1,9 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
 ; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -S -passes='require<domtree>,infer-address-spaces' %s | FileCheck %s
 
+ at s1 = internal addrspace(3) global ptr poison, align 8
+ at s2 = internal addrspace(3) global ptr poison, align 8
+
 define void @test(ptr %lhs_ptr, ptr %rhs_ptr) {
 ; CHECK-LABEL: define void @test(
 ; CHECK-SAME: ptr [[LHS_PTR:%.*]], ptr [[RHS_PTR:%.*]]) #[[ATTR0:[0-9]+]] {
@@ -51,5 +54,55 @@ if.sink.split:                                    ; preds = %if.else, %if.then
   ret void
 }
 
+define void @test_gv() {
+; CHECK-LABEL: define void @test_gv(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[PTR_1:%.*]] = load ptr, ptr addrspace(3) @s1, align 8
+; CHECK-NEXT:    [[TMP0:%.*]] = addrspacecast ptr [[PTR_1]] to ptr addrspace(3)
+; CHECK-NEXT:    [[BOOL_1:%.*]] = tail call i1 @llvm.amdgcn.is.shared(ptr [[PTR_1]])
+; CHECK-NEXT:    tail call void @llvm.assume(i1 [[BOOL_1]])
+; CHECK-NEXT:    [[PTR_2:%.*]] = load ptr, ptr addrspace(3) @s2, align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = addrspacecast ptr [[PTR_2]] to ptr addrspace(3)
+; CHECK-NEXT:    [[BOOL_2:%.*]] = tail call i1 @llvm.amdgcn.is.shared(ptr [[PTR_2]])
+; CHECK-NEXT:    tail call void @llvm.assume(i1 [[BOOL_2]])
+; CHECK-NEXT:    br i1 poison, label %[[IF_THEN:.*]], label %[[IF_ELSE:.*]]
+; CHECK:       [[IF_THEN]]:
+; CHECK-NEXT:    [[V1:%.*]] = load i32, ptr null, align 4
+; CHECK-NEXT:    br label %[[IF_SINK_SPLIT:.*]]
+; CHECK:       [[IF_ELSE]]:
+; CHECK-NEXT:    [[V2:%.*]] = load i32, ptr null, align 4
+; CHECK-NEXT:    br label %[[IF_SINK_SPLIT]]
+; CHECK:       [[IF_SINK_SPLIT]]:
+; CHECK-NEXT:    [[PTR_SINK:%.*]] = phi ptr addrspace(3) [ [[TMP0]], %[[IF_THEN]] ], [ [[TMP1]], %[[IF_ELSE]] ]
+; CHECK-NEXT:    [[V_SINK:%.*]] = phi i32 [ [[V1]], %[[IF_THEN]] ], [ [[V2]], %[[IF_ELSE]] ]
+; CHECK-NEXT:    store i32 [[V_SINK]], ptr addrspace(3) [[PTR_SINK]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %ptr.1 = load ptr, ptr addrspace(3) @s1, align 8
+  %bool.1 = tail call i1 @llvm.amdgcn.is.shared(ptr %ptr.1)
+  tail call void @llvm.assume(i1 %bool.1)
+
+  %ptr.2 = load ptr, ptr addrspace(3) @s2, align 8
+  %bool.2 = tail call i1 @llvm.amdgcn.is.shared(ptr %ptr.2)
+  tail call void @llvm.assume(i1 %bool.2)
+  br i1 poison, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  %v1 = load i32, ptr null, align 4
+  br label %if.sink.split
+
+if.else:                                          ; preds = %entry
+  %v2 = load i32, ptr null, align 4
+  br label %if.sink.split
+
+if.sink.split:                                    ; preds = %if.else, %if.then
+  %ptr.sink = phi ptr [ %ptr.1, %if.then ], [ %ptr.2, %if.else ]
+  %v.sink = phi i32 [ %v1, %if.then ], [ %v2, %if.else ]
+  store i32 %v.sink, ptr %ptr.sink, align 4
+  ret void
+}
+
 declare void @llvm.assume(i1 noundef)
 declare i1 @llvm.amdgcn.is.shared(ptr)

>From 5366daa3dfff7dac97dcdb2159d03b301aa1e63b Mon Sep 17 00:00:00 2001
From: Wenju He <wenju.he at intel.com>
Date: Fri, 26 Dec 2025 10:53:14 +0800
Subject: [PATCH 10/12] Update
 llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp

Co-authored-by: Copilot <175728472+Copilot at users.noreply.github.com>
---
 llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
index 4cedb0f819217..ef3e06662883e 100644
--- a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
+++ b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
@@ -611,9 +611,9 @@ InferAddressSpacesImpl::collectFlatAddressExpressions(Function &F) {
       if (auto *GV = dyn_cast<GlobalVariable>(PtrOp))
         GVToLdSt[GV].push_back(LI);
     } else if (auto *SI = dyn_cast<StoreInst>(&I)) {
-      Value *V = SI->getValueOperand();
-      if (V->getType()->isPtrOrPtrVectorTy())
-        PushPtrOperand(V);
+      Value *StoreVal = SI->getValueOperand();
+      if (StoreVal->getType()->isPtrOrPtrVectorTy())
+        PushPtrOperand(StoreVal);
       Value *PtrOp = SI->getPointerOperand();
       PushPtrOperand(PtrOp);
       if (auto *GV = dyn_cast<GlobalVariable>(PtrOp))

>From 47af6192bb919bd25379ec4a43113119b557ef26 Mon Sep 17 00:00:00 2001
From: Wenju He <wenju.he at intel.com>
Date: Fri, 26 Dec 2025 10:53:29 +0800
Subject: [PATCH 11/12] Update
 llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp

Co-authored-by: Copilot <175728472+Copilot at users.noreply.github.com>
---
 llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
index ef3e06662883e..c689aeadeeaec 100644
--- a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
+++ b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
@@ -405,7 +405,8 @@ getPointerOperands(const Value &V, const DataLayout &DL,
 }
 
 // Given a load from a global variable G, collect the pointer values
-// stored into G.
+// stored into G. This only returns stored pointer operands when G is used
+// exclusively within the current function, as captured in GVToLdSt.
 static SmallVector<Value *, 2> getStoredPointerOperands(
     const Value &V,
     const DenseMap<GlobalVariable *, SmallVector<Instruction *, 4>> &GVToLdSt) {

>From e9ab1552f118ce7e1e0f4f5c55ff981ab87d34c8 Mon Sep 17 00:00:00 2001
From: Wenju He <wenju.he at intel.com>
Date: Fri, 26 Dec 2025 11:01:08 +0800
Subject: [PATCH 12/12] Update
 llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp

Co-authored-by: Copilot <175728472+Copilot at users.noreply.github.com>
---
 llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
index c689aeadeeaec..c6d8e225d45f9 100644
--- a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
+++ b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
@@ -612,13 +612,13 @@ InferAddressSpacesImpl::collectFlatAddressExpressions(Function &F) {
       if (auto *GV = dyn_cast<GlobalVariable>(PtrOp))
         GVToLdSt[GV].push_back(LI);
     } else if (auto *SI = dyn_cast<StoreInst>(&I)) {
-      Value *StoreVal = SI->getValueOperand();
-      if (StoreVal->getType()->isPtrOrPtrVectorTy())
-        PushPtrOperand(StoreVal);
       Value *PtrOp = SI->getPointerOperand();
       PushPtrOperand(PtrOp);
       if (auto *GV = dyn_cast<GlobalVariable>(PtrOp))
         GVToLdSt[GV].push_back(SI);
+      Value *StoreVal = SI->getValueOperand();
+      if (StoreVal->getType()->isPtrOrPtrVectorTy())
+        PushPtrOperand(StoreVal);
     } else if (auto *RMW = dyn_cast<AtomicRMWInst>(&I))
       PushPtrOperand(RMW->getPointerOperand());
     else if (auto *CmpX = dyn_cast<AtomicCmpXchgInst>(&I))


