[llvm-branch-commits] [llvm] release/18.x: [GlobalISel] Fix store merging incorrectly classifying an unknown index expr as 0. (#90375) (PR #90673)

Tom Stellard via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Wed May 1 11:35:19 PDT 2024


https://github.com/tstellar updated https://github.com/llvm/llvm-project/pull/90673

>From ece9d35f1a705ab8d66895c6d985907f2b9a2c0c Mon Sep 17 00:00:00 2001
From: Amara Emerson <amara at apple.com>
Date: Wed, 1 May 2024 05:42:14 +0800
Subject: [PATCH] [GlobalISel] Fix store merging incorrectly classifying an
 unknown index expr as 0. (#90375)

During analysis, we incorrectly leave the offset part of an address info
struct
as zero, when in actual fact we failed to decompose it into base +
offset.
This results in incorrectly assuming that the address is adjacent to
another store
addr. To fix this we wrap the offset in an optional<> so we can
distinguish between
real zero and unknown.

Fixes issue #90242

(cherry picked from commit 19f4d68252b70c81ebb1686a5a31069eda5373de)
---
 .../llvm/CodeGen/GlobalISel/LoadStoreOpt.h    | 20 ++++--
 llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp  | 48 ++++++++------
 .../AArch64/GlobalISel/store-merging.ll       | 19 ++++++
 .../AArch64/GlobalISel/store-merging.mir      | 62 ++++++++++++++++---
 4 files changed, 115 insertions(+), 34 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LoadStoreOpt.h b/llvm/include/llvm/CodeGen/GlobalISel/LoadStoreOpt.h
index 0f20a33f3a755c..7990997835d019 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LoadStoreOpt.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LoadStoreOpt.h
@@ -35,11 +35,23 @@ struct LegalityQuery;
 class MachineRegisterInfo;
 namespace GISelAddressing {
 /// Helper struct to store a base, index and offset that forms an address
-struct BaseIndexOffset {
+class BaseIndexOffset {
+private:
   Register BaseReg;
   Register IndexReg;
-  int64_t Offset = 0;
-  bool IsIndexSignExt = false;
+  std::optional<int64_t> Offset;
+
+public:
+  BaseIndexOffset() = default;
+  Register getBase() { return BaseReg; }
+  Register getBase() const { return BaseReg; }
+  Register getIndex() { return IndexReg; }
+  Register getIndex() const { return IndexReg; }
+  void setBase(Register NewBase) { BaseReg = NewBase; }
+  void setIndex(Register NewIndex) { IndexReg = NewIndex; }
+  void setOffset(std::optional<int64_t> NewOff) { Offset = NewOff; }
+  bool hasValidOffset() const { return Offset.has_value(); }
+  int64_t getOffset() const { return *Offset; }
 };
 
 /// Returns a BaseIndexOffset which describes the pointer in \p Ptr.
@@ -89,7 +101,7 @@ class LoadStoreOpt : public MachineFunctionPass {
     // order stores are writing to incremeneting consecutive addresses. So when
     // we walk the block in reverse order, the next eligible store must write to
     // an offset one store width lower than CurrentLowestOffset.
-    uint64_t CurrentLowestOffset;
+    int64_t CurrentLowestOffset;
     SmallVector<GStore *> Stores;
     // A vector of MachineInstr/unsigned pairs to denote potential aliases that
     // need to be checked before the candidate is considered safe to merge. The
diff --git a/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp b/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp
index 246aa88b09acf6..ee499c41c558c3 100644
--- a/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp
@@ -84,21 +84,20 @@ BaseIndexOffset GISelAddressing::getPointerInfo(Register Ptr,
                                                 MachineRegisterInfo &MRI) {
   BaseIndexOffset Info;
   Register PtrAddRHS;
-  if (!mi_match(Ptr, MRI, m_GPtrAdd(m_Reg(Info.BaseReg), m_Reg(PtrAddRHS)))) {
-    Info.BaseReg = Ptr;
-    Info.IndexReg = Register();
-    Info.IsIndexSignExt = false;
+  Register BaseReg;
+  if (!mi_match(Ptr, MRI, m_GPtrAdd(m_Reg(BaseReg), m_Reg(PtrAddRHS)))) {
+    Info.setBase(Ptr);
+    Info.setOffset(0);
     return Info;
   }
-
+  Info.setBase(BaseReg);
   auto RHSCst = getIConstantVRegValWithLookThrough(PtrAddRHS, MRI);
   if (RHSCst)
-    Info.Offset = RHSCst->Value.getSExtValue();
+    Info.setOffset(RHSCst->Value.getSExtValue());
 
   // Just recognize a simple case for now. In future we'll need to match
   // indexing patterns for base + index + constant.
-  Info.IndexReg = PtrAddRHS;
-  Info.IsIndexSignExt = false;
+  Info.setIndex(PtrAddRHS);
   return Info;
 }
 
@@ -114,15 +113,16 @@ bool GISelAddressing::aliasIsKnownForLoadStore(const MachineInstr &MI1,
   BaseIndexOffset BasePtr0 = getPointerInfo(LdSt1->getPointerReg(), MRI);
   BaseIndexOffset BasePtr1 = getPointerInfo(LdSt2->getPointerReg(), MRI);
 
-  if (!BasePtr0.BaseReg.isValid() || !BasePtr1.BaseReg.isValid())
+  if (!BasePtr0.getBase().isValid() || !BasePtr1.getBase().isValid())
     return false;
 
   int64_t Size1 = LdSt1->getMemSize();
   int64_t Size2 = LdSt2->getMemSize();
 
   int64_t PtrDiff;
-  if (BasePtr0.BaseReg == BasePtr1.BaseReg) {
-    PtrDiff = BasePtr1.Offset - BasePtr0.Offset;
+  if (BasePtr0.getBase() == BasePtr1.getBase() && BasePtr0.hasValidOffset() &&
+      BasePtr1.hasValidOffset()) {
+    PtrDiff = BasePtr1.getOffset() - BasePtr0.getOffset();
     // If the size of memory access is unknown, do not use it to do analysis.
     // One example of unknown size memory access is to load/store scalable
     // vector objects on the stack.
@@ -151,8 +151,8 @@ bool GISelAddressing::aliasIsKnownForLoadStore(const MachineInstr &MI1,
   // able to calculate their relative offset if at least one arises
   // from an alloca. However, these allocas cannot overlap and we
   // can infer there is no alias.
-  auto *Base0Def = getDefIgnoringCopies(BasePtr0.BaseReg, MRI);
-  auto *Base1Def = getDefIgnoringCopies(BasePtr1.BaseReg, MRI);
+  auto *Base0Def = getDefIgnoringCopies(BasePtr0.getBase(), MRI);
+  auto *Base1Def = getDefIgnoringCopies(BasePtr1.getBase(), MRI);
   if (!Base0Def || !Base1Def)
     return false; // Couldn't tell anything.
 
@@ -520,16 +520,20 @@ bool LoadStoreOpt::addStoreToCandidate(GStore &StoreMI,
 
   Register StoreAddr = StoreMI.getPointerReg();
   auto BIO = getPointerInfo(StoreAddr, *MRI);
-  Register StoreBase = BIO.BaseReg;
-  uint64_t StoreOffCst = BIO.Offset;
+  Register StoreBase = BIO.getBase();
   if (C.Stores.empty()) {
+    C.BasePtr = StoreBase;
+    if (!BIO.hasValidOffset()) {
+      C.CurrentLowestOffset = 0;
+    } else {
+      C.CurrentLowestOffset = BIO.getOffset();
+    }
     // This is the first store of the candidate.
     // If the offset can't possibly allow for a lower addressed store with the
     // same base, don't bother adding it.
-    if (StoreOffCst < ValueTy.getSizeInBytes())
+    if (BIO.hasValidOffset() &&
+        BIO.getOffset() < static_cast<int64_t>(ValueTy.getSizeInBytes()))
       return false;
-    C.BasePtr = StoreBase;
-    C.CurrentLowestOffset = StoreOffCst;
     C.Stores.emplace_back(&StoreMI);
     LLVM_DEBUG(dbgs() << "Starting a new merge candidate group with: "
                       << StoreMI);
@@ -549,8 +553,12 @@ bool LoadStoreOpt::addStoreToCandidate(GStore &StoreMI,
   // writes to the next lowest adjacent address.
   if (C.BasePtr != StoreBase)
     return false;
-  if ((C.CurrentLowestOffset - ValueTy.getSizeInBytes()) !=
-      static_cast<uint64_t>(StoreOffCst))
+  // If we don't have a valid offset, we can't guarantee to be an adjacent
+  // offset.
+  if (!BIO.hasValidOffset())
+    return false;
+  if ((C.CurrentLowestOffset -
+       static_cast<int64_t>(ValueTy.getSizeInBytes())) != BIO.getOffset())
     return false;
 
   // This writes to an adjacent address. Allow it.
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/store-merging.ll b/llvm/test/CodeGen/AArch64/GlobalISel/store-merging.ll
index 07744dada4f1fa..b1166e683ec74e 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/store-merging.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/store-merging.ll
@@ -323,3 +323,22 @@ define i32 @test_alias_3xs16(ptr %ptr, ptr %ptr2, ptr %ptr3, ptr noalias %safe_p
   store i32 14, ptr %addr4
   ret i32 %safeld
 }
+
+ at G = external global [10 x i32]
+
+define void @invalid_zero_offset_no_merge(i64 %0) {
+; CHECK-LABEL: invalid_zero_offset_no_merge:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:  Lloh0:
+; CHECK-NEXT:    adrp x8, _G at GOTPAGE
+; CHECK-NEXT:  Lloh1:
+; CHECK-NEXT:    ldr x8, [x8, _G at GOTPAGEOFF]
+; CHECK-NEXT:    str wzr, [x8, x0, lsl #2]
+; CHECK-NEXT:    str wzr, [x8, #4]
+; CHECK-NEXT:    ret
+; CHECK-NEXT:    .loh AdrpLdrGot Lloh0, Lloh1
+  %2 = getelementptr [10 x i32], ptr @G, i64 0, i64 %0
+  store i32 0, ptr %2, align 4
+  store i32 0, ptr getelementptr inbounds ([10 x i32], ptr @G, i64 0, i64 1), align 4
+  ret void
+}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/store-merging.mir b/llvm/test/CodeGen/AArch64/GlobalISel/store-merging.mir
index e98e1ce599f2f6..69457a8cc0c19f 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/store-merging.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/store-merging.mir
@@ -162,6 +162,13 @@
     ret void
   }
 
+  @G = external global [10 x i32]
+  define void @invalid_zero_offset_no_merge(i64 %0) {
+    %2 = getelementptr [10 x i32], ptr @G, i64 0, i64 %0
+    store i32 0, ptr %2, align 4
+    store i32 0, ptr getelementptr inbounds ([10 x i32], ptr @G, i64 0, i64 1), align 4
+    ret void
+  }
 ...
 ---
 name:            test_simple_2xs8
@@ -582,13 +589,11 @@ liveins:
 frameInfo:
   maxAlignment:    1
 machineFunctionInfo: {}
+# The store to ptr2 prevents merging into a single store.
+# We can still merge the stores into addr1 and addr2.
 body:             |
   bb.1 (%ir-block.0):
     liveins: $x0, $x1
-
-    ; The store to ptr2 prevents merging into a single store.
-    ; We can still merge the stores into addr1 and addr2.
-
     ; CHECK-LABEL: name: test_alias_4xs16
     ; CHECK: liveins: $x0, $x1
     ; CHECK-NEXT: {{  $}}
@@ -639,10 +644,10 @@ liveins:
 frameInfo:
   maxAlignment:    1
 machineFunctionInfo: {}
+# Here store of 5 and 9 can be merged, others have aliasing barriers.
 body:             |
   bb.1 (%ir-block.0):
     liveins: $x0, $x1, $x2
-    ; Here store of 5 and 9 can be merged, others have aliasing barriers.
     ; CHECK-LABEL: name: test_alias2_4xs16
     ; CHECK: liveins: $x0, $x1, $x2
     ; CHECK-NEXT: {{  $}}
@@ -698,12 +703,11 @@ liveins:
 frameInfo:
   maxAlignment:    1
 machineFunctionInfo: {}
+# No merging can be done here.
 body:             |
   bb.1 (%ir-block.0):
     liveins: $x0, $x1, $x2, $x3
 
-    ; No merging can be done here.
-
     ; CHECK-LABEL: name: test_alias3_4xs16
     ; CHECK: liveins: $x0, $x1, $x2, $x3
     ; CHECK-NEXT: {{  $}}
@@ -767,12 +771,10 @@ stack:
   - { id: 0, name: a1, size: 24, alignment: 4 }
   - { id: 1, name: a2, size: 4, alignment: 4 }
 machineFunctionInfo: {}
+# Can merge because the load is from a different alloca and can't alias.
 body:             |
   bb.1 (%ir-block.0):
     liveins: $x0
-
-    ; Can merge because the load is from a different alloca and can't alias.
-
     ; CHECK-LABEL: name: test_alias_allocas_2xs32
     ; CHECK: liveins: $x0
     ; CHECK-NEXT: {{  $}}
@@ -826,3 +828,43 @@ body:             |
     RET_ReallyLR
 
 ...
+---
+name:            invalid_zero_offset_no_merge
+alignment:       4
+tracksRegLiveness: true
+liveins:
+  - { reg: '$x0' }
+frameInfo:
+  maxAlignment:    1
+machineFunctionInfo: {}
+body:             |
+  bb.1 (%ir-block.1):
+    liveins: $x0
+
+    ; CHECK-LABEL: name: invalid_zero_offset_no_merge
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[C]](s64)
+    ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @G
+    ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[GV]], [[SHL]](s64)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: G_STORE [[C1]](s32), [[PTR_ADD]](p0) :: (store (s32) into %ir.2)
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw G_PTR_ADD [[GV]], [[C2]](s64)
+    ; CHECK-NEXT: G_STORE [[C1]](s32), [[PTR_ADD1]](p0) :: (store (s32) into `ptr getelementptr inbounds ([10 x i32], ptr @G, i64 0, i64 1)`)
+    ; CHECK-NEXT: RET_ReallyLR
+    %0:_(s64) = COPY $x0
+    %9:_(s64) = G_CONSTANT i64 2
+    %3:_(s64) = G_SHL %0, %9(s64)
+    %1:_(p0) = G_GLOBAL_VALUE @G
+    %4:_(p0) = G_PTR_ADD %1, %3(s64)
+    %6:_(s32) = G_CONSTANT i32 0
+    G_STORE %6(s32), %4(p0) :: (store (s32) into %ir.2)
+    %8:_(s64) = G_CONSTANT i64 4
+    %7:_(p0) = nuw G_PTR_ADD %1, %8(s64)
+    G_STORE %6(s32), %7(p0) :: (store (s32) into `ptr getelementptr inbounds ([10 x i32], ptr @G, i64 0, i64 1)`)
+    RET_ReallyLR
+
+...



More information about the llvm-branch-commits mailing list