[llvm-branch-commits] [llvm] release/18.x: [GlobalISel] Fix store merging incorrectly classifying an unknown index expr as 0. (#90375) (PR #90673)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Tue Apr 30 14:55:41 PDT 2024
https://github.com/llvmbot created https://github.com/llvm/llvm-project/pull/90673
Backport 19f4d68252b70c81ebb1686a5a31069eda5373de
Requested by: @aemerson
>From 740c8ec340a1ca74e93b5964bd3a0e5aeb0a4910 Mon Sep 17 00:00:00 2001
From: Amara Emerson <amara at apple.com>
Date: Wed, 1 May 2024 05:42:14 +0800
Subject: [PATCH] [GlobalISel] Fix store merging incorrectly classifying an
unknown index expr as 0. (#90375)
During analysis, we incorrectly leave the offset part of an address info
struct
as zero, when in actual fact we failed to decompose it into base +
offset.
This results in incorrectly assuming that the address is adjacent to
another store
addr. To fix this we wrap the offset in an optional<> so we can
distinguish between
real zero and unknown.
Fixes issue #90242
(cherry picked from commit 19f4d68252b70c81ebb1686a5a31069eda5373de)
---
.../llvm/CodeGen/GlobalISel/LoadStoreOpt.h | 20 ++++--
llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp | 48 ++++++++------
.../AArch64/GlobalISel/store-merging.ll | 19 ++++++
.../AArch64/GlobalISel/store-merging.mir | 62 ++++++++++++++++---
4 files changed, 115 insertions(+), 34 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LoadStoreOpt.h b/llvm/include/llvm/CodeGen/GlobalISel/LoadStoreOpt.h
index 0f20a33f3a755c..7990997835d019 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LoadStoreOpt.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LoadStoreOpt.h
@@ -35,11 +35,23 @@ struct LegalityQuery;
class MachineRegisterInfo;
namespace GISelAddressing {
/// Helper struct to store a base, index and offset that forms an address
-struct BaseIndexOffset {
+class BaseIndexOffset {
+private:
Register BaseReg;
Register IndexReg;
- int64_t Offset = 0;
- bool IsIndexSignExt = false;
+ std::optional<int64_t> Offset;
+
+public:
+ BaseIndexOffset() = default;
+ Register getBase() { return BaseReg; }
+ Register getBase() const { return BaseReg; }
+ Register getIndex() { return IndexReg; }
+ Register getIndex() const { return IndexReg; }
+ void setBase(Register NewBase) { BaseReg = NewBase; }
+ void setIndex(Register NewIndex) { IndexReg = NewIndex; }
+ void setOffset(std::optional<int64_t> NewOff) { Offset = NewOff; }
+ bool hasValidOffset() const { return Offset.has_value(); }
+ int64_t getOffset() const { return *Offset; }
};
/// Returns a BaseIndexOffset which describes the pointer in \p Ptr.
@@ -89,7 +101,7 @@ class LoadStoreOpt : public MachineFunctionPass {
// order stores are writing to incremeneting consecutive addresses. So when
// we walk the block in reverse order, the next eligible store must write to
// an offset one store width lower than CurrentLowestOffset.
- uint64_t CurrentLowestOffset;
+ int64_t CurrentLowestOffset;
SmallVector<GStore *> Stores;
// A vector of MachineInstr/unsigned pairs to denote potential aliases that
// need to be checked before the candidate is considered safe to merge. The
diff --git a/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp b/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp
index 246aa88b09acf6..ee499c41c558c3 100644
--- a/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp
@@ -84,21 +84,20 @@ BaseIndexOffset GISelAddressing::getPointerInfo(Register Ptr,
MachineRegisterInfo &MRI) {
BaseIndexOffset Info;
Register PtrAddRHS;
- if (!mi_match(Ptr, MRI, m_GPtrAdd(m_Reg(Info.BaseReg), m_Reg(PtrAddRHS)))) {
- Info.BaseReg = Ptr;
- Info.IndexReg = Register();
- Info.IsIndexSignExt = false;
+ Register BaseReg;
+ if (!mi_match(Ptr, MRI, m_GPtrAdd(m_Reg(BaseReg), m_Reg(PtrAddRHS)))) {
+ Info.setBase(Ptr);
+ Info.setOffset(0);
return Info;
}
-
+ Info.setBase(BaseReg);
auto RHSCst = getIConstantVRegValWithLookThrough(PtrAddRHS, MRI);
if (RHSCst)
- Info.Offset = RHSCst->Value.getSExtValue();
+ Info.setOffset(RHSCst->Value.getSExtValue());
// Just recognize a simple case for now. In future we'll need to match
// indexing patterns for base + index + constant.
- Info.IndexReg = PtrAddRHS;
- Info.IsIndexSignExt = false;
+ Info.setIndex(PtrAddRHS);
return Info;
}
@@ -114,15 +113,16 @@ bool GISelAddressing::aliasIsKnownForLoadStore(const MachineInstr &MI1,
BaseIndexOffset BasePtr0 = getPointerInfo(LdSt1->getPointerReg(), MRI);
BaseIndexOffset BasePtr1 = getPointerInfo(LdSt2->getPointerReg(), MRI);
- if (!BasePtr0.BaseReg.isValid() || !BasePtr1.BaseReg.isValid())
+ if (!BasePtr0.getBase().isValid() || !BasePtr1.getBase().isValid())
return false;
int64_t Size1 = LdSt1->getMemSize();
int64_t Size2 = LdSt2->getMemSize();
int64_t PtrDiff;
- if (BasePtr0.BaseReg == BasePtr1.BaseReg) {
- PtrDiff = BasePtr1.Offset - BasePtr0.Offset;
+ if (BasePtr0.getBase() == BasePtr1.getBase() && BasePtr0.hasValidOffset() &&
+ BasePtr1.hasValidOffset()) {
+ PtrDiff = BasePtr1.getOffset() - BasePtr0.getOffset();
// If the size of memory access is unknown, do not use it to do analysis.
// One example of unknown size memory access is to load/store scalable
// vector objects on the stack.
@@ -151,8 +151,8 @@ bool GISelAddressing::aliasIsKnownForLoadStore(const MachineInstr &MI1,
// able to calculate their relative offset if at least one arises
// from an alloca. However, these allocas cannot overlap and we
// can infer there is no alias.
- auto *Base0Def = getDefIgnoringCopies(BasePtr0.BaseReg, MRI);
- auto *Base1Def = getDefIgnoringCopies(BasePtr1.BaseReg, MRI);
+ auto *Base0Def = getDefIgnoringCopies(BasePtr0.getBase(), MRI);
+ auto *Base1Def = getDefIgnoringCopies(BasePtr1.getBase(), MRI);
if (!Base0Def || !Base1Def)
return false; // Couldn't tell anything.
@@ -520,16 +520,20 @@ bool LoadStoreOpt::addStoreToCandidate(GStore &StoreMI,
Register StoreAddr = StoreMI.getPointerReg();
auto BIO = getPointerInfo(StoreAddr, *MRI);
- Register StoreBase = BIO.BaseReg;
- uint64_t StoreOffCst = BIO.Offset;
+ Register StoreBase = BIO.getBase();
if (C.Stores.empty()) {
+ C.BasePtr = StoreBase;
+ if (!BIO.hasValidOffset()) {
+ C.CurrentLowestOffset = 0;
+ } else {
+ C.CurrentLowestOffset = BIO.getOffset();
+ }
// This is the first store of the candidate.
// If the offset can't possibly allow for a lower addressed store with the
// same base, don't bother adding it.
- if (StoreOffCst < ValueTy.getSizeInBytes())
+ if (BIO.hasValidOffset() &&
+ BIO.getOffset() < static_cast<int64_t>(ValueTy.getSizeInBytes()))
return false;
- C.BasePtr = StoreBase;
- C.CurrentLowestOffset = StoreOffCst;
C.Stores.emplace_back(&StoreMI);
LLVM_DEBUG(dbgs() << "Starting a new merge candidate group with: "
<< StoreMI);
@@ -549,8 +553,12 @@ bool LoadStoreOpt::addStoreToCandidate(GStore &StoreMI,
// writes to the next lowest adjacent address.
if (C.BasePtr != StoreBase)
return false;
- if ((C.CurrentLowestOffset - ValueTy.getSizeInBytes()) !=
- static_cast<uint64_t>(StoreOffCst))
+ // If we don't have a valid offset, we can't guarantee to be an adjacent
+ // offset.
+ if (!BIO.hasValidOffset())
+ return false;
+ if ((C.CurrentLowestOffset -
+ static_cast<int64_t>(ValueTy.getSizeInBytes())) != BIO.getOffset())
return false;
// This writes to an adjacent address. Allow it.
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/store-merging.ll b/llvm/test/CodeGen/AArch64/GlobalISel/store-merging.ll
index 07744dada4f1fa..b1166e683ec74e 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/store-merging.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/store-merging.ll
@@ -323,3 +323,22 @@ define i32 @test_alias_3xs16(ptr %ptr, ptr %ptr2, ptr %ptr3, ptr noalias %safe_p
store i32 14, ptr %addr4
ret i32 %safeld
}
+
+ at G = external global [10 x i32]
+
+define void @invalid_zero_offset_no_merge(i64 %0) {
+; CHECK-LABEL: invalid_zero_offset_no_merge:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: Lloh0:
+; CHECK-NEXT: adrp x8, _G at GOTPAGE
+; CHECK-NEXT: Lloh1:
+; CHECK-NEXT: ldr x8, [x8, _G at GOTPAGEOFF]
+; CHECK-NEXT: str wzr, [x8, x0, lsl #2]
+; CHECK-NEXT: str wzr, [x8, #4]
+; CHECK-NEXT: ret
+; CHECK-NEXT: .loh AdrpLdrGot Lloh0, Lloh1
+ %2 = getelementptr [10 x i32], ptr @G, i64 0, i64 %0
+ store i32 0, ptr %2, align 4
+ store i32 0, ptr getelementptr inbounds ([10 x i32], ptr @G, i64 0, i64 1), align 4
+ ret void
+}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/store-merging.mir b/llvm/test/CodeGen/AArch64/GlobalISel/store-merging.mir
index e98e1ce599f2f6..69457a8cc0c19f 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/store-merging.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/store-merging.mir
@@ -162,6 +162,13 @@
ret void
}
+ @G = external global [10 x i32]
+ define void @invalid_zero_offset_no_merge(i64 %0) {
+ %2 = getelementptr [10 x i32], ptr @G, i64 0, i64 %0
+ store i32 0, ptr %2, align 4
+ store i32 0, ptr getelementptr inbounds ([10 x i32], ptr @G, i64 0, i64 1), align 4
+ ret void
+ }
...
---
name: test_simple_2xs8
@@ -582,13 +589,11 @@ liveins:
frameInfo:
maxAlignment: 1
machineFunctionInfo: {}
+# The store to ptr2 prevents merging into a single store.
+# We can still merge the stores into addr1 and addr2.
body: |
bb.1 (%ir-block.0):
liveins: $x0, $x1
-
- ; The store to ptr2 prevents merging into a single store.
- ; We can still merge the stores into addr1 and addr2.
-
; CHECK-LABEL: name: test_alias_4xs16
; CHECK: liveins: $x0, $x1
; CHECK-NEXT: {{ $}}
@@ -639,10 +644,10 @@ liveins:
frameInfo:
maxAlignment: 1
machineFunctionInfo: {}
+# Here store of 5 and 9 can be merged, others have aliasing barriers.
body: |
bb.1 (%ir-block.0):
liveins: $x0, $x1, $x2
- ; Here store of 5 and 9 can be merged, others have aliasing barriers.
; CHECK-LABEL: name: test_alias2_4xs16
; CHECK: liveins: $x0, $x1, $x2
; CHECK-NEXT: {{ $}}
@@ -698,12 +703,11 @@ liveins:
frameInfo:
maxAlignment: 1
machineFunctionInfo: {}
+# No merging can be done here.
body: |
bb.1 (%ir-block.0):
liveins: $x0, $x1, $x2, $x3
- ; No merging can be done here.
-
; CHECK-LABEL: name: test_alias3_4xs16
; CHECK: liveins: $x0, $x1, $x2, $x3
; CHECK-NEXT: {{ $}}
@@ -767,12 +771,10 @@ stack:
- { id: 0, name: a1, size: 24, alignment: 4 }
- { id: 1, name: a2, size: 4, alignment: 4 }
machineFunctionInfo: {}
+# Can merge because the load is from a different alloca and can't alias.
body: |
bb.1 (%ir-block.0):
liveins: $x0
-
- ; Can merge because the load is from a different alloca and can't alias.
-
; CHECK-LABEL: name: test_alias_allocas_2xs32
; CHECK: liveins: $x0
; CHECK-NEXT: {{ $}}
@@ -826,3 +828,43 @@ body: |
RET_ReallyLR
...
+---
+name: invalid_zero_offset_no_merge
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$x0' }
+frameInfo:
+ maxAlignment: 1
+machineFunctionInfo: {}
+body: |
+ bb.1 (%ir-block.1):
+ liveins: $x0
+
+ ; CHECK-LABEL: name: invalid_zero_offset_no_merge
+ ; CHECK: liveins: $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[C]](s64)
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @G
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[GV]], [[SHL]](s64)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: G_STORE [[C1]](s32), [[PTR_ADD]](p0) :: (store (s32) into %ir.2)
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+ ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw G_PTR_ADD [[GV]], [[C2]](s64)
+ ; CHECK-NEXT: G_STORE [[C1]](s32), [[PTR_ADD1]](p0) :: (store (s32) into `ptr getelementptr inbounds ([10 x i32], ptr @G, i64 0, i64 1)`)
+ ; CHECK-NEXT: RET_ReallyLR
+ %0:_(s64) = COPY $x0
+ %9:_(s64) = G_CONSTANT i64 2
+ %3:_(s64) = G_SHL %0, %9(s64)
+ %1:_(p0) = G_GLOBAL_VALUE @G
+ %4:_(p0) = G_PTR_ADD %1, %3(s64)
+ %6:_(s32) = G_CONSTANT i32 0
+ G_STORE %6(s32), %4(p0) :: (store (s32) into %ir.2)
+ %8:_(s64) = G_CONSTANT i64 4
+ %7:_(p0) = nuw G_PTR_ADD %1, %8(s64)
+ G_STORE %6(s32), %7(p0) :: (store (s32) into `ptr getelementptr inbounds ([10 x i32], ptr @G, i64 0, i64 1)`)
+ RET_ReallyLR
+
+...
More information about the llvm-branch-commits
mailing list