[llvm] [EarlyCSE] Compare GEP instructions based on offset (PR #65875)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Sep 17 01:11:24 PDT 2023
https://github.com/DianQK updated https://github.com/llvm/llvm-project/pull/65875
>From a0e8343b9f82a2a09830c3a47f75ec8264f4e6ef Mon Sep 17 00:00:00 2001
From: DianQK <dianqk at dianqk.net>
Date: Sat, 9 Sep 2023 15:47:55 +0800
Subject: [PATCH 1/5] [EarlyCSE] Pre-commit offset-based GEP (NFC)
---
llvm/test/Transforms/EarlyCSE/gep.ll | 47 ++++++++++++++++++++++++++++
1 file changed, 47 insertions(+)
create mode 100644 llvm/test/Transforms/EarlyCSE/gep.ll
diff --git a/llvm/test/Transforms/EarlyCSE/gep.ll b/llvm/test/Transforms/EarlyCSE/gep.ll
new file mode 100644
index 000000000000000..838b6dbeff3332f
--- /dev/null
+++ b/llvm/test/Transforms/EarlyCSE/gep.ll
@@ -0,0 +1,47 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
+; RUN: opt < %s -S -passes=early-cse -earlycse-debug-hash | FileCheck %s
+; RUN: opt < %s -S -passes='early-cse<memssa>' | FileCheck %s
+
+%T1 = type { i64, i64, i64 }
+
+declare void @use_vec(<4 x ptr>);
+
+define void @foo(ptr %a, <4 x i64> %b, i64 %i) {
+; CHECK-LABEL: define void @foo(
+; CHECK-SAME: ptr [[A:%.*]], <4 x i64> [[B:%.*]], i64 [[I:%.*]]) {
+; CHECK-NEXT: [[S1A:%.*]] = getelementptr i8, ptr [[A]], i64 8
+; CHECK-NEXT: [[S1C:%.*]] = getelementptr [[T1:%.*]], ptr [[A]], i64 0, i32 1
+; CHECK-NEXT: [[N1D:%.*]] = getelementptr i8, ptr [[A]], i64 7
+; CHECK-NEXT: [[S1E:%.*]] = getelementptr i64, ptr [[A]], i64 1
+; CHECK-NEXT: [[S1F:%.*]] = getelementptr i32, ptr [[A]], i64 2
+; CHECK-NEXT: [[N1G:%.*]] = getelementptr i32, ptr [[A]], i64 1
+; CHECK-NEXT: [[N1H:%.*]] = getelementptr i8, ptr [[A]], i64 [[I]]
+; CHECK-NEXT: [[V:%.*]] = getelementptr i64, ptr [[A]], <4 x i64> <i64 1, i64 1, i64 1, i64 1>
+; CHECK-NEXT: call void @use_vec(<4 x ptr> [[V]])
+; CHECK-NEXT: [[V2:%.*]] = getelementptr i64, ptr [[A]], <4 x i64> <i64 0, i64 2, i64 1, i64 1>
+; CHECK-NEXT: call void @use_vec(<4 x ptr> [[V2]])
+; CHECK-NEXT: ret void
+;
+ %s1a = getelementptr i8, ptr %a, i64 8
+ %s1av = load i64, ptr %s1a
+ %s1b = getelementptr inbounds i8, ptr %a, i64 8
+ %s1bv = load i64, ptr %s1b
+ %s1c = getelementptr %T1, ptr %a, i64 0, i32 1
+ %s1cv = load i64, ptr %s1c
+ %n1d = getelementptr i8, ptr %a, i64 7
+ %n1dv = load i64, ptr %n1d
+ %s1e = getelementptr i64, ptr %a, i64 1
+ %s1ev = load i64, ptr %s1e
+ %s1f = getelementptr i32, ptr %a, i64 2
+ %s1fv = load i64, ptr %s1f
+ %n1g = getelementptr i32, ptr %a, i64 1
+ %n1gv = load i64, ptr %n1g
+ %n1h = getelementptr i8, ptr %a, i64 %i
+ %n1hv = load i64, ptr %n1h
+
+ %v = getelementptr i64, ptr %a, <4 x i64> <i64 1, i64 1, i64 1, i64 1>
+ call void @use_vec(<4 x ptr> %v)
+ %v2 = getelementptr i64, ptr %a, <4 x i64> <i64 0, i64 2, i64 1, i64 1>
+ call void @use_vec(<4 x ptr> %v2)
+ ret void
+}
>From ac1daad9bb4eb083df6b215c029816d3149e00d8 Mon Sep 17 00:00:00 2001
From: DianQK <dianqk at dianqk.net>
Date: Sun, 10 Sep 2023 13:14:57 +0800
Subject: [PATCH 2/5] [EarlyCSE] Add a vectorization failure example (NFC)
---
.../PhaseOrdering/X86/unroll-vectorizer.ll | 138 ++++++++++++++++++
1 file changed, 138 insertions(+)
create mode 100644 llvm/test/Transforms/PhaseOrdering/X86/unroll-vectorizer.ll
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/unroll-vectorizer.ll b/llvm/test/Transforms/PhaseOrdering/X86/unroll-vectorizer.ll
new file mode 100644
index 000000000000000..3072cb39e0133e2
--- /dev/null
+++ b/llvm/test/Transforms/PhaseOrdering/X86/unroll-vectorizer.ll
@@ -0,0 +1,138 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
+; RUN: opt < %s -O3 -S | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%Zip = type { { ptr, ptr }, { [32 x i8], { i64, i64 } } }
+
+define void @foo(ptr %a, <32 x i8> %_0) #0 {
+; CHECK-LABEL: define void @foo(
+; CHECK-SAME: ptr nocapture writeonly [[A:%.*]], <32 x i8> [[_0:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: start:
+; CHECK-NEXT: [[_34I:%.*]] = getelementptr i8, ptr [[A]], i64 1
+; CHECK-NEXT: [[Z_SROA_0_16_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 0
+; CHECK-NEXT: store i8 [[Z_SROA_0_16_VEC_EXTRACT]], ptr [[A]], align 1
+; CHECK-NEXT: [[_34I_1:%.*]] = getelementptr i8, ptr [[A]], i64 2
+; CHECK-NEXT: [[Z_SROA_0_17_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 1
+; CHECK-NEXT: store i8 [[Z_SROA_0_17_VEC_EXTRACT]], ptr [[_34I]], align 1
+; CHECK-NEXT: [[_34I_2:%.*]] = getelementptr i8, ptr [[A]], i64 3
+; CHECK-NEXT: [[Z_SROA_0_18_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 2
+; CHECK-NEXT: store i8 [[Z_SROA_0_18_VEC_EXTRACT]], ptr [[_34I_1]], align 1
+; CHECK-NEXT: [[_34I_3:%.*]] = getelementptr i8, ptr [[A]], i64 4
+; CHECK-NEXT: [[Z_SROA_0_19_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 3
+; CHECK-NEXT: store i8 [[Z_SROA_0_19_VEC_EXTRACT]], ptr [[_34I_2]], align 1
+; CHECK-NEXT: [[_34I_4:%.*]] = getelementptr i8, ptr [[A]], i64 5
+; CHECK-NEXT: [[Z_SROA_0_20_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 4
+; CHECK-NEXT: store i8 [[Z_SROA_0_20_VEC_EXTRACT]], ptr [[_34I_3]], align 1
+; CHECK-NEXT: [[_34I_5:%.*]] = getelementptr i8, ptr [[A]], i64 6
+; CHECK-NEXT: [[Z_SROA_0_21_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 5
+; CHECK-NEXT: store i8 [[Z_SROA_0_21_VEC_EXTRACT]], ptr [[_34I_4]], align 1
+; CHECK-NEXT: [[_34I_6:%.*]] = getelementptr i8, ptr [[A]], i64 7
+; CHECK-NEXT: [[Z_SROA_0_22_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 6
+; CHECK-NEXT: store i8 [[Z_SROA_0_22_VEC_EXTRACT]], ptr [[_34I_5]], align 1
+; CHECK-NEXT: [[_34I_7:%.*]] = getelementptr i8, ptr [[A]], i64 8
+; CHECK-NEXT: [[Z_SROA_0_23_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 7
+; CHECK-NEXT: store i8 [[Z_SROA_0_23_VEC_EXTRACT]], ptr [[_34I_6]], align 1
+; CHECK-NEXT: [[_34I_8:%.*]] = getelementptr i8, ptr [[A]], i64 9
+; CHECK-NEXT: [[Z_SROA_0_24_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 8
+; CHECK-NEXT: store i8 [[Z_SROA_0_24_VEC_EXTRACT]], ptr [[_34I_7]], align 1
+; CHECK-NEXT: [[_34I_9:%.*]] = getelementptr i8, ptr [[A]], i64 10
+; CHECK-NEXT: [[Z_SROA_0_25_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 9
+; CHECK-NEXT: store i8 [[Z_SROA_0_25_VEC_EXTRACT]], ptr [[_34I_8]], align 1
+; CHECK-NEXT: [[_34I_10:%.*]] = getelementptr i8, ptr [[A]], i64 11
+; CHECK-NEXT: [[Z_SROA_0_26_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 10
+; CHECK-NEXT: store i8 [[Z_SROA_0_26_VEC_EXTRACT]], ptr [[_34I_9]], align 1
+; CHECK-NEXT: [[_34I_11:%.*]] = getelementptr i8, ptr [[A]], i64 12
+; CHECK-NEXT: [[Z_SROA_0_27_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 11
+; CHECK-NEXT: store i8 [[Z_SROA_0_27_VEC_EXTRACT]], ptr [[_34I_10]], align 1
+; CHECK-NEXT: [[_34I_12:%.*]] = getelementptr i8, ptr [[A]], i64 13
+; CHECK-NEXT: [[Z_SROA_0_28_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 12
+; CHECK-NEXT: store i8 [[Z_SROA_0_28_VEC_EXTRACT]], ptr [[_34I_11]], align 1
+; CHECK-NEXT: [[_34I_13:%.*]] = getelementptr i8, ptr [[A]], i64 14
+; CHECK-NEXT: [[Z_SROA_0_29_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 13
+; CHECK-NEXT: store i8 [[Z_SROA_0_29_VEC_EXTRACT]], ptr [[_34I_12]], align 1
+; CHECK-NEXT: [[_34I_14:%.*]] = getelementptr i8, ptr [[A]], i64 15
+; CHECK-NEXT: [[Z_SROA_0_30_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 14
+; CHECK-NEXT: store i8 [[Z_SROA_0_30_VEC_EXTRACT]], ptr [[_34I_13]], align 1
+; CHECK-NEXT: [[_34I_15:%.*]] = getelementptr i8, ptr [[A]], i64 16
+; CHECK-NEXT: [[Z_SROA_0_31_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 15
+; CHECK-NEXT: store i8 [[Z_SROA_0_31_VEC_EXTRACT]], ptr [[_34I_14]], align 1
+; CHECK-NEXT: [[_34I_16:%.*]] = getelementptr i8, ptr [[A]], i64 17
+; CHECK-NEXT: [[Z_SROA_0_32_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 16
+; CHECK-NEXT: store i8 [[Z_SROA_0_32_VEC_EXTRACT]], ptr [[_34I_15]], align 1
+; CHECK-NEXT: [[_34I_17:%.*]] = getelementptr i8, ptr [[A]], i64 18
+; CHECK-NEXT: [[Z_SROA_0_33_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 17
+; CHECK-NEXT: store i8 [[Z_SROA_0_33_VEC_EXTRACT]], ptr [[_34I_16]], align 1
+; CHECK-NEXT: [[_34I_18:%.*]] = getelementptr i8, ptr [[A]], i64 19
+; CHECK-NEXT: [[Z_SROA_0_34_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 18
+; CHECK-NEXT: store i8 [[Z_SROA_0_34_VEC_EXTRACT]], ptr [[_34I_17]], align 1
+; CHECK-NEXT: [[_34I_19:%.*]] = getelementptr i8, ptr [[A]], i64 20
+; CHECK-NEXT: [[Z_SROA_0_35_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 19
+; CHECK-NEXT: store i8 [[Z_SROA_0_35_VEC_EXTRACT]], ptr [[_34I_18]], align 1
+; CHECK-NEXT: [[_34I_20:%.*]] = getelementptr i8, ptr [[A]], i64 21
+; CHECK-NEXT: [[Z_SROA_0_36_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 20
+; CHECK-NEXT: store i8 [[Z_SROA_0_36_VEC_EXTRACT]], ptr [[_34I_19]], align 1
+; CHECK-NEXT: [[_34I_21:%.*]] = getelementptr i8, ptr [[A]], i64 22
+; CHECK-NEXT: [[Z_SROA_0_37_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 21
+; CHECK-NEXT: store i8 [[Z_SROA_0_37_VEC_EXTRACT]], ptr [[_34I_20]], align 1
+; CHECK-NEXT: [[_34I_22:%.*]] = getelementptr i8, ptr [[A]], i64 23
+; CHECK-NEXT: [[Z_SROA_0_38_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 22
+; CHECK-NEXT: store i8 [[Z_SROA_0_38_VEC_EXTRACT]], ptr [[_34I_21]], align 1
+; CHECK-NEXT: [[_34I_23:%.*]] = getelementptr i8, ptr [[A]], i64 24
+; CHECK-NEXT: [[Z_SROA_0_39_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 23
+; CHECK-NEXT: store i8 [[Z_SROA_0_39_VEC_EXTRACT]], ptr [[_34I_22]], align 1
+; CHECK-NEXT: [[_34I_24:%.*]] = getelementptr i8, ptr [[A]], i64 25
+; CHECK-NEXT: [[Z_SROA_0_40_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 24
+; CHECK-NEXT: store i8 [[Z_SROA_0_40_VEC_EXTRACT]], ptr [[_34I_23]], align 1
+; CHECK-NEXT: [[_34I_25:%.*]] = getelementptr i8, ptr [[A]], i64 26
+; CHECK-NEXT: [[Z_SROA_0_41_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 25
+; CHECK-NEXT: store i8 [[Z_SROA_0_41_VEC_EXTRACT]], ptr [[_34I_24]], align 1
+; CHECK-NEXT: [[_34I_26:%.*]] = getelementptr i8, ptr [[A]], i64 27
+; CHECK-NEXT: [[Z_SROA_0_42_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 26
+; CHECK-NEXT: store i8 [[Z_SROA_0_42_VEC_EXTRACT]], ptr [[_34I_25]], align 1
+; CHECK-NEXT: [[_34I_27:%.*]] = getelementptr i8, ptr [[A]], i64 28
+; CHECK-NEXT: [[Z_SROA_0_43_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 27
+; CHECK-NEXT: store i8 [[Z_SROA_0_43_VEC_EXTRACT]], ptr [[_34I_26]], align 1
+; CHECK-NEXT: [[_34I_28:%.*]] = getelementptr i8, ptr [[A]], i64 29
+; CHECK-NEXT: [[Z_SROA_0_44_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 28
+; CHECK-NEXT: store i8 [[Z_SROA_0_44_VEC_EXTRACT]], ptr [[_34I_27]], align 1
+; CHECK-NEXT: [[_34I_29:%.*]] = getelementptr i8, ptr [[A]], i64 30
+; CHECK-NEXT: [[Z_SROA_0_45_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 29
+; CHECK-NEXT: store i8 [[Z_SROA_0_45_VEC_EXTRACT]], ptr [[_34I_28]], align 1
+; CHECK-NEXT: [[_34I_30:%.*]] = getelementptr i8, ptr [[A]], i64 31
+; CHECK-NEXT: [[Z_SROA_0_46_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 30
+; CHECK-NEXT: store i8 [[Z_SROA_0_46_VEC_EXTRACT]], ptr [[_34I_29]], align 1
+; CHECK-NEXT: [[Z_SROA_0_47_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 31
+; CHECK-NEXT: store i8 [[Z_SROA_0_47_VEC_EXTRACT]], ptr [[_34I_30]], align 1
+; CHECK-NEXT: ret void
+;
+start:
+ %z = alloca %Zip, align 8
+ %sroa_1 = getelementptr i8, ptr %z, i64 16
+ store <32 x i8> %_0, ptr %sroa_1, align 8
+ %len_ = getelementptr i8, ptr %z, i64 56
+ store i64 32, ptr %len_, align 8
+ %_1 = getelementptr %Zip, ptr %z, i64 0, i32 1, i32 1
+ %_2 = getelementptr %Zip, ptr %z, i64 0, i32 1, i32 1, i32 1
+ %len = load i64, ptr %_2, align 8
+ %_10 = getelementptr %Zip, ptr %z, i64 0, i32 1
+ br label %body
+
+body: ; preds = %body, %start
+ %_34 = phi ptr [ %_34i, %body ], [ %a, %start ]
+ %idx = phi i64 [ %idx_, %body ], [ 0, %start ]
+ %_34i = getelementptr i8, ptr %_34, i64 1
+ %idx_ = add i64 %idx, 1
+ store i64 0, ptr %_1, align 8
+ %_24 = getelementptr i8, ptr %_10, i64 %idx
+ %_18 = load i8, ptr %_24, align 1
+ store i8 %_18, ptr %_34, align 1
+ %_6 = icmp eq i64 %len, %idx_
+ br i1 %_6, label %exit, label %body
+
+exit: ; preds = %body
+ ret void
+}
+
+attributes #0 = { "target-cpu"="znver3" }
>From 31e2ec9d89aeded9ea5da822262449b0c4e8ab16 Mon Sep 17 00:00:00 2001
From: DianQK <dianqk at dianqk.net>
Date: Sat, 16 Sep 2023 11:48:08 +0800
Subject: [PATCH 3/5] [EarlyCSE] Compare GEP instructions based on offset
Treat GEPs with the same base pointer and the same constant offset as
equivalent. This exposes more constant-propagation opportunities to
subsequent optimizations.
---
llvm/lib/Transforms/Scalar/EarlyCSE.cpp | 153 +++++++++++++++---
llvm/test/Transforms/EarlyCSE/gep.ll | 3 -
.../PhaseOrdering/X86/unroll-vectorizer.ll | 96 +----------
3 files changed, 134 insertions(+), 118 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
index 439235f47471efb..4c69a2f7d75c303 100644
--- a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -143,11 +143,11 @@ struct SimpleValue {
!CI->getFunction()->isPresplitCoroutine();
}
return isa<CastInst>(Inst) || isa<UnaryOperator>(Inst) ||
- isa<BinaryOperator>(Inst) || isa<GetElementPtrInst>(Inst) ||
- isa<CmpInst>(Inst) || isa<SelectInst>(Inst) ||
- isa<ExtractElementInst>(Inst) || isa<InsertElementInst>(Inst) ||
- isa<ShuffleVectorInst>(Inst) || isa<ExtractValueInst>(Inst) ||
- isa<InsertValueInst>(Inst) || isa<FreezeInst>(Inst);
+ isa<BinaryOperator>(Inst) || isa<CmpInst>(Inst) ||
+ isa<SelectInst>(Inst) || isa<ExtractElementInst>(Inst) ||
+ isa<InsertElementInst>(Inst) || isa<ShuffleVectorInst>(Inst) ||
+ isa<ExtractValueInst>(Inst) || isa<InsertValueInst>(Inst) ||
+ isa<FreezeInst>(Inst);
}
};
@@ -307,10 +307,9 @@ static unsigned getHashValueImpl(SimpleValue Val) {
IVI->getOperand(1),
hash_combine_range(IVI->idx_begin(), IVI->idx_end()));
- assert((isa<CallInst>(Inst) || isa<GetElementPtrInst>(Inst) ||
- isa<ExtractElementInst>(Inst) || isa<InsertElementInst>(Inst) ||
- isa<ShuffleVectorInst>(Inst) || isa<UnaryOperator>(Inst) ||
- isa<FreezeInst>(Inst)) &&
+ assert((isa<CallInst>(Inst) || isa<ExtractElementInst>(Inst) ||
+ isa<InsertElementInst>(Inst) || isa<ShuffleVectorInst>(Inst) ||
+ isa<UnaryOperator>(Inst) || isa<FreezeInst>(Inst)) &&
"Invalid/unknown instruction");
// Handle intrinsics with commutative operands.
@@ -553,6 +552,77 @@ bool DenseMapInfo<CallValue>::isEqual(CallValue LHS, CallValue RHS) {
return LHSI->isIdenticalTo(RHSI);
}
+//===----------------------------------------------------------------------===//
+// GEPValue
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+struct GEPValue {
+ Instruction *Inst;
+ APInt ConstantOffset;
+ bool HasConstantOffset;
+
+ GEPValue(Instruction *I) : Inst(I), HasConstantOffset(false) {
+ assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
+ }
+ GEPValue(Instruction *I, APInt ConstantOffset, bool HasConstantOffset)
+ : Inst(I), ConstantOffset(ConstantOffset),
+ HasConstantOffset(HasConstantOffset) {
+ assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
+ }
+
+ bool isSentinel() const {
+ return Inst == DenseMapInfo<Instruction *>::getEmptyKey() ||
+ Inst == DenseMapInfo<Instruction *>::getTombstoneKey();
+ }
+
+ static bool canHandle(Instruction *Inst) {
+ return isa<GetElementPtrInst>(Inst);
+ }
+};
+
+} // namespace
+
+namespace llvm {
+
+template <> struct DenseMapInfo<GEPValue> {
+ static inline GEPValue getEmptyKey() {
+ return DenseMapInfo<Instruction *>::getEmptyKey();
+ }
+
+ static inline GEPValue getTombstoneKey() {
+ return DenseMapInfo<Instruction *>::getTombstoneKey();
+ }
+
+ static unsigned getHashValue(GEPValue Val);
+ static bool isEqual(GEPValue LHS, GEPValue RHS);
+};
+
+} // end namespace llvm
+
+unsigned DenseMapInfo<GEPValue>::getHashValue(GEPValue Val) {
+ GetElementPtrInst *GEP = cast<GetElementPtrInst>(Val.Inst);
+ if (Val.HasConstantOffset)
+ return hash_combine(GEP->getOpcode(), GEP->getPointerOperand(),
+ Val.ConstantOffset);
+ return hash_combine(
+ GEP->getOpcode(),
+ hash_combine_range(GEP->value_op_begin(), GEP->value_op_end()));
+}
+
+bool DenseMapInfo<GEPValue>::isEqual(GEPValue LHS, GEPValue RHS) {
+ if (LHS.isSentinel() || RHS.isSentinel())
+ return LHS.Inst == RHS.Inst;
+ GetElementPtrInst *LGEP = cast<GetElementPtrInst>(LHS.Inst);
+ GetElementPtrInst *RGEP = cast<GetElementPtrInst>(RHS.Inst);
+ if (LGEP->getPointerOperand() != RGEP->getPointerOperand())
+ return false;
+ if (LHS.HasConstantOffset && RHS.HasConstantOffset)
+ return LHS.ConstantOffset == RHS.ConstantOffset;
+ return LGEP->isIdenticalToWhenDefined(RGEP);
+}
+
//===----------------------------------------------------------------------===//
// EarlyCSE implementation
//===----------------------------------------------------------------------===//
@@ -647,6 +717,13 @@ class EarlyCSE {
ScopedHashTable<CallValue, std::pair<Instruction *, unsigned>>;
CallHTType AvailableCalls;
+ using GEPMapAllocatorTy =
+ RecyclingAllocator<BumpPtrAllocator,
+ ScopedHashTableVal<GEPValue, Value *>>;
+ using GEPHTType = ScopedHashTable<GEPValue, Value *, DenseMapInfo<GEPValue>,
+ GEPMapAllocatorTy>;
+ GEPHTType AvailableGEPs;
+
/// This is the current generation of the memory value.
unsigned CurrentGeneration = 0;
@@ -667,9 +744,11 @@ class EarlyCSE {
class NodeScope {
public:
NodeScope(ScopedHTType &AvailableValues, LoadHTType &AvailableLoads,
- InvariantHTType &AvailableInvariants, CallHTType &AvailableCalls)
- : Scope(AvailableValues), LoadScope(AvailableLoads),
- InvariantScope(AvailableInvariants), CallScope(AvailableCalls) {}
+ InvariantHTType &AvailableInvariants, CallHTType &AvailableCalls,
+ GEPHTType &AvailableGEPs)
+ : Scope(AvailableValues), LoadScope(AvailableLoads),
+ InvariantScope(AvailableInvariants), CallScope(AvailableCalls),
+ GEPScope(AvailableGEPs) {}
NodeScope(const NodeScope &) = delete;
NodeScope &operator=(const NodeScope &) = delete;
@@ -678,6 +757,7 @@ class EarlyCSE {
LoadHTType::ScopeTy LoadScope;
InvariantHTType::ScopeTy InvariantScope;
CallHTType::ScopeTy CallScope;
+ GEPHTType::ScopeTy GEPScope;
};
// Contains all the needed information to create a stack for doing a depth
@@ -688,13 +768,13 @@ class EarlyCSE {
public:
StackNode(ScopedHTType &AvailableValues, LoadHTType &AvailableLoads,
InvariantHTType &AvailableInvariants, CallHTType &AvailableCalls,
- unsigned cg, DomTreeNode *n, DomTreeNode::const_iterator child,
+ GEPHTType &AvailableGEPs, unsigned cg, DomTreeNode *n,
+ DomTreeNode::const_iterator child,
DomTreeNode::const_iterator end)
: CurrentGeneration(cg), ChildGeneration(cg), Node(n), ChildIter(child),
EndIter(end),
Scopes(AvailableValues, AvailableLoads, AvailableInvariants,
- AvailableCalls)
- {}
+ AvailableCalls, AvailableGEPs) {}
StackNode(const StackNode &) = delete;
StackNode &operator=(const StackNode &) = delete;
@@ -1561,6 +1641,39 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
continue;
}
+ if (GEPValue::canHandle(&Inst)) {
+ GetElementPtrInst *GEP = cast<GetElementPtrInst>(&Inst);
+ APInt Offset(SQ.DL.getIndexTypeSizeInBits(GEP->getType()), 0);
+ bool HasConstantOffset = GEP->accumulateConstantOffset(SQ.DL, Offset);
+ GEPValue GEPVal(GEP, Offset, HasConstantOffset);
+ if (Value *V = AvailableGEPs.lookup(GEPVal)) {
+ LLVM_DEBUG(dbgs() << "EarlyCSE CSE: " << Inst << " to: " << *V
+ << '\n');
+ if (auto *I = dyn_cast<Instruction>(V)) {
+ // If I being poison triggers UB, there is no need to drop those
+ // flags. Otherwise, only retain flags present on both I and Inst.
+ // TODO: Currently some fast-math flags are not treated as
+ // poison-generating even though they should. Until this is fixed,
+ // always retain flags present on both I and Inst for floating point
+ // instructions.
+ if (isa<FPMathOperator>(I) ||
+ (I->hasPoisonGeneratingFlags() && !programUndefinedIfPoison(I)))
+ I->andIRFlags(&Inst);
+ }
+ Inst.replaceAllUsesWith(V);
+ salvageKnowledge(&Inst, &AC);
+ removeMSSA(Inst);
+ Inst.eraseFromParent();
+ Changed = true;
+ ++NumCSE;
+ continue;
+ }
+
+ // Otherwise, just remember that this value is available.
+ AvailableGEPs.insert(GEPVal, &Inst);
+ continue;
+ }
+
// A release fence requires that all stores complete before it, but does
// not prevent the reordering of following loads 'before' the fence. As a
// result, we don't need to consider it as writing to memory and don't need
@@ -1675,7 +1788,7 @@ bool EarlyCSE::run() {
// Process the root node.
nodesToProcess.push_back(new StackNode(
AvailableValues, AvailableLoads, AvailableInvariants, AvailableCalls,
- CurrentGeneration, DT.getRootNode(),
+ AvailableGEPs, CurrentGeneration, DT.getRootNode(),
DT.getRootNode()->begin(), DT.getRootNode()->end()));
assert(!CurrentGeneration && "Create a new EarlyCSE instance to rerun it.");
@@ -1698,10 +1811,10 @@ bool EarlyCSE::run() {
} else if (NodeToProcess->childIter() != NodeToProcess->end()) {
// Push the next child onto the stack.
DomTreeNode *child = NodeToProcess->nextChild();
- nodesToProcess.push_back(
- new StackNode(AvailableValues, AvailableLoads, AvailableInvariants,
- AvailableCalls, NodeToProcess->childGeneration(),
- child, child->begin(), child->end()));
+ nodesToProcess.push_back(new StackNode(
+ AvailableValues, AvailableLoads, AvailableInvariants, AvailableCalls,
+ AvailableGEPs, NodeToProcess->childGeneration(), child,
+ child->begin(), child->end()));
} else {
// It has been processed, and there are no more children to process,
// so delete it and pop it off the stack.
diff --git a/llvm/test/Transforms/EarlyCSE/gep.ll b/llvm/test/Transforms/EarlyCSE/gep.ll
index 838b6dbeff3332f..499b5ac8de0af9a 100644
--- a/llvm/test/Transforms/EarlyCSE/gep.ll
+++ b/llvm/test/Transforms/EarlyCSE/gep.ll
@@ -10,10 +10,7 @@ define void @foo(ptr %a, <4 x i64> %b, i64 %i) {
; CHECK-LABEL: define void @foo(
; CHECK-SAME: ptr [[A:%.*]], <4 x i64> [[B:%.*]], i64 [[I:%.*]]) {
; CHECK-NEXT: [[S1A:%.*]] = getelementptr i8, ptr [[A]], i64 8
-; CHECK-NEXT: [[S1C:%.*]] = getelementptr [[T1:%.*]], ptr [[A]], i64 0, i32 1
; CHECK-NEXT: [[N1D:%.*]] = getelementptr i8, ptr [[A]], i64 7
-; CHECK-NEXT: [[S1E:%.*]] = getelementptr i64, ptr [[A]], i64 1
-; CHECK-NEXT: [[S1F:%.*]] = getelementptr i32, ptr [[A]], i64 2
; CHECK-NEXT: [[N1G:%.*]] = getelementptr i32, ptr [[A]], i64 1
; CHECK-NEXT: [[N1H:%.*]] = getelementptr i8, ptr [[A]], i64 [[I]]
; CHECK-NEXT: [[V:%.*]] = getelementptr i64, ptr [[A]], <4 x i64> <i64 1, i64 1, i64 1, i64 1>
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/unroll-vectorizer.ll b/llvm/test/Transforms/PhaseOrdering/X86/unroll-vectorizer.ll
index 3072cb39e0133e2..1c9e7a771ca19c7 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/unroll-vectorizer.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/unroll-vectorizer.ll
@@ -10,101 +10,7 @@ define void @foo(ptr %a, <32 x i8> %_0) #0 {
; CHECK-LABEL: define void @foo(
; CHECK-SAME: ptr nocapture writeonly [[A:%.*]], <32 x i8> [[_0:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: start:
-; CHECK-NEXT: [[_34I:%.*]] = getelementptr i8, ptr [[A]], i64 1
-; CHECK-NEXT: [[Z_SROA_0_16_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 0
-; CHECK-NEXT: store i8 [[Z_SROA_0_16_VEC_EXTRACT]], ptr [[A]], align 1
-; CHECK-NEXT: [[_34I_1:%.*]] = getelementptr i8, ptr [[A]], i64 2
-; CHECK-NEXT: [[Z_SROA_0_17_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 1
-; CHECK-NEXT: store i8 [[Z_SROA_0_17_VEC_EXTRACT]], ptr [[_34I]], align 1
-; CHECK-NEXT: [[_34I_2:%.*]] = getelementptr i8, ptr [[A]], i64 3
-; CHECK-NEXT: [[Z_SROA_0_18_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 2
-; CHECK-NEXT: store i8 [[Z_SROA_0_18_VEC_EXTRACT]], ptr [[_34I_1]], align 1
-; CHECK-NEXT: [[_34I_3:%.*]] = getelementptr i8, ptr [[A]], i64 4
-; CHECK-NEXT: [[Z_SROA_0_19_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 3
-; CHECK-NEXT: store i8 [[Z_SROA_0_19_VEC_EXTRACT]], ptr [[_34I_2]], align 1
-; CHECK-NEXT: [[_34I_4:%.*]] = getelementptr i8, ptr [[A]], i64 5
-; CHECK-NEXT: [[Z_SROA_0_20_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 4
-; CHECK-NEXT: store i8 [[Z_SROA_0_20_VEC_EXTRACT]], ptr [[_34I_3]], align 1
-; CHECK-NEXT: [[_34I_5:%.*]] = getelementptr i8, ptr [[A]], i64 6
-; CHECK-NEXT: [[Z_SROA_0_21_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 5
-; CHECK-NEXT: store i8 [[Z_SROA_0_21_VEC_EXTRACT]], ptr [[_34I_4]], align 1
-; CHECK-NEXT: [[_34I_6:%.*]] = getelementptr i8, ptr [[A]], i64 7
-; CHECK-NEXT: [[Z_SROA_0_22_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 6
-; CHECK-NEXT: store i8 [[Z_SROA_0_22_VEC_EXTRACT]], ptr [[_34I_5]], align 1
-; CHECK-NEXT: [[_34I_7:%.*]] = getelementptr i8, ptr [[A]], i64 8
-; CHECK-NEXT: [[Z_SROA_0_23_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 7
-; CHECK-NEXT: store i8 [[Z_SROA_0_23_VEC_EXTRACT]], ptr [[_34I_6]], align 1
-; CHECK-NEXT: [[_34I_8:%.*]] = getelementptr i8, ptr [[A]], i64 9
-; CHECK-NEXT: [[Z_SROA_0_24_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 8
-; CHECK-NEXT: store i8 [[Z_SROA_0_24_VEC_EXTRACT]], ptr [[_34I_7]], align 1
-; CHECK-NEXT: [[_34I_9:%.*]] = getelementptr i8, ptr [[A]], i64 10
-; CHECK-NEXT: [[Z_SROA_0_25_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 9
-; CHECK-NEXT: store i8 [[Z_SROA_0_25_VEC_EXTRACT]], ptr [[_34I_8]], align 1
-; CHECK-NEXT: [[_34I_10:%.*]] = getelementptr i8, ptr [[A]], i64 11
-; CHECK-NEXT: [[Z_SROA_0_26_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 10
-; CHECK-NEXT: store i8 [[Z_SROA_0_26_VEC_EXTRACT]], ptr [[_34I_9]], align 1
-; CHECK-NEXT: [[_34I_11:%.*]] = getelementptr i8, ptr [[A]], i64 12
-; CHECK-NEXT: [[Z_SROA_0_27_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 11
-; CHECK-NEXT: store i8 [[Z_SROA_0_27_VEC_EXTRACT]], ptr [[_34I_10]], align 1
-; CHECK-NEXT: [[_34I_12:%.*]] = getelementptr i8, ptr [[A]], i64 13
-; CHECK-NEXT: [[Z_SROA_0_28_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 12
-; CHECK-NEXT: store i8 [[Z_SROA_0_28_VEC_EXTRACT]], ptr [[_34I_11]], align 1
-; CHECK-NEXT: [[_34I_13:%.*]] = getelementptr i8, ptr [[A]], i64 14
-; CHECK-NEXT: [[Z_SROA_0_29_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 13
-; CHECK-NEXT: store i8 [[Z_SROA_0_29_VEC_EXTRACT]], ptr [[_34I_12]], align 1
-; CHECK-NEXT: [[_34I_14:%.*]] = getelementptr i8, ptr [[A]], i64 15
-; CHECK-NEXT: [[Z_SROA_0_30_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 14
-; CHECK-NEXT: store i8 [[Z_SROA_0_30_VEC_EXTRACT]], ptr [[_34I_13]], align 1
-; CHECK-NEXT: [[_34I_15:%.*]] = getelementptr i8, ptr [[A]], i64 16
-; CHECK-NEXT: [[Z_SROA_0_31_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 15
-; CHECK-NEXT: store i8 [[Z_SROA_0_31_VEC_EXTRACT]], ptr [[_34I_14]], align 1
-; CHECK-NEXT: [[_34I_16:%.*]] = getelementptr i8, ptr [[A]], i64 17
-; CHECK-NEXT: [[Z_SROA_0_32_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 16
-; CHECK-NEXT: store i8 [[Z_SROA_0_32_VEC_EXTRACT]], ptr [[_34I_15]], align 1
-; CHECK-NEXT: [[_34I_17:%.*]] = getelementptr i8, ptr [[A]], i64 18
-; CHECK-NEXT: [[Z_SROA_0_33_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 17
-; CHECK-NEXT: store i8 [[Z_SROA_0_33_VEC_EXTRACT]], ptr [[_34I_16]], align 1
-; CHECK-NEXT: [[_34I_18:%.*]] = getelementptr i8, ptr [[A]], i64 19
-; CHECK-NEXT: [[Z_SROA_0_34_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 18
-; CHECK-NEXT: store i8 [[Z_SROA_0_34_VEC_EXTRACT]], ptr [[_34I_17]], align 1
-; CHECK-NEXT: [[_34I_19:%.*]] = getelementptr i8, ptr [[A]], i64 20
-; CHECK-NEXT: [[Z_SROA_0_35_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 19
-; CHECK-NEXT: store i8 [[Z_SROA_0_35_VEC_EXTRACT]], ptr [[_34I_18]], align 1
-; CHECK-NEXT: [[_34I_20:%.*]] = getelementptr i8, ptr [[A]], i64 21
-; CHECK-NEXT: [[Z_SROA_0_36_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 20
-; CHECK-NEXT: store i8 [[Z_SROA_0_36_VEC_EXTRACT]], ptr [[_34I_19]], align 1
-; CHECK-NEXT: [[_34I_21:%.*]] = getelementptr i8, ptr [[A]], i64 22
-; CHECK-NEXT: [[Z_SROA_0_37_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 21
-; CHECK-NEXT: store i8 [[Z_SROA_0_37_VEC_EXTRACT]], ptr [[_34I_20]], align 1
-; CHECK-NEXT: [[_34I_22:%.*]] = getelementptr i8, ptr [[A]], i64 23
-; CHECK-NEXT: [[Z_SROA_0_38_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 22
-; CHECK-NEXT: store i8 [[Z_SROA_0_38_VEC_EXTRACT]], ptr [[_34I_21]], align 1
-; CHECK-NEXT: [[_34I_23:%.*]] = getelementptr i8, ptr [[A]], i64 24
-; CHECK-NEXT: [[Z_SROA_0_39_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 23
-; CHECK-NEXT: store i8 [[Z_SROA_0_39_VEC_EXTRACT]], ptr [[_34I_22]], align 1
-; CHECK-NEXT: [[_34I_24:%.*]] = getelementptr i8, ptr [[A]], i64 25
-; CHECK-NEXT: [[Z_SROA_0_40_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 24
-; CHECK-NEXT: store i8 [[Z_SROA_0_40_VEC_EXTRACT]], ptr [[_34I_23]], align 1
-; CHECK-NEXT: [[_34I_25:%.*]] = getelementptr i8, ptr [[A]], i64 26
-; CHECK-NEXT: [[Z_SROA_0_41_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 25
-; CHECK-NEXT: store i8 [[Z_SROA_0_41_VEC_EXTRACT]], ptr [[_34I_24]], align 1
-; CHECK-NEXT: [[_34I_26:%.*]] = getelementptr i8, ptr [[A]], i64 27
-; CHECK-NEXT: [[Z_SROA_0_42_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 26
-; CHECK-NEXT: store i8 [[Z_SROA_0_42_VEC_EXTRACT]], ptr [[_34I_25]], align 1
-; CHECK-NEXT: [[_34I_27:%.*]] = getelementptr i8, ptr [[A]], i64 28
-; CHECK-NEXT: [[Z_SROA_0_43_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 27
-; CHECK-NEXT: store i8 [[Z_SROA_0_43_VEC_EXTRACT]], ptr [[_34I_26]], align 1
-; CHECK-NEXT: [[_34I_28:%.*]] = getelementptr i8, ptr [[A]], i64 29
-; CHECK-NEXT: [[Z_SROA_0_44_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 28
-; CHECK-NEXT: store i8 [[Z_SROA_0_44_VEC_EXTRACT]], ptr [[_34I_27]], align 1
-; CHECK-NEXT: [[_34I_29:%.*]] = getelementptr i8, ptr [[A]], i64 30
-; CHECK-NEXT: [[Z_SROA_0_45_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 29
-; CHECK-NEXT: store i8 [[Z_SROA_0_45_VEC_EXTRACT]], ptr [[_34I_28]], align 1
-; CHECK-NEXT: [[_34I_30:%.*]] = getelementptr i8, ptr [[A]], i64 31
-; CHECK-NEXT: [[Z_SROA_0_46_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 30
-; CHECK-NEXT: store i8 [[Z_SROA_0_46_VEC_EXTRACT]], ptr [[_34I_29]], align 1
-; CHECK-NEXT: [[Z_SROA_0_47_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 31
-; CHECK-NEXT: store i8 [[Z_SROA_0_47_VEC_EXTRACT]], ptr [[_34I_30]], align 1
+; CHECK-NEXT: store <32 x i8> [[_0]], ptr [[A]], align 1
; CHECK-NEXT: ret void
;
start:
>From 089349f9eb47c57cfa2049ce0af5c7dd079a8581 Mon Sep 17 00:00:00 2001
From: DianQK <dianqk at dianqk.net>
Date: Sun, 17 Sep 2023 10:06:59 +0800
Subject: [PATCH 4/5] [EarlyCSE] Use ref
---
llvm/lib/Transforms/Scalar/EarlyCSE.cpp | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
index 4c69a2f7d75c303..1bf614191c10bec 100644
--- a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -595,13 +595,13 @@ template <> struct DenseMapInfo<GEPValue> {
return DenseMapInfo<Instruction *>::getTombstoneKey();
}
- static unsigned getHashValue(GEPValue Val);
- static bool isEqual(GEPValue LHS, GEPValue RHS);
+ static unsigned getHashValue(const GEPValue &Val);
+ static bool isEqual(const GEPValue &LHS, const GEPValue &RHS);
};
} // end namespace llvm
-unsigned DenseMapInfo<GEPValue>::getHashValue(GEPValue Val) {
+unsigned DenseMapInfo<GEPValue>::getHashValue(const GEPValue &Val) {
GetElementPtrInst *GEP = cast<GetElementPtrInst>(Val.Inst);
if (Val.HasConstantOffset)
return hash_combine(GEP->getOpcode(), GEP->getPointerOperand(),
@@ -611,7 +611,7 @@ unsigned DenseMapInfo<GEPValue>::getHashValue(GEPValue Val) {
hash_combine_range(GEP->value_op_begin(), GEP->value_op_end()));
}
-bool DenseMapInfo<GEPValue>::isEqual(GEPValue LHS, GEPValue RHS) {
+bool DenseMapInfo<GEPValue>::isEqual(const GEPValue &LHS, const GEPValue &RHS) {
if (LHS.isSentinel() || RHS.isSentinel())
return LHS.Inst == RHS.Inst;
GetElementPtrInst *LGEP = cast<GetElementPtrInst>(LHS.Inst);
>From 12eda5cf2df6438453683ce8c917a64786686a69 Mon Sep 17 00:00:00 2001
From: DianQK <dianqk at dianqk.net>
Date: Sun, 17 Sep 2023 16:08:12 +0800
Subject: [PATCH 5/5] fixup! [EarlyCSE] Compare GEP instructions based on
offset
Add comments and some minor changes
---
llvm/lib/Transforms/Scalar/EarlyCSE.cpp | 45 ++++++++++++-------------
1 file changed, 21 insertions(+), 24 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
index 1bf614191c10bec..06e31f2a4666a03 100644
--- a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -67,6 +67,7 @@ STATISTIC(NumCSE, "Number of instructions CSE'd");
STATISTIC(NumCSECVP, "Number of compare instructions CVP'd");
STATISTIC(NumCSELoad, "Number of load instructions CSE'd");
STATISTIC(NumCSECall, "Number of call instructions CSE'd");
+STATISTIC(NumCSEGEP, "Number of GEP instructions CSE'd");
STATISTIC(NumDSE, "Number of trivial dead stores removed");
DEBUG_COUNTER(CSECounter, "early-cse",
@@ -1294,6 +1295,20 @@ Value *EarlyCSE::getMatchingValue(LoadValue &InVal, ParseMemoryInst &MemInst,
return Result;
}
+static void combineIRFlags(Instruction &From, Value *To) {
+ if (auto *I = dyn_cast<Instruction>(To)) {
+ // If I being poison triggers UB, there is no need to drop those
+ // flags. Otherwise, only retain flags present on both I and From.
+ // TODO: Currently some fast-math flags are not treated as
+ // poison-generating even though they should. Until this is fixed,
+ // always retain flags present on both I and From for floating point
+ // instructions.
+ if (isa<FPMathOperator>(I) ||
+ (I->hasPoisonGeneratingFlags() && !programUndefinedIfPoison(I)))
+ I->andIRFlags(&From);
+ }
+}
+
bool EarlyCSE::overridingStores(const ParseMemoryInst &Earlier,
const ParseMemoryInst &Later) {
// Can we remove Earlier store because of Later store?
@@ -1519,16 +1534,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
continue;
}
- if (auto *I = dyn_cast<Instruction>(V)) {
- // If I being poison triggers UB, there is no need to drop those
- // flags. Otherwise, only retain flags present on both I and Inst.
- // TODO: Currently some fast-math flags are not treated as
- // poison-generating even though they should. Until this is fixed,
- // always retain flags present on both I and Inst for floating point
- // instructions.
- if (isa<FPMathOperator>(I) || (I->hasPoisonGeneratingFlags() && !programUndefinedIfPoison(I)))
- I->andIRFlags(&Inst);
- }
+ combineIRFlags(Inst, V);
Inst.replaceAllUsesWith(V);
salvageKnowledge(&Inst, &AC);
removeMSSA(Inst);
@@ -1641,35 +1647,26 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
continue;
}
+ // Compare GEP instructions based on offset.
if (GEPValue::canHandle(&Inst)) {
GetElementPtrInst *GEP = cast<GetElementPtrInst>(&Inst);
APInt Offset(SQ.DL.getIndexTypeSizeInBits(GEP->getType()), 0);
bool HasConstantOffset = GEP->accumulateConstantOffset(SQ.DL, Offset);
GEPValue GEPVal(GEP, Offset, HasConstantOffset);
if (Value *V = AvailableGEPs.lookup(GEPVal)) {
- LLVM_DEBUG(dbgs() << "EarlyCSE CSE: " << Inst << " to: " << *V
+ LLVM_DEBUG(dbgs() << "EarlyCSE CSE GEP: " << Inst << " to: " << *V
<< '\n');
- if (auto *I = dyn_cast<Instruction>(V)) {
- // If I being poison triggers UB, there is no need to drop those
- // flags. Otherwise, only retain flags present on both I and Inst.
- // TODO: Currently some fast-math flags are not treated as
- // poison-generating even though they should. Until this is fixed,
- // always retain flags present on both I and Inst for floating point
- // instructions.
- if (isa<FPMathOperator>(I) ||
- (I->hasPoisonGeneratingFlags() && !programUndefinedIfPoison(I)))
- I->andIRFlags(&Inst);
- }
+ combineIRFlags(Inst, V);
Inst.replaceAllUsesWith(V);
salvageKnowledge(&Inst, &AC);
removeMSSA(Inst);
Inst.eraseFromParent();
Changed = true;
- ++NumCSE;
+ ++NumCSEGEP;
continue;
}
- // Otherwise, just remember that this value is available.
+ // Otherwise, just remember that we have this GEP.
AvailableGEPs.insert(GEPVal, &Inst);
continue;
}
More information about the llvm-commits
mailing list