[llvm] [EarlyCSE] Compare GEP instructions based on offset (PR #65875)

via llvm-commits llvm-commits at lists.llvm.org
Sun Sep 17 01:11:24 PDT 2023


https://github.com/DianQK updated https://github.com/llvm/llvm-project/pull/65875

>From a0e8343b9f82a2a09830c3a47f75ec8264f4e6ef Mon Sep 17 00:00:00 2001
From: DianQK <dianqk at dianqk.net>
Date: Sat, 9 Sep 2023 15:47:55 +0800
Subject: [PATCH 1/5] [EarlyCSE] Pre-commit offset-based GEP (NFC)

---
 llvm/test/Transforms/EarlyCSE/gep.ll | 47 ++++++++++++++++++++++++++++
 1 file changed, 47 insertions(+)
 create mode 100644 llvm/test/Transforms/EarlyCSE/gep.ll

diff --git a/llvm/test/Transforms/EarlyCSE/gep.ll b/llvm/test/Transforms/EarlyCSE/gep.ll
new file mode 100644
index 000000000000000..838b6dbeff3332f
--- /dev/null
+++ b/llvm/test/Transforms/EarlyCSE/gep.ll
@@ -0,0 +1,47 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
+; RUN: opt < %s -S -passes=early-cse -earlycse-debug-hash | FileCheck %s
+; RUN: opt < %s -S -passes='early-cse<memssa>' | FileCheck %s
+
+%T1 = type { i64, i64, i64 }
+
+declare void @use_vec(<4 x ptr>);
+
+define void @foo(ptr %a, <4 x i64> %b, i64 %i) {
+; CHECK-LABEL: define void @foo(
+; CHECK-SAME: ptr [[A:%.*]], <4 x i64> [[B:%.*]], i64 [[I:%.*]]) {
+; CHECK-NEXT:    [[S1A:%.*]] = getelementptr i8, ptr [[A]], i64 8
+; CHECK-NEXT:    [[S1C:%.*]] = getelementptr [[T1:%.*]], ptr [[A]], i64 0, i32 1
+; CHECK-NEXT:    [[N1D:%.*]] = getelementptr i8, ptr [[A]], i64 7
+; CHECK-NEXT:    [[S1E:%.*]] = getelementptr i64, ptr [[A]], i64 1
+; CHECK-NEXT:    [[S1F:%.*]] = getelementptr i32, ptr [[A]], i64 2
+; CHECK-NEXT:    [[N1G:%.*]] = getelementptr i32, ptr [[A]], i64 1
+; CHECK-NEXT:    [[N1H:%.*]] = getelementptr i8, ptr [[A]], i64 [[I]]
+; CHECK-NEXT:    [[V:%.*]] = getelementptr i64, ptr [[A]], <4 x i64> <i64 1, i64 1, i64 1, i64 1>
+; CHECK-NEXT:    call void @use_vec(<4 x ptr> [[V]])
+; CHECK-NEXT:    [[V2:%.*]] = getelementptr i64, ptr [[A]], <4 x i64> <i64 0, i64 2, i64 1, i64 1>
+; CHECK-NEXT:    call void @use_vec(<4 x ptr> [[V2]])
+; CHECK-NEXT:    ret void
+;
+  %s1a = getelementptr i8, ptr %a, i64 8
+  %s1av = load i64, ptr %s1a
+  %s1b = getelementptr inbounds i8, ptr %a, i64 8
+  %s1bv = load i64, ptr %s1b
+  %s1c = getelementptr %T1, ptr %a, i64 0, i32 1
+  %s1cv = load i64, ptr %s1c
+  %n1d = getelementptr i8, ptr %a, i64 7
+  %n1dv = load i64, ptr %n1d
+  %s1e = getelementptr i64, ptr %a, i64 1
+  %s1ev = load i64, ptr %s1e
+  %s1f = getelementptr i32, ptr %a, i64 2
+  %s1fv = load i64, ptr %s1f
+  %n1g = getelementptr i32, ptr %a, i64 1
+  %n1gv = load i64, ptr %n1g
+  %n1h = getelementptr i8, ptr %a, i64 %i
+  %n1hv = load i64, ptr %n1h
+
+  %v = getelementptr i64, ptr %a, <4 x i64> <i64 1, i64 1, i64 1, i64 1>
+  call void @use_vec(<4 x ptr> %v)
+  %v2 = getelementptr i64, ptr %a, <4 x i64> <i64 0, i64 2, i64 1, i64 1>
+  call void @use_vec(<4 x ptr> %v2)
+  ret void
+}

>From ac1daad9bb4eb083df6b215c029816d3149e00d8 Mon Sep 17 00:00:00 2001
From: DianQK <dianqk at dianqk.net>
Date: Sun, 10 Sep 2023 13:14:57 +0800
Subject: [PATCH 2/5] [EarlyCSE] Add a vectorization failure example (NFC)

---
 .../PhaseOrdering/X86/unroll-vectorizer.ll    | 138 ++++++++++++++++++
 1 file changed, 138 insertions(+)
 create mode 100644 llvm/test/Transforms/PhaseOrdering/X86/unroll-vectorizer.ll

diff --git a/llvm/test/Transforms/PhaseOrdering/X86/unroll-vectorizer.ll b/llvm/test/Transforms/PhaseOrdering/X86/unroll-vectorizer.ll
new file mode 100644
index 000000000000000..3072cb39e0133e2
--- /dev/null
+++ b/llvm/test/Transforms/PhaseOrdering/X86/unroll-vectorizer.ll
@@ -0,0 +1,138 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
+; RUN: opt < %s -O3 -S | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%Zip = type { { ptr, ptr }, { [32 x i8], { i64, i64 } } }
+
+define void @foo(ptr %a, <32 x i8> %_0) #0 {
+; CHECK-LABEL: define void @foo(
+; CHECK-SAME: ptr nocapture writeonly [[A:%.*]], <32 x i8> [[_0:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  start:
+; CHECK-NEXT:    [[_34I:%.*]] = getelementptr i8, ptr [[A]], i64 1
+; CHECK-NEXT:    [[Z_SROA_0_16_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 0
+; CHECK-NEXT:    store i8 [[Z_SROA_0_16_VEC_EXTRACT]], ptr [[A]], align 1
+; CHECK-NEXT:    [[_34I_1:%.*]] = getelementptr i8, ptr [[A]], i64 2
+; CHECK-NEXT:    [[Z_SROA_0_17_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 1
+; CHECK-NEXT:    store i8 [[Z_SROA_0_17_VEC_EXTRACT]], ptr [[_34I]], align 1
+; CHECK-NEXT:    [[_34I_2:%.*]] = getelementptr i8, ptr [[A]], i64 3
+; CHECK-NEXT:    [[Z_SROA_0_18_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 2
+; CHECK-NEXT:    store i8 [[Z_SROA_0_18_VEC_EXTRACT]], ptr [[_34I_1]], align 1
+; CHECK-NEXT:    [[_34I_3:%.*]] = getelementptr i8, ptr [[A]], i64 4
+; CHECK-NEXT:    [[Z_SROA_0_19_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 3
+; CHECK-NEXT:    store i8 [[Z_SROA_0_19_VEC_EXTRACT]], ptr [[_34I_2]], align 1
+; CHECK-NEXT:    [[_34I_4:%.*]] = getelementptr i8, ptr [[A]], i64 5
+; CHECK-NEXT:    [[Z_SROA_0_20_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 4
+; CHECK-NEXT:    store i8 [[Z_SROA_0_20_VEC_EXTRACT]], ptr [[_34I_3]], align 1
+; CHECK-NEXT:    [[_34I_5:%.*]] = getelementptr i8, ptr [[A]], i64 6
+; CHECK-NEXT:    [[Z_SROA_0_21_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 5
+; CHECK-NEXT:    store i8 [[Z_SROA_0_21_VEC_EXTRACT]], ptr [[_34I_4]], align 1
+; CHECK-NEXT:    [[_34I_6:%.*]] = getelementptr i8, ptr [[A]], i64 7
+; CHECK-NEXT:    [[Z_SROA_0_22_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 6
+; CHECK-NEXT:    store i8 [[Z_SROA_0_22_VEC_EXTRACT]], ptr [[_34I_5]], align 1
+; CHECK-NEXT:    [[_34I_7:%.*]] = getelementptr i8, ptr [[A]], i64 8
+; CHECK-NEXT:    [[Z_SROA_0_23_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 7
+; CHECK-NEXT:    store i8 [[Z_SROA_0_23_VEC_EXTRACT]], ptr [[_34I_6]], align 1
+; CHECK-NEXT:    [[_34I_8:%.*]] = getelementptr i8, ptr [[A]], i64 9
+; CHECK-NEXT:    [[Z_SROA_0_24_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 8
+; CHECK-NEXT:    store i8 [[Z_SROA_0_24_VEC_EXTRACT]], ptr [[_34I_7]], align 1
+; CHECK-NEXT:    [[_34I_9:%.*]] = getelementptr i8, ptr [[A]], i64 10
+; CHECK-NEXT:    [[Z_SROA_0_25_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 9
+; CHECK-NEXT:    store i8 [[Z_SROA_0_25_VEC_EXTRACT]], ptr [[_34I_8]], align 1
+; CHECK-NEXT:    [[_34I_10:%.*]] = getelementptr i8, ptr [[A]], i64 11
+; CHECK-NEXT:    [[Z_SROA_0_26_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 10
+; CHECK-NEXT:    store i8 [[Z_SROA_0_26_VEC_EXTRACT]], ptr [[_34I_9]], align 1
+; CHECK-NEXT:    [[_34I_11:%.*]] = getelementptr i8, ptr [[A]], i64 12
+; CHECK-NEXT:    [[Z_SROA_0_27_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 11
+; CHECK-NEXT:    store i8 [[Z_SROA_0_27_VEC_EXTRACT]], ptr [[_34I_10]], align 1
+; CHECK-NEXT:    [[_34I_12:%.*]] = getelementptr i8, ptr [[A]], i64 13
+; CHECK-NEXT:    [[Z_SROA_0_28_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 12
+; CHECK-NEXT:    store i8 [[Z_SROA_0_28_VEC_EXTRACT]], ptr [[_34I_11]], align 1
+; CHECK-NEXT:    [[_34I_13:%.*]] = getelementptr i8, ptr [[A]], i64 14
+; CHECK-NEXT:    [[Z_SROA_0_29_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 13
+; CHECK-NEXT:    store i8 [[Z_SROA_0_29_VEC_EXTRACT]], ptr [[_34I_12]], align 1
+; CHECK-NEXT:    [[_34I_14:%.*]] = getelementptr i8, ptr [[A]], i64 15
+; CHECK-NEXT:    [[Z_SROA_0_30_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 14
+; CHECK-NEXT:    store i8 [[Z_SROA_0_30_VEC_EXTRACT]], ptr [[_34I_13]], align 1
+; CHECK-NEXT:    [[_34I_15:%.*]] = getelementptr i8, ptr [[A]], i64 16
+; CHECK-NEXT:    [[Z_SROA_0_31_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 15
+; CHECK-NEXT:    store i8 [[Z_SROA_0_31_VEC_EXTRACT]], ptr [[_34I_14]], align 1
+; CHECK-NEXT:    [[_34I_16:%.*]] = getelementptr i8, ptr [[A]], i64 17
+; CHECK-NEXT:    [[Z_SROA_0_32_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 16
+; CHECK-NEXT:    store i8 [[Z_SROA_0_32_VEC_EXTRACT]], ptr [[_34I_15]], align 1
+; CHECK-NEXT:    [[_34I_17:%.*]] = getelementptr i8, ptr [[A]], i64 18
+; CHECK-NEXT:    [[Z_SROA_0_33_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 17
+; CHECK-NEXT:    store i8 [[Z_SROA_0_33_VEC_EXTRACT]], ptr [[_34I_16]], align 1
+; CHECK-NEXT:    [[_34I_18:%.*]] = getelementptr i8, ptr [[A]], i64 19
+; CHECK-NEXT:    [[Z_SROA_0_34_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 18
+; CHECK-NEXT:    store i8 [[Z_SROA_0_34_VEC_EXTRACT]], ptr [[_34I_17]], align 1
+; CHECK-NEXT:    [[_34I_19:%.*]] = getelementptr i8, ptr [[A]], i64 20
+; CHECK-NEXT:    [[Z_SROA_0_35_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 19
+; CHECK-NEXT:    store i8 [[Z_SROA_0_35_VEC_EXTRACT]], ptr [[_34I_18]], align 1
+; CHECK-NEXT:    [[_34I_20:%.*]] = getelementptr i8, ptr [[A]], i64 21
+; CHECK-NEXT:    [[Z_SROA_0_36_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 20
+; CHECK-NEXT:    store i8 [[Z_SROA_0_36_VEC_EXTRACT]], ptr [[_34I_19]], align 1
+; CHECK-NEXT:    [[_34I_21:%.*]] = getelementptr i8, ptr [[A]], i64 22
+; CHECK-NEXT:    [[Z_SROA_0_37_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 21
+; CHECK-NEXT:    store i8 [[Z_SROA_0_37_VEC_EXTRACT]], ptr [[_34I_20]], align 1
+; CHECK-NEXT:    [[_34I_22:%.*]] = getelementptr i8, ptr [[A]], i64 23
+; CHECK-NEXT:    [[Z_SROA_0_38_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 22
+; CHECK-NEXT:    store i8 [[Z_SROA_0_38_VEC_EXTRACT]], ptr [[_34I_21]], align 1
+; CHECK-NEXT:    [[_34I_23:%.*]] = getelementptr i8, ptr [[A]], i64 24
+; CHECK-NEXT:    [[Z_SROA_0_39_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 23
+; CHECK-NEXT:    store i8 [[Z_SROA_0_39_VEC_EXTRACT]], ptr [[_34I_22]], align 1
+; CHECK-NEXT:    [[_34I_24:%.*]] = getelementptr i8, ptr [[A]], i64 25
+; CHECK-NEXT:    [[Z_SROA_0_40_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 24
+; CHECK-NEXT:    store i8 [[Z_SROA_0_40_VEC_EXTRACT]], ptr [[_34I_23]], align 1
+; CHECK-NEXT:    [[_34I_25:%.*]] = getelementptr i8, ptr [[A]], i64 26
+; CHECK-NEXT:    [[Z_SROA_0_41_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 25
+; CHECK-NEXT:    store i8 [[Z_SROA_0_41_VEC_EXTRACT]], ptr [[_34I_24]], align 1
+; CHECK-NEXT:    [[_34I_26:%.*]] = getelementptr i8, ptr [[A]], i64 27
+; CHECK-NEXT:    [[Z_SROA_0_42_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 26
+; CHECK-NEXT:    store i8 [[Z_SROA_0_42_VEC_EXTRACT]], ptr [[_34I_25]], align 1
+; CHECK-NEXT:    [[_34I_27:%.*]] = getelementptr i8, ptr [[A]], i64 28
+; CHECK-NEXT:    [[Z_SROA_0_43_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 27
+; CHECK-NEXT:    store i8 [[Z_SROA_0_43_VEC_EXTRACT]], ptr [[_34I_26]], align 1
+; CHECK-NEXT:    [[_34I_28:%.*]] = getelementptr i8, ptr [[A]], i64 29
+; CHECK-NEXT:    [[Z_SROA_0_44_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 28
+; CHECK-NEXT:    store i8 [[Z_SROA_0_44_VEC_EXTRACT]], ptr [[_34I_27]], align 1
+; CHECK-NEXT:    [[_34I_29:%.*]] = getelementptr i8, ptr [[A]], i64 30
+; CHECK-NEXT:    [[Z_SROA_0_45_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 29
+; CHECK-NEXT:    store i8 [[Z_SROA_0_45_VEC_EXTRACT]], ptr [[_34I_28]], align 1
+; CHECK-NEXT:    [[_34I_30:%.*]] = getelementptr i8, ptr [[A]], i64 31
+; CHECK-NEXT:    [[Z_SROA_0_46_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 30
+; CHECK-NEXT:    store i8 [[Z_SROA_0_46_VEC_EXTRACT]], ptr [[_34I_29]], align 1
+; CHECK-NEXT:    [[Z_SROA_0_47_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 31
+; CHECK-NEXT:    store i8 [[Z_SROA_0_47_VEC_EXTRACT]], ptr [[_34I_30]], align 1
+; CHECK-NEXT:    ret void
+;
+start:
+  %z = alloca %Zip, align 8
+  %sroa_1 = getelementptr i8, ptr %z, i64 16
+  store <32 x i8> %_0, ptr %sroa_1, align 8
+  %len_ = getelementptr i8, ptr %z, i64 56
+  store i64 32, ptr %len_, align 8
+  %_1 = getelementptr %Zip, ptr %z, i64 0, i32 1, i32 1
+  %_2 = getelementptr %Zip, ptr %z, i64 0, i32 1, i32 1, i32 1
+  %len = load i64, ptr %_2, align 8
+  %_10 = getelementptr %Zip, ptr %z, i64 0, i32 1
+  br label %body
+
+body:                                             ; preds = %body, %start
+  %_34 = phi ptr [ %_34i, %body ], [ %a, %start ]
+  %idx = phi i64 [ %idx_, %body ], [ 0, %start ]
+  %_34i = getelementptr i8, ptr %_34, i64 1
+  %idx_ = add i64 %idx, 1
+  store i64 0, ptr %_1, align 8
+  %_24 = getelementptr i8, ptr %_10, i64 %idx
+  %_18 = load i8, ptr %_24, align 1
+  store i8 %_18, ptr %_34, align 1
+  %_6 = icmp eq i64 %len, %idx_
+  br i1 %_6, label %exit, label %body
+
+exit:                                             ; preds = %body
+  ret void
+}
+
+attributes #0 = { "target-cpu"="znver3" }

>From 31e2ec9d89aeded9ea5da822262449b0c4e8ab16 Mon Sep 17 00:00:00 2001
From: DianQK <dianqk at dianqk.net>
Date: Sat, 16 Sep 2023 11:48:08 +0800
Subject: [PATCH 3/5] [EarlyCSE] Compare GEP instructions based on offset

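Treat two GEPs as equivalent when they share the same base pointer and
the same constant byte offset, even if their source element types or
index lists differ. This lets EarlyCSE eliminate more redundant GEPs and
provides more opportunities for constant propagation in subsequent
optimizations.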
---
 llvm/lib/Transforms/Scalar/EarlyCSE.cpp       | 153 +++++++++++++++---
 llvm/test/Transforms/EarlyCSE/gep.ll          |   3 -
 .../PhaseOrdering/X86/unroll-vectorizer.ll    |  96 +----------
 3 files changed, 134 insertions(+), 118 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
index 439235f47471efb..4c69a2f7d75c303 100644
--- a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -143,11 +143,11 @@ struct SimpleValue {
              !CI->getFunction()->isPresplitCoroutine();
     }
     return isa<CastInst>(Inst) || isa<UnaryOperator>(Inst) ||
-           isa<BinaryOperator>(Inst) || isa<GetElementPtrInst>(Inst) ||
-           isa<CmpInst>(Inst) || isa<SelectInst>(Inst) ||
-           isa<ExtractElementInst>(Inst) || isa<InsertElementInst>(Inst) ||
-           isa<ShuffleVectorInst>(Inst) || isa<ExtractValueInst>(Inst) ||
-           isa<InsertValueInst>(Inst) || isa<FreezeInst>(Inst);
+           isa<BinaryOperator>(Inst) || isa<CmpInst>(Inst) ||
+           isa<SelectInst>(Inst) || isa<ExtractElementInst>(Inst) ||
+           isa<InsertElementInst>(Inst) || isa<ShuffleVectorInst>(Inst) ||
+           isa<ExtractValueInst>(Inst) || isa<InsertValueInst>(Inst) ||
+           isa<FreezeInst>(Inst);
   }
 };
 
@@ -307,10 +307,9 @@ static unsigned getHashValueImpl(SimpleValue Val) {
                         IVI->getOperand(1),
                         hash_combine_range(IVI->idx_begin(), IVI->idx_end()));
 
-  assert((isa<CallInst>(Inst) || isa<GetElementPtrInst>(Inst) ||
-          isa<ExtractElementInst>(Inst) || isa<InsertElementInst>(Inst) ||
-          isa<ShuffleVectorInst>(Inst) || isa<UnaryOperator>(Inst) ||
-          isa<FreezeInst>(Inst)) &&
+  assert((isa<CallInst>(Inst) || isa<ExtractElementInst>(Inst) ||
+          isa<InsertElementInst>(Inst) || isa<ShuffleVectorInst>(Inst) ||
+          isa<UnaryOperator>(Inst) || isa<FreezeInst>(Inst)) &&
          "Invalid/unknown instruction");
 
   // Handle intrinsics with commutative operands.
@@ -553,6 +552,77 @@ bool DenseMapInfo<CallValue>::isEqual(CallValue LHS, CallValue RHS) {
   return LHSI->isIdenticalTo(RHSI);
 }
 
+//===----------------------------------------------------------------------===//
+// GEPValue
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+struct GEPValue {
+  Instruction *Inst;
+  APInt ConstantOffset;
+  bool HasConstantOffset;
+
+  GEPValue(Instruction *I) : Inst(I), HasConstantOffset(false) {
+      assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
+  }
+  GEPValue(Instruction *I, APInt ConstantOffset, bool HasConstantOffset)
+      : Inst(I), ConstantOffset(ConstantOffset),
+        HasConstantOffset(HasConstantOffset) {
+      assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
+  }
+
+  bool isSentinel() const {
+      return Inst == DenseMapInfo<Instruction *>::getEmptyKey() ||
+             Inst == DenseMapInfo<Instruction *>::getTombstoneKey();
+  }
+
+  static bool canHandle(Instruction *Inst) {
+      return isa<GetElementPtrInst>(Inst);
+  }
+};
+
+} // namespace
+
+namespace llvm {
+
+template <> struct DenseMapInfo<GEPValue> {
+  static inline GEPValue getEmptyKey() {
+      return DenseMapInfo<Instruction *>::getEmptyKey();
+  }
+
+  static inline GEPValue getTombstoneKey() {
+      return DenseMapInfo<Instruction *>::getTombstoneKey();
+  }
+
+  static unsigned getHashValue(GEPValue Val);
+  static bool isEqual(GEPValue LHS, GEPValue RHS);
+};
+
+} // end namespace llvm
+
+unsigned DenseMapInfo<GEPValue>::getHashValue(GEPValue Val) {
+  GetElementPtrInst *GEP = cast<GetElementPtrInst>(Val.Inst);
+  if (Val.HasConstantOffset)
+      return hash_combine(GEP->getOpcode(), GEP->getPointerOperand(),
+                          Val.ConstantOffset);
+  return hash_combine(
+      GEP->getOpcode(),
+      hash_combine_range(GEP->value_op_begin(), GEP->value_op_end()));
+}
+
+bool DenseMapInfo<GEPValue>::isEqual(GEPValue LHS, GEPValue RHS) {
+  if (LHS.isSentinel() || RHS.isSentinel())
+      return LHS.Inst == RHS.Inst;
+  GetElementPtrInst *LGEP = cast<GetElementPtrInst>(LHS.Inst);
+  GetElementPtrInst *RGEP = cast<GetElementPtrInst>(RHS.Inst);
+  if (LGEP->getPointerOperand() != RGEP->getPointerOperand())
+      return false;
+  if (LHS.HasConstantOffset && RHS.HasConstantOffset)
+      return LHS.ConstantOffset == RHS.ConstantOffset;
+  return LGEP->isIdenticalToWhenDefined(RGEP);
+}
+
 //===----------------------------------------------------------------------===//
 // EarlyCSE implementation
 //===----------------------------------------------------------------------===//
@@ -647,6 +717,13 @@ class EarlyCSE {
       ScopedHashTable<CallValue, std::pair<Instruction *, unsigned>>;
   CallHTType AvailableCalls;
 
+  using GEPMapAllocatorTy =
+      RecyclingAllocator<BumpPtrAllocator,
+                         ScopedHashTableVal<GEPValue, Value *>>;
+  using GEPHTType = ScopedHashTable<GEPValue, Value *, DenseMapInfo<GEPValue>,
+                                    GEPMapAllocatorTy>;
+  GEPHTType AvailableGEPs;
+
   /// This is the current generation of the memory value.
   unsigned CurrentGeneration = 0;
 
@@ -667,9 +744,11 @@ class EarlyCSE {
   class NodeScope {
   public:
     NodeScope(ScopedHTType &AvailableValues, LoadHTType &AvailableLoads,
-              InvariantHTType &AvailableInvariants, CallHTType &AvailableCalls)
-      : Scope(AvailableValues), LoadScope(AvailableLoads),
-        InvariantScope(AvailableInvariants), CallScope(AvailableCalls) {}
+              InvariantHTType &AvailableInvariants, CallHTType &AvailableCalls,
+              GEPHTType &AvailableGEPs)
+        : Scope(AvailableValues), LoadScope(AvailableLoads),
+          InvariantScope(AvailableInvariants), CallScope(AvailableCalls),
+          GEPScope(AvailableGEPs) {}
     NodeScope(const NodeScope &) = delete;
     NodeScope &operator=(const NodeScope &) = delete;
 
@@ -678,6 +757,7 @@ class EarlyCSE {
     LoadHTType::ScopeTy LoadScope;
     InvariantHTType::ScopeTy InvariantScope;
     CallHTType::ScopeTy CallScope;
+    GEPHTType::ScopeTy GEPScope;
   };
 
   // Contains all the needed information to create a stack for doing a depth
@@ -688,13 +768,13 @@ class EarlyCSE {
   public:
     StackNode(ScopedHTType &AvailableValues, LoadHTType &AvailableLoads,
               InvariantHTType &AvailableInvariants, CallHTType &AvailableCalls,
-              unsigned cg, DomTreeNode *n, DomTreeNode::const_iterator child,
+              GEPHTType &AvailableGEPs, unsigned cg, DomTreeNode *n,
+              DomTreeNode::const_iterator child,
               DomTreeNode::const_iterator end)
         : CurrentGeneration(cg), ChildGeneration(cg), Node(n), ChildIter(child),
           EndIter(end),
           Scopes(AvailableValues, AvailableLoads, AvailableInvariants,
-                 AvailableCalls)
-          {}
+                 AvailableCalls, AvailableGEPs) {}
     StackNode(const StackNode &) = delete;
     StackNode &operator=(const StackNode &) = delete;
 
@@ -1561,6 +1641,39 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
       continue;
     }
 
+    if (GEPValue::canHandle(&Inst)) {
+      GetElementPtrInst *GEP = cast<GetElementPtrInst>(&Inst);
+      APInt Offset(SQ.DL.getIndexTypeSizeInBits(GEP->getType()), 0);
+      bool HasConstantOffset = GEP->accumulateConstantOffset(SQ.DL, Offset);
+      GEPValue GEPVal(GEP, Offset, HasConstantOffset);
+      if (Value *V = AvailableGEPs.lookup(GEPVal)) {
+        LLVM_DEBUG(dbgs() << "EarlyCSE CSE: " << Inst << "  to: " << *V
+                          << '\n');
+        if (auto *I = dyn_cast<Instruction>(V)) {
+          // If I being poison triggers UB, there is no need to drop those
+          // flags. Otherwise, only retain flags present on both I and Inst.
+          // TODO: Currently some fast-math flags are not treated as
+          // poison-generating even though they should. Until this is fixed,
+          // always retain flags present on both I and Inst for floating point
+          // instructions.
+          if (isa<FPMathOperator>(I) ||
+              (I->hasPoisonGeneratingFlags() && !programUndefinedIfPoison(I)))
+            I->andIRFlags(&Inst);
+        }
+        Inst.replaceAllUsesWith(V);
+        salvageKnowledge(&Inst, &AC);
+        removeMSSA(Inst);
+        Inst.eraseFromParent();
+        Changed = true;
+        ++NumCSE;
+        continue;
+      }
+
+      // Otherwise, just remember that this value is available.
+      AvailableGEPs.insert(GEPVal, &Inst);
+      continue;
+    }
+
     // A release fence requires that all stores complete before it, but does
     // not prevent the reordering of following loads 'before' the fence.  As a
     // result, we don't need to consider it as writing to memory and don't need
@@ -1675,7 +1788,7 @@ bool EarlyCSE::run() {
   // Process the root node.
   nodesToProcess.push_back(new StackNode(
       AvailableValues, AvailableLoads, AvailableInvariants, AvailableCalls,
-      CurrentGeneration, DT.getRootNode(),
+      AvailableGEPs, CurrentGeneration, DT.getRootNode(),
       DT.getRootNode()->begin(), DT.getRootNode()->end()));
 
   assert(!CurrentGeneration && "Create a new EarlyCSE instance to rerun it.");
@@ -1698,10 +1811,10 @@ bool EarlyCSE::run() {
     } else if (NodeToProcess->childIter() != NodeToProcess->end()) {
       // Push the next child onto the stack.
       DomTreeNode *child = NodeToProcess->nextChild();
-      nodesToProcess.push_back(
-          new StackNode(AvailableValues, AvailableLoads, AvailableInvariants,
-                        AvailableCalls, NodeToProcess->childGeneration(),
-                        child, child->begin(), child->end()));
+      nodesToProcess.push_back(new StackNode(
+          AvailableValues, AvailableLoads, AvailableInvariants, AvailableCalls,
+          AvailableGEPs, NodeToProcess->childGeneration(), child,
+          child->begin(), child->end()));
     } else {
       // It has been processed, and there are no more children to process,
       // so delete it and pop it off the stack.
diff --git a/llvm/test/Transforms/EarlyCSE/gep.ll b/llvm/test/Transforms/EarlyCSE/gep.ll
index 838b6dbeff3332f..499b5ac8de0af9a 100644
--- a/llvm/test/Transforms/EarlyCSE/gep.ll
+++ b/llvm/test/Transforms/EarlyCSE/gep.ll
@@ -10,10 +10,7 @@ define void @foo(ptr %a, <4 x i64> %b, i64 %i) {
 ; CHECK-LABEL: define void @foo(
 ; CHECK-SAME: ptr [[A:%.*]], <4 x i64> [[B:%.*]], i64 [[I:%.*]]) {
 ; CHECK-NEXT:    [[S1A:%.*]] = getelementptr i8, ptr [[A]], i64 8
-; CHECK-NEXT:    [[S1C:%.*]] = getelementptr [[T1:%.*]], ptr [[A]], i64 0, i32 1
 ; CHECK-NEXT:    [[N1D:%.*]] = getelementptr i8, ptr [[A]], i64 7
-; CHECK-NEXT:    [[S1E:%.*]] = getelementptr i64, ptr [[A]], i64 1
-; CHECK-NEXT:    [[S1F:%.*]] = getelementptr i32, ptr [[A]], i64 2
 ; CHECK-NEXT:    [[N1G:%.*]] = getelementptr i32, ptr [[A]], i64 1
 ; CHECK-NEXT:    [[N1H:%.*]] = getelementptr i8, ptr [[A]], i64 [[I]]
 ; CHECK-NEXT:    [[V:%.*]] = getelementptr i64, ptr [[A]], <4 x i64> <i64 1, i64 1, i64 1, i64 1>
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/unroll-vectorizer.ll b/llvm/test/Transforms/PhaseOrdering/X86/unroll-vectorizer.ll
index 3072cb39e0133e2..1c9e7a771ca19c7 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/unroll-vectorizer.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/unroll-vectorizer.ll
@@ -10,101 +10,7 @@ define void @foo(ptr %a, <32 x i8> %_0) #0 {
 ; CHECK-LABEL: define void @foo(
 ; CHECK-SAME: ptr nocapture writeonly [[A:%.*]], <32 x i8> [[_0:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:  start:
-; CHECK-NEXT:    [[_34I:%.*]] = getelementptr i8, ptr [[A]], i64 1
-; CHECK-NEXT:    [[Z_SROA_0_16_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 0
-; CHECK-NEXT:    store i8 [[Z_SROA_0_16_VEC_EXTRACT]], ptr [[A]], align 1
-; CHECK-NEXT:    [[_34I_1:%.*]] = getelementptr i8, ptr [[A]], i64 2
-; CHECK-NEXT:    [[Z_SROA_0_17_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 1
-; CHECK-NEXT:    store i8 [[Z_SROA_0_17_VEC_EXTRACT]], ptr [[_34I]], align 1
-; CHECK-NEXT:    [[_34I_2:%.*]] = getelementptr i8, ptr [[A]], i64 3
-; CHECK-NEXT:    [[Z_SROA_0_18_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 2
-; CHECK-NEXT:    store i8 [[Z_SROA_0_18_VEC_EXTRACT]], ptr [[_34I_1]], align 1
-; CHECK-NEXT:    [[_34I_3:%.*]] = getelementptr i8, ptr [[A]], i64 4
-; CHECK-NEXT:    [[Z_SROA_0_19_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 3
-; CHECK-NEXT:    store i8 [[Z_SROA_0_19_VEC_EXTRACT]], ptr [[_34I_2]], align 1
-; CHECK-NEXT:    [[_34I_4:%.*]] = getelementptr i8, ptr [[A]], i64 5
-; CHECK-NEXT:    [[Z_SROA_0_20_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 4
-; CHECK-NEXT:    store i8 [[Z_SROA_0_20_VEC_EXTRACT]], ptr [[_34I_3]], align 1
-; CHECK-NEXT:    [[_34I_5:%.*]] = getelementptr i8, ptr [[A]], i64 6
-; CHECK-NEXT:    [[Z_SROA_0_21_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 5
-; CHECK-NEXT:    store i8 [[Z_SROA_0_21_VEC_EXTRACT]], ptr [[_34I_4]], align 1
-; CHECK-NEXT:    [[_34I_6:%.*]] = getelementptr i8, ptr [[A]], i64 7
-; CHECK-NEXT:    [[Z_SROA_0_22_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 6
-; CHECK-NEXT:    store i8 [[Z_SROA_0_22_VEC_EXTRACT]], ptr [[_34I_5]], align 1
-; CHECK-NEXT:    [[_34I_7:%.*]] = getelementptr i8, ptr [[A]], i64 8
-; CHECK-NEXT:    [[Z_SROA_0_23_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 7
-; CHECK-NEXT:    store i8 [[Z_SROA_0_23_VEC_EXTRACT]], ptr [[_34I_6]], align 1
-; CHECK-NEXT:    [[_34I_8:%.*]] = getelementptr i8, ptr [[A]], i64 9
-; CHECK-NEXT:    [[Z_SROA_0_24_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 8
-; CHECK-NEXT:    store i8 [[Z_SROA_0_24_VEC_EXTRACT]], ptr [[_34I_7]], align 1
-; CHECK-NEXT:    [[_34I_9:%.*]] = getelementptr i8, ptr [[A]], i64 10
-; CHECK-NEXT:    [[Z_SROA_0_25_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 9
-; CHECK-NEXT:    store i8 [[Z_SROA_0_25_VEC_EXTRACT]], ptr [[_34I_8]], align 1
-; CHECK-NEXT:    [[_34I_10:%.*]] = getelementptr i8, ptr [[A]], i64 11
-; CHECK-NEXT:    [[Z_SROA_0_26_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 10
-; CHECK-NEXT:    store i8 [[Z_SROA_0_26_VEC_EXTRACT]], ptr [[_34I_9]], align 1
-; CHECK-NEXT:    [[_34I_11:%.*]] = getelementptr i8, ptr [[A]], i64 12
-; CHECK-NEXT:    [[Z_SROA_0_27_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 11
-; CHECK-NEXT:    store i8 [[Z_SROA_0_27_VEC_EXTRACT]], ptr [[_34I_10]], align 1
-; CHECK-NEXT:    [[_34I_12:%.*]] = getelementptr i8, ptr [[A]], i64 13
-; CHECK-NEXT:    [[Z_SROA_0_28_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 12
-; CHECK-NEXT:    store i8 [[Z_SROA_0_28_VEC_EXTRACT]], ptr [[_34I_11]], align 1
-; CHECK-NEXT:    [[_34I_13:%.*]] = getelementptr i8, ptr [[A]], i64 14
-; CHECK-NEXT:    [[Z_SROA_0_29_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 13
-; CHECK-NEXT:    store i8 [[Z_SROA_0_29_VEC_EXTRACT]], ptr [[_34I_12]], align 1
-; CHECK-NEXT:    [[_34I_14:%.*]] = getelementptr i8, ptr [[A]], i64 15
-; CHECK-NEXT:    [[Z_SROA_0_30_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 14
-; CHECK-NEXT:    store i8 [[Z_SROA_0_30_VEC_EXTRACT]], ptr [[_34I_13]], align 1
-; CHECK-NEXT:    [[_34I_15:%.*]] = getelementptr i8, ptr [[A]], i64 16
-; CHECK-NEXT:    [[Z_SROA_0_31_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 15
-; CHECK-NEXT:    store i8 [[Z_SROA_0_31_VEC_EXTRACT]], ptr [[_34I_14]], align 1
-; CHECK-NEXT:    [[_34I_16:%.*]] = getelementptr i8, ptr [[A]], i64 17
-; CHECK-NEXT:    [[Z_SROA_0_32_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 16
-; CHECK-NEXT:    store i8 [[Z_SROA_0_32_VEC_EXTRACT]], ptr [[_34I_15]], align 1
-; CHECK-NEXT:    [[_34I_17:%.*]] = getelementptr i8, ptr [[A]], i64 18
-; CHECK-NEXT:    [[Z_SROA_0_33_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 17
-; CHECK-NEXT:    store i8 [[Z_SROA_0_33_VEC_EXTRACT]], ptr [[_34I_16]], align 1
-; CHECK-NEXT:    [[_34I_18:%.*]] = getelementptr i8, ptr [[A]], i64 19
-; CHECK-NEXT:    [[Z_SROA_0_34_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 18
-; CHECK-NEXT:    store i8 [[Z_SROA_0_34_VEC_EXTRACT]], ptr [[_34I_17]], align 1
-; CHECK-NEXT:    [[_34I_19:%.*]] = getelementptr i8, ptr [[A]], i64 20
-; CHECK-NEXT:    [[Z_SROA_0_35_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 19
-; CHECK-NEXT:    store i8 [[Z_SROA_0_35_VEC_EXTRACT]], ptr [[_34I_18]], align 1
-; CHECK-NEXT:    [[_34I_20:%.*]] = getelementptr i8, ptr [[A]], i64 21
-; CHECK-NEXT:    [[Z_SROA_0_36_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 20
-; CHECK-NEXT:    store i8 [[Z_SROA_0_36_VEC_EXTRACT]], ptr [[_34I_19]], align 1
-; CHECK-NEXT:    [[_34I_21:%.*]] = getelementptr i8, ptr [[A]], i64 22
-; CHECK-NEXT:    [[Z_SROA_0_37_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 21
-; CHECK-NEXT:    store i8 [[Z_SROA_0_37_VEC_EXTRACT]], ptr [[_34I_20]], align 1
-; CHECK-NEXT:    [[_34I_22:%.*]] = getelementptr i8, ptr [[A]], i64 23
-; CHECK-NEXT:    [[Z_SROA_0_38_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 22
-; CHECK-NEXT:    store i8 [[Z_SROA_0_38_VEC_EXTRACT]], ptr [[_34I_21]], align 1
-; CHECK-NEXT:    [[_34I_23:%.*]] = getelementptr i8, ptr [[A]], i64 24
-; CHECK-NEXT:    [[Z_SROA_0_39_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 23
-; CHECK-NEXT:    store i8 [[Z_SROA_0_39_VEC_EXTRACT]], ptr [[_34I_22]], align 1
-; CHECK-NEXT:    [[_34I_24:%.*]] = getelementptr i8, ptr [[A]], i64 25
-; CHECK-NEXT:    [[Z_SROA_0_40_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 24
-; CHECK-NEXT:    store i8 [[Z_SROA_0_40_VEC_EXTRACT]], ptr [[_34I_23]], align 1
-; CHECK-NEXT:    [[_34I_25:%.*]] = getelementptr i8, ptr [[A]], i64 26
-; CHECK-NEXT:    [[Z_SROA_0_41_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 25
-; CHECK-NEXT:    store i8 [[Z_SROA_0_41_VEC_EXTRACT]], ptr [[_34I_24]], align 1
-; CHECK-NEXT:    [[_34I_26:%.*]] = getelementptr i8, ptr [[A]], i64 27
-; CHECK-NEXT:    [[Z_SROA_0_42_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 26
-; CHECK-NEXT:    store i8 [[Z_SROA_0_42_VEC_EXTRACT]], ptr [[_34I_25]], align 1
-; CHECK-NEXT:    [[_34I_27:%.*]] = getelementptr i8, ptr [[A]], i64 28
-; CHECK-NEXT:    [[Z_SROA_0_43_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 27
-; CHECK-NEXT:    store i8 [[Z_SROA_0_43_VEC_EXTRACT]], ptr [[_34I_26]], align 1
-; CHECK-NEXT:    [[_34I_28:%.*]] = getelementptr i8, ptr [[A]], i64 29
-; CHECK-NEXT:    [[Z_SROA_0_44_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 28
-; CHECK-NEXT:    store i8 [[Z_SROA_0_44_VEC_EXTRACT]], ptr [[_34I_27]], align 1
-; CHECK-NEXT:    [[_34I_29:%.*]] = getelementptr i8, ptr [[A]], i64 30
-; CHECK-NEXT:    [[Z_SROA_0_45_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 29
-; CHECK-NEXT:    store i8 [[Z_SROA_0_45_VEC_EXTRACT]], ptr [[_34I_28]], align 1
-; CHECK-NEXT:    [[_34I_30:%.*]] = getelementptr i8, ptr [[A]], i64 31
-; CHECK-NEXT:    [[Z_SROA_0_46_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 30
-; CHECK-NEXT:    store i8 [[Z_SROA_0_46_VEC_EXTRACT]], ptr [[_34I_29]], align 1
-; CHECK-NEXT:    [[Z_SROA_0_47_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 31
-; CHECK-NEXT:    store i8 [[Z_SROA_0_47_VEC_EXTRACT]], ptr [[_34I_30]], align 1
+; CHECK-NEXT:    store <32 x i8> [[_0]], ptr [[A]], align 1
 ; CHECK-NEXT:    ret void
 ;
 start:

>From 089349f9eb47c57cfa2049ce0af5c7dd079a8581 Mon Sep 17 00:00:00 2001
From: DianQK <dianqk at dianqk.net>
Date: Sun, 17 Sep 2023 10:06:59 +0800
Subject: [PATCH 4/5] [EarlyCSE] Use ref

---
 llvm/lib/Transforms/Scalar/EarlyCSE.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
index 4c69a2f7d75c303..1bf614191c10bec 100644
--- a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -595,13 +595,13 @@ template <> struct DenseMapInfo<GEPValue> {
       return DenseMapInfo<Instruction *>::getTombstoneKey();
   }
 
-  static unsigned getHashValue(GEPValue Val);
-  static bool isEqual(GEPValue LHS, GEPValue RHS);
+  static unsigned getHashValue(const GEPValue &Val);
+  static bool isEqual(const GEPValue &LHS, const GEPValue &RHS);
 };
 
 } // end namespace llvm
 
-unsigned DenseMapInfo<GEPValue>::getHashValue(GEPValue Val) {
+unsigned DenseMapInfo<GEPValue>::getHashValue(const GEPValue &Val) {
   GetElementPtrInst *GEP = cast<GetElementPtrInst>(Val.Inst);
   if (Val.HasConstantOffset)
       return hash_combine(GEP->getOpcode(), GEP->getPointerOperand(),
@@ -611,7 +611,7 @@ unsigned DenseMapInfo<GEPValue>::getHashValue(GEPValue Val) {
       hash_combine_range(GEP->value_op_begin(), GEP->value_op_end()));
 }
 
-bool DenseMapInfo<GEPValue>::isEqual(GEPValue LHS, GEPValue RHS) {
+bool DenseMapInfo<GEPValue>::isEqual(const GEPValue &LHS, const GEPValue &RHS) {
   if (LHS.isSentinel() || RHS.isSentinel())
       return LHS.Inst == RHS.Inst;
   GetElementPtrInst *LGEP = cast<GetElementPtrInst>(LHS.Inst);

>From 12eda5cf2df6438453683ce8c917a64786686a69 Mon Sep 17 00:00:00 2001
From: DianQK <dianqk at dianqk.net>
Date: Sun, 17 Sep 2023 16:08:12 +0800
Subject: [PATCH 5/5] fixup! [EarlyCSE] Compare GEP instructions based on
 offset

Add comments and some minor changes
---
 llvm/lib/Transforms/Scalar/EarlyCSE.cpp | 45 ++++++++++++-------------
 1 file changed, 21 insertions(+), 24 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
index 1bf614191c10bec..06e31f2a4666a03 100644
--- a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -67,6 +67,7 @@ STATISTIC(NumCSE,      "Number of instructions CSE'd");
 STATISTIC(NumCSECVP,   "Number of compare instructions CVP'd");
 STATISTIC(NumCSELoad,  "Number of load instructions CSE'd");
 STATISTIC(NumCSECall,  "Number of call instructions CSE'd");
+STATISTIC(NumCSEGEP, "Number of GEP instructions CSE'd");
 STATISTIC(NumDSE,      "Number of trivial dead stores removed");
 
 DEBUG_COUNTER(CSECounter, "early-cse",
@@ -1294,6 +1295,20 @@ Value *EarlyCSE::getMatchingValue(LoadValue &InVal, ParseMemoryInst &MemInst,
   return Result;
 }
 
+static void combineIRFlags(Instruction &From, Value *To) {
+  if (auto *I = dyn_cast<Instruction>(To)) {
+    // If I being poison triggers UB, there is no need to drop those
+    // flags. Otherwise, only retain flags present on both I and From.
+    // TODO: Currently some fast-math flags are not treated as
+    // poison-generating even though they should. Until this is fixed,
+    // always retain flags present on both I and From for floating point
+    // instructions.
+    if (isa<FPMathOperator>(I) ||
+        (I->hasPoisonGeneratingFlags() && !programUndefinedIfPoison(I)))
+      I->andIRFlags(&From);
+  }
+}
+
 bool EarlyCSE::overridingStores(const ParseMemoryInst &Earlier,
                                 const ParseMemoryInst &Later) {
   // Can we remove Earlier store because of Later store?
@@ -1519,16 +1534,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
           LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
           continue;
         }
-        if (auto *I = dyn_cast<Instruction>(V)) {
-          // If I being poison triggers UB, there is no need to drop those
-          // flags. Otherwise, only retain flags present on both I and Inst.
-          // TODO: Currently some fast-math flags are not treated as
-          // poison-generating even though they should. Until this is fixed,
-          // always retain flags present on both I and Inst for floating point
-          // instructions.
-          if (isa<FPMathOperator>(I) || (I->hasPoisonGeneratingFlags() && !programUndefinedIfPoison(I)))
-            I->andIRFlags(&Inst);
-        }
+        combineIRFlags(Inst, V);
         Inst.replaceAllUsesWith(V);
         salvageKnowledge(&Inst, &AC);
         removeMSSA(Inst);
@@ -1641,35 +1647,26 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
       continue;
     }
 
+    // Compare GEP instructions based on offset.
     if (GEPValue::canHandle(&Inst)) {
       GetElementPtrInst *GEP = cast<GetElementPtrInst>(&Inst);
       APInt Offset(SQ.DL.getIndexTypeSizeInBits(GEP->getType()), 0);
       bool HasConstantOffset = GEP->accumulateConstantOffset(SQ.DL, Offset);
       GEPValue GEPVal(GEP, Offset, HasConstantOffset);
       if (Value *V = AvailableGEPs.lookup(GEPVal)) {
-        LLVM_DEBUG(dbgs() << "EarlyCSE CSE: " << Inst << "  to: " << *V
+        LLVM_DEBUG(dbgs() << "EarlyCSE CSE GEP: " << Inst << "  to: " << *V
                           << '\n');
-        if (auto *I = dyn_cast<Instruction>(V)) {
-          // If I being poison triggers UB, there is no need to drop those
-          // flags. Otherwise, only retain flags present on both I and Inst.
-          // TODO: Currently some fast-math flags are not treated as
-          // poison-generating even though they should. Until this is fixed,
-          // always retain flags present on both I and Inst for floating point
-          // instructions.
-          if (isa<FPMathOperator>(I) ||
-              (I->hasPoisonGeneratingFlags() && !programUndefinedIfPoison(I)))
-            I->andIRFlags(&Inst);
-        }
+        combineIRFlags(Inst, V);
         Inst.replaceAllUsesWith(V);
         salvageKnowledge(&Inst, &AC);
         removeMSSA(Inst);
         Inst.eraseFromParent();
         Changed = true;
-        ++NumCSE;
+        ++NumCSEGEP;
         continue;
       }
 
-      // Otherwise, just remember that this value is available.
+      // Otherwise, just remember that we have this GEP.
       AvailableGEPs.insert(GEPVal, &Inst);
       continue;
     }



More information about the llvm-commits mailing list