[llvm] [EarlyCSE] Compare GEP instructions based on offset (PR #65875)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 14 07:16:57 PDT 2023
https://github.com/DianQK updated https://github.com/llvm/llvm-project/pull/65875:
>From a0e8343b9f82a2a09830c3a47f75ec8264f4e6ef Mon Sep 17 00:00:00 2001
From: DianQK <dianqk at dianqk.net>
Date: Sat, 9 Sep 2023 15:47:55 +0800
Subject: [PATCH 1/4] [EarlyCSE] Pre-commit offset-based GEP (NFC)
---
llvm/test/Transforms/EarlyCSE/gep.ll | 47 ++++++++++++++++++++++++++++
1 file changed, 47 insertions(+)
create mode 100644 llvm/test/Transforms/EarlyCSE/gep.ll
diff --git a/llvm/test/Transforms/EarlyCSE/gep.ll b/llvm/test/Transforms/EarlyCSE/gep.ll
new file mode 100644
index 000000000000000..838b6dbeff3332f
--- /dev/null
+++ b/llvm/test/Transforms/EarlyCSE/gep.ll
@@ -0,0 +1,47 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
+; RUN: opt < %s -S -passes=early-cse -earlycse-debug-hash | FileCheck %s
+; RUN: opt < %s -S -passes='early-cse<memssa>' | FileCheck %s
+
+%T1 = type { i64, i64, i64 }
+
+declare void @use_vec(<4 x ptr>);
+
+define void @foo(ptr %a, <4 x i64> %b, i64 %i) {
+; CHECK-LABEL: define void @foo(
+; CHECK-SAME: ptr [[A:%.*]], <4 x i64> [[B:%.*]], i64 [[I:%.*]]) {
+; CHECK-NEXT: [[S1A:%.*]] = getelementptr i8, ptr [[A]], i64 8
+; CHECK-NEXT: [[S1C:%.*]] = getelementptr [[T1:%.*]], ptr [[A]], i64 0, i32 1
+; CHECK-NEXT: [[N1D:%.*]] = getelementptr i8, ptr [[A]], i64 7
+; CHECK-NEXT: [[S1E:%.*]] = getelementptr i64, ptr [[A]], i64 1
+; CHECK-NEXT: [[S1F:%.*]] = getelementptr i32, ptr [[A]], i64 2
+; CHECK-NEXT: [[N1G:%.*]] = getelementptr i32, ptr [[A]], i64 1
+; CHECK-NEXT: [[N1H:%.*]] = getelementptr i8, ptr [[A]], i64 [[I]]
+; CHECK-NEXT: [[V:%.*]] = getelementptr i64, ptr [[A]], <4 x i64> <i64 1, i64 1, i64 1, i64 1>
+; CHECK-NEXT: call void @use_vec(<4 x ptr> [[V]])
+; CHECK-NEXT: [[V2:%.*]] = getelementptr i64, ptr [[A]], <4 x i64> <i64 0, i64 2, i64 1, i64 1>
+; CHECK-NEXT: call void @use_vec(<4 x ptr> [[V2]])
+; CHECK-NEXT: ret void
+;
+ %s1a = getelementptr i8, ptr %a, i64 8
+ %s1av = load i64, ptr %s1a
+ %s1b = getelementptr inbounds i8, ptr %a, i64 8
+ %s1bv = load i64, ptr %s1b
+ %s1c = getelementptr %T1, ptr %a, i64 0, i32 1
+ %s1cv = load i64, ptr %s1c
+ %n1d = getelementptr i8, ptr %a, i64 7
+ %n1dv = load i64, ptr %n1d
+ %s1e = getelementptr i64, ptr %a, i64 1
+ %s1ev = load i64, ptr %s1e
+ %s1f = getelementptr i32, ptr %a, i64 2
+ %s1fv = load i64, ptr %s1f
+ %n1g = getelementptr i32, ptr %a, i64 1
+ %n1gv = load i64, ptr %n1g
+ %n1h = getelementptr i8, ptr %a, i64 %i
+ %n1hv = load i64, ptr %n1h
+
+ %v = getelementptr i64, ptr %a, <4 x i64> <i64 1, i64 1, i64 1, i64 1>
+ call void @use_vec(<4 x ptr> %v)
+ %v2 = getelementptr i64, ptr %a, <4 x i64> <i64 0, i64 2, i64 1, i64 1>
+ call void @use_vec(<4 x ptr> %v2)
+ ret void
+}
>From ac1daad9bb4eb083df6b215c029816d3149e00d8 Mon Sep 17 00:00:00 2001
From: DianQK <dianqk at dianqk.net>
Date: Sun, 10 Sep 2023 13:14:57 +0800
Subject: [PATCH 2/4] [EarlyCSE] Add a vectorization failure example (NFC)
---
.../PhaseOrdering/X86/unroll-vectorizer.ll | 138 ++++++++++++++++++
1 file changed, 138 insertions(+)
create mode 100644 llvm/test/Transforms/PhaseOrdering/X86/unroll-vectorizer.ll
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/unroll-vectorizer.ll b/llvm/test/Transforms/PhaseOrdering/X86/unroll-vectorizer.ll
new file mode 100644
index 000000000000000..3072cb39e0133e2
--- /dev/null
+++ b/llvm/test/Transforms/PhaseOrdering/X86/unroll-vectorizer.ll
@@ -0,0 +1,138 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
+; RUN: opt < %s -O3 -S | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%Zip = type { { ptr, ptr }, { [32 x i8], { i64, i64 } } }
+
+define void @foo(ptr %a, <32 x i8> %_0) #0 {
+; CHECK-LABEL: define void @foo(
+; CHECK-SAME: ptr nocapture writeonly [[A:%.*]], <32 x i8> [[_0:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: start:
+; CHECK-NEXT: [[_34I:%.*]] = getelementptr i8, ptr [[A]], i64 1
+; CHECK-NEXT: [[Z_SROA_0_16_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 0
+; CHECK-NEXT: store i8 [[Z_SROA_0_16_VEC_EXTRACT]], ptr [[A]], align 1
+; CHECK-NEXT: [[_34I_1:%.*]] = getelementptr i8, ptr [[A]], i64 2
+; CHECK-NEXT: [[Z_SROA_0_17_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 1
+; CHECK-NEXT: store i8 [[Z_SROA_0_17_VEC_EXTRACT]], ptr [[_34I]], align 1
+; CHECK-NEXT: [[_34I_2:%.*]] = getelementptr i8, ptr [[A]], i64 3
+; CHECK-NEXT: [[Z_SROA_0_18_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 2
+; CHECK-NEXT: store i8 [[Z_SROA_0_18_VEC_EXTRACT]], ptr [[_34I_1]], align 1
+; CHECK-NEXT: [[_34I_3:%.*]] = getelementptr i8, ptr [[A]], i64 4
+; CHECK-NEXT: [[Z_SROA_0_19_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 3
+; CHECK-NEXT: store i8 [[Z_SROA_0_19_VEC_EXTRACT]], ptr [[_34I_2]], align 1
+; CHECK-NEXT: [[_34I_4:%.*]] = getelementptr i8, ptr [[A]], i64 5
+; CHECK-NEXT: [[Z_SROA_0_20_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 4
+; CHECK-NEXT: store i8 [[Z_SROA_0_20_VEC_EXTRACT]], ptr [[_34I_3]], align 1
+; CHECK-NEXT: [[_34I_5:%.*]] = getelementptr i8, ptr [[A]], i64 6
+; CHECK-NEXT: [[Z_SROA_0_21_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 5
+; CHECK-NEXT: store i8 [[Z_SROA_0_21_VEC_EXTRACT]], ptr [[_34I_4]], align 1
+; CHECK-NEXT: [[_34I_6:%.*]] = getelementptr i8, ptr [[A]], i64 7
+; CHECK-NEXT: [[Z_SROA_0_22_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 6
+; CHECK-NEXT: store i8 [[Z_SROA_0_22_VEC_EXTRACT]], ptr [[_34I_5]], align 1
+; CHECK-NEXT: [[_34I_7:%.*]] = getelementptr i8, ptr [[A]], i64 8
+; CHECK-NEXT: [[Z_SROA_0_23_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 7
+; CHECK-NEXT: store i8 [[Z_SROA_0_23_VEC_EXTRACT]], ptr [[_34I_6]], align 1
+; CHECK-NEXT: [[_34I_8:%.*]] = getelementptr i8, ptr [[A]], i64 9
+; CHECK-NEXT: [[Z_SROA_0_24_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 8
+; CHECK-NEXT: store i8 [[Z_SROA_0_24_VEC_EXTRACT]], ptr [[_34I_7]], align 1
+; CHECK-NEXT: [[_34I_9:%.*]] = getelementptr i8, ptr [[A]], i64 10
+; CHECK-NEXT: [[Z_SROA_0_25_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 9
+; CHECK-NEXT: store i8 [[Z_SROA_0_25_VEC_EXTRACT]], ptr [[_34I_8]], align 1
+; CHECK-NEXT: [[_34I_10:%.*]] = getelementptr i8, ptr [[A]], i64 11
+; CHECK-NEXT: [[Z_SROA_0_26_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 10
+; CHECK-NEXT: store i8 [[Z_SROA_0_26_VEC_EXTRACT]], ptr [[_34I_9]], align 1
+; CHECK-NEXT: [[_34I_11:%.*]] = getelementptr i8, ptr [[A]], i64 12
+; CHECK-NEXT: [[Z_SROA_0_27_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 11
+; CHECK-NEXT: store i8 [[Z_SROA_0_27_VEC_EXTRACT]], ptr [[_34I_10]], align 1
+; CHECK-NEXT: [[_34I_12:%.*]] = getelementptr i8, ptr [[A]], i64 13
+; CHECK-NEXT: [[Z_SROA_0_28_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 12
+; CHECK-NEXT: store i8 [[Z_SROA_0_28_VEC_EXTRACT]], ptr [[_34I_11]], align 1
+; CHECK-NEXT: [[_34I_13:%.*]] = getelementptr i8, ptr [[A]], i64 14
+; CHECK-NEXT: [[Z_SROA_0_29_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 13
+; CHECK-NEXT: store i8 [[Z_SROA_0_29_VEC_EXTRACT]], ptr [[_34I_12]], align 1
+; CHECK-NEXT: [[_34I_14:%.*]] = getelementptr i8, ptr [[A]], i64 15
+; CHECK-NEXT: [[Z_SROA_0_30_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 14
+; CHECK-NEXT: store i8 [[Z_SROA_0_30_VEC_EXTRACT]], ptr [[_34I_13]], align 1
+; CHECK-NEXT: [[_34I_15:%.*]] = getelementptr i8, ptr [[A]], i64 16
+; CHECK-NEXT: [[Z_SROA_0_31_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 15
+; CHECK-NEXT: store i8 [[Z_SROA_0_31_VEC_EXTRACT]], ptr [[_34I_14]], align 1
+; CHECK-NEXT: [[_34I_16:%.*]] = getelementptr i8, ptr [[A]], i64 17
+; CHECK-NEXT: [[Z_SROA_0_32_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 16
+; CHECK-NEXT: store i8 [[Z_SROA_0_32_VEC_EXTRACT]], ptr [[_34I_15]], align 1
+; CHECK-NEXT: [[_34I_17:%.*]] = getelementptr i8, ptr [[A]], i64 18
+; CHECK-NEXT: [[Z_SROA_0_33_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 17
+; CHECK-NEXT: store i8 [[Z_SROA_0_33_VEC_EXTRACT]], ptr [[_34I_16]], align 1
+; CHECK-NEXT: [[_34I_18:%.*]] = getelementptr i8, ptr [[A]], i64 19
+; CHECK-NEXT: [[Z_SROA_0_34_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 18
+; CHECK-NEXT: store i8 [[Z_SROA_0_34_VEC_EXTRACT]], ptr [[_34I_17]], align 1
+; CHECK-NEXT: [[_34I_19:%.*]] = getelementptr i8, ptr [[A]], i64 20
+; CHECK-NEXT: [[Z_SROA_0_35_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 19
+; CHECK-NEXT: store i8 [[Z_SROA_0_35_VEC_EXTRACT]], ptr [[_34I_18]], align 1
+; CHECK-NEXT: [[_34I_20:%.*]] = getelementptr i8, ptr [[A]], i64 21
+; CHECK-NEXT: [[Z_SROA_0_36_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 20
+; CHECK-NEXT: store i8 [[Z_SROA_0_36_VEC_EXTRACT]], ptr [[_34I_19]], align 1
+; CHECK-NEXT: [[_34I_21:%.*]] = getelementptr i8, ptr [[A]], i64 22
+; CHECK-NEXT: [[Z_SROA_0_37_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 21
+; CHECK-NEXT: store i8 [[Z_SROA_0_37_VEC_EXTRACT]], ptr [[_34I_20]], align 1
+; CHECK-NEXT: [[_34I_22:%.*]] = getelementptr i8, ptr [[A]], i64 23
+; CHECK-NEXT: [[Z_SROA_0_38_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 22
+; CHECK-NEXT: store i8 [[Z_SROA_0_38_VEC_EXTRACT]], ptr [[_34I_21]], align 1
+; CHECK-NEXT: [[_34I_23:%.*]] = getelementptr i8, ptr [[A]], i64 24
+; CHECK-NEXT: [[Z_SROA_0_39_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 23
+; CHECK-NEXT: store i8 [[Z_SROA_0_39_VEC_EXTRACT]], ptr [[_34I_22]], align 1
+; CHECK-NEXT: [[_34I_24:%.*]] = getelementptr i8, ptr [[A]], i64 25
+; CHECK-NEXT: [[Z_SROA_0_40_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 24
+; CHECK-NEXT: store i8 [[Z_SROA_0_40_VEC_EXTRACT]], ptr [[_34I_23]], align 1
+; CHECK-NEXT: [[_34I_25:%.*]] = getelementptr i8, ptr [[A]], i64 26
+; CHECK-NEXT: [[Z_SROA_0_41_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 25
+; CHECK-NEXT: store i8 [[Z_SROA_0_41_VEC_EXTRACT]], ptr [[_34I_24]], align 1
+; CHECK-NEXT: [[_34I_26:%.*]] = getelementptr i8, ptr [[A]], i64 27
+; CHECK-NEXT: [[Z_SROA_0_42_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 26
+; CHECK-NEXT: store i8 [[Z_SROA_0_42_VEC_EXTRACT]], ptr [[_34I_25]], align 1
+; CHECK-NEXT: [[_34I_27:%.*]] = getelementptr i8, ptr [[A]], i64 28
+; CHECK-NEXT: [[Z_SROA_0_43_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 27
+; CHECK-NEXT: store i8 [[Z_SROA_0_43_VEC_EXTRACT]], ptr [[_34I_26]], align 1
+; CHECK-NEXT: [[_34I_28:%.*]] = getelementptr i8, ptr [[A]], i64 29
+; CHECK-NEXT: [[Z_SROA_0_44_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 28
+; CHECK-NEXT: store i8 [[Z_SROA_0_44_VEC_EXTRACT]], ptr [[_34I_27]], align 1
+; CHECK-NEXT: [[_34I_29:%.*]] = getelementptr i8, ptr [[A]], i64 30
+; CHECK-NEXT: [[Z_SROA_0_45_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 29
+; CHECK-NEXT: store i8 [[Z_SROA_0_45_VEC_EXTRACT]], ptr [[_34I_28]], align 1
+; CHECK-NEXT: [[_34I_30:%.*]] = getelementptr i8, ptr [[A]], i64 31
+; CHECK-NEXT: [[Z_SROA_0_46_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 30
+; CHECK-NEXT: store i8 [[Z_SROA_0_46_VEC_EXTRACT]], ptr [[_34I_29]], align 1
+; CHECK-NEXT: [[Z_SROA_0_47_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 31
+; CHECK-NEXT: store i8 [[Z_SROA_0_47_VEC_EXTRACT]], ptr [[_34I_30]], align 1
+; CHECK-NEXT: ret void
+;
+start:
+ %z = alloca %Zip, align 8
+ %sroa_1 = getelementptr i8, ptr %z, i64 16
+ store <32 x i8> %_0, ptr %sroa_1, align 8
+ %len_ = getelementptr i8, ptr %z, i64 56
+ store i64 32, ptr %len_, align 8
+ %_1 = getelementptr %Zip, ptr %z, i64 0, i32 1, i32 1
+ %_2 = getelementptr %Zip, ptr %z, i64 0, i32 1, i32 1, i32 1
+ %len = load i64, ptr %_2, align 8
+ %_10 = getelementptr %Zip, ptr %z, i64 0, i32 1
+ br label %body
+
+body: ; preds = %body, %start
+ %_34 = phi ptr [ %_34i, %body ], [ %a, %start ]
+ %idx = phi i64 [ %idx_, %body ], [ 0, %start ]
+ %_34i = getelementptr i8, ptr %_34, i64 1
+ %idx_ = add i64 %idx, 1
+ store i64 0, ptr %_1, align 8
+ %_24 = getelementptr i8, ptr %_10, i64 %idx
+ %_18 = load i8, ptr %_24, align 1
+ store i8 %_18, ptr %_34, align 1
+ %_6 = icmp eq i64 %len, %idx_
+ br i1 %_6, label %exit, label %body
+
+exit: ; preds = %body
+ ret void
+}
+
+attributes #0 = { "target-cpu"="znver3" }
>From e079f3f9107f55e1595a47aafec5fe4e03277665 Mon Sep 17 00:00:00 2001
From: DianQK <dianqk at dianqk.net>
Date: Sat, 9 Sep 2023 15:09:58 +0800
Subject: [PATCH 3/4] [EarlyCSE] Compare GEP instructions based on offset
This provides more opportunities for constant propagation
in subsequent optimizations.
---
llvm/lib/Transforms/Scalar/EarlyCSE.cpp | 18 ++++
llvm/test/Transforms/EarlyCSE/gep.ll | 3 -
.../PhaseOrdering/X86/unroll-vectorizer.ll | 96 +------------------
3 files changed, 19 insertions(+), 98 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
index 439235f47471efb..c27b14c0d6e2e5d 100644
--- a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -336,6 +336,13 @@ static unsigned getHashValueImpl(SimpleValue Val) {
if (CallInst *CI = dyn_cast<CallInst>(Inst))
return hashCallInst(CI);
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
+ auto &DL = GEP->getModule()->getDataLayout();
+ APInt Offset(DL.getIndexTypeSizeInBits(GEP->getType()), 0);
+ if (GEP->accumulateConstantOffset(DL, Offset))
+ return hash_combine(GEP->getOpcode(), GEP->getOperand(0), Offset);
+ }
+
// Mix in the opcode.
return hash_combine(
Inst->getOpcode(),
@@ -405,6 +412,17 @@ static bool isEqualImpl(SimpleValue LHS, SimpleValue RHS) {
LII->getArgOperand(1) == RII->getArgOperand(0);
}
+ if (auto *LGEP = dyn_cast<GetElementPtrInst>(LHSI)) {
+ auto *RGEP = cast<GetElementPtrInst>(RHSI);
+ if (LGEP->getOperand(0) != RGEP->getOperand(0))
+ return false;
+ auto &DL = LGEP->getModule()->getDataLayout();
+ APInt LOffset(DL.getIndexTypeSizeInBits(LGEP->getType()), 0);
+ APInt ROffset(DL.getIndexTypeSizeInBits(LGEP->getType()), 0);
+ return LGEP->accumulateConstantOffset(DL, LOffset) &&
+ RGEP->accumulateConstantOffset(DL, ROffset) && LOffset == ROffset;
+ }
+
// See comment above in `getHashValue()`.
if (const GCRelocateInst *GCR1 = dyn_cast<GCRelocateInst>(LHSI))
if (const GCRelocateInst *GCR2 = dyn_cast<GCRelocateInst>(RHSI))
diff --git a/llvm/test/Transforms/EarlyCSE/gep.ll b/llvm/test/Transforms/EarlyCSE/gep.ll
index 838b6dbeff3332f..499b5ac8de0af9a 100644
--- a/llvm/test/Transforms/EarlyCSE/gep.ll
+++ b/llvm/test/Transforms/EarlyCSE/gep.ll
@@ -10,10 +10,7 @@ define void @foo(ptr %a, <4 x i64> %b, i64 %i) {
; CHECK-LABEL: define void @foo(
; CHECK-SAME: ptr [[A:%.*]], <4 x i64> [[B:%.*]], i64 [[I:%.*]]) {
; CHECK-NEXT: [[S1A:%.*]] = getelementptr i8, ptr [[A]], i64 8
-; CHECK-NEXT: [[S1C:%.*]] = getelementptr [[T1:%.*]], ptr [[A]], i64 0, i32 1
; CHECK-NEXT: [[N1D:%.*]] = getelementptr i8, ptr [[A]], i64 7
-; CHECK-NEXT: [[S1E:%.*]] = getelementptr i64, ptr [[A]], i64 1
-; CHECK-NEXT: [[S1F:%.*]] = getelementptr i32, ptr [[A]], i64 2
; CHECK-NEXT: [[N1G:%.*]] = getelementptr i32, ptr [[A]], i64 1
; CHECK-NEXT: [[N1H:%.*]] = getelementptr i8, ptr [[A]], i64 [[I]]
; CHECK-NEXT: [[V:%.*]] = getelementptr i64, ptr [[A]], <4 x i64> <i64 1, i64 1, i64 1, i64 1>
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/unroll-vectorizer.ll b/llvm/test/Transforms/PhaseOrdering/X86/unroll-vectorizer.ll
index 3072cb39e0133e2..1c9e7a771ca19c7 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/unroll-vectorizer.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/unroll-vectorizer.ll
@@ -10,101 +10,7 @@ define void @foo(ptr %a, <32 x i8> %_0) #0 {
; CHECK-LABEL: define void @foo(
; CHECK-SAME: ptr nocapture writeonly [[A:%.*]], <32 x i8> [[_0:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: start:
-; CHECK-NEXT: [[_34I:%.*]] = getelementptr i8, ptr [[A]], i64 1
-; CHECK-NEXT: [[Z_SROA_0_16_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 0
-; CHECK-NEXT: store i8 [[Z_SROA_0_16_VEC_EXTRACT]], ptr [[A]], align 1
-; CHECK-NEXT: [[_34I_1:%.*]] = getelementptr i8, ptr [[A]], i64 2
-; CHECK-NEXT: [[Z_SROA_0_17_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 1
-; CHECK-NEXT: store i8 [[Z_SROA_0_17_VEC_EXTRACT]], ptr [[_34I]], align 1
-; CHECK-NEXT: [[_34I_2:%.*]] = getelementptr i8, ptr [[A]], i64 3
-; CHECK-NEXT: [[Z_SROA_0_18_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 2
-; CHECK-NEXT: store i8 [[Z_SROA_0_18_VEC_EXTRACT]], ptr [[_34I_1]], align 1
-; CHECK-NEXT: [[_34I_3:%.*]] = getelementptr i8, ptr [[A]], i64 4
-; CHECK-NEXT: [[Z_SROA_0_19_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 3
-; CHECK-NEXT: store i8 [[Z_SROA_0_19_VEC_EXTRACT]], ptr [[_34I_2]], align 1
-; CHECK-NEXT: [[_34I_4:%.*]] = getelementptr i8, ptr [[A]], i64 5
-; CHECK-NEXT: [[Z_SROA_0_20_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 4
-; CHECK-NEXT: store i8 [[Z_SROA_0_20_VEC_EXTRACT]], ptr [[_34I_3]], align 1
-; CHECK-NEXT: [[_34I_5:%.*]] = getelementptr i8, ptr [[A]], i64 6
-; CHECK-NEXT: [[Z_SROA_0_21_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 5
-; CHECK-NEXT: store i8 [[Z_SROA_0_21_VEC_EXTRACT]], ptr [[_34I_4]], align 1
-; CHECK-NEXT: [[_34I_6:%.*]] = getelementptr i8, ptr [[A]], i64 7
-; CHECK-NEXT: [[Z_SROA_0_22_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 6
-; CHECK-NEXT: store i8 [[Z_SROA_0_22_VEC_EXTRACT]], ptr [[_34I_5]], align 1
-; CHECK-NEXT: [[_34I_7:%.*]] = getelementptr i8, ptr [[A]], i64 8
-; CHECK-NEXT: [[Z_SROA_0_23_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 7
-; CHECK-NEXT: store i8 [[Z_SROA_0_23_VEC_EXTRACT]], ptr [[_34I_6]], align 1
-; CHECK-NEXT: [[_34I_8:%.*]] = getelementptr i8, ptr [[A]], i64 9
-; CHECK-NEXT: [[Z_SROA_0_24_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 8
-; CHECK-NEXT: store i8 [[Z_SROA_0_24_VEC_EXTRACT]], ptr [[_34I_7]], align 1
-; CHECK-NEXT: [[_34I_9:%.*]] = getelementptr i8, ptr [[A]], i64 10
-; CHECK-NEXT: [[Z_SROA_0_25_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 9
-; CHECK-NEXT: store i8 [[Z_SROA_0_25_VEC_EXTRACT]], ptr [[_34I_8]], align 1
-; CHECK-NEXT: [[_34I_10:%.*]] = getelementptr i8, ptr [[A]], i64 11
-; CHECK-NEXT: [[Z_SROA_0_26_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 10
-; CHECK-NEXT: store i8 [[Z_SROA_0_26_VEC_EXTRACT]], ptr [[_34I_9]], align 1
-; CHECK-NEXT: [[_34I_11:%.*]] = getelementptr i8, ptr [[A]], i64 12
-; CHECK-NEXT: [[Z_SROA_0_27_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 11
-; CHECK-NEXT: store i8 [[Z_SROA_0_27_VEC_EXTRACT]], ptr [[_34I_10]], align 1
-; CHECK-NEXT: [[_34I_12:%.*]] = getelementptr i8, ptr [[A]], i64 13
-; CHECK-NEXT: [[Z_SROA_0_28_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 12
-; CHECK-NEXT: store i8 [[Z_SROA_0_28_VEC_EXTRACT]], ptr [[_34I_11]], align 1
-; CHECK-NEXT: [[_34I_13:%.*]] = getelementptr i8, ptr [[A]], i64 14
-; CHECK-NEXT: [[Z_SROA_0_29_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 13
-; CHECK-NEXT: store i8 [[Z_SROA_0_29_VEC_EXTRACT]], ptr [[_34I_12]], align 1
-; CHECK-NEXT: [[_34I_14:%.*]] = getelementptr i8, ptr [[A]], i64 15
-; CHECK-NEXT: [[Z_SROA_0_30_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 14
-; CHECK-NEXT: store i8 [[Z_SROA_0_30_VEC_EXTRACT]], ptr [[_34I_13]], align 1
-; CHECK-NEXT: [[_34I_15:%.*]] = getelementptr i8, ptr [[A]], i64 16
-; CHECK-NEXT: [[Z_SROA_0_31_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 15
-; CHECK-NEXT: store i8 [[Z_SROA_0_31_VEC_EXTRACT]], ptr [[_34I_14]], align 1
-; CHECK-NEXT: [[_34I_16:%.*]] = getelementptr i8, ptr [[A]], i64 17
-; CHECK-NEXT: [[Z_SROA_0_32_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 16
-; CHECK-NEXT: store i8 [[Z_SROA_0_32_VEC_EXTRACT]], ptr [[_34I_15]], align 1
-; CHECK-NEXT: [[_34I_17:%.*]] = getelementptr i8, ptr [[A]], i64 18
-; CHECK-NEXT: [[Z_SROA_0_33_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 17
-; CHECK-NEXT: store i8 [[Z_SROA_0_33_VEC_EXTRACT]], ptr [[_34I_16]], align 1
-; CHECK-NEXT: [[_34I_18:%.*]] = getelementptr i8, ptr [[A]], i64 19
-; CHECK-NEXT: [[Z_SROA_0_34_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 18
-; CHECK-NEXT: store i8 [[Z_SROA_0_34_VEC_EXTRACT]], ptr [[_34I_17]], align 1
-; CHECK-NEXT: [[_34I_19:%.*]] = getelementptr i8, ptr [[A]], i64 20
-; CHECK-NEXT: [[Z_SROA_0_35_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 19
-; CHECK-NEXT: store i8 [[Z_SROA_0_35_VEC_EXTRACT]], ptr [[_34I_18]], align 1
-; CHECK-NEXT: [[_34I_20:%.*]] = getelementptr i8, ptr [[A]], i64 21
-; CHECK-NEXT: [[Z_SROA_0_36_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 20
-; CHECK-NEXT: store i8 [[Z_SROA_0_36_VEC_EXTRACT]], ptr [[_34I_19]], align 1
-; CHECK-NEXT: [[_34I_21:%.*]] = getelementptr i8, ptr [[A]], i64 22
-; CHECK-NEXT: [[Z_SROA_0_37_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 21
-; CHECK-NEXT: store i8 [[Z_SROA_0_37_VEC_EXTRACT]], ptr [[_34I_20]], align 1
-; CHECK-NEXT: [[_34I_22:%.*]] = getelementptr i8, ptr [[A]], i64 23
-; CHECK-NEXT: [[Z_SROA_0_38_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 22
-; CHECK-NEXT: store i8 [[Z_SROA_0_38_VEC_EXTRACT]], ptr [[_34I_21]], align 1
-; CHECK-NEXT: [[_34I_23:%.*]] = getelementptr i8, ptr [[A]], i64 24
-; CHECK-NEXT: [[Z_SROA_0_39_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 23
-; CHECK-NEXT: store i8 [[Z_SROA_0_39_VEC_EXTRACT]], ptr [[_34I_22]], align 1
-; CHECK-NEXT: [[_34I_24:%.*]] = getelementptr i8, ptr [[A]], i64 25
-; CHECK-NEXT: [[Z_SROA_0_40_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 24
-; CHECK-NEXT: store i8 [[Z_SROA_0_40_VEC_EXTRACT]], ptr [[_34I_23]], align 1
-; CHECK-NEXT: [[_34I_25:%.*]] = getelementptr i8, ptr [[A]], i64 26
-; CHECK-NEXT: [[Z_SROA_0_41_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 25
-; CHECK-NEXT: store i8 [[Z_SROA_0_41_VEC_EXTRACT]], ptr [[_34I_24]], align 1
-; CHECK-NEXT: [[_34I_26:%.*]] = getelementptr i8, ptr [[A]], i64 27
-; CHECK-NEXT: [[Z_SROA_0_42_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 26
-; CHECK-NEXT: store i8 [[Z_SROA_0_42_VEC_EXTRACT]], ptr [[_34I_25]], align 1
-; CHECK-NEXT: [[_34I_27:%.*]] = getelementptr i8, ptr [[A]], i64 28
-; CHECK-NEXT: [[Z_SROA_0_43_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 27
-; CHECK-NEXT: store i8 [[Z_SROA_0_43_VEC_EXTRACT]], ptr [[_34I_26]], align 1
-; CHECK-NEXT: [[_34I_28:%.*]] = getelementptr i8, ptr [[A]], i64 29
-; CHECK-NEXT: [[Z_SROA_0_44_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 28
-; CHECK-NEXT: store i8 [[Z_SROA_0_44_VEC_EXTRACT]], ptr [[_34I_27]], align 1
-; CHECK-NEXT: [[_34I_29:%.*]] = getelementptr i8, ptr [[A]], i64 30
-; CHECK-NEXT: [[Z_SROA_0_45_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 29
-; CHECK-NEXT: store i8 [[Z_SROA_0_45_VEC_EXTRACT]], ptr [[_34I_28]], align 1
-; CHECK-NEXT: [[_34I_30:%.*]] = getelementptr i8, ptr [[A]], i64 31
-; CHECK-NEXT: [[Z_SROA_0_46_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 30
-; CHECK-NEXT: store i8 [[Z_SROA_0_46_VEC_EXTRACT]], ptr [[_34I_29]], align 1
-; CHECK-NEXT: [[Z_SROA_0_47_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 31
-; CHECK-NEXT: store i8 [[Z_SROA_0_47_VEC_EXTRACT]], ptr [[_34I_30]], align 1
+; CHECK-NEXT: store <32 x i8> [[_0]], ptr [[A]], align 1
; CHECK-NEXT: ret void
;
start:
>From 78ed2a0aea96087430cc12eb41c94e6648055d0b Mon Sep 17 00:00:00 2001
From: DianQK <dianqk at dianqk.net>
Date: Thu, 14 Sep 2023 18:43:34 +0800
Subject: [PATCH 4/4] fixup! [EarlyCSE] Compare GEP instructions based on
offset
Resolve nits
---
llvm/lib/Transforms/Scalar/EarlyCSE.cpp | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
index c27b14c0d6e2e5d..da39a2e3c29290f 100644
--- a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -340,7 +340,7 @@ static unsigned getHashValueImpl(SimpleValue Val) {
auto &DL = GEP->getModule()->getDataLayout();
APInt Offset(DL.getIndexTypeSizeInBits(GEP->getType()), 0);
if (GEP->accumulateConstantOffset(DL, Offset))
- return hash_combine(GEP->getOpcode(), GEP->getOperand(0), Offset);
+ return hash_combine(GEP->getOpcode(), GEP->getPointerOperand(), Offset);
}
// Mix in the opcode.
@@ -414,11 +414,12 @@ static bool isEqualImpl(SimpleValue LHS, SimpleValue RHS) {
if (auto *LGEP = dyn_cast<GetElementPtrInst>(LHSI)) {
auto *RGEP = cast<GetElementPtrInst>(RHSI);
- if (LGEP->getOperand(0) != RGEP->getOperand(0))
+ if (LGEP->getPointerOperand() != RGEP->getPointerOperand())
return false;
auto &DL = LGEP->getModule()->getDataLayout();
- APInt LOffset(DL.getIndexTypeSizeInBits(LGEP->getType()), 0);
- APInt ROffset(DL.getIndexTypeSizeInBits(LGEP->getType()), 0);
+ auto Size = DL.getIndexTypeSizeInBits(LGEP->getType());
+ APInt LOffset(Size, 0);
+ APInt ROffset(Size, 0);
return LGEP->accumulateConstantOffset(DL, LOffset) &&
RGEP->accumulateConstantOffset(DL, ROffset) && LOffset == ROffset;
}
More information about the llvm-commits
mailing list