[llvm] [InstCombine] Canonicalize more geps with constant gep bases and constant offsets. (PR #110033)
David Green via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 26 23:59:02 PDT 2024
https://github.com/davemgreen updated https://github.com/llvm/llvm-project/pull/110033
>From edd4566cc9a8c2175e33fec07d82548f97224620 Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Wed, 25 Sep 2024 18:53:05 +0100
Subject: [PATCH 1/2] [Instcombine] Test for more gep canonicalization
---
.../InstCombine/canonicalize-gep-constglob.ll | 72 +++++++++++++++++++
1 file changed, 72 insertions(+)
create mode 100644 llvm/test/Transforms/InstCombine/canonicalize-gep-constglob.ll
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-gep-constglob.ll b/llvm/test/Transforms/InstCombine/canonicalize-gep-constglob.ll
new file mode 100644
index 00000000000000..f1f01a1884a22e
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/canonicalize-gep-constglob.ll
@@ -0,0 +1,72 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+
+@glob = internal global [10 x [10 x [10 x i32]]] zeroinitializer
+
+define ptr @x12(i64 %x) {
+; CHECK-LABEL: define ptr @x12(
+; CHECK-SAME: i64 [[X:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr getelementptr inbounds (i8, ptr @glob, i64 36), i64 0, i64 [[X]], i64 1, i64 2
+; CHECK-NEXT: ret ptr [[GEP]]
+;
+entry:
+ %gep = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr getelementptr (i8, ptr @glob, i64 36), i64 0, i64 %x, i64 1, i64 2
+ ret ptr %gep
+}
+
+define ptr @x1y(i64 %x, i64 %y) {
+; CHECK-LABEL: define ptr @x1y(
+; CHECK-SAME: i64 [[X:%.*]], i64 [[Y:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr getelementptr inbounds (i8, ptr @glob, i64 36), i64 0, i64 [[X]], i64 2, i64 [[Y]]
+; CHECK-NEXT: ret ptr [[GEP]]
+;
+entry:
+ %gep = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr getelementptr (i8, ptr @glob, i64 36), i64 0, i64 %x, i64 2, i64 %y
+ ret ptr %gep
+}
+
+define ptr @xzy(i64 %x, i64 %y, i64 %z) {
+; CHECK-LABEL: define ptr @xzy(
+; CHECK-SAME: i64 [[X:%.*]], i64 [[Y:%.*]], i64 [[Z:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr getelementptr inbounds (i8, ptr @glob, i64 40), i64 0, i64 [[X]], i64 [[Z]], i64 [[Y]]
+; CHECK-NEXT: ret ptr [[GEP]]
+;
+entry:
+ %gep = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr getelementptr (i8, ptr @glob, i64 40), i64 0, i64 %x, i64 %z, i64 %y
+ ret ptr %gep
+}
+
+define ptr @zerox(i64 %x) {
+; CHECK-LABEL: define ptr @zerox(
+; CHECK-SAME: i64 [[X:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [10 x i32], ptr getelementptr inbounds (i8, ptr @glob, i64 32), i64 0, i64 [[X]]
+; CHECK-NEXT: ret ptr [[GEP]]
+;
+entry:
+ %gep = getelementptr inbounds [10 x i32], ptr getelementptr (i8, ptr @glob, i64 32), i64 0, i64 %x
+ ret ptr %gep
+}
+
+define i32 @twoloads(i64 %x) {
+; CHECK-LABEL: define i32 @twoloads(
+; CHECK-SAME: i64 [[X:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr getelementptr inbounds (i8, ptr @glob, i64 50), i64 0, i64 [[X]], i64 2, i64 1
+; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr getelementptr inbounds (i8, ptr @glob, i64 36), i64 0, i64 [[X]], i64 2, i64 4
+; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[GEP1]], align 4
+; CHECK-NEXT: [[B:%.*]] = load i32, ptr [[GEP2]], align 4
+; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[B]]
+; CHECK-NEXT: ret i32 [[C]]
+;
+entry:
+ %gep1 = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr getelementptr (i8, ptr @glob, i64 50), i64 0, i64 %x, i64 2, i64 1
+ %gep2 = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr getelementptr (i8, ptr @glob, i64 36), i64 0, i64 %x, i64 2, i64 4
+ %a = load i32, ptr %gep1
+ %b = load i32, ptr %gep2
+ %c = add i32 %a, %b
+ ret i32 %c
+}
>From 8d20eac63f0bfc131d006df8ddcde6f72641086e Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Fri, 27 Sep 2024 07:58:50 +0100
Subject: [PATCH 2/2] [InstCombine] Canonicalize more geps with constant gep
bases and constant offsets.
This is another small, but hopefully not performance-negative, step towards
canonicalizing to i8 geps. We look for geps with a constant-offset base
pointer of the form `gep (gep @glob, C1), x, C2` and expand the gep
instruction, so that the constants can hopefully be combined together (or the
offset can be computed in common).
---
.../InstCombine/InstCombineInternal.h | 3 +-
.../InstCombine/InstructionCombining.cpp | 45 ++++++++++++++-----
.../InstCombine/canonicalize-gep-constglob.ll | 14 ++++--
3 files changed, 45 insertions(+), 17 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index da6f991ad4cd15..a7ba87f686583c 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -238,6 +238,8 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final
std::optional<std::pair<Intrinsic::ID, SmallVector<Value *, 3>>>
convertOrOfShiftsToFunnelShift(Instruction &Or);
+ Value *EmitGEPOffset(GEPOperator *GEP, bool RewriteGEP = false);
+
private:
bool annotateAnyAllocSite(CallBase &Call, const TargetLibraryInfo *TLI);
bool isDesirableIntType(unsigned BitWidth) const;
@@ -374,7 +376,6 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final
}
}
- Value *EmitGEPOffset(GEPOperator *GEP, bool RewriteGEP = false);
Instruction *scalarizePHI(ExtractElementInst &EI, PHINode *PN);
Instruction *foldBitcastExtElt(ExtractElementInst &ExtElt);
Instruction *foldCastedBitwiseLogic(BinaryOperator &I);
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 5740285675eba8..61b52b6e8a4d3a 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2736,6 +2736,35 @@ Value *InstCombiner::getFreelyInvertedImpl(Value *V, bool WillInvertAllUses,
return nullptr;
}
+// Return true if we should canonicalize the gep to an i8 ptradd.
+static bool shouldCanonicalizeGEPToPtrAdd(GetElementPtrInst &GEP) {
+ Value *PtrOp = GEP.getOperand(0);
+ Type *GEPEltType = GEP.getSourceElementType();
+ if (GEPEltType->isIntegerTy(8))
+ return false;
+
+ // Canonicalize scalable GEPs to an explicit offset using the llvm.vscale
+ // intrinsic. This has better support in BasicAA.
+ if (GEPEltType->isScalableTy())
+ return true;
+
+ // gep i32 p, mul(O, C) -> gep i8, p, mul(O, C*4) to fold the two multiplies
+ // together.
+ if (GEP.getNumIndices() == 1 &&
+ match(GEP.getOperand(1),
+ m_OneUse(m_CombineOr(m_Mul(m_Value(), m_ConstantInt()),
+ m_Shl(m_Value(), m_ConstantInt())))))
+ return true;
+
+ // gep (gep @global, C1), %x, C2 is expanded so the two constants can
+ // possibly be merged together.
+ return isa<GEPOperator>(PtrOp) && isa<ConstantExpr>(PtrOp) &&
+ any_of(GEP.indices(), [](Value *V) {
+ const APInt *C;
+ return match(V, m_APInt(C)) && !C->isZero();
+ });
+}
+
Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
Value *PtrOp = GEP.getOperand(0);
SmallVector<Value *, 8> Indices(GEP.indices());
@@ -2817,19 +2846,11 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
GEP.getNoWrapFlags()));
}
- // Canonicalize
- // - scalable GEPs to an explicit offset using the llvm.vscale intrinsic.
- // This has better support in BasicAA.
- // - gep i32 p, mul(O, C) -> gep i8, p, mul(O, C*4) to fold the two
- // multiplies together.
- if (GEPEltType->isScalableTy() ||
- (!GEPEltType->isIntegerTy(8) && GEP.getNumIndices() == 1 &&
- match(GEP.getOperand(1),
- m_OneUse(m_CombineOr(m_Mul(m_Value(), m_ConstantInt()),
- m_Shl(m_Value(), m_ConstantInt())))))) {
+ if (shouldCanonicalizeGEPToPtrAdd(GEP)) {
Value *Offset = EmitGEPOffset(cast<GEPOperator>(&GEP));
- return replaceInstUsesWith(
- GEP, Builder.CreatePtrAdd(PtrOp, Offset, "", GEP.getNoWrapFlags()));
+ Value *NewGEP =
+ Builder.CreatePtrAdd(PtrOp, Offset, "", GEP.getNoWrapFlags());
+ return replaceInstUsesWith(GEP, NewGEP);
}
// Check to see if the inputs to the PHI node are getelementptr instructions.
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-gep-constglob.ll b/llvm/test/Transforms/InstCombine/canonicalize-gep-constglob.ll
index f1f01a1884a22e..53585dfde48b22 100644
--- a/llvm/test/Transforms/InstCombine/canonicalize-gep-constglob.ll
+++ b/llvm/test/Transforms/InstCombine/canonicalize-gep-constglob.ll
@@ -7,7 +7,8 @@ define ptr @x12(i64 %x) {
; CHECK-LABEL: define ptr @x12(
; CHECK-SAME: i64 [[X:%.*]]) {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr getelementptr inbounds (i8, ptr @glob, i64 36), i64 0, i64 [[X]], i64 1, i64 2
+; CHECK-NEXT: [[GEP_IDX:%.*]] = mul nsw i64 [[X]], 400
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr getelementptr inbounds (i8, ptr @glob, i64 84), i64 [[GEP_IDX]]
; CHECK-NEXT: ret ptr [[GEP]]
;
entry:
@@ -19,7 +20,10 @@ define ptr @x1y(i64 %x, i64 %y) {
; CHECK-LABEL: define ptr @x1y(
; CHECK-SAME: i64 [[X:%.*]], i64 [[Y:%.*]]) {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr getelementptr inbounds (i8, ptr @glob, i64 36), i64 0, i64 [[X]], i64 2, i64 [[Y]]
+; CHECK-NEXT: [[GEP_IDX:%.*]] = mul nsw i64 [[X]], 400
+; CHECK-NEXT: [[GEP_IDX1:%.*]] = shl nsw i64 [[Y]], 2
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr getelementptr inbounds (i8, ptr @glob, i64 116), i64 [[GEP_IDX]]
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[TMP0]], i64 [[GEP_IDX1]]
; CHECK-NEXT: ret ptr [[GEP]]
;
entry:
@@ -55,8 +59,10 @@ define i32 @twoloads(i64 %x) {
; CHECK-LABEL: define i32 @twoloads(
; CHECK-SAME: i64 [[X:%.*]]) {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr getelementptr inbounds (i8, ptr @glob, i64 50), i64 0, i64 [[X]], i64 2, i64 1
-; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr getelementptr inbounds (i8, ptr @glob, i64 36), i64 0, i64 [[X]], i64 2, i64 4
+; CHECK-NEXT: [[GEP1_IDX:%.*]] = mul nsw i64 [[X]], 400
+; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i8, ptr getelementptr inbounds (i8, ptr @glob, i64 134), i64 [[GEP1_IDX]]
+; CHECK-NEXT: [[GEP2_IDX:%.*]] = mul nsw i64 [[X]], 400
+; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i8, ptr getelementptr inbounds (i8, ptr @glob, i64 132), i64 [[GEP2_IDX]]
; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[GEP1]], align 4
; CHECK-NEXT: [[B:%.*]] = load i32, ptr [[GEP2]], align 4
; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[B]]
More information about the llvm-commits
mailing list