[llvm] [InstCombine] Canonicalize more geps with constant gep bases and constant offsets. (PR #110033)

David Green via llvm-commits llvm-commits at lists.llvm.org
Wed Sep 25 12:53:13 PDT 2024


https://github.com/davemgreen created https://github.com/llvm/llvm-project/pull/110033

This is another small, but hopefully not performance-negative, step towards canonicalizing to i8 geps. We look for geps with a constant-offset base pointer of the form `gep (gep @glob, C1), x, C2` and expand the gep
instruction, so that the constants can hopefully be combined together (or the x offset can be computed in common).

>From 092d47b3b2036ce058ef5e155fe7df64b9ac29c8 Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Wed, 25 Sep 2024 18:53:05 +0100
Subject: [PATCH 1/2] [Instcombine] Test for more gep canonicalization

---
 .../InstCombine/canonicalize-gep-constglob.ll | 60 +++++++++++++++++++
 1 file changed, 60 insertions(+)
 create mode 100644 llvm/test/Transforms/InstCombine/canonicalize-gep-constglob.ll

diff --git a/llvm/test/Transforms/InstCombine/canonicalize-gep-constglob.ll b/llvm/test/Transforms/InstCombine/canonicalize-gep-constglob.ll
new file mode 100644
index 00000000000000..610552b0b1ce57
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/canonicalize-gep-constglob.ll
@@ -0,0 +1,60 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+
+@glob = internal global [10 x [10 x [10 x i32]]] zeroinitializer
+
+define ptr @x12(i64 %x) {
+; CHECK-LABEL: define ptr @x12(
+; CHECK-SAME: i64 [[X:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr getelementptr inbounds (i8, ptr @glob, i64 36), i64 0, i64 [[X]], i64 1, i64 2
+; CHECK-NEXT:    ret ptr [[GEP]]
+;
+entry:
+  %gep = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr getelementptr (i8, ptr @glob, i64 36), i64 0, i64 %x, i64 1, i64 2
+  ret ptr %gep
+}
+
+define ptr @x1y(i64 %x, i64 %y) {
+; CHECK-LABEL: define ptr @x1y(
+; CHECK-SAME: i64 [[X:%.*]], i64 [[Y:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr getelementptr inbounds (i8, ptr @glob, i64 36), i64 0, i64 [[X]], i64 2, i64 [[Y]]
+; CHECK-NEXT:    ret ptr [[GEP]]
+;
+entry:
+  %gep = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr getelementptr (i8, ptr @glob, i64 36), i64 0, i64 %x, i64 2, i64 %y
+  ret ptr %gep
+}
+
+define ptr @xzy(i64 %x, i64 %y, i64 %z) {
+; CHECK-LABEL: define ptr @xzy(
+; CHECK-SAME: i64 [[X:%.*]], i64 [[Y:%.*]], i64 [[Z:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr getelementptr inbounds (i8, ptr @glob, i64 40), i64 0, i64 [[X]], i64 [[Z]], i64 [[Y]]
+; CHECK-NEXT:    ret ptr [[GEP]]
+;
+entry:
+  %gep = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr getelementptr (i8, ptr @glob, i64 40), i64 0, i64 %x, i64 %z, i64 %y
+  ret ptr %gep
+}
+
+define i32 @twoloads(i64 %x) {
+; CHECK-LABEL: define i32 @twoloads(
+; CHECK-SAME: i64 [[X:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr getelementptr inbounds (i8, ptr @glob, i64 50), i64 0, i64 [[X]], i64 2, i64 1
+; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr getelementptr inbounds (i8, ptr @glob, i64 36), i64 0, i64 [[X]], i64 2, i64 4
+; CHECK-NEXT:    [[A:%.*]] = load i32, ptr [[GEP1]], align 4
+; CHECK-NEXT:    [[B:%.*]] = load i32, ptr [[GEP2]], align 4
+; CHECK-NEXT:    [[C:%.*]] = add i32 [[A]], [[B]]
+; CHECK-NEXT:    ret i32 [[C]]
+;
+entry:
+  %gep1 = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr getelementptr (i8, ptr @glob, i64 50), i64 0, i64 %x, i64 2, i64 1
+  %gep2 = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr getelementptr (i8, ptr @glob, i64 36), i64 0, i64 %x, i64 2, i64 4
+  %a = load i32, ptr %gep1
+  %b = load i32, ptr %gep2
+  %c = add i32 %a, %b
+  ret i32 %c
+}

>From 43dbb98eab8eddab346a4c13feee4326fa5532d3 Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Wed, 25 Sep 2024 20:30:08 +0100
Subject: [PATCH 2/2] [InstCombine] Canonicalize more geps with constant gep
 bases and constant offsets.

This is another small, but hopefully not performance-negative, step towards
canonicalizing to i8 geps. We look for geps with a constant-offset base
pointer of the form `gep (gep @glob, C1), x, C2` and expand the gep
instruction, so that the constants can hopefully be combined together (or the
x offset can be computed in common).
---
 .../InstCombine/InstructionCombining.cpp         | 16 +++++++++++-----
 .../InstCombine/canonicalize-gep-constglob.ll    | 14 ++++++++++----
 2 files changed, 21 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 5740285675eba8..ef478bf106f731 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2822,11 +2822,17 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
   //    This has better support in BasicAA.
   //  - gep i32 p, mul(O, C) -> gep i8, p, mul(O, C*4) to fold the two
   //    multiplies together.
-  if (GEPEltType->isScalableTy() ||
-      (!GEPEltType->isIntegerTy(8) && GEP.getNumIndices() == 1 &&
-       match(GEP.getOperand(1),
-             m_OneUse(m_CombineOr(m_Mul(m_Value(), m_ConstantInt()),
-                                  m_Shl(m_Value(), m_ConstantInt())))))) {
+  //  - gep (gep @global, C1), %x, C2 is expanded so the two constants can
+  //    possibly be merged together.
+  if (!GEPEltType->isIntegerTy(8) &&
+      (GEPEltType->isScalableTy() ||
+       (GEP.getNumIndices() == 1 &&
+        match(GEP.getOperand(1),
+              m_OneUse(m_CombineOr(m_Mul(m_Value(), m_ConstantInt()),
+                                   m_Shl(m_Value(), m_ConstantInt()))))) ||
+       (isa<GEPOperator>(PtrOp) && isa<ConstantExpr>(PtrOp) &&
+        any_of(drop_begin(GEP.indices()),
+               [](Value *V) { return isa<Constant>(V); })))) {
     Value *Offset = EmitGEPOffset(cast<GEPOperator>(&GEP));
     return replaceInstUsesWith(
         GEP, Builder.CreatePtrAdd(PtrOp, Offset, "", GEP.getNoWrapFlags()));
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-gep-constglob.ll b/llvm/test/Transforms/InstCombine/canonicalize-gep-constglob.ll
index 610552b0b1ce57..f0dbd1b0a49a55 100644
--- a/llvm/test/Transforms/InstCombine/canonicalize-gep-constglob.ll
+++ b/llvm/test/Transforms/InstCombine/canonicalize-gep-constglob.ll
@@ -7,7 +7,8 @@ define ptr @x12(i64 %x) {
 ; CHECK-LABEL: define ptr @x12(
 ; CHECK-SAME: i64 [[X:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr getelementptr inbounds (i8, ptr @glob, i64 36), i64 0, i64 [[X]], i64 1, i64 2
+; CHECK-NEXT:    [[GEP_IDX:%.*]] = mul nsw i64 [[X]], 400
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i8, ptr getelementptr inbounds (i8, ptr @glob, i64 84), i64 [[GEP_IDX]]
 ; CHECK-NEXT:    ret ptr [[GEP]]
 ;
 entry:
@@ -19,7 +20,10 @@ define ptr @x1y(i64 %x, i64 %y) {
 ; CHECK-LABEL: define ptr @x1y(
 ; CHECK-SAME: i64 [[X:%.*]], i64 [[Y:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr getelementptr inbounds (i8, ptr @glob, i64 36), i64 0, i64 [[X]], i64 2, i64 [[Y]]
+; CHECK-NEXT:    [[GEP_IDX:%.*]] = mul nsw i64 [[X]], 400
+; CHECK-NEXT:    [[GEP_IDX1:%.*]] = shl nsw i64 [[Y]], 2
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr getelementptr inbounds (i8, ptr @glob, i64 116), i64 [[GEP_IDX]]
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i8, ptr [[TMP0]], i64 [[GEP_IDX1]]
 ; CHECK-NEXT:    ret ptr [[GEP]]
 ;
 entry:
@@ -43,8 +47,10 @@ define i32 @twoloads(i64 %x) {
 ; CHECK-LABEL: define i32 @twoloads(
 ; CHECK-SAME: i64 [[X:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr getelementptr inbounds (i8, ptr @glob, i64 50), i64 0, i64 [[X]], i64 2, i64 1
-; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr getelementptr inbounds (i8, ptr @glob, i64 36), i64 0, i64 [[X]], i64 2, i64 4
+; CHECK-NEXT:    [[GEP1_IDX:%.*]] = mul nsw i64 [[X]], 400
+; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr i8, ptr getelementptr inbounds (i8, ptr @glob, i64 134), i64 [[GEP1_IDX]]
+; CHECK-NEXT:    [[GEP2_IDX:%.*]] = mul nsw i64 [[X]], 400
+; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr i8, ptr getelementptr inbounds (i8, ptr @glob, i64 132), i64 [[GEP2_IDX]]
 ; CHECK-NEXT:    [[A:%.*]] = load i32, ptr [[GEP1]], align 4
 ; CHECK-NEXT:    [[B:%.*]] = load i32, ptr [[GEP2]], align 4
 ; CHECK-NEXT:    [[C:%.*]] = add i32 [[A]], [[B]]



More information about the llvm-commits mailing list