[llvm] [InstCombine] Try to fold add into GEP x, C (PR #85090)

David Green via llvm-commits llvm-commits at lists.llvm.org
Sun Apr 28 10:46:16 PDT 2024


https://github.com/davemgreen updated https://github.com/llvm/llvm-project/pull/85090

>From e4795e7c5b7a0714957353d13567e5a956515b72 Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Wed, 13 Mar 2024 11:54:17 +0000
Subject: [PATCH 1/2] [InstCombine] Add Gep + Add tests. NFC

---
 llvm/test/Transforms/InstCombine/gepadd.ll | 115 +++++++++++++++++++++
 1 file changed, 115 insertions(+)
 create mode 100644 llvm/test/Transforms/InstCombine/gepadd.ll

diff --git a/llvm/test/Transforms/InstCombine/gepadd.ll b/llvm/test/Transforms/InstCombine/gepadd.ll
new file mode 100644
index 00000000000000..328b0d0de0883e
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/gepadd.ll
@@ -0,0 +1,115 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
+; RUN: opt < %s -S -passes=instcombine | FileCheck %s
+
+target datalayout = "e-p:64:64:64"
+
+define ptr @add1(ptr %p, i64 %o, i64 %n) {
+; CHECK-LABEL: define ptr @add1(
+; CHECK-SAME: ptr [[P:%.*]], i64 [[O:%.*]], i64 [[N:%.*]]) {
+; CHECK-NEXT:    [[A:%.*]] = add i64 [[O]], 1
+; CHECK-NEXT:    [[G:%.*]] = getelementptr [10 x [10 x [10 x [10 x i32]]]], ptr [[P]], i64 0, i64 [[A]], i64 1, i64 -6, i64 [[N]]
+; CHECK-NEXT:    ret ptr [[G]]
+;
+  %a = add i64 %o, 1
+  %g = getelementptr [10 x [10 x [10 x [10 x i32]]]], ptr %p, i64 0, i64 %a, i64 1, i64 -6, i64 %n
+  ret ptr %g
+}
+
+define ptr @sub1(ptr %p, i64 %o, i64 %n) {
+; CHECK-LABEL: define ptr @sub1(
+; CHECK-SAME: ptr [[P:%.*]], i64 [[O:%.*]], i64 [[N:%.*]]) {
+; CHECK-NEXT:    [[A:%.*]] = add i64 [[O]], -1
+; CHECK-NEXT:    [[G:%.*]] = getelementptr [10 x [10 x [10 x [10 x i32]]]], ptr [[P]], i64 0, i64 [[A]], i64 1, i64 -6, i64 [[N]]
+; CHECK-NEXT:    ret ptr [[G]]
+;
+  %a = sub i64 %o, 1
+  %g = getelementptr [10 x [10 x [10 x [10 x i32]]]], ptr %p, i64 0, i64 %a, i64 1, i64 -6, i64 %n
+  ret ptr %g
+}
+
+define ptr @add10(ptr %p, i64 %o, i64 %n) {
+; CHECK-LABEL: define ptr @add10(
+; CHECK-SAME: ptr [[P:%.*]], i64 [[O:%.*]], i64 [[N:%.*]]) {
+; CHECK-NEXT:    [[A:%.*]] = add i64 [[O]], 10
+; CHECK-NEXT:    [[G:%.*]] = getelementptr [10 x [10 x [10 x [10 x i32]]]], ptr [[P]], i64 0, i64 [[A]], i64 1, i64 -6, i64 [[N]]
+; CHECK-NEXT:    ret ptr [[G]]
+;
+  %a = add i64 %o, 10
+  %g = getelementptr [10 x [10 x [10 x [10 x i32]]]], ptr %p, i64 0, i64 %a, i64 1, i64 -6, i64 %n
+  ret ptr %g
+}
+
+define ptr @nooff(ptr %p, i64 %o, i64 %n) {
+; CHECK-LABEL: define ptr @nooff(
+; CHECK-SAME: ptr [[P:%.*]], i64 [[O:%.*]], i64 [[N:%.*]]) {
+; CHECK-NEXT:    [[A:%.*]] = add i64 [[O]], 10
+; CHECK-NEXT:    [[G:%.*]] = getelementptr [10 x [10 x [10 x [10 x i32]]]], ptr [[P]], i64 0, i64 [[A]], i64 [[N]]
+; CHECK-NEXT:    ret ptr [[G]]
+;
+  %a = add i64 %o, 10
+  %g = getelementptr [10 x [10 x [10 x [10 x i32]]]], ptr %p, i64 0, i64 %a, i64 %n
+  ret ptr %g
+}
+
+define ptr @inbounds(ptr %p, i64 %o, i64 %n) {
+; CHECK-LABEL: define ptr @inbounds(
+; CHECK-SAME: ptr [[P:%.*]], i64 [[O:%.*]], i64 [[N:%.*]]) {
+; CHECK-NEXT:    [[A:%.*]] = add i64 [[O]], 1
+; CHECK-NEXT:    [[G:%.*]] = getelementptr inbounds [10 x [10 x [10 x [10 x i32]]]], ptr [[P]], i64 0, i64 [[A]], i64 1, i64 -6, i64 [[N]]
+; CHECK-NEXT:    ret ptr [[G]]
+;
+  %a = add i64 %o, 1
+  %g = getelementptr inbounds [10 x [10 x [10 x [10 x i32]]]], ptr %p, i64 0, i64 %a, i64 1, i64 -6, i64 %n
+  ret ptr %g
+}
+
+%struct.Struct = type { i32, i32, ptr }
+define ptr @struct(ptr %p, i64 %o, i64 %n) {
+; CHECK-LABEL: define ptr @struct(
+; CHECK-SAME: ptr [[P:%.*]], i64 [[O:%.*]], i64 [[N:%.*]]) {
+; CHECK-NEXT:    [[A:%.*]] = add i64 [[O]], -1
+; CHECK-NEXT:    [[G:%.*]] = getelementptr [[STRUCT_STRUCT:%.*]], ptr [[P]], i64 [[A]], i32 1
+; CHECK-NEXT:    ret ptr [[G]]
+;
+  %a = add i64 %o, -1
+  %g = getelementptr %struct.Struct, ptr %p, i64 %a, i32 1
+  ret ptr %g
+}
+
+define ptr @lessargs(ptr %p, i64 %o, i64 %n) {
+; CHECK-LABEL: define ptr @lessargs(
+; CHECK-SAME: ptr [[P:%.*]], i64 [[O:%.*]], i64 [[N:%.*]]) {
+; CHECK-NEXT:    [[A:%.*]] = add i64 [[O]], 1
+; CHECK-NEXT:    [[G:%.*]] = getelementptr [10 x [10 x [10 x i32]]], ptr [[P]], i64 0, i64 [[A]], i64 1
+; CHECK-NEXT:    ret ptr [[G]]
+;
+  %a = add i64 %o, 1
+  %g = getelementptr [10 x [10 x [10 x i32]]], ptr %p, i64 0, i64 %a, i64 1
+  ret ptr %g
+}
+
+define ptr @twice(ptr %p, i64 %o, i64 %n) {
+; CHECK-LABEL: define ptr @twice(
+; CHECK-SAME: ptr [[P:%.*]], i64 [[O:%.*]], i64 [[N:%.*]]) {
+; CHECK-NEXT:    [[A:%.*]] = add i64 [[O]], 1
+; CHECK-NEXT:    [[B:%.*]] = add i64 [[O]], -1
+; CHECK-NEXT:    [[G:%.*]] = getelementptr [10 x [10 x [10 x [10 x [10 x i32]]]]], ptr [[P]], i64 0, i64 [[A]], i64 1, i64 [[B]], i64 -3
+; CHECK-NEXT:    ret ptr [[G]]
+;
+  %a = add i64 %o, 1
+  %b = sub i64 %o, 1
+  %g = getelementptr [10 x [10 x [10 x [10 x [10 x i32]]]]], ptr %p, i64 0, i64 %a, i64 1, i64 %b, i64 -3
+  ret ptr %g
+}
+
+define ptr @simpler(ptr %p, i64 %o, i64 %n) {
+; CHECK-LABEL: define ptr @simpler(
+; CHECK-SAME: ptr [[P:%.*]], i64 [[O:%.*]], i64 [[N:%.*]]) {
+; CHECK-NEXT:    [[A:%.*]] = add i64 [[O]], 8
+; CHECK-NEXT:    [[G:%.*]] = getelementptr [16 x [8 x i32]], ptr [[P]], i64 0, i64 [[A]], i64 1
+; CHECK-NEXT:    ret ptr [[G]]
+;
+  %a = add i64 %o, 8
+  %g = getelementptr [16 x [8 x i32]], ptr %p, i64 0, i64 %a, i64 1
+  ret ptr %g
+}

>From 67c1d5236fe9b33531e2cb6676bdaf65e6caa4db Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Sun, 28 Apr 2024 18:46:01 +0100
Subject: [PATCH 2/2] [InstCombine] Try to fold add into GEP

This attempts to replace GEP p, (x + C1), C2 with GEP p, x, C2+C1*S (where S
is the stride of the first index divided by the stride of the second),
removing the need for the add by folding it into the following constant offset
in the GEP. If the GEP is inbounds, the inbounds flag is dropped as part of
the transform.

Example proof using a much reduced pointer address space:
https://alive2.llvm.org/ce/z/hG8xfF
---
 .../InstCombine/InstructionCombining.cpp      | 33 +++++++++++++++++++
 llvm/test/Transforms/InstCombine/gepadd.ll    | 22 ++++---------
 2 files changed, 40 insertions(+), 15 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 58b2d8e9dec1c3..ebc3b7e1388571 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -3002,6 +3002,39 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
     }
   }
 
+  // Try to replace GEP p, (x + C1), C2 with GEP p, x, C2+C1*S
+  // Note this can be removed once geps are canonicalized to single indices.
+  if (GEP.getNumIndices() > 1) {
+    gep_type_iterator GTI = gep_type_begin(GEP);
+    for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end() - 1; I != E;
+         ++I, ++GTI) {
+      if (!GTI.isSequential())
+        break;
+      Value *X;
+      const APInt *C1, *C2;
+      User::op_iterator Next = std::next(I);
+      if (match(I->get(), m_Add(m_Value(X), m_APInt(C1))) &&
+          match(Next->get(), m_APInt(C2))) {
+        TypeSize Scale1 = GTI.getSequentialElementStride(DL);
+        if (Scale1.isScalable() || !(++GTI).isSequential())
+          break;
+        TypeSize Scale2 = GTI.getSequentialElementStride(DL);
+        if (Scale2.isScalable())
+          break;
+
+        // Update the GEP instruction indices, and add Add to the worklist
+        // so that it can be DCEd.
+        Instruction *Add = cast<Instruction>(*I);
+        *I = X;
+        *Next =
+            ConstantInt::get((*Next)->getType(), *C2 + *C1 * (Scale1 / Scale2));
+        addToWorklist(Add);
+        GEP.setIsInBounds(false);
+        return &GEP;
+      }
+    }
+  }
+
   if (Instruction *R = foldSelectGEP(GEP, Builder))
     return R;
 
diff --git a/llvm/test/Transforms/InstCombine/gepadd.ll b/llvm/test/Transforms/InstCombine/gepadd.ll
index 328b0d0de0883e..a22f2b549acdbe 100644
--- a/llvm/test/Transforms/InstCombine/gepadd.ll
+++ b/llvm/test/Transforms/InstCombine/gepadd.ll
@@ -6,8 +6,7 @@ target datalayout = "e-p:64:64:64"
 define ptr @add1(ptr %p, i64 %o, i64 %n) {
 ; CHECK-LABEL: define ptr @add1(
 ; CHECK-SAME: ptr [[P:%.*]], i64 [[O:%.*]], i64 [[N:%.*]]) {
-; CHECK-NEXT:    [[A:%.*]] = add i64 [[O]], 1
-; CHECK-NEXT:    [[G:%.*]] = getelementptr [10 x [10 x [10 x [10 x i32]]]], ptr [[P]], i64 0, i64 [[A]], i64 1, i64 -6, i64 [[N]]
+; CHECK-NEXT:    [[G:%.*]] = getelementptr [10 x [10 x [10 x [10 x i32]]]], ptr [[P]], i64 0, i64 [[O]], i64 11, i64 -6, i64 [[N]]
 ; CHECK-NEXT:    ret ptr [[G]]
 ;
   %a = add i64 %o, 1
@@ -18,8 +17,7 @@ define ptr @add1(ptr %p, i64 %o, i64 %n) {
 define ptr @sub1(ptr %p, i64 %o, i64 %n) {
 ; CHECK-LABEL: define ptr @sub1(
 ; CHECK-SAME: ptr [[P:%.*]], i64 [[O:%.*]], i64 [[N:%.*]]) {
-; CHECK-NEXT:    [[A:%.*]] = add i64 [[O]], -1
-; CHECK-NEXT:    [[G:%.*]] = getelementptr [10 x [10 x [10 x [10 x i32]]]], ptr [[P]], i64 0, i64 [[A]], i64 1, i64 -6, i64 [[N]]
+; CHECK-NEXT:    [[G:%.*]] = getelementptr [10 x [10 x [10 x [10 x i32]]]], ptr [[P]], i64 0, i64 [[O]], i64 -9, i64 -6, i64 [[N]]
 ; CHECK-NEXT:    ret ptr [[G]]
 ;
   %a = sub i64 %o, 1
@@ -30,8 +28,7 @@ define ptr @sub1(ptr %p, i64 %o, i64 %n) {
 define ptr @add10(ptr %p, i64 %o, i64 %n) {
 ; CHECK-LABEL: define ptr @add10(
 ; CHECK-SAME: ptr [[P:%.*]], i64 [[O:%.*]], i64 [[N:%.*]]) {
-; CHECK-NEXT:    [[A:%.*]] = add i64 [[O]], 10
-; CHECK-NEXT:    [[G:%.*]] = getelementptr [10 x [10 x [10 x [10 x i32]]]], ptr [[P]], i64 0, i64 [[A]], i64 1, i64 -6, i64 [[N]]
+; CHECK-NEXT:    [[G:%.*]] = getelementptr [10 x [10 x [10 x [10 x i32]]]], ptr [[P]], i64 0, i64 [[O]], i64 101, i64 -6, i64 [[N]]
 ; CHECK-NEXT:    ret ptr [[G]]
 ;
   %a = add i64 %o, 10
@@ -54,8 +51,7 @@ define ptr @nooff(ptr %p, i64 %o, i64 %n) {
 define ptr @inbounds(ptr %p, i64 %o, i64 %n) {
 ; CHECK-LABEL: define ptr @inbounds(
 ; CHECK-SAME: ptr [[P:%.*]], i64 [[O:%.*]], i64 [[N:%.*]]) {
-; CHECK-NEXT:    [[A:%.*]] = add i64 [[O]], 1
-; CHECK-NEXT:    [[G:%.*]] = getelementptr inbounds [10 x [10 x [10 x [10 x i32]]]], ptr [[P]], i64 0, i64 [[A]], i64 1, i64 -6, i64 [[N]]
+; CHECK-NEXT:    [[G:%.*]] = getelementptr [10 x [10 x [10 x [10 x i32]]]], ptr [[P]], i64 0, i64 [[O]], i64 11, i64 -6, i64 [[N]]
 ; CHECK-NEXT:    ret ptr [[G]]
 ;
   %a = add i64 %o, 1
@@ -79,8 +75,7 @@ define ptr @struct(ptr %p, i64 %o, i64 %n) {
 define ptr @lessargs(ptr %p, i64 %o, i64 %n) {
 ; CHECK-LABEL: define ptr @lessargs(
 ; CHECK-SAME: ptr [[P:%.*]], i64 [[O:%.*]], i64 [[N:%.*]]) {
-; CHECK-NEXT:    [[A:%.*]] = add i64 [[O]], 1
-; CHECK-NEXT:    [[G:%.*]] = getelementptr [10 x [10 x [10 x i32]]], ptr [[P]], i64 0, i64 [[A]], i64 1
+; CHECK-NEXT:    [[G:%.*]] = getelementptr [10 x [10 x [10 x i32]]], ptr [[P]], i64 0, i64 [[O]], i64 11
 ; CHECK-NEXT:    ret ptr [[G]]
 ;
   %a = add i64 %o, 1
@@ -91,9 +86,7 @@ define ptr @lessargs(ptr %p, i64 %o, i64 %n) {
 define ptr @twice(ptr %p, i64 %o, i64 %n) {
 ; CHECK-LABEL: define ptr @twice(
 ; CHECK-SAME: ptr [[P:%.*]], i64 [[O:%.*]], i64 [[N:%.*]]) {
-; CHECK-NEXT:    [[A:%.*]] = add i64 [[O]], 1
-; CHECK-NEXT:    [[B:%.*]] = add i64 [[O]], -1
-; CHECK-NEXT:    [[G:%.*]] = getelementptr [10 x [10 x [10 x [10 x [10 x i32]]]]], ptr [[P]], i64 0, i64 [[A]], i64 1, i64 [[B]], i64 -3
+; CHECK-NEXT:    [[G:%.*]] = getelementptr [10 x [10 x [10 x [10 x [10 x i32]]]]], ptr [[P]], i64 0, i64 [[O]], i64 11, i64 [[O]], i64 -13
 ; CHECK-NEXT:    ret ptr [[G]]
 ;
   %a = add i64 %o, 1
@@ -105,8 +98,7 @@ define ptr @twice(ptr %p, i64 %o, i64 %n) {
 define ptr @simpler(ptr %p, i64 %o, i64 %n) {
 ; CHECK-LABEL: define ptr @simpler(
 ; CHECK-SAME: ptr [[P:%.*]], i64 [[O:%.*]], i64 [[N:%.*]]) {
-; CHECK-NEXT:    [[A:%.*]] = add i64 [[O]], 8
-; CHECK-NEXT:    [[G:%.*]] = getelementptr [16 x [8 x i32]], ptr [[P]], i64 0, i64 [[A]], i64 1
+; CHECK-NEXT:    [[G:%.*]] = getelementptr [16 x [8 x i32]], ptr [[P]], i64 0, i64 [[O]], i64 65
 ; CHECK-NEXT:    ret ptr [[G]]
 ;
   %a = add i64 %o, 8



More information about the llvm-commits mailing list