[llvm] [InstCombine] Try to fold add into GEP x, C (PR #85090)

David Green via llvm-commits llvm-commits at lists.llvm.org
Wed Mar 13 07:44:29 PDT 2024


https://github.com/davemgreen created https://github.com/llvm/llvm-project/pull/85090

This attempts to replace GEP p, (x + C1), C2 with GEP p, x, C2+C1*S, where S is
the ratio of the two indices' element strides, removing the need for the add by
folding it into the following constant offset in the GEP. It does not attempt
to alter inbounds GEPs, to avoid the need to drop the inbounds flag.

Example proof using a much reduced pointer address space:
https://alive2.llvm.org/ce/z/hG8xfF

>From 4046676f26be1d2ba250e89f6f3af020ec814700 Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Wed, 13 Mar 2024 11:54:17 +0000
Subject: [PATCH 1/2] [InstCombine] Add Gep + Add tests. NFC

---
 llvm/test/Transforms/InstCombine/gepadd.ll | 115 +++++++++++++++++++++
 1 file changed, 115 insertions(+)
 create mode 100644 llvm/test/Transforms/InstCombine/gepadd.ll

diff --git a/llvm/test/Transforms/InstCombine/gepadd.ll b/llvm/test/Transforms/InstCombine/gepadd.ll
new file mode 100644
index 00000000000000..328b0d0de0883e
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/gepadd.ll
@@ -0,0 +1,115 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
+; RUN: opt < %s -S -passes=instcombine | FileCheck %s
+
+target datalayout = "e-p:64:64:64"
+
+define ptr @add1(ptr %p, i64 %o, i64 %n) {
+; CHECK-LABEL: define ptr @add1(
+; CHECK-SAME: ptr [[P:%.*]], i64 [[O:%.*]], i64 [[N:%.*]]) {
+; CHECK-NEXT:    [[A:%.*]] = add i64 [[O]], 1
+; CHECK-NEXT:    [[G:%.*]] = getelementptr [10 x [10 x [10 x [10 x i32]]]], ptr [[P]], i64 0, i64 [[A]], i64 1, i64 -6, i64 [[N]]
+; CHECK-NEXT:    ret ptr [[G]]
+;
+  %a = add i64 %o, 1
+  %g = getelementptr [10 x [10 x [10 x [10 x i32]]]], ptr %p, i64 0, i64 %a, i64 1, i64 -6, i64 %n
+  ret ptr %g
+}
+
+define ptr @sub1(ptr %p, i64 %o, i64 %n) {
+; CHECK-LABEL: define ptr @sub1(
+; CHECK-SAME: ptr [[P:%.*]], i64 [[O:%.*]], i64 [[N:%.*]]) {
+; CHECK-NEXT:    [[A:%.*]] = add i64 [[O]], -1
+; CHECK-NEXT:    [[G:%.*]] = getelementptr [10 x [10 x [10 x [10 x i32]]]], ptr [[P]], i64 0, i64 [[A]], i64 1, i64 -6, i64 [[N]]
+; CHECK-NEXT:    ret ptr [[G]]
+;
+  %a = sub i64 %o, 1
+  %g = getelementptr [10 x [10 x [10 x [10 x i32]]]], ptr %p, i64 0, i64 %a, i64 1, i64 -6, i64 %n
+  ret ptr %g
+}
+
+define ptr @add10(ptr %p, i64 %o, i64 %n) {
+; CHECK-LABEL: define ptr @add10(
+; CHECK-SAME: ptr [[P:%.*]], i64 [[O:%.*]], i64 [[N:%.*]]) {
+; CHECK-NEXT:    [[A:%.*]] = add i64 [[O]], 10
+; CHECK-NEXT:    [[G:%.*]] = getelementptr [10 x [10 x [10 x [10 x i32]]]], ptr [[P]], i64 0, i64 [[A]], i64 1, i64 -6, i64 [[N]]
+; CHECK-NEXT:    ret ptr [[G]]
+;
+  %a = add i64 %o, 10
+  %g = getelementptr [10 x [10 x [10 x [10 x i32]]]], ptr %p, i64 0, i64 %a, i64 1, i64 -6, i64 %n
+  ret ptr %g
+}
+
+define ptr @nooff(ptr %p, i64 %o, i64 %n) {
+; CHECK-LABEL: define ptr @nooff(
+; CHECK-SAME: ptr [[P:%.*]], i64 [[O:%.*]], i64 [[N:%.*]]) {
+; CHECK-NEXT:    [[A:%.*]] = add i64 [[O]], 10
+; CHECK-NEXT:    [[G:%.*]] = getelementptr [10 x [10 x [10 x [10 x i32]]]], ptr [[P]], i64 0, i64 [[A]], i64 [[N]]
+; CHECK-NEXT:    ret ptr [[G]]
+;
+  %a = add i64 %o, 10
+  %g = getelementptr [10 x [10 x [10 x [10 x i32]]]], ptr %p, i64 0, i64 %a, i64 %n
+  ret ptr %g
+}
+
+define ptr @inbounds(ptr %p, i64 %o, i64 %n) {
+; CHECK-LABEL: define ptr @inbounds(
+; CHECK-SAME: ptr [[P:%.*]], i64 [[O:%.*]], i64 [[N:%.*]]) {
+; CHECK-NEXT:    [[A:%.*]] = add i64 [[O]], 1
+; CHECK-NEXT:    [[G:%.*]] = getelementptr inbounds [10 x [10 x [10 x [10 x i32]]]], ptr [[P]], i64 0, i64 [[A]], i64 1, i64 -6, i64 [[N]]
+; CHECK-NEXT:    ret ptr [[G]]
+;
+  %a = add i64 %o, 1
+  %g = getelementptr inbounds [10 x [10 x [10 x [10 x i32]]]], ptr %p, i64 0, i64 %a, i64 1, i64 -6, i64 %n
+  ret ptr %g
+}
+
+%struct.Struct = type { i32, i32, ptr }
+define ptr @struct(ptr %p, i64 %o, i64 %n) {
+; CHECK-LABEL: define ptr @struct(
+; CHECK-SAME: ptr [[P:%.*]], i64 [[O:%.*]], i64 [[N:%.*]]) {
+; CHECK-NEXT:    [[A:%.*]] = add i64 [[O]], -1
+; CHECK-NEXT:    [[G:%.*]] = getelementptr [[STRUCT_STRUCT:%.*]], ptr [[P]], i64 [[A]], i32 1
+; CHECK-NEXT:    ret ptr [[G]]
+;
+  %a = add i64 %o, -1
+  %g = getelementptr %struct.Struct, ptr %p, i64 %a, i32 1
+  ret ptr %g
+}
+
+define ptr @lessargs(ptr %p, i64 %o, i64 %n) {
+; CHECK-LABEL: define ptr @lessargs(
+; CHECK-SAME: ptr [[P:%.*]], i64 [[O:%.*]], i64 [[N:%.*]]) {
+; CHECK-NEXT:    [[A:%.*]] = add i64 [[O]], 1
+; CHECK-NEXT:    [[G:%.*]] = getelementptr [10 x [10 x [10 x i32]]], ptr [[P]], i64 0, i64 [[A]], i64 1
+; CHECK-NEXT:    ret ptr [[G]]
+;
+  %a = add i64 %o, 1
+  %g = getelementptr [10 x [10 x [10 x i32]]], ptr %p, i64 0, i64 %a, i64 1
+  ret ptr %g
+}
+
+define ptr @twice(ptr %p, i64 %o, i64 %n) {
+; CHECK-LABEL: define ptr @twice(
+; CHECK-SAME: ptr [[P:%.*]], i64 [[O:%.*]], i64 [[N:%.*]]) {
+; CHECK-NEXT:    [[A:%.*]] = add i64 [[O]], 1
+; CHECK-NEXT:    [[B:%.*]] = add i64 [[O]], -1
+; CHECK-NEXT:    [[G:%.*]] = getelementptr [10 x [10 x [10 x [10 x [10 x i32]]]]], ptr [[P]], i64 0, i64 [[A]], i64 1, i64 [[B]], i64 -3
+; CHECK-NEXT:    ret ptr [[G]]
+;
+  %a = add i64 %o, 1
+  %b = sub i64 %o, 1
+  %g = getelementptr [10 x [10 x [10 x [10 x [10 x i32]]]]], ptr %p, i64 0, i64 %a, i64 1, i64 %b, i64 -3
+  ret ptr %g
+}
+
+define ptr @simpler(ptr %p, i64 %o, i64 %n) {
+; CHECK-LABEL: define ptr @simpler(
+; CHECK-SAME: ptr [[P:%.*]], i64 [[O:%.*]], i64 [[N:%.*]]) {
+; CHECK-NEXT:    [[A:%.*]] = add i64 [[O]], 8
+; CHECK-NEXT:    [[G:%.*]] = getelementptr [16 x [8 x i32]], ptr [[P]], i64 0, i64 [[A]], i64 1
+; CHECK-NEXT:    ret ptr [[G]]
+;
+  %a = add i64 %o, 8
+  %g = getelementptr [16 x [8 x i32]], ptr %p, i64 0, i64 %a, i64 1
+  ret ptr %g
+}

>From adeb2874f9cf45e9125438397d15248ad3b0051f Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Wed, 13 Mar 2024 14:37:06 +0000
Subject: [PATCH 2/2] [InstCombine] Try to fold add into GEP

This attempts to replace GEP p, (x + C1), C2 with GEP p, x, C2+C1*S, where S is
the ratio of the two indices' element strides, removing the need for the add by
folding it into the following constant offset in the GEP. It does not attempt
to alter inbounds GEPs, to avoid the need to drop the inbounds flag.

Example proof using a much reduced pointer address space:
https://alive2.llvm.org/ce/z/hG8xfF
---
 .../InstCombine/InstructionCombining.cpp      | 31 +++++++++++++++++++
 llvm/test/Transforms/InstCombine/gepadd.ll    | 19 ++++--------
 2 files changed, 37 insertions(+), 13 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 1688005de2104d..7932822c0f7464 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2893,6 +2893,37 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
         }
       }
     }
+
+    // Try to replace GEP p, (x + C1), C2 with GEP p, x, C2+C1*S
+    if (GEP.getNumIndices() > 1) {
+      gep_type_iterator GTI = gep_type_begin(GEP);
+      for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end() - 1;
+           I != E; ++I, ++GTI) {
+        if (!GTI.isSequential())
+          break;
+        Value *X;
+        const APInt *C1, *C2;
+        User::op_iterator Next = std::next(I);
+        if (match(I->get(), m_Add(m_Value(X), m_APInt(C1))) &&
+            match(Next->get(), m_APInt(C2))) {
+          TypeSize Scale1 = GTI.getSequentialElementStride(DL);
+          if (Scale1.isScalable() || !(++GTI).isSequential())
+            break;
+          TypeSize Scale2 = GTI.getSequentialElementStride(DL);
+          if (Scale2.isScalable())
+            break;
+
+          // Update the GEP instruction indices, and add Add to the worklist
+          // so that it can be DCEd.
+          Instruction *Add = cast<Instruction>(*I);
+          *I = X;
+          *Next = ConstantInt::get((*Next)->getType(),
+                                    *C2 + *C1 * (Scale1 / Scale2));
+          addToWorklist(Add);
+          return &GEP;
+        }
+      }
+    }
   }
 
   if (Instruction *R = foldSelectGEP(GEP, Builder))
diff --git a/llvm/test/Transforms/InstCombine/gepadd.ll b/llvm/test/Transforms/InstCombine/gepadd.ll
index 328b0d0de0883e..895bb94ca99576 100644
--- a/llvm/test/Transforms/InstCombine/gepadd.ll
+++ b/llvm/test/Transforms/InstCombine/gepadd.ll
@@ -6,8 +6,7 @@ target datalayout = "e-p:64:64:64"
 define ptr @add1(ptr %p, i64 %o, i64 %n) {
 ; CHECK-LABEL: define ptr @add1(
 ; CHECK-SAME: ptr [[P:%.*]], i64 [[O:%.*]], i64 [[N:%.*]]) {
-; CHECK-NEXT:    [[A:%.*]] = add i64 [[O]], 1
-; CHECK-NEXT:    [[G:%.*]] = getelementptr [10 x [10 x [10 x [10 x i32]]]], ptr [[P]], i64 0, i64 [[A]], i64 1, i64 -6, i64 [[N]]
+; CHECK-NEXT:    [[G:%.*]] = getelementptr [10 x [10 x [10 x [10 x i32]]]], ptr [[P]], i64 0, i64 [[O]], i64 11, i64 -6, i64 [[N]]
 ; CHECK-NEXT:    ret ptr [[G]]
 ;
   %a = add i64 %o, 1
@@ -18,8 +17,7 @@ define ptr @add1(ptr %p, i64 %o, i64 %n) {
 define ptr @sub1(ptr %p, i64 %o, i64 %n) {
 ; CHECK-LABEL: define ptr @sub1(
 ; CHECK-SAME: ptr [[P:%.*]], i64 [[O:%.*]], i64 [[N:%.*]]) {
-; CHECK-NEXT:    [[A:%.*]] = add i64 [[O]], -1
-; CHECK-NEXT:    [[G:%.*]] = getelementptr [10 x [10 x [10 x [10 x i32]]]], ptr [[P]], i64 0, i64 [[A]], i64 1, i64 -6, i64 [[N]]
+; CHECK-NEXT:    [[G:%.*]] = getelementptr [10 x [10 x [10 x [10 x i32]]]], ptr [[P]], i64 0, i64 [[O]], i64 -9, i64 -6, i64 [[N]]
 ; CHECK-NEXT:    ret ptr [[G]]
 ;
   %a = sub i64 %o, 1
@@ -30,8 +28,7 @@ define ptr @sub1(ptr %p, i64 %o, i64 %n) {
 define ptr @add10(ptr %p, i64 %o, i64 %n) {
 ; CHECK-LABEL: define ptr @add10(
 ; CHECK-SAME: ptr [[P:%.*]], i64 [[O:%.*]], i64 [[N:%.*]]) {
-; CHECK-NEXT:    [[A:%.*]] = add i64 [[O]], 10
-; CHECK-NEXT:    [[G:%.*]] = getelementptr [10 x [10 x [10 x [10 x i32]]]], ptr [[P]], i64 0, i64 [[A]], i64 1, i64 -6, i64 [[N]]
+; CHECK-NEXT:    [[G:%.*]] = getelementptr [10 x [10 x [10 x [10 x i32]]]], ptr [[P]], i64 0, i64 [[O]], i64 101, i64 -6, i64 [[N]]
 ; CHECK-NEXT:    ret ptr [[G]]
 ;
   %a = add i64 %o, 10
@@ -79,8 +76,7 @@ define ptr @struct(ptr %p, i64 %o, i64 %n) {
 define ptr @lessargs(ptr %p, i64 %o, i64 %n) {
 ; CHECK-LABEL: define ptr @lessargs(
 ; CHECK-SAME: ptr [[P:%.*]], i64 [[O:%.*]], i64 [[N:%.*]]) {
-; CHECK-NEXT:    [[A:%.*]] = add i64 [[O]], 1
-; CHECK-NEXT:    [[G:%.*]] = getelementptr [10 x [10 x [10 x i32]]], ptr [[P]], i64 0, i64 [[A]], i64 1
+; CHECK-NEXT:    [[G:%.*]] = getelementptr [10 x [10 x [10 x i32]]], ptr [[P]], i64 0, i64 [[O]], i64 11
 ; CHECK-NEXT:    ret ptr [[G]]
 ;
   %a = add i64 %o, 1
@@ -91,9 +87,7 @@ define ptr @lessargs(ptr %p, i64 %o, i64 %n) {
 define ptr @twice(ptr %p, i64 %o, i64 %n) {
 ; CHECK-LABEL: define ptr @twice(
 ; CHECK-SAME: ptr [[P:%.*]], i64 [[O:%.*]], i64 [[N:%.*]]) {
-; CHECK-NEXT:    [[A:%.*]] = add i64 [[O]], 1
-; CHECK-NEXT:    [[B:%.*]] = add i64 [[O]], -1
-; CHECK-NEXT:    [[G:%.*]] = getelementptr [10 x [10 x [10 x [10 x [10 x i32]]]]], ptr [[P]], i64 0, i64 [[A]], i64 1, i64 [[B]], i64 -3
+; CHECK-NEXT:    [[G:%.*]] = getelementptr [10 x [10 x [10 x [10 x [10 x i32]]]]], ptr [[P]], i64 0, i64 [[O]], i64 11, i64 [[O]], i64 -13
 ; CHECK-NEXT:    ret ptr [[G]]
 ;
   %a = add i64 %o, 1
@@ -105,8 +99,7 @@ define ptr @twice(ptr %p, i64 %o, i64 %n) {
 define ptr @simpler(ptr %p, i64 %o, i64 %n) {
 ; CHECK-LABEL: define ptr @simpler(
 ; CHECK-SAME: ptr [[P:%.*]], i64 [[O:%.*]], i64 [[N:%.*]]) {
-; CHECK-NEXT:    [[A:%.*]] = add i64 [[O]], 8
-; CHECK-NEXT:    [[G:%.*]] = getelementptr [16 x [8 x i32]], ptr [[P]], i64 0, i64 [[A]], i64 1
+; CHECK-NEXT:    [[G:%.*]] = getelementptr [16 x [8 x i32]], ptr [[P]], i64 0, i64 [[O]], i64 65
 ; CHECK-NEXT:    ret ptr [[G]]
 ;
   %a = add i64 %o, 8



More information about the llvm-commits mailing list