[llvm] 336d728 - [InstCombine] Preserve inbounds when folding select of GEP

Nikita Popov via llvm-commits llvm-commits at lists.llvm.org
Fri Jul 7 01:01:34 PDT 2023


Author: Nikita Popov
Date: 2023-07-07T09:56:33+02:00
New Revision: 336d7281ad61fbd4a14fa2ef3d2bde5a63d10417

URL: https://github.com/llvm/llvm-project/commit/336d7281ad61fbd4a14fa2ef3d2bde5a63d10417
DIFF: https://github.com/llvm/llvm-project/commit/336d7281ad61fbd4a14fa2ef3d2bde5a63d10417.diff

LOG: [InstCombine] Preserve inbounds when folding select of GEP

The "select base, (gep base, offset)" to "gep base, select (0, offset)"
fold used to drop inbounds, because the "gep base, 0" this introduces
might not be inbounds. After the semantics change in D154051, such
a GEP is always considered inbounds, which allows us to preserve
the flag here.

As the PhaseOrdering test demonstrates, this can result in major
optimization improvements in some cases.

Differential Revision: https://reviews.llvm.org/D154055

Added: 
    

Modified: 
    llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
    llvm/test/Transforms/InstCombine/select-gep.ll
    llvm/test/Transforms/InstCombine/stpncpy-1.ll
    llvm/test/Transforms/PhaseOrdering/gep-null-compare-in-loop.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 7c12aa41195b25..8352174d52239b 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -3400,6 +3400,8 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
       std::swap(NewT, NewF);
     Value *NewSI =
         Builder.CreateSelect(CondVal, NewT, NewF, SI.getName() + ".idx", &SI);
+    if (Gep->isInBounds())
+      return GetElementPtrInst::CreateInBounds(ElementType, Ptr, {NewSI});
     return GetElementPtrInst::Create(ElementType, Ptr, {NewSI});
   };
   if (auto *TrueGep = dyn_cast<GetElementPtrInst>(TrueVal))

diff  --git a/llvm/test/Transforms/InstCombine/select-gep.ll b/llvm/test/Transforms/InstCombine/select-gep.ll
index f3a906139bdf31..ad2b029c517644 100644
--- a/llvm/test/Transforms/InstCombine/select-gep.ll
+++ b/llvm/test/Transforms/InstCombine/select-gep.ll
@@ -75,7 +75,7 @@ define ptr @test2a(ptr %p, i64 %x, i64 %y) {
 ; CHECK-LABEL: @test2a(
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i64 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    [[SELECT_IDX:%.*]] = select i1 [[CMP]], i64 [[X]], i64 0
-; CHECK-NEXT:    [[SELECT:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[SELECT_IDX]]
+; CHECK-NEXT:    [[SELECT:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 [[SELECT_IDX]]
 ; CHECK-NEXT:    ret ptr [[SELECT]]
 ;
   %gep = getelementptr inbounds i32, ptr %p, i64 %x
@@ -89,7 +89,7 @@ define ptr @test2b(ptr %p, i64 %x, i64 %y) {
 ; CHECK-LABEL: @test2b(
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i64 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    [[SELECT_IDX:%.*]] = select i1 [[CMP]], i64 0, i64 [[X]]
-; CHECK-NEXT:    [[SELECT:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[SELECT_IDX]]
+; CHECK-NEXT:    [[SELECT:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 [[SELECT_IDX]]
 ; CHECK-NEXT:    ret ptr [[SELECT]]
 ;
   %gep = getelementptr inbounds i32, ptr %p, i64 %x
@@ -104,7 +104,7 @@ define ptr @test2c(ptr %p, i64 %x, i64 %y) {
 ; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 [[X:%.*]]
 ; CHECK-NEXT:    [[ICMP:%.*]] = icmp ugt i64 [[X]], [[Y:%.*]]
 ; CHECK-NEXT:    [[SEL_IDX:%.*]] = select i1 [[ICMP]], i64 0, i64 6
-; CHECK-NEXT:    [[SEL:%.*]] = getelementptr i32, ptr [[GEP1]], i64 [[SEL_IDX]]
+; CHECK-NEXT:    [[SEL:%.*]] = getelementptr inbounds i32, ptr [[GEP1]], i64 [[SEL_IDX]]
 ; CHECK-NEXT:    ret ptr [[SEL]]
 ;
   %gep1 = getelementptr inbounds i32, ptr %p, i64 %x
@@ -120,7 +120,7 @@ define ptr @test2d(ptr %p, i64 %x, i64 %y) {
 ; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 [[X:%.*]]
 ; CHECK-NEXT:    [[ICMP:%.*]] = icmp ugt i64 [[X]], [[Y:%.*]]
 ; CHECK-NEXT:    [[SEL_IDX:%.*]] = select i1 [[ICMP]], i64 6, i64 0
-; CHECK-NEXT:    [[SEL:%.*]] = getelementptr i32, ptr [[GEP1]], i64 [[SEL_IDX]]
+; CHECK-NEXT:    [[SEL:%.*]] = getelementptr inbounds i32, ptr [[GEP1]], i64 [[SEL_IDX]]
 ; CHECK-NEXT:    ret ptr [[SEL]]
 ;
   %gep1 = getelementptr inbounds i32, ptr %p, i64 %x
@@ -231,7 +231,7 @@ define ptr @test6(ptr %p, i64 %x, i64 %y) {
 ; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 [[X:%.*]]
 ; CHECK-NEXT:    [[ICMP:%.*]] = icmp ugt i64 [[X]], [[Y:%.*]]
 ; CHECK-NEXT:    [[SEL_IDX:%.*]] = select i1 [[ICMP]], i64 [[Y]], i64 0
-; CHECK-NEXT:    [[SEL:%.*]] = getelementptr i32, ptr [[GEP1]], i64 [[SEL_IDX]]
+; CHECK-NEXT:    [[SEL:%.*]] = getelementptr inbounds i32, ptr [[GEP1]], i64 [[SEL_IDX]]
 ; CHECK-NEXT:    call void @use_i32p(ptr [[GEP1]])
 ; CHECK-NEXT:    ret ptr [[SEL]]
 ;

diff  --git a/llvm/test/Transforms/InstCombine/stpncpy-1.ll b/llvm/test/Transforms/InstCombine/stpncpy-1.ll
index 9f9442705d126b..15eee6c1019353 100644
--- a/llvm/test/Transforms/InstCombine/stpncpy-1.ll
+++ b/llvm/test/Transforms/InstCombine/stpncpy-1.ll
@@ -47,8 +47,8 @@ define void @fold_stpncpy_overlap(ptr %dst, i64 %n) {
 ; ANY-NEXT:    [[STXNCPY_CHAR0:%.*]] = load i8, ptr [[DST]], align 1
 ; ANY-NEXT:    [[STPNCPY_CHAR0CMP:%.*]] = icmp ne i8 [[STXNCPY_CHAR0]], 0
 ; ANY-NEXT:    [[STPNCPY_SEL_IDX:%.*]] = zext i1 [[STPNCPY_CHAR0CMP]] to i64
-; ANY-NEXT:    [[STPNCPY_SEL:%.*]] = getelementptr i8, ptr [[DST]], i64 [[STPNCPY_SEL_IDX]]
-; ANY-NEXT:    call void @sink(ptr nonnull [[DST]], ptr [[STPNCPY_SEL]])
+; ANY-NEXT:    [[STPNCPY_SEL:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[STPNCPY_SEL_IDX]]
+; ANY-NEXT:    call void @sink(ptr nonnull [[DST]], ptr nonnull [[STPNCPY_SEL]])
 ; ANY-NEXT:    ret void
 ;
 ; Fold stpncpy(D, D, 0) to just D.
@@ -398,8 +398,8 @@ define void @fold_stpncpy_s(ptr %dst, ptr %src) {
 ; ANY-NEXT:    store i8 [[STXNCPY_CHAR0]], ptr [[DST]], align 1
 ; ANY-NEXT:    [[STPNCPY_CHAR0CMP:%.*]] = icmp ne i8 [[STXNCPY_CHAR0]], 0
 ; ANY-NEXT:    [[STPNCPY_SEL_IDX:%.*]] = zext i1 [[STPNCPY_CHAR0CMP]] to i64
-; ANY-NEXT:    [[STPNCPY_SEL:%.*]] = getelementptr i8, ptr [[DST]], i64 [[STPNCPY_SEL_IDX]]
-; ANY-NEXT:    call void @sink(ptr nonnull [[DST]], ptr [[STPNCPY_SEL]])
+; ANY-NEXT:    [[STPNCPY_SEL:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[STPNCPY_SEL_IDX]]
+; ANY-NEXT:    call void @sink(ptr nonnull [[DST]], ptr nonnull [[STPNCPY_SEL]])
 ; ANY-NEXT:    ret void
 ;
 ; Fold stpncpy(D, S, 0) to just D.

diff  --git a/llvm/test/Transforms/PhaseOrdering/gep-null-compare-in-loop.ll b/llvm/test/Transforms/PhaseOrdering/gep-null-compare-in-loop.ll
index a2d56f2f279259..545e203c5e2c95 100644
--- a/llvm/test/Transforms/PhaseOrdering/gep-null-compare-in-loop.ll
+++ b/llvm/test/Transforms/PhaseOrdering/gep-null-compare-in-loop.ll
@@ -52,22 +52,9 @@ bb:
 
 define i32 @using_malloc() {
 ; CHECK-LABEL: define i32 @using_malloc
-; CHECK-SAME: () local_unnamed_addr #[[ATTR1:[0-9]+]] {
+; CHECK-SAME: () local_unnamed_addr #[[ATTR0]] {
 ; CHECK-NEXT:  bb:
-; CHECK-NEXT:    [[ALLOC:%.*]] = tail call dereferenceable_or_null(64) ptr @malloc(i64 64)
-; CHECK-NEXT:    store i32 1, ptr [[ALLOC]], align 4
-; CHECK-NEXT:    [[GETELEMENTPTR:%.*]] = getelementptr i32, ptr [[ALLOC]], i64 1
-; CHECK-NEXT:    store i32 2, ptr [[GETELEMENTPTR]], align 4
-; CHECK-NEXT:    [[GETELEMENTPTR1:%.*]] = getelementptr i32, ptr [[ALLOC]], i64 2
-; CHECK-NEXT:    store i32 3, ptr [[GETELEMENTPTR1]], align 4
-; CHECK-NEXT:    [[SPEC_SELECT_I:%.*]] = getelementptr i32, ptr [[ALLOC]], i64 1
-; CHECK-NEXT:    [[LOAD_I:%.*]] = load i32, ptr [[ALLOC]], align 4
-; CHECK-NEXT:    [[SPEC_SELECT_I_1:%.*]] = getelementptr i32, ptr [[ALLOC]], i64 2
-; CHECK-NEXT:    [[LOAD_I_1:%.*]] = load i32, ptr [[SPEC_SELECT_I]], align 4
-; CHECK-NEXT:    [[ADD_I_1:%.*]] = add i32 [[LOAD_I_1]], [[LOAD_I]]
-; CHECK-NEXT:    [[LOAD_I_2:%.*]] = load i32, ptr [[SPEC_SELECT_I_1]], align 4
-; CHECK-NEXT:    [[ADD_I_2:%.*]] = add i32 [[LOAD_I_2]], [[ADD_I_1]]
-; CHECK-NEXT:    ret i32 [[ADD_I_2]]
+; CHECK-NEXT:    ret i32 6
 ;
 bb:
   %alloc = call dereferenceable_or_null(64) ptr @malloc(i64 64)


        


More information about the llvm-commits mailing list