[llvm] 8b30067 - [InstCombine] improve fold of pointer differences

Mon Sep 7 12:54:53 PDT 2020

Author: Sanjay Patel
Date: 2020-09-07T15:54:32-04:00
New Revision: 8b300679192b317aa91a28e781fcf60d4416b0d6

URL: https://github.com/llvm/llvm-project/commit/8b300679192b317aa91a28e781fcf60d4416b0d6
DIFF: https://github.com/llvm/llvm-project/commit/8b300679192b317aa91a28e781fcf60d4416b0d6.diff

LOG: [InstCombine] improve fold of pointer differences

This was supposed to be an NFC cleanup, but there's
a real logic difference (did not drop 'nsw') visible
in some tests in addition to an efficiency improvement.

This is because in the case where we have 2 GEPs,
the code was *always* swapping the operands and
negating the result. But if we have 2 GEPs, we
should *never* need swapping/negation AFAICT.

This is part of improving flags propagation noticed
with PR47430.

Added: 
    

Modified: 
    llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
    llvm/test/Transforms/InstCombine/sub-gep.ll
    llvm/test/Transforms/InstCombine/sub.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 5cf6eb2a885a..5ce32bc592d0 100644

--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -1615,43 +1615,27 @@ Value *InstCombinerImpl::OptimizePointerDifference(Value *LHS, Value *RHS,
   // this.
   bool Swapped = false;
   GEPOperator *GEP1 = nullptr, *GEP2 = nullptr;
+  if (!isa<GEPOperator>(LHS) && isa<GEPOperator>(RHS)) {
+    std::swap(LHS, RHS);
+    Swapped = true;
+  }
 
-  // For now we require one side to be the base pointer "A" or a constant
-  // GEP derived from it.
-  if (GEPOperator *LHSGEP = dyn_cast<GEPOperator>(LHS)) {
+  // Require at least one GEP with a common base pointer on both sides.
+  if (auto *LHSGEP = dyn_cast<GEPOperator>(LHS)) {
     // (gep X, ...) - X
     if (LHSGEP->getOperand(0) == RHS) {
       GEP1 = LHSGEP;
-      Swapped = false;
-    } else if (GEPOperator *RHSGEP = dyn_cast<GEPOperator>(RHS)) {
+    } else if (auto *RHSGEP = dyn_cast<GEPOperator>(RHS)) {
       // (gep X, ...) - (gep X, ...)
       if (LHSGEP->getOperand(0)->stripPointerCasts() ==
-            RHSGEP->getOperand(0)->stripPointerCasts()) {
-        GEP2 = RHSGEP;
+          RHSGEP->getOperand(0)->stripPointerCasts()) {
         GEP1 = LHSGEP;
-        Swapped = false;
-      }
-    }
-  }
-
-  if (GEPOperator *RHSGEP = dyn_cast<GEPOperator>(RHS)) {
-    // X - (gep X, ...)
-    if (RHSGEP->getOperand(0) == LHS) {
-      GEP1 = RHSGEP;
-      Swapped = true;
-    } else if (GEPOperator *LHSGEP = dyn_cast<GEPOperator>(LHS)) {
-      // (gep X, ...) - (gep X, ...)
-      if (RHSGEP->getOperand(0)->stripPointerCasts() ==
-            LHSGEP->getOperand(0)->stripPointerCasts()) {
-        GEP2 = LHSGEP;
-        GEP1 = RHSGEP;
-        Swapped = true;
+        GEP2 = RHSGEP;
       }
     }
   }
 
   if (!GEP1)
-    // No GEP found.
     return nullptr;
 
   if (GEP2) {

diff  --git a/llvm/test/Transforms/InstCombine/sub-gep.ll b/llvm/test/Transforms/InstCombine/sub-gep.ll
index fcb24eec349a..f31eeb46d882 100644
--- a/llvm/test/Transforms/InstCombine/sub-gep.ll
+++ b/llvm/test/Transforms/InstCombine/sub-gep.ll
@@ -124,10 +124,10 @@ define i64 @test_inbounds2_nuw_swapped([0 x i32]* %base, i64 %idx) {
 ; The sub and shl here could be nuw, but this is harder to handle.
 define i64 @test_inbounds_nuw_two_gep([0 x i32]* %base, i64 %idx, i64 %idx2) {
 ; CHECK-LABEL: @test_inbounds_nuw_two_gep(
+; CHECK-NEXT:    [[P2_IDX:%.*]] = shl nsw i64 [[IDX2:%.*]], 2
 ; CHECK-NEXT:    [[P1_IDX_NEG:%.*]] = mul i64 [[IDX:%.*]], -4
-; CHECK-NEXT:    [[P2_IDX_NEG_NEG:%.*]] = shl i64 [[IDX2:%.*]], 2
-; CHECK-NEXT:    [[GEPDIFF_NEG:%.*]] = add i64 [[P2_IDX_NEG_NEG]], [[P1_IDX_NEG]]
-; CHECK-NEXT:    ret i64 [[GEPDIFF_NEG]]
+; CHECK-NEXT:    [[GEPDIFF:%.*]] = add i64 [[P1_IDX_NEG]], [[P2_IDX]]
+; CHECK-NEXT:    ret i64 [[GEPDIFF]]
 ;
   %p1 = getelementptr inbounds [0 x i32], [0 x i32]* %base, i64 0, i64 %idx
   %p2 = getelementptr inbounds [0 x i32], [0 x i32]* %base, i64 0, i64 %idx2

diff  --git a/llvm/test/Transforms/InstCombine/sub.ll b/llvm/test/Transforms/InstCombine/sub.ll
index dbe1631226d6..437d8f8c5c02 100644
--- a/llvm/test/Transforms/InstCombine/sub.ll
+++ b/llvm/test/Transforms/InstCombine/sub.ll
@@ -505,9 +505,9 @@ define i64 @test24b(i8* %P, i64 %A){
 
 define i64 @test25(i8* %P, i64 %A){
 ; CHECK-LABEL: @test25(
-; CHECK-NEXT:    [[B_IDX_NEG_NEG:%.*]] = shl i64 [[A:%.*]], 1
-; CHECK-NEXT:    [[GEPDIFF_NEG:%.*]] = add i64 [[B_IDX_NEG_NEG]], -84
-; CHECK-NEXT:    ret i64 [[GEPDIFF_NEG]]
+; CHECK-NEXT:    [[B_IDX:%.*]] = shl nsw i64 [[A:%.*]], 1
+; CHECK-NEXT:    [[GEPDIFF:%.*]] = add i64 [[B_IDX]], -84
+; CHECK-NEXT:    ret i64 [[GEPDIFF]]
 ;
   %B = getelementptr inbounds [42 x i16], [42 x i16]* @Arr, i64 0, i64 %A
   %C = ptrtoint i16* %B to i64
@@ -520,9 +520,9 @@ define i64 @test25(i8* %P, i64 %A){
 define i16 @test25_as1(i8 addrspace(1)* %P, i64 %A) {
 ; CHECK-LABEL: @test25_as1(
 ; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[A:%.*]] to i16
-; CHECK-NEXT:    [[B_IDX_NEG_NEG:%.*]] = shl i16 [[TMP1]], 1
-; CHECK-NEXT:    [[GEPDIFF_NEG:%.*]] = add i16 [[B_IDX_NEG_NEG]], -84
-; CHECK-NEXT:    ret i16 [[GEPDIFF_NEG]]
+; CHECK-NEXT:    [[B_IDX:%.*]] = shl nsw i16 [[TMP1]], 1
+; CHECK-NEXT:    [[GEPDIFF:%.*]] = add i16 [[B_IDX]], -84
+; CHECK-NEXT:    ret i16 [[GEPDIFF]]
 ;
   %B = getelementptr inbounds [42 x i16], [42 x i16] addrspace(1)* @Arr_as1, i64 0, i64 %A
   %C = ptrtoint i16 addrspace(1)* %B to i16
@@ -825,8 +825,8 @@ define i32 @test28commuted(i32 %x, i32 %y, i32 %z) {
 
 define i64 @test29(i8* %foo, i64 %i, i64 %j) {
 ; CHECK-LABEL: @test29(
-; CHECK-NEXT:    [[GEPDIFF_NEG:%.*]] = sub i64 [[I:%.*]], [[J:%.*]]
-; CHECK-NEXT:    ret i64 [[GEPDIFF_NEG]]
+; CHECK-NEXT:    [[GEPDIFF:%.*]] = sub i64 [[I:%.*]], [[J:%.*]]
+; CHECK-NEXT:    ret i64 [[GEPDIFF]]
 ;
   %gep1 = getelementptr inbounds i8, i8* %foo, i64 %i
   %gep2 = getelementptr inbounds i8, i8* %foo, i64 %j
@@ -838,9 +838,9 @@ define i64 @test29(i8* %foo, i64 %i, i64 %j) {
 
 define i64 @test30(i8* %foo, i64 %i, i64 %j) {
 ; CHECK-LABEL: @test30(
-; CHECK-NEXT:    [[GEP1_IDX_NEG_NEG:%.*]] = shl i64 [[I:%.*]], 2
-; CHECK-NEXT:    [[GEPDIFF_NEG:%.*]] = sub i64 [[GEP1_IDX_NEG_NEG]], [[J:%.*]]
-; CHECK-NEXT:    ret i64 [[GEPDIFF_NEG]]
+; CHECK-NEXT:    [[GEP1_IDX:%.*]] = shl nsw i64 [[I:%.*]], 2
+; CHECK-NEXT:    [[GEPDIFF:%.*]] = sub i64 [[GEP1_IDX]], [[J:%.*]]
+; CHECK-NEXT:    ret i64 [[GEPDIFF]]
 ;
   %bit = bitcast i8* %foo to i32*
   %gep1 = getelementptr inbounds i32, i32* %bit, i64 %i
@@ -853,9 +853,9 @@ define i64 @test30(i8* %foo, i64 %i, i64 %j) {
 
 define i16 @test30_as1(i8 addrspace(1)* %foo, i16 %i, i16 %j) {
 ; CHECK-LABEL: @test30_as1(
-; CHECK-NEXT:    [[GEP1_IDX_NEG_NEG:%.*]] = shl i16 [[I:%.*]], 2
-; CHECK-NEXT:    [[GEPDIFF_NEG:%.*]] = sub i16 [[GEP1_IDX_NEG_NEG]], [[J:%.*]]
-; CHECK-NEXT:    ret i16 [[GEPDIFF_NEG]]
+; CHECK-NEXT:    [[GEP1_IDX:%.*]] = shl nsw i16 [[I:%.*]], 2
+; CHECK-NEXT:    [[GEPDIFF:%.*]] = sub i16 [[GEP1_IDX]], [[J:%.*]]
+; CHECK-NEXT:    ret i16 [[GEPDIFF]]
 ;
   %bit = bitcast i8 addrspace(1)* %foo to i32 addrspace(1)*
   %gep1 = getelementptr inbounds i32, i32 addrspace(1)* %bit, i16 %i
@@ -1234,10 +1234,10 @@ define i64 @test58([100 x [100 x i8]]* %foo, i64 %i, i64 %j) {
 ; "%sub = i64 %i, %j, ret i64 %sub"
 ; gep1 and gep2 have only one use
 ; CHECK-LABEL: @test58(
-; CHECK-NEXT:    [[GEP2_OFFS:%.*]] = add i64 [[J:%.*]], 4200
 ; CHECK-NEXT:    [[GEP1_OFFS:%.*]] = add i64 [[I:%.*]], 4200
-; CHECK-NEXT:    [[GEPDIFF_NEG:%.*]] = sub i64 [[GEP1_OFFS]], [[GEP2_OFFS]]
-; CHECK-NEXT:    ret i64 [[GEPDIFF_NEG]]
+; CHECK-NEXT:    [[GEP2_OFFS:%.*]] = add i64 [[J:%.*]], 4200
+; CHECK-NEXT:    [[GEPDIFF:%.*]] = sub i64 [[GEP1_OFFS]], [[GEP2_OFFS]]
+; CHECK-NEXT:    ret i64 [[GEPDIFF]]
 ;
   %gep1 = getelementptr inbounds [100 x [100 x i8]], [100 x [100 x i8]]* %foo, i64 0, i64 42, i64 %i
   %gep2 = getelementptr inbounds [100 x [100 x i8]], [100 x [100 x i8]]* %foo, i64 0, i64 42, i64 %j