[llvm] ee6abef - [ValueTracking] Interpret GEPs as a series of adds multiplied by the related scaling factor

Quentin Colombet via llvm-commits llvm-commits at lists.llvm.org
Wed Oct 21 15:07:18 PDT 2020


Author: Quentin Colombet
Date: 2020-10-21T15:07:04-07:00
New Revision: ee6abef5323d59b983129bf3514ef6775d1d6cd5

URL: https://github.com/llvm/llvm-project/commit/ee6abef5323d59b983129bf3514ef6775d1d6cd5
DIFF: https://github.com/llvm/llvm-project/commit/ee6abef5323d59b983129bf3514ef6775d1d6cd5.diff

LOG: [ValueTracking] Interpret GEPs as a series of adds multiplied by the related scaling factor

Prior to this patch, computeKnownBits would only try to deduce trailing zero
bits for getelementptrs. This patch adds the logic to treat GEPs as a series
of adds, each multiplied by the related scaling factor.

Thanks to this patch, using a GEP or performing the address computation
directly "by hand" (ptrtoint followed by adds and muls, then inttoptr)
yields the same computeKnownBits information.

Previously, the "by hand" approach would have given more information.

This is related to https://llvm.org/PR47241.
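
For illustration (not part of the commit message), here is the equivalence
in question as a C++ analogy, where viaGEP corresponds to the gep form and
byHand to the ptrtoint/mul/add/inttoptr sequence:

    #include <cstdint>

    float *viaGEP(float *A, int64_t i) {
      return &A[i]; // roughly: getelementptr float, float* %A, i64 %i
    }

    float *byHand(float *A, int64_t i) {
      uintptr_t Base = reinterpret_cast<uintptr_t>(A);            // ptrtoint
      uintptr_t Off = static_cast<uintptr_t>(i) * sizeof(float);  // mul by scaling factor
      return reinterpret_cast<float *>(Base + Off);               // add, then inttoptr
    }

Both forms now yield the same known bits for the resulting pointer.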

Differential Revision: https://reviews.llvm.org/D86364

Added: 
    

Modified: 
    llvm/lib/Analysis/ValueTracking.cpp
    llvm/test/Transforms/InstCombine/constant-fold-address-space-pointer.ll
    llvm/test/Transforms/InstCombine/constant-fold-gep.ll
    llvm/unittests/Analysis/ValueTrackingTest.cpp

Removed: 
    


################################################################################
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 4dd536e76d06..9ecc5ce85e65 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -1358,24 +1358,32 @@ static void computeKnownBitsFromOperator(const Operator *I,
   case Instruction::GetElementPtr: {
     // Analyze all of the subscripts of this getelementptr instruction
     // to determine if we can prove known low zero bits.
-    KnownBits LocalKnown(BitWidth);
-    computeKnownBits(I->getOperand(0), LocalKnown, Depth + 1, Q);
-    unsigned TrailZ = LocalKnown.countMinTrailingZeros();
+    computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
+    // Accumulate the constant indices in a separate variable
+    // to minimize the number of calls to computeForAddSub.
+    APInt AccConstIndices(BitWidth, 0, /*IsSigned*/ true);
 
     gep_type_iterator GTI = gep_type_begin(I);
+    // If the inbounds keyword is not present, the offsets are added to the
+    // base address with silently-wrapping two's complement arithmetic.
+    bool IsInBounds = cast<GEPOperator>(I)->isInBounds();
     for (unsigned i = 1, e = I->getNumOperands(); i != e; ++i, ++GTI) {
       // TrailZ can only become smaller, short-circuit if we hit zero.
-      if (TrailZ == 0)
+      if (Known.isUnknown())
         break;
 
       Value *Index = I->getOperand(i);
+
+      // Handle case when index is zero.
+      Constant *CIndex = dyn_cast<Constant>(Index);
+      if (CIndex && CIndex->isZeroValue())
+        continue;
+
       if (StructType *STy = GTI.getStructTypeOrNull()) {
         // Handle struct member offset arithmetic.
 
-        // Handle case when index is vector zeroinitializer
-        Constant *CIndex = cast<Constant>(Index);
-        if (CIndex->isZeroValue())
-          continue;
+        assert(CIndex &&
+               "Access to structure field must be known at compile time");
 
         if (CIndex->getType()->isVectorTy())
           Index = CIndex->getSplatValue();
@@ -1383,26 +1391,58 @@ static void computeKnownBitsFromOperator(const Operator *I,
         unsigned Idx = cast<ConstantInt>(Index)->getZExtValue();
         const StructLayout *SL = Q.DL.getStructLayout(STy);
         uint64_t Offset = SL->getElementOffset(Idx);
-        TrailZ = std::min<unsigned>(TrailZ,
-                                    countTrailingZeros(Offset));
+        AccConstIndices += Offset;
+        continue;
+      }
+
+      // Handle array index arithmetic.
+      Type *IndexedTy = GTI.getIndexedType();
+      if (!IndexedTy->isSized()) {
+        Known.resetAll();
+        break;
+      }
+
+      unsigned IndexBitWidth = Index->getType()->getScalarSizeInBits();
+      KnownBits IndexBits(IndexBitWidth);
+      computeKnownBits(Index, IndexBits, Depth + 1, Q);
+      TypeSize IndexTypeSize = Q.DL.getTypeAllocSize(IndexedTy);
+      uint64_t TypeSizeInBytes = IndexTypeSize.getKnownMinSize();
+      KnownBits ScalingFactor(IndexBitWidth);
+      // Multiply by current sizeof type.
+      // &A[i] == A + i * sizeof(*A[i]).
+      if (IndexTypeSize.isScalable()) {
+        // For scalable types the only thing we know about sizeof is
+        // that this is a multiple of the minimum size.
+        ScalingFactor.Zero.setLowBits(countTrailingZeros(TypeSizeInBytes));
+      } else if (IndexBits.isConstant()) {
+        APInt IndexConst = IndexBits.getConstant();
+        APInt ScalingFactor(IndexBitWidth, TypeSizeInBytes);
+        IndexConst *= ScalingFactor;
+        AccConstIndices += IndexConst.sextOrTrunc(BitWidth);
+        continue;
       } else {
-        // Handle array index arithmetic.
-        Type *IndexedTy = GTI.getIndexedType();
-        if (!IndexedTy->isSized()) {
-          TrailZ = 0;
-          break;
-        }
-        unsigned GEPOpiBits = Index->getType()->getScalarSizeInBits();
-        uint64_t TypeSize = Q.DL.getTypeAllocSize(IndexedTy).getKnownMinSize();
-        LocalKnown.Zero = LocalKnown.One = APInt(GEPOpiBits, 0);
-        computeKnownBits(Index, LocalKnown, Depth + 1, Q);
-        TrailZ = std::min(TrailZ,
-                          unsigned(countTrailingZeros(TypeSize) +
-                                   LocalKnown.countMinTrailingZeros()));
+        ScalingFactor.Zero = ~TypeSizeInBytes;
+        ScalingFactor.One = TypeSizeInBytes;
       }
-    }
+      IndexBits = KnownBits::computeForMul(IndexBits, ScalingFactor);
+
+      // If the offsets have a different width from the pointer, according
+      // to the language reference we need to sign-extend or truncate them
+      // to the width of the pointer.
+      IndexBits = IndexBits.sextOrTrunc(BitWidth);
 
-    Known.Zero.setLowBits(TrailZ);
+      Known = KnownBits::computeForAddSub(
+          /*Add=*/true,
+          /*NSW=*/IsInBounds, Known, IndexBits);
+    }
+    if (!Known.isUnknown() && !AccConstIndices.isNullValue()) {
+      KnownBits Index(BitWidth);
+      Index.Zero = ~AccConstIndices;
+      Index.One = AccConstIndices;
+      Known = KnownBits::computeForAddSub(
+          /*Add=*/true,
+          /*NSW=*/IsInBounds, Known, Index);
+    }
     break;
   }
   case Instruction::PHI: {

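
As a side note for readers, the per-index math in the hunk above condenses
to the following standalone sketch (an illustration reusing the KnownBits
helpers visible in the diff, not code from the patch):

    #include "llvm/Support/KnownBits.h"
    using namespace llvm;

    // Known bits of `Base + Index * sizeof(T)` for one non-scalable,
    // non-constant array index.
    static KnownBits addScaledIndex(const KnownBits &Base, KnownBits Index,
                                    uint64_t TypeSizeInBytes, bool IsInBounds) {
      unsigned PtrWidth = Base.getBitWidth();
      // An exactly known constant has One == value and Zero == ~value.
      KnownBits ScalingFactor(Index.getBitWidth());
      ScalingFactor.One = APInt(Index.getBitWidth(), TypeSizeInBytes);
      ScalingFactor.Zero = ~ScalingFactor.One;
      // &A[i] == A + i * sizeof(*A[i]).
      Index = KnownBits::computeForMul(Index, ScalingFactor);
      // Sign-extend or truncate the offset to the pointer width.
      Index = Index.sextOrTrunc(PtrWidth);
      // Inbounds geps add with no signed wrap.
      return KnownBits::computeForAddSub(/*Add=*/true, /*NSW=*/IsInBounds,
                                         Base, Index);
    }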
diff --git a/llvm/test/Transforms/InstCombine/constant-fold-address-space-pointer.ll b/llvm/test/Transforms/InstCombine/constant-fold-address-space-pointer.ll
index 5a28354eb532..04f15ece02ab 100644
--- a/llvm/test/Transforms/InstCombine/constant-fold-address-space-pointer.ll
+++ b/llvm/test/Transforms/InstCombine/constant-fold-address-space-pointer.ll
@@ -225,7 +225,7 @@ define i32 @test_cast_gep_large_indices_as() {
 
 define i32 @test_constant_cast_gep_struct_indices_as() {
 ; CHECK-LABEL: @test_constant_cast_gep_struct_indices_as(
-; CHECK-NEXT:    [[Y:%.*]] = load i32, i32 addrspace(3)* getelementptr inbounds (%struct.foo, [[STRUCT_FOO:%.*]] addrspace(3)* @constant_fold_global_ptr, i16 0, i32 2, i16 2), align 8
+; CHECK-NEXT:    [[Y:%.*]] = load i32, i32 addrspace(3)* getelementptr inbounds (%struct.foo, [[STRUCT_FOO:%.*]] addrspace(3)* @constant_fold_global_ptr, i16 0, i32 2, i16 2), align 16
 ; CHECK-NEXT:    ret i32 [[Y]]
 ;
   %x = getelementptr %struct.foo, %struct.foo addrspace(3)* @constant_fold_global_ptr, i18 0, i32 2, i12 2

diff --git a/llvm/test/Transforms/InstCombine/constant-fold-gep.ll b/llvm/test/Transforms/InstCombine/constant-fold-gep.ll
index 277a900a48b7..cd49353f707b 100644
--- a/llvm/test/Transforms/InstCombine/constant-fold-gep.ll
+++ b/llvm/test/Transforms/InstCombine/constant-fold-gep.ll
@@ -15,20 +15,20 @@ define void @frob() {
 ; CHECK-NEXT:    store i32 1, i32* getelementptr inbounds ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 1), align 4
 ; CHECK-NEXT:    store i32 1, i32* getelementptr inbounds ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 2), align 8
 ; CHECK-NEXT:    store i32 1, i32* getelementptr inbounds ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 0, i32 1, i64 0), align 4
-; CHECK-NEXT:    store i32 1, i32* getelementptr inbounds ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 0, i32 1, i64 1), align 4
+; CHECK-NEXT:    store i32 1, i32* getelementptr inbounds ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 0, i32 1, i64 1), align 16
 ; CHECK-NEXT:    store i32 1, i32* getelementptr inbounds ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 0, i32 1, i64 2), align 4
 ; CHECK-NEXT:    store i32 1, i32* getelementptr inbounds ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 1, i32 0, i64 0), align 8
 ; CHECK-NEXT:    store i32 1, i32* getelementptr inbounds ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 1, i32 0, i64 1), align 4
-; CHECK-NEXT:    store i32 1, i32* getelementptr inbounds ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 1, i32 0, i64 2), align 8
+; CHECK-NEXT:    store i32 1, i32* getelementptr inbounds ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 1, i32 0, i64 2), align 16
 ; CHECK-NEXT:    store i32 1, i32* getelementptr inbounds ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 1, i32 1, i64 0), align 4
-; CHECK-NEXT:    store i32 1, i32* getelementptr inbounds ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 1, i32 1, i64 1), align 4
+; CHECK-NEXT:    store i32 1, i32* getelementptr inbounds ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 1, i32 1, i64 1), align 8
 ; CHECK-NEXT:    store i32 1, i32* getelementptr inbounds ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 1, i32 1, i64 2), align 4
 ; CHECK-NEXT:    store i32 1, i32* getelementptr inbounds ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 2, i32 0, i64 0), align 16
 ; CHECK-NEXT:    store i32 1, i32* getelementptr inbounds ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 2, i32 0, i64 1), align 4
 ; CHECK-NEXT:    store i32 1, i32* getelementptr inbounds ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 2, i32 0, i64 2), align 8
-; CHECK-NEXT:    store i32 1, i32* getelementptr inbounds ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 2, i32 1, i64 0), align 8
-; CHECK-NEXT:    store i32 1, i32* getelementptr inbounds ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 2, i32 1, i64 1), align 8
-; CHECK-NEXT:    store i32 1, i32* getelementptr inbounds ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 2, i32 1, i64 2), align 8
+; CHECK-NEXT:    store i32 1, i32* getelementptr inbounds ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 2, i32 1, i64 0), align 4
+; CHECK-NEXT:    store i32 1, i32* getelementptr inbounds ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 2, i32 1, i64 1), align 16
+; CHECK-NEXT:    store i32 1, i32* getelementptr inbounds ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 2, i32 1, i64 2), align 4
 ; CHECK-NEXT:    store i32 1, i32* getelementptr inbounds ([3 x %struct.X], [3 x %struct.X]* @Y, i64 1, i64 0, i32 0, i64 0), align 8
 ; CHECK-NEXT:    store i32 1, i32* getelementptr ([3 x %struct.X], [3 x %struct.X]* @Y, i64 2, i64 0, i32 0, i64 0), align 16
 ; CHECK-NEXT:    store i32 1, i32* getelementptr ([3 x %struct.X], [3 x %struct.X]* @Y, i64 1, i64 0, i32 0, i64 1), align 8
@@ -49,9 +49,9 @@ define void @frob() {
   store i32 1, i32* getelementptr ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 12), align 4
   store i32 1, i32* getelementptr ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 13), align 4
   store i32 1, i32* getelementptr ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 14), align 8
-  store i32 1, i32* getelementptr ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 15), align 8
+  store i32 1, i32* getelementptr ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 15), align 4
   store i32 1, i32* getelementptr ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 16), align 8
-  store i32 1, i32* getelementptr ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 17), align 8
+  store i32 1, i32* getelementptr ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 17), align 4
   store i32 1, i32* getelementptr ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 18), align 8
   store i32 1, i32* getelementptr ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 36), align 8
   store i32 1, i32* getelementptr ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 19), align 8
@@ -98,3 +98,22 @@ entry:
 
   ret i16 %E
 }
+
+; Check that we improve the alignment information.
+; The base pointer is 16-byte aligned and we access the field at
+; an offset of 8 bytes.
+; Every element in the @CallerInfos array is 16-byte aligned, so
+; any access from the following gep is 8-byte aligned.
+%struct.CallerInfo = type { i8*, i32 }
+ at CallerInfos = global [128 x %struct.CallerInfo] zeroinitializer, align 16
+
+define i32 @test_gep_in_struct(i64 %idx) {
+; CHECK-LABEL: @test_gep_in_struct(
+; CHECK-NEXT:    [[NS7:%.*]] = getelementptr inbounds [128 x %struct.CallerInfo], [128 x %struct.CallerInfo]* @CallerInfos, i64 0, i64 [[IDX:%.*]], i32 1
+; CHECK-NEXT:    [[RES:%.*]] = load i32, i32* [[NS7]], align 8
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %NS7 = getelementptr inbounds [128 x %struct.CallerInfo], [128 x %struct.CallerInfo]* @CallerInfos, i64 0, i64 %idx, i32 1
+  %res = load i32, i32* %NS7, align 1
+  ret i32 %res
+}

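The alignment reasoning of the new test can be spelled out numerically
(illustration only, assuming 64-bit pointers): every address the gep
produces is base + idx * 16 + 8 with a 16-byte aligned base, so bits 0-2
are known zero and bit 3 is known one, i.e. the access is exactly 8-byte
aligned:

    #include <cassert>
    #include <cstdint>

    int main() {
      // sizeof(%struct.CallerInfo) is 16 on a 64-bit target (i8* plus i32,
      // padded); the i32 field lives at offset 8.
      for (uint64_t Idx = 0; Idx < 128; ++Idx) {
        uint64_t Addr = /*Base=*/0 + Idx * 16 + 8; // any 16-byte aligned base
        assert(Addr % 8 == 0 && Addr % 16 != 0);   // exactly 8-byte aligned
      }
      return 0;
    }
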
diff --git a/llvm/unittests/Analysis/ValueTrackingTest.cpp b/llvm/unittests/Analysis/ValueTrackingTest.cpp
index f2cafa6aa510..5b5c3a36677e 100644
--- a/llvm/unittests/Analysis/ValueTrackingTest.cpp
+++ b/llvm/unittests/Analysis/ValueTrackingTest.cpp
@@ -1208,6 +1208,66 @@ TEST_F(ComputeKnownBitsTest, ComputeKnownBitsAddWithRangeNoOverlap) {
   EXPECT_EQ(Known.getMaxValue(), 575);
 }
 
+TEST_F(ComputeKnownBitsTest, ComputeKnownBitsGEPWithRange) {
+  parseAssembly(
+      "define void @test(i64* %p) {\n"
+      "  %A = load i64, i64* %p, !range !{i64 64, i64 65536}\n"
+      "  %APtr = inttoptr i64 %A to float*"
+      "  %APtrPlus512 = getelementptr float, float* %APtr, i32 128\n"
+      "  %c = icmp ugt float* %APtrPlus512, inttoptr (i32 523 to float*)\n"
+      "  call void @llvm.assume(i1 %c)\n"
+      "  ret void\n"
+      "}\n"
+      "declare void @llvm.assume(i1)\n");
+  AssumptionCache AC(*F);
+  KnownBits Known = computeKnownBits(A, M->getDataLayout(), /* Depth */ 0, &AC,
+                                     F->front().getTerminator());
+  EXPECT_EQ(Known.Zero.getZExtValue(), ~(65536llu - 1));
+  EXPECT_EQ(Known.One.getZExtValue(), 0u);
+  Instruction &APtrPlus512 = findInstructionByName(F, "APtrPlus512");
+  Known = computeKnownBits(&APtrPlus512, M->getDataLayout(), /* Depth */ 0, &AC,
+                           F->front().getTerminator());
+  // We know one fewer zero bit because adding 512 may have produced a 1
+  // that got carried all the way to the first trailing zero.
+  EXPECT_EQ(Known.Zero.getZExtValue(), ~(65536llu - 1) << 1);
+  EXPECT_EQ(Known.One.getZExtValue(), 0u);
+  // The known range is not precise given computeKnownBits works
+  // with the masks of zeros and ones, not the ranges.
+  EXPECT_EQ(Known.getMinValue(), 0u);
+  EXPECT_EQ(Known.getMaxValue(), 131071);
+}
+
+// 4*128 + [32, 64) doesn't produce overlapping bits.
+// Make sure we get all the individual bits properly.
+// This test checks that we account for the scaling factor in the gep:
+// gep float, [32,64), 128 computes 4*128 + [32,64), not 128 + [32,64).
+TEST_F(ComputeKnownBitsTest, ComputeKnownBitsGEPWithRangeNoOverlap) {
+  parseAssembly(
+      "define void @test(i64* %p) {\n"
+      "  %A = load i64, i64* %p, !range !{i64 32, i64 64}\n"
+      "  %APtr = inttoptr i64 %A to float*"
+      "  %APtrPlus512 = getelementptr float, float* %APtr, i32 128\n"
+      "  %c = icmp ugt float* %APtrPlus512, inttoptr (i32 523 to float*)\n"
+      "  call void @llvm.assume(i1 %c)\n"
+      "  ret void\n"
+      "}\n"
+      "declare void @llvm.assume(i1)\n");
+  AssumptionCache AC(*F);
+  KnownBits Known = computeKnownBits(A, M->getDataLayout(), /* Depth */ 0, &AC,
+                                     F->front().getTerminator());
+  EXPECT_EQ(Known.Zero.getZExtValue(), ~(64llu - 1));
+  EXPECT_EQ(Known.One.getZExtValue(), 32u);
+  Instruction &APtrPlus512 = findInstructionByName(F, "APtrPlus512");
+  Known = computeKnownBits(&APtrPlus512, M->getDataLayout(), /* Depth */ 0, &AC,
+                           F->front().getTerminator());
+  EXPECT_EQ(Known.Zero.getZExtValue(), ~512llu & ~(64llu - 1));
+  EXPECT_EQ(Known.One.getZExtValue(), 512u | 32u);
+  // The known range is not precise given computeKnownBits works
+  // with the masks of zeros and ones, not the ranges.
+  EXPECT_EQ(Known.getMinValue(), 544);
+  EXPECT_EQ(Known.getMaxValue(), 575);
+}
+
 class IsBytewiseValueTest : public ValueTrackingTest,
                             public ::testing::WithParamInterface<
                                 std::pair<const char *, const char *>> {

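To sanity-check the expectations in the two new unit tests, here is the
arithmetic as a standalone snippet (an illustration under the stated
ranges, not part of the commit):

    #include <cassert>
    #include <cstdint>

    int main() {
      // First test: %A in [64, 65536) and the gep adds 4 * 128 = 512.
      // The sum stays below 2^17 but can exceed 2^16 - 1, so exactly one
      // high known-zero bit is lost.
      for (uint64_t A = 64; A < 65536; ++A)
        assert(A + 512 < (1llu << 17));
      assert(65535llu + 512 >= (1llu << 16));

      // Second test: %A in [32, 64) has bit 5 known one and bits 0-4
      // unknown; adding 4 * 128 = 512 only sets bit 9, so nothing overlaps.
      uint64_t One = 512u | 32u;              // bits known one
      uint64_t Zero = ~512llu & ~(64llu - 1); // bits known zero
      assert(One == 544 && ~Zero == 575);     // hence min 544, max 575
      return 0;
    }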

        

