[llvm] 5c1d1a6 - [InstCombine][SVE] Fix visitGetElementPtrInst for scalable type.

Tue Apr 14 12:38:46 PDT 2020

Author: Huihui Zhang
Date: 2020-04-14T12:38:32-07:00
New Revision: 5c1d1a62e3754e5f3aa3f5e5e03d13acd9f973e4

URL: https://github.com/llvm/llvm-project/commit/5c1d1a62e3754e5f3aa3f5e5e03d13acd9f973e4
DIFF: https://github.com/llvm/llvm-project/commit/5c1d1a62e3754e5f3aa3f5e5e03d13acd9f973e4.diff

LOG: [InstCombine][SVE] Fix visitGetElementPtrInst for scalable type.

Summary:
This patch fix the following issues in InstCombiner::visitGetElementPtrInst

    1. Skip for scalable type if transformation requires fixed size number of
    vector element.
    2. Skip for scalable type if transformation relies on compile-time known type
    alloc size.
    3. Use VectorType::getElementCount when scalable property is used to construct
    new VectorType.
    4. Use TypeSize::getKnownMinSize when minimal size of a scalable type is valid to determine GEP 'inbounds'.
    5. Explicitly call TypeSize::getFixedSize to avoid implicit type conversion to uint64_t.

Reviewers: sdesmalen, efriedma, spatel, ctetreau

Reviewed By: efriedma

Subscribers: tschuett, hiraditya, rkruppe, psnobl, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D78081

Added: 
    llvm/test/Transforms/InstCombine/vscale_gep.ll

Modified: 
    llvm/lib/Transforms/InstCombine/InstructionCombining.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index cd5126884dad..4d286f19f919 100644

--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1854,12 +1854,16 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
   SmallVector<Value*, 8> Ops(GEP.op_begin(), GEP.op_end());
   Type *GEPType = GEP.getType();
   Type *GEPEltType = GEP.getSourceElementType();
+  bool IsGEPSrcEleScalable =
+      GEPEltType->isVectorTy() && cast<VectorType>(GEPEltType)->isScalable();
   if (Value *V = SimplifyGEPInst(GEPEltType, Ops, SQ.getWithInstruction(&GEP)))
     return replaceInstUsesWith(GEP, V);
 
   // For vector geps, use the generic demanded vector support.
-  if (auto *GEPVTy = dyn_cast<VectorType>(GEP.getType())) {
-    auto VWidth = GEPVTy->getNumElements();
+  // Skip if GEP return type is scalable. The number of elements is unknown at
+  // compile-time.
+  if (GEPType->isVectorTy() && !cast<VectorType>(GEPType)->isScalable()) {
+    auto VWidth = cast<VectorType>(GEPType)->getNumElements();
     APInt UndefElts(VWidth, 0);
     APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
     if (Value *V = SimplifyDemandedVectorElts(&GEP, AllOnesEltMask,
@@ -1896,7 +1900,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
     Type *NewIndexType =
         IndexTy->isVectorTy()
             ? VectorType::get(NewScalarIndexTy,
-                              cast<VectorType>(IndexTy)->getNumElements())
+                              cast<VectorType>(IndexTy)->getElementCount())
             : NewScalarIndexTy;
 
     // If the element type has zero size then any index over it is equivalent
@@ -2146,11 +2150,13 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
                                              GEP.getName());
   }
 
-  if (GEP.getNumIndices() == 1) {
+  // Skip if GEP source element type is scalable. The type alloc size is unknown
+  // at compile-time.
+  if (GEP.getNumIndices() == 1 && !IsGEPSrcEleScalable) {
     unsigned AS = GEP.getPointerAddressSpace();
     if (GEP.getOperand(1)->getType()->getScalarSizeInBits() ==
         DL.getIndexSizeInBits(AS)) {
-      uint64_t TyAllocSize = DL.getTypeAllocSize(GEPEltType);
+      uint64_t TyAllocSize = DL.getTypeAllocSize(GEPEltType).getFixedSize();
 
       bool Matched = false;
       uint64_t C;
@@ -2263,10 +2269,12 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
           }
         }
       }
-    } else if (GEP.getNumOperands() == 2) {
-      // Transform things like:
-      // %t = getelementptr i32* bitcast ([2 x i32]* %str to i32*), i32 %V
-      // into:  %t1 = getelementptr [2 x i32]* %str, i32 0, i32 %V; bitcast
+    } else if (GEP.getNumOperands() == 2 && !IsGEPSrcEleScalable) {
+      // Skip if GEP source element type is scalable. The type alloc size is
+      // unknown at compile-time.
+      // Transform things like: %t = getelementptr i32*
+      // bitcast ([2 x i32]* %str to i32*), i32 %V into:  %t1 = getelementptr [2
+      // x i32]* %str, i32 0, i32 %V; bitcast
       if (StrippedPtrEltTy->isArrayTy() &&
           DL.getTypeAllocSize(StrippedPtrEltTy->getArrayElementType()) ==
               DL.getTypeAllocSize(GEPEltType)) {
@@ -2290,8 +2298,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
       if (GEPEltType->isSized() && StrippedPtrEltTy->isSized()) {
         // Check that changing the type amounts to dividing the index by a scale
         // factor.
-        uint64_t ResSize = DL.getTypeAllocSize(GEPEltType);
-        uint64_t SrcSize = DL.getTypeAllocSize(StrippedPtrEltTy);
+        uint64_t ResSize = DL.getTypeAllocSize(GEPEltType).getFixedSize();
+        uint64_t SrcSize = DL.getTypeAllocSize(StrippedPtrEltTy).getFixedSize();
         if (ResSize && SrcSize % ResSize == 0) {
           Value *Idx = GEP.getOperand(1);
           unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits();
@@ -2330,9 +2338,10 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
           StrippedPtrEltTy->isArrayTy()) {
         // Check that changing to the array element type amounts to dividing the
         // index by a scale factor.
-        uint64_t ResSize = DL.getTypeAllocSize(GEPEltType);
+        uint64_t ResSize = DL.getTypeAllocSize(GEPEltType).getFixedSize();
         uint64_t ArrayEltSize =
-            DL.getTypeAllocSize(StrippedPtrEltTy->getArrayElementType());
+            DL.getTypeAllocSize(StrippedPtrEltTy->getArrayElementType())
+                .getFixedSize();
         if (ResSize && ArrayEltSize % ResSize == 0) {
           Value *Idx = GEP.getOperand(1);
           unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits();
@@ -2480,7 +2489,9 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
     if (auto *AI = dyn_cast<AllocaInst>(UnderlyingPtrOp)) {
       if (GEP.accumulateConstantOffset(DL, BasePtrOffset) &&
           BasePtrOffset.isNonNegative()) {
-        APInt AllocSize(IdxWidth, DL.getTypeAllocSize(AI->getAllocatedType()));
+        APInt AllocSize(
+            IdxWidth,
+            DL.getTypeAllocSize(AI->getAllocatedType()).getKnownMinSize());
         if (BasePtrOffset.ule(AllocSize)) {
           return GetElementPtrInst::CreateInBounds(
               GEP.getSourceElementType(), PtrOp, makeArrayRef(Ops).slice(1),

diff  --git a/llvm/test/Transforms/InstCombine/vscale_gep.ll b/llvm/test/Transforms/InstCombine/vscale_gep.ll
new file mode 100644
index 000000000000..65bede711c42
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/vscale_gep.ll
@@ -0,0 +1,68 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+; This test is used to verify we are not crashing at Assertion `CastInst::castIsValid(opc, C, Ty) && "Invalid constantexpr cast!".
+define <vscale x 2 x i8*> @gep_index_type_is_scalable(i8* %p) {
+; CHECK-LABEL: @gep_index_type_is_scalable(
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i8, i8* [[P:%.*]], <vscale x 2 x i64> undef
+; CHECK-NEXT:    ret <vscale x 2 x i8*> [[GEP]]
+;
+  %gep = getelementptr i8, i8* %p, <vscale x 2 x i64> undef
+  ret <vscale x 2 x i8*> %gep
+}
+
+; This test serves to verify code changes for "GEP.getNumIndices() == 1".
+define <vscale x 4 x i32>* @gep_num_of_indices_1(<vscale x 4 x i32>* %p) {
+; CHECK-LABEL: @gep_num_of_indices_1(
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[P:%.*]], i64 1
+; CHECK-NEXT:    ret <vscale x 4 x i32>* [[GEP]]
+;
+  %gep = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %p, i64 1
+  ret <vscale x 4 x i32>* %gep
+}
+
+; This test serves to verify code changes for "GEP.getNumOperands() == 2".
+define void @gep_bitcast(i8* %p) {
+; CHECK-LABEL: @gep_bitcast(
+; CHECK-NEXT:    [[CAST:%.*]] = bitcast i8* [[P:%.*]] to <vscale x 16 x i8>*
+; CHECK-NEXT:    store <vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8>* [[CAST]], align 16
+; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* [[CAST]], i64 1
+; CHECK-NEXT:    store <vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8>* [[GEP2]], align 16
+; CHECK-NEXT:    ret void
+;
+  %cast = bitcast i8* %p to <vscale x 16 x i8>*
+  %gep1 = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %cast, i64 0
+  store <vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8>* %gep1
+  %gep2 = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %cast, i64 1
+  store <vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8>* %gep2
+  ret void
+}
+
+; These tests serve to verify code changes when underlying gep ptr is alloca.
+; This test is to verify 'inbounds' is added when it's valid to accumulate constant offset.
+define i32 @gep_alloca_inbounds_vscale_zero() {
+; CHECK-LABEL: @gep_alloca_inbounds_vscale_zero(
+; CHECK-NEXT:    [[A:%.*]] = alloca <vscale x 4 x i32>, align 16
+; CHECK-NEXT:    [[TMP:%.*]] = getelementptr inbounds <vscale x 4 x i32>, <vscale x 4 x i32>* [[A]], i64 0, i64 2
+; CHECK-NEXT:    [[LOAD:%.*]] = load i32, i32* [[TMP]], align 8
+; CHECK-NEXT:    ret i32 [[LOAD]]
+;
+  %a = alloca <vscale x 4 x i32>
+  %tmp = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %a, i32 0, i32 2
+  %load = load i32, i32* %tmp
+  ret i32 %load
+}
+
+; This test is to verify 'inbounds' is not added when a constant offset can not be determined at compile-time.
+define i32 @gep_alloca_inbounds_vscale_nonzero() {
+; CHECK-LABEL: @gep_alloca_inbounds_vscale_nonzero(
+; CHECK-NEXT:    [[A:%.*]] = alloca <vscale x 4 x i32>, align 16
+; CHECK-NEXT:    [[TMP:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[A]], i64 1, i64 2
+; CHECK-NEXT:    [[LOAD:%.*]] = load i32, i32* [[TMP]], align 8
+; CHECK-NEXT:    ret i32 [[LOAD]]
+;
+  %a = alloca <vscale x 4 x i32>
+  %tmp = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %a, i32 1, i32 2
+  %load = load i32, i32* %tmp
+  ret i32 %load
+}