[llvm] 1ec0cc0 - [InstCombine][SVE] Fix visitExtractElementInst for scalable type.

Huihui Zhang via llvm-commits llvm-commits at lists.llvm.org
Thu May 7 13:04:07 PDT 2020


Author: Huihui Zhang
Date: 2020-05-07T13:03:52-07:00
New Revision: 1ec0cc0f02432ef640173b319a9c3b13fc850d33

URL: https://github.com/llvm/llvm-project/commit/1ec0cc0f02432ef640173b319a9c3b13fc850d33
DIFF: https://github.com/llvm/llvm-project/commit/1ec0cc0f02432ef640173b319a9c3b13fc850d33.diff

LOG: [InstCombine][SVE] Fix visitExtractElementInst for scalable type.

Summary:
This patch fix the following issues with visitExtractElementInst:

      1. Restrict VectorUtils::findScalarElement to fixed-length vector.
         For scalable type, the number of elements in shuffle mask is
         unknown at compile-time.
      2. Fix out-of-range calculation for fixed-length vector.
      3. Skip scalable type when analysis rely on fixed number of elements.
      4. Add unit tests to check functionality of extractelement for scalable type.

Reviewers: sdesmalen, efriedma, spatel, nikic

Reviewed By: efriedma

Subscribers: tschuett, hiraditya, rkruppe, psnobl, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D78267

Added: 
    llvm/test/Transforms/InstCombine/vscale_extractelement.ll

Modified: 
    llvm/lib/Analysis/VectorUtils.cpp
    llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp
index a96686aaf1c5..f16b04e4402a 100644
--- a/llvm/lib/Analysis/VectorUtils.cpp
+++ b/llvm/lib/Analysis/VectorUtils.cpp
@@ -288,9 +288,11 @@ Value *llvm::findScalarElement(Value *V, unsigned EltNo) {
     return findScalarElement(III->getOperand(0), EltNo);
   }
 
-  if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(V)) {
+  ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(V);
+  // Restrict the following transformation to fixed-length vector.
+  if (SVI && isa<FixedVectorType>(SVI->getType())) {
     unsigned LHSWidth =
-        cast<VectorType>(SVI->getOperand(0)->getType())->getNumElements();
+        cast<FixedVectorType>(SVI->getOperand(0)->getType())->getNumElements();
     int InEl = SVI->getMaskValue(EltNo);
     if (InEl < 0)
       return UndefValue::get(VTy->getElementType());

diff  --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index feb618383e74..b2dc7259e139 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -332,14 +332,18 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
   // find a previously computed scalar that was inserted into the vector.
   auto *IndexC = dyn_cast<ConstantInt>(Index);
   if (IndexC) {
-    unsigned NumElts = EI.getVectorOperandType()->getNumElements();
+    ElementCount EC = EI.getVectorOperandType()->getElementCount();
+    unsigned NumElts = EC.Min;
 
     // InstSimplify should handle cases where the index is invalid.
-    if (!IndexC->getValue().ule(NumElts))
+    // For fixed-length vector, it's invalid to extract out-of-range element.
+    if (!EC.Scalable && IndexC->getValue().uge(NumElts))
       return nullptr;
 
     // This instruction only demands the single element from the input vector.
-    if (NumElts != 1) {
+    // Skip for scalable type, the number of elements is unknown at
+    // compile-time.
+    if (!EC.Scalable && NumElts != 1) {
       // If the input vector has a single use, simplify it based on this use
       // property.
       if (SrcVec->hasOneUse()) {
@@ -417,11 +421,13 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
     } else if (auto *SVI = dyn_cast<ShuffleVectorInst>(I)) {
       // If this is extracting an element from a shufflevector, figure out where
       // it came from and extract from the appropriate input element instead.
-      if (auto *Elt = dyn_cast<ConstantInt>(Index)) {
-        int SrcIdx = SVI->getMaskValue(Elt->getZExtValue());
+      // Restrict the following transformation to fixed-length vector.
+      if (isa<FixedVectorType>(SVI->getType()) && isa<ConstantInt>(Index)) {
+        int SrcIdx =
+            SVI->getMaskValue(cast<ConstantInt>(Index)->getZExtValue());
         Value *Src;
-        unsigned LHSWidth =
-            cast<VectorType>(SVI->getOperand(0)->getType())->getNumElements();
+        unsigned LHSWidth = cast<FixedVectorType>(SVI->getOperand(0)->getType())
+                                ->getNumElements();
 
         if (SrcIdx < 0)
           return replaceInstUsesWith(EI, UndefValue::get(EI.getType()));
@@ -432,9 +438,8 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
           Src = SVI->getOperand(1);
         }
         Type *Int32Ty = Type::getInt32Ty(EI.getContext());
-        return ExtractElementInst::Create(Src,
-                                          ConstantInt::get(Int32Ty,
-                                                           SrcIdx, false));
+        return ExtractElementInst::Create(
+            Src, ConstantInt::get(Int32Ty, SrcIdx, false));
       }
     } else if (auto *CI = dyn_cast<CastInst>(I)) {
       // Canonicalize extractelement(cast) -> cast(extractelement).

diff  --git a/llvm/test/Transforms/InstCombine/vscale_extractelement.ll b/llvm/test/Transforms/InstCombine/vscale_extractelement.ll
new file mode 100644
index 000000000000..397767dcfd91
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/vscale_extractelement.ll
@@ -0,0 +1,148 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define i32 @extractelement_in_range(<vscale x 4 x i32> %a) {
+; CHECK-LABEL: @extractelement_in_range(
+; CHECK-NEXT:    [[R:%.*]] = extractelement <vscale x 4 x i32> [[A:%.*]], i64 1
+; CHECK-NEXT:    ret i32 [[R]]
+;
+  %r = extractelement <vscale x 4 x i32> %a, i64 1
+  ret i32 %r
+}
+
+define i32 @extractelement_maybe_out_of_range(<vscale x 4 x i32> %a) {
+; CHECK-LABEL: @extractelement_maybe_out_of_range(
+; CHECK-NEXT:    [[R:%.*]] = extractelement <vscale x 4 x i32> [[A:%.*]], i64 4
+; CHECK-NEXT:    ret i32 [[R]]
+;
+  %r = extractelement <vscale x 4 x i32> %a, i64 4
+  ret i32 %r
+}
+
+define i32 @extractelement_bitcast(float %f) {
+; CHECK-LABEL: @extractelement_bitcast(
+; CHECK-NEXT:    [[R:%.*]] = bitcast float [[F:%.*]] to i32
+; CHECK-NEXT:    ret i32 [[R]]
+;
+  %vec_float = insertelement <vscale x 4 x float> undef, float %f, i32 0
+  %vec_int = bitcast <vscale x 4 x float> %vec_float to <vscale x 4 x i32>
+  %r = extractelement <vscale x 4 x i32> %vec_int, i32 0
+  ret i32 %r
+}
+
+define i8 @extractelement_bitcast_to_trunc(<vscale x 2 x i32> %a, i32 %x) {
+; CHECK-LABEL: @extractelement_bitcast_to_trunc(
+; CHECK-NEXT:    [[R:%.*]] = trunc i32 [[X:%.*]] to i8
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %vec = insertelement <vscale x 2 x i32> %a, i32 %x, i32 1
+  %vec_cast = bitcast <vscale x 2 x i32> %vec to <vscale x 8 x i8>
+  %r = extractelement <vscale x 8 x i8> %vec_cast, i32 4
+  ret i8 %r
+}
+
+; TODO: Instcombine could remove the insert.
+define i8 @extractelement_bitcast_wrong_insert(<vscale x 2 x i32> %a, i32 %x) {
+; CHECK-LABEL: @extractelement_bitcast_wrong_insert(
+; CHECK-NEXT:    [[VEC:%.*]] = insertelement <vscale x 2 x i32> [[A:%.*]], i32 [[X:%.*]], i32 1
+; CHECK-NEXT:    [[VEC_CAST:%.*]] = bitcast <vscale x 2 x i32> [[VEC]] to <vscale x 8 x i8>
+; CHECK-NEXT:    [[R:%.*]] = extractelement <vscale x 8 x i8> [[VEC_CAST]], i32 2
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %vec = insertelement <vscale x 2 x i32> %a, i32 %x, i32 1 ; <- This insert could be removed.
+  %vec_cast = bitcast <vscale x 2 x i32> %vec to <vscale x 8 x i8>
+  %r = extractelement <vscale x 8 x i8> %vec_cast, i32 2
+  ret i8 %r
+}
+
+; TODO: Instcombine could optimize to return %v.
+define i32 @extractelement_shuffle_in_range(i32 %v) {
+; CHECK-LABEL: @extractelement_shuffle_in_range(
+; CHECK-NEXT:    [[IN:%.*]] = insertelement <vscale x 4 x i32> undef, i32 [[V:%.*]], i32 0
+; CHECK-NEXT:    [[SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[IN]], <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT:    [[R:%.*]] = extractelement <vscale x 4 x i32> [[SPLAT]], i32 1
+; CHECK-NEXT:    ret i32 [[R]]
+;
+  %in = insertelement <vscale x 4 x i32> undef, i32 %v, i32 0
+  %splat = shufflevector <vscale x 4 x i32> %in, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
+  %r = extractelement <vscale x 4 x i32> %splat, i32 1
+  ret i32 %r
+}
+
+define i32 @extractelement_shuffle_maybe_out_of_range(i32 %v) {
+; CHECK-LABEL: @extractelement_shuffle_maybe_out_of_range(
+; CHECK-NEXT:    [[IN:%.*]] = insertelement <vscale x 4 x i32> undef, i32 [[V:%.*]], i32 0
+; CHECK-NEXT:    [[SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[IN]], <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT:    [[R:%.*]] = extractelement <vscale x 4 x i32> [[SPLAT]], i32 4
+; CHECK-NEXT:    ret i32 [[R]]
+;
+  %in = insertelement <vscale x 4 x i32> undef, i32 %v, i32 0
+  %splat = shufflevector <vscale x 4 x i32> %in, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
+  %r = extractelement <vscale x 4 x i32> %splat, i32 4
+  ret i32 %r
+}
+
+define i32 @extractelement_shuffle_invalid_index(i32 %v) {
+; CHECK-LABEL: @extractelement_shuffle_invalid_index(
+; CHECK-NEXT:    [[IN:%.*]] = insertelement <vscale x 4 x i32> undef, i32 [[V:%.*]], i32 0
+; CHECK-NEXT:    [[SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[IN]], <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT:    [[R:%.*]] = extractelement <vscale x 4 x i32> [[SPLAT]], i32 -1
+; CHECK-NEXT:    ret i32 [[R]]
+;
+  %in = insertelement <vscale x 4 x i32> undef, i32 %v, i32 0
+  %splat = shufflevector <vscale x 4 x i32> %in, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
+  %r = extractelement <vscale x 4 x i32> %splat, i32 -1
+  ret i32 %r
+}
+
+
+define i32 @extractelement_shuffle_symbolic_index(i32 %v, i32 %idx) {
+; CHECK-LABEL: @extractelement_shuffle_symbolic_index(
+; CHECK-NEXT:    [[IN:%.*]] = insertelement <vscale x 4 x i32> undef, i32 [[V:%.*]], i32 0
+; CHECK-NEXT:    [[SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[IN]], <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT:    [[R:%.*]] = extractelement <vscale x 4 x i32> [[SPLAT]], i32 [[IDX:%.*]]
+; CHECK-NEXT:    ret i32 [[R]]
+;
+  %in = insertelement <vscale x 4 x i32> undef, i32 %v, i32 0
+  %splat = shufflevector <vscale x 4 x i32> %in, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
+  %r = extractelement <vscale x 4 x i32> %splat, i32 %idx
+  ret i32 %r
+}
+
+define <vscale x 4 x i32> @extractelement_insertelement_same_positions(<vscale x 4 x i32> %vec) {
+; CHECK-LABEL: @extractelement_insertelement_same_positions(
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[VEC:%.*]]
+;
+  %vec.e0 = extractelement <vscale x 4 x i32> %vec, i32 0
+  %vec.e1 = extractelement <vscale x 4 x i32> %vec, i32 1
+  %vec.e2 = extractelement <vscale x 4 x i32> %vec, i32 2
+  %vec.e3 = extractelement <vscale x 4 x i32> %vec, i32 3
+  %1 = insertelement <vscale x 4 x i32> %vec, i32 %vec.e0, i32 0
+  %2 = insertelement <vscale x 4 x i32> %1, i32 %vec.e1, i32 1
+  %3 = insertelement <vscale x 4 x i32> %2, i32 %vec.e2, i32 2
+  %4 = insertelement <vscale x 4 x i32> %3, i32 %vec.e3, i32 3
+  ret <vscale x 4 x i32> %4
+}
+
+define <vscale x 4 x i32> @extractelement_insertelement_
diff _positions(<vscale x 4 x i32> %vec) {
+; CHECK-LABEL: @extractelement_insertelement_
diff _positions(
+; CHECK-NEXT:    [[VEC_E0:%.*]] = extractelement <vscale x 4 x i32> [[VEC:%.*]], i32 4
+; CHECK-NEXT:    [[VEC_E1:%.*]] = extractelement <vscale x 4 x i32> [[VEC]], i32 5
+; CHECK-NEXT:    [[VEC_E2:%.*]] = extractelement <vscale x 4 x i32> [[VEC]], i32 6
+; CHECK-NEXT:    [[VEC_E3:%.*]] = extractelement <vscale x 4 x i32> [[VEC]], i32 7
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <vscale x 4 x i32> undef, i32 [[VEC_E0]], i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <vscale x 4 x i32> [[TMP1]], i32 [[VEC_E1]], i32 1
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <vscale x 4 x i32> [[TMP2]], i32 [[VEC_E2]], i32 2
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <vscale x 4 x i32> [[TMP3]], i32 [[VEC_E3]], i32 3
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP4]]
+;
+  %vec.e0 = extractelement <vscale x 4 x i32> %vec, i32 4
+  %vec.e1 = extractelement <vscale x 4 x i32> %vec, i32 5
+  %vec.e2 = extractelement <vscale x 4 x i32> %vec, i32 6
+  %vec.e3 = extractelement <vscale x 4 x i32> %vec, i32 7
+  %1 = insertelement <vscale x 4 x i32> %vec, i32 %vec.e0, i32 0
+  %2 = insertelement <vscale x 4 x i32> %1, i32 %vec.e1, i32 1
+  %3 = insertelement <vscale x 4 x i32> %2, i32 %vec.e2, i32 2
+  %4 = insertelement <vscale x 4 x i32> %3, i32 %vec.e3, i32 3
+  ret <vscale x 4 x i32> %4
+}


        


More information about the llvm-commits mailing list