[llvm] 31574d3 - [SVE] Fix bug in simplification of scalable vector instructions

Mon Feb 3 10:16:43 PST 2020

Author: Christopher Tetreault
Date: 2020-02-03T10:15:56-08:00
New Revision: 31574d38ac5fa4646cf01dd252a23e682402134f

URL: https://github.com/llvm/llvm-project/commit/31574d38ac5fa4646cf01dd252a23e682402134f
DIFF: https://github.com/llvm/llvm-project/commit/31574d38ac5fa4646cf01dd252a23e682402134f.diff

LOG: [SVE] Fix bug in simplification of scalable vector instructions

Summary:
* Most of the simplifications in SimplifyShuffleVectorInst depend on the
concrete value of, or the length of the mask vector. For scalable
vectors, this cannot be known at compile time.
** for these tests, detect if the vector is scalable before attempting
the transformation
* The functions ShuffleVectorInst::getMaskValue and
ShuffleVectorInst::getShuffleMask access the value of the constant mask.
However, since the length of the mask is unknown at compile time, these
function do not work for scalable vectors. Add asserts to ensure that
the input mask is not scalable

Reviewers: efriedma, sdesmalen, apazos, chrisj, huihuiz

Reviewed By: efriedma

Subscribers: tschuett, hiraditya, rkruppe, psnobl, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D73555

Added: 
    llvm/test/Analysis/ConstantFolding/vscale-getelementptr.ll
    llvm/test/Analysis/ConstantFolding/vscale-shufflevector.ll

Modified: 
    llvm/include/llvm/IR/Instructions.h
    llvm/lib/Analysis/InstructionSimplify.cpp
    llvm/lib/AsmParser/LLParser.cpp
    llvm/lib/IR/ConstantFold.cpp
    llvm/lib/IR/Instructions.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/IR/Instructions.h b/llvm/include/llvm/IR/Instructions.h
index c3d4dc96f97e..e558d4317efc 100644

--- a/llvm/include/llvm/IR/Instructions.h
+++ b/llvm/include/llvm/IR/Instructions.h
@@ -1060,13 +1060,13 @@ class GetElementPtrInst : public Instruction {
                                    Ptr->getType()->getPointerAddressSpace());
     // Vector GEP
     if (Ptr->getType()->isVectorTy()) {
-      unsigned NumElem = Ptr->getType()->getVectorNumElements();
-      return VectorType::get(PtrTy, NumElem);
+      ElementCount EltCount = Ptr->getType()->getVectorElementCount();
+      return VectorType::get(PtrTy, EltCount);
     }
     for (Value *Index : IdxList)
       if (Index->getType()->isVectorTy()) {
-        unsigned NumElem = Index->getType()->getVectorNumElements();
-        return VectorType::get(PtrTy, NumElem);
+        ElementCount EltCount = Index->getType()->getVectorElementCount();
+        return VectorType::get(PtrTy, EltCount);
       }
     // Scalar GEP
     return PtrTy;

diff  --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index 98f64b9edb3d..bc1b008f1c96 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -4074,9 +4074,9 @@ static Value *SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops,
   Type *LastType = GetElementPtrInst::getIndexedType(SrcTy, Ops.slice(1));
   Type *GEPTy = PointerType::get(LastType, AS);
   if (VectorType *VT = dyn_cast<VectorType>(Ops[0]->getType()))
-    GEPTy = VectorType::get(GEPTy, VT->getNumElements());
+    GEPTy = VectorType::get(GEPTy, VT->getElementCount());
   else if (VectorType *VT = dyn_cast<VectorType>(Ops[1]->getType()))
-    GEPTy = VectorType::get(GEPTy, VT->getNumElements());
+    GEPTy = VectorType::get(GEPTy, VT->getElementCount());
 
   if (isa<UndefValue>(Ops[0]))
     return UndefValue::get(GEPTy);
@@ -4445,52 +4445,66 @@ static Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask,
     return UndefValue::get(RetTy);
 
   Type *InVecTy = Op0->getType();
-  unsigned MaskNumElts = Mask->getType()->getVectorNumElements();
-  unsigned InVecNumElts = InVecTy->getVectorNumElements();
+  ElementCount MaskEltCount = Mask->getType()->getVectorElementCount();
+  ElementCount InVecEltCount = InVecTy->getVectorElementCount();
+
+  assert(MaskEltCount.Scalable == InVecEltCount.Scalable &&
+         "vscale mismatch between input vector and mask");
+
+  bool Scalable = MaskEltCount.Scalable;
 
   SmallVector<int, 32> Indices;
-  ShuffleVectorInst::getShuffleMask(Mask, Indices);
-  assert(MaskNumElts == Indices.size() &&
-         "Size of Indices not same as number of mask elements?");
-
-  // Canonicalization: If mask does not select elements from an input vector,
-  // replace that input vector with undef.
-  bool MaskSelects0 = false, MaskSelects1 = false;
-  for (unsigned i = 0; i != MaskNumElts; ++i) {
-    if (Indices[i] == -1)
-      continue;
-    if ((unsigned)Indices[i] < InVecNumElts)
-      MaskSelects0 = true;
-    else
-      MaskSelects1 = true;
+  if (!Scalable) {
+    ShuffleVectorInst::getShuffleMask(Mask, Indices);
+    assert(MaskEltCount.Min == Indices.size() &&
+           "Size of Indices not same as number of mask elements?");
+  }
+
+  if (!Scalable) {
+    // Canonicalization: If mask does not select elements from an input vector,
+    // replace that input vector with undef.
+    bool MaskSelects0 = false, MaskSelects1 = false;
+    for (unsigned i = 0; i != MaskEltCount.Min; ++i) {
+      if (Indices[i] == -1)
+        continue;
+      if ((unsigned)Indices[i] < InVecEltCount.Min)
+        MaskSelects0 = true;
+      else
+        MaskSelects1 = true;
+    }
+    if (!MaskSelects0)
+      Op0 = UndefValue::get(InVecTy);
+    if (!MaskSelects1)
+      Op1 = UndefValue::get(InVecTy);
   }
-  if (!MaskSelects0)
-    Op0 = UndefValue::get(InVecTy);
-  if (!MaskSelects1)
-    Op1 = UndefValue::get(InVecTy);
 
   auto *Op0Const = dyn_cast<Constant>(Op0);
   auto *Op1Const = dyn_cast<Constant>(Op1);
 
-  // If all operands are constant, constant fold the shuffle.
-  if (Op0Const && Op1Const)
+  // If all operands are constant, constant fold the shuffle. This
+  // transformation depends on the value of the mask which is not known at
+  // compile time for scalable vectors
+  if (!Scalable && Op0Const && Op1Const)
     return ConstantFoldShuffleVectorInstruction(Op0Const, Op1Const, Mask);
 
   // Canonicalization: if only one input vector is constant, it shall be the
-  // second one.
-  if (Op0Const && !Op1Const) {
+  // second one. This transformation depends on the value of the mask which
+  // is not known at compile time for scalable vectors
+  if (!Scalable && Op0Const && !Op1Const) {
     std::swap(Op0, Op1);
-    ShuffleVectorInst::commuteShuffleMask(Indices, InVecNumElts);
+    ShuffleVectorInst::commuteShuffleMask(Indices, InVecEltCount.Min);
   }
 
   // A splat of an inserted scalar constant becomes a vector constant:
   // shuf (inselt ?, C, IndexC), undef, <IndexC, IndexC...> --> <C, C...>
   // NOTE: We may have commuted above, so analyze the updated Indices, not the
   //       original mask constant.
+  // NOTE: This transformation depends on the value of the mask which is not
+  // known at compile time for scalable vectors
   Constant *C;
   ConstantInt *IndexC;
-  if (match(Op0, m_InsertElement(m_Value(), m_Constant(C),
-                                 m_ConstantInt(IndexC)))) {
+  if (!Scalable && match(Op0, m_InsertElement(m_Value(), m_Constant(C),
+                                              m_ConstantInt(IndexC)))) {
     // Match a splat shuffle mask of the insert index allowing undef elements.
     int InsertIndex = IndexC->getZExtValue();
     if (all_of(Indices, [InsertIndex](int MaskElt) {
@@ -4499,8 +4513,8 @@ static Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask,
       assert(isa<UndefValue>(Op1) && "Expected undef operand 1 for splat");
 
       // Shuffle mask undefs become undefined constant result elements.
-      SmallVector<Constant *, 16> VecC(MaskNumElts, C);
-      for (unsigned i = 0; i != MaskNumElts; ++i)
+      SmallVector<Constant *, 16> VecC(MaskEltCount.Min, C);
+      for (unsigned i = 0; i != MaskEltCount.Min; ++i)
         if (Indices[i] == -1)
           VecC[i] = UndefValue::get(C->getType());
       return ConstantVector::get(VecC);
@@ -4514,6 +4528,11 @@ static Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask,
         OpShuf->getMask()->getSplatValue())
       return Op0;
 
+  // All remaining transformation depend on the value of the mask, which is
+  // not known at compile time for scalable vectors.
+  if (Scalable)
+    return nullptr;
+
   // Don't fold a shuffle with undef mask elements. This may get folded in a
   // better way using demanded bits or other analysis.
   // TODO: Should we allow this?
@@ -4525,7 +4544,7 @@ static Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask,
   // shuffle. This handles simple identity shuffles as well as chains of
   // shuffles that may widen/narrow and/or move elements across lanes and back.
   Value *RootVec = nullptr;
-  for (unsigned i = 0; i != MaskNumElts; ++i) {
+  for (unsigned i = 0; i != MaskEltCount.Min; ++i) {
     // Note that recursion is limited for each vector element, so if any element
     // exceeds the limit, this will fail to simplify.
     RootVec =

diff  --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
index 708b02fb5554..453c5cdd5410 100644
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -7224,8 +7224,8 @@ int LLParser::ParseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) {
   bool AteExtraComma = false;
   // GEP returns a vector of pointers if at least one of parameters is a vector.
   // All vector parameters should have the same vector width.
-  unsigned GEPWidth = BaseType->isVectorTy() ?
-    BaseType->getVectorNumElements() : 0;
+  ElementCount GEPWidth = BaseType->isVectorTy() ?
+    BaseType->getVectorElementCount() : ElementCount(0, false);
 
   while (EatIfPresent(lltok::comma)) {
     if (Lex.getKind() == lltok::MetadataVar) {
@@ -7237,8 +7237,8 @@ int LLParser::ParseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) {
       return Error(EltLoc, "getelementptr index must be an integer");
 
     if (Val->getType()->isVectorTy()) {
-      unsigned ValNumEl = Val->getType()->getVectorNumElements();
-      if (GEPWidth && GEPWidth != ValNumEl)
+      ElementCount ValNumEl = Val->getType()->getVectorElementCount();
+      if (GEPWidth != ElementCount(0, false) && GEPWidth != ValNumEl)
         return Error(EltLoc,
           "getelementptr vector index has a wrong number of elements");
       GEPWidth = ValNumEl;

diff  --git a/llvm/lib/IR/ConstantFold.cpp b/llvm/lib/IR/ConstantFold.cpp
index 9fd7dbd085e8..cd037f0b2225 100644
--- a/llvm/lib/IR/ConstantFold.cpp
+++ b/llvm/lib/IR/ConstantFold.cpp
@@ -863,12 +863,12 @@ Constant *llvm::ConstantFoldInsertElementInstruction(Constant *Val,
 Constant *llvm::ConstantFoldShuffleVectorInstruction(Constant *V1,
                                                      Constant *V2,
                                                      Constant *Mask) {
-  unsigned MaskNumElts = Mask->getType()->getVectorNumElements();
+  ElementCount MaskEltCount = Mask->getType()->getVectorElementCount();
   Type *EltTy = V1->getType()->getVectorElementType();
 
   // Undefined shuffle mask -> undefined value.
   if (isa<UndefValue>(Mask))
-    return UndefValue::get(VectorType::get(EltTy, MaskNumElts));
+    return UndefValue::get(VectorType::get(EltTy, MaskEltCount));
 
   // Don't break the bitcode reader hack.
   if (isa<ConstantExpr>(Mask)) return nullptr;
@@ -879,6 +879,7 @@ Constant *llvm::ConstantFoldShuffleVectorInstruction(Constant *V1,
   if (ValTy->isScalable())
     return nullptr;
 
+  unsigned MaskNumElts = MaskEltCount.Min;
   unsigned SrcNumElts = V1->getType()->getVectorNumElements();
 
   // Loop over the shuffle mask, evaluating each element.

diff  --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp
index e93f4b302902..3886494ebf9b 100644
--- a/llvm/lib/IR/Instructions.cpp
+++ b/llvm/lib/IR/Instructions.cpp
@@ -1887,6 +1887,8 @@ bool ShuffleVectorInst::isValidOperands(const Value *V1, const Value *V2,
 
 int ShuffleVectorInst::getMaskValue(const Constant *Mask, unsigned i) {
   assert(i < Mask->getType()->getVectorNumElements() && "Index out of range");
+  assert(!Mask->getType()->getVectorElementCount().Scalable &&
+    "Length of scalable vectors unknown at compile time");
   if (auto *CDS = dyn_cast<ConstantDataSequential>(Mask))
     return CDS->getElementAsInteger(i);
   Constant *C = Mask->getAggregateElement(i);
@@ -1897,6 +1899,8 @@ int ShuffleVectorInst::getMaskValue(const Constant *Mask, unsigned i) {
 
 void ShuffleVectorInst::getShuffleMask(const Constant *Mask,
                                        SmallVectorImpl<int> &Result) {
+  assert(!Mask->getType()->getVectorElementCount().Scalable &&
+    "Length of scalable vectors unknown at compile time");
   unsigned NumElts = Mask->getType()->getVectorNumElements();
 
   if (auto *CDS = dyn_cast<ConstantDataSequential>(Mask)) {

diff  --git a/llvm/test/Analysis/ConstantFolding/vscale-getelementptr.ll b/llvm/test/Analysis/ConstantFolding/vscale-getelementptr.ll
new file mode 100644
index 000000000000..8e90961928c9
--- /dev/null
+++ b/llvm/test/Analysis/ConstantFolding/vscale-getelementptr.ll
@@ -0,0 +1,32 @@
+; RUN: opt -early-cse -S < %s | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64"
+
+; CHECK-LABEL: define <4 x i32*> @fixed_length_version_first() {
+; CHECK-NEXT:  ret <4 x i32*> undef
+define <4 x i32*> @fixed_length_version_first() {
+  %ptr = getelementptr i32, <4 x i32*> undef, <4 x i64> undef
+  ret <4 x i32*> %ptr
+}
+
+; CHECK-LABEL: define <4 x <4 x i32>*> @fixed_length_version_second() {
+; CHECK-NEXT:  ret <4 x <4 x i32>*> undef
+define <4 x <4 x i32>*> @fixed_length_version_second() {
+  %ptr = getelementptr <4 x i32>, <4 x i32>* undef, <4 x i64> undef
+  ret <4 x <4 x i32>*> %ptr
+}
+
+; CHECK-LABEL: define <vscale x 4 x i32*> @vscale_version_first() {
+; CHECK-NEXT:  ret <vscale x 4 x i32*> undef
+define <vscale x 4 x i32*> @vscale_version_first() {
+  %ptr = getelementptr i32, <vscale x 4 x i32*> undef, <vscale x 4 x i64> undef
+  ret <vscale x 4 x i32*> %ptr
+}
+
+; CHECK-LABEL: define <vscale x 4 x <vscale x 4 x i32>*> @vscale_version_second() {
+; CHECK-NEXT:  ret <vscale x 4 x <vscale x 4 x i32>*> undef
+define <vscale x 4 x <vscale x 4 x i32>*> @vscale_version_second() {
+  %ptr = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* undef, <vscale x 4 x i64> undef
+  ret <vscale x 4 x <vscale x 4 x i32>*> %ptr
+}

diff  --git a/llvm/test/Analysis/ConstantFolding/vscale-shufflevector.ll b/llvm/test/Analysis/ConstantFolding/vscale-shufflevector.ll
new file mode 100644
index 000000000000..dc3b66e18f87
--- /dev/null
+++ b/llvm/test/Analysis/ConstantFolding/vscale-shufflevector.ll
@@ -0,0 +1,41 @@
+; RUN: opt -early-cse -S < %s | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64"
+
+; This test checks that SimplifyInstruction does not blow up in the face of
+; a scalable shufflevector. vscale is a constant value known only at runtime.
+; Therefore, it is not possible to know the concrete value of, or the length
+; of the mask at compile time. Simplifications that depend on the value
+; of the mask cannot be performed.
+
+; Given the fact that the value of the mask is unknown at compile time for
+; scalable vectors, very few simplifications will be done. Here, we want to
+; see that the instruction can be passed to SimplifyInstruction and not crash
+; the compiler. It happens to be the case that this will be the result.
+
+; CHECK-LABEL: define <vscale x 8 x i1> @vscale_version()
+; CHECK-NEXT: %splatter = insertelement <vscale x 8 x i1> undef, i1 true, i32 0
+; CHECK-NEXT: %foo = shufflevector <vscale x 8 x i1> %splatter, <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
+; CHECK-NEXT: ret <vscale x 8 x i1> %foo
+
+define <vscale x 8 x i1> @vscale_version() {
+  %splatter = insertelement <vscale x 8 x i1> undef, i1 true, i32 0
+  %foo = shufflevector <vscale x 8 x i1> %splatter,
+                       <vscale x 8 x i1> undef,
+                       <vscale x 8 x i32> zeroinitializer
+  ret <vscale x 8 x i1> %foo
+}
+
+; The non-scalable version should be optimized as normal.
+
+; CHECK-LABEL: define <8 x i1> @fixed_length_version() {
+; CHECK-NEXT:  ret <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+define <8 x i1> @fixed_length_version() {
+  %splatter = insertelement <8 x i1> undef, i1 true, i32 0
+  %foo = shufflevector <8 x i1> %splatter,
+                       <8 x i1> undef,
+                       <8 x i32> zeroinitializer
+  ret <8 x i1> %foo
+}
+