[llvm-branch-commits] [llvm] 52a3267 - [InstCombine] Remove scalable vector restriction in foldVectorBinop

Tue Dec 15 05:19:27 PST 2020

Author: Jun Ma
Date: 2020-12-15T21:14:59+08:00
New Revision: 52a3267ffafc27d2dbe3d419256f18a4a9d8c681

URL: https://github.com/llvm/llvm-project/commit/52a3267ffafc27d2dbe3d419256f18a4a9d8c681
DIFF: https://github.com/llvm/llvm-project/commit/52a3267ffafc27d2dbe3d419256f18a4a9d8c681.diff

LOG: [InstCombine] Remove scalable vector restriction in foldVectorBinop

Differential Revision: https://reviews.llvm.org/D93289

Added: 
    

Modified: 
    llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
    llvm/test/Transforms/InstCombine/fold-bin-operand.ll
    llvm/test/Transforms/InstCombine/vec-binop-select.ll
    llvm/test/Transforms/InstCombine/vec_shuffle.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index bbc76325a67b..9306e99f5d52 100644

--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -959,8 +959,7 @@ Instruction *InstCombinerImpl::FoldOpIntoSelect(Instruction &Op,
       return nullptr;
 
     // If vectors, verify that they have the same number of elements.
-    if (SrcTy && cast<FixedVectorType>(SrcTy)->getNumElements() !=
-                     cast<FixedVectorType>(DestTy)->getNumElements())
+    if (SrcTy && SrcTy->getElementCount() != DestTy->getElementCount())
       return nullptr;
   }
 
@@ -1515,8 +1514,7 @@ Value *InstCombinerImpl::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) {
 }
 
 Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) {
-  // FIXME: some of this is likely fine for scalable vectors
-  if (!isa<FixedVectorType>(Inst.getType()))
+  if (!isa<VectorType>(Inst.getType()))
     return nullptr;
 
   BinaryOperator::BinaryOps Opcode = Inst.getOpcode();
@@ -1605,13 +1603,16 @@ Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) {
   // intends to move shuffles closer to other shuffles and binops closer to
   // other binops, so they can be folded. It may also enable demanded elements
   // transforms.
-  unsigned NumElts = cast<FixedVectorType>(Inst.getType())->getNumElements();
   Constant *C;
-  if (match(&Inst,
+  auto *InstVTy = dyn_cast<FixedVectorType>(Inst.getType());
+  if (InstVTy &&
+      match(&Inst,
             m_c_BinOp(m_OneUse(m_Shuffle(m_Value(V1), m_Undef(), m_Mask(Mask))),
-                      m_Constant(C))) && !isa<ConstantExpr>(C) &&
-      cast<FixedVectorType>(V1->getType())->getNumElements() <= NumElts) {
-    assert(Inst.getType()->getScalarType() == V1->getType()->getScalarType() &&
+                      m_Constant(C))) &&
+      !isa<ConstantExpr>(C) &&
+      cast<FixedVectorType>(V1->getType())->getNumElements() <=
+          InstVTy->getNumElements()) {
+    assert(InstVTy->getScalarType() == V1->getType()->getScalarType() &&
            "Shuffle should not change scalar type");
 
     // Find constant NewC that has property:
@@ -1626,6 +1627,7 @@ Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) {
     UndefValue *UndefScalar = UndefValue::get(C->getType()->getScalarType());
     SmallVector<Constant *, 16> NewVecC(SrcVecNumElts, UndefScalar);
     bool MayChange = true;
+    unsigned NumElts = InstVTy->getNumElements();
     for (unsigned I = 0; I < NumElts; ++I) {
       Constant *CElt = C->getAggregateElement(I);
       if (ShMask[I] >= 0) {
@@ -2379,9 +2381,9 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
              DL.getTypeAllocSize(ArrTy) == DL.getTypeAllocSize(VecTy);
     };
     if (GEP.getNumOperands() == 3 &&
-        ((GEPEltType->isArrayTy() && SrcEltType->isVectorTy() &&
+        ((GEPEltType->isArrayTy() && isa<FixedVectorType>(SrcEltType) &&
           areMatchingArrayAndVecTypes(GEPEltType, SrcEltType, DL)) ||
-         (GEPEltType->isVectorTy() && SrcEltType->isArrayTy() &&
+         (isa<FixedVectorType>(GEPEltType) && SrcEltType->isArrayTy() &&
           areMatchingArrayAndVecTypes(SrcEltType, GEPEltType, DL)))) {
 
       // Create a new GEP here, as using `setOperand()` followed by

diff  --git a/llvm/test/Transforms/InstCombine/fold-bin-operand.ll b/llvm/test/Transforms/InstCombine/fold-bin-operand.ll
index d3303262be3f..fc0c13a5f1a7 100644
--- a/llvm/test/Transforms/InstCombine/fold-bin-operand.ll
+++ b/llvm/test/Transforms/InstCombine/fold-bin-operand.ll
@@ -1,17 +1,73 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
 
 define i1 @f(i1 %x) {
 ; CHECK-LABEL: @f(
-; CHECK: ret i1 false
-	%b = and i1 %x, icmp eq (i8* inttoptr (i32 1 to i8*), i8* inttoptr (i32 2 to i8*))
-	ret i1 %b
+; CHECK-NEXT:    ret i1 false
+;
+  %b = and i1 %x, icmp eq (i8* inttoptr (i32 1 to i8*), i8* inttoptr (i32 2 to i8*))
+  ret i1 %b
 }
 
 define i32 @g(i32 %x) {
 ; CHECK-LABEL: @g(
-; CHECK: ret i32 %x
-	%b = add i32 %x, zext (i1 icmp eq (i8* inttoptr (i32 1000000 to i8*), i8* inttoptr (i32 2000000 to i8*)) to i32)
-	ret i32 %b
+; CHECK-NEXT:    ret i32 [[X:%.*]]
+;
+  %b = add i32 %x, zext (i1 icmp eq (i8* inttoptr (i32 1000000 to i8*), i8* inttoptr (i32 2000000 to i8*)) to i32)
+  ret i32 %b
+}
+
+define i32 @h(i1 %A, i32 %B) {
+; CHECK-LABEL: @h(
+; CHECK-NEXT:  EntryBlock:
+; CHECK-NEXT:    [[B_OP:%.*]] = add i32 [[B:%.*]], 2
+; CHECK-NEXT:    [[OP:%.*]] = select i1 [[A:%.*]], i32 3, i32 [[B_OP]]
+; CHECK-NEXT:    ret i32 [[OP]]
+;
+EntryBlock:
+  %cf = select i1 %A, i32 1, i32 %B
+  %op = add i32 2, %cf
+  ret i32 %op
+}
+
+define <4 x float> @h1(i1 %A, <4 x i32> %B) {
+; CHECK-LABEL: @h1(
+; CHECK-NEXT:  EntryBlock:
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
+; CHECK-NEXT:    [[BC:%.*]] = select i1 [[A:%.*]], <4 x float> <float 0x36A0000000000000, float 0x36A0000000000000, float 0x36A0000000000000, float 0x36A0000000000000>, <4 x float> [[TMP0]]
+; CHECK-NEXT:    ret <4 x float> [[BC]]
+;
+EntryBlock:
+  %cf = select i1 %A, <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i32> %B
+  %bc = bitcast <4 x i32> %cf to <4 x float>
+  ret <4 x float> %bc
+}
+
+define <vscale x 4 x float> @h2(i1 %A, <vscale x 4 x i32> %B) {
+; CHECK-LABEL: @h2(
+; CHECK-NEXT:  EntryBlock:
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 4 x i32> [[B:%.*]] to <vscale x 4 x float>
+; CHECK-NEXT:    [[BC:%.*]] = select i1 [[A:%.*]], <vscale x 4 x float> zeroinitializer, <vscale x 4 x float> [[TMP0]]
+; CHECK-NEXT:    ret <vscale x 4 x float> [[BC]]
+;
+EntryBlock:
+  %cf = select i1 %A, <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> %B
+  %bc = bitcast <vscale x 4 x i32> %cf to <vscale x 4 x float>
+  ret <vscale x 4 x float> %bc
+}
+
+define <vscale x 2 x i64> @h3(i1 %A, <vscale x 4 x i32> %B) {
+; CHECK-LABEL: @h3(
+; CHECK-NEXT:  EntryBlock:
+; CHECK-NEXT:    [[CF:%.*]] = select i1 [[A:%.*]], <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> [[B:%.*]]
+; CHECK-NEXT:    [[BC:%.*]] = bitcast <vscale x 4 x i32> [[CF]] to <vscale x 2 x i64>
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[BC]]
+;
+EntryBlock:
+  %cf = select i1 %A, <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> %B
+  %bc = bitcast <vscale x 4 x i32> %cf to <vscale x 2 x i64>
+  ret <vscale x 2 x i64> %bc
+
 }
 

diff  --git a/llvm/test/Transforms/InstCombine/vec-binop-select.ll b/llvm/test/Transforms/InstCombine/vec-binop-select.ll
index abf4729e3695..415dc5deff16 100644
--- a/llvm/test/Transforms/InstCombine/vec-binop-select.ll
+++ b/llvm/test/Transforms/InstCombine/vec-binop-select.ll
@@ -14,6 +14,18 @@ define <4 x i32> @and(<4 x i32> %x, <4 x i32> %y) {
   ret <4 x i32> %r
 }
 
+define <vscale x 4 x i32> @vscaleand(<vscale x 4 x i32> %x, <vscale x 4 x i32> %y) {
+; CHECK-LABEL: @vscaleand(
+; CHECK-NEXT:    [[R:%.*]] = and <vscale x 4 x i32> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[S:%.*]] = shufflevector <vscale x 4 x i32> [[R]], <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[S]]
+;
+  %sel1 = shufflevector <vscale x 4 x i32> %x, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
+  %sel2 = shufflevector <vscale x 4 x i32> %y, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
+  %r = and <vscale x 4 x i32> %sel1, %sel2
+  ret <vscale x 4 x i32> %r
+}
+
 define <4 x i32> @or(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @or(
 ; CHECK-NEXT:    [[R:%.*]] = or <4 x i32> [[X:%.*]], [[Y:%.*]]

diff  --git a/llvm/test/Transforms/InstCombine/vec_shuffle.ll b/llvm/test/Transforms/InstCombine/vec_shuffle.ll
index 3f3431c5d904..4f712ca7b3fb 100644
--- a/llvm/test/Transforms/InstCombine/vec_shuffle.ll
+++ b/llvm/test/Transforms/InstCombine/vec_shuffle.ll
@@ -1468,6 +1468,20 @@ define <4 x i32> @splat_assoc_add(<4 x i32> %x, <4 x i32> %y) {
   ret <4 x i32> %r
 }
 
+define <vscale x 4 x i32> @vsplat_assoc_add(<vscale x 4 x i32> %x, <vscale x 4 x i32> %y) {
+; CHECK-LABEL: @vsplat_assoc_add(
+; CHECK-NEXT:    [[TMP1:%.*]] = add <vscale x 4 x i32> [[X:%.*]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> undef, i32 317426, i32 0), <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT:    [[R:%.*]] = add <vscale x 4 x i32> [[TMP2]], [[Y:%.*]]
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
+;
+
+  %splatx = shufflevector <vscale x 4 x i32> %x, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
+  %a = add <vscale x 4 x i32> %y, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> undef, i32 317426, i32 0), <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer)
+  %r = add <vscale x 4 x i32> %splatx, %a
+  ret <vscale x 4 x i32> %r
+}
+
 ; Undefs in splat mask are replaced with defined splat index
 
 define <4 x i32> @splat_assoc_add_undef_mask_elts(<4 x i32> %x, <4 x i32> %y) {