[llvm] 8cf0290 - [SLP]Fix cost estimation for buildvectors with extracts and/or constants.
Alexey Bataev via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 19 05:57:28 PDT 2023
Author: Alexey Bataev
Date: 2023-04-19T05:54:58-07:00
New Revision: 8cf0290c4a47aa184b7b28032ec7a0f7311ff7c5
URL: https://github.com/llvm/llvm-project/commit/8cf0290c4a47aa184b7b28032ec7a0f7311ff7c5
DIFF: https://github.com/llvm/llvm-project/commit/8cf0290c4a47aa184b7b28032ec7a0f7311ff7c5.diff
LOG: [SLP]Fix cost estimation for buildvectors with extracts and/or constants.
If a partial match is found and some other scalars still need to be
inserted, the cost model must account for the extractelements that are
turned into shuffles and/or for the reused tree entries, and it must
properly calculate the cost of inserting constants into non-poison
vectors.
Also fixes the cost calculation for the final gather/buildvector sequence.
Differential Revision: https://reviews.llvm.org/D148362
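To illustrate the costing policy this patch adopts, here is a condensed,
hypothetical C++ sketch rather than the patch itself: estimateGatherCost is
an invented name, and isa<Constant> stands in for SLP's internal isConstant
helper. With a poison source the whole insert sequence is costed at once via
getScalarizationOverhead; with a non-poison source each unique, non-undef
scalar is costed as an individual insertelement into the existing vector,
and a trailing shuffle is added when non-constant duplicates must be placed.

#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"

using namespace llvm;

// Condensed sketch of the gather costing policy; not the exact BoUpSLP code.
static InstructionCost estimateGatherCost(const TargetTransformInfo &TTI,
                                          FixedVectorType *VecTy,
                                          ArrayRef<Value *> Scalars,
                                          bool ForPoisonSrc) {
  constexpr TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
  APInt ShuffledElements = APInt::getZero(Scalars.size());
  DenseSet<Value *> UniqueElements;
  bool DuplicateNonConst = false;
  InstructionCost Cost = 0;
  for (unsigned I = 0, E = Scalars.size(); I < E; ++I) {
    Value *V = Scalars[I];
    // Constants fold into the initial vector when it is poison; undefs are
    // always free.
    if ((ForPoisonSrc && isa<Constant>(V)) || isa<UndefValue>(V)) {
      ShuffledElements.setBit(I);
      continue;
    }
    // Duplicated scalars are inserted once and shuffled into place later.
    if (!UniqueElements.insert(V).second) {
      DuplicateNonConst = true;
      ShuffledElements.setBit(I);
      continue;
    }
    // Non-poison source: pay for an insertelement into a live vector.
    if (!ForPoisonSrc)
      Cost += TTI.getVectorInstrCost(Instruction::InsertElement, VecTy,
                                     CostKind, I,
                                     Constant::getNullValue(VecTy), V);
  }
  // Poison source: cost all remaining inserts in one scalarization query.
  if (ForPoisonSrc)
    Cost += TTI.getScalarizationOverhead(VecTy, ~ShuffledElements,
                                         /*Insert=*/true, /*Extract=*/false,
                                         CostKind);
  if (DuplicateNonConst)
    Cost += TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, VecTy);
  return Cost;
}

The companion AArch64 getVectorInstrCost change below follows the same idea:
an insertelement whose destination vector operand is not undef now passes
HasRealUse=true to the AArch64 cost helper.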
Added:
Modified:
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
llvm/test/Transforms/SLPVectorizer/AArch64/extractelements-to-shuffle.ll
llvm/test/Transforms/SLPVectorizer/AArch64/fshl.ll
llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 0fd33868375e2..85101d44813ce 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -2272,7 +2272,9 @@ InstructionCost AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
TTI::TargetCostKind CostKind,
unsigned Index, Value *Op0,
Value *Op1) {
- return getVectorInstrCostHelper(nullptr, Val, Index, false /* HasRealUse */);
+ bool HasRealUse =
+ Opcode == Instruction::InsertElement && Op0 && !isa<UndefValue>(Op0);
+ return getVectorInstrCostHelper(nullptr, Val, Index, HasRealUse);
}
InstructionCost AArch64TTIImpl::getVectorInstrCost(const Instruction &I,
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 685f597eb556c..abe25317fca55 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -2459,14 +2459,6 @@ class BoUpSLP {
/// for ease of later optimization.
Value *createBuildVector(const TreeEntry *E);
- /// \returns the scalarization cost for this type. Scalarization in this
- /// context means the creation of vectors from a group of scalars. If \p
- /// NeedToShuffle is true, need to add a cost of reshuffling some of the
- /// vector elements.
- InstructionCost getGatherCost(FixedVectorType *Ty,
- const APInt &ShuffledIndices,
- bool NeedToShuffle) const;
-
/// Returns the instruction in the bundle, which can be used as a base point
/// for scheduling. Usually it is the last instruction in the bundle, except
/// for the case when all operands are external (in this case, it is the first
@@ -2488,7 +2480,8 @@ class BoUpSLP {
/// \returns the scalarization cost for this list of values. Assuming that
/// this subtree gets vectorized, we may need to extract the values from the
/// roots. This method calculates the cost of extracting the values.
- InstructionCost getGatherCost(ArrayRef<Value *> VL) const;
+ /// \param ForPoisonSrc true if initial vector is poison, false otherwise.
+ InstructionCost getGatherCost(ArrayRef<Value *> VL, bool ForPoisonSrc) const;
/// Set the Builder insert point to one after the last instruction in
/// the bundle
@@ -6922,9 +6915,10 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
/*SubTp=*/nullptr, /*Args=*/*It)
: TTI::TCC_Free);
}
- return GatherCost + (all_of(Gathers, UndefValue::classof)
- ? TTI::TCC_Free
- : R.getGatherCost(Gathers));
+ return GatherCost +
+ (all_of(Gathers, UndefValue::classof)
+ ? TTI::TCC_Free
+ : R.getGatherCost(Gathers, !Root && VL.equals(Gathers)));
};
public:
@@ -7176,23 +7170,22 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
GatheredScalars.front()->getType(), GatheredScalars.size())));
return Estimator.finalize(E->ReuseShuffleIndices);
}
- if (ExtractShuffle && all_of(GatheredScalars, PoisonValue::classof)) {
+ InstructionCost Cost = 0;
+ if (ExtractShuffle) {
// Check that gather of extractelements can be represented as just a
// shuffle of a single/two vectors the scalars are extracted from.
// Found the bunch of extractelement instructions that must be gathered
// into a vector and can be represented as a permutation elements in a
// single input vector or of 2 input vectors.
- InstructionCost Cost =
- computeExtractCost(VL, VecTy, *ExtractShuffle, ExtractMask, *TTI);
- return Cost + Estimator.finalize(E->ReuseShuffleIndices);
+ Cost += computeExtractCost(VL, VecTy, *ExtractShuffle, ExtractMask, *TTI);
}
Estimator.gather(
GatheredScalars,
- (ExtractShuffle || GatherShuffle)
- ? Constant::getNullValue(FixedVectorType::get(
- GatheredScalars.front()->getType(), GatheredScalars.size()))
- : nullptr);
- return Estimator.finalize(E->ReuseShuffleIndices);
+ VL.equals(GatheredScalars)
+ ? nullptr
+ : Constant::getNullValue(FixedVectorType::get(
+ GatheredScalars.front()->getType(), GatheredScalars.size())));
+ return Cost + Estimator.finalize(E->ReuseShuffleIndices);
}
InstructionCost CommonCost = 0;
SmallVector<int> Mask;
@@ -8791,19 +8784,8 @@ BoUpSLP::isGatherShuffledEntry(const TreeEntry *TE, ArrayRef<Value *> VL,
return std::nullopt;
}
-InstructionCost BoUpSLP::getGatherCost(FixedVectorType *Ty,
- const APInt &ShuffledIndices,
- bool NeedToShuffle) const {
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
- InstructionCost Cost =
- TTI->getScalarizationOverhead(Ty, ~ShuffledIndices, /*Insert*/ true,
- /*Extract*/ false, CostKind);
- if (NeedToShuffle)
- Cost += TTI->getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, Ty);
- return Cost;
-}
-
-InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL) const {
+InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL,
+ bool ForPoisonSrc) const {
// Find the type of the operands in VL.
Type *ScalarTy = VL[0]->getType();
if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
@@ -8815,20 +8797,36 @@ InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL) const {
// shuffle candidates.
APInt ShuffledElements = APInt::getZero(VL.size());
DenseSet<Value *> UniqueElements;
- // Iterate in reverse order to consider insert elements with the high cost.
- for (unsigned I = VL.size(); I > 0; --I) {
- unsigned Idx = I - 1;
+ constexpr TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
+ InstructionCost Cost;
+ auto EstimateInsertCost = [&](unsigned I, Value *V) {
+ if (!ForPoisonSrc)
+ Cost +=
+ TTI->getVectorInstrCost(Instruction::InsertElement, VecTy, CostKind,
+ I, Constant::getNullValue(VecTy), V);
+ };
+ for (unsigned I = 0, E = VL.size(); I < E; ++I) {
+ Value *V = VL[I];
// No need to shuffle duplicates for constants.
- if (isConstant(VL[Idx])) {
- ShuffledElements.setBit(Idx);
+ if ((ForPoisonSrc && isConstant(V)) || isa<UndefValue>(V)) {
+ ShuffledElements.setBit(I);
continue;
}
- if (!UniqueElements.insert(VL[Idx]).second) {
+ if (!UniqueElements.insert(V).second) {
DuplicateNonConst = true;
- ShuffledElements.setBit(Idx);
+ ShuffledElements.setBit(I);
+ continue;
}
+ EstimateInsertCost(I, V);
}
- return getGatherCost(VecTy, ShuffledElements, DuplicateNonConst);
+ if (ForPoisonSrc)
+ Cost =
+ TTI->getScalarizationOverhead(VecTy, ~ShuffledElements, /*Insert*/ true,
+ /*Extract*/ false, CostKind);
+ if (DuplicateNonConst)
+ Cost +=
+ TTI->getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, VecTy);
+ return Cost;
}
// Perform operand reordering on the instructions in VL and return the reordered
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/extractelements-to-shuffle.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/extractelements-to-shuffle.ll
index 455693e27b349..61aa9110e123c 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/extractelements-to-shuffle.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/extractelements-to-shuffle.ll
@@ -3,21 +3,27 @@
define void @test(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2) {
; CHECK-LABEL: @test(
-; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP2:%.*]], i64 0
-; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x i64> [[TMP1:%.*]], <2 x i64> [[TMP0:%.*]], <4 x i32> <i32 0, i32 2, i32 undef, i32 2>
-; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i64> [[TMP5]], i64 [[TMP4]], i32 2
-; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> poison, <4 x i32> <i32 undef, i32 undef, i32 1, i32 undef>
-; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i64> [[TMP7]], <4 x i64> <i64 0, i64 0, i64 poison, i64 0>, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
-; CHECK-NEXT: [[TMP9:%.*]] = or <4 x i64> [[TMP6]], [[TMP8]]
-; CHECK-NEXT: [[TMP10:%.*]] = trunc <4 x i64> [[TMP9]] to <4 x i32>
-; CHECK-NEXT: br label [[TMP11:%.*]]
-; CHECK: 11:
-; CHECK-NEXT: [[TMP12:%.*]] = phi <4 x i32> [ [[TMP16:%.*]], [[TMP11]] ], [ [[TMP10]], [[TMP3:%.*]] ]
-; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x i32> [[TMP12]], <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
-; CHECK-NEXT: [[TMP14:%.*]] = or <4 x i32> zeroinitializer, [[TMP13]]
-; CHECK-NEXT: [[TMP15:%.*]] = add <4 x i32> zeroinitializer, [[TMP13]]
-; CHECK-NEXT: [[TMP16]] = shufflevector <4 x i32> [[TMP14]], <4 x i32> [[TMP15]], <4 x i32> <i32 0, i32 5, i32 6, i32 7>
-; CHECK-NEXT: br label [[TMP11]]
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP1:%.*]], i64 0
+; CHECK-NEXT: [[TMP5:%.*]] = or i64 [[TMP4]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
+; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i64> [[TMP0:%.*]], i64 0
+; CHECK-NEXT: [[TMP8:%.*]] = or i64 [[TMP7]], 0
+; CHECK-NEXT: [[TMP9:%.*]] = trunc i64 [[TMP8]] to i32
+; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i64> [[TMP2:%.*]], i64 0
+; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i64> [[TMP2]], i64 1
+; CHECK-NEXT: [[TMP12:%.*]] = or i64 [[TMP10]], [[TMP11]]
+; CHECK-NEXT: [[TMP13:%.*]] = trunc i64 [[TMP12]] to i32
+; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP0]], i64 0
+; CHECK-NEXT: [[TMP15:%.*]] = or i64 [[TMP14]], 0
+; CHECK-NEXT: [[TMP16:%.*]] = trunc i64 [[TMP15]] to i32
+; CHECK-NEXT: br label [[TMP17:%.*]]
+; CHECK: 17:
+; CHECK-NEXT: [[TMP18:%.*]] = phi i32 [ [[TMP22:%.*]], [[TMP17]] ], [ [[TMP6]], [[TMP3:%.*]] ]
+; CHECK-NEXT: [[TMP19:%.*]] = phi i32 [ 0, [[TMP17]] ], [ [[TMP9]], [[TMP3]] ]
+; CHECK-NEXT: [[TMP20:%.*]] = phi i32 [ 0, [[TMP17]] ], [ [[TMP13]], [[TMP3]] ]
+; CHECK-NEXT: [[TMP21:%.*]] = phi i32 [ 0, [[TMP17]] ], [ [[TMP16]], [[TMP3]] ]
+; CHECK-NEXT: [[TMP22]] = or i32 [[TMP18]], 0
+; CHECK-NEXT: br label [[TMP17]]
;
%4 = extractelement <2 x i64> %1, i64 0
%5 = or i64 %4, 0
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/fshl.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/fshl.ll
index 2704a4439f0d5..169ebc6c1c651 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/fshl.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/fshl.ll
@@ -6,21 +6,18 @@ define i64 @fshl(i64 %or1, i64 %or2, i64 %or3 ) {
; CHECK-LABEL: define i64 @fshl
; CHECK-SAME: (i64 [[OR1:%.*]], i64 [[OR2:%.*]], i64 [[OR3:%.*]]) {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i64> poison, i64 [[OR2]], i32 0
-; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i64> [[TMP0]], i64 [[OR3]], i32 1
-; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> [[TMP1]], <2 x i64> zeroinitializer, <2 x i64> <i64 1, i64 2>)
-; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> <i64 poison, i64 0>, i64 [[OR1]], i32 0
-; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> <i64 poison, i64 0>, <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT: [[TMP5:%.*]] = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> [[TMP3]], <2 x i64> [[TMP4]], <2 x i64> <i64 17, i64 21>)
-; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> <i64 0, i64 poison>, <2 x i32> <i32 2, i32 0>
-; CHECK-NEXT: [[TMP7:%.*]] = xor <2 x i64> [[TMP2]], [[TMP6]]
-; CHECK-NEXT: [[TMP8:%.*]] = add <2 x i64> [[TMP7]], [[TMP3]]
-; CHECK-NEXT: [[TMP9:%.*]] = xor <2 x i64> [[TMP5]], [[TMP8]]
-; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i64> [[TMP9]], i32 0
-; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i64> [[TMP8]], i32 1
-; CHECK-NEXT: [[ADD3:%.*]] = or i64 [[TMP10]], [[TMP11]]
-; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP9]], i32 1
-; CHECK-NEXT: [[XOR5:%.*]] = xor i64 [[ADD3]], [[TMP12]]
+; CHECK-NEXT: [[OR4:%.*]] = tail call i64 @llvm.fshl.i64(i64 [[OR2]], i64 0, i64 1)
+; CHECK-NEXT: [[XOR1:%.*]] = xor i64 [[OR4]], 0
+; CHECK-NEXT: [[OR5:%.*]] = tail call i64 @llvm.fshl.i64(i64 [[OR3]], i64 0, i64 2)
+; CHECK-NEXT: [[XOR2:%.*]] = xor i64 [[OR5]], [[OR1]]
+; CHECK-NEXT: [[ADD1:%.*]] = add i64 [[XOR1]], [[OR1]]
+; CHECK-NEXT: [[ADD2:%.*]] = add i64 0, [[XOR2]]
+; CHECK-NEXT: [[OR6:%.*]] = tail call i64 @llvm.fshl.i64(i64 [[OR1]], i64 [[OR2]], i64 17)
+; CHECK-NEXT: [[XOR3:%.*]] = xor i64 [[OR6]], [[ADD1]]
+; CHECK-NEXT: [[OR7:%.*]] = tail call i64 @llvm.fshl.i64(i64 0, i64 0, i64 21)
+; CHECK-NEXT: [[XOR4:%.*]] = xor i64 [[OR7]], [[ADD2]]
+; CHECK-NEXT: [[ADD3:%.*]] = or i64 [[XOR3]], [[ADD2]]
+; CHECK-NEXT: [[XOR5:%.*]] = xor i64 [[ADD3]], [[XOR4]]
; CHECK-NEXT: ret i64 [[XOR5]]
;
entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll
index f26d14c44db87..97fb5686c8710 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll
@@ -95,33 +95,30 @@ define i1 @logical_or_fcmp(<4 x float> %x) {
define i1 @logical_and_icmp_diff_preds(<4 x i32> %x) {
; SSE-LABEL: @logical_and_icmp_diff_preds(
; SSE-NEXT: [[X0:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 0
-; SSE-NEXT: [[X3:%.*]] = extractelement <4 x i32> [[X]], i32 3
+; SSE-NEXT: [[X2:%.*]] = extractelement <4 x i32> [[X]], i32 2
; SSE-NEXT: [[C0:%.*]] = icmp ult i32 [[X0]], 0
-; SSE-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> poison, <2 x i32> <i32 1, i32 2>
-; SSE-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> <i32 poison, i32 0>, <2 x i32> <i32 0, i32 3>
-; SSE-NEXT: [[TMP3:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> <i32 0, i32 poison>, <2 x i32> <i32 2, i32 1>
-; SSE-NEXT: [[TMP4:%.*]] = icmp slt <2 x i32> [[TMP2]], [[TMP3]]
-; SSE-NEXT: [[C3:%.*]] = icmp slt i32 [[X3]], 0
-; SSE-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0
-; SSE-NEXT: [[S1:%.*]] = select i1 [[C0]], i1 [[TMP5]], i1 false
-; SSE-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[TMP4]], i32 1
-; SSE-NEXT: [[S2:%.*]] = select i1 [[S1]], i1 [[TMP6]], i1 false
-; SSE-NEXT: [[S3:%.*]] = select i1 [[S2]], i1 [[C3]], i1 false
+; SSE-NEXT: [[C2:%.*]] = icmp sgt i32 [[X2]], 0
+; SSE-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> poison, <2 x i32> <i32 3, i32 1>
+; SSE-NEXT: [[TMP2:%.*]] = icmp slt <2 x i32> [[TMP1]], zeroinitializer
+; SSE-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 1
+; SSE-NEXT: [[S1:%.*]] = select i1 [[C0]], i1 [[TMP3]], i1 false
+; SSE-NEXT: [[S2:%.*]] = select i1 [[S1]], i1 [[C2]], i1 false
+; SSE-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0
+; SSE-NEXT: [[S3:%.*]] = select i1 [[S2]], i1 [[TMP4]], i1 false
; SSE-NEXT: ret i1 [[S3]]
;
; AVX-LABEL: @logical_and_icmp_diff_preds(
-; AVX-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> <i32 poison, i32 poison, i32 poison, i32 0>, <4 x i32> <i32 0, i32 3, i32 1, i32 7>
-; AVX-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> <i32 0, i32 0, i32 0, i32 poison>, <4 x i32> <i32 4, i32 5, i32 6, i32 2>
-; AVX-NEXT: [[TMP3:%.*]] = icmp ult <4 x i32> [[TMP1]], [[TMP2]]
-; AVX-NEXT: [[TMP4:%.*]] = icmp slt <4 x i32> [[TMP1]], [[TMP2]]
-; AVX-NEXT: [[TMP5:%.*]] = shufflevector <4 x i1> [[TMP3]], <4 x i1> [[TMP4]], <4 x i32> <i32 0, i32 5, i32 6, i32 7>
-; AVX-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP5]], i32 0
-; AVX-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP5]], i32 2
-; AVX-NEXT: [[S1:%.*]] = select i1 [[TMP6]], i1 [[TMP7]], i1 false
-; AVX-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP5]], i32 3
-; AVX-NEXT: [[S2:%.*]] = select i1 [[S1]], i1 [[TMP8]], i1 false
-; AVX-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP5]], i32 1
-; AVX-NEXT: [[S3:%.*]] = select i1 [[S2]], i1 [[TMP9]], i1 false
+; AVX-NEXT: [[X0:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 0
+; AVX-NEXT: [[X1:%.*]] = extractelement <4 x i32> [[X]], i32 1
+; AVX-NEXT: [[X2:%.*]] = extractelement <4 x i32> [[X]], i32 2
+; AVX-NEXT: [[X3:%.*]] = extractelement <4 x i32> [[X]], i32 3
+; AVX-NEXT: [[C0:%.*]] = icmp ult i32 [[X0]], 0
+; AVX-NEXT: [[C1:%.*]] = icmp slt i32 [[X1]], 0
+; AVX-NEXT: [[C2:%.*]] = icmp sgt i32 [[X2]], 0
+; AVX-NEXT: [[C3:%.*]] = icmp slt i32 [[X3]], 0
+; AVX-NEXT: [[S1:%.*]] = select i1 [[C0]], i1 [[C1]], i1 false
+; AVX-NEXT: [[S2:%.*]] = select i1 [[S1]], i1 [[C2]], i1 false
+; AVX-NEXT: [[S3:%.*]] = select i1 [[S2]], i1 [[C3]], i1 false
; AVX-NEXT: ret i1 [[S3]]
;
%x0 = extractelement <4 x i32> %x, i32 0