[llvm] 233fb98 - [ARM] Improve bitwise reduction costs
David Green via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 4 08:22:57 PDT 2023
Author: David Green
Date: 2023-09-04T16:22:52+01:00
New Revision: 233fb987fcf6a5dc2d12b07cb8e30fffd5471871
URL: https://github.com/llvm/llvm-project/commit/233fb987fcf6a5dc2d12b07cb8e30fffd5471871
DIFF: https://github.com/llvm/llvm-project/commit/233fb987fcf6a5dc2d12b07cb8e30fffd5471871.diff
LOG: [ARM] Improve bitwise reduction costs
This adds some basic and/or/xor reduction costs for NEON/MVE. They are handled
like other reductions: vector operations are first used to reduce the input to
a legal vector size, followed by an optional VREV + VAND/VORR/VEOR step, and
the remaining elements are then scalarized.
Added:
Modified:
llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
llvm/test/Analysis/CostModel/ARM/reduce-bit.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 4df79da6e7e08a..e0d112c4a7eddb 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -1707,6 +1707,34 @@ ARMTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
getArithmeticInstrCost(Opcode, ValTy->getElementType(), CostKind);
}
+ if ((ISD == ISD::AND || ISD == ISD::OR || ISD == ISD::XOR) &&
+ (EltSize == 64 || EltSize == 32 || EltSize == 16 || EltSize == 8)) {
+ unsigned NumElts = cast<FixedVectorType>(ValTy)->getNumElements();
+ unsigned VecLimit =
+ ST->hasMVEIntegerOps() ? 128 : (ST->hasNEON() ? 64 : -1);
+ InstructionCost VecCost = 0;
+ while (isPowerOf2_32(NumElts) && NumElts * EltSize > VecLimit) {
+ Type *VecTy = FixedVectorType::get(ValTy->getElementType(), NumElts / 2);
+ VecCost += getArithmeticInstrCost(Opcode, VecTy, CostKind);
+ NumElts /= 2;
+ }
+ // For i16/i8, MVE will perform a VREV + VORR/VAND/VEOR for the 64bit vector
+ // step.
+ if (ST->hasMVEIntegerOps() && ValVT.getScalarSizeInBits() <= 16 &&
+ NumElts * EltSize == 64) {
+ Type *VecTy = FixedVectorType::get(ValTy->getElementType(), NumElts);
+ VecCost += ST->getMVEVectorCostFactor(CostKind) +
+ getArithmeticInstrCost(Opcode, VecTy, CostKind);
+ NumElts /= 2;
+ }
+
+ // From here we extract the elements and perform the and/or/xor.
+ InstructionCost ExtractCost = NumElts;
+ return VecCost + ExtractCost +
+ (NumElts - 1) * getArithmeticInstrCost(
+ Opcode, ValTy->getElementType(), CostKind);
+ }
+
if (!ST->hasMVEIntegerOps() || !ValVT.isSimple() || ISD != ISD::ADD ||
TTI::requiresOrderedReduction(FMF))
return BaseT::getArithmeticReductionCost(Opcode, ValTy, FMF, CostKind);
diff --git a/llvm/test/Analysis/CostModel/ARM/reduce-bit.ll b/llvm/test/Analysis/CostModel/ARM/reduce-bit.ll
index e0ea2ab10d2390..b38660df59a3e7 100644
--- a/llvm/test/Analysis/CostModel/ARM/reduce-bit.ll
+++ b/llvm/test/Analysis/CostModel/ARM/reduce-bit.ll
@@ -4,39 +4,39 @@
define void @and() {
; CHECK-V8-LABEL: 'and'
-; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v1i64 = call i64 @llvm.vector.reduce.and.v1i64(<1 x i64> undef)
-; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v2i64 = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> undef)
-; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %v4i64 = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> undef)
-; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v2i32 = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> undef)
-; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %v4i32 = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> undef)
-; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %v8i32 = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> undef)
-; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v2i16 = call i16 @llvm.vector.reduce.and.v2i16(<2 x i16> undef)
-; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %v4i16 = call i16 @llvm.vector.reduce.and.v4i16(<4 x i16> undef)
-; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 150 for instruction: %v8i16 = call i16 @llvm.vector.reduce.and.v8i16(<8 x i16> undef)
-; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 199 for instruction: %v16i16 = call i16 @llvm.vector.reduce.and.v16i16(<16 x i16> undef)
-; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v2i8 = call i8 @llvm.vector.reduce.and.v2i8(<2 x i8> undef)
-; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %v4i8 = call i8 @llvm.vector.reduce.and.v4i8(<4 x i8> undef)
-; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 150 for instruction: %v8i8 = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> undef)
-; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 391 for instruction: %v16i8 = call i8 @llvm.vector.reduce.and.v16i8(<16 x i8> undef)
-; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 488 for instruction: %v32i8 = call i8 @llvm.vector.reduce.and.v32i8(<32 x i8> undef)
+; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1i64 = call i64 @llvm.vector.reduce.and.v1i64(<1 x i64> undef)
+; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i64 = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> undef)
+; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i64 = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> undef)
+; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i32 = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> undef)
+; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i32 = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> undef)
+; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v8i32 = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> undef)
+; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i16 = call i16 @llvm.vector.reduce.and.v2i16(<2 x i16> undef)
+; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4i16 = call i16 @llvm.vector.reduce.and.v4i16(<4 x i16> undef)
+; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i16 = call i16 @llvm.vector.reduce.and.v8i16(<8 x i16> undef)
+; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v16i16 = call i16 @llvm.vector.reduce.and.v16i16(<16 x i16> undef)
+; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i8 = call i8 @llvm.vector.reduce.and.v2i8(<2 x i8> undef)
+; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4i8 = call i8 @llvm.vector.reduce.and.v4i8(<4 x i8> undef)
+; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v8i8 = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> undef)
+; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16i8 = call i8 @llvm.vector.reduce.and.v16i8(<16 x i8> undef)
+; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v32i8 = call i8 @llvm.vector.reduce.and.v32i8(<32 x i8> undef)
; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; CHECK-MVEI-LABEL: 'and'
-; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v1i64 = call i64 @llvm.vector.reduce.and.v1i64(<1 x i64> undef)
-; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %v2i64 = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> undef)
-; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %v4i64 = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> undef)
-; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %v2i32 = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> undef)
-; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %v4i32 = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> undef)
-; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 202 for instruction: %v8i32 = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> undef)
-; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %v2i16 = call i16 @llvm.vector.reduce.and.v2i16(<2 x i16> undef)
-; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %v4i16 = call i16 @llvm.vector.reduce.and.v4i16(<4 x i16> undef)
-; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 394 for instruction: %v8i16 = call i16 @llvm.vector.reduce.and.v8i16(<8 x i16> undef)
-; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 524 for instruction: %v16i16 = call i16 @llvm.vector.reduce.and.v16i16(<16 x i16> undef)
-; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %v2i8 = call i8 @llvm.vector.reduce.and.v2i8(<2 x i8> undef)
-; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %v4i8 = call i8 @llvm.vector.reduce.and.v4i8(<4 x i8> undef)
-; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 394 for instruction: %v8i8 = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> undef)
-; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 1036 for instruction: %v16i8 = call i8 @llvm.vector.reduce.and.v16i8(<16 x i8> undef)
-; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 1294 for instruction: %v32i8 = call i8 @llvm.vector.reduce.and.v32i8(<32 x i8> undef)
+; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1i64 = call i64 @llvm.vector.reduce.and.v1i64(<1 x i64> undef)
+; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> undef)
+; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v4i64 = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> undef)
+; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i32 = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> undef)
+; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4i32 = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> undef)
+; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v8i32 = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> undef)
+; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i16 = call i16 @llvm.vector.reduce.and.v2i16(<2 x i16> undef)
+; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4i16 = call i16 @llvm.vector.reduce.and.v4i16(<4 x i16> undef)
+; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v8i16 = call i16 @llvm.vector.reduce.and.v8i16(<8 x i16> undef)
+; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v16i16 = call i16 @llvm.vector.reduce.and.v16i16(<16 x i16> undef)
+; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i8 = call i8 @llvm.vector.reduce.and.v2i8(<2 x i8> undef)
+; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4i8 = call i8 @llvm.vector.reduce.and.v4i8(<4 x i8> undef)
+; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v8i8 = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> undef)
+; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %v16i8 = call i8 @llvm.vector.reduce.and.v16i8(<16 x i8> undef)
+; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %v32i8 = call i8 @llvm.vector.reduce.and.v32i8(<32 x i8> undef)
; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
entry:
@@ -60,39 +60,39 @@ entry:
define void @or() {
; CHECK-V8-LABEL: 'or'
-; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v1i64 = call i64 @llvm.vector.reduce.or.v1i64(<1 x i64> undef)
-; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v2i64 = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> undef)
-; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %v4i64 = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> undef)
-; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v2i32 = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> undef)
-; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %v4i32 = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> undef)
-; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %v8i32 = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> undef)
-; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v2i16 = call i16 @llvm.vector.reduce.or.v2i16(<2 x i16> undef)
-; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %v4i16 = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> undef)
-; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 150 for instruction: %v8i16 = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> undef)
-; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 199 for instruction: %v16i16 = call i16 @llvm.vector.reduce.or.v16i16(<16 x i16> undef)
-; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v2i8 = call i8 @llvm.vector.reduce.or.v2i8(<2 x i8> undef)
-; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %v4i8 = call i8 @llvm.vector.reduce.or.v4i8(<4 x i8> undef)
-; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 150 for instruction: %v8i8 = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> undef)
-; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 391 for instruction: %v16i8 = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> undef)
-; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 488 for instruction: %v32i8 = call i8 @llvm.vector.reduce.or.v32i8(<32 x i8> undef)
+; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1i64 = call i64 @llvm.vector.reduce.or.v1i64(<1 x i64> undef)
+; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i64 = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> undef)
+; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i64 = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> undef)
+; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i32 = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> undef)
+; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i32 = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> undef)
+; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v8i32 = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> undef)
+; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i16 = call i16 @llvm.vector.reduce.or.v2i16(<2 x i16> undef)
+; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4i16 = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> undef)
+; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i16 = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> undef)
+; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v16i16 = call i16 @llvm.vector.reduce.or.v16i16(<16 x i16> undef)
+; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i8 = call i8 @llvm.vector.reduce.or.v2i8(<2 x i8> undef)
+; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4i8 = call i8 @llvm.vector.reduce.or.v4i8(<4 x i8> undef)
+; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v8i8 = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> undef)
+; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16i8 = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> undef)
+; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v32i8 = call i8 @llvm.vector.reduce.or.v32i8(<32 x i8> undef)
; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; CHECK-MVEI-LABEL: 'or'
-; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v1i64 = call i64 @llvm.vector.reduce.or.v1i64(<1 x i64> undef)
-; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %v2i64 = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> undef)
-; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %v4i64 = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> undef)
-; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %v2i32 = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> undef)
-; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %v4i32 = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> undef)
-; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 202 for instruction: %v8i32 = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> undef)
-; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %v2i16 = call i16 @llvm.vector.reduce.or.v2i16(<2 x i16> undef)
-; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %v4i16 = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> undef)
-; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 394 for instruction: %v8i16 = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> undef)
-; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 524 for instruction: %v16i16 = call i16 @llvm.vector.reduce.or.v16i16(<16 x i16> undef)
-; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %v2i8 = call i8 @llvm.vector.reduce.or.v2i8(<2 x i8> undef)
-; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %v4i8 = call i8 @llvm.vector.reduce.or.v4i8(<4 x i8> undef)
-; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 394 for instruction: %v8i8 = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> undef)
-; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 1036 for instruction: %v16i8 = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> undef)
-; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 1294 for instruction: %v32i8 = call i8 @llvm.vector.reduce.or.v32i8(<32 x i8> undef)
+; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1i64 = call i64 @llvm.vector.reduce.or.v1i64(<1 x i64> undef)
+; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> undef)
+; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v4i64 = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> undef)
+; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i32 = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> undef)
+; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4i32 = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> undef)
+; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v8i32 = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> undef)
+; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i16 = call i16 @llvm.vector.reduce.or.v2i16(<2 x i16> undef)
+; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4i16 = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> undef)
+; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v8i16 = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> undef)
+; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v16i16 = call i16 @llvm.vector.reduce.or.v16i16(<16 x i16> undef)
+; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i8 = call i8 @llvm.vector.reduce.or.v2i8(<2 x i8> undef)
+; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4i8 = call i8 @llvm.vector.reduce.or.v4i8(<4 x i8> undef)
+; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v8i8 = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> undef)
+; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %v16i8 = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> undef)
+; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %v32i8 = call i8 @llvm.vector.reduce.or.v32i8(<32 x i8> undef)
; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
entry:
@@ -116,39 +116,39 @@ entry:
define void @xor() {
; CHECK-V8-LABEL: 'xor'
-; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v1i64 = call i64 @llvm.vector.reduce.xor.v1i64(<1 x i64> undef)
-; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v2i64 = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> undef)
-; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %v4i64 = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> undef)
-; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v2i32 = call i32 @llvm.vector.reduce.xor.v2i32(<2 x i32> undef)
-; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %v4i32 = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> undef)
-; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %v8i32 = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> undef)
-; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v2i16 = call i16 @llvm.vector.reduce.xor.v2i16(<2 x i16> undef)
-; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %v4i16 = call i16 @llvm.vector.reduce.xor.v4i16(<4 x i16> undef)
-; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 150 for instruction: %v8i16 = call i16 @llvm.vector.reduce.xor.v8i16(<8 x i16> undef)
-; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 199 for instruction: %v16i16 = call i16 @llvm.vector.reduce.xor.v16i16(<16 x i16> undef)
-; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v2i8 = call i8 @llvm.vector.reduce.xor.v2i8(<2 x i8> undef)
-; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %v4i8 = call i8 @llvm.vector.reduce.xor.v4i8(<4 x i8> undef)
-; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 150 for instruction: %v8i8 = call i8 @llvm.vector.reduce.xor.v8i8(<8 x i8> undef)
-; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 391 for instruction: %v16i8 = call i8 @llvm.vector.reduce.xor.v16i8(<16 x i8> undef)
-; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 488 for instruction: %v32i8 = call i8 @llvm.vector.reduce.xor.v32i8(<32 x i8> undef)
+; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1i64 = call i64 @llvm.vector.reduce.xor.v1i64(<1 x i64> undef)
+; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i64 = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> undef)
+; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i64 = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> undef)
+; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i32 = call i32 @llvm.vector.reduce.xor.v2i32(<2 x i32> undef)
+; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i32 = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> undef)
+; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v8i32 = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> undef)
+; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i16 = call i16 @llvm.vector.reduce.xor.v2i16(<2 x i16> undef)
+; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4i16 = call i16 @llvm.vector.reduce.xor.v4i16(<4 x i16> undef)
+; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i16 = call i16 @llvm.vector.reduce.xor.v8i16(<8 x i16> undef)
+; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v16i16 = call i16 @llvm.vector.reduce.xor.v16i16(<16 x i16> undef)
+; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i8 = call i8 @llvm.vector.reduce.xor.v2i8(<2 x i8> undef)
+; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4i8 = call i8 @llvm.vector.reduce.xor.v4i8(<4 x i8> undef)
+; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v8i8 = call i8 @llvm.vector.reduce.xor.v8i8(<8 x i8> undef)
+; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16i8 = call i8 @llvm.vector.reduce.xor.v16i8(<16 x i8> undef)
+; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v32i8 = call i8 @llvm.vector.reduce.xor.v32i8(<32 x i8> undef)
; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; CHECK-MVEI-LABEL: 'xor'
-; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v1i64 = call i64 @llvm.vector.reduce.xor.v1i64(<1 x i64> undef)
-; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %v2i64 = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> undef)
-; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %v4i64 = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> undef)
-; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %v2i32 = call i32 @llvm.vector.reduce.xor.v2i32(<2 x i32> undef)
-; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %v4i32 = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> undef)
-; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 202 for instruction: %v8i32 = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> undef)
-; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %v2i16 = call i16 @llvm.vector.reduce.xor.v2i16(<2 x i16> undef)
-; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %v4i16 = call i16 @llvm.vector.reduce.xor.v4i16(<4 x i16> undef)
-; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 394 for instruction: %v8i16 = call i16 @llvm.vector.reduce.xor.v8i16(<8 x i16> undef)
-; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 524 for instruction: %v16i16 = call i16 @llvm.vector.reduce.xor.v16i16(<16 x i16> undef)
-; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %v2i8 = call i8 @llvm.vector.reduce.xor.v2i8(<2 x i8> undef)
-; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %v4i8 = call i8 @llvm.vector.reduce.xor.v4i8(<4 x i8> undef)
-; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 394 for instruction: %v8i8 = call i8 @llvm.vector.reduce.xor.v8i8(<8 x i8> undef)
-; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 1036 for instruction: %v16i8 = call i8 @llvm.vector.reduce.xor.v16i8(<16 x i8> undef)
-; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 1294 for instruction: %v32i8 = call i8 @llvm.vector.reduce.xor.v32i8(<32 x i8> undef)
+; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1i64 = call i64 @llvm.vector.reduce.xor.v1i64(<1 x i64> undef)
+; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> undef)
+; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v4i64 = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> undef)
+; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i32 = call i32 @llvm.vector.reduce.xor.v2i32(<2 x i32> undef)
+; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4i32 = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> undef)
+; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v8i32 = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> undef)
+; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i16 = call i16 @llvm.vector.reduce.xor.v2i16(<2 x i16> undef)
+; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4i16 = call i16 @llvm.vector.reduce.xor.v4i16(<4 x i16> undef)
+; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v8i16 = call i16 @llvm.vector.reduce.xor.v8i16(<8 x i16> undef)
+; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v16i16 = call i16 @llvm.vector.reduce.xor.v16i16(<16 x i16> undef)
+; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i8 = call i8 @llvm.vector.reduce.xor.v2i8(<2 x i8> undef)
+; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4i8 = call i8 @llvm.vector.reduce.xor.v4i8(<4 x i8> undef)
+; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v8i8 = call i8 @llvm.vector.reduce.xor.v8i8(<8 x i8> undef)
+; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %v16i8 = call i8 @llvm.vector.reduce.xor.v16i8(<16 x i8> undef)
+; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %v32i8 = call i8 @llvm.vector.reduce.xor.v32i8(<32 x i8> undef)
; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
entry:
More information about the llvm-commits mailing list