[llvm] 9c7aab3 - [SLP] Use vector types for cmp alt instructions costs
David Green via llvm-commits
llvm-commits at lists.llvm.org
Wed Jun 28 13:02:34 PDT 2023
Author: David Green
Date: 2023-06-28T21:02:29+01:00
New Revision: 9c7aab362a4579d5ce26f4af2564b04720ec23a2
URL: https://github.com/llvm/llvm-project/commit/9c7aab362a4579d5ce26f4af2564b04720ec23a2
DIFF: https://github.com/llvm/llvm-project/commit/9c7aab362a4579d5ce26f4af2564b04720ec23a2.diff
LOG: [SLP] Use vector types for cmp alt instructions costs
Similar to the other code that costs main/alt instructions, the cmp should be
using the VecTy for the costs, not the ScalarTy.
One of the tests look like it gets worse just because it is not simplified to
0.
Differential Revision: https://reviews.llvm.org/D153507
Added:
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
llvm/test/Transforms/SLPVectorizer/AArch64/extracts-from-scalarizable-vector.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 20a2d4bd38ea2..efdbc66aef029 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -7889,11 +7889,11 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
VecCost +=
TTI->getArithmeticInstrCost(E->getAltOpcode(), VecTy, CostKind);
} else if (auto *CI0 = dyn_cast<CmpInst>(VL0)) {
- VecCost = TTI->getCmpSelInstrCost(E->getOpcode(), ScalarTy,
- Builder.getInt1Ty(),
+ auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(), VL.size());
+ VecCost = TTI->getCmpSelInstrCost(E->getOpcode(), VecTy, MaskTy,
CI0->getPredicate(), CostKind, VL0);
VecCost += TTI->getCmpSelInstrCost(
- E->getOpcode(), ScalarTy, Builder.getInt1Ty(),
+ E->getOpcode(), VecTy, MaskTy,
cast<CmpInst>(E->getAltOp())->getPredicate(), CostKind,
E->getAltOp());
} else {
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/extracts-from-scalarizable-vector.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/extracts-from-scalarizable-vector.ll
index ac4219e4c7f3f..c99dd53117e5f 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/extracts-from-scalarizable-vector.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/extracts-from-scalarizable-vector.ll
@@ -4,9 +4,16 @@
define i1 @degenerate() {
; CHECK-LABEL: define i1 @degenerate() {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = freeze <4 x i1> zeroinitializer
-; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP0]])
-; CHECK-NEXT: ret i1 [[TMP1]]
+; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x fp128> zeroinitializer, i32 0
+; CHECK-NEXT: [[CMP:%.*]] = fcmp ogt fp128 [[TMP0]], 0xL00000000000000000000000000000000
+; CHECK-NEXT: [[CMP3:%.*]] = fcmp olt fp128 [[TMP0]], 0xL00000000000000000000000000000000
+; CHECK-NEXT: [[OR_COND:%.*]] = and i1 [[CMP]], [[CMP3]]
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x fp128> zeroinitializer, i32 0
+; CHECK-NEXT: [[CMP6:%.*]] = fcmp ogt fp128 [[TMP1]], 0xL00000000000000000000000000000000
+; CHECK-NEXT: [[OR_COND29:%.*]] = select i1 [[OR_COND]], i1 [[CMP6]], i1 false
+; CHECK-NEXT: [[CMP10:%.*]] = fcmp olt fp128 [[TMP1]], 0xL00000000000000000000000000000000
+; CHECK-NEXT: [[OR_COND30:%.*]] = select i1 [[OR_COND29]], i1 [[CMP10]], i1 false
+; CHECK-NEXT: ret i1 [[OR_COND30]]
;
entry:
%0 = extractelement <4 x fp128> zeroinitializer, i32 0
@@ -25,13 +32,16 @@ define i1 @with_inputs(<4 x fp128> %a) {
; CHECK-LABEL: define i1 @with_inputs
; CHECK-SAME: (<4 x fp128> [[A:%.*]]) {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x fp128> [[A]], <4 x fp128> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
-; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt <4 x fp128> [[TMP0]], zeroinitializer
-; CHECK-NEXT: [[TMP2:%.*]] = fcmp ogt <4 x fp128> [[TMP0]], zeroinitializer
-; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
-; CHECK-NEXT: [[TMP4:%.*]] = freeze <4 x i1> [[TMP3]]
-; CHECK-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP4]])
-; CHECK-NEXT: ret i1 [[TMP5]]
+; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x fp128> [[A]], i32 0
+; CHECK-NEXT: [[CMP:%.*]] = fcmp ogt fp128 [[TMP0]], 0xL00000000000000000000000000000000
+; CHECK-NEXT: [[CMP3:%.*]] = fcmp olt fp128 [[TMP0]], 0xL00000000000000000000000000000000
+; CHECK-NEXT: [[OR_COND:%.*]] = and i1 [[CMP]], [[CMP3]]
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x fp128> [[A]], i32 1
+; CHECK-NEXT: [[CMP6:%.*]] = fcmp ogt fp128 [[TMP1]], 0xL00000000000000000000000000000000
+; CHECK-NEXT: [[OR_COND29:%.*]] = select i1 [[OR_COND]], i1 [[CMP6]], i1 false
+; CHECK-NEXT: [[CMP10:%.*]] = fcmp olt fp128 [[TMP1]], 0xL00000000000000000000000000000000
+; CHECK-NEXT: [[OR_COND30:%.*]] = select i1 [[OR_COND29]], i1 [[CMP10]], i1 false
+; CHECK-NEXT: ret i1 [[OR_COND30]]
;
entry:
%0 = extractelement <4 x fp128> %a, i32 0
More information about the llvm-commits
mailing list