[PATCH] D153507: [SLP] Use vector types for cmp alt instructions costs
Dave Green via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Thu Jun 22 01:09:20 PDT 2023
dmgreen created this revision.
dmgreen added reviewers: ABataev, RKSimon, SjoerdMeijer, vporpo.
Herald added a subscriber: hiraditya.
Herald added a project: All.
dmgreen requested review of this revision.
Herald added a project: LLVM.
Similar to the other code that costs main/alt instructions, the cmp should be using the VecTy for the costs, not the ScalarTy.
One of the tests looks like it gets worse just because it is not simplified to 0.
https://reviews.llvm.org/D153507
Files:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
llvm/test/Transforms/SLPVectorizer/AArch64/extracts-from-scalarizable-vector.ll
Index: llvm/test/Transforms/SLPVectorizer/AArch64/extracts-from-scalarizable-vector.ll
===================================================================
--- llvm/test/Transforms/SLPVectorizer/AArch64/extracts-from-scalarizable-vector.ll
+++ llvm/test/Transforms/SLPVectorizer/AArch64/extracts-from-scalarizable-vector.ll
@@ -4,9 +4,16 @@
define i1 @degenerate() {
; CHECK-LABEL: define i1 @degenerate() {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = freeze <4 x i1> zeroinitializer
-; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP0]])
-; CHECK-NEXT: ret i1 [[TMP1]]
+; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x fp128> zeroinitializer, i32 0
+; CHECK-NEXT: [[CMP:%.*]] = fcmp ogt fp128 [[TMP0]], 0xL00000000000000000000000000000000
+; CHECK-NEXT: [[CMP3:%.*]] = fcmp olt fp128 [[TMP0]], 0xL00000000000000000000000000000000
+; CHECK-NEXT: [[OR_COND:%.*]] = and i1 [[CMP]], [[CMP3]]
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x fp128> zeroinitializer, i32 0
+; CHECK-NEXT: [[CMP6:%.*]] = fcmp ogt fp128 [[TMP1]], 0xL00000000000000000000000000000000
+; CHECK-NEXT: [[OR_COND29:%.*]] = select i1 [[OR_COND]], i1 [[CMP6]], i1 false
+; CHECK-NEXT: [[CMP10:%.*]] = fcmp olt fp128 [[TMP1]], 0xL00000000000000000000000000000000
+; CHECK-NEXT: [[OR_COND30:%.*]] = select i1 [[OR_COND29]], i1 [[CMP10]], i1 false
+; CHECK-NEXT: ret i1 [[OR_COND30]]
;
entry:
%0 = extractelement <4 x fp128> zeroinitializer, i32 0
@@ -25,13 +32,16 @@
; CHECK-LABEL: define i1 @with_inputs
; CHECK-SAME: (<4 x fp128> [[A:%.*]]) {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x fp128> [[A]], <4 x fp128> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
-; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt <4 x fp128> [[TMP0]], zeroinitializer
-; CHECK-NEXT: [[TMP2:%.*]] = fcmp ogt <4 x fp128> [[TMP0]], zeroinitializer
-; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
-; CHECK-NEXT: [[TMP4:%.*]] = freeze <4 x i1> [[TMP3]]
-; CHECK-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP4]])
-; CHECK-NEXT: ret i1 [[TMP5]]
+; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x fp128> [[A]], i32 0
+; CHECK-NEXT: [[CMP:%.*]] = fcmp ogt fp128 [[TMP0]], 0xL00000000000000000000000000000000
+; CHECK-NEXT: [[CMP3:%.*]] = fcmp olt fp128 [[TMP0]], 0xL00000000000000000000000000000000
+; CHECK-NEXT: [[OR_COND:%.*]] = and i1 [[CMP]], [[CMP3]]
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x fp128> [[A]], i32 1
+; CHECK-NEXT: [[CMP6:%.*]] = fcmp ogt fp128 [[TMP1]], 0xL00000000000000000000000000000000
+; CHECK-NEXT: [[OR_COND29:%.*]] = select i1 [[OR_COND]], i1 [[CMP6]], i1 false
+; CHECK-NEXT: [[CMP10:%.*]] = fcmp olt fp128 [[TMP1]], 0xL00000000000000000000000000000000
+; CHECK-NEXT: [[OR_COND30:%.*]] = select i1 [[OR_COND29]], i1 [[CMP10]], i1 false
+; CHECK-NEXT: ret i1 [[OR_COND30]]
;
entry:
%0 = extractelement <4 x fp128> %a, i32 0
Index: llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -7889,11 +7889,11 @@
VecCost +=
TTI->getArithmeticInstrCost(E->getAltOpcode(), VecTy, CostKind);
} else if (auto *CI0 = dyn_cast<CmpInst>(VL0)) {
- VecCost = TTI->getCmpSelInstrCost(E->getOpcode(), ScalarTy,
- Builder.getInt1Ty(),
+ auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(), VL.size());
+ VecCost = TTI->getCmpSelInstrCost(E->getOpcode(), VecTy, MaskTy,
CI0->getPredicate(), CostKind, VL0);
VecCost += TTI->getCmpSelInstrCost(
- E->getOpcode(), ScalarTy, Builder.getInt1Ty(),
+ E->getOpcode(), VecTy, MaskTy,
cast<CmpInst>(E->getAltOp())->getPredicate(), CostKind,
E->getAltOp());
} else {
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D153507.533494.patch
Type: text/x-patch
Size: 4093 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20230622/b7fcaa5c/attachment.bin>
More information about the llvm-commits
mailing list