[llvm] [RISCV][CostModel] Change select cost to 2 (PR #75154)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 12 18:04:31 PST 2023
https://github.com/ShivaChen updated https://github.com/llvm/llvm-project/pull/75154
>From aae3eef4cbb32b418e06fcabc4176b4dabaebb31 Mon Sep 17 00:00:00 2001
From: Shiva Chen <shiva.chen at imgtec.com>
Date: Tue, 12 Dec 2023 06:56:53 +0000
Subject: [PATCH 1/3] [RISCV] Precommit test for changing select cost to 2
---
.../LoopVectorize/RISCV/slp-select-cost-2.ll | 79 +++++++++++++++++++
1 file changed, 79 insertions(+)
create mode 100644 llvm/test/Transforms/LoopVectorize/RISCV/slp-select-cost-2.ll
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/slp-select-cost-2.ll b/llvm/test/Transforms/LoopVectorize/RISCV/slp-select-cost-2.ll
new file mode 100644
index 00000000000000..30de6988a41073
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/slp-select-cost-2.ll
@@ -0,0 +1,79 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=slp-vectorizer -mtriple=riscv64 -mattr=+v -S | FileCheck %s
+
+ at s1 = dso_local local_unnamed_addr global [4 x double] zeroinitializer, align 32
+ at s2 = dso_local local_unnamed_addr global [4 x double] zeroinitializer, align 32
+ at s3 = dso_local local_unnamed_addr global [64 x double] zeroinitializer, align 32
+
+define void @foo() {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load double, ptr @s1, align 8
+; CHECK-NEXT: [[TMP1:%.*]] = load double, ptr @s2, align 8
+; CHECK-NEXT: [[CMP3:%.*]] = fcmp fast ogt double [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[COND:%.*]] = select fast i1 [[CMP3]], double -1.000000e+00, double 0.000000e+00
+; CHECK-NEXT: store double [[COND]], ptr @s3, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load double, ptr getelementptr inbounds ([4 x double], ptr @s1, i64 0, i64 1), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load double, ptr getelementptr inbounds ([4 x double], ptr @s2, i64 0, i64 1), align 8
+; CHECK-NEXT: [[CMP3_1:%.*]] = fcmp fast ogt double [[TMP2]], [[TMP3]]
+; CHECK-NEXT: [[COND_1:%.*]] = select fast i1 [[CMP3_1]], double -1.000000e+00, double 0.000000e+00
+; CHECK-NEXT: store double [[COND_1]], ptr getelementptr inbounds ([64 x double], ptr @s3, i64 0, i64 1), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load double, ptr getelementptr inbounds ([4 x double], ptr @s1, i64 0, i64 2), align 8
+; CHECK-NEXT: [[TMP5:%.*]] = load double, ptr getelementptr inbounds ([4 x double], ptr @s2, i64 0, i64 2), align 8
+; CHECK-NEXT: [[CMP3_2:%.*]] = fcmp fast ogt double [[TMP4]], [[TMP5]]
+; CHECK-NEXT: [[COND_2:%.*]] = select fast i1 [[CMP3_2]], double -1.000000e+00, double 0.000000e+00
+; CHECK-NEXT: store double [[COND_2]], ptr getelementptr inbounds ([64 x double], ptr @s3, i64 0, i64 2), align 8
+; CHECK-NEXT: [[TMP6:%.*]] = load double, ptr getelementptr inbounds ([4 x double], ptr @s1, i64 0, i64 3), align 8
+; CHECK-NEXT: [[TMP7:%.*]] = load double, ptr getelementptr inbounds ([4 x double], ptr @s2, i64 0, i64 3), align 8
+; CHECK-NEXT: [[CMP3_3:%.*]] = fcmp fast ogt double [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[COND_3:%.*]] = select fast i1 [[CMP3_3]], double -1.000000e+00, double 0.000000e+00
+; CHECK-NEXT: store double [[COND_3]], ptr getelementptr inbounds ([64 x double], ptr @s3, i64 0, i64 3), align 8
+; CHECK-NEXT: [[CMP15:%.*]] = fcmp fast ule double [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[COND16:%.*]] = select fast i1 [[CMP15]], double -1.000000e+00, double 0.000000e+00
+; CHECK-NEXT: store double [[COND16]], ptr getelementptr inbounds ([64 x double], ptr @s3, i64 0, i64 4), align 8
+; CHECK-NEXT: [[CMP15_1:%.*]] = fcmp fast ule double [[TMP2]], [[TMP3]]
+; CHECK-NEXT: [[COND16_1:%.*]] = select fast i1 [[CMP15_1]], double -1.000000e+00, double 0.000000e+00
+; CHECK-NEXT: store double [[COND16_1]], ptr getelementptr inbounds ([64 x double], ptr @s3, i64 0, i64 5), align 8
+; CHECK-NEXT: [[CMP15_2:%.*]] = fcmp fast ule double [[TMP4]], [[TMP5]]
+; CHECK-NEXT: [[COND16_2:%.*]] = select fast i1 [[CMP15_2]], double -1.000000e+00, double 0.000000e+00
+; CHECK-NEXT: store double [[COND16_2]], ptr getelementptr inbounds ([64 x double], ptr @s3, i64 0, i64 6), align 8
+; CHECK-NEXT: [[CMP15_3:%.*]] = fcmp fast ule double [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[COND16_3:%.*]] = select fast i1 [[CMP15_3]], double -1.000000e+00, double 0.000000e+00
+; CHECK-NEXT: store double [[COND16_3]], ptr getelementptr inbounds ([64 x double], ptr @s3, i64 0, i64 7), align 8
+; CHECK-NEXT: ret void
+;
+entry:
+ %0 = load double, ptr @s1, align 8
+ %1 = load double, ptr @s2, align 8
+ %cmp3 = fcmp fast ogt double %0, %1
+ %cond = select fast i1 %cmp3, double -1.000000e+00, double 0.000000e+00
+ store double %cond, ptr @s3, align 8
+ %2 = load double, ptr getelementptr inbounds ([4 x double], ptr @s1, i64 0, i64 1), align 8
+ %3 = load double, ptr getelementptr inbounds ([4 x double], ptr @s2, i64 0, i64 1), align 8
+ %cmp3.1 = fcmp fast ogt double %2, %3
+ %cond.1 = select fast i1 %cmp3.1, double -1.000000e+00, double 0.000000e+00
+ store double %cond.1, ptr getelementptr inbounds ([64 x double], ptr @s3, i64 0, i64 1), align 8
+ %4 = load double, ptr getelementptr inbounds ([4 x double], ptr @s1, i64 0, i64 2), align 8
+ %5 = load double, ptr getelementptr inbounds ([4 x double], ptr @s2, i64 0, i64 2), align 8
+ %cmp3.2 = fcmp fast ogt double %4, %5
+ %cond.2 = select fast i1 %cmp3.2, double -1.000000e+00, double 0.000000e+00
+ store double %cond.2, ptr getelementptr inbounds ([64 x double], ptr @s3, i64 0, i64 2), align 8
+ %6 = load double, ptr getelementptr inbounds ([4 x double], ptr @s1, i64 0, i64 3), align 8
+ %7 = load double, ptr getelementptr inbounds ([4 x double], ptr @s2, i64 0, i64 3), align 8
+ %cmp3.3 = fcmp fast ogt double %6, %7
+ %cond.3 = select fast i1 %cmp3.3, double -1.000000e+00, double 0.000000e+00
+ store double %cond.3, ptr getelementptr inbounds ([64 x double], ptr @s3, i64 0, i64 3), align 8
+ %cmp15 = fcmp fast ule double %0, %1
+ %cond16 = select fast i1 %cmp15, double -1.000000e+00, double 0.000000e+00
+ store double %cond16, ptr getelementptr inbounds ([64 x double], ptr @s3, i64 0, i64 4), align 8
+ %cmp15.1 = fcmp fast ule double %2, %3
+ %cond16.1 = select fast i1 %cmp15.1, double -1.000000e+00, double 0.000000e+00
+ store double %cond16.1, ptr getelementptr inbounds ([64 x double], ptr @s3, i64 0, i64 5), align 8
+ %cmp15.2 = fcmp fast ule double %4, %5
+ %cond16.2 = select fast i1 %cmp15.2, double -1.000000e+00, double 0.000000e+00
+ store double %cond16.2, ptr getelementptr inbounds ([64 x double], ptr @s3, i64 0, i64 6), align 8
+ %cmp15.3 = fcmp fast ule double %6, %7
+ %cond16.3 = select fast i1 %cmp15.3, double -1.000000e+00, double 0.000000e+00
+ store double %cond16.3, ptr getelementptr inbounds ([64 x double], ptr @s3, i64 0, i64 7), align 8
+ ret void
+}
>From a2ca69b527bb14cdc87cb7113973c8bd2dbce305 Mon Sep 17 00:00:00 2001
From: Shiva Chen <shiva.chen at imgtec.com>
Date: Tue, 12 Dec 2023 07:46:51 +0000
Subject: [PATCH 2/3] [RISCV][CostModel] Change select cost to 2
Changing the select cost to 2 to reflect that it might be expanded to a move and
a branch. The benefit could be encouraging SLP to generate vector merges to
reduce branches.
---
.../Target/RISCV/RISCVTargetTransformInfo.cpp | 5 +++
.../LoopVectorize/RISCV/slp-select-cost-2.ll | 40 ++++---------------
2 files changed, 13 insertions(+), 32 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 3a2f2f39cd1c9b..0defc69cbd1bc2 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -1401,6 +1401,11 @@ InstructionCost RISCVTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind,
I);
+ // Select might be expanded to move and branch.
+ if (TLI->InstructionOpcodeToISD(Opcode) == ISD::SELECT &&
+ !ValTy->isVectorTy())
+ return 2;
+
if (isa<FixedVectorType>(ValTy) && !ST->useRVVForFixedLengthVectors())
return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind,
I);
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/slp-select-cost-2.ll b/llvm/test/Transforms/LoopVectorize/RISCV/slp-select-cost-2.ll
index 30de6988a41073..e3bced6377c50e 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/slp-select-cost-2.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/slp-select-cost-2.ll
@@ -8,38 +8,14 @@
define void @foo() {
; CHECK-LABEL: @foo(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = load double, ptr @s1, align 8
-; CHECK-NEXT: [[TMP1:%.*]] = load double, ptr @s2, align 8
-; CHECK-NEXT: [[CMP3:%.*]] = fcmp fast ogt double [[TMP0]], [[TMP1]]
-; CHECK-NEXT: [[COND:%.*]] = select fast i1 [[CMP3]], double -1.000000e+00, double 0.000000e+00
-; CHECK-NEXT: store double [[COND]], ptr @s3, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load double, ptr getelementptr inbounds ([4 x double], ptr @s1, i64 0, i64 1), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load double, ptr getelementptr inbounds ([4 x double], ptr @s2, i64 0, i64 1), align 8
-; CHECK-NEXT: [[CMP3_1:%.*]] = fcmp fast ogt double [[TMP2]], [[TMP3]]
-; CHECK-NEXT: [[COND_1:%.*]] = select fast i1 [[CMP3_1]], double -1.000000e+00, double 0.000000e+00
-; CHECK-NEXT: store double [[COND_1]], ptr getelementptr inbounds ([64 x double], ptr @s3, i64 0, i64 1), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load double, ptr getelementptr inbounds ([4 x double], ptr @s1, i64 0, i64 2), align 8
-; CHECK-NEXT: [[TMP5:%.*]] = load double, ptr getelementptr inbounds ([4 x double], ptr @s2, i64 0, i64 2), align 8
-; CHECK-NEXT: [[CMP3_2:%.*]] = fcmp fast ogt double [[TMP4]], [[TMP5]]
-; CHECK-NEXT: [[COND_2:%.*]] = select fast i1 [[CMP3_2]], double -1.000000e+00, double 0.000000e+00
-; CHECK-NEXT: store double [[COND_2]], ptr getelementptr inbounds ([64 x double], ptr @s3, i64 0, i64 2), align 8
-; CHECK-NEXT: [[TMP6:%.*]] = load double, ptr getelementptr inbounds ([4 x double], ptr @s1, i64 0, i64 3), align 8
-; CHECK-NEXT: [[TMP7:%.*]] = load double, ptr getelementptr inbounds ([4 x double], ptr @s2, i64 0, i64 3), align 8
-; CHECK-NEXT: [[CMP3_3:%.*]] = fcmp fast ogt double [[TMP6]], [[TMP7]]
-; CHECK-NEXT: [[COND_3:%.*]] = select fast i1 [[CMP3_3]], double -1.000000e+00, double 0.000000e+00
-; CHECK-NEXT: store double [[COND_3]], ptr getelementptr inbounds ([64 x double], ptr @s3, i64 0, i64 3), align 8
-; CHECK-NEXT: [[CMP15:%.*]] = fcmp fast ule double [[TMP0]], [[TMP1]]
-; CHECK-NEXT: [[COND16:%.*]] = select fast i1 [[CMP15]], double -1.000000e+00, double 0.000000e+00
-; CHECK-NEXT: store double [[COND16]], ptr getelementptr inbounds ([64 x double], ptr @s3, i64 0, i64 4), align 8
-; CHECK-NEXT: [[CMP15_1:%.*]] = fcmp fast ule double [[TMP2]], [[TMP3]]
-; CHECK-NEXT: [[COND16_1:%.*]] = select fast i1 [[CMP15_1]], double -1.000000e+00, double 0.000000e+00
-; CHECK-NEXT: store double [[COND16_1]], ptr getelementptr inbounds ([64 x double], ptr @s3, i64 0, i64 5), align 8
-; CHECK-NEXT: [[CMP15_2:%.*]] = fcmp fast ule double [[TMP4]], [[TMP5]]
-; CHECK-NEXT: [[COND16_2:%.*]] = select fast i1 [[CMP15_2]], double -1.000000e+00, double 0.000000e+00
-; CHECK-NEXT: store double [[COND16_2]], ptr getelementptr inbounds ([64 x double], ptr @s3, i64 0, i64 6), align 8
-; CHECK-NEXT: [[CMP15_3:%.*]] = fcmp fast ule double [[TMP6]], [[TMP7]]
-; CHECK-NEXT: [[COND16_3:%.*]] = select fast i1 [[CMP15_3]], double -1.000000e+00, double 0.000000e+00
-; CHECK-NEXT: store double [[COND16_3]], ptr getelementptr inbounds ([64 x double], ptr @s3, i64 0, i64 7), align 8
+; CHECK-NEXT: [[TMP0:%.*]] = load <4 x double>, ptr @s1, align 8
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x double>, ptr @s2, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = fcmp fast ogt <4 x double> [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x double> <double -1.000000e+00, double -1.000000e+00, double -1.000000e+00, double -1.000000e+00>, <4 x double> zeroinitializer
+; CHECK-NEXT: store <4 x double> [[TMP3]], ptr @s3, align 8
+; CHECK-NEXT: [[TMP4:%.*]] = fcmp fast ule <4 x double> [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP4]], <4 x double> <double -1.000000e+00, double -1.000000e+00, double -1.000000e+00, double -1.000000e+00>, <4 x double> zeroinitializer
+; CHECK-NEXT: store <4 x double> [[TMP5]], ptr getelementptr inbounds ([64 x double], ptr @s3, i64 0, i64 4), align 8
; CHECK-NEXT: ret void
;
entry:
>From 02559e2782fb7c65a733ae3a996c9baafb5182bb Mon Sep 17 00:00:00 2001
From: Shiva Chen <shiva.chen at imgtec.com>
Date: Wed, 13 Dec 2023 02:00:01 +0000
Subject: [PATCH 3/3] Increase cost to 2 only when without Zicond
---
llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 0defc69cbd1bc2..a6cec7598bddd0 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -1403,7 +1403,8 @@ InstructionCost RISCVTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
// Select might be expanded to move and branch.
if (TLI->InstructionOpcodeToISD(Opcode) == ISD::SELECT &&
- !ValTy->isVectorTy())
+ !ValTy->isVectorTy() && !ST->hasStdExtZicond() &&
+ !ST->hasVendorXVentanaCondOps())
return 2;
if (isa<FixedVectorType>(ValTy) && !ST->useRVVForFixedLengthVectors())
More information about the llvm-commits
mailing list