[llvm] [RISCV][CostModel] Change select cost to 2 (PR #75154)

Tue Dec 12 01:00:29 PST 2023

llvmbot wrote:



@llvm/pr-subscribers-backend-risc-v

@llvm/pr-subscribers-llvm-transforms

Author: None (ShivaChen)

<details>
<summary>Changes</summary>

Change the cost of a scalar select to 2 to reflect that it might be expanded into a move and a branch.
The benefit is that this could encourage SLP to generate a vector merge instead, reducing the number of branches.

---
Full diff: https://github.com/llvm/llvm-project/pull/75154.diff


2 Files Affected:

- (modified) llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp (+5) 
- (added) llvm/test/Transforms/LoopVectorize/RISCV/slp-select-cost-2.ll (+55) 


``````````diff

diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 3a2f2f39cd1c9b..0defc69cbd1bc2 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -1401,6 +1401,11 @@ InstructionCost RISCVTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
     return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind,
                                      I);
 
+  // Select might be expanded to move and branch.
+  if (TLI->InstructionOpcodeToISD(Opcode) == ISD::SELECT &&
+      !ValTy->isVectorTy())
+    return 2;
+
   if (isa<FixedVectorType>(ValTy) && !ST->useRVVForFixedLengthVectors())
     return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind,
                                      I);
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/slp-select-cost-2.ll b/llvm/test/Transforms/LoopVectorize/RISCV/slp-select-cost-2.ll
new file mode 100644
index 00000000000000..e3bced6377c50e
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/slp-select-cost-2.ll
@@ -0,0 +1,55 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=slp-vectorizer -mtriple=riscv64 -mattr=+v -S | FileCheck %s
+
+@s1 = dso_local local_unnamed_addr global [4 x double] zeroinitializer, align 32
+@s2 = dso_local local_unnamed_addr global [4 x double] zeroinitializer, align 32
+@s3 = dso_local local_unnamed_addr global [64 x double] zeroinitializer, align 32
+
+define void @foo() {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x double>, ptr @s1, align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x double>, ptr @s2, align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = fcmp fast ogt <4 x double> [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x double> <double -1.000000e+00, double -1.000000e+00, double -1.000000e+00, double -1.000000e+00>, <4 x double> zeroinitializer
+; CHECK-NEXT:    store <4 x double> [[TMP3]], ptr @s3, align 8
+; CHECK-NEXT:    [[TMP4:%.*]] = fcmp fast ule <4 x double> [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    [[TMP5:%.*]] = select <4 x i1> [[TMP4]], <4 x double> <double -1.000000e+00, double -1.000000e+00, double -1.000000e+00, double -1.000000e+00>, <4 x double> zeroinitializer
+; CHECK-NEXT:    store <4 x double> [[TMP5]], ptr getelementptr inbounds ([64 x double], ptr @s3, i64 0, i64 4), align 8
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = load double, ptr @s1, align 8
+  %1 = load double, ptr @s2, align 8
+  %cmp3 = fcmp fast ogt double %0, %1
+  %cond = select fast i1 %cmp3, double -1.000000e+00, double 0.000000e+00
+  store double %cond, ptr @s3, align 8
+  %2 = load double, ptr getelementptr inbounds ([4 x double], ptr @s1, i64 0, i64 1), align 8
+  %3 = load double, ptr getelementptr inbounds ([4 x double], ptr @s2, i64 0, i64 1), align 8
+  %cmp3.1 = fcmp fast ogt double %2, %3
+  %cond.1 = select fast i1 %cmp3.1, double -1.000000e+00, double 0.000000e+00
+  store double %cond.1, ptr getelementptr inbounds ([64 x double], ptr @s3, i64 0, i64 1), align 8
+  %4 = load double, ptr getelementptr inbounds ([4 x double], ptr @s1, i64 0, i64 2), align 8
+  %5 = load double, ptr getelementptr inbounds ([4 x double], ptr @s2, i64 0, i64 2), align 8
+  %cmp3.2 = fcmp fast ogt double %4, %5
+  %cond.2 = select fast i1 %cmp3.2, double -1.000000e+00, double 0.000000e+00
+  store double %cond.2, ptr getelementptr inbounds ([64 x double], ptr @s3, i64 0, i64 2), align 8
+  %6 = load double, ptr getelementptr inbounds ([4 x double], ptr @s1, i64 0, i64 3), align 8
+  %7 = load double, ptr getelementptr inbounds ([4 x double], ptr @s2, i64 0, i64 3), align 8
+  %cmp3.3 = fcmp fast ogt double %6, %7
+  %cond.3 = select fast i1 %cmp3.3, double -1.000000e+00, double 0.000000e+00
+  store double %cond.3, ptr getelementptr inbounds ([64 x double], ptr @s3, i64 0, i64 3), align 8
+  %cmp15 = fcmp fast ule double %0, %1
+  %cond16 = select fast i1 %cmp15, double -1.000000e+00, double 0.000000e+00
+  store double %cond16, ptr getelementptr inbounds ([64 x double], ptr @s3, i64 0, i64 4), align 8
+  %cmp15.1 = fcmp fast ule double %2, %3
+  %cond16.1 = select fast i1 %cmp15.1, double -1.000000e+00, double 0.000000e+00
+  store double %cond16.1, ptr getelementptr inbounds ([64 x double], ptr @s3, i64 0, i64 5), align 8
+  %cmp15.2 = fcmp fast ule double %4, %5
+  %cond16.2 = select fast i1 %cmp15.2, double -1.000000e+00, double 0.000000e+00
+  store double %cond16.2, ptr getelementptr inbounds ([64 x double], ptr @s3, i64 0, i64 6), align 8
+  %cmp15.3 = fcmp fast ule double %6, %7
+  %cond16.3 = select fast i1 %cmp15.3, double -1.000000e+00, double 0.000000e+00
+  store double %cond16.3, ptr getelementptr inbounds ([64 x double], ptr @s3, i64 0, i64 7), align 8
+  ret void
+}

``````````

</details>


https://github.com/llvm/llvm-project/pull/75154