[llvm] 0c0b0ea - [SPARC] Mark branches as being expensive in early Niagara CPUs (#166489)

Wed Nov 5 13:33:01 PST 2025

Author: Koakuma
Date: 2025-11-06T04:32:56+07:00
New Revision: 0c0b0ea887c8c82881e91b7a6e7ce48ebbe33e61

URL: https://github.com/llvm/llvm-project/commit/0c0b0ea887c8c82881e91b7a6e7ce48ebbe33e61
DIFF: https://github.com/llvm/llvm-project/commit/0c0b0ea887c8c82881e91b7a6e7ce48ebbe33e61.diff

LOG: [SPARC] Mark branches as being expensive in early Niagara CPUs (#166489)

Early Niagara processors (T1-T3) lack any branch predictor, yet they
also have a pipeline long enough that the delay slot cannot cover for
all of the branch latency.
This means that branch instructions will stall the processor for a
couple cycles, which makes them an expensive operation. Additionally,
the high cost of branching means that it's still profitable to prefer
conditional moves even when the conditional is predictable, so let LLVM
know about both things.

On SPARC T2, a pgbench test seems to show a modest, but pretty consistent
speedup (up to around 3%).

Added: 
    llvm/test/CodeGen/SPARC/predictable-select.ll

Modified: 
    llvm/lib/Target/Sparc/Sparc.td
    llvm/lib/Target/Sparc/SparcISelLowering.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/Sparc/Sparc.td b/llvm/lib/Target/Sparc/Sparc.td
index 7137e5fbff4ff..38b0508885069 100644

--- a/llvm/lib/Target/Sparc/Sparc.td
+++ b/llvm/lib/Target/Sparc/Sparc.td
@@ -95,6 +95,9 @@ def FeatureSoftFloat : SubtargetFeature<"soft-float", "UseSoftFloat", "true",
 def TuneSlowRDPC : SubtargetFeature<"slow-rdpc", "HasSlowRDPC", "true",
                                     "rd %pc, %XX is slow", [FeatureV9]>;
 
+def TuneNoPredictor : SubtargetFeature<"no-predictor", "HasNoPredictor", "true",
+                                    "Processor has no branch predictor, branches stall execution", []>;
+
 //==== Features added predmoninantly for LEON subtarget support
 include "LeonFeatures.td"
 
@@ -174,12 +177,15 @@ def : Proc<"ultrasparc3",     [FeatureV9, FeatureV8Deprecated, FeatureVIS,
                                FeatureVIS2],
                               [TuneSlowRDPC]>;
 def : Proc<"niagara",         [FeatureV9, FeatureV8Deprecated, FeatureVIS,
-                               FeatureVIS2, FeatureUA2005]>;
+                               FeatureVIS2, FeatureUA2005],
+                              [TuneNoPredictor]>;
 def : Proc<"niagara2",        [FeatureV9, FeatureV8Deprecated, UsePopc,
-                               FeatureVIS, FeatureVIS2, FeatureUA2005]>;
+                               FeatureVIS, FeatureVIS2, FeatureUA2005],
+                              [TuneNoPredictor]>;
 def : Proc<"niagara3",        [FeatureV9, FeatureV8Deprecated, UsePopc,
                                FeatureVIS, FeatureVIS2, FeatureVIS3,
-                               FeatureUA2005, FeatureUA2007]>;
+                               FeatureUA2005, FeatureUA2007],
+                              [TuneNoPredictor]>;
 def : Proc<"niagara4",        [FeatureV9, FeatureV8Deprecated, UsePopc,
                                FeatureVIS, FeatureVIS2, FeatureVIS3,
                                FeatureUA2005, FeatureUA2007, FeatureOSA2011,

diff  --git a/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/llvm/lib/Target/Sparc/SparcISelLowering.cpp
index cbb7db68f7e7c..ae3c32687c207 100644
--- a/llvm/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/llvm/lib/Target/Sparc/SparcISelLowering.cpp
@@ -2000,6 +2000,14 @@ SparcTargetLowering::SparcTargetLowering(const TargetMachine &TM,
 
   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
 
+  // Some processors have no branch predictor and have pipelines longer than
+  // what can be covered by the delay slot. This results in a stall, so mark
+  // branches to be expensive on those processors.
+  setJumpIsExpensive(Subtarget->hasNoPredictor());
+  // The high cost of branching means that using conditional moves will
+  // still be profitable even if the condition is predictable.
+  PredictableSelectIsExpensive = !isJumpExpensive();
+
   setMinFunctionAlignment(Align(4));
 
   computeRegisterProperties(Subtarget->getRegisterInfo());

diff  --git a/llvm/test/CodeGen/SPARC/predictable-select.ll b/llvm/test/CodeGen/SPARC/predictable-select.ll
new file mode 100644
index 0000000000000..cf200a121d0f1
--- /dev/null
+++ b/llvm/test/CodeGen/SPARC/predictable-select.ll
@@ -0,0 +1,80 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -O3 < %s -relocation-model=pic -mtriple=sparc -mcpu=v9 | FileCheck --check-prefix=SPARC %s
+; RUN: llc -O3 < %s -relocation-model=pic -mtriple=sparcv9 | FileCheck --check-prefix=SPARC64 %s
+; RUN: llc -O3 < %s -relocation-model=pic -mtriple=sparc -mcpu=v9 -mattr=+no-predictor | FileCheck --check-prefix=SPARC-NO-PREDICTOR %s
+; RUN: llc -O3 < %s -relocation-model=pic -mtriple=sparcv9 -mattr=+no-predictor | FileCheck --check-prefix=SPARC64-NO-PREDICTOR %s
+
+;; Normally, highly predictable selects should be turned into branches.
+;; On the other hand, early Niagara processors should prefer conditional moves
+;; over branches even when it's predictable.
+
+define i32 @cdiv(i32 %cond, i32 %num) #0 {
+; SPARC-LABEL: cdiv:
+; SPARC:       ! %bb.0: ! %entry
+; SPARC-NEXT:    cmp %o0, 0
+; SPARC-NEXT:    be %icc, .LBB0_2
+; SPARC-NEXT:    mov %o1, %o0
+; SPARC-NEXT:  ! %bb.1: ! %select.end
+; SPARC-NEXT:    retl
+; SPARC-NEXT:    nop
+; SPARC-NEXT:  .LBB0_2: ! %select.true.sink
+; SPARC-NEXT:    sethi 1398101, %o1
+; SPARC-NEXT:    or %o1, 342, %o1
+; SPARC-NEXT:    smul %o0, %o1, %o0
+; SPARC-NEXT:    rd %y, %o0
+; SPARC-NEXT:    srl %o0, 31, %o1
+; SPARC-NEXT:    retl
+; SPARC-NEXT:    add %o0, %o1, %o0
+;
+; SPARC64-LABEL: cdiv:
+; SPARC64:       ! %bb.0: ! %entry
+; SPARC64-NEXT:    cmp %o0, 0
+; SPARC64-NEXT:    be %icc, .LBB0_2
+; SPARC64-NEXT:    mov %o1, %o0
+; SPARC64-NEXT:  ! %bb.1: ! %select.end
+; SPARC64-NEXT:    retl
+; SPARC64-NEXT:    nop
+; SPARC64-NEXT:  .LBB0_2: ! %select.true.sink
+; SPARC64-NEXT:    sra %o0, 0, %o0
+; SPARC64-NEXT:    sethi 1398101, %o1
+; SPARC64-NEXT:    or %o1, 342, %o1
+; SPARC64-NEXT:    mulx %o0, %o1, %o0
+; SPARC64-NEXT:    srlx %o0, 63, %o1
+; SPARC64-NEXT:    srlx %o0, 32, %o0
+; SPARC64-NEXT:    retl
+; SPARC64-NEXT:    add %o0, %o1, %o0
+;
+; SPARC-NO-PREDICTOR-LABEL: cdiv:
+; SPARC-NO-PREDICTOR:       ! %bb.0: ! %entry
+; SPARC-NO-PREDICTOR-NEXT:    sethi 1398101, %o2
+; SPARC-NO-PREDICTOR-NEXT:    or %o2, 342, %o2
+; SPARC-NO-PREDICTOR-NEXT:    smul %o1, %o2, %o2
+; SPARC-NO-PREDICTOR-NEXT:    rd %y, %o2
+; SPARC-NO-PREDICTOR-NEXT:    srl %o2, 31, %o3
+; SPARC-NO-PREDICTOR-NEXT:    add %o2, %o3, %o2
+; SPARC-NO-PREDICTOR-NEXT:    cmp %o0, 0
+; SPARC-NO-PREDICTOR-NEXT:    move %icc, %o2, %o1
+; SPARC-NO-PREDICTOR-NEXT:    retl
+; SPARC-NO-PREDICTOR-NEXT:    mov %o1, %o0
+;
+; SPARC64-NO-PREDICTOR-LABEL: cdiv:
+; SPARC64-NO-PREDICTOR:       ! %bb.0: ! %entry
+; SPARC64-NO-PREDICTOR-NEXT:    sra %o1, 0, %o2
+; SPARC64-NO-PREDICTOR-NEXT:    sethi 1398101, %o3
+; SPARC64-NO-PREDICTOR-NEXT:    or %o3, 342, %o3
+; SPARC64-NO-PREDICTOR-NEXT:    mulx %o2, %o3, %o2
+; SPARC64-NO-PREDICTOR-NEXT:    srlx %o2, 63, %o3
+; SPARC64-NO-PREDICTOR-NEXT:    srlx %o2, 32, %o2
+; SPARC64-NO-PREDICTOR-NEXT:    add %o2, %o3, %o2
+; SPARC64-NO-PREDICTOR-NEXT:    cmp %o0, 0
+; SPARC64-NO-PREDICTOR-NEXT:    move %icc, %o2, %o1
+; SPARC64-NO-PREDICTOR-NEXT:    retl
+; SPARC64-NO-PREDICTOR-NEXT:    mov %o1, %o0
+entry:
+  %div = sdiv i32 %num, 3
+  %cmp = icmp eq i32 %cond, 0
+  %ret = select i1 %cmp, i32 %div, i32 %num
+  ret i32 %ret
+}
+
+attributes #0 = { nounwind }