[llvm] [SPARC] Mark branches as being expensive in early Niagara CPUs (PR #166489)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 5 08:10:10 PST 2025
https://github.com/koachan updated https://github.com/llvm/llvm-project/pull/166489
>From bc56d3650d6aa4aa585557532ee5b0c766ee8432 Mon Sep 17 00:00:00 2001
From: Koakuma <koachan at protonmail.com>
Date: Fri, 4 Apr 2025 13:38:49 +0700
Subject: [PATCH 1/3] [SPARC] Mark branches as being expensive in early Niagara
CPUs
Early Niagara processors (T1-T3) lack any branch predictor, yet they also
have a pipeline long enough that the delay slot cannot cover for all of
the branch latency.
This means that branch instructions will stall the processor for a couple of
cycles, which makes them an expensive operation. Additionally, the high cost
of branching means that it's still profitable to prefer conditional moves
even when the condition is predictable, so let LLVM know about both things.
---
llvm/lib/Target/Sparc/Sparc.td | 12 ++++--
llvm/lib/Target/Sparc/SparcISelLowering.cpp | 8 ++++
.../test/CodeGen/SPARC/select-earlyniagara.ll | 43 +++++++++++++++++++
3 files changed, 60 insertions(+), 3 deletions(-)
create mode 100644 llvm/test/CodeGen/SPARC/select-earlyniagara.ll
diff --git a/llvm/lib/Target/Sparc/Sparc.td b/llvm/lib/Target/Sparc/Sparc.td
index 7137e5fbff4ff..38b0508885069 100644
--- a/llvm/lib/Target/Sparc/Sparc.td
+++ b/llvm/lib/Target/Sparc/Sparc.td
@@ -95,6 +95,9 @@ def FeatureSoftFloat : SubtargetFeature<"soft-float", "UseSoftFloat", "true",
def TuneSlowRDPC : SubtargetFeature<"slow-rdpc", "HasSlowRDPC", "true",
"rd %pc, %XX is slow", [FeatureV9]>;
+def TuneNoPredictor : SubtargetFeature<"no-predictor", "HasNoPredictor", "true",
+ "Processor has no branch predictor, branches stall execution", []>;
+
//==== Features added predmoninantly for LEON subtarget support
include "LeonFeatures.td"
@@ -174,12 +177,15 @@ def : Proc<"ultrasparc3", [FeatureV9, FeatureV8Deprecated, FeatureVIS,
FeatureVIS2],
[TuneSlowRDPC]>;
def : Proc<"niagara", [FeatureV9, FeatureV8Deprecated, FeatureVIS,
- FeatureVIS2, FeatureUA2005]>;
+ FeatureVIS2, FeatureUA2005],
+ [TuneNoPredictor]>;
def : Proc<"niagara2", [FeatureV9, FeatureV8Deprecated, UsePopc,
- FeatureVIS, FeatureVIS2, FeatureUA2005]>;
+ FeatureVIS, FeatureVIS2, FeatureUA2005],
+ [TuneNoPredictor]>;
def : Proc<"niagara3", [FeatureV9, FeatureV8Deprecated, UsePopc,
FeatureVIS, FeatureVIS2, FeatureVIS3,
- FeatureUA2005, FeatureUA2007]>;
+ FeatureUA2005, FeatureUA2007],
+ [TuneNoPredictor]>;
def : Proc<"niagara4", [FeatureV9, FeatureV8Deprecated, UsePopc,
FeatureVIS, FeatureVIS2, FeatureVIS3,
FeatureUA2005, FeatureUA2007, FeatureOSA2011,
diff --git a/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/llvm/lib/Target/Sparc/SparcISelLowering.cpp
index cbb7db68f7e7c..ae3c32687c207 100644
--- a/llvm/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/llvm/lib/Target/Sparc/SparcISelLowering.cpp
@@ -2000,6 +2000,14 @@ SparcTargetLowering::SparcTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+ // Some processors have no branch predictor and have pipelines longer than
+ // what can be covered by the delay slot. This results in a stall, so mark
+ // branches to be expensive on those processors.
+ setJumpIsExpensive(Subtarget->hasNoPredictor());
+ // The high cost of branching means that using conditional moves will
+ // still be profitable even if the condition is predictable.
+ PredictableSelectIsExpensive = !isJumpExpensive();
+
setMinFunctionAlignment(Align(4));
computeRegisterProperties(Subtarget->getRegisterInfo());
diff --git a/llvm/test/CodeGen/SPARC/select-earlyniagara.ll b/llvm/test/CodeGen/SPARC/select-earlyniagara.ll
new file mode 100644
index 0000000000000..2cec10455d205
--- /dev/null
+++ b/llvm/test/CodeGen/SPARC/select-earlyniagara.ll
@@ -0,0 +1,43 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -O3 < %s -relocation-model=pic -mtriple=sparc -mcpu=v9 | FileCheck --check-prefix=SPARC %s
+; RUN: llc -O3 < %s -relocation-model=pic -mtriple=sparcv9 -mcpu=v9 | FileCheck --check-prefix=SPARC64 %s
+
+;; Early Niagara processors should prefer conditional moves over branches
+;; even when it's predictable.
+
+define i32 @cinc(i32 %cond, i32 %num) #0 {
+; SPARC-LABEL: cinc:
+; SPARC: ! %bb.0: ! %entry
+; SPARC-NEXT: cmp %o0, 0
+; SPARC-NEXT: bne %icc, .LBB0_2
+; SPARC-NEXT: mov %o1, %o0
+; SPARC-NEXT: ! %bb.1: ! %inc
+; SPARC-NEXT: add %o0, 1, %o0
+; SPARC-NEXT: .LBB0_2: ! %cont
+; SPARC-NEXT: retl
+; SPARC-NEXT: nop
+;
+; SPARC64-LABEL: cinc:
+; SPARC64: ! %bb.0: ! %entry
+; SPARC64-NEXT: cmp %o0, 0
+; SPARC64-NEXT: bne %icc, .LBB0_2
+; SPARC64-NEXT: mov %o1, %o0
+; SPARC64-NEXT: ! %bb.1: ! %inc
+; SPARC64-NEXT: add %o0, 1, %o0
+; SPARC64-NEXT: .LBB0_2: ! %cont
+; SPARC64-NEXT: retl
+; SPARC64-NEXT: nop
+entry:
+ %cmp = icmp eq i32 %cond, 0
+ %exp = call i1 @llvm.expect.i1(i1 %cmp, i1 0)
+ br i1 %exp, label %inc, label %cont
+inc:
+ %add = add nsw i32 %num, 1
+ br label %cont
+cont:
+ %phi = phi i32 [ %add, %inc ], [ %num, %entry ]
+ ret i32 %phi
+}
+declare i1 @llvm.expect.i1(i1, i1)
+
+attributes #0 = { nounwind "tune-cpu"="niagara" }
>From 57f8b6c3b7839fcbc5dcbff7d92200707b412ba8 Mon Sep 17 00:00:00 2001
From: Koakuma <koachan at protonmail.com>
Date: Wed, 5 Nov 2025 10:08:13 +0700
Subject: [PATCH 2/3] Update tests
---
.../test/CodeGen/SPARC/select-earlyniagara.ll | 28 ++++++-------------
1 file changed, 9 insertions(+), 19 deletions(-)
diff --git a/llvm/test/CodeGen/SPARC/select-earlyniagara.ll b/llvm/test/CodeGen/SPARC/select-earlyniagara.ll
index 2cec10455d205..8eccf1b72f754 100644
--- a/llvm/test/CodeGen/SPARC/select-earlyniagara.ll
+++ b/llvm/test/CodeGen/SPARC/select-earlyniagara.ll
@@ -8,35 +8,25 @@
define i32 @cinc(i32 %cond, i32 %num) #0 {
; SPARC-LABEL: cinc:
; SPARC: ! %bb.0: ! %entry
+; SPARC-NEXT: add %o1, 1, %o2
; SPARC-NEXT: cmp %o0, 0
-; SPARC-NEXT: bne %icc, .LBB0_2
-; SPARC-NEXT: mov %o1, %o0
-; SPARC-NEXT: ! %bb.1: ! %inc
-; SPARC-NEXT: add %o0, 1, %o0
-; SPARC-NEXT: .LBB0_2: ! %cont
+; SPARC-NEXT: move %icc, %o2, %o1
; SPARC-NEXT: retl
-; SPARC-NEXT: nop
+; SPARC-NEXT: mov %o1, %o0
;
; SPARC64-LABEL: cinc:
; SPARC64: ! %bb.0: ! %entry
+; SPARC64-NEXT: add %o1, 1, %o2
; SPARC64-NEXT: cmp %o0, 0
-; SPARC64-NEXT: bne %icc, .LBB0_2
-; SPARC64-NEXT: mov %o1, %o0
-; SPARC64-NEXT: ! %bb.1: ! %inc
-; SPARC64-NEXT: add %o0, 1, %o0
-; SPARC64-NEXT: .LBB0_2: ! %cont
+; SPARC64-NEXT: move %icc, %o2, %o1
; SPARC64-NEXT: retl
-; SPARC64-NEXT: nop
+; SPARC64-NEXT: mov %o1, %o0
entry:
+ %add = add nsw i32 %num, 1
%cmp = icmp eq i32 %cond, 0
%exp = call i1 @llvm.expect.i1(i1 %cmp, i1 0)
- br i1 %exp, label %inc, label %cont
-inc:
- %add = add nsw i32 %num, 1
- br label %cont
-cont:
- %phi = phi i32 [ %add, %inc ], [ %num, %entry ]
- ret i32 %phi
+ %ret = select i1 %exp, i32 %add, i32 %num
+ ret i32 %ret
}
declare i1 @llvm.expect.i1(i1, i1)
>From cb3e05c89de5e46b79a6abecac6dd697eaf078b7 Mon Sep 17 00:00:00 2001
From: Koakuma <koachan at protonmail.com>
Date: Wed, 5 Nov 2025 23:09:33 +0700
Subject: [PATCH 3/3] Update tests
---
.../SPARC/predictable-select-earlyniagara.ll | 42 ++++++++++++++++
.../SPARC/predictable-select-generic.ll | 49 +++++++++++++++++++
.../test/CodeGen/SPARC/select-earlyniagara.ll | 33 -------------
3 files changed, 91 insertions(+), 33 deletions(-)
create mode 100644 llvm/test/CodeGen/SPARC/predictable-select-earlyniagara.ll
create mode 100644 llvm/test/CodeGen/SPARC/predictable-select-generic.ll
delete mode 100644 llvm/test/CodeGen/SPARC/select-earlyniagara.ll
diff --git a/llvm/test/CodeGen/SPARC/predictable-select-earlyniagara.ll b/llvm/test/CodeGen/SPARC/predictable-select-earlyniagara.ll
new file mode 100644
index 0000000000000..23ab3984c671a
--- /dev/null
+++ b/llvm/test/CodeGen/SPARC/predictable-select-earlyniagara.ll
@@ -0,0 +1,42 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -O3 < %s -relocation-model=pic -mtriple=sparc -mcpu=v9 | FileCheck --check-prefix=SPARC %s
+; RUN: llc -O3 < %s -relocation-model=pic -mtriple=sparcv9 | FileCheck --check-prefix=SPARC64 %s
+
+;; Early Niagara processors should prefer conditional moves over branches
+;; even when it's predictable.
+
+define i32 @cdiv(i32 %cond, i32 %num) #0 {
+; SPARC-LABEL: cdiv:
+; SPARC: ! %bb.0: ! %entry
+; SPARC-NEXT: sethi 1398101, %o2
+; SPARC-NEXT: or %o2, 342, %o2
+; SPARC-NEXT: smul %o1, %o2, %o2
+; SPARC-NEXT: rd %y, %o2
+; SPARC-NEXT: srl %o2, 31, %o3
+; SPARC-NEXT: add %o2, %o3, %o2
+; SPARC-NEXT: cmp %o0, 0
+; SPARC-NEXT: move %icc, %o2, %o1
+; SPARC-NEXT: retl
+; SPARC-NEXT: mov %o1, %o0
+;
+; SPARC64-LABEL: cdiv:
+; SPARC64: ! %bb.0: ! %entry
+; SPARC64-NEXT: sra %o1, 0, %o2
+; SPARC64-NEXT: sethi 1398101, %o3
+; SPARC64-NEXT: or %o3, 342, %o3
+; SPARC64-NEXT: mulx %o2, %o3, %o2
+; SPARC64-NEXT: srlx %o2, 63, %o3
+; SPARC64-NEXT: srlx %o2, 32, %o2
+; SPARC64-NEXT: add %o2, %o3, %o2
+; SPARC64-NEXT: cmp %o0, 0
+; SPARC64-NEXT: move %icc, %o2, %o1
+; SPARC64-NEXT: retl
+; SPARC64-NEXT: mov %o1, %o0
+entry:
+ %div = sdiv i32 %num, 3
+ %cmp = icmp eq i32 %cond, 0
+ %ret = select i1 %cmp, i32 %div, i32 %num
+ ret i32 %ret
+}
+
+attributes #0 = { nounwind "tune-cpu"="niagara" }
diff --git a/llvm/test/CodeGen/SPARC/predictable-select-generic.ll b/llvm/test/CodeGen/SPARC/predictable-select-generic.ll
new file mode 100644
index 0000000000000..dc2623a60e465
--- /dev/null
+++ b/llvm/test/CodeGen/SPARC/predictable-select-generic.ll
@@ -0,0 +1,49 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -O3 < %s -relocation-model=pic -mtriple=sparc -mcpu=v9 | FileCheck --check-prefix=SPARC %s
+; RUN: llc -O3 < %s -relocation-model=pic -mtriple=sparcv9 | FileCheck --check-prefix=SPARC64 %s
+
+;; Normally, highly predictable selects should be turned into branches.
+
+define i32 @cdiv(i32 %cond, i32 %num) #0 {
+; SPARC-LABEL: cdiv:
+; SPARC: ! %bb.0: ! %entry
+; SPARC-NEXT: cmp %o0, 0
+; SPARC-NEXT: be %icc, .LBB0_2
+; SPARC-NEXT: mov %o1, %o0
+; SPARC-NEXT: ! %bb.1: ! %select.end
+; SPARC-NEXT: retl
+; SPARC-NEXT: nop
+; SPARC-NEXT: .LBB0_2: ! %select.true.sink
+; SPARC-NEXT: sethi 1398101, %o1
+; SPARC-NEXT: or %o1, 342, %o1
+; SPARC-NEXT: smul %o0, %o1, %o0
+; SPARC-NEXT: rd %y, %o0
+; SPARC-NEXT: srl %o0, 31, %o1
+; SPARC-NEXT: retl
+; SPARC-NEXT: add %o0, %o1, %o0
+;
+; SPARC64-LABEL: cdiv:
+; SPARC64: ! %bb.0: ! %entry
+; SPARC64-NEXT: cmp %o0, 0
+; SPARC64-NEXT: be %icc, .LBB0_2
+; SPARC64-NEXT: mov %o1, %o0
+; SPARC64-NEXT: ! %bb.1: ! %select.end
+; SPARC64-NEXT: retl
+; SPARC64-NEXT: nop
+; SPARC64-NEXT: .LBB0_2: ! %select.true.sink
+; SPARC64-NEXT: sra %o0, 0, %o0
+; SPARC64-NEXT: sethi 1398101, %o1
+; SPARC64-NEXT: or %o1, 342, %o1
+; SPARC64-NEXT: mulx %o0, %o1, %o0
+; SPARC64-NEXT: srlx %o0, 63, %o1
+; SPARC64-NEXT: srlx %o0, 32, %o0
+; SPARC64-NEXT: retl
+; SPARC64-NEXT: add %o0, %o1, %o0
+entry:
+ %div = sdiv i32 %num, 3
+ %cmp = icmp eq i32 %cond, 0
+ %ret = select i1 %cmp, i32 %div, i32 %num
+ ret i32 %ret
+}
+
+attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/SPARC/select-earlyniagara.ll b/llvm/test/CodeGen/SPARC/select-earlyniagara.ll
deleted file mode 100644
index 8eccf1b72f754..0000000000000
--- a/llvm/test/CodeGen/SPARC/select-earlyniagara.ll
+++ /dev/null
@@ -1,33 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -O3 < %s -relocation-model=pic -mtriple=sparc -mcpu=v9 | FileCheck --check-prefix=SPARC %s
-; RUN: llc -O3 < %s -relocation-model=pic -mtriple=sparcv9 -mcpu=v9 | FileCheck --check-prefix=SPARC64 %s
-
-;; Early Niagara processors should prefer conditional moves over branches
-;; even when it's predictable.
-
-define i32 @cinc(i32 %cond, i32 %num) #0 {
-; SPARC-LABEL: cinc:
-; SPARC: ! %bb.0: ! %entry
-; SPARC-NEXT: add %o1, 1, %o2
-; SPARC-NEXT: cmp %o0, 0
-; SPARC-NEXT: move %icc, %o2, %o1
-; SPARC-NEXT: retl
-; SPARC-NEXT: mov %o1, %o0
-;
-; SPARC64-LABEL: cinc:
-; SPARC64: ! %bb.0: ! %entry
-; SPARC64-NEXT: add %o1, 1, %o2
-; SPARC64-NEXT: cmp %o0, 0
-; SPARC64-NEXT: move %icc, %o2, %o1
-; SPARC64-NEXT: retl
-; SPARC64-NEXT: mov %o1, %o0
-entry:
- %add = add nsw i32 %num, 1
- %cmp = icmp eq i32 %cond, 0
- %exp = call i1 @llvm.expect.i1(i1 %cmp, i1 0)
- %ret = select i1 %exp, i32 %add, i32 %num
- ret i32 %ret
-}
-declare i1 @llvm.expect.i1(i1, i1)
-
-attributes #0 = { nounwind "tune-cpu"="niagara" }
More information about the llvm-commits
mailing list