[llvm] [TargetLowering][ExpandABD] Prefer selects over usubo if we do the same for ucmp (PR #159889)
via llvm-commits
llvm-commits at lists.llvm.org
Sat Sep 20 11:48:16 PDT 2025
https://github.com/AZero13 updated https://github.com/llvm/llvm-project/pull/159889
>From 381bab5e04221655376e78eb648139b1ca68e1f7 Mon Sep 17 00:00:00 2001
From: AZero13 <gfunni234 at gmail.com>
Date: Fri, 19 Sep 2025 21:12:39 -0400
Subject: [PATCH 1/2] [TargetLowering] Prefer selects over usubo
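When expanding abdu on an illegal scalar type, only fall back to the usubo
overflow-flag form if the target does not prefer selects. This reuses the
shouldExpandCmpUsingSelects hook that already decides whether ucmp/scmp are
expanded with selects or with arithmetic on boolean values.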
---
.../CodeGen/SelectionDAG/TargetLowering.cpp | 11 ++--
llvm/test/CodeGen/AArch64/abdu-neg.ll | 24 ++++----
llvm/test/CodeGen/AArch64/abdu.ll | 60 ++++++++-----------
3 files changed, 41 insertions(+), 54 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 80500e48351e4..05ebd59f4a8a9 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -9775,11 +9775,12 @@ SDValue TargetLowering::expandABD(SDNode *N, SelectionDAG &DAG) const {
return DAG.getNode(ISD::SUB, dl, VT, Cmp, Xor);
}
- // Similar to the branchless expansion, use the (sign-extended) usubo overflow
- // flag if the (scalar) type is illegal as this is more likely to legalize
- // cleanly:
- // abdu(lhs, rhs) -> sub(xor(sub(lhs, rhs), uof(lhs, rhs)), uof(lhs, rhs))
- if (!IsSigned && VT.isScalarInteger() && !isTypeLegal(VT)) {
+ // Similar to the branchless expansion, if we don't prefer selects, use the
+ // (sign-extended) usubo overflow flag if the (scalar) type is illegal as this
+ // is more likely to legalize cleanly: abdu(lhs, rhs) -> sub(xor(sub(lhs,
+ // rhs), uof(lhs, rhs)), uof(lhs, rhs))
+ if (!IsSigned && VT.isScalarInteger() && !isTypeLegal(VT) &&
+ !shouldExpandCmpUsingSelects(VT)) {
SDValue USubO =
DAG.getNode(ISD::USUBO, dl, DAG.getVTList(VT, MVT::i1), {LHS, RHS});
SDValue Cmp = DAG.getNode(ISD::SIGN_EXTEND, dl, VT, USubO.getValue(1));
diff --git a/llvm/test/CodeGen/AArch64/abdu-neg.ll b/llvm/test/CodeGen/AArch64/abdu-neg.ll
index 79fc12ea76f63..269cbf03f32a0 100644
--- a/llvm/test/CodeGen/AArch64/abdu-neg.ll
+++ b/llvm/test/CodeGen/AArch64/abdu-neg.ll
@@ -180,13 +180,11 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind {
; CHECK-LABEL: abd_ext_i128:
; CHECK: // %bb.0:
; CHECK-NEXT: subs x8, x0, x2
-; CHECK-NEXT: sbcs x9, x1, x3
-; CHECK-NEXT: cset w10, lo
-; CHECK-NEXT: sbfx x10, x10, #0, #1
-; CHECK-NEXT: eor x8, x8, x10
-; CHECK-NEXT: eor x9, x9, x10
-; CHECK-NEXT: subs x8, x8, x10
-; CHECK-NEXT: sbc x9, x9, x10
+; CHECK-NEXT: sbc x9, x1, x3
+; CHECK-NEXT: subs x10, x2, x0
+; CHECK-NEXT: sbcs x11, x3, x1
+; CHECK-NEXT: csel x8, x8, x10, lo
+; CHECK-NEXT: csel x9, x9, x11, lo
; CHECK-NEXT: negs x0, x8
; CHECK-NEXT: ngc x1, x9
; CHECK-NEXT: ret
@@ -203,13 +201,11 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind {
; CHECK-LABEL: abd_ext_i128_undef:
; CHECK: // %bb.0:
; CHECK-NEXT: subs x8, x0, x2
-; CHECK-NEXT: sbcs x9, x1, x3
-; CHECK-NEXT: cset w10, lo
-; CHECK-NEXT: sbfx x10, x10, #0, #1
-; CHECK-NEXT: eor x8, x8, x10
-; CHECK-NEXT: eor x9, x9, x10
-; CHECK-NEXT: subs x8, x8, x10
-; CHECK-NEXT: sbc x9, x9, x10
+; CHECK-NEXT: sbc x9, x1, x3
+; CHECK-NEXT: subs x10, x2, x0
+; CHECK-NEXT: sbcs x11, x3, x1
+; CHECK-NEXT: csel x8, x8, x10, lo
+; CHECK-NEXT: csel x9, x9, x11, lo
; CHECK-NEXT: negs x0, x8
; CHECK-NEXT: ngc x1, x9
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/abdu.ll b/llvm/test/CodeGen/AArch64/abdu.ll
index 6db7693fb3a1c..3cbe648788a84 100644
--- a/llvm/test/CodeGen/AArch64/abdu.ll
+++ b/llvm/test/CodeGen/AArch64/abdu.ll
@@ -169,13 +169,11 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind {
; CHECK-LABEL: abd_ext_i128:
; CHECK: // %bb.0:
; CHECK-NEXT: subs x8, x0, x2
-; CHECK-NEXT: sbcs x9, x1, x3
-; CHECK-NEXT: cset w10, lo
-; CHECK-NEXT: sbfx x10, x10, #0, #1
-; CHECK-NEXT: eor x8, x8, x10
-; CHECK-NEXT: eor x9, x9, x10
-; CHECK-NEXT: subs x0, x8, x10
-; CHECK-NEXT: sbc x1, x9, x10
+; CHECK-NEXT: sbc x9, x1, x3
+; CHECK-NEXT: subs x10, x2, x0
+; CHECK-NEXT: sbcs x11, x3, x1
+; CHECK-NEXT: csel x0, x8, x10, lo
+; CHECK-NEXT: csel x1, x9, x11, lo
; CHECK-NEXT: ret
%aext = zext i128 %a to i256
%bext = zext i128 %b to i256
@@ -189,13 +187,11 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind {
; CHECK-LABEL: abd_ext_i128_undef:
; CHECK: // %bb.0:
; CHECK-NEXT: subs x8, x0, x2
-; CHECK-NEXT: sbcs x9, x1, x3
-; CHECK-NEXT: cset w10, lo
-; CHECK-NEXT: sbfx x10, x10, #0, #1
-; CHECK-NEXT: eor x8, x8, x10
-; CHECK-NEXT: eor x9, x9, x10
-; CHECK-NEXT: subs x0, x8, x10
-; CHECK-NEXT: sbc x1, x9, x10
+; CHECK-NEXT: sbc x9, x1, x3
+; CHECK-NEXT: subs x10, x2, x0
+; CHECK-NEXT: sbcs x11, x3, x1
+; CHECK-NEXT: csel x0, x8, x10, lo
+; CHECK-NEXT: csel x1, x9, x11, lo
; CHECK-NEXT: ret
%aext = zext i128 %a to i256
%bext = zext i128 %b to i256
@@ -263,13 +259,11 @@ define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind {
; CHECK-LABEL: abd_minmax_i128:
; CHECK: // %bb.0:
; CHECK-NEXT: subs x8, x0, x2
-; CHECK-NEXT: sbcs x9, x1, x3
-; CHECK-NEXT: cset w10, lo
-; CHECK-NEXT: sbfx x10, x10, #0, #1
-; CHECK-NEXT: eor x8, x8, x10
-; CHECK-NEXT: eor x9, x9, x10
-; CHECK-NEXT: subs x0, x8, x10
-; CHECK-NEXT: sbc x1, x9, x10
+; CHECK-NEXT: sbc x9, x1, x3
+; CHECK-NEXT: subs x10, x2, x0
+; CHECK-NEXT: sbcs x11, x3, x1
+; CHECK-NEXT: csel x0, x8, x10, lo
+; CHECK-NEXT: csel x1, x9, x11, lo
; CHECK-NEXT: ret
%min = call i128 @llvm.umin.i128(i128 %a, i128 %b)
%max = call i128 @llvm.umax.i128(i128 %a, i128 %b)
@@ -339,13 +333,11 @@ define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind {
; CHECK-LABEL: abd_cmp_i128:
; CHECK: // %bb.0:
; CHECK-NEXT: subs x8, x0, x2
-; CHECK-NEXT: sbcs x9, x1, x3
-; CHECK-NEXT: cset w10, lo
-; CHECK-NEXT: sbfx x10, x10, #0, #1
-; CHECK-NEXT: eor x8, x8, x10
-; CHECK-NEXT: eor x9, x9, x10
-; CHECK-NEXT: subs x0, x8, x10
-; CHECK-NEXT: sbc x1, x9, x10
+; CHECK-NEXT: sbc x9, x1, x3
+; CHECK-NEXT: subs x10, x2, x0
+; CHECK-NEXT: sbcs x11, x3, x1
+; CHECK-NEXT: csel x0, x8, x10, lo
+; CHECK-NEXT: csel x1, x9, x11, lo
; CHECK-NEXT: ret
%cmp = icmp uge i128 %a, %b
%ab = sub i128 %a, %b
@@ -437,13 +429,11 @@ define i128 @abd_select_i128(i128 %a, i128 %b) nounwind {
; CHECK-LABEL: abd_select_i128:
; CHECK: // %bb.0:
; CHECK-NEXT: subs x8, x0, x2
-; CHECK-NEXT: sbcs x9, x1, x3
-; CHECK-NEXT: cset w10, lo
-; CHECK-NEXT: sbfx x10, x10, #0, #1
-; CHECK-NEXT: eor x8, x8, x10
-; CHECK-NEXT: eor x9, x9, x10
-; CHECK-NEXT: subs x0, x8, x10
-; CHECK-NEXT: sbc x1, x9, x10
+; CHECK-NEXT: sbc x9, x1, x3
+; CHECK-NEXT: subs x10, x2, x0
+; CHECK-NEXT: sbcs x11, x3, x1
+; CHECK-NEXT: csel x0, x8, x10, lo
+; CHECK-NEXT: csel x1, x9, x11, lo
; CHECK-NEXT: ret
%cmp = icmp ult i128 %a, %b
%ab = select i1 %cmp, i128 %a, i128 %b
>From 913a966d550ffb9547b55fbb0abe12eb887a8234 Mon Sep 17 00:00:00 2001
From: AZero13 <gfunni234 at gmail.com>
Date: Sat, 20 Sep 2025 14:48:05 -0400
Subject: [PATCH 2/2] Rename shouldExpandCmpUsingSelects to
preferSelectsOverBooleanArithmetic
---
llvm/include/llvm/CodeGen/BasicTTIImpl.h | 2 +-
llvm/include/llvm/CodeGen/TargetLowering.h | 5 ++---
llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 2 +-
llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 4 ++--
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 2 +-
llvm/lib/Target/AArch64/AArch64ISelLowering.h | 2 +-
llvm/lib/Target/ARM/ARMISelLowering.cpp | 2 +-
llvm/lib/Target/ARM/ARMISelLowering.h | 2 +-
llvm/lib/Target/SystemZ/SystemZISelLowering.h | 2 +-
9 files changed, 11 insertions(+), 12 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index dce423fc1b18b..42ddb32d24093 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -2929,7 +2929,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
CostKind);
EVT VT = TLI->getValueType(DL, CmpTy, true);
- if (TLI->shouldExpandCmpUsingSelects(VT)) {
+ if (TLI->preferSelectsOverBooleanArithmetic(VT)) {
// x < y ? -1 : (x > y ? 1 : 0)
Cost += 2 * thisT()->getCmpSelInstrCost(
BinaryOperator::Select, RetTy, CondTy,
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 4c2d991308d30..d8975d0f9f453 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -3505,9 +3505,8 @@ class LLVM_ABI TargetLoweringBase {
return isOperationLegalOrCustom(Op, VT);
}
- /// Should we expand [US]CMP nodes using two selects and two compares, or by
- /// doing arithmetic on boolean types
- virtual bool shouldExpandCmpUsingSelects(EVT VT) const { return false; }
+ /// Should we prefer selects to doing arithmetic on boolean types
+ virtual bool preferSelectsOverBooleanArithmetic(EVT VT) const { return false; }
/// True if target has some particular form of dealing with pointer arithmetic
/// semantics for pointers with the given value type. False if pointer
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index f3e036ed1b947..961d2e40a3462 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -8598,7 +8598,7 @@ LegalizerHelper::lowerThreewayCompare(MachineInstr &MI) {
auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
auto BC = TLI.getBooleanContents(DstTy.isVector(), /*isFP=*/false);
- if (TLI.shouldExpandCmpUsingSelects(getApproximateEVTForLLT(SrcTy, Ctx)) ||
+ if (TLI.preferSelectsOverBooleanArithmetic(getApproximateEVTForLLT(SrcTy, Ctx)) ||
BC == TargetLowering::UndefinedBooleanContent) {
auto One = MIRBuilder.buildConstant(DstTy, 1);
auto SelectZeroOrOne = MIRBuilder.buildSelect(DstTy, IsGT, One, Zero);
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 05ebd59f4a8a9..2e271684c5829 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -9780,7 +9780,7 @@ SDValue TargetLowering::expandABD(SDNode *N, SelectionDAG &DAG) const {
// is more likely to legalize cleanly: abdu(lhs, rhs) -> sub(xor(sub(lhs,
// rhs), uof(lhs, rhs)), uof(lhs, rhs))
if (!IsSigned && VT.isScalarInteger() && !isTypeLegal(VT) &&
- !shouldExpandCmpUsingSelects(VT)) {
+ !preferSelectsOverBooleanArithmetic(VT)) {
SDValue USubO =
DAG.getNode(ISD::USUBO, dl, DAG.getVTList(VT, MVT::i1), {LHS, RHS});
SDValue Cmp = DAG.getNode(ISD::SIGN_EXTEND, dl, VT, USubO.getValue(1));
@@ -10975,7 +10975,7 @@ SDValue TargetLowering::expandCMP(SDNode *Node, SelectionDAG &DAG) const {
// because one of the conditions can be merged with one of the selects.
// And finally, if we don't know the contents of high bits of a boolean value
// we can't perform any arithmetic either.
- if (shouldExpandCmpUsingSelects(VT) || BoolVT.getScalarSizeInBits() == 1 ||
+ if (preferSelectsOverBooleanArithmetic(VT) || BoolVT.getScalarSizeInBits() == 1 ||
getBooleanContents(BoolVT) == UndefinedBooleanContent) {
SDValue SelectZeroOrOne =
DAG.getSelect(dl, ResVT, IsGT, DAG.getConstant(1, dl, ResVT),
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index cd7f0e719ad0c..bf9a6e4c8c40b 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -29325,7 +29325,7 @@ bool AArch64TargetLowering::shouldConvertFpToSat(unsigned Op, EVT FPVT,
return TargetLowering::shouldConvertFpToSat(Op, FPVT, VT);
}
-bool AArch64TargetLowering::shouldExpandCmpUsingSelects(EVT VT) const {
+bool AArch64TargetLowering::preferSelectsOverBooleanArithmetic(EVT VT) const {
// Expand scalar and SVE operations using selects. Neon vectors prefer sub to
// avoid vselect becoming bsl / unrolling.
return !VT.isFixedLengthVector();
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index ff073d3eafb1f..84fd7657abe72 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -435,7 +435,7 @@ class AArch64TargetLowering : public TargetLowering {
bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override;
- bool shouldExpandCmpUsingSelects(EVT VT) const override;
+ bool preferSelectsOverBooleanArithmetic(EVT VT) const override;
bool isComplexDeinterleavingSupported() const override;
bool isComplexDeinterleavingOperationSupported(
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 9052cbfa89deb..9a247bb5a83d9 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -1482,7 +1482,7 @@ bool ARMTargetLowering::useSoftFloat() const {
return Subtarget->useSoftFloat();
}
-bool ARMTargetLowering::shouldExpandCmpUsingSelects(EVT VT) const {
+bool ARMTargetLowering::preferSelectsOverBooleanArithmetic(EVT VT) const {
return !Subtarget->isThumb1Only() && VT.getSizeInBits() <= 32;
}
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index 8e417ac3e1a7b..fa130a153b0de 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -605,7 +605,7 @@ class VectorType;
bool preferZeroCompareBranch() const override { return true; }
- bool shouldExpandCmpUsingSelects(EVT VT) const override;
+ bool preferSelectsOverBooleanArithmetic(EVT VT) const override;
bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
index 707887c59bd65..77b782bf025a7 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -528,7 +528,7 @@ class SystemZTargetLowering : public TargetLowering {
bool shouldConsiderGEPOffsetSplit() const override { return true; }
- bool shouldExpandCmpUsingSelects(EVT VT) const override { return true; }
+ bool preferSelectsOverBooleanArithmetic(EVT VT) const override { return true; }
const char *getTargetNodeName(unsigned Opcode) const override;
std::pair<unsigned, const TargetRegisterClass *>
More information about the llvm-commits mailing list