[llvm] dab4121 - [PowerPC] Add custom lowering for ssubo (#111748) (#115875)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 28 10:55:57 PST 2024
Author: Maryam Moghadas
Date: 2024-11-28T13:55:53-05:00
New Revision: dab4121a55225fd00f3db1cb232cf563ea573ef2
URL: https://github.com/llvm/llvm-project/commit/dab4121a55225fd00f3db1cb232cf563ea573ef2
DIFF: https://github.com/llvm/llvm-project/commit/dab4121a55225fd00f3db1cb232cf563ea573ef2.diff
LOG: [PowerPC] Add custom lowering for ssubo (#111748) (#115875)
This patch improves the codegen for the i32 ssubo node by custom lowering it.
Added:
Modified:
llvm/lib/Target/PowerPC/PPCISelLowering.cpp
llvm/lib/Target/PowerPC/PPCISelLowering.h
llvm/test/CodeGen/PowerPC/saddo-ssubo.ll
llvm/test/CodeGen/PowerPC/ssubo-32.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 93a48ce2b8c72d..e917ef3f5e8c9a 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -198,6 +198,11 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::UADDO, RegVT, Custom);
+ // On P10, the default lowering generates better code using the
+ // setbc instruction.
+ if (!Subtarget.hasP10Vector())
+ setOperationAction(ISD::SSUBO, MVT::i32, Custom);
+
// Match BITREVERSE to customized fast code sequence in the td file.
setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
@@ -12041,6 +12046,27 @@ SDValue PPCTargetLowering::LowerUaddo(SDValue Op, SelectionDAG &DAG) const {
return Res;
}
+SDValue PPCTargetLowering::LowerSSUBO(SDValue Op, SelectionDAG &DAG) const {
+ // Lower SSUBO to SUB plus a bitwise sign test; all nodes are built as i32.
+ SDLoc dl(Op);
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ // First result: the difference LHS - RHS.
+ SDValue Sub = DAG.getNode(ISD::SUB, dl, MVT::i32, LHS, RHS);
+ // Bit 31 of Xor1: operand signs differ. Bit 31 of Xor2: Sub's sign != LHS's.
+ SDValue Xor1 = DAG.getNode(ISD::XOR, dl, MVT::i32, RHS, LHS);
+ SDValue Xor2 = DAG.getNode(ISD::XOR, dl, MVT::i32, Sub, LHS);
+ // Signed overflow needs both conditions, so AND the two masks.
+ SDValue And = DAG.getNode(ISD::AND, dl, MVT::i32, Xor1, Xor2);
+ // Move bit 31 down to bit 0, then truncate to the node's second result type.
+ SDValue Overflow = DAG.getNode(ISD::SRL, dl, MVT::i32, And,
+ DAG.getConstant(31, dl, MVT::i32));
+ SDValue OverflowTrunc =
+ DAG.getNode(ISD::TRUNCATE, dl, Op.getNode()->getValueType(1), Overflow);
+ // Return both results (difference, overflow flag) as one merged value.
+ return DAG.getMergeValues({Sub, OverflowTrunc}, dl);
+}
+
/// LowerOperation - Provide custom lowering hooks for some operations.
///
SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
@@ -12063,6 +12089,8 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SETCC: return LowerSETCC(Op, DAG);
case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
+ case ISD::SSUBO:
+ return LowerSSUBO(Op, DAG);
case ISD::INLINEASM:
case ISD::INLINEASM_BR: return LowerINLINEASM(Op, DAG);
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 1fd4b83d6c1192..1c63444db427db 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -1285,6 +1285,7 @@ namespace llvm {
SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerUaddo(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSSUBO(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/test/CodeGen/PowerPC/saddo-ssubo.ll b/llvm/test/CodeGen/PowerPC/saddo-ssubo.ll
index fd5f26ba35742f..4c11f7f919a3ca 100644
--- a/llvm/test/CodeGen/PowerPC/saddo-ssubo.ll
+++ b/llvm/test/CodeGen/PowerPC/saddo-ssubo.ll
@@ -129,12 +129,11 @@ entry:
define i1 @test_ssubo_i32(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: test_ssubo_i32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: sub 5, 3, 4
-; CHECK-NEXT: cmpwi 1, 4, 0
-; CHECK-NEXT: cmpw 5, 3
-; CHECK-NEXT: li 3, 1
-; CHECK-NEXT: creqv 20, 5, 0
-; CHECK-NEXT: isel 3, 0, 3, 20
+; CHECK-NEXT: xor 5, 4, 3
+; CHECK-NEXT: sub 4, 3, 4
+; CHECK-NEXT: xor 3, 4, 3
+; CHECK-NEXT: and 3, 5, 3
+; CHECK-NEXT: srwi 3, 3, 31
; CHECK-NEXT: blr
entry:
%res = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %a, i32 %b) nounwind
diff --git a/llvm/test/CodeGen/PowerPC/ssubo-32.ll b/llvm/test/CodeGen/PowerPC/ssubo-32.ll
index 7a42007b8a11a9..488d1e26fa36e8 100644
--- a/llvm/test/CodeGen/PowerPC/ssubo-32.ll
+++ b/llvm/test/CodeGen/PowerPC/ssubo-32.ll
@@ -6,13 +6,12 @@
define i1 @subovfi_i32(i32 noundef %a, i32 noundef %b, ptr %c) {
; CHECK-LABEL: subovfi_i32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: sub 6, 3, 4
-; CHECK-NEXT: cmpwi 1, 4, 0
-; CHECK-NEXT: cmpw 6, 3
-; CHECK-NEXT: li 3, 1
-; CHECK-NEXT: stw 6, 0(5)
-; CHECK-NEXT: creqv 20, 5, 0
-; CHECK-NEXT: isel 3, 0, 3, 20
+; CHECK-NEXT: xor 6, 4, 3
+; CHECK-NEXT: sub 4, 3, 4
+; CHECK-NEXT: xor 3, 4, 3
+; CHECK-NEXT: stw 4, 0(5)
+; CHECK-NEXT: and 3, 6, 3
+; CHECK-NEXT: srwi 3, 3, 31
; CHECK-NEXT: blr
entry:
%0 = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %a, i32 %b)
More information about the llvm-commits
mailing list