[llvm] [PowerPC] Add custom lowering for ssubo (#111748) (PR #115875)

Maryam Moghadas via llvm-commits llvm-commits at lists.llvm.org
Thu Nov 28 08:39:30 PST 2024


https://github.com/maryammo updated https://github.com/llvm/llvm-project/pull/115875

>From dd25cafd139c29f9c1a38ac2097162aa17c45114 Mon Sep 17 00:00:00 2001
From: Maryam Moghadas <maryammo at ca.ibm.com>
Date: Tue, 29 Oct 2024 15:43:05 -0400
Subject: [PATCH 1/2] [PowerPC] Add custom lowering for ssubo (#111748)

This patch improves the codegen for the i32 ssubo node in 64-bit mode by
custom lowering it.
---
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 28 +++++++++++++++++++++
 llvm/lib/Target/PowerPC/PPCISelLowering.h   |  1 +
 llvm/test/CodeGen/PowerPC/saddo-ssubo.ll    | 11 ++++----
 3 files changed, 34 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 93a48ce2b8c72d..50028a823f2b7c 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -198,6 +198,11 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
 
   setOperationAction(ISD::UADDO, RegVT, Custom);
 
+  // On P10, the default lowering generates better code using the
+  // setbc instruction.
+  if (!Subtarget.hasP10Vector() && isPPC64)
+    setOperationAction(ISD::SSUBO, MVT::i32, Custom);
+
   // Match BITREVERSE to customized fast code sequence in the td file.
   setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
   setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
@@ -12041,6 +12046,27 @@ SDValue PPCTargetLowering::LowerUaddo(SDValue Op, SelectionDAG &DAG) const {
   return Res;
 }
 
+SDValue PPCTargetLowering::LowerSSUBO(SDValue Op, SelectionDAG &DAG) const {
+
+  SDLoc dl(Op);
+  SDValue LHS = Op.getOperand(0);
+  SDValue RHS = Op.getOperand(1);
+
+  SDValue Sub = DAG.getNode(ISD::SUB, dl, MVT::i32, LHS, RHS);
+
+  SDValue Xor1 = DAG.getNode(ISD::XOR, dl, MVT::i32, RHS, LHS);
+  SDValue Xor2 = DAG.getNode(ISD::XOR, dl, MVT::i32, Sub, LHS);
+
+  SDValue And = DAG.getNode(ISD::AND, dl, MVT::i32, Xor1, Xor2);
+
+  SDValue Overflow = DAG.getNode(ISD::SRL, dl, MVT::i32, And,
+                                 DAG.getConstant(31, dl, MVT::i32));
+  SDValue OverflowTrunc =
+      DAG.getNode(ISD::TRUNCATE, dl, Op.getNode()->getValueType(1), Overflow);
+
+  return DAG.getMergeValues({Sub, OverflowTrunc}, dl);
+}
+
 /// LowerOperation - Provide custom lowering hooks for some operations.
 ///
 SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
@@ -12063,6 +12089,8 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::SETCC:              return LowerSETCC(Op, DAG);
   case ISD::INIT_TRAMPOLINE:    return LowerINIT_TRAMPOLINE(Op, DAG);
   case ISD::ADJUST_TRAMPOLINE:  return LowerADJUST_TRAMPOLINE(Op, DAG);
+  case ISD::SSUBO:
+    return LowerSSUBO(Op, DAG);
 
   case ISD::INLINEASM:
   case ISD::INLINEASM_BR:       return LowerINLINEASM(Op, DAG);
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 1fd4b83d6c1192..1c63444db427db 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -1285,6 +1285,7 @@ namespace llvm {
     SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerUaddo(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerSSUBO(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/test/CodeGen/PowerPC/saddo-ssubo.ll b/llvm/test/CodeGen/PowerPC/saddo-ssubo.ll
index fd5f26ba35742f..4c11f7f919a3ca 100644
--- a/llvm/test/CodeGen/PowerPC/saddo-ssubo.ll
+++ b/llvm/test/CodeGen/PowerPC/saddo-ssubo.ll
@@ -129,12 +129,11 @@ entry:
 define i1 @test_ssubo_i32(i32 %a, i32 %b) nounwind {
 ; CHECK-LABEL: test_ssubo_i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    sub 5, 3, 4
-; CHECK-NEXT:    cmpwi 1, 4, 0
-; CHECK-NEXT:    cmpw 5, 3
-; CHECK-NEXT:    li 3, 1
-; CHECK-NEXT:    creqv 20, 5, 0
-; CHECK-NEXT:    isel 3, 0, 3, 20
+; CHECK-NEXT:    xor 5, 4, 3
+; CHECK-NEXT:    sub 4, 3, 4
+; CHECK-NEXT:    xor 3, 4, 3
+; CHECK-NEXT:    and 3, 5, 3
+; CHECK-NEXT:    srwi 3, 3, 31
 ; CHECK-NEXT:    blr
 entry:
   %res = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %a, i32 %b) nounwind

>From 719eb1130d9ab08a843bd17c1b0803ff7dfa0cb8 Mon Sep 17 00:00:00 2001
From: Maryam Moghadas <maryammo at ca.ibm.com>
Date: Thu, 28 Nov 2024 10:51:06 -0500
Subject: [PATCH 2/2] Address review comments for custom lowering of ssubo in
 32-bit

---
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp |  2 +-
 llvm/test/CodeGen/PowerPC/ssubo-32.ll       | 13 ++++++-------
 2 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 50028a823f2b7c..e917ef3f5e8c9a 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -200,7 +200,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
 
   // On P10, the default lowering generates better code using the
   // setbc instruction.
-  if (!Subtarget.hasP10Vector() && isPPC64)
+  if (!Subtarget.hasP10Vector())
     setOperationAction(ISD::SSUBO, MVT::i32, Custom);
 
   // Match BITREVERSE to customized fast code sequence in the td file.
diff --git a/llvm/test/CodeGen/PowerPC/ssubo-32.ll b/llvm/test/CodeGen/PowerPC/ssubo-32.ll
index 7a42007b8a11a9..488d1e26fa36e8 100644
--- a/llvm/test/CodeGen/PowerPC/ssubo-32.ll
+++ b/llvm/test/CodeGen/PowerPC/ssubo-32.ll
@@ -6,13 +6,12 @@
 define i1 @subovfi_i32(i32 noundef %a, i32 noundef %b, ptr %c)  {
 ; CHECK-LABEL: subovfi_i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    sub 6, 3, 4
-; CHECK-NEXT:    cmpwi 1, 4, 0
-; CHECK-NEXT:    cmpw 6, 3
-; CHECK-NEXT:    li 3, 1
-; CHECK-NEXT:    stw 6, 0(5)
-; CHECK-NEXT:    creqv 20, 5, 0
-; CHECK-NEXT:    isel 3, 0, 3, 20
+; CHECK-NEXT:    xor 6, 4, 3
+; CHECK-NEXT:    sub 4, 3, 4
+; CHECK-NEXT:    xor 3, 4, 3
+; CHECK-NEXT:    stw 4, 0(5)
+; CHECK-NEXT:    and 3, 6, 3
+; CHECK-NEXT:    srwi 3, 3, 31
 ; CHECK-NEXT:    blr
 entry:
   %0 = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %a, i32 %b)



More information about the llvm-commits mailing list