[llvm] [PowerPC] Add custom lowering for SADD overflow for i32 and i64 (PR #159255)

Aditi Medhane via llvm-commits llvm-commits at lists.llvm.org
Tue Sep 16 23:57:21 PDT 2025


https://github.com/AditiRM created https://github.com/llvm/llvm-project/pull/159255

This patch improves the codegen for the `saddo` node for i32 and i64, in both 32-bit and 64-bit mode, by custom lowering it.

Test case:
> ```c
> #include <stdbool.h>
> #include <stdint.h>
> 
> bool test_sadd_overflow(int a, int b, int *c) {
>    return __builtin_sadd_overflow(a, b, c);
> }
>
> bool test_saddl_overflow(long long a, long long b, long long *c){
>    return __builtin_saddll_overflow(a, b, c);
> }
> ```

> ```bash
> ibm-clang -O2 -mcpu=power8 -m64 -S test.c -o test_final.s
> ```

Assembly comparison (test_sadd_overflow):  
| **Before** | **After** |
|------------|-----------|
| add 6, 3, 4<br>srwi 4, 4, 31<br>extsw 6, 6<br>sub 3, 6, 3<br>stw 6, 0(5)<br>rldicl 3, 3, 1, 63<br>xor 3, 4, 3 | xor 6, 3, 4<br>add 4, 3, 4<br>xor 3, 4, 3<br>stw 4, 0(5)<br>andc 3, 3, 6<br>rlwinm 3, 3, 1, 31, 31 |

---

Assembly comparison (test_saddl_overflow):  
| **Before** | **After** |
|------------|-----------|
| add 6, 3, 4<br>rldicl 8, 3, 1, 63<br>rldicl 4, 4, 1, 63<br>std 6, 0(5)<br>sradi 7, 6, 63<br>subc 3, 6, 3<br>adde 3, 8, 7<br>xori 3, 3, 1<br>xor 3, 4, 3 | xor 6, 3, 4<br>add 4, 3, 4<br>xor 3, 4, 3<br>std 4, 0(5)<br>andc 3, 3, 6<br>rldicl 3, 3, 1, 63 |

>From 8e3f617fbc22b0733cf9b3b0415aba6fe6b700da Mon Sep 17 00:00:00 2001
From: AditiRM <aditimedhane73 at gmail.com>
Date: Wed, 17 Sep 2025 06:54:02 +0000
Subject: [PATCH] [PowerPC] Add custom lowering for SADD overflow for i32 and
 i64

---
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 32 ++++++++++++++++++++-
 llvm/lib/Target/PowerPC/PPCISelLowering.h   |  1 +
 llvm/test/CodeGen/PowerPC/saddo-ssubo.ll    | 22 +++++++-------
 3 files changed, 42 insertions(+), 13 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index fa104e4f69d7f..0798dae3a14a2 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -204,8 +204,11 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
   // setbc instruction.
   if (!Subtarget.hasP10Vector()) {
     setOperationAction(ISD::SSUBO, MVT::i32, Custom);
-    if (isPPC64)
+    setOperationAction(ISD::SADDO, MVT::i32, Custom);
+    if (isPPC64){
       setOperationAction(ISD::SSUBO, MVT::i64, Custom);
+      setOperationAction(ISD::SADDO, MVT::i64, Custom);
+    }
   }
 
   // Match BITREVERSE to customized fast code sequence in the td file.
@@ -12614,6 +12617,31 @@ SDValue PPCTargetLowering::LowerSSUBO(SDValue Op, SelectionDAG &DAG) const {
   return DAG.getMergeValues({Sub, OverflowTrunc}, dl);
 }
 
+SDValue PPCTargetLowering::LowerSADDO(SDValue Op, SelectionDAG &DAG) const {
+  // Lower SADDO to {LHS + RHS, top bit of (~(LHS ^ RHS) & (Add ^ LHS))}.
+  SDLoc dl(Op);
+  SDValue LHS = Op.getOperand(0);
+  SDValue RHS = Op.getOperand(1);
+  EVT VT = Op.getNode()->getValueType(0);
+  // Result 0: the ISD::ADD of the two operands.
+  SDValue Add = DAG.getNode(ISD::ADD, dl, VT, LHS, RHS);
+  // NotXor1 top bit: operands' top bits equal; Xor2 top bit: Add's differs.
+  SDValue Xor1 = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
+  SDValue NotXor1 = DAG.getNOT(dl, Xor1, VT);
+  SDValue Xor2 = DAG.getNode(ISD::XOR, dl, VT, Add, LHS);
+  // Top bit of And is set only when both of those conditions hold.
+  SDValue And = DAG.getNode(ISD::AND, dl, VT, NotXor1, Xor2);
+  // Shift the top bit down to bit 0 (shift amount = bit width - 1).
+  SDValue Overflow =
+      DAG.getNode(ISD::SRL, dl, VT, And,
+                  DAG.getConstant(VT.getSizeInBits() - 1, dl, MVT::i32));
+  // Result 1: truncate the flag to the node's second value type.
+  SDValue OverflowTrunc =
+      DAG.getNode(ISD::TRUNCATE, dl, Op.getNode()->getValueType(1), Overflow);
+  // Hand back both results as a single merged value.
+  return DAG.getMergeValues({Add, OverflowTrunc}, dl);
+}
+
 /// LowerOperation - Provide custom lowering hooks for some operations.
 ///
 SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
@@ -12638,6 +12666,8 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::ADJUST_TRAMPOLINE:  return LowerADJUST_TRAMPOLINE(Op, DAG);
   case ISD::SSUBO:
     return LowerSSUBO(Op, DAG);
+  case ISD::SADDO:
+    return LowerSADDO(Op, DAG);
 
   case ISD::INLINEASM:
   case ISD::INLINEASM_BR:       return LowerINLINEASM(Op, DAG);
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 669430550f4e6..0c19632ab5b33 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -1283,6 +1283,7 @@ namespace llvm {
     SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerSSUBO(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerSADDO(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/test/CodeGen/PowerPC/saddo-ssubo.ll b/llvm/test/CodeGen/PowerPC/saddo-ssubo.ll
index c0f3b60122521..5cd96ec219404 100644
--- a/llvm/test/CodeGen/PowerPC/saddo-ssubo.ll
+++ b/llvm/test/CodeGen/PowerPC/saddo-ssubo.ll
@@ -49,12 +49,11 @@ entry:
 define i1 @test_saddo_i32(i32 %a, i32 %b) nounwind {
 ; CHECK-LABEL: test_saddo_i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    add 5, 3, 4
-; CHECK-NEXT:    cmpwi 1, 4, 0
-; CHECK-NEXT:    cmpw 5, 3
-; CHECK-NEXT:    li 3, 1
-; CHECK-NEXT:    creqv 20, 4, 0
-; CHECK-NEXT:    isel 3, 0, 3, 20
+; CHECK-NEXT:    xor 5, 3, 4
+; CHECK-NEXT:    add 4, 3, 4
+; CHECK-NEXT:    xor 3, 4, 3
+; CHECK-NEXT:    andc 3, 3, 5
+; CHECK-NEXT:    srwi 3, 3, 31
 ; CHECK-NEXT:    blr
 entry:
   %res = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) nounwind
@@ -65,12 +64,11 @@ entry:
 define i1 @test_saddo_i64(i64 %a, i64 %b) nounwind {
 ; CHECK-LABEL: test_saddo_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    add 5, 3, 4
-; CHECK-NEXT:    cmpdi 1, 4, 0
-; CHECK-NEXT:    cmpd 5, 3
-; CHECK-NEXT:    li 3, 1
-; CHECK-NEXT:    creqv 20, 4, 0
-; CHECK-NEXT:    isel 3, 0, 3, 20
+; CHECK-NEXT:    xor 5, 3, 4
+; CHECK-NEXT:    add 4, 3, 4
+; CHECK-NEXT:    xor 3, 4, 3
+; CHECK-NEXT:    andc 3, 3, 5
+; CHECK-NEXT:    rldicl 3, 3, 1, 63
 ; CHECK-NEXT:    blr
 entry:
   %res = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %a, i64 %b) nounwind



More information about the llvm-commits mailing list