[llvm] [PPC] Add custom lowering for uaddo (PR #110137)
Zaara Syeda via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 26 09:04:15 PDT 2024
https://github.com/syzaara created https://github.com/llvm/llvm-project/pull/110137
Improve the codegen for the uaddo node for i64 in 64-bit mode and i32 in 32-bit mode by custom lowering it to an addc/addze sequence that materializes the carry bit directly.
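For illustration, here is essentially the i64 case from the new uaddo-64.ll test (value names changed for readability); with this patch the overflow bit is produced directly from the carry rather than through the generic expansion:

define i64 @add(i64 %a, i64 %b, ptr %ovf) {
entry:
  ; Compute the sum and the overflow bit in one intrinsic call.
  %pair = tail call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
  %ovfbit = extractvalue { i64, i1 } %pair, 1
  %sum = extractvalue { i64, i1 } %pair, 0
  %ovfext = zext i1 %ovfbit to i64
  store i64 %ovfext, ptr %ovf, align 8
  ret i64 %sum
}

declare { i64, i1 } @llvm.uadd.with.overflow.i64(i64, i64)

Expected codegen (from the CHECK lines added in uaddo-64.ll): addc computes the sum and sets the carry, and addze of a zero register turns the carry into the overflow value:

  li 6, 0
  addc 3, 3, 4
  addze 4, 6
  std 4, 0(5)
  blr

The 32-bit case in uaddo-32.ll is analogous, using the i32 intrinsic and stw for the store.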
>From da682f23bc76ee40371a43cf09fe11738f03bab5 Mon Sep 17 00:00:00 2001
From: Zaara Syeda <syzaara at cpap8104.rtp.raleigh.ibm.com>
Date: Thu, 26 Sep 2024 11:49:54 -0400
Subject: [PATCH] [PPC] Add custom lowering for uaddo
Improve the codegen for the uaddo node for i64 in 64-bit mode and
i32 in 32-bit mode by custom lowering.
---
llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 43 +++++++++++++++
llvm/lib/Target/PowerPC/PPCISelLowering.h | 1 +
llvm/lib/Target/PowerPC/PPCMIPeephole.cpp | 61 +++++++++++++++++++--
llvm/test/CodeGen/PowerPC/sat-add.ll | 5 +-
llvm/test/CodeGen/PowerPC/uaddo-32.ll | 37 +++++++++++++
llvm/test/CodeGen/PowerPC/uaddo-64.ll | 37 +++++++++++++
6 files changed, 175 insertions(+), 9 deletions(-)
create mode 100644 llvm/test/CodeGen/PowerPC/uaddo-32.ll
create mode 100644 llvm/test/CodeGen/PowerPC/uaddo-64.ll
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index d9847a21489e63..c3497314b91e94 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -198,6 +198,9 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
}
}
+ if (!Subtarget.hasP10Vector())
+ setOperationAction(ISD::UADDO, isPPC64 ? MVT::i64 : MVT::i32, Custom);
+
// Match BITREVERSE to customized fast code sequence in the td file.
setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
@@ -11967,11 +11970,51 @@ SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
llvm_unreachable("ERROR:Should return for all cases within swtich.");
}
+SDValue PPCTargetLowering::LowerUaddo(SDValue Op, SelectionDAG &DAG) const {
+ // Default to target independent lowering if there is a logical user of the
+ // carry-bit.
+ for (SDNode *U : Op->uses()) {
+ if (U->getOpcode() == ISD::SELECT || ISD::isBitwiseLogicOp(U->getOpcode()))
+ return SDValue();
+ }
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ SDLoc dl(Op);
+
+ // Default to target independent lowering for special cases handled there.
+ if (isOneConstant(RHS) || isAllOnesConstant(RHS))
+ return SDValue();
+
+ EVT VT = Op.getNode()->getValueType(0);
+ bool is64Bit = Subtarget.isPPC64();
+
+ SDValue ADDC;
+ SDValue Overflow;
+ SDVTList VTs = Op.getNode()->getVTList();
+
+ ADDC = SDValue(DAG.getMachineNode(is64Bit ? PPC::ADDC8 : PPC::ADDC, dl, VT,
+ MVT::Glue, LHS, RHS),
+ 0);
+ SDValue Li = SDValue(DAG.getMachineNode(is64Bit ? PPC::LI8 : PPC::LI, dl, VT,
+ DAG.getTargetConstant(0, dl, VT)),
+ 0);
+ Overflow = SDValue(DAG.getMachineNode(is64Bit ? PPC::ADDZE8 : PPC::ADDZE, dl,
+ VT, MVT::Glue, Li, ADDC.getValue(1)),
+ 0);
+ SDValue OverflowTrunc =
+ DAG.getNode(ISD::TRUNCATE, dl, Op.getNode()->getValueType(1), Overflow);
+ SDValue Res =
+ DAG.getNode(ISD::MERGE_VALUES, dl, VTs, ADDC.getValue(0), OverflowTrunc);
+ return Res;
+}
+
/// LowerOperation - Provide custom lowering hooks for some operations.
///
SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
default: llvm_unreachable("Wasn't expecting to be able to lower this!");
+ case ISD::UADDO:
+ return LowerUaddo(Op, DAG);
case ISD::FPOW: return lowerPow(Op, DAG);
case ISD::FSIN: return lowerSin(Op, DAG);
case ISD::FCOS: return lowerCos(Op, DAG);
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 8907c3c5a81c3c..7285f6de4728d5 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -1277,6 +1277,7 @@ namespace llvm {
SDValue LowerGlobalTLSAddressLinux(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerUaddo(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
index b8abee76cdfa80..362eaf33ba96a8 100644
--- a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
+++ b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
@@ -139,6 +139,8 @@ struct PPCMIPeephole : public MachineFunctionPass {
void UpdateTOCSaves(std::map<MachineInstr *, bool> &TOCSaves,
MachineInstr *MI);
+ bool eliminateTruncWhenLoweringUADDO(MachineInstr &MI,
+ MachineInstr *&ToErase);
// A number of transformations will eliminate the definition of a register
// as all of its uses will be removed. However, this leaves a register
// without a definition for LiveVariables. Such transformations should
@@ -1071,6 +1073,18 @@ bool PPCMIPeephole::simplifyCode() {
break;
}
case PPC::RLDICL: {
+ Register SrcReg = MI.getOperand(1).getReg();
+ if (!SrcReg.isVirtual())
+ break;
+
+ MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
+ // We can eliminate clearing the left 63 bits when only the carry-bit is
+ // set.
+ if (eliminateTruncWhenLoweringUADDO(MI, ToErase)) {
+ Simplified = true;
+ break;
+ }
+
// We can eliminate RLDICL (e.g. for zero-extension)
// if all bits to clear are already zero in the input.
// This code assume following code sequence for zero-extension.
@@ -1082,11 +1096,6 @@ bool PPCMIPeephole::simplifyCode() {
if (MI.getOperand(2).getImm() != 0)
break;
- Register SrcReg = MI.getOperand(1).getReg();
- if (!SrcReg.isVirtual())
- break;
-
- MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
if (!(SrcMI && SrcMI->getOpcode() == PPC::INSERT_SUBREG &&
SrcMI->getOperand(0).isReg() && SrcMI->getOperand(1).isReg()))
break;
@@ -1277,7 +1286,15 @@ bool PPCMIPeephole::simplifyCode() {
Simplified = true;
break;
}
- case PPC::RLWINM:
+ case PPC::RLWINM: {
+ // We can eliminate clearing the left 31 bits when only the carry-bit is
+ // set.
+ if (eliminateTruncWhenLoweringUADDO(MI, ToErase)) {
+ Simplified = true;
+ break;
+ }
+ }
+ LLVM_FALLTHROUGH;
case PPC::RLWINM_rec:
case PPC::RLWINM8:
case PPC::RLWINM8_rec: {
@@ -1889,6 +1906,38 @@ bool PPCMIPeephole::eliminateRedundantCompare() {
return Simplified;
}
+bool PPCMIPeephole::eliminateTruncWhenLoweringUADDO(MachineInstr &MI,
+ MachineInstr *&ToErase) {
+ Register SrcReg = MI.getOperand(1).getReg();
+ if (!SrcReg.isVirtual())
+ return false;
+ MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
+
+ bool Is64Bit = MI.getOpcode() == PPC::RLDICL;
+ int Imm = Is64Bit ? 63 : 31;
+ if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != Imm)
+ return false;
+ if (SrcMI->getOpcode() != (Is64Bit ? PPC::ADDZE8 : PPC::ADDZE))
+ return false;
+ MachineInstr *LI = MRI->getVRegDef(SrcMI->getOperand(1).getReg());
+ if (LI->getOpcode() != (Is64Bit ? PPC::LI8 : PPC::LI))
+ return false;
+ if (LI->getOperand(1).getImm() != 0 || MI.getOperand(2).getImm() != 0)
+ return false;
+ Register NewReg = SrcMI->getOperand(0).getReg();
+ ToErase = &MI;
+ Register MIDestReg = MI.getOperand(0).getReg();
+ for (MachineInstr &UseMI : MRI->use_instructions(MIDestReg)) {
+ for (MachineOperand &MO : UseMI.operands()) {
+ if (MO.isReg() && MO.getReg() == MIDestReg) {
+ MO.setReg(NewReg);
+ addRegToUpdate(NewReg);
+ break;
+ }
+ }
+ }
+ return true;
+}
// We miss the opportunity to emit an RLDIC when lowering jump tables
// since ISEL sees only a single basic block. When selecting, the clear
diff --git a/llvm/test/CodeGen/PowerPC/sat-add.ll b/llvm/test/CodeGen/PowerPC/sat-add.ll
index f699ea54192d88..8fff2c28da245e 100644
--- a/llvm/test/CodeGen/PowerPC/sat-add.ll
+++ b/llvm/test/CodeGen/PowerPC/sat-add.ll
@@ -170,11 +170,10 @@ define i64 @unsigned_sat_constant_i64_using_cmp_sum(i64 %x) {
define i64 @unsigned_sat_constant_i64_using_cmp_notval(i64 %x) {
; CHECK-LABEL: unsigned_sat_constant_i64_using_cmp_notval:
; CHECK: # %bb.0:
-; CHECK-NEXT: li 5, -43
; CHECK-NEXT: addi 4, 3, 42
-; CHECK-NEXT: cmpld 3, 5
+; CHECK-NEXT: cmpld 4, 3
; CHECK-NEXT: li 3, -1
-; CHECK-NEXT: iselgt 3, 3, 4
+; CHECK-NEXT: isellt 3, 3, 4
; CHECK-NEXT: blr
%a = add i64 %x, 42
%c = icmp ugt i64 %x, -43
diff --git a/llvm/test/CodeGen/PowerPC/uaddo-32.ll b/llvm/test/CodeGen/PowerPC/uaddo-32.ll
new file mode 100644
index 00000000000000..7c741e3618e6fc
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/uaddo-32.ll
@@ -0,0 +1,37 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=powerpc-unknown-linux-gnu | FileCheck %s
+
+define noundef i32 @add(i32 noundef %a, i32 noundef %b, ptr nocapture noundef writeonly %ovf) {
+; CHECK-LABEL: add:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: li 6, 0
+; CHECK-NEXT: addc 3, 3, 4
+; CHECK-NEXT: addze 4, 6
+; CHECK-NEXT: stw 4, 0(5)
+; CHECK-NEXT: blr
+entry:
+ %0 = tail call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
+ %1 = extractvalue { i32, i1 } %0, 1
+ %2 = extractvalue { i32, i1 } %0, 0
+ %3 = zext i1 %1 to i32
+ store i32 %3, ptr %ovf, align 8
+ ret i32 %2
+}
+
+declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32)
+
+define noundef zeroext i1 @add_overflow(i32 noundef %a, i32 noundef %b, ptr nocapture noundef writeonly %ovf) {
+; CHECK-LABEL: add_overflow:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: li 6, 0
+; CHECK-NEXT: addc 4, 3, 4
+; CHECK-NEXT: addze 3, 6
+; CHECK-NEXT: stw 4, 0(5)
+; CHECK-NEXT: blr
+entry:
+ %0 = tail call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
+ %1 = extractvalue { i32, i1 } %0, 1
+ %2 = extractvalue { i32, i1 } %0, 0
+ store i32 %2, ptr %ovf, align 8
+ ret i1 %1
+}
diff --git a/llvm/test/CodeGen/PowerPC/uaddo-64.ll b/llvm/test/CodeGen/PowerPC/uaddo-64.ll
new file mode 100644
index 00000000000000..ef4eccd329d9f1
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/uaddo-64.ll
@@ -0,0 +1,37 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu | FileCheck %s --check-prefixes=PPC64
+
+define noundef i64 @add(i64 noundef %a, i64 noundef %b, ptr nocapture noundef writeonly %ovf) {
+; PPC64-LABEL: add:
+; PPC64: # %bb.0: # %entry
+; PPC64-NEXT: li 6, 0
+; PPC64-NEXT: addc 3, 3, 4
+; PPC64-NEXT: addze 4, 6
+; PPC64-NEXT: std 4, 0(5)
+; PPC64-NEXT: blr
+entry:
+ %0 = tail call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
+ %1 = extractvalue { i64, i1 } %0, 1
+ %2 = extractvalue { i64, i1 } %0, 0
+ %3 = zext i1 %1 to i64
+ store i64 %3, ptr %ovf, align 8
+ ret i64 %2
+}
+
+declare { i64, i1 } @llvm.uadd.with.overflow.i64(i64, i64)
+
+define noundef zeroext i1 @add_overflow(i64 noundef %a, i64 noundef %b, ptr nocapture noundef writeonly %ovf) {
+; PPC64-LABEL: add_overflow:
+; PPC64: # %bb.0: # %entry
+; PPC64-NEXT: li 6, 0
+; PPC64-NEXT: addc 4, 3, 4
+; PPC64-NEXT: addze 3, 6
+; PPC64-NEXT: std 4, 0(5)
+; PPC64-NEXT: blr
+entry:
+ %0 = tail call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
+ %1 = extractvalue { i64, i1 } %0, 1
+ %2 = extractvalue { i64, i1 } %0, 0
+ store i64 %2, ptr %ovf, align 8
+ ret i1 %1
+}