[llvm] [PowerPC] Lower ucmp using subtractions (PR #146446)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Jun 30 17:57:37 PDT 2025
https://github.com/AZero13 created https://github.com/llvm/llvm-project/pull/146446
Source: Hacker's Delight, page 21.
Using the carry bit, we can lower ucmp to a short sequence of subtractions.
From e3791b95dc77ab6594264c8a1f39edc49a4164ce Mon Sep 17 00:00:00 2001
From: Rose <gfunni234 at gmail.com>
Date: Mon, 30 Jun 2025 20:55:55 -0400
Subject: [PATCH] [PowerPC] Lower ucmp using subtractions
Source: Hacker's Delight, page 21.
Using the carry bit, we can lower ucmp to a short sequence of subtractions.
---
.../PowerPC/GISel/PPCInstructionSelector.cpp | 52 +++++++++++++
.../Target/PowerPC/GISel/PPCLegalizerInfo.cpp | 5 ++
llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 39 ++++++++++
llvm/lib/Target/PowerPC/PPCISelLowering.h | 1 +
llvm/test/CodeGen/PowerPC/memcmp.ll | 20 +++--
llvm/test/CodeGen/PowerPC/ucmp.ll | 74 +++++++------------
6 files changed, 134 insertions(+), 57 deletions(-)
diff --git a/llvm/lib/Target/PowerPC/GISel/PPCInstructionSelector.cpp b/llvm/lib/Target/PowerPC/GISel/PPCInstructionSelector.cpp
index 3283a5bb69404..8f2e570299b3b 100644
--- a/llvm/lib/Target/PowerPC/GISel/PPCInstructionSelector.cpp
+++ b/llvm/lib/Target/PowerPC/GISel/PPCInstructionSelector.cpp
@@ -65,6 +65,9 @@ class PPCInstructionSelector : public InstructionSelector {
bool selectI64Imm(MachineInstr &I, MachineBasicBlock &MBB,
MachineRegisterInfo &MRI) const;
+ bool selectUCMP(MachineInstr &I, MachineBasicBlock &MBB,
+ MachineRegisterInfo &MRI) const;
+
const PPCTargetMachine &TM;
const PPCSubtarget &STI;
const PPCInstrInfo &TII;
@@ -705,6 +708,53 @@ bool PPCInstructionSelector::selectConstantPool(
return constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
}
+/// Select G_UCMP (unsigned three-way compare producing -1/0/1) as a
+/// branch-free subf/subfc/subfe/subfe sequence (Hacker's Delight).
+///
+/// Returns false without modifying anything when the operand/result types
+/// cannot be handled by this sequence. Otherwise \p I is erased and the
+/// return value says whether every new instruction could be constrained.
+bool PPCInstructionSelector::selectUCMP(MachineInstr &I, MachineBasicBlock &MBB,
+                                        MachineRegisterInfo &MRI) const {
+  const DebugLoc &DbgLoc = I.getDebugLoc();
+  Register DstReg = I.getOperand(0).getReg();
+  Register LHS = I.getOperand(1).getReg();
+  Register RHS = I.getOperand(2).getReg();
+
+  LLT Ty = MRI.getType(LHS);
+  bool Is64Bit = Ty.getSizeInBits() == 64;
+  bool Is32Bit = Ty.getSizeInBits() == 32;
+
+  // Only 32- and 64-bit operands have matching SUBF/SUBFC/SUBFE opcodes.
+  if (!Is32Bit && !Is64Bit)
+    return false;
+
+  // The final SUBFE defines DstReg directly, so the result must have the same
+  // width as the operands; no extension or truncation is emitted here.
+  if (MRI.getType(DstReg).getSizeInBits() != Ty.getSizeInBits())
+    return false;
+
+  // In 64-bit mode CA is the carry out of the full 64-bit register. The upper
+  // half of a register holding a 32-bit value is unspecified, so the carry
+  // would not reflect the 32-bit unsigned comparison.
+  if (Is32Bit && STI.isPPC64())
+    return false;
+
+  // Select appropriate opcodes based on operand size
+  unsigned SubfOp = Is64Bit ? PPC::SUBF8 : PPC::SUBF;
+  unsigned SubfcOp = Is64Bit ? PPC::SUBFC8 : PPC::SUBFC;
+  unsigned SubfeOp = Is64Bit ? PPC::SUBFE8 : PPC::SUBFE;
+
+  const TargetRegisterClass *RC =
+      Is64Bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
+
+  // diff = LHS - RHS (subf RHS, LHS -> LHS - RHS)
+  Register DiffReg = MRI.createVirtualRegister(RC);
+  auto Diff =
+      BuildMI(MBB, I, DbgLoc, TII.get(SubfOp), DiffReg).addReg(RHS).addReg(LHS);
+
+  // t1 = RHS - LHS, only needed for its carry: CA = (RHS >= LHS)
+  // (subfc LHS, RHS -> RHS - LHS)
+  Register T1Reg = MRI.createVirtualRegister(RC);
+  auto T1 =
+      BuildMI(MBB, I, DbgLoc, TII.get(SubfcOp), T1Reg).addReg(LHS).addReg(RHS);
+
+  // t2 = LHS - RHS - 1 + CA (subfe RHS, LHS -> ~RHS + LHS + CA); sets CA again
+  Register T2Reg = MRI.createVirtualRegister(RC);
+  auto T2 =
+      BuildMI(MBB, I, DbgLoc, TII.get(SubfeOp), T2Reg).addReg(RHS).addReg(LHS);
+
+  // result = diff - t2 - 1 + CA (subfe T2Reg, DiffReg -> ~t2 + diff + CA),
+  // which evaluates to -1, 0 or 1 for LHS <, ==, > RHS respectively.
+  auto Result = BuildMI(MBB, I, DbgLoc, TII.get(SubfeOp), DstReg)
+                    .addReg(T2Reg)
+                    .addReg(DiffReg);
+
+  I.eraseFromParent();
+
+  // Constrain registers
+  return constrainSelectedInstRegOperands(*Diff, TII, TRI, RBI) &&
+         constrainSelectedInstRegOperands(*T1, TII, TRI, RBI) &&
+         constrainSelectedInstRegOperands(*T2, TII, TRI, RBI) &&
+         constrainSelectedInstRegOperands(*Result, TII, TRI, RBI);
+}
+
bool PPCInstructionSelector::select(MachineInstr &I) {
auto &MBB = *I.getParent();
auto &MF = *MBB.getParent();
@@ -775,6 +825,8 @@ bool PPCInstructionSelector::select(MachineInstr &I) {
return selectI64Imm(I, MBB, MRI);
case TargetOpcode::G_CONSTANT_POOL:
return selectConstantPool(I, MBB, MRI);
+ case TargetOpcode::G_UCMP:
+ return selectUCMP(I, MBB, MRI);
}
return false;
}
diff --git a/llvm/lib/Target/PowerPC/GISel/PPCLegalizerInfo.cpp b/llvm/lib/Target/PowerPC/GISel/PPCLegalizerInfo.cpp
index afc8f6bbde1b7..9f48333b5be8d 100644
--- a/llvm/lib/Target/PowerPC/GISel/PPCLegalizerInfo.cpp
+++ b/llvm/lib/Target/PowerPC/GISel/PPCLegalizerInfo.cpp
@@ -72,6 +72,11 @@ PPCLegalizerInfo::PPCLegalizerInfo(const PPCSubtarget &ST) {
getActionDefinitionsBuilder(G_FCMP).legalForCartesianProduct({S1},
{S32, S64});
+ // Add unsigned 3-way comparison support
+ getActionDefinitionsBuilder(G_UCMP)
+ .legalFor({S32, S64})
+ .clampScalar(0, S32, S64);
+
getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
.legalForCartesianProduct({S64}, {S32, S64});
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 5a4a63469ad6e..b70ef182a2a72 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1409,6 +1409,11 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
+ // Custom handling for PowerPC ucmp instruction
+ setOperationAction(ISD::UCMP, MVT::i32, Custom);
+ if (Subtarget.isPPC64())
+ setOperationAction(ISD::UCMP, MVT::i64, Custom);
+
// We have target-specific dag combine patterns for the following nodes:
setTargetDAGCombine({ISD::AND, ISD::ADD, ISD::SHL, ISD::SRA, ISD::SRL,
ISD::MUL, ISD::FMA, ISD::SINT_TO_FP, ISD::BUILD_VECTOR});
@@ -12470,6 +12475,38 @@ SDValue PPCTargetLowering::LowerSSUBO(SDValue Op, SelectionDAG &DAG) const {
return DAG.getMergeValues({Sub, OverflowTrunc}, dl);
}
+// Lower unsigned 3-way compare producing -1/0/1 with a branch-free
+// subtract/carry sequence (Hacker's Delight).
+//
+// Op is an ISD::UCMP node; operands 0 and 1 are the values to compare. The
+// returned value has Op's result type.
+SDValue PPCTargetLowering::LowerUCMP(SDValue Op, SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+  // Each operand is used several times below; freeze them so an undef input
+  // cannot take a different value at each use.
+  SDValue A = DAG.getFreeze(Op.getOperand(0));
+  SDValue B = DAG.getFreeze(Op.getOperand(1));
+  EVT OpVT = A.getValueType();   // operand type (i32 or i64)
+  EVT ResVT = Op.getValueType(); // result type (may differ from OpVT)
+
+  // In 64-bit mode CA is the carry out of the full 64-bit register, so i32
+  // operands must be zero-extended for the carry to reflect the 32-bit
+  // unsigned comparison.
+  if (Subtarget.isPPC64() && OpVT == MVT::i32) {
+    A = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, A);
+    B = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, B);
+    OpVT = MVT::i64;
+  }
+
+  // First compute diff = A - B (will become subf).
+  SDValue Diff = DAG.getNode(ISD::SUB, DL, OpVT, A, B);
+
+  // Generate B - A using SUBC to capture carry: CA0 = (B >= A).
+  SDVTList VTs = DAG.getVTList(OpVT, MVT::i32);
+  SDValue SubC = DAG.getNode(PPCISD::SUBC, DL, VTs, B, A);
+  SDValue CA0 = SubC.getValue(1);
+
+  // t2 = A - B - 1 + CA0 using SUBE; its carry-out feeds the final step.
+  SDValue SubE1 = DAG.getNode(PPCISD::SUBE, DL, VTs, A, B, CA0);
+  SDValue CA1 = SubE1.getValue(1);
+
+  // res = diff - t2 - 1 + CA1 using SUBE (produces desired -1/0/1).
+  SDValue ResPair = DAG.getNode(PPCISD::SUBE, DL, VTs, Diff, SubE1, CA1);
+
+  // The value is a signed -1/0/1, so sign-extend or truncate it to the result
+  // type; a plain TRUNCATE would be invalid when the result is wider.
+  return DAG.getSExtOrTrunc(ResPair.getValue(0), DL, ResVT);
+}
+
/// LowerOperation - Provide custom lowering hooks for some operations.
///
SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
@@ -12574,6 +12611,8 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::UADDO_CARRY:
case ISD::USUBO_CARRY:
return LowerADDSUBO_CARRY(Op, DAG);
+ case ISD::UCMP:
+ return LowerUCMP(Op, DAG);
}
}
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 4c88bd372b106..7e8dd166ad724 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -1329,6 +1329,7 @@ namespace llvm {
SDValue LowerIS_FPCLASS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerADDSUBO_CARRY(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerADDSUBO(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerUCMP(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerToLibCall(const char *LibCallName, SDValue Op,
SelectionDAG &DAG) const;
SDValue lowerLibCallBasedOnType(const char *LibCallFloatName,
diff --git a/llvm/test/CodeGen/PowerPC/memcmp.ll b/llvm/test/CodeGen/PowerPC/memcmp.ll
index 39f9269997315..4998d87cf397b 100644
--- a/llvm/test/CodeGen/PowerPC/memcmp.ll
+++ b/llvm/test/CodeGen/PowerPC/memcmp.ll
@@ -6,12 +6,10 @@ define signext i32 @memcmp8(ptr nocapture readonly %buffer1, ptr nocapture reado
; CHECK: # %bb.0:
; CHECK-NEXT: ldbrx 3, 0, 3
; CHECK-NEXT: ldbrx 4, 0, 4
-; CHECK-NEXT: cmpld 3, 4
-; CHECK-NEXT: subc 3, 4, 3
-; CHECK-NEXT: subfe 3, 4, 4
-; CHECK-NEXT: li 4, -1
-; CHECK-NEXT: neg 3, 3
-; CHECK-NEXT: isellt 3, 4, 3
+; CHECK-NEXT: subc 6, 4, 3
+; CHECK-NEXT: sub 5, 3, 4
+; CHECK-NEXT: subfe 3, 4, 3
+; CHECK-NEXT: subfe 3, 3, 5
; CHECK-NEXT: extsw 3, 3
; CHECK-NEXT: blr
%call = tail call signext i32 @memcmp(ptr %buffer1, ptr %buffer2, i64 8)
@@ -23,11 +21,11 @@ define signext i32 @memcmp4(ptr nocapture readonly %buffer1, ptr nocapture reado
; CHECK: # %bb.0:
; CHECK-NEXT: lwbrx 3, 0, 3
; CHECK-NEXT: lwbrx 4, 0, 4
-; CHECK-NEXT: cmplw 3, 4
-; CHECK-NEXT: sub 5, 4, 3
-; CHECK-NEXT: li 3, -1
-; CHECK-NEXT: rldicl 5, 5, 1, 63
-; CHECK-NEXT: isellt 3, 3, 5
+; CHECK-NEXT: subc 6, 4, 3
+; CHECK-NEXT: sub 5, 3, 4
+; CHECK-NEXT: subfe 3, 4, 3
+; CHECK-NEXT: subfe 3, 3, 5
+; CHECK-NEXT: extsw 3, 3
; CHECK-NEXT: blr
%call = tail call signext i32 @memcmp(ptr %buffer1, ptr %buffer2, i64 4)
ret i32 %call
diff --git a/llvm/test/CodeGen/PowerPC/ucmp.ll b/llvm/test/CodeGen/PowerPC/ucmp.ll
index d2dff6e7e05c8..4d393dd00e3db 100644
--- a/llvm/test/CodeGen/PowerPC/ucmp.ll
+++ b/llvm/test/CodeGen/PowerPC/ucmp.ll
@@ -4,12 +4,10 @@
define i8 @ucmp_8_8(i8 zeroext %x, i8 zeroext %y) nounwind {
; CHECK-LABEL: ucmp_8_8:
; CHECK: # %bb.0:
-; CHECK-NEXT: cmplw 3, 4
-; CHECK-NEXT: sub 5, 4, 3
-; CHECK-NEXT: li 3, -1
-; CHECK-NEXT: rldicl 5, 5, 1, 63
-; CHECK-NEXT: rldic 3, 3, 0, 32
-; CHECK-NEXT: isellt 3, 3, 5
+; CHECK-NEXT: subc 6, 4, 3
+; CHECK-NEXT: sub 5, 3, 4
+; CHECK-NEXT: subfe 3, 4, 3
+; CHECK-NEXT: subfe 3, 3, 5
; CHECK-NEXT: blr
%1 = call i8 @llvm.ucmp(i8 %x, i8 %y)
ret i8 %1
@@ -18,12 +16,10 @@ define i8 @ucmp_8_8(i8 zeroext %x, i8 zeroext %y) nounwind {
define i8 @ucmp_8_16(i16 zeroext %x, i16 zeroext %y) nounwind {
; CHECK-LABEL: ucmp_8_16:
; CHECK: # %bb.0:
-; CHECK-NEXT: cmplw 3, 4
-; CHECK-NEXT: sub 5, 4, 3
-; CHECK-NEXT: li 3, -1
-; CHECK-NEXT: rldicl 5, 5, 1, 63
-; CHECK-NEXT: rldic 3, 3, 0, 32
-; CHECK-NEXT: isellt 3, 3, 5
+; CHECK-NEXT: subc 6, 4, 3
+; CHECK-NEXT: sub 5, 3, 4
+; CHECK-NEXT: subfe 3, 4, 3
+; CHECK-NEXT: subfe 3, 3, 5
; CHECK-NEXT: blr
%1 = call i8 @llvm.ucmp(i16 %x, i16 %y)
ret i8 %1
@@ -32,14 +28,10 @@ define i8 @ucmp_8_16(i16 zeroext %x, i16 zeroext %y) nounwind {
define i8 @ucmp_8_32(i32 %x, i32 %y) nounwind {
; CHECK-LABEL: ucmp_8_32:
; CHECK: # %bb.0:
-; CHECK-NEXT: clrldi 5, 4, 32
-; CHECK-NEXT: clrldi 6, 3, 32
-; CHECK-NEXT: sub 5, 5, 6
-; CHECK-NEXT: cmplw 3, 4
-; CHECK-NEXT: li 3, -1
-; CHECK-NEXT: rldic 3, 3, 0, 32
-; CHECK-NEXT: rldicl 5, 5, 1, 63
-; CHECK-NEXT: isellt 3, 3, 5
+; CHECK-NEXT: subc 6, 4, 3
+; CHECK-NEXT: sub 5, 3, 4
+; CHECK-NEXT: subfe 3, 4, 3
+; CHECK-NEXT: subfe 3, 3, 5
; CHECK-NEXT: blr
%1 = call i8 @llvm.ucmp(i32 %x, i32 %y)
ret i8 %1
@@ -48,12 +40,10 @@ define i8 @ucmp_8_32(i32 %x, i32 %y) nounwind {
define i8 @ucmp_8_64(i64 %x, i64 %y) nounwind {
; CHECK-LABEL: ucmp_8_64:
; CHECK: # %bb.0:
-; CHECK-NEXT: cmpld 3, 4
-; CHECK-NEXT: subc 3, 4, 3
-; CHECK-NEXT: subfe 3, 4, 4
-; CHECK-NEXT: li 4, -1
-; CHECK-NEXT: neg 3, 3
-; CHECK-NEXT: isellt 3, 4, 3
+; CHECK-NEXT: subc 6, 4, 3
+; CHECK-NEXT: sub 5, 3, 4
+; CHECK-NEXT: subfe 3, 4, 3
+; CHECK-NEXT: subfe 3, 3, 5
; CHECK-NEXT: blr
%1 = call i8 @llvm.ucmp(i64 %x, i64 %y)
ret i8 %1
@@ -82,14 +72,10 @@ define i8 @ucmp_8_128(i128 %x, i128 %y) nounwind {
define i32 @ucmp_32_32(i32 %x, i32 %y) nounwind {
; CHECK-LABEL: ucmp_32_32:
; CHECK: # %bb.0:
-; CHECK-NEXT: clrldi 5, 4, 32
-; CHECK-NEXT: clrldi 6, 3, 32
-; CHECK-NEXT: sub 5, 5, 6
-; CHECK-NEXT: cmplw 3, 4
-; CHECK-NEXT: li 3, -1
-; CHECK-NEXT: rldic 3, 3, 0, 32
-; CHECK-NEXT: rldicl 5, 5, 1, 63
-; CHECK-NEXT: isellt 3, 3, 5
+; CHECK-NEXT: subc 6, 4, 3
+; CHECK-NEXT: sub 5, 3, 4
+; CHECK-NEXT: subfe 3, 4, 3
+; CHECK-NEXT: subfe 3, 3, 5
; CHECK-NEXT: blr
%1 = call i32 @llvm.ucmp(i32 %x, i32 %y)
ret i32 %1
@@ -98,12 +84,10 @@ define i32 @ucmp_32_32(i32 %x, i32 %y) nounwind {
define i32 @ucmp_32_64(i64 %x, i64 %y) nounwind {
; CHECK-LABEL: ucmp_32_64:
; CHECK: # %bb.0:
-; CHECK-NEXT: cmpld 3, 4
-; CHECK-NEXT: subc 3, 4, 3
-; CHECK-NEXT: subfe 3, 4, 4
-; CHECK-NEXT: li 4, -1
-; CHECK-NEXT: neg 3, 3
-; CHECK-NEXT: isellt 3, 4, 3
+; CHECK-NEXT: subc 6, 4, 3
+; CHECK-NEXT: sub 5, 3, 4
+; CHECK-NEXT: subfe 3, 4, 3
+; CHECK-NEXT: subfe 3, 3, 5
; CHECK-NEXT: blr
%1 = call i32 @llvm.ucmp(i64 %x, i64 %y)
ret i32 %1
@@ -112,12 +96,10 @@ define i32 @ucmp_32_64(i64 %x, i64 %y) nounwind {
define i64 @ucmp_64_64(i64 %x, i64 %y) nounwind {
; CHECK-LABEL: ucmp_64_64:
; CHECK: # %bb.0:
-; CHECK-NEXT: subc 5, 4, 3
-; CHECK-NEXT: cmpld 3, 4
-; CHECK-NEXT: li 3, -1
-; CHECK-NEXT: subfe 5, 4, 4
-; CHECK-NEXT: neg 5, 5
-; CHECK-NEXT: isellt 3, 3, 5
+; CHECK-NEXT: subc 6, 4, 3
+; CHECK-NEXT: sub 5, 3, 4
+; CHECK-NEXT: subfe 3, 4, 3
+; CHECK-NEXT: subfe 3, 3, 5
; CHECK-NEXT: blr
%1 = call i64 @llvm.ucmp(i64 %x, i64 %y)
ret i64 %1
More information about the llvm-commits
mailing list