[llvm] [ARM] Replace ABS and t2ABS machine nodes with custom lowering (PR #156717)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Sep 14 12:10:33 PDT 2025
https://github.com/AZero13 updated https://github.com/llvm/llvm-project/pull/156717
>From 86aa03696aa4804af4330a50056c031504a2c5bb Mon Sep 17 00:00:00 2001
From: AZero13 <gfunni234 at gmail.com>
Date: Wed, 3 Sep 2025 12:52:18 -0400
Subject: [PATCH] [ARM] Replace ABS and t2ABS machine nodes with custom lowering
Lower ISD::ABS for i32 with a custom CMP + CMOV sequence instead of selecting the ARM::ABS/ARM::t2ABS pseudos and expanding them into a compare-and-branch diamond in EmitInstrWithCustomInserter.
Also copy the (neg (cmov X, Y)) -> (cmov (neg X), (neg Y)) fold into PerformSUBCombine to prevent regressions in nabs.
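For example (codegen taken from the updated llvm/test/CodeGen/Thumb2/abs.ll checks), abs32 on v8.1m-mainline now selects CSNEG:

    cmp  r0, #0
    cneg r0, r0, mi

and neg_abs32, which previously went through the sra/xor/sub pattern and produced an eor.w/rsb pair, becomes:

    cmp  r0, #0
    cneg r0, r0, pl

On v7-M both cases lower to cmp plus a predicated rsb (it mi; rsbmi / it pl; rsbpl), so abs32 codegen there is unchanged.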
---
llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp | 48 -------
llvm/lib/Target/ARM/ARMISelLowering.cpp | 139 ++++++++------------
llvm/lib/Target/ARM/ARMISelLowering.h | 1 +
llvm/lib/Target/ARM/ARMInstrInfo.td | 6 -
llvm/lib/Target/ARM/ARMInstrThumb2.td | 7 -
llvm/lib/Target/ARM/ARMScheduleA57.td | 2 +-
llvm/lib/Target/ARM/ARMScheduleR52.td | 2 +-
llvm/lib/Target/ARM/ARMScheduleSwift.td | 3 -
llvm/test/Analysis/CostModel/ARM/mve-abs.ll | 8 +-
llvm/test/CodeGen/Thumb2/abs.ll | 59 ++++++---
10 files changed, 103 insertions(+), 172 deletions(-)
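As a minimal reproducer for the scalar path (mirroring abs32 in the updated tests), the IR below now hits the Custom action on ISD::ABS and is lowered by the new LowerABS hook, instead of being pattern-matched during instruction selection as sub (xor (X, Y), Y) with Y = sra (X, 31):

    define i32 @abs32(i32 %x) {
      %abs = tail call i32 @llvm.abs.i32(i32 %x, i1 true)
      ret i32 %abs
    }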
diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 9ad46df159c20..847b7af5a9b11 100644
--- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -306,9 +306,6 @@ class ARMDAGToDAGISel : public SelectionDAGISel {
bool tryInsertVectorElt(SDNode *N);
- // Select special operations if node forms integer ABS pattern
- bool tryABSOp(SDNode *N);
-
bool tryReadRegister(SDNode *N);
bool tryWriteRegister(SDNode *N);
@@ -3459,45 +3456,6 @@ bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
return false;
}
-/// Target-specific DAG combining for ISD::SUB.
-/// Target-independent combining lowers SELECT_CC nodes of the form
-/// select_cc setg[ge] X, 0, X, -X
-/// select_cc setgt X, -1, X, -X
-/// select_cc setl[te] X, 0, -X, X
-/// select_cc setlt X, 1, -X, X
-/// which represent Integer ABS into:
-/// Y = sra (X, size(X)-1); sub (xor (X, Y), Y)
-/// ARM instruction selection detects the latter and matches it to
-/// ARM::ABS or ARM::t2ABS machine node.
-bool ARMDAGToDAGISel::tryABSOp(SDNode *N){
- SDValue SUBSrc0 = N->getOperand(0);
- SDValue SUBSrc1 = N->getOperand(1);
- EVT VT = N->getValueType(0);
-
- if (Subtarget->isThumb1Only())
- return false;
-
- if (SUBSrc0.getOpcode() != ISD::XOR || SUBSrc1.getOpcode() != ISD::SRA)
- return false;
-
- SDValue XORSrc0 = SUBSrc0.getOperand(0);
- SDValue XORSrc1 = SUBSrc0.getOperand(1);
- SDValue SRASrc0 = SUBSrc1.getOperand(0);
- SDValue SRASrc1 = SUBSrc1.getOperand(1);
- ConstantSDNode *SRAConstant = dyn_cast<ConstantSDNode>(SRASrc1);
- EVT XType = SRASrc0.getValueType();
- unsigned Size = XType.getSizeInBits() - 1;
-
- if (XORSrc1 == SUBSrc1 && XORSrc0 == SRASrc0 && XType.isInteger() &&
- SRAConstant != nullptr && Size == SRAConstant->getZExtValue()) {
- unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
- CurDAG->SelectNodeTo(N, Opcode, VT, XORSrc0);
- return true;
- }
-
- return false;
-}
-
/// We've got special pseudo-instructions for these
void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
unsigned Opcode;
@@ -3685,12 +3643,6 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
if (tryInlineAsm(N))
return;
break;
- case ISD::SUB:
- // Select special operations if SUB node forms integer ABS pattern
- if (tryABSOp(N))
- return;
- // Other cases are autogenerated.
- break;
case ISD::Constant: {
unsigned Val = N->getAsZExtVal();
// If we can't materialize the constant we need to use a literal pool
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 4af2721562d7c..6e22ddaa0c753 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -647,6 +647,9 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
if (!Subtarget->hasV8_1MMainlineOps())
setOperationAction(ISD::UCMP, MVT::i32, Custom);
+ if (!Subtarget->isThumb1Only())
+ setOperationAction(ISD::ABS, MVT::i32, Custom);
+
setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
@@ -5621,6 +5624,19 @@ ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
return SDValue();
}
+// Generate CMP + CMOV for integer abs.
+SDValue ARMTargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+
+ SDValue Neg = DAG.getNode(ISD::SUB, DL, MVT::i32,
+ DAG.getConstant(0, DL, MVT::i32), Op.getOperand(0));
+ // Generate CMP & CMOV.
+ SDValue Cmp = DAG.getNode(ARMISD::CMP, DL, FlagsVT, Op.getOperand(0),
+ DAG.getConstant(0, DL, MVT::i32));
+ return DAG.getNode(ARMISD::CMOV, DL, MVT::i32, Op.getOperand(0), Neg,
+ DAG.getConstant(ARMCC::MI, DL, MVT::i32), Cmp);
+}
+
SDValue ARMTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
SDValue Chain = Op.getOperand(0);
SDValue Cond = Op.getOperand(1);
@@ -10703,6 +10719,8 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::UCMP:
case ISD::SCMP:
return LowerCMP(Op, DAG);
+ case ISD::ABS:
+ return LowerABS(Op, DAG);
}
}
@@ -12288,89 +12306,6 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
case ARM::Int_eh_sjlj_setup_dispatch:
EmitSjLjDispatchBlock(MI, BB);
return BB;
-
- case ARM::ABS:
- case ARM::t2ABS: {
- // To insert an ABS instruction, we have to insert the
- // diamond control-flow pattern. The incoming instruction knows the
- // source vreg to test against 0, the destination vreg to set,
- // the condition code register to branch on, the
- // true/false values to select between, and a branch opcode to use.
- // It transforms
- // V1 = ABS V0
- // into
- // V2 = MOVS V0
- // BCC (branch to SinkBB if V0 >= 0)
- // RSBBB: V3 = RSBri V2, 0 (compute ABS if V2 < 0)
- // SinkBB: V1 = PHI(V2, V3)
- const BasicBlock *LLVM_BB = BB->getBasicBlock();
- MachineFunction::iterator BBI = ++BB->getIterator();
- MachineFunction *Fn = BB->getParent();
- MachineBasicBlock *RSBBB = Fn->CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *SinkBB = Fn->CreateMachineBasicBlock(LLVM_BB);
- Fn->insert(BBI, RSBBB);
- Fn->insert(BBI, SinkBB);
-
- // Set the call frame size on entry to the new basic blocks.
- unsigned CallFrameSize = TII->getCallFrameSizeAt(MI);
- RSBBB->setCallFrameSize(CallFrameSize);
- SinkBB->setCallFrameSize(CallFrameSize);
-
- Register ABSSrcReg = MI.getOperand(1).getReg();
- Register ABSDstReg = MI.getOperand(0).getReg();
- bool ABSSrcKIll = MI.getOperand(1).isKill();
- bool isThumb2 = Subtarget->isThumb2();
- MachineRegisterInfo &MRI = Fn->getRegInfo();
- // In Thumb mode S must not be specified if source register is the SP or
- // PC and if destination register is the SP, so restrict register class
- Register NewRsbDstReg = MRI.createVirtualRegister(
- isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRRegClass);
-
- // Transfer the remainder of BB and its successor edges to sinkMBB.
- SinkBB->splice(SinkBB->begin(), BB,
- std::next(MachineBasicBlock::iterator(MI)), BB->end());
- SinkBB->transferSuccessorsAndUpdatePHIs(BB);
-
- BB->addSuccessor(RSBBB);
- BB->addSuccessor(SinkBB);
-
- // fall through to SinkMBB
- RSBBB->addSuccessor(SinkBB);
-
- // insert a cmp at the end of BB
- BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
- .addReg(ABSSrcReg)
- .addImm(0)
- .add(predOps(ARMCC::AL));
-
- // insert a bcc with opposite CC to ARMCC::MI at the end of BB
- BuildMI(BB, dl,
- TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)).addMBB(SinkBB)
- .addImm(ARMCC::getOppositeCondition(ARMCC::MI)).addReg(ARM::CPSR);
-
- // insert rsbri in RSBBB
- // Note: BCC and rsbri will be converted into predicated rsbmi
- // by if-conversion pass
- BuildMI(*RSBBB, RSBBB->begin(), dl,
- TII->get(isThumb2 ? ARM::t2RSBri : ARM::RSBri), NewRsbDstReg)
- .addReg(ABSSrcReg, ABSSrcKIll ? RegState::Kill : 0)
- .addImm(0)
- .add(predOps(ARMCC::AL))
- .add(condCodeOp());
-
- // insert PHI in SinkBB,
- // reuse ABSDstReg to not change uses of ABS instruction
- BuildMI(*SinkBB, SinkBB->begin(), dl,
- TII->get(ARM::PHI), ABSDstReg)
- .addReg(NewRsbDstReg).addMBB(RSBBB)
- .addReg(ABSSrcReg).addMBB(BB);
-
- // remove ABS instruction
- MI.eraseFromParent();
-
- // return last added BB
- return SinkBB;
- }
case ARM::COPY_STRUCT_BYVAL_I32:
++NumLoopByVals;
return EmitStructByval(MI, BB);
@@ -14082,6 +14017,41 @@ static SDValue PerformSubCSINCCombine(SDNode *N, SelectionDAG &DAG) {
CSINC.getOperand(3));
}
+static bool isNegatedInteger(SDValue Op) {
+ return Op.getOpcode() == ISD::SUB && isNullConstant(Op.getOperand(0));
+}
+
+// Try to fold
+//
+// (neg (cmov X, Y)) -> (cmov (neg X), (neg Y))
+//
+// The folding helps cmov to be matched with csneg without generating a
+// redundant neg instruction.
+static SDValue performNegCMovCombine(SDNode *N, SelectionDAG &DAG) {
+ if (!isNegatedInteger(SDValue(N, 0)))
+ return SDValue();
+
+ SDValue CMov = N->getOperand(1);
+ if (CMov.getOpcode() != ARMISD::CMOV || !CMov->hasOneUse())
+ return SDValue();
+
+ SDValue N0 = CMov.getOperand(0);
+ SDValue N1 = CMov.getOperand(1);
+
+  // If neither operand is a negation, the fold is not worthwhile, as it
+  // would introduce two additional negations while removing only one.
+ if (!isNegatedInteger(N0) && !isNegatedInteger(N1))
+ return SDValue();
+
+ SDLoc DL(N);
+ EVT VT = CMov.getValueType();
+
+ SDValue N0N = DAG.getNegative(N0, DL, VT);
+ SDValue N1N = DAG.getNegative(N1, DL, VT);
+ return DAG.getNode(ARMISD::CMOV, DL, VT, N0N, N1N, CMov.getOperand(2),
+ CMov.getOperand(3));
+}
+
/// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB.
///
static SDValue PerformSUBCombine(SDNode *N,
@@ -14098,6 +14068,9 @@ static SDValue PerformSUBCombine(SDNode *N,
if (SDValue R = PerformSubCSINCCombine(N, DCI.DAG))
return R;
+ if (SDValue Val = performNegCMovCombine(N, DCI.DAG))
+ return Val;
+
if (!Subtarget->hasMVEIntegerOps() || !N->getValueType(0).isVector())
return SDValue();
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index ccf6d509313b9..8e417ac3e1a7b 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -908,6 +908,7 @@ class VectorType;
SelectionDAG &DAG) const;
SDValue LowerFP_TO_BF16(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCMP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const;
Register getRegisterByName(const char* RegName, LLT VT,
const MachineFunction &MF) const override;
diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td
index bdb16d7d39266..282ff534fc112 100644
--- a/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -5185,12 +5185,6 @@ def SB : AInoP<(outs), (ins), MiscFrm, NoItinerary, "sb", "", []>,
let hasSideEffects = 1;
}
-let usesCustomInserter = 1, Defs = [CPSR], hasNoSchedulingInfo = 1 in {
- // Pseudo instruction that combines movs + predicated rsbmi
- // to implement integer ABS
- def ABS : ARMPseudoInst<(outs GPR:$dst), (ins GPR:$src), 8, NoItinerary, []>;
-}
-
let usesCustomInserter = 1, Defs = [CPSR], hasNoSchedulingInfo = 1 in {
def COPY_STRUCT_BYVAL_I32 : PseudoInst<
(outs), (ins GPR:$dst, GPR:$src, i32imm:$size, i32imm:$alignment),
diff --git a/llvm/lib/Target/ARM/ARMInstrThumb2.td b/llvm/lib/Target/ARM/ARMInstrThumb2.td
index c00d616670b5a..c229c8e4491df 100644
--- a/llvm/lib/Target/ARM/ARMInstrThumb2.td
+++ b/llvm/lib/Target/ARM/ARMInstrThumb2.td
@@ -4393,13 +4393,6 @@ def t2LDRpci_pic : PseudoInst<(outs rGPR:$dst), (ins i32imm:$addr, pclabel:$cp),
imm:$cp))]>,
Requires<[IsThumb2]>;
-// Pseudo instruction that combines movs + predicated rsbmi
-// to implement integer ABS
-let usesCustomInserter = 1, Defs = [CPSR], hasNoSchedulingInfo = 1 in {
-def t2ABS : PseudoInst<(outs rGPR:$dst), (ins rGPR:$src),
- NoItinerary, []>, Requires<[IsThumb2]>;
-}
-
//===----------------------------------------------------------------------===//
// Coprocessor load/store -- for disassembly only
//
diff --git a/llvm/lib/Target/ARM/ARMScheduleA57.td b/llvm/lib/Target/ARM/ARMScheduleA57.td
index 3baac6b233c45..e20bbadbc8eee 100644
--- a/llvm/lib/Target/ARM/ARMScheduleA57.td
+++ b/llvm/lib/Target/ARM/ARMScheduleA57.td
@@ -137,7 +137,7 @@ def : InstRW<[WriteNoop, WriteNoop], (instregex
def : InstRW<[WriteNoop], (instregex "FLDM", "FSTM")>;
// Pseudos
-def : InstRW<[WriteNoop], (instregex "(t2)?ABS$",
+def : InstRW<[WriteNoop], (instregex
"(t)?ADJCALLSTACKDOWN$", "(t)?ADJCALLSTACKUP$", "(t2|t)?Int_eh_sjlj",
"tLDRpci_pic", "(t2)?SUBS_PC_LR",
"JUMPTABLE", "tInt_WIN_eh_sjlj_longjmp",
diff --git a/llvm/lib/Target/ARM/ARMScheduleR52.td b/llvm/lib/Target/ARM/ARMScheduleR52.td
index e85646915117c..c350180baa250 100644
--- a/llvm/lib/Target/ARM/ARMScheduleR52.td
+++ b/llvm/lib/Target/ARM/ARMScheduleR52.td
@@ -235,7 +235,7 @@ def : InstRW< [R52WriteALU_WRI, R52Read_EX1, R52Read_EX1],
"UQADD8", "UQADD16","UQSUB8","UQSUB16","UQASX","UQSAX", "t2QADD",
"t2QSUB", "t2QDADD", "t2QDSUB", "t2SSAT", "t2SSAT16", "t2USAT",
"t2QADD8", "t2QADD16", "t2QSUB8", "t2QSUB16", "t2QASX", "t2QSAX",
- "t2UQADD8", "t2UQADD16","t2UQSUB8","t2UQSUB16","t2UQASX","t2UQSAX","t2ABS")>;
+ "t2UQADD8", "t2UQADD16","t2UQSUB8","t2UQSUB16","t2UQASX","t2UQSAX")>;
// Parallel arithmetic
def : InstRW< [R52WriteALU_EX2, R52Read_EX1, R52Read_EX1],
diff --git a/llvm/lib/Target/ARM/ARMScheduleSwift.td b/llvm/lib/Target/ARM/ARMScheduleSwift.td
index 88682f5c0d2c4..8d53119f4747b 100644
--- a/llvm/lib/Target/ARM/ARMScheduleSwift.td
+++ b/llvm/lib/Target/ARM/ARMScheduleSwift.td
@@ -1081,9 +1081,6 @@ let SchedModel = SwiftModel in {
def : WriteRes<WriteVST2, []>;
def : WriteRes<WriteVST3, []>;
def : WriteRes<WriteVST4, []>;
-
- // Not specified.
- def : InstRW<[SwiftWriteP01OneCycle2x], (instregex "ABS")>;
// Preload.
def : WriteRes<WritePreLd, [SwiftUnitP2]> { let Latency = 0;
let ReleaseAtCycles = [0];
diff --git a/llvm/test/Analysis/CostModel/ARM/mve-abs.ll b/llvm/test/Analysis/CostModel/ARM/mve-abs.ll
index 254c191569f8b..42563e8e34a94 100644
--- a/llvm/test/Analysis/CostModel/ARM/mve-abs.ll
+++ b/llvm/test/Analysis/CostModel/ARM/mve-abs.ll
@@ -31,22 +31,22 @@ declare <64 x i8> @llvm.abs.v64i8(<64 x i8>, i1)
define i32 @abs(i32 %arg) {
; MVE-LABEL: 'abs'
-; MVE-NEXT: Cost Model: Found costs of RThru:5 CodeSize:6 Lat:5 SizeLat:5 for: %I64 = call i64 @llvm.abs.i64(i64 undef, i1 false)
+; MVE-NEXT: Cost Model: Found costs of 4 for: %I64 = call i64 @llvm.abs.i64(i64 undef, i1 false)
; MVE-NEXT: Cost Model: Found costs of RThru:74 CodeSize:55 Lat:74 SizeLat:74 for: %V2I64 = call <2 x i64> @llvm.abs.v2i64(<2 x i64> undef, i1 false)
; MVE-NEXT: Cost Model: Found costs of RThru:148 CodeSize:110 Lat:148 SizeLat:148 for: %V4I64 = call <4 x i64> @llvm.abs.v4i64(<4 x i64> undef, i1 false)
; MVE-NEXT: Cost Model: Found costs of RThru:296 CodeSize:220 Lat:296 SizeLat:296 for: %V8I64 = call <8 x i64> @llvm.abs.v8i64(<8 x i64> undef, i1 false)
-; MVE-NEXT: Cost Model: Found costs of RThru:3 CodeSize:4 Lat:3 SizeLat:3 for: %I32 = call i32 @llvm.abs.i32(i32 undef, i1 false)
+; MVE-NEXT: Cost Model: Found costs of 2 for: %I32 = call i32 @llvm.abs.i32(i32 undef, i1 false)
; MVE-NEXT: Cost Model: Found costs of RThru:40 CodeSize:29 Lat:40 SizeLat:40 for: %V2I32 = call <2 x i32> @llvm.abs.v2i32(<2 x i32> undef, i1 false)
; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V4I32 = call <4 x i32> @llvm.abs.v4i32(<4 x i32> undef, i1 false)
; MVE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V8I32 = call <8 x i32> @llvm.abs.v8i32(<8 x i32> undef, i1 false)
; MVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V16I32 = call <16 x i32> @llvm.abs.v16i32(<16 x i32> undef, i1 false)
-; MVE-NEXT: Cost Model: Found costs of RThru:3 CodeSize:4 Lat:3 SizeLat:3 for: %I16 = call i16 @llvm.abs.i16(i16 undef, i1 false)
+; MVE-NEXT: Cost Model: Found costs of 2 for: %I16 = call i16 @llvm.abs.i16(i16 undef, i1 false)
; MVE-NEXT: Cost Model: Found costs of RThru:40 CodeSize:29 Lat:40 SizeLat:40 for: %V2I16 = call <2 x i16> @llvm.abs.v2i16(<2 x i16> undef, i1 false)
; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V4I16 = call <4 x i16> @llvm.abs.v4i16(<4 x i16> undef, i1 false)
; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V8I16 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> undef, i1 false)
; MVE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> undef, i1 false)
; MVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V32I16 = call <32 x i16> @llvm.abs.v32i16(<32 x i16> undef, i1 false)
-; MVE-NEXT: Cost Model: Found costs of RThru:3 CodeSize:4 Lat:3 SizeLat:3 for: %I8 = call i8 @llvm.abs.i8(i8 undef, i1 false)
+; MVE-NEXT: Cost Model: Found costs of 2 for: %I8 = call i8 @llvm.abs.i8(i8 undef, i1 false)
; MVE-NEXT: Cost Model: Found costs of RThru:40 CodeSize:29 Lat:40 SizeLat:40 for: %V2I8 = call <2 x i8> @llvm.abs.v2i8(<2 x i8> undef, i1 false)
; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V4I8 = call <4 x i8> @llvm.abs.v4i8(<4 x i8> undef, i1 false)
; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V8I8 = call <8 x i8> @llvm.abs.v8i8(<8 x i8> undef, i1 false)
diff --git a/llvm/test/CodeGen/Thumb2/abs.ll b/llvm/test/CodeGen/Thumb2/abs.ll
index 88259ba758803..3cc3ec93e2404 100644
--- a/llvm/test/CodeGen/Thumb2/abs.ll
+++ b/llvm/test/CodeGen/Thumb2/abs.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mtriple=thumbv6m-none-eabi < %s | FileCheck %s --check-prefix=CHECKT1
-; RUN: llc -verify-machineinstrs -mtriple=thumbv7m-none-eabi < %s | FileCheck %s --check-prefixes=CHECKT2
-; RUN: llc -verify-machineinstrs -mtriple=thumbv8.1m.main-none-eabi < %s | FileCheck %s --check-prefixes=CHECKT2
+; RUN: llc -verify-machineinstrs -mtriple=thumbv7m-none-eabi < %s | FileCheck %s --check-prefixes=CHECKT2,CHECKT2V7
+; RUN: llc -verify-machineinstrs -mtriple=thumbv8.1m.main-none-eabi < %s | FileCheck %s --check-prefixes=CHECKT2,CHECKT2V8
declare i64 @llvm.abs.i64(i64, i1 immarg)
@@ -39,11 +39,18 @@ define i32 @neg_abs32(i32 %x) {
; CHECKT1-NEXT: subs r0, r1, r0
; CHECKT1-NEXT: bx lr
;
-; CHECKT2-LABEL: neg_abs32:
-; CHECKT2: @ %bb.0:
-; CHECKT2-NEXT: eor.w r1, r0, r0, asr #31
-; CHECKT2-NEXT: rsb r0, r1, r0, asr #31
-; CHECKT2-NEXT: bx lr
+; CHECKT2V7-LABEL: neg_abs32:
+; CHECKT2V7: @ %bb.0:
+; CHECKT2V7-NEXT: cmp r0, #0
+; CHECKT2V7-NEXT: it pl
+; CHECKT2V7-NEXT: rsbpl r0, r0, #0
+; CHECKT2V7-NEXT: bx lr
+;
+; CHECKT2V8-LABEL: neg_abs32:
+; CHECKT2V8: @ %bb.0:
+; CHECKT2V8-NEXT: cmp r0, #0
+; CHECKT2V8-NEXT: cneg r0, r0, pl
+; CHECKT2V8-NEXT: bx lr
%abs = tail call i32 @llvm.abs.i32(i32 %x, i1 true)
%neg = sub nsw i32 0, %abs
ret i32 %neg
@@ -145,12 +152,18 @@ define i32 @abs32(i32 %x) {
; CHECKT1-NEXT: subs r0, r0, r1
; CHECKT1-NEXT: bx lr
;
-; CHECKT2-LABEL: abs32:
-; CHECKT2: @ %bb.0:
-; CHECKT2-NEXT: cmp r0, #0
-; CHECKT2-NEXT: it mi
-; CHECKT2-NEXT: rsbmi r0, r0, #0
-; CHECKT2-NEXT: bx lr
+; CHECKT2V7-LABEL: abs32:
+; CHECKT2V7: @ %bb.0:
+; CHECKT2V7-NEXT: cmp r0, #0
+; CHECKT2V7-NEXT: it mi
+; CHECKT2V7-NEXT: rsbmi r0, r0, #0
+; CHECKT2V7-NEXT: bx lr
+;
+; CHECKT2V8-LABEL: abs32:
+; CHECKT2V8: @ %bb.0:
+; CHECKT2V8-NEXT: cmp r0, #0
+; CHECKT2V8-NEXT: cneg r0, r0, mi
+; CHECKT2V8-NEXT: bx lr
%abs = tail call i32 @llvm.abs.i32(i32 %x, i1 true)
ret i32 %abs
}
@@ -164,12 +177,20 @@ define i16 @abs16(i16 %x) {
; CHECKT1-NEXT: subs r0, r0, r1
; CHECKT1-NEXT: bx lr
;
-; CHECKT2-LABEL: abs16:
-; CHECKT2: @ %bb.0:
-; CHECKT2-NEXT: sxth r1, r0
-; CHECKT2-NEXT: eor.w r0, r0, r1, asr #15
-; CHECKT2-NEXT: sub.w r0, r0, r1, asr #15
-; CHECKT2-NEXT: bx lr
+; CHECKT2V7-LABEL: abs16:
+; CHECKT2V7: @ %bb.0:
+; CHECKT2V7-NEXT: sxth r0, r0
+; CHECKT2V7-NEXT: cmp r0, #0
+; CHECKT2V7-NEXT: it mi
+; CHECKT2V7-NEXT: rsbmi r0, r0, #0
+; CHECKT2V7-NEXT: bx lr
+;
+; CHECKT2V8-LABEL: abs16:
+; CHECKT2V8: @ %bb.0:
+; CHECKT2V8-NEXT: sxth r0, r0
+; CHECKT2V8-NEXT: cmp r0, #0
+; CHECKT2V8-NEXT: cneg r0, r0, mi
+; CHECKT2V8-NEXT: bx lr
%abs = tail call i16 @llvm.abs.i16(i16 %x, i1 true)
ret i16 %abs
}