[llvm] [ARM] Replace ABS and t2ABS machine nodes with custom lowering (PR #156717)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Sep 14 12:10:33 PDT 2025
https://github.com/AZero13 updated https://github.com/llvm/llvm-project/pull/156717
>From 86aa03696aa4804af4330a50056c031504a2c5bb Mon Sep 17 00:00:00 2001
From: AZero13 <gfunni234 at gmail.com>
Date: Wed, 3 Sep 2025 12:52:18 -0400
Subject: [PATCH] [ARM] Replace ABS and t2ABS machine nodes with custom lowering
Lower ISD::ABS for i32 with a custom CMP + CMOV sequence instead of selecting the ARM::ABS/ARM::t2ABS pseudos and expanding them into a compare-and-branch diamond in EmitInstrWithCustomInserter.
Also copy the (neg (cmov X, Y)) -> (cmov (neg X), (neg Y)) fold into PerformSUBCombine to prevent regressions in nabs.
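For example (codegen taken from the updated llvm/test/CodeGen/Thumb2/abs.ll checks), abs32 on v8.1m-mainline now selects CSNEG:

    cmp  r0, #0
    cneg r0, r0, mi

and neg_abs32, which previously went through the sra/xor/sub pattern and produced an eor.w/rsb pair, becomes:

    cmp  r0, #0
    cneg r0, r0, pl

On v7-M both cases lower to cmp plus a predicated rsb (it mi; rsbmi / it pl; rsbpl), so abs32 codegen there is unchanged.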
---
llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp | 48 -------
llvm/lib/Target/ARM/ARMISelLowering.cpp | 139 ++++++++------------
llvm/lib/Target/ARM/ARMISelLowering.h | 1 +
llvm/lib/Target/ARM/ARMInstrInfo.td | 6 -
llvm/lib/Target/ARM/ARMInstrThumb2.td | 7 -
llvm/lib/Target/ARM/ARMScheduleA57.td | 2 +-
llvm/lib/Target/ARM/ARMScheduleR52.td | 2 +-
llvm/lib/Target/ARM/ARMScheduleSwift.td | 3 -
llvm/test/Analysis/CostModel/ARM/mve-abs.ll | 8 +-
llvm/test/CodeGen/Thumb2/abs.ll | 59 ++++++---
10 files changed, 103 insertions(+), 172 deletions(-)
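As a minimal reproducer for the scalar path (mirroring abs32 in the updated tests), the IR below now hits the Custom action on ISD::ABS and is lowered by the new LowerABS hook, instead of being pattern-matched during instruction selection as sub (xor (X, Y), Y) with Y = sra (X, 31):

    define i32 @abs32(i32 %x) {
      %abs = tail call i32 @llvm.abs.i32(i32 %x, i1 true)
      ret i32 %abs
    }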
diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 9ad46df159c20..847b7af5a9b11 100644
--- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -306,9 +306,6 @@ class ARMDAGToDAGISel : public SelectionDAGISel {
bool tryInsertVectorElt(SDNode *N);
- // Select special operations if node forms integer ABS pattern
- bool tryABSOp(SDNode *N);
-
bool tryReadRegister(SDNode *N);
bool tryWriteRegister(SDNode *N);
@@ -3459,45 +3456,6 @@ bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
return false;
}
-/// Target-specific DAG combining for ISD::SUB.
-/// Target-independent combining lowers SELECT_CC nodes of the form
-/// select_cc setg[ge] X, 0, X, -X
-/// select_cc setgt X, -1, X, -X
-/// select_cc setl[te] X, 0, -X, X
-/// select_cc setlt X, 1, -X, X
-/// which represent Integer ABS into:
-/// Y = sra (X, size(X)-1); sub (xor (X, Y), Y)
-/// ARM instruction selection detects the latter and matches it to
-/// ARM::ABS or ARM::t2ABS machine node.
-bool ARMDAGToDAGISel::tryABSOp(SDNode *N){
- SDValue SUBSrc0 = N->getOperand(0);
- SDValue SUBSrc1 = N->getOperand(1);
- EVT VT = N->getValueType(0);
-
- if (Subtarget->isThumb1Only())
- return false;
-
- if (SUBSrc0.getOpcode() != ISD::XOR || SUBSrc1.getOpcode() != ISD::SRA)
- return false;
-
- SDValue XORSrc0 = SUBSrc0.getOperand(0);
- SDValue XORSrc1 = SUBSrc0.getOperand(1);
- SDValue SRASrc0 = SUBSrc1.getOperand(0);
- SDValue SRASrc1 = SUBSrc1.getOperand(1);
- ConstantSDNode *SRAConstant = dyn_cast<ConstantSDNode>(SRASrc1);
- EVT XType = SRASrc0.getValueType();
- unsigned Size = XType.getSizeInBits() - 1;
-
- if (XORSrc1 == SUBSrc1 && XORSrc0 == SRASrc0 && XType.isInteger() &&
- SRAConstant != nullptr && Size == SRAConstant->getZExtValue()) {
- unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
- CurDAG->SelectNodeTo(N, Opcode, VT, XORSrc0);
- return true;
- }
-
- return false;
-}
-
/// We've got special pseudo-instructions for these
void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
unsigned Opcode;
@@ -3685,12 +3643,6 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
if (tryInlineAsm(N))
return;
break;
- case ISD::SUB:
- // Select special operations if SUB node forms integer ABS pattern
- if (tryABSOp(N))
- return;
- // Other cases are autogenerated.
- break;
case ISD::Constant: {
unsigned Val = N->getAsZExtVal();
// If we can't materialize the constant we need to use a literal pool
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 4af2721562d7c..6e22ddaa0c753 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -647,6 +647,9 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
if (!Subtarget->hasV8_1MMainlineOps())
setOperationAction(ISD::UCMP, MVT::i32, Custom);
+ if (!Subtarget->isThumb1Only())
+ setOperationAction(ISD::ABS, MVT::i32, Custom);
+
setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
@@ -5621,6 +5624,19 @@ ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
return SDValue();
}
+// Generate CMP + CMOV for integer abs.
+SDValue ARMTargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+
+ SDValue Neg = DAG.getNode(ISD::SUB, DL, MVT::i32,
+ DAG.getConstant(0, DL, MVT::i32), Op.getOperand(0));
+ // Generate CMP & CMOV.
+ SDValue Cmp = DAG.getNode(ARMISD::CMP, DL, FlagsVT, Op.getOperand(0),
+ DAG.getConstant(0, DL, MVT::i32));
+ return DAG.getNode(ARMISD::CMOV, DL, MVT::i32, Op.getOperand(0), Neg,
+ DAG.getConstant(ARMCC::MI, DL, MVT::i32), Cmp);
+}
+
SDValue ARMTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
SDValue Chain = Op.getOperand(0);
SDValue Cond = Op.getOperand(1);
@@ -10703,6 +10719,8 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::UCMP:
case ISD::SCMP:
return LowerCMP(Op, DAG);
+ case ISD::ABS:
+ return LowerABS(Op, DAG);
}
}
@@ -12288,89 +12306,6 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
case ARM::Int_eh_sjlj_setup_dispatch:
EmitSjLjDispatchBlock(MI, BB);
return BB;
-
- case ARM::ABS:
- case ARM::t2ABS: {
- // To insert an ABS instruction, we have to insert the
- // diamond control-flow pattern. The incoming instruction knows the
- // source vreg to test against 0, the destination vreg to set,
- // the condition code register to branch on, the
- // true/false values to select between, and a branch opcode to use.
- // It transforms
- // V1 = ABS V0
- // into
- // V2 = MOVS V0
- // BCC (branch to SinkBB if V0 >= 0)
- // RSBBB: V3 = RSBri V2, 0 (compute ABS if V2 < 0)
- // SinkBB: V1 = PHI(V2, V3)
- const BasicBlock *LLVM_BB = BB->getBasicBlock();
- MachineFunction::iterator BBI = ++BB->getIterator();
- MachineFunction *Fn = BB->getParent();
- MachineBasicBlock *RSBBB = Fn->CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *SinkBB = Fn->CreateMachineBasicBlock(LLVM_BB);
- Fn->insert(BBI, RSBBB);
- Fn->insert(BBI, SinkBB);
-
- // Set the call frame size on entry to the new basic blocks.
- unsigned CallFrameSize = TII->getCallFrameSizeAt(MI);
- RSBBB->setCallFrameSize(CallFrameSize);
- SinkBB->setCallFrameSize(CallFrameSize);
-
- Register ABSSrcReg = MI.getOperand(1).getReg();
- Register ABSDstReg = MI.getOperand(0).getReg();
- bool ABSSrcKIll = MI.getOperand(1).isKill();
- bool isThumb2 = Subtarget->isThumb2();
- MachineRegisterInfo &MRI = Fn->getRegInfo();
- // In Thumb mode S must not be specified if source register is the SP or
- // PC and if destination register is the SP, so restrict register class
- Register NewRsbDstReg = MRI.createVirtualRegister(
- isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRRegClass);
-
- // Transfer the remainder of BB and its successor edges to sinkMBB.
- SinkBB->splice(SinkBB->begin(), BB,
- std::next(MachineBasicBlock::iterator(MI)), BB->end());
- SinkBB->transferSuccessorsAndUpdatePHIs(BB);
-
- BB->addSuccessor(RSBBB);
- BB->addSuccessor(SinkBB);
-
- // fall through to SinkMBB
- RSBBB->addSuccessor(SinkBB);
-
- // insert a cmp at the end of BB
- BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
- .addReg(ABSSrcReg)
- .addImm(0)
- .add(predOps(ARMCC::AL));
-
- // insert a bcc with opposite CC to ARMCC::MI at the end of BB
- BuildMI(BB, dl,
- TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)).addMBB(SinkBB)
- .addImm(ARMCC::getOppositeCondition(ARMCC::MI)).addReg(ARM::CPSR);
-
- // insert rsbri in RSBBB
- // Note: BCC and rsbri will be converted into predicated rsbmi
- // by if-conversion pass
- BuildMI(*RSBBB, RSBBB->begin(), dl,
- TII->get(isThumb2 ? ARM::t2RSBri : ARM::RSBri), NewRsbDstReg)
- .addReg(ABSSrcReg, ABSSrcKIll ? RegState::Kill : 0)
- .addImm(0)
- .add(predOps(ARMCC::AL))
- .add(condCodeOp());
-
- // insert PHI in SinkBB,
- // reuse ABSDstReg to not change uses of ABS instruction
- BuildMI(*SinkBB, SinkBB->begin(), dl,
- TII->get(ARM::PHI), ABSDstReg)
- .addReg(NewRsbDstReg).addMBB(RSBBB)
- .addReg(ABSSrcReg).addMBB(BB);
-
- // remove ABS instruction
- MI.eraseFromParent();
-
- // return last added BB
- return SinkBB;
- }
case ARM::COPY_STRUCT_BYVAL_I32:
++NumLoopByVals;
return EmitStructByval(MI, BB);
@@ -14082,6 +14017,41 @@ static SDValue PerformSubCSINCCombine(SDNode *N, SelectionDAG &DAG) {
CSINC.getOperand(3));
}
+static bool isNegatedInteger(SDValue Op) {
+ return Op.getOpcode() == ISD::SUB && isNullConstant(Op.getOperand(0));
+}
+
+// Try to fold
+//
+// (neg (cmov X, Y)) -> (cmov (neg X), (neg Y))
+//
+// The folding helps cmov to be matched with csneg without generating a
+// redundant neg instruction.
+static SDValue performNegCMovCombine(SDNode *N, SelectionDAG &DAG) {
+ if (!isNegatedInteger(SDValue(N, 0)))
+ return SDValue();
+
+ SDValue CMov = N->getOperand(1);
+ if (CMov.getOpcode() != ARMISD::CMOV || !CMov->hasOneUse())
+ return SDValue();
+
+ SDValue N0 = CMov.getOperand(0);
+ SDValue N1 = CMov.getOperand(1);
+
+  // If neither operand is a negation, the fold is not worthwhile, as it
+  // would introduce two additional negations while removing only one.
+ if (!isNegatedInteger(N0) && !isNegatedInteger(N1))
+ return SDValue();
+
+ SDLoc DL(N);
+ EVT VT = CMov.getValueType();
+
+ SDValue N0N = DAG.getNegative(N0, DL, VT);
+ SDValue N1N = DAG.getNegative(N1, DL, VT);
+ return DAG.getNode(ARMISD::CMOV, DL, VT, N0N, N1N, CMov.getOperand(2),
+ CMov.getOperand(3));
+}
+
/// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB.
///
static SDValue PerformSUBCombine(SDNode *N,
@@ -14098,6 +14068,9 @@ static SDValue PerformSUBCombine(SDNode *N,
if (SDValue R = PerformSubCSINCCombine(N, DCI.DAG))
return R;
+ if (SDValue Val = performNegCMovCombine(N, DCI.DAG))
+ return Val;
+
if (!Subtarget->hasMVEIntegerOps() || !N->getValueType(0).isVector())
return SDValue();
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index ccf6d509313b9..8e417ac3e1a7b 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -908,6 +908,7 @@ class VectorType;
SelectionDAG &DAG) const;
SDValue LowerFP_TO_BF16(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCMP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const;
Register getRegisterByName(const char* RegName, LLT VT,
const MachineFunction &MF) const override;
diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td
index bdb16d7d39266..282ff534fc112 100644
--- a/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -5185,12 +5185,6 @@ def SB : AInoP<(outs), (ins), MiscFrm, NoItinerary, "sb", "", []>,
let hasSideEffects = 1;
}
-let usesCustomInserter = 1, Defs = [CPSR], hasNoSchedulingInfo = 1 in {
- // Pseudo instruction that combines movs + predicated rsbmi
- // to implement integer ABS
- def ABS : ARMPseudoInst<(outs GPR:$dst), (ins GPR:$src), 8, NoItinerary, []>;
-}
-
let usesCustomInserter = 1, Defs = [CPSR], hasNoSchedulingInfo = 1 in {
def COPY_STRUCT_BYVAL_I32 : PseudoInst<
(outs), (ins GPR:$dst, GPR:$src, i32imm:$size, i32imm:$alignment),
diff --git a/llvm/lib/Target/ARM/ARMInstrThumb2.td b/llvm/lib/Target/ARM/ARMInstrThumb2.td
index c00d616670b5a..c229c8e4491df 100644
--- a/llvm/lib/Target/ARM/ARMInstrThumb2.td
+++ b/llvm/lib/Target/ARM/ARMInstrThumb2.td
@@ -4393,13 +4393,6 @@ def t2LDRpci_pic : PseudoInst<(outs rGPR:$dst), (ins i32imm:$addr, pclabel:$cp),
imm:$cp))]>,
Requires<[IsThumb2]>;
-// Pseudo instruction that combines movs + predicated rsbmi
-// to implement integer ABS
-let usesCustomInserter = 1, Defs = [CPSR], hasNoSchedulingInfo = 1 in {
-def t2ABS : PseudoInst<(outs rGPR:$dst), (ins rGPR:$src),
- NoItinerary, []>, Requires<[IsThumb2]>;
-}
-
//===----------------------------------------------------------------------===//
// Coprocessor load/store -- for disassembly only
//
diff --git a/llvm/lib/Target/ARM/ARMScheduleA57.td b/llvm/lib/Target/ARM/ARMScheduleA57.td
index 3baac6b233c45..e20bbadbc8eee 100644
--- a/llvm/lib/Target/ARM/ARMScheduleA57.td
+++ b/llvm/lib/Target/ARM/ARMScheduleA57.td
@@ -137,7 +137,7 @@ def : InstRW<[WriteNoop, WriteNoop], (instregex
def : InstRW<[WriteNoop], (instregex "FLDM", "FSTM")>;
// Pseudos
-def : InstRW<[WriteNoop], (instregex "(t2)?ABS$",
+def : InstRW<[WriteNoop], (instregex
"(t)?ADJCALLSTACKDOWN$", "(t)?ADJCALLSTACKUP$", "(t2|t)?Int_eh_sjlj",
"tLDRpci_pic", "(t2)?SUBS_PC_LR",
"JUMPTABLE", "tInt_WIN_eh_sjlj_longjmp",
diff --git a/llvm/lib/Target/ARM/ARMScheduleR52.td b/llvm/lib/Target/ARM/ARMScheduleR52.td
index e85646915117c..c350180baa250 100644
--- a/llvm/lib/Target/ARM/ARMScheduleR52.td
+++ b/llvm/lib/Target/ARM/ARMScheduleR52.td
@@ -235,7 +235,7 @@ def : InstRW< [R52WriteALU_WRI, R52Read_EX1, R52Read_EX1],
"UQADD8", "UQADD16","UQSUB8","UQSUB16","UQASX","UQSAX", "t2QADD",
"t2QSUB", "t2QDADD", "t2QDSUB", "t2SSAT", "t2SSAT16", "t2USAT",
"t2QADD8", "t2QADD16", "t2QSUB8", "t2QSUB16", "t2QASX", "t2QSAX",
- "t2UQADD8", "t2UQADD16","t2UQSUB8","t2UQSUB16","t2UQASX","t2UQSAX","t2ABS")>;
+ "t2UQADD8", "t2UQADD16","t2UQSUB8","t2UQSUB16","t2UQASX","t2UQSAX")>;
// Parallel arithmetic
def : InstRW< [R52WriteALU_EX2, R52Read_EX1, R52Read_EX1],
diff --git a/llvm/lib/Target/ARM/ARMScheduleSwift.td b/llvm/lib/Target/ARM/ARMScheduleSwift.td
index 88682f5c0d2c4..8d53119f4747b 100644
--- a/llvm/lib/Target/ARM/ARMScheduleSwift.td
+++ b/llvm/lib/Target/ARM/ARMScheduleSwift.td
@@ -1081,9 +1081,6 @@ let SchedModel = SwiftModel in {
def : WriteRes<WriteVST2, []>;
def : WriteRes<WriteVST3, []>;
def : WriteRes<WriteVST4, []>;
-
- // Not specified.
- def : InstRW<[SwiftWriteP01OneCycle2x], (instregex "ABS")>;
// Preload.
def : WriteRes<WritePreLd, [SwiftUnitP2]> { let Latency = 0;
let ReleaseAtCycles = [0];
diff --git a/llvm/test/Analysis/CostModel/ARM/mve-abs.ll b/llvm/test/Analysis/CostModel/ARM/mve-abs.ll
index 254c191569f8b..42563e8e34a94 100644
--- a/llvm/test/Analysis/CostModel/ARM/mve-abs.ll
+++ b/llvm/test/Analysis/CostModel/ARM/mve-abs.ll
@@ -31,22 +31,22 @@ declare <64 x i8> @llvm.abs.v64i8(<64 x i8>, i1)
define i32 @abs(i32 %arg) {
; MVE-LABEL: 'abs'
-; MVE-NEXT: Cost Model: Found costs of RThru:5 CodeSize:6 Lat:5 SizeLat:5 for: %I64 = call i64 @llvm.abs.i64(i64 undef, i1 false)
+; MVE-NEXT: Cost Model: Found costs of 4 for: %I64 = call i64 @llvm.abs.i64(i64 undef, i1 false)
; MVE-NEXT: Cost Model: Found costs of RThru:74 CodeSize:55 Lat:74 SizeLat:74 for: %V2I64 = call <2 x i64> @llvm.abs.v2i64(<2 x i64> undef, i1 false)
; MVE-NEXT: Cost Model: Found costs of RThru:148 CodeSize:110 Lat:148 SizeLat:148 for: %V4I64 = call <4 x i64> @llvm.abs.v4i64(<4 x i64> undef, i1 false)
; MVE-NEXT: Cost Model: Found costs of RThru:296 CodeSize:220 Lat:296 SizeLat:296 for: %V8I64 = call <8 x i64> @llvm.abs.v8i64(<8 x i64> undef, i1 false)
-; MVE-NEXT: Cost Model: Found costs of RThru:3 CodeSize:4 Lat:3 SizeLat:3 for: %I32 = call i32 @llvm.abs.i32(i32 undef, i1 false)
+; MVE-NEXT: Cost Model: Found costs of 2 for: %I32 = call i32 @llvm.abs.i32(i32 undef, i1 false)
; MVE-NEXT: Cost Model: Found costs of RThru:40 CodeSize:29 Lat:40 SizeLat:40 for: %V2I32 = call <2 x i32> @llvm.abs.v2i32(<2 x i32> undef, i1 false)
; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V4I32 = call <4 x i32> @llvm.abs.v4i32(<4 x i32> undef, i1 false)
; MVE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V8I32 = call <8 x i32> @llvm.abs.v8i32(<8 x i32> undef, i1 false)
; MVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V16I32 = call <16 x i32> @llvm.abs.v16i32(<16 x i32> undef, i1 false)
-; MVE-NEXT: Cost Model: Found costs of RThru:3 CodeSize:4 Lat:3 SizeLat:3 for: %I16 = call i16 @llvm.abs.i16(i16 undef, i1 false)
+; MVE-NEXT: Cost Model: Found costs of 2 for: %I16 = call i16 @llvm.abs.i16(i16 undef, i1 false)
; MVE-NEXT: Cost Model: Found costs of RThru:40 CodeSize:29 Lat:40 SizeLat:40 for: %V2I16 = call <2 x i16> @llvm.abs.v2i16(<2 x i16> undef, i1 false)
; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V4I16 = call <4 x i16> @llvm.abs.v4i16(<4 x i16> undef, i1 false)
; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V8I16 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> undef, i1 false)
; MVE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> undef, i1 false)
; MVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V32I16 = call <32 x i16> @llvm.abs.v32i16(<32 x i16> undef, i1 false)
-; MVE-NEXT: Cost Model: Found costs of RThru:3 CodeSize:4 Lat:3 SizeLat:3 for: %I8 = call i8 @llvm.abs.i8(i8 undef, i1 false)
+; MVE-NEXT: Cost Model: Found costs of 2 for: %I8 = call i8 @llvm.abs.i8(i8 undef, i1 false)
; MVE-NEXT: Cost Model: Found costs of RThru:40 CodeSize:29 Lat:40 SizeLat:40 for: %V2I8 = call <2 x i8> @llvm.abs.v2i8(<2 x i8> undef, i1 false)
; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V4I8 = call <4 x i8> @llvm.abs.v4i8(<4 x i8> undef, i1 false)
; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V8I8 = call <8 x i8> @llvm.abs.v8i8(<8 x i8> undef, i1 false)
diff --git a/llvm/test/CodeGen/Thumb2/abs.ll b/llvm/test/CodeGen/Thumb2/abs.ll
index 88259ba758803..3cc3ec93e2404 100644
--- a/llvm/test/CodeGen/Thumb2/abs.ll
+++ b/llvm/test/CodeGen/Thumb2/abs.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mtriple=thumbv6m-none-eabi < %s | FileCheck %s --check-prefix=CHECKT1
-; RUN: llc -verify-machineinstrs -mtriple=thumbv7m-none-eabi < %s | FileCheck %s --check-prefixes=CHECKT2
-; RUN: llc -verify-machineinstrs -mtriple=thumbv8.1m.main-none-eabi < %s | FileCheck %s --check-prefixes=CHECKT2
+; RUN: llc -verify-machineinstrs -mtriple=thumbv7m-none-eabi < %s | FileCheck %s --check-prefixes=CHECKT2,CHECKT2V7
+; RUN: llc -verify-machineinstrs -mtriple=thumbv8.1m.main-none-eabi < %s | FileCheck %s --check-prefixes=CHECKT2,CHECKT2V8
declare i64 @llvm.abs.i64(i64, i1 immarg)
@@ -39,11 +39,18 @@ define i32 @neg_abs32(i32 %x) {
; CHECKT1-NEXT: subs r0, r1, r0
; CHECKT1-NEXT: bx lr
;
-; CHECKT2-LABEL: neg_abs32:
-; CHECKT2: @ %bb.0:
-; CHECKT2-NEXT: eor.w r1, r0, r0, asr #31
-; CHECKT2-NEXT: rsb r0, r1, r0, asr #31
-; CHECKT2-NEXT: bx lr
+; CHECKT2V7-LABEL: neg_abs32:
+; CHECKT2V7: @ %bb.0:
+; CHECKT2V7-NEXT: cmp r0, #0
+; CHECKT2V7-NEXT: it pl
+; CHECKT2V7-NEXT: rsbpl r0, r0, #0
+; CHECKT2V7-NEXT: bx lr
+;
+; CHECKT2V8-LABEL: neg_abs32:
+; CHECKT2V8: @ %bb.0:
+; CHECKT2V8-NEXT: cmp r0, #0
+; CHECKT2V8-NEXT: cneg r0, r0, pl
+; CHECKT2V8-NEXT: bx lr
%abs = tail call i32 @llvm.abs.i32(i32 %x, i1 true)
%neg = sub nsw i32 0, %abs
ret i32 %neg
@@ -145,12 +152,18 @@ define i32 @abs32(i32 %x) {
; CHECKT1-NEXT: subs r0, r0, r1
; CHECKT1-NEXT: bx lr
;
-; CHECKT2-LABEL: abs32:
-; CHECKT2: @ %bb.0:
-; CHECKT2-NEXT: cmp r0, #0
-; CHECKT2-NEXT: it mi
-; CHECKT2-NEXT: rsbmi r0, r0, #0
-; CHECKT2-NEXT: bx lr
+; CHECKT2V7-LABEL: abs32:
+; CHECKT2V7: @ %bb.0:
+; CHECKT2V7-NEXT: cmp r0, #0
+; CHECKT2V7-NEXT: it mi
+; CHECKT2V7-NEXT: rsbmi r0, r0, #0
+; CHECKT2V7-NEXT: bx lr
+;
+; CHECKT2V8-LABEL: abs32:
+; CHECKT2V8: @ %bb.0:
+; CHECKT2V8-NEXT: cmp r0, #0
+; CHECKT2V8-NEXT: cneg r0, r0, mi
+; CHECKT2V8-NEXT: bx lr
%abs = tail call i32 @llvm.abs.i32(i32 %x, i1 true)
ret i32 %abs
}
@@ -164,12 +177,20 @@ define i16 @abs16(i16 %x) {
; CHECKT1-NEXT: subs r0, r0, r1
; CHECKT1-NEXT: bx lr
;
-; CHECKT2-LABEL: abs16:
-; CHECKT2: @ %bb.0:
-; CHECKT2-NEXT: sxth r1, r0
-; CHECKT2-NEXT: eor.w r0, r0, r1, asr #15
-; CHECKT2-NEXT: sub.w r0, r0, r1, asr #15
-; CHECKT2-NEXT: bx lr
+; CHECKT2V7-LABEL: abs16:
+; CHECKT2V7: @ %bb.0:
+; CHECKT2V7-NEXT: sxth r0, r0
+; CHECKT2V7-NEXT: cmp r0, #0
+; CHECKT2V7-NEXT: it mi
+; CHECKT2V7-NEXT: rsbmi r0, r0, #0
+; CHECKT2V7-NEXT: bx lr
+;
+; CHECKT2V8-LABEL: abs16:
+; CHECKT2V8: @ %bb.0:
+; CHECKT2V8-NEXT: sxth r0, r0
+; CHECKT2V8-NEXT: cmp r0, #0
+; CHECKT2V8-NEXT: cneg r0, r0, mi
+; CHECKT2V8-NEXT: bx lr
%abs = tail call i16 @llvm.abs.i16(i16 %x, i1 true)
ret i16 %abs
}