[llvm] Use addc nodes when lowering overflow (PR #162583)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 9 12:40:51 PDT 2025
https://github.com/AZero13 updated https://github.com/llvm/llvm-project/pull/162583
From 4694bc6e6b8ab4f563155b55adc457e9d16e997a Mon Sep 17 00:00:00 2001
From: AZero13 <gfunni234 at gmail.com>
Date: Wed, 8 Oct 2025 21:19:04 -0400
Subject: [PATCH] Use addc nodes when lowering overflow
This cannot be done for Thumb1 at the moment because of scheduler issues.
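For reference, the core of the change in getARMXALUOOp is that for SADDO/UADDO/SSUBO/USUBO the overflow result is now taken from the flag output of a carry-setting node instead of a separate ARMISD::CMP. A rough sketch of the new form (a summary of the patch below, not generated output):

    // Emit a flag-producing add/sub and read the flags directly.
    SDVTList VTs = DAG.getVTList(Op.getValueType(), FlagsVT);
    Value = DAG.getNode(Opc, dl, VTs, LHS, RHS);   // Opc is ARMISD::ADDC or ARMISD::SUBC
    Overflow = Value.getValue(1);                  // flags set by the adds/subs itself

This removes the extra compare and the register dependency it created, which is why the UADDO condition code flips from HS (value >= LHS after a CMP) to LO (carry clear on the adds means no overflow).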
---
llvm/lib/Target/ARM/ARMISelLowering.cpp | 93 +++++-----
.../CodeGen/ARM/arm-shrink-wrapping-linux.ll | 12 +-
llvm/test/CodeGen/ARM/intrinsics-overflow.ll | 150 ++++++++++-----
llvm/test/CodeGen/ARM/sadd_sat.ll | 173 ++++++++++++------
llvm/test/CodeGen/ARM/sadd_sat_plus.ll | 7 +-
llvm/test/CodeGen/ARM/ssub_sat.ll | 20 +-
llvm/test/CodeGen/ARM/ssub_sat_plus.ll | 8 +-
llvm/test/CodeGen/Thumb2/mve-pipelineloops.ll | 70 +++----
8 files changed, 321 insertions(+), 212 deletions(-)
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 2a40fb9b476f8..2ed2915567d44 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -4832,7 +4832,7 @@ SDValue ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS,
}
// This function returns three things: the arithmetic computation itself
-// (Value), a comparison (OverflowCmp), and a condition code (ARMcc). The
+// (Value), a comparison (Overflow), and a condition code (ARMcc). The
// comparison and the condition code define the case in which the arithmetic
// computation *does not* overflow.
std::pair<SDValue, SDValue>
@@ -4840,42 +4840,30 @@ ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
SDValue &ARMcc) const {
assert(Op.getValueType() == MVT::i32 && "Unsupported value type");
- SDValue Value, OverflowCmp;
+ SDValue Value, Overflow;
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
SDLoc dl(Op);
-
- // FIXME: We are currently always generating CMPs because we don't support
- // generating CMN through the backend. This is not as good as the natural
- // CMP case because it causes a register dependency and cannot be folded
- // later.
+ unsigned Opc = 0;
switch (Op.getOpcode()) {
default:
llvm_unreachable("Unknown overflow instruction!");
case ISD::SADDO:
+ Opc = ARMISD::ADDC;
ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
- Value = DAG.getNode(ISD::ADD, dl, Op.getValueType(), LHS, RHS);
- OverflowCmp = DAG.getNode(ARMISD::CMP, dl, FlagsVT, Value, LHS);
break;
case ISD::UADDO:
- ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
- // We use ADDC here to correspond to its use in LowerUnsignedALUO.
- // We do not use it in the USUBO case as Value may not be used.
- Value = DAG.getNode(ARMISD::ADDC, dl,
- DAG.getVTList(Op.getValueType(), MVT::i32), LHS, RHS)
- .getValue(0);
- OverflowCmp = DAG.getNode(ARMISD::CMP, dl, FlagsVT, Value, LHS);
+ Opc = ARMISD::ADDC;
+ ARMcc = DAG.getConstant(ARMCC::LO, dl, MVT::i32);
break;
case ISD::SSUBO:
+ Opc = ARMISD::SUBC;
ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
- Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
- OverflowCmp = DAG.getNode(ARMISD::CMP, dl, FlagsVT, LHS, RHS);
break;
case ISD::USUBO:
+ Opc = ARMISD::SUBC;
ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
- Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
- OverflowCmp = DAG.getNode(ARMISD::CMP, dl, FlagsVT, LHS, RHS);
break;
case ISD::UMULO:
// We generate a UMUL_LOHI and then check if the high word is 0.
@@ -4883,8 +4871,8 @@ ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
Value = DAG.getNode(ISD::UMUL_LOHI, dl,
DAG.getVTList(Op.getValueType(), Op.getValueType()),
LHS, RHS);
- OverflowCmp = DAG.getNode(ARMISD::CMP, dl, FlagsVT, Value.getValue(1),
- DAG.getConstant(0, dl, MVT::i32));
+ Overflow = DAG.getNode(ARMISD::CMP, dl, FlagsVT, Value.getValue(1),
+ DAG.getConstant(0, dl, MVT::i32));
Value = Value.getValue(0); // We only want the low 32 bits for the result.
break;
case ISD::SMULO:
@@ -4894,15 +4882,34 @@ ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
Value = DAG.getNode(ISD::SMUL_LOHI, dl,
DAG.getVTList(Op.getValueType(), Op.getValueType()),
LHS, RHS);
- OverflowCmp = DAG.getNode(ARMISD::CMP, dl, FlagsVT, Value.getValue(1),
- DAG.getNode(ISD::SRA, dl, Op.getValueType(),
- Value.getValue(0),
- DAG.getConstant(31, dl, MVT::i32)));
+ Overflow = DAG.getNode(ARMISD::CMP, dl, FlagsVT, Value.getValue(1),
+ DAG.getNode(ISD::SRA, dl, Op.getValueType(),
+ Value.getValue(0),
+ DAG.getConstant(31, dl, MVT::i32)));
Value = Value.getValue(0); // We only want the low 32 bits for the result.
break;
} // switch (...)
+ if (Opc) {
+ if (Subtarget->isThumb1Only() &&
+ (Op.getOpcode() == ISD::SADDO || Op.getOpcode() == ISD::SSUBO)) {
+ // FIXME: Thumb1 has to split between the cmp and the add/sub.
+ // Remove when the peephole optimizer handles this or we no longer need to
+ // split.
+ if (Opc == ARMISD::ADDC) {
+ Value = DAG.getNode(ISD::ADD, dl, Op.getValueType(), LHS, RHS);
+ Overflow = DAG.getNode(ARMISD::CMP, dl, FlagsVT, Value, LHS);
+ } else {
+ Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
+ Overflow = DAG.getNode(ARMISD::CMP, dl, FlagsVT, LHS, RHS);
+ }
+ } else {
+ SDVTList VTs = DAG.getVTList(Op.getValueType(), FlagsVT);
+ Value = DAG.getNode(Opc, dl, VTs, LHS, RHS);
+ Overflow = Value.getValue(1);
+ }
+ }
- return std::make_pair(Value, OverflowCmp);
+ return std::make_pair(Value, Overflow);
}
SDValue
@@ -4911,20 +4918,18 @@ ARMTargetLowering::LowerSignedALUO(SDValue Op, SelectionDAG &DAG) const {
if (!isTypeLegal(Op.getValueType()))
return SDValue();
- SDValue Value, OverflowCmp;
- SDValue ARMcc;
- std::tie(Value, OverflowCmp) = getARMXALUOOp(Op, DAG, ARMcc);
SDLoc dl(Op);
+ SDValue Value, Overflow;
+ SDValue ARMcc;
+ std::tie(Value, Overflow) = getARMXALUOOp(Op, DAG, ARMcc);
// We use 0 and 1 as false and true values.
SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
- EVT VT = Op.getValueType();
- SDValue Overflow =
- DAG.getNode(ARMISD::CMOV, dl, VT, TVal, FVal, ARMcc, OverflowCmp);
+ Overflow =
+ DAG.getNode(ARMISD::CMOV, dl, MVT::i32, TVal, FVal, ARMcc, Overflow);
- SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
- return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
+ return DAG.getMergeValues({Value, Overflow}, dl);
}
static SDValue ConvertBooleanCarryToCarryFlag(SDValue BoolCarry,
@@ -5055,12 +5060,12 @@ SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
if (!isTypeLegal(Cond->getValueType(0)))
return SDValue();
- SDValue Value, OverflowCmp;
+ SDValue Value, Overflow;
SDValue ARMcc;
- std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc);
+ std::tie(Value, Overflow) = getARMXALUOOp(Cond, DAG, ARMcc);
EVT VT = Op.getValueType();
- return getCMOV(dl, VT, SelectTrue, SelectFalse, ARMcc, OverflowCmp, DAG);
+ return getCMOV(dl, VT, SelectTrue, SelectFalse, ARMcc, Overflow, DAG);
}
// Convert:
@@ -5657,9 +5662,9 @@ SDValue ARMTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
return SDValue();
// The actual operation with overflow check.
- SDValue Value, OverflowCmp;
+ SDValue Value, Overflow;
SDValue ARMcc;
- std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc);
+ std::tie(Value, Overflow) = getARMXALUOOp(Cond, DAG, ARMcc);
// Reverse the condition code.
ARMCC::CondCodes CondCode =
@@ -5668,7 +5673,7 @@ SDValue ARMTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
ARMcc = DAG.getConstant(CondCode, SDLoc(ARMcc), MVT::i32);
return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc,
- OverflowCmp);
+ Overflow);
}
return SDValue();
@@ -5707,9 +5712,9 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
return SDValue();
// The actual operation with overflow check.
- SDValue Value, OverflowCmp;
+ SDValue Value, Overflow;
SDValue ARMcc;
- std::tie(Value, OverflowCmp) = getARMXALUOOp(LHS.getValue(0), DAG, ARMcc);
+ std::tie(Value, Overflow) = getARMXALUOOp(LHS.getValue(0), DAG, ARMcc);
if ((CC == ISD::SETNE) != isOneConstant(RHS)) {
// Reverse the condition code.
@@ -5720,7 +5725,7 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
}
return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc,
- OverflowCmp);
+ Overflow);
}
if (LHS.getValueType() == MVT::i32) {
diff --git a/llvm/test/CodeGen/ARM/arm-shrink-wrapping-linux.ll b/llvm/test/CodeGen/ARM/arm-shrink-wrapping-linux.ll
index b92f03d43bb4c..b070d17be227f 100644
--- a/llvm/test/CodeGen/ARM/arm-shrink-wrapping-linux.ll
+++ b/llvm/test/CodeGen/ARM/arm-shrink-wrapping-linux.ll
@@ -34,14 +34,12 @@ define fastcc ptr @wrongUseOfPostDominate(ptr readonly %s, i32 %off, ptr readnon
; ENABLE-NEXT: .LBB0_4: @ %while.body
; ENABLE-NEXT: @ =>This Inner Loop Header: Depth=1
; ENABLE-NEXT: ldrb r3, [r0]
+; ENABLE-NEXT: subs r1, r1, #1
; ENABLE-NEXT: ldrb r3, [r12, r3]
; ENABLE-NEXT: add r0, r0, r3
-; ENABLE-NEXT: sub r3, r1, #1
-; ENABLE-NEXT: cmp r3, r1
-; ENABLE-NEXT: bhs .LBB0_6
+; ENABLE-NEXT: blo .LBB0_6
; ENABLE-NEXT: @ %bb.5: @ %while.body
; ENABLE-NEXT: @ in Loop: Header=BB0_4 Depth=1
-; ENABLE-NEXT: mov r1, r3
; ENABLE-NEXT: cmp r0, r2
; ENABLE-NEXT: blo .LBB0_4
; ENABLE-NEXT: .LBB0_6: @ %if.end29
@@ -124,14 +122,12 @@ define fastcc ptr @wrongUseOfPostDominate(ptr readonly %s, i32 %off, ptr readnon
; DISABLE-NEXT: .LBB0_4: @ %while.body
; DISABLE-NEXT: @ =>This Inner Loop Header: Depth=1
; DISABLE-NEXT: ldrb r3, [r0]
+; DISABLE-NEXT: subs r1, r1, #1
; DISABLE-NEXT: ldrb r3, [r12, r3]
; DISABLE-NEXT: add r0, r0, r3
-; DISABLE-NEXT: sub r3, r1, #1
-; DISABLE-NEXT: cmp r3, r1
-; DISABLE-NEXT: bhs .LBB0_6
+; DISABLE-NEXT: blo .LBB0_6
; DISABLE-NEXT: @ %bb.5: @ %while.body
; DISABLE-NEXT: @ in Loop: Header=BB0_4 Depth=1
-; DISABLE-NEXT: mov r1, r3
; DISABLE-NEXT: cmp r0, r2
; DISABLE-NEXT: blo .LBB0_4
; DISABLE-NEXT: .LBB0_6: @ %if.end29
diff --git a/llvm/test/CodeGen/ARM/intrinsics-overflow.ll b/llvm/test/CodeGen/ARM/intrinsics-overflow.ll
index 8bd78dd0f6ab5..796e9304a6486 100644
--- a/llvm/test/CodeGen/ARM/intrinsics-overflow.ll
+++ b/llvm/test/CodeGen/ARM/intrinsics-overflow.ll
@@ -1,104 +1,158 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc < %s -mtriple=arm-linux -mcpu=generic -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=ARM
; RUN: llc < %s -mtriple=thumbv6m-eabi -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=THUMBV6
; RUN: llc < %s -mtriple=thumbv7-eabi -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=THUMBV7
define i32 @uadd_overflow(i32 %a, i32 %b) #0 {
+; ARM-LABEL: uadd_overflow:
+; ARM: @ %bb.0:
+; ARM-NEXT: adds r0, r0, r1
+; ARM-NEXT: mov r2, #0
+; ARM-NEXT: adc r0, r2, #0
+; ARM-NEXT: mov pc, lr
+;
+; THUMBV6-LABEL: uadd_overflow:
+; THUMBV6: @ %bb.0:
+; THUMBV6-NEXT: movs r2, #0
+; THUMBV6-NEXT: adds r0, r0, r1
+; THUMBV6-NEXT: adcs r2, r2
+; THUMBV6-NEXT: mov r0, r2
+; THUMBV6-NEXT: bx lr
+;
+; THUMBV7-LABEL: uadd_overflow:
+; THUMBV7: @ %bb.0:
+; THUMBV7-NEXT: adds r0, r0, r1
+; THUMBV7-NEXT: mov.w r2, #0
+; THUMBV7-NEXT: adc r0, r2, #0
+; THUMBV7-NEXT: bx lr
%sadd = tail call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
%1 = extractvalue { i32, i1 } %sadd, 1
%2 = zext i1 %1 to i32
ret i32 %2
- ; CHECK-LABEL: uadd_overflow:
- ; ARM: adds r[[R0:[0-9]+]], r[[R0]], r[[R1:[0-9]+]]
- ; ARM: mov r[[R2:[0-9]+]], #0
- ; ARM: adc r[[R0]], r[[R2]], #0
- ; THUMBV6: movs r[[R2:[0-9]+]], #0
- ; THUMBV6: adds r[[R0:[0-9]+]], r[[R0]], r[[R1:[0-9]+]]
- ; THUMBV6: adcs r[[R2]], r[[R2]]
- ; THUMBV6: mov r[[R0]], r[[R2]]
- ; THUMBV7: adds r[[R0:[0-9]+]], r[[R0]], r[[R1:[0-9]+]]
- ; THUMBV7: mov.w r[[R2:[0-9]+]], #0
- ; THUMBV7: adc r[[R0]], r[[R2]], #0
}
define i32 @sadd_overflow(i32 %a, i32 %b) #0 {
+; ARM-LABEL: sadd_overflow:
+; ARM: @ %bb.0:
+; ARM-NEXT: mov r2, #1
+; ARM-NEXT: adds r0, r0, r1
+; ARM-NEXT: movvc r2, #0
+; ARM-NEXT: mov r0, r2
+; ARM-NEXT: mov pc, lr
+;
+; THUMBV6-LABEL: sadd_overflow:
+; THUMBV6: @ %bb.0:
+; THUMBV6-NEXT: adds r0, r0, r1
+; THUMBV6-NEXT: bvc .LBB1_2
+; THUMBV6-NEXT: @ %bb.1:
+; THUMBV6-NEXT: movs r0, #1
+; THUMBV6-NEXT: bx lr
+; THUMBV6-NEXT: .LBB1_2:
+; THUMBV6-NEXT: movs r0, #0
+; THUMBV6-NEXT: bx lr
+;
+; THUMBV7-LABEL: sadd_overflow:
+; THUMBV7: @ %bb.0:
+; THUMBV7-NEXT: movs r2, #1
+; THUMBV7-NEXT: adds r0, r0, r1
+; THUMBV7-NEXT: it vc
+; THUMBV7-NEXT: movvc r2, #0
+; THUMBV7-NEXT: mov r0, r2
+; THUMBV7-NEXT: bx lr
%sadd = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
%1 = extractvalue { i32, i1 } %sadd, 1
%2 = zext i1 %1 to i32
ret i32 %2
- ; CHECK-LABEL: sadd_overflow:
- ; ARM: adds r[[R2:[0-9]+]], r[[R0:[0-9]+]], r[[R1:[0-9]+]]
- ; ARM: mov r[[R0]], #1
- ; ARM: movvc r[[R0]], #0
- ; ARM: mov pc, lr
- ; THUMBV6: adds r0, r0, r1
- ; THUMBV6: bvc .LBB1_2
- ; THUMBV7: adds r[[R2:[0-9]+]], r[[R0]], r[[R1:[0-9]+]]
- ; THUMBV7: mov.w r[[R0:[0-9]+]], #1
- ; THUMBV7: it vc
- ; THUMBV7: movvc r[[R0]], #0
}
define i32 @usub_overflow(i32 %a, i32 %b) #0 {
+; ARM-LABEL: usub_overflow:
+; ARM: @ %bb.0:
+; ARM-NEXT: subs r0, r0, r1
+; ARM-NEXT: mov r2, #0
+; ARM-NEXT: adc r0, r2, #0
+; ARM-NEXT: eor r0, r0, #1
+; ARM-NEXT: mov pc, lr
+;
+; THUMBV6-LABEL: usub_overflow:
+; THUMBV6: @ %bb.0:
+; THUMBV6-NEXT: movs r2, #0
+; THUMBV6-NEXT: subs r0, r0, r1
+; THUMBV6-NEXT: adcs r2, r2
+; THUMBV6-NEXT: movs r0, #1
+; THUMBV6-NEXT: eors r0, r2
+; THUMBV6-NEXT: bx lr
+;
+; THUMBV7-LABEL: usub_overflow:
+; THUMBV7: @ %bb.0:
+; THUMBV7-NEXT: subs r0, r0, r1
+; THUMBV7-NEXT: mov.w r2, #0
+; THUMBV7-NEXT: adc r0, r2, #0
+; THUMBV7-NEXT: eor r0, r0, #1
+; THUMBV7-NEXT: bx lr
%sadd = tail call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %a, i32 %b)
%1 = extractvalue { i32, i1 } %sadd, 1
%2 = zext i1 %1 to i32
ret i32 %2
- ; CHECK-LABEL: usub_overflow:
- ; ARM: subs r[[R0:[0-9]+]], r[[R0]], r[[R1:[0-9]+]]
- ; ARM: mov r[[R2:[0-9]+]], #0
- ; ARM: adc r[[R0]], r[[R2]], #0
- ; ARM: eor r[[R0]], r[[R0]], #1
- ; THUMBV6: movs r[[R2:[0-9]+]], #0
- ; THUMBV6: subs r[[R0:[0-9]+]], r[[R0]], r[[R1:[0-9]+]]
- ; THUMBV6: adcs r[[R2]], r[[R2]]
- ; THUMBV6: movs r[[R0]], #1
- ; THUMBV6: eors r[[R0]], r[[R2]]
- ; THUMBV7: subs r[[R0:[0-9]+]], r[[R0]], r[[R1:[0-9]+]]
- ; THUMBV7: mov.w r[[R2:[0-9]+]], #0
- ; THUMBV7: adc r[[R0]], r[[R2]], #0
- ; THUMBV7: eor r[[R0]], r[[R0]], #1
; We should know that the overflow is just 1 bit,
; no need to clear any other bit
- ; CHECK-NOT: and
}
define i32 @ssub_overflow(i32 %a, i32 %b) #0 {
+; ARM-LABEL: ssub_overflow:
+; ARM: @ %bb.0:
+; ARM-NEXT: mov r2, #1
+; ARM-NEXT: subs r0, r0, r1
+; ARM-NEXT: movvc r2, #0
+; ARM-NEXT: mov r0, r2
+; ARM-NEXT: mov pc, lr
+;
+; THUMBV6-LABEL: ssub_overflow:
+; THUMBV6: @ %bb.0:
+; THUMBV6-NEXT: cmp r0, r1
+; THUMBV6-NEXT: bvc .LBB3_2
+; THUMBV6-NEXT: @ %bb.1:
+; THUMBV6-NEXT: movs r0, #1
+; THUMBV6-NEXT: bx lr
+; THUMBV6-NEXT: .LBB3_2:
+; THUMBV6-NEXT: movs r0, #0
+; THUMBV6-NEXT: bx lr
+;
+; THUMBV7-LABEL: ssub_overflow:
+; THUMBV7: @ %bb.0:
+; THUMBV7-NEXT: movs r2, #1
+; THUMBV7-NEXT: subs r0, r0, r1
+; THUMBV7-NEXT: it vc
+; THUMBV7-NEXT: movvc r2, #0
+; THUMBV7-NEXT: mov r0, r2
+; THUMBV7-NEXT: bx lr
%sadd = tail call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %a, i32 %b)
%1 = extractvalue { i32, i1 } %sadd, 1
%2 = zext i1 %1 to i32
ret i32 %2
- ; CHECK-LABEL: ssub_overflow:
- ; ARM: mov r[[R2]], #1
- ; ARM: cmp r[[R0]], r[[R1]]
- ; ARM: movvc r[[R2]], #0
- ; THUMBV6: cmp r0, r1
- ; THUMBV6: bvc .LBB3_2
- ; THUMBV7: movs r[[R2:[0-9]+]], #1
- ; THUMBV7: cmp r[[R0:[0-9]+]], r[[R1:[0-9]+]]
- ; THUMBV7: it vc
- ; THUMBV7: movvc r[[R2]], #0
- ; THUMBV7: mov r[[R0]], r[[R2]]
}
declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) #1
declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32) #2
declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32) #3
declare { i32, i1 } @llvm.ssub.with.overflow.i32(i32, i32) #4
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK: {{.*}}
diff --git a/llvm/test/CodeGen/ARM/sadd_sat.ll b/llvm/test/CodeGen/ARM/sadd_sat.ll
index b8f7a2daaeaba..5b993034e6d54 100644
--- a/llvm/test/CodeGen/ARM/sadd_sat.ll
+++ b/llvm/test/CodeGen/ARM/sadd_sat.ll
@@ -31,9 +31,9 @@ define i32 @func(i32 %x, i32 %y) nounwind {
; CHECK-T2NODSP-LABEL: func:
; CHECK-T2NODSP: @ %bb.0:
; CHECK-T2NODSP-NEXT: adds r0, r0, r1
-; CHECK-T2NODSP-NEXT: mov.w r1, #-2147483648
+; CHECK-T2NODSP-NEXT: mov.w r2, #-2147483648
; CHECK-T2NODSP-NEXT: it vs
-; CHECK-T2NODSP-NEXT: eorvs.w r0, r1, r0, asr #31
+; CHECK-T2NODSP-NEXT: eorvs.w r0, r2, r0, asr #31
; CHECK-T2NODSP-NEXT: bx lr
;
; CHECK-T2DSP-LABEL: func:
@@ -44,8 +44,8 @@ define i32 @func(i32 %x, i32 %y) nounwind {
; CHECK-ARMNODPS-LABEL: func:
; CHECK-ARMNODPS: @ %bb.0:
; CHECK-ARMNODPS-NEXT: adds r0, r0, r1
-; CHECK-ARMNODPS-NEXT: mov r1, #-2147483648
-; CHECK-ARMNODPS-NEXT: eorvs r0, r1, r0, asr #31
+; CHECK-ARMNODPS-NEXT: mov r2, #-2147483648
+; CHECK-ARMNODPS-NEXT: eorvs r0, r2, r0, asr #31
; CHECK-ARMNODPS-NEXT: bx lr
;
; CHECK-ARMBASEDSP-LABEL: func:
@@ -149,28 +149,28 @@ define i64 @func2(i64 %x, i64 %y) nounwind {
}
define signext i16 @func16(i16 signext %x, i16 signext %y) nounwind {
-; CHECK-T1-LABEL: func16:
-; CHECK-T1: @ %bb.0:
-; CHECK-T1-NEXT: adds r0, r0, r1
-; CHECK-T1-NEXT: ldr r1, .LCPI2_0
-; CHECK-T1-NEXT: cmp r0, r1
-; CHECK-T1-NEXT: blt .LBB2_2
-; CHECK-T1-NEXT: @ %bb.1:
-; CHECK-T1-NEXT: {{movs|mov}} r0, r1
-; CHECK-T1-NEXT: .LBB2_2:
-; CHECK-T1-NEXT: ldr r1, .LCPI2_1
-; CHECK-T1-NEXT: cmp r0, r1
-; CHECK-T1-NEXT: bgt .LBB2_4
-; CHECK-T1-NEXT: @ %bb.3:
-; CHECK-T1-NEXT: {{movs|mov}} r0, r1
-; CHECK-T1-NEXT: .LBB2_4:
-; CHECK-T1-NEXT: bx lr
-; CHECK-T1-NEXT: .p2align 2
-; CHECK-T1-NEXT: @ %bb.5:
-; CHECK-T1-NEXT: .LCPI2_0:
-; CHECK-T1-NEXT: .long 32767 @ 0x7fff
-; CHECK-T1-NEXT: .LCPI2_1:
-; CHECK-T1-NEXT: .long 4294934528 @ 0xffff8000
+; CHECK-T16-LABEL: func16:
+; CHECK-T16: @ %bb.0:
+; CHECK-T16-NEXT: adds r0, r0, r1
+; CHECK-T16-NEXT: ldr r1, .LCPI2_0
+; CHECK-T16-NEXT: cmp r0, r1
+; CHECK-T16-NEXT: blt .LBB2_2
+; CHECK-T16-NEXT: @ %bb.1:
+; CHECK-T16-NEXT: mov r0, r1
+; CHECK-T16-NEXT: .LBB2_2:
+; CHECK-T16-NEXT: ldr r1, .LCPI2_1
+; CHECK-T16-NEXT: cmp r0, r1
+; CHECK-T16-NEXT: bgt .LBB2_4
+; CHECK-T16-NEXT: @ %bb.3:
+; CHECK-T16-NEXT: mov r0, r1
+; CHECK-T16-NEXT: .LBB2_4:
+; CHECK-T16-NEXT: bx lr
+; CHECK-T16-NEXT: .p2align 2
+; CHECK-T16-NEXT: @ %bb.5:
+; CHECK-T16-NEXT: .LCPI2_0:
+; CHECK-T16-NEXT: .long 32767 @ 0x7fff
+; CHECK-T16-NEXT: .LCPI2_1:
+; CHECK-T16-NEXT: .long 4294934528 @ 0xffff8000
;
; CHECK-T2NODSP-LABEL: func16:
; CHECK-T2NODSP: @ %bb.0:
@@ -208,6 +208,29 @@ define signext i16 @func16(i16 signext %x, i16 signext %y) nounwind {
; CHECK-ARMBASEDSP-NEXT: asr r0, r0, #16
; CHECK-ARMBASEDSP-NEXT: bx lr
;
+; CHECK-T15TE-LABEL: func16:
+; CHECK-T15TE: @ %bb.0:
+; CHECK-T15TE-NEXT: adds r0, r0, r1
+; CHECK-T15TE-NEXT: ldr r1, .LCPI2_0
+; CHECK-T15TE-NEXT: cmp r0, r1
+; CHECK-T15TE-NEXT: blt .LBB2_2
+; CHECK-T15TE-NEXT: @ %bb.1:
+; CHECK-T15TE-NEXT: movs r0, r1
+; CHECK-T15TE-NEXT: .LBB2_2:
+; CHECK-T15TE-NEXT: ldr r1, .LCPI2_1
+; CHECK-T15TE-NEXT: cmp r0, r1
+; CHECK-T15TE-NEXT: bgt .LBB2_4
+; CHECK-T15TE-NEXT: @ %bb.3:
+; CHECK-T15TE-NEXT: movs r0, r1
+; CHECK-T15TE-NEXT: .LBB2_4:
+; CHECK-T15TE-NEXT: bx lr
+; CHECK-T15TE-NEXT: .p2align 2
+; CHECK-T15TE-NEXT: @ %bb.5:
+; CHECK-T15TE-NEXT: .LCPI2_0:
+; CHECK-T15TE-NEXT: .long 32767 @ 0x7fff
+; CHECK-T15TE-NEXT: .LCPI2_1:
+; CHECK-T15TE-NEXT: .long 4294934528 @ 0xffff8000
+;
; CHECK-ARMDSP-LABEL: func16:
; CHECK-ARMDSP: @ %bb.0:
; CHECK-ARMDSP-NEXT: qadd16 r0, r0, r1
@@ -218,22 +241,22 @@ define signext i16 @func16(i16 signext %x, i16 signext %y) nounwind {
}
define signext i8 @func8(i8 signext %x, i8 signext %y) nounwind {
-; CHECK-T1-LABEL: func8:
-; CHECK-T1: @ %bb.0:
-; CHECK-T1-NEXT: adds r0, r0, r1
-; CHECK-T1-NEXT: movs r1, #127
-; CHECK-T1-NEXT: cmp r0, #127
-; CHECK-T1-NEXT: blt .LBB3_2
-; CHECK-T1-NEXT: @ %bb.1:
-; CHECK-T1-NEXT: {{movs|mov}} r0, r1
-; CHECK-T1-NEXT: .LBB3_2:
-; CHECK-T1-NEXT: mvns r1, r1
-; CHECK-T1-NEXT: cmp r0, r1
-; CHECK-T1-NEXT: bgt .LBB3_4
-; CHECK-T1-NEXT: @ %bb.3:
-; CHECK-T1-NEXT: {{movs|mov}} r0, r1
-; CHECK-T1-NEXT: .LBB3_4:
-; CHECK-T1-NEXT: bx lr
+; CHECK-T16-LABEL: func8:
+; CHECK-T16: @ %bb.0:
+; CHECK-T16-NEXT: adds r0, r0, r1
+; CHECK-T16-NEXT: movs r1, #127
+; CHECK-T16-NEXT: cmp r0, #127
+; CHECK-T16-NEXT: blt .LBB3_2
+; CHECK-T16-NEXT: @ %bb.1:
+; CHECK-T16-NEXT: mov r0, r1
+; CHECK-T16-NEXT: .LBB3_2:
+; CHECK-T16-NEXT: mvns r1, r1
+; CHECK-T16-NEXT: cmp r0, r1
+; CHECK-T16-NEXT: bgt .LBB3_4
+; CHECK-T16-NEXT: @ %bb.3:
+; CHECK-T16-NEXT: mov r0, r1
+; CHECK-T16-NEXT: .LBB3_4:
+; CHECK-T16-NEXT: bx lr
;
; CHECK-T2NODSP-LABEL: func8:
; CHECK-T2NODSP: @ %bb.0:
@@ -264,6 +287,23 @@ define signext i8 @func8(i8 signext %x, i8 signext %y) nounwind {
; CHECK-ARMBASEDSP-NEXT: asr r0, r0, #24
; CHECK-ARMBASEDSP-NEXT: bx lr
;
+; CHECK-T15TE-LABEL: func8:
+; CHECK-T15TE: @ %bb.0:
+; CHECK-T15TE-NEXT: adds r0, r0, r1
+; CHECK-T15TE-NEXT: movs r1, #127
+; CHECK-T15TE-NEXT: cmp r0, #127
+; CHECK-T15TE-NEXT: blt .LBB3_2
+; CHECK-T15TE-NEXT: @ %bb.1:
+; CHECK-T15TE-NEXT: movs r0, r1
+; CHECK-T15TE-NEXT: .LBB3_2:
+; CHECK-T15TE-NEXT: mvns r1, r1
+; CHECK-T15TE-NEXT: cmp r0, r1
+; CHECK-T15TE-NEXT: bgt .LBB3_4
+; CHECK-T15TE-NEXT: @ %bb.3:
+; CHECK-T15TE-NEXT: movs r0, r1
+; CHECK-T15TE-NEXT: .LBB3_4:
+; CHECK-T15TE-NEXT: bx lr
+;
; CHECK-ARMDSP-LABEL: func8:
; CHECK-ARMDSP: @ %bb.0:
; CHECK-ARMDSP-NEXT: qadd8 r0, r0, r1
@@ -274,22 +314,22 @@ define signext i8 @func8(i8 signext %x, i8 signext %y) nounwind {
}
define signext i4 @func3(i4 signext %x, i4 signext %y) nounwind {
-; CHECK-T1-LABEL: func3:
-; CHECK-T1: @ %bb.0:
-; CHECK-T1-NEXT: adds r0, r0, r1
-; CHECK-T1-NEXT: movs r1, #7
-; CHECK-T1-NEXT: cmp r0, #7
-; CHECK-T1-NEXT: blt .LBB4_2
-; CHECK-T1-NEXT: @ %bb.1:
-; CHECK-T1-NEXT: {{movs|mov}} r0, r1
-; CHECK-T1-NEXT: .LBB4_2:
-; CHECK-T1-NEXT: mvns r1, r1
-; CHECK-T1-NEXT: cmp r0, r1
-; CHECK-T1-NEXT: bgt .LBB4_4
-; CHECK-T1-NEXT: @ %bb.3:
-; CHECK-T1-NEXT: {{movs|mov}} r0, r1
-; CHECK-T1-NEXT: .LBB4_4:
-; CHECK-T1-NEXT: bx lr
+; CHECK-T16-LABEL: func3:
+; CHECK-T16: @ %bb.0:
+; CHECK-T16-NEXT: adds r0, r0, r1
+; CHECK-T16-NEXT: movs r1, #7
+; CHECK-T16-NEXT: cmp r0, #7
+; CHECK-T16-NEXT: blt .LBB4_2
+; CHECK-T16-NEXT: @ %bb.1:
+; CHECK-T16-NEXT: mov r0, r1
+; CHECK-T16-NEXT: .LBB4_2:
+; CHECK-T16-NEXT: mvns r1, r1
+; CHECK-T16-NEXT: cmp r0, r1
+; CHECK-T16-NEXT: bgt .LBB4_4
+; CHECK-T16-NEXT: @ %bb.3:
+; CHECK-T16-NEXT: mov r0, r1
+; CHECK-T16-NEXT: .LBB4_4:
+; CHECK-T16-NEXT: bx lr
;
; CHECK-T2NODSP-LABEL: func3:
; CHECK-T2NODSP: @ %bb.0:
@@ -322,6 +362,23 @@ define signext i4 @func3(i4 signext %x, i4 signext %y) nounwind {
; CHECK-ARMBASEDSP-NEXT: asr r0, r0, #28
; CHECK-ARMBASEDSP-NEXT: bx lr
;
+; CHECK-T15TE-LABEL: func3:
+; CHECK-T15TE: @ %bb.0:
+; CHECK-T15TE-NEXT: adds r0, r0, r1
+; CHECK-T15TE-NEXT: movs r1, #7
+; CHECK-T15TE-NEXT: cmp r0, #7
+; CHECK-T15TE-NEXT: blt .LBB4_2
+; CHECK-T15TE-NEXT: @ %bb.1:
+; CHECK-T15TE-NEXT: movs r0, r1
+; CHECK-T15TE-NEXT: .LBB4_2:
+; CHECK-T15TE-NEXT: mvns r1, r1
+; CHECK-T15TE-NEXT: cmp r0, r1
+; CHECK-T15TE-NEXT: bgt .LBB4_4
+; CHECK-T15TE-NEXT: @ %bb.3:
+; CHECK-T15TE-NEXT: movs r0, r1
+; CHECK-T15TE-NEXT: .LBB4_4:
+; CHECK-T15TE-NEXT: bx lr
+;
; CHECK-ARMDSP-LABEL: func3:
; CHECK-ARMDSP: @ %bb.0:
; CHECK-ARMDSP-NEXT: lsl r0, r0, #28
diff --git a/llvm/test/CodeGen/ARM/sadd_sat_plus.ll b/llvm/test/CodeGen/ARM/sadd_sat_plus.ll
index 0ddb64fc3f2d1..26fa25dafc132 100644
--- a/llvm/test/CodeGen/ARM/sadd_sat_plus.ll
+++ b/llvm/test/CodeGen/ARM/sadd_sat_plus.ll
@@ -26,12 +26,11 @@ define i32 @func32(i32 %x, i32 %y, i32 %z) nounwind {
;
; CHECK-T2NODSP-LABEL: func32:
; CHECK-T2NODSP: @ %bb.0:
-; CHECK-T2NODSP-NEXT: mla r1, r1, r2, r0
+; CHECK-T2NODSP-NEXT: muls r1, r2, r1
; CHECK-T2NODSP-NEXT: mov.w r2, #-2147483648
-; CHECK-T2NODSP-NEXT: cmp r1, r0
+; CHECK-T2NODSP-NEXT: adds r0, r0, r1
; CHECK-T2NODSP-NEXT: it vs
-; CHECK-T2NODSP-NEXT: eorvs.w r1, r2, r1, asr #31
-; CHECK-T2NODSP-NEXT: mov r0, r1
+; CHECK-T2NODSP-NEXT: eorvs.w r0, r2, r0, asr #31
; CHECK-T2NODSP-NEXT: bx lr
;
; CHECK-T2DSP-LABEL: func32:
diff --git a/llvm/test/CodeGen/ARM/ssub_sat.ll b/llvm/test/CodeGen/ARM/ssub_sat.ll
index 0978bfd1f0140..e04d61f782293 100644
--- a/llvm/test/CodeGen/ARM/ssub_sat.ll
+++ b/llvm/test/CodeGen/ARM/ssub_sat.ll
@@ -29,9 +29,9 @@ define i32 @func(i32 %x, i32 %y) nounwind {
; CHECK-T2NODSP-LABEL: func:
; CHECK-T2NODSP: @ %bb.0:
; CHECK-T2NODSP-NEXT: subs r0, r0, r1
-; CHECK-T2NODSP-NEXT: mov.w r1, #-2147483648
+; CHECK-T2NODSP-NEXT: mov.w r2, #-2147483648
; CHECK-T2NODSP-NEXT: it vs
-; CHECK-T2NODSP-NEXT: eorvs.w r0, r1, r0, asr #31
+; CHECK-T2NODSP-NEXT: eorvs.w r0, r2, r0, asr #31
; CHECK-T2NODSP-NEXT: bx lr
;
; CHECK-T2DSP-LABEL: func:
@@ -42,8 +42,8 @@ define i32 @func(i32 %x, i32 %y) nounwind {
; CHECK-ARMNODPS-LABEL: func:
; CHECK-ARMNODPS: @ %bb.0:
; CHECK-ARMNODPS-NEXT: subs r0, r0, r1
-; CHECK-ARMNODPS-NEXT: mov r1, #-2147483648
-; CHECK-ARMNODPS-NEXT: eorvs r0, r1, r0, asr #31
+; CHECK-ARMNODPS-NEXT: mov r2, #-2147483648
+; CHECK-ARMNODPS-NEXT: eorvs r0, r2, r0, asr #31
; CHECK-ARMNODPS-NEXT: bx lr
;
; CHECK-ARMBASEDSP-LABEL: func:
@@ -347,10 +347,10 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
; CHECK-T2NODSP: @ %bb.0:
; CHECK-T2NODSP-NEXT: .save {r7, lr}
; CHECK-T2NODSP-NEXT: push {r7, lr}
-; CHECK-T2NODSP-NEXT: ldr.w r12, [sp, #8]
-; CHECK-T2NODSP-NEXT: ldr.w lr, [sp, #12]
-; CHECK-T2NODSP-NEXT: subs.w r0, r0, r12
+; CHECK-T2NODSP-NEXT: ldr.w lr, [sp, #8]
; CHECK-T2NODSP-NEXT: mov.w r12, #-2147483648
+; CHECK-T2NODSP-NEXT: subs.w r0, r0, lr
+; CHECK-T2NODSP-NEXT: ldr.w lr, [sp, #12]
; CHECK-T2NODSP-NEXT: it vs
; CHECK-T2NODSP-NEXT: eorvs.w r0, r12, r0, asr #31
; CHECK-T2NODSP-NEXT: subs.w r1, r1, lr
@@ -382,10 +382,10 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
; CHECK-ARMNODPS: @ %bb.0:
; CHECK-ARMNODPS-NEXT: .save {r11, lr}
; CHECK-ARMNODPS-NEXT: push {r11, lr}
-; CHECK-ARMNODPS-NEXT: ldr r12, [sp, #8]
-; CHECK-ARMNODPS-NEXT: ldr lr, [sp, #12]
-; CHECK-ARMNODPS-NEXT: subs r0, r0, r12
+; CHECK-ARMNODPS-NEXT: ldr lr, [sp, #8]
; CHECK-ARMNODPS-NEXT: mov r12, #-2147483648
+; CHECK-ARMNODPS-NEXT: subs r0, r0, lr
+; CHECK-ARMNODPS-NEXT: ldr lr, [sp, #12]
; CHECK-ARMNODPS-NEXT: eorvs r0, r12, r0, asr #31
; CHECK-ARMNODPS-NEXT: subs r1, r1, lr
; CHECK-ARMNODPS-NEXT: ldr lr, [sp, #16]
diff --git a/llvm/test/CodeGen/ARM/ssub_sat_plus.ll b/llvm/test/CodeGen/ARM/ssub_sat_plus.ll
index adf6cafc6ccb8..2ea85f2697d97 100644
--- a/llvm/test/CodeGen/ARM/ssub_sat_plus.ll
+++ b/llvm/test/CodeGen/ARM/ssub_sat_plus.ll
@@ -26,13 +26,11 @@ define i32 @func32(i32 %x, i32 %y, i32 %z) nounwind {
;
; CHECK-T2NODSP-LABEL: func32:
; CHECK-T2NODSP: @ %bb.0:
-; CHECK-T2NODSP-NEXT: mls r3, r1, r2, r0
-; CHECK-T2NODSP-NEXT: mov.w r12, #-2147483648
; CHECK-T2NODSP-NEXT: muls r1, r2, r1
-; CHECK-T2NODSP-NEXT: cmp r0, r1
+; CHECK-T2NODSP-NEXT: mov.w r2, #-2147483648
+; CHECK-T2NODSP-NEXT: subs r0, r0, r1
; CHECK-T2NODSP-NEXT: it vs
-; CHECK-T2NODSP-NEXT: eorvs.w r3, r12, r3, asr #31
-; CHECK-T2NODSP-NEXT: mov r0, r3
+; CHECK-T2NODSP-NEXT: eorvs.w r0, r2, r0, asr #31
; CHECK-T2NODSP-NEXT: bx lr
;
; CHECK-T2DSP-LABEL: func32:
diff --git a/llvm/test/CodeGen/Thumb2/mve-pipelineloops.ll b/llvm/test/CodeGen/Thumb2/mve-pipelineloops.ll
index 43ed5eefbf4c7..963fec79aff81 100644
--- a/llvm/test/CodeGen/Thumb2/mve-pipelineloops.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-pipelineloops.ll
@@ -10,24 +10,22 @@ define void @arm_cmplx_dot_prod_q15(ptr noundef %pSrcA, ptr noundef %pSrcB, i32
; CHECK-NEXT: cmp r2, #16
; CHECK-NEXT: blo .LBB0_5
; CHECK-NEXT: @ %bb.1: @ %while.body.preheader
-; CHECK-NEXT: movs r6, #2
-; CHECK-NEXT: lsrs r7, r2, #3
-; CHECK-NEXT: rsb r6, r6, r2, lsr #3
-; CHECK-NEXT: cmp r7, #2
-; CHECK-NEXT: mov.w r5, #0
-; CHECK-NEXT: csel r7, r6, r5, hs
-; CHECK-NEXT: add.w lr, r7, #1
-; CHECK-NEXT: mov r4, r5
+; CHECK-NEXT: movs r7, #2
+; CHECK-NEXT: movs r5, #0
+; CHECK-NEXT: rsbs r7, r7, r2, lsr #3
; CHECK-NEXT: vldrh.u16 q0, [r0], #32
-; CHECK-NEXT: movs r7, #0
-; CHECK-NEXT: mov r8, r5
+; CHECK-NEXT: csel r7, r7, r5, hs
+; CHECK-NEXT: add.w lr, r7, #1
+; CHECK-NEXT: mov r6, r5
; CHECK-NEXT: vldrh.u16 q1, [r1], #32
-; CHECK-NEXT: vmlsldava.s16 r4, r7, q0, q1
+; CHECK-NEXT: movs r7, #0
+; CHECK-NEXT: vmlsldava.s16 r6, r7, q0, q1
; CHECK-NEXT: vldrh.u16 q2, [r0, #-16]
-; CHECK-NEXT: vmlaldavax.s16 r8, r5, q0, q1
+; CHECK-NEXT: mov r8, r5
; CHECK-NEXT: vldrh.u16 q3, [r1, #-16]
-; CHECK-NEXT: vmlsldava.s16 r4, r7, q2, q3
+; CHECK-NEXT: vmlaldavax.s16 r8, r5, q0, q1
; CHECK-NEXT: vldrh.u16 q0, [r1], #32
+; CHECK-NEXT: vmlsldava.s16 r6, r7, q2, q3
; CHECK-NEXT: sub.w lr, lr, #1
; CHECK-NEXT: cmp.w lr, #0
; CHECK-NEXT: vldrh.u16 q1, [r0], #32
@@ -37,30 +35,30 @@ define void @arm_cmplx_dot_prod_q15(ptr noundef %pSrcA, ptr noundef %pSrcB, i32
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vmlaldavax.s16 r8, r5, q2, q3
; CHECK-NEXT: vldrh.u16 q3, [r1, #-16]
-; CHECK-NEXT: vmlsldava.s16 r4, r7, q1, q0
+; CHECK-NEXT: vmlsldava.s16 r6, r7, q1, q0
; CHECK-NEXT: vldrh.u16 q2, [r0, #-16]
; CHECK-NEXT: vmlaldavax.s16 r8, r5, q1, q0
; CHECK-NEXT: vldrh.u16 q1, [r0], #32
-; CHECK-NEXT: vmlsldava.s16 r4, r7, q2, q3
+; CHECK-NEXT: vmlsldava.s16 r6, r7, q2, q3
; CHECK-NEXT: vldrh.u16 q0, [r1], #32
; CHECK-NEXT: le lr, .LBB0_2
; CHECK-NEXT: .LBB0_3:
; CHECK-NEXT: vmlaldavax.s16 r8, r5, q2, q3
-; CHECK-NEXT: movs r6, #14
-; CHECK-NEXT: and.w r2, r6, r2, lsl #1
+; CHECK-NEXT: movs r4, #14
+; CHECK-NEXT: and.w r2, r4, r2, lsl #1
; CHECK-NEXT: vmlaldavax.s16 r8, r5, q1, q0
; CHECK-NEXT: vldrh.u16 q2, [r0, #-16]
-; CHECK-NEXT: vmlsldava.s16 r4, r7, q1, q0
+; CHECK-NEXT: vmlsldava.s16 r6, r7, q1, q0
; CHECK-NEXT: vldrh.u16 q0, [r1, #-16]
; CHECK-NEXT: vmlaldavax.s16 r8, r5, q2, q0
; CHECK-NEXT: vctp.16 r2
-; CHECK-NEXT: vmlsldava.s16 r4, r7, q2, q0
+; CHECK-NEXT: vmlsldava.s16 r6, r7, q2, q0
; CHECK-NEXT: vpst
; CHECK-NEXT: vldrht.u16 q1, [r0]
; CHECK-NEXT: cmp r2, #9
; CHECK-NEXT: vpsttt
; CHECK-NEXT: vldrht.u16 q0, [r1]
-; CHECK-NEXT: vmlsldavat.s16 r4, r7, q1, q0
+; CHECK-NEXT: vmlsldavat.s16 r6, r7, q1, q0
; CHECK-NEXT: vmlaldavaxt.s16 r8, r5, q1, q0
; CHECK-NEXT: blo .LBB0_10
; CHECK-NEXT: @ %bb.4: @ %do.body.1
@@ -69,40 +67,42 @@ define void @arm_cmplx_dot_prod_q15(ptr noundef %pSrcA, ptr noundef %pSrcB, i32
; CHECK-NEXT: vpstttt
; CHECK-NEXT: vldrht.u16 q0, [r0, #16]
; CHECK-NEXT: vldrht.u16 q1, [r1, #16]
-; CHECK-NEXT: vmlsldavat.s16 r4, r7, q0, q1
+; CHECK-NEXT: vmlsldavat.s16 r6, r7, q0, q1
; CHECK-NEXT: vmlaldavaxt.s16 r8, r5, q0, q1
; CHECK-NEXT: b .LBB0_10
; CHECK-NEXT: .p2align 2
; CHECK-NEXT: .LBB0_5: @ %if.else
-; CHECK-NEXT: mov.w r4, #0
+; CHECK-NEXT: mov.w r6, #0
; CHECK-NEXT: cbz r2, .LBB0_9
; CHECK-NEXT: @ %bb.6: @ %while.body14.preheader
-; CHECK-NEXT: lsls r6, r2, #1
-; CHECK-NEXT: mov r5, r4
-; CHECK-NEXT: mov r7, r4
-; CHECK-NEXT: movs r2, #0
-; CHECK-NEXT: dlstp.16 lr, r6
+; CHECK-NEXT: movs r7, #8
+; CHECK-NEXT: rsbs r7, r7, r2, lsl #1
+; CHECK-NEXT: lsl.w r2, r2, #1
+; CHECK-NEXT: mov.w r4, #0
+; CHECK-NEXT: mov r5, r6
+; CHECK-NEXT: mov r7, r6
+; CHECK-NEXT: dlstp.16 lr, r2
; CHECK-NEXT: .p2align 2
; CHECK-NEXT: .LBB0_7: @ %while.body14
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldrh.u16 q0, [r0], #16
; CHECK-NEXT: vldrh.u16 q1, [r1], #16
-; CHECK-NEXT: vmlsldava.s16 r2, r7, q0, q1
-; CHECK-NEXT: vmlaldavax.s16 r4, r5, q0, q1
+; CHECK-NEXT: vmlsldava.s16 r4, r7, q0, q1
+; CHECK-NEXT: vmlaldavax.s16 r6, r5, q0, q1
; CHECK-NEXT: letp lr, .LBB0_7
; CHECK-NEXT: @ %bb.8: @ %if.end.loopexit177
-; CHECK-NEXT: mov r8, r4
-; CHECK-NEXT: mov r4, r2
+; CHECK-NEXT: mov r8, r6
+; CHECK-NEXT: mov r6, r4
; CHECK-NEXT: b .LBB0_10
; CHECK-NEXT: .p2align 2
; CHECK-NEXT: .LBB0_9:
-; CHECK-NEXT: mov r7, r4
+; CHECK-NEXT: mov r7, r6
; CHECK-NEXT: mov.w r8, #0
-; CHECK-NEXT: mov r5, r4
+; CHECK-NEXT: mov r5, r6
; CHECK-NEXT: .LBB0_10: @ %if.end
-; CHECK-NEXT: asrl r4, r7, #6
+; CHECK-NEXT: asrl r6, r7, #6
; CHECK-NEXT: asrl r8, r5, #6
-; CHECK-NEXT: str r4, [r3]
+; CHECK-NEXT: str r6, [r3]
; CHECK-NEXT: str.w r8, [r12]
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc}
entry: