[llvm] aff98e4 - [ARM] Stop gluing 1-bit shifts (#116547)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 19 06:46:52 PST 2024
Author: Sergei Barannikov
Date: 2024-11-19T17:46:48+03:00
New Revision: aff98e4be05a1060e489ce62a88ee0ff365e571a
URL: https://github.com/llvm/llvm-project/commit/aff98e4be05a1060e489ce62a88ee0ff365e571a
DIFF: https://github.com/llvm/llvm-project/commit/aff98e4be05a1060e489ce62a88ee0ff365e571a.diff
LOG: [ARM] Stop gluing 1-bit shifts (#116547)
1. When two (or more) nodes are glued, DAG scheduler will always
schedule them as one piece, i.e. it will not allow any instructions to
be scheduled between them. It does so because if nodes are glued this
usually means that there is an implicit register dependency between
them, and an intervening node could clobber this physical register. When
emitting such nodes into machine IR, they will also be stuck together,
e.g.:
```
%9:gpr = MOVsrl_glue killed %8, implicit-def $cpsr
%10:gpr = RRX %3, implicit $cpsr
```
2. If a node has Glue result, SelectionDAG will not try to CSE this
node. If it did, it would break the implicit physical register
dependency. In practice this means that if a node with Glue result has
multiple uses, it has to be duplicated before each use. This the reason
for `ARMTargetLowering::duplicateCmp` to exist.
When using normal data dependency, dependent nodes can freely be
scheduled around. If there is a physical register dependency between
nodes, the physical register will be copied to/from a virtual register,
allowing other nodes to intervene between them. The resulting machine IR
might look like this:
```
%9:gpr = LSRs1 killed %8, implicit-def $cpsr
%10:gpr = COPY $cpsr
%11:gpr = ORRrsi killed %9, %3, 242, 14 /* CC::al */, $noreg, $noreg
%12:gpr = BICri killed %11, -2147483648, 14 /* CC::al */, $noreg, $noreg
$cpsr = COPY %10
%13:gpr = RRX %3, implicit $cpsr
```
The two copies are likely to be eliminated by register coalescer, given
that there are no instructions between them that clobber this physical
register. If the copies are unwanted in the first place (they could be
expensive or impossible), DAG scheduler will try to avoid inserting them
wherever possible, and the resulting machine IR will look like this:
```
%9:gpr = LSRs1 killed %8, implicit-def $cpsr
%10:gpr = ORRrsi killed %9, %3, 242, 14 /* CC::al */, $noreg, $noreg
%11:gpr = BICri killed %10, -2147483648, 14 /* CC::al */, $noreg, $noreg
%12:gpr = RRX %3, implicit $cpsr
```
On ARM, arithmetic operations and LSLS already use the new data flow
approach. This patch extends it to include 1-bit shifts.
Pull Request: https://github.com/llvm/llvm-project/pull/116547
Added:
Modified:
llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
llvm/lib/Target/ARM/ARMISelLowering.cpp
llvm/lib/Target/ARM/ARMISelLowering.h
llvm/lib/Target/ARM/ARMInstrInfo.td
llvm/lib/Target/ARM/ARMInstrThumb2.td
llvm/lib/Target/ARM/ARMScheduleM7.td
llvm/lib/Target/ARM/ARMScheduleM85.td
llvm/test/CodeGen/ARM/urem-seteq-illegal-types.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 8e79a0a344067f..3fda15a4290178 100644
--- a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -2590,14 +2590,14 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
return true;
}
- case ARM::MOVsrl_glue:
- case ARM::MOVsra_glue: {
+ case ARM::LSRs1:
+ case ARM::ASRs1: {
// These are just fancy MOVs instructions.
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsi),
MI.getOperand(0).getReg())
.add(MI.getOperand(1))
.addImm(ARM_AM::getSORegOpc(
- (Opcode == ARM::MOVsrl_glue ? ARM_AM::lsr : ARM_AM::asr), 1))
+ (Opcode == ARM::LSRs1 ? ARM_AM::lsr : ARM_AM::asr), 1))
.add(predOps(ARMCC::AL))
.addReg(ARM::CPSR, RegState::Define);
MI.eraseFromParent();
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 554f7337a6a5a0..84b37ae6833aed 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -149,6 +149,9 @@ MVEMaxSupportedInterleaveFactor("mve-max-interleave-factor", cl::Hidden,
cl::desc("Maximum interleave factor for MVE VLDn to generate."),
cl::init(2));
+/// Value type used for "flags" operands / results (either CPSR or FPSCR_NZCV).
+constexpr MVT FlagsVT = MVT::i32;
+
// The APCS parameter registers.
static const MCPhysReg GPRArgRegs[] = {
ARM::R0, ARM::R1, ARM::R2, ARM::R3
@@ -1730,14 +1733,14 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
MAKE_CASE(ARMISD::ASRL)
MAKE_CASE(ARMISD::LSRL)
MAKE_CASE(ARMISD::LSLL)
- MAKE_CASE(ARMISD::SRL_GLUE)
- MAKE_CASE(ARMISD::SRA_GLUE)
+ MAKE_CASE(ARMISD::LSLS)
+ MAKE_CASE(ARMISD::LSRS1)
+ MAKE_CASE(ARMISD::ASRS1)
MAKE_CASE(ARMISD::RRX)
MAKE_CASE(ARMISD::ADDC)
MAKE_CASE(ARMISD::ADDE)
MAKE_CASE(ARMISD::SUBC)
MAKE_CASE(ARMISD::SUBE)
- MAKE_CASE(ARMISD::LSLS)
MAKE_CASE(ARMISD::VMOVRRD)
MAKE_CASE(ARMISD::VMOVDRR)
MAKE_CASE(ARMISD::VMOVhr)
@@ -6846,10 +6849,10 @@ static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG,
SDValue Lo, Hi;
std::tie(Lo, Hi) = DAG.SplitScalar(N->getOperand(0), dl, MVT::i32, MVT::i32);
- // First, build a SRA_GLUE/SRL_GLUE op, which shifts the top part by one and
- // captures the result into a carry flag.
- unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_GLUE:ARMISD::SRA_GLUE;
- Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), Hi);
+ // First, build a LSRS1/ASRS1 op, which shifts the top part by one and
+ // captures the shifted out bit into a carry flag.
+ unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::LSRS1 : ARMISD::ASRS1;
+ Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, FlagsVT), Hi);
// The low part is an ARMISD::RRX operand, which shifts the carry in.
Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1));
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index 0e086f3340ccb4..344a0ad91e5178 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -101,15 +101,15 @@ class VectorType;
BCC_i64,
- SRL_GLUE, // V,Flag = srl_flag X -> srl X, 1 + save carry out.
- SRA_GLUE, // V,Flag = sra_flag X -> sra X, 1 + save carry out.
- RRX, // V = RRX X, Flag -> srl X, 1 + shift in carry flag.
+ LSLS, // Flag-setting shift left.
+ LSRS1, // Flag-setting logical shift right by one bit.
+ ASRS1, // Flag-setting arithmetic shift right by one bit.
+ RRX, // Shift right one bit with carry in.
ADDC, // Add with carry
ADDE, // Add using carry
SUBC, // Sub with carry
SUBE, // Sub using carry
- LSLS, // Shift left producing carry
VMOVRRD, // double to two gprs.
VMOVDRR, // Two gprs to double.
diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td
index 72146f2a717ade..db38b43279b866 100644
--- a/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -14,6 +14,9 @@
// ARM specific DAG Nodes.
//
+/// Value type used for "flags" operands / results (either CPSR or FPSCR_NZCV).
+defvar FlagsVT = i32;
+
// Type profiles.
def SDT_ARMCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32>,
SDTCisVT<1, i32> ]>;
@@ -77,6 +80,18 @@ def SDT_ARMMEMCPY : SDTypeProfile<2, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
SDTCisVT<2, i32>, SDTCisVT<3, i32>,
SDTCisVT<4, i32>]>;
+def SDTIntUnaryOpWithFlagsOut : SDTypeProfile<2, 1, [
+ SDTCisInt<0>, // result
+ SDTCisVT<1, FlagsVT>, // out flags
+ SDTCisSameAs<2, 0> // operand
+]>;
+
+def SDTIntUnaryOpWithFlagsIn : SDTypeProfile<1, 2, [
+ SDTCisInt<0>, // result
+ SDTCisSameAs<1, 0>, // operand
+ SDTCisVT<1, FlagsVT> // in flags
+]>;
+
def SDTBinaryArithWithFlags : SDTypeProfile<2, 2,
[SDTCisSameAs<0, 2>,
SDTCisSameAs<0, 3>,
@@ -191,9 +206,9 @@ def ARMasrl : SDNode<"ARMISD::ASRL", SDT_ARMIntShiftParts, []>;
def ARMlsrl : SDNode<"ARMISD::LSRL", SDT_ARMIntShiftParts, []>;
def ARMlsll : SDNode<"ARMISD::LSLL", SDT_ARMIntShiftParts, []>;
-def ARMsrl_glue : SDNode<"ARMISD::SRL_GLUE", SDTIntUnaryOp, [SDNPOutGlue]>;
-def ARMsra_glue : SDNode<"ARMISD::SRA_GLUE", SDTIntUnaryOp, [SDNPOutGlue]>;
-def ARMrrx : SDNode<"ARMISD::RRX" , SDTIntUnaryOp, [SDNPInGlue ]>;
+def ARMlsrs1 : SDNode<"ARMISD::LSRS1", SDTIntUnaryOpWithFlagsOut>;
+def ARMasrs1 : SDNode<"ARMISD::ASRS1", SDTIntUnaryOpWithFlagsOut>;
+def ARMrrx : SDNode<"ARMISD::RRX" , SDTIntUnaryOpWithFlagsIn>;
def ARMaddc : SDNode<"ARMISD::ADDC", SDTBinaryArithWithFlags,
[SDNPCommutative]>;
@@ -3730,20 +3745,17 @@ def : ARMPat<(or GPR:$src, 0xffff0000), (MOVTi16 GPR:$src, 0xffff)>,
Requires<[IsARM, HasV6T2]>;
let Uses = [CPSR] in
-def RRX: PseudoInst<(outs GPR:$Rd), (ins GPR:$Rm), IIC_iMOVsi,
- [(set GPR:$Rd, (ARMrrx GPR:$Rm))]>, UnaryDP,
- Requires<[IsARM]>, Sched<[WriteALU]>;
-
-// These aren't really mov instructions, but we have to define them this way
-// due to glue operands.
+def RRX : PseudoInst<(outs GPR:$Rd), (ins GPR:$Rm), IIC_iMOVsi,
+ [(set GPR:$Rd, (ARMrrx GPR:$Rm, CPSR))]>,
+ UnaryDP, Requires<[IsARM]>, Sched<[WriteALU]>;
let Defs = [CPSR] in {
-def MOVsrl_glue : PseudoInst<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
- [(set GPR:$dst, (ARMsrl_glue GPR:$src))]>, UnaryDP,
- Sched<[WriteALU]>, Requires<[IsARM]>;
-def MOVsra_glue : PseudoInst<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
- [(set GPR:$dst, (ARMsra_glue GPR:$src))]>, UnaryDP,
- Sched<[WriteALU]>, Requires<[IsARM]>;
+ def LSRs1 : PseudoInst<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
+ [(set GPR:$dst, CPSR, (ARMlsrs1 GPR:$src))]>,
+ UnaryDP, Sched<[WriteALU]>, Requires<[IsARM]>;
+ def ASRs1 : PseudoInst<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
+ [(set GPR:$dst, CPSR, (ARMasrs1 GPR:$src))]>,
+ UnaryDP, Sched<[WriteALU]>, Requires<[IsARM]>;
}
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/ARM/ARMInstrThumb2.td b/llvm/lib/Target/ARM/ARMInstrThumb2.td
index 4e9160bcfd5ec9..aa5c0a58897688 100644
--- a/llvm/lib/Target/ARM/ARMInstrThumb2.td
+++ b/llvm/lib/Target/ARM/ARMInstrThumb2.td
@@ -2787,8 +2787,9 @@ def : T2Pat<(rotr rGPR:$lhs, (and rGPR:$rhs, lo5AllOne)),
let Uses = [CPSR] in {
def t2RRX : T2sTwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi,
- "rrx", "\t$Rd, $Rm",
- [(set rGPR:$Rd, (ARMrrx rGPR:$Rm))]>, Sched<[WriteALU]> {
+ "rrx", "\t$Rd, $Rm",
+ [(set rGPR:$Rd, (ARMrrx rGPR:$Rm, CPSR))]>,
+ Sched<[WriteALU]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = 0b0010;
@@ -2800,12 +2801,13 @@ def t2RRX : T2sTwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi,
}
}
+// These
diff er from t2LSRri / t2ASRri in that they are flag-setting
+// and have a hardcoded shift amount = 1.
let isCodeGenOnly = 1, Defs = [CPSR] in {
-def t2MOVsrl_glue : T2TwoRegShiftImm<
- (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi,
- "lsrs", ".w\t$Rd, $Rm, #1",
- [(set rGPR:$Rd, (ARMsrl_glue rGPR:$Rm))]>,
- Sched<[WriteALU]> {
+def t2LSRs1 : T2TwoRegShiftImm<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi,
+ "lsrs", ".w\t$Rd, $Rm, #1",
+ [(set rGPR:$Rd, CPSR, (ARMlsrs1 rGPR:$Rm))]>,
+ Sched<[WriteALU]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = 0b0010;
@@ -2816,11 +2818,10 @@ def t2MOVsrl_glue : T2TwoRegShiftImm<
let Inst{14-12} = 0b000;
let Inst{7-6} = 0b01;
}
-def t2MOVsra_glue : T2TwoRegShiftImm<
- (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi,
- "asrs", ".w\t$Rd, $Rm, #1",
- [(set rGPR:$Rd, (ARMsra_glue rGPR:$Rm))]>,
- Sched<[WriteALU]> {
+def t2ASRs1 : T2TwoRegShiftImm<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi,
+ "asrs", ".w\t$Rd, $Rm, #1",
+ [(set rGPR:$Rd, CPSR, (ARMasrs1 rGPR:$Rm))]>,
+ Sched<[WriteALU]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = 0b0010;
diff --git a/llvm/lib/Target/ARM/ARMScheduleM7.td b/llvm/lib/Target/ARM/ARMScheduleM7.td
index 25bc8401ca84ab..99d2e4a832220a 100644
--- a/llvm/lib/Target/ARM/ARMScheduleM7.td
+++ b/llvm/lib/Target/ARM/ARMScheduleM7.td
@@ -325,7 +325,7 @@ def M7Ex1ReadNoFastBypass : SchedReadAdvance<-1, [WriteLd, M7LoadLatency1]>;
def : InstRW<[WriteALUsi, M7Ex1ReadNoFastBypass, M7Read_ISS],
(instregex "t2(ADC|ADDS|ADD|BIC|EOR|ORN|ORR|RSBS|RSB|SBC|SUBS)rs$",
"t2(SUB|CMP|CMNz|TEQ|TST)rs$",
- "t2MOVsr(a|l)")>;
+ "t2(A|L)SRs1$")>;
def : InstRW<[WriteALUsi, M7Read_ISS],
(instregex "t2MVNs")>;
@@ -335,7 +335,7 @@ def : InstRW<[WriteALUsi, M7Read_ISS],
// but the results prove to be better than trying to get them exact.
def : InstRW<[M7WriteShift2, M7Read_ISS], (instregex "t2RRX$")>;
-def : InstRW<[WriteALUsi], (instregex "(t|t2)(LSL|LSR|ASR|ROR)")>;
+def : InstRW<[WriteALUsi], (instregex "(t|t2)(LSL|LSR|ASR|ROR)r", "tROR")>;
// Instructions that use the shifter, but have normal timing.
diff --git a/llvm/lib/Target/ARM/ARMScheduleM85.td b/llvm/lib/Target/ARM/ARMScheduleM85.td
index cd375a16305ec8..e9938d857e6afc 100644
--- a/llvm/lib/Target/ARM/ARMScheduleM85.td
+++ b/llvm/lib/Target/ARM/ARMScheduleM85.td
@@ -436,7 +436,7 @@ def : InstRW<[M85WriteALUsi, M85ReadALUsi],
def : InstRW<[M85WriteShift2],
(instregex "t2RRX$")>;
def : InstRW<[WriteALU],
- (instregex "(t|t2)(LSL|LSR|ASR|ROR|SBFX|UBFX)", "t2MOVsr(a|l)")>;
+ (instregex "(t|t2)(LSL|LSR|ASR|ROR|SBFX|UBFX)")>;
// Instructions that use the shifter, but have normal timing
diff --git a/llvm/test/CodeGen/ARM/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/ARM/urem-seteq-illegal-types.ll
index 8900d5f541e8aa..b85cb3a4f191c8 100644
--- a/llvm/test/CodeGen/ARM/urem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/ARM/urem-seteq-illegal-types.ll
@@ -628,13 +628,13 @@ define i1 @test_urem_larger(i63 %X) nounwind {
; ARM5-NEXT: mla r0, r1, r12, r4
; ARM5-NEXT: bic r0, r0, #-2147483648
; ARM5-NEXT: lsrs r0, r0, #1
-; ARM5-NEXT: rrx r1, r3
+; ARM5-NEXT: rrx r2, r3
; ARM5-NEXT: orr r0, r0, r3, lsl #30
; ARM5-NEXT: ldr r3, .LCPI5_2
-; ARM5-NEXT: bic r2, r0, #-2147483648
+; ARM5-NEXT: bic r1, r0, #-2147483648
; ARM5-NEXT: mov r0, #0
-; ARM5-NEXT: subs r1, r1, r3
-; ARM5-NEXT: sbcs r1, r2, #1
+; ARM5-NEXT: subs r2, r2, r3
+; ARM5-NEXT: sbcs r1, r1, #1
; ARM5-NEXT: movlo r0, #1
; ARM5-NEXT: pop {r4, pc}
; ARM5-NEXT: .p2align 2
@@ -656,13 +656,13 @@ define i1 @test_urem_larger(i63 %X) nounwind {
; ARM6-NEXT: mla r0, r1, r12, r0
; ARM6-NEXT: bic r0, r0, #-2147483648
; ARM6-NEXT: lsrs r0, r0, #1
-; ARM6-NEXT: rrx r1, r3
+; ARM6-NEXT: rrx r2, r3
; ARM6-NEXT: orr r0, r0, r3, lsl #30
; ARM6-NEXT: ldr r3, .LCPI5_2
-; ARM6-NEXT: bic r2, r0, #-2147483648
+; ARM6-NEXT: bic r1, r0, #-2147483648
; ARM6-NEXT: mov r0, #0
-; ARM6-NEXT: subs r1, r1, r3
-; ARM6-NEXT: sbcs r1, r2, #1
+; ARM6-NEXT: subs r2, r2, r3
+; ARM6-NEXT: sbcs r1, r1, #1
; ARM6-NEXT: movlo r0, #1
; ARM6-NEXT: pop {r11, pc}
; ARM6-NEXT: .p2align 2
@@ -686,14 +686,14 @@ define i1 @test_urem_larger(i63 %X) nounwind {
; ARM7-NEXT: mla r0, r1, r12, r0
; ARM7-NEXT: bic r0, r0, #-2147483648
; ARM7-NEXT: lsrs r0, r0, #1
-; ARM7-NEXT: rrx r1, r3
+; ARM7-NEXT: rrx r2, r3
; ARM7-NEXT: orr r0, r0, r3, lsl #30
; ARM7-NEXT: movw r3, #24026
-; ARM7-NEXT: bic r2, r0, #-2147483648
+; ARM7-NEXT: bic r1, r0, #-2147483648
; ARM7-NEXT: movt r3, #48461
-; ARM7-NEXT: subs r1, r1, r3
+; ARM7-NEXT: subs r2, r2, r3
; ARM7-NEXT: mov r0, #0
-; ARM7-NEXT: sbcs r1, r2, #1
+; ARM7-NEXT: sbcs r1, r1, #1
; ARM7-NEXT: movwlo r0, #1
; ARM7-NEXT: pop {r11, pc}
;
@@ -709,14 +709,14 @@ define i1 @test_urem_larger(i63 %X) nounwind {
; ARM8-NEXT: mla r0, r1, r12, r0
; ARM8-NEXT: bic r0, r0, #-2147483648
; ARM8-NEXT: lsrs r0, r0, #1
-; ARM8-NEXT: rrx r1, r3
+; ARM8-NEXT: rrx r2, r3
; ARM8-NEXT: orr r0, r0, r3, lsl #30
; ARM8-NEXT: movw r3, #24026
-; ARM8-NEXT: bic r2, r0, #-2147483648
+; ARM8-NEXT: bic r1, r0, #-2147483648
; ARM8-NEXT: movt r3, #48461
-; ARM8-NEXT: subs r1, r1, r3
+; ARM8-NEXT: subs r2, r2, r3
; ARM8-NEXT: mov r0, #0
-; ARM8-NEXT: sbcs r1, r2, #1
+; ARM8-NEXT: sbcs r1, r1, #1
; ARM8-NEXT: movwlo r0, #1
; ARM8-NEXT: pop {r11, pc}
;
@@ -732,14 +732,14 @@ define i1 @test_urem_larger(i63 %X) nounwind {
; NEON7-NEXT: mla r0, r1, r12, r0
; NEON7-NEXT: bic r0, r0, #-2147483648
; NEON7-NEXT: lsrs r0, r0, #1
-; NEON7-NEXT: rrx r1, r3
+; NEON7-NEXT: rrx r2, r3
; NEON7-NEXT: orr r0, r0, r3, lsl #30
; NEON7-NEXT: movw r3, #24026
-; NEON7-NEXT: bic r2, r0, #-2147483648
+; NEON7-NEXT: bic r1, r0, #-2147483648
; NEON7-NEXT: movt r3, #48461
-; NEON7-NEXT: subs r1, r1, r3
+; NEON7-NEXT: subs r2, r2, r3
; NEON7-NEXT: mov r0, #0
-; NEON7-NEXT: sbcs r1, r2, #1
+; NEON7-NEXT: sbcs r1, r1, #1
; NEON7-NEXT: movwlo r0, #1
; NEON7-NEXT: pop {r11, pc}
;
@@ -755,14 +755,14 @@ define i1 @test_urem_larger(i63 %X) nounwind {
; NEON8-NEXT: mla r0, r1, r12, r0
; NEON8-NEXT: bic r0, r0, #-2147483648
; NEON8-NEXT: lsrs r0, r0, #1
-; NEON8-NEXT: rrx r1, r3
+; NEON8-NEXT: rrx r2, r3
; NEON8-NEXT: orr r0, r0, r3, lsl #30
; NEON8-NEXT: movw r3, #24026
-; NEON8-NEXT: bic r2, r0, #-2147483648
+; NEON8-NEXT: bic r1, r0, #-2147483648
; NEON8-NEXT: movt r3, #48461
-; NEON8-NEXT: subs r1, r1, r3
+; NEON8-NEXT: subs r2, r2, r3
; NEON8-NEXT: mov r0, #0
-; NEON8-NEXT: sbcs r1, r2, #1
+; NEON8-NEXT: sbcs r1, r1, #1
; NEON8-NEXT: movwlo r0, #1
; NEON8-NEXT: pop {r11, pc}
%urem = urem i63 %X, 1234567890
More information about the llvm-commits
mailing list