[llvm] [ARM] Stop gluing 1-bit shifts (PR #116547)
Sergei Barannikov via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 18 05:53:21 PST 2024
https://github.com/s-barannikov updated https://github.com/llvm/llvm-project/pull/116547
>From 637ab085014f776cb97cc53ef6d34a255c0a201d Mon Sep 17 00:00:00 2001
From: Sergei Barannikov <barannikov88 at gmail.com>
Date: Sun, 17 Nov 2024 19:20:14 +0300
Subject: [PATCH 1/3] [ARM] Stop gluing 1-bit shifts
Use normal data flow instead.
There are several more nodes that are still glued; I'll try to change
that in subsequent patches.
---
llvm/lib/Target/ARM/ARMISelLowering.cpp | 5 +-
llvm/lib/Target/ARM/ARMInstrInfo.td | 37 +++++++++-----
llvm/lib/Target/ARM/ARMInstrThumb2.td | 25 +++++-----
.../CodeGen/ARM/urem-seteq-illegal-types.ll | 48 +++++++++----------
4 files changed, 67 insertions(+), 48 deletions(-)
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 7fce91f97f3618..aed91c7bb8a2a9 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -149,6 +149,9 @@ MVEMaxSupportedInterleaveFactor("mve-max-interleave-factor", cl::Hidden,
cl::desc("Maximum interleave factor for MVE VLDn to generate."),
cl::init(2));
+/// Value type used for "flags" operands / results (either CPSR or FPSCR_NZCV).
+constexpr MVT FlagsVT = MVT::i32;
+
// The APCS parameter registers.
static const MCPhysReg GPRArgRegs[] = {
ARM::R0, ARM::R1, ARM::R2, ARM::R3
@@ -6850,7 +6853,7 @@ static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG,
// First, build a SRA_GLUE/SRL_GLUE op, which shifts the top part by one and
// captures the result into a carry flag.
unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_GLUE:ARMISD::SRA_GLUE;
- Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), Hi);
+ Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, FlagsVT), Hi);
// The low part is an ARMISD::RRX operand, which shifts the carry in.
Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1));
diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td
index d24d4af36f0d86..9fa3c9a6c25fd5 100644
--- a/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -14,6 +14,9 @@
// ARM specific DAG Nodes.
//
+/// Value type used for "flags" operands / results (either CPSR or FPSCR_NZCV).
+defvar FlagsVT = i32;
+
// Type profiles.
def SDT_ARMCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32>,
SDTCisVT<1, i32> ]>;
@@ -77,6 +80,18 @@ def SDT_ARMMEMCPY : SDTypeProfile<2, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
SDTCisVT<2, i32>, SDTCisVT<3, i32>,
SDTCisVT<4, i32>]>;
+def SDTIntUnaryOpWithFlagsOut : SDTypeProfile<2, 1, [
+ SDTCisInt<0>, // result
+ SDTCisVT<1, FlagsVT>, // out flags
+ SDTCisSameAs<2, 0> // operand
+]>;
+
+def SDTIntUnaryOpWithFlagsIn : SDTypeProfile<1, 2, [
+ SDTCisInt<0>, // result
+ SDTCisSameAs<1, 0>, // operand
+ SDTCisVT<1, FlagsVT> // in flags
+]>;
+
def SDTBinaryArithWithFlags : SDTypeProfile<2, 2,
[SDTCisSameAs<0, 2>,
SDTCisSameAs<0, 3>,
@@ -191,9 +206,9 @@ def ARMasrl : SDNode<"ARMISD::ASRL", SDT_ARMIntShiftParts, []>;
def ARMlsrl : SDNode<"ARMISD::LSRL", SDT_ARMIntShiftParts, []>;
def ARMlsll : SDNode<"ARMISD::LSLL", SDT_ARMIntShiftParts, []>;
-def ARMsrl_glue : SDNode<"ARMISD::SRL_GLUE", SDTIntUnaryOp, [SDNPOutGlue]>;
-def ARMsra_glue : SDNode<"ARMISD::SRA_GLUE", SDTIntUnaryOp, [SDNPOutGlue]>;
-def ARMrrx : SDNode<"ARMISD::RRX" , SDTIntUnaryOp, [SDNPInGlue ]>;
+def ARMsrl_glue : SDNode<"ARMISD::SRL_GLUE", SDTIntUnaryOpWithFlagsOut>;
+def ARMsra_glue : SDNode<"ARMISD::SRA_GLUE", SDTIntUnaryOpWithFlagsOut>;
+def ARMrrx : SDNode<"ARMISD::RRX" , SDTIntUnaryOpWithFlagsIn>;
def ARMaddc : SDNode<"ARMISD::ADDC", SDTBinaryArithWithFlags,
[SDNPCommutative]>;
@@ -3731,19 +3746,19 @@ def : ARMPat<(or GPR:$src, 0xffff0000), (MOVTi16 GPR:$src, 0xffff)>,
let Uses = [CPSR] in
def RRX: PseudoInst<(outs GPR:$Rd), (ins GPR:$Rm), IIC_iMOVsi,
- [(set GPR:$Rd, (ARMrrx GPR:$Rm))]>, UnaryDP,
- Requires<[IsARM]>, Sched<[WriteALU]>;
+ [(set GPR:$Rd, (ARMrrx GPR:$Rm, CPSR))]>,
+ UnaryDP, Requires<[IsARM]>, Sched<[WriteALU]>;
// These aren't really mov instructions, but we have to define them this way
// due to glue operands.
let Defs = [CPSR] in {
-def MOVsrl_glue : PseudoInst<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
- [(set GPR:$dst, (ARMsrl_glue GPR:$src))]>, UnaryDP,
- Sched<[WriteALU]>, Requires<[IsARM]>;
-def MOVsra_glue : PseudoInst<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
- [(set GPR:$dst, (ARMsra_glue GPR:$src))]>, UnaryDP,
- Sched<[WriteALU]>, Requires<[IsARM]>;
+ def MOVsrl_glue : PseudoInst<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
+ [(set GPR:$dst, CPSR, (ARMsrl_glue GPR:$src))]>,
+ UnaryDP, Sched<[WriteALU]>, Requires<[IsARM]>;
+ def MOVsra_glue : PseudoInst<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
+ [(set GPR:$dst, CPSR, (ARMsra_glue GPR:$src))]>,
+ UnaryDP, Sched<[WriteALU]>, Requires<[IsARM]>;
}
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/ARM/ARMInstrThumb2.td b/llvm/lib/Target/ARM/ARMInstrThumb2.td
index cb20aacb539ad9..260d48491bd24a 100644
--- a/llvm/lib/Target/ARM/ARMInstrThumb2.td
+++ b/llvm/lib/Target/ARM/ARMInstrThumb2.td
@@ -2787,8 +2787,9 @@ def : T2Pat<(rotr rGPR:$lhs, (and rGPR:$rhs, lo5AllOne)),
let Uses = [CPSR] in {
def t2RRX : T2sTwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi,
- "rrx", "\t$Rd, $Rm",
- [(set rGPR:$Rd, (ARMrrx rGPR:$Rm))]>, Sched<[WriteALU]> {
+ "rrx", "\t$Rd, $Rm",
+ [(set rGPR:$Rd, (ARMrrx rGPR:$Rm, CPSR))]>,
+ Sched<[WriteALU]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = 0b0010;
@@ -2801,11 +2802,11 @@ def t2RRX : T2sTwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi,
}
let isCodeGenOnly = 1, Defs = [CPSR] in {
-def t2MOVsrl_glue : T2TwoRegShiftImm<
- (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi,
- "lsrs", ".w\t$Rd, $Rm, #1",
- [(set rGPR:$Rd, (ARMsrl_glue rGPR:$Rm))]>,
- Sched<[WriteALU]> {
+def t2MOVsrl_glue
+ : T2TwoRegShiftImm<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi,
+ "lsrs", ".w\t$Rd, $Rm, #1",
+ [(set rGPR:$Rd, CPSR, (ARMsrl_glue rGPR:$Rm))]>,
+ Sched<[WriteALU]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = 0b0010;
@@ -2816,11 +2817,11 @@ def t2MOVsrl_glue : T2TwoRegShiftImm<
let Inst{14-12} = 0b000;
let Inst{7-6} = 0b01;
}
-def t2MOVsra_glue : T2TwoRegShiftImm<
- (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi,
- "asrs", ".w\t$Rd, $Rm, #1",
- [(set rGPR:$Rd, (ARMsra_glue rGPR:$Rm))]>,
- Sched<[WriteALU]> {
+def t2MOVsra_glue
+ : T2TwoRegShiftImm<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi,
+ "asrs", ".w\t$Rd, $Rm, #1",
+ [(set rGPR:$Rd, CPSR, (ARMsra_glue rGPR:$Rm))]>,
+ Sched<[WriteALU]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = 0b0010;
diff --git a/llvm/test/CodeGen/ARM/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/ARM/urem-seteq-illegal-types.ll
index 8900d5f541e8aa..b85cb3a4f191c8 100644
--- a/llvm/test/CodeGen/ARM/urem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/ARM/urem-seteq-illegal-types.ll
@@ -628,13 +628,13 @@ define i1 @test_urem_larger(i63 %X) nounwind {
; ARM5-NEXT: mla r0, r1, r12, r4
; ARM5-NEXT: bic r0, r0, #-2147483648
; ARM5-NEXT: lsrs r0, r0, #1
-; ARM5-NEXT: rrx r1, r3
+; ARM5-NEXT: rrx r2, r3
; ARM5-NEXT: orr r0, r0, r3, lsl #30
; ARM5-NEXT: ldr r3, .LCPI5_2
-; ARM5-NEXT: bic r2, r0, #-2147483648
+; ARM5-NEXT: bic r1, r0, #-2147483648
; ARM5-NEXT: mov r0, #0
-; ARM5-NEXT: subs r1, r1, r3
-; ARM5-NEXT: sbcs r1, r2, #1
+; ARM5-NEXT: subs r2, r2, r3
+; ARM5-NEXT: sbcs r1, r1, #1
; ARM5-NEXT: movlo r0, #1
; ARM5-NEXT: pop {r4, pc}
; ARM5-NEXT: .p2align 2
@@ -656,13 +656,13 @@ define i1 @test_urem_larger(i63 %X) nounwind {
; ARM6-NEXT: mla r0, r1, r12, r0
; ARM6-NEXT: bic r0, r0, #-2147483648
; ARM6-NEXT: lsrs r0, r0, #1
-; ARM6-NEXT: rrx r1, r3
+; ARM6-NEXT: rrx r2, r3
; ARM6-NEXT: orr r0, r0, r3, lsl #30
; ARM6-NEXT: ldr r3, .LCPI5_2
-; ARM6-NEXT: bic r2, r0, #-2147483648
+; ARM6-NEXT: bic r1, r0, #-2147483648
; ARM6-NEXT: mov r0, #0
-; ARM6-NEXT: subs r1, r1, r3
-; ARM6-NEXT: sbcs r1, r2, #1
+; ARM6-NEXT: subs r2, r2, r3
+; ARM6-NEXT: sbcs r1, r1, #1
; ARM6-NEXT: movlo r0, #1
; ARM6-NEXT: pop {r11, pc}
; ARM6-NEXT: .p2align 2
@@ -686,14 +686,14 @@ define i1 @test_urem_larger(i63 %X) nounwind {
; ARM7-NEXT: mla r0, r1, r12, r0
; ARM7-NEXT: bic r0, r0, #-2147483648
; ARM7-NEXT: lsrs r0, r0, #1
-; ARM7-NEXT: rrx r1, r3
+; ARM7-NEXT: rrx r2, r3
; ARM7-NEXT: orr r0, r0, r3, lsl #30
; ARM7-NEXT: movw r3, #24026
-; ARM7-NEXT: bic r2, r0, #-2147483648
+; ARM7-NEXT: bic r1, r0, #-2147483648
; ARM7-NEXT: movt r3, #48461
-; ARM7-NEXT: subs r1, r1, r3
+; ARM7-NEXT: subs r2, r2, r3
; ARM7-NEXT: mov r0, #0
-; ARM7-NEXT: sbcs r1, r2, #1
+; ARM7-NEXT: sbcs r1, r1, #1
; ARM7-NEXT: movwlo r0, #1
; ARM7-NEXT: pop {r11, pc}
;
@@ -709,14 +709,14 @@ define i1 @test_urem_larger(i63 %X) nounwind {
; ARM8-NEXT: mla r0, r1, r12, r0
; ARM8-NEXT: bic r0, r0, #-2147483648
; ARM8-NEXT: lsrs r0, r0, #1
-; ARM8-NEXT: rrx r1, r3
+; ARM8-NEXT: rrx r2, r3
; ARM8-NEXT: orr r0, r0, r3, lsl #30
; ARM8-NEXT: movw r3, #24026
-; ARM8-NEXT: bic r2, r0, #-2147483648
+; ARM8-NEXT: bic r1, r0, #-2147483648
; ARM8-NEXT: movt r3, #48461
-; ARM8-NEXT: subs r1, r1, r3
+; ARM8-NEXT: subs r2, r2, r3
; ARM8-NEXT: mov r0, #0
-; ARM8-NEXT: sbcs r1, r2, #1
+; ARM8-NEXT: sbcs r1, r1, #1
; ARM8-NEXT: movwlo r0, #1
; ARM8-NEXT: pop {r11, pc}
;
@@ -732,14 +732,14 @@ define i1 @test_urem_larger(i63 %X) nounwind {
; NEON7-NEXT: mla r0, r1, r12, r0
; NEON7-NEXT: bic r0, r0, #-2147483648
; NEON7-NEXT: lsrs r0, r0, #1
-; NEON7-NEXT: rrx r1, r3
+; NEON7-NEXT: rrx r2, r3
; NEON7-NEXT: orr r0, r0, r3, lsl #30
; NEON7-NEXT: movw r3, #24026
-; NEON7-NEXT: bic r2, r0, #-2147483648
+; NEON7-NEXT: bic r1, r0, #-2147483648
; NEON7-NEXT: movt r3, #48461
-; NEON7-NEXT: subs r1, r1, r3
+; NEON7-NEXT: subs r2, r2, r3
; NEON7-NEXT: mov r0, #0
-; NEON7-NEXT: sbcs r1, r2, #1
+; NEON7-NEXT: sbcs r1, r1, #1
; NEON7-NEXT: movwlo r0, #1
; NEON7-NEXT: pop {r11, pc}
;
@@ -755,14 +755,14 @@ define i1 @test_urem_larger(i63 %X) nounwind {
; NEON8-NEXT: mla r0, r1, r12, r0
; NEON8-NEXT: bic r0, r0, #-2147483648
; NEON8-NEXT: lsrs r0, r0, #1
-; NEON8-NEXT: rrx r1, r3
+; NEON8-NEXT: rrx r2, r3
; NEON8-NEXT: orr r0, r0, r3, lsl #30
; NEON8-NEXT: movw r3, #24026
-; NEON8-NEXT: bic r2, r0, #-2147483648
+; NEON8-NEXT: bic r1, r0, #-2147483648
; NEON8-NEXT: movt r3, #48461
-; NEON8-NEXT: subs r1, r1, r3
+; NEON8-NEXT: subs r2, r2, r3
; NEON8-NEXT: mov r0, #0
-; NEON8-NEXT: sbcs r1, r2, #1
+; NEON8-NEXT: sbcs r1, r1, #1
; NEON8-NEXT: movwlo r0, #1
; NEON8-NEXT: pop {r11, pc}
%urem = urem i63 %X, 1234567890
>From 776a241b15812f10f6dd7c3606027ca31fe41cae Mon Sep 17 00:00:00 2001
From: Sergei Barannikov <barannikov88 at gmail.com>
Date: Sun, 17 Nov 2024 23:23:28 +0300
Subject: [PATCH 2/3] Try to get the naming right
---
llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp | 6 ++---
llvm/lib/Target/ARM/ARMISelLowering.cpp | 12 ++++-----
llvm/lib/Target/ARM/ARMISelLowering.h | 8 +++---
llvm/lib/Target/ARM/ARMInstrInfo.td | 27 +++++++++-----------
llvm/lib/Target/ARM/ARMInstrThumb2.td | 16 ++++++------
llvm/lib/Target/ARM/ARMScheduleM7.td | 4 +--
llvm/lib/Target/ARM/ARMScheduleM85.td | 2 +-
7 files changed, 36 insertions(+), 39 deletions(-)
diff --git a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 8e79a0a344067f..3fda15a4290178 100644
--- a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -2590,14 +2590,14 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
return true;
}
- case ARM::MOVsrl_glue:
- case ARM::MOVsra_glue: {
+ case ARM::LSRs1:
+ case ARM::ASRs1: {
// These are just fancy MOVs instructions.
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsi),
MI.getOperand(0).getReg())
.add(MI.getOperand(1))
.addImm(ARM_AM::getSORegOpc(
- (Opcode == ARM::MOVsrl_glue ? ARM_AM::lsr : ARM_AM::asr), 1))
+ (Opcode == ARM::LSRs1 ? ARM_AM::lsr : ARM_AM::asr), 1))
.add(predOps(ARMCC::AL))
.addReg(ARM::CPSR, RegState::Define);
MI.eraseFromParent();
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index aed91c7bb8a2a9..ab9a8ce3c55dfc 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -1733,14 +1733,14 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
MAKE_CASE(ARMISD::ASRL)
MAKE_CASE(ARMISD::LSRL)
MAKE_CASE(ARMISD::LSLL)
- MAKE_CASE(ARMISD::SRL_GLUE)
- MAKE_CASE(ARMISD::SRA_GLUE)
+ MAKE_CASE(ARMISD::LSLS)
+ MAKE_CASE(ARMISD::LSRS1)
+ MAKE_CASE(ARMISD::ASRS1)
MAKE_CASE(ARMISD::RRX)
MAKE_CASE(ARMISD::ADDC)
MAKE_CASE(ARMISD::ADDE)
MAKE_CASE(ARMISD::SUBC)
MAKE_CASE(ARMISD::SUBE)
- MAKE_CASE(ARMISD::LSLS)
MAKE_CASE(ARMISD::VMOVRRD)
MAKE_CASE(ARMISD::VMOVDRR)
MAKE_CASE(ARMISD::VMOVhr)
@@ -6850,9 +6850,9 @@ static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG,
SDValue Lo, Hi;
std::tie(Lo, Hi) = DAG.SplitScalar(N->getOperand(0), dl, MVT::i32, MVT::i32);
- // First, build a SRA_GLUE/SRL_GLUE op, which shifts the top part by one and
- // captures the result into a carry flag.
- unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_GLUE:ARMISD::SRA_GLUE;
+ // First, build a LSRS1/ASRS1 op, which shifts the top part by one and
+ // captures the shifted out bit into a carry flag.
+ unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::LSRS1 : ARMISD::ASRS1;
Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, FlagsVT), Hi);
// The low part is an ARMISD::RRX operand, which shifts the carry in.
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index 0e086f3340ccb4..1cc014a5333c9a 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -101,15 +101,15 @@ class VectorType;
BCC_i64,
- SRL_GLUE, // V,Flag = srl_flag X -> srl X, 1 + save carry out.
- SRA_GLUE, // V,Flag = sra_flag X -> sra X, 1 + save carry out.
- RRX, // V = RRX X, Flag -> srl X, 1 + shift in carry flag.
+ LSLS, // Flag-setting shift left.
+ LSRS1, // Flag-setting logical shift right by one bit.
+ ASRS1, // Flag-setting arithmetic shift right by one bit.
+ RRX, // Flag-setting shift right one bit with carry in.
ADDC, // Add with carry
ADDE, // Add using carry
SUBC, // Sub with carry
SUBE, // Sub using carry
- LSLS, // Shift left producing carry
VMOVRRD, // double to two gprs.
VMOVDRR, // Two gprs to double.
diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td
index 9fa3c9a6c25fd5..f3900eab92770b 100644
--- a/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -206,9 +206,9 @@ def ARMasrl : SDNode<"ARMISD::ASRL", SDT_ARMIntShiftParts, []>;
def ARMlsrl : SDNode<"ARMISD::LSRL", SDT_ARMIntShiftParts, []>;
def ARMlsll : SDNode<"ARMISD::LSLL", SDT_ARMIntShiftParts, []>;
-def ARMsrl_glue : SDNode<"ARMISD::SRL_GLUE", SDTIntUnaryOpWithFlagsOut>;
-def ARMsra_glue : SDNode<"ARMISD::SRA_GLUE", SDTIntUnaryOpWithFlagsOut>;
-def ARMrrx : SDNode<"ARMISD::RRX" , SDTIntUnaryOpWithFlagsIn>;
+def ARMlsrs1 : SDNode<"ARMISD::LSRS1", SDTIntUnaryOpWithFlagsOut>;
+def ARMasrs1 : SDNode<"ARMISD::ASRS1", SDTIntUnaryOpWithFlagsOut>;
+def ARMrrx : SDNode<"ARMISD::RRX" , SDTIntUnaryOpWithFlagsIn>;
def ARMaddc : SDNode<"ARMISD::ADDC", SDTBinaryArithWithFlags,
[SDNPCommutative]>;
@@ -3745,20 +3745,17 @@ def : ARMPat<(or GPR:$src, 0xffff0000), (MOVTi16 GPR:$src, 0xffff)>,
Requires<[IsARM, HasV6T2]>;
let Uses = [CPSR] in
-def RRX: PseudoInst<(outs GPR:$Rd), (ins GPR:$Rm), IIC_iMOVsi,
- [(set GPR:$Rd, (ARMrrx GPR:$Rm, CPSR))]>,
- UnaryDP, Requires<[IsARM]>, Sched<[WriteALU]>;
-
-// These aren't really mov instructions, but we have to define them this way
-// due to glue operands.
+def RRX : PseudoInst<(outs GPR:$Rd), (ins GPR:$Rm), IIC_iMOVsi,
+ [(set GPR:$Rd, (ARMrrx GPR:$Rm, CPSR))]>,
+ UnaryDP, Requires<[IsARM]>, Sched<[WriteALU]>;
let Defs = [CPSR] in {
- def MOVsrl_glue : PseudoInst<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
- [(set GPR:$dst, CPSR, (ARMsrl_glue GPR:$src))]>,
- UnaryDP, Sched<[WriteALU]>, Requires<[IsARM]>;
- def MOVsra_glue : PseudoInst<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
- [(set GPR:$dst, CPSR, (ARMsra_glue GPR:$src))]>,
- UnaryDP, Sched<[WriteALU]>, Requires<[IsARM]>;
+ def LSRs1 : PseudoInst<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
+ [(set GPR:$dst, CPSR, (ARMlsrs1 GPR:$src))]>,
+ UnaryDP, Sched<[WriteALU]>, Requires<[IsARM]>;
+ def ASRs1 : PseudoInst<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
+ [(set GPR:$dst, CPSR, (ARMasrs1 GPR:$src))]>,
+ UnaryDP, Sched<[WriteALU]>, Requires<[IsARM]>;
}
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/ARM/ARMInstrThumb2.td b/llvm/lib/Target/ARM/ARMInstrThumb2.td
index 260d48491bd24a..fdf808e648037c 100644
--- a/llvm/lib/Target/ARM/ARMInstrThumb2.td
+++ b/llvm/lib/Target/ARM/ARMInstrThumb2.td
@@ -2801,11 +2801,12 @@ def t2RRX : T2sTwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi,
}
}
+// These differ from t2LSRri / t2ASRri in that they are flag-setting
+// and have a hardcoded shift amount = 1.
let isCodeGenOnly = 1, Defs = [CPSR] in {
-def t2MOVsrl_glue
- : T2TwoRegShiftImm<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi,
- "lsrs", ".w\t$Rd, $Rm, #1",
- [(set rGPR:$Rd, CPSR, (ARMsrl_glue rGPR:$Rm))]>,
+def t2LSRs1 : T2TwoRegShiftImm<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi,
+ "lsrs", ".w\t$Rd, $Rm, #1",
+ [(set rGPR:$Rd, CPSR, (ARMlsrs1 rGPR:$Rm))]>,
Sched<[WriteALU]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
@@ -2817,10 +2818,9 @@ def t2MOVsrl_glue
let Inst{14-12} = 0b000;
let Inst{7-6} = 0b01;
}
-def t2MOVsra_glue
- : T2TwoRegShiftImm<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi,
- "asrs", ".w\t$Rd, $Rm, #1",
- [(set rGPR:$Rd, CPSR, (ARMsra_glue rGPR:$Rm))]>,
+def t2ASRs1 : T2TwoRegShiftImm<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi,
+ "asrs", ".w\t$Rd, $Rm, #1",
+ [(set rGPR:$Rd, CPSR, (ARMasrs1 rGPR:$Rm))]>,
Sched<[WriteALU]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
diff --git a/llvm/lib/Target/ARM/ARMScheduleM7.td b/llvm/lib/Target/ARM/ARMScheduleM7.td
index 25bc8401ca84ab..f81a1d87e7e5e4 100644
--- a/llvm/lib/Target/ARM/ARMScheduleM7.td
+++ b/llvm/lib/Target/ARM/ARMScheduleM7.td
@@ -325,7 +325,7 @@ def M7Ex1ReadNoFastBypass : SchedReadAdvance<-1, [WriteLd, M7LoadLatency1]>;
def : InstRW<[WriteALUsi, M7Ex1ReadNoFastBypass, M7Read_ISS],
(instregex "t2(ADC|ADDS|ADD|BIC|EOR|ORN|ORR|RSBS|RSB|SBC|SUBS)rs$",
"t2(SUB|CMP|CMNz|TEQ|TST)rs$",
- "t2MOVsr(a|l)")>;
+ "t2(A|L)SRs1")>;
def : InstRW<[WriteALUsi, M7Read_ISS],
(instregex "t2MVNs")>;
@@ -335,7 +335,7 @@ def : InstRW<[WriteALUsi, M7Read_ISS],
// but the results prove to be better than trying to get them exact.
def : InstRW<[M7WriteShift2, M7Read_ISS], (instregex "t2RRX$")>;
-def : InstRW<[WriteALUsi], (instregex "(t|t2)(LSL|LSR|ASR|ROR)")>;
+def : InstRW<[WriteALUsi], (instregex "(t|t2)(LSL|LSR|ASR|ROR)r")>;
// Instructions that use the shifter, but have normal timing.
diff --git a/llvm/lib/Target/ARM/ARMScheduleM85.td b/llvm/lib/Target/ARM/ARMScheduleM85.td
index cd375a16305ec8..e9938d857e6afc 100644
--- a/llvm/lib/Target/ARM/ARMScheduleM85.td
+++ b/llvm/lib/Target/ARM/ARMScheduleM85.td
@@ -436,7 +436,7 @@ def : InstRW<[M85WriteALUsi, M85ReadALUsi],
def : InstRW<[M85WriteShift2],
(instregex "t2RRX$")>;
def : InstRW<[WriteALU],
- (instregex "(t|t2)(LSL|LSR|ASR|ROR|SBFX|UBFX)", "t2MOVsr(a|l)")>;
+ (instregex "(t|t2)(LSL|LSR|ASR|ROR|SBFX|UBFX)")>;
// Instructions that use the shifter, but have normal timing
>From c36315322e269d03f37f3ebd4bfb3c60ecd37241 Mon Sep 17 00:00:00 2001
From: Sergei Barannikov <barannikov88 at gmail.com>
Date: Sun, 17 Nov 2024 23:25:10 +0300
Subject: [PATCH 3/3] Fix comment & indentation
---
llvm/lib/Target/ARM/ARMISelLowering.h | 2 +-
llvm/lib/Target/ARM/ARMInstrThumb2.td | 4 ++--
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index 1cc014a5333c9a..344a0ad91e5178 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -104,7 +104,7 @@ class VectorType;
LSLS, // Flag-setting shift left.
LSRS1, // Flag-setting logical shift right by one bit.
ASRS1, // Flag-setting arithmetic shift right by one bit.
- RRX, // Flag-setting shift right one bit with carry in.
+ RRX, // Shift right one bit with carry in.
ADDC, // Add with carry
ADDE, // Add using carry
diff --git a/llvm/lib/Target/ARM/ARMInstrThumb2.td b/llvm/lib/Target/ARM/ARMInstrThumb2.td
index fdf808e648037c..ca4f13e1ab8a1a 100644
--- a/llvm/lib/Target/ARM/ARMInstrThumb2.td
+++ b/llvm/lib/Target/ARM/ARMInstrThumb2.td
@@ -2807,7 +2807,7 @@ let isCodeGenOnly = 1, Defs = [CPSR] in {
def t2LSRs1 : T2TwoRegShiftImm<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi,
"lsrs", ".w\t$Rd, $Rm, #1",
[(set rGPR:$Rd, CPSR, (ARMlsrs1 rGPR:$Rm))]>,
- Sched<[WriteALU]> {
+ Sched<[WriteALU]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = 0b0010;
@@ -2821,7 +2821,7 @@ def t2LSRs1 : T2TwoRegShiftImm<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi,
def t2ASRs1 : T2TwoRegShiftImm<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi,
"asrs", ".w\t$Rd, $Rm, #1",
[(set rGPR:$Rd, CPSR, (ARMasrs1 rGPR:$Rm))]>,
- Sched<[WriteALU]> {
+ Sched<[WriteALU]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = 0b0010;
More information about the llvm-commits
mailing list