[llvm] [ARM] Stop gluing 1-bit shifts (PR #116547)
Sergei Barannikov via llvm-commits
llvm-commits at lists.llvm.org
Sun Nov 17 08:22:05 PST 2024
https://github.com/s-barannikov created https://github.com/llvm/llvm-project/pull/116547
Use normal data flow instead.
Several more nodes are still glued; I'll try to change that in subsequent patches.
From 637ab085014f776cb97cc53ef6d34a255c0a201d Mon Sep 17 00:00:00 2001
From: Sergei Barannikov <barannikov88 at gmail.com>
Date: Sun, 17 Nov 2024 19:20:14 +0300
Subject: [PATCH] [ARM] Stop gluing 1-bit shifts
Use normal data flow instead.
There are several more nodes that are still glued, I'll try to change
that in subsequent patches.
---
llvm/lib/Target/ARM/ARMISelLowering.cpp | 5 +-
llvm/lib/Target/ARM/ARMInstrInfo.td | 37 +++++++++-----
llvm/lib/Target/ARM/ARMInstrThumb2.td | 25 +++++-----
.../CodeGen/ARM/urem-seteq-illegal-types.ll | 48 +++++++++----------
4 files changed, 67 insertions(+), 48 deletions(-)
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 7fce91f97f3618..aed91c7bb8a2a9 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -149,6 +149,9 @@ MVEMaxSupportedInterleaveFactor("mve-max-interleave-factor", cl::Hidden,
cl::desc("Maximum interleave factor for MVE VLDn to generate."),
cl::init(2));
+/// Value type used for "flags" operands / results (either CPSR or FPSCR_NZCV).
+constexpr MVT FlagsVT = MVT::i32;
+
// The APCS parameter registers.
static const MCPhysReg GPRArgRegs[] = {
ARM::R0, ARM::R1, ARM::R2, ARM::R3
@@ -6850,7 +6853,7 @@ static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG,
// First, build a SRA_GLUE/SRL_GLUE op, which shifts the top part by one and
// captures the result into a carry flag.
unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_GLUE:ARMISD::SRA_GLUE;
- Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), Hi);
+ Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, FlagsVT), Hi);
// The low part is an ARMISD::RRX operand, which shifts the carry in.
Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1));
diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td
index d24d4af36f0d86..9fa3c9a6c25fd5 100644
--- a/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -14,6 +14,9 @@
// ARM specific DAG Nodes.
//
+/// Value type used for "flags" operands / results (either CPSR or FPSCR_NZCV).
+defvar FlagsVT = i32;
+
// Type profiles.
def SDT_ARMCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32>,
SDTCisVT<1, i32> ]>;
@@ -77,6 +80,18 @@ def SDT_ARMMEMCPY : SDTypeProfile<2, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
SDTCisVT<2, i32>, SDTCisVT<3, i32>,
SDTCisVT<4, i32>]>;
+def SDTIntUnaryOpWithFlagsOut : SDTypeProfile<2, 1, [
+ SDTCisInt<0>, // result
+ SDTCisVT<1, FlagsVT>, // out flags
+ SDTCisSameAs<2, 0> // operand
+]>;
+
+def SDTIntUnaryOpWithFlagsIn : SDTypeProfile<1, 2, [
+ SDTCisInt<0>, // result (index 0)
+ SDTCisSameAs<1, 0>, // operand (index 1), same type as the result
+ SDTCisVT<2, FlagsVT> // in flags (index 2; results are numbered before operands)
+]>;
+
def SDTBinaryArithWithFlags : SDTypeProfile<2, 2,
[SDTCisSameAs<0, 2>,
SDTCisSameAs<0, 3>,
@@ -191,9 +206,9 @@ def ARMasrl : SDNode<"ARMISD::ASRL", SDT_ARMIntShiftParts, []>;
def ARMlsrl : SDNode<"ARMISD::LSRL", SDT_ARMIntShiftParts, []>;
def ARMlsll : SDNode<"ARMISD::LSLL", SDT_ARMIntShiftParts, []>;
-def ARMsrl_glue : SDNode<"ARMISD::SRL_GLUE", SDTIntUnaryOp, [SDNPOutGlue]>;
-def ARMsra_glue : SDNode<"ARMISD::SRA_GLUE", SDTIntUnaryOp, [SDNPOutGlue]>;
-def ARMrrx : SDNode<"ARMISD::RRX" , SDTIntUnaryOp, [SDNPInGlue ]>;
+def ARMsrl_glue : SDNode<"ARMISD::SRL_GLUE", SDTIntUnaryOpWithFlagsOut>;
+def ARMsra_glue : SDNode<"ARMISD::SRA_GLUE", SDTIntUnaryOpWithFlagsOut>;
+def ARMrrx : SDNode<"ARMISD::RRX" , SDTIntUnaryOpWithFlagsIn>;
def ARMaddc : SDNode<"ARMISD::ADDC", SDTBinaryArithWithFlags,
[SDNPCommutative]>;
@@ -3731,19 +3746,19 @@ def : ARMPat<(or GPR:$src, 0xffff0000), (MOVTi16 GPR:$src, 0xffff)>,
let Uses = [CPSR] in
def RRX: PseudoInst<(outs GPR:$Rd), (ins GPR:$Rm), IIC_iMOVsi,
- [(set GPR:$Rd, (ARMrrx GPR:$Rm))]>, UnaryDP,
- Requires<[IsARM]>, Sched<[WriteALU]>;
+ [(set GPR:$Rd, (ARMrrx GPR:$Rm, CPSR))]>,
+ UnaryDP, Requires<[IsARM]>, Sched<[WriteALU]>;
// These aren't really mov instructions, but we have to define them this way
// due to glue operands.
let Defs = [CPSR] in {
-def MOVsrl_glue : PseudoInst<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
- [(set GPR:$dst, (ARMsrl_glue GPR:$src))]>, UnaryDP,
- Sched<[WriteALU]>, Requires<[IsARM]>;
-def MOVsra_glue : PseudoInst<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
- [(set GPR:$dst, (ARMsra_glue GPR:$src))]>, UnaryDP,
- Sched<[WriteALU]>, Requires<[IsARM]>;
+ def MOVsrl_glue : PseudoInst<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
+ [(set GPR:$dst, CPSR, (ARMsrl_glue GPR:$src))]>,
+ UnaryDP, Sched<[WriteALU]>, Requires<[IsARM]>;
+ def MOVsra_glue : PseudoInst<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
+ [(set GPR:$dst, CPSR, (ARMsra_glue GPR:$src))]>,
+ UnaryDP, Sched<[WriteALU]>, Requires<[IsARM]>;
}
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/ARM/ARMInstrThumb2.td b/llvm/lib/Target/ARM/ARMInstrThumb2.td
index cb20aacb539ad9..260d48491bd24a 100644
--- a/llvm/lib/Target/ARM/ARMInstrThumb2.td
+++ b/llvm/lib/Target/ARM/ARMInstrThumb2.td
@@ -2787,8 +2787,9 @@ def : T2Pat<(rotr rGPR:$lhs, (and rGPR:$rhs, lo5AllOne)),
let Uses = [CPSR] in {
def t2RRX : T2sTwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi,
- "rrx", "\t$Rd, $Rm",
- [(set rGPR:$Rd, (ARMrrx rGPR:$Rm))]>, Sched<[WriteALU]> {
+ "rrx", "\t$Rd, $Rm",
+ [(set rGPR:$Rd, (ARMrrx rGPR:$Rm, CPSR))]>,
+ Sched<[WriteALU]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = 0b0010;
@@ -2801,11 +2802,11 @@ def t2RRX : T2sTwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi,
}
let isCodeGenOnly = 1, Defs = [CPSR] in {
-def t2MOVsrl_glue : T2TwoRegShiftImm<
- (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi,
- "lsrs", ".w\t$Rd, $Rm, #1",
- [(set rGPR:$Rd, (ARMsrl_glue rGPR:$Rm))]>,
- Sched<[WriteALU]> {
+def t2MOVsrl_glue
+ : T2TwoRegShiftImm<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi,
+ "lsrs", ".w\t$Rd, $Rm, #1",
+ [(set rGPR:$Rd, CPSR, (ARMsrl_glue rGPR:$Rm))]>,
+ Sched<[WriteALU]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = 0b0010;
@@ -2816,11 +2817,11 @@ def t2MOVsrl_glue : T2TwoRegShiftImm<
let Inst{14-12} = 0b000;
let Inst{7-6} = 0b01;
}
-def t2MOVsra_glue : T2TwoRegShiftImm<
- (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi,
- "asrs", ".w\t$Rd, $Rm, #1",
- [(set rGPR:$Rd, (ARMsra_glue rGPR:$Rm))]>,
- Sched<[WriteALU]> {
+def t2MOVsra_glue
+ : T2TwoRegShiftImm<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi,
+ "asrs", ".w\t$Rd, $Rm, #1",
+ [(set rGPR:$Rd, CPSR, (ARMsra_glue rGPR:$Rm))]>,
+ Sched<[WriteALU]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = 0b0010;
diff --git a/llvm/test/CodeGen/ARM/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/ARM/urem-seteq-illegal-types.ll
index 8900d5f541e8aa..b85cb3a4f191c8 100644
--- a/llvm/test/CodeGen/ARM/urem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/ARM/urem-seteq-illegal-types.ll
@@ -628,13 +628,13 @@ define i1 @test_urem_larger(i63 %X) nounwind {
; ARM5-NEXT: mla r0, r1, r12, r4
; ARM5-NEXT: bic r0, r0, #-2147483648
; ARM5-NEXT: lsrs r0, r0, #1
-; ARM5-NEXT: rrx r1, r3
+; ARM5-NEXT: rrx r2, r3
; ARM5-NEXT: orr r0, r0, r3, lsl #30
; ARM5-NEXT: ldr r3, .LCPI5_2
-; ARM5-NEXT: bic r2, r0, #-2147483648
+; ARM5-NEXT: bic r1, r0, #-2147483648
; ARM5-NEXT: mov r0, #0
-; ARM5-NEXT: subs r1, r1, r3
-; ARM5-NEXT: sbcs r1, r2, #1
+; ARM5-NEXT: subs r2, r2, r3
+; ARM5-NEXT: sbcs r1, r1, #1
; ARM5-NEXT: movlo r0, #1
; ARM5-NEXT: pop {r4, pc}
; ARM5-NEXT: .p2align 2
@@ -656,13 +656,13 @@ define i1 @test_urem_larger(i63 %X) nounwind {
; ARM6-NEXT: mla r0, r1, r12, r0
; ARM6-NEXT: bic r0, r0, #-2147483648
; ARM6-NEXT: lsrs r0, r0, #1
-; ARM6-NEXT: rrx r1, r3
+; ARM6-NEXT: rrx r2, r3
; ARM6-NEXT: orr r0, r0, r3, lsl #30
; ARM6-NEXT: ldr r3, .LCPI5_2
-; ARM6-NEXT: bic r2, r0, #-2147483648
+; ARM6-NEXT: bic r1, r0, #-2147483648
; ARM6-NEXT: mov r0, #0
-; ARM6-NEXT: subs r1, r1, r3
-; ARM6-NEXT: sbcs r1, r2, #1
+; ARM6-NEXT: subs r2, r2, r3
+; ARM6-NEXT: sbcs r1, r1, #1
; ARM6-NEXT: movlo r0, #1
; ARM6-NEXT: pop {r11, pc}
; ARM6-NEXT: .p2align 2
@@ -686,14 +686,14 @@ define i1 @test_urem_larger(i63 %X) nounwind {
; ARM7-NEXT: mla r0, r1, r12, r0
; ARM7-NEXT: bic r0, r0, #-2147483648
; ARM7-NEXT: lsrs r0, r0, #1
-; ARM7-NEXT: rrx r1, r3
+; ARM7-NEXT: rrx r2, r3
; ARM7-NEXT: orr r0, r0, r3, lsl #30
; ARM7-NEXT: movw r3, #24026
-; ARM7-NEXT: bic r2, r0, #-2147483648
+; ARM7-NEXT: bic r1, r0, #-2147483648
; ARM7-NEXT: movt r3, #48461
-; ARM7-NEXT: subs r1, r1, r3
+; ARM7-NEXT: subs r2, r2, r3
; ARM7-NEXT: mov r0, #0
-; ARM7-NEXT: sbcs r1, r2, #1
+; ARM7-NEXT: sbcs r1, r1, #1
; ARM7-NEXT: movwlo r0, #1
; ARM7-NEXT: pop {r11, pc}
;
@@ -709,14 +709,14 @@ define i1 @test_urem_larger(i63 %X) nounwind {
; ARM8-NEXT: mla r0, r1, r12, r0
; ARM8-NEXT: bic r0, r0, #-2147483648
; ARM8-NEXT: lsrs r0, r0, #1
-; ARM8-NEXT: rrx r1, r3
+; ARM8-NEXT: rrx r2, r3
; ARM8-NEXT: orr r0, r0, r3, lsl #30
; ARM8-NEXT: movw r3, #24026
-; ARM8-NEXT: bic r2, r0, #-2147483648
+; ARM8-NEXT: bic r1, r0, #-2147483648
; ARM8-NEXT: movt r3, #48461
-; ARM8-NEXT: subs r1, r1, r3
+; ARM8-NEXT: subs r2, r2, r3
; ARM8-NEXT: mov r0, #0
-; ARM8-NEXT: sbcs r1, r2, #1
+; ARM8-NEXT: sbcs r1, r1, #1
; ARM8-NEXT: movwlo r0, #1
; ARM8-NEXT: pop {r11, pc}
;
@@ -732,14 +732,14 @@ define i1 @test_urem_larger(i63 %X) nounwind {
; NEON7-NEXT: mla r0, r1, r12, r0
; NEON7-NEXT: bic r0, r0, #-2147483648
; NEON7-NEXT: lsrs r0, r0, #1
-; NEON7-NEXT: rrx r1, r3
+; NEON7-NEXT: rrx r2, r3
; NEON7-NEXT: orr r0, r0, r3, lsl #30
; NEON7-NEXT: movw r3, #24026
-; NEON7-NEXT: bic r2, r0, #-2147483648
+; NEON7-NEXT: bic r1, r0, #-2147483648
; NEON7-NEXT: movt r3, #48461
-; NEON7-NEXT: subs r1, r1, r3
+; NEON7-NEXT: subs r2, r2, r3
; NEON7-NEXT: mov r0, #0
-; NEON7-NEXT: sbcs r1, r2, #1
+; NEON7-NEXT: sbcs r1, r1, #1
; NEON7-NEXT: movwlo r0, #1
; NEON7-NEXT: pop {r11, pc}
;
@@ -755,14 +755,14 @@ define i1 @test_urem_larger(i63 %X) nounwind {
; NEON8-NEXT: mla r0, r1, r12, r0
; NEON8-NEXT: bic r0, r0, #-2147483648
; NEON8-NEXT: lsrs r0, r0, #1
-; NEON8-NEXT: rrx r1, r3
+; NEON8-NEXT: rrx r2, r3
; NEON8-NEXT: orr r0, r0, r3, lsl #30
; NEON8-NEXT: movw r3, #24026
-; NEON8-NEXT: bic r2, r0, #-2147483648
+; NEON8-NEXT: bic r1, r0, #-2147483648
; NEON8-NEXT: movt r3, #48461
-; NEON8-NEXT: subs r1, r1, r3
+; NEON8-NEXT: subs r2, r2, r3
; NEON8-NEXT: mov r0, #0
-; NEON8-NEXT: sbcs r1, r2, #1
+; NEON8-NEXT: sbcs r1, r1, #1
; NEON8-NEXT: movwlo r0, #1
; NEON8-NEXT: pop {r11, pc}
%urem = urem i63 %X, 1234567890
More information about the llvm-commits
mailing list