[llvm] [AArch64] Remove copy in SVE/SME predicate spill and fill (PR #81716)
Sam Tebbs via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 29 07:02:46 PST 2024
https://github.com/SamTebbs33 updated https://github.com/llvm/llvm-project/pull/81716
>From f0df48acfb63e163ad29e69e5e64f4ee8f315cec Mon Sep 17 00:00:00 2001
From: Samuel Tebbs <samuel.tebbs at arm.com>
Date: Tue, 13 Feb 2024 11:23:34 +0000
Subject: [PATCH 1/9] [AArch64] Remove copy in SVE/SME predicate spill and fill
7dc20ab introduced an extra COPY when spilling a PNR register, which
can't be elided because the input (PNR predicate) and output (PPR predicate)
register classes differ. This patch emits a new ConvertPNRtoPPR
pseudo instruction instead. When the pseudo is expanded, it is simply erased
if the PNR is a subregister of the PPR, since the conversion is then implicit;
otherwise it is lowered to an ORR.
---
.../AArch64/AArch64ExpandPseudoInsts.cpp | 17 +++++++++++
llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 6 ++--
.../lib/Target/AArch64/AArch64SVEInstrInfo.td | 2 ++
.../spillfill-sve-different-predicate.mir | 30 +++++++++++++++++++
llvm/test/CodeGen/AArch64/spillfill-sve.mir | 3 +-
5 files changed, 54 insertions(+), 4 deletions(-)
create mode 100644 llvm/test/CodeGen/AArch64/spillfill-sve-different-predicate.mir
diff --git a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index b2c52b443753dc1..fe51a03bccc3392 100644
--- a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -1112,6 +1112,23 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
default:
break;
+ case AArch64::ConvertPNRtoPPR: {
+ auto TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
+ MachineOperand DstMO = MI.getOperand(0);
+ MachineOperand SrcMO = MI.getOperand(1);
+ unsigned SrcReg = SrcMO.getReg();
+ if (!TRI->isSubRegister(DstMO.getReg(), SrcReg)) {
+ unsigned SrcSuperReg = TRI->getMatchingSuperReg(SrcReg, AArch64::psub,
+ &AArch64::PPRRegClass);
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORR_PPzPP))
+ .add(DstMO)
+ .addReg(SrcSuperReg)
+ .addReg(SrcSuperReg)
+ .addReg(SrcSuperReg);
+ }
+ MI.eraseFromParent();
+ return true;
+ }
case AArch64::BSPv8i8:
case AArch64::BSPv16i8: {
Register DstReg = MI.getOperand(0).getReg();
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 39c96092f10319d..10a846bdb157d55 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -4805,6 +4805,7 @@ void AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
bool Offset = true;
MCRegister PNRReg = MCRegister::NoRegister;
unsigned StackID = TargetStackID::Default;
+ const TargetInstrInfo *TII = MBB.getParent()->getSubtarget().getInstrInfo();
switch (TRI->getSpillSize(*RC)) {
case 1:
if (AArch64::FPR8RegClass.hasSubClassEq(RC))
@@ -4823,8 +4824,9 @@ void AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
"Unexpected register store without SVE2p1 or SME2");
if (SrcReg.isVirtual()) {
auto NewSrcReg =
- MF.getRegInfo().createVirtualRegister(&AArch64::PPRRegClass);
- BuildMI(MBB, MBBI, DebugLoc(), get(TargetOpcode::COPY), NewSrcReg)
+ MF.getRegInfo().createVirtualRegister(&AArch64::PPR_p8to15RegClass);
+ BuildMI(MBB, MBBI, DebugLoc(), TII->get(AArch64::ConvertPNRtoPPR),
+ NewSrcReg)
.addReg(SrcReg);
SrcReg = NewSrcReg;
} else
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 2b0524754b2e9d3..c9dc5a84aa3254e 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -2313,6 +2313,8 @@ let Predicates = [HasBF16, HasSVEorSME] in {
defm BFCVTNT_ZPmZ : sve_bfloat_convert<0b0, "bfcvtnt", int_aarch64_sve_fcvtnt_bf16f32>;
} // End HasBF16, HasSVEorSME
+def ConvertPNRtoPPR : Pseudo<(outs PPRAny:$Pd), (ins PNRAny:$Pm), []>, Sched<[]>;
+
let Predicates = [HasSVEorSME] in {
// InstAliases
def : InstAlias<"mov $Zd, $Zn",
diff --git a/llvm/test/CodeGen/AArch64/spillfill-sve-different-predicate.mir b/llvm/test/CodeGen/AArch64/spillfill-sve-different-predicate.mir
new file mode 100644
index 000000000000000..abbc1d615b66f7d
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/spillfill-sve-different-predicate.mir
@@ -0,0 +1,30 @@
+# RUN: llc -mtriple=aarch64-linux-gnu -start-after=virtregrewriter -stop-after=aarch64-expand-pseudo -mattr=+sme2 -verify-machineinstrs -o - %s \
+# RUN: | FileCheck %s
+
+--- |
+ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+ target triple = "aarch64-unknown-linux-gnu"
+
+ define void @test_convert_different_reg() #0 { entry: unreachable }
+
+ attributes #0 = { "target-features"="+sme2" }
+
+---
+name: test_convert_different_reg
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; CHECK-LABEL: name: test_convert_different_reg
+ ; CHECK: renamable $pn8 = WHILEGE_CXX_B undef $x0, undef $x0, 0, implicit-def dead $nzcv
+ ; CHECK-NEXT: renamable $p9 = ORR_PPzPP $p8, $p8, $p8
+ ; CHECK-NEXT: STR_PXI killed renamable $p9, $sp, 7
+ ; CHECK-NEXT: renamable $p0 = LDR_PXI $sp, 7
+ early-clobber $sp = frame-setup STRXpre killed $fp, $sp, -16
+ frame-setup CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22
+ frame-setup CFI_INSTRUCTION offset $w29, -16
+ renamable $pn8 = WHILEGE_CXX_B undef $x0, undef $x0, 0, implicit-def dead $nzcv
+ renamable $p9 = ConvertPNRtoPPR killed renamable $pn8
+ STR_PXI killed renamable $p9, $sp, 7
+ renamable $p0 = LDR_PXI $sp, 7
+ early-clobber $sp, $fp = frame-destroy LDRXpost $sp, 16
+ RET undef $lr
diff --git a/llvm/test/CodeGen/AArch64/spillfill-sve.mir b/llvm/test/CodeGen/AArch64/spillfill-sve.mir
index ef7d55a1c2395f0..af062d33c5642f6 100644
--- a/llvm/test/CodeGen/AArch64/spillfill-sve.mir
+++ b/llvm/test/CodeGen/AArch64/spillfill-sve.mir
@@ -213,8 +213,7 @@ body: |
; EXPAND-LABEL: name: spills_fills_stack_id_virtreg_pnr
; EXPAND: renamable $pn8 = WHILEGE_CXX_B
- ; EXPAND: $p0 = ORR_PPzPP $p8, $p8, killed $p8
- ; EXPAND: STR_PXI killed renamable $p0, $sp, 7
+ ; EXPAND: STR_PXI killed renamable $p8, $sp, 7
;
; EXPAND: renamable $p0 = LDR_PXI $sp, 7
; EXPAND: $p8 = ORR_PPzPP $p0, $p0, killed $p0, implicit-def $pn8
>From 4990b25b57b42d716fba50c7caef353f8a5cf63c Mon Sep 17 00:00:00 2001
From: Samuel Tebbs <samuel.tebbs at arm.com>
Date: Wed, 21 Feb 2024 16:30:21 +0000
Subject: [PATCH 2/9] fixup: constrain reg class instead of convert
---
.../AArch64/AArch64ExpandPseudoInsts.cpp | 17 -----------
llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 10 ++-----
.../lib/Target/AArch64/AArch64SVEInstrInfo.td | 2 --
.../spillfill-sve-different-predicate.mir | 30 -------------------
llvm/test/CodeGen/AArch64/spillfill-sve.mir | 5 ++--
5 files changed, 4 insertions(+), 60 deletions(-)
delete mode 100644 llvm/test/CodeGen/AArch64/spillfill-sve-different-predicate.mir
diff --git a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index fe51a03bccc3392..b2c52b443753dc1 100644
--- a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -1112,23 +1112,6 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
default:
break;
- case AArch64::ConvertPNRtoPPR: {
- auto TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
- MachineOperand DstMO = MI.getOperand(0);
- MachineOperand SrcMO = MI.getOperand(1);
- unsigned SrcReg = SrcMO.getReg();
- if (!TRI->isSubRegister(DstMO.getReg(), SrcReg)) {
- unsigned SrcSuperReg = TRI->getMatchingSuperReg(SrcReg, AArch64::psub,
- &AArch64::PPRRegClass);
- BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORR_PPzPP))
- .add(DstMO)
- .addReg(SrcSuperReg)
- .addReg(SrcSuperReg)
- .addReg(SrcSuperReg);
- }
- MI.eraseFromParent();
- return true;
- }
case AArch64::BSPv8i8:
case AArch64::BSPv16i8: {
Register DstReg = MI.getOperand(0).getReg();
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 10a846bdb157d55..4593ea67c00f22e 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -4805,7 +4805,6 @@ void AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
bool Offset = true;
MCRegister PNRReg = MCRegister::NoRegister;
unsigned StackID = TargetStackID::Default;
- const TargetInstrInfo *TII = MBB.getParent()->getSubtarget().getInstrInfo();
switch (TRI->getSpillSize(*RC)) {
case 1:
if (AArch64::FPR8RegClass.hasSubClassEq(RC))
@@ -4823,12 +4822,7 @@ void AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
assert((Subtarget.hasSVE2p1() || Subtarget.hasSME2()) &&
"Unexpected register store without SVE2p1 or SME2");
if (SrcReg.isVirtual()) {
- auto NewSrcReg =
- MF.getRegInfo().createVirtualRegister(&AArch64::PPR_p8to15RegClass);
- BuildMI(MBB, MBBI, DebugLoc(), TII->get(AArch64::ConvertPNRtoPPR),
- NewSrcReg)
- .addReg(SrcReg);
- SrcReg = NewSrcReg;
+ MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::PPRRegClass);
} else
SrcReg = (SrcReg - AArch64::PN0) + AArch64::P0;
Opc = AArch64::STR_PXI;
@@ -5008,7 +5002,7 @@ void AArch64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
"Unexpected register load without SVE2p1 or SME2");
PNRReg = DestReg;
if (DestReg.isVirtual())
- DestReg = MF.getRegInfo().createVirtualRegister(&AArch64::PPRRegClass);
+ MF.getRegInfo().constrainRegClass(DestReg, &AArch64::PPRRegClass);
else
DestReg = (DestReg - AArch64::PN0) + AArch64::P0;
Opc = AArch64::LDR_PXI;
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index c9dc5a84aa3254e..2b0524754b2e9d3 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -2313,8 +2313,6 @@ let Predicates = [HasBF16, HasSVEorSME] in {
defm BFCVTNT_ZPmZ : sve_bfloat_convert<0b0, "bfcvtnt", int_aarch64_sve_fcvtnt_bf16f32>;
} // End HasBF16, HasSVEorSME
-def ConvertPNRtoPPR : Pseudo<(outs PPRAny:$Pd), (ins PNRAny:$Pm), []>, Sched<[]>;
-
let Predicates = [HasSVEorSME] in {
// InstAliases
def : InstAlias<"mov $Zd, $Zn",
diff --git a/llvm/test/CodeGen/AArch64/spillfill-sve-different-predicate.mir b/llvm/test/CodeGen/AArch64/spillfill-sve-different-predicate.mir
deleted file mode 100644
index abbc1d615b66f7d..000000000000000
--- a/llvm/test/CodeGen/AArch64/spillfill-sve-different-predicate.mir
+++ /dev/null
@@ -1,30 +0,0 @@
-# RUN: llc -mtriple=aarch64-linux-gnu -start-after=virtregrewriter -stop-after=aarch64-expand-pseudo -mattr=+sme2 -verify-machineinstrs -o - %s \
-# RUN: | FileCheck %s
-
---- |
- target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
- target triple = "aarch64-unknown-linux-gnu"
-
- define void @test_convert_different_reg() #0 { entry: unreachable }
-
- attributes #0 = { "target-features"="+sme2" }
-
----
-name: test_convert_different_reg
-tracksRegLiveness: true
-body: |
- bb.0.entry:
- ; CHECK-LABEL: name: test_convert_different_reg
- ; CHECK: renamable $pn8 = WHILEGE_CXX_B undef $x0, undef $x0, 0, implicit-def dead $nzcv
- ; CHECK-NEXT: renamable $p9 = ORR_PPzPP $p8, $p8, $p8
- ; CHECK-NEXT: STR_PXI killed renamable $p9, $sp, 7
- ; CHECK-NEXT: renamable $p0 = LDR_PXI $sp, 7
- early-clobber $sp = frame-setup STRXpre killed $fp, $sp, -16
- frame-setup CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22
- frame-setup CFI_INSTRUCTION offset $w29, -16
- renamable $pn8 = WHILEGE_CXX_B undef $x0, undef $x0, 0, implicit-def dead $nzcv
- renamable $p9 = ConvertPNRtoPPR killed renamable $pn8
- STR_PXI killed renamable $p9, $sp, 7
- renamable $p0 = LDR_PXI $sp, 7
- early-clobber $sp, $fp = frame-destroy LDRXpost $sp, 16
- RET undef $lr
diff --git a/llvm/test/CodeGen/AArch64/spillfill-sve.mir b/llvm/test/CodeGen/AArch64/spillfill-sve.mir
index af062d33c5642f6..fba8af5a7028ae7 100644
--- a/llvm/test/CodeGen/AArch64/spillfill-sve.mir
+++ b/llvm/test/CodeGen/AArch64/spillfill-sve.mir
@@ -213,10 +213,9 @@ body: |
; EXPAND-LABEL: name: spills_fills_stack_id_virtreg_pnr
; EXPAND: renamable $pn8 = WHILEGE_CXX_B
- ; EXPAND: STR_PXI killed renamable $p8, $sp, 7
+ ; EXPAND: STR_PXI killed renamable $pn8, $sp, 7
;
- ; EXPAND: renamable $p0 = LDR_PXI $sp, 7
- ; EXPAND: $p8 = ORR_PPzPP $p0, $p0, killed $p0, implicit-def $pn8
+ ; EXPAND: renamable $pn8 = LDR_PXI $sp, 7
; EXPAND: $p0 = PEXT_PCI_B killed renamable $pn8, 0
>From 08677896c4f65e579565bdd8f8b522a1179408c1 Mon Sep 17 00:00:00 2001
From: Samuel Tebbs <samuel.tebbs at arm.com>
Date: Tue, 27 Feb 2024 14:29:47 +0000
Subject: [PATCH 3/9] fixup: let predicate spill and fill take ppr or pnr
---
llvm/lib/Target/AArch64/AArch64RegisterInfo.td | 10 ++++++++++
llvm/lib/Target/AArch64/SVEInstrFormats.td | 6 +++---
llvm/test/CodeGen/AArch64/spillfill-sve.mir | 2 +-
3 files changed, 14 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
index fef1748021b07c2..0e2f1b6c20a853f 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
@@ -1004,6 +1004,16 @@ let Namespace = "AArch64" in {
def psub1 : SubRegIndex<16, -1>;
}
+class PPRorPNRClass : RegisterClass<
+ "AArch64",
+ [ nxv16i1, nxv8i1, nxv4i1, nxv2i1, nxv1i1 ], 16,
+ (add PPR, PNR)> {
+ let Size = 16;
+}
+def PPRorPNR : PPRorPNRClass;
+def PPRorPNRAsmOpAny : PPRAsmOperand<"PPRorPNRAny", "PPRorPNR", 0>;
+def PPRorPNRAny : PPRRegOp<"", PPRorPNRAsmOpAny, ElementSizeNone, PPRorPNR>;
+
// Pairs of SVE predicate vector registers.
def PSeqPairs : RegisterTuples<[psub0, psub1], [(rotl PPR, 0), (rotl PPR, 1)]>;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 789ec817d3d8b89..e829788cdc39402 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -6680,7 +6680,7 @@ multiclass sve_mem_z_spill<string asm> {
}
class sve_mem_p_spill<string asm>
-: I<(outs), (ins PPRAny:$Pt, GPR64sp:$Rn, simm9:$imm9),
+: I<(outs), (ins PPRorPNRAny:$Pt, GPR64sp:$Rn, simm9:$imm9),
asm, "\t$Pt, [$Rn, $imm9, mul vl]",
"",
[]>, Sched<[]> {
@@ -6703,7 +6703,7 @@ multiclass sve_mem_p_spill<string asm> {
def NAME : sve_mem_p_spill<asm>;
def : InstAlias<asm # "\t$Pt, [$Rn]",
- (!cast<Instruction>(NAME) PPRAny:$Pt, GPR64sp:$Rn, 0), 1>;
+ (!cast<Instruction>(NAME) PPRorPNRAny:$Pt, GPR64sp:$Rn, 0), 1>;
}
//===----------------------------------------------------------------------===//
@@ -7915,7 +7915,7 @@ multiclass sve_mem_z_fill<string asm> {
}
class sve_mem_p_fill<string asm>
-: I<(outs PPRAny:$Pt), (ins GPR64sp:$Rn, simm9:$imm9),
+: I<(outs PPRorPNRAny:$Pt), (ins GPR64sp:$Rn, simm9:$imm9),
asm, "\t$Pt, [$Rn, $imm9, mul vl]",
"",
[]>, Sched<[]> {
diff --git a/llvm/test/CodeGen/AArch64/spillfill-sve.mir b/llvm/test/CodeGen/AArch64/spillfill-sve.mir
index fba8af5a7028ae7..e30853c8d087b1e 100644
--- a/llvm/test/CodeGen/AArch64/spillfill-sve.mir
+++ b/llvm/test/CodeGen/AArch64/spillfill-sve.mir
@@ -1,5 +1,5 @@
# RUN: llc -mtriple=aarch64-linux-gnu -run-pass=greedy %s -o - | FileCheck %s
-# RUN: llc -mtriple=aarch64-linux-gnu -start-before=greedy -stop-after=aarch64-expand-pseudo %s -o - | FileCheck %s --check-prefix=EXPAND
+# RUN: llc -mtriple=aarch64-linux-gnu -start-before=greedy -stop-after=aarch64-expand-pseudo -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=EXPAND
--- |
; ModuleID = '<stdin>'
source_filename = "<stdin>"
>From c8a6a5dfb18cc0a427c88b017ea4c800264ea519 Mon Sep 17 00:00:00 2001
From: Samuel Tebbs <samuel.tebbs at arm.com>
Date: Tue, 27 Feb 2024 14:29:58 +0000
Subject: [PATCH 4/9] fixup: remove braces
---
llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 4593ea67c00f22e..ebfb2304d92e619 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -4821,9 +4821,9 @@ void AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
} else if (AArch64::PNRRegClass.hasSubClassEq(RC)) {
assert((Subtarget.hasSVE2p1() || Subtarget.hasSME2()) &&
"Unexpected register store without SVE2p1 or SME2");
- if (SrcReg.isVirtual()) {
+ if (SrcReg.isVirtual())
MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::PPRRegClass);
- } else
+ else
SrcReg = (SrcReg - AArch64::PN0) + AArch64::P0;
Opc = AArch64::STR_PXI;
StackID = TargetStackID::ScalableVector;
>From fc6be26bbd7c3968fb1a97d6ecb298664f12c87f Mon Sep 17 00:00:00 2001
From: Samuel Tebbs <samuel.tebbs at arm.com>
Date: Wed, 28 Feb 2024 13:49:53 +0000
Subject: [PATCH 5/9] fixup: add decode function
---
.../AArch64/Disassembler/AArch64Disassembler.cpp | 15 +++++++++++++++
.../AArch64/GlobalISel/regbank-inlineasm.mir | 8 ++++----
.../emit_fneg_with_non_register_operand.mir | 8 ++++----
llvm/test/CodeGen/AArch64/peephole-insvigpr.mir | 4 ++--
4 files changed, 25 insertions(+), 10 deletions(-)
diff --git a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
index a21b4b77166ede8..b596d3505a8e491 100644
--- a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
+++ b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
@@ -143,6 +143,9 @@ DecodeMatrixTileListRegisterClass(MCInst &Inst, unsigned RegMask,
static DecodeStatus DecodePPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
const MCDisassembler *Decoder);
+static DecodeStatus DecodePPRorPNRRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Addr,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodePNRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
const MCDisassembler *Decoder);
@@ -741,6 +744,18 @@ static DecodeStatus DecodeMatrixTile(MCInst &Inst, unsigned RegNo,
return Success;
}
+static DecodeStatus DecodePPRorPNRRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Addr,
+ const MCDisassembler *Decoder) {
+ if (RegNo > 15)
+ return Fail;
+
+ unsigned Register =
+ AArch64MCRegisterClasses[AArch64::PPRorPNRRegClassID].getRegister(RegNo);
+ Inst.addOperand(MCOperand::createReg(Register));
+ return Success;
+}
+
static DecodeStatus DecodePPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Addr,
const MCDisassembler *Decoder) {
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-inlineasm.mir b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-inlineasm.mir
index e77fac19e0a78a5..ae4619e9e797785 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-inlineasm.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-inlineasm.mir
@@ -57,11 +57,11 @@ tracksRegLiveness: true
body: |
bb.1:
; CHECK-LABEL: name: inlineasm_virt_reg_output
- ; CHECK: INLINEASM &"mov ${0:w}, 7", 0 /* attdialect */, 1310730 /* regdef:PPR2_with_psub_in_PNR_3b_and_PPR2_with_psub1_in_PPR_p8to15 */, def %0
+ ; CHECK: INLINEASM &"mov ${0:w}, 7", 0 /* attdialect */, {{[0-9]+}} /* regdef:GPR32common */, def %0
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr(s32) = COPY %0
; CHECK-NEXT: $w0 = COPY [[COPY]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
- INLINEASM &"mov ${0:w}, 7", 0 /* attdialect */, 1310730 /* regdef:GPR32common */, def %0:gpr32common
+ INLINEASM &"mov ${0:w}, 7", 0 /* attdialect */, 1769482 /* regdef:GPR32common */, def %0:gpr32common
%1:_(s32) = COPY %0
$w0 = COPY %1(s32)
RET_ReallyLR implicit $w0
@@ -75,12 +75,12 @@ tracksRegLiveness: true
body: |
bb.1:
; CHECK-LABEL: name: inlineasm_virt_mixed_types
- ; CHECK: INLINEASM &"mov $0, #0; mov $1, #0", 0 /* attdialect */, 1310730 /* regdef:PPR2_with_psub_in_PNR_3b_and_PPR2_with_psub1_in_PPR_p8to15 */, def %0, 2162698 /* regdef:WSeqPairsClass */, def %1
+ ; CHECK: INLINEASM &"mov $0, #0; mov $1, #0", 0 /* attdialect */, {{[0-9]+}} /* regdef:GPR32common */, def %0, {{[0-9]+}} /* regdef:FPR64 */, def %1
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr(s32) = COPY %0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr(s64) = COPY %1
; CHECK-NEXT: $d0 = COPY [[COPY1]](s64)
; CHECK-NEXT: RET_ReallyLR implicit $d0
- INLINEASM &"mov $0, #0; mov $1, #0", 0 /* attdialect */, 1310730 /* regdef:GPR32common */, def %0:gpr32common, 2162698 /* regdef:FPR64 */, def %1:fpr64
+ INLINEASM &"mov $0, #0; mov $1, #0", 0 /* attdialect */, 1769482 /* regdef:GPR32common */, def %0:gpr32common, 2621450 /* regdef:FPR64 */, def %1:fpr64
%3:_(s32) = COPY %0
%4:_(s64) = COPY %1
$d0 = COPY %4(s64)
diff --git a/llvm/test/CodeGen/AArch64/emit_fneg_with_non_register_operand.mir b/llvm/test/CodeGen/AArch64/emit_fneg_with_non_register_operand.mir
index 92fb053b0db7260..1dae8a8ad4d23ec 100644
--- a/llvm/test/CodeGen/AArch64/emit_fneg_with_non_register_operand.mir
+++ b/llvm/test/CodeGen/AArch64/emit_fneg_with_non_register_operand.mir
@@ -91,10 +91,10 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[LOADgot:%[0-9]+]]:gpr64common = LOADgot target-flags(aarch64-got) @c
; CHECK-NEXT: [[LDRDui:%[0-9]+]]:fpr64 = LDRDui [[LOADgot]], 0 :: (dereferenceable load (s64) from @c)
- ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, {{[0-9]+}} /* regdef:WSeqPairsClass_with_sube32_in_MatrixIndexGPR32_12_15 */, def %2, 2147483657 /* reguse tiedto:$0 */, [[LDRDui]](tied-def 3)
+ ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, {{[0-9]+}} /* regdef:FPR64 */, def %2, 2147483657 /* reguse tiedto:$0 */, [[LDRDui]](tied-def 3)
; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY %2
; CHECK-NEXT: [[LDRDui1:%[0-9]+]]:fpr64 = LDRDui [[LOADgot]], 0 :: (dereferenceable load (s64) from @c)
- ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, {{[0-9]+}} /* regdef:WSeqPairsClass_with_sube32_in_MatrixIndexGPR32_12_15 */, def %4, 2147483657 /* reguse tiedto:$0 */, [[LDRDui1]](tied-def 3)
+ ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, {{[0-9]+}} /* regdef:FPR64 */, def %4, 2147483657 /* reguse tiedto:$0 */, [[LDRDui1]](tied-def 3)
; CHECK-NEXT: [[FNEGDr:%[0-9]+]]:fpr64 = FNEGDr %2
; CHECK-NEXT: nofpexcept FCMPDrr %4, killed [[FNEGDr]], implicit-def $nzcv, implicit $fpcr
; CHECK-NEXT: Bcc 1, %bb.2, implicit $nzcv
@@ -111,10 +111,10 @@ body: |
%6:gpr64common = LOADgot target-flags(aarch64-got) @c
%3:fpr64 = LDRDui %6, 0 :: (dereferenceable load (s64) from @c)
- INLINEASM &"", 1 /* sideeffect attdialect */, 2359306 /* regdef:FPR64 */, def %2, 2147483657 /* reguse tiedto:$0 */, %3(tied-def 3)
+ INLINEASM &"", 1 /* sideeffect attdialect */, 2621450 /* regdef:FPR64 */, def %2, 2147483657 /* reguse tiedto:$0 */, %3(tied-def 3)
%0:fpr64 = COPY %2
%5:fpr64 = LDRDui %6, 0 :: (dereferenceable load (s64) from @c)
- INLINEASM &"", 1 /* sideeffect attdialect */, 2359306 /* regdef:FPR64 */, def %4, 2147483657 /* reguse tiedto:$0 */, %5(tied-def 3)
+ INLINEASM &"", 1 /* sideeffect attdialect */, 2621450 /* regdef:FPR64 */, def %4, 2147483657 /* reguse tiedto:$0 */, %5(tied-def 3)
%7:fpr64 = FNEGDr %2
nofpexcept FCMPDrr %4, killed %7, implicit-def $nzcv, implicit $fpcr
Bcc 1, %bb.2, implicit $nzcv
diff --git a/llvm/test/CodeGen/AArch64/peephole-insvigpr.mir b/llvm/test/CodeGen/AArch64/peephole-insvigpr.mir
index 65148344096cd79..a86d3fb1094c502 100644
--- a/llvm/test/CodeGen/AArch64/peephole-insvigpr.mir
+++ b/llvm/test/CodeGen/AArch64/peephole-insvigpr.mir
@@ -487,7 +487,7 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY $x0
; CHECK-NEXT: [[DEF:%[0-9]+]]:gpr64all = IMPLICIT_DEF
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64sp = COPY [[DEF]]
- ; CHECK-NEXT: INLINEASM &"ldr ${0:s}, $1", 8 /* mayload attdialect */, {{[0-9]+}} /* regdef:WSeqPairsClass_with_sube32_in_MatrixIndexGPR32_12_15 */, def %1, 262158 /* mem:m */, killed [[COPY1]]
+ ; CHECK-NEXT: INLINEASM &"ldr ${0:s}, $1", 8 /* mayload attdialect */, {{[0-9]+}} /* regdef:FPR64 */, def %1, 262158 /* mem:m */, killed [[COPY1]]
; CHECK-NEXT: [[MOVIv2d_ns:%[0-9]+]]:fpr128 = MOVIv2d_ns 0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:fpr64 = COPY [[MOVIv2d_ns]].dsub
; CHECK-NEXT: [[DEF1:%[0-9]+]]:fpr128 = IMPLICIT_DEF
@@ -505,7 +505,7 @@ body: |
%0:gpr64common = COPY $x0
%2:gpr64all = IMPLICIT_DEF
%3:gpr64sp = COPY %2
- INLINEASM &"ldr ${0:s}, $1", 8 /* mayload attdialect */, 2359306 /* regdef:FPR64 */, def %1, 262158 /* mem:m */, killed %3
+ INLINEASM &"ldr ${0:s}, $1", 8 /* mayload attdialect */, 2621450 /* regdef:FPR64 */, def %1, 262158 /* mem:m */, killed %3
%4:fpr128 = MOVIv2d_ns 0
%5:fpr64 = COPY %4.dsub
%7:fpr128 = IMPLICIT_DEF
>From 03b0337c5d550d2d1b5910ea2ece52425d33a467 Mon Sep 17 00:00:00 2001
From: Samuel Tebbs <samuel.tebbs at arm.com>
Date: Wed, 28 Feb 2024 14:58:55 +0000
Subject: [PATCH 6/9] fixup: remove inst aliases
---
llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td | 10 ----------
1 file changed, 10 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 2b0524754b2e9d3..391f48238069fc9 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -4017,16 +4017,6 @@ let Predicates = [HasSVEorSME] in {
// Aliases for existing SVE instructions for which predicate-as-counter are
// accepted as an operand to the instruction
-def : InstAlias<"ldr $Pt, [$Rn, $imm9, mul vl]",
- (LDR_PXI PNRasPPRAny:$Pt, GPR64sp:$Rn, simm9:$imm9), 0>;
-def : InstAlias<"ldr $Pt, [$Rn]",
- (LDR_PXI PNRasPPRAny:$Pt, GPR64sp:$Rn, 0), 0>;
-
-def : InstAlias<"str $Pt, [$Rn, $imm9, mul vl]",
- (STR_PXI PNRasPPRAny:$Pt, GPR64sp:$Rn, simm9:$imm9), 0>;
-def : InstAlias<"str $Pt, [$Rn]",
- (STR_PXI PNRasPPRAny:$Pt, GPR64sp:$Rn, 0), 0>;
-
def : InstAlias<"mov $Pd, $Pn",
(ORR_PPzPP PNRasPPR8:$Pd, PNRasPPR8:$Pn, PNRasPPR8:$Pn, PNRasPPR8:$Pn), 0>;
>From 70564cb4c8940381be5d91973ed43511a41f0940 Mon Sep 17 00:00:00 2001
From: Samuel Tebbs <samuel.tebbs at arm.com>
Date: Wed, 28 Feb 2024 14:59:13 +0000
Subject: [PATCH 7/9] fixup: match reg class in AArch64AsmParser
---
llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp | 2 ++
1 file changed, 2 insertions(+)
diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index b807aaf76fdb006..d40b78e9fdcaa43 100644
--- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -6019,6 +6019,7 @@ bool AArch64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode,
return Error(Loc, "Invalid restricted vector register, expected z0.d..z15.d");
case Match_InvalidSVEPattern:
return Error(Loc, "invalid predicate pattern");
+ case Match_InvalidSVEPPRorPNRAnyReg:
case Match_InvalidSVEPredicateAnyReg:
case Match_InvalidSVEPredicateBReg:
case Match_InvalidSVEPredicateHReg:
@@ -6653,6 +6654,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
case Match_InvalidZPR_4b16:
case Match_InvalidZPR_4b32:
case Match_InvalidZPR_4b64:
+ case Match_InvalidSVEPPRorPNRAnyReg:
case Match_InvalidSVEPredicateAnyReg:
case Match_InvalidSVEPattern:
case Match_InvalidSVEVecLenSpecifier:
>From 37e909efa9f3d10d290bed33aab184aa912a273b Mon Sep 17 00:00:00 2001
From: Samuel Tebbs <samuel.tebbs at arm.com>
Date: Wed, 28 Feb 2024 14:59:27 +0000
Subject: [PATCH 8/9] Add svcount to reg class
---
llvm/lib/Target/AArch64/AArch64RegisterInfo.td | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
index 0e2f1b6c20a853f..e4d6e657e3e0cc0 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
@@ -1006,7 +1006,7 @@ let Namespace = "AArch64" in {
class PPRorPNRClass : RegisterClass<
"AArch64",
- [ nxv16i1, nxv8i1, nxv4i1, nxv2i1, nxv1i1 ], 16,
+ [ nxv16i1, nxv8i1, nxv4i1, nxv2i1, nxv1i1, aarch64svcount ], 16,
(add PPR, PNR)> {
let Size = 16;
}
>From 5d8fb1fce8e4f41b2cb383134402daf0087b9bb0 Mon Sep 17 00:00:00 2001
From: Samuel Tebbs <samuel.tebbs at arm.com>
Date: Wed, 28 Feb 2024 14:59:39 +0000
Subject: [PATCH 9/9] fixup: format definitions
---
llvm/lib/Target/AArch64/AArch64RegisterInfo.td | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
index e4d6e657e3e0cc0..81936c6adb2d85a 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
@@ -1010,9 +1010,9 @@ class PPRorPNRClass : RegisterClass<
(add PPR, PNR)> {
let Size = 16;
}
-def PPRorPNR : PPRorPNRClass;
-def PPRorPNRAsmOpAny : PPRAsmOperand<"PPRorPNRAny", "PPRorPNR", 0>;
-def PPRorPNRAny : PPRRegOp<"", PPRorPNRAsmOpAny, ElementSizeNone, PPRorPNR>;
+def PPRorPNR : PPRorPNRClass;
+def PPRorPNRAsmOpAny : PPRAsmOperand<"PPRorPNRAny", "PPRorPNR", 0>;
+def PPRorPNRAny : PPRRegOp<"", PPRorPNRAsmOpAny, ElementSizeNone, PPRorPNR>;
// Pairs of SVE predicate vector registers.
def PSeqPairs : RegisterTuples<[psub0, psub1], [(rotl PPR, 0), (rotl PPR, 1)]>;
More information about the llvm-commits mailing list