[llvm] [WIP][PowerPC] Add phony subregisters to cover the high half of the VSX registers. (PR #94628)
Stefan Pintilie via llvm-commits
llvm-commits at lists.llvm.org
Thu Jun 6 08:15:40 PDT 2024
https://github.com/stefanp-ibm created https://github.com/llvm/llvm-project/pull/94628
On PowerPC we have VSX registers which overlap with floating point registers.
However, the floating point registers only overlap with half of each VSX
register while the other half is never used alone. This patch adds phony
registers for the other half of the VSX registers in order to fully cover them
and to make sure that the lane masks are not the same for the VSX and the
floating point registers.
Note: This patch is still Work in Progress as there are a number of LIT
failures that need to be investigated.
>From eaaad1691cc732c4c4e53197d2d629c1bb5dcb74 Mon Sep 17 00:00:00 2001
From: Stefan Pintilie <stefanp at ca.ibm.com>
Date: Thu, 6 Jun 2024 10:07:00 -0500
Subject: [PATCH] [WIP][PowerPC] Add phony subregisters to cover the high half
of the VSX registers.
On PowerPC we have VSX registers which overlap with floating point registers.
However, the floating point registers only overlap with half of each VSX
register while the other half is never used alone. This patch adds phony
registers for the other half of the VSX registers in order to fully cover them
and to make sure that the lane masks are not the same for the VSX and the
floating point registers.
Note: This patch is still Work in Progress as there are a number of LIT
failures that need to be investigated.
---
llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp | 86 ++++++++++++++++++-
llvm/lib/Target/PowerPC/PPCRegisterInfo.td | 31 +++++--
llvm/test/CodeGen/PowerPC/frem.ll | 5 --
.../test/CodeGen/PowerPC/subreg-lanemasks.mir | 63 ++++++++++++++
4 files changed, 170 insertions(+), 15 deletions(-)
create mode 100644 llvm/test/CodeGen/PowerPC/subreg-lanemasks.mir
diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 9e8da59615dfb..a2d089f32cabf 100644
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -435,7 +435,91 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
}
}
- assert(checkAllSuperRegsMarked(Reserved));
+ // Mark phony registers for the VSR high bits as reserved so that they are
+ // not used.
+ Reserved.set(PPC::FH0);
+ Reserved.set(PPC::FH1);
+ Reserved.set(PPC::FH2);
+ Reserved.set(PPC::FH3);
+ Reserved.set(PPC::FH4);
+ Reserved.set(PPC::FH5);
+ Reserved.set(PPC::FH6);
+ Reserved.set(PPC::FH7);
+ Reserved.set(PPC::FH8);
+ Reserved.set(PPC::FH9);
+ Reserved.set(PPC::FH10);
+ Reserved.set(PPC::FH11);
+ Reserved.set(PPC::FH12);
+ Reserved.set(PPC::FH13);
+ Reserved.set(PPC::FH14);
+ Reserved.set(PPC::FH15);
+ Reserved.set(PPC::FH16);
+ Reserved.set(PPC::FH17);
+ Reserved.set(PPC::FH18);
+ Reserved.set(PPC::FH19);
+ Reserved.set(PPC::FH20);
+ Reserved.set(PPC::FH21);
+ Reserved.set(PPC::FH22);
+ Reserved.set(PPC::FH23);
+ Reserved.set(PPC::FH24);
+ Reserved.set(PPC::FH25);
+ Reserved.set(PPC::FH26);
+ Reserved.set(PPC::FH27);
+ Reserved.set(PPC::FH28);
+ Reserved.set(PPC::FH29);
+ Reserved.set(PPC::FH30);
+ Reserved.set(PPC::FH31);
+
+ Reserved.set(PPC::VFH0);
+ Reserved.set(PPC::VFH1);
+ Reserved.set(PPC::VFH2);
+ Reserved.set(PPC::VFH3);
+ Reserved.set(PPC::VFH4);
+ Reserved.set(PPC::VFH5);
+ Reserved.set(PPC::VFH6);
+ Reserved.set(PPC::VFH7);
+ Reserved.set(PPC::VFH8);
+ Reserved.set(PPC::VFH9);
+ Reserved.set(PPC::VFH10);
+ Reserved.set(PPC::VFH11);
+ Reserved.set(PPC::VFH12);
+ Reserved.set(PPC::VFH13);
+ Reserved.set(PPC::VFH14);
+ Reserved.set(PPC::VFH15);
+ Reserved.set(PPC::VFH16);
+ Reserved.set(PPC::VFH17);
+ Reserved.set(PPC::VFH18);
+ Reserved.set(PPC::VFH19);
+ Reserved.set(PPC::VFH20);
+ Reserved.set(PPC::VFH21);
+ Reserved.set(PPC::VFH22);
+ Reserved.set(PPC::VFH23);
+ Reserved.set(PPC::VFH24);
+ Reserved.set(PPC::VFH25);
+ Reserved.set(PPC::VFH26);
+ Reserved.set(PPC::VFH27);
+ Reserved.set(PPC::VFH28);
+ Reserved.set(PPC::VFH29);
+ Reserved.set(PPC::VFH30);
+ Reserved.set(PPC::VFH31);
+
+ assert(checkAllSuperRegsMarked(Reserved,
+ {PPC::FH0, PPC::FH1, PPC::FH2, PPC::FH3,
+ PPC::FH4, PPC::FH5, PPC::FH6, PPC::FH7,
+ PPC::FH8, PPC::FH9, PPC::FH10, PPC::FH11,
+ PPC::FH12, PPC::FH13, PPC::FH14, PPC::FH15,
+ PPC::FH16, PPC::FH17, PPC::FH18, PPC::FH19,
+ PPC::FH20, PPC::FH21, PPC::FH22, PPC::FH23,
+ PPC::FH24, PPC::FH25, PPC::FH26, PPC::FH27,
+ PPC::FH28, PPC::FH29, PPC::FH30, PPC::FH31,
+ PPC::VFH0, PPC::VFH1, PPC::VFH2, PPC::VFH3,
+ PPC::VFH4, PPC::VFH5, PPC::VFH6, PPC::VFH7,
+ PPC::VFH8, PPC::VFH9, PPC::VFH10, PPC::VFH11,
+ PPC::VFH12, PPC::VFH13, PPC::VFH14, PPC::VFH15,
+ PPC::VFH16, PPC::VFH17, PPC::VFH18, PPC::VFH19,
+ PPC::VFH20, PPC::VFH21, PPC::VFH22, PPC::VFH23,
+ PPC::VFH24, PPC::VFH25, PPC::VFH26, PPC::VFH27,
+ PPC::VFH28, PPC::VFH29, PPC::VFH30, PPC::VFH31}));
return Reserved;
}
diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
index 8a37e40414eee..30e936a157e01 100644
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -17,6 +17,7 @@ def sub_un : SubRegIndex<1, 3>;
def sub_32 : SubRegIndex<32>;
def sub_32_hi_phony : SubRegIndex<32,32>;
def sub_64 : SubRegIndex<64>;
+def sub_64_hi_phony : SubRegIndex<64,64>;
def sub_vsx0 : SubRegIndex<128>;
def sub_vsx1 : SubRegIndex<128, 128>;
def sub_gp8_x0 : SubRegIndex<64>;
@@ -77,19 +78,19 @@ class VF<bits<5> num, string n> : PPCReg<n> {
}
// VR - One of the 32 128-bit vector registers
-class VR<VF SubReg, string n> : PPCReg<n> {
+class VR<VF SubReg, VF SubRegH, string n> : PPCReg<n> {
let HWEncoding{4-0} = SubReg.HWEncoding{4-0};
let HWEncoding{5} = 0;
- let SubRegs = [SubReg];
- let SubRegIndices = [sub_64];
+ let SubRegs = [SubReg, SubRegH];
+ let SubRegIndices = [sub_64, sub_64_hi_phony];
}
// VSRL - One of the 32 128-bit VSX registers that overlap with the scalar
// floating-point registers.
-class VSRL<FPR SubReg, string n> : PPCReg<n> {
+class VSRL<FPR SubReg, FPR SubRegH, string n> : PPCReg<n> {
let HWEncoding = SubReg.HWEncoding;
- let SubRegs = [SubReg];
- let SubRegIndices = [sub_64];
+ let SubRegs = [SubReg, SubRegH];
+ let SubRegIndices = [sub_64, sub_64_hi_phony];
}
// VSXReg - One of the VSX registers in the range vs32-vs63 with numbering
@@ -155,6 +156,12 @@ foreach Index = 0-31 in {
DwarfRegNum<[!add(Index, 32), !add(Index, 32)]>;
}
+let isArtificial = 1 in {
+ foreach Index = 0-31 in {
+ def FH#Index : FPR<-1, "">;
+ }
+}
+
// Floating-point pair registers
foreach Index = { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 } in {
def Fpair#Index : FPPair<"fp"#Index, Index>;
@@ -168,15 +175,21 @@ foreach Index = 0-31 in {
DwarfRegNum<[!add(Index, 77), !add(Index, 77)]>;
}
+let isArtificial = 1 in {
+ foreach Index = 0-31 in {
+ def VFH#Index : VF<-1, "">;
+ }
+}
+
// Vector registers
foreach Index = 0-31 in {
- def V#Index : VR<!cast<VF>("VF"#Index), "v"#Index>,
- DwarfRegNum<[!add(Index, 77), !add(Index, 77)]>;
+ def V#Index : VR<!cast<VF>("VF"#Index), !cast<VF>("VFH"#Index), "v"#Index>,
+ DwarfRegNum<[!add(Index, 77)]>;
}
// VSX registers
foreach Index = 0-31 in {
- def VSL#Index : VSRL<!cast<FPR>("F"#Index), "vs"#Index>,
+ def VSL#Index : VSRL<!cast<FPR>("F"#Index), !cast<FPR>("FH"#Index), "vs"#Index>,
DwarfRegAlias<!cast<FPR>("F"#Index)>;
}
diff --git a/llvm/test/CodeGen/PowerPC/frem.ll b/llvm/test/CodeGen/PowerPC/frem.ll
index 8cb68e60f7f9b..19b4b1c9cdf95 100644
--- a/llvm/test/CodeGen/PowerPC/frem.ll
+++ b/llvm/test/CodeGen/PowerPC/frem.ll
@@ -70,7 +70,6 @@ define <4 x float> @frem4x32(<4 x float> %a, <4 x float> %b) {
; CHECK-NEXT: xscvspdpn 2, 0
; CHECK-NEXT: bl fmodf
; CHECK-NEXT: nop
-; CHECK-NEXT: # kill: def $f1 killed $f1 def $vsl1
; CHECK-NEXT: xxmrghd 0, 1, 61
; CHECK-NEXT: xscvspdpn 1, 62
; CHECK-NEXT: xscvspdpn 2, 63
@@ -84,7 +83,6 @@ define <4 x float> @frem4x32(<4 x float> %a, <4 x float> %b) {
; CHECK-NEXT: xscvspdpn 2, 0
; CHECK-NEXT: bl fmodf
; CHECK-NEXT: nop
-; CHECK-NEXT: # kill: def $f1 killed $f1 def $vsl1
; CHECK-NEXT: xxmrghd 0, 61, 1
; CHECK-NEXT: lxv 63, 80(1) # 16-byte Folded Reload
; CHECK-NEXT: lxv 62, 64(1) # 16-byte Folded Reload
@@ -124,11 +122,8 @@ define <2 x double> @frem2x64(<2 x double> %a, <2 x double> %b) {
; CHECK-NEXT: xscpsgndp 61, 1, 1
; CHECK-NEXT: xxswapd 1, 62
; CHECK-NEXT: xxswapd 2, 63
-; CHECK-NEXT: # kill: def $f1 killed $f1 killed $vsl1
-; CHECK-NEXT: # kill: def $f2 killed $f2 killed $vsl2
; CHECK-NEXT: bl fmod
; CHECK-NEXT: nop
-; CHECK-NEXT: # kill: def $f1 killed $f1 def $vsl1
; CHECK-NEXT: xxmrghd 34, 61, 1
; CHECK-NEXT: lxv 63, 64(1) # 16-byte Folded Reload
; CHECK-NEXT: lxv 62, 48(1) # 16-byte Folded Reload
diff --git a/llvm/test/CodeGen/PowerPC/subreg-lanemasks.mir b/llvm/test/CodeGen/PowerPC/subreg-lanemasks.mir
new file mode 100644
index 0000000000000..28a4d0347f105
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/subreg-lanemasks.mir
@@ -0,0 +1,63 @@
+# RUN: llc -mcpu=pwr10 -O3 -ppc-track-subreg-liveness -verify-machineinstrs \
+# RUN: -mtriple=powerpc64le-unknown-linux-gnu -run-pass=greedy,virtregrewriter \
+# RUN: -debug-only=regalloc -o - %s 2>&1 >/dev/null | FileCheck %s
+
+# Keep track of all of the lanemasks for various subregisters.
+#
+# CHECK: %3 [80r,80d:0) 0 at 80r L000000000000000C [80r,80d:0) 0 at 80r weight:0.000000e+00
+# CHECK: %4 [96r,96d:0) 0 at 96r L0000000000003000 [96r,96d:0) 0 at 96r weight:0.000000e+00
+# CHECK: %5 [112r,112d:0) 0 at 112r L000000000000000C [112r,112d:0) 0 at 112r weight:0.000000e+00
+# CHECK: %6 [128r,128d:0) 0 at 128r L0000000000003000 [128r,128d:0) 0 at 128r weight:0.000000e+00
+# CHECK: %7 [144r,144d:0) 0 at 144r L0000000000000004 [144r,144d:0) 0 at 144r weight:0.000000e+00
+# CHECK: %8 [160r,160d:0) 0 at 160r L0000000000001000 [160r,160d:0) 0 at 160r weight:0.000000e+00
+# CHECK: %9 [176r,176d:0) 0 at 176r L0000000000000004 [176r,176d:0) 0 at 176r weight:0.000000e+00
+# CHECK: %10 [192r,192d:0) 0 at 192r L0000000000001000 [192r,192d:0) 0 at 192r weight:0.000000e+00
+# CHECK: %11 [208r,208d:0) 0 at 208r L0000000000004000 [208r,208d:0) 0 at 208r weight:0.000000e+00
+# CHECK: %12 [224r,224d:0) 0 at 224r L0000000000010000 [224r,224d:0) 0 at 224r weight:0.000000e+00
+# CHECK: %13 [240r,240d:0) 0 at 240r L000000000000300C [240r,240d:0) 0 at 240r weight:0.000000e+00
+# CHECK: %14 [256r,256d:0) 0 at 256r L000000000003C000 [256r,256d:0) 0 at 256r weight:0.000000e+00
+
+
+# CHECK: 0B bb.0
+# CHECK-NEXT: liveins
+# CHECK-NEXT: 16B %0:vsrc = COPY $v2
+# CHECK-NEXT: 32B %float:fprc = COPY %0.sub_64:vsrc
+# CHECK-NEXT: 48B dead undef %pair.sub_vsx0:vsrprc = COPY $v2
+# CHECK-NEXT: 64B undef %15.sub_vsx1:vsrprc = COPY $v3
+# CHECK-NEXT: 80B dead undef %3.sub_vsx0:vsrprc = COPY %0:vsrc
+# CHECK-NEXT: 96B dead undef %4.sub_vsx1:vsrprc = COPY %0:vsrc
+# CHECK-NEXT: 112B dead undef %5.sub_vsx0:accrc = COPY %0:vsrc
+# CHECK-NEXT: 128B dead undef %6.sub_vsx1:accrc = COPY %0:vsrc
+# CHECK-NEXT: 144B dead undef %7.sub_64:vsrprc = COPY %float:fprc
+# CHECK-NEXT: 160B dead undef %8.sub_vsx1_then_sub_64:vsrprc = COPY %float:fprc
+# CHECK-NEXT: 176B dead undef %9.sub_64:accrc = COPY %float:fprc
+# CHECK-NEXT: 192B dead undef %10.sub_vsx1_then_sub_64:accrc = COPY %float:fprc
+# CHECK-NEXT: 208B dead undef %11.sub_pair1_then_sub_64:accrc = COPY %float:fprc
+# CHECK-NEXT: 224B dead undef %12.sub_pair1_then_sub_vsx1_then_sub_64:accrc = COPY %float:fprc
+# CHECK-NEXT: 240B dead undef %13.sub_pair0:accrc = COPY %15:vsrprc
+# CHECK-NEXT: 256B dead undef %14.sub_pair1:accrc = COPY %15:vsrprc
+
+
+---
+name: test
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $v2, $v3
+ %0:vsrc = COPY $v2
+ %float:fprc = COPY %0.sub_64
+ undef %pair.sub_vsx0:vsrprc = COPY $v2
+ undef %pair.sub_vsx1:vsrprc = COPY $v3
+ undef %1.sub_vsx0:vsrprc = COPY %0
+ undef %2.sub_vsx1:vsrprc = COPY %0
+ undef %3.sub_vsx0:accrc = COPY %0
+ undef %4.sub_vsx1:accrc = COPY %0
+ undef %5.sub_64:vsrprc = COPY %float
+ undef %6.sub_vsx1_then_sub_64:vsrprc = COPY %float
+ undef %7.sub_64:accrc = COPY %float
+ undef %8.sub_vsx1_then_sub_64:accrc = COPY %float
+ undef %9.sub_pair1_then_sub_64:accrc = COPY %float
+ undef %10.sub_pair1_then_sub_vsx1_then_sub_64:accrc = COPY %float
+ undef %11.sub_pair0:accrc = COPY %pair
+ undef %12.sub_pair1:accrc = COPY %pair
+...
More information about the llvm-commits
mailing list