[llvm] [WIP][PowerPC] Add phony subregisters to cover the high half of the VSX registers. (PR #94628)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Jun 6 08:16:14 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-powerpc
Author: Stefan Pintilie (stefanp-ibm)
<details>
<summary>Changes</summary>
On PowerPC we have VSX registers which overlap with floating point registers.
However, the floating point registers only overlap with half of each VSX
register while the other half is never used alone. This patch adds phony
registers for the other half of the VSX registers in order to fully cover them
and to make sure that the lane masks are not the same for the VSX and the
floating point register.
Note: This patch is still Work in Progress as there are a number of LIT
failures that need to be investigated.
---
Full diff: https://github.com/llvm/llvm-project/pull/94628.diff
4 Files Affected:
- (modified) llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp (+85-1)
- (modified) llvm/lib/Target/PowerPC/PPCRegisterInfo.td (+22-9)
- (modified) llvm/test/CodeGen/PowerPC/frem.ll (-5)
- (added) llvm/test/CodeGen/PowerPC/subreg-lanemasks.mir (+63)
``````````diff
diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 9e8da59615df..a2d089f32cab 100644
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -435,7 +435,91 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
}
}
- assert(checkAllSuperRegsMarked(Reserved));
+ // Mark phony regsiters for the VSR high bits as reserved so that they are
+ // not used.
+ Reserved.set(PPC::FH0);
+ Reserved.set(PPC::FH1);
+ Reserved.set(PPC::FH2);
+ Reserved.set(PPC::FH3);
+ Reserved.set(PPC::FH4);
+ Reserved.set(PPC::FH5);
+ Reserved.set(PPC::FH6);
+ Reserved.set(PPC::FH7);
+ Reserved.set(PPC::FH8);
+ Reserved.set(PPC::FH9);
+ Reserved.set(PPC::FH10);
+ Reserved.set(PPC::FH11);
+ Reserved.set(PPC::FH12);
+ Reserved.set(PPC::FH13);
+ Reserved.set(PPC::FH14);
+ Reserved.set(PPC::FH15);
+ Reserved.set(PPC::FH16);
+ Reserved.set(PPC::FH17);
+ Reserved.set(PPC::FH18);
+ Reserved.set(PPC::FH19);
+ Reserved.set(PPC::FH20);
+ Reserved.set(PPC::FH21);
+ Reserved.set(PPC::FH22);
+ Reserved.set(PPC::FH23);
+ Reserved.set(PPC::FH24);
+ Reserved.set(PPC::FH25);
+ Reserved.set(PPC::FH26);
+ Reserved.set(PPC::FH27);
+ Reserved.set(PPC::FH28);
+ Reserved.set(PPC::FH29);
+ Reserved.set(PPC::FH30);
+ Reserved.set(PPC::FH31);
+
+ Reserved.set(PPC::VFH0);
+ Reserved.set(PPC::VFH1);
+ Reserved.set(PPC::VFH2);
+ Reserved.set(PPC::VFH3);
+ Reserved.set(PPC::VFH4);
+ Reserved.set(PPC::VFH5);
+ Reserved.set(PPC::VFH6);
+ Reserved.set(PPC::VFH7);
+ Reserved.set(PPC::VFH8);
+ Reserved.set(PPC::VFH9);
+ Reserved.set(PPC::VFH10);
+ Reserved.set(PPC::VFH11);
+ Reserved.set(PPC::VFH12);
+ Reserved.set(PPC::VFH13);
+ Reserved.set(PPC::VFH14);
+ Reserved.set(PPC::VFH15);
+ Reserved.set(PPC::VFH16);
+ Reserved.set(PPC::VFH17);
+ Reserved.set(PPC::VFH18);
+ Reserved.set(PPC::VFH19);
+ Reserved.set(PPC::VFH20);
+ Reserved.set(PPC::VFH21);
+ Reserved.set(PPC::VFH22);
+ Reserved.set(PPC::VFH23);
+ Reserved.set(PPC::VFH24);
+ Reserved.set(PPC::VFH25);
+ Reserved.set(PPC::VFH26);
+ Reserved.set(PPC::VFH27);
+ Reserved.set(PPC::VFH28);
+ Reserved.set(PPC::VFH29);
+ Reserved.set(PPC::VFH30);
+ Reserved.set(PPC::VFH31);
+
+ assert(checkAllSuperRegsMarked(Reserved,
+ {PPC::FH0, PPC::FH1, PPC::FH2, PPC::FH3,
+ PPC::FH4, PPC::FH5, PPC::FH6, PPC::FH7,
+ PPC::FH8, PPC::FH9, PPC::FH10, PPC::FH11,
+ PPC::FH12, PPC::FH13, PPC::FH14, PPC::FH15,
+ PPC::FH16, PPC::FH17, PPC::FH18, PPC::FH19,
+ PPC::FH20, PPC::FH21, PPC::FH22, PPC::FH23,
+ PPC::FH24, PPC::FH25, PPC::FH26, PPC::FH27,
+ PPC::FH28, PPC::FH29, PPC::FH30, PPC::FH31,
+ PPC::VFH0, PPC::VFH1, PPC::VFH2, PPC::VFH3,
+ PPC::VFH4, PPC::VFH5, PPC::VFH6, PPC::VFH7,
+ PPC::VFH8, PPC::VFH9, PPC::VFH10, PPC::VFH11,
+ PPC::VFH12, PPC::VFH13, PPC::VFH14, PPC::VFH15,
+ PPC::VFH16, PPC::VFH17, PPC::VFH18, PPC::VFH19,
+ PPC::VFH20, PPC::VFH21, PPC::VFH22, PPC::VFH23,
+ PPC::VFH24, PPC::VFH25, PPC::VFH26, PPC::VFH27,
+ PPC::VFH28, PPC::VFH29, PPC::VFH30, PPC::VFH31}));
return Reserved;
}
diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
index 8a37e40414ee..30e936a157e0 100644
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -17,6 +17,7 @@ def sub_un : SubRegIndex<1, 3>;
def sub_32 : SubRegIndex<32>;
def sub_32_hi_phony : SubRegIndex<32,32>;
def sub_64 : SubRegIndex<64>;
+def sub_64_hi_phony : SubRegIndex<64,64>;
def sub_vsx0 : SubRegIndex<128>;
def sub_vsx1 : SubRegIndex<128, 128>;
def sub_gp8_x0 : SubRegIndex<64>;
@@ -77,19 +78,19 @@ class VF<bits<5> num, string n> : PPCReg<n> {
}
// VR - One of the 32 128-bit vector registers
-class VR<VF SubReg, string n> : PPCReg<n> {
+class VR<VF SubReg, VF SubRegH, string n> : PPCReg<n> {
let HWEncoding{4-0} = SubReg.HWEncoding{4-0};
let HWEncoding{5} = 0;
- let SubRegs = [SubReg];
- let SubRegIndices = [sub_64];
+ let SubRegs = [SubReg, SubRegH];
+ let SubRegIndices = [sub_64, sub_64_hi_phony];
}
// VSRL - One of the 32 128-bit VSX registers that overlap with the scalar
// floating-point registers.
-class VSRL<FPR SubReg, string n> : PPCReg<n> {
+class VSRL<FPR SubReg, FPR SubRegH, string n> : PPCReg<n> {
let HWEncoding = SubReg.HWEncoding;
- let SubRegs = [SubReg];
- let SubRegIndices = [sub_64];
+ let SubRegs = [SubReg, SubRegH];
+ let SubRegIndices = [sub_64, sub_64_hi_phony];
}
// VSXReg - One of the VSX registers in the range vs32-vs63 with numbering
@@ -155,6 +156,12 @@ foreach Index = 0-31 in {
DwarfRegNum<[!add(Index, 32), !add(Index, 32)]>;
}
+let isArtificial = 1 in {
+ foreach Index = 0-31 in {
+ def FH#Index : FPR<-1, "">;
+ }
+}
+
// Floating-point pair registers
foreach Index = { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 } in {
def Fpair#Index : FPPair<"fp"#Index, Index>;
@@ -168,15 +175,21 @@ foreach Index = 0-31 in {
DwarfRegNum<[!add(Index, 77), !add(Index, 77)]>;
}
+let isArtificial = 1 in {
+ foreach Index = 0-31 in {
+ def VFH#Index : VF<-1, "">;
+ }
+}
+
// Vector registers
foreach Index = 0-31 in {
- def V#Index : VR<!cast<VF>("VF"#Index), "v"#Index>,
- DwarfRegNum<[!add(Index, 77), !add(Index, 77)]>;
+ def V#Index : VR<!cast<VF>("VF"#Index), !cast<VF>("VFH"#Index), "v"#Index>,
+ DwarfRegNum<[!add(Index, 77)]>;
}
// VSX registers
foreach Index = 0-31 in {
- def VSL#Index : VSRL<!cast<FPR>("F"#Index), "vs"#Index>,
+ def VSL#Index : VSRL<!cast<FPR>("F"#Index), !cast<FPR>("FH"#Index), "vs"#Index>,
DwarfRegAlias<!cast<FPR>("F"#Index)>;
}
diff --git a/llvm/test/CodeGen/PowerPC/frem.ll b/llvm/test/CodeGen/PowerPC/frem.ll
index 8cb68e60f7f9..19b4b1c9cdf9 100644
--- a/llvm/test/CodeGen/PowerPC/frem.ll
+++ b/llvm/test/CodeGen/PowerPC/frem.ll
@@ -70,7 +70,6 @@ define <4 x float> @frem4x32(<4 x float> %a, <4 x float> %b) {
; CHECK-NEXT: xscvspdpn 2, 0
; CHECK-NEXT: bl fmodf
; CHECK-NEXT: nop
-; CHECK-NEXT: # kill: def $f1 killed $f1 def $vsl1
; CHECK-NEXT: xxmrghd 0, 1, 61
; CHECK-NEXT: xscvspdpn 1, 62
; CHECK-NEXT: xscvspdpn 2, 63
@@ -84,7 +83,6 @@ define <4 x float> @frem4x32(<4 x float> %a, <4 x float> %b) {
; CHECK-NEXT: xscvspdpn 2, 0
; CHECK-NEXT: bl fmodf
; CHECK-NEXT: nop
-; CHECK-NEXT: # kill: def $f1 killed $f1 def $vsl1
; CHECK-NEXT: xxmrghd 0, 61, 1
; CHECK-NEXT: lxv 63, 80(1) # 16-byte Folded Reload
; CHECK-NEXT: lxv 62, 64(1) # 16-byte Folded Reload
@@ -124,11 +122,8 @@ define <2 x double> @frem2x64(<2 x double> %a, <2 x double> %b) {
; CHECK-NEXT: xscpsgndp 61, 1, 1
; CHECK-NEXT: xxswapd 1, 62
; CHECK-NEXT: xxswapd 2, 63
-; CHECK-NEXT: # kill: def $f1 killed $f1 killed $vsl1
-; CHECK-NEXT: # kill: def $f2 killed $f2 killed $vsl2
; CHECK-NEXT: bl fmod
; CHECK-NEXT: nop
-; CHECK-NEXT: # kill: def $f1 killed $f1 def $vsl1
; CHECK-NEXT: xxmrghd 34, 61, 1
; CHECK-NEXT: lxv 63, 64(1) # 16-byte Folded Reload
; CHECK-NEXT: lxv 62, 48(1) # 16-byte Folded Reload
diff --git a/llvm/test/CodeGen/PowerPC/subreg-lanemasks.mir b/llvm/test/CodeGen/PowerPC/subreg-lanemasks.mir
new file mode 100644
index 000000000000..28a4d0347f10
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/subreg-lanemasks.mir
@@ -0,0 +1,63 @@
+# RUN: llc -mcpu=pwr10 -O3 -ppc-track-subreg-liveness -verify-machineinstrs \
+# RUN: -mtriple=powerpc64le-unknown-linux-gnu -run-pass=greedy,virtregrewriter \
+# RUN: -debug-only=regalloc -o - %s 2>&1 >/dev/null | FileCheck %s
+
+# Keep track of all of the lanemasks for various subregsiters.
+#
+# CHECK: %3 [80r,80d:0) 0 at 80r L000000000000000C [80r,80d:0) 0 at 80r weight:0.000000e+00
+# CHECK: %4 [96r,96d:0) 0 at 96r L0000000000003000 [96r,96d:0) 0 at 96r weight:0.000000e+00
+# CHECK: %5 [112r,112d:0) 0 at 112r L000000000000000C [112r,112d:0) 0 at 112r weight:0.000000e+00
+# CHECK: %6 [128r,128d:0) 0 at 128r L0000000000003000 [128r,128d:0) 0 at 128r weight:0.000000e+00
+# CHECK: %7 [144r,144d:0) 0 at 144r L0000000000000004 [144r,144d:0) 0 at 144r weight:0.000000e+00
+# CHECK: %8 [160r,160d:0) 0 at 160r L0000000000001000 [160r,160d:0) 0 at 160r weight:0.000000e+00
+# CHECK: %9 [176r,176d:0) 0 at 176r L0000000000000004 [176r,176d:0) 0 at 176r weight:0.000000e+00
+# CHECK: %10 [192r,192d:0) 0 at 192r L0000000000001000 [192r,192d:0) 0 at 192r weight:0.000000e+00
+# CHECK: %11 [208r,208d:0) 0 at 208r L0000000000004000 [208r,208d:0) 0 at 208r weight:0.000000e+00
+# CHECK: %12 [224r,224d:0) 0 at 224r L0000000000010000 [224r,224d:0) 0 at 224r weight:0.000000e+00
+# CHECK: %13 [240r,240d:0) 0 at 240r L000000000000300C [240r,240d:0) 0 at 240r weight:0.000000e+00
+# CHECK: %14 [256r,256d:0) 0 at 256r L000000000003C000 [256r,256d:0) 0 at 256r weight:0.000000e+00
+
+
+# CHECK: 0B bb.0
+# CHECK-NEXT: liveins
+# CHECK-NEXT: 16B %0:vsrc = COPY $v2
+# CHECK-NEXT: 32B %float:fprc = COPY %0.sub_64:vsrc
+# CHECK-NEXT: 48B dead undef %pair.sub_vsx0:vsrprc = COPY $v2
+# CHECK-NEXT: 64B undef %15.sub_vsx1:vsrprc = COPY $v3
+# CHECK-NEXT: 80B dead undef %3.sub_vsx0:vsrprc = COPY %0:vsrc
+# CHECK-NEXT: 96B dead undef %4.sub_vsx1:vsrprc = COPY %0:vsrc
+# CHECK-NEXT: 112B dead undef %5.sub_vsx0:accrc = COPY %0:vsrc
+# CHECK-NEXT: 128B dead undef %6.sub_vsx1:accrc = COPY %0:vsrc
+# CHECK-NEXT: 144B dead undef %7.sub_64:vsrprc = COPY %float:fprc
+# CHECK-NEXT: 160B dead undef %8.sub_vsx1_then_sub_64:vsrprc = COPY %float:fprc
+# CHECK-NEXT: 176B dead undef %9.sub_64:accrc = COPY %float:fprc
+# CHECK-NEXT: 192B dead undef %10.sub_vsx1_then_sub_64:accrc = COPY %float:fprc
+# CHECK-NEXT: 208B dead undef %11.sub_pair1_then_sub_64:accrc = COPY %float:fprc
+# CHECK-NEXT: 224B dead undef %12.sub_pair1_then_sub_vsx1_then_sub_64:accrc = COPY %float:fprc
+# CHECK-NEXT: 240B dead undef %13.sub_pair0:accrc = COPY %15:vsrprc
+# CHECK-NEXT: 256B dead undef %14.sub_pair1:accrc = COPY %15:vsrprc
+
+
+---
+name: test
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $v2, $v3
+ %0:vsrc = COPY $v2
+ %float:fprc = COPY %0.sub_64
+ undef %pair.sub_vsx0:vsrprc = COPY $v2
+ undef %pair.sub_vsx1:vsrprc = COPY $v3
+ undef %1.sub_vsx0:vsrprc = COPY %0
+ undef %2.sub_vsx1:vsrprc = COPY %0
+ undef %3.sub_vsx0:accrc = COPY %0
+ undef %4.sub_vsx1:accrc = COPY %0
+ undef %5.sub_64:vsrprc = COPY %float
+ undef %6.sub_vsx1_then_sub_64:vsrprc = COPY %float
+ undef %7.sub_64:accrc = COPY %float
+ undef %8.sub_vsx1_then_sub_64:accrc = COPY %float
+ undef %9.sub_pair1_then_sub_64:accrc = COPY %float
+ undef %10.sub_pair1_then_sub_vsx1_then_sub_64:accrc = COPY %float
+ undef %11.sub_pair0:accrc = COPY %pair
+ undef %12.sub_pair1:accrc = COPY %pair
+...
``````````
</details>
https://github.com/llvm/llvm-project/pull/94628
More information about the llvm-commits
mailing list