[llvm] 78406ac - [PowerPC][P10] Add Vector pair calling convention
Stefan Pintilie via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 15 12:08:47 PDT 2022
Author: Stefan Pintilie
Date: 2022-03-15T14:08:42-05:00
New Revision: 78406ac8985bcefcf38d00c6fd112067cc773d96
URL: https://github.com/llvm/llvm-project/commit/78406ac8985bcefcf38d00c6fd112067cc773d96
DIFF: https://github.com/llvm/llvm-project/commit/78406ac8985bcefcf38d00c6fd112067cc773d96.diff
LOG: [PowerPC][P10] Add Vector pair calling convention
Add the calling convention for the vector pair registers.
These registers overlap with the vector registers.
Part of an original patch by: Lei Huang
Reviewed By: nemanjai, #powerpc
Differential Revision: https://reviews.llvm.org/D117225
Added:
Modified:
llvm/lib/Target/PowerPC/PPCCallingConv.td
llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
llvm/test/CodeGen/PowerPC/mma-acc-spill.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/PowerPC/PPCCallingConv.td b/llvm/lib/Target/PowerPC/PPCCallingConv.td
index 1e81276f1de35..1901e8d1ebf10 100644
--- a/llvm/lib/Target/PowerPC/PPCCallingConv.td
+++ b/llvm/lib/Target/PowerPC/PPCCallingConv.td
@@ -363,3 +363,25 @@ def CSR_64_AllRegs_VSX : CalleeSavedRegs<(add CSR_64_AllRegs_Altivec,
def CSR_64_AllRegs_AIX_Dflt_VSX : CalleeSavedRegs<(add CSR_64_AllRegs_Altivec,
(sequence "VSL%u", 0, 19))>;
+
+def CSR_ALL_VSRP : CalleeSavedRegs<(sequence "VSRp%u", 0, 31)>;
+
+def CSR_VSRP :
+ CalleeSavedRegs<(add VSRp26, VSRp27, VSRp28, VSRp29, VSRp30, VSRp31)>;
+
+def CSR_SVR432_VSRP : CalleeSavedRegs<(add CSR_SVR432_Altivec, CSR_VSRP)>;
+
+def CSR_SVR464_VSRP : CalleeSavedRegs<(add CSR_PPC64_Altivec, CSR_VSRP)>;
+
+def CSR_SVR464_R2_VSRP : CalleeSavedRegs<(add CSR_SVR464_VSRP, X2)>;
+
+def CSR_SVR32_ColdCC_VSRP : CalleeSavedRegs<(add CSR_SVR32_ColdCC_Altivec,
+ (sub CSR_ALL_VSRP, VSRp17))>;
+
+def CSR_SVR64_ColdCC_VSRP : CalleeSavedRegs<(add CSR_SVR64_ColdCC,
+ (sub CSR_ALL_VSRP, VSRp17))>;
+
+def CSR_SVR64_ColdCC_R2_VSRP : CalleeSavedRegs<(add CSR_SVR64_ColdCC_VSRP, X2)>;
+
+def CSR_64_AllRegs_VSRP :
+ CalleeSavedRegs<(add CSR_64_AllRegs_VSX, CSR_ALL_VSRP)>;
diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
index 65c969c196e1d..42e7b9684f2d0 100644
--- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -1974,6 +1974,15 @@ void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF,
const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
+ // Do not explicitly save the callee saved VSRp registers.
+ // The individual VSR subregisters will be saved instead.
+ SavedRegs.reset(PPC::VSRp26);
+ SavedRegs.reset(PPC::VSRp27);
+ SavedRegs.reset(PPC::VSRp28);
+ SavedRegs.reset(PPC::VSRp29);
+ SavedRegs.reset(PPC::VSRp30);
+ SavedRegs.reset(PPC::VSRp31);
+
// Save and clear the LR state.
PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
unsigned LR = RegInfo->getRARegister();
diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 76b016c0ee792..4896591c338cf 100644
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -183,6 +183,8 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
if (!TM.isPPC64() && Subtarget.isAIXABI())
report_fatal_error("AnyReg unimplemented on 32-bit AIX.");
if (Subtarget.hasVSX()) {
+ if (Subtarget.pairedVectorMemops())
+ return CSR_64_AllRegs_VSRP_SaveList;
if (Subtarget.isAIXABI() && !TM.getAIXExtendedAltivecABI())
return CSR_64_AllRegs_AIX_Dflt_VSX_SaveList;
return CSR_64_AllRegs_VSX_SaveList;
@@ -210,6 +212,9 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
if (Subtarget.isAIXABI())
report_fatal_error("Cold calling unimplemented on AIX.");
if (TM.isPPC64()) {
+ if (Subtarget.pairedVectorMemops())
+ return SaveR2 ? CSR_SVR64_ColdCC_R2_VSRP_SaveList
+ : CSR_SVR64_ColdCC_VSRP_SaveList;
if (Subtarget.hasAltivec())
return SaveR2 ? CSR_SVR64_ColdCC_R2_Altivec_SaveList
: CSR_SVR64_ColdCC_Altivec_SaveList;
@@ -217,7 +222,9 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
: CSR_SVR64_ColdCC_SaveList;
}
// 32-bit targets.
- if (Subtarget.hasAltivec())
+ if (Subtarget.pairedVectorMemops())
+ return CSR_SVR32_ColdCC_VSRP_SaveList;
+ else if (Subtarget.hasAltivec())
return CSR_SVR32_ColdCC_Altivec_SaveList;
else if (Subtarget.hasSPE())
return CSR_SVR32_ColdCC_SPE_SaveList;
@@ -225,6 +232,8 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
}
// Standard calling convention CSRs.
if (TM.isPPC64()) {
+ if (Subtarget.pairedVectorMemops())
+ return SaveR2 ? CSR_SVR464_R2_VSRP_SaveList : CSR_SVR464_VSRP_SaveList;
if (Subtarget.hasAltivec() &&
(!Subtarget.isAIXABI() || TM.getAIXExtendedAltivecABI())) {
return SaveR2 ? CSR_PPC64_R2_Altivec_SaveList
@@ -239,6 +248,8 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
: CSR_AIX32_SaveList;
return CSR_AIX32_SaveList;
}
+ if (Subtarget.pairedVectorMemops())
+ return CSR_SVR432_VSRP_SaveList;
if (Subtarget.hasAltivec())
return CSR_SVR432_Altivec_SaveList;
else if (Subtarget.hasSPE())
@@ -252,6 +263,8 @@ PPCRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
if (CC == CallingConv::AnyReg) {
if (Subtarget.hasVSX()) {
+ if (Subtarget.pairedVectorMemops())
+ return CSR_64_AllRegs_VSRP_RegMask;
if (Subtarget.isAIXABI() && !TM.getAIXExtendedAltivecABI())
return CSR_64_AllRegs_AIX_Dflt_VSX_RegMask;
return CSR_64_AllRegs_VSX_RegMask;
@@ -275,20 +288,32 @@ PPCRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
}
if (CC == CallingConv::Cold) {
- return TM.isPPC64() ? (Subtarget.hasAltivec() ? CSR_SVR64_ColdCC_Altivec_RegMask
- : CSR_SVR64_ColdCC_RegMask)
- : (Subtarget.hasAltivec() ? CSR_SVR32_ColdCC_Altivec_RegMask
- : (Subtarget.hasSPE()
- ? CSR_SVR32_ColdCC_SPE_RegMask
- : CSR_SVR32_ColdCC_RegMask));
+ if (TM.isPPC64())
+ return Subtarget.pairedVectorMemops()
+ ? CSR_SVR64_ColdCC_VSRP_RegMask
+ : (Subtarget.hasAltivec() ? CSR_SVR64_ColdCC_Altivec_RegMask
+ : CSR_SVR64_ColdCC_RegMask);
+ else
+ return Subtarget.pairedVectorMemops()
+ ? CSR_SVR32_ColdCC_VSRP_RegMask
+ : (Subtarget.hasAltivec()
+ ? CSR_SVR32_ColdCC_Altivec_RegMask
+ : (Subtarget.hasSPE() ? CSR_SVR32_ColdCC_SPE_RegMask
+ : CSR_SVR32_ColdCC_RegMask));
}
- return TM.isPPC64() ? (Subtarget.hasAltivec() ? CSR_PPC64_Altivec_RegMask
- : CSR_PPC64_RegMask)
- : (Subtarget.hasAltivec()
- ? CSR_SVR432_Altivec_RegMask
- : (Subtarget.hasSPE() ? CSR_SVR432_SPE_RegMask
- : CSR_SVR432_RegMask));
+ if (TM.isPPC64())
+ return Subtarget.pairedVectorMemops()
+ ? CSR_SVR464_VSRP_RegMask
+ : (Subtarget.hasAltivec() ? CSR_PPC64_Altivec_RegMask
+ : CSR_PPC64_RegMask);
+ else
+ return Subtarget.pairedVectorMemops()
+ ? CSR_SVR432_VSRP_RegMask
+ : (Subtarget.hasAltivec()
+ ? CSR_SVR432_Altivec_RegMask
+ : (Subtarget.hasSPE() ? CSR_SVR432_SPE_RegMask
+ : CSR_SVR432_RegMask));
}
const uint32_t*
diff --git a/llvm/test/CodeGen/PowerPC/mma-acc-spill.ll b/llvm/test/CodeGen/PowerPC/mma-acc-spill.ll
index 11a06034e3844..d51df192b5d15 100644
--- a/llvm/test/CodeGen/PowerPC/mma-acc-spill.ll
+++ b/llvm/test/CodeGen/PowerPC/mma-acc-spill.ll
@@ -13,23 +13,29 @@ define void @intrinsics1(<16 x i8> %vc1, <16 x i8> %vc2, <16 x i8> %vc3, <16 x i
; CHECK-LABEL: intrinsics1:
; CHECK: # %bb.0:
; CHECK-NEXT: mflr r0
+; CHECK-NEXT: std r0, 16(r1)
+; CHECK-NEXT: stdu r1, -176(r1)
; CHECK-NEXT: .cfi_def_cfa_offset 176
; CHECK-NEXT: .cfi_offset lr, 16
; CHECK-NEXT: .cfi_offset r30, -16
-; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r0, 16(r1)
-; CHECK-NEXT: stdu r1, -176(r1)
-; CHECK-NEXT: # kill: def $v5 killed $v5 killed $vsrp18 def $vsrp18
-; CHECK-NEXT: # kill: def $v4 killed $v4 killed $vsrp18 def $vsrp18
-; CHECK-NEXT: # kill: def $v3 killed $v3 killed $vsrp17 def $vsrp17
-; CHECK-NEXT: # kill: def $v2 killed $v2 killed $vsrp17 def $vsrp17
-; CHECK-NEXT: xxlor vs0, v2, v2
-; CHECK-NEXT: xxlor vs1, v3, v3
-; CHECK-NEXT: stxvp vsp34, 128(r1) # 32-byte Folded Spill
-; CHECK-NEXT: xxlor vs2, v4, v4
-; CHECK-NEXT: xxlor vs3, v5, v5
+; CHECK-NEXT: .cfi_offset v28, -80
+; CHECK-NEXT: .cfi_offset v29, -64
+; CHECK-NEXT: .cfi_offset v30, -48
+; CHECK-NEXT: .cfi_offset v31, -32
+; CHECK-NEXT: stxv v28, 96(r1) # 16-byte Folded Spill
+; CHECK-NEXT: stxv v29, 112(r1) # 16-byte Folded Spill
+; CHECK-NEXT: vmr v29, v3
+; CHECK-NEXT: vmr v28, v2
+; CHECK-NEXT: xxlor vs0, v28, v28
+; CHECK-NEXT: stxv v30, 128(r1) # 16-byte Folded Spill
+; CHECK-NEXT: stxv v31, 144(r1) # 16-byte Folded Spill
+; CHECK-NEXT: vmr v31, v5
+; CHECK-NEXT: vmr v30, v4
+; CHECK-NEXT: std r30, 160(r1) # 8-byte Folded Spill
+; CHECK-NEXT: xxlor vs1, v29, v29
+; CHECK-NEXT: xxlor vs2, v30, v30
+; CHECK-NEXT: xxlor vs3, v31, v31
; CHECK-NEXT: ld r30, 272(r1)
-; CHECK-NEXT: stxvp vsp36, 96(r1) # 32-byte Folded Spill
; CHECK-NEXT: xxmtacc acc0
; CHECK-NEXT: xvf16ger2pp acc0, v2, v4
; CHECK-NEXT: xxmfacc acc0
@@ -39,17 +45,19 @@ define void @intrinsics1(<16 x i8> %vc1, <16 x i8> %vc2, <16 x i8> %vc3, <16 x i
; CHECK-NEXT: lxvp vsp0, 64(r1)
; CHECK-NEXT: lxvp vsp2, 32(r1)
; CHECK-NEXT: xxmtacc acc0
-; CHECK-NEXT: lxvp vsp34, 128(r1) # 32-byte Folded Reload
-; CHECK-NEXT: lxvp vsp36, 96(r1) # 32-byte Folded Reload
-; CHECK-NEXT: xvf16ger2pp acc0, v2, v4
+; CHECK-NEXT: xvf16ger2pp acc0, v28, v30
+; CHECK-NEXT: lxv v31, 144(r1) # 16-byte Folded Reload
+; CHECK-NEXT: lxv v30, 128(r1) # 16-byte Folded Reload
+; CHECK-NEXT: lxv v29, 112(r1) # 16-byte Folded Reload
+; CHECK-NEXT: lxv v28, 96(r1) # 16-byte Folded Reload
; CHECK-NEXT: xxmfacc acc0
; CHECK-NEXT: stxv vs0, 48(r30)
; CHECK-NEXT: stxv vs1, 32(r30)
; CHECK-NEXT: stxv vs2, 16(r30)
; CHECK-NEXT: stxv vs3, 0(r30)
+; CHECK-NEXT: ld r30, 160(r1) # 8-byte Folded Reload
; CHECK-NEXT: addi r1, r1, 176
; CHECK-NEXT: ld r0, 16(r1)
-; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
;
@@ -61,17 +69,23 @@ define void @intrinsics1(<16 x i8> %vc1, <16 x i8> %vc2, <16 x i8> %vc3, <16 x i
; CHECK-BE-NEXT: .cfi_def_cfa_offset 256
; CHECK-BE-NEXT: .cfi_offset lr, 16
; CHECK-BE-NEXT: .cfi_offset r30, -16
+; CHECK-BE-NEXT: .cfi_offset v28, -80
+; CHECK-BE-NEXT: .cfi_offset v29, -64
+; CHECK-BE-NEXT: .cfi_offset v30, -48
+; CHECK-BE-NEXT: .cfi_offset v31, -32
+; CHECK-BE-NEXT: stxv v28, 176(r1) # 16-byte Folded Spill
+; CHECK-BE-NEXT: stxv v29, 192(r1) # 16-byte Folded Spill
+; CHECK-BE-NEXT: vmr v29, v3
+; CHECK-BE-NEXT: vmr v28, v2
+; CHECK-BE-NEXT: xxlor vs0, v28, v28
+; CHECK-BE-NEXT: stxv v30, 208(r1) # 16-byte Folded Spill
+; CHECK-BE-NEXT: stxv v31, 224(r1) # 16-byte Folded Spill
+; CHECK-BE-NEXT: vmr v31, v5
+; CHECK-BE-NEXT: vmr v30, v4
; CHECK-BE-NEXT: std r30, 240(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT: # kill: def $v5 killed $v5 killed $vsrp18 def $vsrp18
-; CHECK-BE-NEXT: # kill: def $v4 killed $v4 killed $vsrp18 def $vsrp18
-; CHECK-BE-NEXT: # kill: def $v3 killed $v3 killed $vsrp17 def $vsrp17
-; CHECK-BE-NEXT: # kill: def $v2 killed $v2 killed $vsrp17 def $vsrp17
-; CHECK-BE-NEXT: xxlor vs0, v2, v2
-; CHECK-BE-NEXT: xxlor vs1, v3, v3
-; CHECK-BE-NEXT: stxvp vsp34, 208(r1) # 32-byte Folded Spill
-; CHECK-BE-NEXT: xxlor vs2, v4, v4
-; CHECK-BE-NEXT: xxlor vs3, v5, v5
-; CHECK-BE-NEXT: stxvp vsp36, 176(r1) # 32-byte Folded Spill
+; CHECK-BE-NEXT: xxlor vs1, v29, v29
+; CHECK-BE-NEXT: xxlor vs2, v30, v30
+; CHECK-BE-NEXT: xxlor vs3, v31, v31
; CHECK-BE-NEXT: ld r30, 368(r1)
; CHECK-BE-NEXT: xxmtacc acc0
; CHECK-BE-NEXT: xvf16ger2pp acc0, v2, v4
@@ -83,9 +97,11 @@ define void @intrinsics1(<16 x i8> %vc1, <16 x i8> %vc2, <16 x i8> %vc3, <16 x i
; CHECK-BE-NEXT: lxvp vsp0, 112(r1)
; CHECK-BE-NEXT: lxvp vsp2, 144(r1)
; CHECK-BE-NEXT: xxmtacc acc0
-; CHECK-BE-NEXT: lxvp vsp34, 208(r1) # 32-byte Folded Reload
-; CHECK-BE-NEXT: lxvp vsp36, 176(r1) # 32-byte Folded Reload
-; CHECK-BE-NEXT: xvf16ger2pp acc0, v2, v4
+; CHECK-BE-NEXT: xvf16ger2pp acc0, v28, v30
+; CHECK-BE-NEXT: lxv v31, 224(r1) # 16-byte Folded Reload
+; CHECK-BE-NEXT: lxv v30, 208(r1) # 16-byte Folded Reload
+; CHECK-BE-NEXT: lxv v29, 192(r1) # 16-byte Folded Reload
+; CHECK-BE-NEXT: lxv v28, 176(r1) # 16-byte Folded Reload
; CHECK-BE-NEXT: xxmfacc acc0
; CHECK-BE-NEXT: stxv vs1, 16(r30)
; CHECK-BE-NEXT: stxv vs0, 0(r30)
More information about the llvm-commits mailing list