[llvm] 821a8f6 - [PowerPC] Fix spilling of paired VSX registers
Nemanja Ivanovic via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 15 12:13:26 PDT 2021
Author: Nemanja Ivanovic
Date: 2021-06-15T14:13:17-05:00
New Revision: 821a8f680eec356be5894559bcea3f718e8bedd4
URL: https://github.com/llvm/llvm-project/commit/821a8f680eec356be5894559bcea3f718e8bedd4
DIFF: https://github.com/llvm/llvm-project/commit/821a8f680eec356be5894559bcea3f718e8bedd4.diff
LOG: [PowerPC] Fix spilling of paired VSX registers
We added STXVP/LXVP for spilling and restoring paired VSX registers,
but neglected to add frame index (FI) elimination handling for these
opcodes. As a result, we end up producing malformed MachineInstrs that
have register operands in place of immediates.
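A minimal before/after sketch, drawn from the mma-acc-spill.ll test
changes further down, of what the malformed output looked like and what
is emitted once FI elimination handles these opcodes (offsets shown are
the ones from that test and are illustrative only):

    # before the fix: the offset is materialized into r3 and a register
    # ends up in the slot where the immediate displacement belongs
    li r3, 64
    stxvp vsp0, r1(r3)

    # after the fix: FI elimination folds the offset into the instruction
    stxvp vsp0, 64(r1)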
Added:
llvm/test/CodeGen/PowerPC/spill-vec-pair.ll
Modified:
llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
llvm/test/CodeGen/PowerPC/mma-acc-spill.ll
llvm/test/CodeGen/PowerPC/mma-intrinsics.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 8adef376149d..bc950a73beb6 100644
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -133,6 +133,12 @@ PPCRegisterInfo::PPCRegisterInfo(const PPCTargetMachine &TM)
ImmToIdxMap[PPC::EVSTDD] = PPC::EVSTDDX;
ImmToIdxMap[PPC::SPESTW] = PPC::SPESTWX;
ImmToIdxMap[PPC::SPELWZ] = PPC::SPELWZX;
+
+ // Power10
+ ImmToIdxMap[PPC::LXVP] = PPC::LXVPX;
+ ImmToIdxMap[PPC::STXVP] = PPC::STXVPX;
+ ImmToIdxMap[PPC::PLXVP] = PPC::LXVPX;
+ ImmToIdxMap[PPC::PSTXVP] = PPC::STXVPX;
}
/// getPointerRegClass - Return the register class to use to hold pointers.
@@ -1243,6 +1249,8 @@ static unsigned offsetMinAlignForOpcode(unsigned OpC) {
case PPC::LXV:
case PPC::STXV:
case PPC::LQ:
+ case PPC::LXVP:
+ case PPC::STXVP:
return 16;
}
}
@@ -1370,6 +1378,16 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
Offset += MFI.getStackSize();
}
+ // If we encounter an LXVP/STXVP with an offset that doesn't fit, we can
+ // transform it to the prefixed version so we don't have to use the XForm.
+ if ((OpC == PPC::LXVP || OpC == PPC::STXVP) &&
+ (!isInt<16>(Offset) || (Offset % offsetMinAlign(MI)) != 0) &&
+ Subtarget.hasPrefixInstrs()) {
+ unsigned NewOpc = OpC == PPC::LXVP ? PPC::PLXVP : PPC::PSTXVP;
+ MI.setDesc(TII.get(NewOpc));
+ OpC = NewOpc;
+ }
+
// If we can, encode the offset directly into the instruction. If this is a
// normal PPC "ri" instruction, any 16-bit value can be safely encoded. If
// this is a PPC64 "ix" instruction, only a 16-bit value with the low two bits
@@ -1378,9 +1396,13 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// happen in invalid code.
assert(OpC != PPC::DBG_VALUE &&
"This should be handled in a target-independent way");
+ // FIXME: This should be factored out to a separate function as prefixed
+ // instructions add a number of opcodes for which we can use 34-bit imm.
bool OffsetFitsMnemonic = (OpC == PPC::EVSTDD || OpC == PPC::EVLDD) ?
isUInt<8>(Offset) :
isInt<16>(Offset);
+ if (OpC == PPC::PLXVP || OpC == PPC::PSTXVP)
+ OffsetFitsMnemonic = isInt<34>(Offset);
if (!noImmForm && ((OffsetFitsMnemonic &&
((Offset % offsetMinAlign(MI)) == 0)) ||
OpC == TargetOpcode::STACKMAP ||
diff --git a/llvm/test/CodeGen/PowerPC/mma-acc-spill.ll b/llvm/test/CodeGen/PowerPC/mma-acc-spill.ll
index 1285b6b61a3d..f5e7e8a43a02 100644
--- a/llvm/test/CodeGen/PowerPC/mma-acc-spill.ll
+++ b/llvm/test/CodeGen/PowerPC/mma-acc-spill.ll
@@ -19,35 +19,27 @@ define void @intrinsics1(<16 x i8> %vc1, <16 x i8> %vc2, <16 x i8> %vc3, <16 x i
; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r0, 16(r1)
; CHECK-NEXT: stdu r1, -176(r1)
-; CHECK-NEXT: li r3, 128
; CHECK-NEXT: # kill: def $v5 killed $v5 killed $vsrp18 def $vsrp18
; CHECK-NEXT: # kill: def $v4 killed $v4 killed $vsrp18 def $vsrp18
; CHECK-NEXT: # kill: def $v3 killed $v3 killed $vsrp17 def $vsrp17
; CHECK-NEXT: # kill: def $v2 killed $v2 killed $vsrp17 def $vsrp17
; CHECK-NEXT: xxlor vs0, v2, v2
; CHECK-NEXT: xxlor vs1, v3, v3
+; CHECK-NEXT: stxvp vsp34, 128(r1) # 32-byte Folded Spill
; CHECK-NEXT: ld r30, 272(r1)
-; CHECK-NEXT: stxvp vsp34, r1(r3) # 32-byte Folded Spill
-; CHECK-NEXT: li r3, 96
+; CHECK-NEXT: stxvp vsp36, 96(r1) # 32-byte Folded Spill
; CHECK-NEXT: xxlor vs2, v4, v4
; CHECK-NEXT: xxlor vs3, v5, v5
-; CHECK-NEXT: stxvp vsp36, r1(r3) # 32-byte Folded Spill
; CHECK-NEXT: xxmtacc acc0
-; CHECK-NEXT: li r3, 64
; CHECK-NEXT: xvf16ger2pp acc0, v2, v4
; CHECK-NEXT: xxmfacc acc0
-; CHECK-NEXT: stxvp vsp0, r1(r3)
-; CHECK-NEXT: li r3, 32
-; CHECK-NEXT: stxvp vsp2, r1(r3)
+; CHECK-NEXT: stxvp vsp0, 64(r1)
+; CHECK-NEXT: stxvp vsp2, 32(r1)
; CHECK-NEXT: bl foo@notoc
-; CHECK-NEXT: li r3, 64
-; CHECK-NEXT: lxvp vsp0, r1(r3)
-; CHECK-NEXT: li r3, 32
-; CHECK-NEXT: lxvp vsp2, r1(r3)
-; CHECK-NEXT: li r3, 128
-; CHECK-NEXT: lxvp vsp4, r1(r3) # 32-byte Folded Reload
-; CHECK-NEXT: li r3, 96
-; CHECK-NEXT: lxvp vsp6, r1(r3) # 32-byte Folded Reload
+; CHECK-NEXT: lxvp vsp0, 64(r1)
+; CHECK-NEXT: lxvp vsp2, 32(r1)
+; CHECK-NEXT: lxvp vsp4, 128(r1) # 32-byte Folded Reload
+; CHECK-NEXT: lxvp vsp6, 96(r1) # 32-byte Folded Reload
; CHECK-NEXT: xxmtacc acc0
; CHECK-NEXT: xvf16ger2pp acc0, vs4, vs6
; CHECK-NEXT: xxmfacc acc0
@@ -69,37 +61,29 @@ define void @intrinsics1(<16 x i8> %vc1, <16 x i8> %vc2, <16 x i8> %vc3, <16 x i
; CHECK-BE-NEXT: .cfi_def_cfa_offset 256
; CHECK-BE-NEXT: .cfi_offset lr, 16
; CHECK-BE-NEXT: .cfi_offset r30, -16
-; CHECK-BE-NEXT: li r3, 208
; CHECK-BE-NEXT: std r30, 240(r1) # 8-byte Folded Spill
; CHECK-BE-NEXT: # kill: def $v5 killed $v5 killed $vsrp18 def $vsrp18
; CHECK-BE-NEXT: # kill: def $v4 killed $v4 killed $vsrp18 def $vsrp18
; CHECK-BE-NEXT: # kill: def $v3 killed $v3 killed $vsrp17 def $vsrp17
; CHECK-BE-NEXT: # kill: def $v2 killed $v2 killed $vsrp17 def $vsrp17
; CHECK-BE-NEXT: xxlor vs0, v2, v2
-; CHECK-BE-NEXT: ld r30, 368(r1)
-; CHECK-BE-NEXT: stxvp vsp34, r1(r3) # 32-byte Folded Spill
; CHECK-BE-NEXT: xxlor vs1, v3, v3
-; CHECK-BE-NEXT: li r3, 176
+; CHECK-BE-NEXT: stxvp vsp34, 208(r1) # 32-byte Folded Spill
+; CHECK-BE-NEXT: ld r30, 368(r1)
; CHECK-BE-NEXT: xxlor vs2, v4, v4
; CHECK-BE-NEXT: xxlor vs3, v5, v5
-; CHECK-BE-NEXT: stxvp vsp36, r1(r3) # 32-byte Folded Spill
+; CHECK-BE-NEXT: stxvp vsp36, 176(r1) # 32-byte Folded Spill
; CHECK-BE-NEXT: xxmtacc acc0
-; CHECK-BE-NEXT: li r3, 112
; CHECK-BE-NEXT: xvf16ger2pp acc0, v2, v4
; CHECK-BE-NEXT: xxmfacc acc0
-; CHECK-BE-NEXT: stxvp vsp0, r1(r3)
-; CHECK-BE-NEXT: li r3, 144
-; CHECK-BE-NEXT: stxvp vsp2, r1(r3)
+; CHECK-BE-NEXT: stxvp vsp0, 112(r1)
+; CHECK-BE-NEXT: stxvp vsp2, 144(r1)
; CHECK-BE-NEXT: bl foo
; CHECK-BE-NEXT: nop
-; CHECK-BE-NEXT: li r3, 112
-; CHECK-BE-NEXT: lxvp vsp0, r1(r3)
-; CHECK-BE-NEXT: li r3, 144
-; CHECK-BE-NEXT: lxvp vsp2, r1(r3)
-; CHECK-BE-NEXT: li r3, 208
-; CHECK-BE-NEXT: lxvp vsp4, r1(r3) # 32-byte Folded Reload
-; CHECK-BE-NEXT: li r3, 176
-; CHECK-BE-NEXT: lxvp vsp6, r1(r3) # 32-byte Folded Reload
+; CHECK-BE-NEXT: lxvp vsp0, 112(r1)
+; CHECK-BE-NEXT: lxvp vsp2, 144(r1)
+; CHECK-BE-NEXT: lxvp vsp4, 208(r1) # 32-byte Folded Reload
+; CHECK-BE-NEXT: lxvp vsp6, 176(r1) # 32-byte Folded Reload
; CHECK-BE-NEXT: xxmtacc acc0
; CHECK-BE-NEXT: xvf16ger2pp acc0, vs4, vs6
; CHECK-BE-NEXT: xxmfacc acc0
diff --git a/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll b/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll
index 0207d121369d..1a8d2ece5e64 100644
--- a/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll
+++ b/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll
@@ -588,16 +588,12 @@ define void @testRedundantPrimeUnprime(<512 x i1>* %dst, <16 x i8> %vc) nounwind
; CHECK-NEXT: stxv vs2, 16(r3)
; CHECK-NEXT: stxv vs3, 0(r3)
; CHECK-NEXT: xvf32gerpp acc1, v2, v2
-; CHECK-NEXT: li r3, 64
; CHECK-NEXT: xxmfacc acc1
-; CHECK-NEXT: stxvp vsp4, r1(r3)
-; CHECK-NEXT: li r3, 32
-; CHECK-NEXT: stxvp vsp6, r1(r3)
+; CHECK-NEXT: stxvp vsp4, 64(r1)
+; CHECK-NEXT: stxvp vsp6, 32(r1)
; CHECK-NEXT: bl testRedundantPrimeUnprimeF@notoc
-; CHECK-NEXT: li r3, 64
-; CHECK-NEXT: lxvp vsp0, r1(r3)
-; CHECK-NEXT: li r3, 32
-; CHECK-NEXT: lxvp vsp2, r1(r3)
+; CHECK-NEXT: lxvp vsp0, 64(r1)
+; CHECK-NEXT: lxvp vsp2, 32(r1)
; CHECK-NEXT: stxv vs0, 112(r30)
; CHECK-NEXT: stxv vs1, 96(r30)
; CHECK-NEXT: stxv vs2, 80(r30)
@@ -623,17 +619,13 @@ define void @testRedundantPrimeUnprime(<512 x i1>* %dst, <16 x i8> %vc) nounwind
; CHECK-BE-NEXT: stxv vs3, 48(r3)
; CHECK-BE-NEXT: stxv vs2, 32(r3)
; CHECK-BE-NEXT: xvf32gerpp acc1, v2, v2
-; CHECK-BE-NEXT: li r3, 112
; CHECK-BE-NEXT: xxmfacc acc1
-; CHECK-BE-NEXT: stxvp vsp4, r1(r3)
-; CHECK-BE-NEXT: li r3, 144
-; CHECK-BE-NEXT: stxvp vsp6, r1(r3)
+; CHECK-BE-NEXT: stxvp vsp4, 112(r1)
+; CHECK-BE-NEXT: stxvp vsp6, 144(r1)
; CHECK-BE-NEXT: bl testRedundantPrimeUnprimeF
; CHECK-BE-NEXT: nop
-; CHECK-BE-NEXT: li r3, 112
-; CHECK-BE-NEXT: lxvp vsp0, r1(r3)
-; CHECK-BE-NEXT: li r3, 144
-; CHECK-BE-NEXT: lxvp vsp2, r1(r3)
+; CHECK-BE-NEXT: lxvp vsp0, 112(r1)
+; CHECK-BE-NEXT: lxvp vsp2, 144(r1)
; CHECK-BE-NEXT: stxv vs3, 112(r30)
; CHECK-BE-NEXT: stxv vs2, 96(r30)
; CHECK-BE-NEXT: stxv vs1, 80(r30)
diff --git a/llvm/test/CodeGen/PowerPC/spill-vec-pair.ll b/llvm/test/CodeGen/PowerPC/spill-vec-pair.ll
new file mode 100644
index 000000000000..31043e6fee13
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/spill-vec-pair.ll
@@ -0,0 +1,167 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O3 \
+; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
+; RUN: < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -O3 \
+; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
+; RUN: < %s | FileCheck %s --check-prefix=CHECK-BE
+define dso_local void @test(<256 x i1>* %vpp, <256 x i1>* %vp2) local_unnamed_addr #0 {
+; CHECK-LABEL: test:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: stdu r1, -400(r1)
+; CHECK-NEXT: stfd f14, 256(r1) # 8-byte Folded Spill
+; CHECK-NEXT: stfd f15, 264(r1) # 8-byte Folded Spill
+; CHECK-NEXT: stxv v20, 64(r1) # 16-byte Folded Spill
+; CHECK-NEXT: stxv v21, 80(r1) # 16-byte Folded Spill
+; CHECK-NEXT: stfd f16, 272(r1) # 8-byte Folded Spill
+; CHECK-NEXT: stfd f17, 280(r1) # 8-byte Folded Spill
+; CHECK-NEXT: stxv v22, 96(r1) # 16-byte Folded Spill
+; CHECK-NEXT: stxv v23, 112(r1) # 16-byte Folded Spill
+; CHECK-NEXT: stfd f18, 288(r1) # 8-byte Folded Spill
+; CHECK-NEXT: stfd f19, 296(r1) # 8-byte Folded Spill
+; CHECK-NEXT: stxv v24, 128(r1) # 16-byte Folded Spill
+; CHECK-NEXT: stfd f20, 304(r1) # 8-byte Folded Spill
+; CHECK-NEXT: stfd f21, 312(r1) # 8-byte Folded Spill
+; CHECK-NEXT: stxv v25, 144(r1) # 16-byte Folded Spill
+; CHECK-NEXT: stxv v26, 160(r1) # 16-byte Folded Spill
+; CHECK-NEXT: stfd f22, 320(r1) # 8-byte Folded Spill
+; CHECK-NEXT: stfd f23, 328(r1) # 8-byte Folded Spill
+; CHECK-NEXT: stxv v27, 176(r1) # 16-byte Folded Spill
+; CHECK-NEXT: stfd f24, 336(r1) # 8-byte Folded Spill
+; CHECK-NEXT: stfd f25, 344(r1) # 8-byte Folded Spill
+; CHECK-NEXT: stxv v28, 192(r1) # 16-byte Folded Spill
+; CHECK-NEXT: stxv v29, 208(r1) # 16-byte Folded Spill
+; CHECK-NEXT: stfd f26, 352(r1) # 8-byte Folded Spill
+; CHECK-NEXT: stfd f27, 360(r1) # 8-byte Folded Spill
+; CHECK-NEXT: stxv v30, 224(r1) # 16-byte Folded Spill
+; CHECK-NEXT: stfd f28, 368(r1) # 8-byte Folded Spill
+; CHECK-NEXT: stfd f29, 376(r1) # 8-byte Folded Spill
+; CHECK-NEXT: stxv v31, 240(r1) # 16-byte Folded Spill
+; CHECK-NEXT: stfd f30, 384(r1) # 8-byte Folded Spill
+; CHECK-NEXT: stfd f31, 392(r1) # 8-byte Folded Spill
+; CHECK-NEXT: lxvp vsp0, 0(r3)
+; CHECK-NEXT: stxvp vsp0, 32(r1) # 32-byte Folded Spill
+; CHECK-NEXT: #APP
+; CHECK-NEXT: nop
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: lxvp vsp0, 32(r1) # 32-byte Folded Reload
+; CHECK-NEXT: stxvp vsp0, 0(r4)
+; CHECK-NEXT: lxv v31, 240(r1) # 16-byte Folded Reload
+; CHECK-NEXT: lxv v30, 224(r1) # 16-byte Folded Reload
+; CHECK-NEXT: lxv v29, 208(r1) # 16-byte Folded Reload
+; CHECK-NEXT: lxv v28, 192(r1) # 16-byte Folded Reload
+; CHECK-NEXT: lxv v27, 176(r1) # 16-byte Folded Reload
+; CHECK-NEXT: lxv v26, 160(r1) # 16-byte Folded Reload
+; CHECK-NEXT: lxv v25, 144(r1) # 16-byte Folded Reload
+; CHECK-NEXT: lxv v24, 128(r1) # 16-byte Folded Reload
+; CHECK-NEXT: lxv v23, 112(r1) # 16-byte Folded Reload
+; CHECK-NEXT: lxv v22, 96(r1) # 16-byte Folded Reload
+; CHECK-NEXT: lxv v21, 80(r1) # 16-byte Folded Reload
+; CHECK-NEXT: lxv v20, 64(r1) # 16-byte Folded Reload
+; CHECK-NEXT: lfd f31, 392(r1) # 8-byte Folded Reload
+; CHECK-NEXT: lfd f30, 384(r1) # 8-byte Folded Reload
+; CHECK-NEXT: lfd f29, 376(r1) # 8-byte Folded Reload
+; CHECK-NEXT: lfd f28, 368(r1) # 8-byte Folded Reload
+; CHECK-NEXT: lfd f27, 360(r1) # 8-byte Folded Reload
+; CHECK-NEXT: lfd f26, 352(r1) # 8-byte Folded Reload
+; CHECK-NEXT: lfd f25, 344(r1) # 8-byte Folded Reload
+; CHECK-NEXT: lfd f24, 336(r1) # 8-byte Folded Reload
+; CHECK-NEXT: lfd f23, 328(r1) # 8-byte Folded Reload
+; CHECK-NEXT: lfd f22, 320(r1) # 8-byte Folded Reload
+; CHECK-NEXT: lfd f21, 312(r1) # 8-byte Folded Reload
+; CHECK-NEXT: lfd f20, 304(r1) # 8-byte Folded Reload
+; CHECK-NEXT: lfd f19, 296(r1) # 8-byte Folded Reload
+; CHECK-NEXT: lfd f18, 288(r1) # 8-byte Folded Reload
+; CHECK-NEXT: lfd f17, 280(r1) # 8-byte Folded Reload
+; CHECK-NEXT: lfd f16, 272(r1) # 8-byte Folded Reload
+; CHECK-NEXT: lfd f15, 264(r1) # 8-byte Folded Reload
+; CHECK-NEXT: lfd f14, 256(r1) # 8-byte Folded Reload
+; CHECK-NEXT: addi r1, r1, 400
+; CHECK-NEXT: blr
+;
+; CHECK-BE-LABEL: test:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: stdu r1, -416(r1)
+; CHECK-BE-NEXT: stfd f14, 272(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT: stfd f15, 280(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT: stxv v20, 80(r1) # 16-byte Folded Spill
+; CHECK-BE-NEXT: stxv v21, 96(r1) # 16-byte Folded Spill
+; CHECK-BE-NEXT: stfd f16, 288(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT: stfd f17, 296(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT: stxv v22, 112(r1) # 16-byte Folded Spill
+; CHECK-BE-NEXT: stxv v23, 128(r1) # 16-byte Folded Spill
+; CHECK-BE-NEXT: stfd f18, 304(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT: stfd f19, 312(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT: stxv v24, 144(r1) # 16-byte Folded Spill
+; CHECK-BE-NEXT: stfd f20, 320(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT: stfd f21, 328(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT: stxv v25, 160(r1) # 16-byte Folded Spill
+; CHECK-BE-NEXT: stxv v26, 176(r1) # 16-byte Folded Spill
+; CHECK-BE-NEXT: stfd f22, 336(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT: stfd f23, 344(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT: stxv v27, 192(r1) # 16-byte Folded Spill
+; CHECK-BE-NEXT: stfd f24, 352(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT: stfd f25, 360(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT: stxv v28, 208(r1) # 16-byte Folded Spill
+; CHECK-BE-NEXT: stxv v29, 224(r1) # 16-byte Folded Spill
+; CHECK-BE-NEXT: stfd f26, 368(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT: stfd f27, 376(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT: stxv v30, 240(r1) # 16-byte Folded Spill
+; CHECK-BE-NEXT: stfd f28, 384(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT: stfd f29, 392(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT: stxv v31, 256(r1) # 16-byte Folded Spill
+; CHECK-BE-NEXT: stfd f30, 400(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT: stfd f31, 408(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT: lxvp vsp0, 0(r3)
+; CHECK-BE-NEXT: stxvp vsp0, 48(r1) # 32-byte Folded Spill
+; CHECK-BE-NEXT: #APP
+; CHECK-BE-NEXT: nop
+; CHECK-BE-NEXT: #NO_APP
+; CHECK-BE-NEXT: lxvp vsp0, 48(r1) # 32-byte Folded Reload
+; CHECK-BE-NEXT: stxvp vsp0, 0(r4)
+; CHECK-BE-NEXT: lxv v31, 256(r1) # 16-byte Folded Reload
+; CHECK-BE-NEXT: lxv v30, 240(r1) # 16-byte Folded Reload
+; CHECK-BE-NEXT: lxv v29, 224(r1) # 16-byte Folded Reload
+; CHECK-BE-NEXT: lxv v28, 208(r1) # 16-byte Folded Reload
+; CHECK-BE-NEXT: lxv v27, 192(r1) # 16-byte Folded Reload
+; CHECK-BE-NEXT: lxv v26, 176(r1) # 16-byte Folded Reload
+; CHECK-BE-NEXT: lxv v25, 160(r1) # 16-byte Folded Reload
+; CHECK-BE-NEXT: lxv v24, 144(r1) # 16-byte Folded Reload
+; CHECK-BE-NEXT: lxv v23, 128(r1) # 16-byte Folded Reload
+; CHECK-BE-NEXT: lxv v22, 112(r1) # 16-byte Folded Reload
+; CHECK-BE-NEXT: lxv v21, 96(r1) # 16-byte Folded Reload
+; CHECK-BE-NEXT: lxv v20, 80(r1) # 16-byte Folded Reload
+; CHECK-BE-NEXT: lfd f31, 408(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT: lfd f30, 400(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT: lfd f29, 392(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT: lfd f28, 384(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT: lfd f27, 376(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT: lfd f26, 368(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT: lfd f25, 360(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT: lfd f24, 352(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT: lfd f23, 344(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT: lfd f22, 336(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT: lfd f21, 328(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT: lfd f20, 320(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT: lfd f19, 312(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT: lfd f18, 304(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT: lfd f17, 296(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT: lfd f16, 288(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT: lfd f15, 280(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT: lfd f14, 272(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT: addi r1, r1, 416
+; CHECK-BE-NEXT: blr
+entry:
+ %0 = bitcast <256 x i1>* %vpp to i8*
+ %1 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* %0)
+ tail call void asm sideeffect "nop", "~{memory},~{vs0},~{vs1},~{vs2},~{vs3},~{vs4},~{vs5},~{vs6},~{vs7},~{vs8},~{vs9},~{vs10},~{vs11},~{vs12},~{vs13},~{vs14},~{vs15},~{vs16},~{vs17},~{vs18},~{vs19},~{vs20},~{vs21},~{vs22},~{vs23},~{vs24},~{vs25},~{vs26},~{vs27},~{vs28},~{vs29},~{vs30},~{vs31},~{vs32},~{vs33},~{vs34},~{vs35},~{vs36},~{vs37},~{vs38},~{vs39},~{vs40},~{vs41},~{vs42},~{vs43},~{vs44},~{vs45},~{vs46},~{vs47},~{vs48},~{vs49},~{vs50},~{vs51},~{vs52},~{vs53},~{vs54},~{vs55},~{vs56},~{vs57},~{vs58},~{vs59},~{vs60},~{vs61},~{vs62},~{vs63}"()
+ %2 = bitcast <256 x i1>* %vp2 to i8*
+ tail call void @llvm.ppc.vsx.stxvp(<256 x i1> %1, i8* %2)
+ ret void
+}
+
+declare <256 x i1> @llvm.ppc.vsx.lxvp(i8*) #1
+
+declare void @llvm.ppc.vsx.stxvp(<256 x i1>, i8*) #2
+
+attributes #0 = { nounwind }