[llvm] r316366 - [PowerPC] Try to simplify a Swap if it feeds a Splat
Stefan Pintilie via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 23 12:33:31 PDT 2017
Author: stefanp
Date: Mon Oct 23 12:33:31 2017
New Revision: 316366
URL: http://llvm.org/viewvc/llvm-project?rev=316366&view=rev
Log:
[PowerPC] Try to simplify a Swap if it feeds a Splat
If we have the situation where a Swap feeds a Splat we can sometimes change the
index on the Splat and then remove the Swap instruction.
Differential Revision: https://reviews.llvm.org/D39009
Added:
llvm/trunk/test/CodeGen/PowerPC/ppc64-peephole-swap.ll
Modified:
llvm/trunk/lib/Target/PowerPC/PPCMIPeephole.cpp
Modified: llvm/trunk/lib/Target/PowerPC/PPCMIPeephole.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCMIPeephole.cpp?rev=316366&r1=316365&r2=316366&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCMIPeephole.cpp (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCMIPeephole.cpp Mon Oct 23 12:33:31 2017
@@ -375,6 +375,53 @@ bool PPCMIPeephole::simplifyCode(void) {
MI.getOperand(2).setImm(NewElem);
}
}
+
+ // Splat is fed by a SWAP which is a permute of this form
+ // XXPERMDI %VA, %VA, 2
+ // Since the splat instruction can use any of the vector elements to do
+ // the splat we do not have to rearrange the elements in the vector
+ // with a swap before we do the splat. We can simply do the splat from
+ // a different index.
+ // If the swap has only one use (the splat) then we can completely
+ // remove the swap too.
+ if (DefOpcode == PPC::XXPERMDI && MI.getOperand(1).isImm()) {
+ unsigned SwapRes = DefMI->getOperand(0).getReg();
+ unsigned SwapOp1 = DefMI->getOperand(1).getReg();
+ unsigned SwapOp2 = DefMI->getOperand(2).getReg();
+ unsigned SwapImm = DefMI->getOperand(3).getImm();
+ unsigned SplatImm = MI.getOperand(1).getImm();
+
+ // Break if this permute is not a swap.
+ if (SwapOp1 != SwapOp2 || SwapImm != 2)
+ break;
+
+ unsigned NewElem = 0;
+ // Compute the new index to use for the splat.
+ if (MI.getOpcode() == PPC::VSPLTB)
+ NewElem = (SplatImm + 8) & 0xF;
+ else if (MI.getOpcode() == PPC::VSPLTH)
+ NewElem = (SplatImm + 4) & 0x7;
+ else if (MI.getOpcode() == PPC::XXSPLTW)
+ NewElem = (SplatImm + 2) & 0x3;
+ else {
+ DEBUG(dbgs() << "Unknown splat opcode.");
+ DEBUG(MI.dump());
+ break;
+ }
+
+ if (MRI->hasOneNonDBGUse(SwapRes)) {
+ DEBUG(dbgs() << "Removing redundant swap: ");
+ DEBUG(DefMI->dump());
+ ToErase = DefMI;
+ }
+ Simplified = true;
+ DEBUG(dbgs() << "Changing splat immediate from " << SplatImm <<
+ " to " << NewElem << " in instruction: ");
+ DEBUG(MI.dump());
+ MI.getOperand(1).setImm(NewElem);
+ MI.getOperand(2).setReg(SwapOp1);
+ }
+
break;
}
case PPC::XVCVDPSP: {
Added: llvm/trunk/test/CodeGen/PowerPC/ppc64-peephole-swap.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/ppc64-peephole-swap.ll?rev=316366&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/ppc64-peephole-swap.ll (added)
+++ llvm/trunk/test/CodeGen/PowerPC/ppc64-peephole-swap.ll Mon Oct 23 12:33:31 2017
@@ -0,0 +1,134 @@
+; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-PWR8
+
+; The straightforward expansion of this code will result in a swap followed by a
+; splat. However, the swap is not needed since in this case the splat is the
+; only use.
+; We want to check that we are not using the swap and that we have indexed the
+; splat to the correct location.
+; 8 Bit Signed Version of the test.
+; Function Attrs: norecurse nounwind readnone
+define <16 x i8> @splat_8_plus(<16 x i8> %v, i8 signext %c) local_unnamed_addr {
+entry:
+ %splat.splatinsert.i = insertelement <16 x i8> undef, i8 %c, i32 0
+ %splat.splat.i = shufflevector <16 x i8> %splat.splatinsert.i, <16 x i8> undef, <16 x i32> zeroinitializer
+ %add = add <16 x i8> %splat.splat.i, %v
+ ret <16 x i8> %add
+; CHECK-LABEL: splat_8_plus
+; CHECK-NOT: xxswapd
+; CHECK: vspltb {{[0-9]+}}, {{[0-9]+}}, 7
+; CHECK: blr
+; CHECK-PWR8-LABEL: splat_8_plus
+; CHECK-PWR8-NOT: xxswapd
+; CHECK-PWR8: vspltb {{[0-9]+}}, {{[0-9]+}}, 7
+; CHECK-PWR8: blr
+}
+
+; 8 Bit Unsigned Version of the test.
+; Function Attrs: norecurse nounwind readnone
+define <16 x i8> @splat_u8_plus(<16 x i8> %v, i8 zeroext %c) local_unnamed_addr {
+entry:
+ %splat.splatinsert.i = insertelement <16 x i8> undef, i8 %c, i32 0
+ %splat.splat.i = shufflevector <16 x i8> %splat.splatinsert.i, <16 x i8> undef, <16 x i32> zeroinitializer
+ %add = add <16 x i8> %splat.splat.i, %v
+ ret <16 x i8> %add
+; CHECK-LABEL: splat_u8_plus
+; CHECK-NOT: xxswapd
+; CHECK: vspltb {{[0-9]+}}, {{[0-9]+}}, 7
+; CHECK: blr
+; CHECK-PWR8-LABEL: splat_u8_plus
+; CHECK-PWR8-NOT: xxswapd
+; CHECK-PWR8: vspltb {{[0-9]+}}, {{[0-9]+}}, 7
+; CHECK-PWR8: blr
+}
+
+; 16 Bit Signed Version of the test.
+; Function Attrs: norecurse nounwind readnone
+define <8 x i16> @splat_16_plus(<8 x i16> %v, i16 signext %c) local_unnamed_addr {
+entry:
+ %0 = shl i16 %c, 8
+ %conv.i = ashr exact i16 %0, 8
+ %splat.splatinsert.i = insertelement <8 x i16> undef, i16 %conv.i, i32 0
+ %splat.splat.i = shufflevector <8 x i16> %splat.splatinsert.i, <8 x i16> undef, <8 x i32> zeroinitializer
+ %add = add <8 x i16> %splat.splat.i, %v
+ ret <8 x i16> %add
+; CHECK-LABEL: splat_16_plus
+; CHECK-NOT: xxswapd
+; CHECK: vsplth {{[0-9]+}}, {{[0-9]+}}, 3
+; CHECK: blr
+; CHECK-PWR8-LABEL: splat_16_plus
+; CHECK-PWR8-NOT: xxswapd
+; CHECK-PWR8: vsplth {{[0-9]+}}, {{[0-9]+}}, 3
+; CHECK-PWR8: blr
+}
+
+; 16 Bit Unsigned Version of the test.
+; Function Attrs: norecurse nounwind readnone
+define <8 x i16> @splat_u16_plus(<8 x i16> %v, i16 zeroext %c) local_unnamed_addr {
+entry:
+ %0 = shl i16 %c, 8
+ %conv.i = ashr exact i16 %0, 8
+ %splat.splatinsert.i = insertelement <8 x i16> undef, i16 %conv.i, i32 0
+ %splat.splat.i = shufflevector <8 x i16> %splat.splatinsert.i, <8 x i16> undef, <8 x i32> zeroinitializer
+ %add = add <8 x i16> %splat.splat.i, %v
+ ret <8 x i16> %add
+; CHECK-LABEL: splat_u16_plus
+; CHECK-NOT: xxswapd
+; CHECK: vsplth {{[0-9]+}}, {{[0-9]+}}, 3
+; CHECK: blr
+; CHECK-PWR8-LABEL: splat_u16_plus
+; CHECK-PWR8-NOT: xxswapd
+; CHECK-PWR8: vsplth {{[0-9]+}}, {{[0-9]+}}, 3
+; CHECK-PWR8: blr
+}
+
+; 32 Bit Signed Version of the test.
+; The 32 bit examples work differently than the 8 and 16 bit versions of the
+; test. On Power 9 we have the mtvsrws instruction that does both the move to
+; register and the splat so it does not really test the newly implemented code.
+; On Power 9 for the 32 bit case we don't need the new simplification. It is
+; just here for completeness.
+; Function Attrs: norecurse nounwind readnone
+define <4 x i32> @splat_32_plus(<4 x i32> %v, i32 signext %c) local_unnamed_addr {
+entry:
+ %sext = shl i32 %c, 24
+ %conv.i = ashr exact i32 %sext, 24
+ %splat.splatinsert.i = insertelement <4 x i32> undef, i32 %conv.i, i32 0
+ %splat.splat.i = shufflevector <4 x i32> %splat.splatinsert.i, <4 x i32> undef, <4 x i32> zeroinitializer
+ %add = add <4 x i32> %splat.splat.i, %v
+ ret <4 x i32> %add
+; CHECK-LABEL: splat_32_plus
+; CHECK-NOT: xxswapd
+; CHECK: mtvsrws {{[0-9]+}}, {{[0-9]+}}
+; CHECK: blr
+; CHECK-PWR8-LABEL: splat_32_plus
+; CHECK-PWR8-NOT: xxswapd
+; CHECK-PWR8: xxspltw {{[0-9]+}}, {{[0-9]+}}, 1
+; CHECK-PWR8: blr
+}
+
+; 32 Bit Unsigned Version of the test.
+; The 32 bit examples work differently than the 8 and 16 bit versions of the
+; test. On Power 9 we have the mtvsrws instruction that does both the move to
+; register and the splat so it does not really test the newly implemented code.
+; On Power 9 for the 32 bit case we don't need the new simplification. It is
+; just here for completeness.
+; Function Attrs: norecurse nounwind readnone
+define <4 x i32> @splat_u32_plus(<4 x i32> %v, i32 zeroext %c) local_unnamed_addr {
+entry:
+ %sext = shl i32 %c, 24
+ %conv.i = ashr exact i32 %sext, 24
+ %splat.splatinsert.i = insertelement <4 x i32> undef, i32 %conv.i, i32 0
+ %splat.splat.i = shufflevector <4 x i32> %splat.splatinsert.i, <4 x i32> undef, <4 x i32> zeroinitializer
+ %add = add <4 x i32> %splat.splat.i, %v
+ ret <4 x i32> %add
+; CHECK-LABEL: splat_u32_plus
+; CHECK-NOT: xxswapd
+; CHECK: mtvsrws {{[0-9]+}}, {{[0-9]+}}
+; CHECK: blr
+; CHECK-PWR8-LABEL: splat_u32_plus
+; CHECK-PWR8-NOT: xxswapd
+; CHECK-PWR8: xxspltw {{[0-9]+}}, {{[0-9]+}}, 1
+; CHECK-PWR8: blr
+}
+
More information about the llvm-commits
mailing list