[llvm] r316478 - [PowerPC] Try to simplify a Swap if it feeds a Splat

Stefan Pintilie via llvm-commits llvm-commits at lists.llvm.org
Tue Oct 24 10:44:27 PDT 2017


Author: stefanp
Date: Tue Oct 24 10:44:27 2017
New Revision: 316478

URL: http://llvm.org/viewvc/llvm-project?rev=316478&view=rev
Log:
[PowerPC] Try to simplify a Swap if it feeds a Splat

If we have the situation where a Swap feeds a Splat we can sometimes change the
  index on the Splat and then remove the Swap instruction.

Fixed the test case that was failing and recommitted after pulling the original
  commit.

  Original revision is here: https://reviews.llvm.org/D39009

Added:
    llvm/trunk/test/CodeGen/PowerPC/ppc64-peephole-swap.ll
Modified:
    llvm/trunk/lib/Target/PowerPC/PPCMIPeephole.cpp
    llvm/trunk/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll

Modified: llvm/trunk/lib/Target/PowerPC/PPCMIPeephole.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCMIPeephole.cpp?rev=316478&r1=316477&r2=316478&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCMIPeephole.cpp (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCMIPeephole.cpp Tue Oct 24 10:44:27 2017
@@ -375,6 +375,53 @@ bool PPCMIPeephole::simplifyCode(void) {
             MI.getOperand(2).setImm(NewElem);
           }
         }
+
+        // Splat is fed by a SWAP which is a permute of this form
+        //  XXPERMDI %VA, %VA, 2
+        // Since the splat instruction can use any of the vector elements to do
+        //  the splat we do not have to rearrange the elements in the vector
+        //  with a swap before we do the splat. We can simply do the splat from
+        //  a different index.
+        // If the swap has only one use (the splat) then we can completely
+        //  remove the swap too.
+        if (DefOpcode == PPC::XXPERMDI && MI.getOperand(1).isImm()) {
+          unsigned SwapRes = DefMI->getOperand(0).getReg();
+          unsigned SwapOp1 = DefMI->getOperand(1).getReg();
+          unsigned SwapOp2 = DefMI->getOperand(2).getReg();
+          unsigned SwapImm = DefMI->getOperand(3).getImm();
+          unsigned SplatImm = MI.getOperand(1).getImm();
+
+          // Break if this permute is not a swap.
+          if (SwapOp1 != SwapOp2 || SwapImm != 2)
+            break;
+
+          unsigned NewElem = 0;
+          // Compute the new index to use for the splat.
+          if (MI.getOpcode() == PPC::VSPLTB)
+            NewElem = (SplatImm + 8) & 0xF;
+          else if (MI.getOpcode() == PPC::VSPLTH)
+            NewElem = (SplatImm + 4) & 0x7;
+          else if (MI.getOpcode() == PPC::XXSPLTW)
+            NewElem = (SplatImm + 2) & 0x3;
+          else {
+            DEBUG(dbgs() << "Unknown splat opcode.");
+            DEBUG(MI.dump());
+            break;
+          }
+
+          if (MRI->hasOneNonDBGUse(SwapRes)) {
+            DEBUG(dbgs() << "Removing redundant swap: ");
+            DEBUG(DefMI->dump());
+            ToErase = DefMI;
+          }
+          Simplified = true;
+          DEBUG(dbgs() << "Changing splat immediate from " << SplatImm <<
+                " to " << NewElem << " in instruction: ");
+          DEBUG(MI.dump());
+          MI.getOperand(1).setImm(NewElem);
+          MI.getOperand(2).setReg(SwapOp1);
+        }
+
         break;
       }
       case PPC::XVCVDPSP: {

Modified: llvm/trunk/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll?rev=316478&r1=316477&r2=316478&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll Tue Oct 24 10:44:27 2017
@@ -16,7 +16,7 @@ entry:
 ; CHECK: sldi [[REG1:[0-9]+]], 3, 56
 ; CHECK: mtvsrd {{[0-9]+}}, [[REG1]]
 ; CHECK-LE: mtvsrd [[REG1:[0-9]+]], 3
-; CHECK-LE: xxswapd {{[0-9]+}}, [[REG1]]
+; CHECK-LE: vspltb {{[0-9]+}}, {{[0-9]+}}, 7
 }
 
 ; Function Attrs: norecurse nounwind readnone
@@ -28,7 +28,7 @@ entry:
 ; CHECK: sldi [[REG1:[0-9]+]], 3, 48
 ; CHECK: mtvsrd {{[0-9]+}}, [[REG1]]
 ; CHECK-LE: mtvsrd [[REG1:[0-9]+]], 3
-; CHECK-LE: xxswapd {{[0-9]+}}, [[REG1]]
+; CHECK-LE: vsplth {{[0-9]+}}, {{[0-9]+}}, 3
 }
 
 ; Function Attrs: norecurse nounwind readnone

Added: llvm/trunk/test/CodeGen/PowerPC/ppc64-peephole-swap.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/ppc64-peephole-swap.ll?rev=316478&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/ppc64-peephole-swap.ll (added)
+++ llvm/trunk/test/CodeGen/PowerPC/ppc64-peephole-swap.ll Tue Oct 24 10:44:27 2017
@@ -0,0 +1,134 @@
+; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-PWR8
+
+; The straightforward expansion of this code will result in a swap followed by a
+;  splat. However, the swap is not needed since in this case the splat is the
+;  only use.
+; We want to check that we are not using the swap and that we have indexed the
+;  splat to the correct location.
+; 8 Bit Signed Version of the test.
+; Function Attrs: norecurse nounwind readnone
+define <16 x i8> @splat_8_plus(<16 x i8> %v, i8 signext %c) local_unnamed_addr {
+entry:
+  %splat.splatinsert.i = insertelement <16 x i8> undef, i8 %c, i32 0
+  %splat.splat.i = shufflevector <16 x i8> %splat.splatinsert.i, <16 x i8> undef, <16 x i32> zeroinitializer
+  %add = add <16 x i8> %splat.splat.i, %v
+  ret <16 x i8> %add
+; CHECK-LABEL: splat_8_plus
+; CHECK-NOT: xxswapd
+; CHECK: vspltb {{[0-9]+}}, {{[0-9]+}}, 7
+; CHECK: blr
+; CHECK-PWR8-LABEL: splat_8_plus
+; CHECK-PWR8-NOT: xxswapd
+; CHECK-PWR8: vspltb {{[0-9]+}}, {{[0-9]+}}, 7
+; CHECK-PWR8: blr
+}
+
+; 8 Bit Unsigned Version of the test.
+; Function Attrs: norecurse nounwind readnone
+define <16 x i8> @splat_u8_plus(<16 x i8> %v, i8 zeroext %c) local_unnamed_addr {
+entry:
+  %splat.splatinsert.i = insertelement <16 x i8> undef, i8 %c, i32 0
+  %splat.splat.i = shufflevector <16 x i8> %splat.splatinsert.i, <16 x i8> undef, <16 x i32> zeroinitializer
+  %add = add <16 x i8> %splat.splat.i, %v
+  ret <16 x i8> %add
+; CHECK-LABEL: splat_u8_plus
+; CHECK-NOT: xxswapd
+; CHECK: vspltb {{[0-9]+}}, {{[0-9]+}}, 7
+; CHECK: blr
+; CHECK-PWR8-LABEL: splat_u8_plus
+; CHECK-PWR8-NOT: xxswapd
+; CHECK-PWR8: vspltb {{[0-9]+}}, {{[0-9]+}}, 7
+; CHECK-PWR8: blr
+}
+
+; 16 Bit Signed Version of the test.
+; Function Attrs: norecurse nounwind readnone
+define <8 x i16> @splat_16_plus(<8 x i16> %v, i16 signext %c) local_unnamed_addr {
+entry:
+  %0 = shl i16 %c, 8
+  %conv.i = ashr exact i16 %0, 8
+  %splat.splatinsert.i = insertelement <8 x i16> undef, i16 %conv.i, i32 0
+  %splat.splat.i = shufflevector <8 x i16> %splat.splatinsert.i, <8 x i16> undef, <8 x i32> zeroinitializer
+  %add = add <8 x i16> %splat.splat.i, %v
+  ret <8 x i16> %add
+; CHECK-LABEL: splat_16_plus
+; CHECK-NOT: xxswapd
+; CHECK: vsplth {{[0-9]+}}, {{[0-9]+}}, 3
+; CHECK: blr
+; CHECK-PWR8-LABEL: splat_16_plus
+; CHECK-PWR8-NOT: xxswapd
+; CHECK-PWR8: vsplth {{[0-9]+}}, {{[0-9]+}}, 3
+; CHECK-PWR8: blr
+}
+
+; 16 Bit Unsigned Version of the test.
+; Function Attrs: norecurse nounwind readnone
+define <8 x i16> @splat_u16_plus(<8 x i16> %v, i16 zeroext %c) local_unnamed_addr {
+entry:
+  %0 = shl i16 %c, 8
+  %conv.i = ashr exact i16 %0, 8
+  %splat.splatinsert.i = insertelement <8 x i16> undef, i16 %conv.i, i32 0
+  %splat.splat.i = shufflevector <8 x i16> %splat.splatinsert.i, <8 x i16> undef, <8 x i32> zeroinitializer
+  %add = add <8 x i16> %splat.splat.i, %v
+  ret <8 x i16> %add
+; CHECK-LABEL: splat_u16_plus
+; CHECK-NOT: xxswapd
+; CHECK: vsplth {{[0-9]+}}, {{[0-9]+}}, 3
+; CHECK: blr
+; CHECK-PWR8-LABEL: splat_u16_plus
+; CHECK-PWR8-NOT: xxswapd
+; CHECK-PWR8: vsplth {{[0-9]+}}, {{[0-9]+}}, 3
+; CHECK-PWR8: blr
+}
+
+; 32 Bit Signed Version of the test.
+; The 32 bit examples work differently than the 8 and 16 bit versions of the
+;  test. On Power 9 we have the mtvsrws instruction that does both the move to
+;  register and the splat so it does not really test the newly implemented code.
+; On Power 9 for the 32 bit case we don't need the new simplification. It is
+;  just here for completeness.
+; Function Attrs: norecurse nounwind readnone
+define <4 x i32> @splat_32_plus(<4 x i32> %v, i32 signext %c) local_unnamed_addr {
+entry:
+  %sext = shl i32 %c, 24
+  %conv.i = ashr exact i32 %sext, 24
+  %splat.splatinsert.i = insertelement <4 x i32> undef, i32 %conv.i, i32 0
+  %splat.splat.i = shufflevector <4 x i32> %splat.splatinsert.i, <4 x i32> undef, <4 x i32> zeroinitializer
+  %add = add <4 x i32> %splat.splat.i, %v
+  ret <4 x i32> %add
+; CHECK-LABEL: splat_32_plus
+; CHECK-NOT: xxswapd
+; CHECK: mtvsrws {{[0-9]+}}, {{[0-9]+}}
+; CHECK: blr
+; CHECK-PWR8-LABEL: splat_32_plus
+; CHECK-PWR8-NOT: xxswapd
+; CHECK-PWR8: xxspltw {{[0-9]+}}, {{[0-9]+}}, 1
+; CHECK-PWR8: blr
+}
+
+; 32 Bit Unsigned Version of the test.
+; The 32 bit examples work differently than the 8 and 16 bit versions of the
+;  test. On Power 9 we have the mtvsrws instruction that does both the move to
+;  register and the splat so it does not really test the newly implemented code.
+; On Power 9 for the 32 bit case we don't need the new simplification. It is
+;  just here for completeness.
+; Function Attrs: norecurse nounwind readnone
+define <4 x i32> @splat_u32_plus(<4 x i32> %v, i32 zeroext %c) local_unnamed_addr {
+entry:
+  %sext = shl i32 %c, 24
+  %conv.i = ashr exact i32 %sext, 24
+  %splat.splatinsert.i = insertelement <4 x i32> undef, i32 %conv.i, i32 0
+  %splat.splat.i = shufflevector <4 x i32> %splat.splatinsert.i, <4 x i32> undef, <4 x i32> zeroinitializer
+  %add = add <4 x i32> %splat.splat.i, %v
+  ret <4 x i32> %add
+; CHECK-LABEL: splat_u32_plus
+; CHECK-NOT: xxswapd
+; CHECK: mtvsrws {{[0-9]+}}, {{[0-9]+}}
+; CHECK: blr
+; CHECK-PWR8-LABEL: splat_u32_plus
+; CHECK-PWR8-NOT: xxswapd
+; CHECK-PWR8: xxspltw {{[0-9]+}}, {{[0-9]+}}, 1
+; CHECK-PWR8: blr
+}
+




More information about the llvm-commits mailing list