[llvm] r316952 - Revert "[PowerPC] Try to simplify a Swap if it feeds a Splat"

Mon Oct 30 12:55:39 PDT 2017

Author: stefanp
Date: Mon Oct 30 12:55:38 2017
New Revision: 316952

URL: http://llvm.org/viewvc/llvm-project?rev=316952&view=rev
Log:
Revert "[PowerPC] Try to simplify a Swap if it feeds a Splat"

Revert r316478.
A test case has failed.
Will recommit this change once we find and fix the failure.

This reverts commit 7c330fabaedaba3d02c58bc3cc1198896c895f34.

Removed:
    llvm/trunk/test/CodeGen/PowerPC/ppc64-peephole-swap.ll
Modified:
    llvm/trunk/lib/Target/PowerPC/PPCMIPeephole.cpp
    llvm/trunk/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll

Modified: llvm/trunk/lib/Target/PowerPC/PPCMIPeephole.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCMIPeephole.cpp?rev=316952&r1=316951&r2=316952&view=diff
==============================================================================

--- llvm/trunk/lib/Target/PowerPC/PPCMIPeephole.cpp (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCMIPeephole.cpp Mon Oct 30 12:55:38 2017
@@ -375,53 +375,6 @@ bool PPCMIPeephole::simplifyCode(void) {
             MI.getOperand(2).setImm(NewElem);
           }
         }
-
-        // Splat is fed by a SWAP which is a permute of this form
-        //  XXPERMDI %VA, %VA, 2
-        // Since the splat instruction can use any of the vector elements to do
-        //  the splat we do not have to rearrange the elements in the vector
-        //  with a swap before we do the splat. We can simply do the splat from
-        //  a different index.
-        // If the swap has only one use (the splat) then we can completely
-        //  remove the swap too.
-        if (DefOpcode == PPC::XXPERMDI && MI.getOperand(1).isImm()) {
-          unsigned SwapRes = DefMI->getOperand(0).getReg();
-          unsigned SwapOp1 = DefMI->getOperand(1).getReg();
-          unsigned SwapOp2 = DefMI->getOperand(2).getReg();
-          unsigned SwapImm = DefMI->getOperand(3).getImm();
-          unsigned SplatImm = MI.getOperand(1).getImm();
-
-          // Break if this permute is not a swap.
-          if (SwapOp1 != SwapOp2 || SwapImm != 2)
-            break;
-
-          unsigned NewElem = 0;
-          // Compute the new index to use for the splat.
-          if (MI.getOpcode() == PPC::VSPLTB)
-            NewElem = (SplatImm + 8) & 0xF;
-          else if (MI.getOpcode() == PPC::VSPLTH)
-            NewElem = (SplatImm + 4) & 0x7;
-          else if (MI.getOpcode() == PPC::XXSPLTW)
-            NewElem = (SplatImm + 2) & 0x3;
-          else {
-            DEBUG(dbgs() << "Unknown splat opcode.");
-            DEBUG(MI.dump());
-            break;
-          }
-
-          if (MRI->hasOneNonDBGUse(SwapRes)) {
-            DEBUG(dbgs() << "Removing redundant swap: ");
-            DEBUG(DefMI->dump());
-            ToErase = DefMI;
-          }
-          Simplified = true;
-          DEBUG(dbgs() << "Changing splat immediate from " << SplatImm <<
-                " to " << NewElem << " in instruction: ");
-          DEBUG(MI.dump());
-          MI.getOperand(1).setImm(NewElem);
-          MI.getOperand(2).setReg(SwapOp1);
-        }
-
         break;
       }
       case PPC::XVCVDPSP: {

Modified: llvm/trunk/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll?rev=316952&r1=316951&r2=316952&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll Mon Oct 30 12:55:38 2017
@@ -16,7 +16,7 @@ entry:
 ; CHECK: sldi [[REG1:[0-9]+]], 3, 56
 ; CHECK: mtvsrd {{[0-9]+}}, [[REG1]]
 ; CHECK-LE: mtvsrd [[REG1:[0-9]+]], 3
-; CHECK-LE: vspltb {{[0-9]+}}, {{[0-9]+}}, 7
+; CHECK-LE: xxswapd {{[0-9]+}}, [[REG1]]
 }
 
 ; Function Attrs: norecurse nounwind readnone
@@ -28,7 +28,7 @@ entry:
 ; CHECK: sldi [[REG1:[0-9]+]], 3, 48
 ; CHECK: mtvsrd {{[0-9]+}}, [[REG1]]
 ; CHECK-LE: mtvsrd [[REG1:[0-9]+]], 3
-; CHECK-LE: vsplth {{[0-9]+}}, {{[0-9]+}}, 3
+; CHECK-LE: xxswapd {{[0-9]+}}, [[REG1]]
 }
 
 ; Function Attrs: norecurse nounwind readnone

Removed: llvm/trunk/test/CodeGen/PowerPC/ppc64-peephole-swap.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/ppc64-peephole-swap.ll?rev=316951&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/ppc64-peephole-swap.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/ppc64-peephole-swap.ll (removed)
@@ -1,134 +0,0 @@
-; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 -verify-machineinstrs | FileCheck %s
-; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-PWR8
-
-; The strightforward expansion of this code will result in a swap followed by a
-;  splat. However, the swap is not needed since in this case the splat is the
-;  only use.
-; We want to check that we are not using the swap and that we have indexed the
-;  splat to the correct location.
-; 8 Bit Signed Version of the test.
-; Function Attrs: norecurse nounwind readnone
-define <16 x i8> @splat_8_plus(<16 x i8> %v, i8 signext %c) local_unnamed_addr {
-entry:
-  %splat.splatinsert.i = insertelement <16 x i8> undef, i8 %c, i32 0
-  %splat.splat.i = shufflevector <16 x i8> %splat.splatinsert.i, <16 x i8> undef, <16 x i32> zeroinitializer
-  %add = add <16 x i8> %splat.splat.i, %v
-  ret <16 x i8> %add
-; CHECK-LABEL: splat_8_plus
-; CHECK-NOT: xxswapd
-; CHECK: vspltb {{[0-9]+}}, {{[0-9]+}}, 7
-; CHECK: blr
-; CHECK-PWR8-LABEL: splat_8_plus
-; CHECK-PWR8-NOT: xxswapd
-; CHECK-PWR8: vspltb {{[0-9]+}}, {{[0-9]+}}, 7
-; CHECK-PWR8: blr
-}
-
-; 8 Bit Unsigned Version of the test.
-; Function Attrs: norecurse nounwind readnone
-define <16 x i8> @splat_u8_plus(<16 x i8> %v, i8 zeroext %c) local_unnamed_addr {
-entry:
-  %splat.splatinsert.i = insertelement <16 x i8> undef, i8 %c, i32 0
-  %splat.splat.i = shufflevector <16 x i8> %splat.splatinsert.i, <16 x i8> undef, <16 x i32> zeroinitializer
-  %add = add <16 x i8> %splat.splat.i, %v
-  ret <16 x i8> %add
-; CHECK-LABEL: splat_u8_plus
-; CHECK-NOT: xxswapd
-; CHECK: vspltb {{[0-9]+}}, {{[0-9]+}}, 7
-; CHECK: blr
-; CHECK-PWR8-LABEL: splat_u8_plus
-; CHECK-PWR8-NOT: xxswapd
-; CHECK-PWR8: vspltb {{[0-9]+}}, {{[0-9]+}}, 7
-; CHECK-PWR8: blr
-}
-
-; 16 Bit Signed Version of the test.
-; Function Attrs: norecurse nounwind readnone
-define <8 x i16> @splat_16_plus(<8 x i16> %v, i16 signext %c) local_unnamed_addr {
-entry:
-  %0 = shl i16 %c, 8
-  %conv.i = ashr exact i16 %0, 8
-  %splat.splatinsert.i = insertelement <8 x i16> undef, i16 %conv.i, i32 0
-  %splat.splat.i = shufflevector <8 x i16> %splat.splatinsert.i, <8 x i16> undef, <8 x i32> zeroinitializer
-  %add = add <8 x i16> %splat.splat.i, %v
-  ret <8 x i16> %add
-; CHECK-LABEL: splat_16_plus
-; CHECK-NOT: xxswapd
-; CHECK: vsplth {{[0-9]+}}, {{[0-9]+}}, 3
-; CHECK: blr
-; CHECK-PWR8-LABEL: splat_16_plus
-; CHECK-PWR8-NOT: xxswapd
-; CHECK-PWR8: vsplth {{[0-9]+}}, {{[0-9]+}}, 3
-; CHECK-PWR8: blr
-}
-
-; 16 Bit Unsigned Version of the test.
-; Function Attrs: norecurse nounwind readnone
-define <8 x i16> @splat_u16_plus(<8 x i16> %v, i16 zeroext %c) local_unnamed_addr {
-entry:
-  %0 = shl i16 %c, 8
-  %conv.i = ashr exact i16 %0, 8
-  %splat.splatinsert.i = insertelement <8 x i16> undef, i16 %conv.i, i32 0
-  %splat.splat.i = shufflevector <8 x i16> %splat.splatinsert.i, <8 x i16> undef, <8 x i32> zeroinitializer
-  %add = add <8 x i16> %splat.splat.i, %v
-  ret <8 x i16> %add
-; CHECK-LABEL: splat_u16_plus
-; CHECK-NOT: xxswapd
-; CHECK: vsplth {{[0-9]+}}, {{[0-9]+}}, 3
-; CHECK: blr
-; CHECK-PWR8-LABEL: splat_u16_plus
-; CHECK-PWR8-NOT: xxswapd
-; CHECK-PWR8: vsplth {{[0-9]+}}, {{[0-9]+}}, 3
-; CHECK-PWR8: blr
-}
-
-; 32 Bit Signed Version of the test.
-; The 32 bit examples work differently than the 8 and 16 bit versions of the
-;  test. On Power 9 we have the mtvsrws instruction that does both the move to
-;  register and the splat so it does not really test the newly implemented code.
-; On Power 9 for the 32 bit case we don't need the new simplification. It is
-;  just here for completeness.
-; Function Attrs: norecurse nounwind readnone
-define <4 x i32> @splat_32_plus(<4 x i32> %v, i32 signext %c) local_unnamed_addr {
-entry:
-  %sext = shl i32 %c, 24
-  %conv.i = ashr exact i32 %sext, 24
-  %splat.splatinsert.i = insertelement <4 x i32> undef, i32 %conv.i, i32 0
-  %splat.splat.i = shufflevector <4 x i32> %splat.splatinsert.i, <4 x i32> undef, <4 x i32> zeroinitializer
-  %add = add <4 x i32> %splat.splat.i, %v
-  ret <4 x i32> %add
-; CHECK-LABEL: splat_32_plus
-; CHECK-NOT: xxswapd
-; CHECK: mtvsrws {{[0-9]+}}, {{[0-9]+}}
-; CHECK: blr
-; CHECK-PWR8-LABEL: splat_32_plus
-; CHECK-PWR8-NOT: xxswapd
-; CHECK-PWR8: xxspltw {{[0-9]+}}, {{[0-9]+}}, 1
-; CHECK-PWR8: blr
-}
-
-; 32 Bit Unsigned Version of the test.
-; The 32 bit examples work differently than the 8 and 16 bit versions of the
-;  test. On Power 9 we have the mtvsrws instruction that does both the move to
-;  register and the splat so it does not really test the newly implemented code.
-; On Power 9 for the 32 bit case we don't need the new simplification. It is
-;  just here for completeness.
-; Function Attrs: norecurse nounwind readnone
-define <4 x i32> @splat_u32_plus(<4 x i32> %v, i32 zeroext %c) local_unnamed_addr {
-entry:
-  %sext = shl i32 %c, 24
-  %conv.i = ashr exact i32 %sext, 24
-  %splat.splatinsert.i = insertelement <4 x i32> undef, i32 %conv.i, i32 0
-  %splat.splat.i = shufflevector <4 x i32> %splat.splatinsert.i, <4 x i32> undef, <4 x i32> zeroinitializer
-  %add = add <4 x i32> %splat.splat.i, %v
-  ret <4 x i32> %add
-; CHECK-LABEL: splat_u32_plus
-; CHECK-NOT: xxswapd
-; CHECK: mtvsrws {{[0-9]+}}, {{[0-9]+}}
-; CHECK: blr
-; CHECK-PWR8-LABEL: splat_u32_plus
-; CHECK-PWR8-NOT: xxswapd
-; CHECK-PWR8: xxspltw {{[0-9]+}}, {{[0-9]+}}, 1
-; CHECK-PWR8: blr
-}
-