[PATCH] D25493: [PPCMIPeephole] Fix splat elimination

Tue Oct 11 17:26:40 PDT 2016

timshen created this revision.
timshen added reviewers: echristo, iteratee, kbarton, nemanjai.
timshen added a subscriber: llvm-commits.
Herald added a subscriber: mehdi_amini.

In PPCMIPeephole, when we see two splat instructions, we can't simply do the following transformation:

  B = Splat A
  C = Splat B

=>

  C = Splat A

because B may still be used between these two instructions. Instead, we should make the second Splat a PPC::COPY and let later passes decide whether to remove it or not:

  B = Splat A
  C = Splat B

=>

  B = Splat A
  C = COPY B


https://reviews.llvm.org/D25493

Files:
  llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
  llvm/test/CodeGen/PowerPC/pr30663.ll


Index: llvm/test/CodeGen/PowerPC/pr30663.ll
===================================================================

--- /dev/null
+++ llvm/test/CodeGen/PowerPC/pr30663.ll
@@ -0,0 +1,24 @@
+; RUN: llc -O1 < %s | FileCheck %s
+target triple = "powerpc64le-linux-gnu"
+
+; The second xxspltw should be eliminated.
+; CHECK: xxspltw
+; CHECK-NOT: xxspltw
+define void @Test() {
+bb4:
+  %tmp = load <4 x i8>, <4 x i8>* undef
+  %tmp8 = bitcast <4 x i8> %tmp to float
+  %tmp18 = fmul float %tmp8, undef
+  %tmp19 = fsub float 0.000000e+00, %tmp18
+  store float %tmp19, float* undef
+  %tmp22 = shufflevector <4 x i8> %tmp, <4 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+  %tmp23 = bitcast <16 x i8> %tmp22 to <4 x float>
+  %tmp25 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> %tmp23, <4 x float> undef)
+  %tmp26 = fsub <4 x float> zeroinitializer, %tmp25
+  %tmp27 = bitcast <4 x float> %tmp26 to <4 x i32>
+  tail call void @llvm.ppc.altivec.stvx(<4 x i32> %tmp27, i8* undef)
+  ret void
+}
+
+declare void @llvm.ppc.altivec.stvx(<4 x i32>, i8*)
+declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
Index: llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
===================================================================
--- llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
+++ llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
@@ -189,6 +189,7 @@
       case PPC::XXSPLTW: {
         unsigned MyOpcode = MI.getOpcode();
         unsigned OpNo = MyOpcode == PPC::XXSPLTW ? 1 : 2;
+        unsigned UimNo = 3 - OpNo;
         unsigned TrueReg = lookThruCopyLike(MI.getOperand(OpNo).getReg());
         MachineInstr *DefMI = MRI->getVRegDef(TrueReg);
         if (!DefMI)
@@ -201,11 +202,11 @@
         // Splat fed by another splat - switch the output of the first
         // and remove the second.
         if (SameOpcode) {
-          DefMI->getOperand(0).setReg(MI.getOperand(0).getReg());
-          ToErase = &MI;
-          Simplified = true;
-          DEBUG(dbgs() << "Removing redundant splat: ");
+          DEBUG(dbgs() << "Changing redundant splat to a copy: ");
           DEBUG(MI.dump());
+          MI.setDesc(TII->get(PPC::COPY));
+          MI.RemoveOperand(UimNo);
+          Simplified = true;
         }
         // Splat fed by a shift. Usually when we align value to splat into
         // vector element zero.


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D25493.74312.patch
Type: text/x-patch
Size: 2452 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20161012/1ad1213a/attachment.bin>