[PATCH] D13364: [x86] PR24562: fix incorrect folding of X86ISD::PSHUFB nodes that have a mask of all indices with the most significant bit set.

Mon Oct 12 04:27:49 PDT 2015

This revision was automatically updated to reflect the committed changes.
Closed by commit rL250027: [x86] PR24562: fix incorrect folding of PSHUFB nodes with a mask where all indices have the most significant bit set (authored by adibiagio).

Changed prior to commit:
  http://reviews.llvm.org/D13364?vs=36624&id=37095#toc

Repository:
  rL LLVM

http://reviews.llvm.org/D13364

Files:
  llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
  llvm/trunk/test/CodeGen/X86/pr24562.ll

Index: llvm/trunk/test/CodeGen/X86/pr24562.ll
===================================================================

--- llvm/trunk/test/CodeGen/X86/pr24562.ll
+++ llvm/trunk/test/CodeGen/X86/pr24562.ll
@@ -0,0 +1,19 @@
+; RUN: llc -mattr=+ssse3 -mtriple=x86_64-unknown-unknown < %s | FileCheck %s
+
+; The pshufb from function @pr24562 was wrongly folded into its first operand
+; as a result of a late target shuffle combine on the legalized selection dag.
+; 
+; Check that the pshufb is correctly folded to a zero vector.
+
+define <2 x i64> @pr24562() {
+; CHECK-LABEL: pr24562:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    xorps %xmm0, %xmm0
+; CHECK-NEXT:    retq
+entry:
+  %0 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>) #2
+  %1 = bitcast <16 x i8> %0 to <2 x i64>
+  ret <2 x i64> %1
+}
+
+declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>)
Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
@@ -21991,10 +21991,22 @@
   MVT RootVT = Root.getSimpleValueType();
   SDLoc DL(Root);
 
-  // Just remove no-op shuffle masks.
   if (Mask.size() == 1) {
-    DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Input),
-                  /*AddTo*/ true);
+    int Index = Mask[0];
+    assert((Index >= 0 || Index == SM_SentinelUndef ||
+            Index == SM_SentinelZero) &&
+           "Invalid shuffle index found!");
+
+    // We may end up with an accumulated mask of size 1 as a result of
+    // widening of shuffle operands (see function canWidenShuffleElements).
+    // If the only shuffle index is equal to SM_SentinelZero then propagate
+    // a zero vector. Otherwise, the combined shuffle mask is a no-op shuffle
+    // mask, and therefore the entire chain of shuffles can be folded away.
+    if (Index == SM_SentinelZero)
+      DCI.CombineTo(Root.getNode(), getZeroVector(RootVT, Subtarget, DAG, DL));
+    else
+      DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Input),
+                    /*AddTo*/ true);
     return true;
   }
 


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D13364.37095.patch
Type: text/x-patch
Size: 2392 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20151012/10941632/attachment.bin>