[PATCH] D13364: [x86] PR24562: fix incorrect folding of X86ISD::PSHUFB nodes that have a mask of all indices with the most significant bit set.
Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 12 04:27:49 PDT 2015
This revision was automatically updated to reflect the committed changes.
Closed by commit rL250027: [x86] PR24562: fix incorrect folding of PSHUFB nodes with a mask where all… (authored by adibiagio).
Changed prior to commit:
http://reviews.llvm.org/D13364?vs=36624&id=37095#toc
Repository:
rL LLVM
http://reviews.llvm.org/D13364
Files:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/pr24562.ll
Index: llvm/trunk/test/CodeGen/X86/pr24562.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/pr24562.ll
+++ llvm/trunk/test/CodeGen/X86/pr24562.ll
@@ -0,0 +1,19 @@
+; RUN: llc -mattr=+ssse3 -mtriple=x86_64-unknown-unknown < %s | FileCheck %s
+
+; The pshufb from function @pr24562 was wrongly folded into its first operand
+; as a result of a late target shuffle combine on the legalized selection dag.
+;
+; Check that the pshufb is correctly folded to a zero vector.
+
+define <2 x i64> @pr24562() {
+; CHECK-LABEL: pr24562:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: xorps %xmm0, %xmm0
+; CHECK-NEXT: retq
+entry:
+ %0 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>) #2
+ %1 = bitcast <16 x i8> %0 to <2 x i64>
+ ret <2 x i64> %1
+}
+
+declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>)
Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
@@ -21991,10 +21991,22 @@
MVT RootVT = Root.getSimpleValueType();
SDLoc DL(Root);
- // Just remove no-op shuffle masks.
if (Mask.size() == 1) {
- DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Input),
- /*AddTo*/ true);
+ int Index = Mask[0];
+ assert((Index >= 0 || Index == SM_SentinelUndef ||
+ Index == SM_SentinelZero) &&
+ "Invalid shuffle index found!");
+
+ // We may end up with an accumulated mask of size 1 as a result of
+ // widening of shuffle operands (see function canWidenShuffleElements).
+ // If the only shuffle index is equal to SM_SentinelZero then propagate
+ // a zero vector. Otherwise, the combined shuffle mask is a no-op shuffle
+ // mask, and therefore the entire chain of shuffles can be folded away.
+ if (Index == SM_SentinelZero)
+ DCI.CombineTo(Root.getNode(), getZeroVector(RootVT, Subtarget, DAG, DL));
+ else
+ DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Input),
+ /*AddTo*/ true);
return true;
}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D13364.37095.patch
Type: text/x-patch
Size: 2392 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20151012/10941632/attachment.bin>
More information about the llvm-commits
mailing list