[llvm] r261434 - [X86][SSE] Fixed issue with commutation of 'faux unary' target shuffles (PR26667)

Sat Feb 20 06:39:46 PST 2016

Author: rksimon
Date: Sat Feb 20 08:39:45 2016
New Revision: 261434

URL: http://llvm.org/viewvc/llvm-project?rev=261434&view=rev
Log:
[X86][SSE] Fixed issue with commutation of 'faux unary' target shuffles (PR26667)

Fixed a bug introduced by D16683 when a binary shuffle is simplified to a unary shuffle (with undef/zero sentinel mask indices) - if this resulted in only the second input being used combineX86ShuffleChain failed to take this into account and still referenced the first input.

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/test/CodeGen/X86/avx-intrinsics-fast-isel.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=261434&r1=261433&r2=261434&view=diff
==============================================================================

--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sat Feb 20 08:39:45 2016
@@ -23506,15 +23506,15 @@ static SDValue PerformShuffleCombine256(
 /// into either a single instruction if there is a special purpose instruction
 /// for this operation, or into a PSHUFB instruction which is a fully general
 /// instruction but should only be used to replace chains over a certain depth.
-static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef<int> Mask,
-                                   int Depth, bool HasPSHUFB, SelectionDAG &DAG,
+static bool combineX86ShuffleChain(SDValue Input, SDValue Root,
+                                   ArrayRef<int> Mask, int Depth,
+                                   bool HasPSHUFB, SelectionDAG &DAG,
                                    TargetLowering::DAGCombinerInfo &DCI,
                                    const X86Subtarget &Subtarget) {
   assert(!Mask.empty() && "Cannot combine an empty shuffle mask!");
 
   // Find the operand that enters the chain. Note that multiple uses are OK
   // here, we're not going to remove the operand we find.
-  SDValue Input = Op.getOperand(0);
   while (Input.getOpcode() == ISD::BITCAST)
     Input = Input.getOperand(0);
 
@@ -23814,7 +23814,6 @@ static bool combineX86ShufflesRecursivel
                                     DAG, DCI, Subtarget))
     return true;
 
-
   // Minor canonicalization of the accumulated shuffle mask to make it easier
   // to match below. All this does is detect masks with sequential pairs of
   // elements, and shrink them to the half-width mask. It does this in a loop
@@ -23826,7 +23825,7 @@ static bool combineX86ShufflesRecursivel
     WidenedMask.clear();
   }
 
-  return combineX86ShuffleChain(Op, Root, Mask, Depth, HasPSHUFB, DAG, DCI,
+  return combineX86ShuffleChain(Input0, Root, Mask, Depth, HasPSHUFB, DAG, DCI,
                                 Subtarget);
 }
 

Modified: llvm/trunk/test/CodeGen/X86/avx-intrinsics-fast-isel.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-intrinsics-fast-isel.ll?rev=261434&r1=261433&r2=261434&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-intrinsics-fast-isel.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-intrinsics-fast-isel.ll Sat Feb 20 08:39:45 2016
@@ -391,10 +391,12 @@ declare <4 x double> @llvm.x86.avx.vperm
 define <8 x float> @test_mm256_permute2f128_ps(<8 x float> %a0, <8 x float> %a1) {
 ; X32-LABEL: test_mm256_permute2f128_ps:
 ; X32:       # BB#0:
+; X32-NEXT:    vmovaps %ymm1, %ymm0
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_mm256_permute2f128_ps:
 ; X64:       # BB#0:
+; X64-NEXT:    vmovaps %ymm1, %ymm0
 ; X64-NEXT:    retq
   %res = call <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float> %a0, <8 x float> %a1, i8 50)
   ret <8 x float> %res