[llvm] r356476 - [DAGCombine] Fix a miscompile when reducing BUILD_VECTORs to a shuffle

Tue Mar 19 09:52:01 PDT 2019

Author: bogner
Date: Tue Mar 19 09:52:00 2019
New Revision: 356476

URL: http://llvm.org/viewvc/llvm-project?rev=356476&view=rev
Log:
[DAGCombine] Fix a miscompile when reducing BUILD_VECTORs to a shuffle

In r311255 we added a case where we split vectors whose elements are
all derived from the same input vector so that we could shuffle it
more efficiently. In doing so, createBuildVecShuffle was taught to
adjust for the fact that all indices would be based off of the first
vector when this happens, but it's possible for the code that checked
that to fire incorrectly if we happen to have a BUILD_VECTOR of
extracts from subvectors and don't hit this new optimization.

Instead of trying to detect if we've split the vector by checking if
we have extracts from the same base vector, we can just pass that
information into createBuildVecShuffle, avoiding the miscompile.

Differential Revision: https://reviews.llvm.org/D59507

Added:
    llvm/trunk/test/CodeGen/X86/shuffle-extract-subvector.ll
Modified:
    llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=356476&r1=356475&r2=356476&view=diff
==============================================================================

--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Tue Mar 19 09:52:00 2019
@@ -475,7 +475,8 @@ namespace {
     SDValue reduceBuildVecToShuffle(SDNode *N);
     SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
                                   ArrayRef<int> VectorMask, SDValue VecIn1,
-                                  SDValue VecIn2, unsigned LeftIdx);
+                                  SDValue VecIn2, unsigned LeftIdx,
+                                  bool DidSplitVec);
     SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
 
     /// Walk up chain skipping non-aliasing memory nodes,
@@ -16416,7 +16417,7 @@ SDValue DAGCombiner::reduceBuildVecExtTo
 SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
                                            ArrayRef<int> VectorMask,
                                            SDValue VecIn1, SDValue VecIn2,
-                                           unsigned LeftIdx) {
+                                           unsigned LeftIdx, bool DidSplitVec) {
   MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
   SDValue ZeroIdx = DAG.getConstant(0, DL, IdxTy);
 
@@ -16424,17 +16425,12 @@ SDValue DAGCombiner::createBuildVecShuff
   EVT InVT1 = VecIn1.getValueType();
   EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
 
-  unsigned Vec2Offset = 0;
   unsigned NumElems = VT.getVectorNumElements();
   unsigned ShuffleNumElems = NumElems;
 
-  // In case both the input vectors are extracted from same base
-  // vector we do not need extra addend (Vec2Offset) while
-  // computing shuffle mask.
-  if (!VecIn2 || !(VecIn1.getOpcode() == ISD::EXTRACT_SUBVECTOR) ||
-      !(VecIn2.getOpcode() == ISD::EXTRACT_SUBVECTOR) ||
-      !(VecIn1.getOperand(0) == VecIn2.getOperand(0)))
-    Vec2Offset = InVT1.getVectorNumElements();
+  // If we artificially split a vector in two already, then the offsets in the
+  // operands will all be based off of VecIn1, even those in VecIn2.
+  unsigned Vec2Offset = DidSplitVec ? 0 : InVT1.getVectorNumElements();
 
   // We can't generate a shuffle node with mismatched input and output types.
   // Try to make the types match the type of the output.
@@ -16693,6 +16689,7 @@ SDValue DAGCombiner::reduceBuildVecToShu
   // vector, then split the vector efficiently based on the maximum
   // vector access index and adjust the VectorMask and
   // VecIn accordingly.
+  bool DidSplitVec = false;
   if (VecIn.size() == 2) {
     unsigned MaxIndex = 0;
     unsigned NearestPow2 = 0;
@@ -16723,6 +16720,7 @@ SDValue DAGCombiner::reduceBuildVecToShu
         VecIn.pop_back();
         VecIn.push_back(VecIn1);
         VecIn.push_back(VecIn2);
+        DidSplitVec = true;
 
         for (unsigned i = 0; i < NumElems; i++) {
           if (VectorMask[i] <= 0)
@@ -16760,7 +16758,7 @@ SDValue DAGCombiner::reduceBuildVecToShu
         (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();
 
     if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
-                                                VecRight, LeftIdx))
+                                                VecRight, LeftIdx, DidSplitVec))
       Shuffles.push_back(Shuffle);
     else
       return SDValue();

Added: llvm/trunk/test/CodeGen/X86/shuffle-extract-subvector.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/shuffle-extract-subvector.ll?rev=356476&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/shuffle-extract-subvector.ll (added)
+++ llvm/trunk/test/CodeGen/X86/shuffle-extract-subvector.ll Tue Mar 19 09:52:00 2019
@@ -0,0 +1,48 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
+
+define void @f(<4 x half>* %a, <4 x half>* %b, <8 x half>* %c) {
+; CHECK-LABEL: f:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movzwl (%rdi), %r8d
+; CHECK-NEXT:    movzwl 2(%rdi), %r9d
+; CHECK-NEXT:    movzwl 4(%rdi), %r11d
+; CHECK-NEXT:    movzwl 6(%rdi), %edi
+; CHECK-NEXT:    movzwl (%rsi), %r10d
+; CHECK-NEXT:    movzwl 2(%rsi), %ecx
+; CHECK-NEXT:    movzwl 4(%rsi), %eax
+; CHECK-NEXT:    movzwl 6(%rsi), %esi
+; CHECK-NEXT:    movw %si, 14(%rdx)
+; CHECK-NEXT:    movw %di, 12(%rdx)
+; CHECK-NEXT:    movw %ax, 10(%rdx)
+; CHECK-NEXT:    movw %r11w, 8(%rdx)
+; CHECK-NEXT:    movw %cx, 6(%rdx)
+; CHECK-NEXT:    movw %r9w, 4(%rdx)
+; CHECK-NEXT:    movw %r10w, 2(%rdx)
+; CHECK-NEXT:    movw %r8w, (%rdx)
+; CHECK-NEXT:    retq
+  %tmp4 = load <4 x half>, <4 x half>* %a
+  %tmp5 = load <4 x half>, <4 x half>* %b
+  %tmp7 = shufflevector <4 x half> %tmp4, <4 x half> %tmp5, <2 x i32> <i32 0, i32 4>
+  %tmp8 = shufflevector <4 x half> %tmp4, <4 x half> %tmp5, <2 x i32> <i32 1, i32 5>
+  %tmp9 = shufflevector <4 x half> %tmp4, <4 x half> %tmp5, <2 x i32> <i32 2, i32 6>
+  %tmp10 = shufflevector <4 x half> %tmp4, <4 x half> %tmp5, <2 x i32> <i32 3, i32 7>
+  %tmp11 = extractelement <2 x half> %tmp7, i32 0
+  %tmp12 = insertelement <8 x half> undef, half %tmp11, i32 0
+  %tmp13 = extractelement <2 x half> %tmp7, i32 1
+  %tmp14 = insertelement <8 x half> %tmp12, half %tmp13, i32 1
+  %tmp15 = extractelement <2 x half> %tmp8, i32 0
+  %tmp16 = insertelement <8 x half> %tmp14, half %tmp15, i32 2
+  %tmp17 = extractelement <2 x half> %tmp8, i32 1
+  %tmp18 = insertelement <8 x half> %tmp16, half %tmp17, i32 3
+  %tmp19 = extractelement <2 x half> %tmp9, i32 0
+  %tmp20 = insertelement <8 x half> %tmp18, half %tmp19, i32 4
+  %tmp21 = extractelement <2 x half> %tmp9, i32 1
+  %tmp22 = insertelement <8 x half> %tmp20, half %tmp21, i32 5
+  %tmp23 = extractelement <2 x half> %tmp10, i32 0
+  %tmp24 = insertelement <8 x half> %tmp22, half %tmp23, i32 6
+  %tmp25 = extractelement <2 x half> %tmp10, i32 1
+  %tmp26 = insertelement <8 x half> %tmp24, half %tmp25, i32 7
+  store <8 x half> %tmp26, <8 x half>* %c
+  ret void
+}