[llvm] r220051 - [AArch64] Fix a silent codegen fault in BUILD_VECTOR lowering.

Fri Oct 17 10:06:31 PDT 2014

Author: jamesm
Date: Fri Oct 17 12:06:31 2014
New Revision: 220051

URL: http://llvm.org/viewvc/llvm-project?rev=220051&view=rev
Log:
[AArch64] Fix a silent codegen fault in BUILD_VECTOR lowering.

We should be talking about the number of source elements, not the number of destination elements, given we know at this point that the source and dest element numbers are not the same.

While we're at it, avoid writing to std::vector::end()...

Bug found with random testing and a lot of coffee.

Added:
    llvm/trunk/test/CodeGen/AArch64/aarch64-wide-shuffle.ll
Modified:
    llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp

Modified: llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp?rev=220051&r1=220050&r2=220051&view=diff
==============================================================================

--- llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp Fri Oct 17 12:06:31 2014
@@ -4558,7 +4558,7 @@ SDValue AArch64TargetLowering::Reconstru
     SDValue SourceVec = V.getOperand(0);
     auto Source = std::find(Sources.begin(), Sources.end(), SourceVec);
     if (Source == Sources.end())
-      Sources.push_back(ShuffleSourceInfo(SourceVec));
+      Source = Sources.insert(Sources.end(), ShuffleSourceInfo(SourceVec));
 
     // Update the minimum and maximum lane number seen.
     unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();
@@ -4597,8 +4597,8 @@ SDValue AArch64TargetLowering::Reconstru
     // This stage of the search produces a source with the same element type as
     // the original, but with a total width matching the BUILD_VECTOR output.
     EVT EltVT = SrcVT.getVectorElementType();
-    EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
-                                  VT.getSizeInBits() / EltVT.getSizeInBits());
+    unsigned NumSrcElts = VT.getSizeInBits() / EltVT.getSizeInBits();
+    EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumSrcElts);
 
     if (SrcVT.getSizeInBits() < VT.getSizeInBits()) {
       assert(2 * SrcVT.getSizeInBits() == VT.getSizeInBits());
@@ -4612,18 +4612,18 @@ SDValue AArch64TargetLowering::Reconstru
 
     assert(SrcVT.getSizeInBits() == 2 * VT.getSizeInBits());
 
-    if (Src.MaxElt - Src.MinElt >= NumElts) {
+    if (Src.MaxElt - Src.MinElt >= NumSrcElts) {
       // Span too large for a VEXT to cope
       return SDValue();
     }
 
-    if (Src.MinElt >= NumElts) {
+    if (Src.MinElt >= NumSrcElts) {
       // The extraction can just take the second half
       Src.ShuffleVec =
           DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
-                      DAG.getIntPtrConstant(NumElts));
-      Src.WindowBase = -NumElts;
-    } else if (Src.MaxElt < NumElts) {
+                      DAG.getIntPtrConstant(NumSrcElts));
+      Src.WindowBase = -NumSrcElts;
+    } else if (Src.MaxElt < NumSrcElts) {
       // The extraction can just take the first half
       Src.ShuffleVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT,
                                    Src.ShuffleVec, DAG.getIntPtrConstant(0));
@@ -4633,7 +4633,7 @@ SDValue AArch64TargetLowering::Reconstru
                                      Src.ShuffleVec, DAG.getIntPtrConstant(0));
       SDValue VEXTSrc2 =
           DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
-                      DAG.getIntPtrConstant(NumElts));
+                      DAG.getIntPtrConstant(NumSrcElts));
       unsigned Imm = Src.MinElt * getExtFactor(VEXTSrc1);
 
       Src.ShuffleVec = DAG.getNode(AArch64ISD::EXT, dl, DestVT, VEXTSrc1,

Added: llvm/trunk/test/CodeGen/AArch64/aarch64-wide-shuffle.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/aarch64-wide-shuffle.ll?rev=220051&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/aarch64-wide-shuffle.ll (added)
+++ llvm/trunk/test/CodeGen/AArch64/aarch64-wide-shuffle.ll Fri Oct 17 12:06:31 2014
@@ -0,0 +1,22 @@
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-unknown-linux-gnu"
+
+define <4 x i16> @f(<4 x i32> %vqdmlal_v3.i, <8 x i16> %x5) {
+entry:
+  ; Check that we don't just dup the input vector. The code emitted is ext, dup, ext, ext
+  ; but only match the last three instructions as the first two could be combined to
+  ; a dup2 at some stage.
+  ; CHECK: dup
+  ; CHECK: ext
+  ; CHECK: ext
+  %x4 = extractelement <4 x i32> %vqdmlal_v3.i, i32 2
+  %vgetq_lane = trunc i32 %x4 to i16
+  %vecinit.i = insertelement <4 x i16> undef, i16 %vgetq_lane, i32 0
+  %vecinit2.i = insertelement <4 x i16> %vecinit.i, i16 %vgetq_lane, i32 2
+  %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vgetq_lane, i32 3
+  %vgetq_lane261 = extractelement <8 x i16> %x5, i32 0
+  %vset_lane267 = insertelement <4 x i16> %vecinit3.i, i16 %vgetq_lane261, i32 1
+  ret <4 x i16> %vset_lane267
+}