[llvm] r286616 - [AArch64] Fix bugs in isel lowering replaceSplatVectorStore.

Geoff Berry via llvm-commits llvm-commits at lists.llvm.org
Fri Nov 11 11:25:20 PST 2016


Author: gberry
Date: Fri Nov 11 13:25:20 2016
New Revision: 286616

URL: http://llvm.org/viewvc/llvm-project?rev=286616&view=rev
Log:
[AArch64] Fix bugs in isel lowering replaceSplatVectorStore.

Summary:
Fix off-by-one indexing error in loop checking that inserted value was a
splat vector.

Add code to check that INSERT_VECTOR_ELT nodes constructing the splat
vector have the expected constant index values.

Reviewers: t.p.northover, jmolloy, mcrosier

Subscribers: aemerson, llvm-commits, rengolin

Differential Revision: https://reviews.llvm.org/D26409

Modified:
    llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
    llvm/trunk/test/CodeGen/AArch64/arm64-stp.ll

Modified: llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp?rev=286616&r1=286615&r2=286616&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp Fri Nov 11 13:25:20 2016
@@ -8783,26 +8783,42 @@ static SDValue replaceSplatVectorStore(S
   if (VT.isFloatingPoint())
     return SDValue();
 
-  // Check for insert vector elements.
-  if (StVal.getOpcode() != ISD::INSERT_VECTOR_ELT)
-    return SDValue();
-
   // We can express a splat as store pair(s) for 2 or 4 elements.
   unsigned NumVecElts = VT.getVectorNumElements();
   if (NumVecElts != 4 && NumVecElts != 2)
     return SDValue();
-  SDValue SplatVal = StVal.getOperand(1);
-  unsigned RemainInsertElts = NumVecElts - 1;
 
   // Check that this is a splat.
-  while (--RemainInsertElts) {
-    SDValue NextInsertElt = StVal.getOperand(0);
-    if (NextInsertElt.getOpcode() != ISD::INSERT_VECTOR_ELT)
+  // Make sure that each of the relevant vector element locations is inserted
+  // to, i.e. 0 and 1 for v2i64 and 0, 1, 2, 3 for v4i32.
+  std::bitset<4> IndexNotInserted((1 << NumVecElts) - 1);
+  SDValue SplatVal;
+  for (unsigned I = 0; I < NumVecElts; ++I) {
+    // Check for insert vector elements.
+    if (StVal.getOpcode() != ISD::INSERT_VECTOR_ELT)
       return SDValue();
-    if (NextInsertElt.getOperand(1) != SplatVal)
+
+    // Check that same value is inserted at each vector element.
+    if (I == 0)
+      SplatVal = StVal.getOperand(1);
+    else if (StVal.getOperand(1) != SplatVal)
+      return SDValue();
+
+    // Check insert element index.
+    ConstantSDNode *CIndex = dyn_cast<ConstantSDNode>(StVal.getOperand(2));
+    if (!CIndex)
       return SDValue();
-    StVal = NextInsertElt;
+    uint64_t IndexVal = CIndex->getZExtValue();
+    if (IndexVal >= NumVecElts)
+      return SDValue();
+    IndexNotInserted.reset(IndexVal);
+
+    StVal = StVal.getOperand(0);
   }
+  // Check that all vector element locations were inserted to.
+  if (IndexNotInserted.any())
+      return SDValue();
+
   unsigned OrigAlignment = St->getAlignment();
   unsigned EltOffset = NumVecElts == 4 ? 4 : 8;
   unsigned Alignment = std::min(OrigAlignment, EltOffset);

Modified: llvm/trunk/test/CodeGen/AArch64/arm64-stp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-stp.ll?rev=286616&r1=286615&r2=286616&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/arm64-stp.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/arm64-stp.ll Fri Nov 11 13:25:20 2016
@@ -98,6 +98,51 @@ entry:
   ret void
 }
 
+; Check that a store of a vector created by 4 insertelements that is not a
+; splat vector (the first insert uses a variable index) does not get split.
+define void @nosplat_v4i32(i32 %v, i32 *%p) {
+entry:
+
+; CHECK-LABEL: nosplat_v4i32:
+; CHECK: str w0,
+; CHECK: ldr q[[REG1:[0-9]+]],
+; CHECK-DAG: ins v[[REG1]].s[1], w0
+; CHECK-DAG: ins v[[REG1]].s[2], w0
+; CHECK-DAG: ins v[[REG1]].s[3], w0
+; CHECK: ext v[[REG2:[0-9]+]].16b, v[[REG1]].16b, v[[REG1]].16b, #8
+; CHECK: stp d[[REG1]], d[[REG2]], [x1]
+; CHECK: ret
+
+  %p17 = insertelement <4 x i32> undef, i32 %v, i32 %v
+  %p18 = insertelement <4 x i32> %p17, i32 %v, i32 1
+  %p19 = insertelement <4 x i32> %p18, i32 %v, i32 2
+  %p20 = insertelement <4 x i32> %p19, i32 %v, i32 3
+  %p21 = bitcast i32* %p to <4 x i32>*
+  store <4 x i32> %p20, <4 x i32>* %p21, align 4
+  ret void
+}
+
+; Check that a store of a non-splat vector created by only 3 insertelements
+; into an existing vector (element 0 is never inserted) does not get split.
+define void @nosplat2_v4i32(i32 %v, i32 *%p, <4 x i32> %vin) {
+entry:
+
+; CHECK-LABEL: nosplat2_v4i32:
+; CHECK: ins v[[REG1]].s[1], w0
+; CHECK-DAG: ins v[[REG1]].s[2], w0
+; CHECK-DAG: ins v[[REG1]].s[3], w0
+; CHECK: ext v[[REG2:[0-9]+]].16b, v[[REG1]].16b, v[[REG1]].16b, #8
+; CHECK: stp d[[REG1]], d[[REG2]], [x1]
+; CHECK: ret
+
+  %p18 = insertelement <4 x i32> %vin, i32 %v, i32 1
+  %p19 = insertelement <4 x i32> %p18, i32 %v, i32 2
+  %p20 = insertelement <4 x i32> %p19, i32 %v, i32 3
+  %p21 = bitcast i32* %p to <4 x i32>*
+  store <4 x i32> %p20, <4 x i32>* %p21, align 4
+  ret void
+}
+
 ; Read of %b to compute %tmp2 shouldn't prevent formation of stp
 ; CHECK-LABEL: stp_int_rar_hazard
 ; CHECK: ldr [[REG:w[0-9]+]], [x2, #8]




More information about the llvm-commits mailing list