[PATCH] D26409: [AArch64] Fix bugs in isel lowering replaceSplatVectorStore.

Geoff Berry via llvm-commits llvm-commits at lists.llvm.org
Tue Nov 8 12:00:26 PST 2016


gberry created this revision.
gberry added reviewers: t.p.northover, jmolloy, mcrosier.
gberry added a subscriber: llvm-commits.
Herald added subscribers: rengolin, aemerson.

Fix an off-by-one indexing error in the loop that checks that the inserted
values form a splat vector.

Add code to check that INSERT_VECTOR_ELT nodes constructing the splat
vector have the expected constant index values.


https://reviews.llvm.org/D26409

Files:
  lib/Target/AArch64/AArch64ISelLowering.cpp
  test/CodeGen/AArch64/arm64-stp.ll


Index: test/CodeGen/AArch64/arm64-stp.ll
===================================================================
--- test/CodeGen/AArch64/arm64-stp.ll
+++ test/CodeGen/AArch64/arm64-stp.ll
@@ -98,6 +98,30 @@
   ret void
 }
 
+; Check that a store of a vector that is created by 4 insertelements but is
+; not a splat vector does not get split.
+define void @nosplat_v4i32(i32 %v, i32 *%p) {
+entry:
+
+; CHECK-LABEL: nosplat_v4i32:
+; CHECK: str w0,
+; CHECK: ldr q[[REG1:[0-9]+]],
+; CHECK-DAG: ins v[[REG1]].s[1], w0
+; CHECK-DAG: ins v[[REG1]].s[2], w0
+; CHECK-DAG: ins v[[REG1]].s[3], w0
+; CHECK: ext v[[REG2:[0-9]+]].16b, v[[REG1]].16b, v[[REG1]].16b, #8
+; CHECK: stp d[[REG1]], d[[REG2]], [x1]
+; CHECK: ret
+
+  %p17 = insertelement <4 x i32> undef, i32 %v, i32 %v
+  %p18 = insertelement <4 x i32> %p17, i32 %v, i32 1
+  %p19 = insertelement <4 x i32> %p18, i32 %v, i32 2
+  %p20 = insertelement <4 x i32> %p19, i32 %v, i32 3
+  %p21 = bitcast i32* %p to <4 x i32>*
+  store <4 x i32> %p20, <4 x i32>* %p21, align 4
+  ret void
+}
+
 ; Read of %b to compute %tmp2 shouldn't prevent formation of stp
 ; CHECK-LABEL: stp_int_rar_hazard
 ; CHECK: ldr [[REG:w[0-9]+]], [x2, #8]
Index: lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- lib/Target/AArch64/AArch64ISelLowering.cpp
+++ lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -8768,26 +8768,42 @@
   if (VT.isFloatingPoint())
     return SDValue();
 
-  // Check for insert vector elements.
-  if (StVal.getOpcode() != ISD::INSERT_VECTOR_ELT)
-    return SDValue();
-
   // We can express a splat as store pair(s) for 2 or 4 elements.
   unsigned NumVecElts = VT.getVectorNumElements();
   if (NumVecElts != 4 && NumVecElts != 2)
     return SDValue();
-  SDValue SplatVal = StVal.getOperand(1);
-  unsigned RemainInsertElts = NumVecElts - 1;
 
   // Check that this is a splat.
-  while (--RemainInsertElts) {
-    SDValue NextInsertElt = StVal.getOperand(0);
-    if (NextInsertElt.getOpcode() != ISD::INSERT_VECTOR_ELT)
+  // Make sure that each of the relevant vector element locations is inserted
+  // to, i.e. 0 and 1 for v2i64 and 0, 1, 2, 3 for v4i32.
+  std::bitset<4> IndexNotInserted((1 << NumVecElts) - 1);
+  SDValue SplatVal;
+  for (unsigned I = 0; I < NumVecElts; ++I) {
+    // Check for insert vector elements.
+    if (StVal.getOpcode() != ISD::INSERT_VECTOR_ELT)
       return SDValue();
-    if (NextInsertElt.getOperand(1) != SplatVal)
+
+    // Check that same value is inserted at each vector element.
+    if (I == 0)
+      SplatVal = StVal.getOperand(1);
+    else if (StVal.getOperand(1) != SplatVal)
+      return SDValue();
+
+    // Check insert element index.
+    ConstantSDNode *CIndex = dyn_cast<ConstantSDNode>(StVal.getOperand(2));
+    if (!CIndex)
       return SDValue();
-    StVal = NextInsertElt;
+    uint64_t IndexVal = CIndex->getZExtValue();
+    if (IndexVal >= NumVecElts)
+      return SDValue();
+    IndexNotInserted.reset(IndexVal);
+
+    StVal = StVal.getOperand(0);
   }
+  // Check that all vector element locations were inserted to.
+  if (IndexNotInserted.any())
+      return SDValue();
+
   unsigned OrigAlignment = St->getAlignment();
   unsigned EltOffset = NumVecElts == 4 ? 4 : 8;
   unsigned Alignment = std::min(OrigAlignment, EltOffset);


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D26409.77229.patch
Type: text/x-patch
Size: 3344 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20161108/361580c3/attachment.bin>


More information about the llvm-commits mailing list