[PATCH] D26409: [AArch64] Fix bugs in isel lowering replaceSplatVectorStore.
Geoff Berry via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 8 12:00:26 PST 2016
gberry created this revision.
gberry added reviewers: t.p.northover, jmolloy, mcrosier.
gberry added a subscriber: llvm-commits.
Herald added subscribers: rengolin, aemerson.
Fix an off-by-one indexing error in the loop that checks whether the stored
value is a splat vector.
Add code to check that INSERT_VECTOR_ELT nodes constructing the splat
vector have the expected constant index values.
https://reviews.llvm.org/D26409
Files:
lib/Target/AArch64/AArch64ISelLowering.cpp
test/CodeGen/AArch64/arm64-stp.ll
Index: test/CodeGen/AArch64/arm64-stp.ll
===================================================================
--- test/CodeGen/AArch64/arm64-stp.ll
+++ test/CodeGen/AArch64/arm64-stp.ll
@@ -98,6 +98,30 @@
ret void
}
+; Check that a store of a vector built by 4 insertelements that do not form
+; a splat (the first insert uses a non-constant index) does not get split.
+define void @nosplat_v4i32(i32 %v, i32 *%p) {
+entry:
+
+; CHECK-LABEL: nosplat_v4i32:
+; CHECK: str w0,
+; CHECK: ldr q[[REG1:[0-9]+]],
+; CHECK-DAG: ins v[[REG1]].s[1], w0
+; CHECK-DAG: ins v[[REG1]].s[2], w0
+; CHECK-DAG: ins v[[REG1]].s[3], w0
+; CHECK: ext v[[REG2:[0-9]+]].16b, v[[REG1]].16b, v[[REG1]].16b, #8
+; CHECK: stp d[[REG1]], d[[REG2]], [x1]
+; CHECK: ret
+
+ %p17 = insertelement <4 x i32> undef, i32 %v, i32 %v
+ %p18 = insertelement <4 x i32> %p17, i32 %v, i32 1
+ %p19 = insertelement <4 x i32> %p18, i32 %v, i32 2
+ %p20 = insertelement <4 x i32> %p19, i32 %v, i32 3
+ %p21 = bitcast i32* %p to <4 x i32>*
+ store <4 x i32> %p20, <4 x i32>* %p21, align 4
+ ret void
+}
+
; Read of %b to compute %tmp2 shouldn't prevent formation of stp
; CHECK-LABEL: stp_int_rar_hazard
; CHECK: ldr [[REG:w[0-9]+]], [x2, #8]
Index: lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- lib/Target/AArch64/AArch64ISelLowering.cpp
+++ lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -8768,26 +8768,42 @@
if (VT.isFloatingPoint())
return SDValue();
- // Check for insert vector elements.
- if (StVal.getOpcode() != ISD::INSERT_VECTOR_ELT)
- return SDValue();
-
// We can express a splat as store pair(s) for 2 or 4 elements.
unsigned NumVecElts = VT.getVectorNumElements();
if (NumVecElts != 4 && NumVecElts != 2)
return SDValue();
- SDValue SplatVal = StVal.getOperand(1);
- unsigned RemainInsertElts = NumVecElts - 1;
// Check that this is a splat.
- while (--RemainInsertElts) {
- SDValue NextInsertElt = StVal.getOperand(0);
- if (NextInsertElt.getOpcode() != ISD::INSERT_VECTOR_ELT)
+ // Make sure that every relevant vector element location is inserted to,
+ // i.e. 0 and 1 for v2i64 and 0, 1, 2, 3 for v4i32.
+ std::bitset<4> IndexNotInserted((1 << NumVecElts) - 1);
+ SDValue SplatVal;
+ for (unsigned I = 0; I < NumVecElts; ++I) {
+ // Check for insert vector elements.
+ if (StVal.getOpcode() != ISD::INSERT_VECTOR_ELT)
return SDValue();
- if (NextInsertElt.getOperand(1) != SplatVal)
+
+ // Check that the same value is inserted at each vector element.
+ if (I == 0)
+ SplatVal = StVal.getOperand(1);
+ else if (StVal.getOperand(1) != SplatVal)
+ return SDValue();
+
+ // Check insert element index.
+ ConstantSDNode *CIndex = dyn_cast<ConstantSDNode>(StVal.getOperand(2));
+ if (!CIndex)
return SDValue();
- StVal = NextInsertElt;
+ uint64_t IndexVal = CIndex->getZExtValue();
+ if (IndexVal >= NumVecElts)
+ return SDValue();
+ IndexNotInserted.reset(IndexVal);
+
+ StVal = StVal.getOperand(0);
}
+ // Check that all vector element locations were inserted to.
+ if (IndexNotInserted.any())
+ return SDValue();
+
unsigned OrigAlignment = St->getAlignment();
unsigned EltOffset = NumVecElts == 4 ? 4 : 8;
unsigned Alignment = std::min(OrigAlignment, EltOffset);
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D26409.77229.patch
Type: text/x-patch
Size: 3344 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20161108/361580c3/attachment.bin>
More information about the llvm-commits
mailing list