[llvm] r286616 - [AArch64] Fix bugs in isel lowering replaceSplatVectorStore.
Geoff Berry via llvm-commits
llvm-commits at lists.llvm.org
Fri Nov 11 11:25:20 PST 2016
Author: gberry
Date: Fri Nov 11 13:25:20 2016
New Revision: 286616
URL: http://llvm.org/viewvc/llvm-project?rev=286616&view=rev
Log:
[AArch64] Fix bugs in isel lowering replaceSplatVectorStore.
Summary:
Fix off-by-one indexing error in loop checking that inserted value was a
splat vector.
Add code to check that INSERT_VECTOR_ELT nodes constructing the splat
vector have the expected constant index values.
Reviewers: t.p.northover, jmolloy, mcrosier
Subscribers: aemerson, llvm-commits, rengolin
Differential Revision: https://reviews.llvm.org/D26409
Modified:
llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/trunk/test/CodeGen/AArch64/arm64-stp.ll
Modified: llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp?rev=286616&r1=286615&r2=286616&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp Fri Nov 11 13:25:20 2016
@@ -8783,26 +8783,42 @@ static SDValue replaceSplatVectorStore(S
if (VT.isFloatingPoint())
return SDValue();
- // Check for insert vector elements.
- if (StVal.getOpcode() != ISD::INSERT_VECTOR_ELT)
- return SDValue();
-
// We can express a splat as store pair(s) for 2 or 4 elements.
unsigned NumVecElts = VT.getVectorNumElements();
if (NumVecElts != 4 && NumVecElts != 2)
return SDValue();
- SDValue SplatVal = StVal.getOperand(1);
- unsigned RemainInsertElts = NumVecElts - 1;
// Check that this is a splat.
- while (--RemainInsertElts) {
- SDValue NextInsertElt = StVal.getOperand(0);
- if (NextInsertElt.getOpcode() != ISD::INSERT_VECTOR_ELT)
+ // Make sure that each of the relevant vector element locations is inserted
+ // to, i.e. 0 and 1 for v2i64 and 0, 1, 2, 3 for v4i32.
+ std::bitset<4> IndexNotInserted((1 << NumVecElts) - 1);
+ SDValue SplatVal;
+ for (unsigned I = 0; I < NumVecElts; ++I) {
+ // Check for insert vector elements.
+ if (StVal.getOpcode() != ISD::INSERT_VECTOR_ELT)
return SDValue();
- if (NextInsertElt.getOperand(1) != SplatVal)
+
+ // Check that the same value is inserted at each vector element.
+ if (I == 0)
+ SplatVal = StVal.getOperand(1);
+ else if (StVal.getOperand(1) != SplatVal)
+ return SDValue();
+
+ // Check insert element index.
+ ConstantSDNode *CIndex = dyn_cast<ConstantSDNode>(StVal.getOperand(2));
+ if (!CIndex)
return SDValue();
- StVal = NextInsertElt;
+ uint64_t IndexVal = CIndex->getZExtValue();
+ if (IndexVal >= NumVecElts)
+ return SDValue();
+ IndexNotInserted.reset(IndexVal);
+
+ StVal = StVal.getOperand(0);
}
+ // Check that all vector element locations were inserted to.
+ if (IndexNotInserted.any())
+ return SDValue();
+
unsigned OrigAlignment = St->getAlignment();
unsigned EltOffset = NumVecElts == 4 ? 4 : 8;
unsigned Alignment = std::min(OrigAlignment, EltOffset);
Modified: llvm/trunk/test/CodeGen/AArch64/arm64-stp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-stp.ll?rev=286616&r1=286615&r2=286616&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/arm64-stp.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/arm64-stp.ll Fri Nov 11 13:25:20 2016
@@ -98,6 +98,51 @@ entry:
ret void
}
+; Check that a store of a vector built by 4 insertelements is not split when
+; the vector is not a splat (here the first insert has a non-constant index).
+define void @nosplat_v4i32(i32 %v, i32 *%p) {
+entry:
+
+; CHECK-LABEL: nosplat_v4i32:
+; CHECK: str w0,
+; CHECK: ldr q[[REG1:[0-9]+]],
+; CHECK-DAG: ins v[[REG1]].s[1], w0
+; CHECK-DAG: ins v[[REG1]].s[2], w0
+; CHECK-DAG: ins v[[REG1]].s[3], w0
+; CHECK: ext v[[REG2:[0-9]+]].16b, v[[REG1]].16b, v[[REG1]].16b, #8
+; CHECK: stp d[[REG1]], d[[REG2]], [x1]
+; CHECK: ret
+
+ %p17 = insertelement <4 x i32> undef, i32 %v, i32 %v
+ %p18 = insertelement <4 x i32> %p17, i32 %v, i32 1
+ %p19 = insertelement <4 x i32> %p18, i32 %v, i32 2
+ %p20 = insertelement <4 x i32> %p19, i32 %v, i32 3
+ %p21 = bitcast i32* %p to <4 x i32>*
+ store <4 x i32> %p20, <4 x i32>* %p21, align 4
+ ret void
+}
+
+; Check that a store of a vector built by only 3 insertelements on top of an
+; existing vector (element 0 is never inserted to) is not treated as a splat
+; and does not get split.
+define void @nosplat2_v4i32(i32 %v, i32 *%p, <4 x i32> %vin) {
+entry:
+
+; CHECK-LABEL: nosplat2_v4i32:
+; CHECK: ins v[[REG1]].s[1], w0
+; CHECK-DAG: ins v[[REG1]].s[2], w0
+; CHECK-DAG: ins v[[REG1]].s[3], w0
+; CHECK: ext v[[REG2:[0-9]+]].16b, v[[REG1]].16b, v[[REG1]].16b, #8
+; CHECK: stp d[[REG1]], d[[REG2]], [x1]
+; CHECK: ret
+
+ %p18 = insertelement <4 x i32> %vin, i32 %v, i32 1
+ %p19 = insertelement <4 x i32> %p18, i32 %v, i32 2
+ %p20 = insertelement <4 x i32> %p19, i32 %v, i32 3
+ %p21 = bitcast i32* %p to <4 x i32>*
+ store <4 x i32> %p20, <4 x i32>* %p21, align 4
+ ret void
+}
+
; Read of %b to compute %tmp2 shouldn't prevent formation of stp
; CHECK-LABEL: stp_int_rar_hazard
; CHECK: ldr [[REG:w[0-9]+]], [x2, #8]
More information about the llvm-commits
mailing list