<div dir="ltr"><div class="gmail_default" style="font-family:arial,helvetica,sans-serif;font-size:small">LGTM!</div></div><div class="gmail_extra"><br><br><div class="gmail_quote">2014-02-12 18:43 GMT+08:00 Hao Liu <span dir="ltr"><<a href="mailto:Hao.Liu@arm.com" target="_blank">Hao.Liu@arm.com</a>></span>:<br>
<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">Hi t.p.northover,<br>
<br>
Hi Tim and reviewers,<br>
<br>
If we use SETCC to compare two v1i64 operations, it will generate get a v1i1 result. If such node passed to type legalizer, it will try to scalarize SETCC by using "i1 SETCC i64, i64". But as v1i64 is legal to the AArch64, the operand with v1i64 type is not priviously scalarized. This will cause an assertion failure in GetScalarizedVector() (in LegalizeTypes.h), as the scalarize algorithm supposes the operand has already been scalarized.<br>
<br>
This patch solves this problem by doing some combination to the "v1i1 SETCC" in combine1 phase before type legalization. There are three situations:<br>
(1) iXX sign_extend (extract_vector_elt (v1i1 setcc)) -> extract_vector_elt (v1iXX setcc)<br>
This is an optimization. (Actually, we suppose to get "extract_vector_elt (sign_extend (v1i1 setcc))" from the C code. But the extract_vector_elt and sign_extend are switched by the middle end optimization).<br>
<br>
(2) vselect (v1i1 setcc) -> vselect (v1iXX setcc)<br>
This is necessary as vselect will also try to scalarize this "v1i1 setcc" operand.<br>
<br>
(3) v1i1 setcc v1iXX, v1iXX -> v1i1 bitcast (i1 setcc (iXX extract_vector_elt), (iXX extract_vector_elt)<br>
This is other situations except (1) and (2). The result of setcc may be used in other situation, such as extract out the i1 and branch. Or the v1i1 result is compared again. There may be many other situations of using such node.<br>
This situation is fixed by extracting it's operands.<br>
<br>
Review, please.<br>
<br>
Thanks,<br>
-Hao<br>
<br>
<a href="http://llvm-reviews.chandlerc.com/D2751" target="_blank">http://llvm-reviews.chandlerc.com/D2751</a><br>
<br>
Files:<br>
lib/Target/AArch64/AArch64ISelLowering.cpp<br>
test/CodeGen/AArch64/neon-v1i1-setcc.ll<br>
<br>
Index: lib/Target/AArch64/AArch64ISelLowering.cpp<br>
===================================================================<br>
--- lib/Target/AArch64/AArch64ISelLowering.cpp<br>
+++ lib/Target/AArch64/AArch64ISelLowering.cpp<br>
@@ -521,6 +521,10 @@<br>
setOperationAction(ISD::MUL, MVT::v1i64, Expand);<br>
setOperationAction(ISD::MUL, MVT::v2i64, Expand);<br>
}<br>
+<br>
+ setTargetDAGCombine(ISD::SETCC);<br>
+ setTargetDAGCombine(ISD::SIGN_EXTEND);<br>
+ setTargetDAGCombine(ISD::VSELECT);<br>
}<br>
<br>
EVT AArch64TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {<br>
@@ -4258,6 +4262,89 @@<br>
return SDValue(N, 0);<br>
}<br>
<br>
+// v1i1 setcc -><br>
+// v1i1 (bitcast (i1 setcc (extract_vector_elt, extract_vector_elt))<br>
+// FIXME: Currently the type legalizer can't handle SETCC having v1i1 as result.<br>
+// If it can legalize "v1i1 SETCC" correctly, no need to combine such SETCC.<br>
+static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG &DAG) {<br>
+ EVT ResVT = N->getValueType(0);<br>
+<br>
+ if (!ResVT.isVector() || ResVT.getVectorNumElements() != 1 ||<br>
+ ResVT.getVectorElementType() != MVT::i1)<br>
+ return SDValue();<br>
+<br>
+ SDValue LHS = N->getOperand(0);<br>
+ SDValue RHS = N->getOperand(1);<br>
+ EVT CmpVT = LHS.getValueType();<br>
+ LHS = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N),<br>
+ CmpVT.getVectorElementType(), LHS,<br>
+ DAG.getConstant(0, MVT::i64));<br>
+ RHS = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N),<br>
+ CmpVT.getVectorElementType(), RHS,<br>
+ DAG.getConstant(0, MVT::i64));<br>
+ SDValue SetCC =<br>
+ DAG.getSetCC(SDLoc(N), MVT::i1, LHS, RHS,<br>
+ cast<CondCodeSDNode>(N->getOperand(2))->get());<br>
+ return DAG.getNode(ISD::BITCAST, SDLoc(N), ResVT, SetCC);<br>
+}<br>
+<br>
+// vselect (v1i1 setcc) -><br>
+// vselect (v1iXX setcc) (XX is the size of the compared operand type)<br>
+// FIXME: Currently the type legalizer can't handle VSELECT having v1i1 as<br>
+// condition. If it can legalize "VSELECT v1i1" correctly, no need to combine<br>
+// such VSELECT.<br>
+static SDValue PerformVSelectCombine(SDNode *N, SelectionDAG &DAG) {<br>
+ SDValue N0 = N->getOperand(0);<br>
+ EVT CCVT = N0.getValueType();<br>
+<br>
+ if (N0.getOpcode() != ISD::SETCC || CCVT.getVectorNumElements() != 1 ||<br>
+ CCVT.getVectorElementType() != MVT::i1)<br>
+ return SDValue();<br>
+<br>
+ EVT ResVT = N->getValueType(0);<br>
+ EVT CmpVT = N0.getOperand(0).getValueType();<br>
+ // Only combine when the result type is of the same size as the compared<br>
+ // operands.<br>
+ if (ResVT.getSizeInBits() != CmpVT.getSizeInBits())<br>
+ return SDValue();<br>
+<br>
+ SDValue IfTrue = N->getOperand(1);<br>
+ SDValue IfFalse = N->getOperand(2);<br>
+ SDValue SetCC =<br>
+ DAG.getSetCC(SDLoc(N), CmpVT.changeVectorElementTypeToInteger(),<br>
+ N0.getOperand(0), N0.getOperand(1),<br>
+ cast<CondCodeSDNode>(N0.getOperand(2))->get());<br>
+ return DAG.getNode(ISD::VSELECT, SDLoc(N), ResVT, SetCC,<br>
+ IfTrue, IfFalse);<br>
+}<br>
+<br>
+// sign_extend (extract_vector_elt (v1i1 setcc)) -><br>
+// extract_vector_elt (v1iXX setcc)<br>
+// (XX is the size of the compared operand type)<br>
+static SDValue PerformSignExtendCombine(SDNode *N, SelectionDAG &DAG) {<br>
+ SDValue N0 = N->getOperand(0);<br>
+ SDValue Vec = N0.getOperand(0);<br>
+<br>
+ if (N0.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||<br>
+ Vec.getOpcode() != ISD::SETCC)<br>
+ return SDValue();<br>
+<br>
+ EVT ResVT = N->getValueType(0);<br>
+ EVT CmpVT = Vec.getOperand(0).getValueType();<br>
+ // Only optimize when the result type is of the same size as the element<br>
+ // type of the compared operand.<br>
+ if (ResVT.getSizeInBits() != CmpVT.getVectorElementType().getSizeInBits())<br>
+ return SDValue();<br>
+<br>
+ SDValue Lane = N0.getOperand(1);<br>
+ SDValue SetCC =<br>
+ DAG.getSetCC(SDLoc(N), CmpVT.changeVectorElementTypeToInteger(),<br>
+ Vec.getOperand(0), Vec.getOperand(1),<br>
+ cast<CondCodeSDNode>(Vec.getOperand(2))->get());<br>
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), ResVT,<br>
+ SetCC, Lane);<br>
+}<br>
+<br>
SDValue<br>
AArch64TargetLowering::PerformDAGCombine(SDNode *N,<br>
DAGCombinerInfo &DCI) const {<br>
@@ -4269,6 +4356,9 @@<br>
case ISD::SRA:<br>
case ISD::SRL:<br>
return PerformShiftCombine(N, DCI, getSubtarget());<br>
+ case ISD::SETCC: return PerformSETCCCombine(N, DCI.DAG);<br>
+ case ISD::VSELECT: return PerformVSelectCombine(N, DCI.DAG);<br>
+ case ISD::SIGN_EXTEND: return PerformSignExtendCombine(N, DCI.DAG);<br>
case ISD::INTRINSIC_WO_CHAIN:<br>
return PerformIntrinsicCombine(N, DCI.DAG);<br>
case AArch64ISD::NEON_VDUPLANE:<br>
Index: test/CodeGen/AArch64/neon-v1i1-setcc.ll<br>
===================================================================<br>
--- /dev/null<br>
+++ test/CodeGen/AArch64/neon-v1i1-setcc.ll<br>
@@ -0,0 +1,60 @@<br>
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s<br>
+<br>
+define i64 @test_sext_extr_cmp_0(<1 x i64> %v1, <1 x i64> %v2) {<br>
+; CHECK-LABEL: test_sext_extr_cmp_0:<br>
+; CHECK: cmge d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}<br>
+ %1 = icmp sge <1 x i64> %v1, %v2<br>
+ %2 = extractelement <1 x i1> %1, i32 0<br>
+ %vget_lane = sext i1 %2 to i64<br>
+ ret i64 %vget_lane<br>
+}<br>
+<br>
+define i64 @test_sext_extr_cmp_1(<1 x double> %v1, <1 x double> %v2) {<br>
+; CHECK-LABEL: test_sext_extr_cmp_1:<br>
+; CHECK: fcmeq d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}<br>
+ %1 = fcmp oeq <1 x double> %v1, %v2<br>
+ %2 = extractelement <1 x i1> %1, i32 0<br>
+ %vget_lane = sext i1 %2 to i64<br>
+ ret i64 %vget_lane<br>
+}<br>
+<br>
+define <1 x i64> @test_select_v1i1_0(<1 x i64> %v1, <1 x i64> %v2, <1 x i64> %v3) {<br>
+; CHECK-LABEL: test_select_v1i1_0:<br>
+; CHECK: cmeq d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}<br>
+; CHECK: bsl v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b<br>
+ %1 = icmp eq <1 x i64> %v1, %v2<br>
+ %res = select <1 x i1> %1, <1 x i64> zeroinitializer, <1 x i64> %v3<br>
+ ret <1 x i64> %res<br>
+}<br>
+<br>
+define <1 x i64> @test_select_v1i1_1(<1 x double> %v1, <1 x double> %v2, <1 x i64> %v3) {<br>
+; CHECK-LABEL: test_select_v1i1_1:<br>
+; CHECK: fcmeq d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}<br>
+; CHECK: bsl v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b<br>
+ %1 = fcmp oeq <1 x double> %v1, %v2<br>
+ %res = select <1 x i1> %1, <1 x i64> zeroinitializer, <1 x i64> %v3<br>
+ ret <1 x i64> %res<br>
+}<br>
+<br>
+define <1 x double> @test_select_v1i1_2(<1 x i64> %v1, <1 x i64> %v2, <1 x double> %v3) {<br>
+; CHECK-LABEL: test_select_v1i1_2:<br>
+; CHECK: cmeq d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}<br>
+; CHECK: bsl v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b<br>
+ %1 = icmp eq <1 x i64> %v1, %v2<br>
+ %res = select <1 x i1> %1, <1 x double> zeroinitializer, <1 x double> %v3<br>
+ ret <1 x double> %res<br>
+}<br>
+<br>
+define i32 @test_br_extr_cmp(<1 x i64> %v1, <1 x i64> %v2) {<br>
+; CHECK-LABEL: test_br_extr_cmp:<br>
+; CHECK: cmp x{{[0-9]+}}, x{{[0-9]+}}<br>
+ %1 = icmp eq <1 x i64> %v1, %v2<br>
+ %2 = extractelement <1 x i1> %1, i32 0<br>
+ br i1 %2, label %if.end, label %if.then<br>
+<br>
+if.then:<br>
+ ret i32 0;<br>
+<br>
+if.end:<br>
+ ret i32 1;<br>
+}<br>
<br>_______________________________________________<br>
llvm-commits mailing list<br>
<a href="mailto:llvm-commits@cs.uiuc.edu">llvm-commits@cs.uiuc.edu</a><br>
<a href="http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits" target="_blank">http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits</a><br>
<br></blockquote></div><br><br clear="all"><div><br></div>-- <br><div dir="ltr"><font face="courier new, monospace">Thanks,</font><div><font face="courier new, monospace">-Jiangning</font></div></div>
</div>