[PATCH] [AArch64]Fix the assertion failure caused by scalarizing "v1i1 setcc" in type legalizer

Jiangning Liu liujiangning1 at gmail.com
Thu Feb 13 18:02:08 PST 2014


LGTM!


2014-02-12 18:43 GMT+08:00 Hao Liu <Hao.Liu at arm.com>:

> Hi t.p.northover,
>
> Hi Tim and reviewers,
>
> If we use SETCC to compare two v1i64 operands, it will generate a
> v1i1 result. If such a node is passed to the type legalizer, it will try to
> scalarize the SETCC by using "i1 SETCC i64, i64". But as v1i64 is legal on
> AArch64, the operand with v1i64 type has not previously been scalarized. This
> will cause an assertion failure in GetScalarizedVector() (in
> LegalizeTypes.h), as the scalarization algorithm assumes the operand has
> already been scalarized.
>
> This patch solves this problem by combining the "v1i1 SETCC" node in the
> combine1 phase, before type legalization. There are three
> situations:
> (1) iXX sign_extend (extract_vector_elt (v1i1 setcc)) ->
> extract_vector_elt (v1iXX setcc)
> This is an optimization. (Actually, we would expect to get "extract_vector_elt
> (sign_extend (v1i1 setcc))" from the C code, but the extract_vector_elt and
> sign_extend are swapped by the middle-end optimization.)
>
> (2) vselect (v1i1 setcc) -> vselect (v1iXX setcc)
> This is necessary as vselect will also try to scalarize this "v1i1 setcc"
> operand.
>
> (3) v1i1 setcc v1iXX, v1iXX -> v1i1 bitcast (i1 setcc (iXX
> extract_vector_elt), (iXX extract_vector_elt))
> This covers all other situations besides (1) and (2). The result of the
> setcc may be used in other ways, such as extracting the i1 and branching
> on it, or comparing the v1i1 result again. There may be many other uses of
> such a node.
> This situation is fixed by extracting its operands.
>
> Review, please.
>
> Thanks,
> -Hao
>
> http://llvm-reviews.chandlerc.com/D2751
>
> Files:
>   lib/Target/AArch64/AArch64ISelLowering.cpp
>   test/CodeGen/AArch64/neon-v1i1-setcc.ll
>
> Index: lib/Target/AArch64/AArch64ISelLowering.cpp
> ===================================================================
> --- lib/Target/AArch64/AArch64ISelLowering.cpp
> +++ lib/Target/AArch64/AArch64ISelLowering.cpp
> @@ -521,6 +521,10 @@
>      setOperationAction(ISD::MUL, MVT::v1i64, Expand);
>      setOperationAction(ISD::MUL, MVT::v2i64, Expand);
>    }
> +
> +  setTargetDAGCombine(ISD::SETCC);
> +  setTargetDAGCombine(ISD::SIGN_EXTEND);
> +  setTargetDAGCombine(ISD::VSELECT);
>  }
>
>  EVT AArch64TargetLowering::getSetCCResultType(LLVMContext &, EVT VT)
> const {
> @@ -4258,6 +4262,89 @@
>    return SDValue(N, 0);
>  }
>
> +// v1i1 setcc ->
> +//     v1i1 (bitcast (i1 setcc (extract_vector_elt, extract_vector_elt))
> +// FIXME: Currently the type legalizer can't handle SETCC having v1i1 as
> result.
> +// If it can legalize "v1i1 SETCC" correctly, no need to combine such
> SETCC.
> +static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG &DAG) {
> +  EVT ResVT = N->getValueType(0);
> +
> +  if (!ResVT.isVector() || ResVT.getVectorNumElements() != 1 ||
> +      ResVT.getVectorElementType() != MVT::i1)
> +    return SDValue();
> +
> +  SDValue LHS = N->getOperand(0);
> +  SDValue RHS = N->getOperand(1);
> +  EVT CmpVT = LHS.getValueType();
> +  LHS = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N),
> +                    CmpVT.getVectorElementType(), LHS,
> +                    DAG.getConstant(0, MVT::i64));
> +  RHS = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N),
> +                    CmpVT.getVectorElementType(), RHS,
> +                    DAG.getConstant(0, MVT::i64));
> +  SDValue SetCC =
> +      DAG.getSetCC(SDLoc(N), MVT::i1, LHS, RHS,
> +                   cast<CondCodeSDNode>(N->getOperand(2))->get());
> +  return DAG.getNode(ISD::BITCAST, SDLoc(N), ResVT, SetCC);
> +}
> +
> +// vselect (v1i1 setcc) ->
> +//     vselect (v1iXX setcc)  (XX is the size of the compared operand
> type)
> +// FIXME: Currently the type legalizer can't handle VSELECT having v1i1 as
> +// condition. If it can legalize "VSELECT v1i1" correctly, no need to
> combine
> +// such VSELECT.
> +static SDValue PerformVSelectCombine(SDNode *N, SelectionDAG &DAG) {
> +  SDValue N0 = N->getOperand(0);
> +  EVT CCVT = N0.getValueType();
> +
> +  if (N0.getOpcode() != ISD::SETCC || CCVT.getVectorNumElements() != 1 ||
> +      CCVT.getVectorElementType() != MVT::i1)
> +    return SDValue();
> +
> +  EVT ResVT = N->getValueType(0);
> +  EVT CmpVT = N0.getOperand(0).getValueType();
> +  // Only combine when the result type is of the same size as the compared
> +  // operands.
> +  if (ResVT.getSizeInBits() != CmpVT.getSizeInBits())
> +    return SDValue();
> +
> +  SDValue IfTrue = N->getOperand(1);
> +  SDValue IfFalse = N->getOperand(2);
> +  SDValue SetCC =
> +      DAG.getSetCC(SDLoc(N), CmpVT.changeVectorElementTypeToInteger(),
> +                   N0.getOperand(0), N0.getOperand(1),
> +                   cast<CondCodeSDNode>(N0.getOperand(2))->get());
> +  return DAG.getNode(ISD::VSELECT, SDLoc(N), ResVT, SetCC,
> +                     IfTrue, IfFalse);
> +}
> +
> +// sign_extend (extract_vector_elt (v1i1 setcc)) ->
> +//     extract_vector_elt (v1iXX setcc)
> +// (XX is the size of the compared operand type)
> +static SDValue PerformSignExtendCombine(SDNode *N, SelectionDAG &DAG) {
> +  SDValue N0 = N->getOperand(0);
> +  SDValue Vec = N0.getOperand(0);
> +
> +  if (N0.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
> +      Vec.getOpcode() != ISD::SETCC)
> +    return SDValue();
> +
> +  EVT ResVT = N->getValueType(0);
> +  EVT CmpVT = Vec.getOperand(0).getValueType();
> +  // Only optimize when the result type is of the same size as the element
> +  // type of the compared operand.
> +  if (ResVT.getSizeInBits() !=
> CmpVT.getVectorElementType().getSizeInBits())
> +    return SDValue();
> +
> +  SDValue Lane = N0.getOperand(1);
> +  SDValue SetCC =
> +      DAG.getSetCC(SDLoc(N), CmpVT.changeVectorElementTypeToInteger(),
> +                   Vec.getOperand(0), Vec.getOperand(1),
> +                   cast<CondCodeSDNode>(Vec.getOperand(2))->get());
> +  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), ResVT,
> +                     SetCC, Lane);
> +}
> +
>  SDValue
>  AArch64TargetLowering::PerformDAGCombine(SDNode *N,
>                                           DAGCombinerInfo &DCI) const {
> @@ -4269,6 +4356,9 @@
>    case ISD::SRA:
>    case ISD::SRL:
>      return PerformShiftCombine(N, DCI, getSubtarget());
> +  case ISD::SETCC: return PerformSETCCCombine(N, DCI.DAG);
> +  case ISD::VSELECT: return PerformVSelectCombine(N, DCI.DAG);
> +  case ISD::SIGN_EXTEND: return PerformSignExtendCombine(N, DCI.DAG);
>    case ISD::INTRINSIC_WO_CHAIN:
>      return PerformIntrinsicCombine(N, DCI.DAG);
>    case AArch64ISD::NEON_VDUPLANE:
> Index: test/CodeGen/AArch64/neon-v1i1-setcc.ll
> ===================================================================
> --- /dev/null
> +++ test/CodeGen/AArch64/neon-v1i1-setcc.ll
> @@ -0,0 +1,60 @@
> +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu
> -mattr=+neon -fp-contract=fast | FileCheck %s
> +
> +define i64 @test_sext_extr_cmp_0(<1 x i64> %v1, <1 x i64> %v2) {
> +; CHECK-LABEL: test_sext_extr_cmp_0:
> +; CHECK: cmge d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
> +  %1 = icmp sge <1 x i64> %v1, %v2
> +  %2 = extractelement <1 x i1> %1, i32 0
> +  %vget_lane = sext i1 %2 to i64
> +  ret i64 %vget_lane
> +}
> +
> +define i64 @test_sext_extr_cmp_1(<1 x double> %v1, <1 x double> %v2) {
> +; CHECK-LABEL: test_sext_extr_cmp_1:
> +; CHECK: fcmeq d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
> +  %1 = fcmp oeq <1 x double> %v1, %v2
> +  %2 = extractelement <1 x i1> %1, i32 0
> +  %vget_lane = sext i1 %2 to i64
> +  ret i64 %vget_lane
> +}
> +
> +define <1 x i64> @test_select_v1i1_0(<1 x i64> %v1, <1 x i64> %v2, <1 x
> i64> %v3) {
> +; CHECK-LABEL: test_select_v1i1_0:
> +; CHECK: cmeq d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
> +; CHECK: bsl v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
> +  %1 = icmp eq <1 x i64> %v1, %v2
> +  %res = select <1 x i1> %1, <1 x i64> zeroinitializer, <1 x i64> %v3
> +  ret <1 x i64> %res
> +}
> +
> +define <1 x i64> @test_select_v1i1_1(<1 x double> %v1, <1 x double> %v2,
> <1 x i64> %v3) {
> +; CHECK-LABEL: test_select_v1i1_1:
> +; CHECK: fcmeq d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
> +; CHECK: bsl v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
> +  %1 = fcmp oeq <1 x double> %v1, %v2
> +  %res = select <1 x i1> %1, <1 x i64> zeroinitializer, <1 x i64> %v3
> +  ret <1 x i64> %res
> +}
> +
> +define <1 x double> @test_select_v1i1_2(<1 x i64> %v1, <1 x i64> %v2, <1
> x double> %v3) {
> +; CHECK-LABEL: test_select_v1i1_2:
> +; CHECK: cmeq d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
> +; CHECK: bsl v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
> +  %1 = icmp eq <1 x i64> %v1, %v2
> +  %res = select <1 x i1> %1, <1 x double> zeroinitializer, <1 x double>
> %v3
> +  ret <1 x double> %res
> +}
> +
> +define i32 @test_br_extr_cmp(<1 x i64> %v1, <1 x i64> %v2) {
> +; CHECK-LABEL: test_br_extr_cmp:
> +; CHECK: cmp x{{[0-9]+}}, x{{[0-9]+}}
> +  %1 = icmp eq <1 x i64> %v1, %v2
> +  %2 = extractelement <1 x i1> %1, i32 0
> +  br i1 %2, label %if.end, label %if.then
> +
> +if.then:
> +  ret i32 0;
> +
> +if.end:
> +  ret i32 1;
> +}
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>
>


-- 
Thanks,
-Jiangning
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20140214/88ebfaeb/attachment.html>


More information about the llvm-commits mailing list