[PATCH] [AArch64] Change default legalization behavior of v1i32 to be widen to v2i32 instead of scalarization
Tom Stellard
tom at stellard.net
Wed Jul 2 13:48:25 PDT 2014
On Wed, Jul 02, 2014 at 10:35:24AM +0000, Hao Liu wrote:
> Hi Tim,
>
> Your suggestion is very good.
> I've refactored the patch by using one hook to control all the legalization actions.
>
Hi Hao,
Thanks for working on this. On R600, we will probably want to split everything,
but I will do that as a follow-on patch, since I think it is better if this
patch preserves the current behavior to minimize the impact.
The R600 changes LGTM.
> Thanks,
> -Hao
>
> http://reviews.llvm.org/D4322
>
> Files:
> include/llvm/Target/TargetLowering.h
> lib/CodeGen/TargetLoweringBase.cpp
> lib/Target/AArch64/AArch64ISelLowering.cpp
> lib/Target/AArch64/AArch64ISelLowering.h
> lib/Target/NVPTX/NVPTXISelLowering.cpp
> lib/Target/NVPTX/NVPTXISelLowering.h
> lib/Target/R600/SIISelLowering.cpp
> lib/Target/R600/SIISelLowering.h
> test/CodeGen/AArch64/arm64-neon-copy.ll
> test/CodeGen/AArch64/arm64-neon-select_cc.ll
> test/CodeGen/AArch64/trunc-v1i64.ll
> Index: include/llvm/Target/TargetLowering.h
> ===================================================================
> --- include/llvm/Target/TargetLowering.h
> +++ include/llvm/Target/TargetLowering.h
> @@ -185,10 +185,15 @@
> /// Return true if the target has BitExtract instructions.
> bool hasExtractBitsInsn() const { return HasExtractBitsInsn; }
>
> - /// Return true if a vector of the given type should be split
> - /// (TypeSplitVector) instead of promoted (TypePromoteInteger) during type
> - /// legalization.
> - virtual bool shouldSplitVectorType(EVT /*VT*/) const { return false; }
> + /// Return the preferred vector type legalization action.
> + virtual TargetLoweringBase::LegalizeTypeAction
> + getPreferredVectorAction(EVT VT) const {
> + // The default action for one element vectors is to scalarize
> + if (VT.getVectorNumElements() == 1)
> + return TypeScalarizeVector;
> + // The default action for other vectors is to promote
> + return TypePromoteInteger;
> + }
>
> // There are two general methods for expanding a BUILD_VECTOR node:
> // 1. Use SCALAR_TO_VECTOR on the defined scalar values and then shuffle
> Index: lib/CodeGen/TargetLoweringBase.cpp
> ===================================================================
> --- lib/CodeGen/TargetLoweringBase.cpp
> +++ lib/CodeGen/TargetLoweringBase.cpp
> @@ -1084,24 +1084,25 @@
> // Loop over all of the vector value types to see which need transformations.
> for (unsigned i = MVT::FIRST_VECTOR_VALUETYPE;
> i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
> - MVT VT = (MVT::SimpleValueType)i;
> - if (isTypeLegal(VT)) continue;
> + MVT VT = (MVT::SimpleValueType) i;
> + if (isTypeLegal(VT))
> + continue;
>
> - // Determine if there is a legal wider type. If so, we should promote to
> - // that wider vector type.
> MVT EltVT = VT.getVectorElementType();
> unsigned NElts = VT.getVectorNumElements();
> - if (NElts != 1 && !shouldSplitVectorType(VT)) {
> - bool IsLegalWiderType = false;
> - // First try to promote the elements of integer vectors. If no legal
> - // promotion was found, fallback to the widen-vector method.
> - for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
> - MVT SVT = (MVT::SimpleValueType)nVT;
> + bool IsLegalWiderType = false;
> + LegalizeTypeAction PreferredAction = getPreferredVectorAction(VT);
> + switch (PreferredAction) {
> + case TypePromoteInteger: {
> + // Try to promote the elements of integer vectors. If no legal
> + // promotion was found, fall through to the widen-vector method.
> + for (unsigned nVT = i + 1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
> + MVT SVT = (MVT::SimpleValueType) nVT;
> // Promote vectors of integers to vectors with the same number
> // of elements, with a wider element type.
> if (SVT.getVectorElementType().getSizeInBits() > EltVT.getSizeInBits()
> - && SVT.getVectorNumElements() == NElts &&
> - isTypeLegal(SVT) && SVT.getScalarType().isInteger()) {
> + && SVT.getVectorNumElements() == NElts && isTypeLegal(SVT)
> + && SVT.getScalarType().isInteger()) {
> TransformToType[i] = SVT;
> RegisterTypeForVT[i] = SVT;
> NumRegistersForVT[i] = 1;
> @@ -1110,15 +1111,15 @@
> break;
> }
> }
> -
> - if (IsLegalWiderType) continue;
> -
> + if (IsLegalWiderType)
> + break;
> + }
> + case TypeWidenVector: {
> // Try to widen the vector.
> - for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
> - MVT SVT = (MVT::SimpleValueType)nVT;
> - if (SVT.getVectorElementType() == EltVT &&
> - SVT.getVectorNumElements() > NElts &&
> - isTypeLegal(SVT)) {
> + for (unsigned nVT = i + 1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
> + MVT SVT = (MVT::SimpleValueType) nVT;
> + if (SVT.getVectorElementType() == EltVT
> + && SVT.getVectorNumElements() > NElts && isTypeLegal(SVT)) {
> TransformToType[i] = SVT;
> RegisterTypeForVT[i] = SVT;
> NumRegistersForVT[i] = 1;
> @@ -1127,27 +1128,34 @@
> break;
> }
> }
> - if (IsLegalWiderType) continue;
> + if (IsLegalWiderType)
> + break;
> }
> -
> - MVT IntermediateVT;
> - MVT RegisterVT;
> - unsigned NumIntermediates;
> - NumRegistersForVT[i] =
> - getVectorTypeBreakdownMVT(VT, IntermediateVT, NumIntermediates,
> - RegisterVT, this);
> - RegisterTypeForVT[i] = RegisterVT;
> -
> - MVT NVT = VT.getPow2VectorType();
> - if (NVT == VT) {
> - // Type is already a power of 2. The default action is to split.
> - TransformToType[i] = MVT::Other;
> - unsigned NumElts = VT.getVectorNumElements();
> - ValueTypeActions.setTypeAction(VT,
> - NumElts > 1 ? TypeSplitVector : TypeScalarizeVector);
> - } else {
> - TransformToType[i] = NVT;
> - ValueTypeActions.setTypeAction(VT, TypeWidenVector);
> + case TypeSplitVector:
> + case TypeScalarizeVector: {
> + MVT IntermediateVT;
> + MVT RegisterVT;
> + unsigned NumIntermediates;
> + NumRegistersForVT[i] = getVectorTypeBreakdownMVT(VT, IntermediateVT,
> + NumIntermediates, RegisterVT, this);
> + RegisterTypeForVT[i] = RegisterVT;
> +
> + MVT NVT = VT.getPow2VectorType();
> + if (NVT == VT) {
> + // Type is already a power of 2. The default action is to split.
> + TransformToType[i] = MVT::Other;
> + if (PreferredAction == TypeScalarizeVector)
> + ValueTypeActions.setTypeAction(VT, TypeScalarizeVector);
> + else
> + ValueTypeActions.setTypeAction(VT, TypeSplitVector);
> + } else {
> + TransformToType[i] = NVT;
> + ValueTypeActions.setTypeAction(VT, TypeWidenVector);
> + }
> + break;
> + }
> + default:
> + llvm_unreachable("Unknown vector legalization action!");
> }
> }
>
> Index: lib/Target/AArch64/AArch64ISelLowering.cpp
> ===================================================================
> --- lib/Target/AArch64/AArch64ISelLowering.cpp
> +++ lib/Target/AArch64/AArch64ISelLowering.cpp
> @@ -7886,6 +7886,19 @@
> return Inst->getType()->getPrimitiveSizeInBits() <= 128;
> }
>
> +TargetLoweringBase::LegalizeTypeAction
> +AArch64TargetLowering::getPreferredVectorAction(EVT VT) const {
> + MVT SVT = VT.getSimpleVT();
> + // During type legalization, we prefer to widen v1i8, v1i16, v1i32 to v8i8,
> + // v4i16, v2i32 instead of to promote.
> + if (SVT == MVT::v1i8 || SVT == MVT::v1i16 || SVT == MVT::v1i32
> + || SVT == MVT::v1f32)
> + return TypeWidenVector;
> + if (VT.getVectorNumElements() == 1)
> + return TypeScalarizeVector;
> + return TypePromoteInteger;
> +}
> +
> Value *AArch64TargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
> AtomicOrdering Ord) const {
> Module *M = Builder.GetInsertBlock()->getParent()->getParent();
> Index: lib/Target/AArch64/AArch64ISelLowering.h
> ===================================================================
> --- lib/Target/AArch64/AArch64ISelLowering.h
> +++ lib/Target/AArch64/AArch64ISelLowering.h
> @@ -324,6 +324,9 @@
>
> bool shouldExpandAtomicInIR(Instruction *Inst) const override;
>
> + TargetLoweringBase::LegalizeTypeAction
> + getPreferredVectorAction(EVT VT) const override;
> +
> private:
> /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
> /// make the right decision when generating code for different targets.
> Index: lib/Target/NVPTX/NVPTXISelLowering.cpp
> ===================================================================
> --- lib/Target/NVPTX/NVPTXISelLowering.cpp
> +++ lib/Target/NVPTX/NVPTXISelLowering.cpp
> @@ -473,8 +473,13 @@
> }
> }
>
> -bool NVPTXTargetLowering::shouldSplitVectorType(EVT VT) const {
> - return VT.getScalarType() == MVT::i1;
> +TargetLoweringBase::LegalizeTypeAction
> +NVPTXTargetLowering::getPreferredVectorAction(EVT VT) const {
> + if (VT.getVectorNumElements() == 1)
> + return TypeScalarizeVector;
> + if (VT.getScalarType() == MVT::i1)
> + return TypeSplitVector;
> + return TypePromoteInteger;
> }
>
> SDValue
> Index: lib/Target/NVPTX/NVPTXISelLowering.h
> ===================================================================
> --- lib/Target/NVPTX/NVPTXISelLowering.h
> +++ lib/Target/NVPTX/NVPTXISelLowering.h
> @@ -242,7 +242,8 @@
> // PTX always uses 32-bit shift amounts
> MVT getScalarShiftAmountTy(EVT LHSTy) const override { return MVT::i32; }
>
> - bool shouldSplitVectorType(EVT VT) const override;
> + TargetLoweringBase::LegalizeTypeAction
> + getPreferredVectorAction(EVT VT) const override;
>
> private:
> const NVPTXSubtarget &nvptxSubtarget; // cache the subtarget here
> Index: lib/Target/R600/SIISelLowering.cpp
> ===================================================================
> --- lib/Target/R600/SIISelLowering.cpp
> +++ lib/Target/R600/SIISelLowering.cpp
> @@ -271,8 +271,13 @@
> return VT.bitsGT(MVT::i32);
> }
>
> -bool SITargetLowering::shouldSplitVectorType(EVT VT) const {
> - return VT.getScalarType().bitsLE(MVT::i16);
> +TargetLoweringBase::LegalizeTypeAction
> +SITargetLowering::getPreferredVectorAction(EVT VT) const {
> + if (VT.getVectorNumElements() == 1)
> + return TypeScalarizeVector;
> + if (VT.getScalarType().bitsLE(MVT::i16))
> + return TypeSplitVector;
> + return TypePromoteInteger;
> }
>
> bool SITargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
> Index: lib/Target/R600/SIISelLowering.h
> ===================================================================
> --- lib/Target/R600/SIISelLowering.h
> +++ lib/Target/R600/SIISelLowering.h
> @@ -50,7 +50,9 @@
> SITargetLowering(TargetMachine &tm);
> bool allowsUnalignedMemoryAccesses(EVT VT, unsigned AS,
> bool *IsFast) const override;
> - bool shouldSplitVectorType(EVT VT) const override;
> +
> + TargetLoweringBase::LegalizeTypeAction
> + getPreferredVectorAction(EVT VT) const override;
>
> bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
> Type *Ty) const override;
> Index: test/CodeGen/AArch64/arm64-neon-copy.ll
> ===================================================================
> --- test/CodeGen/AArch64/arm64-neon-copy.ll
> +++ test/CodeGen/AArch64/arm64-neon-copy.ll
> @@ -842,7 +842,7 @@
>
> define <8 x i8> @testDUP.v1i8(<1 x i8> %a) {
> ; CHECK-LABEL: testDUP.v1i8:
> -; CHECK: dup {{v[0-9]+}}.8b, {{w[0-9]+}}
> +; CHECK: dup v0.8b, v0.b[0]
> %b = extractelement <1 x i8> %a, i32 0
> %c = insertelement <8 x i8> undef, i8 %b, i32 0
> %d = insertelement <8 x i8> %c, i8 %b, i32 1
> @@ -857,7 +857,7 @@
>
> define <8 x i16> @testDUP.v1i16(<1 x i16> %a) {
> ; CHECK-LABEL: testDUP.v1i16:
> -; CHECK: dup {{v[0-9]+}}.8h, {{w[0-9]+}}
> +; CHECK: dup v0.8h, v0.h[0]
> %b = extractelement <1 x i16> %a, i32 0
> %c = insertelement <8 x i16> undef, i16 %b, i32 0
> %d = insertelement <8 x i16> %c, i16 %b, i32 1
> @@ -872,7 +872,7 @@
>
> define <4 x i32> @testDUP.v1i32(<1 x i32> %a) {
> ; CHECK-LABEL: testDUP.v1i32:
> -; CHECK: dup {{v[0-9]+}}.4s, {{w[0-9]+}}
> +; CHECK: dup v0.4s, v0.s[0]
> %b = extractelement <1 x i32> %a, i32 0
> %c = insertelement <4 x i32> undef, i32 %b, i32 0
> %d = insertelement <4 x i32> %c, i32 %b, i32 1
> @@ -1411,35 +1411,35 @@
>
> define <4 x i16> @concat_vector_v4i16(<1 x i16> %a) {
> ; CHECK-LABEL: concat_vector_v4i16:
> -; CHECK: dup {{v[0-9]+}}.4h, {{w[0-9]+}}
> +; CHECK: dup v0.4h, v0.h[0]
> %r = shufflevector <1 x i16> %a, <1 x i16> undef, <4 x i32> zeroinitializer
> ret <4 x i16> %r
> }
>
> define <4 x i32> @concat_vector_v4i32(<1 x i32> %a) {
> ; CHECK-LABEL: concat_vector_v4i32:
> -; CHECK: dup {{v[0-9]+}}.4s, {{w[0-9]+}}
> +; CHECK: dup v0.4s, v0.s[0]
> %r = shufflevector <1 x i32> %a, <1 x i32> undef, <4 x i32> zeroinitializer
> ret <4 x i32> %r
> }
>
> define <8 x i8> @concat_vector_v8i8(<1 x i8> %a) {
> ; CHECK-LABEL: concat_vector_v8i8:
> -; CHECK: dup {{v[0-9]+}}.8b, {{w[0-9]+}}
> +; CHECK: dup v0.8b, v0.b[0]
> %r = shufflevector <1 x i8> %a, <1 x i8> undef, <8 x i32> zeroinitializer
> ret <8 x i8> %r
> }
>
> define <8 x i16> @concat_vector_v8i16(<1 x i16> %a) {
> ; CHECK-LABEL: concat_vector_v8i16:
> -; CHECK: dup {{v[0-9]+}}.8h, {{w[0-9]+}}
> +; CHECK: dup v0.8h, v0.h[0]
> %r = shufflevector <1 x i16> %a, <1 x i16> undef, <8 x i32> zeroinitializer
> ret <8 x i16> %r
> }
>
> define <16 x i8> @concat_vector_v16i8(<1 x i8> %a) {
> ; CHECK-LABEL: concat_vector_v16i8:
> -; CHECK: dup {{v[0-9]+}}.16b, {{w[0-9]+}}
> +; CHECK: dup v0.16b, v0.b[0]
> %r = shufflevector <1 x i8> %a, <1 x i8> undef, <16 x i32> zeroinitializer
> ret <16 x i8> %r
> }
> Index: test/CodeGen/AArch64/arm64-neon-select_cc.ll
> ===================================================================
> --- test/CodeGen/AArch64/arm64-neon-select_cc.ll
> +++ test/CodeGen/AArch64/arm64-neon-select_cc.ll
> @@ -136,8 +136,8 @@
>
> define <1 x float> @test_select_cc_v1f32(float %a, float %b, <1 x float> %c, <1 x float> %d ) {
> ; CHECK-LABEL: test_select_cc_v1f32:
> -; CHECK: fcmp s0, s1
> -; CHECK-NEXT: fcsel s0, s2, s3, eq
> +; CHECK: fcmeq [[MASK:v[0-9]+]].2s, v0.2s, v1.2s
> +; CHECK-NEXT: bsl [[MASK]].8b, v2.8b, v3.8b
> %cmp31 = fcmp oeq float %a, %b
> %e = select i1 %cmp31, <1 x float> %c, <1 x float> %d
> ret <1 x float> %e
> Index: test/CodeGen/AArch64/trunc-v1i64.ll
> ===================================================================
> --- /dev/null
> +++ test/CodeGen/AArch64/trunc-v1i64.ll
> @@ -0,0 +1,63 @@
> +; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon -verify-machineinstrs < %s | FileCheck %s
> +
> +; An optimization in DAG Combiner to fold
> +; (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...))
> +; will generate nodes like:
> +; v1i32 trunc v1i64, v1i16 trunc v1i64, v1i8 trunc v1i64.
> +; Such nodes will be scalarized by default during type legalization. But such
> +; scalarization will cause an assertion failure, as v1i64 is a legal type in
> +; AArch64. We change the default behavior from scalarization to widening.
> +
> +; FIXME: Currently XTN is generated for v1i32, but it can be optimized.
> +; Just like v1i16 and v1i8, there is no XTN generated.
> +
> +define <2 x i32> @test_v1i32_0(<1 x i64> %in0) {
> +; CHECK-LABEL: test_v1i32_0:
> +; CHECK: xtn v0.2s, v0.2d
> + %1 = shufflevector <1 x i64> %in0, <1 x i64> undef, <2 x i32> <i32 0, i32 undef>
> + %2 = trunc <2 x i64> %1 to <2 x i32>
> + ret <2 x i32> %2
> +}
> +
> +define <2 x i32> @test_v1i32_1(<1 x i64> %in0) {
> +; CHECK-LABEL: test_v1i32_1:
> +; CHECK: xtn v0.2s, v0.2d
> +; CHECK-NEXT: dup v0.2s, v0.s[0]
> + %1 = shufflevector <1 x i64> %in0, <1 x i64> undef, <2 x i32> <i32 undef, i32 0>
> + %2 = trunc <2 x i64> %1 to <2 x i32>
> + ret <2 x i32> %2
> +}
> +
> +define <4 x i16> @test_v1i16_0(<1 x i64> %in0) {
> +; CHECK-LABEL: test_v1i16_0:
> +; CHECK-NOT: xtn
> + %1 = shufflevector <1 x i64> %in0, <1 x i64> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
> + %2 = trunc <4 x i64> %1 to <4 x i16>
> + ret <4 x i16> %2
> +}
> +
> +define <4 x i16> @test_v1i16_1(<1 x i64> %in0) {
> +; CHECK-LABEL: test_v1i16_1:
> +; CHECK-NOT: xtn
> +; CHECK: dup v0.4h, v0.h[0]
> + %1 = shufflevector <1 x i64> %in0, <1 x i64> undef, <4 x i32> <i32 undef, i32 undef, i32 0, i32 undef>
> + %2 = trunc <4 x i64> %1 to <4 x i16>
> + ret <4 x i16> %2
> +}
> +
> +define <8 x i8> @test_v1i8_0(<1 x i64> %in0) {
> +; CHECK-LABEL: test_v1i8_0:
> +; CHECK-NOT: xtn
> + %1 = shufflevector <1 x i64> %in0, <1 x i64> undef, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
> + %2 = trunc <8 x i64> %1 to <8 x i8>
> + ret <8 x i8> %2
> +}
> +
> +define <8 x i8> @test_v1i8_1(<1 x i64> %in0) {
> +; CHECK-LABEL: test_v1i8_1:
> +; CHECK-NOT: xtn
> +; CHECK: dup v0.8b, v0.b[0]
> + %1 = shufflevector <1 x i64> %in0, <1 x i64> undef, <8 x i32> <i32 undef, i32 undef, i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
> + %2 = trunc <8 x i64> %1 to <8 x i8>
> + ret <8 x i8> %2
> +}
> \ No newline at end of file
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
More information about the llvm-commits
mailing list