[PATCH] [AArch64] Change default legalization behavior of v1i32 to be widen to v2i32 instead of scalarization
Tom Stellard
tom at stellard.net
Wed Jul 2 13:48:25 PDT 2014
On Wed, Jul 02, 2014 at 10:35:24AM +0000, Hao Liu wrote:
> Hi Tim,
>
> Your suggestion is very good.
> I've refactored the patch by using one hook to control all the legalization actions.
>
Hi Hao,
Thanks for working on this. On R600, we will probably want to split everything,
but I will do that as a follow-on patch, since I think it is better if this
patch preserves the current behavior to minimize the impact.
The R600 changes LGTM.
> Thanks,
> -Hao
>
> http://reviews.llvm.org/D4322
>
> Files:
> include/llvm/Target/TargetLowering.h
> lib/CodeGen/TargetLoweringBase.cpp
> lib/Target/AArch64/AArch64ISelLowering.cpp
> lib/Target/AArch64/AArch64ISelLowering.h
> lib/Target/NVPTX/NVPTXISelLowering.cpp
> lib/Target/NVPTX/NVPTXISelLowering.h
> lib/Target/R600/SIISelLowering.cpp
> lib/Target/R600/SIISelLowering.h
> test/CodeGen/AArch64/arm64-neon-copy.ll
> test/CodeGen/AArch64/arm64-neon-select_cc.ll
> test/CodeGen/AArch64/trunc-v1i64.ll
> Index: include/llvm/Target/TargetLowering.h
> ===================================================================
> --- include/llvm/Target/TargetLowering.h
> +++ include/llvm/Target/TargetLowering.h
> @@ -185,10 +185,15 @@
> /// Return true if the target has BitExtract instructions.
> bool hasExtractBitsInsn() const { return HasExtractBitsInsn; }
>
> - /// Return true if a vector of the given type should be split
> - /// (TypeSplitVector) instead of promoted (TypePromoteInteger) during type
> - /// legalization.
> - virtual bool shouldSplitVectorType(EVT /*VT*/) const { return false; }
> + /// Return the preferred vector type legalization action.
> + virtual TargetLoweringBase::LegalizeTypeAction
> + getPreferredVectorAction(EVT VT) const {
> + // The default action for one element vectors is to scalarize
> + if (VT.getVectorNumElements() == 1)
> + return TypeScalarizeVector;
> + // The default action for other vectors is to promote
> + return TypePromoteInteger;
> + }
>
> // There are two general methods for expanding a BUILD_VECTOR node:
> // 1. Use SCALAR_TO_VECTOR on the defined scalar values and then shuffle
> Index: lib/CodeGen/TargetLoweringBase.cpp
> ===================================================================
> --- lib/CodeGen/TargetLoweringBase.cpp
> +++ lib/CodeGen/TargetLoweringBase.cpp
> @@ -1084,24 +1084,25 @@
> // Loop over all of the vector value types to see which need transformations.
> for (unsigned i = MVT::FIRST_VECTOR_VALUETYPE;
> i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
> - MVT VT = (MVT::SimpleValueType)i;
> - if (isTypeLegal(VT)) continue;
> + MVT VT = (MVT::SimpleValueType) i;
> + if (isTypeLegal(VT))
> + continue;
>
> - // Determine if there is a legal wider type. If so, we should promote to
> - // that wider vector type.
> MVT EltVT = VT.getVectorElementType();
> unsigned NElts = VT.getVectorNumElements();
> - if (NElts != 1 && !shouldSplitVectorType(VT)) {
> - bool IsLegalWiderType = false;
> - // First try to promote the elements of integer vectors. If no legal
> - // promotion was found, fallback to the widen-vector method.
> - for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
> - MVT SVT = (MVT::SimpleValueType)nVT;
> + bool IsLegalWiderType = false;
> + LegalizeTypeAction PreferredAction = getPreferredVectorAction(VT);
> + switch (PreferredAction) {
> + case TypePromoteInteger: {
> + // Try to promote the elements of integer vectors. If no legal
> + // promotion was found, fall through to the widen-vector method.
> + for (unsigned nVT = i + 1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
> + MVT SVT = (MVT::SimpleValueType) nVT;
> // Promote vectors of integers to vectors with the same number
> // of elements, with a wider element type.
> if (SVT.getVectorElementType().getSizeInBits() > EltVT.getSizeInBits()
> - && SVT.getVectorNumElements() == NElts &&
> - isTypeLegal(SVT) && SVT.getScalarType().isInteger()) {
> + && SVT.getVectorNumElements() == NElts && isTypeLegal(SVT)
> + && SVT.getScalarType().isInteger()) {
> TransformToType[i] = SVT;
> RegisterTypeForVT[i] = SVT;
> NumRegistersForVT[i] = 1;
> @@ -1110,15 +1111,15 @@
> break;
> }
> }
> -
> - if (IsLegalWiderType) continue;
> -
> + if (IsLegalWiderType)
> + break;
> + }
> + case TypeWidenVector: {
> // Try to widen the vector.
> - for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
> - MVT SVT = (MVT::SimpleValueType)nVT;
> - if (SVT.getVectorElementType() == EltVT &&
> - SVT.getVectorNumElements() > NElts &&
> - isTypeLegal(SVT)) {
> + for (unsigned nVT = i + 1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
> + MVT SVT = (MVT::SimpleValueType) nVT;
> + if (SVT.getVectorElementType() == EltVT
> + && SVT.getVectorNumElements() > NElts && isTypeLegal(SVT)) {
> TransformToType[i] = SVT;
> RegisterTypeForVT[i] = SVT;
> NumRegistersForVT[i] = 1;
> @@ -1127,27 +1128,34 @@
> break;
> }
> }
> - if (IsLegalWiderType) continue;
> + if (IsLegalWiderType)
> + break;
> }
> -
> - MVT IntermediateVT;
> - MVT RegisterVT;
> - unsigned NumIntermediates;
> - NumRegistersForVT[i] =
> - getVectorTypeBreakdownMVT(VT, IntermediateVT, NumIntermediates,
> - RegisterVT, this);
> - RegisterTypeForVT[i] = RegisterVT;
> -
> - MVT NVT = VT.getPow2VectorType();
> - if (NVT == VT) {
> - // Type is already a power of 2. The default action is to split.
> - TransformToType[i] = MVT::Other;
> - unsigned NumElts = VT.getVectorNumElements();
> - ValueTypeActions.setTypeAction(VT,
> - NumElts > 1 ? TypeSplitVector : TypeScalarizeVector);
> - } else {
> - TransformToType[i] = NVT;
> - ValueTypeActions.setTypeAction(VT, TypeWidenVector);
> + case TypeSplitVector:
> + case TypeScalarizeVector: {
> + MVT IntermediateVT;
> + MVT RegisterVT;
> + unsigned NumIntermediates;
> + NumRegistersForVT[i] = getVectorTypeBreakdownMVT(VT, IntermediateVT,
> + NumIntermediates, RegisterVT, this);
> + RegisterTypeForVT[i] = RegisterVT;
> +
> + MVT NVT = VT.getPow2VectorType();
> + if (NVT == VT) {
> + // Type is already a power of 2. The default action is to split.
> + TransformToType[i] = MVT::Other;
> + if (PreferredAction == TypeScalarizeVector)
> + ValueTypeActions.setTypeAction(VT, TypeScalarizeVector);
> + else
> + ValueTypeActions.setTypeAction(VT, TypeSplitVector);
> + } else {
> + TransformToType[i] = NVT;
> + ValueTypeActions.setTypeAction(VT, TypeWidenVector);
> + }
> + break;
> + }
> + default:
> + llvm_unreachable("Unknown vector legalization action!");
> }
> }
>
> Index: lib/Target/AArch64/AArch64ISelLowering.cpp
> ===================================================================
> --- lib/Target/AArch64/AArch64ISelLowering.cpp
> +++ lib/Target/AArch64/AArch64ISelLowering.cpp
> @@ -7886,6 +7886,19 @@
> return Inst->getType()->getPrimitiveSizeInBits() <= 128;
> }
>
> +TargetLoweringBase::LegalizeTypeAction
> +AArch64TargetLowering::getPreferredVectorAction(EVT VT) const {
> + MVT SVT = VT.getSimpleVT();
> + // During type legalization, we prefer to widen v1i8, v1i16, v1i32 to v8i8,
> + // v4i16, v2i32 instead of to promote.
> + if (SVT == MVT::v1i8 || SVT == MVT::v1i16 || SVT == MVT::v1i32
> + || SVT == MVT::v1f32)
> + return TypeWidenVector;
> + if (VT.getVectorNumElements() == 1)
> + return TypeScalarizeVector;
> + return TypePromoteInteger;
> +}
> +
> Value *AArch64TargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
> AtomicOrdering Ord) const {
> Module *M = Builder.GetInsertBlock()->getParent()->getParent();
> Index: lib/Target/AArch64/AArch64ISelLowering.h
> ===================================================================
> --- lib/Target/AArch64/AArch64ISelLowering.h
> +++ lib/Target/AArch64/AArch64ISelLowering.h
> @@ -324,6 +324,9 @@
>
> bool shouldExpandAtomicInIR(Instruction *Inst) const override;
>
> + TargetLoweringBase::LegalizeTypeAction
> + getPreferredVectorAction(EVT VT) const override;
> +
> private:
> /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
> /// make the right decision when generating code for different targets.
> Index: lib/Target/NVPTX/NVPTXISelLowering.cpp
> ===================================================================
> --- lib/Target/NVPTX/NVPTXISelLowering.cpp
> +++ lib/Target/NVPTX/NVPTXISelLowering.cpp
> @@ -473,8 +473,13 @@
> }
> }
>
> -bool NVPTXTargetLowering::shouldSplitVectorType(EVT VT) const {
> - return VT.getScalarType() == MVT::i1;
> +TargetLoweringBase::LegalizeTypeAction
> +NVPTXTargetLowering::getPreferredVectorAction(EVT VT) const {
> + if (VT.getVectorNumElements() == 1)
> + return TypeScalarizeVector;
> + if (VT.getScalarType() == MVT::i1)
> + return TypeSplitVector;
> + return TypePromoteInteger;
> }
>
> SDValue
> Index: lib/Target/NVPTX/NVPTXISelLowering.h
> ===================================================================
> --- lib/Target/NVPTX/NVPTXISelLowering.h
> +++ lib/Target/NVPTX/NVPTXISelLowering.h
> @@ -242,7 +242,8 @@
> // PTX always uses 32-bit shift amounts
> MVT getScalarShiftAmountTy(EVT LHSTy) const override { return MVT::i32; }
>
> - bool shouldSplitVectorType(EVT VT) const override;
> + TargetLoweringBase::LegalizeTypeAction
> + getPreferredVectorAction(EVT VT) const override;
>
> private:
> const NVPTXSubtarget &nvptxSubtarget; // cache the subtarget here
> Index: lib/Target/R600/SIISelLowering.cpp
> ===================================================================
> --- lib/Target/R600/SIISelLowering.cpp
> +++ lib/Target/R600/SIISelLowering.cpp
> @@ -271,8 +271,13 @@
> return VT.bitsGT(MVT::i32);
> }
>
> -bool SITargetLowering::shouldSplitVectorType(EVT VT) const {
> - return VT.getScalarType().bitsLE(MVT::i16);
> +TargetLoweringBase::LegalizeTypeAction
> +SITargetLowering::getPreferredVectorAction(EVT VT) const {
> + if (VT.getVectorNumElements() == 1)
> + return TypeScalarizeVector;
> + if (VT.getScalarType().bitsLE(MVT::i16))
> + return TypeSplitVector;
> + return TypePromoteInteger;
> }
>
> bool SITargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
> Index: lib/Target/R600/SIISelLowering.h
> ===================================================================
> --- lib/Target/R600/SIISelLowering.h
> +++ lib/Target/R600/SIISelLowering.h
> @@ -50,7 +50,9 @@
> SITargetLowering(TargetMachine &tm);
> bool allowsUnalignedMemoryAccesses(EVT VT, unsigned AS,
> bool *IsFast) const override;
> - bool shouldSplitVectorType(EVT VT) const override;
> +
> + TargetLoweringBase::LegalizeTypeAction
> + getPreferredVectorAction(EVT VT) const override;
>
> bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
> Type *Ty) const override;
> Index: test/CodeGen/AArch64/arm64-neon-copy.ll
> ===================================================================
> --- test/CodeGen/AArch64/arm64-neon-copy.ll
> +++ test/CodeGen/AArch64/arm64-neon-copy.ll
> @@ -842,7 +842,7 @@
>
> define <8 x i8> @testDUP.v1i8(<1 x i8> %a) {
> ; CHECK-LABEL: testDUP.v1i8:
> -; CHECK: dup {{v[0-9]+}}.8b, {{w[0-9]+}}
> +; CHECK: dup v0.8b, v0.b[0]
> %b = extractelement <1 x i8> %a, i32 0
> %c = insertelement <8 x i8> undef, i8 %b, i32 0
> %d = insertelement <8 x i8> %c, i8 %b, i32 1
> @@ -857,7 +857,7 @@
>
> define <8 x i16> @testDUP.v1i16(<1 x i16> %a) {
> ; CHECK-LABEL: testDUP.v1i16:
> -; CHECK: dup {{v[0-9]+}}.8h, {{w[0-9]+}}
> +; CHECK: dup v0.8h, v0.h[0]
> %b = extractelement <1 x i16> %a, i32 0
> %c = insertelement <8 x i16> undef, i16 %b, i32 0
> %d = insertelement <8 x i16> %c, i16 %b, i32 1
> @@ -872,7 +872,7 @@
>
> define <4 x i32> @testDUP.v1i32(<1 x i32> %a) {
> ; CHECK-LABEL: testDUP.v1i32:
> -; CHECK: dup {{v[0-9]+}}.4s, {{w[0-9]+}}
> +; CHECK: dup v0.4s, v0.s[0]
> %b = extractelement <1 x i32> %a, i32 0
> %c = insertelement <4 x i32> undef, i32 %b, i32 0
> %d = insertelement <4 x i32> %c, i32 %b, i32 1
> @@ -1411,35 +1411,35 @@
>
> define <4 x i16> @concat_vector_v4i16(<1 x i16> %a) {
> ; CHECK-LABEL: concat_vector_v4i16:
> -; CHECK: dup {{v[0-9]+}}.4h, {{w[0-9]+}}
> +; CHECK: dup v0.4h, v0.h[0]
> %r = shufflevector <1 x i16> %a, <1 x i16> undef, <4 x i32> zeroinitializer
> ret <4 x i16> %r
> }
>
> define <4 x i32> @concat_vector_v4i32(<1 x i32> %a) {
> ; CHECK-LABEL: concat_vector_v4i32:
> -; CHECK: dup {{v[0-9]+}}.4s, {{w[0-9]+}}
> +; CHECK: dup v0.4s, v0.s[0]
> %r = shufflevector <1 x i32> %a, <1 x i32> undef, <4 x i32> zeroinitializer
> ret <4 x i32> %r
> }
>
> define <8 x i8> @concat_vector_v8i8(<1 x i8> %a) {
> ; CHECK-LABEL: concat_vector_v8i8:
> -; CHECK: dup {{v[0-9]+}}.8b, {{w[0-9]+}}
> +; CHECK: dup v0.8b, v0.b[0]
> %r = shufflevector <1 x i8> %a, <1 x i8> undef, <8 x i32> zeroinitializer
> ret <8 x i8> %r
> }
>
> define <8 x i16> @concat_vector_v8i16(<1 x i16> %a) {
> ; CHECK-LABEL: concat_vector_v8i16:
> -; CHECK: dup {{v[0-9]+}}.8h, {{w[0-9]+}}
> +; CHECK: dup v0.8h, v0.h[0]
> %r = shufflevector <1 x i16> %a, <1 x i16> undef, <8 x i32> zeroinitializer
> ret <8 x i16> %r
> }
>
> define <16 x i8> @concat_vector_v16i8(<1 x i8> %a) {
> ; CHECK-LABEL: concat_vector_v16i8:
> -; CHECK: dup {{v[0-9]+}}.16b, {{w[0-9]+}}
> +; CHECK: dup v0.16b, v0.b[0]
> %r = shufflevector <1 x i8> %a, <1 x i8> undef, <16 x i32> zeroinitializer
> ret <16 x i8> %r
> }
> Index: test/CodeGen/AArch64/arm64-neon-select_cc.ll
> ===================================================================
> --- test/CodeGen/AArch64/arm64-neon-select_cc.ll
> +++ test/CodeGen/AArch64/arm64-neon-select_cc.ll
> @@ -136,8 +136,8 @@
>
> define <1 x float> @test_select_cc_v1f32(float %a, float %b, <1 x float> %c, <1 x float> %d ) {
> ; CHECK-LABEL: test_select_cc_v1f32:
> -; CHECK: fcmp s0, s1
> -; CHECK-NEXT: fcsel s0, s2, s3, eq
> +; CHECK: fcmeq [[MASK:v[0-9]+]].2s, v0.2s, v1.2s
> +; CHECK-NEXT: bsl [[MASK]].8b, v2.8b, v3.8b
> %cmp31 = fcmp oeq float %a, %b
> %e = select i1 %cmp31, <1 x float> %c, <1 x float> %d
> ret <1 x float> %e
> Index: test/CodeGen/AArch64/trunc-v1i64.ll
> ===================================================================
> --- /dev/null
> +++ test/CodeGen/AArch64/trunc-v1i64.ll
> @@ -0,0 +1,63 @@
> +; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon -verify-machineinstrs < %s | FileCheck %s
> +
> +; An optimization in DAG Combiner to fold
> +; (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...))
> +; will generate nodes like:
> +; v1i32 trunc v1i64, v1i16 trunc v1i64, v1i8 trunc v1i64.
> +; Such nodes will be scalarized by default during type legalization. But such
> +; scalarization will cause an assertion failure, as v1i64 is a legal type in
> +; AArch64. We change the default behavior from scalarization to widening.
> +
> +; FIXME: Currently XTN is generated for v1i32, but it can be optimized.
> +; Just like v1i16 and v1i8, there is no XTN generated.
> +
> +define <2 x i32> @test_v1i32_0(<1 x i64> %in0) {
> +; CHECK-LABEL: test_v1i32_0:
> +; CHECK: xtn v0.2s, v0.2d
> + %1 = shufflevector <1 x i64> %in0, <1 x i64> undef, <2 x i32> <i32 0, i32 undef>
> + %2 = trunc <2 x i64> %1 to <2 x i32>
> + ret <2 x i32> %2
> +}
> +
> +define <2 x i32> @test_v1i32_1(<1 x i64> %in0) {
> +; CHECK-LABEL: test_v1i32_1:
> +; CHECK: xtn v0.2s, v0.2d
> +; CHECK-NEXT: dup v0.2s, v0.s[0]
> + %1 = shufflevector <1 x i64> %in0, <1 x i64> undef, <2 x i32> <i32 undef, i32 0>
> + %2 = trunc <2 x i64> %1 to <2 x i32>
> + ret <2 x i32> %2
> +}
> +
> +define <4 x i16> @test_v1i16_0(<1 x i64> %in0) {
> +; CHECK-LABEL: test_v1i16_0:
> +; CHECK-NOT: xtn
> + %1 = shufflevector <1 x i64> %in0, <1 x i64> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
> + %2 = trunc <4 x i64> %1 to <4 x i16>
> + ret <4 x i16> %2
> +}
> +
> +define <4 x i16> @test_v1i16_1(<1 x i64> %in0) {
> +; CHECK-LABEL: test_v1i16_1:
> +; CHECK-NOT: xtn
> +; CHECK: dup v0.4h, v0.h[0]
> + %1 = shufflevector <1 x i64> %in0, <1 x i64> undef, <4 x i32> <i32 undef, i32 undef, i32 0, i32 undef>
> + %2 = trunc <4 x i64> %1 to <4 x i16>
> + ret <4 x i16> %2
> +}
> +
> +define <8 x i8> @test_v1i8_0(<1 x i64> %in0) {
> +; CHECK-LABEL: test_v1i8_0:
> +; CHECK-NOT: xtn
> + %1 = shufflevector <1 x i64> %in0, <1 x i64> undef, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
> + %2 = trunc <8 x i64> %1 to <8 x i8>
> + ret <8 x i8> %2
> +}
> +
> +define <8 x i8> @test_v1i8_1(<1 x i64> %in0) {
> +; CHECK-LABEL: test_v1i8_1:
> +; CHECK-NOT: xtn
> +; CHECK: dup v0.8b, v0.b[0]
> + %1 = shufflevector <1 x i64> %in0, <1 x i64> undef, <8 x i32> <i32 undef, i32 undef, i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
> + %2 = trunc <8 x i64> %1 to <8 x i8>
> + ret <8 x i8> %2
> +}
> \ No newline at end of file
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
More information about the llvm-commits
mailing list