[PATCH] R600: Make sign_extend_inreg legal.
Tom Stellard
tom at stellard.net
Wed Apr 2 09:59:11 PDT 2014
On Tue, Apr 01, 2014 at 11:34:09AM -0700, Matt Arsenault wrote:
> I don't know why I didn't just do this before
>
> http://llvm-reviews.chandlerc.com/D3250
>
> Files:
> lib/Target/R600/AMDGPUISelLowering.cpp
> lib/Target/R600/EvergreenInstructions.td
> lib/Target/R600/SIISelLowering.cpp
> lib/Target/R600/SIInstructions.td
> Index: lib/Target/R600/AMDGPUISelLowering.cpp
> ===================================================================
> --- lib/Target/R600/AMDGPUISelLowering.cpp
> +++ lib/Target/R600/AMDGPUISelLowering.cpp
> @@ -212,15 +212,21 @@
> setOperationAction(ISD::SELECT, VT, Expand);
> }
>
> - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom);
> + if (!Subtarget->hasBFE())
> + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
> +
> setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Custom);
> setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Custom);
>
> - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Custom);
> + if (!Subtarget->hasBFE())
> + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
> +
> setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Custom);
> setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Custom);
>
> - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Custom);
> + if (!Subtarget->hasBFE())
> + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
> +
> setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Custom);
> setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Custom);
>
> @@ -1029,81 +1035,22 @@
> MVT VT = Op.getSimpleValueType();
> MVT ScalarVT = VT.getScalarType();
>
> - unsigned SrcBits = ExtraVT.getScalarType().getSizeInBits();
> - unsigned DestBits = ScalarVT.getSizeInBits();
> - unsigned BitsDiff = DestBits - SrcBits;
> -
> - if (!Subtarget->hasBFE())
> - return ExpandSIGN_EXTEND_INREG(Op, BitsDiff, DAG);
> + if (!VT.isVector())
> + return SDValue();
>
> SDValue Src = Op.getOperand(0);
> - if (VT.isVector()) {
> - SDLoc DL(Op);
> - // Need to scalarize this, and revisit each of the scalars later.
> - // TODO: Don't scalarize on Evergreen?
> - unsigned NElts = VT.getVectorNumElements();
> - SmallVector<SDValue, 8> Args;
> - ExtractVectorElements(Src, DAG, Args, 0, NElts);
> -
> - SDValue VTOp = DAG.getValueType(ExtraVT.getScalarType());
> - for (unsigned I = 0; I < NElts; ++I)
> - Args[I] = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, ScalarVT, Args[I], VTOp);
> -
> - return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Args.data(), Args.size());
> - }
> -
> - if (SrcBits == 32) {
> - SDLoc DL(Op);
> -
> - // If the source is 32-bits, this is really half of a 2-register pair, and
> - // we need to discard the unused half of the pair.
> - SDValue TruncSrc = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Src);
> - return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, TruncSrc);
> - }
> -
> - unsigned NElts = VT.isVector() ? VT.getVectorNumElements() : 1;
> -
> - // TODO: Match 64-bit BFE. SI has a 64-bit BFE, but it's scalar only so it
> - // might not be worth the effort, and will need to expand to shifts when
> - // fixing SGPR copies.
> - if (SrcBits < 32 && DestBits <= 32) {
> - SDLoc DL(Op);
> - MVT ExtVT = (NElts == 1) ? MVT::i32 : MVT::getVectorVT(MVT::i32, NElts);
> -
> - if (DestBits != 32)
> - Src = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVT, Src);
> -
> - // FIXME: This should use TargetConstant, but that hits assertions for
> - // Evergreen.
> - SDValue Ext = DAG.getNode(AMDGPUISD::BFE_I32, DL, ExtVT,
> - Op.getOperand(0), // Operand
> - DAG.getConstant(0, ExtVT), // Offset
> - DAG.getConstant(SrcBits, ExtVT)); // Width
> -
> - // Truncate to the original type if necessary.
> - if (ScalarVT == MVT::i32)
> - return Ext;
> - return DAG.getNode(ISD::TRUNCATE, DL, VT, Ext);
> - }
> -
> - // For small types, extend to 32-bits first.
> - if (SrcBits < 32) {
> - SDLoc DL(Op);
> - MVT ExtVT = (NElts == 1) ? MVT::i32 : MVT::getVectorVT(MVT::i32, NElts);
> + SDLoc DL(Op);
>
> - SDValue TruncSrc = DAG.getNode(ISD::TRUNCATE, DL, ExtVT, Src);
> - SDValue Ext32 = DAG.getNode(AMDGPUISD::BFE_I32,
> - DL,
> - ExtVT,
> - TruncSrc, // Operand
> - DAG.getConstant(0, ExtVT), // Offset
> - DAG.getConstant(SrcBits, ExtVT)); // Width
> + // TODO: Don't scalarize on Evergreen?
> + unsigned NElts = VT.getVectorNumElements();
> + SmallVector<SDValue, 8> Args;
> + ExtractVectorElements(Src, DAG, Args, 0, NElts);
>
> - return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Ext32);
> - }
> + SDValue VTOp = DAG.getValueType(ExtraVT.getScalarType());
> + for (unsigned I = 0; I < NElts; ++I)
> + Args[I] = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, ScalarVT, Args[I], VTOp);
>
> - // For everything else, use the standard bitshift expansion.
> - return ExpandSIGN_EXTEND_INREG(Op, BitsDiff, DAG);
> + return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Args.data(), Args.size());
> }
>
> //===----------------------------------------------------------------------===//
> Index: lib/Target/R600/EvergreenInstructions.td
> ===================================================================
> --- lib/Target/R600/EvergreenInstructions.td
> +++ lib/Target/R600/EvergreenInstructions.td
> @@ -286,6 +286,13 @@
> VecALU
> >;
>
> +def : Pat<(i32 (sext_inreg i32:$src, i1)),
> + (BFE_INT_eg i32:$src, (i32 ZERO), (i32 ONE_INT))>;
> +def : Pat<(i32 (sext_inreg i32:$src, i8)),
> + (BFE_INT_eg i32:$src, (i32 ZERO), (MOV_IMM_I32 8))>;
> +def : Pat<(i32 (sext_inreg i32:$src, i16)),
> + (BFE_INT_eg i32:$src, (i32 ZERO), (MOV_IMM_I32 16))>;
> +
> defm : BFIPatterns <BFI_INT_eg>;
>
> def BFM_INT_eg : R600_2OP <0xA0, "BFM_INT",
> Index: lib/Target/R600/SIISelLowering.cpp
> ===================================================================
> --- lib/Target/R600/SIISelLowering.cpp
> +++ lib/Target/R600/SIISelLowering.cpp
> @@ -147,9 +147,6 @@
> setTruncStoreAction(MVT::v8i32, MVT::v8i16, Expand);
> setTruncStoreAction(MVT::v16i32, MVT::v16i16, Expand);
>
> - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Legal);
> - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Legal);
> -
> setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
> setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
> setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
> Index: lib/Target/R600/SIInstructions.td
> ===================================================================
> --- lib/Target/R600/SIInstructions.td
> +++ lib/Target/R600/SIInstructions.td
> @@ -71,6 +71,7 @@
> def S_SEXT_I32_I16 : SOP1_32 <0x0000001a, "S_SEXT_I32_I16",
> [(set i32:$dst, (sext_inreg i32:$src0, i16))]
> >;
> +
> ////def S_BITSET0_B32 : SOP1_BITSET0 <0x0000001b, "S_BITSET0_B32", []>;
> ////def S_BITSET0_B64 : SOP1_BITSET0 <0x0000001c, "S_BITSET0_B64", []>;
> ////def S_BITSET1_B32 : SOP1_BITSET1 <0x0000001d, "S_BITSET1_B32", []>;
> @@ -125,21 +126,6 @@
> >;
> */
>
> -// Handle sext_inreg in i64
> -def : Pat <
> - (i64 (sext_inreg i64:$src, i8)),
> - (INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
> - (S_SEXT_I32_I8 (EXTRACT_SUBREG i64:$src, sub0)), sub0),
> - (S_MOV_B32 -1), sub1)
> ->;
> -
> -def : Pat <
> - (i64 (sext_inreg i64:$src, i16)),
> - (INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
> - (S_SEXT_I32_I16 (EXTRACT_SUBREG i64:$src, sub0)), sub0),
> - (S_MOV_B32 -1), sub1)
> ->;
> -
> let isCompare = 1 in {
> def S_CMPK_LG_I32 : SOPK_32 <0x00000004, "S_CMPK_LG_I32", []>;
> def S_CMPK_GT_I32 : SOPK_32 <0x00000005, "S_CMPK_GT_I32", []>;
> @@ -2267,6 +2253,32 @@
> >;
>
> //===----------------------------------------------------------------------===//
> +// Conversion Patterns
> +//===----------------------------------------------------------------------===//
> +
> +def : Pat<(i32 (sext_inreg i32:$src, i1)),
> + (S_BFE_I32 i32:$src, 65536)>; // 0 | 1 << 16
> +
> +// TODO: Match 64-bit BFE. SI has a 64-bit BFE, but it's scalar only so it
> +// might not be worth the effort, and will need to expand to shifts when
> +// fixing SGPR copies.
> +
> +// Handle sext_inreg in i64
> +def : Pat <
> + (i64 (sext_inreg i64:$src, i8)),
> + (INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
> + (S_SEXT_I32_I8 (EXTRACT_SUBREG i64:$src, sub0)), sub0),
> + (S_MOV_B32 -1), sub1)
> +>;
> +
> +def : Pat <
> + (i64 (sext_inreg i64:$src, i16)),
> + (INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
> + (S_SEXT_I32_I16 (EXTRACT_SUBREG i64:$src, sub0)), sub0),
> + (S_MOV_B32 -1), sub1)
> +>;
> +
I want to move away from using patterns with INSERT_SUBREG, because
it just adds another case we need to handle in the SIFixSGPRCopies pass.
Can you move these patterns into AMDILISelDAGToDAG.cpp and use REG_SEQUENCE
instead of INSERT_SUBREG + IMPLICIT_DEF?
-Tom
> +//===----------------------------------------------------------------------===//
> // Miscellaneous Patterns
> //===----------------------------------------------------------------------===//
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
More information about the llvm-commits
mailing list